nv-ingest-client 2025.11.27.dev20251127__py3-none-any.whl → 2025.12.28.dev20251228__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1269,7 +1269,7 @@ class NvIngestClient:
  ----------
  batch_size : Optional[int]
  The batch_size value to validate. None uses value from
- NV_INGEST_BATCH_SIZE environment variable or default 32.
+ NV_INGEST_BATCH_SIZE environment variable or default 16.

  Returns
  -------
@@ -1279,18 +1279,18 @@ class NvIngestClient:
  # Handle None/default case
  if batch_size is None:
  try:
- batch_size = int(os.getenv("NV_INGEST_CLIENT_BATCH_SIZE", "32"))
+ batch_size = int(os.getenv("NV_INGEST_CLIENT_BATCH_SIZE", "16"))
  except ValueError:
- batch_size = 32
+ batch_size = 16

  # Validate type and range
  if not isinstance(batch_size, int):
- logger.warning(f"batch_size must be an integer, got {type(batch_size).__name__}. Using default 32.")
- return 32
+ logger.warning(f"batch_size must be an integer, got {type(batch_size).__name__}. Using default 16.")
+ return 16

  if batch_size < 1:
- logger.warning(f"batch_size must be >= 1, got {batch_size}. Using default 32.")
- return 32
+ logger.warning(f"batch_size must be >= 1, got {batch_size}. Using default 16.")
+ return 16

  # Performance guidance warnings
  if batch_size < 8:
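
The effective default batch size drops from 32 to 16 in this release (note the docstring still names NV_INGEST_BATCH_SIZE while the code reads NV_INGEST_CLIENT_BATCH_SIZE). A minimal sketch of pinning the old behaviour via the environment variable the code actually reads; the bare NvIngestClient() construction is an assumption and your constructor arguments may differ:

    import os

    # Must be set before the client resolves its batch size.
    os.environ["NV_INGEST_CLIENT_BATCH_SIZE"] = "32"

    from nv_ingest_client.client import NvIngestClient

    client = NvIngestClient()  # batch_size=None falls back to the env var, then 16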
@@ -53,6 +53,7 @@ from nv_ingest_client.primitives.tasks import SplitTask
  from nv_ingest_client.primitives.tasks import StoreTask
  from nv_ingest_client.primitives.tasks import StoreEmbedTask
  from nv_ingest_client.primitives.tasks import UDFTask
+ from nv_ingest_client.util.file_processing.extract import EXTENSION_TO_DOCUMENT_TYPE
  from nv_ingest_client.util.processing import check_schema
  from nv_ingest_client.util.system import ensure_directory_with_permissions
  from nv_ingest_client.util.util import filter_function_kwargs, apply_pdf_split_config_to_job_specs
@@ -421,6 +422,92 @@ class Ingestor:

  return self

+ def _resolve_source_name(self, job_id: str, results_data: Optional[Union[List, Dict]] = None) -> str:
+ """
+ Resolves the source name for a given job ID using available metadata or fallback options.
+
+ Parameters
+ ----------
+ job_id : str
+ The job identifier.
+ results_data : Any, optional
+ The data associated with the job result, which might contain metadata.
+
+ Returns
+ -------
+ str
+ The resolved source name.
+ """
+ source_name = "unknown_source"
+ job_spec = self._client._job_index_to_job_spec.get(job_id)
+
+ if job_spec:
+ source_name = job_spec.source_name
+ else:
+ try:
+ if results_data:
+ first_item = results_data[0] if isinstance(results_data, list) and results_data else results_data
+ if isinstance(first_item, dict):
+ source_name = first_item.get("metadata", {}).get("source_metadata", {}).get("source_id", "")
+ if not source_name:
+ source_name = f"{job_id}"
+ except (IndexError, KeyError, TypeError):
+ source_name = f"{job_id}"
+
+ return source_name
+
+ def _write_results_to_disk(self, doc_data: Any, source_name: str, job_id: str) -> Optional[LazyLoadedList]:
+ """
+ Writes the results for a single job to a JSONL file and returns a LazyLoadedList.
+
+ Parameters
+ ----------
+ doc_data : Any
+ The result data to save.
+ source_name : str
+ The name of the source document.
+ job_id : str
+ The job identifier.
+
+ Returns
+ -------
+ Optional[LazyLoadedList]
+ A proxy object to the saved file, or None if the save failed.
+ """
+ if not self._output_config:
+ logger.warning("Attempted to write results to disk without output configuration.")
+ return None
+
+ try:
+ output_dir = self._output_config["output_directory"]
+ clean_source_basename = get_valid_filename(os.path.basename(source_name))
+ file_name, file_ext = os.path.splitext(clean_source_basename)
+ file_suffix = f".{file_ext.strip('.')}.results.jsonl"
+ if self._output_config["compression"] == "gzip":
+ file_suffix += ".gz"
+ jsonl_filepath = os.path.join(output_dir, safe_filename(output_dir, file_name, file_suffix))
+
+ data_to_save = doc_data if isinstance(doc_data, list) else [doc_data]
+
+ num_items_saved = save_document_results_to_jsonl(
+ data_to_save,
+ jsonl_filepath,
+ source_name,
+ ensure_parent_dir_exists=False,
+ compression=self._output_config["compression"],
+ )
+
+ if num_items_saved > 0:
+ return LazyLoadedList(
+ jsonl_filepath, expected_len=num_items_saved, compression=self._output_config["compression"]
+ )
+ except Exception as e_save:
+ logger.error(
+ f"Disk save I/O task error for job {job_id} (source: {source_name}): {e_save}",
+ exc_info=True,
+ )
+ return None
+
  def ingest(
  self,
  show_progress: bool = False,
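
The new _write_results_to_disk helper persists each job's records as "<basename>.<ext>.results.jsonl", optionally gzip-compressed. A hedged sketch of reading such a file back with only the standard library; the path is illustrative:

    import gzip
    import json

    path = "output/report.pdf.results.jsonl.gz"  # illustrative output path

    opener = gzip.open if path.endswith(".gz") else open
    with opener(path, "rt", encoding="utf-8") as fh:
        records = [json.loads(line) for line in fh if line.strip()]

    print(f"{len(records)} records loaded from {path}")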
@@ -488,52 +575,19 @@ class Ingestor:

  def _perform_save_task(doc_data, job_id, source_name):
  # This function runs in the io_executor
- try:
- output_dir = self._output_config["output_directory"]
- clean_source_basename = get_valid_filename(os.path.basename(source_name))
- file_name, file_ext = os.path.splitext(clean_source_basename)
- file_suffix = f".{file_ext.strip('.')}.results.jsonl"
- if self._output_config["compression"] == "gzip":
- file_suffix += ".gz"
- jsonl_filepath = os.path.join(output_dir, safe_filename(output_dir, file_name, file_suffix))
-
- num_items_saved = save_document_results_to_jsonl(
- doc_data,
- jsonl_filepath,
- source_name,
- ensure_parent_dir_exists=False,
- compression=self._output_config["compression"],
- )
-
- if num_items_saved > 0:
- results = LazyLoadedList(
- jsonl_filepath, expected_len=num_items_saved, compression=self._output_config["compression"]
- )
- if results_lock:
- with results_lock:
- final_results_payload_list.append(results)
- else: # Should not happen if io_executor is used
+ results = self._write_results_to_disk(doc_data, source_name, job_id)
+ if results:
+ if results_lock:
+ with results_lock:
  final_results_payload_list.append(results)
- except Exception as e_save:
- logger.error(
- f"Disk save I/O task error for job {job_id} (source: {source_name}): {e_save}",
- exc_info=True,
- )
+ else: # Should not happen if io_executor is used
+ final_results_payload_list.append(results)

  def _disk_save_callback(
  results_data: Dict[str, Any],
  job_id: str,
  ):
- source_name = "unknown_source_in_callback"
- job_spec = self._client._job_index_to_job_spec.get(job_id)
- if job_spec:
- source_name = job_spec.source_name
- else:
- try:
- if results_data:
- source_name = results_data[0]["metadata"]["source_metadata"]["source_id"]
- except (IndexError, KeyError, TypeError):
- source_name = f"{job_id}"
+ source_name = self._resolve_source_name(job_id, results_data)

  if not results_data:
  logger.warning(f"No data in response for job {job_id} (source: {source_name}). Skipping save.")
@@ -734,12 +788,49 @@ class Ingestor:

  proc_kwargs = filter_function_kwargs(self._client.process_jobs_concurrently_async, **kwargs)

+ stream_to_callback_only = False
+ completion_callback = None
+ async_results_map = {}
+
+ io_executor = None
+ io_futures = []
+
+ if self._output_config:
+ stream_to_callback_only = True
+ output_dir = self._output_config["output_directory"]
+
+ os.makedirs(output_dir, exist_ok=True)
+
+ io_executor = ThreadPoolExecutor(max_workers=1, thread_name_prefix="IngestAsyncIO")
+
+ def _io_task(data: Dict[str, Any], job_id: str):
+ try:
+ source_name = self._resolve_source_name(job_id, data)
+ result = self._write_results_to_disk(data, source_name, job_id)
+ if result:
+ # Store the LazyLoadedList in our map using job_id as key
+ async_results_map[job_id] = result
+ except Exception as e:
+ logger.error(f"Error in async I/O task for job {job_id}: {e}", exc_info=True)
+
+ def _composite_callback(data: Dict[str, Any], job_id: str):
+ """Callback executed by worker threads to save data to disk."""
+ try:
+ future = io_executor.submit(_io_task, data, job_id)
+ io_futures.append(future)
+ except Exception as e:
+ logger.error(f"Error in async callback for job {job_id}: {e}", exc_info=True)
+
+ completion_callback = _composite_callback
+
  final_future: Future = Future()

  processor_future = self._client.process_jobs_concurrently_async(
  job_indices=self._job_ids,
  job_queue_id=self._job_queue_id,
  return_traces=return_traces,
+ completion_callback=completion_callback,
+ stream_to_callback_only=stream_to_callback_only,
  **proc_kwargs,
  )

@@ -759,6 +850,20 @@ class Ingestor:

  results, failures, traces_list = proc_future.result()

+ if io_executor:
+ for f in as_completed(io_futures):
+ if f.exception():
+ logger.error(f"Async I/O task failed: {f.exception()}")
+ io_executor.shutdown(wait=True)
+
+ final_results_list = []
+ if self._output_config:
+ for item in results:
+ if isinstance(item, str) and item in async_results_map:
+ final_results_list.append(async_results_map[item])
+ else:
+ final_results_list = results
+

  failed_job_ids = set()
  for job_id_with_source, error_msg in failures:
@@ -775,18 +880,22 @@ class Ingestor:
  if self._job_states[job_id].state != JobStateEnum.COMPLETED:
  self._job_states[job_id].state = JobStateEnum.COMPLETED

- if self._vdb_bulk_upload and results:
+ if self._vdb_bulk_upload and final_results_list:
  with ThreadPoolExecutor(max_workers=1, thread_name_prefix="VDB_Uploader") as vdb_executor:
  results_future = Future()
- results_future.set_result(results)
+ results_future.set_result(final_results_list)
  vdb_future = vdb_executor.submit(self._vdb_bulk_upload.run_async, results_future)
  vdb_future.result()

+ if self._purge_results_after_vdb_upload and self._output_config:
+ logger.info("Purging saved results from disk after successful VDB upload.")
+ self._purge_saved_results(final_results_list)
+
  parent_trace_ids = (
  self._client.consume_completed_parent_trace_ids() if include_parent_trace_ids else []
  )

- returns = [results]
+ returns = [final_results_list]
  if return_failures:
  returns.append(failures)
  if return_traces:
@@ -794,7 +903,7 @@ class Ingestor:
  if include_parent_trace_ids:
  returns.append(parent_trace_ids)

- final_result = tuple(returns) if len(returns) > 1 else results
+ final_result = tuple(returns) if len(returns) > 1 else final_results_list

  if not final_future.done():
  final_future.set_result(final_result)
@@ -812,6 +921,9 @@
  ):
  job_state.state = final_state

+ if io_executor:
+ io_executor.shutdown(wait=False)
+
  processor_future.add_done_callback(_processor_done_callback)
  return final_future

@@ -963,11 +1075,18 @@
  **kwargs,
  )

+ api_document_type = EXTENSION_TO_DOCUMENT_TYPE.get(document_type.lower(), document_type)
+
  # Extract method from task_options for API schema
  method = task_options.pop("extract_method", None)
  if method is None:
  # Let ExtractTask constructor handle default method selection
- method = "pdfium" # Default fallback
+ if api_document_type == "docx":
+ method = "python_docx"
+ elif api_document_type == "pptx":
+ method = "python_pptx"
+ else:
+ method = "pdfium" # Default fallback

  # Build params dict for API schema
  params = {k: v for k, v in task_options.items() if k != "document_type"}
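
With the change above, Office documents routed through the Ingestor default to their native extractors (python_docx for DOCX, python_pptx for PPTX) rather than pdfium. A hedged sketch of the usual Ingestor chaining API, with file paths and extraction flags chosen for illustration:

    from nv_ingest_client.client import Ingestor

    # extract_method is omitted so the new per-document-type defaults apply:
    # docx -> python_docx, pptx -> python_pptx, everything else -> pdfium.
    ingestor = (
        Ingestor()
        .files(["./docs/report.docx", "./docs/slides.pptx"])
        .extract(extract_text=True, extract_tables=True)
    )
    results = ingestor.ingest()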
@@ -1088,13 +1207,9 @@ class Ingestor:
  Ingestor
  Returns self for chaining.
  """
- # Handle parameter name mapping: store_method -> method for API schema
- if "store_method" in kwargs:
- kwargs["method"] = kwargs.pop("store_method")
-
- # Provide default method if not specified (matching client StoreTask behavior)
- if "method" not in kwargs:
- kwargs["method"] = "minio"
+ deprecated_method = kwargs.pop("store_method", None)
+ if deprecated_method is not None:
+ logger.warning("`store_method` is deprecated and no longer used. Configure storage_uri instead.")

  task_options = check_schema(IngestTaskStoreSchema, kwargs, "store", json.dumps(kwargs))

@@ -1102,7 +1217,9 @@
  store_params = {
  "structured": task_options.structured,
  "images": task_options.images,
- "store_method": task_options.method, # Map method back to store_method
+ "storage_uri": task_options.storage_uri,
+ "storage_options": task_options.storage_options,
+ "public_base_url": task_options.public_base_url,
  "params": task_options.params,
  }
  store_task = StoreTask(**store_params)
@@ -1347,6 +1464,7 @@
  "api_key": task_options.api_key,
  "endpoint_url": task_options.endpoint_url,
  "prompt": task_options.prompt,
+ "system_prompt": task_options.system_prompt,
  "model_name": task_options.model_name,
  }
  caption_task = CaptionTask(**caption_params)
@@ -76,7 +76,7 @@ logger = logging.getLogger(__name__)
  @click.option("--client_kwargs", help="Additional arguments to pass to the client.", default="{}")
  @click.option(
  "--api_version",
- default="v1",
+ default="v2",
  type=click.Choice(["v1", "v2"], case_sensitive=False),
  help="API version to use (v1 or v2). V2 required for PDF split page count feature.",
  )
@@ -120,7 +120,7 @@ Each task must be specified with its type and corresponding options in the '[tas
  Example:
  --task 'split:{"split_by":"page", "split_length":10}'
  --task 'extract:{"document_type":"pdf", "extract_text":true}'
- --task 'extract:{"document_type":"pdf", "extract_method":"nemoretriever_parse"}'
+ --task 'extract:{"document_type":"pdf", "extract_method":"nemotron_parse"}'
  --task 'extract:{"document_type":"pdf", "extract_method":"unstructured_io"}'
  --task 'extract:{"document_type":"docx", "extract_text":true, "extract_images":true}'
  --task 'embed'
@@ -22,18 +22,24 @@ class CaptionTask(Task):
  api_key: str = None,
  endpoint_url: str = None,
  prompt: str = None,
+ system_prompt: str = None,
  model_name: str = None,
  ) -> None:
  super().__init__()

  # Use the API schema for validation
  validated_data = IngestTaskCaptionSchema(
- api_key=api_key, endpoint_url=endpoint_url, prompt=prompt, model_name=model_name
+ api_key=api_key,
+ endpoint_url=endpoint_url,
+ prompt=prompt,
+ system_prompt=system_prompt,
+ model_name=model_name,
  )

  self._api_key = validated_data.api_key
  self._endpoint_url = validated_data.endpoint_url
  self._prompt = validated_data.prompt
+ self._system_prompt = validated_data.system_prompt
  self._model_name = validated_data.model_name

  def __str__(self) -> str:
@@ -49,6 +55,8 @@ class CaptionTask(Task):
  info += f" endpoint_url: {self._endpoint_url}\n"
  if self._prompt:
  info += f" prompt: {self._prompt}\n"
+ if self._system_prompt:
+ info += f" system_prompt: {self._system_prompt}\n"
  if self._model_name:
  info += f" model_name: {self._model_name}\n"

@@ -69,6 +77,9 @@
  if self._prompt:
  task_properties["prompt"] = self._prompt

+ if self._system_prompt:
+ task_properties["system_prompt"] = self._system_prompt
+
  if self._model_name:
  task_properties["model_name"] = self._model_name

@@ -8,6 +8,8 @@

  import logging
  import os
+ import warnings
+ from typing import get_args
  from typing import Any
  from typing import Dict
  from typing import Literal
@@ -52,15 +54,27 @@ _DEFAULT_EXTRACTOR_MAP = {

  _Type_Extract_Method_PDF = Literal[
  "adobe",
- "nemoretriever_parse",
+ "nemotron_parse",
  "haystack",
  "llama_parse",
  "pdfium",
  "tika",
  "unstructured_io",
+ "unstructured_local",
+ "pdfium_hybrid",
  "ocr",
  ]

+ _Type_Extract_Method_DOCX = Literal[
+ "python_docx",
+ "render_as_pdf",
+ ]
+
+ _Type_Extract_Method_PPTX = Literal[
+ "python_pptx",
+ "render_as_pdf",
+ ]
+
  _Type_Extract_Images_Method = Literal["group", "yolox"]

  _Type_Extract_Tables_Method_PDF = Literal["yolox", "paddle"]
@@ -74,7 +88,7 @@ class ExtractTask(Task):
  def __init__(
  self,
  document_type,
- extract_method: _Type_Extract_Method_PDF = None,
+ extract_method: Optional[str] = None,
  extract_text: bool = False,
  extract_images: bool = False,
  extract_tables: bool = False,
@@ -109,6 +123,12 @@
  )
  extract_method = _DEFAULT_EXTRACTOR_MAP[document_type_lower]

+ if extract_method == "nemoretriever_parse":
+ logger.warning("'nemoretriever_parse' is deprecated. Please use 'nemotron_parse' instead.")
+ extract_method = "nemotron_parse"
+
+ self._validate_extract_method(document_type, extract_method)
+
  # Set default extract_charts if None
  if extract_charts is None:
  extract_charts = extract_tables
@@ -240,3 +260,31 @@
  @property
  def document_type(self):
  return self._document_type.value
+
+ def _validate_extract_method(self, document_type: str, extract_method: str):
+ doc_type = document_type.lower()
+
+ valid_docx = set(get_args(_Type_Extract_Method_DOCX))
+ valid_pptx = set(get_args(_Type_Extract_Method_PPTX))
+ valid_pdf = set(get_args(_Type_Extract_Method_PDF))
+
+ if doc_type == "docx" and extract_method not in valid_docx:
+ raise ValueError(f"'{extract_method}' is invalid for DOCX. Options: {valid_docx}")
+
+ elif doc_type == "pptx" and extract_method not in valid_pptx:
+ raise ValueError(f"'{extract_method}' is invalid for PPTX. Options: {valid_pptx}")
+
+ elif doc_type == "pdf" and extract_method not in valid_pdf:
+ raise ValueError(f"'{extract_method}' is invalid for PDF. Options: {valid_pdf}")
+
+ elif doc_type not in ["docx", "pptx", "pdf"]:
+ is_docx_method = extract_method in valid_docx
+ is_pptx_method = extract_method in valid_pptx
+ is_pdf_method = extract_method in valid_pdf
+
+ if (is_docx_method or is_pptx_method) and not is_pdf_method:
+ warnings.warn(
+ f"extract_method '{extract_method}' is valid for Office documents but NOT for PDFs. "
+ "If your batch includes PDFs, extraction may fail for those files. "
+ "Consider leaving extract_method=None for mixed batches."
+ )
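
The new _validate_extract_method check rejects extractor/document-type pairs that cannot work and only warns for mixed or unknown types. A hedged sketch of the resulting behaviour, using the ExtractTask constructor shown earlier in this diff:

    from nv_ingest_client.primitives.tasks.extract import ExtractTask

    # Native Office extractor on a DOCX document: accepted.
    ExtractTask(document_type="docx", extract_method="python_docx", extract_text=True)

    # PDF-only extractor on a DOCX document: now raises ValueError.
    try:
        ExtractTask(document_type="docx", extract_method="pdfium", extract_text=True)
    except ValueError as err:
        print(err)

    # The deprecated name is still accepted, remapped to nemotron_parse with a warning.
    ExtractTask(document_type="pdf", extract_method="nemoretriever_parse")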
@@ -7,8 +7,7 @@
  # pylint: disable=too-many-arguments

  import logging
- from typing import Dict
- from typing import Literal
+ from typing import Dict, Literal, Optional

  from nv_ingest_api.internal.schemas.meta.ingest_job_schema import IngestTaskStoreSchema
  from nv_ingest_api.internal.schemas.meta.ingest_job_schema import IngestTaskStoreEmbedSchema
@@ -17,23 +16,19 @@ from .task_base import Task

  logger = logging.getLogger(__name__)

- _DEFAULT_STORE_METHOD = "minio"
-

  class StoreTask(Task):
  """
  Object for image storage task.
  """

- _Type_Content_Type = Literal["image",]
-
- _Type_Store_Method = Literal["minio",]
-
  def __init__(
  self,
  structured: bool = True,
  images: bool = False,
- store_method: _Type_Store_Method = None,
+ storage_uri: Optional[str] = None,
+ storage_options: Optional[dict] = None,
+ public_base_url: Optional[str] = None,
  params: dict = None,
  **extra_params,
  ) -> None:
@@ -51,12 +46,19 @@ class StoreTask(Task):

  # Use the API schema for validation
  validated_data = IngestTaskStoreSchema(
- structured=structured, images=images, method=store_method or _DEFAULT_STORE_METHOD, params=merged_params
+ structured=structured,
+ images=images,
+ storage_uri=storage_uri,
+ storage_options=storage_options or {},
+ public_base_url=public_base_url,
+ params=merged_params,
  )

  self._structured = validated_data.structured
  self._images = validated_data.images
- self._store_method = validated_data.method
+ self._storage_uri = validated_data.storage_uri
+ self._storage_options = validated_data.storage_options
+ self._public_base_url = validated_data.public_base_url
  self._params = validated_data.params
  self._extra_params = extra_params

@@ -68,7 +70,8 @@
  info += "Store Task:\n"
  info += f" store structured types: {self._structured}\n"
  info += f" store image types: {self._images}\n"
- info += f" store method: {self._store_method}\n"
+ info += f" storage uri: {self._storage_uri}\n"
+ info += f" public base url: {self._public_base_url}\n"
  for key, value in self._extra_params.items():
  info += f" {key}: {value}\n"
  for key, value in self._params.items():
@@ -81,9 +84,11 @@
  """

  task_properties = {
- "method": self._store_method,
  "structured": self._structured,
  "images": self._images,
+ "storage_uri": self._storage_uri,
+ "storage_options": self._storage_options,
+ "public_base_url": self._public_base_url,
  "params": self._params,
  **self._extra_params,
  }
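
StoreTask replaces the fixed MinIO store_method with a generic storage_uri plus storage_options and an optional public_base_url, as reflected in the serialized properties above. A hedged construction sketch using the new signature; the URI, credentials, and base URL are placeholders:

    from nv_ingest_client.primitives.tasks.store import StoreTask

    store_task = StoreTask(
        structured=True,
        images=True,
        storage_uri="s3://my-bucket/nv-ingest-artifacts",       # placeholder destination
        storage_options={"key": "AKIA...", "secret": "..."},    # placeholder credentials
        public_base_url="https://cdn.example.com/nv-ingest",    # placeholder public URL
    )
    # The serialized task now carries storage_uri / storage_options / public_base_url
    # instead of the old "method": "minio" field.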
@@ -0,0 +1,276 @@
+ import logging
+
+
+ from nv_ingest_client.util.vdb.adt_vdb import VDB
+ from datetime import timedelta
+ from functools import partial
+ from urllib.parse import urlparse
+ from nv_ingest_client.util.transport import infer_microservice
+ import lancedb
+ import pyarrow as pa
+
+ logger = logging.getLogger(__name__)
+
+
+ def create_lancedb_results(results):
+ """Transform NV-Ingest pipeline results into LanceDB ingestible rows.
+
+ The NV-Ingest pipeline provides nested lists of record dictionaries. This
+ helper extracts the inner `metadata` dict for each record, filters out
+ entries without an embedding, and returns a list of dictionaries with the
+ exact fields expected by the LanceDB table schema used in
+ `LanceDB.create_index`.
+
+ Parameters
+ ----------
+ results : list
+ Nested list-of-lists containing record dicts in the NV-Ingest format.
+
+ Returns
+ -------
+ list
+ List of dictionaries with keys: `vector` (embedding list), `text`
+ (string content), `metadata` (page number) and `source` (source id).
+
+ Notes
+ -----
+ - The function expects each inner record to have a `metadata` mapping
+ containing `embedding`, `content`, `content_metadata.page_number`, and
+ `source_metadata.source_id`.
+ - Records with `embedding is None` are skipped.
+ """
+ old_results = [res["metadata"] for result in results for res in result]
+ results = []
+ for result in old_results:
+ if result["embedding"] is None:
+ continue
+ results.append(
+ {
+ "vector": result["embedding"],
+ "text": result["content"],
+ "metadata": result["content_metadata"]["page_number"],
+ "source": result["source_metadata"]["source_id"],
+ }
+ )
+ return results
+
+
+ class LanceDB(VDB):
+ """LanceDB operator implementing the VDB interface.
+
+ This class adapts NV-Ingest records to LanceDB, providing index creation,
+ ingestion, and retrieval hooks. The implementation is intentionally small
+ and focuses on the example configuration used in NV-Ingest evaluation
+ scripts.
+ """
+
+ def __init__(
+ self,
+ uri=None,
+ overwrite=True,
+ table_name="nv-ingest",
+ index_type="IVF_HNSW_SQ",
+ metric="l2",
+ num_partitions=16,
+ num_sub_vectors=256,
+ **kwargs
+ ):
+ """Initialize the LanceDB VDB operator.
+
+ Parameters
+ ----------
+ uri : str, optional
+ LanceDB connection URI (default is "lancedb" for local file-based
+ storage).
+ overwrite : bool, optional
+ If True, existing tables will be overwritten during index creation.
+ If False, new data will be appended to existing tables.
+ table_name : str, optional
+ Name of the LanceDB table to create/use (default is "nv-ingest").
+ index_type : str, optional
+ Type of vector index to create (default is "IVF_HNSW_SQ").
+ metric : str, optional
+ Distance metric for the vector index (default is "l2").
+ num_partitions : int, optional
+ Number of partitions for the vector index (default is 16).
+ num_sub_vectors : int, optional
+ Number of sub-vectors for the vector index (default is 256).
+ **kwargs : dict
+ Forwarded configuration options. This implementation does not
+ actively consume specific keys, but passing parameters such as
+ `uri`, `index_name`, or security options is supported by the
+ interface pattern and may be used by future enhancements.
+ """
+ self.uri = uri or "lancedb"
+ self.overwrite = overwrite
+ self.table_name = table_name
+ self.index_type = index_type
+ self.metric = metric
+ self.num_partitions = num_partitions
+ self.num_sub_vectors = num_sub_vectors
+ super().__init__(**kwargs)
+
+ def create_index(self, records=None, table_name="nv-ingest", **kwargs):
+ """Create a LanceDB table and populate it with transformed records.
+
+ This method connects to LanceDB, transforms NV-Ingest records using
+ `create_lancedb_results`, builds a PyArrow schema that matches the
+ expected table layout, and creates/overwrites the table named by
+ `table_name`.
+
+ Parameters
+ ----------
+ records : list, optional
+ NV-Ingest records in nested list format (the same structure passed
+ to `run`). If ``None``, an empty table will be created.
+
+ table_name : str, optional
+ Name of the LanceDB table to create (default is "nv-ingest").
+
+ Returns
+ -------
+ table
+ The LanceDB table object returned by `db.create_table`.
+ """
+ db = lancedb.connect(uri=self.uri)
+ results = create_lancedb_results(records)
+ schema = pa.schema(
+ [
+ pa.field("vector", pa.list_(pa.float32(), 2048)),
+ pa.field("text", pa.string()),
+ pa.field("metadata", pa.string()),
+ pa.field("source", pa.string()),
+ ]
+ )
+ table = db.create_table(
+ table_name, data=results, schema=schema, mode="overwrite" if self.overwrite else "append"
+ )
+ return table
+
+ def write_to_index(
+ self,
+ records,
+ table=None,
+ index_type="IVF_HNSW_SQ",
+ metric="l2",
+ num_partitions=16,
+ num_sub_vectors=256,
+ **kwargs
+ ):
+ """Create an index on the LanceDB table and wait for it to become ready.
+
+ This function calls `table.create_index` with an IVF+HNSW+SQ index
+ configuration used in NV-Ingest benchmarks. After requesting index
+ construction it lists available indices and waits for each one to
+ reach a ready state using `table.wait_for_index`.
+
+ Parameters
+ ----------
+ records : list
+ The original records being indexed (not used directly in this
+ implementation but kept in the signature for consistency).
+ table : object
+ LanceDB table object returned by `create_index`.
+ """
+ table.create_index(
+ index_type=index_type,
+ metric=metric,
+ num_partitions=num_partitions,
+ num_sub_vectors=num_sub_vectors,
+ # accelerator="cuda",
+ vector_column_name="vector",
+ )
+ for index_stub in table.list_indices():
+ table.wait_for_index([index_stub.name], timeout=timedelta(seconds=600))
+
+ def retrieval(
+ self,
+ queries,
+ table=None,
+ embedding_endpoint="http://localhost:8012/v1",
+ nvidia_api_key=None,
+ model_name="nvidia/llama-3.2-nv-embedqa-1b-v2",
+ result_fields=["text", "metadata", "source"],
+ top_k=10,
+ **kwargs
+ ):
+ """Run similarity search for a list of text queries.
+
+ This method converts textual queries to embeddings by calling the
+ transport helper `infer_microservice` (configured to use an NVIDIA
+ embedding model in the example) and performs a vector search against
+ the LanceDB `table`.
+
+ Parameters
+ ----------
+ queries : list[str]
+ Text queries to be embedded and searched.
+ table : object
+ LanceDB table object with a built vector index.
+ embedding_endpoint : str, optional
+ URL of the embedding microservice (default is
+ "http://localhost:8012/v1").
+ nvidia_api_key : str, optional
+ NVIDIA API key for authentication with the embedding service. If
+ ``None``, no authentication is used.
+ model_name : str, optional
+ Name of the embedding model to use (default is
+ "nvidia/llama-3.2-nv-embedqa-1b-v2").
+ result_fields : list, optional
+ List of field names to retrieve from each hit document (default is
+ `["text", "metadata", "source"]`).
+ top_k : int, optional
+ Number of top results to return per query (default is 10).
+
+ Returns
+ -------
+ list[list[dict]]
+ For each input query, a list of hit documents (each document is a
+ dict with fields such as `text`, `metadata`, and `source`), limited
+ to `top_k` results per query.
+ """
+ embed_model = partial(
+ infer_microservice,
+ model_name=model_name,
+ embedding_endpoint=embedding_endpoint,
+ nvidia_api_key=nvidia_api_key,
+ input_type="query",
+ output_names=["embeddings"],
+ grpc=not ("http" in urlparse(embedding_endpoint).scheme),
+ )
+ results = []
+ query_embeddings = embed_model(queries)
+ for query_embed in query_embeddings:
+ results.append(
+ table.search([query_embed], vector_column_name="vector").select(result_fields).limit(top_k).to_list()
+ )
+ return results
+
+ def run(self, records):
+ """Orchestrate index creation and data ingestion.
+
+ The `run` method is the public entry point used by NV-Ingest pipeline
+ tasks. A minimal implementation first ensures the table exists by
+ calling `create_index` and then kicks off index construction with
+ `write_to_index`.
+
+ Parameters
+ ----------
+ records : list
+ NV-Ingest records to index.
+
+ Returns
+ -------
+ list
+ The original `records` list is returned unchanged to make the
+ operator composable in pipelines.
+ """
+ table = self.create_index(records=records, table_name=self.table_name)
+ self.write_to_index(
+ records,
+ table=table,
+ index_type=self.index_type,
+ metric=self.metric,
+ num_partitions=self.num_partitions,
+ num_sub_vectors=self.num_sub_vectors,
+ )
+ return records
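
The new lancedb module rounds out the VDB operators alongside Milvus and OpenSearch. A hedged usage sketch based on the class shown above; the URI, table name, query, and top_k are illustrative, and `results` is assumed to be the nested record list returned by a prior Ingestor.ingest() call:

    import lancedb

    from nv_ingest_client.util.vdb.lancedb import LanceDB

    lance_op = LanceDB(
        uri="./lancedb",        # illustrative local LanceDB directory
        table_name="nv-ingest",
        overwrite=True,
    )
    lance_op.run(results)  # creates the table, inserts rows, builds the index

    # Query the populated table afterwards (requires a reachable embedding endpoint).
    table = lancedb.connect("./lancedb").open_table("nv-ingest")
    hits = lance_op.retrieval(["What is NV-Ingest?"], table=table, top_k=5)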
@@ -287,6 +287,10 @@ def create_nvingest_index_params(
  gpu_index: bool = True,
  gpu_search: bool = False,
  local_index: bool = True,
+ intermediate_graph_degree: int = 128,
+ graph_degree: int = 100,
+ m: int = 64,
+ ef_construction: int = 512,
  ) -> IndexParams:
  """
  Creates index params necessary to create an index for a collection. At a minimum,
@@ -326,8 +330,8 @@
  index_type="GPU_CAGRA",
  metric_type="L2",
  params={
- "intermediate_graph_degree": 128,
- "graph_degree": 100,
+ "intermediate_graph_degree": intermediate_graph_degree,
+ "graph_degree": graph_degree,
  "build_algo": "NN_DESCENT",
  "cache_dataset_on_device": "true",
  "adapt_for_cpu": "false" if gpu_search else "true",
@@ -339,7 +343,7 @@
  index_name=DENSE_INDEX_NAME,
  index_type="HNSW",
  metric_type="L2",
- params={"M": 64, "efConstruction": 512},
+ params={"M": m, "efConstruction": ef_construction},
  )
  if sparse and local_index:
  index_params.add_index(
@@ -407,6 +411,10 @@ def create_nvingest_collection(
  recreate_meta: bool = False,
  username: str = None,
  password: str = None,
+ intermediate_graph_degree: int = 128,
+ graph_degree: int = 100,
+ m: int = 64,
+ ef_construction: int = 512,
  ) -> CollectionSchema:
  """
  Creates a milvus collection with an nv-ingest compatible schema under
@@ -457,6 +465,10 @@
  gpu_index=gpu_index,
  gpu_search=gpu_search,
  local_index=local_index,
+ intermediate_graph_degree=intermediate_graph_degree,
+ graph_degree=graph_degree,
+ m=m,
+ ef_construction=ef_construction,
  )
  create_collection(client, collection_name, schema, index_params, recreate=recreate)
  d_idx, s_idx = _get_index_types(index_params, sparse=sparse)
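
create_nvingest_collection (and create_nvingest_index_params beneath it) now expose the CAGRA and HNSW tuning knobs as keyword arguments instead of hard-coded values. A hedged sketch of tuning the CPU HNSW dense index; the collection-name and Milvus-URI arguments outside this hunk are assumptions based on typical usage:

    from nv_ingest_client.util.vdb.milvus import create_nvingest_collection

    create_nvingest_collection(
        "nv_ingest_collection",              # assumed positional collection name
        milvus_uri="http://localhost:19530", # assumed keyword for the Milvus endpoint
        gpu_index=False,                     # take the HNSW dense-index path
        m=32,                                # HNSW graph connectivity ("M")
        ef_construction=256,                 # HNSW build-time search width
    )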
@@ -949,6 +961,7 @@ def write_to_nvingest_collection(
  stream: bool = False,
  username: str = None,
  password: str = None,
+ no_wait_index: bool = False,
  **kwargs,
  ):
  """
@@ -1036,7 +1049,8 @@
  )
  num_elements = len(cleaned_records)
  if num_elements == 0:
- raise ValueError("No records with Embeddings to insert detected.")
+ logger.warning("No records with Embeddings to insert detected.")
+ return
  logger.info(f"{num_elements} elements to insert to milvus")
  logger.info(f"threshold for streaming is {threshold}")
  if num_elements < threshold:
@@ -1054,7 +1068,7 @@
  client,
  collection_name,
  )
- if not local_index:
+ if not local_index and not no_wait_index:
  # Make sure all rows are indexed, decided not to wrap in a timeout because we dont
  # know how long this should take, it is num_elements dependent.
  wait_for_index(collection_name, expected_rows, client)
@@ -1971,6 +1985,7 @@ class Milvus(VDB):
  threshold: int = 1000,
  username: str = None,
  password: str = None,
+ no_wait_index: bool = False,
  **kwargs,
  ):
  """
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: nv-ingest-client
- Version: 2025.11.27.dev20251127
+ Version: 2025.12.28.dev20251228
  Summary: Python client for the nv-ingest service
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
  License: Apache License
@@ -223,6 +223,7 @@ Requires-Dist: pydantic-settings>2.0.0
  Requires-Dist: requests>=2.28.2
  Requires-Dist: setuptools>=78.1.1
  Requires-Dist: tqdm>=4.67.1
+ Requires-Dist: lancedb>=0.25.3
  Provides-Extra: milvus
  Requires-Dist: pymilvus==2.5.10; extra == "milvus"
  Requires-Dist: pymilvus[bulk_writer,model]; extra == "milvus"
@@ -1,14 +1,14 @@
  nv_ingest_client/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
- nv_ingest_client/nv_ingest_cli.py,sha256=84fc0-6TUe-0BMasRIiRH4okfjno4AKCaKvUwJEZ45k,14457
+ nv_ingest_client/nv_ingest_cli.py,sha256=qeZJZq_ltnNFiytQNwMY3VAL7nBUXW2HnwMzBGaKQJ0,14452
  nv_ingest_client/cli/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
  nv_ingest_client/cli/util/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
  nv_ingest_client/cli/util/click.py,sha256=YjQU1uF148FU5D3ozC2m1kkfOOJxO1U8U552-T8PjU4,20029
  nv_ingest_client/cli/util/processing.py,sha256=ULGCYQF1RTDQV_b35YM1WQRqIjR2wQRMJWu41DogagE,6259
  nv_ingest_client/cli/util/system.py,sha256=AQLq0DD2Ns8jRanrKu1tmVBKPA9rl-F3-ZsGI6FXLqE,1105
  nv_ingest_client/client/__init__.py,sha256=eEX9l1qmkLH2lAAZU3eP17SCV06ZjjrshHAB_xbboHA,375
- nv_ingest_client/client/client.py,sha256=Mb5V3nQRg_jzr07-jmK5jwgx3_WmzaGmGXrEKfoyjHU,82103
+ nv_ingest_client/client/client.py,sha256=YjYfPrFwmDxp3PMyWEKc-c3t2wdBSmKwQ7Xg0KUwYJU,82103
  nv_ingest_client/client/ingest_job_handler.py,sha256=4exvMwXbzwC-tb0dWleXE-AwhJkvxvhkf_u_1bJt30U,18387
- nv_ingest_client/client/interface.py,sha256=XQ2hHNBsL-Nnsk_w48UMxFqxfkO0CdQ2AOQZEdXU3OA,59990
+ nv_ingest_client/client/interface.py,sha256=1gmFQ7bVQDiEweChN_Divv1Y87a4cNkEgH2Shp4tIMw,64915
  nv_ingest_client/client/util/processing.py,sha256=Ky7x7QbLn3BlgYwmrmoIc-o1VwmlmrcP9tn7GVTi0t0,2502
  nv_ingest_client/primitives/__init__.py,sha256=3rbpLCI7Bl0pntGatAxXD_V01y6dcLhHFheI3wqet-I,269
  nv_ingest_client/primitives/jobs/__init__.py,sha256=-yohgHv3LcCtSleHSaxjv1oO7nNcMCjN3ZYoOkIypIk,469
@@ -16,16 +16,16 @@ nv_ingest_client/primitives/jobs/job_spec.py,sha256=qT8d9zxEO4ODAcwIlyU7yN1HSuQb
  nv_ingest_client/primitives/jobs/job_state.py,sha256=CEe_oZr4p_MobauWIyhuNrP8y7AUwxhIGBuO7dN-VOQ,5277
  nv_ingest_client/primitives/tasks/__init__.py,sha256=D8X4XuwCxk4g_sMSpNRL1XsjVE1eACYaUdEjSanSEfU,1130
  nv_ingest_client/primitives/tasks/audio_extraction.py,sha256=KD5VvaRm6PYelfofZq_-83CbOmupgosokZzFERI5wDA,3559
- nv_ingest_client/primitives/tasks/caption.py,sha256=I1nOpfGb1Ts7QsElwfayhw-F_UcYqtesS-HaZzeh4rI,2130
+ nv_ingest_client/primitives/tasks/caption.py,sha256=w-xPKN77zruUel0md4OA-x2ciELSLY-8Px1ds76gak0,2498
  nv_ingest_client/primitives/tasks/chart_extraction.py,sha256=s5hsljgSXxQMZHGekpAg6OYJ9k3-DHk5NmFpvtKJ6Zs,1493
  nv_ingest_client/primitives/tasks/dedup.py,sha256=qort6p3t6ZJuK_74sfOOLp3vMT3hkB5DAu3467WenyY,1719
  nv_ingest_client/primitives/tasks/embed.py,sha256=ZLk7txs_0OHSjjxvRTYB5jm9RvvXRFo3i32Mj9d2mfc,7048
- nv_ingest_client/primitives/tasks/extract.py,sha256=ec2aKPU9OMOOw-oalQKAPaNRqgkREQ0ByLkFVqutD6E,9339
+ nv_ingest_client/primitives/tasks/extract.py,sha256=jTCOSQG1MG0RoQg4DxPgmYgeHQR7O24hmysygkWYyIY,11270
  nv_ingest_client/primitives/tasks/filter.py,sha256=dr6fWnh94i50MsGbrz9m_oN6DJKWIWsp7sMwm6Mjz8A,2617
  nv_ingest_client/primitives/tasks/infographic_extraction.py,sha256=SyTjZQbdVA3QwM5yVm4fUzE4Gu4zm4tAfNLDZMvySV8,1537
  nv_ingest_client/primitives/tasks/ocr_extraction.py,sha256=w4uNITktOs-FLczL4ZzVdQTP4t_Ha-9PzCJWlXeOEN0,1486
  nv_ingest_client/primitives/tasks/split.py,sha256=8UkB3EialsOTEbsOZLxzmnDIfTJzC6uvjNv21IbgAVA,2332
- nv_ingest_client/primitives/tasks/store.py,sha256=nIOnCH8vw4FLCLVBJYnsS5Unc0QmuO_jEtUp7-E9FU4,4199
+ nv_ingest_client/primitives/tasks/store.py,sha256=UeIspL_RDPBbUV3gv8SK3tIoYNun8r4cSSMxXvBSaks,4575
  nv_ingest_client/primitives/tasks/table_extraction.py,sha256=wQIC70ZNFt0DNQ1lxfvyR3Ci8hl5uAymHXTC0p6v0FY,1107
  nv_ingest_client/primitives/tasks/task_base.py,sha256=Mrx6kgePJHolYd3Im6mVISXcVgdulLst2MYG5gPov9I,1687
  nv_ingest_client/primitives/tasks/task_factory.py,sha256=uvGQXjgWmeF015jPWmBhiclzfrUf3_yD2PPeirQBczM,3218
@@ -46,11 +46,12 @@ nv_ingest_client/util/file_processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCe
  nv_ingest_client/util/file_processing/extract.py,sha256=sJBfyv4N2P0-izN4RyCsnSDKuDNugG_tW8XCqN9Uqck,5574
  nv_ingest_client/util/vdb/__init__.py,sha256=ZmoEzeM9LzwwrVvu_DVUnjRNx-x8ahkNeIrSfSKzbAk,513
  nv_ingest_client/util/vdb/adt_vdb.py,sha256=wT3LJMAy2VQu6daXhc3Pte4Ijs6jN-YP6B9-rnuH_FA,10868
- nv_ingest_client/util/vdb/milvus.py,sha256=jCQyWb6xoQ6utGNccASmN09eJbwF2HlgrGGIkpoUfI8,80792
+ nv_ingest_client/util/vdb/lancedb.py,sha256=mLykdOFkLC5-SpRvHAvt0do9rhyQDqy_H48D6hEtegw,10037
+ nv_ingest_client/util/vdb/milvus.py,sha256=AePCvGcyt0Vxql6gtu2BR7cnoWCV5m3gAI5YMR1kcOQ,81329
  nv_ingest_client/util/vdb/opensearch.py,sha256=I4FzF95VWCOkyzhfm-szdfK1Zd9ugUc8AxxpAdEMWGE,7538
- nv_ingest_client-2025.11.27.dev20251127.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
- nv_ingest_client-2025.11.27.dev20251127.dist-info/METADATA,sha256=jul59WHL8-9IYR27iL9ilxkw7IQRnqb7EMqBfJh7IGk,30627
- nv_ingest_client-2025.11.27.dev20251127.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- nv_ingest_client-2025.11.27.dev20251127.dist-info/entry_points.txt,sha256=3uQVZkTZIjO08_bjTV-g0CDF5H1nrP1zWXU9gJOweuI,137
- nv_ingest_client-2025.11.27.dev20251127.dist-info/top_level.txt,sha256=1eMhBFD3SiWmpXnod2LM66C1HrSLSk96ninZi5XX-cE,17
- nv_ingest_client-2025.11.27.dev20251127.dist-info/RECORD,,
+ nv_ingest_client-2025.12.28.dev20251228.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ nv_ingest_client-2025.12.28.dev20251228.dist-info/METADATA,sha256=CI4MV74LzLyC6VY6QkTRRppB56x7iwMythFlt_taxcg,30658
+ nv_ingest_client-2025.12.28.dev20251228.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ nv_ingest_client-2025.12.28.dev20251228.dist-info/entry_points.txt,sha256=3uQVZkTZIjO08_bjTV-g0CDF5H1nrP1zWXU9gJOweuI,137
+ nv_ingest_client-2025.12.28.dev20251228.dist-info/top_level.txt,sha256=1eMhBFD3SiWmpXnod2LM66C1HrSLSk96ninZi5XX-cE,17
+ nv_ingest_client-2025.12.28.dev20251228.dist-info/RECORD,,