nv-ingest-client 2025.11.17.dev20251117__py3-none-any.whl → 2025.12.17.dev20251217__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,276 @@
1
+ import logging
2
+
3
+
4
+ from nv_ingest_client.util.vdb.adt_vdb import VDB
5
+ from datetime import timedelta
6
+ from functools import partial
7
+ from urllib.parse import urlparse
8
+ from nv_ingest_client.util.transport import infer_microservice
9
+ import lancedb
10
+ import pyarrow as pa
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
def create_lancedb_results(results):
    """Transform NV-Ingest pipeline results into LanceDB ingestible rows.

    The NV-Ingest pipeline provides nested lists of record dictionaries. This
    helper extracts the inner `metadata` dict for each record, filters out
    entries without an embedding, and returns a list of dictionaries with the
    fields used by the LanceDB table schema built in `LanceDB.create_index`.

    Parameters
    ----------
    results : list or None
        Nested list-of-lists containing record dicts in the NV-Ingest format.
        ``None``, an empty list, and ``None``/empty inner lists are all
        accepted and simply contribute no rows.

    Returns
    -------
    list
        List of dictionaries with keys: `vector` (embedding list), `text`
        (string content), `metadata` (page number) and `source` (source id).

    Notes
    -----
    - Each kept record must have a `metadata` mapping containing `content`,
      `content_metadata.page_number`, and `source_metadata.source_id`.
    - Records whose `embedding` is missing or ``None`` are skipped.
    - NOTE(review): `metadata` is passed through as-is (the page number),
      while the table schema declares that column as a string; the cast is
      left to LanceDB/PyArrow -- confirm this is intended.
    """
    rows = []
    # `results` may be None when the caller only wants an empty table.
    for document in results or []:
        # An inner result may be None/empty; it has nothing to contribute.
        for record in document or []:
            meta = record["metadata"]
            embedding = meta.get("embedding")
            if embedding is None:
                # Nothing to index without a vector.
                continue
            rows.append(
                {
                    "vector": embedding,
                    "text": meta["content"],
                    "metadata": meta["content_metadata"]["page_number"],
                    "source": meta["source_metadata"]["source_id"],
                }
            )
    return rows
56
+
57
+
58
class LanceDB(VDB):
    """LanceDB operator implementing the VDB interface.

    This class adapts NV-Ingest records to LanceDB, providing index creation,
    ingestion, and retrieval hooks. The implementation is intentionally small
    and focuses on the example configuration used in NV-Ingest evaluation
    scripts.
    """

    # Embedding width used when the caller does not specify one.
    DEFAULT_VECTOR_DIM = 2048

    def __init__(
        self,
        uri=None,
        overwrite=True,
        table_name="nv-ingest",
        index_type="IVF_HNSW_SQ",
        metric="l2",
        num_partitions=16,
        num_sub_vectors=256,
        vector_dim=DEFAULT_VECTOR_DIM,
        **kwargs
    ):
        """Initialize the LanceDB VDB operator.

        Parameters
        ----------
        uri : str, optional
            LanceDB connection URI. ``None`` (or any falsy value) falls back
            to ``"lancedb"``.
        overwrite : bool, optional
            If True, an existing table is overwritten by `create_index`.
            If False, new rows are appended to the existing table (the table
            is created first when it does not exist yet).
        table_name : str, optional
            Name of the LanceDB table used by `run` (default is "nv-ingest").
        index_type : str, optional
            Type of vector index to create (default is "IVF_HNSW_SQ").
        metric : str, optional
            Distance metric for the vector index (default is "l2").
        num_partitions : int, optional
            Number of partitions for the vector index (default is 16).
        num_sub_vectors : int, optional
            Number of sub-vectors for the vector index (default is 256).
        vector_dim : int, optional
            Length of the fixed-size `vector` column (default is 2048). Must
            match the dimension of the embeddings being ingested.
        **kwargs : dict
            Remaining options, forwarded unchanged to ``VDB.__init__``.
        """
        self.uri = uri or "lancedb"
        self.overwrite = overwrite
        self.table_name = table_name
        self.index_type = index_type
        self.metric = metric
        self.num_partitions = num_partitions
        self.num_sub_vectors = num_sub_vectors
        self.vector_dim = vector_dim
        super().__init__(**kwargs)

    def create_index(self, records=None, table_name="nv-ingest", **kwargs):
        """Create (or open) a LanceDB table and populate it with records.

        This method connects to LanceDB at ``self.uri``, transforms the
        NV-Ingest records using `create_lancedb_results`, builds a PyArrow
        schema for the table layout, and then either overwrites the table
        (``self.overwrite`` is True) or appends to it (``self.overwrite`` is
        False, creating the table first when it is missing).

        Parameters
        ----------
        records : list, optional
            NV-Ingest records in nested list format (the same structure passed
            to `run`). If ``None`` or empty, no rows are written and an empty
            table is created when one is needed.

        table_name : str, optional
            Name of the LanceDB table to create (default is "nv-ingest").
            Note that `run` passes ``self.table_name`` explicitly.

        Returns
        -------
        table
            The LanceDB table object holding the ingested rows.
        """
        db = lancedb.connect(uri=self.uri)
        # Guard here as well so a missing record list never reaches the helper.
        rows = create_lancedb_results(records) if records else []
        schema = pa.schema(
            [
                pa.field("vector", pa.list_(pa.float32(), self.vector_dim)),
                pa.field("text", pa.string()),
                pa.field("metadata", pa.string()),
                pa.field("source", pa.string()),
            ]
        )
        if self.overwrite:
            # With no rows, pass data=None so the table is built from the schema alone.
            return db.create_table(table_name, data=rows or None, schema=schema, mode="overwrite")

        # `create_table` only accepts mode "create" or "overwrite"; appending
        # means opening the table (creating it empty if needed) and adding rows.
        table = db.create_table(table_name, schema=schema, mode="create", exist_ok=True)
        if rows:
            table.add(rows)
        return table

    def write_to_index(
        self,
        records,
        table=None,
        index_type="IVF_HNSW_SQ",
        metric="l2",
        num_partitions=16,
        num_sub_vectors=256,
        **kwargs
    ):
        """Create an index on the LanceDB table and wait for it to become ready.

        This function calls `table.create_index` on the `vector` column with
        the given index configuration. After requesting index construction it
        lists available indices and waits (up to 600 seconds each) for every
        one to reach a ready state using `table.wait_for_index`.

        Parameters
        ----------
        records : list
            The original records being indexed (not used directly in this
            implementation but kept in the signature for consistency).
        table : object
            LanceDB table object returned by `create_index`. Required.
        index_type : str, optional
            Type of vector index to create (default is "IVF_HNSW_SQ").
        metric : str, optional
            Distance metric for the vector index (default is "l2").
        num_partitions : int, optional
            Number of partitions for the vector index (default is 16).
        num_sub_vectors : int, optional
            Number of sub-vectors for the vector index (default is 256).

        Raises
        ------
        ValueError
            If `table` is ``None``.
        """
        if table is None:
            raise ValueError("`table` is required; pass the table returned by `create_index`.")
        if table.count_rows() == 0:
            # There are no vectors to train an index on; skip instead of failing.
            logger.warning("LanceDB table is empty; skipping vector index creation.")
            return
        table.create_index(
            index_type=index_type,
            metric=metric,
            num_partitions=num_partitions,
            num_sub_vectors=num_sub_vectors,
            vector_column_name="vector",
        )
        for index_stub in table.list_indices():
            table.wait_for_index([index_stub.name], timeout=timedelta(seconds=600))

    def retrieval(
        self,
        queries,
        table=None,
        embedding_endpoint="http://localhost:8012/v1",
        nvidia_api_key=None,
        model_name="nvidia/llama-3.2-nv-embedqa-1b-v2",
        result_fields=None,
        top_k=10,
        **kwargs
    ):
        """Run similarity search for a list of text queries.

        This method converts textual queries to embeddings by calling the
        transport helper `infer_microservice` and performs a vector search
        against the `vector` column of the LanceDB `table`.

        Parameters
        ----------
        queries : list[str]
            Text queries to be embedded and searched.
        table : object
            LanceDB table object with a built vector index. Required.
        embedding_endpoint : str, optional
            URL of the embedding microservice (default is
            "http://localhost:8012/v1"). gRPC is used unless the URL scheme
            contains "http".
        nvidia_api_key : str, optional
            NVIDIA API key forwarded to the embedding service helper.
        model_name : str, optional
            Name of the embedding model to use (default is
            "nvidia/llama-3.2-nv-embedqa-1b-v2").
        result_fields : list, optional
            List of field names to retrieve from each hit document. ``None``
            (the default) selects `["text", "metadata", "source"]`.
        top_k : int, optional
            Number of top results to return per query (default is 10).

        Returns
        -------
        list[list[dict]]
            For each input query, a list of at most `top_k` hit documents
            (each document is a dict with the requested fields).

        Raises
        ------
        ValueError
            If `table` is ``None``.
        """
        if table is None:
            raise ValueError("`table` is required; pass a LanceDB table with a built vector index.")
        if result_fields is None:
            # Fresh list per call; avoids sharing a mutable default argument.
            result_fields = ["text", "metadata", "source"]
        embed_model = partial(
            infer_microservice,
            model_name=model_name,
            embedding_endpoint=embedding_endpoint,
            nvidia_api_key=nvidia_api_key,
            input_type="query",
            output_names=["embeddings"],
            grpc="http" not in urlparse(embedding_endpoint).scheme,
        )
        query_embeddings = embed_model(queries)
        return [
            table.search([query_embed], vector_column_name="vector").select(result_fields).limit(top_k).to_list()
            for query_embed in query_embeddings
        ]

    def run(self, records):
        """Orchestrate index creation and data ingestion.

        The `run` method first writes the records into the table named
        ``self.table_name`` via `create_index` and then builds the vector
        index with `write_to_index`, using the index settings stored on the
        instance.

        Parameters
        ----------
        records : list
            NV-Ingest records to index.

        Returns
        -------
        list
            The original `records` list is returned unchanged to make the
            operator composable in pipelines.
        """
        table = self.create_index(records=records, table_name=self.table_name)
        self.write_to_index(
            records,
            table=table,
            index_type=self.index_type,
            metric=self.metric,
            num_partitions=self.num_partitions,
            num_sub_vectors=self.num_sub_vectors,
        )
        return records
@@ -287,6 +287,10 @@ def create_nvingest_index_params(
287
287
  gpu_index: bool = True,
288
288
  gpu_search: bool = False,
289
289
  local_index: bool = True,
290
+ intermediate_graph_degree: int = 128,
291
+ graph_degree: int = 100,
292
+ m: int = 64,
293
+ ef_construction: int = 512,
290
294
  ) -> IndexParams:
291
295
  """
292
296
  Creates index params necessary to create an index for a collection. At a minimum,
@@ -326,8 +330,8 @@ def create_nvingest_index_params(
326
330
  index_type="GPU_CAGRA",
327
331
  metric_type="L2",
328
332
  params={
329
- "intermediate_graph_degree": 128,
330
- "graph_degree": 100,
333
+ "intermediate_graph_degree": intermediate_graph_degree,
334
+ "graph_degree": graph_degree,
331
335
  "build_algo": "NN_DESCENT",
332
336
  "cache_dataset_on_device": "true",
333
337
  "adapt_for_cpu": "false" if gpu_search else "true",
@@ -339,7 +343,7 @@ def create_nvingest_index_params(
339
343
  index_name=DENSE_INDEX_NAME,
340
344
  index_type="HNSW",
341
345
  metric_type="L2",
342
- params={"M": 64, "efConstruction": 512},
346
+ params={"M": m, "efConstruction": ef_construction},
343
347
  )
344
348
  if sparse and local_index:
345
349
  index_params.add_index(
@@ -407,6 +411,10 @@ def create_nvingest_collection(
407
411
  recreate_meta: bool = False,
408
412
  username: str = None,
409
413
  password: str = None,
414
+ intermediate_graph_degree: int = 128,
415
+ graph_degree: int = 100,
416
+ m: int = 64,
417
+ ef_construction: int = 512,
410
418
  ) -> CollectionSchema:
411
419
  """
412
420
  Creates a milvus collection with an nv-ingest compatible schema under
@@ -457,6 +465,10 @@ def create_nvingest_collection(
457
465
  gpu_index=gpu_index,
458
466
  gpu_search=gpu_search,
459
467
  local_index=local_index,
468
+ intermediate_graph_degree=intermediate_graph_degree,
469
+ graph_degree=graph_degree,
470
+ m=m,
471
+ ef_construction=ef_construction,
460
472
  )
461
473
  create_collection(client, collection_name, schema, index_params, recreate=recreate)
462
474
  d_idx, s_idx = _get_index_types(index_params, sparse=sparse)
@@ -892,7 +904,7 @@ def stream_insert_milvus(records, client: MilvusClient, collection_name: str, ba
892
904
  logger.info(f"streamed {count} records")
893
905
 
894
906
 
895
- def wait_for_index(collection_name: str, num_elements: int, client: MilvusClient):
907
+ def wait_for_index(collection_name: str, expected_rows_dict: dict, client: MilvusClient):
896
908
  """
897
909
  This function waits for the index to be built. It checks
898
910
  the indexed_rows of the index and waits for it to be equal
@@ -901,32 +913,28 @@ def wait_for_index(collection_name: str, num_elements: int, client: MilvusClient
901
913
  (refer to MilvusClient.refresh_load for bulk inserts).
902
914
  """
903
915
  client.flush(collection_name)
904
- # index_names = utility.list_indexes(collection_name)
905
916
  indexed_rows = 0
906
917
  # observe dense_index, all indexes get populated simultaneously
907
- for index_name in [DENSE_INDEX_NAME]:
908
- indexed_rows = 0
909
- expected_rows = client.describe_index(collection_name, index_name)["indexed_rows"] + num_elements
910
- while indexed_rows < expected_rows:
911
- pos_movement = 10 # number of iteration allowed without noticing an increase in indexed_rows
918
+ for index_name, rows_expected in expected_rows_dict.items():
919
+ indexed_rows = client.describe_index(collection_name, index_name)["indexed_rows"]
920
+ while indexed_rows < rows_expected:
921
+ # 0.5% of rows expected allowed without noticing an increase in indexed_rows
922
+ pos_movement = start_pos_movement = max((rows_expected - indexed_rows) * 0.005, 10)
912
923
  for i in range(20):
913
- current_indexed_rows = client.describe_index(collection_name, index_name)["indexed_rows"]
924
+ prev_indexed_rows = indexed_rows
925
+ indexed_rows = client.describe_index(collection_name, index_name)["indexed_rows"]
914
926
  time.sleep(1)
915
- logger.info(
916
- f"Indexed rows, {collection_name}, {index_name} - {current_indexed_rows} / {expected_rows}"
917
- )
918
- if current_indexed_rows == expected_rows:
919
- indexed_rows = current_indexed_rows
927
+ logger.info(f"Indexed rows, {collection_name}, {index_name} - {indexed_rows} / {rows_expected}")
928
+ if indexed_rows == rows_expected:
920
929
  break
921
930
  # check if indexed_rows is staying the same, too many times means something is wrong
922
- if current_indexed_rows == indexed_rows:
931
+ if indexed_rows == prev_indexed_rows:
923
932
  pos_movement -= 1
924
933
  else:
925
- pos_movement = 10
934
+ pos_movement = start_pos_movement
926
935
  # if pos_movement is 0, raise an error, means the rows are not getting indexed as expected
927
936
  if pos_movement == 0:
928
937
  raise ValueError(f"Rows are not getting indexed as expected for: {index_name} - {collection_name}")
929
- indexed_rows = current_indexed_rows
930
938
  return indexed_rows
931
939
 
932
940
 
@@ -953,6 +961,7 @@ def write_to_nvingest_collection(
953
961
  stream: bool = False,
954
962
  username: str = None,
955
963
  password: str = None,
964
+ no_wait_index: bool = False,
956
965
  **kwargs,
957
966
  ):
958
967
  """
@@ -1046,15 +1055,22 @@ def write_to_nvingest_collection(
1046
1055
  if num_elements < threshold:
1047
1056
  stream = True
1048
1057
  if stream:
1058
+ # must be accessed/saved before adding new records
1059
+ index_names = utility.list_indexes(collection_name)
1060
+ expected_rows = {}
1061
+ for index_name in index_names:
1062
+ expected_rows[index_name] = (
1063
+ int(client.describe_index(collection_name, index_name)["indexed_rows"]) + num_elements
1064
+ )
1049
1065
  stream_insert_milvus(
1050
1066
  cleaned_records,
1051
1067
  client,
1052
1068
  collection_name,
1053
1069
  )
1054
- if not local_index:
1070
+ if not local_index and not no_wait_index:
1055
1071
  # Make sure all rows are indexed, decided not to wrap in a timeout because we dont
1056
1072
  # know how long this should take, it is num_elements dependent.
1057
- wait_for_index(collection_name, num_elements, client)
1073
+ wait_for_index(collection_name, expected_rows, client)
1058
1074
  else:
1059
1075
  minio_client = Minio(minio_endpoint, access_key=access_key, secret_key=secret_key, secure=False)
1060
1076
  bucket_name = bucket_name if bucket_name else ClientConfigSchema().minio_bucket_name
@@ -1968,6 +1984,7 @@ class Milvus(VDB):
1968
1984
  threshold: int = 1000,
1969
1985
  username: str = None,
1970
1986
  password: str = None,
1987
+ no_wait_index: bool = False,
1971
1988
  **kwargs,
1972
1989
  ):
1973
1990
  """
@@ -2005,6 +2022,12 @@ class Milvus(VDB):
2005
2022
  """
2006
2023
  kwargs = locals().copy()
2007
2024
  kwargs.pop("self", None)
2025
+ bucket_name = kwargs.get("bucket_name", None)
2026
+ if bucket_name is not None and bucket_name != ClientConfigSchema().minio_bucket_name:
2027
+ raise ValueError(
2028
+ "You must use the environment variable MINIO_BUCKET to specify bucket_name, detected:",
2029
+ f"`bucket_name`: {bucket_name} and MINIO_BUCKET: {ClientConfigSchema().minio_bucket_name}",
2030
+ )
2008
2031
  super().__init__(**kwargs)
2009
2032
 
2010
2033
  def create_index(self, **kwargs):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest-client
3
- Version: 2025.11.17.dev20251117
3
+ Version: 2025.12.17.dev20251217
4
4
  Summary: Python client for the nv-ingest service
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -223,6 +223,7 @@ Requires-Dist: pydantic-settings>2.0.0
223
223
  Requires-Dist: requests>=2.28.2
224
224
  Requires-Dist: setuptools>=78.1.1
225
225
  Requires-Dist: tqdm>=4.67.1
226
+ Requires-Dist: lancedb>=0.25.3
226
227
  Provides-Extra: milvus
227
228
  Requires-Dist: pymilvus==2.5.10; extra == "milvus"
228
229
  Requires-Dist: pymilvus[bulk_writer,model]; extra == "milvus"
@@ -1,31 +1,31 @@
1
1
  nv_ingest_client/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
2
- nv_ingest_client/nv_ingest_cli.py,sha256=84fc0-6TUe-0BMasRIiRH4okfjno4AKCaKvUwJEZ45k,14457
2
+ nv_ingest_client/nv_ingest_cli.py,sha256=qeZJZq_ltnNFiytQNwMY3VAL7nBUXW2HnwMzBGaKQJ0,14452
3
3
  nv_ingest_client/cli/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
4
4
  nv_ingest_client/cli/util/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
5
5
  nv_ingest_client/cli/util/click.py,sha256=YjQU1uF148FU5D3ozC2m1kkfOOJxO1U8U552-T8PjU4,20029
6
6
  nv_ingest_client/cli/util/processing.py,sha256=ULGCYQF1RTDQV_b35YM1WQRqIjR2wQRMJWu41DogagE,6259
7
7
  nv_ingest_client/cli/util/system.py,sha256=AQLq0DD2Ns8jRanrKu1tmVBKPA9rl-F3-ZsGI6FXLqE,1105
8
8
  nv_ingest_client/client/__init__.py,sha256=eEX9l1qmkLH2lAAZU3eP17SCV06ZjjrshHAB_xbboHA,375
9
- nv_ingest_client/client/client.py,sha256=3uA54D4Y6lSS-Nvz8R8uzkHkoV8vJu8GPQQRPoc-Uxk,77368
9
+ nv_ingest_client/client/client.py,sha256=Mb5V3nQRg_jzr07-jmK5jwgx3_WmzaGmGXrEKfoyjHU,82103
10
10
  nv_ingest_client/client/ingest_job_handler.py,sha256=4exvMwXbzwC-tb0dWleXE-AwhJkvxvhkf_u_1bJt30U,18387
11
- nv_ingest_client/client/interface.py,sha256=Y6JnjaRytlBrhgbU6MJYm2dblLvoYxWEB35TETZDSwk,55022
11
+ nv_ingest_client/client/interface.py,sha256=1gmFQ7bVQDiEweChN_Divv1Y87a4cNkEgH2Shp4tIMw,64915
12
12
  nv_ingest_client/client/util/processing.py,sha256=Ky7x7QbLn3BlgYwmrmoIc-o1VwmlmrcP9tn7GVTi0t0,2502
13
13
  nv_ingest_client/primitives/__init__.py,sha256=3rbpLCI7Bl0pntGatAxXD_V01y6dcLhHFheI3wqet-I,269
14
14
  nv_ingest_client/primitives/jobs/__init__.py,sha256=-yohgHv3LcCtSleHSaxjv1oO7nNcMCjN3ZYoOkIypIk,469
15
- nv_ingest_client/primitives/jobs/job_spec.py,sha256=TBz5u7KRdQjQvqD0mMzwjTK9Jl3p7yTIknQQs0lfnV8,15909
15
+ nv_ingest_client/primitives/jobs/job_spec.py,sha256=qT8d9zxEO4ODAcwIlyU7yN1HSuQbDkhCXhLA9hNOURc,16831
16
16
  nv_ingest_client/primitives/jobs/job_state.py,sha256=CEe_oZr4p_MobauWIyhuNrP8y7AUwxhIGBuO7dN-VOQ,5277
17
17
  nv_ingest_client/primitives/tasks/__init__.py,sha256=D8X4XuwCxk4g_sMSpNRL1XsjVE1eACYaUdEjSanSEfU,1130
18
18
  nv_ingest_client/primitives/tasks/audio_extraction.py,sha256=KD5VvaRm6PYelfofZq_-83CbOmupgosokZzFERI5wDA,3559
19
- nv_ingest_client/primitives/tasks/caption.py,sha256=I1nOpfGb1Ts7QsElwfayhw-F_UcYqtesS-HaZzeh4rI,2130
19
+ nv_ingest_client/primitives/tasks/caption.py,sha256=w-xPKN77zruUel0md4OA-x2ciELSLY-8Px1ds76gak0,2498
20
20
  nv_ingest_client/primitives/tasks/chart_extraction.py,sha256=s5hsljgSXxQMZHGekpAg6OYJ9k3-DHk5NmFpvtKJ6Zs,1493
21
21
  nv_ingest_client/primitives/tasks/dedup.py,sha256=qort6p3t6ZJuK_74sfOOLp3vMT3hkB5DAu3467WenyY,1719
22
22
  nv_ingest_client/primitives/tasks/embed.py,sha256=ZLk7txs_0OHSjjxvRTYB5jm9RvvXRFo3i32Mj9d2mfc,7048
23
- nv_ingest_client/primitives/tasks/extract.py,sha256=ec2aKPU9OMOOw-oalQKAPaNRqgkREQ0ByLkFVqutD6E,9339
23
+ nv_ingest_client/primitives/tasks/extract.py,sha256=jTCOSQG1MG0RoQg4DxPgmYgeHQR7O24hmysygkWYyIY,11270
24
24
  nv_ingest_client/primitives/tasks/filter.py,sha256=dr6fWnh94i50MsGbrz9m_oN6DJKWIWsp7sMwm6Mjz8A,2617
25
25
  nv_ingest_client/primitives/tasks/infographic_extraction.py,sha256=SyTjZQbdVA3QwM5yVm4fUzE4Gu4zm4tAfNLDZMvySV8,1537
26
26
  nv_ingest_client/primitives/tasks/ocr_extraction.py,sha256=w4uNITktOs-FLczL4ZzVdQTP4t_Ha-9PzCJWlXeOEN0,1486
27
27
  nv_ingest_client/primitives/tasks/split.py,sha256=8UkB3EialsOTEbsOZLxzmnDIfTJzC6uvjNv21IbgAVA,2332
28
- nv_ingest_client/primitives/tasks/store.py,sha256=nIOnCH8vw4FLCLVBJYnsS5Unc0QmuO_jEtUp7-E9FU4,4199
28
+ nv_ingest_client/primitives/tasks/store.py,sha256=UeIspL_RDPBbUV3gv8SK3tIoYNun8r4cSSMxXvBSaks,4575
29
29
  nv_ingest_client/primitives/tasks/table_extraction.py,sha256=wQIC70ZNFt0DNQ1lxfvyR3Ci8hl5uAymHXTC0p6v0FY,1107
30
30
  nv_ingest_client/primitives/tasks/task_base.py,sha256=Mrx6kgePJHolYd3Im6mVISXcVgdulLst2MYG5gPov9I,1687
31
31
  nv_ingest_client/primitives/tasks/task_factory.py,sha256=uvGQXjgWmeF015jPWmBhiclzfrUf3_yD2PPeirQBczM,3218
@@ -40,17 +40,18 @@ nv_ingest_client/util/process_json_files.py,sha256=YKR-fGT4kM8zO2p8r5tpo5-vvFywk
40
40
  nv_ingest_client/util/processing.py,sha256=bAy8it-OUgGFO3pcy6D3ezpyZ6p2DfmoQUGhx3QmVf8,8989
41
41
  nv_ingest_client/util/system.py,sha256=DVIRLlEWkpqftqxazCuPNdaFSjQiHGMYcHzBufJSRUM,2216
42
42
  nv_ingest_client/util/transport.py,sha256=Kwi3r-EUD5yOInW2rH7tYm2DXnzP3aU9l95V-BbXO90,1836
43
- nv_ingest_client/util/util.py,sha256=qwJ4MqF8w4-lws76z8iz1V0Hz_ebDYN8yAKyJPGuHuU,15828
43
+ nv_ingest_client/util/util.py,sha256=zvWgIxIeATrtrS8olo_8-fHQ4aDd83yg2SjNDcHIv4g,16805
44
44
  nv_ingest_client/util/zipkin.py,sha256=p2tMtTVAqrZGxmAxWKE42wkx7U5KywiX5munI7rJt_k,4473
45
45
  nv_ingest_client/util/file_processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
46
- nv_ingest_client/util/file_processing/extract.py,sha256=Hjtem4bJWum1bbUPw7_TG-0Z2-7PsH4bBuqTF7bLn88,4794
46
+ nv_ingest_client/util/file_processing/extract.py,sha256=sJBfyv4N2P0-izN4RyCsnSDKuDNugG_tW8XCqN9Uqck,5574
47
47
  nv_ingest_client/util/vdb/__init__.py,sha256=ZmoEzeM9LzwwrVvu_DVUnjRNx-x8ahkNeIrSfSKzbAk,513
48
- nv_ingest_client/util/vdb/adt_vdb.py,sha256=UubzAMSfyrqqpD-OQErpBs25hC2Mw8zGZ4waenGXPOk,515
49
- nv_ingest_client/util/vdb/milvus.py,sha256=LHZ4Z6fHk8vQUGQFJ3FZ5iay0Ike6Zur-K9yMiPxe44,80141
48
+ nv_ingest_client/util/vdb/adt_vdb.py,sha256=wT3LJMAy2VQu6daXhc3Pte4Ijs6jN-YP6B9-rnuH_FA,10868
49
+ nv_ingest_client/util/vdb/lancedb.py,sha256=mLykdOFkLC5-SpRvHAvt0do9rhyQDqy_H48D6hEtegw,10037
50
+ nv_ingest_client/util/vdb/milvus.py,sha256=NLlsYU5LdESh0r_Psvn0vzGiNN-70iouOGr3RgZaMVg,81316
50
51
  nv_ingest_client/util/vdb/opensearch.py,sha256=I4FzF95VWCOkyzhfm-szdfK1Zd9ugUc8AxxpAdEMWGE,7538
51
- nv_ingest_client-2025.11.17.dev20251117.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
52
- nv_ingest_client-2025.11.17.dev20251117.dist-info/METADATA,sha256=bgCG3WP30zjURzJ_SZEm3fDbby-NoICZDYfbiA3sSjg,30627
53
- nv_ingest_client-2025.11.17.dev20251117.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
54
- nv_ingest_client-2025.11.17.dev20251117.dist-info/entry_points.txt,sha256=3uQVZkTZIjO08_bjTV-g0CDF5H1nrP1zWXU9gJOweuI,137
55
- nv_ingest_client-2025.11.17.dev20251117.dist-info/top_level.txt,sha256=1eMhBFD3SiWmpXnod2LM66C1HrSLSk96ninZi5XX-cE,17
56
- nv_ingest_client-2025.11.17.dev20251117.dist-info/RECORD,,
52
+ nv_ingest_client-2025.12.17.dev20251217.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
53
+ nv_ingest_client-2025.12.17.dev20251217.dist-info/METADATA,sha256=EbEZoUk3-GvCBAB2z0hqZjgMOGasw75hZCWTDk7yxpk,30658
54
+ nv_ingest_client-2025.12.17.dev20251217.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
55
+ nv_ingest_client-2025.12.17.dev20251217.dist-info/entry_points.txt,sha256=3uQVZkTZIjO08_bjTV-g0CDF5H1nrP1zWXU9gJOweuI,137
56
+ nv_ingest_client-2025.12.17.dev20251217.dist-info/top_level.txt,sha256=1eMhBFD3SiWmpXnod2LM66C1HrSLSk96ninZi5XX-cE,17
57
+ nv_ingest_client-2025.12.17.dev20251217.dist-info/RECORD,,