nv-ingest-client 2025.9.26.dev20250926__py3-none-any.whl → 2025.11.2.dev20251102__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nv-ingest-client might be problematic. More details are available on the original page (the link is not preserved in this text).

@@ -350,6 +350,32 @@ def create_job_specs_for_batch(files_batch: List[str]) -> List[JobSpec]:
350
350
  return job_specs
351
351
 
352
352
 
353
+ def apply_pdf_split_config_to_job_specs(job_specs: List[JobSpec], pages_per_chunk: int) -> None:
354
+ """
355
+ Apply PDF split configuration to a list of JobSpec objects.
356
+
357
+ Modifies job specs in-place by adding pdf_config to extended_options for PDF files only.
358
+
359
+ Parameters
360
+ ----------
361
+ job_specs : List[JobSpec]
362
+ List of job specifications to potentially modify
363
+ pages_per_chunk : int
364
+ Number of pages per PDF chunk (will be stored as-is; server performs clamping)
365
+
366
+ Notes
367
+ -----
368
+ - Only modifies job specs with document_type == "pdf" (case-insensitive)
369
+ - Modifies job specs in-place
370
+ - Safe to call on mixed document types (only PDFs are affected)
371
+ """
372
+ for job_spec in job_specs:
373
+ if job_spec.document_type.lower() == "pdf":
374
+ if "pdf_config" not in job_spec._extended_options:
375
+ job_spec._extended_options["pdf_config"] = {}
376
+ job_spec._extended_options["pdf_config"]["split_page_count"] = pages_per_chunk
377
+
378
+
353
379
  def filter_function_kwargs(func, **kwargs):
354
380
  """
355
381
  Filters and returns keyword arguments that match the parameters of a given function.
@@ -776,13 +776,13 @@ def bulk_insert_milvus(
776
776
  t_bulk_start = time.time()
777
777
  task_ids = []
778
778
 
779
- task_ids.append(
780
- utility.do_bulk_insert(
779
+ for files in writer.batch_files:
780
+ task_id = utility.do_bulk_insert(
781
781
  collection_name=collection_name,
782
- files=[file for files in writer.batch_files for file in files],
782
+ files=files,
783
783
  consistency_level=CONSISTENCY,
784
784
  )
785
- )
785
+ task_ids.append(task_id)
786
786
 
787
787
  while len(task_ids) > 0:
788
788
  time.sleep(1)
@@ -917,7 +917,9 @@ def wait_for_index(collection_name: str, num_elements: int, client: MilvusClient
917
917
  break
918
918
  # check if indexed_rows is staying the same, too many times means something is wrong
919
919
  if new_indexed_rows == indexed_rows:
920
- pos_movement = -1
920
+ pos_movement -= 1
921
+ else:
922
+ pos_movement = 10
921
923
  # if pos_movement is 0, raise an error, means the rows are not getting indexed as expected
922
924
  if pos_movement == 0:
923
925
  raise ValueError("Rows are not getting indexed as expected")
@@ -1046,9 +1048,10 @@ def write_to_nvingest_collection(
1046
1048
  client,
1047
1049
  collection_name,
1048
1050
  )
1049
- # Make sure all rows are indexed, decided not to wrap in a timeout because we dont
1050
- # know how long this should take, it is num_elements dependent.
1051
- wait_for_index(collection_name, num_elements, client)
1051
+ if not local_index:
1052
+ # Make sure all rows are indexed, decided not to wrap in a timeout because we dont
1053
+ # know how long this should take, it is num_elements dependent.
1054
+ wait_for_index(collection_name, num_elements, client)
1052
1055
  else:
1053
1056
  minio_client = Minio(minio_endpoint, access_key=access_key, secret_key=secret_key, secure=False)
1054
1057
  bucket_name = bucket_name if bucket_name else ClientConfigSchema().minio_bucket_name
@@ -1349,7 +1352,7 @@ def nvingest_retrieval(
1349
1352
  nvidia_api_key=nvidia_api_key,
1350
1353
  input_type="query",
1351
1354
  output_names=["embeddings"],
1352
- grpc=not (urlparse(embedding_endpoint).scheme == "http"),
1355
+ grpc=not ("http" in urlparse(embedding_endpoint).scheme),
1353
1356
  )
1354
1357
  client = client or MilvusClient(milvus_uri, token=f"{username}:{password}")
1355
1358
  final_top_k = top_k
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest-client
3
- Version: 2025.9.26.dev20250926
3
+ Version: 2025.11.2.dev20251102
4
4
  Summary: Python client for the nv-ingest service
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -1,54 +1,55 @@
1
1
  nv_ingest_client/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
2
- nv_ingest_client/nv_ingest_cli.py,sha256=GG7x_fe423NHQRDmpNcTtNI2P_g1xgg9SQ5JjbdBAIU,13592
2
+ nv_ingest_client/nv_ingest_cli.py,sha256=84fc0-6TUe-0BMasRIiRH4okfjno4AKCaKvUwJEZ45k,14457
3
3
  nv_ingest_client/cli/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
4
4
  nv_ingest_client/cli/util/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
5
5
  nv_ingest_client/cli/util/click.py,sha256=YjQU1uF148FU5D3ozC2m1kkfOOJxO1U8U552-T8PjU4,20029
6
- nv_ingest_client/cli/util/processing.py,sha256=7mXPjjNjLzWQY7WSxpm6et6ZEZOj0GYhLqvz-jx6MO4,24002
6
+ nv_ingest_client/cli/util/processing.py,sha256=ULGCYQF1RTDQV_b35YM1WQRqIjR2wQRMJWu41DogagE,6259
7
7
  nv_ingest_client/cli/util/system.py,sha256=AQLq0DD2Ns8jRanrKu1tmVBKPA9rl-F3-ZsGI6FXLqE,1105
8
8
  nv_ingest_client/client/__init__.py,sha256=eEX9l1qmkLH2lAAZU3eP17SCV06ZjjrshHAB_xbboHA,375
9
- nv_ingest_client/client/client.py,sha256=wgPeLUByBNcQRkl1FXe7neHNNC5eY2sVve99g5sW41k,65068
10
- nv_ingest_client/client/interface.py,sha256=D4kosPM5q-DpeGIe6hKNNrX_p5V9nT6LvCKufkEvYAc,46261
11
- nv_ingest_client/client/util/processing.py,sha256=MtVRtGnRB8unwTa5b6-LYODx-7kg-RYP3wLmjdqymXw,2195
9
+ nv_ingest_client/client/client.py,sha256=WH2KRuaqoRm0qe3XAomAJQUetCDXp84xqcsMdumICbk,77505
10
+ nv_ingest_client/client/ingest_job_handler.py,sha256=4exvMwXbzwC-tb0dWleXE-AwhJkvxvhkf_u_1bJt30U,18387
11
+ nv_ingest_client/client/interface.py,sha256=vmRdooNkaMVBv6RSxcgMYHfmMs0E3ZBnyrp5mmmhCOI,51247
12
+ nv_ingest_client/client/util/processing.py,sha256=Ky7x7QbLn3BlgYwmrmoIc-o1VwmlmrcP9tn7GVTi0t0,2502
12
13
  nv_ingest_client/primitives/__init__.py,sha256=3rbpLCI7Bl0pntGatAxXD_V01y6dcLhHFheI3wqet-I,269
13
14
  nv_ingest_client/primitives/jobs/__init__.py,sha256=-yohgHv3LcCtSleHSaxjv1oO7nNcMCjN3ZYoOkIypIk,469
14
- nv_ingest_client/primitives/jobs/job_spec.py,sha256=NYT8K31b6p2v0zbIYugcARqJ8DTHpSNf_D1-V6M8YXA,15609
15
+ nv_ingest_client/primitives/jobs/job_spec.py,sha256=teAZbpvxn25jIEUP5YJsAX_E_z9iWhejS-uy5opshFM,15681
15
16
  nv_ingest_client/primitives/jobs/job_state.py,sha256=CEe_oZr4p_MobauWIyhuNrP8y7AUwxhIGBuO7dN-VOQ,5277
16
17
  nv_ingest_client/primitives/tasks/__init__.py,sha256=D8X4XuwCxk4g_sMSpNRL1XsjVE1eACYaUdEjSanSEfU,1130
17
18
  nv_ingest_client/primitives/tasks/audio_extraction.py,sha256=KD5VvaRm6PYelfofZq_-83CbOmupgosokZzFERI5wDA,3559
18
19
  nv_ingest_client/primitives/tasks/caption.py,sha256=I1nOpfGb1Ts7QsElwfayhw-F_UcYqtesS-HaZzeh4rI,2130
19
20
  nv_ingest_client/primitives/tasks/chart_extraction.py,sha256=s5hsljgSXxQMZHGekpAg6OYJ9k3-DHk5NmFpvtKJ6Zs,1493
20
21
  nv_ingest_client/primitives/tasks/dedup.py,sha256=qort6p3t6ZJuK_74sfOOLp3vMT3hkB5DAu3467WenyY,1719
21
- nv_ingest_client/primitives/tasks/embed.py,sha256=I6Irmvm1Qj9oqzDGSgfykCtfz8pz9LNxiXO-t29nXv8,5916
22
- nv_ingest_client/primitives/tasks/extract.py,sha256=yJEMGIiquhPlIofE6ERbM-U5tXk-GjZvvnnWOnU7YOA,9335
23
- nv_ingest_client/primitives/tasks/filter.py,sha256=wjcfSBGhdEyPh2tf42NMcyKZziigm24CO9B4obpQytU,2618
22
+ nv_ingest_client/primitives/tasks/embed.py,sha256=YFnymU1UWID2gSrz1anlaL_SRMmDr3dNTeZv2UDu9kQ,6739
23
+ nv_ingest_client/primitives/tasks/extract.py,sha256=bRriVkQyXN-UwzprHIt4Lp0iwmAojLEXqBb-IUrf3vY,9328
24
+ nv_ingest_client/primitives/tasks/filter.py,sha256=dr6fWnh94i50MsGbrz9m_oN6DJKWIWsp7sMwm6Mjz8A,2617
24
25
  nv_ingest_client/primitives/tasks/infographic_extraction.py,sha256=SyTjZQbdVA3QwM5yVm4fUzE4Gu4zm4tAfNLDZMvySV8,1537
25
26
  nv_ingest_client/primitives/tasks/split.py,sha256=8UkB3EialsOTEbsOZLxzmnDIfTJzC6uvjNv21IbgAVA,2332
26
27
  nv_ingest_client/primitives/tasks/store.py,sha256=nIOnCH8vw4FLCLVBJYnsS5Unc0QmuO_jEtUp7-E9FU4,4199
27
28
  nv_ingest_client/primitives/tasks/table_extraction.py,sha256=wQIC70ZNFt0DNQ1lxfvyR3Ci8hl5uAymHXTC0p6v0FY,1107
28
29
  nv_ingest_client/primitives/tasks/task_base.py,sha256=Mrx6kgePJHolYd3Im6mVISXcVgdulLst2MYG5gPov9I,1687
29
- nv_ingest_client/primitives/tasks/task_factory.py,sha256=x8FXrhlgRYTxM0rLvsUvM8whLntXsOSWXrBZ196KO5I,2983
30
- nv_ingest_client/primitives/tasks/udf.py,sha256=5e_WJVgocnK-z0EGCEwPO_zG8WJEhuIsOUTjPmr8REY,12833
30
+ nv_ingest_client/primitives/tasks/task_factory.py,sha256=uvGQXjgWmeF015jPWmBhiclzfrUf3_yD2PPeirQBczM,3218
31
+ nv_ingest_client/primitives/tasks/udf.py,sha256=GZgckhrWSTIQMYLkw4R4XFtx2YeUesAJI22LsNwvBjc,12773
31
32
  nv_ingest_client/primitives/tasks/vdb_upload.py,sha256=mXOyQJfQfaoN96nntzevd0sKUs60-AHi8lc1jxG3DAw,1765
32
33
  nv_ingest_client/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
33
34
  nv_ingest_client/util/dataset.py,sha256=2yDPs47HNj8AOdOAfJL4XVji0BMRJq_NH8CG4s4xT-Q,3701
34
- nv_ingest_client/util/document_analysis.py,sha256=8m_qQhQW7ykgHwg7YdRsNuV_MIP6ige4gwCbkyEoKqA,10568
35
+ nv_ingest_client/util/document_analysis.py,sha256=T4olsfjwm4BZmT9xXT8M8RWKhdCPSASsDpzQmJDflts,10569
35
36
  nv_ingest_client/util/image_disk_utils.py,sha256=M-lSRBvNlOMm20uiYygQ0Oh4GMKspih7G03rKNRzOSE,11507
36
37
  nv_ingest_client/util/milvus.py,sha256=MwBix_UBg54i7xONBIwjcqeKSBkqunxBJBK2f0bPMoo,61
37
38
  nv_ingest_client/util/process_json_files.py,sha256=YKR-fGT4kM8zO2p8r5tpo5-vvFywkcLuNieozvPWvo0,3785
38
39
  nv_ingest_client/util/processing.py,sha256=bAy8it-OUgGFO3pcy6D3ezpyZ6p2DfmoQUGhx3QmVf8,8989
39
40
  nv_ingest_client/util/system.py,sha256=DVIRLlEWkpqftqxazCuPNdaFSjQiHGMYcHzBufJSRUM,2216
40
41
  nv_ingest_client/util/transport.py,sha256=Kwi3r-EUD5yOInW2rH7tYm2DXnzP3aU9l95V-BbXO90,1836
41
- nv_ingest_client/util/util.py,sha256=0hmru3s3J-lgqTDK88x3ZWZGmumAYudjT6vlMpeHVnw,14825
42
+ nv_ingest_client/util/util.py,sha256=qwJ4MqF8w4-lws76z8iz1V0Hz_ebDYN8yAKyJPGuHuU,15828
42
43
  nv_ingest_client/util/zipkin.py,sha256=p2tMtTVAqrZGxmAxWKE42wkx7U5KywiX5munI7rJt_k,4473
43
44
  nv_ingest_client/util/file_processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
45
  nv_ingest_client/util/file_processing/extract.py,sha256=uXEATBYZXjxdymGTNQvvzDD2eHgpuq4PdU6HsMl0Lp0,4662
45
46
  nv_ingest_client/util/vdb/__init__.py,sha256=ZmoEzeM9LzwwrVvu_DVUnjRNx-x8ahkNeIrSfSKzbAk,513
46
47
  nv_ingest_client/util/vdb/adt_vdb.py,sha256=UubzAMSfyrqqpD-OQErpBs25hC2Mw8zGZ4waenGXPOk,515
47
- nv_ingest_client/util/vdb/milvus.py,sha256=dYXszrWdwYYASBW6t8lMI6QK9-BzhV6HAUYjt3cIDsE,78602
48
+ nv_ingest_client/util/vdb/milvus.py,sha256=6XWRh2SDJlgVZOKZVXG3cZTB4L-ZHIiiTenuIzkxp2Y,78704
48
49
  nv_ingest_client/util/vdb/opensearch.py,sha256=I4FzF95VWCOkyzhfm-szdfK1Zd9ugUc8AxxpAdEMWGE,7538
49
- nv_ingest_client-2025.9.26.dev20250926.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
50
- nv_ingest_client-2025.9.26.dev20250926.dist-info/METADATA,sha256=s86EHC2XHJzwHUxVVVEhT9mupz_49N5lki4B_98M2Wg,30626
51
- nv_ingest_client-2025.9.26.dev20250926.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
52
- nv_ingest_client-2025.9.26.dev20250926.dist-info/entry_points.txt,sha256=3uQVZkTZIjO08_bjTV-g0CDF5H1nrP1zWXU9gJOweuI,137
53
- nv_ingest_client-2025.9.26.dev20250926.dist-info/top_level.txt,sha256=1eMhBFD3SiWmpXnod2LM66C1HrSLSk96ninZi5XX-cE,17
54
- nv_ingest_client-2025.9.26.dev20250926.dist-info/RECORD,,
50
+ nv_ingest_client-2025.11.2.dev20251102.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
51
+ nv_ingest_client-2025.11.2.dev20251102.dist-info/METADATA,sha256=5WbspmKFTwC952iUCOqw5Wt07eWhsY2XwgdKl2DwbzE,30626
52
+ nv_ingest_client-2025.11.2.dev20251102.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
53
+ nv_ingest_client-2025.11.2.dev20251102.dist-info/entry_points.txt,sha256=3uQVZkTZIjO08_bjTV-g0CDF5H1nrP1zWXU9gJOweuI,137
54
+ nv_ingest_client-2025.11.2.dev20251102.dist-info/top_level.txt,sha256=1eMhBFD3SiWmpXnod2LM66C1HrSLSk96ninZi5XX-cE,17
55
+ nv_ingest_client-2025.11.2.dev20251102.dist-info/RECORD,,