nv-ingest-client 2025.7.24.dev20250724__py3-none-any.whl → 2025.11.2.dev20251102__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nv-ingest-client might be problematic. Click here for more details.

Files changed (38)
  1. nv_ingest_client/cli/util/click.py +182 -30
  2. nv_ingest_client/cli/util/processing.py +0 -393
  3. nv_ingest_client/client/client.py +561 -207
  4. nv_ingest_client/client/ingest_job_handler.py +412 -0
  5. nv_ingest_client/client/interface.py +466 -59
  6. nv_ingest_client/client/util/processing.py +11 -1
  7. nv_ingest_client/nv_ingest_cli.py +58 -6
  8. nv_ingest_client/primitives/jobs/job_spec.py +32 -10
  9. nv_ingest_client/primitives/tasks/__init__.py +6 -4
  10. nv_ingest_client/primitives/tasks/audio_extraction.py +27 -23
  11. nv_ingest_client/primitives/tasks/caption.py +10 -16
  12. nv_ingest_client/primitives/tasks/chart_extraction.py +16 -10
  13. nv_ingest_client/primitives/tasks/dedup.py +12 -21
  14. nv_ingest_client/primitives/tasks/embed.py +37 -76
  15. nv_ingest_client/primitives/tasks/extract.py +68 -169
  16. nv_ingest_client/primitives/tasks/filter.py +22 -28
  17. nv_ingest_client/primitives/tasks/infographic_extraction.py +16 -13
  18. nv_ingest_client/primitives/tasks/split.py +17 -18
  19. nv_ingest_client/primitives/tasks/store.py +29 -29
  20. nv_ingest_client/primitives/tasks/task_base.py +1 -72
  21. nv_ingest_client/primitives/tasks/task_factory.py +10 -11
  22. nv_ingest_client/primitives/tasks/udf.py +349 -0
  23. nv_ingest_client/util/dataset.py +8 -2
  24. nv_ingest_client/util/document_analysis.py +314 -0
  25. nv_ingest_client/util/image_disk_utils.py +300 -0
  26. nv_ingest_client/util/transport.py +12 -6
  27. nv_ingest_client/util/util.py +66 -0
  28. nv_ingest_client/util/vdb/milvus.py +220 -75
  29. {nv_ingest_client-2025.7.24.dev20250724.dist-info → nv_ingest_client-2025.11.2.dev20251102.dist-info}/METADATA +1 -3
  30. nv_ingest_client-2025.11.2.dev20251102.dist-info/RECORD +55 -0
  31. nv_ingest_client/cli/util/tasks.py +0 -3
  32. nv_ingest_client/primitives/exceptions.py +0 -0
  33. nv_ingest_client/primitives/tasks/transform.py +0 -0
  34. nv_ingest_client-2025.7.24.dev20250724.dist-info/RECORD +0 -54
  35. {nv_ingest_client-2025.7.24.dev20250724.dist-info → nv_ingest_client-2025.11.2.dev20251102.dist-info}/WHEEL +0 -0
  36. {nv_ingest_client-2025.7.24.dev20250724.dist-info → nv_ingest_client-2025.11.2.dev20251102.dist-info}/entry_points.txt +0 -0
  37. {nv_ingest_client-2025.7.24.dev20250724.dist-info → nv_ingest_client-2025.11.2.dev20251102.dist-info}/licenses/LICENSE +0 -0
  38. {nv_ingest_client-2025.7.24.dev20250724.dist-info → nv_ingest_client-2025.11.2.dev20251102.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest-client
3
- Version: 2025.7.24.dev20250724
3
+ Version: 2025.11.2.dev20251102
4
4
  Summary: Python client for the nv-ingest service
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -218,7 +218,6 @@ Requires-Dist: charset-normalizer>=3.4.1
218
218
  Requires-Dist: click>=8.1.8
219
219
  Requires-Dist: fsspec>=2025.2.0
220
220
  Requires-Dist: httpx>=0.28.1
221
- Requires-Dist: llama-index-embeddings-nvidia==0.1.5
222
221
  Requires-Dist: pydantic>2.0.0
223
222
  Requires-Dist: pydantic-settings>2.0.0
224
223
  Requires-Dist: requests>=2.28.2
@@ -227,7 +226,6 @@ Requires-Dist: tqdm>=4.67.1
227
226
  Provides-Extra: milvus
228
227
  Requires-Dist: pymilvus==2.5.10; extra == "milvus"
229
228
  Requires-Dist: pymilvus[bulk_writer,model]; extra == "milvus"
230
- Requires-Dist: langchain-milvus>=0.1.10; extra == "milvus"
231
229
  Provides-Extra: minio
232
230
  Requires-Dist: minio>=7.2.15; extra == "minio"
233
231
  Dynamic: license-file
@@ -0,0 +1,55 @@
1
+ nv_ingest_client/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
2
+ nv_ingest_client/nv_ingest_cli.py,sha256=84fc0-6TUe-0BMasRIiRH4okfjno4AKCaKvUwJEZ45k,14457
3
+ nv_ingest_client/cli/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
4
+ nv_ingest_client/cli/util/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
5
+ nv_ingest_client/cli/util/click.py,sha256=YjQU1uF148FU5D3ozC2m1kkfOOJxO1U8U552-T8PjU4,20029
6
+ nv_ingest_client/cli/util/processing.py,sha256=ULGCYQF1RTDQV_b35YM1WQRqIjR2wQRMJWu41DogagE,6259
7
+ nv_ingest_client/cli/util/system.py,sha256=AQLq0DD2Ns8jRanrKu1tmVBKPA9rl-F3-ZsGI6FXLqE,1105
8
+ nv_ingest_client/client/__init__.py,sha256=eEX9l1qmkLH2lAAZU3eP17SCV06ZjjrshHAB_xbboHA,375
9
+ nv_ingest_client/client/client.py,sha256=WH2KRuaqoRm0qe3XAomAJQUetCDXp84xqcsMdumICbk,77505
10
+ nv_ingest_client/client/ingest_job_handler.py,sha256=4exvMwXbzwC-tb0dWleXE-AwhJkvxvhkf_u_1bJt30U,18387
11
+ nv_ingest_client/client/interface.py,sha256=vmRdooNkaMVBv6RSxcgMYHfmMs0E3ZBnyrp5mmmhCOI,51247
12
+ nv_ingest_client/client/util/processing.py,sha256=Ky7x7QbLn3BlgYwmrmoIc-o1VwmlmrcP9tn7GVTi0t0,2502
13
+ nv_ingest_client/primitives/__init__.py,sha256=3rbpLCI7Bl0pntGatAxXD_V01y6dcLhHFheI3wqet-I,269
14
+ nv_ingest_client/primitives/jobs/__init__.py,sha256=-yohgHv3LcCtSleHSaxjv1oO7nNcMCjN3ZYoOkIypIk,469
15
+ nv_ingest_client/primitives/jobs/job_spec.py,sha256=teAZbpvxn25jIEUP5YJsAX_E_z9iWhejS-uy5opshFM,15681
16
+ nv_ingest_client/primitives/jobs/job_state.py,sha256=CEe_oZr4p_MobauWIyhuNrP8y7AUwxhIGBuO7dN-VOQ,5277
17
+ nv_ingest_client/primitives/tasks/__init__.py,sha256=D8X4XuwCxk4g_sMSpNRL1XsjVE1eACYaUdEjSanSEfU,1130
18
+ nv_ingest_client/primitives/tasks/audio_extraction.py,sha256=KD5VvaRm6PYelfofZq_-83CbOmupgosokZzFERI5wDA,3559
19
+ nv_ingest_client/primitives/tasks/caption.py,sha256=I1nOpfGb1Ts7QsElwfayhw-F_UcYqtesS-HaZzeh4rI,2130
20
+ nv_ingest_client/primitives/tasks/chart_extraction.py,sha256=s5hsljgSXxQMZHGekpAg6OYJ9k3-DHk5NmFpvtKJ6Zs,1493
21
+ nv_ingest_client/primitives/tasks/dedup.py,sha256=qort6p3t6ZJuK_74sfOOLp3vMT3hkB5DAu3467WenyY,1719
22
+ nv_ingest_client/primitives/tasks/embed.py,sha256=YFnymU1UWID2gSrz1anlaL_SRMmDr3dNTeZv2UDu9kQ,6739
23
+ nv_ingest_client/primitives/tasks/extract.py,sha256=bRriVkQyXN-UwzprHIt4Lp0iwmAojLEXqBb-IUrf3vY,9328
24
+ nv_ingest_client/primitives/tasks/filter.py,sha256=dr6fWnh94i50MsGbrz9m_oN6DJKWIWsp7sMwm6Mjz8A,2617
25
+ nv_ingest_client/primitives/tasks/infographic_extraction.py,sha256=SyTjZQbdVA3QwM5yVm4fUzE4Gu4zm4tAfNLDZMvySV8,1537
26
+ nv_ingest_client/primitives/tasks/split.py,sha256=8UkB3EialsOTEbsOZLxzmnDIfTJzC6uvjNv21IbgAVA,2332
27
+ nv_ingest_client/primitives/tasks/store.py,sha256=nIOnCH8vw4FLCLVBJYnsS5Unc0QmuO_jEtUp7-E9FU4,4199
28
+ nv_ingest_client/primitives/tasks/table_extraction.py,sha256=wQIC70ZNFt0DNQ1lxfvyR3Ci8hl5uAymHXTC0p6v0FY,1107
29
+ nv_ingest_client/primitives/tasks/task_base.py,sha256=Mrx6kgePJHolYd3Im6mVISXcVgdulLst2MYG5gPov9I,1687
30
+ nv_ingest_client/primitives/tasks/task_factory.py,sha256=uvGQXjgWmeF015jPWmBhiclzfrUf3_yD2PPeirQBczM,3218
31
+ nv_ingest_client/primitives/tasks/udf.py,sha256=GZgckhrWSTIQMYLkw4R4XFtx2YeUesAJI22LsNwvBjc,12773
32
+ nv_ingest_client/primitives/tasks/vdb_upload.py,sha256=mXOyQJfQfaoN96nntzevd0sKUs60-AHi8lc1jxG3DAw,1765
33
+ nv_ingest_client/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
34
+ nv_ingest_client/util/dataset.py,sha256=2yDPs47HNj8AOdOAfJL4XVji0BMRJq_NH8CG4s4xT-Q,3701
35
+ nv_ingest_client/util/document_analysis.py,sha256=T4olsfjwm4BZmT9xXT8M8RWKhdCPSASsDpzQmJDflts,10569
36
+ nv_ingest_client/util/image_disk_utils.py,sha256=M-lSRBvNlOMm20uiYygQ0Oh4GMKspih7G03rKNRzOSE,11507
37
+ nv_ingest_client/util/milvus.py,sha256=MwBix_UBg54i7xONBIwjcqeKSBkqunxBJBK2f0bPMoo,61
38
+ nv_ingest_client/util/process_json_files.py,sha256=YKR-fGT4kM8zO2p8r5tpo5-vvFywkcLuNieozvPWvo0,3785
39
+ nv_ingest_client/util/processing.py,sha256=bAy8it-OUgGFO3pcy6D3ezpyZ6p2DfmoQUGhx3QmVf8,8989
40
+ nv_ingest_client/util/system.py,sha256=DVIRLlEWkpqftqxazCuPNdaFSjQiHGMYcHzBufJSRUM,2216
41
+ nv_ingest_client/util/transport.py,sha256=Kwi3r-EUD5yOInW2rH7tYm2DXnzP3aU9l95V-BbXO90,1836
42
+ nv_ingest_client/util/util.py,sha256=qwJ4MqF8w4-lws76z8iz1V0Hz_ebDYN8yAKyJPGuHuU,15828
43
+ nv_ingest_client/util/zipkin.py,sha256=p2tMtTVAqrZGxmAxWKE42wkx7U5KywiX5munI7rJt_k,4473
44
+ nv_ingest_client/util/file_processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
45
+ nv_ingest_client/util/file_processing/extract.py,sha256=uXEATBYZXjxdymGTNQvvzDD2eHgpuq4PdU6HsMl0Lp0,4662
46
+ nv_ingest_client/util/vdb/__init__.py,sha256=ZmoEzeM9LzwwrVvu_DVUnjRNx-x8ahkNeIrSfSKzbAk,513
47
+ nv_ingest_client/util/vdb/adt_vdb.py,sha256=UubzAMSfyrqqpD-OQErpBs25hC2Mw8zGZ4waenGXPOk,515
48
+ nv_ingest_client/util/vdb/milvus.py,sha256=6XWRh2SDJlgVZOKZVXG3cZTB4L-ZHIiiTenuIzkxp2Y,78704
49
+ nv_ingest_client/util/vdb/opensearch.py,sha256=I4FzF95VWCOkyzhfm-szdfK1Zd9ugUc8AxxpAdEMWGE,7538
50
+ nv_ingest_client-2025.11.2.dev20251102.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
51
+ nv_ingest_client-2025.11.2.dev20251102.dist-info/METADATA,sha256=5WbspmKFTwC952iUCOqw5Wt07eWhsY2XwgdKl2DwbzE,30626
52
+ nv_ingest_client-2025.11.2.dev20251102.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
53
+ nv_ingest_client-2025.11.2.dev20251102.dist-info/entry_points.txt,sha256=3uQVZkTZIjO08_bjTV-g0CDF5H1nrP1zWXU9gJOweuI,137
54
+ nv_ingest_client-2025.11.2.dev20251102.dist-info/top_level.txt,sha256=1eMhBFD3SiWmpXnod2LM66C1HrSLSk96ninZi5XX-cE,17
55
+ nv_ingest_client-2025.11.2.dev20251102.dist-info/RECORD,,
@@ -1,3 +0,0 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2024-25, NVIDIA CORPORATION & AFFILIATES.
2
- # All rights reserved.
3
- # SPDX-License-Identifier: Apache-2.0
File without changes
File without changes
@@ -1,54 +0,0 @@
1
- nv_ingest_client/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
2
- nv_ingest_client/nv_ingest_cli.py,sha256=CXci0OZcm8DGzZmEkFgmBC8x57smUnhrDuiHpGeav0g,11822
3
- nv_ingest_client/cli/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
4
- nv_ingest_client/cli/util/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
5
- nv_ingest_client/cli/util/click.py,sha256=TZacPGWFz0U7ycVZ3BDGfJ1H0HKGJceFms1O0cfr6PI,11618
6
- nv_ingest_client/cli/util/processing.py,sha256=7mXPjjNjLzWQY7WSxpm6et6ZEZOj0GYhLqvz-jx6MO4,24002
7
- nv_ingest_client/cli/util/system.py,sha256=AQLq0DD2Ns8jRanrKu1tmVBKPA9rl-F3-ZsGI6FXLqE,1105
8
- nv_ingest_client/cli/util/tasks.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
9
- nv_ingest_client/client/__init__.py,sha256=eEX9l1qmkLH2lAAZU3eP17SCV06ZjjrshHAB_xbboHA,375
10
- nv_ingest_client/client/client.py,sha256=s8g8JWduR86fNycbten9lW5RN3-ytvOz6jEH0E4MZ5c,63318
11
- nv_ingest_client/client/interface.py,sha256=i38jTPATGnOa10HiPocrt8mLE3V3VYiRPQJvbh42ViM,32987
12
- nv_ingest_client/client/util/processing.py,sha256=MtVRtGnRB8unwTa5b6-LYODx-7kg-RYP3wLmjdqymXw,2195
13
- nv_ingest_client/primitives/__init__.py,sha256=3rbpLCI7Bl0pntGatAxXD_V01y6dcLhHFheI3wqet-I,269
14
- nv_ingest_client/primitives/exceptions.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
- nv_ingest_client/primitives/jobs/__init__.py,sha256=-yohgHv3LcCtSleHSaxjv1oO7nNcMCjN3ZYoOkIypIk,469
16
- nv_ingest_client/primitives/jobs/job_spec.py,sha256=XHHvED_F1QVG4JPDLVo9f3AJKzydm14AwJ1sN5XDR3w,14712
17
- nv_ingest_client/primitives/jobs/job_state.py,sha256=CEe_oZr4p_MobauWIyhuNrP8y7AUwxhIGBuO7dN-VOQ,5277
18
- nv_ingest_client/primitives/tasks/__init__.py,sha256=BnmFhmdP0wntJV6BNww8MhGN8BaRQOY4aBcV1J4-sFA,1090
19
- nv_ingest_client/primitives/tasks/audio_extraction.py,sha256=8-Px1TKUE4-0DY8p9exDj6Y9vStpDeTFjqtpIA4_ID0,3172
20
- nv_ingest_client/primitives/tasks/caption.py,sha256=7umiuyxDs0o8ayRJVm5Q20Z8EjNUDZNlSEx2rAKcSAU,2127
21
- nv_ingest_client/primitives/tasks/chart_extraction.py,sha256=g2uCI34BuuKquhK2VtnrhsPjH_i8ngZbeONWGYBKcsU,1106
22
- nv_ingest_client/primitives/tasks/dedup.py,sha256=5Q8ok2PA-MoT5NZ8GFUDrKsSMoQWcyBFNP3hRHFqOho,1834
23
- nv_ingest_client/primitives/tasks/embed.py,sha256=M5zhob3Is8hEIh3vcITnvdORe6SHsGXNlP0b5eu42us,7799
24
- nv_ingest_client/primitives/tasks/extract.py,sha256=GMxXXrzEWdUlUa_YFIfD0H7McccuRUen0xzTJUSXhE4,13714
25
- nv_ingest_client/primitives/tasks/filter.py,sha256=-gi-r2s3JNM5eEokS9GHnB1MxTtowrmO7rN0TSYLcbU,2603
26
- nv_ingest_client/primitives/tasks/infographic_extraction.py,sha256=HnJPTRGqZOAX0eROKHwYIVa4S_0Noij-FHhTpDgJzY4,1136
27
- nv_ingest_client/primitives/tasks/split.py,sha256=0fqXhvlgMxS9X_9HuUSx2yW6C9aYiXRryjl8S-ZAIH4,2104
28
- nv_ingest_client/primitives/tasks/store.py,sha256=Vvay-YTBScsJh_XCWxuZryBb5VvXyR79VQoUFfyR93Y,3637
29
- nv_ingest_client/primitives/tasks/table_extraction.py,sha256=wQIC70ZNFt0DNQ1lxfvyR3Ci8hl5uAymHXTC0p6v0FY,1107
30
- nv_ingest_client/primitives/tasks/task_base.py,sha256=kk9ln-fVxtOSq9bzB_J8z3QefE9a_0uHnAuQUmyVYtg,3768
31
- nv_ingest_client/primitives/tasks/task_factory.py,sha256=Lv-_Uh5jOYaTX8Nnoc70Qsu3jXxtUmKRtAFfUIOUT6Q,2931
32
- nv_ingest_client/primitives/tasks/transform.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
33
- nv_ingest_client/primitives/tasks/vdb_upload.py,sha256=mXOyQJfQfaoN96nntzevd0sKUs60-AHi8lc1jxG3DAw,1765
34
- nv_ingest_client/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
35
- nv_ingest_client/util/dataset.py,sha256=b6if_hM15iUJC4rvSHS0cmGBsSuZ3W-NoKDMTulx4b8,3316
36
- nv_ingest_client/util/milvus.py,sha256=MwBix_UBg54i7xONBIwjcqeKSBkqunxBJBK2f0bPMoo,61
37
- nv_ingest_client/util/process_json_files.py,sha256=YKR-fGT4kM8zO2p8r5tpo5-vvFywkcLuNieozvPWvo0,3785
38
- nv_ingest_client/util/processing.py,sha256=bAy8it-OUgGFO3pcy6D3ezpyZ6p2DfmoQUGhx3QmVf8,8989
39
- nv_ingest_client/util/system.py,sha256=DVIRLlEWkpqftqxazCuPNdaFSjQiHGMYcHzBufJSRUM,2216
40
- nv_ingest_client/util/transport.py,sha256=Rzdj9GxYsJVbGuh95H2AoHTMsFj-oZC1TiN9pT5vRPA,1674
41
- nv_ingest_client/util/util.py,sha256=TnVAxkDcxsfoctmB4KfDGVADr9wqrKS6vY9PSbBrX2s,13475
42
- nv_ingest_client/util/zipkin.py,sha256=p2tMtTVAqrZGxmAxWKE42wkx7U5KywiX5munI7rJt_k,4473
43
- nv_ingest_client/util/file_processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
- nv_ingest_client/util/file_processing/extract.py,sha256=uXEATBYZXjxdymGTNQvvzDD2eHgpuq4PdU6HsMl0Lp0,4662
45
- nv_ingest_client/util/vdb/__init__.py,sha256=ZmoEzeM9LzwwrVvu_DVUnjRNx-x8ahkNeIrSfSKzbAk,513
46
- nv_ingest_client/util/vdb/adt_vdb.py,sha256=UubzAMSfyrqqpD-OQErpBs25hC2Mw8zGZ4waenGXPOk,515
47
- nv_ingest_client/util/vdb/milvus.py,sha256=WZnC6sFF6bu24odd4lITI8p0HWV45SV1WHs4bxgNIvM,73391
48
- nv_ingest_client/util/vdb/opensearch.py,sha256=I4FzF95VWCOkyzhfm-szdfK1Zd9ugUc8AxxpAdEMWGE,7538
49
- nv_ingest_client-2025.7.24.dev20250724.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
50
- nv_ingest_client-2025.7.24.dev20250724.dist-info/METADATA,sha256=NK4AuSJSCVptHbJLC9iN8wBJZ-kpbYXRqdQ0HZlqVRw,30737
51
- nv_ingest_client-2025.7.24.dev20250724.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
52
- nv_ingest_client-2025.7.24.dev20250724.dist-info/entry_points.txt,sha256=3uQVZkTZIjO08_bjTV-g0CDF5H1nrP1zWXU9gJOweuI,137
53
- nv_ingest_client-2025.7.24.dev20250724.dist-info/top_level.txt,sha256=1eMhBFD3SiWmpXnod2LM66C1HrSLSk96ninZi5XX-cE,17
54
- nv_ingest_client-2025.7.24.dev20250724.dist-info/RECORD,,