nv-ingest 2025.12.27.dev20251227__py3-none-any.whl → 2026.1.14.dev20260114__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nv_ingest/api/v2/ingest.py +11 -6
- nv_ingest/pipeline/default_libmode_pipeline_impl.py +9 -9
- {nv_ingest-2025.12.27.dev20251227.dist-info → nv_ingest-2026.1.14.dev20260114.dist-info}/METADATA +2 -3
- {nv_ingest-2025.12.27.dev20251227.dist-info → nv_ingest-2026.1.14.dev20260114.dist-info}/RECORD +7 -7
- {nv_ingest-2025.12.27.dev20251227.dist-info → nv_ingest-2026.1.14.dev20260114.dist-info}/WHEEL +0 -0
- {nv_ingest-2025.12.27.dev20251227.dist-info → nv_ingest-2026.1.14.dev20260114.dist-info}/licenses/LICENSE +0 -0
- {nv_ingest-2025.12.27.dev20251227.dist-info → nv_ingest-2026.1.14.dev20260114.dist-info}/top_level.txt +0 -0
nv_ingest/api/v2/ingest.py
CHANGED
|
@@ -122,11 +122,16 @@ def get_pdf_split_page_count(client_override: Optional[int] = None) -> int:
|
|
|
122
122
|
)
|
|
123
123
|
return DEFAULT_PDF_SPLIT_PAGE_COUNT
|
|
124
124
|
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
125
|
+
clamped = max(MIN_PAGES, min(parsed, MAX_PAGES))
|
|
126
|
+
if clamped != parsed:
|
|
127
|
+
logger.warning(
|
|
128
|
+
"Env PDF_SPLIT_PAGE_COUNT=%s clamped to %s (min=%s, max=%s)",
|
|
129
|
+
parsed,
|
|
130
|
+
clamped,
|
|
131
|
+
MIN_PAGES,
|
|
132
|
+
MAX_PAGES,
|
|
133
|
+
)
|
|
134
|
+
return clamped
|
|
130
135
|
|
|
131
136
|
|
|
132
137
|
def split_pdf_to_chunks(pdf_content: bytes, pages_per_chunk: int) -> List[Dict[str, Any]]:
|
|
@@ -955,7 +960,7 @@ async def submit_job_v2(
|
|
|
955
960
|
"subjob_order": subjob_ids,
|
|
956
961
|
}
|
|
957
962
|
)
|
|
958
|
-
elif document_types and payloads and document_types[0].lower() in ["mp4", "mov", "avi", "mp3", "wav"]:
|
|
963
|
+
elif document_types and payloads and document_types[0].lower() in ["mp4", "mov", "avi", "mp3", "wav", "mkv"]:
|
|
959
964
|
document_type = document_types[0]
|
|
960
965
|
upload_path = f"./{Path(original_source_id).name}"
|
|
961
966
|
# dump the payload to a file, just came from client
|
|
nv_ingest/pipeline/default_libmode_pipeline_impl.py
CHANGED
|
@@ -68,7 +68,7 @@ stages:
|
|
|
68
68
|
auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
|
|
69
69
|
yolox_endpoints: [
|
|
70
70
|
$YOLOX_GRPC_ENDPOINT|"",
|
|
71
|
-
$YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-
|
|
71
|
+
$YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v3"
|
|
72
72
|
]
|
|
73
73
|
yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|http
|
|
74
74
|
nemotron_parse_config:
|
|
@@ -81,7 +81,7 @@ stages:
|
|
|
81
81
|
nemotron_parse_model_name: $NEMOTRON_PARSE_MODEL_NAME|"nvidia/nemotron-parse"
|
|
82
82
|
yolox_endpoints: [
|
|
83
83
|
$YOLOX_GRPC_ENDPOINT|"",
|
|
84
|
-
$YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-
|
|
84
|
+
$YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v3"
|
|
85
85
|
]
|
|
86
86
|
yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|http
|
|
87
87
|
replicas:
|
|
@@ -124,14 +124,14 @@ stages:
|
|
|
124
124
|
docx_extraction_config:
|
|
125
125
|
yolox_endpoints: [
|
|
126
126
|
$YOLOX_GRPC_ENDPOINT|"",
|
|
127
|
-
$YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-
|
|
127
|
+
$YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v3"
|
|
128
128
|
]
|
|
129
129
|
yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|http
|
|
130
130
|
auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
|
|
131
131
|
pdfium_config:
|
|
132
132
|
yolox_endpoints: [
|
|
133
133
|
$YOLOX_GRPC_ENDPOINT|"",
|
|
134
|
-
$YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-
|
|
134
|
+
$YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v3"
|
|
135
135
|
]
|
|
136
136
|
yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|http
|
|
137
137
|
auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
|
|
@@ -152,14 +152,14 @@ stages:
|
|
|
152
152
|
pptx_extraction_config:
|
|
153
153
|
yolox_endpoints: [
|
|
154
154
|
$YOLOX_GRPC_ENDPOINT|"",
|
|
155
|
-
$YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-
|
|
155
|
+
$YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v3"
|
|
156
156
|
]
|
|
157
157
|
yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|http
|
|
158
158
|
auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
|
|
159
159
|
pdfium_config:
|
|
160
160
|
yolox_endpoints: [
|
|
161
161
|
$YOLOX_GRPC_ENDPOINT|"",
|
|
162
|
-
$YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-
|
|
162
|
+
$YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v3"
|
|
163
163
|
]
|
|
164
164
|
yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|http
|
|
165
165
|
auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
|
|
@@ -180,7 +180,7 @@ stages:
|
|
|
180
180
|
image_extraction_config:
|
|
181
181
|
yolox_endpoints: [
|
|
182
182
|
$YOLOX_GRPC_ENDPOINT|"",
|
|
183
|
-
$YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-
|
|
183
|
+
$YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v3"
|
|
184
184
|
]
|
|
185
185
|
yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|http
|
|
186
186
|
auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
|
|
@@ -243,7 +243,7 @@ stages:
|
|
|
243
243
|
$OCR_GRPC_ENDPOINT|"",
|
|
244
244
|
$OCR_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-ocr-v1"
|
|
245
245
|
]
|
|
246
|
-
ocr_infer_protocol: $
|
|
246
|
+
ocr_infer_protocol: $OCR_INFER_PROTOCOL|"http"
|
|
247
247
|
auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
|
|
248
248
|
replicas:
|
|
249
249
|
min_replicas: 0
|
|
@@ -332,7 +332,7 @@ stages:
|
|
|
332
332
|
actor: "nv_ingest.framework.orchestration.ray.stages.transforms.image_caption:ImageCaptionTransformStage"
|
|
333
333
|
config:
|
|
334
334
|
api_key: $NGC_API_KEY|$NVIDIA_API_KEY
|
|
335
|
-
endpoint_url: $VLM_CAPTION_ENDPOINT|"
|
|
335
|
+
endpoint_url: $VLM_CAPTION_ENDPOINT|"https://integrate.api.nvidia.com/v1/chat/completions"
|
|
336
336
|
model_name: $VLM_CAPTION_MODEL_NAME|"nvidia/nemotron-nano-12b-v2-vl"
|
|
337
337
|
prompt: $VLM_CAPTION_PROMPT|"Caption the content of this image:"
|
|
338
338
|
system_prompt: $VLM_CAPTION_SYSTEM_PROMPT|"/no_think"
|
{nv_ingest-2025.12.27.dev20251227.dist-info → nv_ingest-2026.1.14.dev20260114.dist-info}/METADATA
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: nv-ingest
|
|
3
|
-
Version: 2025.12.27.dev20251227
|
|
3
|
+
Version: 2026.1.14.dev20260114
|
|
4
4
|
Summary: Python module for multimodal document ingestion
|
|
5
5
|
Author-email: Jeremy Dyer <jdyer@nvidia.com>
|
|
6
6
|
License: Apache License
|
|
@@ -227,7 +227,7 @@ Requires-Dist: httpx>=0.28.1
|
|
|
227
227
|
Requires-Dist: isodate>=0.7.2
|
|
228
228
|
Requires-Dist: langdetect>=1.0.9
|
|
229
229
|
Requires-Dist: minio>=7.2.12
|
|
230
|
-
Requires-Dist: librosa
|
|
230
|
+
Requires-Dist: librosa==0.10.2
|
|
231
231
|
Requires-Dist: opentelemetry-api>=1.27.0
|
|
232
232
|
Requires-Dist: opentelemetry-exporter-otlp>=1.27.0
|
|
233
233
|
Requires-Dist: opentelemetry-sdk>=1.27.0
|
|
@@ -254,7 +254,6 @@ Requires-Dist: transformers>=4.47.0
|
|
|
254
254
|
Requires-Dist: tqdm>=4.67.1
|
|
255
255
|
Requires-Dist: uvicorn
|
|
256
256
|
Requires-Dist: pip
|
|
257
|
-
Requires-Dist: llama-index-embeddings-nvidia
|
|
258
257
|
Requires-Dist: opencv-python
|
|
259
258
|
Requires-Dist: pymilvus>=2.5.10
|
|
260
259
|
Requires-Dist: pymilvus[bulk_writer,model]
|
{nv_ingest-2025.12.27.dev20251227.dist-info → nv_ingest-2026.1.14.dev20260114.dist-info}/RECORD
RENAMED
|
@@ -9,7 +9,7 @@ nv_ingest/api/v1/ingest.py,sha256=LWk3LN4lBd3uO8h30EN42g3LHCVcO00avVd5ohVK7NI,19
|
|
|
9
9
|
nv_ingest/api/v1/metrics.py,sha256=ZGVRApYLnzc2f2C7wRgGd7deqiXan-jxfA-33a16clY,981
|
|
10
10
|
nv_ingest/api/v2/README.md,sha256=VhpdjEmCyr3qIOhwqISFx9C5WezJFcxYc-NB9S98HMg,7562
|
|
11
11
|
nv_ingest/api/v2/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
12
|
-
nv_ingest/api/v2/ingest.py,sha256=
|
|
12
|
+
nv_ingest/api/v2/ingest.py,sha256=DjPqw1SwQqwqBraQ7n1WajejnGeHbqGmXpzzyfRohH0,53256
|
|
13
13
|
nv_ingest/framework/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
14
14
|
nv_ingest/framework/orchestration/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
15
15
|
nv_ingest/framework/orchestration/execution/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
@@ -112,15 +112,15 @@ nv_ingest/framework/util/service/meta/ingest/ingest_service_meta.py,sha256=QS3uN
|
|
|
112
112
|
nv_ingest/framework/util/telemetry/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
113
113
|
nv_ingest/framework/util/telemetry/global_stats.py,sha256=nq65pEEdiwjAfGiqsxG1CeQMC96O3CfQxsZuGFCY-ds,4554
|
|
114
114
|
nv_ingest/pipeline/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
115
|
-
nv_ingest/pipeline/default_libmode_pipeline_impl.py,sha256=
|
|
115
|
+
nv_ingest/pipeline/default_libmode_pipeline_impl.py,sha256=PSFWMwrKCEBlo6e8nCdL5mg84SawoExwrUbc4nOHu5M,16281
|
|
116
116
|
nv_ingest/pipeline/default_pipeline_impl.py,sha256=6SykgH_LJ8uuE2jrWGIT7OkJP6EjPyB8Ju6LMDu5IK0,16800
|
|
117
117
|
nv_ingest/pipeline/ingest_pipeline.py,sha256=wHAJhqAM2s8nbY-8itVogmSU-yVN4PZONGWcKnhzgfg,17794
|
|
118
118
|
nv_ingest/pipeline/pipeline_schema.py,sha256=rLZZz2It2o2hVNWrZUJU8CarrqRei1fho3ZEMkkoBcg,17940
|
|
119
119
|
nv_ingest/pipeline/config/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
120
120
|
nv_ingest/pipeline/config/loaders.py,sha256=75Yr9WYO7j7ghvKTnYLfZXQZEH3J3VEZo5J4TunC_Us,7590
|
|
121
121
|
nv_ingest/pipeline/config/replica_resolver.py,sha256=dEwqMXNttfw0QeisTGGkp24785jqzVCDAEFyQIffeGc,9369
|
|
122
|
-
nv_ingest-
|
|
123
|
-
nv_ingest-
|
|
124
|
-
nv_ingest-
|
|
125
|
-
nv_ingest-
|
|
126
|
-
nv_ingest-
|
|
122
|
+
nv_ingest-2026.1.14.dev20260114.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
123
|
+
nv_ingest-2026.1.14.dev20260114.dist-info/METADATA,sha256=UZHSNIHvkqd7dlc7kZaaMN5L5vK2ZKiJAyvrIubfGpk,15117
|
|
124
|
+
nv_ingest-2026.1.14.dev20260114.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
125
|
+
nv_ingest-2026.1.14.dev20260114.dist-info/top_level.txt,sha256=sjb0ajIsgn3YgftSjZHlYO0HjYAIIhNuXG_AmywCvaU,10
|
|
126
|
+
nv_ingest-2026.1.14.dev20260114.dist-info/RECORD,,
|
{nv_ingest-2025.12.27.dev20251227.dist-info → nv_ingest-2026.1.14.dev20260114.dist-info}/WHEEL
RENAMED
|
File without changes
|
|
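{nv_ingest-2025.12.27.dev20251227.dist-info → nv_ingest-2026.1.14.dev20260114.dist-info}/licenses/LICENSE
RENAMED
|
File without changes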
|
|
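{nv_ingest-2025.12.27.dev20251227.dist-info → nv_ingest-2026.1.14.dev20260114.dist-info}/top_level.txt
RENAMED
|
File without changes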
|