nv-ingest 2026.1.4.dev20260104__py3-none-any.whl → 2026.1.5.dev20260105__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -122,11 +122,16 @@ def get_pdf_split_page_count(client_override: Optional[int] = None) -> int:
122
122
  )
123
123
  return DEFAULT_PDF_SPLIT_PAGE_COUNT
124
124
 
125
- if parsed <= 0:
126
- logger.warning("PDF_SPLIT_PAGE_COUNT must be >= 1; received %s. Using 1.", parsed)
127
- return 1
128
-
129
- return parsed
125
+ clamped = max(MIN_PAGES, min(parsed, MAX_PAGES))
126
+ if clamped != parsed:
127
+ logger.warning(
128
+ "Env PDF_SPLIT_PAGE_COUNT=%s clamped to %s (min=%s, max=%s)",
129
+ parsed,
130
+ clamped,
131
+ MIN_PAGES,
132
+ MAX_PAGES,
133
+ )
134
+ return clamped
130
135
 
131
136
 
132
137
  def split_pdf_to_chunks(pdf_content: bytes, pages_per_chunk: int) -> List[Dict[str, Any]]:
@@ -955,7 +960,7 @@ async def submit_job_v2(
955
960
  "subjob_order": subjob_ids,
956
961
  }
957
962
  )
958
- elif document_types and payloads and document_types[0].lower() in ["mp4", "mov", "avi", "mp3", "wav"]:
963
+ elif document_types and payloads and document_types[0].lower() in ["mp4", "mov", "avi", "mp3", "wav", "mkv"]:
959
964
  document_type = document_types[0]
960
965
  upload_path = f"./{Path(original_source_id).name}"
961
966
  # dump the payload to a file, just came from client
@@ -243,7 +243,7 @@ stages:
243
243
  $OCR_GRPC_ENDPOINT|"",
244
244
  $OCR_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-ocr-v1"
245
245
  ]
246
- ocr_infer_protocol: $PADDLE_INFER_PROTOCOL|"http"
246
+ ocr_infer_protocol: $OCR_INFER_PROTOCOL|"http"
247
247
  auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
248
248
  replicas:
249
249
  min_replicas: 0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest
3
- Version: 2026.1.4.dev20260104
3
+ Version: 2026.1.5.dev20260105
4
4
  Summary: Python module for multimodal document ingestion
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -227,7 +227,7 @@ Requires-Dist: httpx>=0.28.1
227
227
  Requires-Dist: isodate>=0.7.2
228
228
  Requires-Dist: langdetect>=1.0.9
229
229
  Requires-Dist: minio>=7.2.12
230
- Requires-Dist: librosa>=0.10.2
230
+ Requires-Dist: librosa==0.10.2
231
231
  Requires-Dist: opentelemetry-api>=1.27.0
232
232
  Requires-Dist: opentelemetry-exporter-otlp>=1.27.0
233
233
  Requires-Dist: opentelemetry-sdk>=1.27.0
@@ -9,7 +9,7 @@ nv_ingest/api/v1/ingest.py,sha256=LWk3LN4lBd3uO8h30EN42g3LHCVcO00avVd5ohVK7NI,19
9
9
  nv_ingest/api/v1/metrics.py,sha256=ZGVRApYLnzc2f2C7wRgGd7deqiXan-jxfA-33a16clY,981
10
10
  nv_ingest/api/v2/README.md,sha256=VhpdjEmCyr3qIOhwqISFx9C5WezJFcxYc-NB9S98HMg,7562
11
11
  nv_ingest/api/v2/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
12
- nv_ingest/api/v2/ingest.py,sha256=vjjb2xOOtlTVoTMc4rNdUI6yKYdEeR-umA_pwP_Rt64,53103
12
+ nv_ingest/api/v2/ingest.py,sha256=DjPqw1SwQqwqBraQ7n1WajejnGeHbqGmXpzzyfRohH0,53256
13
13
  nv_ingest/framework/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
14
14
  nv_ingest/framework/orchestration/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
15
15
  nv_ingest/framework/orchestration/execution/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
@@ -112,15 +112,15 @@ nv_ingest/framework/util/service/meta/ingest/ingest_service_meta.py,sha256=QS3uN
112
112
  nv_ingest/framework/util/telemetry/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
113
113
  nv_ingest/framework/util/telemetry/global_stats.py,sha256=nq65pEEdiwjAfGiqsxG1CeQMC96O3CfQxsZuGFCY-ds,4554
114
114
  nv_ingest/pipeline/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
115
- nv_ingest/pipeline/default_libmode_pipeline_impl.py,sha256=YYASfM68qNhGL5PcK0Fv72qmRZfE2TtY3cq2Oz-L478,16267
115
+ nv_ingest/pipeline/default_libmode_pipeline_impl.py,sha256=moZzuk9vHkXp_VbPF3C9jrHl591pQaAEj2WjJv1L-Kw,16264
116
116
  nv_ingest/pipeline/default_pipeline_impl.py,sha256=6SykgH_LJ8uuE2jrWGIT7OkJP6EjPyB8Ju6LMDu5IK0,16800
117
117
  nv_ingest/pipeline/ingest_pipeline.py,sha256=wHAJhqAM2s8nbY-8itVogmSU-yVN4PZONGWcKnhzgfg,17794
118
118
  nv_ingest/pipeline/pipeline_schema.py,sha256=rLZZz2It2o2hVNWrZUJU8CarrqRei1fho3ZEMkkoBcg,17940
119
119
  nv_ingest/pipeline/config/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
120
120
  nv_ingest/pipeline/config/loaders.py,sha256=75Yr9WYO7j7ghvKTnYLfZXQZEH3J3VEZo5J4TunC_Us,7590
121
121
  nv_ingest/pipeline/config/replica_resolver.py,sha256=dEwqMXNttfw0QeisTGGkp24785jqzVCDAEFyQIffeGc,9369
122
- nv_ingest-2026.1.4.dev20260104.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
123
- nv_ingest-2026.1.4.dev20260104.dist-info/METADATA,sha256=bfXM8TXozFJG2V6qPt0Vv8Upp06oRUYtuoR8IxgDK7Q,15161
124
- nv_ingest-2026.1.4.dev20260104.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
125
- nv_ingest-2026.1.4.dev20260104.dist-info/top_level.txt,sha256=sjb0ajIsgn3YgftSjZHlYO0HjYAIIhNuXG_AmywCvaU,10
126
- nv_ingest-2026.1.4.dev20260104.dist-info/RECORD,,
122
+ nv_ingest-2026.1.5.dev20260105.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
123
+ nv_ingest-2026.1.5.dev20260105.dist-info/METADATA,sha256=_xmhmLTLgz5xpAIay1wyleM4HXYdYruSsuosad22Vfs,15161
124
+ nv_ingest-2026.1.5.dev20260105.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
125
+ nv_ingest-2026.1.5.dev20260105.dist-info/top_level.txt,sha256=sjb0ajIsgn3YgftSjZHlYO0HjYAIIhNuXG_AmywCvaU,10
126
+ nv_ingest-2026.1.5.dev20260105.dist-info/RECORD,,