nv-ingest 2025.12.27.dev20251227__py3-none-any.whl → 2026.1.23.dev20260123__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nv_ingest/api/main.py CHANGED
@@ -23,7 +23,7 @@ logger = logging.getLogger(__name__)
23
23
  app = FastAPI(
24
24
  title="NV-Ingest Microservice",
25
25
  description="Service for ingesting heterogenous datatypes",
26
- version="26.1.0",
26
+ version="26.1.2",
27
27
  contact={
28
28
  "name": "NVIDIA Corporation",
29
29
  "url": "https://nvidia.com",
@@ -122,11 +122,16 @@ def get_pdf_split_page_count(client_override: Optional[int] = None) -> int:
122
122
  )
123
123
  return DEFAULT_PDF_SPLIT_PAGE_COUNT
124
124
 
125
- if parsed <= 0:
126
- logger.warning("PDF_SPLIT_PAGE_COUNT must be >= 1; received %s. Using 1.", parsed)
127
- return 1
128
-
129
- return parsed
125
+ clamped = max(MIN_PAGES, min(parsed, MAX_PAGES))
126
+ if clamped != parsed:
127
+ logger.warning(
128
+ "Env PDF_SPLIT_PAGE_COUNT=%s clamped to %s (min=%s, max=%s)",
129
+ parsed,
130
+ clamped,
131
+ MIN_PAGES,
132
+ MAX_PAGES,
133
+ )
134
+ return clamped
130
135
 
131
136
 
132
137
  def split_pdf_to_chunks(pdf_content: bytes, pages_per_chunk: int) -> List[Dict[str, Any]]:
@@ -955,7 +960,7 @@ async def submit_job_v2(
955
960
  "subjob_order": subjob_ids,
956
961
  }
957
962
  )
958
- elif document_types and payloads and document_types[0].lower() in ["mp4", "mov", "avi", "mp3", "wav"]:
963
+ elif document_types and payloads and document_types[0].lower() in ["mp4", "mov", "avi", "mp3", "wav", "mkv"]:
959
964
  document_type = document_types[0]
960
965
  upload_path = f"./{Path(original_source_id).name}"
961
966
  # dump the payload to a file, just came from client
@@ -68,7 +68,7 @@ stages:
68
68
  auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
69
69
  yolox_endpoints: [
70
70
  $YOLOX_GRPC_ENDPOINT|"",
71
- $YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v2"
71
+ $YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v3"
72
72
  ]
73
73
  yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|http
74
74
  nemotron_parse_config:
@@ -81,7 +81,7 @@ stages:
81
81
  nemotron_parse_model_name: $NEMOTRON_PARSE_MODEL_NAME|"nvidia/nemotron-parse"
82
82
  yolox_endpoints: [
83
83
  $YOLOX_GRPC_ENDPOINT|"",
84
- $YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v2"
84
+ $YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v3"
85
85
  ]
86
86
  yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|http
87
87
  replicas:
@@ -124,14 +124,14 @@ stages:
124
124
  docx_extraction_config:
125
125
  yolox_endpoints: [
126
126
  $YOLOX_GRPC_ENDPOINT|"",
127
- $YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v2"
127
+ $YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v3"
128
128
  ]
129
129
  yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|http
130
130
  auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
131
131
  pdfium_config:
132
132
  yolox_endpoints: [
133
133
  $YOLOX_GRPC_ENDPOINT|"",
134
- $YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v2"
134
+ $YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v3"
135
135
  ]
136
136
  yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|http
137
137
  auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
@@ -152,14 +152,14 @@ stages:
152
152
  pptx_extraction_config:
153
153
  yolox_endpoints: [
154
154
  $YOLOX_GRPC_ENDPOINT|"",
155
- $YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v2"
155
+ $YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v3"
156
156
  ]
157
157
  yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|http
158
158
  auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
159
159
  pdfium_config:
160
160
  yolox_endpoints: [
161
161
  $YOLOX_GRPC_ENDPOINT|"",
162
- $YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v2"
162
+ $YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v3"
163
163
  ]
164
164
  yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|http
165
165
  auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
@@ -180,7 +180,7 @@ stages:
180
180
  image_extraction_config:
181
181
  yolox_endpoints: [
182
182
  $YOLOX_GRPC_ENDPOINT|"",
183
- $YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v2"
183
+ $YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v3"
184
184
  ]
185
185
  yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|http
186
186
  auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
@@ -243,7 +243,7 @@ stages:
243
243
  $OCR_GRPC_ENDPOINT|"",
244
244
  $OCR_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-ocr-v1"
245
245
  ]
246
- ocr_infer_protocol: $PADDLE_INFER_PROTOCOL|"http"
246
+ ocr_infer_protocol: $OCR_INFER_PROTOCOL|"http"
247
247
  auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
248
248
  replicas:
249
249
  min_replicas: 0
@@ -332,7 +332,7 @@ stages:
332
332
  actor: "nv_ingest.framework.orchestration.ray.stages.transforms.image_caption:ImageCaptionTransformStage"
333
333
  config:
334
334
  api_key: $NGC_API_KEY|$NVIDIA_API_KEY
335
- endpoint_url: $VLM_CAPTION_ENDPOINT|"http://vlm:8000/v1/chat/completions"
335
+ endpoint_url: $VLM_CAPTION_ENDPOINT|"https://integrate.api.nvidia.com/v1/chat/completions"
336
336
  model_name: $VLM_CAPTION_MODEL_NAME|"nvidia/nemotron-nano-12b-v2-vl"
337
337
  prompt: $VLM_CAPTION_PROMPT|"Caption the content of this image:"
338
338
  system_prompt: $VLM_CAPTION_SYSTEM_PROMPT|"/no_think"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest
3
- Version: 2025.12.27.dev20251227
3
+ Version: 2026.1.23.dev20260123
4
4
  Summary: Python module for multimodal document ingestion
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -227,7 +227,7 @@ Requires-Dist: httpx>=0.28.1
227
227
  Requires-Dist: isodate>=0.7.2
228
228
  Requires-Dist: langdetect>=1.0.9
229
229
  Requires-Dist: minio>=7.2.12
230
- Requires-Dist: librosa>=0.10.2
230
+ Requires-Dist: librosa==0.10.2
231
231
  Requires-Dist: opentelemetry-api>=1.27.0
232
232
  Requires-Dist: opentelemetry-exporter-otlp>=1.27.0
233
233
  Requires-Dist: opentelemetry-sdk>=1.27.0
@@ -254,7 +254,6 @@ Requires-Dist: transformers>=4.47.0
254
254
  Requires-Dist: tqdm>=4.67.1
255
255
  Requires-Dist: uvicorn
256
256
  Requires-Dist: pip
257
- Requires-Dist: llama-index-embeddings-nvidia
258
257
  Requires-Dist: opencv-python
259
258
  Requires-Dist: pymilvus>=2.5.10
260
259
  Requires-Dist: pymilvus[bulk_writer,model]
@@ -1,7 +1,7 @@
1
1
  nv_ingest/__init__.py,sha256=vJLPeuxiIHqbxXPJSu9qe3MS-GPavbOUExyRq83DxxM,895
2
2
  nv_ingest/version.py,sha256=MG7DxlzpnoJI56vqxwzs9WeMAEI3uPhfDiNLs6GN6wI,986
3
3
  nv_ingest/api/__init__.py,sha256=ED07QUqwVyJalH0ahhnnjvc2W_in6TpZZ5nJ6NWU9-Y,271
4
- nv_ingest/api/main.py,sha256=qXV8YVrC_Jz2dqyirFD4WEKvSTGHsZEFqLMGgHg8TYc,1706
4
+ nv_ingest/api/main.py,sha256=WRFf1-UT8LrSMj49igMsT1WJu4L8VQw-urflIQp_QtE,1706
5
5
  nv_ingest/api/tracing.py,sha256=NkqMuUiB6ixGU5MYp3TrODsZDQepJ1kbH8JFHsYjuE0,2940
6
6
  nv_ingest/api/v1/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
7
7
  nv_ingest/api/v1/health.py,sha256=pV-RoVq5y0iBPp0qZoLzd1xKpd0JiHAi0UMyMj99LqU,4740
@@ -9,7 +9,7 @@ nv_ingest/api/v1/ingest.py,sha256=LWk3LN4lBd3uO8h30EN42g3LHCVcO00avVd5ohVK7NI,19
9
9
  nv_ingest/api/v1/metrics.py,sha256=ZGVRApYLnzc2f2C7wRgGd7deqiXan-jxfA-33a16clY,981
10
10
  nv_ingest/api/v2/README.md,sha256=VhpdjEmCyr3qIOhwqISFx9C5WezJFcxYc-NB9S98HMg,7562
11
11
  nv_ingest/api/v2/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
12
- nv_ingest/api/v2/ingest.py,sha256=vjjb2xOOtlTVoTMc4rNdUI6yKYdEeR-umA_pwP_Rt64,53103
12
+ nv_ingest/api/v2/ingest.py,sha256=DjPqw1SwQqwqBraQ7n1WajejnGeHbqGmXpzzyfRohH0,53256
13
13
  nv_ingest/framework/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
14
14
  nv_ingest/framework/orchestration/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
15
15
  nv_ingest/framework/orchestration/execution/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
@@ -112,15 +112,15 @@ nv_ingest/framework/util/service/meta/ingest/ingest_service_meta.py,sha256=QS3uN
112
112
  nv_ingest/framework/util/telemetry/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
113
113
  nv_ingest/framework/util/telemetry/global_stats.py,sha256=nq65pEEdiwjAfGiqsxG1CeQMC96O3CfQxsZuGFCY-ds,4554
114
114
  nv_ingest/pipeline/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
115
- nv_ingest/pipeline/default_libmode_pipeline_impl.py,sha256=YYASfM68qNhGL5PcK0Fv72qmRZfE2TtY3cq2Oz-L478,16267
115
+ nv_ingest/pipeline/default_libmode_pipeline_impl.py,sha256=PSFWMwrKCEBlo6e8nCdL5mg84SawoExwrUbc4nOHu5M,16281
116
116
  nv_ingest/pipeline/default_pipeline_impl.py,sha256=6SykgH_LJ8uuE2jrWGIT7OkJP6EjPyB8Ju6LMDu5IK0,16800
117
117
  nv_ingest/pipeline/ingest_pipeline.py,sha256=wHAJhqAM2s8nbY-8itVogmSU-yVN4PZONGWcKnhzgfg,17794
118
118
  nv_ingest/pipeline/pipeline_schema.py,sha256=rLZZz2It2o2hVNWrZUJU8CarrqRei1fho3ZEMkkoBcg,17940
119
119
  nv_ingest/pipeline/config/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
120
120
  nv_ingest/pipeline/config/loaders.py,sha256=75Yr9WYO7j7ghvKTnYLfZXQZEH3J3VEZo5J4TunC_Us,7590
121
121
  nv_ingest/pipeline/config/replica_resolver.py,sha256=dEwqMXNttfw0QeisTGGkp24785jqzVCDAEFyQIffeGc,9369
122
- nv_ingest-2025.12.27.dev20251227.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
123
- nv_ingest-2025.12.27.dev20251227.dist-info/METADATA,sha256=y2hPzoDUslthurcySM2XsuE0fiNG9hShORDJxrH0vm8,15163
124
- nv_ingest-2025.12.27.dev20251227.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
125
- nv_ingest-2025.12.27.dev20251227.dist-info/top_level.txt,sha256=sjb0ajIsgn3YgftSjZHlYO0HjYAIIhNuXG_AmywCvaU,10
126
- nv_ingest-2025.12.27.dev20251227.dist-info/RECORD,,
122
+ nv_ingest-2026.1.23.dev20260123.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
123
+ nv_ingest-2026.1.23.dev20260123.dist-info/METADATA,sha256=8AbYgYA1N0L6eiq2xJezTfQKMmB6KVZpbwqVsbHY7WY,15117
124
+ nv_ingest-2026.1.23.dev20260123.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
125
+ nv_ingest-2026.1.23.dev20260123.dist-info/top_level.txt,sha256=sjb0ajIsgn3YgftSjZHlYO0HjYAIIhNuXG_AmywCvaU,10
126
+ nv_ingest-2026.1.23.dev20260123.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.9.0)
2
+ Generator: setuptools (80.10.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5