nv-ingest 2025.10.4.dev20251004__py3-none-any.whl → 2025.12.10.dev20251210__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23) hide show
  1. nv_ingest/api/__init__.py +6 -0
  2. nv_ingest/api/main.py +2 -0
  3. nv_ingest/api/tracing.py +82 -0
  4. nv_ingest/api/v2/README.md +203 -0
  5. nv_ingest/api/v2/__init__.py +3 -0
  6. nv_ingest/api/v2/ingest.py +1300 -0
  7. nv_ingest/framework/orchestration/process/dependent_services.py +17 -10
  8. nv_ingest/framework/orchestration/process/strategies.py +6 -2
  9. nv_ingest/framework/orchestration/process/termination.py +49 -9
  10. nv_ingest/framework/orchestration/ray/examples/pipeline_test_harness.py +2 -2
  11. nv_ingest/framework/orchestration/ray/stages/extractors/audio_extractor.py +0 -2
  12. nv_ingest/framework/orchestration/ray/stages/extractors/ocr_extractor.py +71 -0
  13. nv_ingest/framework/orchestration/ray/stages/sources/message_broker_task_source.py +41 -8
  14. nv_ingest/framework/orchestration/ray/stages/storage/image_storage.py +72 -6
  15. nv_ingest/framework/util/service/impl/ingest/redis_ingest_service.py +215 -11
  16. nv_ingest/pipeline/config/replica_resolver.py +12 -2
  17. nv_ingest/pipeline/default_libmode_pipeline_impl.py +32 -18
  18. nv_ingest/pipeline/default_pipeline_impl.py +75 -33
  19. {nv_ingest-2025.10.4.dev20251004.dist-info → nv_ingest-2025.12.10.dev20251210.dist-info}/METADATA +4 -2
  20. {nv_ingest-2025.10.4.dev20251004.dist-info → nv_ingest-2025.12.10.dev20251210.dist-info}/RECORD +23 -18
  21. {nv_ingest-2025.10.4.dev20251004.dist-info → nv_ingest-2025.12.10.dev20251210.dist-info}/WHEEL +0 -0
  22. {nv_ingest-2025.10.4.dev20251004.dist-info → nv_ingest-2025.12.10.dev20251210.dist-info}/licenses/LICENSE +0 -0
  23. {nv_ingest-2025.10.4.dev20251004.dist-info → nv_ingest-2025.12.10.dev20251210.dist-info}/top_level.txt +0 -0
@@ -64,14 +64,14 @@ stages:
64
64
  actor: "nv_ingest.framework.orchestration.ray.stages.extractors.pdf_extractor:PDFExtractorStage"
65
65
  config:
66
66
  pdfium_config:
67
- auth_token: $NGC_API_KEY|""
67
+ auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
68
68
  yolox_endpoints: [
69
69
  $YOLOX_GRPC_ENDPOINT|"page-elements:8001",
70
70
  $YOLOX_HTTP_ENDPOINT|"http://page-elements:8000/v1/infer",
71
71
  ]
72
72
  yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|grpc
73
73
  nemoretriever_parse_config:
74
- auth_token: $NGC_API_KEY|""
74
+ auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
75
75
  nemoretriever_parse_endpoints: [
76
76
  $NEMORETRIEVER_PARSE_GRPC_ENDPOINT|"",
77
77
  $NEMORETRIEVER_PARSE_HTTP_ENDPOINT|"http://nemoretriever-parse:8000/v1/chat/completions",
@@ -105,7 +105,7 @@ stages:
105
105
  ]
106
106
  function_id: $AUDIO_FUNCTION_ID|""
107
107
  audio_infer_protocol: $AUDIO_INFER_PROTOCOL|grpc
108
- auth_token: $NGC_API_KEY|""
108
+ auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
109
109
  replicas:
110
110
  min_replicas: 0
111
111
  max_replicas:
@@ -123,10 +123,17 @@ stages:
123
123
  docx_extraction_config:
124
124
  yolox_endpoints: [
125
125
  $YOLOX_GRPC_ENDPOINT|"page-elements:8001",
126
- $YOLOX_HTTP_ENDPOINT|"",
126
+ $YOLOX_HTTP_ENDPOINT|"http://page-elements:8000/v1/infer",
127
+ ]
128
+ yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|grpc
129
+ auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
130
+ pdfium_config:
131
+ yolox_endpoints: [
132
+ $YOLOX_GRPC_ENDPOINT|"page-elements:8001",
133
+ $YOLOX_HTTP_ENDPOINT|"http://page-elements:8000/v1/infer",
127
134
  ]
128
135
  yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|grpc
129
- auth_token: $NGC_API_KEY|""
136
+ auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
130
137
  replicas:
131
138
  min_replicas: 0
132
139
  max_replicas:
@@ -147,7 +154,14 @@ stages:
147
154
  $YOLOX_HTTP_ENDPOINT|"http://page-elements:8000/v1/infer",
148
155
  ]
149
156
  yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|grpc
150
- auth_token: $NGC_API_KEY|""
157
+ auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
158
+ pdfium_config:
159
+ yolox_endpoints: [
160
+ $YOLOX_GRPC_ENDPOINT|"page-elements:8001",
161
+ $YOLOX_HTTP_ENDPOINT|"http://page-elements:8000/v1/infer",
162
+ ]
163
+ yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|grpc
164
+ auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
151
165
  replicas:
152
166
  min_replicas: 0
153
167
  max_replicas:
@@ -168,7 +182,7 @@ stages:
168
182
  $YOLOX_HTTP_ENDPOINT|"http://page-elements:8000/v1/infer",
169
183
  ]
170
184
  yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|grpc
171
- auth_token: $NGC_API_KEY|""
185
+ auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
172
186
  replicas:
173
187
  min_replicas: 0
174
188
  max_replicas:
@@ -192,6 +206,27 @@ stages:
192
206
  strategy: "static"
193
207
  value: 1
194
208
 
209
+ - name: "ocr_extractor"
210
+ type: "stage"
211
+ phase: 1 # EXTRACTION
212
+ actor: "nv_ingest.framework.orchestration.ray.stages.extractors.ocr_extractor:OCRExtractorStage"
213
+ config:
214
+ endpoint_config:
215
+ ocr_endpoints: [
216
+ $OCR_GRPC_ENDPOINT|"ocr:8001",
217
+ $OCR_HTTP_ENDPOINT|"http://ocr:8000/v1/infer",
218
+ ]
219
+ ocr_infer_protocol: $OCR_INFER_PROTOCOL|grpc
220
+ auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
221
+ replicas:
222
+ min_replicas: 0
223
+ max_replicas:
224
+ strategy: "static"
225
+ value: 4
226
+ static_replicas:
227
+ strategy: "static"
228
+ value: 3
229
+
195
230
  - name: "infographic_extractor"
196
231
  type: "stage"
197
232
  phase: 1 # EXTRACTION
@@ -203,7 +238,7 @@ stages:
203
238
  $OCR_HTTP_ENDPOINT|"http://ocr:8000/v1/infer",
204
239
  ]
205
240
  ocr_infer_protocol: $OCR_INFER_PROTOCOL|grpc
206
- auth_token: $NGC_API_KEY|""
241
+ auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
207
242
  replicas:
208
243
  min_replicas: 0
209
244
  max_replicas:
@@ -229,7 +264,7 @@ stages:
229
264
  $OCR_HTTP_ENDPOINT|"http://ocr:8000/v1/infer",
230
265
  ]
231
266
  ocr_infer_protocol: $OCR_INFER_PROTOCOL|grpc
232
- auth_token: $NGC_API_KEY|""
267
+ auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
233
268
  replicas:
234
269
  min_replicas: 0
235
270
  max_replicas:
@@ -256,7 +291,7 @@ stages:
256
291
  $OCR_HTTP_ENDPOINT|""
257
292
  ]
258
293
  ocr_infer_protocol: $OCR_INFER_PROTOCOL|grpc
259
- auth_token: $NGC_API_KEY|""
294
+ auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
260
295
  replicas:
261
296
  min_replicas: 0
262
297
  max_replicas:
@@ -316,8 +351,9 @@ stages:
316
351
  phase: 4 # TRANSFORM
317
352
  actor: "nv_ingest.framework.orchestration.ray.stages.transforms.image_caption:ImageCaptionTransformStage"
318
353
  config:
319
- api_key: $NGC_API_KEY|""
320
- model_name: $VLM_CAPTION_MODEL_NAME|"nvidia/llama-3.1-nemotron-nano-vl-8b-v1"
354
+ api_key: $NGC_API_KEY|$NVIDIA_API_KEY
355
+ model_name: $VLM_CAPTION_MODEL_NAME|"nvidia/nemotron-nano-12b-v2-vl"
356
+ endpoint_url: $VLM_CAPTION_ENDPOINT|"http://vlm:8000/v1/chat/completions"
321
357
  prompt: "Caption the content of this image:"
322
358
  replicas:
323
359
  min_replicas: 0
@@ -333,7 +369,7 @@ stages:
333
369
  phase: 4 # TRANSFORM
334
370
  actor: "nv_ingest.framework.orchestration.ray.stages.transforms.text_embed:TextEmbeddingTransformStage"
335
371
  config:
336
- api_key: $NGC_API_KEY|""
372
+ api_key: $NGC_API_KEY|$NVIDIA_API_KEY
337
373
  embedding_model: $EMBEDDING_NIM_MODEL_NAME|"nvidia/llama-3.2-nv-embedqa-1b-v2"
338
374
  embedding_nim_endpoint: $EMBEDDING_NIM_ENDPOINT|"http://embedding:8000/v1"
339
375
  replicas:
@@ -350,6 +386,9 @@ stages:
350
386
  type: "stage"
351
387
  phase: 5 # RESPONSE
352
388
  actor: "nv_ingest.framework.orchestration.ray.stages.storage.image_storage:ImageStorageStage"
389
+ config:
390
+ storage_uri: $IMAGE_STORAGE_URI|"s3://nv-ingest/artifacts/store/images"
391
+ public_base_url: $IMAGE_STORAGE_PUBLIC_BASE_URL|""
353
392
  replicas:
354
393
  min_replicas: 0
355
394
  max_replicas:
@@ -427,76 +466,79 @@ edges:
427
466
  # Intake
428
467
  - from: "source_stage"
429
468
  to: "metadata_injector"
430
- queue_size: 32
469
+ queue_size: 4
431
470
 
432
471
  # Document Extractors
433
472
  - from: "metadata_injector"
434
473
  to: "pdf_extractor"
435
- queue_size: 32
474
+ queue_size: 8
436
475
  - from: "pdf_extractor"
437
476
  to: "audio_extractor"
438
- queue_size: 32
477
+ queue_size: 4
439
478
  - from: "audio_extractor"
440
479
  to: "docx_extractor"
441
- queue_size: 32
480
+ queue_size: 4
442
481
  - from: "docx_extractor"
443
482
  to: "pptx_extractor"
444
- queue_size: 32
483
+ queue_size: 4
445
484
  - from: "pptx_extractor"
446
485
  to: "image_extractor"
447
- queue_size: 32
486
+ queue_size: 4
448
487
  - from: "image_extractor"
449
488
  to: "html_extractor"
450
- queue_size: 32
489
+ queue_size: 4
451
490
  - from: "html_extractor"
452
491
  to: "infographic_extractor"
453
- queue_size: 32
492
+ queue_size: 4
454
493
 
455
494
  # Primitive Extractors
456
495
  - from: "infographic_extractor"
457
496
  to: "table_extractor"
458
- queue_size: 32
497
+ queue_size: 4
459
498
  - from: "table_extractor"
460
499
  to: "chart_extractor"
461
- queue_size: 32
500
+ queue_size: 4
462
501
  - from: "chart_extractor"
502
+ to: "ocr_extractor"
503
+ queue_size: 8
504
+ - from: "ocr_extractor"
463
505
  to: "image_filter"
464
- queue_size: 32
506
+ queue_size: 4
465
507
 
466
508
  # Primitive Mutators
467
509
  - from: "image_filter"
468
510
  to: "image_dedup"
469
- queue_size: 32
511
+ queue_size: 4
470
512
  - from: "image_dedup"
471
513
  to: "text_splitter"
472
- queue_size: 32
514
+ queue_size: 4
473
515
 
474
516
  # Primitive Transforms
475
517
  - from: "text_splitter"
476
518
  to: "image_caption"
477
- queue_size: 32
519
+ queue_size: 4
478
520
  - from: "image_caption"
479
521
  to: "text_embedder"
480
- queue_size: 32
522
+ queue_size: 4
481
523
  - from: "text_embedder"
482
524
  to: "image_storage"
483
- queue_size: 32
525
+ queue_size: 4
484
526
 
485
527
  # Primitive Storage
486
528
  - from: "image_storage"
487
529
  to: "embedding_storage"
488
- queue_size: 32
530
+ queue_size: 4
489
531
  - from: "embedding_storage"
490
532
  to: "broker_response"
491
- queue_size: 32
533
+ queue_size: 4
492
534
 
493
535
  # Response and Telemetry
494
536
  - from: "broker_response"
495
537
  to: "otel_tracer"
496
- queue_size: 32
538
+ queue_size: 4
497
539
  - from: "otel_tracer"
498
540
  to: "default_drain"
499
- queue_size: 32
541
+ queue_size: 4
500
542
 
501
543
  # Pipeline Runtime Configuration
502
544
  pipeline:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest
3
- Version: 2025.10.4.dev20251004
3
+ Version: 2025.12.10.dev20251210
4
4
  Summary: Python module for multimodal document ingestion
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -219,6 +219,8 @@ Requires-Dist: diskcache>=5.6.3
219
219
  Requires-Dist: fastapi>=0.115.6
220
220
  Requires-Dist: fastparquet>=2024.11.0
221
221
  Requires-Dist: fsspec>=2024.10.0
222
+ Requires-Dist: universal_pathlib>=0.2.6
223
+ Requires-Dist: s3fs>=2024.10.0
222
224
  Requires-Dist: gunicorn
223
225
  Requires-Dist: h11>=0.16.0
224
226
  Requires-Dist: httpx>=0.28.1
@@ -226,10 +228,10 @@ Requires-Dist: isodate>=0.7.2
226
228
  Requires-Dist: langdetect>=1.0.9
227
229
  Requires-Dist: minio>=7.2.12
228
230
  Requires-Dist: librosa>=0.10.2
229
- Requires-Dist: openai>=1.82.0
230
231
  Requires-Dist: opentelemetry-api>=1.27.0
231
232
  Requires-Dist: opentelemetry-exporter-otlp>=1.27.0
232
233
  Requires-Dist: opentelemetry-sdk>=1.27.0
234
+ Requires-Dist: psutil>=7.1.0
233
235
  Requires-Dist: pydantic>2.0.0
234
236
  Requires-Dist: pydantic-settings>2.0.0
235
237
  Requires-Dist: pypdfium2==4.30.0
@@ -1,29 +1,33 @@
1
1
  nv_ingest/__init__.py,sha256=vJLPeuxiIHqbxXPJSu9qe3MS-GPavbOUExyRq83DxxM,895
2
2
  nv_ingest/version.py,sha256=MG7DxlzpnoJI56vqxwzs9WeMAEI3uPhfDiNLs6GN6wI,986
3
- nv_ingest/api/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
4
- nv_ingest/api/main.py,sha256=XE-p4lJp1E7CCDOB8ENtYFrf63Dtq2bzQiGxpRfL2LA,1603
3
+ nv_ingest/api/__init__.py,sha256=ED07QUqwVyJalH0ahhnnjvc2W_in6TpZZ5nJ6NWU9-Y,271
4
+ nv_ingest/api/main.py,sha256=uCCkUNLS1xE9TDYKDOdxEfo_9jQWumpQAPWrxj5m9Go,1706
5
+ nv_ingest/api/tracing.py,sha256=NkqMuUiB6ixGU5MYp3TrODsZDQepJ1kbH8JFHsYjuE0,2940
5
6
  nv_ingest/api/v1/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
6
7
  nv_ingest/api/v1/health.py,sha256=pV-RoVq5y0iBPp0qZoLzd1xKpd0JiHAi0UMyMj99LqU,4740
7
8
  nv_ingest/api/v1/ingest.py,sha256=LWk3LN4lBd3uO8h30EN42g3LHCVcO00avVd5ohVK7NI,19392
8
9
  nv_ingest/api/v1/metrics.py,sha256=ZGVRApYLnzc2f2C7wRgGd7deqiXan-jxfA-33a16clY,981
10
+ nv_ingest/api/v2/README.md,sha256=VhpdjEmCyr3qIOhwqISFx9C5WezJFcxYc-NB9S98HMg,7562
11
+ nv_ingest/api/v2/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
12
+ nv_ingest/api/v2/ingest.py,sha256=vjjb2xOOtlTVoTMc4rNdUI6yKYdEeR-umA_pwP_Rt64,53103
9
13
  nv_ingest/framework/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
10
14
  nv_ingest/framework/orchestration/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
11
15
  nv_ingest/framework/orchestration/execution/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
12
16
  nv_ingest/framework/orchestration/execution/helpers.py,sha256=-F8SZh7ISWtzJz6X1O2LQ133t-17Jxi8lL-NHz4rwj0,2818
13
17
  nv_ingest/framework/orchestration/execution/options.py,sha256=Ms1t4591EIv4ZrMRdhsCYPgLnMVXJosG3MURCbPXUoA,3983
14
18
  nv_ingest/framework/orchestration/process/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
15
- nv_ingest/framework/orchestration/process/dependent_services.py,sha256=ERf2M4O6pvbLDFrvayBHHL7M-FIwECeDEDTY3bi7MBg,2940
19
+ nv_ingest/framework/orchestration/process/dependent_services.py,sha256=s0j_rsFtCKHFIuvOkBe9NEAkPNPhSYse_ApeHka8gyg,3032
16
20
  nv_ingest/framework/orchestration/process/execution.py,sha256=P1kzpYV23e4QYrKw9Td1TCZK3CK1ENVqqnI_axRCqBk,19814
17
21
  nv_ingest/framework/orchestration/process/lifecycle.py,sha256=L5NDwnzSMQPGjqJDC8jC75L1YqWey-dtK8N_HgBzb0E,8001
18
- nv_ingest/framework/orchestration/process/strategies.py,sha256=D7fdTPA7uuteoj6McA6hm1J5ArqoDdSZ7W6_ONDX7N0,7845
19
- nv_ingest/framework/orchestration/process/termination.py,sha256=_aI2ZzCasGfqwu0fcvufOlr1BGAay_Noxq5pAu67gv4,3593
22
+ nv_ingest/framework/orchestration/process/strategies.py,sha256=Q1Q04PPseF775omeS0FoXfK187NiS_bbqTaaJRwzKn8,7972
23
+ nv_ingest/framework/orchestration/process/termination.py,sha256=PAogFeW0FATFS6Mcp_UkZgq_SbWV18RtdZN-0NbComw,5042
20
24
  nv_ingest/framework/orchestration/ray/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
21
25
  nv_ingest/framework/orchestration/ray/edges/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
22
26
  nv_ingest/framework/orchestration/ray/edges/async_queue_edge.py,sha256=PQliU_kyGbO9o42njpb8FrDMLrbLqwZzmBNXifxyG5Y,2312
23
27
  nv_ingest/framework/orchestration/ray/edges/ray_queue_edge.py,sha256=VFii2yxJuikimOxie3edKq5JN06g78AF8bdHSHVX8p8,2677
24
28
  nv_ingest/framework/orchestration/ray/edges/threaded_queue_edge.py,sha256=N6NH4KgZJ60e_JkGRcSmfQtX37qtX4TMcavOR-n3heE,2549
25
29
  nv_ingest/framework/orchestration/ray/examples/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
26
- nv_ingest/framework/orchestration/ray/examples/pipeline_test_harness.py,sha256=hnRLybIpVTj3mXkLW0ErWVn4vRsInjNZmA80JqDiQuw,16473
30
+ nv_ingest/framework/orchestration/ray/examples/pipeline_test_harness.py,sha256=Bn4rjkO14BwvvUNG_HBCSVXetYk7DKqRRsYHJADWqjc,16455
27
31
  nv_ingest/framework/orchestration/ray/examples/task_source_harness.py,sha256=Yt7uxThg7s8WuMiaHLKC8r1XAG7QixegfkT-juE5oNw,1953
28
32
  nv_ingest/framework/orchestration/ray/examples/task_source_sink_harness.py,sha256=XkvsoIzH5ftXvAZ4ox7mxbx7ESVx6D8Xupcwbqgd52w,3277
29
33
  nv_ingest/framework/orchestration/ray/primitives/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
@@ -34,12 +38,13 @@ nv_ingest/framework/orchestration/ray/primitives/ray_pipeline.py,sha256=t9lf6zTj
34
38
  nv_ingest/framework/orchestration/ray/primitives/ray_stat_collector.py,sha256=GGY6_i6_g5xTFzdo9Qmsu9i4knMTq6pJfgm-aaPEt_o,17226
35
39
  nv_ingest/framework/orchestration/ray/stages/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
36
40
  nv_ingest/framework/orchestration/ray/stages/extractors/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
37
- nv_ingest/framework/orchestration/ray/stages/extractors/audio_extractor.py,sha256=4SdgvzI9oJ_OK5oWGir9wXVIPV4Pont2EKv9mwcWMC0,3631
41
+ nv_ingest/framework/orchestration/ray/stages/extractors/audio_extractor.py,sha256=UVp_kDmkaBlfO0Mbl_IxKq6imzLvs4-DKHgUHJIh3mo,3629
38
42
  nv_ingest/framework/orchestration/ray/stages/extractors/chart_extractor.py,sha256=rfaDx6PqRCguhSYkJI6iVmMMtAlJNxzKfUrLmw_fKqs,4381
39
43
  nv_ingest/framework/orchestration/ray/stages/extractors/docx_extractor.py,sha256=R4vshPcAUN2U6BIv8BCZQ862wLx8RJhCGXfpQ3K09Bs,3627
40
44
  nv_ingest/framework/orchestration/ray/stages/extractors/html_extractor.py,sha256=7JrZSVIrK4_wr2s7TOTss7pgTY2F9GPQ7Ze3F_WFlKU,3642
41
45
  nv_ingest/framework/orchestration/ray/stages/extractors/image_extractor.py,sha256=iY9fEfucfgCmO2ixX6qwn418J97nJz_FQGh7B6yziVo,3980
42
46
  nv_ingest/framework/orchestration/ray/stages/extractors/infographic_extractor.py,sha256=v5J7dnJBEaDfjoTz_N_yC3RAt6lwMLgLT28V-ahquLE,3261
47
+ nv_ingest/framework/orchestration/ray/stages/extractors/ocr_extractor.py,sha256=pwVoA5-CF9GVWusoFZOMGBvSyW5udD9bdxVJXA_SghE,3188
43
48
  nv_ingest/framework/orchestration/ray/stages/extractors/pdf_extractor.py,sha256=QagIA99AsHLihjRbXm-2BphdoQGHwzOHlqLyz7oDOSk,4992
44
49
  nv_ingest/framework/orchestration/ray/stages/extractors/pptx_extractor.py,sha256=RMbbl7Cuj4BT-TcgUx_0k8R-DLdw-o3fHxcIBIgrWt4,3776
45
50
  nv_ingest/framework/orchestration/ray/stages/extractors/table_extractor.py,sha256=p71ktv6v5T-9npYpCbgbwW6-fS-65UWS7rCm8OWr2Bc,4170
@@ -57,9 +62,9 @@ nv_ingest/framework/orchestration/ray/stages/sinks/__init__.py,sha256=wQSlVx3T14
57
62
  nv_ingest/framework/orchestration/ray/stages/sinks/default_drain.py,sha256=_USW1Vq8G2Wn-QFdPfFQCrtKG46hHeJvkEGbBxdpbVM,1488
58
63
  nv_ingest/framework/orchestration/ray/stages/sinks/message_broker_task_sink.py,sha256=QcvMQXIJ7EWIxty76Mo5Xv38Oj6X2KuS8qXQlf7E1uA,11676
59
64
  nv_ingest/framework/orchestration/ray/stages/sources/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
60
- nv_ingest/framework/orchestration/ray/stages/sources/message_broker_task_source.py,sha256=Qm9XtTNX2CcUAlZRw33BS3Ql0djcsMGp52FPA2zHu3Q,22340
65
+ nv_ingest/framework/orchestration/ray/stages/sources/message_broker_task_source.py,sha256=LrqaWpWyuiAHlpXWKYSyHZJBFegGXfNlpCXrucbK5NM,24067
61
66
  nv_ingest/framework/orchestration/ray/stages/storage/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
62
- nv_ingest/framework/orchestration/ray/stages/storage/image_storage.py,sha256=WZN_-3Li-izDaPtk8IMrtn2os1ckT3U8Rb2PsfOWrcI,4009
67
+ nv_ingest/framework/orchestration/ray/stages/storage/image_storage.py,sha256=f1iA7rjYFA1G1EXqFM6URUi_QRql1Y1OrnMPKONsSqo,6907
63
68
  nv_ingest/framework/orchestration/ray/stages/storage/store_embeddings.py,sha256=EUtwhSDf-qGLVEhWEInr1VaLsvpcHUSyzCmHQVai-Ps,3547
64
69
  nv_ingest/framework/orchestration/ray/stages/telemetry/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
65
70
  nv_ingest/framework/orchestration/ray/stages/telemetry/job_counter.py,sha256=jEtEUibqs6IS6QakrzWY9zmxSUzuBpg_hzXy2R-I10Y,2870
@@ -99,22 +104,22 @@ nv_ingest/framework/util/flow_control/udf_intercept.py,sha256=zQ9uuCcHLEd0P52Eiw
99
104
  nv_ingest/framework/util/service/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
100
105
  nv_ingest/framework/util/service/impl/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
101
106
  nv_ingest/framework/util/service/impl/ingest/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
102
- nv_ingest/framework/util/service/impl/ingest/redis_ingest_service.py,sha256=KbzQFo7qVbCITiKYVPcGN0x4NI8piJy70Dz-8jf59Xs,15415
107
+ nv_ingest/framework/util/service/impl/ingest/redis_ingest_service.py,sha256=59P-BMWnFY37GJm5w23-TMxgLhiZGZpJogC0gjDBaTA,23835
103
108
  nv_ingest/framework/util/service/meta/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
104
109
  nv_ingest/framework/util/service/meta/ingest/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
105
110
  nv_ingest/framework/util/service/meta/ingest/ingest_service_meta.py,sha256=QS3uNxWBl5dIcmIpJKNe8_TLcTUuN2vcKyHeAwa-eSo,1589
106
111
  nv_ingest/framework/util/telemetry/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
107
112
  nv_ingest/framework/util/telemetry/global_stats.py,sha256=nq65pEEdiwjAfGiqsxG1CeQMC96O3CfQxsZuGFCY-ds,4554
108
113
  nv_ingest/pipeline/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
109
- nv_ingest/pipeline/default_libmode_pipeline_impl.py,sha256=jcpTTC3yPt77EvNSyXxdnRryHLKQ2cMTMAo_0ZuLSNg,15450
110
- nv_ingest/pipeline/default_pipeline_impl.py,sha256=Vlui2jk27DFD9lPX_FYLPzPGIfk1AqRvbcf5lGZeQPE,15145
114
+ nv_ingest/pipeline/default_libmode_pipeline_impl.py,sha256=M31VN1xVTdoiNdjaSSPKEZr-yKhXDSwQ1hAVIkpJZLw,16232
115
+ nv_ingest/pipeline/default_pipeline_impl.py,sha256=TW9N9UcgsBL5SG1pxuSdgBIyFpBORskbHCmvJBmIIuw,16770
111
116
  nv_ingest/pipeline/ingest_pipeline.py,sha256=wHAJhqAM2s8nbY-8itVogmSU-yVN4PZONGWcKnhzgfg,17794
112
117
  nv_ingest/pipeline/pipeline_schema.py,sha256=rLZZz2It2o2hVNWrZUJU8CarrqRei1fho3ZEMkkoBcg,17940
113
118
  nv_ingest/pipeline/config/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
114
119
  nv_ingest/pipeline/config/loaders.py,sha256=75Yr9WYO7j7ghvKTnYLfZXQZEH3J3VEZo5J4TunC_Us,7590
115
- nv_ingest/pipeline/config/replica_resolver.py,sha256=3zjh8gmepEYORFZRM4inq7GoBW0YL3gzUDiixUugjzQ,8899
116
- nv_ingest-2025.10.4.dev20251004.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
117
- nv_ingest-2025.10.4.dev20251004.dist-info/METADATA,sha256=icWVUzzGzIwqq52N_eCUsNow75JzIlO6Op2zOxKQZR8,15092
118
- nv_ingest-2025.10.4.dev20251004.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
119
- nv_ingest-2025.10.4.dev20251004.dist-info/top_level.txt,sha256=sjb0ajIsgn3YgftSjZHlYO0HjYAIIhNuXG_AmywCvaU,10
120
- nv_ingest-2025.10.4.dev20251004.dist-info/RECORD,,
120
+ nv_ingest/pipeline/config/replica_resolver.py,sha256=dEwqMXNttfw0QeisTGGkp24785jqzVCDAEFyQIffeGc,9369
121
+ nv_ingest-2025.12.10.dev20251210.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
122
+ nv_ingest-2025.12.10.dev20251210.dist-info/METADATA,sha256=4wQaqrQjyq98-3vTXm-gQsgDmgzyrv8RGC0hsCN7jSs,15163
123
+ nv_ingest-2025.12.10.dev20251210.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
124
+ nv_ingest-2025.12.10.dev20251210.dist-info/top_level.txt,sha256=sjb0ajIsgn3YgftSjZHlYO0HjYAIIhNuXG_AmywCvaU,10
125
+ nv_ingest-2025.12.10.dev20251210.dist-info/RECORD,,