nv-ingest 2025.8.19.dev20250819__py3-none-any.whl → 2025.8.21.dev20250821__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nv-ingest might be problematic. Click here for more details.

Files changed (29) hide show
  1. nv_ingest/framework/orchestration/process/dependent_services.py +32 -10
  2. nv_ingest/framework/orchestration/process/execution.py +92 -94
  3. nv_ingest/framework/orchestration/process/lifecycle.py +98 -6
  4. nv_ingest/framework/orchestration/process/strategies.py +36 -4
  5. nv_ingest/framework/orchestration/process/termination.py +107 -0
  6. nv_ingest/framework/orchestration/ray/primitives/ray_pipeline.py +9 -15
  7. nv_ingest/framework/orchestration/ray/stages/extractors/audio_extractor.py +2 -1
  8. nv_ingest/framework/orchestration/ray/stages/extractors/chart_extractor.py +5 -2
  9. nv_ingest/framework/orchestration/ray/stages/extractors/docx_extractor.py +2 -1
  10. nv_ingest/framework/orchestration/ray/stages/extractors/html_extractor.py +2 -1
  11. nv_ingest/framework/orchestration/ray/stages/extractors/image_extractor.py +2 -1
  12. nv_ingest/framework/orchestration/ray/stages/extractors/pdf_extractor.py +5 -2
  13. nv_ingest/framework/orchestration/ray/stages/extractors/table_extractor.py +2 -1
  14. nv_ingest/framework/orchestration/ray/stages/injectors/metadata_injector.py +2 -1
  15. nv_ingest/framework/orchestration/ray/stages/mutate/image_dedup.py +2 -1
  16. nv_ingest/framework/orchestration/ray/stages/mutate/image_filter.py +2 -1
  17. nv_ingest/framework/orchestration/ray/stages/sources/message_broker_task_source.py +5 -1
  18. nv_ingest/framework/orchestration/ray/stages/storage/store_embeddings.py +2 -1
  19. nv_ingest/framework/orchestration/ray/stages/transforms/image_caption.py +5 -1
  20. nv_ingest/framework/orchestration/ray/stages/transforms/text_embed.py +5 -1
  21. nv_ingest/framework/orchestration/ray/stages/transforms/text_splitter.py +4 -3
  22. nv_ingest/pipeline/config/loaders.py +33 -2
  23. nv_ingest/pipeline/default_libmode_pipeline_impl.py +514 -0
  24. nv_ingest/pipeline/default_pipeline_impl.py +54 -56
  25. {nv_ingest-2025.8.19.dev20250819.dist-info → nv_ingest-2025.8.21.dev20250821.dist-info}/METADATA +1 -1
  26. {nv_ingest-2025.8.19.dev20250819.dist-info → nv_ingest-2025.8.21.dev20250821.dist-info}/RECORD +29 -27
  27. {nv_ingest-2025.8.19.dev20250819.dist-info → nv_ingest-2025.8.21.dev20250821.dist-info}/WHEEL +0 -0
  28. {nv_ingest-2025.8.19.dev20250819.dist-info → nv_ingest-2025.8.21.dev20250821.dist-info}/licenses/LICENSE +0 -0
  29. {nv_ingest-2025.8.19.dev20250819.dist-info → nv_ingest-2025.8.21.dev20250821.dist-info}/top_level.txt +0 -0
@@ -7,17 +7,17 @@
7
7
  # pylint: disable=line-too-long
8
8
 
9
9
  """
10
- Default pipeline implementation for libmode.
10
+ Default pipeline implementation (runtime default).
11
11
 
12
- This module contains the default libmode pipeline configuration as a string,
13
- allowing the pipeline to be loaded without requiring external YAML files.
12
+ This module embeds the exact contents of config/default_pipeline.yaml so code can
13
+ load the default pipeline without reading the YAML file at runtime.
14
14
  """
15
15
 
16
- DEFAULT_LIBMODE_PIPELINE_YAML = """# Default Ingestion Pipeline Configuration for Library Mode
16
+ DEFAULT_PIPELINE_YAML = """# Default Ingestion Pipeline Configuration
17
17
  # This file replicates the static pipeline defined in pipeline_builders.py
18
18
 
19
- name: "NVIngest default libmode pipeline"
20
- description: "This is the default ingestion pipeline for NVIngest in library mode"
19
+ name: "NVIngest default pipeline"
20
+ description: "This is the default ingestion pipeline for NVIngest"
21
21
  stages:
22
22
  # Source
23
23
  - name: "source_stage"
@@ -26,20 +26,19 @@ stages:
26
26
  actor: "nv_ingest.framework.orchestration.ray.stages.sources.message_broker_task_source:MessageBrokerTaskSourceStage"
27
27
  config:
28
28
  broker_client:
29
- client_type: "simple"
30
- host: $MESSAGE_CLIENT_HOST|"0.0.0.0"
31
- port: $MESSAGE_CLIENT_PORT|7671
29
+ client_type: $MESSAGE_CLIENT_TYPE|"redis"
30
+ host: $MESSAGE_CLIENT_HOST|"redis"
31
+ port: $MESSAGE_CLIENT_PORT|6379
32
32
  task_queue: "ingest_task_queue"
33
33
  poll_interval: 0.1
34
34
  replicas:
35
- min_replicas: 0
35
+ min_replicas: 1
36
36
  max_replicas:
37
37
  strategy: "static"
38
38
  value: 1
39
39
  static_replicas:
40
40
  strategy: "static"
41
41
  value: 1
42
- runs_after: []
43
42
 
44
43
  # Pre-processing
45
44
  - name: "metadata_injector"
@@ -67,23 +66,23 @@ stages:
67
66
  pdfium_config:
68
67
  auth_token: $NGC_API_KEY|""
69
68
  yolox_endpoints: [
70
- $YOLOX_GRPC_ENDPOINT|"",
71
- $YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v2"
69
+ $YOLOX_GRPC_ENDPOINT|"page-elements:8001",
70
+ $YOLOX_HTTP_ENDPOINT|"http://page-elements:8000/v1/infer",
72
71
  ]
73
- yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|http
72
+ yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|grpc
74
73
  nemoretriever_parse_config:
75
74
  auth_token: $NGC_API_KEY|""
76
75
  nemoretriever_parse_endpoints: [
77
76
  $NEMORETRIEVER_PARSE_GRPC_ENDPOINT|"",
78
- $NEMORETRIEVER_PARSE_HTTP_ENDPOINT|"https://integrate.api.nvidia.com/v1/chat/completions"
77
+ $NEMORETRIEVER_PARSE_HTTP_ENDPOINT|"http://nemoretriever-parse:8000/v1/chat/completions",
79
78
  ]
80
79
  nemoretriever_parse_infer_protocol: $NEMORETRIEVER_PARSE_INFER_PROTOCOL|http
81
80
  nemoretriever_parse_model_name: $NEMORETRIEVER_PARSE_MODEL_NAME|"nvidia/nemoretriever-parse"
82
81
  yolox_endpoints: [
83
- $YOLOX_GRPC_ENDPOINT|"",
84
- $YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v2"
82
+ $YOLOX_GRPC_ENDPOINT|"page-elements:8001",
83
+ $YOLOX_HTTP_ENDPOINT|"http://page-elements:8000/v1/infer",
85
84
  ]
86
- yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|http
85
+ yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|grpc
87
86
  replicas:
88
87
  min_replicas: 0
89
88
  max_replicas:
@@ -101,10 +100,10 @@ stages:
101
100
  config:
102
101
  audio_extraction_config:
103
102
  audio_endpoints: [
104
- $AUDIO_GRPC_ENDPOINT|"grpc.nvcf.nvidia.com:443",
105
- $AUDIO_HTTP_ENDPOINT|""
103
+ $AUDIO_GRPC_ENDPOINT|"audio:50051",
104
+ $AUDIO_HTTP_ENDPOINT|"",
106
105
  ]
107
- function_id: $AUDIO_FUNCTION_ID|"1598d209-5e27-4d3c-8079-4751568b1081"
106
+ function_id: $AUDIO_FUNCTION_ID|""
108
107
  audio_infer_protocol: $AUDIO_INFER_PROTOCOL|grpc
109
108
  auth_token: $NGC_API_KEY|""
110
109
  replicas:
@@ -123,10 +122,10 @@ stages:
123
122
  config:
124
123
  docx_extraction_config:
125
124
  yolox_endpoints: [
126
- $YOLOX_GRPC_ENDPOINT|"",
127
- $YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v2"
125
+ $YOLOX_GRPC_ENDPOINT|"page-elements:8001",
126
+ $YOLOX_HTTP_ENDPOINT|"",
128
127
  ]
129
- yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|http
128
+ yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|grpc
130
129
  auth_token: $NGC_API_KEY|""
131
130
  replicas:
132
131
  min_replicas: 0
@@ -144,10 +143,10 @@ stages:
144
143
  config:
145
144
  pptx_extraction_config:
146
145
  yolox_endpoints: [
147
- $YOLOX_GRPC_ENDPOINT|"",
148
- $YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v2"
146
+ $YOLOX_GRPC_ENDPOINT|"page-elements:8001",
147
+ $YOLOX_HTTP_ENDPOINT|"http://page-elements:8000/v1/infer",
149
148
  ]
150
- yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|http
149
+ yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|grpc
151
150
  auth_token: $NGC_API_KEY|""
152
151
  replicas:
153
152
  min_replicas: 0
@@ -165,10 +164,10 @@ stages:
165
164
  config:
166
165
  image_extraction_config:
167
166
  yolox_endpoints: [
168
- $YOLOX_GRPC_ENDPOINT|"",
169
- $YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v2"
167
+ $YOLOX_GRPC_ENDPOINT|"page-elements:8001",
168
+ $YOLOX_HTTP_ENDPOINT|"http://page-elements:8000/v1/infer",
170
169
  ]
171
- yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|http
170
+ yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|grpc
172
171
  auth_token: $NGC_API_KEY|""
173
172
  replicas:
174
173
  min_replicas: 0
@@ -200,8 +199,8 @@ stages:
200
199
  config:
201
200
  endpoint_config:
202
201
  ocr_endpoints: [
203
- $OCR_GRPC_ENDPOINT|"grpc.nvcf.nvidia.com:443",
204
- $OCR_HTTP_ENDPOINT|""
202
+ $OCR_GRPC_ENDPOINT|"ocr:8001",
203
+ $OCR_HTTP_ENDPOINT|"http://ocr:8000/v1/infer",
205
204
  ]
206
205
  ocr_infer_protocol: $OCR_INFER_PROTOCOL|grpc
207
206
  auth_token: $NGC_API_KEY|""
@@ -221,15 +220,15 @@ stages:
221
220
  config:
222
221
  endpoint_config:
223
222
  yolox_endpoints: [
224
- $YOLOX_TABLE_STRUCTURE_GRPC_ENDPOINT|"",
225
- $YOLOX_TABLE_STRUCTURE_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-table-structure-v1"
223
+ $YOLOX_TABLE_STRUCTURE_GRPC_ENDPOINT|"table-structure:8001",
224
+ $YOLOX_TABLE_STRUCTURE_HTTP_ENDPOINT|"http://table-structure:8000/v1/infer",
226
225
  ]
227
- yolox_infer_protocol: $YOLOX_TABLE_STRUCTURE_INFER_PROTOCOL|"http"
226
+ yolox_infer_protocol: $YOLOX_TABLE_STRUCTURE_INFER_PROTOCOL|grpc
228
227
  ocr_endpoints: [
229
- $OCR_GRPC_ENDPOINT|"",
230
- $OCR_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/baidu/paddleocr"
228
+ $OCR_GRPC_ENDPOINT|"ocr:8001",
229
+ $OCR_HTTP_ENDPOINT|"http://ocr:8000/v1/infer",
231
230
  ]
232
- ocr_infer_protocol: $PADDLE_INFER_PROTOCOL|"http"
231
+ ocr_infer_protocol: $OCR_INFER_PROTOCOL|grpc
233
232
  auth_token: $NGC_API_KEY|""
234
233
  replicas:
235
234
  min_replicas: 0
@@ -248,15 +247,15 @@ stages:
248
247
  config:
249
248
  endpoint_config:
250
249
  yolox_endpoints: [
251
- $YOLOX_GRAPHIC_ELEMENTS_GRPC_ENDPOINT|"",
252
- $YOLOX_GRAPHIC_ELEMENTS_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-graphic-elements-v1"
250
+ $YOLOX_GRAPHIC_ELEMENTS_GRPC_ENDPOINT|"graphic-elements:8001",
251
+ $YOLOX_GRAPHIC_ELEMENTS_HTTP_ENDPOINT|""
253
252
  ]
254
- yolox_infer_protocol: $YOLOX_GRAPHIC_ELEMENTS_INFER_PROTOCOL|"http"
253
+ yolox_infer_protocol: $YOLOX_GRAPHIC_ELEMENTS_INFER_PROTOCOL|grpc
255
254
  ocr_endpoints: [
256
- $OCR_GRPC_ENDPOINT|"",
257
- $OCR_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/baidu/paddleocr"
255
+ $OCR_GRPC_ENDPOINT|"ocr:8001",
256
+ $OCR_HTTP_ENDPOINT|""
258
257
  ]
259
- ocr_infer_protocol: $OCR_INFER_PROTOCOL|"http"
258
+ ocr_infer_protocol: $OCR_INFER_PROTOCOL|grpc
260
259
  auth_token: $NGC_API_KEY|""
261
260
  replicas:
262
261
  min_replicas: 0
@@ -318,7 +317,6 @@ stages:
318
317
  actor: "nv_ingest.framework.orchestration.ray.stages.transforms.image_caption:ImageCaptionTransformStage"
319
318
  config:
320
319
  api_key: $NGC_API_KEY|""
321
- endpoint_url: $VLM_CAPTION_ENDPOINT|"https://ai.api.nvidia.com/v1/gr/nvidia/llama-3.1-nemotron-nano-vl-8b-v1/chat/completions"
322
320
  model_name: $VLM_CAPTION_MODEL_NAME|"nvidia/llama-3.1-nemotron-nano-vl-8b-v1"
323
321
  prompt: "Caption the content of this image:"
324
322
  replicas:
@@ -337,15 +335,15 @@ stages:
337
335
  config:
338
336
  api_key: $NGC_API_KEY|""
339
337
  embedding_model: $EMBEDDING_NIM_MODEL_NAME|"nvidia/llama-3.2-nv-embedqa-1b-v2"
340
- embedding_nim_endpoint: $EMBEDDING_NIM_ENDPOINT|"https://integrate.api.nvidia.com/v1"
338
+ embedding_nim_endpoint: $EMBEDDING_NIM_ENDPOINT|"http://embedding:8000/v1"
341
339
  replicas:
342
340
  min_replicas: 0
343
341
  max_replicas:
344
342
  strategy: "static"
345
- value: 2
343
+ value: 4
346
344
  static_replicas:
347
345
  strategy: "static"
348
- value: 1
346
+ value: 3
349
347
 
350
348
  # Storage and Output
351
349
  - name: "image_storage"
@@ -380,9 +378,10 @@ stages:
380
378
  actor: "nv_ingest.framework.orchestration.ray.stages.sinks.message_broker_task_sink:MessageBrokerTaskSinkStage"
381
379
  config:
382
380
  broker_client:
383
- client_type: "simple"
384
- host: "localhost"
385
- port: 7671
381
+ client_type: $MESSAGE_CLIENT_TYPE|"redis"
382
+ host: $MESSAGE_CLIENT_HOST|localhost
383
+ port: $MESSAGE_CLIENT_PORT|6379
384
+ poll_interval: 0.1
386
385
  replicas:
387
386
  min_replicas: 1
388
387
  max_replicas:
@@ -416,7 +415,7 @@ stages:
416
415
  actor: "nv_ingest.framework.orchestration.ray.stages.sinks.default_drain:DefaultDrainSink"
417
416
  config: {}
418
417
  replicas:
419
- min_replicas: 0
418
+ min_replicas: 1
420
419
  max_replicas:
421
420
  strategy: "static"
422
421
  value: 1
@@ -500,11 +499,10 @@ edges:
500
499
  queue_size: 32
501
500
 
502
501
  # Pipeline Runtime Configuration
503
- # These parameters control dynamic scaling and PID controller behavior
504
- # All values can be overridden by environment variables from env_config.py
505
502
  pipeline:
506
- disable_dynamic_scaling: $INGEST_DISABLE_DYNAMIC_SCALING|true
503
+ disable_dynamic_scaling: $INGEST_DISABLE_DYNAMIC_SCALING|false
507
504
  dynamic_memory_threshold: $INGEST_DYNAMIC_MEMORY_THRESHOLD|0.75
505
+ static_memory_threshold: $INGEST_STATIC_MEMORY_THRESHOLD|0.75
508
506
  pid_controller:
509
507
  kp: $INGEST_DYNAMIC_MEMORY_KP|0.2
510
508
  ki: $INGEST_DYNAMIC_MEMORY_KI|0.01
@@ -513,5 +511,5 @@ pipeline:
513
511
  penalty_factor: $INGEST_DYNAMIC_MEMORY_PENALTY_FACTOR|0.1
514
512
  error_boost_factor: $INGEST_DYNAMIC_MEMORY_ERROR_BOOST_FACTOR|1.5
515
513
  rcm_memory_safety_buffer_fraction: $INGEST_DYNAMIC_MEMORY_RCM_MEMORY_SAFETY_BUFFER_FRACTION|0.15
516
- launch_simple_broker: true
514
+ launch_simple_broker: $INGEST_LAUNCH_SIMPLE_BROKER|false
517
515
  """
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest
3
- Version: 2025.8.19.dev20250819
3
+ Version: 2025.8.21.dev20250821
4
4
  Summary: Python module for multimodal document ingestion
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -12,10 +12,11 @@ nv_ingest/framework/orchestration/execution/__init__.py,sha256=wQSlVx3T14ZgQAt-E
12
12
  nv_ingest/framework/orchestration/execution/helpers.py,sha256=-F8SZh7ISWtzJz6X1O2LQ133t-17Jxi8lL-NHz4rwj0,2818
13
13
  nv_ingest/framework/orchestration/execution/options.py,sha256=Ms1t4591EIv4ZrMRdhsCYPgLnMVXJosG3MURCbPXUoA,3983
14
14
  nv_ingest/framework/orchestration/process/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
15
- nv_ingest/framework/orchestration/process/dependent_services.py,sha256=GpYYQ-YPKJbAFTcMHk4wCQUvtkJCfMOqCss5j845x0g,1979
16
- nv_ingest/framework/orchestration/process/execution.py,sha256=YkeNKk2H7DpiF2-9ygjKH2DeIM7dpvS4a74gDVBC0Eo,19540
17
- nv_ingest/framework/orchestration/process/lifecycle.py,sha256=xM6zjMJfoI1KyFlr76fcE9N8GINV3oG_5n1aw4P7AlY,4160
18
- nv_ingest/framework/orchestration/process/strategies.py,sha256=LzJDx9g_QdHtfBzg-edNMSqdYRZfF71i9S3Fv1ghK_M,6377
15
+ nv_ingest/framework/orchestration/process/dependent_services.py,sha256=ERf2M4O6pvbLDFrvayBHHL7M-FIwECeDEDTY3bi7MBg,2940
16
+ nv_ingest/framework/orchestration/process/execution.py,sha256=P1kzpYV23e4QYrKw9Td1TCZK3CK1ENVqqnI_axRCqBk,19814
17
+ nv_ingest/framework/orchestration/process/lifecycle.py,sha256=L5NDwnzSMQPGjqJDC8jC75L1YqWey-dtK8N_HgBzb0E,8001
18
+ nv_ingest/framework/orchestration/process/strategies.py,sha256=D7fdTPA7uuteoj6McA6hm1J5ArqoDdSZ7W6_ONDX7N0,7845
19
+ nv_ingest/framework/orchestration/process/termination.py,sha256=_aI2ZzCasGfqwu0fcvufOlr1BGAay_Noxq5pAu67gv4,3593
19
20
  nv_ingest/framework/orchestration/ray/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
20
21
  nv_ingest/framework/orchestration/ray/edges/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
21
22
  nv_ingest/framework/orchestration/ray/edges/async_queue_edge.py,sha256=PQliU_kyGbO9o42njpb8FrDMLrbLqwZzmBNXifxyG5Y,2312
@@ -29,45 +30,45 @@ nv_ingest/framework/orchestration/ray/primitives/__init__.py,sha256=wQSlVx3T14Zg
29
30
  nv_ingest/framework/orchestration/ray/primitives/dataclasses.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
30
31
  nv_ingest/framework/orchestration/ray/primitives/pipeline_monitor.py,sha256=L8ENPiF-lxqhIXVEQwQD5CCqQMb710ynj5D_Y4ixGhs,11077
31
32
  nv_ingest/framework/orchestration/ray/primitives/pipeline_topology.py,sha256=yd2sb_q_FbBLDWiYgdKRhpPVAIl26Wg8w3yelZ7c5zQ,15741
32
- nv_ingest/framework/orchestration/ray/primitives/ray_pipeline.py,sha256=ncVOYTCrvqcipUdLBmBgH5vKpGTRrX6MrgGyBNPVknM,66656
33
+ nv_ingest/framework/orchestration/ray/primitives/ray_pipeline.py,sha256=t9lf6zTjl_P5pe8mW-5F5pbZoC5mlcvEn2RCaLn_3Tk,66521
33
34
  nv_ingest/framework/orchestration/ray/primitives/ray_stat_collector.py,sha256=GGY6_i6_g5xTFzdo9Qmsu9i4knMTq6pJfgm-aaPEt_o,17226
34
35
  nv_ingest/framework/orchestration/ray/stages/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
35
36
  nv_ingest/framework/orchestration/ray/stages/extractors/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
36
- nv_ingest/framework/orchestration/ray/stages/extractors/audio_extractor.py,sha256=MdJA79_XECTmvksmxRZiIj5cZ46HzvIYOFNi5-S9o1k,3540
37
- nv_ingest/framework/orchestration/ray/stages/extractors/chart_extractor.py,sha256=D_clXtJe63CkeOcqor6C-X3Gbny7rNlp8PRMp9sRuPw,4281
38
- nv_ingest/framework/orchestration/ray/stages/extractors/docx_extractor.py,sha256=8DQOivEfvptUY7x7I6NsdJMTkiLdq2e_xPpPl3_bLJY,3536
39
- nv_ingest/framework/orchestration/ray/stages/extractors/html_extractor.py,sha256=fp0nzRQ21PmIInxtXjTYhOiYytmgBypDlY0e-WHCmGA,3551
40
- nv_ingest/framework/orchestration/ray/stages/extractors/image_extractor.py,sha256=CMIfHLCE4DUIedTxedZWWJKWlrLbWZzsuEE3j_ix_nY,3889
37
+ nv_ingest/framework/orchestration/ray/stages/extractors/audio_extractor.py,sha256=4SdgvzI9oJ_OK5oWGir9wXVIPV4Pont2EKv9mwcWMC0,3631
38
+ nv_ingest/framework/orchestration/ray/stages/extractors/chart_extractor.py,sha256=rfaDx6PqRCguhSYkJI6iVmMMtAlJNxzKfUrLmw_fKqs,4381
39
+ nv_ingest/framework/orchestration/ray/stages/extractors/docx_extractor.py,sha256=R4vshPcAUN2U6BIv8BCZQ862wLx8RJhCGXfpQ3K09Bs,3627
40
+ nv_ingest/framework/orchestration/ray/stages/extractors/html_extractor.py,sha256=7JrZSVIrK4_wr2s7TOTss7pgTY2F9GPQ7Ze3F_WFlKU,3642
41
+ nv_ingest/framework/orchestration/ray/stages/extractors/image_extractor.py,sha256=iY9fEfucfgCmO2ixX6qwn418J97nJz_FQGh7B6yziVo,3980
41
42
  nv_ingest/framework/orchestration/ray/stages/extractors/infographic_extractor.py,sha256=v5J7dnJBEaDfjoTz_N_yC3RAt6lwMLgLT28V-ahquLE,3261
42
- nv_ingest/framework/orchestration/ray/stages/extractors/pdf_extractor.py,sha256=5gpbf2gJiUmL0-XR6nxWih3E0_FqkMRDaLdgP_MQs-c,4892
43
+ nv_ingest/framework/orchestration/ray/stages/extractors/pdf_extractor.py,sha256=QagIA99AsHLihjRbXm-2BphdoQGHwzOHlqLyz7oDOSk,4992
43
44
  nv_ingest/framework/orchestration/ray/stages/extractors/pptx_extractor.py,sha256=RMbbl7Cuj4BT-TcgUx_0k8R-DLdw-o3fHxcIBIgrWt4,3776
44
- nv_ingest/framework/orchestration/ray/stages/extractors/table_extractor.py,sha256=YvCsBD4hxzgY-vGQmDwXVhleClMj2PHzsX71I5_VM_o,4079
45
+ nv_ingest/framework/orchestration/ray/stages/extractors/table_extractor.py,sha256=p71ktv6v5T-9npYpCbgbwW6-fS-65UWS7rCm8OWr2Bc,4170
45
46
  nv_ingest/framework/orchestration/ray/stages/injectors/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
46
- nv_ingest/framework/orchestration/ray/stages/injectors/metadata_injector.py,sha256=b4yve-J7ZMURSSuOPzSvfdIcyZ9fX8d-q7cYnXuxgsM,7025
47
+ nv_ingest/framework/orchestration/ray/stages/injectors/metadata_injector.py,sha256=gTPRFOoUGcwNrmPeqv4n5VmU-LBkha9QTYHO0ntiaIk,7116
47
48
  nv_ingest/framework/orchestration/ray/stages/meta/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
48
49
  nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_edge_base.py,sha256=LnVqBJmpfCmcI-eJLbkwK-7SS-hpEp98P4iCRv_Zhb0,1726
49
50
  nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_sink_stage_base.py,sha256=HQJXIuU7VjiQ6fQjHjbNNmIJX5f30cXFB0CJGixgwVo,3633
50
51
  nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_source_stage_base.py,sha256=hP25MLTP2bOEEncrYdxPPqeRyRVbij8aEurR1F1ZmhE,1811
51
52
  nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_stage_base.py,sha256=qiB_ZU5_3bXgvE9C2rvnXIS0Alm6M5PWLCeQm8ZxOy4,29812
52
53
  nv_ingest/framework/orchestration/ray/stages/mutate/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
53
- nv_ingest/framework/orchestration/ray/stages/mutate/image_dedup.py,sha256=ipH-3dctKaXRbjfkcXfpKRPXJBHXEosqPpm_Gb-5PZ0,3618
54
- nv_ingest/framework/orchestration/ray/stages/mutate/image_filter.py,sha256=-UkLArpgP1cr433dQDzO0g5Wn8cpvjqK6RGamufO5gQ,3432
54
+ nv_ingest/framework/orchestration/ray/stages/mutate/image_dedup.py,sha256=cPLG3ZEqhZkRiSsbL7cbF1zsvOAimd8K5O-qadUR9Mg,3709
55
+ nv_ingest/framework/orchestration/ray/stages/mutate/image_filter.py,sha256=f1CS8x9uifY1FJ_1lUF0fNNMExvM4zBIF012gxnSpqU,3523
55
56
  nv_ingest/framework/orchestration/ray/stages/sinks/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
56
57
  nv_ingest/framework/orchestration/ray/stages/sinks/default_drain.py,sha256=_USW1Vq8G2Wn-QFdPfFQCrtKG46hHeJvkEGbBxdpbVM,1488
57
58
  nv_ingest/framework/orchestration/ray/stages/sinks/message_broker_task_sink.py,sha256=QcvMQXIJ7EWIxty76Mo5Xv38Oj6X2KuS8qXQlf7E1uA,11676
58
59
  nv_ingest/framework/orchestration/ray/stages/sources/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
59
- nv_ingest/framework/orchestration/ray/stages/sources/message_broker_task_source.py,sha256=A_D17mOqTh1a-4_FbcEUzPOhCxK7gqrXdEHXHpnHTcU,22163
60
+ nv_ingest/framework/orchestration/ray/stages/sources/message_broker_task_source.py,sha256=Qm9XtTNX2CcUAlZRw33BS3Ql0djcsMGp52FPA2zHu3Q,22340
60
61
  nv_ingest/framework/orchestration/ray/stages/storage/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
61
62
  nv_ingest/framework/orchestration/ray/stages/storage/image_storage.py,sha256=WZN_-3Li-izDaPtk8IMrtn2os1ckT3U8Rb2PsfOWrcI,4009
62
- nv_ingest/framework/orchestration/ray/stages/storage/store_embeddings.py,sha256=4-bV4lIAdeylBeBHMO-6bVa2xilhPdZqkwfM8OwI2rM,3456
63
+ nv_ingest/framework/orchestration/ray/stages/storage/store_embeddings.py,sha256=EUtwhSDf-qGLVEhWEInr1VaLsvpcHUSyzCmHQVai-Ps,3547
63
64
  nv_ingest/framework/orchestration/ray/stages/telemetry/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
64
65
  nv_ingest/framework/orchestration/ray/stages/telemetry/job_counter.py,sha256=jEtEUibqs6IS6QakrzWY9zmxSUzuBpg_hzXy2R-I10Y,2870
65
66
  nv_ingest/framework/orchestration/ray/stages/telemetry/otel_meter.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
66
67
  nv_ingest/framework/orchestration/ray/stages/telemetry/otel_tracer.py,sha256=GqFwRpTlIu2qgb08B3OqVIKUG8QQAwSOHhjvnyL_9UE,8553
67
68
  nv_ingest/framework/orchestration/ray/stages/transforms/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
68
- nv_ingest/framework/orchestration/ray/stages/transforms/image_caption.py,sha256=Ghg09z5fSH0uyGplJPybE1e6sCD9IFpkGjzU3x8zd9o,3616
69
- nv_ingest/framework/orchestration/ray/stages/transforms/text_embed.py,sha256=RT4SkyjHPSJS7TuQhIcNW3B5a2tahEEtlxwB7M2UBYk,3309
70
- nv_ingest/framework/orchestration/ray/stages/transforms/text_splitter.py,sha256=D8HGPwOCqgCA6PMoeBAHaxKfLUgPhTlSP84uiujskYM,4829
69
+ nv_ingest/framework/orchestration/ray/stages/transforms/image_caption.py,sha256=GYF41y_teXMIzB24JQXuesVmvksmgNvTGYU3uU4TzbM,3742
70
+ nv_ingest/framework/orchestration/ray/stages/transforms/text_embed.py,sha256=o6QPd3GCPxbAvQFeb1oByOP5LII-FVkKbmAkBjCioB0,3435
71
+ nv_ingest/framework/orchestration/ray/stages/transforms/text_splitter.py,sha256=FAScWaZijrJHc5F5jgulHP_cdX2telS4pq3spwliFXw,4963
71
72
  nv_ingest/framework/orchestration/ray/stages/utility/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
72
73
  nv_ingest/framework/orchestration/ray/stages/utility/throughput_monitor.py,sha256=J7Qs745rt7cQlR3L2K9U6Mb_BDKUNpl_xoqCZMEPlks,2674
73
74
  nv_ingest/framework/orchestration/ray/util/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
@@ -105,14 +106,15 @@ nv_ingest/framework/util/service/meta/ingest/ingest_service_meta.py,sha256=QS3uN
105
106
  nv_ingest/framework/util/telemetry/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
106
107
  nv_ingest/framework/util/telemetry/global_stats.py,sha256=nq65pEEdiwjAfGiqsxG1CeQMC96O3CfQxsZuGFCY-ds,4554
107
108
  nv_ingest/pipeline/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
108
- nv_ingest/pipeline/default_pipeline_impl.py,sha256=TV3WvmQL7DulE54rbF5-apKboVagqI-xqPFx24Sq_8Q,15582
109
+ nv_ingest/pipeline/default_libmode_pipeline_impl.py,sha256=jcpTTC3yPt77EvNSyXxdnRryHLKQ2cMTMAo_0ZuLSNg,15450
110
+ nv_ingest/pipeline/default_pipeline_impl.py,sha256=Vlui2jk27DFD9lPX_FYLPzPGIfk1AqRvbcf5lGZeQPE,15145
109
111
  nv_ingest/pipeline/ingest_pipeline.py,sha256=wHAJhqAM2s8nbY-8itVogmSU-yVN4PZONGWcKnhzgfg,17794
110
112
  nv_ingest/pipeline/pipeline_schema.py,sha256=rLZZz2It2o2hVNWrZUJU8CarrqRei1fho3ZEMkkoBcg,17940
111
113
  nv_ingest/pipeline/config/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
112
- nv_ingest/pipeline/config/loaders.py,sha256=Zt9QOEd6g59pyN0zJZjh4VfXpjCHg2AR_4Wsbu4GkbM,6599
114
+ nv_ingest/pipeline/config/loaders.py,sha256=75Yr9WYO7j7ghvKTnYLfZXQZEH3J3VEZo5J4TunC_Us,7590
113
115
  nv_ingest/pipeline/config/replica_resolver.py,sha256=3zjh8gmepEYORFZRM4inq7GoBW0YL3gzUDiixUugjzQ,8899
114
- nv_ingest-2025.8.19.dev20250819.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
115
- nv_ingest-2025.8.19.dev20250819.dist-info/METADATA,sha256=5iRuG6d4pnP7AGTjbdXcLM2_Hv8eJwIr5YBBpFcAXKo,15061
116
- nv_ingest-2025.8.19.dev20250819.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
117
- nv_ingest-2025.8.19.dev20250819.dist-info/top_level.txt,sha256=sjb0ajIsgn3YgftSjZHlYO0HjYAIIhNuXG_AmywCvaU,10
118
- nv_ingest-2025.8.19.dev20250819.dist-info/RECORD,,
116
+ nv_ingest-2025.8.21.dev20250821.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
117
+ nv_ingest-2025.8.21.dev20250821.dist-info/METADATA,sha256=9OZRpNdekFmgKGLwLz3TYABiHTjBNLrv1Ul9xyVL9Hs,15061
118
+ nv_ingest-2025.8.21.dev20250821.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
119
+ nv_ingest-2025.8.21.dev20250821.dist-info/top_level.txt,sha256=sjb0ajIsgn3YgftSjZHlYO0HjYAIIhNuXG_AmywCvaU,10
120
+ nv_ingest-2025.8.21.dev20250821.dist-info/RECORD,,