nv-ingest 25.7.7.dev20250707__py3-none-any.whl → 25.8.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nv_ingest/framework/orchestration/ray/util/env_config.py (new file)
@@ -0,0 +1,75 @@
+ import os
+ import logging
+
+ logger = logging.getLogger(__name__)
+
+
+ def str_to_bool(value: str) -> bool:
+     """
+     Convert string to boolean value.
+
+     Parameters
+     ----------
+     value : str
+         String value to convert
+
+     Returns
+     -------
+     bool
+         Boolean representation of the string
+     """
+     return value.strip().lower() in {"1", "true", "yes", "on"}
+
+
+ def get_env_var(name: str, default, var_type=None):
+     """
+     Get environment variable with type conversion and default value.
+
+     Parameters
+     ----------
+     name : str
+         Environment variable name
+     default : Any
+         Default value if environment variable is not set
+     var_type : type, optional
+         Type to convert to. If None, infers from default value type
+
+     Returns
+     -------
+     Any
+         Environment variable value converted to the appropriate type
+     """
+     value = os.environ.get(name)
+     if value is None:
+         return default
+
+     # Determine type from default if not explicitly provided
+     target_type = var_type or type(default)
+
+     # Handle boolean conversion specially
+     if target_type is bool:
+         return str_to_bool(value)
+
+     # For other types, use direct conversion
+     try:
+         return target_type(value)
+     except (ValueError, TypeError) as e:
+         logger.warning(
+             f"Failed to convert environment variable {name}='{value}' to \
+             {target_type.__name__}. Using default: {default}, error: {e}"
+         )
+         return default
+
+
+ # Dynamic Memory Scaling Configuration
+ DISABLE_DYNAMIC_SCALING = get_env_var("INGEST_DISABLE_DYNAMIC_SCALING", False, bool)
+ DYNAMIC_MEMORY_THRESHOLD = get_env_var("INGEST_DYNAMIC_MEMORY_THRESHOLD", 0.75, float)
+ DYNAMIC_MEMORY_KP = get_env_var("INGEST_DYNAMIC_MEMORY_KP", 0.2, float)
+ DYNAMIC_MEMORY_KI = get_env_var("INGEST_DYNAMIC_MEMORY_KI", 0.01, float)
+ DYNAMIC_MEMORY_EMA_ALPHA = get_env_var("INGEST_DYNAMIC_MEMORY_EMA_ALPHA", 0.1, float)
+ DYNAMIC_MEMORY_TARGET_QUEUE_DEPTH = get_env_var("INGEST_DYNAMIC_MEMORY_TARGET_QUEUE_DEPTH", 0, int)
+ DYNAMIC_MEMORY_PENALTY_FACTOR = get_env_var("INGEST_DYNAMIC_MEMORY_PENALTY_FACTOR", 0.1, float)
+ DYNAMIC_MEMORY_ERROR_BOOST_FACTOR = get_env_var("INGEST_DYNAMIC_MEMORY_ERROR_BOOST_FACTOR", 1.5, float)
+ DYNAMIC_MEMORY_RCM_MEMORY_SAFETY_BUFFER_FRACTION = get_env_var(
+     "INGEST_DYNAMIC_MEMORY_RCM_MEMORY_SAFETY_BUFFER_FRACTION", 0.15, float
+ )
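For reference, a minimal usage sketch of the new helper, assuming the package is installed; the override values are illustrative, not package defaults. The target type is inferred from the default when var_type is omitted, and booleans are parsed through str_to_bool rather than the built-in bool():

    import os

    from nv_ingest.framework.orchestration.ray.util.env_config import get_env_var

    os.environ["INGEST_DYNAMIC_MEMORY_KP"] = "0.5"
    os.environ["INGEST_DISABLE_DYNAMIC_SCALING"] = "yes"  # "1", "true", "yes", "on" all parse as True

    print(get_env_var("INGEST_DYNAMIC_MEMORY_KP", 0.2))                # 0.5  (float inferred from the default)
    print(get_env_var("INGEST_DISABLE_DYNAMIC_SCALING", False, bool))  # True (routed through str_to_bool)
    print(get_env_var("INGEST_DYNAMIC_MEMORY_KI", 0.01))               # 0.01 (unset variable falls back to the default)

Note that the module-level constants above are resolved once at import time, so overrides have to be present in the environment before the module is first imported.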
nv_ingest/framework/orchestration/ray/util/pipeline/pid_controller.py
@@ -6,9 +6,7 @@ import logging
  import math
  from dataclasses import dataclass

- import numpy as np
- from collections import deque
- from typing import Dict, Any, Deque, List, Tuple, Optional
+ from typing import Dict, Any, List, Tuple, Optional

  from nv_ingest_api.util.system.hardware_info import SystemResourceProbe

@@ -16,7 +14,7 @@ logging.basicConfig(level=logging.INFO)
  logger = logging.getLogger(__name__)

  # --- Constants ---
- DEFAULT_STAGE_COST_MB = 5000.0  # Fallback memory cost
+ DEFAULT_STAGE_COST_MB = 5_000.0  # Fallback memory cost


  @dataclass
@@ -46,9 +44,7 @@ class PIDController:
          kp: float,
          ki: float,
          kd: float,  # Currently unused in delta calculation
-         stage_cost_estimates: Dict[str, int],  # Static estimates (MB)
          target_queue_depth: int = 0,
-         window_size: int = 10,
          penalty_factor: float = 0.0005,
          error_boost_factor: float = 1.5,
      ):
@@ -64,16 +60,10 @@ class PIDController:
          kd : float
              Derivative gain. Reacts to the rate of change of the error.
              (Currently set to 0 in internal calculations).
-         stage_cost_estimates : Dict[str, int]
-             Static estimated memory cost (in MB) per replica for each stage.
-             Used as a fallback and minimum for dynamic estimates.
          target_queue_depth : int, optional
              Default target queue depth for stages if not specified in metrics,
              by default 0. The PID loop tries to drive the queue depth towards
              this value.
-         window_size : int, optional
-             Number of recent samples used for dynamic memory cost estimation
-             per replica, by default 10.
          penalty_factor : float, optional
              Multiplier applied to the number of consecutive idle cycles for a
              stage. The resulting penalty effectively lowers the target queue
@@ -90,16 +80,11 @@ class PIDController:
          self.error_boost_factor = error_boost_factor

          # Per-Stage State
-         self.stage_cost_estimates = {
-             name: float(max(cost, 1.0)) for name, cost in stage_cost_estimates.items()  # Ensure float and min 1MB
-         }
          self.integral_error: Dict[str, float] = {}
          self.prev_error: Dict[str, float] = {}
-         self.memory_history: Dict[str, Deque[float]] = {}  # Per-replica memory history (MB)
          self.idle_cycles: Dict[str, int] = {}

          # Per-Stage Config
-         self.window_size = window_size
          self.penalty_factor = penalty_factor

      # --- Private Methods ---
@@ -110,48 +95,7 @@ class PIDController:
          logger.debug(f"[PID-{stage}] Initializing state.")
          self.integral_error[stage] = 0.0
          self.prev_error[stage] = 0.0
-         self.memory_history[stage] = deque(maxlen=self.window_size)
          self.idle_cycles[stage] = 0
-         # Ensure static cost estimate exists, provide default if missing
-         if stage not in self.stage_cost_estimates:
-             logger.warning(f"[PID-{stage}] Missing static cost estimate. Using default {DEFAULT_STAGE_COST_MB}MB.")
-             self.stage_cost_estimates[stage] = DEFAULT_STAGE_COST_MB
-
-     def _get_conservative_cost_estimate(self, stage: str) -> float:
-         """
-         Estimates dynamic memory cost, using static estimate as a floor/max.
-
-         Returns the maximum of the recent average dynamic cost per replica
-         and the static estimate provided during initialization. This provides
-         a conservative value for resource projection.
-
-         Parameters
-         ----------
-         stage : str
-             The name of the stage.
-
-         Returns
-         -------
-         float
-             The conservative memory cost estimate in MB per replica.
-         """
-         static_cost = self.stage_cost_estimates.get(stage, DEFAULT_STAGE_COST_MB)
-         memory_samples = self.memory_history.get(stage)
-
-         # Use numpy.mean if samples exist, otherwise fallback to static
-         if memory_samples and len(memory_samples) > 0:
-             try:
-                 dynamic_avg = float(np.mean(memory_samples))
-                 # Use max(dynamic, static) for projection, enforce min 1MB
-                 cost = max(dynamic_avg, static_cost, 1.0)
-                 return cost
-             except Exception as e:
-                 logger.error(
-                     f"[PID-{stage}] Error calculating mean of memory samples: {e}. Falling back to static cost.",
-                     exc_info=False,
-                 )
-                 return max(static_cost, 1.0)  # Fallback safely
-         return max(static_cost, 1.0)  # Fallback to static estimate if no history

      # --- Public Method ---

@@ -167,8 +111,8 @@ class PIDController:
          ----------
          stage_metrics : Dict[str, Dict[str, Any]]
              Dictionary mapping stage names to their current metrics. Expected keys
-             per stage: 'replicas', 'queue_depth'. Optional: 'memory_usage',
-             'target_queue_depth', 'processing', 'min_replicas', 'max_replicas'.
+             per stage: 'replicas', 'queue_depth', 'ema_memory_per_replica'.
+             Optional: 'target_queue_depth', 'processing', 'min_replicas', 'max_replicas'.

          Returns
          -------
@@ -185,16 +129,9 @@ class PIDController:

              # --- Extract data and calculate current memory state ---
              replicas = metrics.get("replicas", 0)
-             # Start with static cost as initial guess if no memory_usage provided
-             initial_cost_guess = self.stage_cost_estimates.get(stage, DEFAULT_STAGE_COST_MB)
-             memory_usage = metrics.get("memory_usage", initial_cost_guess * max(replicas, 1))
-             # Calculate memory per replica safely (avoid division by zero)
-             current_memory_per_replica = memory_usage / max(replicas, 1.0)
-
-             # Update memory history *before* calculating the conservative cost for *this* cycle's proposal
-             self.memory_history[stage].append(current_memory_per_replica)
-             # Recalculate conservative cost *after* updating history for the proposal
-             conservative_cost = self._get_conservative_cost_estimate(stage)
+             # The conservative cost is now the EMA memory passed in from the stats collector.
+             # Fallback to a default if not present.
+             conservative_cost = metrics.get("ema_memory_per_replica", DEFAULT_STAGE_COST_MB)

              # --- PID Calculation ---
              queue_depth = metrics.get("queue_depth", 0)
@@ -296,7 +233,6 @@ class ResourceConstraintManager:
          self,
          max_replicas: int,
          memory_threshold: int,
-         estimated_edge_cost_mb: int,
          memory_safety_buffer_fraction: float,
      ):
          """
@@ -309,7 +245,6 @@ class ResourceConstraintManager:

          self.max_replicas = max_replicas
          self.memory_threshold_mb = memory_threshold
-         self.estimated_edge_cost_mb = estimated_edge_cost_mb  # Keep track, though unused
          self.memory_safety_buffer_fraction = memory_safety_buffer_fraction  # Unused
          self.effective_memory_limit_mb = self.memory_threshold_mb

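With the deque-based bookkeeping removed, the controller expects the per-replica memory figure to arrive pre-smoothed as 'ema_memory_per_replica' in stage_metrics; the collector that produces it is not shown in this diff. A rough, hypothetical sketch of such a producer, assuming a standard exponential moving average driven by the new DYNAMIC_MEMORY_EMA_ALPHA setting:

    def update_ema(prev_ema, sample_mb, alpha=0.1):
        """One EMA step: alpha weights the newest per-replica memory sample (MB)."""
        if prev_ema is None:  # the first observation seeds the average
            return sample_mb
        return alpha * sample_mb + (1.0 - alpha) * prev_ema

    # Illustrative only: smooth a few hypothetical samples and hand the result to the controller.
    ema = None
    for sample_mb in (4200.0, 4800.0, 5100.0):
        ema = update_ema(ema, sample_mb, alpha=0.1)

    stage_metrics = {
        "pdf_extractor": {
            "replicas": 2,
            "queue_depth": 5,
            "ema_memory_per_replica": ema,  # controller falls back to DEFAULT_STAGE_COST_MB if absent
        }
    }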
nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_runners.py
@@ -23,18 +23,21 @@ from nv_ingest.framework.orchestration.ray.primitives.ray_pipeline import (
      RayPipelineInterface,
  )
  from nv_ingest.framework.orchestration.ray.util.pipeline.pipeline_builders import setup_ingestion_pipeline
+ from nv_ingest.framework.orchestration.ray.util.env_config import (
+     DISABLE_DYNAMIC_SCALING,
+     DYNAMIC_MEMORY_THRESHOLD,
+     DYNAMIC_MEMORY_KP,
+     DYNAMIC_MEMORY_KI,
+     DYNAMIC_MEMORY_EMA_ALPHA,
+     DYNAMIC_MEMORY_TARGET_QUEUE_DEPTH,
+     DYNAMIC_MEMORY_PENALTY_FACTOR,
+     DYNAMIC_MEMORY_ERROR_BOOST_FACTOR,
+     DYNAMIC_MEMORY_RCM_MEMORY_SAFETY_BUFFER_FRACTION,
+ )

  logger = logging.getLogger(__name__)


- def str_to_bool(value: str) -> bool:
-     return value.strip().lower() in {"1", "true", "yes", "on"}
-
-
- DISABLE_DYNAMIC_SCALING = str_to_bool(os.environ.get("INGEST_DISABLE_DYNAMIC_SCALING", "false"))
- DYNAMIC_MEMORY_THRESHOLD = float(os.environ.get("INGEST_DYNAMIC_MEMORY_THRESHOLD", 0.75))
-
-
  class PipelineCreationSchema(BaseModel):
      """
      Schema for pipeline creation configuration.
@@ -78,15 +81,17 @@ class PipelineCreationSchema(BaseModel):
      otel_exporter_otlp_endpoint: str = os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT", "localhost:4317")

      # OCR settings
-     paddle_http_endpoint: str = os.getenv("PADDLE_HTTP_ENDPOINT", "https://ai.api.nvidia.com/v1/cv/baidu/paddleocr")
-     paddle_infer_protocol: str = os.getenv("PADDLE_INFER_PROTOCOL", "http")
+     ocr_http_endpoint: str = os.getenv("OCR_HTTP_ENDPOINT", "https://ai.api.nvidia.com/v1/cv/baidu/paddleocr")
+     ocr_infer_protocol: str = os.getenv("OCR_INFER_PROTOCOL", "http")
+     ocr_model_name: str = os.getenv("OCR_MODEL_NAME", "paddle")

      # Task queue settings
      REDIS_INGEST_TASK_QUEUE: str = "ingest_task_queue"

      # Vision language model settings
      vlm_caption_endpoint: str = os.getenv(
-         "VLM_CAPTION_ENDPOINT", "https://ai.api.nvidia.com/v1/gr/nvidia/llama-3.1-nemotron-nano-vl-8b-v1/chat/completions"
+         "VLM_CAPTION_ENDPOINT",
+         "https://integrate.api.nvidia.com/v1/chat/completions",
      )
      vlm_caption_model_name: str = os.getenv("VLM_CAPTION_MODEL_NAME", "nvidia/llama-3.1-nemotron-nano-vl-8b-v1")

@@ -233,7 +238,15 @@ def _launch_pipeline(
      dynamic_memory_threshold = dynamic_memory_threshold if dynamic_memory_threshold else DYNAMIC_MEMORY_THRESHOLD

      scaling_config = ScalingConfig(
-         dynamic_memory_scaling=dynamic_memory_scaling, dynamic_memory_threshold=dynamic_memory_threshold
+         dynamic_memory_scaling=dynamic_memory_scaling,
+         dynamic_memory_threshold=dynamic_memory_threshold,
+         pid_kp=DYNAMIC_MEMORY_KP,
+         pid_ki=DYNAMIC_MEMORY_KI,
+         pid_ema_alpha=DYNAMIC_MEMORY_EMA_ALPHA,
+         pid_target_queue_depth=DYNAMIC_MEMORY_TARGET_QUEUE_DEPTH,
+         pid_penalty_factor=DYNAMIC_MEMORY_PENALTY_FACTOR,
+         pid_error_boost_factor=DYNAMIC_MEMORY_ERROR_BOOST_FACTOR,
+         rcm_memory_safety_buffer_fraction=DYNAMIC_MEMORY_RCM_MEMORY_SAFETY_BUFFER_FRACTION,
      )

      pipeline = RayPipeline(scaling_config=scaling_config)
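Because the scaling knobs are now resolved by env_config at import time, they can only be tuned through the environment of the process that launches the pipeline. A minimal sketch, with arbitrary override values:

    import os

    # Overrides must be set before nv_ingest's env_config module is first imported,
    # since its module-level constants are evaluated once at import time.
    os.environ["INGEST_DYNAMIC_MEMORY_KP"] = "0.3"
    os.environ["INGEST_DYNAMIC_MEMORY_TARGET_QUEUE_DEPTH"] = "2"

    from nv_ingest.framework.orchestration.ray.util.env_config import (
        DYNAMIC_MEMORY_KP,
        DYNAMIC_MEMORY_TARGET_QUEUE_DEPTH,
    )

    print(DYNAMIC_MEMORY_KP)                  # 0.3
    print(DYNAMIC_MEMORY_TARGET_QUEUE_DEPTH)  # 2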
nv_ingest/framework/orchestration/ray/util/pipeline/stage_builders.py
@@ -3,7 +3,7 @@
  # SPDX-License-Identifier: Apache-2.0

  import os
-
+ import psutil
  import click
  import logging

@@ -57,6 +57,7 @@ from nv_ingest_api.internal.schemas.transform.transform_image_filter_schema impo
  from nv_ingest_api.internal.schemas.transform.transform_text_embedding_schema import TextEmbeddingSchema
  from nv_ingest_api.internal.schemas.transform.transform_text_splitter_schema import TextSplitterSchema
  from nv_ingest_api.util.system.hardware_info import SystemResourceProbe
+ from nv_ingest.framework.orchestration.ray.util.env_config import DYNAMIC_MEMORY_THRESHOLD

  logger = logging.getLogger(__name__)

@@ -174,6 +175,16 @@ def add_metadata_injector_stage(pipeline, default_cpu_count, stage_name="metadat


  def add_pdf_extractor_stage(pipeline, default_cpu_count, stage_name="pdf_extractor"):
+     # Heuristic: Determine max_replicas based on system memory, capped by CPU cores.
+     total_memory_mb = psutil.virtual_memory().total / (1024**2)
+
+     # Allocate up to 75% of memory to this stage, using a 10GB high watermark per worker.
+     allocatable_memory_for_stage_mb = total_memory_mb * DYNAMIC_MEMORY_THRESHOLD
+     memory_based_replicas = int(allocatable_memory_for_stage_mb / 10_000.0)
+
+     # Cap the number of replicas by the number of available CPU cores.
+     max_replicas = max(1, min(memory_based_replicas, default_cpu_count))
+
      yolox_grpc, yolox_http, yolox_auth, yolox_protocol = get_nim_service("yolox")
      nemoretriever_parse_grpc, nemoretriever_parse_http, nemoretriever_parse_auth, nemoretriever_parse_protocol = (
          get_nim_service("nemoretriever_parse")
@@ -203,9 +214,8 @@ def add_pdf_extractor_stage(pipeline, default_cpu_count, stage_name="pdf_extract
          stage_actor=PDFExtractorStage,
          config=extractor_config,
          min_replicas=0,
-         max_replicas=int(max(1, (default_cpu_count // 3))),  # 33% of available CPU cores
+         max_replicas=max_replicas,
      )
-
      return stage_name

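To make the new replica heuristic concrete, a worked example with hypothetical numbers (a 64 GB host with 16 cores and the default DYNAMIC_MEMORY_THRESHOLD of 0.75):

    # Hypothetical host: 64 GB of RAM, 16 CPU cores, threshold 0.75.
    total_memory_mb = 64 * 1024                                              # 65,536 MB
    allocatable_memory_for_stage_mb = total_memory_mb * 0.75                 # 49,152 MB for the PDF stage
    memory_based_replicas = int(allocatable_memory_for_stage_mb / 10_000.0)  # 4 workers at a ~10 GB watermark each
    max_replicas = max(1, min(memory_based_replicas, 16))                    # 4: memory-bound rather than CPU-bound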
213
223
  yolox_table_structure_grpc, yolox_table_structure_http, yolox_auth, yolox_table_structure_protocol = (
214
224
  get_nim_service("yolox_table_structure")
215
225
  )
216
- paddle_grpc, paddle_http, paddle_auth, paddle_protocol = get_nim_service("paddle")
226
+ ocr_grpc, ocr_http, ocr_auth, ocr_protocol = get_nim_service("ocr")
217
227
 
218
228
  table_extractor_config = TableExtractorSchema(
219
229
  **{
220
230
  "endpoint_config": {
221
231
  "yolox_endpoints": (yolox_table_structure_grpc, yolox_table_structure_http),
222
232
  "yolox_infer_protocol": yolox_table_structure_protocol,
223
- "paddle_endpoints": (paddle_grpc, paddle_http),
224
- "paddle_infer_protocol": paddle_protocol,
233
+ "ocr_endpoints": (ocr_grpc, ocr_http),
234
+ "ocr_infer_protocol": ocr_protocol,
225
235
  "auth_token": yolox_auth,
226
236
  }
227
237
  }
@@ -232,7 +242,7 @@ def add_table_extractor_stage(pipeline, default_cpu_count, stage_name="table_ext
232
242
  stage_actor=TableExtractorStage,
233
243
  config=table_extractor_config,
234
244
  min_replicas=0,
235
- max_replicas=int(max(1, (default_cpu_count // 7))), # 14% of available CPU cores
245
+ max_replicas=_get_max_replicas(default_cpu_count, percentage_of_cpu=0.20),
236
246
  )
237
247
 
238
248
  return stage_name
@@ -242,15 +252,15 @@ def add_chart_extractor_stage(pipeline, default_cpu_count, stage_name="chart_ext
242
252
  yolox_graphic_elements_grpc, yolox_graphic_elements_http, yolox_auth, yolox_graphic_elements_protocol = (
243
253
  get_nim_service("yolox_graphic_elements")
244
254
  )
245
- paddle_grpc, paddle_http, paddle_auth, paddle_protocol = get_nim_service("paddle")
255
+ ocr_grpc, ocr_http, ocr_auth, ocr_protocol = get_nim_service("ocr")
246
256
 
247
257
  chart_extractor_config = ChartExtractorSchema(
248
258
  **{
249
259
  "endpoint_config": {
250
260
  "yolox_endpoints": (yolox_graphic_elements_grpc, yolox_graphic_elements_http),
251
261
  "yolox_infer_protocol": yolox_graphic_elements_protocol,
252
- "paddle_endpoints": (paddle_grpc, paddle_http),
253
- "paddle_infer_protocol": paddle_protocol,
262
+ "ocr_endpoints": (ocr_grpc, ocr_http),
263
+ "ocr_infer_protocol": ocr_protocol,
254
264
  "auth_token": yolox_auth,
255
265
  }
256
266
  }
@@ -261,21 +271,21 @@ def add_chart_extractor_stage(pipeline, default_cpu_count, stage_name="chart_ext
261
271
  stage_actor=ChartExtractorStage,
262
272
  config=chart_extractor_config,
263
273
  min_replicas=0,
264
- max_replicas=int(max(1, (default_cpu_count // 7))), # 14% of available CPU cores
274
+ max_replicas=_get_max_replicas(default_cpu_count, percentage_of_cpu=0.20),
265
275
  )
266
276
 
267
277
  return stage_name
268
278
 
269
279
 
270
280
  def add_infographic_extractor_stage(pipeline, default_cpu_count, stage_name="infographic_extractor"):
271
- paddle_grpc, paddle_http, paddle_auth, paddle_protocol = get_nim_service("paddle")
281
+ ocr_grpc, ocr_http, ocr_auth, ocr_protocol = get_nim_service("ocr")
272
282
 
273
283
  infographic_content_extractor_config = InfographicExtractorSchema(
274
284
  **{
275
285
  "endpoint_config": {
276
- "paddle_endpoints": (paddle_grpc, paddle_http),
277
- "paddle_infer_protocol": paddle_protocol,
278
- "auth_token": paddle_auth,
286
+ "ocr_endpoints": (ocr_grpc, ocr_http),
287
+ "ocr_infer_protocol": ocr_protocol,
288
+ "auth_token": ocr_auth,
279
289
  }
280
290
  }
281
291
  )
@@ -285,7 +295,7 @@ def add_infographic_extractor_stage(pipeline, default_cpu_count, stage_name="inf
285
295
  stage_actor=InfographicExtractorStage,
286
296
  config=infographic_content_extractor_config,
287
297
  min_replicas=0,
288
- max_replicas=int(max(1, (default_cpu_count // 14))), # 7% of available CPU cores
298
+ max_replicas=2,
289
299
  )
290
300
 
291
301
  return stage_name
@@ -307,7 +317,7 @@ def add_image_extractor_stage(pipeline, default_cpu_count, stage_name="image_ext
307
317
  stage_actor=ImageExtractorStage,
308
318
  config=image_extractor_config,
309
319
  min_replicas=0,
310
- max_replicas=int(max(1, (default_cpu_count // 14))), # 7% of available CPU cores
320
+ max_replicas=2,
311
321
  )
312
322
 
313
323
  return stage_name
@@ -329,7 +339,7 @@ def add_docx_extractor_stage(pipeline, default_cpu_count, stage_name="docx_extra
329
339
  stage_actor=DocxExtractorStage,
330
340
  config=DocxExtractorSchema(**docx_extractor_config),
331
341
  min_replicas=0,
332
- max_replicas=int(max(1, (default_cpu_count // 14))), # 7% of available CPU cores
342
+ max_replicas=2,
333
343
  )
334
344
 
335
345
  return stage_name
@@ -351,7 +361,7 @@ def add_pptx_extractor_stage(pipeline, default_cpu_count, stage_name="pptx_extra
351
361
  stage_actor=PPTXExtractorStage,
352
362
  config=PPTXExtractorSchema(**pptx_extractor_config),
353
363
  min_replicas=0,
354
- max_replicas=int(max(1, (default_cpu_count // 14))), # 7% of available CPU cores
364
+ max_replicas=2,
355
365
  )
356
366
 
357
367
  return stage_name
@@ -373,11 +383,7 @@ def add_audio_extractor_stage(pipeline, default_cpu_count, stage_name="audio_ext
373
383
  )
374
384
 
375
385
  pipeline.add_stage(
376
- name=stage_name,
377
- stage_actor=AudioExtractorStage,
378
- config=audio_extractor_config,
379
- min_replicas=0,
380
- max_replicas=1, # Audio extraction is a heavy IO bound operation with minimal CPU usage
386
+ name=stage_name, stage_actor=AudioExtractorStage, config=audio_extractor_config, min_replicas=0, max_replicas=2
381
387
  )
382
388
 
383
389
  return stage_name
@@ -390,7 +396,7 @@ def add_html_extractor_stage(pipeline, default_cpu_count, stage_name="html_extra
390
396
  stage_actor=HtmlExtractorStage,
391
397
  config=HtmlExtractorSchema(),
392
398
  min_replicas=0,
393
- max_replicas=int(max(1, (default_cpu_count // 14))), # 7% of available CPU cores
399
+ max_replicas=2,
394
400
  )
395
401
 
396
402
  return stage_name
@@ -455,7 +461,7 @@ def add_text_splitter_stage(pipeline, default_cpu_count, stage_name="text_splitt
455
461
  stage_actor=TextSplitterStage,
456
462
  config=config,
457
463
  min_replicas=0,
458
- max_replicas=int(max(1, (default_cpu_count // 14))), # 7% of available CPU cores
464
+ max_replicas=2,
459
465
  )
460
466
 
461
467
  return stage_name
@@ -517,7 +523,7 @@ def add_text_embedding_stage(pipeline, default_cpu_count, stage_name="text_embed
517
523
  stage_actor=TextEmbeddingTransformStage,
518
524
  config=config,
519
525
  min_replicas=0,
520
- max_replicas=int(max(1, (default_cpu_count // 14))), # 7% of available CPU cores
526
+ max_replicas=_get_max_replicas(default_cpu_count, percentage_of_cpu=0.07, replica_limit=6),
521
527
  )
522
528
 
523
529
  return stage_name
@@ -620,3 +626,24 @@ def add_source_stage(pipeline, default_cpu_count, source_name="pipeline_source")
          start_simple_message_broker(source_config.broker_client.model_dump())

      return source_name
+
+
+ def _get_max_replicas(default_cpu_count=None, percentage_of_cpu=0.14, replica_limit=None):
+     """
+     Calculate max replicas based on CPU percentage with optional upper limit.
+
+     Args:
+         default_cpu_count (int, optional): CPU cores to use. Auto-detected if None.
+         percentage_of_cpu (float, optional): CPU percentage to allocate. Defaults to 0.14.
+         replica_limit (int, optional): Upper bound for replicas. Defaults to None.
+
+     Returns:
+         int: Maximum replicas, at least 1.
+     """
+     if default_cpu_count is None:
+         default_cpu_count = _system_resource_probe.get_cpu_count()
+
+     _max_replicas = int(max(1, (default_cpu_count * percentage_of_cpu)))
+     if replica_limit is not None:
+         _max_replicas = min(_max_replicas, replica_limit)
+     return _max_replicas
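For reference, a short sketch of how the new helper resolves replica counts; the core counts are chosen purely for illustration:

    _get_max_replicas(default_cpu_count=32, percentage_of_cpu=0.20)                    # int(max(1, 6.4)) -> 6
    _get_max_replicas(default_cpu_count=128, percentage_of_cpu=0.07, replica_limit=6)  # min(8, 6) -> 6, the cap wins
    _get_max_replicas(default_cpu_count=2, percentage_of_cpu=0.07)                     # floors at 1 replica

When default_cpu_count is omitted, the helper asks the module's _system_resource_probe for the CPU count.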
dist-info/METADATA
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: nv-ingest
- Version: 25.7.7.dev20250707
+ Version: 25.8.0rc1
  Summary: Python module for multimodal document ingestion
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
  License: Apache License
@@ -231,7 +231,7 @@ Requires-Dist: opentelemetry-exporter-otlp>=1.27.0
  Requires-Dist: opentelemetry-sdk>=1.27.0
  Requires-Dist: pydantic>2.0.0
  Requires-Dist: pydantic-settings>2.0.0
- Requires-Dist: pypdfium2==4.30.1
+ Requires-Dist: pypdfium2==4.30.0
  Requires-Dist: pytest>=8.0.2
  Requires-Dist: pytest-mock>=3.14.0
  Requires-Dist: pytest-cov>=6.0.0
@@ -240,7 +240,6 @@ Requires-Dist: python-docx>=1.1.2
  Requires-Dist: python-dotenv>=1.0.1
  Requires-Dist: python-pptx>=1.0.2
  Requires-Dist: prometheus-client
- Requires-Dist: torch>=2.4.1
  Requires-Dist: ray[all]>=2.37.0
  Requires-Dist: redis>=5.2.1
  Requires-Dist: requests>=2.28.2
@@ -248,8 +247,6 @@ Requires-Dist: scikit-learn>=1.6.0
  Requires-Dist: scipy>=1.15.1
  Requires-Dist: setuptools>=78.1.1
  Requires-Dist: tabulate>=0.9.0
- Requires-Dist: torchvision
- Requires-Dist: torchaudio
  Requires-Dist: transformers>=4.47.0
  Requires-Dist: tqdm>=4.67.1
  Requires-Dist: uvicorn
dist-info/RECORD
@@ -3,7 +3,7 @@ nv_ingest/version.py,sha256=MG7DxlzpnoJI56vqxwzs9WeMAEI3uPhfDiNLs6GN6wI,986
  nv_ingest/api/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
  nv_ingest/api/main.py,sha256=XE-p4lJp1E7CCDOB8ENtYFrf63Dtq2bzQiGxpRfL2LA,1603
  nv_ingest/api/v1/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
- nv_ingest/api/v1/health.py,sha256=zqu-isMRjh4NveS4XWh5FaAZGPIlBVxpCOg3Uu8nUHQ,4746
+ nv_ingest/api/v1/health.py,sha256=pV-RoVq5y0iBPp0qZoLzd1xKpd0JiHAi0UMyMj99LqU,4740
  nv_ingest/api/v1/ingest.py,sha256=LWk3LN4lBd3uO8h30EN42g3LHCVcO00avVd5ohVK7NI,19392
  nv_ingest/api/v1/metrics.py,sha256=ZGVRApYLnzc2f2C7wRgGd7deqiXan-jxfA-33a16clY,981
  nv_ingest/framework/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
@@ -14,15 +14,15 @@ nv_ingest/framework/orchestration/ray/edges/async_queue_edge.py,sha256=PQliU_kyG
  nv_ingest/framework/orchestration/ray/edges/ray_queue_edge.py,sha256=VFii2yxJuikimOxie3edKq5JN06g78AF8bdHSHVX8p8,2677
  nv_ingest/framework/orchestration/ray/edges/threaded_queue_edge.py,sha256=N6NH4KgZJ60e_JkGRcSmfQtX37qtX4TMcavOR-n3heE,2549
  nv_ingest/framework/orchestration/ray/examples/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
- nv_ingest/framework/orchestration/ray/examples/pipeline_test_harness.py,sha256=qUNvWiNBUEEzuCySY3usWmHQz9qMgTGVZuKmLWqTsi4,16412
+ nv_ingest/framework/orchestration/ray/examples/pipeline_test_harness.py,sha256=DufjmNm-05uTkq_Mz0QQB6fHw_Rl9eX3PRtnH4sntGs,16405
  nv_ingest/framework/orchestration/ray/examples/task_source_harness.py,sha256=Yt7uxThg7s8WuMiaHLKC8r1XAG7QixegfkT-juE5oNw,1953
  nv_ingest/framework/orchestration/ray/examples/task_source_sink_harness.py,sha256=XkvsoIzH5ftXvAZ4ox7mxbx7ESVx6D8Xupcwbqgd52w,3277
  nv_ingest/framework/orchestration/ray/primitives/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
  nv_ingest/framework/orchestration/ray/primitives/dataclasses.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nv_ingest/framework/orchestration/ray/primitives/pipeline_monitor.py,sha256=L8ENPiF-lxqhIXVEQwQD5CCqQMb710ynj5D_Y4ixGhs,11077
- nv_ingest/framework/orchestration/ray/primitives/pipeline_topology.py,sha256=2Xg7QoKKPPFUWkLck7NtEtb1xLnK3b5uUw8LRxPhLyw,29106
- nv_ingest/framework/orchestration/ray/primitives/ray_pipeline.py,sha256=6i0EGWZ9WXpPVkfLwP5a2Y45gwAhQjWjobTp_kuFPsE,60478
- nv_ingest/framework/orchestration/ray/primitives/ray_stat_collector.py,sha256=AJ79OTh_NxxoTcyBNiopq3K_nLumsB9UU_axqQS3Gus,15810
+ nv_ingest/framework/orchestration/ray/primitives/pipeline_topology.py,sha256=IxLQYHYc9BnMOi73TSJzuhl8KOJAwlwwp8SPdUkV3nE,15737
+ nv_ingest/framework/orchestration/ray/primitives/ray_pipeline.py,sha256=HGJ_TyLTKKRl10HWfyx3D-n-zrFY0Fg9TN74UbOeCm8,66584
+ nv_ingest/framework/orchestration/ray/primitives/ray_stat_collector.py,sha256=8SpZzulHatqah7U3YHJMTLaYyPlWdCoaer_oNjhmHZo,17221
  nv_ingest/framework/orchestration/ray/stages/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
  nv_ingest/framework/orchestration/ray/stages/extractors/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
  nv_ingest/framework/orchestration/ray/stages/extractors/audio_extractor.py,sha256=KV4hvY0NTGG8CjZviTgcFLQzaH8WJJGkkb9PFYbROww,3417
@@ -40,7 +40,7 @@ nv_ingest/framework/orchestration/ray/stages/meta/__init__.py,sha256=wQSlVx3T14Z
  nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_edge_base.py,sha256=LnVqBJmpfCmcI-eJLbkwK-7SS-hpEp98P4iCRv_Zhb0,1726
  nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_sink_stage_base.py,sha256=AhlZUbDK2Jckqnu8hVbJrckW8MsSixfmWc1bst9gRYk,3447
  nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_source_stage_base.py,sha256=1Pae2xRPK0_QLh53yHECVFm2guwgvZaiRRr3tp4OpYI,1744
- nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_stage_base.py,sha256=rAuEH8uq8-j4Ipkb1zMB8z_x_PMvxwO9LFN4iY7UXjE,28957
+ nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_stage_base.py,sha256=pvBFsURWoDiAmDWNTLv2pdm5slv-1OnuXxwYvgaKumU,25703
  nv_ingest/framework/orchestration/ray/stages/mutate/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
  nv_ingest/framework/orchestration/ray/stages/mutate/image_dedup.py,sha256=UepeDvH6Cfgm5rIylRx6uOxihS0OZ4Q1DGUrjUybNaY,3493
  nv_ingest/framework/orchestration/ray/stages/mutate/image_filter.py,sha256=9ek5rVa4_GVdmVHGMJvbxacRSpIqVoUxgv28lzJwrTQ,3319
@@ -48,7 +48,7 @@ nv_ingest/framework/orchestration/ray/stages/sinks/__init__.py,sha256=wQSlVx3T14
  nv_ingest/framework/orchestration/ray/stages/sinks/default_drain.py,sha256=0SQHJlFuXlP16YRWduX1fMKgjhUd7UhDAWQ8XZh4_0I,1471
  nv_ingest/framework/orchestration/ray/stages/sinks/message_broker_task_sink.py,sha256=enylryvcPmzirpOjCahqYJbNSLsNvv1KpMnOzGqNZQQ,11509
  nv_ingest/framework/orchestration/ray/stages/sources/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
- nv_ingest/framework/orchestration/ray/stages/sources/message_broker_task_source.py,sha256=9YoVytbFFt-RpIR_MN2m3T93zVTjts8tjhi0qzLJkTw,19922
+ nv_ingest/framework/orchestration/ray/stages/sources/message_broker_task_source.py,sha256=b9ndnQBB1paR0iRe3NdzQ7BZ2S65LG2jbtjXvvDc_s4,21183
  nv_ingest/framework/orchestration/ray/stages/storage/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
  nv_ingest/framework/orchestration/ray/stages/storage/image_storage.py,sha256=6NkwQzseAnaj0Ptpr3oKvab2EnJdMwTjI2p4dS_HzsI,3901
  nv_ingest/framework/orchestration/ray/stages/storage/store_embeddings.py,sha256=SMLHQElZkKldnjy0_VHIKS65DBAAtOhwhdoaFe1yb9I,3337
@@ -63,11 +63,12 @@ nv_ingest/framework/orchestration/ray/stages/transforms/text_splitter.py,sha256=
  nv_ingest/framework/orchestration/ray/stages/utility/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
  nv_ingest/framework/orchestration/ray/stages/utility/throughput_monitor.py,sha256=MB27CkoNeuirN6CUHgjsC5Wh958NF7m_N7HE4VKfx3k,2264
  nv_ingest/framework/orchestration/ray/util/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
+ nv_ingest/framework/orchestration/ray/util/env_config.py,sha256=GN9msJ_3jdOBIAPnXNxX0ds_BKtHRnRhnYxwzcAU2KY,2386
  nv_ingest/framework/orchestration/ray/util/pipeline/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
- nv_ingest/framework/orchestration/ray/util/pipeline/pid_controller.py,sha256=AWyCFPP41vp1NOkO2urqm7vh-sTGKypJxwhdq8HxK6Q,50681
+ nv_ingest/framework/orchestration/ray/util/pipeline/pid_controller.py,sha256=flRLS7yc5n6gheykayuL3prC7O-ZhcVY2s9Wc14SGWE,47377
  nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_builders.py,sha256=d2-GS2tqk6JOFdw65CL1AwfjdUbkC_XxUuJH8Dy-aQ0,10456
- nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_runners.py,sha256=C7wf0AdsOpfYUTMKWLhi8hkoUnnCAD8v3OMwOkpYgKw,14331
- nv_ingest/framework/orchestration/ray/util/pipeline/stage_builders.py,sha256=sEpfVgZrnnM_kZ5KoSBMufO6iU4Z8v8XskMbOGumg2g,21415
+ nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_runners.py,sha256=-lXiAjxpk3CxIFWi7GW8ggTn1cjyugYZ9EtSfTT20mg,14844
+ nv_ingest/framework/orchestration/ray/util/pipeline/stage_builders.py,sha256=Vgwcb4X-k-PyQKnRMjLxWehhWwE67G8w7Q-1h7EPoL0,22212
  nv_ingest/framework/orchestration/ray/util/pipeline/tools.py,sha256=LQVb8k9jURaxh2Ga44Js_XuYFCbeN4_nLgDmtExovQg,8026
  nv_ingest/framework/orchestration/ray/util/system_tools/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
  nv_ingest/framework/orchestration/ray/util/system_tools/memory.py,sha256=ICqY0LLB3hFTZk03iX5yffMSKFH2q_aQomtDVzS_mKw,2228
@@ -96,8 +97,8 @@ nv_ingest/framework/util/service/meta/ingest/__init__.py,sha256=wQSlVx3T14ZgQAt-
  nv_ingest/framework/util/service/meta/ingest/ingest_service_meta.py,sha256=QS3uNxWBl5dIcmIpJKNe8_TLcTUuN2vcKyHeAwa-eSo,1589
  nv_ingest/framework/util/telemetry/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
  nv_ingest/framework/util/telemetry/global_stats.py,sha256=nq65pEEdiwjAfGiqsxG1CeQMC96O3CfQxsZuGFCY-ds,4554
- nv_ingest-25.7.7.dev20250707.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
- nv_ingest-25.7.7.dev20250707.dist-info/METADATA,sha256=6cvQcvjAZv_9WRvsAHUMu_tRKsSI2LLfgBKY5eCioT0,15139
- nv_ingest-25.7.7.dev20250707.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- nv_ingest-25.7.7.dev20250707.dist-info/top_level.txt,sha256=sjb0ajIsgn3YgftSjZHlYO0HjYAIIhNuXG_AmywCvaU,10
- nv_ingest-25.7.7.dev20250707.dist-info/RECORD,,
+ nv_ingest-25.8.0rc1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ nv_ingest-25.8.0rc1.dist-info/METADATA,sha256=6SNNz3Z9ZytUfWSmUO4pSqboflJssA_CACRAUM5dLSQ,15049
+ nv_ingest-25.8.0rc1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ nv_ingest-25.8.0rc1.dist-info/top_level.txt,sha256=sjb0ajIsgn3YgftSjZHlYO0HjYAIIhNuXG_AmywCvaU,10
+ nv_ingest-25.8.0rc1.dist-info/RECORD,,