nv-ingest 25.7.7.dev20250707__py3-none-any.whl → 25.8.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nv-ingest might be problematic. Click here for more details.

@@ -0,0 +1,75 @@
1
+ import os
2
+ import logging
3
+
4
+ logger = logging.getLogger(__name__)
5
+
6
+
7
def str_to_bool(value: str) -> bool:
    """
    Interpret a string as a boolean flag.

    Parameters
    ----------
    value : str
        Text to interpret. Surrounding whitespace and letter case are ignored.

    Returns
    -------
    bool
        True when the normalized text is one of "1", "true", "yes" or "on";
        False for every other input.
    """
    normalized = value.strip().lower()
    truthy_spellings = ("1", "true", "yes", "on")
    return normalized in truthy_spellings
22
+
23
+
24
def get_env_var(name: str, default, var_type=None):
    """
    Get environment variable with type conversion and default value.

    Parameters
    ----------
    name : str
        Environment variable name
    default : Any
        Value returned when the variable is unset, or when its text cannot be
        converted to the target type
    var_type : type, optional
        Type to convert to. If None, the type of ``default`` is used. When both
        ``var_type`` and ``default`` are None there is nothing to convert to,
        so the raw string is returned unchanged.

    Returns
    -------
    Any
        The converted environment value, or ``default`` as described above.
        Booleans are parsed with ``str_to_bool`` rather than ``bool(...)``,
        because ``bool("false")`` is True.
    """
    raw = os.environ.get(name)
    if raw is None:
        return default

    # Determine the target type: explicit argument first, then the default's type.
    if var_type is not None:
        target_type = var_type
    elif default is None:
        # type(None) is not a usable converter (NoneType("x") raises TypeError),
        # which previously made a set variable unreadable. Return the text as-is.
        return raw
    else:
        target_type = type(default)

    # Handle boolean conversion specially
    if target_type is bool:
        return str_to_bool(raw)

    # For other types, use direct conversion
    try:
        return target_type(raw)
    except (ValueError, TypeError) as e:
        # Adjacent f-strings instead of a backslash continuation inside the
        # literal, which embedded the next line's indentation in the message.
        logger.warning(
            f"Failed to convert environment variable {name}='{raw}' to "
            f"{target_type.__name__}. Using default: {default}, error: {e}"
        )
        return default
62
+
63
+
64
# Dynamic Memory Scaling Configuration
#
# Each constant is resolved once, when this module is imported, from its
# INGEST_* environment variable via get_env_var; the literal given as
# ``default`` is used when the variable is not set.
DISABLE_DYNAMIC_SCALING = get_env_var(
    "INGEST_DISABLE_DYNAMIC_SCALING", default=False, var_type=bool
)
DYNAMIC_MEMORY_THRESHOLD = get_env_var(
    "INGEST_DYNAMIC_MEMORY_THRESHOLD", default=0.75, var_type=float
)
DYNAMIC_MEMORY_KP = get_env_var(
    "INGEST_DYNAMIC_MEMORY_KP", default=0.2, var_type=float
)
DYNAMIC_MEMORY_KI = get_env_var(
    "INGEST_DYNAMIC_MEMORY_KI", default=0.01, var_type=float
)
DYNAMIC_MEMORY_EMA_ALPHA = get_env_var(
    "INGEST_DYNAMIC_MEMORY_EMA_ALPHA", default=0.1, var_type=float
)
DYNAMIC_MEMORY_TARGET_QUEUE_DEPTH = get_env_var(
    "INGEST_DYNAMIC_MEMORY_TARGET_QUEUE_DEPTH", default=0, var_type=int
)
DYNAMIC_MEMORY_PENALTY_FACTOR = get_env_var(
    "INGEST_DYNAMIC_MEMORY_PENALTY_FACTOR", default=0.1, var_type=float
)
DYNAMIC_MEMORY_ERROR_BOOST_FACTOR = get_env_var(
    "INGEST_DYNAMIC_MEMORY_ERROR_BOOST_FACTOR", default=1.5, var_type=float
)
DYNAMIC_MEMORY_RCM_MEMORY_SAFETY_BUFFER_FRACTION = get_env_var(
    "INGEST_DYNAMIC_MEMORY_RCM_MEMORY_SAFETY_BUFFER_FRACTION", default=0.15, var_type=float
)
@@ -6,9 +6,7 @@ import logging
6
6
  import math
7
7
  from dataclasses import dataclass
8
8
 
9
- import numpy as np
10
- from collections import deque
11
- from typing import Dict, Any, Deque, List, Tuple, Optional
9
+ from typing import Dict, Any, List, Tuple, Optional
12
10
 
13
11
  from nv_ingest_api.util.system.hardware_info import SystemResourceProbe
14
12
 
@@ -16,7 +14,7 @@ logging.basicConfig(level=logging.INFO)
16
14
  logger = logging.getLogger(__name__)
17
15
 
18
16
  # --- Constants ---
19
- DEFAULT_STAGE_COST_MB = 5000.0 # Fallback memory cost
17
+ DEFAULT_STAGE_COST_MB = 5_000.0 # Fallback memory cost
20
18
 
21
19
 
22
20
  @dataclass
@@ -46,9 +44,7 @@ class PIDController:
46
44
  kp: float,
47
45
  ki: float,
48
46
  kd: float, # Currently unused in delta calculation
49
- stage_cost_estimates: Dict[str, int], # Static estimates (MB)
50
47
  target_queue_depth: int = 0,
51
- window_size: int = 10,
52
48
  penalty_factor: float = 0.0005,
53
49
  error_boost_factor: float = 1.5,
54
50
  ):
@@ -64,16 +60,10 @@ class PIDController:
64
60
  kd : float
65
61
  Derivative gain. Reacts to the rate of change of the error.
66
62
  (Currently set to 0 in internal calculations).
67
- stage_cost_estimates : Dict[str, int]
68
- Static estimated memory cost (in MB) per replica for each stage.
69
- Used as a fallback and minimum for dynamic estimates.
70
63
  target_queue_depth : int, optional
71
64
  Default target queue depth for stages if not specified in metrics,
72
65
  by default 0. The PID loop tries to drive the queue depth towards
73
66
  this value.
74
- window_size : int, optional
75
- Number of recent samples used for dynamic memory cost estimation
76
- per replica, by default 10.
77
67
  penalty_factor : float, optional
78
68
  Multiplier applied to the number of consecutive idle cycles for a
79
69
  stage. The resulting penalty effectively lowers the target queue
@@ -90,16 +80,11 @@ class PIDController:
90
80
  self.error_boost_factor = error_boost_factor
91
81
 
92
82
  # Per-Stage State
93
- self.stage_cost_estimates = {
94
- name: float(max(cost, 1.0)) for name, cost in stage_cost_estimates.items() # Ensure float and min 1MB
95
- }
96
83
  self.integral_error: Dict[str, float] = {}
97
84
  self.prev_error: Dict[str, float] = {}
98
- self.memory_history: Dict[str, Deque[float]] = {} # Per-replica memory history (MB)
99
85
  self.idle_cycles: Dict[str, int] = {}
100
86
 
101
87
  # Per-Stage Config
102
- self.window_size = window_size
103
88
  self.penalty_factor = penalty_factor
104
89
 
105
90
  # --- Private Methods ---
@@ -110,48 +95,7 @@ class PIDController:
110
95
  logger.debug(f"[PID-{stage}] Initializing state.")
111
96
  self.integral_error[stage] = 0.0
112
97
  self.prev_error[stage] = 0.0
113
- self.memory_history[stage] = deque(maxlen=self.window_size)
114
98
  self.idle_cycles[stage] = 0
115
- # Ensure static cost estimate exists, provide default if missing
116
- if stage not in self.stage_cost_estimates:
117
- logger.warning(f"[PID-{stage}] Missing static cost estimate. Using default {DEFAULT_STAGE_COST_MB}MB.")
118
- self.stage_cost_estimates[stage] = DEFAULT_STAGE_COST_MB
119
-
120
- def _get_conservative_cost_estimate(self, stage: str) -> float:
121
- """
122
- Estimates dynamic memory cost, using static estimate as a floor/max.
123
-
124
- Returns the maximum of the recent average dynamic cost per replica
125
- and the static estimate provided during initialization. This provides
126
- a conservative value for resource projection.
127
-
128
- Parameters
129
- ----------
130
- stage : str
131
- The name of the stage.
132
-
133
- Returns
134
- -------
135
- float
136
- The conservative memory cost estimate in MB per replica.
137
- """
138
- static_cost = self.stage_cost_estimates.get(stage, DEFAULT_STAGE_COST_MB)
139
- memory_samples = self.memory_history.get(stage)
140
-
141
- # Use numpy.mean if samples exist, otherwise fallback to static
142
- if memory_samples and len(memory_samples) > 0:
143
- try:
144
- dynamic_avg = float(np.mean(memory_samples))
145
- # Use max(dynamic, static) for projection, enforce min 1MB
146
- cost = max(dynamic_avg, static_cost, 1.0)
147
- return cost
148
- except Exception as e:
149
- logger.error(
150
- f"[PID-{stage}] Error calculating mean of memory samples: {e}. Falling back to static cost.",
151
- exc_info=False,
152
- )
153
- return max(static_cost, 1.0) # Fallback safely
154
- return max(static_cost, 1.0) # Fallback to static estimate if no history
155
99
 
156
100
  # --- Public Method ---
157
101
 
@@ -167,8 +111,8 @@ class PIDController:
167
111
  ----------
168
112
  stage_metrics : Dict[str, Dict[str, Any]]
169
113
  Dictionary mapping stage names to their current metrics. Expected keys
170
- per stage: 'replicas', 'queue_depth'. Optional: 'memory_usage',
171
- 'target_queue_depth', 'processing', 'min_replicas', 'max_replicas'.
114
+ per stage: 'replicas', 'queue_depth', 'ema_memory_per_replica'.
115
+ Optional: 'target_queue_depth', 'processing', 'min_replicas', 'max_replicas'.
172
116
 
173
117
  Returns
174
118
  -------
@@ -185,16 +129,9 @@ class PIDController:
185
129
 
186
130
  # --- Extract data and calculate current memory state ---
187
131
  replicas = metrics.get("replicas", 0)
188
- # Start with static cost as initial guess if no memory_usage provided
189
- initial_cost_guess = self.stage_cost_estimates.get(stage, DEFAULT_STAGE_COST_MB)
190
- memory_usage = metrics.get("memory_usage", initial_cost_guess * max(replicas, 1))
191
- # Calculate memory per replica safely (avoid division by zero)
192
- current_memory_per_replica = memory_usage / max(replicas, 1.0)
193
-
194
- # Update memory history *before* calculating the conservative cost for *this* cycle's proposal
195
- self.memory_history[stage].append(current_memory_per_replica)
196
- # Recalculate conservative cost *after* updating history for the proposal
197
- conservative_cost = self._get_conservative_cost_estimate(stage)
132
+ # The conservative cost is now the EMA memory passed in from the stats collector.
133
+ # Fallback to a default if not present.
134
+ conservative_cost = metrics.get("ema_memory_per_replica", DEFAULT_STAGE_COST_MB)
198
135
 
199
136
  # --- PID Calculation ---
200
137
  queue_depth = metrics.get("queue_depth", 0)
@@ -296,7 +233,6 @@ class ResourceConstraintManager:
296
233
  self,
297
234
  max_replicas: int,
298
235
  memory_threshold: int,
299
- estimated_edge_cost_mb: int,
300
236
  memory_safety_buffer_fraction: float,
301
237
  ):
302
238
  """
@@ -309,7 +245,6 @@ class ResourceConstraintManager:
309
245
 
310
246
  self.max_replicas = max_replicas
311
247
  self.memory_threshold_mb = memory_threshold
312
- self.estimated_edge_cost_mb = estimated_edge_cost_mb # Keep track, though unused
313
248
  self.memory_safety_buffer_fraction = memory_safety_buffer_fraction # Unused
314
249
  self.effective_memory_limit_mb = self.memory_threshold_mb
315
250
 
@@ -9,6 +9,7 @@ import os
9
9
  from typing import Dict, Any
10
10
 
11
11
  import ray
12
+ from ray import LoggingConfig
12
13
  from pydantic import BaseModel
13
14
 
14
15
  from nv_ingest.framework.orchestration.ray.primitives.ray_pipeline import RayPipeline
@@ -47,16 +48,174 @@ def export_config_to_env(ingest_config: Any) -> None:
47
48
  os.environ.update({key.upper(): val for key, val in ingest_config.items()})
48
49
 
49
50
 
51
# Environment defaults seeded by each INGEST_RAY_LOG_LEVEL preset. Built once at
# import time instead of on every call.
_RAY_LOGGING_PRESETS = {
    "PRODUCTION": {
        "RAY_LOGGING_LEVEL": "ERROR",
        "RAY_LOGGING_ENCODING": "TEXT",
        "RAY_LOGGING_ADDITIONAL_ATTRS": "",
        "RAY_DEDUP_LOGS": "1",
        "RAY_LOG_TO_DRIVER": "0",  # false
        "RAY_LOGGING_ROTATE_BYTES": "1073741824",  # 1GB
        "RAY_LOGGING_ROTATE_BACKUP_COUNT": "9",  # 10GB total
        "RAY_DISABLE_IMPORT_WARNING": "1",
        "RAY_USAGE_STATS_ENABLED": "0",
    },
    "DEVELOPMENT": {
        "RAY_LOGGING_LEVEL": "INFO",
        "RAY_LOGGING_ENCODING": "TEXT",
        "RAY_LOGGING_ADDITIONAL_ATTRS": "",
        "RAY_DEDUP_LOGS": "1",
        "RAY_LOG_TO_DRIVER": "1",  # true
        "RAY_LOGGING_ROTATE_BYTES": "1073741824",  # 1GB
        "RAY_LOGGING_ROTATE_BACKUP_COUNT": "19",  # 20GB total
        "RAY_DISABLE_IMPORT_WARNING": "0",
        "RAY_USAGE_STATS_ENABLED": "1",
    },
    "DEBUG": {
        "RAY_LOGGING_LEVEL": "DEBUG",
        "RAY_LOGGING_ENCODING": "JSON",
        "RAY_LOGGING_ADDITIONAL_ATTRS": "name,funcName,lineno",
        "RAY_DEDUP_LOGS": "0",
        "RAY_LOG_TO_DRIVER": "1",  # true
        "RAY_LOGGING_ROTATE_BYTES": "536870912",  # 512MB
        "RAY_LOGGING_ROTATE_BACKUP_COUNT": "39",  # 20GB total
        "RAY_DISABLE_IMPORT_WARNING": "0",
        "RAY_USAGE_STATS_ENABLED": "1",
    },
}


def _normalize_int_env(name: str, fallback: str) -> None:
    """Rewrite os.environ[name] as a canonical integer string, or as ``fallback`` if it does not parse."""
    raw = os.environ.get(name, fallback)
    try:
        os.environ[name] = str(int(raw))
    except ValueError:
        logger.warning(f"Invalid {name} '{raw}', using preset default ({fallback})")
        os.environ[name] = fallback


def build_logging_config_from_env() -> LoggingConfig:
    """
    Build Ray LoggingConfig from environment variables.

    Side effect: this function writes to ``os.environ``. Every variable listed
    below that is not already set is filled in from the selected preset, and a
    few values are normalized in place (see the individual entries).

    Package-level preset (sets all defaults):
    - INGEST_RAY_LOG_LEVEL: PRODUCTION, DEVELOPMENT, DEBUG. Default: DEVELOPMENT.
      An unrecognized value logs a warning and is treated as DEVELOPMENT.

    Individual environment variables (override preset defaults; the defaults
    quoted here are those of the DEVELOPMENT preset):
    - RAY_LOGGING_LEVEL: Log level (DEBUG, INFO, WARNING, ERROR, CRITICAL). Default: INFO.
      An invalid value logs a warning and INFO is used.
    - RAY_LOGGING_ENCODING: Log encoding format (TEXT, JSON). Default: TEXT.
      An invalid value logs a warning and TEXT is used.
    - RAY_LOGGING_ADDITIONAL_ATTRS: Comma-separated list of additional standard logger attributes
    - RAY_DEDUP_LOGS: Log deduplication flag (0/1). Default: 1
    - RAY_LOG_TO_DRIVER: Log-to-driver flag. Default: 1. The value is lower-cased in place.
    - RAY_LOGGING_ROTATE_BYTES: Maximum log file size before rotation (bytes). Default: 1GB.
      Rewritten as a canonical integer; an unparsable value is replaced by the preset's value.
    - RAY_LOGGING_ROTATE_BACKUP_COUNT: Number of backup log files to keep. Default: 19.
      Rewritten as a canonical integer; an unparsable value is replaced by the preset's value.
    - RAY_DISABLE_IMPORT_WARNING: Import-warning flag (0/1). Default: 0
    - RAY_USAGE_STATS_ENABLED: Usage stats flag (0/1). Default: 1

    Returns
    -------
    LoggingConfig
        Built from the validated encoding, log level and additional attributes.
        The remaining settings are only exported through the environment.
    """
    # Resolve the package-level preset, falling back to DEVELOPMENT when unknown.
    preset_level = os.environ.get("INGEST_RAY_LOG_LEVEL", "DEVELOPMENT").upper()
    if preset_level not in _RAY_LOGGING_PRESETS:
        logger.warning(
            f"Invalid INGEST_RAY_LOG_LEVEL '{preset_level}', using DEVELOPMENT. "
            f"Valid presets: {list(_RAY_LOGGING_PRESETS.keys())}"
        )
        preset_level = "DEVELOPMENT"

    # Seed preset defaults without overriding anything the user already set.
    # After this loop every preset key is guaranteed to exist in os.environ, so
    # the lookups below need neither defaults nor None checks.
    preset_config = _RAY_LOGGING_PRESETS[preset_level]
    for key, default_value in preset_config.items():
        os.environ.setdefault(key, default_value)

    logger.info(f"Applied Ray logging preset: {preset_level}")

    # Validate log level
    log_level = os.environ["RAY_LOGGING_LEVEL"].upper()
    valid_levels = ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]
    if log_level not in valid_levels:
        logger.warning(f"Invalid RAY_LOGGING_LEVEL '{log_level}', using INFO. Valid levels: {valid_levels}")
        log_level = "INFO"

    # Validate encoding
    encoding = os.environ["RAY_LOGGING_ENCODING"].upper()
    valid_encodings = ["TEXT", "JSON"]
    if encoding not in valid_encodings:
        logger.warning(f"Invalid RAY_LOGGING_ENCODING '{encoding}', using TEXT. Valid encodings: {valid_encodings}")
        encoding = "TEXT"

    # Parse additional standard logger attributes, dropping empty entries.
    additional_log_standard_attrs = [
        attr.strip() for attr in os.environ["RAY_LOGGING_ADDITIONAL_ATTRS"].split(",") if attr.strip()
    ]

    # RAY_DEDUP_LOGS, RAY_DISABLE_IMPORT_WARNING and RAY_USAGE_STATS_ENABLED are
    # passed through exactly as seeded/supplied; only the settings below are
    # normalized in place.
    os.environ["RAY_LOG_TO_DRIVER"] = os.environ["RAY_LOG_TO_DRIVER"].lower()

    # Log rotation: an unparsable value falls back to the active preset's value
    # rather than a hard-coded DEVELOPMENT number.
    _normalize_int_env("RAY_LOGGING_ROTATE_BYTES", preset_config["RAY_LOGGING_ROTATE_BYTES"])
    _normalize_int_env("RAY_LOGGING_ROTATE_BACKUP_COUNT", preset_config["RAY_LOGGING_ROTATE_BACKUP_COUNT"])

    # Create LoggingConfig with validated parameters
    logging_config = LoggingConfig(
        encoding=encoding,
        log_level=log_level,
        additional_log_standard_attrs=additional_log_standard_attrs,
    )

    logger.info(
        f"Ray logging configured: preset={preset_level}, level={log_level}, encoding={encoding}, "
        f"additional_attrs={additional_log_standard_attrs}, "
        f"dedup_logs={os.environ['RAY_DEDUP_LOGS']}, "
        f"log_to_driver={os.environ['RAY_LOG_TO_DRIVER']}, "
        f"rotate_bytes={os.environ['RAY_LOGGING_ROTATE_BYTES']}, "
        f"rotate_backup_count={os.environ['RAY_LOGGING_ROTATE_BACKUP_COUNT']}"
    )

    return logging_config
206
+
207
+
50
208
  def setup_ingestion_pipeline(pipeline: RayPipeline, ingest_config: Dict[str, Any] = None):
51
209
  # Initialize the pipeline with the configuration
52
210
  if ingest_config:
53
211
  # Export the config to environment variables
54
212
  export_config_to_env(ingest_config)
55
213
 
56
- current_level = logging.getLogger().getEffectiveLevel()
214
+ _ = logging.getLogger().getEffectiveLevel()
215
+ logging_config = build_logging_config_from_env()
57
216
  ray_context = ray.init(
58
217
  namespace="nv_ingest_ray",
59
- logging_level=current_level,
218
+ logging_config=logging_config,
60
219
  ignore_reinit_error=True,
61
220
  dashboard_host="0.0.0.0",
62
221
  dashboard_port=8265,
@@ -23,18 +23,21 @@ from nv_ingest.framework.orchestration.ray.primitives.ray_pipeline import (
23
23
  RayPipelineInterface,
24
24
  )
25
25
  from nv_ingest.framework.orchestration.ray.util.pipeline.pipeline_builders import setup_ingestion_pipeline
26
+ from nv_ingest.framework.orchestration.ray.util.env_config import (
27
+ DISABLE_DYNAMIC_SCALING,
28
+ DYNAMIC_MEMORY_THRESHOLD,
29
+ DYNAMIC_MEMORY_KP,
30
+ DYNAMIC_MEMORY_KI,
31
+ DYNAMIC_MEMORY_EMA_ALPHA,
32
+ DYNAMIC_MEMORY_TARGET_QUEUE_DEPTH,
33
+ DYNAMIC_MEMORY_PENALTY_FACTOR,
34
+ DYNAMIC_MEMORY_ERROR_BOOST_FACTOR,
35
+ DYNAMIC_MEMORY_RCM_MEMORY_SAFETY_BUFFER_FRACTION,
36
+ )
26
37
 
27
38
  logger = logging.getLogger(__name__)
28
39
 
29
40
 
30
- def str_to_bool(value: str) -> bool:
31
- return value.strip().lower() in {"1", "true", "yes", "on"}
32
-
33
-
34
- DISABLE_DYNAMIC_SCALING = str_to_bool(os.environ.get("INGEST_DISABLE_DYNAMIC_SCALING", "false"))
35
- DYNAMIC_MEMORY_THRESHOLD = float(os.environ.get("INGEST_DYNAMIC_MEMORY_THRESHOLD", 0.75))
36
-
37
-
38
41
  class PipelineCreationSchema(BaseModel):
39
42
  """
40
43
  Schema for pipeline creation configuration.
@@ -78,15 +81,17 @@ class PipelineCreationSchema(BaseModel):
78
81
  otel_exporter_otlp_endpoint: str = os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT", "localhost:4317")
79
82
 
80
83
  # OCR settings
81
- paddle_http_endpoint: str = os.getenv("PADDLE_HTTP_ENDPOINT", "https://ai.api.nvidia.com/v1/cv/baidu/paddleocr")
82
- paddle_infer_protocol: str = os.getenv("PADDLE_INFER_PROTOCOL", "http")
84
+ ocr_http_endpoint: str = os.getenv("OCR_HTTP_ENDPOINT", "https://ai.api.nvidia.com/v1/cv/baidu/paddleocr")
85
+ ocr_infer_protocol: str = os.getenv("OCR_INFER_PROTOCOL", "http")
86
+ ocr_model_name: str = os.getenv("OCR_MODEL_NAME", "paddle")
83
87
 
84
88
  # Task queue settings
85
89
  REDIS_INGEST_TASK_QUEUE: str = "ingest_task_queue"
86
90
 
87
91
  # Vision language model settings
88
92
  vlm_caption_endpoint: str = os.getenv(
89
- "VLM_CAPTION_ENDPOINT", "https://ai.api.nvidia.com/v1/gr/nvidia/llama-3.1-nemotron-nano-vl-8b-v1/chat/completions"
93
+ "VLM_CAPTION_ENDPOINT",
94
+ "https://integrate.api.nvidia.com/v1/chat/completions",
90
95
  )
91
96
  vlm_caption_model_name: str = os.getenv("VLM_CAPTION_MODEL_NAME", "nvidia/llama-3.1-nemotron-nano-vl-8b-v1")
92
97
 
@@ -233,7 +238,15 @@ def _launch_pipeline(
233
238
  dynamic_memory_threshold = dynamic_memory_threshold if dynamic_memory_threshold else DYNAMIC_MEMORY_THRESHOLD
234
239
 
235
240
  scaling_config = ScalingConfig(
236
- dynamic_memory_scaling=dynamic_memory_scaling, dynamic_memory_threshold=dynamic_memory_threshold
241
+ dynamic_memory_scaling=dynamic_memory_scaling,
242
+ dynamic_memory_threshold=dynamic_memory_threshold,
243
+ pid_kp=DYNAMIC_MEMORY_KP,
244
+ pid_ki=DYNAMIC_MEMORY_KI,
245
+ pid_ema_alpha=DYNAMIC_MEMORY_EMA_ALPHA,
246
+ pid_target_queue_depth=DYNAMIC_MEMORY_TARGET_QUEUE_DEPTH,
247
+ pid_penalty_factor=DYNAMIC_MEMORY_PENALTY_FACTOR,
248
+ pid_error_boost_factor=DYNAMIC_MEMORY_ERROR_BOOST_FACTOR,
249
+ rcm_memory_safety_buffer_fraction=DYNAMIC_MEMORY_RCM_MEMORY_SAFETY_BUFFER_FRACTION,
237
250
  )
238
251
 
239
252
  pipeline = RayPipeline(scaling_config=scaling_config)