nv-ingest 2025.5.21.dev20250521__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nv-ingest might be problematic. Click here for more details.

Files changed (100) hide show
  1. nv_ingest/__init__.py +20 -0
  2. nv_ingest/api/__init__.py +3 -0
  3. nv_ingest/api/main.py +43 -0
  4. nv_ingest/api/v1/__init__.py +3 -0
  5. nv_ingest/api/v1/health.py +114 -0
  6. nv_ingest/api/v1/ingest.py +454 -0
  7. nv_ingest/framework/__init__.py +3 -0
  8. nv_ingest/framework/orchestration/__init__.py +3 -0
  9. nv_ingest/framework/orchestration/ray/__init__.py +3 -0
  10. nv_ingest/framework/orchestration/ray/edges/__init__.py +3 -0
  11. nv_ingest/framework/orchestration/ray/edges/async_queue_edge.py +63 -0
  12. nv_ingest/framework/orchestration/ray/edges/ray_queue_edge.py +73 -0
  13. nv_ingest/framework/orchestration/ray/edges/threaded_queue_edge.py +72 -0
  14. nv_ingest/framework/orchestration/ray/examples/__init__.py +3 -0
  15. nv_ingest/framework/orchestration/ray/examples/pipeline_test_harness.py +408 -0
  16. nv_ingest/framework/orchestration/ray/examples/task_source_harness.py +63 -0
  17. nv_ingest/framework/orchestration/ray/examples/task_source_sink_harness.py +94 -0
  18. nv_ingest/framework/orchestration/ray/primitives/__init__.py +3 -0
  19. nv_ingest/framework/orchestration/ray/primitives/dataclasses.py +0 -0
  20. nv_ingest/framework/orchestration/ray/primitives/pipeline_monitor.py +239 -0
  21. nv_ingest/framework/orchestration/ray/primitives/pipeline_topology.py +574 -0
  22. nv_ingest/framework/orchestration/ray/primitives/ray_pipeline.py +1187 -0
  23. nv_ingest/framework/orchestration/ray/primitives/ray_stat_collector.py +346 -0
  24. nv_ingest/framework/orchestration/ray/stages/__init__.py +3 -0
  25. nv_ingest/framework/orchestration/ray/stages/extractors/__init__.py +3 -0
  26. nv_ingest/framework/orchestration/ray/stages/extractors/audio_extractor.py +82 -0
  27. nv_ingest/framework/orchestration/ray/stages/extractors/chart_extractor.py +92 -0
  28. nv_ingest/framework/orchestration/ray/stages/extractors/docx_extractor.py +81 -0
  29. nv_ingest/framework/orchestration/ray/stages/extractors/image_extractor.py +85 -0
  30. nv_ingest/framework/orchestration/ray/stages/extractors/infographic_extractor.py +57 -0
  31. nv_ingest/framework/orchestration/ray/stages/extractors/pdf_extractor.py +113 -0
  32. nv_ingest/framework/orchestration/ray/stages/extractors/pptx_extractor.py +85 -0
  33. nv_ingest/framework/orchestration/ray/stages/extractors/table_extractor.py +90 -0
  34. nv_ingest/framework/orchestration/ray/stages/injectors/__init__.py +3 -0
  35. nv_ingest/framework/orchestration/ray/stages/injectors/metadata_injector.py +97 -0
  36. nv_ingest/framework/orchestration/ray/stages/meta/__init__.py +3 -0
  37. nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_edge_base.py +70 -0
  38. nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_sink_stage_base.py +82 -0
  39. nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_source_stage_base.py +59 -0
  40. nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_stage_base.py +652 -0
  41. nv_ingest/framework/orchestration/ray/stages/mutate/__init__.py +3 -0
  42. nv_ingest/framework/orchestration/ray/stages/mutate/image_dedup.py +85 -0
  43. nv_ingest/framework/orchestration/ray/stages/mutate/image_filter.py +84 -0
  44. nv_ingest/framework/orchestration/ray/stages/sinks/__init__.py +3 -0
  45. nv_ingest/framework/orchestration/ray/stages/sinks/default_drain.py +41 -0
  46. nv_ingest/framework/orchestration/ray/stages/sinks/message_broker_task_sink.py +268 -0
  47. nv_ingest/framework/orchestration/ray/stages/sources/__init__.py +3 -0
  48. nv_ingest/framework/orchestration/ray/stages/sources/message_broker_task_source.py +502 -0
  49. nv_ingest/framework/orchestration/ray/stages/storage/__init__.py +3 -0
  50. nv_ingest/framework/orchestration/ray/stages/storage/image_storage.py +98 -0
  51. nv_ingest/framework/orchestration/ray/stages/storage/store_embeddings.py +81 -0
  52. nv_ingest/framework/orchestration/ray/stages/telemetry/__init__.py +3 -0
  53. nv_ingest/framework/orchestration/ray/stages/telemetry/job_counter.py +66 -0
  54. nv_ingest/framework/orchestration/ray/stages/telemetry/otel_meter.py +3 -0
  55. nv_ingest/framework/orchestration/ray/stages/telemetry/otel_tracer.py +205 -0
  56. nv_ingest/framework/orchestration/ray/stages/transforms/__init__.py +3 -0
  57. nv_ingest/framework/orchestration/ray/stages/transforms/image_caption.py +81 -0
  58. nv_ingest/framework/orchestration/ray/stages/transforms/text_embed.py +81 -0
  59. nv_ingest/framework/orchestration/ray/stages/transforms/text_splitter.py +74 -0
  60. nv_ingest/framework/orchestration/ray/stages/utility/__init__.py +3 -0
  61. nv_ingest/framework/orchestration/ray/stages/utility/throughput_monitor.py +65 -0
  62. nv_ingest/framework/orchestration/ray/util/__init__.py +3 -0
  63. nv_ingest/framework/orchestration/ray/util/pipeline/__init__.py +3 -0
  64. nv_ingest/framework/orchestration/ray/util/pipeline/pid_controller.py +989 -0
  65. nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_builders.py +195 -0
  66. nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_runners.py +170 -0
  67. nv_ingest/framework/orchestration/ray/util/pipeline/stage_builders.py +609 -0
  68. nv_ingest/framework/orchestration/ray/util/system_tools/__init__.py +3 -0
  69. nv_ingest/framework/orchestration/ray/util/system_tools/memory.py +59 -0
  70. nv_ingest/framework/orchestration/ray/util/system_tools/visualizers.py +309 -0
  71. nv_ingest/framework/schemas/__init__.py +0 -0
  72. nv_ingest/framework/schemas/framework_ingest_config_schema.py +54 -0
  73. nv_ingest/framework/schemas/framework_job_counter_schema.py +12 -0
  74. nv_ingest/framework/schemas/framework_message_broker_sink_schema.py +18 -0
  75. nv_ingest/framework/schemas/framework_message_broker_source_schema.py +19 -0
  76. nv_ingest/framework/schemas/framework_message_wrapper_schema.py +5 -0
  77. nv_ingest/framework/schemas/framework_metadata_injector_schema.py +15 -0
  78. nv_ingest/framework/schemas/framework_otel_meter_schema.py +16 -0
  79. nv_ingest/framework/schemas/framework_otel_tracer_schema.py +12 -0
  80. nv_ingest/framework/schemas/framework_processing_job_schema.py +25 -0
  81. nv_ingest/framework/schemas/framework_task_injection_schema.py +15 -0
  82. nv_ingest/framework/schemas/framework_vdb_task_sink_schema.py +112 -0
  83. nv_ingest/framework/util/__init__.py +3 -0
  84. nv_ingest/framework/util/flow_control/__init__.py +8 -0
  85. nv_ingest/framework/util/flow_control/filter_by_task.py +227 -0
  86. nv_ingest/framework/util/service/__init__.py +3 -0
  87. nv_ingest/framework/util/service/impl/__init__.py +3 -0
  88. nv_ingest/framework/util/service/impl/ingest/__init__.py +3 -0
  89. nv_ingest/framework/util/service/impl/ingest/redis_ingest_service.py +395 -0
  90. nv_ingest/framework/util/service/meta/__init__.py +3 -0
  91. nv_ingest/framework/util/service/meta/ingest/__init__.py +3 -0
  92. nv_ingest/framework/util/service/meta/ingest/ingest_service_meta.py +41 -0
  93. nv_ingest/framework/util/telemetry/__init__.py +3 -0
  94. nv_ingest/framework/util/telemetry/global_stats.py +145 -0
  95. nv_ingest/version.py +38 -0
  96. nv_ingest-2025.5.21.dev20250521.dist-info/METADATA +263 -0
  97. nv_ingest-2025.5.21.dev20250521.dist-info/RECORD +100 -0
  98. nv_ingest-2025.5.21.dev20250521.dist-info/WHEEL +5 -0
  99. nv_ingest-2025.5.21.dev20250521.dist-info/licenses/LICENSE +201 -0
  100. nv_ingest-2025.5.21.dev20250521.dist-info/top_level.txt +1 -0
@@ -0,0 +1,989 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-25, NVIDIA CORPORATION & AFFILIATES.
2
+ # All rights reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ import logging
6
+ import math
7
+ from dataclasses import dataclass
8
+
9
+ import numpy as np
10
+ from collections import deque
11
+ from typing import Dict, Any, Deque, List, Tuple, Optional
12
+
13
+ from nv_ingest_api.util.system.hardware_info import SystemResourceProbe
14
+
15
# NOTE(review): basicConfig() runs at import time and configures the process-wide
# root logger. Library modules normally leave that to the application entry
# point -- confirm this side effect is intentional.
logging.basicConfig(level=logging.INFO)
# Module-level logger used by every class in this file.
logger = logging.getLogger(__name__)

# --- Constants ---
# Per-replica memory cost (MB) assumed for a stage that has no static estimate.
DEFAULT_STAGE_COST_MB = 5000.0  # Fallback memory cost
20
+
21
+
22
@dataclass
class StagePIDProposal:
    """
    Initial, per-stage replica proposal emitted by the PID controller.

    Attributes
    ----------
    name : str
        Stage name the proposal applies to.
    current_replicas : int
        Replica count reported for the stage when the proposal was computed.
    proposed_replicas : int
        Replica count suggested by the PID step (before global constraints).
    conservative_cost_estimate : float
        Per-replica memory cost used for projections: the larger of the
        recent dynamic average and the static estimate.
    metrics : Dict[str, Any]
        The metrics dictionary the proposal was derived from, kept for context.
    """

    name: str
    current_replicas: int
    proposed_replicas: int
    conservative_cost_estimate: float
    metrics: Dict[str, Any]
32
+
33
+
34
class PIDController:
    """
    Calculates initial replica adjustment proposals based on PID control logic.

    The controller reacts to the error between the current queue depth and the
    target queue depth, reduced by an idle penalty. It also records per-replica
    memory usage so that it can hand a conservative, dynamic cost estimate to
    the ResourceConstraintManager.

    Only the proportional and integral terms are active; the derivative gain is
    forced to zero.
    """

    # Upper bound on the idle penalty subtracted from the queue-depth error.
    _MAX_IDLE_PENALTY = 8

    def __init__(
        self,
        kp: float,
        ki: float,
        kd: float,  # Accepted for interface compatibility; currently ignored
        stage_cost_estimates: Dict[str, int],  # Static estimates (MB)
        target_queue_depth: int = 0,
        window_size: int = 10,
        penalty_factor: float = 0.0005,
        error_boost_factor: float = 1.5,
    ):
        """
        Initializes the PID controller.

        Parameters
        ----------
        kp : float
            Proportional gain. Reacts to the current error magnitude.
        ki : float
            Integral gain. Accumulates past errors to eliminate steady-state offsets.
        kd : float
            Derivative gain. Ignored: ``self.kd`` is always set to 0.0 and no
            derivative term enters the delta calculation.
        stage_cost_estimates : Dict[str, int]
            Static estimated memory cost (in MB) per replica for each stage.
            Values are converted to float and raised to at least 1.0. Used as
            a fallback and as a floor for dynamic estimates.
        target_queue_depth : int, optional
            Default target queue depth for stages that do not supply their own
            ``target_queue_depth`` metric, by default 0.
        window_size : int, optional
            Number of recent per-replica memory samples kept for the dynamic
            cost estimate, by default 10.
        penalty_factor : float, optional
            Multiplier applied to the *square* of the number of consecutive
            idle cycles of a stage. The result (capped at 8) is subtracted
            from the error, biasing idle stages toward scale-down,
            by default 0.0005.
        error_boost_factor : float, optional
            Factor applied to the raw PID delta when the error is positive
            (queue above target) to speed up scale-up, by default 1.5.
        """
        self.kp = kp
        self.ki = ki
        self.kd = 0.0  # Explicitly disable derivative term for now
        self.target_queue_depth = target_queue_depth
        self.error_boost_factor = error_boost_factor

        # Per-Stage State
        # Ensure float values and a minimum of 1MB per replica.
        self.stage_cost_estimates: Dict[str, float] = {
            name: float(max(cost, 1.0)) for name, cost in stage_cost_estimates.items()
        }
        self.integral_error: Dict[str, float] = {}
        self.prev_error: Dict[str, float] = {}
        self.memory_history: Dict[str, Deque[float]] = {}  # Per-replica memory history (MB)
        self.idle_cycles: Dict[str, int] = {}

        # Per-Stage Config
        self.window_size = window_size
        self.penalty_factor = penalty_factor

    # --- Private Methods ---

    def _initialize_stage_state(self, stage: str) -> None:
        """Initializes controller state variables for a newly seen stage."""
        if stage not in self.integral_error:
            logger.debug("[PID-%s] Initializing state.", stage)
            self.integral_error[stage] = 0.0
            self.prev_error[stage] = 0.0
            self.memory_history[stage] = deque(maxlen=self.window_size)
            self.idle_cycles[stage] = 0
            # Ensure static cost estimate exists, provide default if missing
            if stage not in self.stage_cost_estimates:
                logger.warning(
                    "[PID-%s] Missing static cost estimate. Using default %sMB.",
                    stage,
                    DEFAULT_STAGE_COST_MB,
                )
                self.stage_cost_estimates[stage] = DEFAULT_STAGE_COST_MB

    def _get_conservative_cost_estimate(self, stage: str) -> float:
        """
        Estimates the per-replica memory cost, using the static estimate as a floor.

        Returns the maximum of the recent average dynamic cost per replica and
        the static estimate, never less than 1 MB. With no recorded samples
        (or if averaging fails) the static estimate is returned.

        Parameters
        ----------
        stage : str
            The name of the stage.

        Returns
        -------
        float
            The conservative memory cost estimate in MB per replica.
        """
        static_cost = max(self.stage_cost_estimates.get(stage, DEFAULT_STAGE_COST_MB), 1.0)
        memory_samples = self.memory_history.get(stage)

        # A missing or empty history is falsy: fall back to the static estimate.
        if not memory_samples:
            return static_cost

        try:
            dynamic_avg = float(np.mean(memory_samples))
        except Exception as e:
            # Best-effort: a bad sample must not break the control loop.
            logger.error(
                "[PID-%s] Error calculating mean of memory samples: %s. Falling back to static cost.",
                stage,
                e,
                exc_info=False,
            )
            return static_cost

        return max(dynamic_avg, static_cost)

    # --- Public Method ---

    def calculate_initial_proposals(self, stage_metrics: Dict[str, Dict[str, Any]]) -> Dict[str, "StagePIDProposal"]:
        """
        Calculates initial replica proposals for each stage.

        For every stage this updates the memory history, the idle-cycle
        counter and the integral error, then derives a replica delta from the
        proportional and integral terms. Global constraints are NOT applied
        here; the only bound enforced is that a proposal is never negative.

        Parameters
        ----------
        stage_metrics : Dict[str, Dict[str, Any]]
            Dictionary mapping stage names to their current metrics. Keys read
            per stage (all optional, defaults in parentheses): 'replicas' (0),
            'queue_depth' (0), 'memory_usage' (static cost * replicas),
            'target_queue_depth' (controller default), 'processing' (0),
            'min_replicas' (0), 'max_replicas' (1).

        Returns
        -------
        Dict[str, StagePIDProposal]
            Dictionary mapping stage names to their initial proposals, including
            current/proposed replicas, cost estimates, and original metrics.
        """
        logger.debug("--- PID Controller: Calculating Initial Proposals ---")
        proposals: Dict[str, "StagePIDProposal"] = {}

        for stage, metrics in stage_metrics.items():
            # Ensure state exists and initialize if necessary
            self._initialize_stage_state(stage)

            # --- Extract data and calculate current memory state ---
            replicas = metrics.get("replicas", 0)
            memory_usage = metrics.get("memory_usage")
            if memory_usage is None:
                # No measurement supplied: assume the static cost for every replica.
                static_cost = self.stage_cost_estimates.get(stage, DEFAULT_STAGE_COST_MB)
                memory_usage = static_cost * max(replicas, 1)
            # Divide by at least 1 to avoid division by zero for idle stages.
            current_memory_per_replica = memory_usage / max(replicas, 1.0)

            # History is updated first so this cycle's sample is part of the estimate.
            self.memory_history[stage].append(current_memory_per_replica)
            conservative_cost = self._get_conservative_cost_estimate(stage)

            # --- PID Calculation ---
            queue_depth = metrics.get("queue_depth", 0)
            # Allow target override per stage, else use controller default
            target_queue_depth = metrics.get("target_queue_depth", self.target_queue_depth)
            min_replicas_metric = metrics.get("min_replicas", 0)
            # NOTE(review): with a default of 1, any stage that omits 'max_replicas'
            # is treated as "at max" once it has one replica -- confirm intended.
            max_replicas_metric = metrics.get("max_replicas", 1)

            # Idle tracking: a stage is idle when nothing is queued or in progress.
            if queue_depth == 0 and metrics.get("processing", 0) == 0:
                self.idle_cycles[stage] += 1
            else:
                self.idle_cycles[stage] = 0

            # Quadratic idle penalty, capped so it cannot dominate the error.
            penalty = min(self._MAX_IDLE_PENALTY, self.penalty_factor * (self.idle_cycles[stage] ** 2.0))

            # Error calculation (Queue deviation from target, adjusted by idle penalty)
            error = (queue_depth - target_queue_depth) - penalty

            # Basic anti-windup: do not accumulate while pinned at a replica
            # bound with an error that pushes further past that bound.
            if replicas >= max_replicas_metric and error > 0:
                logger.debug(
                    "[PID-%s] At max replicas (%s) with positive error (%.2f), pausing integral.",
                    stage,
                    replicas,
                    error,
                )
            elif replicas <= min_replicas_metric and error < 0:
                logger.debug(
                    "[PID-%s] At min replicas (%s) with negative error (%.2f), pausing integral.",
                    stage,
                    replicas,
                    error,
                )
            else:
                self.integral_error[stage] += error

            # Update previous error state for potential future derivative use
            self.prev_error[stage] = error

            # --- Delta Calculation (derivative term disabled) ---
            proportional_term = self.kp * error
            integral_term = self.ki * self.integral_error[stage]
            raw_delta = proportional_term + integral_term

            # Boost scale-up signals (positive error means queue > target)
            if error > 0:
                boosted_delta = raw_delta * self.error_boost_factor
                logger.debug(
                    "[PID-%s] Boosting positive error delta: %.3f -> %.3f",
                    stage,
                    raw_delta,
                    boosted_delta,
                )
                raw_delta = boosted_delta

            # Round to get integer replica change
            delta_replicas = int(round(raw_delta))
            raw_proposed_replicas = replicas + delta_replicas
            # A negative replica count is meaningless; floor the proposal at zero.
            proposed_replicas = max(0, raw_proposed_replicas)

            logger.debug(
                "[PID-%s] R=%s, Q=%s, Tgt=%s, Idle=%s, Pen=%.2f -> "
                "Err=%.2f, P=%.2f, I=%.2f (Acc=%.2f) -> "
                "DeltaR=%s, RawProp=%s",
                stage,
                replicas,
                queue_depth,
                target_queue_depth,
                self.idle_cycles[stage],
                penalty,
                error,
                proportional_term,
                integral_term,
                self.integral_error[stage],
                delta_replicas,
                raw_proposed_replicas,
            )

            # --- Create Final Proposal Object for this Stage ---
            proposals[stage] = StagePIDProposal(
                name=stage,
                current_replicas=replicas,
                proposed_replicas=proposed_replicas,
                conservative_cost_estimate=conservative_cost,  # Use updated cost
                metrics=metrics,  # Pass along original metrics
            )

        logger.debug("--- PID Controller: Initial Proposals Calculated ---")
        return proposals
279
+
280
+
281
+ class ResourceConstraintManager:
282
+ """
283
+ Applies global resource constraints and safety checks to initial proposals.
284
+
285
+ Takes the initial replica proposals generated by the PIDController and
286
+ adjusts them based on global limits (max replicas, available CPU cores based
287
+ on affinity, memory budget with safety buffer), and ensures pipeline
288
+ consistency (zero-replica safety). It allocates limited resources
289
+ proportionally if multiple stages request scale-ups simultaneously.
290
+
291
+ If current global memory usage exceeds the effective limit, it aggressively
292
+ scales down stages starting with the highest replica counts.
293
+ """
294
+
295
def __init__(
    self,
    max_replicas: int,
    memory_threshold: int,
    estimated_edge_cost_mb: int,
    memory_safety_buffer_fraction: float,
):
    """
    Initializes the Resource Constraint Manager and probes available CPU cores.

    Parameters
    ----------
    max_replicas : int
        Global replica cap; stored as ``self.max_replicas``.
    memory_threshold : int
        Memory budget in MB. Stored as ``self.memory_threshold_mb`` and used
        unchanged as ``self.effective_memory_limit_mb``.
    estimated_edge_cost_mb : int
        Stored for reference; not used by this initializer.
    memory_safety_buffer_fraction : float
        Must satisfy ``0.0 <= value < 1.0``. Stored, but not currently applied
        to the effective memory limit.

    Raises
    ------
    ValueError
        If ``memory_safety_buffer_fraction`` is outside ``[0.0, 1.0)``.
    """
    if not (0.0 <= memory_safety_buffer_fraction < 1.0):
        raise ValueError("memory_safety_buffer_fraction must be between 0.0 and 1.0")

    self.max_replicas = max_replicas
    self.memory_threshold_mb = memory_threshold
    self.estimated_edge_cost_mb = estimated_edge_cost_mb  # Keep track, though unused
    self.memory_safety_buffer_fraction = memory_safety_buffer_fraction  # Unused
    self.effective_memory_limit_mb = self.memory_threshold_mb

    core_detector = SystemResourceProbe()
    self.available_cores: Optional[float] = core_detector.get_effective_cores()
    self.core_detection_details: Dict[str, Any] = core_detector.get_details()

    # Derive an integer replica limit from the (possibly fractional) core count.
    # It stays None -- treated as unlimited -- when detection failed.
    self.core_based_replica_limit: Optional[int] = None
    if self.available_cores is not None and self.available_cores > 0:
        self.core_based_replica_limit = math.floor(self.available_cores)

    # available_cores may be None; applying ":.2f" to None raises TypeError,
    # so build the display value separately.
    cores_display = f"{self.available_cores:.2f}" if self.available_cores is not None else "None"

    logger.info(
        f"[ConstraintMgr] Initialized. MaxReplicas={max_replicas}, "
        f"EffectiveCoreLimit={cores_display} "  # Log the potentially fractional value
        f"(Method: {self.core_detection_details.get('detection_method')}), "
        f"CoreBasedReplicaLimit={self.core_based_replica_limit}, "  # Log the derived integer limit
        f"MemThreshold={memory_threshold}MB, "
        f"EffectiveLimit={self.effective_memory_limit_mb:.1f}MB "
    )
    logger.debug(f"[ConstraintMgr] Core detection details: {self.core_detection_details}")
336
+
337
+ # --- Private Methods ---
338
+
339
+ @staticmethod
340
+ def _get_effective_min_replicas(stage_name: str, metrics: Dict[str, Any], pipeline_in_flight: int) -> int:
341
+ """Helper to calculate the effective minimum replicas for a stage."""
342
+ min_replicas_metric = metrics.get("min_replicas", 0)
343
+ # If the pipeline is active globally, enforce a minimum of 1 replica,
344
+ # unless min_replicas dictates higher.
345
+ if pipeline_in_flight > 0:
346
+ return max(1, min_replicas_metric)
347
+ else: # Pipeline is globally idle
348
+ # Allow scaling down to zero ONLY if the pipeline is idle AND min_replicas allows it.
349
+ return min_replicas_metric
350
+
351
def _apply_aggressive_memory_scale_down(
    self,
    current_proposals: Dict[str, int],
    initial_proposals_meta: Dict[str, "StagePIDProposal"],
    current_global_memory_usage: int,
    pipeline_in_flight_global: int,
) -> Dict[str, int]:
    """
    If current memory exceeds the effective limit, force scale-downs.

    In a single pass, every stage proposed at more than one replica is reduced
    by 25% (rounded down, but by at least one replica), without going below
    the stage's effective minimum. Stages without metadata are skipped with an
    error log. The memory saved is a projection based on each stage's
    conservative per-replica cost estimate.

    Parameters
    ----------
    current_proposals : Dict[str, int]
        Replica proposal per stage; not modified.
    initial_proposals_meta : Dict[str, StagePIDProposal]
        Per-stage proposal metadata supplying metrics and cost estimates.
    current_global_memory_usage : int
        Current global memory usage in MB.
    pipeline_in_flight_global : int
        Number of items in flight across the pipeline; forwarded to the
        effective-minimum calculation.

    Returns
    -------
    Dict[str, int]
        ``current_proposals`` itself when memory is within the limit,
        otherwise an adjusted copy.
    """
    if current_global_memory_usage <= self.effective_memory_limit_mb:
        return current_proposals

    memory_overrun = current_global_memory_usage - self.effective_memory_limit_mb
    logger.warning(
        f"[ConstraintMgr] Aggressive Scale-Down Triggered: "
        f"Current Mem ({current_global_memory_usage:.1f}MB) > Effective Limit"
        f" ({self.effective_memory_limit_mb:.1f}MB). "
        f"Need to reduce by {memory_overrun:.1f}MB."
    )

    adjusted_proposals = current_proposals.copy()
    total_memory_reduced = 0.0
    stages_affected_details: Dict[str, Dict[str, Any]] = {}

    for name, current_replicas in current_proposals.items():
        proposal_meta = initial_proposals_meta.get(name)
        if not proposal_meta:
            logger.error(f"[ConstraintMgr] Missing metadata for stage {name} during aggressive scale-down.")
            continue

        # Only stages with more than one replica can give anything back.
        if current_replicas <= 1:
            logger.debug(
                f"[ConstraintMgr-{name}] Aggressive Scale-Down: Stage has {current_replicas} "
                f"replica(s), not eligible for 25% reduction."
            )
            continue

        effective_min = self._get_effective_min_replicas(name, proposal_meta.metrics, pipeline_in_flight_global)

        # 25% rounded down; for 2 or 3 replicas that is 0, so remove at least one.
        reduction_amount = max(1, math.floor(current_replicas * 0.25))
        proposed_new_replicas = current_replicas - reduction_amount
        final_new_replicas = max(effective_min, proposed_new_replicas)

        # The effective minimum may cancel the reduction entirely.
        if final_new_replicas >= current_replicas:
            logger.debug(
                f"[ConstraintMgr-{name}] Aggressive Scale-Down: No reduction applied. "
                f"Current: {current_replicas}, Target 25% reduction: {reduction_amount}, "
                f"Proposed: {proposed_new_replicas}, Effective Min: {effective_min}."
            )
            continue

        # The estimate is treated as the cost of ONE replica; a missing or
        # non-positive estimate degrades to a negligible value.
        estimate = proposal_meta.conservative_cost_estimate
        cost_per_replica = float(estimate if estimate and estimate > 0 else 1e-6)

        replicas_actually_reduced = current_replicas - final_new_replicas
        memory_saved_for_stage = replicas_actually_reduced * cost_per_replica

        logger.info(
            f"[ConstraintMgr-{name}] Aggressive Scale-Down: Reducing from "
            f"{current_replicas} -> {final_new_replicas} "
            f"(by {replicas_actually_reduced} replicas, target 25% of "
            f"{current_replicas} was {reduction_amount}). "
            f"Est. memory saved: {memory_saved_for_stage:.2f}MB."
        )
        adjusted_proposals[name] = final_new_replicas
        total_memory_reduced += memory_saved_for_stage
        stages_affected_details[name] = {
            "from": current_replicas,
            "to": final_new_replicas,
            "saved_mem": memory_saved_for_stage,
        }

    # Project the post-reduction overrun from the cost estimates.
    projected_new_global_memory_usage = current_global_memory_usage - total_memory_reduced
    new_memory_overrun = projected_new_global_memory_usage - self.effective_memory_limit_mb

    if not stages_affected_details:
        logger.warning("[ConstraintMgr] Aggressive Scale-Down: No stages were eligible or changed replicas.")
    elif new_memory_overrun > 0:
        logger.warning(
            f"[ConstraintMgr] Aggressive Scale-Down: Completed. Reduced total {total_memory_reduced:.1f}MB. "
            f"Stages affected: {len(stages_affected_details)}. "
            f"Projected memory still over limit by {new_memory_overrun:.1f}MB."
        )
    else:
        logger.info(
            f"[ConstraintMgr] Aggressive Scale-Down: Completed. Reduced total {total_memory_reduced:.1f}MB. "
            f"Stages affected: {len(stages_affected_details)}. "
            f"Projected memory now below limit (overrun {new_memory_overrun:.1f}MB)."
        )
    if stages_affected_details:
        logger.debug(f"[ConstraintMgr] Aggressive Scale-Down Details: {stages_affected_details}")

    return adjusted_proposals
477
+
478
+ def _apply_global_constraints_proportional(
479
+ self,
480
+ proposals_after_aggressive_sd: Dict[str, int], # Values from PID or after AggressiveMemSD
481
+ initial_proposals_meta: Dict[str, "StagePIDProposal"], # Contains original .current_replicas
482
+ current_global_memory_usage_mb: int,
483
+ current_effective_mins: Dict[str, int], # Effective minimum for each stage
484
+ room_to_scale_up_to_global_caps: bool,
485
+ ) -> Dict[str, int]:
486
+ """
487
+ Applies global replica, core, and memory limits to scale-up intentions.
488
+ (Docstring from previous correct version summarizing the logic is fine)
489
+ """
490
+ final_proposals_this_step = {}
491
+
492
+ if not room_to_scale_up_to_global_caps:
493
+ logger.info(
494
+ "[ConstraintMgr-Proportional] Global scaling beyond effective minimums is RESTRICTED "
495
+ "as SumOfEffectiveMins likely meets/exceeds a global Core/MaxReplica cap. "
496
+ "Proposed increases from initial current values will be nullified."
497
+ )
498
+ for name, prop_meta in initial_proposals_meta.items():
499
+ val_from_prior_phases = proposals_after_aggressive_sd.get(name, prop_meta.current_replicas)
500
+ original_current_replicas = prop_meta.current_replicas
501
+
502
+ if val_from_prior_phases > original_current_replicas:
503
+ final_proposals_this_step[name] = original_current_replicas
504
+ if val_from_prior_phases != original_current_replicas:
505
+ logger.info(
506
+ f"[ConstraintMgr-{name}] Proportional: Scaling restricted. "
507
+ f"Nullified proposed increase from {original_current_replicas} to {val_from_prior_phases}. "
508
+ f"Setting to {original_current_replicas}."
509
+ )
510
+ else:
511
+ final_proposals_this_step[name] = val_from_prior_phases
512
+ return final_proposals_this_step
513
+
514
+ # --- ELSE: room_to_scale_up_to_global_caps is TRUE ---
515
+ # We can proportionally scale *increases above each stage's effective minimum*,
516
+ # up to the global caps. The baseline sum for headroom is sum_of_effective_mins.
517
+
518
+ # Stores (stage_name, proposed_increase_above_eff_min, cost_per_replica)
519
+ upscale_deltas_above_eff_min: List[Tuple[str, int, float]] = []
520
+ total_requested_increase_replicas_above_eff_mins = 0
521
+ total_projected_mem_increase_for_deltas_mb = 0.0
522
+
523
+ # Initialize final_proposals_this_step: each stage starts at its effective minimum,
524
+ # but not less than what aggressive_sd might have proposed (e.g., if agg_sd proposed 0 and eff_min is 0).
525
+ # And not more than what PID/agg_sd proposed if that was already below effective_min.
526
+ # Essentially, the base is max(eff_min, value_from_agg_sd_if_value_is_for_scale_down_or_no_change).
527
+ # More simply: start each stage at its effective_min. The "delta" is how much PID wants *above* that.
528
+
529
+ sum_of_effective_mins_for_baseline = 0
530
+ for name, prop_meta in initial_proposals_meta.items():
531
+ eff_min_for_stage = current_effective_mins[name]
532
+ final_proposals_this_step[name] = eff_min_for_stage # Initialize with effective min
533
+ sum_of_effective_mins_for_baseline += eff_min_for_stage
534
+
535
+ # What did PID (after aggressive_sd) propose for this stage?
536
+ pid_proposed_val = proposals_after_aggressive_sd.get(name, prop_meta.current_replicas)
537
+
538
+ if pid_proposed_val > eff_min_for_stage:
539
+ # This stage wants to scale up beyond its effective minimum.
540
+ increase_delta = pid_proposed_val - eff_min_for_stage
541
+ cost = prop_meta.conservative_cost_estimate
542
+ upscale_deltas_above_eff_min.append((name, increase_delta, cost))
543
+ total_requested_increase_replicas_above_eff_mins += increase_delta
544
+ total_projected_mem_increase_for_deltas_mb += increase_delta * cost
545
+
546
+ logger.debug(
547
+ f"[ConstraintMgr-Proportional] Room to scale. BaselineSum "
548
+ f"(SumOfEffMins)={sum_of_effective_mins_for_baseline}. "
549
+ f"NumStagesRequestingUpscaleAboveEffMin={len(upscale_deltas_above_eff_min)}. "
550
+ f"TotalReplicaIncreaseReqAboveEffMin={total_requested_increase_replicas_above_eff_mins}. "
551
+ f"TotalMemIncreaseForTheseDeltas={total_projected_mem_increase_for_deltas_mb:.2f}MB."
552
+ )
553
+
554
+ reduction_factor = 1.0
555
+ limiting_reasons = []
556
+
557
+ if total_requested_increase_replicas_above_eff_mins <= 0:
558
+ logger.debug(
559
+ "[ConstraintMgr-Proportional] No upscale request beyond effective minimums. "
560
+ "Proposals remain at effective minimums (or prior phase values if lower and valid)."
561
+ )
562
+ # final_proposals_this_step already contains effective minimums.
563
+ # We need to ensure if PID proposed *lower* than effective_min (and eff_min was 0), that's respected.
564
+ # This should be: max(pid_proposed_value, eff_min_for_stage) for each stage.
565
+ for name_check in final_proposals_this_step.keys():
566
+ pid_val = proposals_after_aggressive_sd.get(
567
+ name_check, initial_proposals_meta[name_check].current_replicas
568
+ )
569
+ eff_min_val = current_effective_mins[name_check]
570
+ final_proposals_this_step[name_check] = (
571
+ max(pid_val, eff_min_val) if eff_min_val > 0 else pid_val
572
+ ) # if eff_min is 0, allow PID to go to 0
573
+ return final_proposals_this_step
574
+
575
+ projected_total_replicas_with_deltas = (
576
+ sum_of_effective_mins_for_baseline + total_requested_increase_replicas_above_eff_mins
577
+ )
578
+
579
+ # 1. Max Replicas Config
580
+ if projected_total_replicas_with_deltas > self.max_replicas:
581
+ # Headroom is how many *additional* replicas (beyond sum_of_eff_mins) we can add
582
+ permissible_increase_headroom = max(0, self.max_replicas - sum_of_effective_mins_for_baseline)
583
+ factor = permissible_increase_headroom / total_requested_increase_replicas_above_eff_mins
584
+ reduction_factor = min(reduction_factor, factor)
585
+ limiting_reasons.append(
586
+ f"MaxReplicas (Limit={self.max_replicas}, HeadroomAboveEffMins={permissible_increase_headroom}, "
587
+ f"Factor={factor:.3f})"
588
+ )
589
+
590
+ # 2. Core Based Replica Limit
591
+ if (
592
+ self.core_based_replica_limit is not None
593
+ and projected_total_replicas_with_deltas > self.core_based_replica_limit
594
+ ):
595
+ permissible_increase_headroom = max(0, self.core_based_replica_limit - sum_of_effective_mins_for_baseline)
596
+ factor = permissible_increase_headroom / total_requested_increase_replicas_above_eff_mins
597
+ reduction_factor = min(reduction_factor, factor)
598
+ limiting_reasons.append(
599
+ f"CoreLimit (Limit={self.core_based_replica_limit}, "
600
+ f"HeadroomAboveEffMins={permissible_increase_headroom}, Factor={factor:.3f})"
601
+ )
602
+
603
+ # 3. Memory Limit
604
+ # Memory check is based on current_global_memory_usage_mb + memory_for_the_increase_deltas
605
+ projected_total_global_memory_mb = current_global_memory_usage_mb + total_projected_mem_increase_for_deltas_mb
606
+ if projected_total_global_memory_mb > self.effective_memory_limit_mb:
607
+ # How much memory can we actually add without breaching the effective limit?
608
+ permissible_mem_increase_mb = max(0.0, self.effective_memory_limit_mb - current_global_memory_usage_mb)
609
+ factor_mem = (
610
+ permissible_mem_increase_mb / total_projected_mem_increase_for_deltas_mb
611
+ if total_projected_mem_increase_for_deltas_mb > 1e-9
612
+ else 0.0
613
+ )
614
+ reduction_factor = min(reduction_factor, factor_mem)
615
+ limiting_reasons.append(
616
+ f"MemoryLimit (Factor={factor_mem:.3f}, AvailableMemForIncrease={permissible_mem_increase_mb:.1f}MB)"
617
+ )
618
+
619
+ # Apply reduction to the deltas
620
+ if reduction_factor <= 0.001: # Epsilon for float
621
+ logger.info(
622
+ f"[ConstraintMgr-Proportional] Scale-up beyond effective minimums fully constrained by global limits. "
623
+ f"Reasons: {'; '.join(limiting_reasons) if limiting_reasons else 'None'}. "
624
+ f"Final ReductionFactor={reduction_factor:.3f}."
625
+ " Stages will remain at their effective minimums (or prior phase values if lower and eff_min is 0)."
626
+ )
627
+ # final_proposals_this_step already contains effective minimums.
628
+ # Need to ensure if PID wanted lower than eff_min (and eff_min was 0), that is respected.
629
+ for name_final_check in final_proposals_this_step.keys():
630
+ pid_val_final = proposals_after_aggressive_sd.get(
631
+ name_final_check, initial_proposals_meta[name_final_check].current_replicas
632
+ )
633
+ eff_min_final = current_effective_mins[name_final_check]
634
+ # If effective min is 0, allow PID's value (which could be 0). Otherwise, floor is effective min.
635
+ final_proposals_this_step[name_final_check] = (
636
+ pid_val_final if eff_min_final == 0 else max(pid_val_final, eff_min_final)
637
+ )
638
+
639
+ elif reduction_factor < 1.0:
640
+ logger.info(
641
+ f"[ConstraintMgr-Proportional] Reducing requested scale-up (beyond effective_mins) by "
642
+ f"factor {reduction_factor:.3f}. "
643
+ f"Limiting Factors: {'; '.join(limiting_reasons)}."
644
+ )
645
+ for name, increase_delta_above_eff_min, _ in upscale_deltas_above_eff_min:
646
+ allowed_increase = math.floor(increase_delta_above_eff_min * reduction_factor)
647
+ # Add this allowed increase to the stage's effective minimum
648
+ final_value_for_stage = current_effective_mins[name] + allowed_increase
649
+ final_proposals_this_step[name] = final_value_for_stage
650
+ if allowed_increase != increase_delta_above_eff_min:
651
+ logger.debug(
652
+ f"[ConstraintMgr-{name}] Proportional Adj: EffMin={current_effective_mins[name]}, "
653
+ f"ReqIncreaseAboveEffMin={increase_delta_above_eff_min}, AllowedIncrease={allowed_increase} "
654
+ f"-> FinalVal={final_value_for_stage}"
655
+ )
656
+ else: # reduction_factor is ~1.0, meaning full requested increase (above effective_mins) is allowed
657
+ logger.info(
658
+ "[ConstraintMgr-Proportional] Full requested scale-up (beyond effective_mins) "
659
+ "is permissible by global limits."
660
+ )
661
+ for name, increase_delta_above_eff_min, _ in upscale_deltas_above_eff_min:
662
+ # The full PID-intended value (which came in as proposals_after_aggressive_sd) is applied.
663
+ # Since final_proposals_this_step was initialized with effective_mins,
664
+ # and increase_delta_above_eff_min = pid_proposed_val - eff_min_for_stage,
665
+ # then eff_min_for_stage + increase_delta_above_eff_min = pid_proposed_val.
666
+ pid_intended_val = proposals_after_aggressive_sd.get(
667
+ name, initial_proposals_meta[name].current_replicas
668
+ )
669
+ final_proposals_this_step[name] = (
670
+ pid_intended_val # This effectively applies the PID's full wish for this stage
671
+ )
672
+
673
+ return final_proposals_this_step
674
+
675
def _enforce_replica_bounds(
    self, stage_name: str, tentative_replicas: int, metrics: Dict[str, Any], pipeline_in_flight: int
) -> int:
    """
    Clamp a stage's tentative replica count to its per-stage floor and ceiling.

    The floor is whatever ``_get_effective_min_replicas`` returns for the stage; the ceiling is
    ``metrics["max_replicas"]`` (1 when the key is absent). The ceiling is applied last, so it
    takes precedence if the floor happens to be larger than it.

    Parameters
    ----------
    stage_name : str
        Stage identifier; used for the floor lookup and in log messages.
    tentative_replicas : int
        Replica count produced by the earlier constraint phases.
    metrics : Dict[str, Any]
        Stage metrics; ``"max_replicas"`` and ``"min_replicas"`` are read from it.
    pipeline_in_flight : int
        Forwarded to ``_get_effective_min_replicas`` and echoed in the debug log.

    Returns
    -------
    int
        The clamped replica count.
    """
    ceiling = metrics.get("max_replicas", 1)
    floor = self._get_effective_min_replicas(stage_name, metrics, pipeline_in_flight)
    clamped = min(max(floor, tentative_replicas), ceiling)

    if clamped == tentative_replicas:
        # Nothing was changed; only call out the deliberate scale-to-zero case.
        if clamped == 0 and floor == 0:
            logger.debug(f"[ConstraintMgr-{stage_name}] Allowing scale to 0: Pipeline Idle and MinReplicas=0.")
        return clamped

    # The clamp changed the value: record both the configured and the effective bounds.
    configured_min = metrics.get("min_replicas", 0)
    logger.debug(
        f"[ConstraintMgr-{stage_name}] Bounds Applied: Tentative={tentative_replicas} ->"
        f" Final={clamped} "
        f"(MinConfig={configured_min}, MaxConfig={ceiling}, "
        f"EffectiveLowerBound={floor}, PipeInFlight={pipeline_in_flight})"
    )
    return clamped
696
+
697
+ @staticmethod
698
+ def _apply_global_consistency(
699
+ final_adjustments: Dict[str, int], initial_proposals: Dict[str, StagePIDProposal]
700
+ ) -> Dict:
701
+ """Ensures pipeline doesn't get stuck if one stage scales up from zero."""
702
+ scale_up_from_zero_triggered = any(
703
+ (prop.current_replicas == 0 and final_adjustments.get(name, 0) > 0)
704
+ for name, prop in initial_proposals.items()
705
+ )
706
+
707
+ if scale_up_from_zero_triggered:
708
+ logger.debug("[ConstraintMgr] Wake-up consistency: Ensuring no stages stuck at zero.")
709
+ for name, prop in initial_proposals.items():
710
+ if prop.current_replicas == 0 and final_adjustments.get(name, 0) == 0:
711
+ min_r = prop.metrics.get("min_replicas", 0)
712
+ max_r = prop.metrics.get("max_replicas", 1)
713
+ target = max(1, min_r)
714
+ final_target = min(target, max_r)
715
+ if final_target > 0:
716
+ logger.info(
717
+ f"[ConstraintMgr-{name}] Forcing minimum {final_target} replica due to global wake-up."
718
+ )
719
+ final_adjustments[name] = final_target
720
+
721
+ return final_adjustments
722
+
723
def _log_final_constraint_summary(
    self,
    final_adjustments: Dict[str, int],
    initial_proposals: Dict[str, "StagePIDProposal"],  # Forward reference
    global_in_flight: int,
    current_global_memory_usage_mb: int,
    num_edges: int,
    sum_of_effective_mins: int,
    can_globally_scale_beyond_effective_mins: bool,
) -> None:
    """
    Log a structured summary of the final replica decisions and global limit checks.

    This method only emits log records; it does not modify any of its arguments.

    Parameters
    ----------
    final_adjustments : Dict[str, int]
        Final replica count per stage.
    initial_proposals : Dict[str, StagePIDProposal]
        Original proposals; used for the per-stage memory cost, PID-proposed and current replica
        counts, and metrics. A stage present in ``final_adjustments`` but missing here is
        reported as ``N/A`` and contributes nothing to the projected memory.
    global_in_flight : int
        In-flight task count; logged and forwarded to ``_get_effective_min_replicas``.
    current_global_memory_usage_mb : int
        Current memory usage in MB, logged next to the projected figure.
    num_edges : int
        Number of queue/edge actors (informational only).
    sum_of_effective_mins : int
        Sum of the stages' effective minimum replicas; used to tell a tolerated cap overrun
        (caused by minimums alone) from an unexpected breach.
    can_globally_scale_beyond_effective_mins : bool
        Logged for context.
    """

    final_stage_replicas_total = sum(final_adjustments.values())
    # Skip stages without a proposal instead of raising KeyError; the per-stage section below
    # tolerates the same situation.
    projected_final_memory_mb = sum(
        count * initial_proposals[name].conservative_cost_estimate
        for name, count in final_adjustments.items()
        if name in initial_proposals
    )
    num_queue_actors = num_edges
    total_ray_components_for_info = final_stage_replicas_total + num_queue_actors

    logger.info("[ConstraintMgr] --- Final Decision & Constraint Summary ---")

    # --- I. Overall Pipeline State ---
    logger.info(f"[ConstraintMgr] Pipeline Activity: {global_in_flight} tasks in-flight.")
    logger.info(f"[ConstraintMgr] Effective Min Replicas (Sum): {sum_of_effective_mins}")
    logger.info(
        f"[ConstraintMgr] └─ Global Scaling Beyond Mins Permitted? {can_globally_scale_beyond_effective_mins}"
    )

    # --- II. Final Component Counts ---
    logger.info(f"[ConstraintMgr] Final Stage Replicas: {final_stage_replicas_total} (Target for caps)")
    logger.info(f"[ConstraintMgr] Queue/Edge Actors : {num_queue_actors} (Informational)")
    logger.info(f"[ConstraintMgr] Total Ray Components: {total_ray_components_for_info} (Informational)")

    # --- III. Resource Limits & Projected Usage (for Stages) ---
    max_r_cfg_str = str(self.max_replicas)
    core_based_limit_str = (
        str(self.core_based_replica_limit) if self.core_based_replica_limit is not None else "N/A"
    )
    eff_mem_limit_str = f"{self.effective_memory_limit_mb:.1f}MB"

    logger.info("[ConstraintMgr] Global Limits (Stages):")
    logger.info(f"[ConstraintMgr] ├─ MaxTotalReplicas : {max_r_cfg_str}")
    logger.info(
        f"[ConstraintMgr] ├─ CoreBasedRepLimit : {core_based_limit_str} "
        f"(System EffCores: {self.available_cores if self.available_cores is not None else 'N/A'})"
    )
    logger.info(f"[ConstraintMgr] └─ EffectiveMemLimit : {eff_mem_limit_str} ")

    logger.info("[ConstraintMgr] Projected Usage (Stages):")
    logger.info(f"[ConstraintMgr] ├─ Replicas : {final_stage_replicas_total}")
    logger.info(
        f"[ConstraintMgr] └─ Memory : {projected_final_memory_mb:.1f}MB "
        f"(Current: {current_global_memory_usage_mb:.1f}MB)"
    )

    # --- IV. Limit Adherence Analysis (for Stages) ---
    unexpected_breaches_details = []

    def _replica_cap_status(limit: int, breach_label: str) -> str:
        """Classify the final replica total against one replica cap, recording unexpected breaches."""
        if final_stage_replicas_total <= limit:
            return "OK"
        # Exceeding the cap is tolerated only when the effective minimums alone already reach it
        # and the final total does not go beyond those minimums.
        if sum_of_effective_mins >= limit and final_stage_replicas_total <= sum_of_effective_mins:
            return f"NOTE: Limit met/exceeded by SumOfMins ({sum_of_effective_mins})"
        status = f"BREACHED (Final={final_stage_replicas_total} > Limit={limit})"
        unexpected_breaches_details.append(f"{breach_label}: {status}")
        return status

    # 1. Max Stage Replicas
    status_max_r = _replica_cap_status(self.max_replicas, "MaxReplicas")

    # 2. Core-Based Stage Replica Limit
    status_core_r = "N/A"
    if self.core_based_replica_limit is not None:
        status_core_r = _replica_cap_status(self.core_based_replica_limit, "CoreBasedLimit")

    # 3. Memory Limit
    tolerance = 0.01  # MB
    status_mem = "OK"
    if projected_final_memory_mb > (self.effective_memory_limit_mb + tolerance):
        status_mem = (
            f"BREACHED (Projected={projected_final_memory_mb:.1f}MB > Limit={self.effective_memory_limit_mb:.1f}MB)"
        )
        unexpected_breaches_details.append(f"MemoryLimit: {status_mem}")

    logger.info("[ConstraintMgr] Limit Adherence (Stages):")
    logger.info(f"[ConstraintMgr] ├─ MaxTotalReplicas : {status_max_r}")
    logger.info(f"[ConstraintMgr] ├─ CoreBasedRepLimit : {status_core_r}")
    logger.info(f"[ConstraintMgr] └─ EffectiveMemLimit : {status_mem}")

    if unexpected_breaches_details:
        logger.warning(f"[ConstraintMgr] └─ UNEXPECTED BREACHES: {'; '.join(unexpected_breaches_details)}")
    else:
        logger.info("[ConstraintMgr] └─ All hard caps (beyond tolerated minimums/wake-up) appear respected.")

    # --- V. Final Decisions Per Stage ---
    logger.info("[ConstraintMgr] Final Decisions (Per Stage):")
    if not final_adjustments:
        logger.info("[ConstraintMgr] └─ No stages to adjust.")
    else:
        # Longest stage name drives the column alignment.
        max_name_len = max(len(name) for name in final_adjustments)

        for stage_name, count in sorted(final_adjustments.items()):
            orig_prop = initial_proposals.get(stage_name)
            if orig_prop is None:
                # No proposal for this stage: report placeholders rather than dereferencing None.
                pid_proposed = current = eff_min = "N/A"
            else:
                pid_proposed = orig_prop.proposed_replicas
                current = orig_prop.current_replicas
                eff_min = self._get_effective_min_replicas(stage_name, orig_prop.metrics, global_in_flight)

            pid_proposed_str = f"(PID: {pid_proposed})"
            current_str = f"(Current: {current})"
            eff_min_str = f"(EffMin: {eff_min})"

            logger.info(
                f"[ConstraintMgr] └─ {stage_name:<{max_name_len}} : "
                f"{count:<3} {pid_proposed_str} {current_str} {eff_min_str}"
            )

    logger.info("[ConstraintMgr] --- Constraint Summary END ---")
852
+
853
+ # --- Public Method ---
854
+
855
def apply_constraints(
    self,
    initial_proposals: Dict[str, "StagePIDProposal"],
    global_in_flight: int,
    current_global_memory_usage_mb: int,
    num_edges: int,
) -> Dict[str, int]:
    """
    Turn per-stage PID replica proposals into final, constraint-respecting replica counts.

    The phases run in order, and a failure in one phase is logged and replaced by a fallback so
    that a result is always returned:

    1. Seed the working values from each proposal's ``proposed_replicas``.
    2. Aggressive memory scale-down (fallback: every stage keeps its current replicas).
    3. Global caps with proportional allocation (fallback: the larger of the phase-2 value and
       the stage's effective minimum).
    4. Per-stage min/max bounds (fallback: the larger of the current replicas and the stage's
       effective minimum).
    5. Global wake-up consistency (fallback: the phase-4 result is kept).

    A summary is logged at the end; a failure while logging it does not discard the decisions.

    Parameters
    ----------
    initial_proposals : Dict[str, StagePIDProposal]
        Proposal per stage, providing ``current_replicas``, ``proposed_replicas``,
        ``conservative_cost_estimate`` and ``metrics``.
    global_in_flight : int
        In-flight task count, forwarded to the effective-minimum and bound calculations.
    current_global_memory_usage_mb : int
        Current global memory usage in MB.
    num_edges : int
        Number of pipeline edges; used only for logging.

    Returns
    -------
    Dict[str, int]
        Final replica count for every stage in ``initial_proposals``.
    """
    logger.info(
        f"[ConstraintMgr] --- Applying Constraints START --- "
        f"GlobalInFlight={global_in_flight}, "
        f"CurrentGlobalMemMB={current_global_memory_usage_mb}, "
        f"NumEdges={num_edges}."
    )
    logger.debug("[ConstraintMgr] Initial Proposals:")
    for name, prop in initial_proposals.items():
        logger.debug(
            f"[ConstraintMgr] Stage '{name}': Current={prop.current_replicas}, "
            f"PIDProposed={prop.proposed_replicas}, CostMB={prop.conservative_cost_estimate:.2f}, "
            f"MinCfg={prop.metrics.get('min_replicas', 'N/A')}, MaxCfg={prop.metrics.get('max_replicas', 'N/A')}"
        )

    # --- Phase 1: Initialize adjustments from PID proposals ---
    intermediate_adjustments: Dict[str, int] = {
        name: prop.proposed_replicas for name, prop in initial_proposals.items()
    }
    logger.debug(f"[ConstraintMgr] Intermediate Adjustments (Phase 1 - From PID): {intermediate_adjustments}")

    # --- Phase 2: Aggressive Memory Scale-Down (Optional) ---
    try:
        intermediate_adjustments = self._apply_aggressive_memory_scale_down(
            intermediate_adjustments, initial_proposals, current_global_memory_usage_mb, global_in_flight
        )
        logger.debug(
            "[ConstraintMgr] Intermediate Adjustments (Phase 2 - After Aggressive Mem Scale-Down): "
            f"{intermediate_adjustments}"
        )
    except Exception as e_agg:
        logger.error(f"[ConstraintMgr] Error during aggressive memory scale-down: {e_agg}", exc_info=True)
        # Fallback: hold every stage at its current replica count.
        intermediate_adjustments = {name: prop.current_replicas for name, prop in initial_proposals.items()}

    # --- Calculate Current Effective Minimums and Their Sum ---
    current_effective_mins: Dict[str, int] = {
        name: self._get_effective_min_replicas(name, prop.metrics, global_in_flight)
        for name, prop in initial_proposals.items()
    }
    sum_of_effective_mins = sum(current_effective_mins.values())

    logger.info(
        f"[ConstraintMgr] Calculated Effective Minimums: TotalSum={sum_of_effective_mins}. "
    )

    # --- Determine if Baseline (Sum of Mins) Breaches Global Caps ---
    # Scaling any stage beyond its effective minimum is allowed only while the sum of minimums
    # is still strictly below both replica caps. Memory is not part of this gate; the
    # proportional phase receives the current memory usage separately.
    cores_allow_scale_up = (
        self.core_based_replica_limit is None or sum_of_effective_mins < self.core_based_replica_limit
    )
    max_replicas_allow_scale_up = sum_of_effective_mins < self.max_replicas
    can_globally_scale_up_stages = cores_allow_scale_up and max_replicas_allow_scale_up

    # --- Phase 3: Apply Global Constraints & Proportional Allocation ---
    try:
        tentative_adjustments_from_prop = self._apply_global_constraints_proportional(
            intermediate_adjustments,
            initial_proposals,
            current_global_memory_usage_mb,
            current_effective_mins,
            can_globally_scale_up_stages,
        )
        logger.debug(
            f"[ConstraintMgr] Tentative Adjustments (Phase 3 - After Proportional Allocation): "
            f"{tentative_adjustments_from_prop}"
        )
    except Exception as e_prop:
        logger.error(f"[ConstraintMgr] Error during global proportional allocation: {e_prop}", exc_info=True)
        # Fallback: keep the phase-2 value, but never below the stage's effective minimum.
        tentative_adjustments_from_prop = {
            name: max(count, current_effective_mins.get(name, 0))
            for name, count in intermediate_adjustments.items()
        }

    # --- Phase 4: Enforce Per-Stage Min/Max Replica Bounds ---
    final_adjustments: Dict[str, int] = {}
    for stage_name, proposal_meta in initial_proposals.items():
        replicas_after_proportional = tentative_adjustments_from_prop.get(
            stage_name, proposal_meta.current_replicas
        )
        try:
            final_adjustments[stage_name] = self._enforce_replica_bounds(
                stage_name, replicas_after_proportional, proposal_meta.metrics, global_in_flight
            )
        except Exception as e_bounds:
            logger.error(
                f"[ConstraintMgr-{stage_name}] Error during per-stage bound enforcement: {e_bounds}", exc_info=True
            )
            # Fallback: stay at the current count, but never below the effective minimum.
            final_adjustments[stage_name] = max(
                proposal_meta.current_replicas, current_effective_mins.get(stage_name, 0)
            )
    logger.debug(f"[ConstraintMgr] Final Adjustments (Phase 4 - After Per-Stage Bounds): {final_adjustments}")

    # --- Phase 5: Apply Global Consistency (e.g., Wake-up Safety) ---
    try:
        final_adjustments = self._apply_global_consistency(final_adjustments, initial_proposals)
        logger.debug(f"[ConstraintMgr] Final Adjustments (Phase 5 - After Global Consistency): {final_adjustments}")
    except Exception as e_gc:
        logger.error(f"[ConstraintMgr] Error during global consistency application: {e_gc}", exc_info=True)

    # --- Log Final Summary ---
    # The summary is informational only; guard it like the other phases so that a logging
    # failure cannot discard the decisions computed above.
    try:
        self._log_final_constraint_summary(
            final_adjustments,
            initial_proposals,
            global_in_flight,
            current_global_memory_usage_mb,
            num_edges,
            sum_of_effective_mins,
            can_globally_scale_up_stages,
        )
    except Exception as e_summary:
        logger.error(f"[ConstraintMgr] Error while logging final constraint summary: {e_summary}", exc_info=True)

    logger.info("[ConstraintMgr] --- Applying Constraints END ---")
    return final_adjustments