nv-ingest 2025.8.4.dev20250804__py3-none-any.whl → 2025.12.10.dev20251210__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. nv_ingest/api/__init__.py +6 -0
  2. nv_ingest/api/main.py +2 -0
  3. nv_ingest/api/tracing.py +82 -0
  4. nv_ingest/api/v2/README.md +203 -0
  5. nv_ingest/api/v2/__init__.py +3 -0
  6. nv_ingest/api/v2/ingest.py +1300 -0
  7. nv_ingest/framework/orchestration/execution/__init__.py +3 -0
  8. nv_ingest/framework/orchestration/execution/helpers.py +85 -0
  9. nv_ingest/framework/orchestration/execution/options.py +112 -0
  10. nv_ingest/framework/orchestration/process/__init__.py +3 -0
  11. nv_ingest/framework/orchestration/process/dependent_services.py +84 -0
  12. nv_ingest/framework/orchestration/process/execution.py +495 -0
  13. nv_ingest/framework/orchestration/process/lifecycle.py +214 -0
  14. nv_ingest/framework/orchestration/process/strategies.py +218 -0
  15. nv_ingest/framework/orchestration/process/termination.py +147 -0
  16. nv_ingest/framework/orchestration/ray/examples/pipeline_test_harness.py +3 -3
  17. nv_ingest/framework/orchestration/ray/primitives/pipeline_topology.py +4 -4
  18. nv_ingest/framework/orchestration/ray/primitives/ray_pipeline.py +32 -38
  19. nv_ingest/framework/orchestration/ray/primitives/ray_stat_collector.py +5 -5
  20. nv_ingest/framework/orchestration/ray/stages/extractors/audio_extractor.py +10 -7
  21. nv_ingest/framework/orchestration/ray/stages/extractors/chart_extractor.py +17 -14
  22. nv_ingest/framework/orchestration/ray/stages/extractors/docx_extractor.py +11 -6
  23. nv_ingest/framework/orchestration/ray/stages/extractors/html_extractor.py +10 -5
  24. nv_ingest/framework/orchestration/ray/stages/extractors/image_extractor.py +12 -7
  25. nv_ingest/framework/orchestration/ray/stages/extractors/infographic_extractor.py +22 -10
  26. nv_ingest/framework/orchestration/ray/stages/extractors/ocr_extractor.py +71 -0
  27. nv_ingest/framework/orchestration/ray/stages/extractors/pdf_extractor.py +19 -15
  28. nv_ingest/framework/orchestration/ray/stages/extractors/pptx_extractor.py +10 -5
  29. nv_ingest/framework/orchestration/ray/stages/extractors/table_extractor.py +16 -14
  30. nv_ingest/framework/orchestration/ray/stages/injectors/metadata_injector.py +16 -13
  31. nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_sink_stage_base.py +3 -0
  32. nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_source_stage_base.py +3 -3
  33. nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_stage_base.py +92 -4
  34. nv_ingest/framework/orchestration/ray/stages/mutate/image_dedup.py +12 -8
  35. nv_ingest/framework/orchestration/ray/stages/mutate/image_filter.py +12 -9
  36. nv_ingest/framework/orchestration/ray/stages/sinks/default_drain.py +4 -4
  37. nv_ingest/framework/orchestration/ray/stages/sinks/message_broker_task_sink.py +5 -2
  38. nv_ingest/framework/orchestration/ray/stages/sources/message_broker_task_source.py +116 -69
  39. nv_ingest/framework/orchestration/ray/stages/storage/image_storage.py +79 -11
  40. nv_ingest/framework/orchestration/ray/stages/storage/store_embeddings.py +10 -5
  41. nv_ingest/framework/orchestration/ray/stages/telemetry/job_counter.py +8 -4
  42. nv_ingest/framework/orchestration/ray/stages/telemetry/otel_tracer.py +17 -7
  43. nv_ingest/framework/orchestration/ray/stages/transforms/image_caption.py +12 -6
  44. nv_ingest/framework/orchestration/ray/stages/transforms/text_embed.py +17 -18
  45. nv_ingest/framework/orchestration/ray/stages/transforms/text_splitter.py +21 -14
  46. nv_ingest/framework/orchestration/ray/stages/utility/throughput_monitor.py +11 -3
  47. nv_ingest/framework/orchestration/ray/util/pipeline/pid_controller.py +1 -2
  48. nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_runners.py +33 -326
  49. nv_ingest/framework/orchestration/ray/util/pipeline/tools.py +13 -3
  50. nv_ingest/framework/util/flow_control/udf_intercept.py +352 -0
  51. nv_ingest/framework/util/service/impl/ingest/redis_ingest_service.py +215 -11
  52. nv_ingest/pipeline/__init__.py +3 -0
  53. nv_ingest/pipeline/config/__init__.py +3 -0
  54. nv_ingest/pipeline/config/loaders.py +229 -0
  55. nv_ingest/pipeline/config/replica_resolver.py +237 -0
  56. nv_ingest/pipeline/default_libmode_pipeline_impl.py +528 -0
  57. nv_ingest/pipeline/default_pipeline_impl.py +557 -0
  58. nv_ingest/pipeline/ingest_pipeline.py +389 -0
  59. nv_ingest/pipeline/pipeline_schema.py +398 -0
  60. {nv_ingest-2025.8.4.dev20250804.dist-info → nv_ingest-2025.12.10.dev20251210.dist-info}/METADATA +6 -3
  61. {nv_ingest-2025.8.4.dev20250804.dist-info → nv_ingest-2025.12.10.dev20251210.dist-info}/RECORD +64 -43
  62. nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_builders.py +0 -359
  63. nv_ingest/framework/orchestration/ray/util/pipeline/stage_builders.py +0 -649
  64. {nv_ingest-2025.8.4.dev20250804.dist-info → nv_ingest-2025.12.10.dev20251210.dist-info}/WHEEL +0 -0
  65. {nv_ingest-2025.8.4.dev20250804.dist-info → nv_ingest-2025.12.10.dev20251210.dist-info}/licenses/LICENSE +0 -0
  66. {nv_ingest-2025.8.4.dev20250804.dist-info → nv_ingest-2025.12.10.dev20251210.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,229 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
2
+ # All rights reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ """
6
+ Configuration loading and management functions for pipeline execution.
7
+
8
+ This module provides declarative functions for loading, validating, and applying
9
+ runtime overrides to pipeline configurations, replacing imperative inline logic.
10
+ """
11
+
12
+ import logging
13
+ import yaml
14
+ from typing import Optional
15
+
16
+ from nv_ingest.pipeline.pipeline_schema import PipelineConfigSchema
17
+ from nv_ingest.pipeline.default_libmode_pipeline_impl import DEFAULT_LIBMODE_PIPELINE_YAML
18
+ from nv_ingest.pipeline.default_pipeline_impl import DEFAULT_PIPELINE_YAML
19
+ from nv_ingest.framework.orchestration.execution.options import PipelineRuntimeOverrides
20
+ from nv_ingest_api.util.string_processing.yaml import substitute_env_vars_in_yaml_content
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
def load_pipeline_config(config_path: str) -> PipelineConfigSchema:
    """
    Load a pipeline configuration file, substituting environment variables.

    The file is read as UTF-8 text, environment variable placeholders are
    expanded with ``substitute_env_vars_in_yaml_content``, the result is parsed
    with ``yaml.safe_load``, and the parsed mapping is passed to
    ``PipelineConfigSchema`` as keyword arguments.

    Parameters
    ----------
    config_path : str
        The path to the YAML configuration file.

    Returns
    -------
    PipelineConfigSchema
        A validated PipelineConfigSchema object.

    Raises
    ------
    ValueError
        If the YAML file cannot be parsed after environment variable
        substitution, or if the parsed document is not a mapping at the top
        level (for example, an empty file).
    OSError
        If the file cannot be opened or read.
    """
    logger.info(f"Loading pipeline configuration from: {config_path}")

    # Read the raw YAML file content. The encoding is pinned so the result does
    # not depend on the platform's locale default.
    with open(config_path, "r", encoding="utf-8") as f:
        raw_content = f.read()

    # Substitute all environment variable placeholders using the utility function
    substituted_content = substitute_env_vars_in_yaml_content(raw_content)

    # Parse the substituted content with PyYAML, with error handling
    try:
        processed_config = yaml.safe_load(substituted_content)
    except yaml.YAMLError as e:
        # NOTE(review): the substituted content is echoed for debugging; it may
        # contain values injected from environment variables.
        error_message = (
            f"Failed to parse YAML after environment variable substitution. "
            f"Error: {e}\n\n"
            f"--- Substituted Content ---\n{substituted_content}\n---------------------------"
        )
        raise ValueError(error_message) from e

    # An empty document parses to None and a scalar/list document cannot be
    # expanded with ``**``; report both as a configuration error instead of
    # letting a TypeError escape.
    if not isinstance(processed_config, dict):
        raise ValueError(
            f"Pipeline configuration '{config_path}' must contain a YAML mapping at the top level, "
            f"got {type(processed_config).__name__}"
        )

    # Pydantic validates the clean, substituted data against the schema
    return PipelineConfigSchema(**processed_config)
66
+
67
+
68
def load_default_pipeline_config() -> PipelineConfigSchema:
    """
    Build the validated default (non-libmode) pipeline configuration.

    The YAML text embedded in ``DEFAULT_PIPELINE_YAML`` has its environment
    variable placeholders substituted, is parsed with ``yaml.safe_load``, and
    is then handed to ``PipelineConfigSchema``.

    Returns
    -------
    PipelineConfigSchema
        Validated default pipeline configuration.

    Raises
    ------
    ValueError
        If the default YAML cannot be parsed or validated.
    """
    logger.info("Loading embedded default pipeline configuration")

    yaml_text = substitute_env_vars_in_yaml_content(DEFAULT_PIPELINE_YAML)

    try:
        parsed = yaml.safe_load(yaml_text)
    except yaml.YAMLError as e:
        # Surface parser failures as ValueError while keeping the original cause.
        raise ValueError(
            f"Failed to parse embedded default pipeline YAML after environment variable substitution. Error: {e}"
        ) from e
    else:
        return PipelineConfigSchema(**parsed)
95
+
96
+
97
def load_default_libmode_config() -> PipelineConfigSchema:
    """
    Build the validated default libmode pipeline configuration.

    The YAML text embedded in ``DEFAULT_LIBMODE_PIPELINE_YAML`` has its
    environment variable placeholders substituted, is parsed with
    ``yaml.safe_load``, and is then handed to ``PipelineConfigSchema``.

    Returns
    -------
    PipelineConfigSchema
        Validated default libmode pipeline configuration.

    Raises
    ------
    ValueError
        If the default YAML cannot be parsed or validated.
    """
    logger.info("Loading default libmode pipeline configuration")

    # Expand environment variable placeholders before parsing.
    yaml_text = substitute_env_vars_in_yaml_content(DEFAULT_LIBMODE_PIPELINE_YAML)

    try:
        parsed = yaml.safe_load(yaml_text)
    except yaml.YAMLError as e:
        # Surface parser failures as ValueError while keeping the original cause.
        raise ValueError(
            f"Failed to parse default libmode pipeline YAML after environment variable substitution. " f"Error: {e}"
        ) from e
    else:
        # Schema construction performs the validation.
        return PipelineConfigSchema(**parsed)
131
+
132
+
133
def apply_runtime_overrides(config: PipelineConfigSchema, overrides: PipelineRuntimeOverrides) -> PipelineConfigSchema:
    """
    Return a copy of ``config`` with runtime overrides applied.

    The input configuration is deep-copied first, so the caller's object is
    left untouched. Each override that is not ``None`` replaces the matching
    attribute on the copy's ``pipeline`` section.

    Parameters
    ----------
    config : PipelineConfigSchema
        Base pipeline configuration to modify.
    overrides : PipelineRuntimeOverrides
        Runtime overrides to apply. Only non-None values are applied.

    Returns
    -------
    PipelineConfigSchema
        Modified configuration with overrides applied.
    """
    updated = config.model_copy(deep=True)
    pipeline_settings = updated.pipeline

    scaling_override = overrides.disable_dynamic_scaling
    if scaling_override is not None:
        pipeline_settings.disable_dynamic_scaling = scaling_override
        logger.debug(f"Applied dynamic scaling override: {overrides.disable_dynamic_scaling}")

    threshold_override = overrides.dynamic_memory_threshold
    if threshold_override is not None:
        pipeline_settings.dynamic_memory_threshold = threshold_override
        logger.debug(f"Applied memory threshold override: {overrides.dynamic_memory_threshold}")

    return updated
165
+
166
+
167
def validate_pipeline_config(config: Optional[PipelineConfigSchema]) -> PipelineConfigSchema:
    """
    Return a usable pipeline configuration, loading the default if needed.

    A provided configuration is returned unchanged; when ``config`` is
    ``None`` the default libmode configuration is loaded instead.

    Parameters
    ----------
    config : Optional[PipelineConfigSchema]
        Pipeline configuration to validate, or None to load default.

    Returns
    -------
    PipelineConfigSchema
        Validated pipeline configuration.

    Raises
    ------
    ValueError
        If config is None and default config cannot be loaded.
    """
    # A supplied config was already validated by Pydantic when it was built.
    if config is not None:
        return config

    return load_default_libmode_config()
194
+
195
+
196
def resolve_pipeline_config(provided_config: Optional[PipelineConfigSchema], libmode: bool) -> PipelineConfigSchema:
    """
    Resolve the final pipeline configuration from inputs.

    This function implements the configuration resolution logic:
    - If config provided: use it
    - If libmode=True and no config: load default libmode config
    - If libmode=False and no config: load the embedded default pipeline config

    Parameters
    ----------
    provided_config : Optional[PipelineConfigSchema]
        User-provided pipeline configuration, or None.
    libmode : bool
        Selects which embedded default is loaded when no configuration is
        provided: the libmode default when True, the standard default otherwise.

    Returns
    -------
    PipelineConfigSchema
        Resolved and validated pipeline configuration.

    Raises
    ------
    ValueError
        If no config is provided and the selected embedded default YAML
        cannot be parsed.
    """
    # An explicit configuration always wins over the embedded defaults.
    if provided_config is not None:
        return provided_config

    if libmode:
        return load_default_libmode_config()

    # For non-libmode, fall back to embedded default pipeline implementation
    return load_default_pipeline_config()
@@ -0,0 +1,237 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
2
+ # All rights reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ """
6
+ Runtime replica resolution for static scaling mode.
7
+
8
+ This module provides functionality to resolve replica counts for stages using
9
+ non-static strategies when dynamic scaling is disabled, ensuring total memory
10
+ consumption stays within the static_memory_threshold.
11
+ """
12
+
13
+ import logging
14
+ import os
15
+ from typing import List
16
+ from copy import deepcopy
17
+
18
+ from nv_ingest.pipeline.pipeline_schema import (
19
+ PipelineConfigSchema,
20
+ StageConfig,
21
+ ReplicaCalculationStrategy,
22
+ ReplicaStrategyConfig,
23
+ )
24
+ from nv_ingest_api.util.system.hardware_info import SystemResourceProbe
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+
29
def resolve_static_replicas(pipeline_config: PipelineConfigSchema) -> PipelineConfigSchema:
    """
    Resolve static replica counts for all stages when dynamic scaling is disabled.

    For every stage whose ``replicas.static_replicas`` is a
    ``ReplicaStrategyConfig``, a baseline replica count is calculated and the
    strategy object is replaced by a plain integer. If the summed memory
    demand of those stages exceeds ``total_memory_mb * static_memory_threshold``,
    all of them are scaled down by the same factor (minimum 1 replica each).

    Setting the environment variable ``NV_INGEST_BYPASS_STATIC_MEMORY_SCALE_DOWN``
    to ``1``, ``true``, ``yes`` or ``on`` (case-insensitive) skips the
    scale-down and applies the baseline counts unchanged.

    Parameters
    ----------
    pipeline_config : PipelineConfigSchema
        The pipeline configuration with potentially unresolved replica strategies.

    Returns
    -------
    PipelineConfigSchema
        The input object itself when dynamic scaling is enabled; otherwise a
        deep copy with all strategy-based static replica counts resolved.
    """
    # Only resolve if dynamic scaling is disabled
    if not pipeline_config.pipeline.disable_dynamic_scaling:
        logger.debug("Dynamic scaling enabled, skipping static replica resolution")
        return pipeline_config

    logger.info("Resolving static replica counts for disabled dynamic scaling mode")

    # Create a deep copy to avoid modifying the original config
    resolved_config = deepcopy(pipeline_config)

    # Get system resource information
    system_probe = SystemResourceProbe()
    total_memory_mb = system_probe.total_memory_mb
    static_memory_threshold = resolved_config.pipeline.static_memory_threshold
    available_memory_mb = int(total_memory_mb * static_memory_threshold)

    logger.info(
        f"System memory: {total_memory_mb}MB, available for static replicas: {available_memory_mb}MB "
        f"(threshold: {static_memory_threshold:.1%})"
    )

    # Collect (stage, baseline_replicas, memory_per_replica_mb) for every stage
    # whose static replica setting is still a strategy rather than a number.
    non_static_stages = []
    total_memory_demand_mb = 0

    for stage in resolved_config.stages:
        if not (stage.replicas and stage.replicas.static_replicas):
            continue
        strategy_config = stage.replicas.static_replicas
        if not isinstance(strategy_config, ReplicaStrategyConfig):
            continue

        baseline_replicas = _calculate_baseline_static_replicas(
            stage, strategy_config, system_probe, static_memory_threshold
        )

        # NOTE(review): a missing memory_per_replica_mb counts as 0MB of demand
        # here, while _calculate_baseline_static_replicas assumes 1000MB for the
        # memory-based strategies — confirm this difference is intended.
        memory_per_replica_mb = strategy_config.memory_per_replica_mb or 0
        stage_memory_demand = baseline_replicas * memory_per_replica_mb

        non_static_stages.append((stage, baseline_replicas, memory_per_replica_mb))
        total_memory_demand_mb += stage_memory_demand

        logger.debug(
            f"Stage '{stage.name}': {baseline_replicas} replicas × "
            f"{memory_per_replica_mb}MB = {stage_memory_demand}MB"
        )

    if not non_static_stages:
        logger.info("No stages with non-static strategies found")
        return resolved_config

    logger.info(f"Total baseline memory demand: {total_memory_demand_mb}MB from {len(non_static_stages)} stages")

    # Optional bypass of global memory-based scale down via environment variable
    bypass_env = os.getenv("NV_INGEST_BYPASS_STATIC_MEMORY_SCALE_DOWN", "").strip().lower()
    bypass_scale_down = bypass_env in ("1", "true", "yes", "on")

    # Check if we need to scale down (unless bypassed)
    if bypass_scale_down:
        logger.warning(
            "Bypassing static memory-based replica scale-down due to NV_INGEST_BYPASS_STATIC_MEMORY_SCALE_DOWN"
        )
        scaling_factor = 1.0
    elif total_memory_demand_mb <= available_memory_mb:
        logger.info("Memory demand within threshold, applying baseline replica counts")
        scaling_factor = 1.0
    else:
        # Calculate scaling factor to fit within memory threshold. The demand is
        # strictly greater than the available memory here, so it is non-zero.
        scaling_factor = available_memory_mb / total_memory_demand_mb
        if available_memory_mb > 0:
            logger.warning(
                f"Memory demand exceeds threshold by "
                f"{((total_memory_demand_mb / available_memory_mb) - 1) * 100:.1f}%, "
                f"scaling down by factor of {scaling_factor:.3f}"
            )
        else:
            # The overage percentage is undefined when nothing is available;
            # avoid dividing by zero just to build the log message.
            logger.warning(
                f"Memory demand exceeds threshold (no memory available under threshold), "
                f"scaling down by factor of {scaling_factor:.3f}"
            )

    # Apply the resolved replica counts
    total_actual_memory_mb = 0
    for stage, baseline_replicas, memory_per_replica_mb in non_static_stages:
        # Truncate toward zero but never drop below one replica; because of this
        # floor the final total can still exceed the available memory.
        scaled_replicas = max(1, int(baseline_replicas * scaling_factor))
        actual_memory_mb = scaled_replicas * memory_per_replica_mb
        total_actual_memory_mb += actual_memory_mb

        # Replace the strategy config with a static replica count
        stage.replicas.static_replicas = scaled_replicas

        logger.info(
            f"Stage '{stage.name}': {baseline_replicas} → {scaled_replicas} replicas " f"({actual_memory_mb}MB)"
        )

    # Guard the percentage against a zero total reported by the probe.
    memory_share_percent = (total_actual_memory_mb / total_memory_mb) * 100 if total_memory_mb else 0.0
    logger.info(
        f"Total actual memory allocation: {total_actual_memory_mb}MB "
        f"({memory_share_percent:.1f}% of system memory)"
    )

    return resolved_config
152
+
153
+
154
def _calculate_baseline_static_replicas(
    stage: StageConfig,
    strategy_config: ReplicaStrategyConfig,
    system_probe: SystemResourceProbe,
    static_memory_threshold: float = 0.75,
) -> int:
    """
    Compute the unscaled static replica count implied by a stage's strategy.

    Parameters
    ----------
    stage : StageConfig
        The stage configuration (used here only for its name in log output).
    strategy_config : ReplicaStrategyConfig
        The replica strategy configuration.
    system_probe : SystemResourceProbe
        System resource information (``cpu_count`` and ``total_memory_mb`` are read).
    static_memory_threshold : float, optional
        The global static memory threshold (default: 0.75).

    Returns
    -------
    int
        The calculated baseline replica count. Unknown strategies yield 1.
    """
    strategy = strategy_config.strategy

    # Fixed count: take the configured value, falling back to a single replica.
    if strategy == ReplicaCalculationStrategy.STATIC:
        return strategy_config.value or 1

    # CPU share: a fraction of the CPU count (50% when unset), capped by the
    # optional limit or by the CPU count itself.
    if strategy == ReplicaCalculationStrategy.CPU_PERCENTAGE:
        cpu_share = strategy_config.cpu_percent or 0.5
        cap = strategy_config.limit or system_probe.cpu_count
        cpu_based = max(1, int(system_probe.cpu_count * cpu_share))
        return min(cpu_based, cap)

    # Both memory strategies share one formula and differ only in the fraction
    # of total system memory they may consume.
    if strategy == ReplicaCalculationStrategy.MEMORY_THRESHOLDING:
        memory_fraction = 0.7  # Conservative 70% for static mode
    elif strategy == ReplicaCalculationStrategy.MEMORY_STATIC_GLOBAL_PERCENT:
        memory_fraction = static_memory_threshold
    else:
        logger.warning(f"Unknown replica strategy '{strategy}' for stage '{stage.name}', defaulting to 1 replica")
        return 1

    per_replica_mb = strategy_config.memory_per_replica_mb or 1000
    budget_mb = int(system_probe.total_memory_mb * memory_fraction)
    fitting = max(1, budget_mb // per_replica_mb)
    return min(fitting, strategy_config.limit or fitting)
209
+
210
+
211
def get_memory_intensive_stages(pipeline_config: PipelineConfigSchema) -> List[str]:
    """
    List the stages whose replica strategy declares a large per-replica footprint.

    Only stages whose ``replicas.static_replicas`` is a ``ReplicaStrategyConfig``
    are considered; a stage qualifies when its ``memory_per_replica_mb`` is
    greater than 5000.

    Parameters
    ----------
    pipeline_config : PipelineConfigSchema
        The pipeline configuration.

    Returns
    -------
    List[str]
        List of stage names that are memory-intensive.
    """
    heavy_stage_names = []

    for stage in pipeline_config.stages:
        replicas = stage.replicas
        if not replicas or not replicas.static_replicas:
            continue

        strategy = replicas.static_replicas
        if not isinstance(strategy, ReplicaStrategyConfig):
            continue

        # Consider stages using >5GB per replica as memory-intensive
        if (strategy.memory_per_replica_mb or 0) > 5000:
            heavy_stage_names.append(stage.name)

    return heavy_stage_names