nv-ingest-api 2025.8.13.dev20250813__py3-none-any.whl → 2025.8.15.dev20250815__py3-none-any.whl

This diff shows the content of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.

This version of nv-ingest-api has been flagged as potentially problematic.

Files changed (24)
  1. nv_ingest_api/internal/enums/common.py +37 -0
  2. nv_ingest_api/internal/extract/image/image_extractor.py +5 -1
  3. nv_ingest_api/internal/meta/__init__.py +3 -0
  4. nv_ingest_api/internal/meta/udf.py +232 -0
  5. nv_ingest_api/internal/primitives/ingest_control_message.py +63 -22
  6. nv_ingest_api/internal/primitives/tracing/tagging.py +102 -15
  7. nv_ingest_api/internal/schemas/meta/ingest_job_schema.py +40 -4
  8. nv_ingest_api/internal/schemas/meta/udf.py +23 -0
  9. nv_ingest_api/internal/transform/embed_text.py +5 -0
  10. nv_ingest_api/util/exception_handlers/decorators.py +104 -156
  11. nv_ingest_api/util/imports/callable_signatures.py +59 -1
  12. nv_ingest_api/util/imports/dynamic_resolvers.py +53 -5
  13. nv_ingest_api/util/introspection/__init__.py +3 -0
  14. nv_ingest_api/util/introspection/class_inspect.py +145 -0
  15. nv_ingest_api/util/introspection/function_inspect.py +65 -0
  16. nv_ingest_api/util/logging/configuration.py +71 -7
  17. nv_ingest_api/util/string_processing/configuration.py +682 -0
  18. nv_ingest_api/util/string_processing/yaml.py +45 -0
  19. nv_ingest_api/util/system/hardware_info.py +178 -13
  20. {nv_ingest_api-2025.8.13.dev20250813.dist-info → nv_ingest_api-2025.8.15.dev20250815.dist-info}/METADATA +1 -1
  21. {nv_ingest_api-2025.8.13.dev20250813.dist-info → nv_ingest_api-2025.8.15.dev20250815.dist-info}/RECORD +24 -16
  22. {nv_ingest_api-2025.8.13.dev20250813.dist-info → nv_ingest_api-2025.8.15.dev20250815.dist-info}/WHEEL +0 -0
  23. {nv_ingest_api-2025.8.13.dev20250813.dist-info → nv_ingest_api-2025.8.15.dev20250815.dist-info}/licenses/LICENSE +0 -0
  24. {nv_ingest_api-2025.8.13.dev20250813.dist-info → nv_ingest_api-2025.8.15.dev20250815.dist-info}/top_level.txt +0 -0
nv_ingest_api/util/string_processing/configuration.py (new file)
@@ -0,0 +1,682 @@
+ # SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
+ # All rights reserved.
+ # SPDX-License-Identifier: Apache-2.0
+
+ # flake8: noqa: F541
+
+ from nv_ingest.pipeline.pipeline_schema import PipelineConfigSchema
+ import logging
+ import os
+ from collections import defaultdict, deque
+ from typing import Dict, List, Set
+ from nv_ingest_api.util.system.hardware_info import SystemResourceProbe
+
+ # Optional import for graphviz
+ try:
+     import graphviz
+
+     GRAPHVIZ_AVAILABLE = True
+ except ImportError:
+     GRAPHVIZ_AVAILABLE = False
+
+ # Optional import for PyArrow
+ try:
+     import pyarrow as pa
+
+     PYARROW_AVAILABLE = True
+ except ImportError:
+     PYARROW_AVAILABLE = False
+
+ # Optional import for Ray
+ try:
+     import ray
+
+     RAY_AVAILABLE = True
+ except ImportError:
+     RAY_AVAILABLE = False
+
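The PyArrow section of the report below echoes the ARROW_DEFAULT_MEMORY_POOL environment variable. A hedged sketch of choosing the allocator (the variable is only honored if set before pyarrow is first imported, and "jemalloc" only if the PyArrow build includes it):

    import os

    # Must run before pyarrow is first imported to take effect.
    os.environ["ARROW_DEFAULT_MEMORY_POOL"] = "jemalloc"

    import pyarrow as pa
    print(pa.default_memory_pool().backend_name)  # e.g. "jemalloc"
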
+ # Color palette for pipeline phases
+ PHASE_COLORS = {
+     "PRE_PROCESSING": "#e6e0d4",
+     "EXTRACTION": "#d4e6e0",
+     "POST_PROCESSING": "#e0d4e6",
+     "MUTATION": "#d4d4e6",
+     "TRANSFORM": "#e6d4d4",
+     "RESPONSE": "#e6e6d4",
+ }
+
+ logger = logging.getLogger(__name__)
+
+
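The detailed per-stage section of pretty_print_pipeline_config below is gated on this module logger being enabled for DEBUG. A minimal sketch of opting in, assuming the module path shown in the file list above:

    import logging

    # Enables the "DETAILED STAGE CONFIGURATION" section of the report.
    logging.getLogger("nv_ingest_api.util.string_processing.configuration").setLevel(logging.DEBUG)
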
+ def pretty_print_pipeline_config(config: PipelineConfigSchema, config_path: str = None) -> str:
+     """
+     Generate a comprehensive, human-readable representation of a pipeline configuration.
+
+     This function creates a detailed, formatted string that displays all aspects
+     of a pipeline configuration including stages, dependencies, scaling settings,
+     and execution topology in a clear, hierarchical format.
+
+     Parameters
+     ----------
+     config : PipelineConfigSchema
+         The pipeline configuration to format and display.
+     config_path : str, optional
+         The file path of the configuration file to display in the header.
+
+     Returns
+     -------
+     str
+         A comprehensive pretty-printed string of the pipeline structure and runtime details.
+     """
+     output = []
+
+     # Header with pipeline overview
+     output.append("=" * 80)
+     output.append(f"🚀 PIPELINE CONFIGURATION: {config.name}")
+     if config_path:
+         output.append(f"📁 Configuration File: {config_path}")
+     output.append(f"📋 Description: {config.description}")
+     output.append("=" * 80)
+
+     # Runtime Configuration Summary
+     if config.pipeline:
+         output.append("\n⚙️ RUNTIME CONFIGURATION:")
+         output.append(f" • Dynamic Scaling: {'Disabled' if config.pipeline.disable_dynamic_scaling else 'Enabled'}")
+         output.append(f" • Dynamic Memory Threshold: {config.pipeline.dynamic_memory_threshold:.1%}")
+         output.append(f" • Static Memory Threshold: {config.pipeline.static_memory_threshold:.1%}")
+         output.append(f" • PID Kp: {config.pipeline.pid_controller.kp}")
+         output.append(f" • PID Ki: {config.pipeline.pid_controller.ki}")
+         output.append(f" • PID EMA Alpha: {config.pipeline.pid_controller.ema_alpha}")
+         output.append(f" • PID Target Queue Depth: {config.pipeline.pid_controller.target_queue_depth}")
+         output.append(f" • PID Penalty Factor: {config.pipeline.pid_controller.penalty_factor}")
+         output.append(f" • PID Error Boost Factor: {config.pipeline.pid_controller.error_boost_factor}")
+
+     # System Resource Information
+     system_probe = SystemResourceProbe()
+     details = system_probe.get_details()
+
+     output.append("\n🖥️ SYSTEM RESOURCE INFORMATION:")
+     output.append(f" • Effective CPU Cores: {system_probe.effective_cores:.2f}")
+     output.append(f" • CPU Detection Method: {system_probe.detection_method}")
+
+     if system_probe.total_memory_mb:
+         output.append(f" • Total Memory: {system_probe.total_memory_mb / 1024:.2f} GB")
+         output.append(f" • Memory Detection Method: {details.get('memory_detection_method', 'unknown')}")
+
+     # Show cgroup information if available
+     if details.get("cgroup_type"):
+         output.append(f" • Container Runtime: {details['cgroup_type']} cgroups detected")
+         if details.get("cgroup_quota_cores"):
+             output.append(f" • CPU Limit (cgroup): {details['cgroup_quota_cores']:.2f} cores")
+         if details.get("cgroup_memory_limit_bytes"):
+             cgroup_memory_gb = details["cgroup_memory_limit_bytes"] / (1024**3)
+             output.append(f" • Memory Limit (cgroup): {cgroup_memory_gb:.2f} GB")
+     else:
+         output.append(" • Container Runtime: No cgroup limits detected (bare metal/VM)")
+
+     # Show static memory threshold if dynamic scaling is disabled
+     if config.pipeline.disable_dynamic_scaling:
+         threshold = config.pipeline.static_memory_threshold
+         available_memory_gb = (system_probe.total_memory_mb or 0) * threshold / 1024
+         output.append(
+             f" • Static Memory Threshold: {threshold:.1%} ({available_memory_gb:.2f} GB available for replicas)"
+         )
+
+     # PyArrow Configuration Information
+     if PYARROW_AVAILABLE:
+         output.append("\n🏹 PYARROW CONFIGURATION:")
+
+         # Get default memory pool type from environment or PyArrow
+         arrow_memory_pool_env = os.environ.get("ARROW_DEFAULT_MEMORY_POOL")
+
+         try:
+             # Get actual memory pool information
+             default_pool = pa.default_memory_pool()
+             try:
+                 # Get memory pool type using backend_name property
+                 pool_type = default_pool.backend_name
+             except AttributeError:
+                 # Fallback to class name parsing for older PyArrow versions
+                 pool_type = type(default_pool).__name__.replace("MemoryPool", "").lower()
+
+             # Get pool statistics if available
+             pool_bytes_allocated = getattr(default_pool, "bytes_allocated", lambda: 0)()
+             pool_max_memory = getattr(default_pool, "max_memory", lambda: -1)()
+
+             output.append(f" • Default Memory Pool: {pool_type}")
+             output.append(f" • Environment Setting: ARROW_DEFAULT_MEMORY_POOL={arrow_memory_pool_env}")
+             output.append(f" • Current Allocated: {pool_bytes_allocated / (1024**2):.2f} MB")
+
+             if pool_max_memory > 0:
+                 output.append(f" • Max Memory Limit: {pool_max_memory / (1024**2):.2f} MB")
+             else:
+                 output.append(" • Max Memory Limit: No limit set")
+
+         except Exception as e:
+             output.append(f" • Memory Pool: Unable to query ({str(e)})")
+
+         # Show PyArrow version and build info
+         output.append(f" • PyArrow Version: {pa.__version__}")
+
+         # Check for memory mapping support
+         try:
+             memory_map_support = hasattr(pa, "memory_map") and hasattr(pa, "create_memory_map")
+             output.append(f" • Memory Mapping Support: {'Available' if memory_map_support else 'Not available'}")
+         except Exception:
+             output.append(" • Memory Mapping Support: Unknown")
+
+     else:
+         output.append("\n🏹 PYARROW CONFIGURATION:")
+         output.append(" • PyArrow: Not available (not installed)")
+
+     # Ray Configuration Information
+     if RAY_AVAILABLE:
+         output.append("\n⚡ RAY CONFIGURATION:")
+
+         # Ray version and initialization status
+         try:
+             output.append(f" • Ray Version: {ray.__version__}")
+
+             # Check if Ray is initialized
+             if ray.is_initialized():
+                 output.append(" • Ray Status: Initialized")
+
+                 # Get cluster information if available
+                 try:
+                     cluster_resources = ray.cluster_resources()
+                     available_resources = ray.available_resources()
+
+                     total_cpus = cluster_resources.get("CPU", 0)
+                     available_cpus = available_resources.get("CPU", 0)
+                     total_memory = cluster_resources.get("memory", 0) / (1024**3)  # Convert to GB
+                     available_memory = available_resources.get("memory", 0) / (1024**3)
+
+                     output.append(f" • Cluster CPUs: {available_cpus:.1f}/{total_cpus:.1f} available")
+                     if total_memory > 0:
+                         output.append(f" • Cluster Memory: {available_memory:.2f}/{total_memory:.2f} GB available")
+
+                 except Exception as e:
+                     output.append(f" • Cluster Resources: Unable to query ({str(e)})")
+             else:
+                 output.append(" • Ray Status: Not initialized")
+
+         except Exception as e:
+             output.append(f" • Ray Status: Error querying ({str(e)})")
+
+         # Ray environment variables - threading configuration
+         ray_env_vars = ["RAY_num_grpc_threads", "RAY_num_server_call_thread", "RAY_worker_num_grpc_internal_threads"]
+
+         output.append(" • Threading Configuration:")
+         for var in ray_env_vars:
+             value = os.environ.get(var, "not set")
+             output.append(f" - {var}: {value}")
+
+         # Additional Ray environment variables that might be relevant
+         other_ray_vars = [
+             "RAY_DEDUP_LOGS",
+             "RAY_LOG_TO_DRIVER",
+             "RAY_DISABLE_IMPORT_WARNING",
+             "RAY_USAGE_STATS_ENABLED",
+         ]
+
+         ray_other_set = []
+         for var in other_ray_vars:
+             value = os.environ.get(var)
+             if value is not None:
+                 ray_other_set.append(f"{var}={value}")
+
+         if ray_other_set:
+             output.append(" • Other Ray Settings:")
+             for setting in ray_other_set:
+                 output.append(f" - {setting}")
+
+     else:
+         output.append("\n⚡ RAY CONFIGURATION:")
+         output.append(" • Ray: Not available (not installed)")
+
+     # Check if detailed stage configuration should be shown
+     show_detailed_stages = logger.isEnabledFor(logging.DEBUG)
+
+     if show_detailed_stages:
+         # Detailed Stage Configuration
+         output.append("\n📋 DETAILED STAGE CONFIGURATION:")
+         output.append("-" * 60)
+
+         # Group stages by numeric phase for proper ordering
+         phases_by_number = defaultdict(list)
+         for stage in config.stages:
+             # Extract the actual numeric phase value
+             phase_number = stage.phase
+             phases_by_number[phase_number].append(stage)
+
+         # Sort stages within each phase by dependencies and type
+         for phase_number in phases_by_number:
+             phase_stages = phases_by_number[phase_number]
+
+             # Simple dependency-aware sorting within phase
+             def stage_sort_key(stage):
+                 # Sources first, then stages with fewer dependencies, then sinks
+                 type_priority = 0 if stage.type.value == "source" else 2 if stage.type.value == "sink" else 1
+                 dep_count = len(stage.runs_after) if stage.runs_after else 0
+                 return (type_priority, dep_count, stage.name)
+
+             phase_stages.sort(key=stage_sort_key)
+             phases_by_number[phase_number] = phase_stages
+
+         # Display phases in numerical order
+         for phase_number in sorted(phases_by_number.keys()):
+             phase_stages = phases_by_number[phase_number]
+             if not phase_stages:
+                 continue
+
+             # Get phase name for display
+             first_stage = phase_stages[0]
+             phase_name = first_stage.phase.name if hasattr(first_stage.phase, "name") else f"Phase_{first_stage.phase}"
+
+             output.append(f"\n📊 {phase_name}:")
+
+             for stage in phase_stages:
+                 # Stage header with type icon
+                 stage_icon = "📥" if stage.type.value == "source" else "📤" if stage.type.value == "sink" else "⚙️"
+                 output.append(f"\n{stage_icon} STAGE: {stage.name}")
+                 output.append(f" Type: {stage.type.value}")
+
+                 # Actor or callable
+                 if stage.actor:
+                     output.append(f" Actor: {stage.actor}")
+                 elif stage.callable:
+                     output.append(f" Callable: {stage.callable}")
+
+                 # Phase with better formatting
+                 phase_display = stage.phase.name if hasattr(stage.phase, "name") else str(stage.phase)
+                 output.append(f" Phase: {phase_display}")
+
+                 # Scaling configuration - handle both count and percentage based configs
+                 replica_info = []
+                 if stage.replicas:
+                     if stage.replicas.cpu_count_min is not None:
+                         replica_info.append(f"{stage.replicas.cpu_count_min} min")
+                     elif stage.replicas.cpu_percent_min is not None:
+                         replica_info.append(f"{stage.replicas.cpu_percent_min*100:.1f}% min")
+
+                     if stage.replicas.cpu_count_max is not None:
+                         replica_info.append(f"{stage.replicas.cpu_count_max} max")
+                     elif stage.replicas.cpu_percent_max is not None:
+                         replica_info.append(f"{stage.replicas.cpu_percent_max*100:.1f}% max")
+
+                 if replica_info:
+                     output.append(f" Scaling: {' → '.join(replica_info)} replicas")
+                 else:
+                     output.append(f" Scaling: Default")
+
+                 # Dependencies
+                 if stage.runs_after:
+                     deps = ", ".join(stage.runs_after)
+                     output.append(f" Dependencies: {deps}")
+                 else:
+                     output.append(f" Dependencies: None (can start immediately)")
+
+                 # Enabled status
+                 if not stage.enabled:
+                     output.append(f" Status: ⚠️ DISABLED")
+
+                 # Task filters for callable stages
+                 if stage.callable and stage.task_filters:
+                     output.append(f" Task Filters: {stage.task_filters}")
+
+     # Stage Execution Flow
+     output.append("\n🔄 PIPELINE EXECUTION FLOW:")
+     output.append("-" * 50)
+
+     # Group stages by numeric phase for proper ordering - ignore the broken topological sort
+     phases_by_number = defaultdict(list)
+     for stage in config.stages:
+         # Extract the actual numeric phase value
+         phase_number = stage.phase
+         phases_by_number[phase_number].append(stage)
+
+     # Sort stages within each phase by dependencies and type
+     for phase_number in phases_by_number:
+         phase_stages = phases_by_number[phase_number]
+
+         # Simple dependency-aware sorting within phase
+         def stage_sort_key(stage):
+             # Sources first, then stages with fewer dependencies, then sinks
+             type_priority = 0 if stage.type.value == "source" else 2 if stage.type.value == "sink" else 1
+             dep_count = len(stage.runs_after) if stage.runs_after else 0
+             return (type_priority, dep_count, stage.name)
+
+         phase_stages.sort(key=stage_sort_key)
+         phases_by_number[phase_number] = phase_stages
+
+     # Display phases in numerical order
+     for phase_number in sorted(phases_by_number.keys()):
+         phase_stages = phases_by_number[phase_number]
+         if not phase_stages:
+             continue
+
+         # Get phase name for display
+         first_stage = phase_stages[0]
+         phase_name = first_stage.phase.name if hasattr(first_stage.phase, "name") else f"Phase_{first_stage.phase}"
+
+         output.append(f"\n📊 {phase_name}:")
+
+         for stage in phase_stages:
+             # Stage info with proper indentation
+             stage_icon = "📥" if stage.type.value == "source" else "📤" if stage.type.value == "sink" else "⚙️"
+             status_icon = "" if stage.enabled else " ⚠️ DISABLED"
+
+             # Show dependencies inline for better flow understanding
+             deps_info = ""
+             if stage.runs_after:
+                 deps_info = f" (after: {', '.join(stage.runs_after)})"
+
+             # Add replica information
+             replica_info = _get_replica_display_info(stage, config)
+
+             output.append(f" {stage_icon} {stage.name}{deps_info}{replica_info}{status_icon}")
+
+     # Pipeline Topology in Execution Order
+     output.append("\n🔗 PIPELINE TOPOLOGY (Execution Flow):")
+     output.append("-" * 50)
+
+     # Build a more sophisticated topology view
+     edge_map = {}
+     reverse_edge_map = {}  # to_stage -> [from_stages]
+
+     for edge in config.edges:
+         if edge.from_stage not in edge_map:
+             edge_map[edge.from_stage] = []
+         edge_map[edge.from_stage].append(edge.to_stage)
+
+         if edge.to_stage not in reverse_edge_map:
+             reverse_edge_map[edge.to_stage] = []
+         reverse_edge_map[edge.to_stage].append(edge.from_stage)
+
+     # Show topology in execution order
+     shown_stages = set()
+
+     def show_stage_connections(stage_name, indent_level=0):
+         """Recursively show stage connections in execution order."""
+
+         if stage_name in shown_stages:
+             return
+
+         shown_stages.add(stage_name)
+         indent = " " * indent_level
+
+         # Find the stage object for type icon
+         stage_obj = next((s for s in config.stages if s.name == stage_name), None)
+         if stage_obj:
+             stage_icon = "📥" if stage_obj.type.value == "source" else "📤" if stage_obj.type.value == "sink" else "⚙️"
+         else:
+             stage_icon = "❓"
+
+         # Add replica information
+         replica_info = _get_replica_display_info(stage_obj, config)
+
+         # Show outgoing connections
+         if stage_name in edge_map:
+             targets = sorted(edge_map[stage_name])
+             if len(targets) == 1:
+                 output.append(f"{indent}{stage_icon} {stage_name}{replica_info} → {targets[0]}")
+                 # Recursively show the target's connections
+                 show_stage_connections(targets[0], indent_level)
+             else:
+                 output.append(f"{indent}{stage_icon} {stage_name}{replica_info} → [{', '.join(targets)}]")
+                 # Show each target's connections
+                 for target in targets:
+                     show_stage_connections(target, indent_level + 1)
+         else:
+             # Terminal stage (no outgoing connections)
+             output.append(f"{indent}{stage_icon} {stage_name}{replica_info} (terminal)")
+
+     # Start with source stages (stages with no incoming edges)
+     source_stages = []
+     for stage in config.stages:
+         if stage.name not in reverse_edge_map and stage.type.value == "source":
+             source_stages.append(stage.name)
+
+     # If no clear sources found, start with all stages that have no dependencies
+     if not source_stages:
+         for stage in config.stages:
+             if stage.name not in reverse_edge_map:
+                 source_stages.append(stage.name)
+
+     # Show connections starting from sources
+     for source in sorted(source_stages):
+         show_stage_connections(source)
+
+     # Show any remaining stages that weren't connected
+     for stage in config.stages:
+         if stage.name not in shown_stages:
+             stage_icon = "📥" if stage.type.value == "source" else "📤" if stage.type.value == "sink" else "⚙️"
+             replica_info = _get_replica_display_info(stage, config)
+             output.append(f" {stage_icon} {stage.name}{replica_info} (isolated)")
+
+     # Summary Statistics
+     enabled_stages = [s for s in config.stages if s.enabled]
+     disabled_stages = [s for s in config.stages if not s.enabled]
+     source_stages = [s for s in enabled_stages if s.type.value == "source"]
+     sink_stages = [s for s in enabled_stages if s.type.value == "sink"]
+     processing_stages = [s for s in enabled_stages if s.type.value == "stage"]
+
+     output.append("\n📊 PIPELINE SUMMARY:")
+     output.append("-" * 30)
+     output.append(f" Total Stages: {len(config.stages)}")
+     output.append(f" • Enabled: {len(enabled_stages)}")
+     if disabled_stages:
+         output.append(f" • Disabled: {len(disabled_stages)}")
+     output.append(f" • Sources: {len(source_stages)}")
+     output.append(f" • Processing: {len(processing_stages)}")
+     output.append(f" • Sinks: {len(sink_stages)}")
+     output.append(f" Total Edges: {len(config.edges)}")
+     output.append(f" Execution Phases: {len(phases_by_number)}")
+
+     output.append("\n" + "=" * 80)
+     output.append("✅ Pipeline configuration loaded and ready for execution!")
+     output.append("=" * 80)
+
+     return "\n".join(output)
+
+
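A hedged usage sketch for the function above. The YAML loading is an assumption (this diff does not show how PipelineConfigSchema instances are built), and model_validate assumes the schema is a Pydantic v2 model, consistent with the model_dump call used later in this file:

    import yaml

    from nv_ingest.pipeline.pipeline_schema import PipelineConfigSchema

    with open("pipeline.yaml") as f:  # hypothetical config file
        raw = yaml.safe_load(f)

    config = PipelineConfigSchema.model_validate(raw)
    print(pretty_print_pipeline_config(config, config_path="pipeline.yaml"))
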
+ def _get_replica_display_info(stage, config):
+     """Generate replica information display for a stage."""
+     if not stage or not stage.replicas:
+         return " [1 replica]"  # Default display
+
+     replicas = stage.replicas
+     replica_parts = []
+
+     # Check if dynamic scaling is disabled
+     dynamic_scaling_disabled = getattr(config.pipeline, "disable_dynamic_scaling", False)
+
+     if dynamic_scaling_disabled:
+         # Static scaling mode - show resolved static replica count
+         if hasattr(replicas, "static_replicas") and replicas.static_replicas is not None:
+             if isinstance(replicas.static_replicas, int):
+                 # Resolved static replica count
+                 replica_parts.append(f"{replicas.static_replicas} static")
+             else:
+                 # Strategy-based (should be resolved by now, but show strategy info)
+                 strategy_config = replicas.static_replicas
+                 strategy_name = strategy_config.strategy.value if hasattr(strategy_config, "strategy") else "unknown"
+
+                 # Show strategy details
+                 strategy_details = []
+                 if hasattr(strategy_config, "memory_per_replica_mb") and strategy_config.memory_per_replica_mb:
+                     strategy_details.append(f"{strategy_config.memory_per_replica_mb}MB/replica")
+                 if hasattr(strategy_config, "cpu_percent") and strategy_config.cpu_percent:
+                     strategy_details.append(f"{strategy_config.cpu_percent*100:.0f}% CPU")
+                 if hasattr(strategy_config, "limit") and strategy_config.limit:
+                     strategy_details.append(f"max {strategy_config.limit}")
+
+                 detail_str = f" ({', '.join(strategy_details)})" if strategy_details else ""
+                 replica_parts.append(f"static-{strategy_name}{detail_str}")
+         else:
+             # Fallback to legacy fields for static mode
+             if replicas.cpu_count_max is not None:
+                 replica_parts.append(f"{replicas.cpu_count_max} static")
+             elif replicas.cpu_percent_max is not None:
+                 replica_parts.append(f"{replicas.cpu_percent_max*100:.0f}% static")
+             else:
+                 replica_parts.append("1 static")
+     else:
+         # Dynamic scaling mode - show min-max range with strategy details
+         min_val = "0"
+         max_info = "?"
+
+         # Get min replicas
+         if hasattr(replicas, "min_replicas") and replicas.min_replicas is not None:
+             min_val = str(replicas.min_replicas)
+         elif replicas.cpu_count_min is not None:
+             min_val = str(replicas.cpu_count_min)
+         elif replicas.cpu_percent_min is not None:
+             min_val = f"{replicas.cpu_percent_min*100:.0f}%"
+
+         # Get max replicas with detailed strategy information
+         if hasattr(replicas, "max_replicas") and replicas.max_replicas is not None:
+             if isinstance(replicas.max_replicas, int):
+                 max_info = str(replicas.max_replicas)
+             else:
+                 # Strategy-based max replicas - show strategy details
+                 strategy_config = replicas.max_replicas
+                 strategy_name = strategy_config.strategy.value if hasattr(strategy_config, "strategy") else "strategy"
+
+                 # Build detailed strategy information
+                 strategy_details = []
+                 if hasattr(strategy_config, "memory_per_replica_mb") and strategy_config.memory_per_replica_mb:
+                     strategy_details.append(f"{strategy_config.memory_per_replica_mb}MB/replica")
+                 if hasattr(strategy_config, "cpu_percent") and strategy_config.cpu_percent:
+                     strategy_details.append(f"{strategy_config.cpu_percent*100:.1f}% CPU")
+                 if hasattr(strategy_config, "value") and strategy_config.value:
+                     strategy_details.append(f"value={strategy_config.value}")
+                 if hasattr(strategy_config, "limit") and strategy_config.limit:
+                     strategy_details.append(f"limit={strategy_config.limit}")
+
+                 if strategy_details:
+                     max_info = f"{strategy_name} ({', '.join(strategy_details)})"
+                 else:
+                     max_info = strategy_name
+         elif replicas.cpu_count_max is not None:
+             max_info = str(replicas.cpu_count_max)
+         elif replicas.cpu_percent_max is not None:
+             max_info = f"{replicas.cpu_percent_max*100:.0f}%"
+
+         # Show scaling range
+         replica_parts.append(f"{min_val}→{max_info} dynamic")
+
+         # Also show static strategy if available for comparison
+         if hasattr(replicas, "static_replicas") and replicas.static_replicas is not None:
+             if isinstance(replicas.static_replicas, int):
+                 replica_parts.append(f"static={replicas.static_replicas}")
+             else:
+                 static_strategy = replicas.static_replicas
+                 static_name = static_strategy.strategy.value if hasattr(static_strategy, "strategy") else "static"
+                 static_details = []
+                 if hasattr(static_strategy, "memory_per_replica_mb") and static_strategy.memory_per_replica_mb:
+                     static_details.append(f"{static_strategy.memory_per_replica_mb}MB/replica")
+                 if hasattr(static_strategy, "limit") and static_strategy.limit:
+                     static_details.append(f"limit={static_strategy.limit}")
+
+                 detail_str = f" ({', '.join(static_details)})" if static_details else ""
+                 replica_parts.append(f"static={static_name}{detail_str}")
+
+     if replica_parts:
+         return f" [{', '.join(replica_parts)}]"
+     else:
+         return " [1 replica]"
+
+
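To illustrate the display grammar the helper above produces, a small sketch with SimpleNamespace stand-ins for the real stage and config models:

    from types import SimpleNamespace

    replicas = SimpleNamespace(
        min_replicas=1, max_replicas=8, static_replicas=None,
        cpu_count_min=None, cpu_percent_min=None,
        cpu_count_max=None, cpu_percent_max=None,
    )
    stage = SimpleNamespace(replicas=replicas)
    config = SimpleNamespace(pipeline=SimpleNamespace(disable_dynamic_scaling=False))

    print(_get_replica_display_info(stage, config))  # " [1→8 dynamic]"
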
+ def dump_pipeline_to_graphviz(
+     config: PipelineConfigSchema,
+     output_path: str,
+ ) -> None:
+     """
+     Generates a Graphviz visualization of the pipeline configuration.
+
+     Parameters
+     ----------
+     config : PipelineConfigSchema
+         The pipeline configuration object.
+     output_path : str
+         The path to save the Graphviz DOT file.
+     """
+     if not GRAPHVIZ_AVAILABLE:
+         logger.warning("graphviz is not installed. Skipping graph generation.")
+         return
+
+     dot = graphviz.Digraph(comment=config.name)
+     dot.attr(
+         "graph",
+         rankdir="TB",
+         splines="ortho",
+         label=f"<{config.name}<BR/><FONT POINT-SIZE='10'>{config.description}</FONT>>",
+         labelloc="t",
+         fontsize="20",
+     )
+
+     # Group stages by phase for layered layout
+     stages_by_phase = {phase: [] for phase in sorted(config.get_phases(), key=lambda p: p.value)}
+     for stage in config.stages:
+         stages_by_phase[stage.phase].append(stage)
+
+     # Create nodes within phase subgraphs
+     for phase, stages in stages_by_phase.items():
+         if not stages:
+             continue
+         with dot.subgraph(name=f"cluster_{phase.name}") as c:
+             phase_color = PHASE_COLORS.get(phase.name, "lightgrey")
+             c.attr(label=phase.name, style="filled", color=phase_color)
+             for stage in stages:
+                 # Create a detailed HTML-like label for the node
+                 enabled_color = "darkgreen" if stage.enabled else "red"
+                 label = '<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">'
+                 label += f'<TR><TD COLSPAN="2" BGCOLOR="lightblue"><B>{stage.name}</B></TD></TR>'
+                 label += (
+                     f'<TR><TD>Status</TD><TD COLOR="{enabled_color}">'
+                     f'{"Enabled" if stage.enabled else "Disabled"}</TD></TR>'
+                 )
+                 label += f"<TR><TD>Type</TD><TD>{stage.type.value}</TD></TR>"
+                 label += f"<TR><TD>Actor</TD><TD>{stage.actor}</TD></TR>"
+
+                 # Add replica info
+                 if stage.replicas:
+                     for key, value in stage.replicas.model_dump(exclude_none=True).items():
+                         label += f"<TR><TD>Replica: {key}</TD><TD>{value}</TD></TR>"
+
+                 # Add config info
+                 if stage.config:
+                     label += '<TR><TD COLSPAN="2" BGCOLOR="lightgrey"><B>Configuration</B></TD></TR>'
+                     for key, value in stage.config.items():
+                         label += f"<TR><TD>{key}</TD><TD>{value}</TD></TR>"
+
+                 label += "</TABLE>>"
+                 c.node(stage.name, label=label, shape="plaintext")
+
+     # Add edges for data flow
+     for edge in config.edges:
+         dot.edge(edge.from_stage, edge.to_stage, penwidth="2")
+
+     # Add edges for logical dependencies
+     for stage in config.stages:
+         for dep in stage.runs_after:
+             dot.edge(dep, stage.name, style="dashed", color="grey", constraint="false")
+
+     # Add a legend
+     with dot.subgraph(name="cluster_legend") as s:
+         s.attr(label="Legend", color="black")
+         s.node("data_flow_legend", "Data Flow", shape="plaintext")
+         s.node("dependency_legend", "Logical Dependency", shape="plaintext")
+         s.edge("data_flow_legend", "dependency_legend", style="invis")  # layout hack
+         dot.edge("data_flow_legend", "dependency_legend", label="", penwidth="2", style="solid")
+         dot.edge("dependency_legend", "data_flow_legend", label="", style="dashed", color="grey", constraint="false")
+
+     try:
+         output_dir = os.path.dirname(output_path)
+         if output_dir:
+             os.makedirs(output_dir, exist_ok=True)
+         dot.save(output_path)
+         logger.info(f"Pipeline graph saved to {output_path}")
+     except Exception as e:
+         logger.error(f"Failed to save pipeline graph: {e}")
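A usage sketch under the same assumptions as above (requires the optional graphviz package; paths are illustrative):

    dump_pipeline_to_graphviz(config, "artifacts/pipeline.gv")
    # Render the saved DOT file with the Graphviz CLI, e.g.:
    #   dot -Tpng artifacts/pipeline.gv -o pipeline.png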