nv-ingest 2025.7.8.dev20250708__tar.gz → 2025.7.10.dev20250710__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nv-ingest might be problematic. Click here for more details.

Files changed (109) hide show
  1. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/PKG-INFO +1 -1
  2. nv_ingest-2025.7.10.dev20250710/nv_ingest/framework/orchestration/ray/primitives/pipeline_topology.py +353 -0
  3. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/primitives/ray_pipeline.py +438 -163
  4. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/primitives/ray_stat_collector.py +30 -3
  5. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_stage_base.py +159 -230
  6. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/stages/sources/message_broker_task_source.py +27 -9
  7. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/util/pipeline/pid_controller.py +7 -72
  8. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_runners.py +2 -1
  9. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/util/pipeline/stage_builders.py +22 -12
  10. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest.egg-info/PKG-INFO +1 -1
  11. nv_ingest-2025.7.8.dev20250708/nv_ingest/framework/orchestration/ray/primitives/pipeline_topology.py +0 -591
  12. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/LICENSE +0 -0
  13. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/MANIFEST.in +0 -0
  14. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/__init__.py +0 -0
  15. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/api/__init__.py +0 -0
  16. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/api/main.py +0 -0
  17. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/api/v1/__init__.py +0 -0
  18. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/api/v1/health.py +0 -0
  19. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/api/v1/ingest.py +0 -0
  20. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/api/v1/metrics.py +0 -0
  21. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/__init__.py +0 -0
  22. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/__init__.py +0 -0
  23. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/__init__.py +0 -0
  24. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/edges/__init__.py +0 -0
  25. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/edges/async_queue_edge.py +0 -0
  26. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/edges/ray_queue_edge.py +0 -0
  27. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/edges/threaded_queue_edge.py +0 -0
  28. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/examples/__init__.py +0 -0
  29. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/examples/pipeline_test_harness.py +0 -0
  30. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/examples/task_source_harness.py +0 -0
  31. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/examples/task_source_sink_harness.py +0 -0
  32. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/primitives/__init__.py +0 -0
  33. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/primitives/dataclasses.py +0 -0
  34. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/primitives/pipeline_monitor.py +0 -0
  35. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/stages/__init__.py +0 -0
  36. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/stages/extractors/__init__.py +0 -0
  37. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/stages/extractors/audio_extractor.py +0 -0
  38. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/stages/extractors/chart_extractor.py +0 -0
  39. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/stages/extractors/docx_extractor.py +0 -0
  40. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/stages/extractors/html_extractor.py +0 -0
  41. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/stages/extractors/image_extractor.py +0 -0
  42. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/stages/extractors/infographic_extractor.py +0 -0
  43. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/stages/extractors/pdf_extractor.py +0 -0
  44. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/stages/extractors/pptx_extractor.py +0 -0
  45. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/stages/extractors/table_extractor.py +0 -0
  46. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/stages/injectors/__init__.py +0 -0
  47. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/stages/injectors/metadata_injector.py +0 -0
  48. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/stages/meta/__init__.py +0 -0
  49. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_edge_base.py +0 -0
  50. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_sink_stage_base.py +0 -0
  51. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_source_stage_base.py +0 -0
  52. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/stages/mutate/__init__.py +0 -0
  53. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/stages/mutate/image_dedup.py +0 -0
  54. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/stages/mutate/image_filter.py +0 -0
  55. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/stages/sinks/__init__.py +0 -0
  56. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/stages/sinks/default_drain.py +0 -0
  57. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/stages/sinks/message_broker_task_sink.py +0 -0
  58. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/stages/sources/__init__.py +0 -0
  59. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/stages/storage/__init__.py +0 -0
  60. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/stages/storage/image_storage.py +0 -0
  61. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/stages/storage/store_embeddings.py +0 -0
  62. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/stages/telemetry/__init__.py +0 -0
  63. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/stages/telemetry/job_counter.py +0 -0
  64. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/stages/telemetry/otel_meter.py +0 -0
  65. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/stages/telemetry/otel_tracer.py +0 -0
  66. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/stages/transforms/__init__.py +0 -0
  67. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/stages/transforms/image_caption.py +0 -0
  68. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/stages/transforms/text_embed.py +0 -0
  69. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/stages/transforms/text_splitter.py +0 -0
  70. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/stages/utility/__init__.py +0 -0
  71. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/stages/utility/throughput_monitor.py +0 -0
  72. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/util/__init__.py +0 -0
  73. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/util/pipeline/__init__.py +0 -0
  74. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_builders.py +0 -0
  75. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/util/pipeline/tools.py +0 -0
  76. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/util/system_tools/__init__.py +0 -0
  77. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/util/system_tools/memory.py +0 -0
  78. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/orchestration/ray/util/system_tools/visualizers.py +0 -0
  79. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/schemas/__init__.py +0 -0
  80. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/schemas/framework_ingest_config_schema.py +0 -0
  81. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/schemas/framework_job_counter_schema.py +0 -0
  82. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/schemas/framework_message_broker_sink_schema.py +0 -0
  83. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/schemas/framework_message_broker_source_schema.py +0 -0
  84. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/schemas/framework_message_wrapper_schema.py +0 -0
  85. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/schemas/framework_metadata_injector_schema.py +0 -0
  86. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/schemas/framework_otel_meter_schema.py +0 -0
  87. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/schemas/framework_otel_tracer_schema.py +0 -0
  88. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/schemas/framework_processing_job_schema.py +0 -0
  89. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/schemas/framework_task_injection_schema.py +0 -0
  90. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/schemas/framework_vdb_task_sink_schema.py +0 -0
  91. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/util/__init__.py +0 -0
  92. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/util/flow_control/__init__.py +0 -0
  93. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/util/flow_control/filter_by_task.py +0 -0
  94. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/util/service/__init__.py +0 -0
  95. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/util/service/impl/__init__.py +0 -0
  96. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/util/service/impl/ingest/__init__.py +0 -0
  97. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/util/service/impl/ingest/redis_ingest_service.py +0 -0
  98. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/util/service/meta/__init__.py +0 -0
  99. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/util/service/meta/ingest/__init__.py +0 -0
  100. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/util/service/meta/ingest/ingest_service_meta.py +0 -0
  101. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/util/telemetry/__init__.py +0 -0
  102. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/framework/util/telemetry/global_stats.py +0 -0
  103. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest/version.py +0 -0
  104. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest.egg-info/SOURCES.txt +0 -0
  105. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest.egg-info/dependency_links.txt +0 -0
  106. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest.egg-info/requires.txt +0 -0
  107. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/nv_ingest.egg-info/top_level.txt +0 -0
  108. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/pyproject.toml +0 -0
  109. {nv_ingest-2025.7.8.dev20250708 → nv_ingest-2025.7.10.dev20250710}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest
3
- Version: 2025.7.8.dev20250708
3
+ Version: 2025.7.10.dev20250710
4
4
  Summary: Python module for multimodal document ingestion
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -0,0 +1,353 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-25, NVIDIA CORPORATION & AFFILIATES.
2
+ # All rights reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ import threading
6
+ import logging
7
+ import contextlib
8
+ import time
9
+ from typing import List, Dict, Tuple, Any, Optional, Iterator, Set
10
+
11
+ import ray
12
+
13
+ # --- Constants ---
14
+ CLEANUP_INTERVAL_SECONDS = 15.0
15
+ PENDING_SHUTDOWN_TIMEOUT_SECONDS = 60.0 * 60
16
+ PENDING_CHECK_ACTOR_METHOD_TIMEOUT = 5.0
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
class StageInfo:
    """
    Plain record describing one pipeline stage.

    Attributes
    ----------
    name
        Stage name; PipelineTopology uses it as the unique key for a stage.
    callable
        Object stored as the stage's implementation (presumably the actor class
        or factory used to build the stage -- confirm against the pipeline builder).
    config
        Configuration object stored alongside the stage.
    is_source, is_sink
        Flags marking the stage as a pipeline source and/or sink.
    min_replicas, max_replicas
        Replica bounds recorded for the stage.
    pending_shutdown
        Flag recorded for the stage; PipelineTopology.get_stages_info() fills it
        in on the copies it returns.
    """

    def __init__(
        self,
        name,
        callable,
        config,
        is_source=False,
        is_sink=False,
        min_replicas=0,
        max_replicas=1,
        pending_shutdown=False,
    ):
        # Identity and implementation.
        self.name, self.callable, self.config = name, callable, config
        # Role of the stage within the pipeline graph.
        self.is_source, self.is_sink = is_source, is_sink
        # Replica bounds.
        self.min_replicas, self.max_replicas = min_replicas, max_replicas
        # Shutdown bookkeeping.
        self.pending_shutdown = pending_shutdown
41
+
42
+
43
class PipelineTopology:
    """
    Holds the structural definition and runtime state of the pipeline.

    Encapsulates stages, connections, actors, queues, and associated state
    with thread-safe access via internal locking.

    The internal lock is a plain (non-reentrant) ``threading.Lock``. Methods
    that acquire it themselves must therefore not be called while the caller
    is inside ``lock_context()``. ``remove_actors_from_stage`` is the one
    method that does not take the lock and expects the caller to hold it.
    """

    # Scaling states accepted by update_scaling_state().
    _VALID_SCALING_STATES = frozenset({"Idle", "Scaling Up", "Scaling Down", "Error"})

    def __init__(self):
        # --- Definition ---
        self._stages: List["StageInfo"] = []
        self._connections: Dict[str, List[Tuple[str, int]]] = {}

        # --- Runtime State ---
        self._stage_actors: Dict[str, List[Any]] = {}
        self._edge_queues: Dict[str, Tuple[Any, int]] = {}  # Map: q_name -> (QueueHandle, Capacity)
        self._scaling_state: Dict[str, str] = {}  # Map: stage_name -> "Idle" | "Scaling Up" | "Scaling Down" | "Error"
        self._stage_memory_overhead: Dict[str, float] = {}  # Populated during build/config
        self._actors_pending_removal: Set[Tuple[str, Any]] = set()

        # --- Operational State ---
        self._is_flushing: bool = False

        # --- Synchronization & Threading ---
        self._lock: threading.Lock = threading.Lock()
        self._stop_cleanup = threading.Event()
        self._cleanup_thread = None

    def __del__(self):
        """Ensure cleanup thread is stopped and internal actor references are released."""
        logger.debug("PipelineTopology destructor called. Cleaning up thread and actor references.")

        # Stop the background cleanup thread
        try:
            self.stop_cleanup_thread()
        except Exception as e:
            logger.warning(f"Error stopping cleanup thread during __del__: {e}")

        # Clear references to actor handles and shutdown futures
        try:
            self._stage_actors.clear()
            self._edge_queues.clear()
            self._scaling_state.clear()
            self._stage_memory_overhead.clear()
            self._actors_pending_removal.clear()
            self._stages.clear()
            self._connections.clear()
        except Exception as e:
            logger.warning(f"Error clearing internal state during __del__: {e}")

    # --- Lock Context Manager ---
    @contextlib.contextmanager
    def lock_context(self) -> Iterator["PipelineTopology"]:
        """
        Provides safe access to the topology under lock for complex operations.

        Yields this topology while the internal lock is held. Because the lock
        is not reentrant, only lock-free members (such as
        ``remove_actors_from_stage``) may be used inside the ``with`` body.
        """
        with self._lock:
            yield self

    # --- Mutator Methods (Write Operations - Use Lock) ---

    def add_stage(self, stage_info: "StageInfo") -> None:
        """
        Adds a stage definition.

        Raises
        ------
        ValueError
            If a stage with the same name has already been added.
        """
        with self._lock:
            if any(s.name == stage_info.name for s in self._stages):
                logger.error(f"Attempted to add duplicate stage name: {stage_info.name}")
                raise ValueError(f"Stage name '{stage_info.name}' already exists.")
            self._stages.append(stage_info)
            logger.debug(f"Added stage definition: {stage_info.name}")

    def add_connection(self, from_stage: str, to_stage: str, queue_size: int) -> None:
        """
        Adds a connection definition between two stages.

        Raises
        ------
        ValueError
            If either endpoint is not a known stage name.
        """
        with self._lock:
            # Basic validation (more can be added in Pipeline class)
            stage_names = {s.name for s in self._stages}
            if from_stage not in stage_names:
                raise ValueError(f"Source stage '{from_stage}' for connection not found.")
            if to_stage not in stage_names:
                raise ValueError(f"Destination stage '{to_stage}' for connection not found.")

            self._connections.setdefault(from_stage, []).append((to_stage, queue_size))
            logger.debug(f"Added connection definition: {from_stage} -> {to_stage} (q_size={queue_size})")

    def set_actors_for_stage(self, stage_name: str, actors: List[Any]) -> None:
        """
        Sets the list of actors for a given stage, resetting scaling state.

        Unknown stage names are logged and ignored. The given list object is
        stored as-is (not copied).
        """
        with self._lock:
            if stage_name not in {s.name for s in self._stages}:
                logger.warning(f"Attempted to set actors for unknown stage: {stage_name}")
                return  # Or raise error?
            self._stage_actors[stage_name] = actors
            self._scaling_state[stage_name] = "Idle"  # Initialize/reset state
            logger.debug(f"Set {len(actors)} actors for stage '{stage_name}'. State set to Idle.")

    def add_actor_to_stage(self, stage_name: str, actor: Any) -> None:
        """Adds a single actor to a stage's list, creating the list if needed."""
        with self._lock:
            if stage_name not in self._stage_actors:
                # This might happen if stage has 0 min_replicas and is scaled up first time
                self._stage_actors[stage_name] = []
                self._scaling_state[stage_name] = "Idle"  # Ensure state exists
                logger.debug(f"Initialized actor list for stage '{stage_name}' during add.")
            self._stage_actors[stage_name].append(actor)
            logger.debug(f"Added actor to stage '{stage_name}'. New count: {len(self._stage_actors[stage_name])}")

    def remove_actors_from_stage(self, stage_name: str, actors_to_remove: List[Any]) -> List[Any]:
        """
        Removes specific actors from a stage's list immediately.
        Called by the cleanup thread or potentially for forced removal.

        This method does NOT acquire the lock; the caller must already hold it
        (e.g., via ``lock_context()``).

        Returns
        -------
        List[Any]
            The actors that were actually present and removed, in the order
            they appeared in the stage's list. Empty if the stage has no actor
            entry or none of the given actors were found.
        """
        if stage_name not in self._stage_actors:
            logger.warning(
                f"[Topology-InternalRemove] Attempted to remove actors from non-existent stage entry: {stage_name}"
            )
            return []

        current_actors = self._stage_actors[stage_name]
        to_remove_set = set(actors_to_remove)

        # Filter with list comprehensions (set used only for membership) so the
        # relative order of the surviving actors is preserved.
        removed = [actor for actor in current_actors if actor in to_remove_set]

        if removed:
            actors_remaining = [actor for actor in current_actors if actor not in to_remove_set]
            self._stage_actors[stage_name] = actors_remaining
            logger.debug(
                f"[Topology-InternalRemove] Removed {len(removed)} actors from stage '{stage_name}'. "
                f"Remaining: {len(actors_remaining)}"
            )
        elif to_remove_set:
            # This might happen if called twice for the same actor
            logger.debug(f"[Topology-InternalRemove] No actors matching removal list found in stage '{stage_name}'.")

        return removed

    def mark_actor_for_removal(self, stage_name: str, actor: Any) -> None:
        """Marks an actor as pending removal, to be cleaned up by the background thread."""
        with self._lock:
            self._actors_pending_removal.add((stage_name, actor))
            logger.info(f"Marked actor {actor} from stage {stage_name} for removal.")

    def start_cleanup_thread(self, interval: int = 5) -> None:
        """
        Starts the background thread for periodic cleanup tasks.

        Does nothing if a cleanup thread is already running.

        Parameters
        ----------
        interval
            Seconds to wait between cleanup passes.
        """
        if self._cleanup_thread is None or not self._cleanup_thread.is_alive():
            self._stop_cleanup.clear()
            self._cleanup_thread = threading.Thread(target=self._cleanup_loop, args=(interval,), daemon=True)
            self._cleanup_thread.start()
            logger.info("Topology cleanup thread started.")

    def stop_cleanup_thread(self) -> None:
        """
        Stops the background cleanup thread.

        Signals the stop event and waits up to 5 seconds for the thread to
        exit. A warning is logged if the thread is still alive afterwards
        (for example because it is blocked in a remote call).
        """
        thread = self._cleanup_thread
        if thread is None or not thread.is_alive():
            return

        self._stop_cleanup.set()
        thread.join(timeout=5)
        if thread.is_alive():
            logger.warning("Topology cleanup thread did not stop within 5 seconds.")
        else:
            logger.info("Topology cleanup thread stopped.")

    def _cleanup_loop(self, interval: int) -> None:
        """
        Periodically checks for and removes actors that have completed shutdown.

        Each pass snapshots the pending-removal set under the lock, queries
        ``is_shutdown_complete`` on every pending actor outside the lock, and
        then drops the finished actors from both the pending set and the
        stage's actor list. An actor that raises ``RayActorError`` is treated
        as already shut down; any other error is logged and the actor is
        retried on the next pass.
        """
        while not self._stop_cleanup.is_set():
            # Snapshot under the lock; the set may be modified by other threads.
            with self._lock:
                pending_actors_copy = set(self._actors_pending_removal)

            actors_to_remove_finally = []
            # Remote status checks are blocking, so they run without the lock held.
            for stage_name, actor in pending_actors_copy:
                try:
                    if ray.get(actor.is_shutdown_complete.remote()):
                        actors_to_remove_finally.append((stage_name, actor))
                except ray.exceptions.RayActorError:
                    logger.warning(
                        f"Actor {actor} from stage {stage_name} is no longer available (RayActorError). "
                        f"Assuming it has shut down and marking for removal."
                    )
                    actors_to_remove_finally.append((stage_name, actor))
                except Exception as e:
                    logger.error(f"Error checking shutdown status for actor {actor}: {e}", exc_info=True)

            # Remove the fully shut-down actors from the topology
            if actors_to_remove_finally:
                with self._lock:
                    for stage_name, actor in actors_to_remove_finally:
                        self._actors_pending_removal.discard((stage_name, actor))
                        stage_actor_list = self._stage_actors.get(stage_name, [])
                        if actor in stage_actor_list:
                            stage_actor_list.remove(actor)
                            logger.info(f"Successfully removed actor {actor} from stage {stage_name} in topology.")

            # Wait on the stop event rather than sleeping so that
            # stop_cleanup_thread() wakes the loop immediately.
            self._stop_cleanup.wait(interval)

    def set_edge_queues(self, queues: Dict[str, Tuple[Any, int]]) -> None:
        """Sets the dictionary of edge queues (the dict is stored as-is, not copied)."""
        with self._lock:
            self._edge_queues = queues
            logger.debug(f"Set {len(queues)} edge queues.")

    def update_scaling_state(self, stage_name: str, state: str) -> None:
        """
        Updates the scaling state for a stage.

        Invalid state values and unknown stage names are logged and ignored.
        """
        with self._lock:
            if state not in self._VALID_SCALING_STATES:
                logger.error(f"Invalid scaling state '{state}' for stage '{stage_name}'. Ignoring.")
                return
            if stage_name not in {s.name for s in self._stages}:
                logger.warning(f"Attempted to set scaling state for unknown stage: {stage_name}")
                return
            self._scaling_state[stage_name] = state
            logger.debug(f"Updated scaling state for '{stage_name}' to '{state}'.")

    def set_flushing(self, is_flushing: bool) -> None:
        """Sets the pipeline flushing state."""
        with self._lock:
            self._is_flushing = is_flushing
            logger.debug(f"Pipeline flushing state set to: {is_flushing}")

    def set_stage_memory_overhead(self, overheads: Dict[str, float]) -> None:
        """Sets the estimated memory overhead for stages (the dict is stored as-is, not copied)."""
        with self._lock:
            self._stage_memory_overhead = overheads
            logger.debug(f"Set memory overheads for {len(overheads)} stages.")

    def clear_runtime_state(self) -> None:
        """
        Clears actors, queues, and scaling state. Keeps definitions.

        The flushing flag is reset as well. The set of actors pending removal
        and the memory-overhead map are left untouched.
        """
        with self._lock:
            self._stage_actors.clear()
            self._edge_queues.clear()
            self._scaling_state.clear()
            self._is_flushing = False  # Reset flushing state too

            logger.debug("Cleared runtime state (actors, queues, scaling state, flushing flag).")

    # --- Accessor Methods (Read Operations - Use Lock, Return Copies) ---

    def get_all_actors(self) -> List[Any]:
        """Returns a list of all actors across all stages."""
        with self._lock:
            return [actor for actors in self._stage_actors.values() for actor in actors]

    def get_stages_info(self) -> List["StageInfo"]:
        """
        Returns a copy of stage info with pending_shutdown flags updated.

        Each returned object is a shallow copy of the stored stage definition
        whose ``pending_shutdown`` is True only if that particular stage has
        at least one actor marked for removal.
        """
        import copy  # Local import: only this method needs it.

        with self._lock:
            # Names of the stages that currently have actors awaiting removal.
            stages_pending = {pending_stage for pending_stage, _ in self._actors_pending_removal}

            updated_stages = []
            for stage in self._stages:
                stage_copy = copy.copy(stage)
                stage_copy.pending_shutdown = stage.name in stages_pending
                updated_stages.append(stage_copy)
            return updated_stages

    def get_stage_info(self, stage_name: str) -> Optional["StageInfo"]:
        """
        Returns the StageInfo for a specific stage, or None if not found.

        Note: the stored object itself is returned, not a copy.
        """
        with self._lock:
            for stage in self._stages:
                if stage.name == stage_name:
                    return stage
            return None

    def get_connections(self) -> Dict[str, List[Tuple[str, int]]]:
        """Returns a copy of the connection dictionary (with copies of the destination lists)."""
        with self._lock:
            # Copy the lists too: the tuples are immutable, but the lists holding them are not.
            return {source: list(targets) for source, targets in self._connections.items()}

    def get_stage_actors(self) -> Dict[str, List[Any]]:
        """Returns a copy of the stage actors dictionary (with copies of actor lists)."""
        with self._lock:
            return {name: list(actors) for name, actors in self._stage_actors.items()}

    def get_actor_count(self, stage_name: str) -> int:
        """Returns the number of actors for a specific stage (0 if the stage has no entry)."""
        with self._lock:
            return len(self._stage_actors.get(stage_name, []))

    def get_edge_queues(self) -> Dict[str, Tuple[Any, int]]:
        """Returns a shallow copy of the edge queues' dictionary."""
        with self._lock:
            return self._edge_queues.copy()

    def get_scaling_state(self) -> Dict[str, str]:
        """Returns a copy of the scaling state dictionary."""
        with self._lock:
            return self._scaling_state.copy()

    def get_is_flushing(self) -> bool:
        """Returns the current flushing state."""
        with self._lock:
            return self._is_flushing

    def get_stage_memory_overhead(self) -> Dict[str, float]:
        """Returns a copy of the stage memory overhead dictionary."""
        with self._lock:
            return self._stage_memory_overhead.copy()