nv-ingest 2025.8.4.dev20250804__py3-none-any.whl → 2025.12.10.dev20251210__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. nv_ingest/api/__init__.py +6 -0
  2. nv_ingest/api/main.py +2 -0
  3. nv_ingest/api/tracing.py +82 -0
  4. nv_ingest/api/v2/README.md +203 -0
  5. nv_ingest/api/v2/__init__.py +3 -0
  6. nv_ingest/api/v2/ingest.py +1300 -0
  7. nv_ingest/framework/orchestration/execution/__init__.py +3 -0
  8. nv_ingest/framework/orchestration/execution/helpers.py +85 -0
  9. nv_ingest/framework/orchestration/execution/options.py +112 -0
  10. nv_ingest/framework/orchestration/process/__init__.py +3 -0
  11. nv_ingest/framework/orchestration/process/dependent_services.py +84 -0
  12. nv_ingest/framework/orchestration/process/execution.py +495 -0
  13. nv_ingest/framework/orchestration/process/lifecycle.py +214 -0
  14. nv_ingest/framework/orchestration/process/strategies.py +218 -0
  15. nv_ingest/framework/orchestration/process/termination.py +147 -0
  16. nv_ingest/framework/orchestration/ray/examples/pipeline_test_harness.py +3 -3
  17. nv_ingest/framework/orchestration/ray/primitives/pipeline_topology.py +4 -4
  18. nv_ingest/framework/orchestration/ray/primitives/ray_pipeline.py +32 -38
  19. nv_ingest/framework/orchestration/ray/primitives/ray_stat_collector.py +5 -5
  20. nv_ingest/framework/orchestration/ray/stages/extractors/audio_extractor.py +10 -7
  21. nv_ingest/framework/orchestration/ray/stages/extractors/chart_extractor.py +17 -14
  22. nv_ingest/framework/orchestration/ray/stages/extractors/docx_extractor.py +11 -6
  23. nv_ingest/framework/orchestration/ray/stages/extractors/html_extractor.py +10 -5
  24. nv_ingest/framework/orchestration/ray/stages/extractors/image_extractor.py +12 -7
  25. nv_ingest/framework/orchestration/ray/stages/extractors/infographic_extractor.py +22 -10
  26. nv_ingest/framework/orchestration/ray/stages/extractors/ocr_extractor.py +71 -0
  27. nv_ingest/framework/orchestration/ray/stages/extractors/pdf_extractor.py +19 -15
  28. nv_ingest/framework/orchestration/ray/stages/extractors/pptx_extractor.py +10 -5
  29. nv_ingest/framework/orchestration/ray/stages/extractors/table_extractor.py +16 -14
  30. nv_ingest/framework/orchestration/ray/stages/injectors/metadata_injector.py +16 -13
  31. nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_sink_stage_base.py +3 -0
  32. nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_source_stage_base.py +3 -3
  33. nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_stage_base.py +92 -4
  34. nv_ingest/framework/orchestration/ray/stages/mutate/image_dedup.py +12 -8
  35. nv_ingest/framework/orchestration/ray/stages/mutate/image_filter.py +12 -9
  36. nv_ingest/framework/orchestration/ray/stages/sinks/default_drain.py +4 -4
  37. nv_ingest/framework/orchestration/ray/stages/sinks/message_broker_task_sink.py +5 -2
  38. nv_ingest/framework/orchestration/ray/stages/sources/message_broker_task_source.py +116 -69
  39. nv_ingest/framework/orchestration/ray/stages/storage/image_storage.py +79 -11
  40. nv_ingest/framework/orchestration/ray/stages/storage/store_embeddings.py +10 -5
  41. nv_ingest/framework/orchestration/ray/stages/telemetry/job_counter.py +8 -4
  42. nv_ingest/framework/orchestration/ray/stages/telemetry/otel_tracer.py +17 -7
  43. nv_ingest/framework/orchestration/ray/stages/transforms/image_caption.py +12 -6
  44. nv_ingest/framework/orchestration/ray/stages/transforms/text_embed.py +17 -18
  45. nv_ingest/framework/orchestration/ray/stages/transforms/text_splitter.py +21 -14
  46. nv_ingest/framework/orchestration/ray/stages/utility/throughput_monitor.py +11 -3
  47. nv_ingest/framework/orchestration/ray/util/pipeline/pid_controller.py +1 -2
  48. nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_runners.py +33 -326
  49. nv_ingest/framework/orchestration/ray/util/pipeline/tools.py +13 -3
  50. nv_ingest/framework/util/flow_control/udf_intercept.py +352 -0
  51. nv_ingest/framework/util/service/impl/ingest/redis_ingest_service.py +215 -11
  52. nv_ingest/pipeline/__init__.py +3 -0
  53. nv_ingest/pipeline/config/__init__.py +3 -0
  54. nv_ingest/pipeline/config/loaders.py +229 -0
  55. nv_ingest/pipeline/config/replica_resolver.py +237 -0
  56. nv_ingest/pipeline/default_libmode_pipeline_impl.py +528 -0
  57. nv_ingest/pipeline/default_pipeline_impl.py +557 -0
  58. nv_ingest/pipeline/ingest_pipeline.py +389 -0
  59. nv_ingest/pipeline/pipeline_schema.py +398 -0
  60. {nv_ingest-2025.8.4.dev20250804.dist-info → nv_ingest-2025.12.10.dev20251210.dist-info}/METADATA +6 -3
  61. {nv_ingest-2025.8.4.dev20250804.dist-info → nv_ingest-2025.12.10.dev20251210.dist-info}/RECORD +64 -43
  62. nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_builders.py +0 -359
  63. nv_ingest/framework/orchestration/ray/util/pipeline/stage_builders.py +0 -649
  64. {nv_ingest-2025.8.4.dev20250804.dist-info → nv_ingest-2025.12.10.dev20251210.dist-info}/WHEEL +0 -0
  65. {nv_ingest-2025.8.4.dev20250804.dist-info → nv_ingest-2025.12.10.dev20251210.dist-info}/licenses/LICENSE +0 -0
  66. {nv_ingest-2025.8.4.dev20250804.dist-info → nv_ingest-2025.12.10.dev20251210.dist-info}/top_level.txt +0 -0
@@ -3,7 +3,7 @@
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
 
5
5
  import logging
6
- from typing import Any
6
+ from typing import Any, Optional
7
7
  from pydantic import BaseModel
8
8
  import ray
9
9
 
@@ -14,6 +14,8 @@ from nv_ingest.framework.util.telemetry.global_stats import GlobalStats
14
14
  from nv_ingest_api.util.exception_handlers.decorators import (
15
15
  nv_ingest_node_failure_try_except,
16
16
  )
17
+ from nv_ingest.framework.util.flow_control.udf_intercept import udf_intercept_hook
18
+ from nv_ingest_api.internal.primitives.tracing.tagging import traceable
17
19
 
18
20
  # Import the JobCounter schema and global stats singleton.
19
21
 
@@ -30,15 +32,17 @@ class JobCounterStage(RayActorStage):
30
32
  statistic each time it processes a message.
31
33
  """
32
34
 
33
- def __init__(self, config: BaseModel) -> None:
35
+ def __init__(self, config: BaseModel, stage_name: Optional[str] = None) -> None:
34
36
  # Ensure base attributes (e.g. self._running) are initialized.
35
- super().__init__(config)
37
+ super().__init__(config, stage_name=stage_name)
36
38
  # The validated config should be a JobCounterSchema instance.
37
39
  self.validated_config: JobCounterSchema = config
38
40
  # Obtain the global stats' singleton.
39
41
  self.stats = GlobalStats.get_instance()
40
42
 
41
- @nv_ingest_node_failure_try_except(annotation_id="job_counter", raise_on_failure=False)
43
+ @nv_ingest_node_failure_try_except()
44
+ @traceable()
45
+ @udf_intercept_hook()
42
46
  async def on_data(self, message: Any) -> Any:
43
47
  """
44
48
  Process an incoming IngestControlMessage by counting jobs.
@@ -24,6 +24,7 @@ from nv_ingest_api.util.exception_handlers.decorators import nv_ingest_node_fail
24
24
 
25
25
  from nv_ingest_api.internal.primitives.tracing.logging import TaskResultStatus
26
26
  from nv_ingest_api.internal.primitives.ingest_control_message import IngestControlMessage
27
+ from nv_ingest.framework.util.flow_control.udf_intercept import udf_intercept_hook
27
28
 
28
29
 
29
30
  @ray.remote
@@ -35,8 +36,8 @@ class OpenTelemetryTracerStage(RayActorStage):
35
36
  It creates spans for tasks and exports them to a configured OpenTelemetry endpoint.
36
37
  """
37
38
 
38
- def __init__(self, config: OpenTelemetryTracerSchema) -> None:
39
- super().__init__(config)
39
+ def __init__(self, config: OpenTelemetryTracerSchema, stage_name: Optional[str] = None) -> None:
40
+ super().__init__(config, stage_name=stage_name)
40
41
 
41
42
  # self._logger.info(f"[Telemetry] Initializing OpenTelemetry tracer stage with config: {config}")
42
43
 
@@ -81,7 +82,7 @@ class OpenTelemetryTracerStage(RayActorStage):
81
82
  parent_ctx = trace.set_span_in_context(NonRecordingSpan(span_context))
82
83
  parent_span = self.tracer.start_span(str(job_id), context=parent_ctx, start_time=start_time)
83
84
 
84
- event_count = create_span_with_timestamps(self.tracer, parent_span, message)
85
+ event_count = create_span_with_timestamps(self.tracer, parent_span, message, self._logger)
85
86
 
86
87
  if message.has_metadata("cm_failed") and message.get_metadata("cm_failed"):
87
88
  parent_span.set_status(Status(StatusCode.ERROR))
@@ -96,7 +97,8 @@ class OpenTelemetryTracerStage(RayActorStage):
96
97
 
97
98
  self._logger.debug(f"[Telemetry] Exported spans for message {job_id} with {event_count} total events.")
98
99
 
99
- @nv_ingest_node_failure_try_except(annotation_id="otel_tracer", raise_on_failure=False)
100
+ @nv_ingest_node_failure_try_except()
101
+ @udf_intercept_hook()
100
102
  def on_data(self, control_message: IngestControlMessage) -> Optional[Any]:
101
103
  try:
102
104
  do_trace_tagging = bool(control_message.get_metadata("config::add_trace_tagging"))
@@ -160,7 +162,7 @@ def extract_annotated_task_results(message):
160
162
  return task_results
161
163
 
162
164
 
163
- def create_span_with_timestamps(tracer, parent_span, message) -> int:
165
+ def create_span_with_timestamps(tracer, parent_span, message, logger) -> int:
164
166
  timestamps = extract_timestamps_from_message(message)
165
167
  task_results = extract_annotated_task_results(message)
166
168
 
@@ -175,8 +177,16 @@ def create_span_with_timestamps(tracer, parent_span, message) -> int:
175
177
  if not subtask:
176
178
  span = tracer.start_span(main_task, context=child_ctx, start_time=ts_entry)
177
179
  else:
178
- subtask_ctx = trace.set_span_in_context(ctx_store[main_task][0])
179
- span = tracer.start_span(subtask, context=subtask_ctx, start_time=ts_entry)
180
+ # Check if parent context exists, otherwise create standalone span with warning
181
+ if main_task in ctx_store:
182
+ subtask_ctx = trace.set_span_in_context(ctx_store[main_task][0])
183
+ span = tracer.start_span(subtask, context=subtask_ctx, start_time=ts_entry)
184
+ else:
185
+ logger.warning(
186
+ f"Missing parent context for subtask '{subtask}'"
187
+ f" (expected parent: '{main_task}'). Creating standalone span."
188
+ )
189
+ span = tracer.start_span(f"{main_task}::{subtask}", context=child_ctx, start_time=ts_entry)
180
190
 
181
191
  span.add_event("entry", timestamp=ts_entry)
182
192
  span.add_event("exit", timestamp=ts_exit)
@@ -4,12 +4,13 @@
4
4
 
5
5
  import logging
6
6
  import pprint
7
- from typing import Any
7
+ from typing import Any, Optional
8
8
 
9
9
  import ray
10
10
 
11
11
  from nv_ingest.framework.orchestration.ray.stages.meta.ray_actor_stage_base import RayActorStage
12
12
  from nv_ingest.framework.util.flow_control import filter_by_task
13
+ from nv_ingest.framework.util.flow_control.udf_intercept import udf_intercept_hook
13
14
  from nv_ingest_api.internal.primitives.ingest_control_message import remove_task_by_type
14
15
  from nv_ingest_api.internal.primitives.tracing.tagging import traceable
15
16
  from nv_ingest_api.internal.schemas.transform.transform_image_caption_schema import ImageCaptionExtractionSchema
@@ -17,6 +18,7 @@ from nv_ingest_api.internal.transform.caption_image import transform_image_creat
17
18
  from nv_ingest_api.util.exception_handlers.decorators import (
18
19
  nv_ingest_node_failure_try_except,
19
20
  )
21
+ from nv_ingest_api.util.logging.sanitize import sanitize_for_logging
20
22
 
21
23
  logger = logging.getLogger(__name__)
22
24
 
@@ -31,8 +33,8 @@ class ImageCaptionTransformStage(RayActorStage):
31
33
  are stored in the control message.
32
34
  """
33
35
 
34
- def __init__(self, config: ImageCaptionExtractionSchema) -> None:
35
- super().__init__(config)
36
+ def __init__(self, config: ImageCaptionExtractionSchema, stage_name: Optional[str] = None) -> None:
37
+ super().__init__(config, stage_name=stage_name)
36
38
  try:
37
39
  self.validated_config = config
38
40
  logger.info("ImageCaptionTransformStage configuration validated.")
@@ -40,9 +42,10 @@ class ImageCaptionTransformStage(RayActorStage):
40
42
  logger.exception("Error validating caption extraction config")
41
43
  raise e
42
44
 
43
- @traceable("image_captioning")
45
+ @nv_ingest_node_failure_try_except()
46
+ @traceable()
47
+ @udf_intercept_hook()
44
48
  @filter_by_task(required_tasks=["caption"])
45
- @nv_ingest_node_failure_try_except(annotation_id="image_captioning", raise_on_failure=False)
46
49
  def on_data(self, control_message: Any) -> Any:
47
50
  """
48
51
  Process the control message by extracting image captions.
@@ -65,7 +68,10 @@ class ImageCaptionTransformStage(RayActorStage):
65
68
 
66
69
  # Remove the "caption" task to obtain task-specific configuration.
67
70
  task_config = remove_task_by_type(control_message, "caption")
68
- logger.debug("ImageCaptionTransformStage: Task configuration extracted: %s", pprint.pformat(task_config))
71
+ logger.debug(
72
+ "ImageCaptionTransformStage: Task configuration extracted: %s",
73
+ pprint.pformat(sanitize_for_logging(task_config)),
74
+ )
69
75
 
70
76
  # Call the caption extraction function.
71
77
  new_df = transform_image_create_vlm_caption_internal(
@@ -2,12 +2,10 @@
2
2
  # All rights reserved.
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
 
5
- import logging
6
5
  import pprint
7
- from typing import Any
6
+ from typing import Optional
8
7
  import ray
9
8
 
10
- # Assume these imports come from your project:
11
9
  from nv_ingest.framework.orchestration.ray.stages.meta.ray_actor_stage_base import RayActorStage
12
10
  from nv_ingest.framework.util.flow_control import filter_by_task
13
11
  from nv_ingest_api.internal.primitives.ingest_control_message import remove_task_by_type, IngestControlMessage
@@ -17,8 +15,9 @@ from nv_ingest_api.internal.transform.embed_text import transform_create_text_em
17
15
  from nv_ingest_api.util.exception_handlers.decorators import (
18
16
  nv_ingest_node_failure_try_except,
19
17
  )
18
+ from nv_ingest_api.util.logging.sanitize import sanitize_for_logging
20
19
 
21
- logger = logging.getLogger(__name__)
20
+ from nv_ingest.framework.util.flow_control.udf_intercept import udf_intercept_hook
22
21
 
23
22
 
24
23
  @ray.remote
@@ -31,19 +30,20 @@ class TextEmbeddingTransformStage(RayActorStage):
31
30
  trace or extraction metadata is added.
32
31
  """
33
32
 
34
- def __init__(self, config: TextEmbeddingSchema) -> None:
35
- super().__init__(config, log_to_stdout=False)
33
+ def __init__(self, config: TextEmbeddingSchema, stage_name: Optional[str] = None) -> None:
34
+ super().__init__(config, stage_name=stage_name)
36
35
  try:
37
36
  self.validated_config = config
38
- logger.info("TextEmbeddingTransformStage configuration validated successfully.")
37
+ self._logger.info("TextEmbeddingTransformStage configuration validated successfully.")
39
38
  except Exception as e:
40
- logger.exception("Error validating text embedding extractor config")
41
- raise e
39
+ self._logger.exception(f"Error validating text embedding config: {e}")
40
+ raise
42
41
 
43
- @traceable("text_embedding")
42
+ @nv_ingest_node_failure_try_except()
43
+ @traceable()
44
+ @udf_intercept_hook()
44
45
  @filter_by_task(required_tasks=["embed"])
45
- @nv_ingest_node_failure_try_except(annotation_id="text_embedding", raise_on_failure=False)
46
- def on_data(self, control_message: IngestControlMessage) -> Any:
46
+ def on_data(self, control_message: IngestControlMessage) -> IngestControlMessage:
47
47
  """
48
48
  Process the control message by generating text embeddings.
49
49
 
@@ -57,25 +57,24 @@ class TextEmbeddingTransformStage(RayActorStage):
57
57
  IngestControlMessage
58
58
  The updated message with text embeddings and trace info added.
59
59
  """
60
- logger.info("TextEmbeddingTransformStage.on_data: Starting text embedding transformation.")
61
-
62
60
  # Get the DataFrame payload.
63
61
  df_payload = control_message.payload()
64
- logger.debug("TextEmbeddingTransformStage: Extracted payload with %d rows.", len(df_payload))
62
+ self._logger.debug("TextEmbeddingTransformStage: Extracted payload with %d rows.", len(df_payload))
65
63
 
66
64
  # Remove the "embed" task to obtain task-specific configuration.
67
65
  task_config = remove_task_by_type(control_message, "embed")
68
- logger.debug("TextEmbeddingTransformStage: Task configuration extracted: %s", pprint.pformat(task_config))
66
+ self._logger.debug(
67
+ "TextEmbeddingTransformStage: Task configuration extracted: %s",
68
+ pprint.pformat(sanitize_for_logging(task_config)),
69
+ )
69
70
 
70
71
  # Call the text embedding extraction function.
71
72
  new_df, execution_trace_log = transform_create_text_embeddings_internal(
72
73
  df_payload, task_config=task_config, transform_config=self.validated_config
73
74
  )
74
- logger.info("Text embedding transformation completed. New payload has %d rows.", len(new_df))
75
75
 
76
76
  # Update the control message payload.
77
77
  control_message.payload(new_df)
78
78
  # Annotate the message metadata with trace info.
79
79
  control_message.set_metadata("text_embedding_trace", execution_trace_log)
80
- logger.info("Text embedding trace metadata added.")
81
80
  return control_message
@@ -3,18 +3,22 @@
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
 
5
5
  import logging
6
- from typing import Any
6
+ from typing import Any, Optional
7
+
7
8
  import ray
8
9
 
9
10
  from nv_ingest.framework.orchestration.ray.stages.meta.ray_actor_stage_base import RayActorStage
10
11
  from nv_ingest.framework.util.flow_control import filter_by_task
11
- from nv_ingest_api.internal.primitives.ingest_control_message import remove_task_by_type, IngestControlMessage
12
+ from nv_ingest_api.internal.primitives.ingest_control_message import IngestControlMessage, remove_task_by_type
12
13
  from nv_ingest_api.internal.primitives.tracing.tagging import traceable
13
14
  from nv_ingest_api.internal.schemas.transform.transform_text_splitter_schema import TextSplitterSchema
14
15
  from nv_ingest_api.internal.transform.split_text import transform_text_split_and_tokenize_internal
15
16
  from nv_ingest_api.util.exception_handlers.decorators import (
16
17
  nv_ingest_node_failure_try_except,
17
18
  )
19
+ from nv_ingest_api.util.logging.sanitize import sanitize_for_logging
20
+
21
+ from nv_ingest.framework.util.flow_control.udf_intercept import udf_intercept_hook
18
22
 
19
23
  logger = logging.getLogger(__name__)
20
24
 
@@ -29,15 +33,16 @@ class TextSplitterStage(RayActorStage):
29
33
  and tokenization logic. The updated DataFrame is then set back into the message.
30
34
  """
31
35
 
32
- def __init__(self, config: TextSplitterSchema) -> None:
33
- super().__init__(config)
36
+ def __init__(self, config: TextSplitterSchema, stage_name: Optional[str] = None) -> None:
37
+ super().__init__(config, stage_name=stage_name)
34
38
  # Store the validated configuration (assumed to be an instance of TextSplitterSchema)
35
39
  self.validated_config: TextSplitterSchema = config
36
- logger.info("TextSplitterStage initialized with config: %s", config)
40
+ logger.info("TextSplitterStage initialized with config: %s", sanitize_for_logging(config))
37
41
 
38
- @traceable("text_splitter")
39
- @filter_by_task(["split"])
40
- @nv_ingest_node_failure_try_except(annotation_id="text_splitter", raise_on_failure=False)
42
+ @nv_ingest_node_failure_try_except()
43
+ @traceable()
44
+ @udf_intercept_hook()
45
+ @filter_by_task(required_tasks=["split"])
41
46
  def on_data(self, message: Any) -> Any:
42
47
  """
43
48
  Process an incoming IngestControlMessage by splitting and tokenizing its text.
@@ -59,7 +64,7 @@ class TextSplitterStage(RayActorStage):
59
64
 
60
65
  # Remove the "split" task to obtain task-specific configuration.
61
66
  task_config = remove_task_by_type(message, "split")
62
- logger.debug("Extracted task config: %s", task_config)
67
+ logger.debug("Extracted task config: %s", sanitize_for_logging(task_config))
63
68
 
64
69
  # Transform the DataFrame (split text and tokenize).
65
70
  df_updated = transform_text_split_and_tokenize_internal(
@@ -68,11 +73,13 @@ class TextSplitterStage(RayActorStage):
68
73
  transform_config=self.validated_config,
69
74
  execution_trace_log=None,
70
75
  )
71
- logger.info("TextSplitterStage.on_data: Transformation complete. Updated payload has %d rows.", len(df_updated))
76
+ logger.debug(
77
+ "TextSplitterStage.on_data: Transformation complete. Updated payload has %d rows.", len(df_updated)
78
+ )
72
79
 
73
80
  # Update the message payload.
74
81
  message.payload(df_updated)
75
- logger.info("TextSplitterStage.on_data: Finished processing, returning updated message.")
82
+ logger.debug("TextSplitterStage.on_data: Finished processing, returning updated message.")
76
83
 
77
84
  return message
78
85
 
@@ -101,7 +108,7 @@ def text_splitter_fn(control_message: IngestControlMessage, stage_config: TextSp
101
108
 
102
109
  # Remove the "split" task to obtain task-specific configuration.
103
110
  task_config = remove_task_by_type(control_message, "split")
104
- logger.debug("Extracted task config: %s", task_config)
111
+ logger.debug("Extracted task config: %s", sanitize_for_logging(task_config))
105
112
 
106
113
  # Transform the DataFrame (split text and tokenize).
107
114
  df_updated = transform_text_split_and_tokenize_internal(
@@ -110,10 +117,10 @@ def text_splitter_fn(control_message: IngestControlMessage, stage_config: TextSp
110
117
  transform_config=stage_config,
111
118
  execution_trace_log=None,
112
119
  )
113
- logger.info("TextSplitterStage.on_data: Transformation complete. Updated payload has %d rows.", len(df_updated))
120
+ logger.debug("TextSplitterStage.on_data: Transformation complete. Updated payload has %d rows.", len(df_updated))
114
121
 
115
122
  # Update the message payload.
116
123
  control_message.payload(df_updated)
117
- logger.info("TextSplitterStage.on_data: Finished processing, returning updated message.")
124
+ logger.debug("TextSplitterStage.on_data: Finished processing, returning updated message.")
118
125
 
119
126
  return control_message
@@ -4,11 +4,16 @@
4
4
 
5
5
  import logging
6
6
  import time
7
- from typing import Any
7
+ from typing import Any, Optional
8
8
  from pydantic import BaseModel
9
9
  import ray
10
10
 
11
11
  from nv_ingest.framework.orchestration.ray.stages.meta.ray_actor_stage_base import RayActorStage
12
+ from nv_ingest.framework.util.flow_control.udf_intercept import udf_intercept_hook
13
+ from nv_ingest_api.internal.primitives.tracing.tagging import traceable
14
+ from nv_ingest_api.util.exception_handlers.decorators import (
15
+ nv_ingest_node_failure_try_except,
16
+ )
12
17
 
13
18
  logger = logging.getLogger(__name__)
14
19
 
@@ -22,12 +27,15 @@ class ThroughputMonitorStage(RayActorStage):
22
27
  It also adds the throughput as metadata on the control message before passing it on.
23
28
  """
24
29
 
25
- def __init__(self, config: BaseModel) -> None:
30
+ def __init__(self, config: BaseModel, stage_name: Optional[str] = None) -> None:
26
31
  # Initialize base attributes (e.g., self._running, self.start_time) via the base class.
27
- super().__init__(config)
32
+ super().__init__(config, stage_name=stage_name)
28
33
  self.count = 0
29
34
  self.last_emit_time = None # Timestamp when the last throughput measure was emitted
30
35
 
36
+ @nv_ingest_node_failure_try_except()
37
+ @traceable()
38
+ @udf_intercept_hook()
31
39
  async def on_data(self, message: Any) -> Any:
32
40
  """
33
41
  Process an incoming control message. Increment the internal counter and, every 100 messages,
@@ -10,7 +10,6 @@ from typing import Dict, Any, List, Tuple, Optional
10
10
 
11
11
  from nv_ingest_api.util.system.hardware_info import SystemResourceProbe
12
12
 
13
- logging.basicConfig(level=logging.INFO)
14
13
  logger = logging.getLogger(__name__)
15
14
 
16
15
  # --- Constants ---
@@ -259,7 +258,7 @@ class ResourceConstraintManager:
259
258
  else:
260
259
  self.core_based_replica_limit = None # Treat as unlimited if detection failed
261
260
 
262
- logger.info(
261
+ logger.debug(
263
262
  f"[ConstraintMgr] Initialized. MaxReplicas={max_replicas}, "
264
263
  f"EffectiveCoreLimit={self.available_cores:.2f} " # Log the potentially fractional value
265
264
  f"(Method: {self.core_detection_details.get('detection_method')}), "