nv-ingest 25.6.25.dev20250625__py3-none-any.whl → 25.6.27.dev20250627__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nv-ingest might be problematic. Click here for more details.

@@ -9,6 +9,7 @@ import threading
9
9
  from abc import ABC, abstractmethod
10
10
  from collections import defaultdict
11
11
  from dataclasses import dataclass
12
+ from types import FunctionType
12
13
 
13
14
  import psutil
14
15
  import uuid
@@ -24,6 +25,9 @@ import time
24
25
  from nv_ingest.framework.orchestration.ray.primitives.pipeline_topology import PipelineTopology, StageInfo
25
26
  from nv_ingest.framework.orchestration.ray.primitives.ray_stat_collector import RayStatsCollector
26
27
  from nv_ingest.framework.orchestration.ray.util.pipeline.pid_controller import PIDController, ResourceConstraintManager
28
+ from nv_ingest.framework.orchestration.ray.util.pipeline.tools import wrap_callable_as_stage
29
+ from nv_ingest_api.util.imports.callable_signatures import ingest_stage_callable_signature
30
+ from nv_ingest_api.util.imports.dynamic_resolvers import resolve_callable_from_path
27
31
 
28
32
  logger = logging.getLogger(__name__)
29
33
 
@@ -43,7 +47,7 @@ class PipelineInterface(ABC):
43
47
  Parameters
44
48
  ----------
45
49
  monitor_poll_interval : float
46
- Interval in seconds for monitoring poll (default: 5.0).
50
+ Interval in seconds for the monitoring poll (default: 5.0).
47
51
  scaling_poll_interval : float
48
52
  Interval in seconds for scaling decisions (default: 30.0).
49
53
  """
@@ -270,7 +274,7 @@ class RayPipeline(PipelineInterface):
270
274
 
271
275
  logger.info("RayStatsCollector initialized using StatsConfig.")
272
276
 
273
- # --- Accessor Methods for Stats Collector (and internal use) ---
277
+ # --- Accessor Methods for Stat Collector (and internal use) ---
274
278
 
275
279
  def __del__(self):
276
280
  try:
@@ -428,15 +432,39 @@ class RayPipeline(PipelineInterface):
428
432
  return self
429
433
 
430
434
  def add_stage(
431
- self, *, name: str, stage_actor: Any, config: BaseModel, min_replicas: int = 0, max_replicas: int = 1
435
+ self,
436
+ *,
437
+ name: str,
438
+ stage_actor: Any,
439
+ config: BaseModel,
440
+ min_replicas: int = 0,
441
+ max_replicas: int = 1,
432
442
  ) -> "RayPipeline":
433
443
  if min_replicas < 0:
434
444
  logger.warning(f"Stage '{name}': min_replicas cannot be negative. Overriding to 0.")
435
445
  min_replicas = 0
446
+
447
+ resolved_actor = stage_actor
448
+
449
+ # Support module path (e.g., "mypkg.mymodule:my_lambda")
450
+ if isinstance(stage_actor, str):
451
+ resolved_actor = resolve_callable_from_path(
452
+ callable_path=stage_actor, signature_schema=ingest_stage_callable_signature
453
+ )
454
+
455
+ # Wrap callables
456
+ if isinstance(resolved_actor, FunctionType):
457
+ schema_type = type(config)
458
+ resolved_actor = wrap_callable_as_stage(resolved_actor, schema_type)
459
+
436
460
  stage_info = StageInfo(
437
- name=name, callable=stage_actor, config=config, min_replicas=min_replicas, max_replicas=max_replicas
461
+ name=name,
462
+ callable=resolved_actor,
463
+ config=config,
464
+ min_replicas=min_replicas,
465
+ max_replicas=max_replicas,
438
466
  )
439
- self.topology.add_stage(stage_info) # Delegate
467
+ self.topology.add_stage(stage_info)
440
468
 
441
469
  return self
442
470
 
@@ -23,16 +23,13 @@ class RayActorSourceStage(RayActorStage, ABC):
23
23
  super().__init__(config, log_to_stdout=log_to_stdout)
24
24
  self.paused = False
25
25
 
26
def on_data(self, IngestControlMessage):
    """
    Reject per-message processing: source stages do not implement ``on_data``.

    Parameters
    ----------
    IngestControlMessage : Any
        Ignored. NOTE(review): this parameter name shadows the
        ``IngestControlMessage`` type name; it is kept unchanged so that any
        caller passing it by keyword keeps working.

    Raises
    ------
    NotImplementedError
        Always raised.
    """
    # ``NotImplemented`` is a non-callable singleton meant for binary-operator
    # dispatch; calling it fails with ``TypeError``. Raising the exception class
    # is what was intended and matches ``set_input_queue`` on this same stage.
    raise NotImplementedError("Source stages do not implement on_data().")
28
+
26
29
  @ray.method(num_returns=1)
27
30
  def set_input_queue(self, queue_handle: Any) -> bool:
28
31
  raise NotImplementedError("Source stages do not support an input queue.")
29
32
 
30
- def get_input(self) -> Any:
31
- """
32
- Source stages must implement get_input() to fetch control messages from an external source.
33
- """
34
- pass
35
-
36
33
  @abstractmethod
37
34
  def _read_input(self) -> Any:
38
35
  """
@@ -304,14 +304,6 @@ class MessageBrokerTaskSourceStage(RayActorSourceStage):
304
304
 
305
305
  return control_message
306
306
 
307
- def on_data(self, control_message: any) -> any:
308
- """
309
- Process the control message.
310
- For this source stage, no additional processing is done, so simply return it.
311
- """
312
- self._logger.debug("on_data: Received control message for processing")
313
- return control_message
314
-
315
307
  # In the processing loop, instead of checking a boolean, we wait on the event.
316
308
  def _processing_loop(self) -> None:
317
309
  """
@@ -336,7 +328,6 @@ class MessageBrokerTaskSourceStage(RayActorSourceStage):
336
328
  self._active_processing = True
337
329
 
338
330
  self._logger.debug("Control message received; processing data")
339
- updated_cm = self.on_data(control_message)
340
331
 
341
332
  # Block until not paused using the pause event.
342
333
  if self.output_queue is not None:
@@ -349,7 +340,7 @@ class MessageBrokerTaskSourceStage(RayActorSourceStage):
349
340
 
350
341
  while True:
351
342
  try:
352
- self.output_queue.put(updated_cm)
343
+ self.output_queue.put(control_message)
353
344
  self.stats["successful_queue_writes"] += 1
354
345
  break
355
346
  except Exception:
@@ -1,11 +1,14 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-25, NVIDIA CORPORATION & AFFILIATES.
2
+ # All rights reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
1
5
  import logging
2
6
  from typing import Any
3
7
  import ray
4
8
 
5
- # Assume these imports come from your project:
6
9
  from nv_ingest.framework.orchestration.ray.stages.meta.ray_actor_stage_base import RayActorStage
7
10
  from nv_ingest.framework.util.flow_control import filter_by_task
8
- from nv_ingest_api.internal.primitives.ingest_control_message import remove_task_by_type
11
+ from nv_ingest_api.internal.primitives.ingest_control_message import remove_task_by_type, IngestControlMessage
9
12
  from nv_ingest_api.internal.primitives.tracing.tagging import traceable
10
13
  from nv_ingest_api.internal.schemas.transform.transform_text_splitter_schema import TextSplitterSchema
11
14
  from nv_ingest_api.internal.transform.split_text import transform_text_split_and_tokenize_internal
@@ -72,3 +75,45 @@ class TextSplitterStage(RayActorStage):
72
75
  logger.info("TextSplitterStage.on_data: Finished processing, returning updated message.")
73
76
 
74
77
  return message
78
+
79
+
80
def text_splitter_fn(control_message: IngestControlMessage, stage_config: TextSplitterSchema) -> IngestControlMessage:
    """
    Split and tokenize the text carried by an IngestControlMessage.

    Function-style counterpart of the stage's ``on_data`` logic, intended to be
    passed around as a plain callable taking ``(control_message, stage_config)``.

    Parameters
    ----------
    control_message : IngestControlMessage
        The incoming message containing the payload DataFrame. Its "split" task
        is removed and its payload is replaced in place.
    stage_config : TextSplitterSchema
        The stage-level configuration object, forwarded as ``transform_config``.

    Returns
    -------
    IngestControlMessage
        The same message object, with its payload replaced by the transformed data.
    """
    # Extract the DataFrame payload.
    df_payload = control_message.payload()
    logger.debug("Extracted payload with %d rows.", len(df_payload))

    # Remove the "split" task to obtain the task-specific configuration.
    task_config = remove_task_by_type(control_message, "split")
    logger.debug("Extracted task config: %s", task_config)

    # Transform the DataFrame (split text and tokenize). No execution trace is collected here.
    df_updated = transform_text_split_and_tokenize_internal(
        df_transform_ledger=df_payload,
        task_config=task_config,
        transform_config=stage_config,
        execution_trace_log=None,
    )
    # Log under this function's own name; the previous "TextSplitterStage.on_data"
    # prefix was copied from the class-based stage and misattributed the source.
    logger.info("text_splitter_fn: Transformation complete. Updated payload has %d rows.", len(df_updated))

    # Update the message payload.
    control_message.payload(df_updated)
    logger.info("text_splitter_fn: Finished processing, returning updated message.")

    return control_message
@@ -2,8 +2,6 @@
2
2
  # All rights reserved.
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
 
5
- # TODO(Devin)
6
- # flake8: noqa
7
5
  import os
8
6
 
9
7
  import click
@@ -11,6 +9,7 @@ import logging
11
9
 
12
10
  from nv_ingest.framework.orchestration.ray.stages.sinks.default_drain import DefaultDrainSink
13
11
  from nv_ingest.framework.orchestration.ray.stages.telemetry.otel_tracer import OpenTelemetryTracerStage
12
+ from nv_ingest.framework.orchestration.ray.stages.transforms.text_splitter import TextSplitterStage
14
13
  from nv_ingest.framework.schemas.framework_otel_tracer_schema import OpenTelemetryTracerSchema
15
14
  from nv_ingest_api.internal.schemas.extract.extract_infographic_schema import InfographicExtractorSchema
16
15
 
@@ -41,7 +40,6 @@ from nv_ingest.framework.orchestration.ray.stages.storage.image_storage import I
41
40
  from nv_ingest.framework.orchestration.ray.stages.storage.store_embeddings import EmbeddingStorageStage
42
41
  from nv_ingest.framework.orchestration.ray.stages.transforms.image_caption import ImageCaptionTransformStage
43
42
  from nv_ingest.framework.orchestration.ray.stages.transforms.text_embed import TextEmbeddingTransformStage
44
- from nv_ingest.framework.orchestration.ray.stages.transforms.text_splitter import TextSplitterStage
45
43
  from nv_ingest.framework.schemas.framework_metadata_injector_schema import MetadataInjectorSchema
46
44
  from nv_ingest_api.internal.schemas.extract.extract_audio_schema import AudioExtractorSchema
47
45
  from nv_ingest_api.internal.schemas.extract.extract_chart_schema import ChartExtractorSchema
@@ -0,0 +1,203 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-25, NVIDIA CORPORATION & AFFILIATES.
2
+ # All rights reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ import logging
6
+ import uuid
7
+ from typing import Callable, Optional, Union, Dict, List, Type
8
+
9
+ import ray
10
+ from pydantic import BaseModel
11
+
12
+ from nv_ingest.framework.orchestration.ray.stages.meta.ray_actor_stage_base import RayActorStage
13
+ from nv_ingest.framework.util.flow_control import filter_by_task
14
+ from nv_ingest_api.internal.primitives.tracing.tagging import traceable
15
+ from nv_ingest_api.util.exception_handlers.decorators import nv_ingest_node_failure_try_except
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
def wrap_callable_as_stage(
    fn: Callable[[object, BaseModel], object],
    schema_type: Type[BaseModel],
    *,
    required_tasks: Optional[List[str]] = None,
    trace_id: Optional[str] = None,
):
    """
    Factory to wrap a user-supplied function into a Ray actor, returning a proxy
    for unique, isolated dynamic actor creation.

    Parameters
    ----------
    fn : Callable[[IngestControlMessage, BaseModel], IngestControlMessage]
        The processing function to be wrapped in the Ray actor.
    schema_type : Type[BaseModel]
        Pydantic schema used to validate and pass the stage config.
    required_tasks : Optional[List[str]], optional
        Task names this stage should filter on. If None (or empty), no filtering is applied.
    trace_id : Optional[str], optional
        Optional name for trace annotation; defaults to the function name.

    Returns
    -------
    StageProxy : type
        A factory-like proxy class exposing `.remote()`, `.options()` and
        `.actor_class()`. It cannot be instantiated.

    Notes
    -----
    - Each call to `.remote()`, `.options()` or `.actor_class()` generates a new,
      dynamically created class (using `type()`), so every actor gets its own
      distinct Python class object and class name.
    - Only `.remote(config)` and `.options(...)` (chained with `.remote(config)`) are
      supported for actor creation. Instantiating or calling the proxy raises
      `NotImplementedError`.
    """
    trace_name = trace_id or fn.__name__

    # Resolve the optional task filter once. Binding it to a name keeps the
    # decorator line below a plain identifier instead of a conditional expression.
    task_filter = filter_by_task(required_tasks=required_tasks) if required_tasks else (lambda f: f)

    def _unique_actor_name() -> str:
        """Return a per-actor name: the wrapped function's name plus 8 random hex characters."""
        return f"{fn.__name__}_{uuid.uuid4().hex[:8]}"

    def make_actor_class():
        """
        Dynamically construct a fresh RayActorStage subclass on every call.

        Returns
        -------
        new_class : type
            The dynamically constructed RayActorStage subclass, named
            ``LambdaStage_<fn name>_<8 hex chars>``.
        """
        class_name = f"LambdaStage_{fn.__name__}_{uuid.uuid4().hex[:8]}"

        def __init__(self, config: Union[Dict, BaseModel]) -> None:
            """
            Parameters
            ----------
            config : Union[Dict, BaseModel]
                Stage configuration; used as-is when already a `schema_type`
                instance, otherwise unpacked into `schema_type(**config)`.
            """
            validated_config = schema_type(**config) if not isinstance(config, schema_type) else config
            # `new_class` is bound later in the enclosing scope; it exists by the
            # time any instance is constructed, so the closure lookup is safe.
            super(new_class, self).__init__(validated_config, log_to_stdout=True)
            self.validated_config = validated_config
            self._logger.info(f"{self.__class__.__name__} initialized with validated config.")

        @traceable(trace_name)
        @nv_ingest_node_failure_try_except(annotation_id=trace_name, raise_on_failure=False)
        @task_filter
        def on_data(self, control_message):
            """
            Process a control message using the wrapped function.

            Parameters
            ----------
            control_message : IngestControlMessage
                The message to be processed.

            Returns
            -------
            IngestControlMessage
                The processed message, or the original message if `fn` raised.
            """
            try:
                return fn(control_message, self.validated_config)
            except Exception as e:
                # Best-effort by design: log, count the error, and pass the
                # unmodified message downstream rather than failing the stage.
                self._logger.exception(f"{self.__class__.__name__} failed: {e}")
                self.stats["errors"] += 1
                return control_message

        # Methods and attributes of the dynamic class; building the class with
        # `type()` gives a new class object on each call.
        class_dict = {
            "__init__": __init__,
            "on_data": on_data,
        }
        bases = (RayActorStage,)
        new_class = type(class_name, bases, class_dict)
        return new_class

    class StageProxy:
        """
        Factory/proxy for dynamic Ray actor creation; not itself a Ray actor.

        Methods
        -------
        remote(config)
            Instantiate a Ray actor with a unique dynamic class and name.
        options(*args, **kwargs)
            Advanced Ray actor configuration (chain with `.remote(config)`).
        actor_class()
            Generates and returns a fresh actor class (for introspection/testing only).
        """

        @staticmethod
        def remote(config):
            """
            Instantiate a Ray actor with a unique dynamic class and name.

            Parameters
            ----------
            config : Union[Dict, BaseModel]
                Stage configuration to pass to the actor.

            Returns
            -------
            ray.actor.ActorHandle
                Handle to the started Ray actor.
            """
            actor_cls = ray.remote(make_actor_class())
            return actor_cls.options(name=_unique_actor_name()).remote(config)

        @staticmethod
        def options(*args, **kwargs):
            """
            Return a Ray actor class with the specified options set.
            Must call `.remote(config)` on the result.

            Parameters
            ----------
            *args
                Positional arguments for Ray actor options.
            **kwargs
                Keyword arguments for Ray actor options (e.g., resources).
                A unique `name` is generated when the caller does not supply one.

            Returns
            -------
            ray.actor.ActorClass
                Ray actor class, requires .remote(config) to instantiate.
            """
            actor_cls = ray.remote(make_actor_class())
            kwargs.setdefault("name", _unique_actor_name())
            return actor_cls.options(*args, **kwargs)

        # For testing or introspection only. The class docstring and the
        # `__getattr__` message both advertise this accessor, so it must exist.
        actor_class = staticmethod(make_actor_class)

        def __new__(cls, *a, **k):
            raise NotImplementedError("StageProxy is a factory, not a Ray actor or class. Use .remote() or .options().")

        def __call__(self, *a, **k):
            raise NotImplementedError("StageProxy is a factory, not a Ray actor or class. Use .remote() or .options().")

        def __getattr__(self, name):
            # `__getattr__` only runs after normal lookup has failed, so the
            # public members never reach this point. Calling getattr(self, name)
            # here would re-enter this hook without end, so just reject the name.
            raise NotImplementedError(
                f"StageProxy does not implement '{name}'. Only .remote(), .options(), .actor_class() are available."
            )

    return StageProxy
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest
3
- Version: 25.6.25.dev20250625
3
+ Version: 25.6.27.dev20250627
4
4
  Summary: Python module for multimodal document ingestion
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -21,7 +21,7 @@ nv_ingest/framework/orchestration/ray/primitives/__init__.py,sha256=wQSlVx3T14Zg
21
21
  nv_ingest/framework/orchestration/ray/primitives/dataclasses.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
22
  nv_ingest/framework/orchestration/ray/primitives/pipeline_monitor.py,sha256=L8ENPiF-lxqhIXVEQwQD5CCqQMb710ynj5D_Y4ixGhs,11077
23
23
  nv_ingest/framework/orchestration/ray/primitives/pipeline_topology.py,sha256=2Xg7QoKKPPFUWkLck7NtEtb1xLnK3b5uUw8LRxPhLyw,29106
24
- nv_ingest/framework/orchestration/ray/primitives/ray_pipeline.py,sha256=BEBLjkYFXIH396EUQcfuxhrWlIMs9i6z7YfeeqJ5cZg,59579
24
+ nv_ingest/framework/orchestration/ray/primitives/ray_pipeline.py,sha256=6i0EGWZ9WXpPVkfLwP5a2Y45gwAhQjWjobTp_kuFPsE,60478
25
25
  nv_ingest/framework/orchestration/ray/primitives/ray_stat_collector.py,sha256=AJ79OTh_NxxoTcyBNiopq3K_nLumsB9UU_axqQS3Gus,15810
26
26
  nv_ingest/framework/orchestration/ray/stages/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
27
27
  nv_ingest/framework/orchestration/ray/stages/extractors/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
@@ -39,7 +39,7 @@ nv_ingest/framework/orchestration/ray/stages/injectors/metadata_injector.py,sha2
39
39
  nv_ingest/framework/orchestration/ray/stages/meta/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
40
40
  nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_edge_base.py,sha256=LnVqBJmpfCmcI-eJLbkwK-7SS-hpEp98P4iCRv_Zhb0,1726
41
41
  nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_sink_stage_base.py,sha256=AhlZUbDK2Jckqnu8hVbJrckW8MsSixfmWc1bst9gRYk,3447
42
- nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_source_stage_base.py,sha256=MtePbSiouDOtNYQ8bQI3hkHSXqkjN9r-NU2lXTz8paM,1793
42
+ nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_source_stage_base.py,sha256=1Pae2xRPK0_QLh53yHECVFm2guwgvZaiRRr3tp4OpYI,1744
43
43
  nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_stage_base.py,sha256=rAuEH8uq8-j4Ipkb1zMB8z_x_PMvxwO9LFN4iY7UXjE,28957
44
44
  nv_ingest/framework/orchestration/ray/stages/mutate/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
45
45
  nv_ingest/framework/orchestration/ray/stages/mutate/image_dedup.py,sha256=UepeDvH6Cfgm5rIylRx6uOxihS0OZ4Q1DGUrjUybNaY,3493
@@ -48,7 +48,7 @@ nv_ingest/framework/orchestration/ray/stages/sinks/__init__.py,sha256=wQSlVx3T14
48
48
  nv_ingest/framework/orchestration/ray/stages/sinks/default_drain.py,sha256=0SQHJlFuXlP16YRWduX1fMKgjhUd7UhDAWQ8XZh4_0I,1471
49
49
  nv_ingest/framework/orchestration/ray/stages/sinks/message_broker_task_sink.py,sha256=enylryvcPmzirpOjCahqYJbNSLsNvv1KpMnOzGqNZQQ,11509
50
50
  nv_ingest/framework/orchestration/ray/stages/sources/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
51
- nv_ingest/framework/orchestration/ray/stages/sources/message_broker_task_source.py,sha256=srDsgp8ExMHZNI76ch3iX7S0drMXmQ3NkWC_udnwqmo,20286
51
+ nv_ingest/framework/orchestration/ray/stages/sources/message_broker_task_source.py,sha256=9YoVytbFFt-RpIR_MN2m3T93zVTjts8tjhi0qzLJkTw,19922
52
52
  nv_ingest/framework/orchestration/ray/stages/storage/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
53
53
  nv_ingest/framework/orchestration/ray/stages/storage/image_storage.py,sha256=6NkwQzseAnaj0Ptpr3oKvab2EnJdMwTjI2p4dS_HzsI,3901
54
54
  nv_ingest/framework/orchestration/ray/stages/storage/store_embeddings.py,sha256=SMLHQElZkKldnjy0_VHIKS65DBAAtOhwhdoaFe1yb9I,3337
@@ -59,7 +59,7 @@ nv_ingest/framework/orchestration/ray/stages/telemetry/otel_tracer.py,sha256=53M
59
59
  nv_ingest/framework/orchestration/ray/stages/transforms/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
60
60
  nv_ingest/framework/orchestration/ray/stages/transforms/image_caption.py,sha256=OuqPmJmCqbg9k7roDivuvfYVTd05Nl9PMC0_E9PHgYw,3514
61
61
  nv_ingest/framework/orchestration/ray/stages/transforms/text_embed.py,sha256=Hh7UXVZSS5LKt3uU5gwWIQ7SYqQrSN9tzFYw4CeUpUA,3535
62
- nv_ingest/framework/orchestration/ray/stages/transforms/text_splitter.py,sha256=ulFDsTuz4oivvm_FKACfN1KH9X33mGH87LfD7rkaJnY,3090
62
+ nv_ingest/framework/orchestration/ray/stages/transforms/text_splitter.py,sha256=hyTIv3MMnuyZmgbBHbXicSgk9DhbpygDrgUtXRwvlmo,4677
63
63
  nv_ingest/framework/orchestration/ray/stages/utility/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
64
64
  nv_ingest/framework/orchestration/ray/stages/utility/throughput_monitor.py,sha256=MB27CkoNeuirN6CUHgjsC5Wh958NF7m_N7HE4VKfx3k,2264
65
65
  nv_ingest/framework/orchestration/ray/util/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
@@ -67,7 +67,8 @@ nv_ingest/framework/orchestration/ray/util/pipeline/__init__.py,sha256=wQSlVx3T1
67
67
  nv_ingest/framework/orchestration/ray/util/pipeline/pid_controller.py,sha256=AWyCFPP41vp1NOkO2urqm7vh-sTGKypJxwhdq8HxK6Q,50681
68
68
  nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_builders.py,sha256=jMYnVe_0rb1OIO9mlB4LH3uXtgaXBbUG-rDPx6fe6J8,10456
69
69
  nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_runners.py,sha256=IKQHlEwe0xsjr4MgQJVL0UtnKha1qaoPFc08DF5QzMM,14351
70
- nv_ingest/framework/orchestration/ray/util/pipeline/stage_builders.py,sha256=ZFJkeJNbDM_GsedUlfk2B8kI93L_MNK6gxPgeryZM6I,21463
70
+ nv_ingest/framework/orchestration/ray/util/pipeline/stage_builders.py,sha256=zExjTOJOQbPuxTYoa54tYoIhvRQQWSkPJRVp47vYY64,21434
71
+ nv_ingest/framework/orchestration/ray/util/pipeline/tools.py,sha256=LQVb8k9jURaxh2Ga44Js_XuYFCbeN4_nLgDmtExovQg,8026
71
72
  nv_ingest/framework/orchestration/ray/util/system_tools/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
72
73
  nv_ingest/framework/orchestration/ray/util/system_tools/memory.py,sha256=ICqY0LLB3hFTZk03iX5yffMSKFH2q_aQomtDVzS_mKw,2228
73
74
  nv_ingest/framework/orchestration/ray/util/system_tools/visualizers.py,sha256=2oHZdO_3L1LGuzpyNmZBDh19n0E-APAaHk4MEwBwSHs,12895
@@ -95,8 +96,8 @@ nv_ingest/framework/util/service/meta/ingest/__init__.py,sha256=wQSlVx3T14ZgQAt-
95
96
  nv_ingest/framework/util/service/meta/ingest/ingest_service_meta.py,sha256=QS3uNxWBl5dIcmIpJKNe8_TLcTUuN2vcKyHeAwa-eSo,1589
96
97
  nv_ingest/framework/util/telemetry/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
97
98
  nv_ingest/framework/util/telemetry/global_stats.py,sha256=nq65pEEdiwjAfGiqsxG1CeQMC96O3CfQxsZuGFCY-ds,4554
98
- nv_ingest-25.6.25.dev20250625.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
99
- nv_ingest-25.6.25.dev20250625.dist-info/METADATA,sha256=uYNf7IuKHG8WrBE5U18jPcdxxF0t6prLbhma8Q8uKvI,15140
100
- nv_ingest-25.6.25.dev20250625.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
101
- nv_ingest-25.6.25.dev20250625.dist-info/top_level.txt,sha256=sjb0ajIsgn3YgftSjZHlYO0HjYAIIhNuXG_AmywCvaU,10
102
- nv_ingest-25.6.25.dev20250625.dist-info/RECORD,,
99
+ nv_ingest-25.6.27.dev20250627.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
100
+ nv_ingest-25.6.27.dev20250627.dist-info/METADATA,sha256=XQl_fLAl_V1RSmIEoK9OF2_EBziXEQYAYiXhzfDEc5c,15140
101
+ nv_ingest-25.6.27.dev20250627.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
102
+ nv_ingest-25.6.27.dev20250627.dist-info/top_level.txt,sha256=sjb0ajIsgn3YgftSjZHlYO0HjYAIIhNuXG_AmywCvaU,10
103
+ nv_ingest-25.6.27.dev20250627.dist-info/RECORD,,