nv-ingest 2025.8.4.dev20250804__py3-none-any.whl → 2025.12.10.dev20251210__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. nv_ingest/api/__init__.py +6 -0
  2. nv_ingest/api/main.py +2 -0
  3. nv_ingest/api/tracing.py +82 -0
  4. nv_ingest/api/v2/README.md +203 -0
  5. nv_ingest/api/v2/__init__.py +3 -0
  6. nv_ingest/api/v2/ingest.py +1300 -0
  7. nv_ingest/framework/orchestration/execution/__init__.py +3 -0
  8. nv_ingest/framework/orchestration/execution/helpers.py +85 -0
  9. nv_ingest/framework/orchestration/execution/options.py +112 -0
  10. nv_ingest/framework/orchestration/process/__init__.py +3 -0
  11. nv_ingest/framework/orchestration/process/dependent_services.py +84 -0
  12. nv_ingest/framework/orchestration/process/execution.py +495 -0
  13. nv_ingest/framework/orchestration/process/lifecycle.py +214 -0
  14. nv_ingest/framework/orchestration/process/strategies.py +218 -0
  15. nv_ingest/framework/orchestration/process/termination.py +147 -0
  16. nv_ingest/framework/orchestration/ray/examples/pipeline_test_harness.py +3 -3
  17. nv_ingest/framework/orchestration/ray/primitives/pipeline_topology.py +4 -4
  18. nv_ingest/framework/orchestration/ray/primitives/ray_pipeline.py +32 -38
  19. nv_ingest/framework/orchestration/ray/primitives/ray_stat_collector.py +5 -5
  20. nv_ingest/framework/orchestration/ray/stages/extractors/audio_extractor.py +10 -7
  21. nv_ingest/framework/orchestration/ray/stages/extractors/chart_extractor.py +17 -14
  22. nv_ingest/framework/orchestration/ray/stages/extractors/docx_extractor.py +11 -6
  23. nv_ingest/framework/orchestration/ray/stages/extractors/html_extractor.py +10 -5
  24. nv_ingest/framework/orchestration/ray/stages/extractors/image_extractor.py +12 -7
  25. nv_ingest/framework/orchestration/ray/stages/extractors/infographic_extractor.py +22 -10
  26. nv_ingest/framework/orchestration/ray/stages/extractors/ocr_extractor.py +71 -0
  27. nv_ingest/framework/orchestration/ray/stages/extractors/pdf_extractor.py +19 -15
  28. nv_ingest/framework/orchestration/ray/stages/extractors/pptx_extractor.py +10 -5
  29. nv_ingest/framework/orchestration/ray/stages/extractors/table_extractor.py +16 -14
  30. nv_ingest/framework/orchestration/ray/stages/injectors/metadata_injector.py +16 -13
  31. nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_sink_stage_base.py +3 -0
  32. nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_source_stage_base.py +3 -3
  33. nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_stage_base.py +92 -4
  34. nv_ingest/framework/orchestration/ray/stages/mutate/image_dedup.py +12 -8
  35. nv_ingest/framework/orchestration/ray/stages/mutate/image_filter.py +12 -9
  36. nv_ingest/framework/orchestration/ray/stages/sinks/default_drain.py +4 -4
  37. nv_ingest/framework/orchestration/ray/stages/sinks/message_broker_task_sink.py +5 -2
  38. nv_ingest/framework/orchestration/ray/stages/sources/message_broker_task_source.py +116 -69
  39. nv_ingest/framework/orchestration/ray/stages/storage/image_storage.py +79 -11
  40. nv_ingest/framework/orchestration/ray/stages/storage/store_embeddings.py +10 -5
  41. nv_ingest/framework/orchestration/ray/stages/telemetry/job_counter.py +8 -4
  42. nv_ingest/framework/orchestration/ray/stages/telemetry/otel_tracer.py +17 -7
  43. nv_ingest/framework/orchestration/ray/stages/transforms/image_caption.py +12 -6
  44. nv_ingest/framework/orchestration/ray/stages/transforms/text_embed.py +17 -18
  45. nv_ingest/framework/orchestration/ray/stages/transforms/text_splitter.py +21 -14
  46. nv_ingest/framework/orchestration/ray/stages/utility/throughput_monitor.py +11 -3
  47. nv_ingest/framework/orchestration/ray/util/pipeline/pid_controller.py +1 -2
  48. nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_runners.py +33 -326
  49. nv_ingest/framework/orchestration/ray/util/pipeline/tools.py +13 -3
  50. nv_ingest/framework/util/flow_control/udf_intercept.py +352 -0
  51. nv_ingest/framework/util/service/impl/ingest/redis_ingest_service.py +215 -11
  52. nv_ingest/pipeline/__init__.py +3 -0
  53. nv_ingest/pipeline/config/__init__.py +3 -0
  54. nv_ingest/pipeline/config/loaders.py +229 -0
  55. nv_ingest/pipeline/config/replica_resolver.py +237 -0
  56. nv_ingest/pipeline/default_libmode_pipeline_impl.py +528 -0
  57. nv_ingest/pipeline/default_pipeline_impl.py +557 -0
  58. nv_ingest/pipeline/ingest_pipeline.py +389 -0
  59. nv_ingest/pipeline/pipeline_schema.py +398 -0
  60. {nv_ingest-2025.8.4.dev20250804.dist-info → nv_ingest-2025.12.10.dev20251210.dist-info}/METADATA +6 -3
  61. {nv_ingest-2025.8.4.dev20250804.dist-info → nv_ingest-2025.12.10.dev20251210.dist-info}/RECORD +64 -43
  62. nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_builders.py +0 -359
  63. nv_ingest/framework/orchestration/ray/util/pipeline/stage_builders.py +0 -649
  64. {nv_ingest-2025.8.4.dev20250804.dist-info → nv_ingest-2025.12.10.dev20251210.dist-info}/WHEEL +0 -0
  65. {nv_ingest-2025.8.4.dev20250804.dist-info → nv_ingest-2025.12.10.dev20251210.dist-info}/licenses/LICENSE +0 -0
  66. {nv_ingest-2025.8.4.dev20250804.dist-info → nv_ingest-2025.12.10.dev20251210.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,398 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-25, NVIDIA CORPORATION & AFFILIATES.
2
+ # All rights reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ from enum import Enum
6
+ from typing import Dict, Any, List, Optional, Set, Union
7
+ from pydantic import BaseModel, Field, field_validator, model_validator, ConfigDict
8
+
9
+ from nv_ingest_api.internal.enums.common import PipelinePhase
10
+
11
+
12
class StageType(str, Enum):
    """
    Kind of node a stage represents in the pipeline graph.

    Members are also ``str`` instances, so they compare equal to their raw
    string values (for example ``StageType.SOURCE == "source"``).
    """

    # Three kinds are defined: source, regular stage, and sink.
    SOURCE = "source"
    STAGE = "stage"
    SINK = "sink"
20
+
21
+
22
class ReplicaCalculationStrategy(str, Enum):
    """
    How a stage's replica count is derived at runtime.

    Members are also ``str`` instances, so they compare equal to their raw
    string values.
    """

    # Fixed number of replicas.
    STATIC = "static"
    # Percentage of available CPU cores.
    CPU_PERCENTAGE = "cpu_percentage"
    # Based on memory allocation per replica.
    MEMORY_THRESHOLDING = "memory_thresholding"
    # Memory-safe calculation with budget limits.
    MEMORY_STATIC_GLOBAL_PERCENT = "memory_static_global_percent"
31
+
32
+
33
class ReplicaStrategyConfig(BaseModel):
    """
    Configuration for a specific replica calculation strategy.

    Attributes
    ----------
    strategy : ReplicaCalculationStrategy
        The calculation strategy to use.
    value : Optional[Union[int, float]]
        The primary value for the strategy (e.g., static count, CPU percentage).
    limit : Optional[int]
        Optional upper limit for calculated replicas. Must be >= 1 when given.
    cpu_percent : Optional[float]
        CPU percentage for CPU_PERCENTAGE strategy (0.0 to 1.0). When omitted,
        it is filled in from ``value`` during validation.
    memory_per_replica_mb : Optional[int]
        Expected memory usage per replica in MB. Must be > 0 when given.
    memory_threshold_percent : Optional[float]
        Memory threshold percentage for MEMORY_THRESHOLDING strategy (0.0 to 1.0).
    max_memory_budget_mb : Optional[int]
        Maximum memory budget for MEMORY_STATIC_GLOBAL_PERCENT strategy in MB.
    """

    strategy: ReplicaCalculationStrategy = Field(..., description="The calculation strategy to use.")
    value: Optional[Union[int, float]] = Field(None, description="Primary value for the strategy.")
    limit: Optional[int] = Field(None, description="Optional upper limit for calculated replicas.", ge=1)
    cpu_percent: Optional[float] = Field(
        None, description="CPU percentage for CPU_PERCENTAGE strategy.", ge=0.0, le=1.0
    )
    memory_per_replica_mb: Optional[int] = Field(None, description="Expected memory usage per replica in MB.", gt=0)
    memory_threshold_percent: Optional[float] = Field(
        None, description="Memory threshold percentage for MEMORY_THRESHOLDING strategy.", ge=0.0, le=1.0
    )
    max_memory_budget_mb: Optional[int] = Field(
        None, description="Maximum memory budget for MEMORY_STATIC_GLOBAL_PERCENT strategy in MB.", gt=0
    )

    @model_validator(mode="after")
    def validate_strategy_config(self) -> "ReplicaStrategyConfig":
        """
        Validate that required fields are present for each strategy.

        Returns
        -------
        ReplicaStrategyConfig
            The validated instance. For CPU_PERCENTAGE, ``cpu_percent`` is
            populated from ``value`` when it was not given explicitly.

        Raises
        ------
        ValueError
            If the selected strategy is missing a field it requires, or if the
            ``value`` used as a CPU percentage falls outside 0.0 to 1.0.
        """
        if self.strategy == ReplicaCalculationStrategy.STATIC:
            if self.value is None or not isinstance(self.value, int):
                raise ValueError("STATIC strategy requires 'value' as an integer")
        elif self.strategy == ReplicaCalculationStrategy.CPU_PERCENTAGE:
            if self.cpu_percent is None:
                if self.value is None or not isinstance(self.value, (int, float)):
                    raise ValueError("CPU_PERCENTAGE strategy requires 'cpu_percent' or 'value' as a float")
                fallback_percent = float(self.value)
                # Assigning inside an "after" validator does not re-run the
                # field constraints, so enforce the same 0.0-1.0 bound that
                # 'cpu_percent' declares before copying the value over.
                if not 0.0 <= fallback_percent <= 1.0:
                    raise ValueError(
                        "CPU_PERCENTAGE strategy requires 'value' between 0.0 and 1.0 when used as 'cpu_percent'"
                    )
                self.cpu_percent = fallback_percent
        elif self.strategy == ReplicaCalculationStrategy.MEMORY_THRESHOLDING:
            if self.memory_per_replica_mb is None:
                raise ValueError("MEMORY_THRESHOLDING strategy requires 'memory_per_replica_mb'")
        elif self.strategy == ReplicaCalculationStrategy.MEMORY_STATIC_GLOBAL_PERCENT:
            if self.memory_per_replica_mb is None:
                raise ValueError("MEMORY_STATIC_GLOBAL_PERCENT strategy requires 'memory_per_replica_mb'")
            # max_memory_budget_mb is optional - uses global static_memory_threshold if not provided
        return self
88
+
89
+
90
class ReplicaConfig(BaseModel):
    """
    Replica settings for a stage, covering dynamic and static scaling modes.

    Two mutually exclusive styles are accepted: the legacy ``cpu_count_*`` /
    ``cpu_percent_*`` bounds, or the newer ``min_replicas`` / ``max_replicas`` /
    ``static_replicas`` fields. When no field at all is supplied, validation
    fills in ``min_replicas=0`` and ``max_replicas=1``.

    Attributes
    ----------
    cpu_count_min : Optional[int]
        Absolute minimum number of replicas. Must be >= 0. (Legacy support)
    cpu_count_max : Optional[int]
        Absolute maximum number of replicas. Must be >= 1. (Legacy support)
    cpu_percent_min : Optional[float]
        Minimum number of replicas as a percentage (0.0 to 1.0) of total cores. (Legacy support)
    cpu_percent_max : Optional[float]
        Maximum number of replicas as a percentage (0.0 to 1.0) of total cores. (Legacy support)
    min_replicas : Optional[int]
        Minimum number of replicas for both scaling modes. Must be >= 0.
    max_replicas : Optional[Union[int, ReplicaStrategyConfig]]
        Maximum replicas for dynamic scaling mode. Either a plain int or a strategy config.
    static_replicas : Optional[Union[int, ReplicaStrategyConfig]]
        Replica configuration for static scaling mode. Either a plain int or a strategy config.
    """

    # Unknown keys are rejected rather than silently ignored.
    model_config = ConfigDict(extra="forbid")

    # --- Legacy bounds, kept for backward compatibility ---
    cpu_count_min: Optional[int] = Field(default=None, description="Absolute minimum number of replicas.", ge=0)
    cpu_count_max: Optional[int] = Field(default=None, description="Absolute maximum number of replicas.", ge=1)
    cpu_percent_min: Optional[float] = Field(
        default=None,
        description="Minimum number of replicas as a percentage of total cores.",
        ge=0.0,
        le=1.0,
    )
    cpu_percent_max: Optional[float] = Field(
        default=None,
        description="Maximum number of replicas as a percentage of total cores.",
        ge=0.0,
        le=1.0,
    )

    # --- Newer, more flexible replica fields ---
    min_replicas: Optional[int] = Field(default=None, description="Minimum number of replicas.", ge=0)
    max_replicas: Optional[Union[int, ReplicaStrategyConfig]] = Field(
        default=None, description="Maximum replicas for dynamic scaling mode."
    )
    static_replicas: Optional[Union[int, ReplicaStrategyConfig]] = Field(
        default=None, description="Replica configuration for static scaling mode."
    )

    @model_validator(mode="after")
    def check_exclusive_min_max(self) -> "ReplicaConfig":
        """
        Check that the replica settings are coherent.

        The rules applied, in order:

        1. Legacy fields and new fields may not be combined.
        2. With no field set at all, defaults of ``min_replicas=0`` and
           ``max_replicas=1`` are applied.
        3. Legacy style: a count bound and a percent bound may not both be
           given for the same side, and each min must not exceed its max.
        4. New style: ``min_replicas`` must not exceed ``max_replicas`` or
           ``static_replicas`` when those are plain integers.

        Raises
        ------
        ValueError
            If any of the rules above is violated.
        """
        legacy_values = (self.cpu_count_min, self.cpu_count_max, self.cpu_percent_min, self.cpu_percent_max)
        flexible_values = (self.min_replicas, self.max_replicas, self.static_replicas)

        uses_legacy = not all(entry is None for entry in legacy_values)
        uses_flexible = not all(entry is None for entry in flexible_values)

        if uses_legacy and uses_flexible:
            raise ValueError(
                "Cannot mix legacy replica fields (cpu_count_*, cpu_percent_*) with new fields "
                "(min_replicas, max_replicas, static_replicas). Use one approach or the other."
            )

        if not (uses_legacy or uses_flexible):
            # Nothing configured: fall back to sensible defaults, no further checks needed.
            self.min_replicas = 0
            self.max_replicas = 1
            return self

        if uses_legacy:
            count_lo, count_hi = self.cpu_count_min, self.cpu_count_max
            pct_lo, pct_hi = self.cpu_percent_min, self.cpu_percent_max

            # Each side may be expressed as a count or a percent, never both.
            if count_lo is not None and pct_lo is not None:
                raise ValueError("Cannot specify both cpu_count_min and cpu_percent_min")
            if count_hi is not None and pct_hi is not None:
                raise ValueError("Cannot specify both cpu_count_max and cpu_percent_max")

            # Ordering of the bounds.
            if count_lo is not None and count_hi is not None and count_lo > count_hi:
                raise ValueError("cpu_count_min cannot be greater than cpu_count_max")
            if pct_lo is not None and pct_hi is not None and pct_lo > pct_hi:
                raise ValueError("cpu_percent_min cannot be greater than cpu_percent_max")

            return self

        # New-style fields: only plain integer ceilings can be compared here.
        floor = self.min_replicas
        if floor is not None:
            if isinstance(self.max_replicas, int) and floor > self.max_replicas:
                raise ValueError("min_replicas cannot be greater than max_replicas")
            if isinstance(self.static_replicas, int) and floor > self.static_replicas:
                raise ValueError("min_replicas cannot be greater than static_replicas")

        return self
198
+
199
+
200
class StageConfig(BaseModel):
    """
    Declarative description of one stage in the ingestion pipeline.

    A stage is identified by name and implemented by exactly one of an
    ``actor`` or a ``callable`` import path.

    Attributes
    ----------
    name : str
        A unique name to identify the stage within the pipeline.
    type : StageType
        The type of the stage, which determines how it's added to the RayPipeline.
        Defaults to ``StageType.STAGE``.
    phase : PipelinePhase
        The logical phase of the stage in the pipeline.
    actor : Optional[str]
        The fully qualified import path to the actor class or function that
        implements the stage's logic. Mutually exclusive with 'callable'.
    callable : Optional[str]
        The fully qualified import path to a callable function that
        implements the stage's logic. Mutually exclusive with 'actor'.
    task_filters : Optional[List[Any]]
        List of task types this callable stage should filter for. Only applies to callable stages.
        Supports both simple strings (e.g., "udf") and complex filters (e.g., ["udf", {"phase": 5}]).
    enabled : bool
        A flag to indicate whether the stage should be included in the pipeline.
        If False, the stage and its connected edges are ignored.
    config : Dict[str, Any]
        A dictionary of configuration parameters passed to the stage's actor.
    replicas : ReplicaConfig
        The replica configuration for the stage.
    runs_after : List[str]
        A list of stage names that this stage must be downstream of.
    """

    # Unknown keys are rejected rather than silently ignored.
    model_config = ConfigDict(extra="forbid")

    name: str = Field(default=..., description="Unique name for the stage.")
    type: StageType = Field(default=StageType.STAGE, description="Type of the stage.")
    phase: PipelinePhase = Field(default=..., description="The logical phase of the stage.")
    actor: Optional[str] = Field(default=None, description="Full import path to the stage's actor class or function.")
    callable: Optional[str] = Field(
        default=None, description="Full import path to a callable function for the stage."
    )
    task_filters: Optional[List[Any]] = Field(
        default=None,
        description="List of task types this callable stage should filter for. Only applies to callable stages.",
    )
    enabled: bool = Field(default=True, description="Whether the stage is enabled.")
    config: Dict[str, Any] = Field(default={}, description="Configuration dictionary for the stage.")
    replicas: ReplicaConfig = Field(default_factory=ReplicaConfig, description="Replica configuration.")
    runs_after: List[str] = Field(default_factory=list, description="List of stages this stage must run after.")

    @model_validator(mode="after")
    def check_actor_or_callable(self) -> "StageConfig":
        """
        Require exactly one implementation target: 'actor' or 'callable'.

        Raises
        ------
        ValueError
            If neither target is given, or if both are given.
        """
        supplied_targets = [target for target in (self.actor, self.callable) if target is not None]

        if not supplied_targets:
            raise ValueError("Either 'actor' or 'callable' must be specified")

        if len(supplied_targets) > 1:
            raise ValueError("Cannot specify both 'actor' and 'callable' - they are mutually exclusive")

        return self
262
+
263
+
264
class EdgeConfig(BaseModel):
    """
    A directed connection between two stages, with a bounded queue in between.

    The endpoint fields are populated through the aliases ``from`` and ``to``.

    Attributes
    ----------
    from_stage : str
        The name of the source stage for the edge (alias: ``from``).
    to_stage : str
        The name of the destination stage for the edge (alias: ``to``).
    queue_size : int
        The maximum number of items in the queue between the two stages.
        Must be > 0; defaults to 100.
    """

    # Unknown keys are rejected rather than silently ignored.
    model_config = ConfigDict(extra="forbid")

    from_stage: str = Field(default=..., alias="from", description="The name of the source stage.")
    to_stage: str = Field(default=..., alias="to", description="The name of the destination stage.")
    queue_size: int = Field(default=100, gt=0, description="The size of the queue between stages.")
286
+
287
+
288
class PIDControllerConfig(BaseModel):
    """
    Tuning parameters for the PID controller used in dynamic scaling.

    Attributes
    ----------
    kp : float
        Proportional gain. Must be > 0; defaults to 0.2.
    ki : float
        Integral gain. Must be >= 0; defaults to 0.01.
    ema_alpha : float
        Exponential moving average alpha, within 0.0 to 1.0; defaults to 0.1.
    target_queue_depth : int
        Target queue depth. Must be >= 0; defaults to 0.
    penalty_factor : float
        Penalty factor. Must be >= 0; defaults to 0.1.
    error_boost_factor : float
        Error boost factor. Must be > 0; defaults to 1.5.
    rcm_memory_safety_buffer_fraction : float
        Resource constraint manager memory safety buffer fraction, within
        0.0 to 1.0; defaults to 0.15.
    """

    # Unknown keys are rejected rather than silently ignored.
    model_config = ConfigDict(extra="forbid")

    kp: float = Field(default=0.2, gt=0.0, description="Proportional gain for the PID controller.")
    ki: float = Field(default=0.01, ge=0.0, description="Integral gain for the PID controller.")
    ema_alpha: float = Field(
        default=0.1,
        ge=0.0,
        le=1.0,
        description="Exponential moving average alpha for the PID controller.",
    )
    target_queue_depth: int = Field(default=0, ge=0, description="Target queue depth for the PID controller.")
    penalty_factor: float = Field(default=0.1, ge=0.0, description="Penalty factor for the PID controller.")
    error_boost_factor: float = Field(default=1.5, gt=0.0, description="Error boost factor for the PID controller.")
    rcm_memory_safety_buffer_fraction: float = Field(
        default=0.15,
        ge=0.0,
        le=1.0,
        description="Resource constraint manager memory safety buffer fraction.",
    )
323
+
324
+
325
class PipelineRuntimeConfig(BaseModel):
    """
    Settings that govern how the pipeline behaves at runtime.

    Attributes
    ----------
    disable_dynamic_scaling : bool
        Whether to disable dynamic scaling of replicas (default: False).
    dynamic_memory_threshold : float
        The memory utilization threshold for dynamic scaling decisions.
        Accepted range is 0.0 to 0.95 (default: 0.75).
    static_memory_threshold : float
        Global memory threshold for static scaling mode, within 0.0 to 1.0
        (default: 0.75).
    pid_controller : PIDControllerConfig
        PID controller configuration for dynamic scaling. A default
        PIDControllerConfig is created when omitted.
    launch_simple_broker : bool
        If True, launches a simple message broker for the pipeline (default: False).
    """

    # Unknown keys are rejected rather than silently ignored.
    model_config = ConfigDict(extra="forbid")

    disable_dynamic_scaling: bool = Field(default=False, description="Disable dynamic scaling of stage replicas.")
    dynamic_memory_threshold: float = Field(
        default=0.75,
        ge=0.0,
        le=0.95,
        description="Memory utilization threshold for dynamic scaling.",
    )
    static_memory_threshold: float = Field(
        default=0.75,
        ge=0.0,
        le=1.0,
        description="Global memory threshold for static scaling mode.",
    )
    pid_controller: PIDControllerConfig = Field(
        default_factory=PIDControllerConfig,
        description="PID controller configuration for dynamic scaling.",
    )
    launch_simple_broker: bool = Field(default=False, description="Launch a simple message broker for the pipeline.")
356
+
357
+
358
class PipelineConfigSchema(BaseModel):
    """
    Root configuration model for an ingestion pipeline.

    This model represents the entire declarative configuration for an ingestion
    pipeline, including all stages and the edges that connect them.

    Attributes
    ----------
    name : str
        The name of the pipeline.
    description : str
        A description of the pipeline.
    stages : List[StageConfig]
        A list of all stage configurations in the pipeline. Must not be empty.
    edges : List[EdgeConfig]
        A list of all edge configurations that define the pipeline's topology.
        Must not be empty.
    pipeline : Optional[PipelineRuntimeConfig]
        Runtime configuration for the pipeline. A default PipelineRuntimeConfig
        is created when the field is omitted.
    """

    name: str = Field(..., description="The name of the pipeline.")
    description: str = Field(..., description="A description of the pipeline.")
    stages: List[StageConfig] = Field(..., description="List of all stages in the pipeline.")
    edges: List[EdgeConfig] = Field(..., description="List of all edges connecting the stages.")
    pipeline: Optional[PipelineRuntimeConfig] = Field(
        default_factory=PipelineRuntimeConfig, description="Runtime configuration for the pipeline."
    )

    @field_validator("stages", "edges")
    @classmethod
    def check_not_empty(cls, v: list) -> list:
        """
        Validate that the list is not empty.

        Parameters
        ----------
        v : list
            The validated ``stages`` or ``edges`` list.

        Returns
        -------
        list
            The same list, unchanged.

        Raises
        ------
        ValueError
            If the list is empty.
        """
        if not v:
            raise ValueError("must not be empty")
        return v

    def get_phases(self) -> Set[PipelinePhase]:
        """Return the set of all unique phases used by the pipeline's stages."""
        return {stage.phase for stage in self.stages}

    model_config = ConfigDict(extra="forbid")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest
3
- Version: 2025.8.4.dev20250804
3
+ Version: 2025.12.10.dev20251210
4
4
  Summary: Python module for multimodal document ingestion
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -219,16 +219,19 @@ Requires-Dist: diskcache>=5.6.3
219
219
  Requires-Dist: fastapi>=0.115.6
220
220
  Requires-Dist: fastparquet>=2024.11.0
221
221
  Requires-Dist: fsspec>=2024.10.0
222
+ Requires-Dist: universal_pathlib>=0.2.6
223
+ Requires-Dist: s3fs>=2024.10.0
222
224
  Requires-Dist: gunicorn
223
225
  Requires-Dist: h11>=0.16.0
224
226
  Requires-Dist: httpx>=0.28.1
225
227
  Requires-Dist: isodate>=0.7.2
226
228
  Requires-Dist: langdetect>=1.0.9
227
229
  Requires-Dist: minio>=7.2.12
228
- Requires-Dist: openai>=1.82.0
230
+ Requires-Dist: librosa>=0.10.2
229
231
  Requires-Dist: opentelemetry-api>=1.27.0
230
232
  Requires-Dist: opentelemetry-exporter-otlp>=1.27.0
231
233
  Requires-Dist: opentelemetry-sdk>=1.27.0
234
+ Requires-Dist: psutil>=7.1.0
232
235
  Requires-Dist: pydantic>2.0.0
233
236
  Requires-Dist: pydantic-settings>2.0.0
234
237
  Requires-Dist: pypdfium2==4.30.0
@@ -240,7 +243,7 @@ Requires-Dist: python-docx>=1.1.2
240
243
  Requires-Dist: python-dotenv>=1.0.1
241
244
  Requires-Dist: python-pptx>=1.0.2
242
245
  Requires-Dist: prometheus-client
243
- Requires-Dist: ray[all]>=2.37.0
246
+ Requires-Dist: ray[all]>=2.49.0
244
247
  Requires-Dist: redis>=5.2.1
245
248
  Requires-Dist: requests>=2.28.2
246
249
  Requires-Dist: scikit-learn>=1.6.0
@@ -1,75 +1,87 @@
1
1
  nv_ingest/__init__.py,sha256=vJLPeuxiIHqbxXPJSu9qe3MS-GPavbOUExyRq83DxxM,895
2
2
  nv_ingest/version.py,sha256=MG7DxlzpnoJI56vqxwzs9WeMAEI3uPhfDiNLs6GN6wI,986
3
- nv_ingest/api/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
4
- nv_ingest/api/main.py,sha256=XE-p4lJp1E7CCDOB8ENtYFrf63Dtq2bzQiGxpRfL2LA,1603
3
+ nv_ingest/api/__init__.py,sha256=ED07QUqwVyJalH0ahhnnjvc2W_in6TpZZ5nJ6NWU9-Y,271
4
+ nv_ingest/api/main.py,sha256=uCCkUNLS1xE9TDYKDOdxEfo_9jQWumpQAPWrxj5m9Go,1706
5
+ nv_ingest/api/tracing.py,sha256=NkqMuUiB6ixGU5MYp3TrODsZDQepJ1kbH8JFHsYjuE0,2940
5
6
  nv_ingest/api/v1/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
6
7
  nv_ingest/api/v1/health.py,sha256=pV-RoVq5y0iBPp0qZoLzd1xKpd0JiHAi0UMyMj99LqU,4740
7
8
  nv_ingest/api/v1/ingest.py,sha256=LWk3LN4lBd3uO8h30EN42g3LHCVcO00avVd5ohVK7NI,19392
8
9
  nv_ingest/api/v1/metrics.py,sha256=ZGVRApYLnzc2f2C7wRgGd7deqiXan-jxfA-33a16clY,981
10
+ nv_ingest/api/v2/README.md,sha256=VhpdjEmCyr3qIOhwqISFx9C5WezJFcxYc-NB9S98HMg,7562
11
+ nv_ingest/api/v2/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
12
+ nv_ingest/api/v2/ingest.py,sha256=vjjb2xOOtlTVoTMc4rNdUI6yKYdEeR-umA_pwP_Rt64,53103
9
13
  nv_ingest/framework/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
10
14
  nv_ingest/framework/orchestration/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
15
+ nv_ingest/framework/orchestration/execution/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
16
+ nv_ingest/framework/orchestration/execution/helpers.py,sha256=-F8SZh7ISWtzJz6X1O2LQ133t-17Jxi8lL-NHz4rwj0,2818
17
+ nv_ingest/framework/orchestration/execution/options.py,sha256=Ms1t4591EIv4ZrMRdhsCYPgLnMVXJosG3MURCbPXUoA,3983
18
+ nv_ingest/framework/orchestration/process/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
19
+ nv_ingest/framework/orchestration/process/dependent_services.py,sha256=s0j_rsFtCKHFIuvOkBe9NEAkPNPhSYse_ApeHka8gyg,3032
20
+ nv_ingest/framework/orchestration/process/execution.py,sha256=P1kzpYV23e4QYrKw9Td1TCZK3CK1ENVqqnI_axRCqBk,19814
21
+ nv_ingest/framework/orchestration/process/lifecycle.py,sha256=L5NDwnzSMQPGjqJDC8jC75L1YqWey-dtK8N_HgBzb0E,8001
22
+ nv_ingest/framework/orchestration/process/strategies.py,sha256=Q1Q04PPseF775omeS0FoXfK187NiS_bbqTaaJRwzKn8,7972
23
+ nv_ingest/framework/orchestration/process/termination.py,sha256=PAogFeW0FATFS6Mcp_UkZgq_SbWV18RtdZN-0NbComw,5042
11
24
  nv_ingest/framework/orchestration/ray/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
12
25
  nv_ingest/framework/orchestration/ray/edges/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
13
26
  nv_ingest/framework/orchestration/ray/edges/async_queue_edge.py,sha256=PQliU_kyGbO9o42njpb8FrDMLrbLqwZzmBNXifxyG5Y,2312
14
27
  nv_ingest/framework/orchestration/ray/edges/ray_queue_edge.py,sha256=VFii2yxJuikimOxie3edKq5JN06g78AF8bdHSHVX8p8,2677
15
28
  nv_ingest/framework/orchestration/ray/edges/threaded_queue_edge.py,sha256=N6NH4KgZJ60e_JkGRcSmfQtX37qtX4TMcavOR-n3heE,2549
16
29
  nv_ingest/framework/orchestration/ray/examples/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
17
- nv_ingest/framework/orchestration/ray/examples/pipeline_test_harness.py,sha256=DufjmNm-05uTkq_Mz0QQB6fHw_Rl9eX3PRtnH4sntGs,16405
30
+ nv_ingest/framework/orchestration/ray/examples/pipeline_test_harness.py,sha256=Bn4rjkO14BwvvUNG_HBCSVXetYk7DKqRRsYHJADWqjc,16455
18
31
  nv_ingest/framework/orchestration/ray/examples/task_source_harness.py,sha256=Yt7uxThg7s8WuMiaHLKC8r1XAG7QixegfkT-juE5oNw,1953
19
32
  nv_ingest/framework/orchestration/ray/examples/task_source_sink_harness.py,sha256=XkvsoIzH5ftXvAZ4ox7mxbx7ESVx6D8Xupcwbqgd52w,3277
20
33
  nv_ingest/framework/orchestration/ray/primitives/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
21
34
  nv_ingest/framework/orchestration/ray/primitives/dataclasses.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
35
  nv_ingest/framework/orchestration/ray/primitives/pipeline_monitor.py,sha256=L8ENPiF-lxqhIXVEQwQD5CCqQMb710ynj5D_Y4ixGhs,11077
23
- nv_ingest/framework/orchestration/ray/primitives/pipeline_topology.py,sha256=IxLQYHYc9BnMOi73TSJzuhl8KOJAwlwwp8SPdUkV3nE,15737
24
- nv_ingest/framework/orchestration/ray/primitives/ray_pipeline.py,sha256=HGJ_TyLTKKRl10HWfyx3D-n-zrFY0Fg9TN74UbOeCm8,66584
25
- nv_ingest/framework/orchestration/ray/primitives/ray_stat_collector.py,sha256=8SpZzulHatqah7U3YHJMTLaYyPlWdCoaer_oNjhmHZo,17221
36
+ nv_ingest/framework/orchestration/ray/primitives/pipeline_topology.py,sha256=yd2sb_q_FbBLDWiYgdKRhpPVAIl26Wg8w3yelZ7c5zQ,15741
37
+ nv_ingest/framework/orchestration/ray/primitives/ray_pipeline.py,sha256=t9lf6zTjl_P5pe8mW-5F5pbZoC5mlcvEn2RCaLn_3Tk,66521
38
+ nv_ingest/framework/orchestration/ray/primitives/ray_stat_collector.py,sha256=GGY6_i6_g5xTFzdo9Qmsu9i4knMTq6pJfgm-aaPEt_o,17226
26
39
  nv_ingest/framework/orchestration/ray/stages/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
27
40
  nv_ingest/framework/orchestration/ray/stages/extractors/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
28
- nv_ingest/framework/orchestration/ray/stages/extractors/audio_extractor.py,sha256=KV4hvY0NTGG8CjZviTgcFLQzaH8WJJGkkb9PFYbROww,3417
29
- nv_ingest/framework/orchestration/ray/stages/extractors/chart_extractor.py,sha256=tydluNNXfZYSo-0eqqafB59icF3SaeLXWcMrZ6OzlyQ,3998
30
- nv_ingest/framework/orchestration/ray/stages/extractors/docx_extractor.py,sha256=tSa3Z4vK6sYJ6RBNMa7_FiuOwUaDUl0rTJ6agGbI5y0,3426
31
- nv_ingest/framework/orchestration/ray/stages/extractors/html_extractor.py,sha256=fyr0oXokhuaGQrNu5rKyH_qNMD12AS1xPDxKgA26YHE,3426
32
- nv_ingest/framework/orchestration/ray/stages/extractors/image_extractor.py,sha256=c-qlLGSizLOgKqH7wl_c8dGOVKYxLtXhZEHLXil4Jc4,3734
33
- nv_ingest/framework/orchestration/ray/stages/extractors/infographic_extractor.py,sha256=dmgvzGMxVX81g7TpZO1ACnRh7sdtpc7YX5KK2QW26U4,2565
34
- nv_ingest/framework/orchestration/ray/stages/extractors/pdf_extractor.py,sha256=BUVuYOCGyPdPpacVhL5rnvA56hydnBip7tPaWTXaT1c,4650
35
- nv_ingest/framework/orchestration/ray/stages/extractors/pptx_extractor.py,sha256=ywPGA-3GNsbp3FWFsu04foumM6ZCccRrm73ijS7oY0g,3581
36
- nv_ingest/framework/orchestration/ray/stages/extractors/table_extractor.py,sha256=EOcjyJYAB3TuXewZFld4shnGQUQ9VysjPrIWnmb8zuI,3893
41
+ nv_ingest/framework/orchestration/ray/stages/extractors/audio_extractor.py,sha256=UVp_kDmkaBlfO0Mbl_IxKq6imzLvs4-DKHgUHJIh3mo,3629
42
+ nv_ingest/framework/orchestration/ray/stages/extractors/chart_extractor.py,sha256=rfaDx6PqRCguhSYkJI6iVmMMtAlJNxzKfUrLmw_fKqs,4381
43
+ nv_ingest/framework/orchestration/ray/stages/extractors/docx_extractor.py,sha256=R4vshPcAUN2U6BIv8BCZQ862wLx8RJhCGXfpQ3K09Bs,3627
44
+ nv_ingest/framework/orchestration/ray/stages/extractors/html_extractor.py,sha256=7JrZSVIrK4_wr2s7TOTss7pgTY2F9GPQ7Ze3F_WFlKU,3642
45
+ nv_ingest/framework/orchestration/ray/stages/extractors/image_extractor.py,sha256=iY9fEfucfgCmO2ixX6qwn418J97nJz_FQGh7B6yziVo,3980
46
+ nv_ingest/framework/orchestration/ray/stages/extractors/infographic_extractor.py,sha256=v5J7dnJBEaDfjoTz_N_yC3RAt6lwMLgLT28V-ahquLE,3261
47
+ nv_ingest/framework/orchestration/ray/stages/extractors/ocr_extractor.py,sha256=pwVoA5-CF9GVWusoFZOMGBvSyW5udD9bdxVJXA_SghE,3188
48
+ nv_ingest/framework/orchestration/ray/stages/extractors/pdf_extractor.py,sha256=QagIA99AsHLihjRbXm-2BphdoQGHwzOHlqLyz7oDOSk,4992
49
+ nv_ingest/framework/orchestration/ray/stages/extractors/pptx_extractor.py,sha256=RMbbl7Cuj4BT-TcgUx_0k8R-DLdw-o3fHxcIBIgrWt4,3776
50
+ nv_ingest/framework/orchestration/ray/stages/extractors/table_extractor.py,sha256=p71ktv6v5T-9npYpCbgbwW6-fS-65UWS7rCm8OWr2Bc,4170
37
51
  nv_ingest/framework/orchestration/ray/stages/injectors/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
38
- nv_ingest/framework/orchestration/ray/stages/injectors/metadata_injector.py,sha256=K8jase7PD9kd8AuntzjdS1IO4ae8Oo_6byZsFG777D0,6838
52
+ nv_ingest/framework/orchestration/ray/stages/injectors/metadata_injector.py,sha256=gTPRFOoUGcwNrmPeqv4n5VmU-LBkha9QTYHO0ntiaIk,7116
39
53
  nv_ingest/framework/orchestration/ray/stages/meta/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
40
54
  nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_edge_base.py,sha256=LnVqBJmpfCmcI-eJLbkwK-7SS-hpEp98P4iCRv_Zhb0,1726
41
- nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_sink_stage_base.py,sha256=AhlZUbDK2Jckqnu8hVbJrckW8MsSixfmWc1bst9gRYk,3447
42
- nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_source_stage_base.py,sha256=1Pae2xRPK0_QLh53yHECVFm2guwgvZaiRRr3tp4OpYI,1744
43
- nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_stage_base.py,sha256=pvBFsURWoDiAmDWNTLv2pdm5slv-1OnuXxwYvgaKumU,25703
55
+ nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_sink_stage_base.py,sha256=HQJXIuU7VjiQ6fQjHjbNNmIJX5f30cXFB0CJGixgwVo,3633
56
+ nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_source_stage_base.py,sha256=hP25MLTP2bOEEncrYdxPPqeRyRVbij8aEurR1F1ZmhE,1811
57
+ nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_stage_base.py,sha256=qiB_ZU5_3bXgvE9C2rvnXIS0Alm6M5PWLCeQm8ZxOy4,29812
44
58
  nv_ingest/framework/orchestration/ray/stages/mutate/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
45
- nv_ingest/framework/orchestration/ray/stages/mutate/image_dedup.py,sha256=UepeDvH6Cfgm5rIylRx6uOxihS0OZ4Q1DGUrjUybNaY,3493
46
- nv_ingest/framework/orchestration/ray/stages/mutate/image_filter.py,sha256=9ek5rVa4_GVdmVHGMJvbxacRSpIqVoUxgv28lzJwrTQ,3319
59
+ nv_ingest/framework/orchestration/ray/stages/mutate/image_dedup.py,sha256=cPLG3ZEqhZkRiSsbL7cbF1zsvOAimd8K5O-qadUR9Mg,3709
60
+ nv_ingest/framework/orchestration/ray/stages/mutate/image_filter.py,sha256=f1CS8x9uifY1FJ_1lUF0fNNMExvM4zBIF012gxnSpqU,3523
47
61
  nv_ingest/framework/orchestration/ray/stages/sinks/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
48
- nv_ingest/framework/orchestration/ray/stages/sinks/default_drain.py,sha256=0SQHJlFuXlP16YRWduX1fMKgjhUd7UhDAWQ8XZh4_0I,1471
49
- nv_ingest/framework/orchestration/ray/stages/sinks/message_broker_task_sink.py,sha256=enylryvcPmzirpOjCahqYJbNSLsNvv1KpMnOzGqNZQQ,11509
62
+ nv_ingest/framework/orchestration/ray/stages/sinks/default_drain.py,sha256=_USW1Vq8G2Wn-QFdPfFQCrtKG46hHeJvkEGbBxdpbVM,1488
63
+ nv_ingest/framework/orchestration/ray/stages/sinks/message_broker_task_sink.py,sha256=QcvMQXIJ7EWIxty76Mo5Xv38Oj6X2KuS8qXQlf7E1uA,11676
50
64
  nv_ingest/framework/orchestration/ray/stages/sources/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
51
- nv_ingest/framework/orchestration/ray/stages/sources/message_broker_task_source.py,sha256=b9ndnQBB1paR0iRe3NdzQ7BZ2S65LG2jbtjXvvDc_s4,21183
65
+ nv_ingest/framework/orchestration/ray/stages/sources/message_broker_task_source.py,sha256=LrqaWpWyuiAHlpXWKYSyHZJBFegGXfNlpCXrucbK5NM,24067
52
66
  nv_ingest/framework/orchestration/ray/stages/storage/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
53
- nv_ingest/framework/orchestration/ray/stages/storage/image_storage.py,sha256=6NkwQzseAnaj0Ptpr3oKvab2EnJdMwTjI2p4dS_HzsI,3901
54
- nv_ingest/framework/orchestration/ray/stages/storage/store_embeddings.py,sha256=SMLHQElZkKldnjy0_VHIKS65DBAAtOhwhdoaFe1yb9I,3337
67
+ nv_ingest/framework/orchestration/ray/stages/storage/image_storage.py,sha256=f1iA7rjYFA1G1EXqFM6URUi_QRql1Y1OrnMPKONsSqo,6907
68
+ nv_ingest/framework/orchestration/ray/stages/storage/store_embeddings.py,sha256=EUtwhSDf-qGLVEhWEInr1VaLsvpcHUSyzCmHQVai-Ps,3547
55
69
  nv_ingest/framework/orchestration/ray/stages/telemetry/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
56
- nv_ingest/framework/orchestration/ray/stages/telemetry/job_counter.py,sha256=a6-GKP9w02DsMsZk6Fi-MLTRCW2MI7dnO1N2JCQFUIo,2656
70
+ nv_ingest/framework/orchestration/ray/stages/telemetry/job_counter.py,sha256=jEtEUibqs6IS6QakrzWY9zmxSUzuBpg_hzXy2R-I10Y,2870
57
71
  nv_ingest/framework/orchestration/ray/stages/telemetry/otel_meter.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
58
- nv_ingest/framework/orchestration/ray/stages/telemetry/otel_tracer.py,sha256=53M8Xw84gcuRAeHbf4Z_ApLFX9Zkd1KSec_2k-wXl6c,7947
72
+ nv_ingest/framework/orchestration/ray/stages/telemetry/otel_tracer.py,sha256=GqFwRpTlIu2qgb08B3OqVIKUG8QQAwSOHhjvnyL_9UE,8553
59
73
  nv_ingest/framework/orchestration/ray/stages/transforms/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
60
- nv_ingest/framework/orchestration/ray/stages/transforms/image_caption.py,sha256=OuqPmJmCqbg9k7roDivuvfYVTd05Nl9PMC0_E9PHgYw,3514
61
- nv_ingest/framework/orchestration/ray/stages/transforms/text_embed.py,sha256=cERKHTMkfOLGpibApCpGC_8URoIi-wn-Ttv-4b2XpJ0,3556
62
- nv_ingest/framework/orchestration/ray/stages/transforms/text_splitter.py,sha256=hyTIv3MMnuyZmgbBHbXicSgk9DhbpygDrgUtXRwvlmo,4677
74
+ nv_ingest/framework/orchestration/ray/stages/transforms/image_caption.py,sha256=GYF41y_teXMIzB24JQXuesVmvksmgNvTGYU3uU4TzbM,3742
75
+ nv_ingest/framework/orchestration/ray/stages/transforms/text_embed.py,sha256=o6QPd3GCPxbAvQFeb1oByOP5LII-FVkKbmAkBjCioB0,3435
76
+ nv_ingest/framework/orchestration/ray/stages/transforms/text_splitter.py,sha256=FAScWaZijrJHc5F5jgulHP_cdX2telS4pq3spwliFXw,4963
63
77
  nv_ingest/framework/orchestration/ray/stages/utility/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
64
- nv_ingest/framework/orchestration/ray/stages/utility/throughput_monitor.py,sha256=MB27CkoNeuirN6CUHgjsC5Wh958NF7m_N7HE4VKfx3k,2264
78
+ nv_ingest/framework/orchestration/ray/stages/utility/throughput_monitor.py,sha256=J7Qs745rt7cQlR3L2K9U6Mb_BDKUNpl_xoqCZMEPlks,2674
65
79
  nv_ingest/framework/orchestration/ray/util/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
66
80
  nv_ingest/framework/orchestration/ray/util/env_config.py,sha256=GN9msJ_3jdOBIAPnXNxX0ds_BKtHRnRhnYxwzcAU2KY,2386
67
81
  nv_ingest/framework/orchestration/ray/util/pipeline/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
68
- nv_ingest/framework/orchestration/ray/util/pipeline/pid_controller.py,sha256=flRLS7yc5n6gheykayuL3prC7O-ZhcVY2s9Wc14SGWE,47377
69
- nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_builders.py,sha256=cZhx3jamrwyU3v_LYwNlsGa9AuGQeI9aiEKbijuKa4o,17558
70
- nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_runners.py,sha256=-lXiAjxpk3CxIFWi7GW8ggTn1cjyugYZ9EtSfTT20mg,14844
71
- nv_ingest/framework/orchestration/ray/util/pipeline/stage_builders.py,sha256=CD5pAa8oQJVplCr2snzZXJyRjSclcIPGw8aFPI2dU1k,22246
72
- nv_ingest/framework/orchestration/ray/util/pipeline/tools.py,sha256=LQVb8k9jURaxh2Ga44Js_XuYFCbeN4_nLgDmtExovQg,8026
82
+ nv_ingest/framework/orchestration/ray/util/pipeline/pid_controller.py,sha256=0dSDVTv3FXjMZ79sQh4i4YEwnqND5iPw8GAeZI0oJO4,47338
83
+ nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_runners.py,sha256=zWi-6-7dfb_3R00uVi3wdYMH1HgeevkBkg47UY8QqUQ,4386
84
+ nv_ingest/framework/orchestration/ray/util/pipeline/tools.py,sha256=MzxLjElEVb6C5ghfJ7GCp8uqNZeVuzz8xJnxzdQmOsI,8425
73
85
  nv_ingest/framework/orchestration/ray/util/system_tools/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
74
86
  nv_ingest/framework/orchestration/ray/util/system_tools/memory.py,sha256=ICqY0LLB3hFTZk03iX5yffMSKFH2q_aQomtDVzS_mKw,2228
75
87
  nv_ingest/framework/orchestration/ray/util/system_tools/visualizers.py,sha256=2oHZdO_3L1LGuzpyNmZBDh19n0E-APAaHk4MEwBwSHs,12895
@@ -88,17 +100,26 @@ nv_ingest/framework/schemas/framework_vdb_task_sink_schema.py,sha256=gcd27IM2R2Y
88
100
  nv_ingest/framework/util/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
89
101
  nv_ingest/framework/util/flow_control/__init__.py,sha256=blU7s6_dxTG2gHo6YadXslT0J-gkdl6dcCADZgpnjGc,215
90
102
  nv_ingest/framework/util/flow_control/filter_by_task.py,sha256=EMGoNNPiDbfVDGzXfGAxoxllqhFPqRkh9VqCidxyibI,10893
103
+ nv_ingest/framework/util/flow_control/udf_intercept.py,sha256=zQ9uuCcHLEd0P52EiwQB9hY-90X1HashVeyHanZJ3sg,14452
91
104
  nv_ingest/framework/util/service/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
92
105
  nv_ingest/framework/util/service/impl/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
93
106
  nv_ingest/framework/util/service/impl/ingest/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
94
- nv_ingest/framework/util/service/impl/ingest/redis_ingest_service.py,sha256=KbzQFo7qVbCITiKYVPcGN0x4NI8piJy70Dz-8jf59Xs,15415
107
+ nv_ingest/framework/util/service/impl/ingest/redis_ingest_service.py,sha256=59P-BMWnFY37GJm5w23-TMxgLhiZGZpJogC0gjDBaTA,23835
95
108
  nv_ingest/framework/util/service/meta/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
96
109
  nv_ingest/framework/util/service/meta/ingest/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
97
110
  nv_ingest/framework/util/service/meta/ingest/ingest_service_meta.py,sha256=QS3uNxWBl5dIcmIpJKNe8_TLcTUuN2vcKyHeAwa-eSo,1589
98
111
  nv_ingest/framework/util/telemetry/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
99
112
  nv_ingest/framework/util/telemetry/global_stats.py,sha256=nq65pEEdiwjAfGiqsxG1CeQMC96O3CfQxsZuGFCY-ds,4554
100
- nv_ingest-2025.8.4.dev20250804.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
101
- nv_ingest-2025.8.4.dev20250804.dist-info/METADATA,sha256=GEnoU2W_jK4q9Bo551K0HRHWQEyYyaqdtMPQt0lxdCA,15060
102
- nv_ingest-2025.8.4.dev20250804.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
103
- nv_ingest-2025.8.4.dev20250804.dist-info/top_level.txt,sha256=sjb0ajIsgn3YgftSjZHlYO0HjYAIIhNuXG_AmywCvaU,10
104
- nv_ingest-2025.8.4.dev20250804.dist-info/RECORD,,
113
+ nv_ingest/pipeline/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
114
+ nv_ingest/pipeline/default_libmode_pipeline_impl.py,sha256=M31VN1xVTdoiNdjaSSPKEZr-yKhXDSwQ1hAVIkpJZLw,16232
115
+ nv_ingest/pipeline/default_pipeline_impl.py,sha256=TW9N9UcgsBL5SG1pxuSdgBIyFpBORskbHCmvJBmIIuw,16770
116
+ nv_ingest/pipeline/ingest_pipeline.py,sha256=wHAJhqAM2s8nbY-8itVogmSU-yVN4PZONGWcKnhzgfg,17794
117
+ nv_ingest/pipeline/pipeline_schema.py,sha256=rLZZz2It2o2hVNWrZUJU8CarrqRei1fho3ZEMkkoBcg,17940
118
+ nv_ingest/pipeline/config/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
119
+ nv_ingest/pipeline/config/loaders.py,sha256=75Yr9WYO7j7ghvKTnYLfZXQZEH3J3VEZo5J4TunC_Us,7590
120
+ nv_ingest/pipeline/config/replica_resolver.py,sha256=dEwqMXNttfw0QeisTGGkp24785jqzVCDAEFyQIffeGc,9369
121
+ nv_ingest-2025.12.10.dev20251210.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
122
+ nv_ingest-2025.12.10.dev20251210.dist-info/METADATA,sha256=4wQaqrQjyq98-3vTXm-gQsgDmgzyrv8RGC0hsCN7jSs,15163
123
+ nv_ingest-2025.12.10.dev20251210.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
124
+ nv_ingest-2025.12.10.dev20251210.dist-info/top_level.txt,sha256=sjb0ajIsgn3YgftSjZHlYO0HjYAIIhNuXG_AmywCvaU,10
125
+ nv_ingest-2025.12.10.dev20251210.dist-info/RECORD,,