nv-ingest 2025.8.2.dev20250802__tar.gz → 2025.8.4.dev20250804__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nv-ingest might be problematic. Click here for more details.
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/PKG-INFO +1 -1
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_builders.py +161 -2
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/util/pipeline/stage_builders.py +4 -4
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest.egg-info/PKG-INFO +1 -1
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/LICENSE +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/MANIFEST.in +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/__init__.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/api/__init__.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/api/main.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/api/v1/__init__.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/api/v1/health.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/api/v1/ingest.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/api/v1/metrics.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/__init__.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/__init__.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/__init__.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/edges/__init__.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/edges/async_queue_edge.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/edges/ray_queue_edge.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/edges/threaded_queue_edge.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/examples/__init__.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/examples/pipeline_test_harness.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/examples/task_source_harness.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/examples/task_source_sink_harness.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/primitives/__init__.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/primitives/dataclasses.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/primitives/pipeline_monitor.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/primitives/pipeline_topology.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/primitives/ray_pipeline.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/primitives/ray_stat_collector.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/__init__.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/extractors/__init__.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/extractors/audio_extractor.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/extractors/chart_extractor.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/extractors/docx_extractor.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/extractors/html_extractor.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/extractors/image_extractor.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/extractors/infographic_extractor.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/extractors/pdf_extractor.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/extractors/pptx_extractor.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/extractors/table_extractor.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/injectors/__init__.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/injectors/metadata_injector.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/meta/__init__.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_edge_base.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_sink_stage_base.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_source_stage_base.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_stage_base.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/mutate/__init__.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/mutate/image_dedup.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/mutate/image_filter.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/sinks/__init__.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/sinks/default_drain.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/sinks/message_broker_task_sink.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/sources/__init__.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/sources/message_broker_task_source.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/storage/__init__.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/storage/image_storage.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/storage/store_embeddings.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/telemetry/__init__.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/telemetry/job_counter.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/telemetry/otel_meter.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/telemetry/otel_tracer.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/transforms/__init__.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/transforms/image_caption.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/transforms/text_embed.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/transforms/text_splitter.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/utility/__init__.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/utility/throughput_monitor.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/util/__init__.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/util/env_config.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/util/pipeline/__init__.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/util/pipeline/pid_controller.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_runners.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/util/pipeline/tools.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/util/system_tools/__init__.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/util/system_tools/memory.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/util/system_tools/visualizers.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/schemas/__init__.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/schemas/framework_ingest_config_schema.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/schemas/framework_job_counter_schema.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/schemas/framework_message_broker_sink_schema.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/schemas/framework_message_broker_source_schema.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/schemas/framework_message_wrapper_schema.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/schemas/framework_metadata_injector_schema.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/schemas/framework_otel_meter_schema.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/schemas/framework_otel_tracer_schema.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/schemas/framework_processing_job_schema.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/schemas/framework_task_injection_schema.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/schemas/framework_vdb_task_sink_schema.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/util/__init__.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/util/flow_control/__init__.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/util/flow_control/filter_by_task.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/util/service/__init__.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/util/service/impl/__init__.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/util/service/impl/ingest/__init__.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/util/service/impl/ingest/redis_ingest_service.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/util/service/meta/__init__.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/util/service/meta/ingest/__init__.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/util/service/meta/ingest/ingest_service_meta.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/util/telemetry/__init__.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/util/telemetry/global_stats.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/version.py +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest.egg-info/SOURCES.txt +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest.egg-info/dependency_links.txt +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest.egg-info/requires.txt +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest.egg-info/top_level.txt +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/pyproject.toml +0 -0
- {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/setup.cfg +0 -0
|
@@ -9,6 +9,7 @@ import os
|
|
|
9
9
|
from typing import Dict, Any
|
|
10
10
|
|
|
11
11
|
import ray
|
|
12
|
+
from ray import LoggingConfig
|
|
12
13
|
from pydantic import BaseModel
|
|
13
14
|
|
|
14
15
|
from nv_ingest.framework.orchestration.ray.primitives.ray_pipeline import RayPipeline
|
|
@@ -47,16 +48,174 @@ def export_config_to_env(ingest_config: Any) -> None:
|
|
|
47
48
|
os.environ.update({key.upper(): val for key, val in ingest_config.items()})
|
|
48
49
|
|
|
49
50
|
|
|
51
|
+
def _normalize_int_env(name: str, default: str, default_label: str) -> None:
    """
    Validate an integer-valued environment variable and write it back normalized.

    The variable is read from ``os.environ`` (falling back to ``default`` when
    unset), parsed with ``int()``, and stored again in canonical decimal form.
    A value that does not parse, or that is negative, is replaced by
    ``default`` and a warning is logged.

    Parameters
    ----------
    name : str
        Name of the environment variable to normalize.
    default : str
        Decimal string used when the variable is unset or invalid.
    default_label : str
        Human-readable form of ``default`` used in the warning message.
    """
    raw_value = os.environ.get(name, default)
    try:
        parsed = int(raw_value)
        # Negative sizes/counts are meaningless for log rotation; treat them
        # exactly like unparseable input so they fall back to the default.
        if parsed < 0:
            raise ValueError(f"{name} must be non-negative")
    except ValueError:
        logger.warning(f"Invalid {name} '{raw_value}', using default ({default_label})")
        os.environ[name] = default
    else:
        os.environ[name] = str(parsed)


def build_logging_config_from_env() -> LoggingConfig:
    """
    Build Ray LoggingConfig from environment variables.

    Side effect: this function writes to ``os.environ``. Every variable listed
    below that is not already set is populated from the selected preset, and
    several values are rewritten in normalized form.

    Package-level preset (sets all defaults):
    - INGEST_RAY_LOG_LEVEL: PRODUCTION, DEVELOPMENT, DEBUG. Default: DEVELOPMENT.
      An unrecognized value logs a warning and falls back to DEVELOPMENT.

    Individual environment variables (override preset defaults; the defaults
    quoted here are those of the DEVELOPMENT preset):
    - RAY_LOGGING_LEVEL: Log level (DEBUG, INFO, WARNING, ERROR, CRITICAL). Default: INFO
    - RAY_LOGGING_ENCODING: Log encoding format (TEXT, JSON). Default: TEXT
    - RAY_LOGGING_ADDITIONAL_ATTRS: Comma-separated list of additional standard logger attributes
    - RAY_DEDUP_LOGS: Enable/disable log deduplication (0/1). Default: 1 (enabled)
    - RAY_LOG_TO_DRIVER: Enable/disable logging to driver (0/1 in the presets;
      the value is lower-cased but otherwise passed through). Default: 1
    - RAY_LOGGING_ROTATE_BYTES: Maximum log file size before rotation (bytes). Default: 1GB
    - RAY_LOGGING_ROTATE_BACKUP_COUNT: Number of backup log files to keep. Default: 19
    - RAY_DISABLE_IMPORT_WARNING: Disable Ray import warnings (0/1). Default: 0
    - RAY_USAGE_STATS_ENABLED: Enable/disable usage stats collection (0/1). Default: 1

    Returns
    -------
    LoggingConfig
        Configuration built from the validated encoding, log level and
        additional logger attributes. The remaining settings are only
        exported through the environment, not passed to ``LoggingConfig``.
    """
    # NOTE(review): Ray's logging documentation appears to name the rotation
    # variables RAY_ROTATION_MAX_BYTES / RAY_ROTATION_BACKUP_COUNT. Confirm that
    # the RAY_LOGGING_ROTATE_* names used here are consumed somewhere.

    # Preset table: each preset supplies a default for every individual variable.
    presets = {
        "PRODUCTION": {
            "RAY_LOGGING_LEVEL": "ERROR",
            "RAY_LOGGING_ENCODING": "TEXT",
            "RAY_LOGGING_ADDITIONAL_ATTRS": "",
            "RAY_DEDUP_LOGS": "1",
            "RAY_LOG_TO_DRIVER": "0",  # false
            "RAY_LOGGING_ROTATE_BYTES": "1073741824",  # 1GB
            "RAY_LOGGING_ROTATE_BACKUP_COUNT": "9",  # 10GB total
            "RAY_DISABLE_IMPORT_WARNING": "1",
            "RAY_USAGE_STATS_ENABLED": "0",
        },
        "DEVELOPMENT": {
            "RAY_LOGGING_LEVEL": "INFO",
            "RAY_LOGGING_ENCODING": "TEXT",
            "RAY_LOGGING_ADDITIONAL_ATTRS": "",
            "RAY_DEDUP_LOGS": "1",
            "RAY_LOG_TO_DRIVER": "1",  # true
            "RAY_LOGGING_ROTATE_BYTES": "1073741824",  # 1GB
            "RAY_LOGGING_ROTATE_BACKUP_COUNT": "19",  # 20GB total
            "RAY_DISABLE_IMPORT_WARNING": "0",
            "RAY_USAGE_STATS_ENABLED": "1",
        },
        "DEBUG": {
            "RAY_LOGGING_LEVEL": "DEBUG",
            "RAY_LOGGING_ENCODING": "JSON",
            "RAY_LOGGING_ADDITIONAL_ATTRS": "name,funcName,lineno",
            "RAY_DEDUP_LOGS": "0",
            "RAY_LOG_TO_DRIVER": "1",  # true
            "RAY_LOGGING_ROTATE_BYTES": "536870912",  # 512MB
            "RAY_LOGGING_ROTATE_BACKUP_COUNT": "39",  # 20GB total
            "RAY_DISABLE_IMPORT_WARNING": "0",
            "RAY_USAGE_STATS_ENABLED": "1",
        },
    }

    # Resolve the preset, falling back to DEVELOPMENT on an unknown name.
    preset_level = os.environ.get("INGEST_RAY_LOG_LEVEL", "DEVELOPMENT").upper()
    if preset_level not in presets:
        logger.warning(
            f"Invalid INGEST_RAY_LOG_LEVEL '{preset_level}', using DEVELOPMENT. "
            f"Valid presets: {list(presets.keys())}"
        )
        preset_level = "DEVELOPMENT"

    # Preset values are defaults only: anything already exported wins.
    for key, default_value in presets[preset_level].items():
        os.environ.setdefault(key, default_value)

    logger.info(f"Applied Ray logging preset: {preset_level}")

    # Log level: validated against the standard level names.
    valid_levels = ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]
    log_level = os.environ.get("RAY_LOGGING_LEVEL", "INFO").upper()
    if log_level not in valid_levels:
        logger.warning(f"Invalid RAY_LOGGING_LEVEL '{log_level}', using INFO. Valid levels: {valid_levels}")
        log_level = "INFO"

    # Encoding: only TEXT and JSON are accepted.
    valid_encodings = ["TEXT", "JSON"]
    encoding = os.environ.get("RAY_LOGGING_ENCODING", "TEXT").upper()
    if encoding not in valid_encodings:
        logger.warning(f"Invalid RAY_LOGGING_ENCODING '{encoding}', using TEXT. Valid encodings: {valid_encodings}")
        encoding = "TEXT"

    # Additional logger attributes: comma-separated, blanks dropped. An empty
    # or unset variable yields an empty list.
    additional_attrs_str = os.environ.get("RAY_LOGGING_ADDITIONAL_ATTRS", "")
    additional_log_standard_attrs = [attr.strip() for attr in additional_attrs_str.split(",") if attr.strip()]

    # Pass-through flags: make sure each is present in the environment. The
    # preset loop above normally guarantees this; the explicit defaults keep
    # the function correct even if a preset ever omits a key.
    os.environ.setdefault("RAY_DEDUP_LOGS", "1")
    os.environ["RAY_LOG_TO_DRIVER"] = os.environ.get("RAY_LOG_TO_DRIVER", "1").lower()
    os.environ.setdefault("RAY_DISABLE_IMPORT_WARNING", "0")
    os.environ.setdefault("RAY_USAGE_STATS_ENABLED", "1")

    # Rotation settings must be non-negative integers; invalid values are
    # replaced by the defaults (1GB per file, 19 backups => 20GB max).
    _normalize_int_env("RAY_LOGGING_ROTATE_BYTES", "1073741824", "1GB")
    _normalize_int_env("RAY_LOGGING_ROTATE_BACKUP_COUNT", "19", "19")

    # Only these three settings are carried by LoggingConfig itself.
    logging_config = LoggingConfig(
        encoding=encoding,
        log_level=log_level,
        additional_log_standard_attrs=additional_log_standard_attrs,
    )

    logger.info(
        f"Ray logging configured: preset={preset_level}, level={log_level}, encoding={encoding}, "
        f"additional_attrs={additional_log_standard_attrs}, "
        f"dedup_logs={os.environ['RAY_DEDUP_LOGS']}, "
        f"log_to_driver={os.environ['RAY_LOG_TO_DRIVER']}, "
        f"rotate_bytes={os.environ['RAY_LOGGING_ROTATE_BYTES']}, "
        f"rotate_backup_count={os.environ['RAY_LOGGING_ROTATE_BACKUP_COUNT']}"
    )

    return logging_config
|
|
206
|
+
|
|
207
|
+
|
|
50
208
|
def setup_ingestion_pipeline(pipeline: RayPipeline, ingest_config: Dict[str, Any] = None):
|
|
51
209
|
# Initialize the pipeline with the configuration
|
|
52
210
|
if ingest_config:
|
|
53
211
|
# Export the config to environment variables
|
|
54
212
|
export_config_to_env(ingest_config)
|
|
55
213
|
|
|
56
|
-
|
|
214
|
+
_ = logging.getLogger().getEffectiveLevel()
|
|
215
|
+
logging_config = build_logging_config_from_env()
|
|
57
216
|
ray_context = ray.init(
|
|
58
217
|
namespace="nv_ingest_ray",
|
|
59
|
-
|
|
218
|
+
logging_config=logging_config,
|
|
60
219
|
ignore_reinit_error=True,
|
|
61
220
|
dashboard_host="0.0.0.0",
|
|
62
221
|
dashboard_port=8265,
|
|
@@ -242,7 +242,7 @@ def add_table_extractor_stage(pipeline, default_cpu_count, stage_name="table_ext
|
|
|
242
242
|
stage_actor=TableExtractorStage,
|
|
243
243
|
config=table_extractor_config,
|
|
244
244
|
min_replicas=0,
|
|
245
|
-
max_replicas=_get_max_replicas(default_cpu_count, percentage_of_cpu=0.20),
|
|
245
|
+
max_replicas=_get_max_replicas(default_cpu_count, percentage_of_cpu=0.20, replica_limit=4),
|
|
246
246
|
)
|
|
247
247
|
|
|
248
248
|
return stage_name
|
|
@@ -271,7 +271,7 @@ def add_chart_extractor_stage(pipeline, default_cpu_count, stage_name="chart_ext
|
|
|
271
271
|
stage_actor=ChartExtractorStage,
|
|
272
272
|
config=chart_extractor_config,
|
|
273
273
|
min_replicas=0,
|
|
274
|
-
max_replicas=_get_max_replicas(default_cpu_count, percentage_of_cpu=0.20),
|
|
274
|
+
max_replicas=_get_max_replicas(default_cpu_count, percentage_of_cpu=0.20, replica_limit=4),
|
|
275
275
|
)
|
|
276
276
|
|
|
277
277
|
return stage_name
|
|
@@ -417,7 +417,7 @@ def add_otel_tracer_stage(pipeline, default_cpu_count, stage_name="otel_tracer")
|
|
|
417
417
|
stage_actor=OpenTelemetryTracerStage,
|
|
418
418
|
config=otel_tracer_config,
|
|
419
419
|
min_replicas=0,
|
|
420
|
-
max_replicas=
|
|
420
|
+
max_replicas=1,
|
|
421
421
|
)
|
|
422
422
|
|
|
423
423
|
return stage_name
|
|
@@ -523,7 +523,7 @@ def add_text_embedding_stage(pipeline, default_cpu_count, stage_name="text_embed
|
|
|
523
523
|
stage_actor=TextEmbeddingTransformStage,
|
|
524
524
|
config=config,
|
|
525
525
|
min_replicas=0,
|
|
526
|
-
max_replicas=_get_max_replicas(default_cpu_count, percentage_of_cpu=0.07, replica_limit=
|
|
526
|
+
max_replicas=_get_max_replicas(default_cpu_count, percentage_of_cpu=0.07, replica_limit=4),
|
|
527
527
|
)
|
|
528
528
|
|
|
529
529
|
return stage_name
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/api/v1/__init__.py
RENAMED
|
File without changes
|
{nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/api/v1/health.py
RENAMED
|
File without changes
|
{nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/api/v1/ingest.py
RENAMED
|
File without changes
|
{nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/api/v1/metrics.py
RENAMED
|
File without changes
|
{nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest.egg-info/SOURCES.txt
RENAMED
|
File without changes
|
|
File without changes
|
{nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest.egg-info/requires.txt
RENAMED
|
File without changes
|
{nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|