nv-ingest 2025.8.2.dev20250802__tar.gz → 2025.8.4.dev20250804__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nv-ingest might be problematic. (The original page links to further details.)

Files changed (109)
  1. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/PKG-INFO +1 -1
  2. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_builders.py +161 -2
  3. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/util/pipeline/stage_builders.py +4 -4
  4. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest.egg-info/PKG-INFO +1 -1
  5. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/LICENSE +0 -0
  6. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/MANIFEST.in +0 -0
  7. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/__init__.py +0 -0
  8. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/api/__init__.py +0 -0
  9. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/api/main.py +0 -0
  10. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/api/v1/__init__.py +0 -0
  11. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/api/v1/health.py +0 -0
  12. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/api/v1/ingest.py +0 -0
  13. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/api/v1/metrics.py +0 -0
  14. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/__init__.py +0 -0
  15. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/__init__.py +0 -0
  16. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/__init__.py +0 -0
  17. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/edges/__init__.py +0 -0
  18. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/edges/async_queue_edge.py +0 -0
  19. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/edges/ray_queue_edge.py +0 -0
  20. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/edges/threaded_queue_edge.py +0 -0
  21. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/examples/__init__.py +0 -0
  22. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/examples/pipeline_test_harness.py +0 -0
  23. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/examples/task_source_harness.py +0 -0
  24. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/examples/task_source_sink_harness.py +0 -0
  25. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/primitives/__init__.py +0 -0
  26. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/primitives/dataclasses.py +0 -0
  27. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/primitives/pipeline_monitor.py +0 -0
  28. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/primitives/pipeline_topology.py +0 -0
  29. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/primitives/ray_pipeline.py +0 -0
  30. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/primitives/ray_stat_collector.py +0 -0
  31. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/__init__.py +0 -0
  32. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/extractors/__init__.py +0 -0
  33. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/extractors/audio_extractor.py +0 -0
  34. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/extractors/chart_extractor.py +0 -0
  35. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/extractors/docx_extractor.py +0 -0
  36. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/extractors/html_extractor.py +0 -0
  37. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/extractors/image_extractor.py +0 -0
  38. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/extractors/infographic_extractor.py +0 -0
  39. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/extractors/pdf_extractor.py +0 -0
  40. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/extractors/pptx_extractor.py +0 -0
  41. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/extractors/table_extractor.py +0 -0
  42. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/injectors/__init__.py +0 -0
  43. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/injectors/metadata_injector.py +0 -0
  44. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/meta/__init__.py +0 -0
  45. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_edge_base.py +0 -0
  46. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_sink_stage_base.py +0 -0
  47. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_source_stage_base.py +0 -0
  48. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_stage_base.py +0 -0
  49. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/mutate/__init__.py +0 -0
  50. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/mutate/image_dedup.py +0 -0
  51. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/mutate/image_filter.py +0 -0
  52. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/sinks/__init__.py +0 -0
  53. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/sinks/default_drain.py +0 -0
  54. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/sinks/message_broker_task_sink.py +0 -0
  55. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/sources/__init__.py +0 -0
  56. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/sources/message_broker_task_source.py +0 -0
  57. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/storage/__init__.py +0 -0
  58. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/storage/image_storage.py +0 -0
  59. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/storage/store_embeddings.py +0 -0
  60. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/telemetry/__init__.py +0 -0
  61. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/telemetry/job_counter.py +0 -0
  62. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/telemetry/otel_meter.py +0 -0
  63. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/telemetry/otel_tracer.py +0 -0
  64. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/transforms/__init__.py +0 -0
  65. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/transforms/image_caption.py +0 -0
  66. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/transforms/text_embed.py +0 -0
  67. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/transforms/text_splitter.py +0 -0
  68. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/utility/__init__.py +0 -0
  69. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/stages/utility/throughput_monitor.py +0 -0
  70. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/util/__init__.py +0 -0
  71. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/util/env_config.py +0 -0
  72. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/util/pipeline/__init__.py +0 -0
  73. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/util/pipeline/pid_controller.py +0 -0
  74. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_runners.py +0 -0
  75. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/util/pipeline/tools.py +0 -0
  76. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/util/system_tools/__init__.py +0 -0
  77. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/util/system_tools/memory.py +0 -0
  78. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/orchestration/ray/util/system_tools/visualizers.py +0 -0
  79. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/schemas/__init__.py +0 -0
  80. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/schemas/framework_ingest_config_schema.py +0 -0
  81. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/schemas/framework_job_counter_schema.py +0 -0
  82. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/schemas/framework_message_broker_sink_schema.py +0 -0
  83. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/schemas/framework_message_broker_source_schema.py +0 -0
  84. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/schemas/framework_message_wrapper_schema.py +0 -0
  85. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/schemas/framework_metadata_injector_schema.py +0 -0
  86. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/schemas/framework_otel_meter_schema.py +0 -0
  87. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/schemas/framework_otel_tracer_schema.py +0 -0
  88. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/schemas/framework_processing_job_schema.py +0 -0
  89. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/schemas/framework_task_injection_schema.py +0 -0
  90. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/schemas/framework_vdb_task_sink_schema.py +0 -0
  91. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/util/__init__.py +0 -0
  92. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/util/flow_control/__init__.py +0 -0
  93. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/util/flow_control/filter_by_task.py +0 -0
  94. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/util/service/__init__.py +0 -0
  95. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/util/service/impl/__init__.py +0 -0
  96. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/util/service/impl/ingest/__init__.py +0 -0
  97. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/util/service/impl/ingest/redis_ingest_service.py +0 -0
  98. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/util/service/meta/__init__.py +0 -0
  99. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/util/service/meta/ingest/__init__.py +0 -0
  100. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/util/service/meta/ingest/ingest_service_meta.py +0 -0
  101. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/util/telemetry/__init__.py +0 -0
  102. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/framework/util/telemetry/global_stats.py +0 -0
  103. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest/version.py +0 -0
  104. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest.egg-info/SOURCES.txt +0 -0
  105. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest.egg-info/dependency_links.txt +0 -0
  106. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest.egg-info/requires.txt +0 -0
  107. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/nv_ingest.egg-info/top_level.txt +0 -0
  108. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/pyproject.toml +0 -0
  109. {nv_ingest-2025.8.2.dev20250802 → nv_ingest-2025.8.4.dev20250804}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest
3
- Version: 2025.8.2.dev20250802
3
+ Version: 2025.8.4.dev20250804
4
4
  Summary: Python module for multimodal document ingestion
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -9,6 +9,7 @@ import os
9
9
  from typing import Dict, Any
10
10
 
11
11
  import ray
12
+ from ray import LoggingConfig
12
13
  from pydantic import BaseModel
13
14
 
14
15
  from nv_ingest.framework.orchestration.ray.primitives.ray_pipeline import RayPipeline
@@ -47,16 +48,174 @@ def export_config_to_env(ingest_config: Any) -> None:
47
48
  os.environ.update({key.upper(): val for key, val in ingest_config.items()})
48
49
 
49
50
 
51
+ def build_logging_config_from_env() -> LoggingConfig:
52
+ """
53
+ Build Ray LoggingConfig from environment variables.
54
+
55
+ Package-level preset (sets all defaults):
56
+ - INGEST_RAY_LOG_LEVEL: PRODUCTION, DEVELOPMENT, DEBUG. Default: DEVELOPMENT
57
+
58
+ Individual environment variables (override preset defaults):
59
+ - RAY_LOGGING_LEVEL: Log level (DEBUG, INFO, WARNING, ERROR, CRITICAL). Default: INFO
60
+ - RAY_LOGGING_ENCODING: Log encoding format (TEXT, JSON). Default: TEXT
61
+ - RAY_LOGGING_ADDITIONAL_ATTRS: Comma-separated list of additional standard logger attributes
62
+ - RAY_DEDUP_LOGS: Enable/disable log deduplication (0/1). Default: 1 (enabled)
63
+ - RAY_LOG_TO_DRIVER: Enable/disable logging to driver (true/false). Default: true
64
+ - RAY_LOGGING_ROTATE_BYTES: Maximum log file size before rotation (bytes). Default: 1GB
65
+ - RAY_LOGGING_ROTATE_BACKUP_COUNT: Number of backup log files to keep. Default: 19
66
+ - RAY_DISABLE_IMPORT_WARNING: Disable Ray import warnings (0/1). Default: 0
67
+ - RAY_USAGE_STATS_ENABLED: Enable/disable usage stats collection (0/1). Default: 1
68
+ """
69
+
70
+ # Apply package-level preset defaults first
71
+ preset_level = os.environ.get("INGEST_RAY_LOG_LEVEL", "DEVELOPMENT").upper()
72
+
73
+ # Define preset configurations
74
+ presets = {
75
+ "PRODUCTION": {
76
+ "RAY_LOGGING_LEVEL": "ERROR",
77
+ "RAY_LOGGING_ENCODING": "TEXT",
78
+ "RAY_LOGGING_ADDITIONAL_ATTRS": "",
79
+ "RAY_DEDUP_LOGS": "1",
80
+ "RAY_LOG_TO_DRIVER": "0", # false
81
+ "RAY_LOGGING_ROTATE_BYTES": "1073741824", # 1GB
82
+ "RAY_LOGGING_ROTATE_BACKUP_COUNT": "9", # 10GB total
83
+ "RAY_DISABLE_IMPORT_WARNING": "1",
84
+ "RAY_USAGE_STATS_ENABLED": "0",
85
+ },
86
+ "DEVELOPMENT": {
87
+ "RAY_LOGGING_LEVEL": "INFO",
88
+ "RAY_LOGGING_ENCODING": "TEXT",
89
+ "RAY_LOGGING_ADDITIONAL_ATTRS": "",
90
+ "RAY_DEDUP_LOGS": "1",
91
+ "RAY_LOG_TO_DRIVER": "1", # true
92
+ "RAY_LOGGING_ROTATE_BYTES": "1073741824", # 1GB
93
+ "RAY_LOGGING_ROTATE_BACKUP_COUNT": "19", # 20GB total
94
+ "RAY_DISABLE_IMPORT_WARNING": "0",
95
+ "RAY_USAGE_STATS_ENABLED": "1",
96
+ },
97
+ "DEBUG": {
98
+ "RAY_LOGGING_LEVEL": "DEBUG",
99
+ "RAY_LOGGING_ENCODING": "JSON",
100
+ "RAY_LOGGING_ADDITIONAL_ATTRS": "name,funcName,lineno",
101
+ "RAY_DEDUP_LOGS": "0",
102
+ "RAY_LOG_TO_DRIVER": "1", # true
103
+ "RAY_LOGGING_ROTATE_BYTES": "536870912", # 512MB
104
+ "RAY_LOGGING_ROTATE_BACKUP_COUNT": "39", # 20GB total
105
+ "RAY_DISABLE_IMPORT_WARNING": "0",
106
+ "RAY_USAGE_STATS_ENABLED": "1",
107
+ },
108
+ }
109
+
110
+ # Validate preset level
111
+ if preset_level not in presets:
112
+ logger.warning(
113
+ f"Invalid INGEST_RAY_LOG_LEVEL '{preset_level}', using DEVELOPMENT. "
114
+ f"Valid presets: {list(presets.keys())}"
115
+ )
116
+ preset_level = "DEVELOPMENT"
117
+
118
+ # Apply preset defaults (only if env var not already set)
119
+ preset_config = presets[preset_level]
120
+ for key, default_value in preset_config.items():
121
+ if key not in os.environ:
122
+ os.environ[key] = default_value
123
+
124
+ logger.info(f"Applied Ray logging preset: {preset_level}")
125
+
126
+ # Get log level from environment, default to INFO
127
+ log_level = os.environ.get("RAY_LOGGING_LEVEL", "INFO").upper()
128
+
129
+ # Validate log level
130
+ valid_levels = ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]
131
+ if log_level not in valid_levels:
132
+ logger.warning(f"Invalid RAY_LOGGING_LEVEL '{log_level}', using INFO. Valid levels: {valid_levels}")
133
+ log_level = "INFO"
134
+
135
+ # Get encoding format from environment, default to TEXT
136
+ encoding = os.environ.get("RAY_LOGGING_ENCODING", "TEXT").upper()
137
+
138
+ # Validate encoding
139
+ valid_encodings = ["TEXT", "JSON"]
140
+ if encoding not in valid_encodings:
141
+ logger.warning(f"Invalid RAY_LOGGING_ENCODING '{encoding}', using TEXT. Valid encodings: {valid_encodings}")
142
+ encoding = "TEXT"
143
+
144
+ # Get additional standard logger attributes
145
+ additional_attrs_str = os.environ.get("RAY_LOGGING_ADDITIONAL_ATTRS", "")
146
+ additional_log_standard_attrs = []
147
+ if additional_attrs_str:
148
+ additional_log_standard_attrs = [attr.strip() for attr in additional_attrs_str.split(",") if attr.strip()]
149
+
150
+ # Set log deduplication environment variable if specified
151
+ dedup_logs = os.environ.get("RAY_DEDUP_LOGS", "1")
152
+ if dedup_logs is not None:
153
+ os.environ["RAY_DEDUP_LOGS"] = str(dedup_logs)
154
+
155
+ # Set log to driver environment variable if specified
156
+ log_to_driver = os.environ.get("RAY_LOG_TO_DRIVER", "1")
157
+ if log_to_driver is not None:
158
+ os.environ["RAY_LOG_TO_DRIVER"] = str(log_to_driver).lower()
159
+
160
+ # Configure log rotation settings
161
+ rotate_bytes = os.environ.get("RAY_LOGGING_ROTATE_BYTES", "1073741824") # Default: 1GB per file
162
+ if rotate_bytes is not None:
163
+ try:
164
+ rotate_bytes_int = int(rotate_bytes)
165
+ os.environ["RAY_LOGGING_ROTATE_BYTES"] = str(rotate_bytes_int)
166
+ except ValueError:
167
+ logger.warning(f"Invalid RAY_LOGGING_ROTATE_BYTES '{rotate_bytes}', using default (1GB)")
168
+ os.environ["RAY_LOGGING_ROTATE_BYTES"] = "1073741824"
169
+
170
+ rotate_backup_count = os.environ.get("RAY_LOGGING_ROTATE_BACKUP_COUNT", "19") # Default: 19 backups (20GB Max)
171
+ if rotate_backup_count is not None:
172
+ try:
173
+ backup_count_int = int(rotate_backup_count)
174
+ os.environ["RAY_LOGGING_ROTATE_BACKUP_COUNT"] = str(backup_count_int)
175
+ except ValueError:
176
+ logger.warning(f"Invalid RAY_LOGGING_ROTATE_BACKUP_COUNT '{rotate_backup_count}', using default (19)")
177
+ os.environ["RAY_LOGGING_ROTATE_BACKUP_COUNT"] = "19"
178
+
179
+ # Configure Ray internal logging verbosity
180
+ disable_import_warning = os.environ.get("RAY_DISABLE_IMPORT_WARNING", "0")
181
+ if disable_import_warning is not None:
182
+ os.environ["RAY_DISABLE_IMPORT_WARNING"] = str(disable_import_warning)
183
+
184
+ # Configure usage stats collection
185
+ usage_stats_enabled = os.environ.get("RAY_USAGE_STATS_ENABLED", "1")
186
+ if usage_stats_enabled is not None:
187
+ os.environ["RAY_USAGE_STATS_ENABLED"] = str(usage_stats_enabled)
188
+
189
+ # Create LoggingConfig with validated parameters
190
+ logging_config = LoggingConfig(
191
+ encoding=encoding,
192
+ log_level=log_level,
193
+ additional_log_standard_attrs=additional_log_standard_attrs,
194
+ )
195
+
196
+ logger.info(
197
+ f"Ray logging configured: preset={preset_level}, level={log_level}, encoding={encoding}, "
198
+ f"additional_attrs={additional_log_standard_attrs}, "
199
+ f"dedup_logs={os.environ.get('RAY_DEDUP_LOGS', '1')}, "
200
+ f"log_to_driver={os.environ.get('RAY_LOG_TO_DRIVER', 'true')}, "
201
+ f"rotate_bytes={os.environ.get('RAY_LOGGING_ROTATE_BYTES', '1073741824')}, "
202
+ f"rotate_backup_count={os.environ.get('RAY_LOGGING_ROTATE_BACKUP_COUNT', '19')}"
203
+ )
204
+
205
+ return logging_config
206
+
207
+
50
208
  def setup_ingestion_pipeline(pipeline: RayPipeline, ingest_config: Dict[str, Any] = None):
51
209
  # Initialize the pipeline with the configuration
52
210
  if ingest_config:
53
211
  # Export the config to environment variables
54
212
  export_config_to_env(ingest_config)
55
213
 
56
- current_level = logging.getLogger().getEffectiveLevel()
214
+ _ = logging.getLogger().getEffectiveLevel()
215
+ logging_config = build_logging_config_from_env()
57
216
  ray_context = ray.init(
58
217
  namespace="nv_ingest_ray",
59
- logging_level=current_level,
218
+ logging_config=logging_config,
60
219
  ignore_reinit_error=True,
61
220
  dashboard_host="0.0.0.0",
62
221
  dashboard_port=8265,
@@ -242,7 +242,7 @@ def add_table_extractor_stage(pipeline, default_cpu_count, stage_name="table_ext
242
242
  stage_actor=TableExtractorStage,
243
243
  config=table_extractor_config,
244
244
  min_replicas=0,
245
- max_replicas=_get_max_replicas(default_cpu_count, percentage_of_cpu=0.20),
245
+ max_replicas=_get_max_replicas(default_cpu_count, percentage_of_cpu=0.20, replica_limit=4),
246
246
  )
247
247
 
248
248
  return stage_name
@@ -271,7 +271,7 @@ def add_chart_extractor_stage(pipeline, default_cpu_count, stage_name="chart_ext
271
271
  stage_actor=ChartExtractorStage,
272
272
  config=chart_extractor_config,
273
273
  min_replicas=0,
274
- max_replicas=_get_max_replicas(default_cpu_count, percentage_of_cpu=0.20),
274
+ max_replicas=_get_max_replicas(default_cpu_count, percentage_of_cpu=0.20, replica_limit=4),
275
275
  )
276
276
 
277
277
  return stage_name
@@ -417,7 +417,7 @@ def add_otel_tracer_stage(pipeline, default_cpu_count, stage_name="otel_tracer")
417
417
  stage_actor=OpenTelemetryTracerStage,
418
418
  config=otel_tracer_config,
419
419
  min_replicas=0,
420
- max_replicas=2,
420
+ max_replicas=1,
421
421
  )
422
422
 
423
423
  return stage_name
@@ -523,7 +523,7 @@ def add_text_embedding_stage(pipeline, default_cpu_count, stage_name="text_embed
523
523
  stage_actor=TextEmbeddingTransformStage,
524
524
  config=config,
525
525
  min_replicas=0,
526
- max_replicas=_get_max_replicas(default_cpu_count, percentage_of_cpu=0.07, replica_limit=6),
526
+ max_replicas=_get_max_replicas(default_cpu_count, percentage_of_cpu=0.07, replica_limit=4),
527
527
  )
528
528
 
529
529
  return stage_name
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest
3
- Version: 2025.8.2.dev20250802
3
+ Version: 2025.8.4.dev20250804
4
4
  Summary: Python module for multimodal document ingestion
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License