nv-ingest 2025.8.20.dev20250820__tar.gz → 2025.8.22.dev20250822__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nv-ingest might be problematic. Click here for more details.
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/PKG-INFO +1 -1
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/stages/extractors/audio_extractor.py +2 -1
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/stages/extractors/chart_extractor.py +5 -2
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/stages/extractors/docx_extractor.py +2 -1
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/stages/extractors/html_extractor.py +2 -1
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/stages/extractors/image_extractor.py +2 -1
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/stages/extractors/pdf_extractor.py +5 -2
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/stages/extractors/table_extractor.py +2 -1
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/stages/injectors/metadata_injector.py +2 -1
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/stages/mutate/image_dedup.py +2 -1
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/stages/mutate/image_filter.py +2 -1
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/stages/sources/message_broker_task_source.py +5 -1
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/stages/storage/store_embeddings.py +2 -1
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/stages/transforms/image_caption.py +5 -1
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/stages/transforms/text_embed.py +5 -1
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/stages/transforms/text_splitter.py +4 -3
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest.egg-info/PKG-INFO +1 -1
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/LICENSE +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/MANIFEST.in +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/__init__.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/api/__init__.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/api/main.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/api/v1/__init__.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/api/v1/health.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/api/v1/ingest.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/api/v1/metrics.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/__init__.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/__init__.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/execution/__init__.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/execution/helpers.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/execution/options.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/process/__init__.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/process/dependent_services.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/process/execution.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/process/lifecycle.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/process/strategies.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/process/termination.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/__init__.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/edges/__init__.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/edges/async_queue_edge.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/edges/ray_queue_edge.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/edges/threaded_queue_edge.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/examples/__init__.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/examples/pipeline_test_harness.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/examples/task_source_harness.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/examples/task_source_sink_harness.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/primitives/__init__.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/primitives/dataclasses.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/primitives/pipeline_monitor.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/primitives/pipeline_topology.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/primitives/ray_pipeline.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/primitives/ray_stat_collector.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/stages/__init__.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/stages/extractors/__init__.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/stages/extractors/infographic_extractor.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/stages/extractors/pptx_extractor.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/stages/injectors/__init__.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/stages/meta/__init__.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_edge_base.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_sink_stage_base.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_source_stage_base.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_stage_base.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/stages/mutate/__init__.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/stages/sinks/__init__.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/stages/sinks/default_drain.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/stages/sinks/message_broker_task_sink.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/stages/sources/__init__.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/stages/storage/__init__.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/stages/storage/image_storage.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/stages/telemetry/__init__.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/stages/telemetry/job_counter.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/stages/telemetry/otel_meter.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/stages/telemetry/otel_tracer.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/stages/transforms/__init__.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/stages/utility/__init__.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/stages/utility/throughput_monitor.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/util/__init__.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/util/env_config.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/util/pipeline/__init__.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/util/pipeline/pid_controller.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_runners.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/util/pipeline/tools.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/util/system_tools/__init__.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/util/system_tools/memory.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/orchestration/ray/util/system_tools/visualizers.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/schemas/__init__.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/schemas/framework_ingest_config_schema.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/schemas/framework_job_counter_schema.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/schemas/framework_message_broker_sink_schema.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/schemas/framework_message_broker_source_schema.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/schemas/framework_message_wrapper_schema.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/schemas/framework_metadata_injector_schema.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/schemas/framework_otel_meter_schema.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/schemas/framework_otel_tracer_schema.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/schemas/framework_processing_job_schema.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/schemas/framework_task_injection_schema.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/schemas/framework_vdb_task_sink_schema.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/util/__init__.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/util/flow_control/__init__.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/util/flow_control/filter_by_task.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/util/flow_control/udf_intercept.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/util/service/__init__.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/util/service/impl/__init__.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/util/service/impl/ingest/__init__.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/util/service/impl/ingest/redis_ingest_service.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/util/service/meta/__init__.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/util/service/meta/ingest/__init__.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/util/service/meta/ingest/ingest_service_meta.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/util/telemetry/__init__.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/util/telemetry/global_stats.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/pipeline/__init__.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/pipeline/config/__init__.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/pipeline/config/loaders.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/pipeline/config/replica_resolver.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/pipeline/default_libmode_pipeline_impl.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/pipeline/default_pipeline_impl.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/pipeline/ingest_pipeline.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/pipeline/pipeline_schema.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/version.py +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest.egg-info/SOURCES.txt +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest.egg-info/dependency_links.txt +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest.egg-info/requires.txt +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest.egg-info/top_level.txt +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/pyproject.toml +0 -0
- {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/setup.cfg +0 -0
|
@@ -17,6 +17,7 @@ from nv_ingest_api.internal.schemas.extract.extract_audio_schema import AudioExt
|
|
|
17
17
|
from nv_ingest_api.util.exception_handlers.decorators import (
|
|
18
18
|
nv_ingest_node_failure_try_except,
|
|
19
19
|
)
|
|
20
|
+
from nv_ingest_api.util.logging.sanitize import sanitize_for_logging
|
|
20
21
|
|
|
21
22
|
from nv_ingest.framework.util.flow_control.udf_intercept import udf_intercept_hook
|
|
22
23
|
|
|
@@ -69,7 +70,7 @@ class AudioExtractorStage(RayActorStage):
|
|
|
69
70
|
|
|
70
71
|
# Remove the "audio_data_extract" task from the message to obtain task-specific configuration.
|
|
71
72
|
task_config = remove_task_by_type(control_message, "extract")
|
|
72
|
-
self._logger.debug("Extracted task config: %s", task_config)
|
|
73
|
+
self._logger.debug("Extracted task config: %s", sanitize_for_logging(task_config))
|
|
73
74
|
|
|
74
75
|
# Perform audio text extraction.
|
|
75
76
|
new_df, extraction_info = extract_text_from_audio_internal(
|
|
@@ -13,8 +13,11 @@ from nv_ingest.framework.util.flow_control import filter_by_task
|
|
|
13
13
|
from nv_ingest.framework.util.flow_control.udf_intercept import udf_intercept_hook
|
|
14
14
|
from nv_ingest_api.internal.primitives.tracing.tagging import traceable
|
|
15
15
|
from nv_ingest_api.internal.schemas.extract.extract_chart_schema import ChartExtractorSchema
|
|
16
|
-
from nv_ingest_api.util.exception_handlers.decorators import nv_ingest_node_failure_try_except
|
|
17
16
|
from nv_ingest.framework.orchestration.ray.stages.meta.ray_actor_stage_base import RayActorStage
|
|
17
|
+
from nv_ingest_api.util.exception_handlers.decorators import (
|
|
18
|
+
nv_ingest_node_failure_try_except,
|
|
19
|
+
)
|
|
20
|
+
from nv_ingest_api.util.logging.sanitize import sanitize_for_logging
|
|
18
21
|
|
|
19
22
|
logger = logging.getLogger(__name__)
|
|
20
23
|
|
|
@@ -66,7 +69,7 @@ class ChartExtractorStage(RayActorStage):
|
|
|
66
69
|
|
|
67
70
|
# Remove the "chart_data_extract" task to obtain task-specific configuration.
|
|
68
71
|
task_config = remove_task_by_type(control_message, "chart_data_extract")
|
|
69
|
-
logger.debug("ChartExtractorStage: Task config extracted: %s", task_config)
|
|
72
|
+
logger.debug("ChartExtractorStage: Task config extracted: %s", sanitize_for_logging(task_config))
|
|
70
73
|
|
|
71
74
|
# Perform chart data extraction.
|
|
72
75
|
execution_trace_log = {}
|
|
@@ -16,6 +16,7 @@ from nv_ingest_api.internal.schemas.extract.extract_docx_schema import DocxExtra
|
|
|
16
16
|
from nv_ingest_api.util.exception_handlers.decorators import (
|
|
17
17
|
nv_ingest_node_failure_try_except,
|
|
18
18
|
)
|
|
19
|
+
from nv_ingest_api.util.logging.sanitize import sanitize_for_logging
|
|
19
20
|
|
|
20
21
|
from nv_ingest.framework.util.flow_control.udf_intercept import udf_intercept_hook
|
|
21
22
|
|
|
@@ -68,7 +69,7 @@ class DocxExtractorStage(RayActorStage):
|
|
|
68
69
|
|
|
69
70
|
# Remove the "docx-extract" task from the message to obtain task-specific configuration.
|
|
70
71
|
task_config = remove_task_by_type(control_message, "extract")
|
|
71
|
-
self._logger.debug("Extracted task config: %s", task_config)
|
|
72
|
+
self._logger.debug("Extracted task config: %s", sanitize_for_logging(task_config))
|
|
72
73
|
|
|
73
74
|
# Perform DOCX content extraction.
|
|
74
75
|
new_df, extraction_info = extract_primitives_from_docx_internal(
|
|
@@ -17,6 +17,7 @@ from nv_ingest_api.internal.schemas.extract.extract_html_schema import HtmlExtra
|
|
|
17
17
|
from nv_ingest_api.util.exception_handlers.decorators import (
|
|
18
18
|
nv_ingest_node_failure_try_except,
|
|
19
19
|
)
|
|
20
|
+
from nv_ingest_api.util.logging.sanitize import sanitize_for_logging
|
|
20
21
|
|
|
21
22
|
from nv_ingest.framework.util.flow_control.udf_intercept import udf_intercept_hook
|
|
22
23
|
|
|
@@ -69,7 +70,7 @@ class HtmlExtractorStage(RayActorStage):
|
|
|
69
70
|
|
|
70
71
|
# Remove the "html_content_extract" task from the message to obtain task-specific configuration.
|
|
71
72
|
task_config = remove_task_by_type(control_message, "extract")
|
|
72
|
-
self._logger.debug("Extracted task config: %s", task_config)
|
|
73
|
+
self._logger.debug("Extracted task config: %s", sanitize_for_logging(task_config))
|
|
73
74
|
|
|
74
75
|
# Perform html content extraction.
|
|
75
76
|
new_df, extraction_info = extract_markdown_from_html_internal(
|
|
@@ -16,6 +16,7 @@ from nv_ingest_api.internal.schemas.extract.extract_image_schema import ImageExt
|
|
|
16
16
|
from nv_ingest_api.util.exception_handlers.decorators import (
|
|
17
17
|
nv_ingest_node_failure_try_except,
|
|
18
18
|
)
|
|
19
|
+
from nv_ingest_api.util.logging.sanitize import sanitize_for_logging
|
|
19
20
|
|
|
20
21
|
from nv_ingest.framework.util.flow_control.udf_intercept import udf_intercept_hook
|
|
21
22
|
|
|
@@ -68,7 +69,7 @@ class ImageExtractorStage(RayActorStage):
|
|
|
68
69
|
|
|
69
70
|
# Remove the "extract" task from the message to obtain task-specific configuration.
|
|
70
71
|
task_config = remove_task_by_type(control_message, "extract")
|
|
71
|
-
logger.debug("Extracted task config: %s", task_config)
|
|
72
|
+
logger.debug("Extracted task config: %s", sanitize_for_logging(task_config))
|
|
72
73
|
|
|
73
74
|
# Perform image primitives extraction.
|
|
74
75
|
new_df, extraction_info = extract_primitives_from_image_internal(
|
|
@@ -15,7 +15,10 @@ from nv_ingest_api.internal.primitives.tracing.tagging import set_trace_timestam
|
|
|
15
15
|
from nv_ingest.framework.orchestration.ray.stages.meta.ray_actor_stage_base import RayActorStage
|
|
16
16
|
from nv_ingest.framework.util.flow_control import filter_by_task
|
|
17
17
|
from nv_ingest.framework.util.flow_control.udf_intercept import udf_intercept_hook
|
|
18
|
-
from nv_ingest_api.util.exception_handlers.decorators import nv_ingest_node_failure_try_except
|
|
18
|
+
from nv_ingest_api.util.exception_handlers.decorators import (
|
|
19
|
+
nv_ingest_node_failure_try_except,
|
|
20
|
+
)
|
|
21
|
+
from nv_ingest_api.util.logging.sanitize import sanitize_for_logging
|
|
19
22
|
|
|
20
23
|
logger = logging.getLogger(__name__)
|
|
21
24
|
|
|
@@ -87,7 +90,7 @@ class PDFExtractorStage(RayActorStage):
|
|
|
87
90
|
|
|
88
91
|
# Remove the "extract" task from the message to obtain task-specific configuration.
|
|
89
92
|
task_config = remove_task_by_type(control_message, "extract")
|
|
90
|
-
logger.debug("Extracted task config: %s", task_config)
|
|
93
|
+
logger.debug("Extracted task config: %s", sanitize_for_logging(task_config))
|
|
91
94
|
|
|
92
95
|
# Perform PDF extraction.
|
|
93
96
|
execution_trace_log = {}
|
|
@@ -16,6 +16,7 @@ from nv_ingest_api.internal.schemas.extract.extract_table_schema import TableExt
|
|
|
16
16
|
from nv_ingest_api.util.exception_handlers.decorators import (
|
|
17
17
|
nv_ingest_node_failure_try_except,
|
|
18
18
|
)
|
|
19
|
+
from nv_ingest_api.util.logging.sanitize import sanitize_for_logging
|
|
19
20
|
|
|
20
21
|
logger = logging.getLogger(__name__)
|
|
21
22
|
|
|
@@ -65,7 +66,7 @@ class TableExtractorStage(RayActorStage):
|
|
|
65
66
|
|
|
66
67
|
# Remove the "table_data_extract" task to obtain task-specific configuration.
|
|
67
68
|
task_config = remove_task_by_type(control_message, "table_data_extract")
|
|
68
|
-
logger.debug("Extracted task configuration: %s", task_config)
|
|
69
|
+
logger.debug("Extracted task configuration: %s", sanitize_for_logging(task_config))
|
|
69
70
|
|
|
70
71
|
# Perform table data extraction.
|
|
71
72
|
execution_trace_log = {}
|
|
@@ -25,6 +25,7 @@ from nv_ingest_api.util.exception_handlers.decorators import (
|
|
|
25
25
|
nv_ingest_node_failure_try_except,
|
|
26
26
|
)
|
|
27
27
|
from nv_ingest.framework.util.flow_control.udf_intercept import udf_intercept_hook
|
|
28
|
+
from nv_ingest_api.util.logging.sanitize import sanitize_for_logging
|
|
28
29
|
|
|
29
30
|
logger = logging.getLogger(__name__)
|
|
30
31
|
|
|
@@ -42,7 +43,7 @@ class MetadataInjectionStage(RayActorStage):
|
|
|
42
43
|
# Call the base initializer to set attributes like self._running.
|
|
43
44
|
super().__init__(config, stage_name=stage_name)
|
|
44
45
|
# Additional initialization can be added here if necessary.
|
|
45
|
-
self._logger.debug("MetadataInjectionStage initialized with config: %s", config)
|
|
46
|
+
self._logger.debug("MetadataInjectionStage initialized with config: %s", sanitize_for_logging(config))
|
|
46
47
|
|
|
47
48
|
@nv_ingest_node_failure_try_except()
|
|
48
49
|
@traceable()
|
|
@@ -18,6 +18,7 @@ from nv_ingest_api.internal.schemas.mutate.mutate_image_dedup_schema import Imag
|
|
|
18
18
|
from nv_ingest_api.util.exception_handlers.decorators import (
|
|
19
19
|
nv_ingest_node_failure_try_except,
|
|
20
20
|
)
|
|
21
|
+
from nv_ingest_api.util.logging.sanitize import sanitize_for_logging
|
|
21
22
|
|
|
22
23
|
logger = logging.getLogger(__name__)
|
|
23
24
|
|
|
@@ -68,7 +69,7 @@ class ImageDedupStage(RayActorStage):
|
|
|
68
69
|
|
|
69
70
|
# Remove the "dedup" task from the message to obtain task-specific configuration.
|
|
70
71
|
task_config = remove_task_by_type(control_message, "dedup")
|
|
71
|
-
logger.debug("Extracted task config: %s", task_config)
|
|
72
|
+
logger.debug("Extracted task config: %s", sanitize_for_logging(task_config))
|
|
72
73
|
|
|
73
74
|
# Perform image deduplication.
|
|
74
75
|
new_df = deduplicate_images_internal(
|
|
@@ -17,6 +17,7 @@ from nv_ingest_api.internal.schemas.transform.transform_image_filter_schema impo
|
|
|
17
17
|
from nv_ingest_api.util.exception_handlers.decorators import (
|
|
18
18
|
nv_ingest_node_failure_try_except,
|
|
19
19
|
)
|
|
20
|
+
from nv_ingest_api.util.logging.sanitize import sanitize_for_logging
|
|
20
21
|
|
|
21
22
|
logger = logging.getLogger(__name__)
|
|
22
23
|
|
|
@@ -67,7 +68,7 @@ class ImageFilterStage(RayActorStage):
|
|
|
67
68
|
|
|
68
69
|
# Remove the "filter" task from the message to obtain task-specific configuration.
|
|
69
70
|
task_config = remove_task_by_type(control_message, "filter")
|
|
70
|
-
logger.debug("Extracted task config: %s", task_config)
|
|
71
|
+
logger.debug("Extracted task config: %s", sanitize_for_logging(task_config))
|
|
71
72
|
|
|
72
73
|
task_params: Dict[str, Any] = task_config.get("params", {})
|
|
73
74
|
|
|
@@ -29,6 +29,7 @@ from nv_ingest_api.internal.schemas.meta.ingest_job_schema import validate_inges
|
|
|
29
29
|
# Import clients
|
|
30
30
|
from nv_ingest_api.util.message_brokers.simple_message_broker.simple_client import SimpleClient
|
|
31
31
|
from nv_ingest_api.util.service_clients.redis.redis_client import RedisClient
|
|
32
|
+
from nv_ingest_api.util.logging.sanitize import sanitize_for_logging
|
|
32
33
|
|
|
33
34
|
logger = logging.getLogger(__name__)
|
|
34
35
|
|
|
@@ -104,8 +105,11 @@ class MessageBrokerTaskSourceStage(RayActorSourceStage):
|
|
|
104
105
|
def __init__(self, config: MessageBrokerTaskSourceConfig, stage_name: Optional[str] = None) -> None:
|
|
105
106
|
super().__init__(config, log_to_stdout=False, stage_name=stage_name)
|
|
106
107
|
self.config: MessageBrokerTaskSourceConfig # Add a type hint for self.config
|
|
108
|
+
|
|
109
|
+
# Sanitize config before logging to avoid leaking secrets
|
|
110
|
+
_sanitized = sanitize_for_logging(config)
|
|
107
111
|
self._logger.debug(
|
|
108
|
-
"Initializing MessageBrokerTaskSourceStage with config: %s",
|
|
112
|
+
"Initializing MessageBrokerTaskSourceStage with config: %s", _sanitized
|
|
109
113
|
) # Log validated config
|
|
110
114
|
|
|
111
115
|
# Access validated configuration directly via self.config
|
|
@@ -16,6 +16,7 @@ from nv_ingest_api.internal.store.embed_text_upload import store_text_embeddings
|
|
|
16
16
|
from nv_ingest_api.util.exception_handlers.decorators import (
|
|
17
17
|
nv_ingest_node_failure_try_except,
|
|
18
18
|
)
|
|
19
|
+
from nv_ingest_api.util.logging.sanitize import sanitize_for_logging
|
|
19
20
|
|
|
20
21
|
from nv_ingest.framework.util.flow_control.udf_intercept import udf_intercept_hook
|
|
21
22
|
|
|
@@ -68,7 +69,7 @@ class EmbeddingStorageStage(RayActorStage):
|
|
|
68
69
|
|
|
69
70
|
# Remove the "store_embedding" task from the message to obtain task-specific configuration.
|
|
70
71
|
task_config = remove_task_by_type(control_message, "store_embedding")
|
|
71
|
-
logger.debug("Extracted task config: %s", task_config)
|
|
72
|
+
logger.debug("Extracted task config: %s", sanitize_for_logging(task_config))
|
|
72
73
|
|
|
73
74
|
# Perform embedding storage.
|
|
74
75
|
new_df = store_text_embeddings_internal(
|
|
@@ -18,6 +18,7 @@ from nv_ingest_api.internal.transform.caption_image import transform_image_creat
|
|
|
18
18
|
from nv_ingest_api.util.exception_handlers.decorators import (
|
|
19
19
|
nv_ingest_node_failure_try_except,
|
|
20
20
|
)
|
|
21
|
+
from nv_ingest_api.util.logging.sanitize import sanitize_for_logging
|
|
21
22
|
|
|
22
23
|
logger = logging.getLogger(__name__)
|
|
23
24
|
|
|
@@ -67,7 +68,10 @@ class ImageCaptionTransformStage(RayActorStage):
|
|
|
67
68
|
|
|
68
69
|
# Remove the "caption" task to obtain task-specific configuration.
|
|
69
70
|
task_config = remove_task_by_type(control_message, "caption")
|
|
70
|
-
logger.debug(
|
|
71
|
+
logger.debug(
|
|
72
|
+
"ImageCaptionTransformStage: Task configuration extracted: %s",
|
|
73
|
+
pprint.pformat(sanitize_for_logging(task_config)),
|
|
74
|
+
)
|
|
71
75
|
|
|
72
76
|
# Call the caption extraction function.
|
|
73
77
|
new_df = transform_image_create_vlm_caption_internal(
|
|
@@ -15,6 +15,7 @@ from nv_ingest_api.internal.transform.embed_text import transform_create_text_em
|
|
|
15
15
|
from nv_ingest_api.util.exception_handlers.decorators import (
|
|
16
16
|
nv_ingest_node_failure_try_except,
|
|
17
17
|
)
|
|
18
|
+
from nv_ingest_api.util.logging.sanitize import sanitize_for_logging
|
|
18
19
|
|
|
19
20
|
from nv_ingest.framework.util.flow_control.udf_intercept import udf_intercept_hook
|
|
20
21
|
|
|
@@ -62,7 +63,10 @@ class TextEmbeddingTransformStage(RayActorStage):
|
|
|
62
63
|
|
|
63
64
|
# Remove the "embed" task to obtain task-specific configuration.
|
|
64
65
|
task_config = remove_task_by_type(control_message, "embed")
|
|
65
|
-
self._logger.debug(
|
|
66
|
+
self._logger.debug(
|
|
67
|
+
"TextEmbeddingTransformStage: Task configuration extracted: %s",
|
|
68
|
+
pprint.pformat(sanitize_for_logging(task_config)),
|
|
69
|
+
)
|
|
66
70
|
|
|
67
71
|
# Call the text embedding extraction function.
|
|
68
72
|
new_df, execution_trace_log = transform_create_text_embeddings_internal(
|
|
@@ -16,6 +16,7 @@ from nv_ingest_api.internal.transform.split_text import transform_text_split_and
|
|
|
16
16
|
from nv_ingest_api.util.exception_handlers.decorators import (
|
|
17
17
|
nv_ingest_node_failure_try_except,
|
|
18
18
|
)
|
|
19
|
+
from nv_ingest_api.util.logging.sanitize import sanitize_for_logging
|
|
19
20
|
|
|
20
21
|
from nv_ingest.framework.util.flow_control.udf_intercept import udf_intercept_hook
|
|
21
22
|
|
|
@@ -36,7 +37,7 @@ class TextSplitterStage(RayActorStage):
|
|
|
36
37
|
super().__init__(config, stage_name=stage_name)
|
|
37
38
|
# Store the validated configuration (assumed to be an instance of TextSplitterSchema)
|
|
38
39
|
self.validated_config: TextSplitterSchema = config
|
|
39
|
-
logger.
|
|
40
|
+
logger.info("TextSplitterStage initialized with config: %s", sanitize_for_logging(config))
|
|
40
41
|
|
|
41
42
|
@nv_ingest_node_failure_try_except()
|
|
42
43
|
@traceable()
|
|
@@ -63,7 +64,7 @@ class TextSplitterStage(RayActorStage):
|
|
|
63
64
|
|
|
64
65
|
# Remove the "split" task to obtain task-specific configuration.
|
|
65
66
|
task_config = remove_task_by_type(message, "split")
|
|
66
|
-
logger.debug("Extracted task config: %s", task_config)
|
|
67
|
+
logger.debug("Extracted task config: %s", sanitize_for_logging(task_config))
|
|
67
68
|
|
|
68
69
|
# Transform the DataFrame (split text and tokenize).
|
|
69
70
|
df_updated = transform_text_split_and_tokenize_internal(
|
|
@@ -107,7 +108,7 @@ def text_splitter_fn(control_message: IngestControlMessage, stage_config: TextSp
|
|
|
107
108
|
|
|
108
109
|
# Remove the "split" task to obtain task-specific configuration.
|
|
109
110
|
task_config = remove_task_by_type(control_message, "split")
|
|
110
|
-
logger.debug("Extracted task config: %s", task_config)
|
|
111
|
+
logger.debug("Extracted task config: %s", sanitize_for_logging(task_config))
|
|
111
112
|
|
|
112
113
|
# Transform the DataFrame (split text and tokenize).
|
|
113
114
|
df_updated = transform_text_split_and_tokenize_internal(
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/api/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/api/v1/__init__.py
RENAMED
|
File without changes
|
{nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/api/v1/health.py
RENAMED
|
File without changes
|
{nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/api/v1/ingest.py
RENAMED
|
File without changes
|
{nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/api/v1/metrics.py
RENAMED
|
File without changes
|
{nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/framework/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest/pipeline/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest.egg-info/SOURCES.txt
RENAMED
|
File without changes
|
|
File without changes
|
{nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest.egg-info/requires.txt
RENAMED
|
File without changes
|
{nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.22.dev20250822}/nv_ingest.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|