nv-ingest 2025.10.28.dev20251028__tar.gz → 2025.10.29.dev20251029__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nv-ingest might be problematic. Click here for more details.
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/PKG-INFO +1 -1
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/api/v2/ingest.py +72 -1
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/process/dependent_services.py +17 -10
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/process/strategies.py +6 -2
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/util/service/impl/ingest/redis_ingest_service.py +4 -4
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest.egg-info/PKG-INFO +1 -1
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/LICENSE +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/MANIFEST.in +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/__init__.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/api/__init__.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/api/main.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/api/tracing.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/api/v1/__init__.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/api/v1/health.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/api/v1/ingest.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/api/v1/metrics.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/api/v2/README.md +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/api/v2/__init__.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/__init__.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/__init__.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/execution/__init__.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/execution/helpers.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/execution/options.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/process/__init__.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/process/execution.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/process/lifecycle.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/process/termination.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/__init__.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/edges/__init__.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/edges/async_queue_edge.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/edges/ray_queue_edge.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/edges/threaded_queue_edge.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/examples/__init__.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/examples/pipeline_test_harness.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/examples/task_source_harness.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/examples/task_source_sink_harness.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/primitives/__init__.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/primitives/dataclasses.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/primitives/pipeline_monitor.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/primitives/pipeline_topology.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/primitives/ray_pipeline.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/primitives/ray_stat_collector.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/__init__.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/extractors/__init__.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/extractors/audio_extractor.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/extractors/chart_extractor.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/extractors/docx_extractor.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/extractors/html_extractor.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/extractors/image_extractor.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/extractors/infographic_extractor.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/extractors/pdf_extractor.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/extractors/pptx_extractor.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/extractors/table_extractor.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/injectors/__init__.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/injectors/metadata_injector.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/meta/__init__.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_edge_base.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_sink_stage_base.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_source_stage_base.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_stage_base.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/mutate/__init__.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/mutate/image_dedup.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/mutate/image_filter.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/sinks/__init__.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/sinks/default_drain.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/sinks/message_broker_task_sink.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/sources/__init__.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/sources/message_broker_task_source.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/storage/__init__.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/storage/image_storage.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/storage/store_embeddings.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/telemetry/__init__.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/telemetry/job_counter.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/telemetry/otel_meter.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/telemetry/otel_tracer.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/transforms/__init__.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/transforms/image_caption.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/transforms/text_embed.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/transforms/text_splitter.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/utility/__init__.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/utility/throughput_monitor.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/util/__init__.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/util/env_config.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/util/pipeline/__init__.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/util/pipeline/pid_controller.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_runners.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/util/pipeline/tools.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/util/system_tools/__init__.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/util/system_tools/memory.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/util/system_tools/visualizers.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/schemas/__init__.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/schemas/framework_ingest_config_schema.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/schemas/framework_job_counter_schema.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/schemas/framework_message_broker_sink_schema.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/schemas/framework_message_broker_source_schema.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/schemas/framework_message_wrapper_schema.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/schemas/framework_metadata_injector_schema.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/schemas/framework_otel_meter_schema.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/schemas/framework_otel_tracer_schema.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/schemas/framework_processing_job_schema.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/schemas/framework_task_injection_schema.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/schemas/framework_vdb_task_sink_schema.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/util/__init__.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/util/flow_control/__init__.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/util/flow_control/filter_by_task.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/util/flow_control/udf_intercept.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/util/service/__init__.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/util/service/impl/__init__.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/util/service/impl/ingest/__init__.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/util/service/meta/__init__.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/util/service/meta/ingest/__init__.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/util/service/meta/ingest/ingest_service_meta.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/util/telemetry/__init__.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/util/telemetry/global_stats.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/pipeline/__init__.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/pipeline/config/__init__.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/pipeline/config/loaders.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/pipeline/config/replica_resolver.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/pipeline/default_libmode_pipeline_impl.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/pipeline/default_pipeline_impl.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/pipeline/ingest_pipeline.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/pipeline/pipeline_schema.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/version.py +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest.egg-info/SOURCES.txt +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest.egg-info/dependency_links.txt +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest.egg-info/requires.txt +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest.egg-info/top_level.txt +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/pyproject.toml +0 -0
- {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/setup.cfg +0 -0
{nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/api/v2/ingest.py
RENAMED
|
@@ -432,6 +432,76 @@ def _extract_ray_telemetry(result: Dict[str, Any]) -> Tuple[Optional[Dict[str, A
|
|
|
432
432
|
return trace_dict, annotations_dict
|
|
433
433
|
|
|
434
434
|
|
|
435
|
+
def _normalize_chunk_records(
|
|
436
|
+
records: Optional[List[Any]],
|
|
437
|
+
descriptor: Dict[str, Any],
|
|
438
|
+
parent_metadata: Dict[str, Any],
|
|
439
|
+
) -> List[Any]:
|
|
440
|
+
"""Re-map chunk-local metadata to document-level context for aggregation."""
|
|
441
|
+
|
|
442
|
+
if not isinstance(records, list):
|
|
443
|
+
return []
|
|
444
|
+
|
|
445
|
+
total_pages = parent_metadata.get("total_pages")
|
|
446
|
+
original_source_id = parent_metadata.get("original_source_id")
|
|
447
|
+
original_source_name = parent_metadata.get("original_source_name")
|
|
448
|
+
|
|
449
|
+
start_page = descriptor.get("start_page")
|
|
450
|
+
page_offset = start_page - 1 if isinstance(start_page, int) and start_page > 0 else 0
|
|
451
|
+
|
|
452
|
+
normalized_entries: List[Any] = []
|
|
453
|
+
|
|
454
|
+
for entry in records:
|
|
455
|
+
if not isinstance(entry, dict):
|
|
456
|
+
normalized_entries.append(entry)
|
|
457
|
+
continue
|
|
458
|
+
|
|
459
|
+
normalized_entry = entry.copy()
|
|
460
|
+
original_metadata = entry.get("metadata")
|
|
461
|
+
|
|
462
|
+
if isinstance(original_metadata, dict):
|
|
463
|
+
normalized_metadata = original_metadata.copy()
|
|
464
|
+
normalized_entry["metadata"] = normalized_metadata
|
|
465
|
+
|
|
466
|
+
original_source_meta = original_metadata.get("source_metadata")
|
|
467
|
+
if isinstance(original_source_meta, dict):
|
|
468
|
+
normalized_source_meta = original_source_meta.copy()
|
|
469
|
+
normalized_metadata["source_metadata"] = normalized_source_meta
|
|
470
|
+
|
|
471
|
+
if original_source_id:
|
|
472
|
+
normalized_source_meta["source_id"] = original_source_id
|
|
473
|
+
if original_source_name:
|
|
474
|
+
normalized_source_meta["source_name"] = original_source_name
|
|
475
|
+
|
|
476
|
+
original_content_meta = original_metadata.get("content_metadata")
|
|
477
|
+
if isinstance(original_content_meta, dict):
|
|
478
|
+
normalized_content_meta = original_content_meta.copy()
|
|
479
|
+
normalized_metadata["content_metadata"] = normalized_content_meta
|
|
480
|
+
|
|
481
|
+
page_number = normalized_content_meta.get("page_number")
|
|
482
|
+
if isinstance(page_number, int) and page_number >= 0:
|
|
483
|
+
normalized_content_meta["page_number"] = page_number + page_offset
|
|
484
|
+
|
|
485
|
+
if isinstance(total_pages, int) and isinstance(normalized_content_meta.get("page_count"), int):
|
|
486
|
+
# Ensure optional per-record page count reflects the full document
|
|
487
|
+
normalized_content_meta["page_count"] = total_pages
|
|
488
|
+
|
|
489
|
+
original_hierarchy = original_content_meta.get("hierarchy")
|
|
490
|
+
if isinstance(original_hierarchy, dict):
|
|
491
|
+
normalized_hierarchy = original_hierarchy.copy()
|
|
492
|
+
normalized_content_meta["hierarchy"] = normalized_hierarchy
|
|
493
|
+
|
|
494
|
+
hierarchy_page = normalized_hierarchy.get("page")
|
|
495
|
+
if isinstance(hierarchy_page, int) and hierarchy_page >= 0:
|
|
496
|
+
normalized_hierarchy["page"] = hierarchy_page + page_offset
|
|
497
|
+
if isinstance(total_pages, int):
|
|
498
|
+
normalized_hierarchy["page_count"] = total_pages
|
|
499
|
+
|
|
500
|
+
normalized_entries.append(normalized_entry)
|
|
501
|
+
|
|
502
|
+
return normalized_entries
|
|
503
|
+
|
|
504
|
+
|
|
435
505
|
def _aggregate_parent_traces(chunk_traces: Dict[str, Any]) -> Dict[str, Any]:
|
|
436
506
|
"""
|
|
437
507
|
Aggregate chunk-level traces into parent-level metrics.
|
|
@@ -574,7 +644,8 @@ def _build_aggregated_response(
|
|
|
574
644
|
if result is not None:
|
|
575
645
|
# Add page data to aggregated result
|
|
576
646
|
if "data" in result:
|
|
577
|
-
|
|
647
|
+
normalized_records = _normalize_chunk_records(result.get("data"), descriptor, metadata)
|
|
648
|
+
aggregated_result["data"].extend(normalized_records)
|
|
578
649
|
chunk_entry = dict(descriptor)
|
|
579
650
|
aggregated_result["metadata"]["chunks"].append(chunk_entry)
|
|
580
651
|
|
|
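{nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/process/dependent_services.py
RENAMED
|
@@ -18,6 +18,18 @@ from nv_ingest_api.util.message_brokers.simple_message_broker.broker import Simp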
|
|
|
18
18
|
logger = logging.getLogger(__name__)
|
|
19
19
|
|
|
20
20
|
|
|
21
|
+
def _broker_server_target(host, port, max_queue_size):
|
|
22
|
+
"""
|
|
23
|
+
Target function to be run in a separate process for the SimpleMessageBroker.
|
|
24
|
+
"""
|
|
25
|
+
server = SimpleMessageBroker(host, port, max_queue_size)
|
|
26
|
+
try:
|
|
27
|
+
server.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
|
|
28
|
+
except Exception:
|
|
29
|
+
pass
|
|
30
|
+
server.serve_forever()
|
|
31
|
+
|
|
32
|
+
|
|
21
33
|
def start_simple_message_broker(broker_client: dict) -> multiprocessing.Process:
|
|
22
34
|
"""
|
|
23
35
|
Starts a SimpleMessageBroker server in a separate process.
|
|
@@ -58,16 +70,11 @@ def start_simple_message_broker(broker_client: dict) -> multiprocessing.Process:
|
|
|
58
70
|
f"continuing to spawn a broker process (tests expect a Process to be returned)"
|
|
59
71
|
)
|
|
60
72
|
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
except Exception:
|
|
67
|
-
pass
|
|
68
|
-
server.serve_forever()
|
|
69
|
-
|
|
70
|
-
p = multiprocessing.Process(target=broker_server)
|
|
73
|
+
p = multiprocessing.Process(
|
|
74
|
+
target=_broker_server_target,
|
|
75
|
+
args=(server_host, server_port, max_queue_size),
|
|
76
|
+
daemon=True,
|
|
77
|
+
)
|
|
71
78
|
# If we're launching from inside the pipeline subprocess, mark daemon so the
|
|
72
79
|
# broker dies automatically when the subprocess exits.
|
|
73
80
|
p.daemon = os.environ.get("NV_INGEST_BROKER_IN_SUBPROCESS") == "1"
|
|
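{nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/process/strategies.py
RENAMED
|
@@ -11,9 +11,10 @@ Strategy pattern for clean separation of execution concerns.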
|
|
|
11
11
|
"""
|
|
12
12
|
|
|
13
13
|
import atexit
|
|
14
|
-
import os
|
|
15
14
|
import logging
|
|
16
15
|
import multiprocessing
|
|
16
|
+
import os
|
|
17
|
+
import sys
|
|
17
18
|
import time
|
|
18
19
|
from abc import ABC, abstractmethod
|
|
19
20
|
|
|
@@ -132,7 +133,10 @@ class SubprocessStrategy(ProcessExecutionStrategy):
|
|
|
132
133
|
logger.info("Launching pipeline in Python subprocess using multiprocessing.")
|
|
133
134
|
|
|
134
135
|
# Create subprocess using fork context
|
|
135
|
-
|
|
136
|
+
start_method = "fork"
|
|
137
|
+
if sys.platform.lower() == "darwin":
|
|
138
|
+
start_method = "spawn"
|
|
139
|
+
ctx = multiprocessing.get_context(start_method)
|
|
136
140
|
process = ctx.Process(
|
|
137
141
|
target=run_pipeline_process,
|
|
138
142
|
args=(
|
|
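{nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/util/service/impl/ingest/redis_ingest_service.py
RENAMED
|
@@ -501,21 +501,21 @@ class RedisIngestService(IngestServiceMeta):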
|
|
|
501
501
|
metadata_key = f"parent:{parent_job_id}:metadata"
|
|
502
502
|
|
|
503
503
|
try:
|
|
504
|
-
# Check if this is a parent job
|
|
504
|
+
# Check if this is a parent job (check metadata_key since non-split PDFs may not have parent_key)
|
|
505
505
|
exists = await self._run_bounded_to_thread(
|
|
506
506
|
self._ingest_client.get_client().exists,
|
|
507
|
-
parent_key
|
|
507
|
+
metadata_key, # Check metadata instead of parent_key for non-split PDF support
|
|
508
508
|
)
|
|
509
509
|
|
|
510
510
|
if not exists:
|
|
511
511
|
return None
|
|
512
512
|
|
|
513
|
-
# Get subjob IDs
|
|
513
|
+
# Get subjob IDs (may be empty for non-split PDFs)
|
|
514
514
|
subjob_ids_bytes = await self._run_bounded_to_thread(
|
|
515
515
|
self._ingest_client.get_client().smembers,
|
|
516
516
|
parent_key,
|
|
517
517
|
)
|
|
518
|
-
subjob_id_set = {id.decode("utf-8") for id in subjob_ids_bytes}
|
|
518
|
+
subjob_id_set = {id.decode("utf-8") for id in subjob_ids_bytes} if subjob_ids_bytes else set()
|
|
519
519
|
|
|
520
520
|
# Get metadata
|
|
521
521
|
metadata_dict = await self._run_bounded_to_thread(
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/api/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/api/tracing.py
RENAMED
|
File without changes
|
{nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/api/v1/__init__.py
RENAMED
|
File without changes
|
{nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/api/v1/health.py
RENAMED
|
File without changes
|
{nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/api/v1/ingest.py
RENAMED
|
File without changes
|
{nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/api/v1/metrics.py
RENAMED
|
File without changes
|
{nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/api/v2/README.md
RENAMED
|
File without changes
|
{nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/api/v2/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/pipeline/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest.egg-info/SOURCES.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|