nv-ingest 2025.10.22.dev20251022__tar.gz → 2025.10.23.dev20251023__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nv-ingest might be problematic. Click here for more details.
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/PKG-INFO +1 -1
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/api/v2/ingest.py +58 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/pipeline/default_pipeline_impl.py +1 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest.egg-info/PKG-INFO +1 -1
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/LICENSE +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/MANIFEST.in +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/__init__.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/api/__init__.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/api/main.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/api/tracing.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/api/v1/__init__.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/api/v1/health.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/api/v1/ingest.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/api/v1/metrics.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/api/v2/README.md +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/api/v2/__init__.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/__init__.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/__init__.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/execution/__init__.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/execution/helpers.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/execution/options.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/process/__init__.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/process/dependent_services.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/process/execution.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/process/lifecycle.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/process/strategies.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/process/termination.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/__init__.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/edges/__init__.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/edges/async_queue_edge.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/edges/ray_queue_edge.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/edges/threaded_queue_edge.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/examples/__init__.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/examples/pipeline_test_harness.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/examples/task_source_harness.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/examples/task_source_sink_harness.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/primitives/__init__.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/primitives/dataclasses.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/primitives/pipeline_monitor.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/primitives/pipeline_topology.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/primitives/ray_pipeline.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/primitives/ray_stat_collector.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/__init__.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/extractors/__init__.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/extractors/audio_extractor.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/extractors/chart_extractor.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/extractors/docx_extractor.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/extractors/html_extractor.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/extractors/image_extractor.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/extractors/infographic_extractor.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/extractors/pdf_extractor.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/extractors/pptx_extractor.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/extractors/table_extractor.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/injectors/__init__.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/injectors/metadata_injector.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/meta/__init__.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_edge_base.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_sink_stage_base.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_source_stage_base.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_stage_base.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/mutate/__init__.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/mutate/image_dedup.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/mutate/image_filter.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/sinks/__init__.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/sinks/default_drain.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/sinks/message_broker_task_sink.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/sources/__init__.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/sources/message_broker_task_source.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/storage/__init__.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/storage/image_storage.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/storage/store_embeddings.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/telemetry/__init__.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/telemetry/job_counter.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/telemetry/otel_meter.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/telemetry/otel_tracer.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/transforms/__init__.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/transforms/image_caption.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/transforms/text_embed.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/transforms/text_splitter.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/utility/__init__.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/utility/throughput_monitor.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/util/__init__.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/util/env_config.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/util/pipeline/__init__.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/util/pipeline/pid_controller.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_runners.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/util/pipeline/tools.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/util/system_tools/__init__.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/util/system_tools/memory.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/util/system_tools/visualizers.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/schemas/__init__.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/schemas/framework_ingest_config_schema.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/schemas/framework_job_counter_schema.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/schemas/framework_message_broker_sink_schema.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/schemas/framework_message_broker_source_schema.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/schemas/framework_message_wrapper_schema.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/schemas/framework_metadata_injector_schema.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/schemas/framework_otel_meter_schema.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/schemas/framework_otel_tracer_schema.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/schemas/framework_processing_job_schema.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/schemas/framework_task_injection_schema.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/schemas/framework_vdb_task_sink_schema.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/util/__init__.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/util/flow_control/__init__.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/util/flow_control/filter_by_task.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/util/flow_control/udf_intercept.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/util/service/__init__.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/util/service/impl/__init__.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/util/service/impl/ingest/__init__.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/util/service/impl/ingest/redis_ingest_service.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/util/service/meta/__init__.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/util/service/meta/ingest/__init__.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/util/service/meta/ingest/ingest_service_meta.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/util/telemetry/__init__.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/util/telemetry/global_stats.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/pipeline/__init__.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/pipeline/config/__init__.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/pipeline/config/loaders.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/pipeline/config/replica_resolver.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/pipeline/default_libmode_pipeline_impl.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/pipeline/ingest_pipeline.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/pipeline/pipeline_schema.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/version.py +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest.egg-info/SOURCES.txt +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest.egg-info/dependency_links.txt +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest.egg-info/requires.txt +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest.egg-info/top_level.txt +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/pyproject.toml +0 -0
- {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/setup.cfg +0 -0
{nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/api/v2/ingest.py
RENAMED
|
@@ -672,11 +672,15 @@ async def submit_job_v2(
|
|
|
672
672
|
original_source_id = source_ids[0] if source_ids else "unknown_source.pdf"
|
|
673
673
|
original_source_name = source_names[0] if source_names else "unknown_source.pdf"
|
|
674
674
|
|
|
675
|
+
# Track page count for all PDFs (used for both splitting logic and metadata)
|
|
676
|
+
pdf_page_count_cache = None
|
|
677
|
+
|
|
675
678
|
# Check if this is a PDF that needs splitting
|
|
676
679
|
if document_types and payloads and document_types[0].lower() == "pdf":
|
|
677
680
|
# Decode the payload to check page count
|
|
678
681
|
pdf_content = base64.b64decode(payloads[0])
|
|
679
682
|
page_count = get_pdf_page_count(pdf_content)
|
|
683
|
+
pdf_page_count_cache = page_count # Cache for later use
|
|
680
684
|
pages_per_chunk = get_pdf_split_page_count(client_override=client_split_page_count)
|
|
681
685
|
|
|
682
686
|
# Split if the document has more pages than our chunk size
|
|
@@ -762,6 +766,34 @@ async def submit_job_v2(
|
|
|
762
766
|
await ingest_service.submit_job(updated_job_spec, parent_job_id)
|
|
763
767
|
await ingest_service.set_job_state(parent_job_id, STATE_SUBMITTED)
|
|
764
768
|
|
|
769
|
+
# If this was a PDF (even if not split), store page count metadata for tracking
|
|
770
|
+
if pdf_page_count_cache is not None:
|
|
771
|
+
try:
|
|
772
|
+
# Use cached page count from earlier check to avoid re-decoding
|
|
773
|
+
# Store minimal metadata for non-split PDFs (consistent with split PDFs)
|
|
774
|
+
single_pdf_metadata: Dict[str, Any] = {
|
|
775
|
+
"total_pages": pdf_page_count_cache,
|
|
776
|
+
"pages_per_chunk": pdf_page_count_cache, # Single chunk = entire document
|
|
777
|
+
"original_source_id": original_source_id,
|
|
778
|
+
"original_source_name": original_source_name,
|
|
779
|
+
"document_type": document_types[0],
|
|
780
|
+
"subjob_order": [], # No subjobs for non-split PDFs
|
|
781
|
+
}
|
|
782
|
+
|
|
783
|
+
# Store as parent job metadata with empty subjob list for consistency
|
|
784
|
+
await ingest_service.set_parent_job_mapping(
|
|
785
|
+
parent_job_id,
|
|
786
|
+
[], # Empty subjob list
|
|
787
|
+
single_pdf_metadata,
|
|
788
|
+
subjob_descriptors=[],
|
|
789
|
+
)
|
|
790
|
+
logger.debug(
|
|
791
|
+
f"Stored page count metadata for non-split PDF {original_source_name}: {pdf_page_count_cache} pages"
|
|
792
|
+
)
|
|
793
|
+
except Exception as metadata_err:
|
|
794
|
+
# Don't fail the job if metadata storage fails
|
|
795
|
+
logger.warning(f"Failed to store page count metadata for {parent_job_id}: {metadata_err}")
|
|
796
|
+
|
|
765
797
|
response.headers["x-trace-id"] = trace.format_trace_id(current_trace_id)
|
|
766
798
|
return parent_job_id
|
|
767
799
|
|
|
@@ -898,6 +930,32 @@ async def fetch_job_v2(job_id: str, ingest_service: INGEST_SERVICE_T):
|
|
|
898
930
|
|
|
899
931
|
logger.debug(f"Parent job {job_id} has {len(subjob_ids)} subjobs")
|
|
900
932
|
|
|
933
|
+
# Special case: Non-split PDFs have metadata but no subjobs
|
|
934
|
+
# Fetch the result directly and augment with page count metadata
|
|
935
|
+
if len(subjob_ids) == 0:
|
|
936
|
+
logger.debug(f"Job {job_id} is a non-split PDF, fetching result directly")
|
|
937
|
+
try:
|
|
938
|
+
job_response = await ingest_service.fetch_job(job_id)
|
|
939
|
+
|
|
940
|
+
# Augment response with page count metadata
|
|
941
|
+
if isinstance(job_response, dict):
|
|
942
|
+
if "metadata" not in job_response:
|
|
943
|
+
job_response["metadata"] = {}
|
|
944
|
+
job_response["metadata"]["total_pages"] = metadata.get("total_pages")
|
|
945
|
+
job_response["metadata"]["original_source_id"] = metadata.get("original_source_id")
|
|
946
|
+
job_response["metadata"]["original_source_name"] = metadata.get("original_source_name")
|
|
947
|
+
|
|
948
|
+
# Update job state after successful fetch
|
|
949
|
+
await _update_job_state_after_fetch(job_id, ingest_service)
|
|
950
|
+
|
|
951
|
+
return _stream_json_response(job_response)
|
|
952
|
+
except (TimeoutError, RedisError, ConnectionError):
|
|
953
|
+
logger.debug(f"Job {job_id} (non-split PDF) not ready yet")
|
|
954
|
+
raise HTTPException(status_code=202, detail="Job is processing. Retry later.")
|
|
955
|
+
except Exception as e:
|
|
956
|
+
logger.exception(f"Error fetching non-split PDF job {job_id}: {e}")
|
|
957
|
+
raise HTTPException(status_code=500, detail="Internal server error during job fetch.")
|
|
958
|
+
|
|
901
959
|
# Build ordered descriptors for subjobs
|
|
902
960
|
stored_descriptors = subjob_info.get("subjob_descriptors") or []
|
|
903
961
|
descriptor_lookup = {entry.get("job_id"): entry for entry in stored_descriptors if isinstance(entry, dict)}
|
|
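{nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/pipeline/default_pipeline_impl.py
RENAMED
|
@@ -318,6 +318,7 @@ stages: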
|
|
|
318
318
|
config:
|
|
319
319
|
api_key: $NGC_API_KEY|$NVIDIA_API_KEY
|
|
320
320
|
model_name: $VLM_CAPTION_MODEL_NAME|"nvidia/llama-3.1-nemotron-nano-vl-8b-v1"
|
|
321
|
+
endpoint_url: $VLM_CAPTION_ENDPOINT|"http://vlm:8000/v1/chat/completions"
|
|
321
322
|
prompt: "Caption the content of this image:"
|
|
322
323
|
replicas:
|
|
323
324
|
min_replicas: 0
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/api/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/api/tracing.py
RENAMED
|
File without changes
|
{nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/api/v1/__init__.py
RENAMED
|
File without changes
|
{nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/api/v1/health.py
RENAMED
|
File without changes
|
{nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/api/v1/ingest.py
RENAMED
|
File without changes
|
{nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/api/v1/metrics.py
RENAMED
|
File without changes
|
{nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/api/v2/README.md
RENAMED
|
File without changes
|
{nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/api/v2/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/pipeline/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest.egg-info/SOURCES.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|