nv-ingest 2025.10.14.dev20251014__tar.gz → 2025.10.16.dev20251016__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nv-ingest might be problematic. Click here for more details.
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/PKG-INFO +1 -1
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/api/v2/ingest.py +30 -4
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/pipeline/default_libmode_pipeline_impl.py +14 -14
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/pipeline/default_pipeline_impl.py +11 -11
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest.egg-info/PKG-INFO +1 -1
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/LICENSE +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/MANIFEST.in +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/__init__.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/api/__init__.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/api/main.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/api/tracing.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/api/v1/__init__.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/api/v1/health.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/api/v1/ingest.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/api/v1/metrics.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/api/v2/README.md +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/api/v2/__init__.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/__init__.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/__init__.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/execution/__init__.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/execution/helpers.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/execution/options.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/process/__init__.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/process/dependent_services.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/process/execution.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/process/lifecycle.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/process/strategies.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/process/termination.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/__init__.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/edges/__init__.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/edges/async_queue_edge.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/edges/ray_queue_edge.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/edges/threaded_queue_edge.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/examples/__init__.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/examples/pipeline_test_harness.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/examples/task_source_harness.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/examples/task_source_sink_harness.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/primitives/__init__.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/primitives/dataclasses.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/primitives/pipeline_monitor.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/primitives/pipeline_topology.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/primitives/ray_pipeline.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/primitives/ray_stat_collector.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/__init__.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/extractors/__init__.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/extractors/audio_extractor.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/extractors/chart_extractor.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/extractors/docx_extractor.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/extractors/html_extractor.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/extractors/image_extractor.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/extractors/infographic_extractor.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/extractors/pdf_extractor.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/extractors/pptx_extractor.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/extractors/table_extractor.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/injectors/__init__.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/injectors/metadata_injector.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/meta/__init__.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_edge_base.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_sink_stage_base.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_source_stage_base.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_stage_base.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/mutate/__init__.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/mutate/image_dedup.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/mutate/image_filter.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/sinks/__init__.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/sinks/default_drain.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/sinks/message_broker_task_sink.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/sources/__init__.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/sources/message_broker_task_source.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/storage/__init__.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/storage/image_storage.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/storage/store_embeddings.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/telemetry/__init__.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/telemetry/job_counter.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/telemetry/otel_meter.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/telemetry/otel_tracer.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/transforms/__init__.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/transforms/image_caption.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/transforms/text_embed.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/transforms/text_splitter.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/utility/__init__.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/utility/throughput_monitor.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/util/__init__.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/util/env_config.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/util/pipeline/__init__.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/util/pipeline/pid_controller.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_runners.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/util/pipeline/tools.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/util/system_tools/__init__.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/util/system_tools/memory.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/util/system_tools/visualizers.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/schemas/__init__.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/schemas/framework_ingest_config_schema.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/schemas/framework_job_counter_schema.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/schemas/framework_message_broker_sink_schema.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/schemas/framework_message_broker_source_schema.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/schemas/framework_message_wrapper_schema.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/schemas/framework_metadata_injector_schema.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/schemas/framework_otel_meter_schema.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/schemas/framework_otel_tracer_schema.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/schemas/framework_processing_job_schema.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/schemas/framework_task_injection_schema.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/schemas/framework_vdb_task_sink_schema.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/util/__init__.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/util/flow_control/__init__.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/util/flow_control/filter_by_task.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/util/flow_control/udf_intercept.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/util/service/__init__.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/util/service/impl/__init__.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/util/service/impl/ingest/__init__.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/util/service/impl/ingest/redis_ingest_service.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/util/service/meta/__init__.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/util/service/meta/ingest/__init__.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/util/service/meta/ingest/ingest_service_meta.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/util/telemetry/__init__.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/util/telemetry/global_stats.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/pipeline/__init__.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/pipeline/config/__init__.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/pipeline/config/loaders.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/pipeline/config/replica_resolver.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/pipeline/ingest_pipeline.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/pipeline/pipeline_schema.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/version.py +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest.egg-info/SOURCES.txt +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest.egg-info/dependency_links.txt +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest.egg-info/requires.txt +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest.egg-info/top_level.txt +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/pyproject.toml +0 -0
- {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/setup.cfg +0 -0
{nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/api/v2/ingest.py
RENAMED
|
@@ -45,9 +45,30 @@ router = APIRouter()
|
|
|
45
45
|
DEFAULT_PDF_SPLIT_PAGE_COUNT = 32
|
|
46
46
|
|
|
47
47
|
|
|
48
|
-
def get_pdf_split_page_count() -> int:
|
|
49
|
-
"""
|
|
48
|
+
def get_pdf_split_page_count(client_override: Optional[int] = None) -> int:
|
|
49
|
+
"""
|
|
50
|
+
Resolve the page chunk size for PDF splitting with client override support.
|
|
50
51
|
|
|
52
|
+
Priority: client_override (clamped) > env var > default (32)
|
|
53
|
+
Enforces boundaries: min=1, max=128
|
|
54
|
+
"""
|
|
55
|
+
MIN_PAGES = 1
|
|
56
|
+
MAX_PAGES = 128
|
|
57
|
+
|
|
58
|
+
# Client override takes precedence if provided
|
|
59
|
+
if client_override is not None:
|
|
60
|
+
clamped = max(MIN_PAGES, min(client_override, MAX_PAGES))
|
|
61
|
+
if clamped != client_override:
|
|
62
|
+
logger.warning(
|
|
63
|
+
"Client requested split_page_count=%s; clamped to %s (min=%s, max=%s)",
|
|
64
|
+
client_override,
|
|
65
|
+
clamped,
|
|
66
|
+
MIN_PAGES,
|
|
67
|
+
MAX_PAGES,
|
|
68
|
+
)
|
|
69
|
+
return clamped
|
|
70
|
+
|
|
71
|
+
# Fall back to environment variable
|
|
51
72
|
raw_value = os.environ.get("PDF_SPLIT_PAGE_COUNT")
|
|
52
73
|
if raw_value is None:
|
|
53
74
|
return DEFAULT_PDF_SPLIT_PAGE_COUNT
|
|
@@ -530,6 +551,10 @@ async def submit_job_v2(
|
|
|
530
551
|
# Parse job spec
|
|
531
552
|
job_spec_dict = json.loads(job_spec.payload)
|
|
532
553
|
|
|
554
|
+
# Extract PDF configuration if provided by client
|
|
555
|
+
pdf_config = job_spec_dict.get("pdf_config", {})
|
|
556
|
+
client_split_page_count = pdf_config.get("split_page_count") if pdf_config else None
|
|
557
|
+
|
|
533
558
|
# Extract document type and payload from the proper structure
|
|
534
559
|
job_payload = job_spec_dict.get("job_payload", {})
|
|
535
560
|
document_types = job_payload.get("document_type", [])
|
|
@@ -546,12 +571,12 @@ async def submit_job_v2(
|
|
|
546
571
|
# Decode the payload to check page count
|
|
547
572
|
pdf_content = base64.b64decode(payloads[0])
|
|
548
573
|
page_count = get_pdf_page_count(pdf_content)
|
|
549
|
-
pages_per_chunk = get_pdf_split_page_count()
|
|
574
|
+
pages_per_chunk = get_pdf_split_page_count(client_override=client_split_page_count)
|
|
550
575
|
|
|
551
576
|
# Split if the document has more pages than our chunk size
|
|
552
577
|
if page_count > pages_per_chunk:
|
|
553
578
|
logger.warning(
|
|
554
|
-
"
|
|
579
|
+
"Splitting PDF %s into %s-page chunks (total pages: %s)",
|
|
555
580
|
original_source_name,
|
|
556
581
|
pages_per_chunk,
|
|
557
582
|
page_count,
|
|
@@ -599,6 +624,7 @@ async def submit_job_v2(
|
|
|
599
624
|
|
|
600
625
|
parent_metadata: Dict[str, Any] = {
|
|
601
626
|
"total_pages": page_count,
|
|
627
|
+
"pages_per_chunk": pages_per_chunk,
|
|
602
628
|
"original_source_id": original_source_id,
|
|
603
629
|
"original_source_name": original_source_name,
|
|
604
630
|
"document_type": document_types[0] if document_types else "pdf",
|
|
@@ -65,14 +65,14 @@ stages:
|
|
|
65
65
|
actor: "nv_ingest.framework.orchestration.ray.stages.extractors.pdf_extractor:PDFExtractorStage"
|
|
66
66
|
config:
|
|
67
67
|
pdfium_config:
|
|
68
|
-
auth_token: $NGC_API_KEY
|
|
68
|
+
auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
|
|
69
69
|
yolox_endpoints: [
|
|
70
70
|
$YOLOX_GRPC_ENDPOINT|"",
|
|
71
71
|
$YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v2"
|
|
72
72
|
]
|
|
73
73
|
yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|http
|
|
74
74
|
nemoretriever_parse_config:
|
|
75
|
-
auth_token: $NGC_API_KEY
|
|
75
|
+
auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
|
|
76
76
|
nemoretriever_parse_endpoints: [
|
|
77
77
|
$NEMORETRIEVER_PARSE_GRPC_ENDPOINT|"",
|
|
78
78
|
$NEMORETRIEVER_PARSE_HTTP_ENDPOINT|"https://integrate.api.nvidia.com/v1/chat/completions"
|
|
@@ -106,7 +106,7 @@ stages:
|
|
|
106
106
|
]
|
|
107
107
|
function_id: $AUDIO_FUNCTION_ID|"1598d209-5e27-4d3c-8079-4751568b1081"
|
|
108
108
|
audio_infer_protocol: $AUDIO_INFER_PROTOCOL|grpc
|
|
109
|
-
auth_token: $NGC_API_KEY
|
|
109
|
+
auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
|
|
110
110
|
replicas:
|
|
111
111
|
min_replicas: 0
|
|
112
112
|
max_replicas:
|
|
@@ -127,7 +127,7 @@ stages:
|
|
|
127
127
|
$YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v2"
|
|
128
128
|
]
|
|
129
129
|
yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|http
|
|
130
|
-
auth_token: $NGC_API_KEY
|
|
130
|
+
auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
|
|
131
131
|
replicas:
|
|
132
132
|
min_replicas: 0
|
|
133
133
|
max_replicas:
|
|
@@ -148,7 +148,7 @@ stages:
|
|
|
148
148
|
$YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v2"
|
|
149
149
|
]
|
|
150
150
|
yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|http
|
|
151
|
-
auth_token: $NGC_API_KEY
|
|
151
|
+
auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
|
|
152
152
|
replicas:
|
|
153
153
|
min_replicas: 0
|
|
154
154
|
max_replicas:
|
|
@@ -169,7 +169,7 @@ stages:
|
|
|
169
169
|
$YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v2"
|
|
170
170
|
]
|
|
171
171
|
yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|http
|
|
172
|
-
auth_token: $NGC_API_KEY
|
|
172
|
+
auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
|
|
173
173
|
replicas:
|
|
174
174
|
min_replicas: 0
|
|
175
175
|
max_replicas:
|
|
@@ -200,11 +200,11 @@ stages:
|
|
|
200
200
|
config:
|
|
201
201
|
endpoint_config:
|
|
202
202
|
ocr_endpoints: [
|
|
203
|
-
$OCR_GRPC_ENDPOINT|"
|
|
204
|
-
$OCR_HTTP_ENDPOINT|""
|
|
203
|
+
$OCR_GRPC_ENDPOINT|"",
|
|
204
|
+
$OCR_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/baidu/paddleocr"
|
|
205
205
|
]
|
|
206
|
-
ocr_infer_protocol: $OCR_INFER_PROTOCOL|
|
|
207
|
-
auth_token: $NGC_API_KEY
|
|
206
|
+
ocr_infer_protocol: $OCR_INFER_PROTOCOL|"http"
|
|
207
|
+
auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
|
|
208
208
|
replicas:
|
|
209
209
|
min_replicas: 0
|
|
210
210
|
max_replicas:
|
|
@@ -230,7 +230,7 @@ stages:
|
|
|
230
230
|
$OCR_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/baidu/paddleocr"
|
|
231
231
|
]
|
|
232
232
|
ocr_infer_protocol: $PADDLE_INFER_PROTOCOL|"http"
|
|
233
|
-
auth_token: $NGC_API_KEY
|
|
233
|
+
auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
|
|
234
234
|
replicas:
|
|
235
235
|
min_replicas: 0
|
|
236
236
|
max_replicas:
|
|
@@ -257,7 +257,7 @@ stages:
|
|
|
257
257
|
$OCR_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/baidu/paddleocr"
|
|
258
258
|
]
|
|
259
259
|
ocr_infer_protocol: $OCR_INFER_PROTOCOL|"http"
|
|
260
|
-
auth_token: $NGC_API_KEY
|
|
260
|
+
auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
|
|
261
261
|
replicas:
|
|
262
262
|
min_replicas: 0
|
|
263
263
|
max_replicas:
|
|
@@ -317,7 +317,7 @@ stages:
|
|
|
317
317
|
phase: 4 # TRANSFORM
|
|
318
318
|
actor: "nv_ingest.framework.orchestration.ray.stages.transforms.image_caption:ImageCaptionTransformStage"
|
|
319
319
|
config:
|
|
320
|
-
api_key: $NGC_API_KEY
|
|
320
|
+
api_key: $NGC_API_KEY|$NVIDIA_API_KEY
|
|
321
321
|
endpoint_url: $VLM_CAPTION_ENDPOINT|"https://integrate.api.nvidia.com/v1/chat/completions"
|
|
322
322
|
model_name: $VLM_CAPTION_MODEL_NAME|"nvidia/llama-3.1-nemotron-nano-vl-8b-v1"
|
|
323
323
|
prompt: "Caption the content of this image:"
|
|
@@ -335,7 +335,7 @@ stages:
|
|
|
335
335
|
phase: 4 # TRANSFORM
|
|
336
336
|
actor: "nv_ingest.framework.orchestration.ray.stages.transforms.text_embed:TextEmbeddingTransformStage"
|
|
337
337
|
config:
|
|
338
|
-
api_key: $NGC_API_KEY
|
|
338
|
+
api_key: $NGC_API_KEY|$NVIDIA_API_KEY
|
|
339
339
|
embedding_model: $EMBEDDING_NIM_MODEL_NAME|"nvidia/llama-3.2-nv-embedqa-1b-v2"
|
|
340
340
|
embedding_nim_endpoint: $EMBEDDING_NIM_ENDPOINT|"https://integrate.api.nvidia.com/v1"
|
|
341
341
|
replicas:
|
|
@@ -64,14 +64,14 @@ stages:
|
|
|
64
64
|
actor: "nv_ingest.framework.orchestration.ray.stages.extractors.pdf_extractor:PDFExtractorStage"
|
|
65
65
|
config:
|
|
66
66
|
pdfium_config:
|
|
67
|
-
auth_token: $NGC_API_KEY
|
|
67
|
+
auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
|
|
68
68
|
yolox_endpoints: [
|
|
69
69
|
$YOLOX_GRPC_ENDPOINT|"page-elements:8001",
|
|
70
70
|
$YOLOX_HTTP_ENDPOINT|"http://page-elements:8000/v1/infer",
|
|
71
71
|
]
|
|
72
72
|
yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|grpc
|
|
73
73
|
nemoretriever_parse_config:
|
|
74
|
-
auth_token: $NGC_API_KEY
|
|
74
|
+
auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
|
|
75
75
|
nemoretriever_parse_endpoints: [
|
|
76
76
|
$NEMORETRIEVER_PARSE_GRPC_ENDPOINT|"",
|
|
77
77
|
$NEMORETRIEVER_PARSE_HTTP_ENDPOINT|"http://nemoretriever-parse:8000/v1/chat/completions",
|
|
@@ -105,7 +105,7 @@ stages:
|
|
|
105
105
|
]
|
|
106
106
|
function_id: $AUDIO_FUNCTION_ID|""
|
|
107
107
|
audio_infer_protocol: $AUDIO_INFER_PROTOCOL|grpc
|
|
108
|
-
auth_token: $NGC_API_KEY
|
|
108
|
+
auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
|
|
109
109
|
replicas:
|
|
110
110
|
min_replicas: 0
|
|
111
111
|
max_replicas:
|
|
@@ -126,7 +126,7 @@ stages:
|
|
|
126
126
|
$YOLOX_HTTP_ENDPOINT|"",
|
|
127
127
|
]
|
|
128
128
|
yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|grpc
|
|
129
|
-
auth_token: $NGC_API_KEY
|
|
129
|
+
auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
|
|
130
130
|
replicas:
|
|
131
131
|
min_replicas: 0
|
|
132
132
|
max_replicas:
|
|
@@ -147,7 +147,7 @@ stages:
|
|
|
147
147
|
$YOLOX_HTTP_ENDPOINT|"http://page-elements:8000/v1/infer",
|
|
148
148
|
]
|
|
149
149
|
yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|grpc
|
|
150
|
-
auth_token: $NGC_API_KEY
|
|
150
|
+
auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
|
|
151
151
|
replicas:
|
|
152
152
|
min_replicas: 0
|
|
153
153
|
max_replicas:
|
|
@@ -168,7 +168,7 @@ stages:
|
|
|
168
168
|
$YOLOX_HTTP_ENDPOINT|"http://page-elements:8000/v1/infer",
|
|
169
169
|
]
|
|
170
170
|
yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|grpc
|
|
171
|
-
auth_token: $NGC_API_KEY
|
|
171
|
+
auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
|
|
172
172
|
replicas:
|
|
173
173
|
min_replicas: 0
|
|
174
174
|
max_replicas:
|
|
@@ -203,7 +203,7 @@ stages:
|
|
|
203
203
|
$OCR_HTTP_ENDPOINT|"http://ocr:8000/v1/infer",
|
|
204
204
|
]
|
|
205
205
|
ocr_infer_protocol: $OCR_INFER_PROTOCOL|grpc
|
|
206
|
-
auth_token: $NGC_API_KEY
|
|
206
|
+
auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
|
|
207
207
|
replicas:
|
|
208
208
|
min_replicas: 0
|
|
209
209
|
max_replicas:
|
|
@@ -229,7 +229,7 @@ stages:
|
|
|
229
229
|
$OCR_HTTP_ENDPOINT|"http://ocr:8000/v1/infer",
|
|
230
230
|
]
|
|
231
231
|
ocr_infer_protocol: $OCR_INFER_PROTOCOL|grpc
|
|
232
|
-
auth_token: $NGC_API_KEY
|
|
232
|
+
auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
|
|
233
233
|
replicas:
|
|
234
234
|
min_replicas: 0
|
|
235
235
|
max_replicas:
|
|
@@ -256,7 +256,7 @@ stages:
|
|
|
256
256
|
$OCR_HTTP_ENDPOINT|""
|
|
257
257
|
]
|
|
258
258
|
ocr_infer_protocol: $OCR_INFER_PROTOCOL|grpc
|
|
259
|
-
auth_token: $NGC_API_KEY
|
|
259
|
+
auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
|
|
260
260
|
replicas:
|
|
261
261
|
min_replicas: 0
|
|
262
262
|
max_replicas:
|
|
@@ -316,7 +316,7 @@ stages:
|
|
|
316
316
|
phase: 4 # TRANSFORM
|
|
317
317
|
actor: "nv_ingest.framework.orchestration.ray.stages.transforms.image_caption:ImageCaptionTransformStage"
|
|
318
318
|
config:
|
|
319
|
-
api_key: $NGC_API_KEY
|
|
319
|
+
api_key: $NGC_API_KEY|$NVIDIA_API_KEY
|
|
320
320
|
model_name: $VLM_CAPTION_MODEL_NAME|"nvidia/llama-3.1-nemotron-nano-vl-8b-v1"
|
|
321
321
|
prompt: "Caption the content of this image:"
|
|
322
322
|
replicas:
|
|
@@ -333,7 +333,7 @@ stages:
|
|
|
333
333
|
phase: 4 # TRANSFORM
|
|
334
334
|
actor: "nv_ingest.framework.orchestration.ray.stages.transforms.text_embed:TextEmbeddingTransformStage"
|
|
335
335
|
config:
|
|
336
|
-
api_key: $NGC_API_KEY
|
|
336
|
+
api_key: $NGC_API_KEY|$NVIDIA_API_KEY
|
|
337
337
|
embedding_model: $EMBEDDING_NIM_MODEL_NAME|"nvidia/llama-3.2-nv-embedqa-1b-v2"
|
|
338
338
|
embedding_nim_endpoint: $EMBEDDING_NIM_ENDPOINT|"http://embedding:8000/v1"
|
|
339
339
|
replicas:
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/api/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/api/tracing.py
RENAMED
|
File without changes
|
{nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/api/v1/__init__.py
RENAMED
|
File without changes
|
{nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/api/v1/health.py
RENAMED
|
File without changes
|
{nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/api/v1/ingest.py
RENAMED
|
File without changes
|
{nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/api/v1/metrics.py
RENAMED
|
File without changes
|
{nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/api/v2/README.md
RENAMED
|
File without changes
|
{nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/api/v2/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/pipeline/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest.egg-info/SOURCES.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|