nv-ingest 2025.11.24.dev20251124__tar.gz → 2025.12.8.dev20251208__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/PKG-INFO +3 -1
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/stages/storage/image_storage.py +72 -6
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/pipeline/default_libmode_pipeline_impl.py +17 -3
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/pipeline/default_pipeline_impl.py +18 -1
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest.egg-info/PKG-INFO +3 -1
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest.egg-info/requires.txt +2 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/pyproject.toml +2 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/LICENSE +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/MANIFEST.in +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/__init__.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/api/__init__.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/api/main.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/api/tracing.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/api/v1/__init__.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/api/v1/health.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/api/v1/ingest.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/api/v1/metrics.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/api/v2/README.md +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/api/v2/__init__.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/api/v2/ingest.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/__init__.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/__init__.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/execution/__init__.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/execution/helpers.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/execution/options.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/process/__init__.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/process/dependent_services.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/process/execution.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/process/lifecycle.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/process/strategies.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/process/termination.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/__init__.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/edges/__init__.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/edges/async_queue_edge.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/edges/ray_queue_edge.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/edges/threaded_queue_edge.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/examples/__init__.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/examples/pipeline_test_harness.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/examples/task_source_harness.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/examples/task_source_sink_harness.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/primitives/__init__.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/primitives/dataclasses.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/primitives/pipeline_monitor.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/primitives/pipeline_topology.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/primitives/ray_pipeline.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/primitives/ray_stat_collector.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/stages/__init__.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/stages/extractors/__init__.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/stages/extractors/audio_extractor.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/stages/extractors/chart_extractor.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/stages/extractors/docx_extractor.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/stages/extractors/html_extractor.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/stages/extractors/image_extractor.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/stages/extractors/infographic_extractor.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/stages/extractors/ocr_extractor.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/stages/extractors/pdf_extractor.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/stages/extractors/pptx_extractor.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/stages/extractors/table_extractor.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/stages/injectors/__init__.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/stages/injectors/metadata_injector.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/stages/meta/__init__.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_edge_base.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_sink_stage_base.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_source_stage_base.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_stage_base.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/stages/mutate/__init__.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/stages/mutate/image_dedup.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/stages/mutate/image_filter.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/stages/sinks/__init__.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/stages/sinks/default_drain.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/stages/sinks/message_broker_task_sink.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/stages/sources/__init__.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/stages/sources/message_broker_task_source.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/stages/storage/__init__.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/stages/storage/store_embeddings.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/stages/telemetry/__init__.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/stages/telemetry/job_counter.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/stages/telemetry/otel_meter.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/stages/telemetry/otel_tracer.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/stages/transforms/__init__.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/stages/transforms/image_caption.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/stages/transforms/text_embed.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/stages/transforms/text_splitter.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/stages/utility/__init__.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/stages/utility/throughput_monitor.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/util/__init__.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/util/env_config.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/util/pipeline/__init__.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/util/pipeline/pid_controller.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_runners.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/util/pipeline/tools.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/util/system_tools/__init__.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/util/system_tools/memory.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/orchestration/ray/util/system_tools/visualizers.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/schemas/__init__.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/schemas/framework_ingest_config_schema.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/schemas/framework_job_counter_schema.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/schemas/framework_message_broker_sink_schema.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/schemas/framework_message_broker_source_schema.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/schemas/framework_message_wrapper_schema.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/schemas/framework_metadata_injector_schema.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/schemas/framework_otel_meter_schema.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/schemas/framework_otel_tracer_schema.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/schemas/framework_processing_job_schema.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/schemas/framework_task_injection_schema.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/schemas/framework_vdb_task_sink_schema.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/util/__init__.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/util/flow_control/__init__.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/util/flow_control/filter_by_task.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/util/flow_control/udf_intercept.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/util/service/__init__.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/util/service/impl/__init__.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/util/service/impl/ingest/__init__.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/util/service/impl/ingest/redis_ingest_service.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/util/service/meta/__init__.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/util/service/meta/ingest/__init__.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/util/service/meta/ingest/ingest_service_meta.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/util/telemetry/__init__.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/util/telemetry/global_stats.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/pipeline/__init__.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/pipeline/config/__init__.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/pipeline/config/loaders.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/pipeline/config/replica_resolver.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/pipeline/ingest_pipeline.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/pipeline/pipeline_schema.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/version.py +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest.egg-info/SOURCES.txt +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest.egg-info/dependency_links.txt +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest.egg-info/top_level.txt +0 -0
- {nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: nv-ingest
|
|
3
|
-
Version: 2025.11.24.dev20251124
|
|
3
|
+
Version: 2025.12.8.dev20251208
|
|
4
4
|
Summary: Python module for multimodal document ingestion
|
|
5
5
|
Author-email: Jeremy Dyer <jdyer@nvidia.com>
|
|
6
6
|
License: Apache License
|
|
@@ -219,6 +219,8 @@ Requires-Dist: diskcache>=5.6.3
|
|
|
219
219
|
Requires-Dist: fastapi>=0.115.6
|
|
220
220
|
Requires-Dist: fastparquet>=2024.11.0
|
|
221
221
|
Requires-Dist: fsspec>=2024.10.0
|
|
222
|
+
Requires-Dist: universal_pathlib>=0.2.6
|
|
223
|
+
Requires-Dist: s3fs>=2024.10.0
|
|
222
224
|
Requires-Dist: gunicorn
|
|
223
225
|
Requires-Dist: h11>=0.16.0
|
|
224
226
|
Requires-Dist: httpx>=0.28.1
|
|
@@ -3,7 +3,9 @@
|
|
|
3
3
|
# SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
|
|
5
5
|
import logging
|
|
6
|
+
import os
|
|
6
7
|
from typing import Dict, Any, Optional
|
|
8
|
+
from urllib.parse import urlparse
|
|
7
9
|
|
|
8
10
|
import pandas as pd
|
|
9
11
|
import ray
|
|
@@ -26,7 +28,8 @@ logger = logging.getLogger(__name__)
|
|
|
26
28
|
@ray.remote
|
|
27
29
|
class ImageStorageStage(RayActorStage):
|
|
28
30
|
"""
|
|
29
|
-
A Ray actor stage that stores images or structured content
|
|
31
|
+
A Ray actor stage that stores images or structured content using an fsspec-compatible backend and updates
|
|
32
|
+
metadata with storage URLs.
|
|
30
33
|
|
|
31
34
|
This stage uses the validated configuration (ImageStorageModuleSchema) to process and store the DataFrame
|
|
32
35
|
payload and updates the control message accordingly.
|
|
@@ -69,8 +72,16 @@ class ImageStorageStage(RayActorStage):
|
|
|
69
72
|
task_config = remove_task_by_type(control_message, "store")
|
|
70
73
|
# logger.debug("ImageStorageStage: Task configuration extracted: %s", pprint.pformat(task_config))
|
|
71
74
|
|
|
72
|
-
|
|
73
|
-
|
|
75
|
+
stage_defaults = {
|
|
76
|
+
"structured": self.validated_config.structured,
|
|
77
|
+
"images": self.validated_config.images,
|
|
78
|
+
"storage_uri": self.validated_config.storage_uri,
|
|
79
|
+
"storage_options": self.validated_config.storage_options,
|
|
80
|
+
"public_base_url": self.validated_config.public_base_url,
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
store_structured: bool = task_config.get("structured", stage_defaults["structured"])
|
|
84
|
+
store_unstructured: bool = task_config.get("images", stage_defaults["images"])
|
|
74
85
|
|
|
75
86
|
content_types: Dict[Any, Any] = {}
|
|
76
87
|
if store_structured:
|
|
@@ -80,14 +91,34 @@ class ImageStorageStage(RayActorStage):
|
|
|
80
91
|
content_types[ContentTypeEnum.IMAGE] = store_unstructured
|
|
81
92
|
|
|
82
93
|
params: Dict[str, Any] = task_config.get("params", {})
|
|
83
|
-
params["content_types"] = content_types
|
|
84
94
|
|
|
85
|
-
|
|
95
|
+
storage_uri = task_config.get("storage_uri") or params.get("storage_uri") or stage_defaults["storage_uri"]
|
|
96
|
+
storage_options = {
|
|
97
|
+
**(stage_defaults["storage_options"] or {}),
|
|
98
|
+
**(task_config.get("storage_options") or {}),
|
|
99
|
+
**params.get("storage_options", {}),
|
|
100
|
+
}
|
|
101
|
+
if "public_base_url" in task_config:
|
|
102
|
+
public_base_url = task_config["public_base_url"]
|
|
103
|
+
else:
|
|
104
|
+
public_base_url = params.get("public_base_url", stage_defaults["public_base_url"])
|
|
105
|
+
|
|
106
|
+
storage_options = self._inject_storage_defaults(storage_uri, storage_options)
|
|
107
|
+
|
|
108
|
+
storage_params: Dict[str, Any] = {
|
|
109
|
+
"content_types": content_types,
|
|
110
|
+
"storage_uri": storage_uri,
|
|
111
|
+
"storage_options": storage_options,
|
|
112
|
+
}
|
|
113
|
+
if public_base_url:
|
|
114
|
+
storage_params["public_base_url"] = public_base_url
|
|
115
|
+
|
|
116
|
+
logger.debug("Processing storage task with parameters: %s", storage_params)
|
|
86
117
|
|
|
87
118
|
# Store images or structured content.
|
|
88
119
|
df_storage_ledger: pd.DataFrame = store_images_to_minio_internal(
|
|
89
120
|
df_storage_ledger=df_payload,
|
|
90
|
-
task_config=params,
|
|
121
|
+
task_config=storage_params,
|
|
91
122
|
storage_config={},
|
|
92
123
|
execution_trace_log=None,
|
|
93
124
|
)
|
|
@@ -98,3 +129,38 @@ class ImageStorageStage(RayActorStage):
|
|
|
98
129
|
control_message.payload(df_storage_ledger)
|
|
99
130
|
|
|
100
131
|
return control_message
|
|
132
|
+
|
|
133
|
+
@staticmethod
|
|
134
|
+
def _inject_storage_defaults(storage_uri: str, storage_options: Dict[str, Any]) -> Dict[str, Any]:
|
|
135
|
+
"""
|
|
136
|
+
Populate storage options for common backends (e.g., MinIO/S3) using environment defaults.
|
|
137
|
+
"""
|
|
138
|
+
parsed_scheme = urlparse(storage_uri).scheme.lower()
|
|
139
|
+
merged_options: Dict[str, Any] = {k: v for k, v in storage_options.items() if v is not None}
|
|
140
|
+
|
|
141
|
+
if parsed_scheme not in {"s3", "s3a", "s3n"}:
|
|
142
|
+
return merged_options
|
|
143
|
+
|
|
144
|
+
def _set_if_absent(key: str, env_var: str) -> None:
|
|
145
|
+
if key not in merged_options and env_var in os.environ:
|
|
146
|
+
merged_options[key] = os.environ[env_var]
|
|
147
|
+
|
|
148
|
+
_set_if_absent("key", "MINIO_ACCESS_KEY")
|
|
149
|
+
_set_if_absent("secret", "MINIO_SECRET_KEY")
|
|
150
|
+
if "token" not in merged_options and os.environ.get("MINIO_SESSION_TOKEN"):
|
|
151
|
+
merged_options["token"] = os.environ["MINIO_SESSION_TOKEN"]
|
|
152
|
+
|
|
153
|
+
client_kwargs = dict(merged_options.get("client_kwargs", {}))
|
|
154
|
+
endpoint = os.environ.get("MINIO_INTERNAL_ADDRESS")
|
|
155
|
+
if not endpoint:
|
|
156
|
+
endpoint = "http://minio:9000"
|
|
157
|
+
if endpoint and not endpoint.startswith(("http://", "https://")):
|
|
158
|
+
endpoint = f"http://{endpoint}"
|
|
159
|
+
client_kwargs.setdefault("endpoint_url", endpoint)
|
|
160
|
+
region = os.environ.get("MINIO_REGION")
|
|
161
|
+
if region:
|
|
162
|
+
client_kwargs.setdefault("region_name", region)
|
|
163
|
+
if client_kwargs:
|
|
164
|
+
merged_options["client_kwargs"] = client_kwargs
|
|
165
|
+
|
|
166
|
+
return merged_options
|
|
@@ -128,6 +128,13 @@ stages:
|
|
|
128
128
|
]
|
|
129
129
|
yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|http
|
|
130
130
|
auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
|
|
131
|
+
pdfium_config:
|
|
132
|
+
yolox_endpoints: [
|
|
133
|
+
$YOLOX_GRPC_ENDPOINT|"",
|
|
134
|
+
$YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v2"
|
|
135
|
+
]
|
|
136
|
+
yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|http
|
|
137
|
+
auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
|
|
131
138
|
replicas:
|
|
132
139
|
min_replicas: 0
|
|
133
140
|
max_replicas:
|
|
@@ -149,6 +156,13 @@ stages:
|
|
|
149
156
|
]
|
|
150
157
|
yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|http
|
|
151
158
|
auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
|
|
159
|
+
pdfium_config:
|
|
160
|
+
yolox_endpoints: [
|
|
161
|
+
$YOLOX_GRPC_ENDPOINT|"",
|
|
162
|
+
$YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v2"
|
|
163
|
+
]
|
|
164
|
+
yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|http
|
|
165
|
+
auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
|
|
152
166
|
replicas:
|
|
153
167
|
min_replicas: 0
|
|
154
168
|
max_replicas:
|
|
@@ -201,7 +215,7 @@ stages:
|
|
|
201
215
|
endpoint_config:
|
|
202
216
|
ocr_endpoints: [
|
|
203
217
|
$OCR_GRPC_ENDPOINT|"",
|
|
204
|
-
$OCR_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/
|
|
218
|
+
$OCR_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-ocr-v1"
|
|
205
219
|
]
|
|
206
220
|
ocr_infer_protocol: $OCR_INFER_PROTOCOL|"http"
|
|
207
221
|
auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
|
|
@@ -227,7 +241,7 @@ stages:
|
|
|
227
241
|
yolox_infer_protocol: $YOLOX_TABLE_STRUCTURE_INFER_PROTOCOL|"http"
|
|
228
242
|
ocr_endpoints: [
|
|
229
243
|
$OCR_GRPC_ENDPOINT|"",
|
|
230
|
-
$OCR_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/
|
|
244
|
+
$OCR_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-ocr-v1"
|
|
231
245
|
]
|
|
232
246
|
ocr_infer_protocol: $PADDLE_INFER_PROTOCOL|"http"
|
|
233
247
|
auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
|
|
@@ -254,7 +268,7 @@ stages:
|
|
|
254
268
|
yolox_infer_protocol: $YOLOX_GRAPHIC_ELEMENTS_INFER_PROTOCOL|"http"
|
|
255
269
|
ocr_endpoints: [
|
|
256
270
|
$OCR_GRPC_ENDPOINT|"",
|
|
257
|
-
$OCR_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/
|
|
271
|
+
$OCR_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-ocr-v1"
|
|
258
272
|
]
|
|
259
273
|
ocr_infer_protocol: $OCR_INFER_PROTOCOL|"http"
|
|
260
274
|
auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
|
|
@@ -123,7 +123,14 @@ stages:
|
|
|
123
123
|
docx_extraction_config:
|
|
124
124
|
yolox_endpoints: [
|
|
125
125
|
$YOLOX_GRPC_ENDPOINT|"page-elements:8001",
|
|
126
|
-
$YOLOX_HTTP_ENDPOINT|"",
|
|
126
|
+
$YOLOX_HTTP_ENDPOINT|"http://page-elements:8000/v1/infer",
|
|
127
|
+
]
|
|
128
|
+
yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|grpc
|
|
129
|
+
auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
|
|
130
|
+
pdfium_config:
|
|
131
|
+
yolox_endpoints: [
|
|
132
|
+
$YOLOX_GRPC_ENDPOINT|"page-elements:8001",
|
|
133
|
+
$YOLOX_HTTP_ENDPOINT|"http://page-elements:8000/v1/infer",
|
|
127
134
|
]
|
|
128
135
|
yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|grpc
|
|
129
136
|
auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
|
|
@@ -148,6 +155,13 @@ stages:
|
|
|
148
155
|
]
|
|
149
156
|
yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|grpc
|
|
150
157
|
auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
|
|
158
|
+
pdfium_config:
|
|
159
|
+
yolox_endpoints: [
|
|
160
|
+
$YOLOX_GRPC_ENDPOINT|"page-elements:8001",
|
|
161
|
+
$YOLOX_HTTP_ENDPOINT|"http://page-elements:8000/v1/infer",
|
|
162
|
+
]
|
|
163
|
+
yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|grpc
|
|
164
|
+
auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
|
|
151
165
|
replicas:
|
|
152
166
|
min_replicas: 0
|
|
153
167
|
max_replicas:
|
|
@@ -372,6 +386,9 @@ stages:
|
|
|
372
386
|
type: "stage"
|
|
373
387
|
phase: 5 # RESPONSE
|
|
374
388
|
actor: "nv_ingest.framework.orchestration.ray.stages.storage.image_storage:ImageStorageStage"
|
|
389
|
+
config:
|
|
390
|
+
storage_uri: $IMAGE_STORAGE_URI|"s3://nv-ingest/artifacts/store/images"
|
|
391
|
+
public_base_url: $IMAGE_STORAGE_PUBLIC_BASE_URL|""
|
|
375
392
|
replicas:
|
|
376
393
|
min_replicas: 0
|
|
377
394
|
max_replicas:
|
{nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest.egg-info/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: nv-ingest
|
|
3
|
-
Version: 2025.11.24.dev20251124
|
|
3
|
+
Version: 2025.12.8.dev20251208
|
|
4
4
|
Summary: Python module for multimodal document ingestion
|
|
5
5
|
Author-email: Jeremy Dyer <jdyer@nvidia.com>
|
|
6
6
|
License: Apache License
|
|
@@ -219,6 +219,8 @@ Requires-Dist: diskcache>=5.6.3
|
|
|
219
219
|
Requires-Dist: fastapi>=0.115.6
|
|
220
220
|
Requires-Dist: fastparquet>=2024.11.0
|
|
221
221
|
Requires-Dist: fsspec>=2024.10.0
|
|
222
|
+
Requires-Dist: universal_pathlib>=0.2.6
|
|
223
|
+
Requires-Dist: s3fs>=2024.10.0
|
|
222
224
|
Requires-Dist: gunicorn
|
|
223
225
|
Requires-Dist: h11>=0.16.0
|
|
224
226
|
Requires-Dist: httpx>=0.28.1
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/api/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/api/tracing.py
RENAMED
|
File without changes
|
{nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/api/v1/__init__.py
RENAMED
|
File without changes
|
{nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/api/v1/health.py
RENAMED
|
File without changes
|
{nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/api/v1/ingest.py
RENAMED
|
File without changes
|
{nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/api/v1/metrics.py
RENAMED
|
File without changes
|
{nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/api/v2/README.md
RENAMED
|
File without changes
|
{nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/api/v2/__init__.py
RENAMED
|
File without changes
|
{nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/api/v2/ingest.py
RENAMED
|
File without changes
|
{nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/framework/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest/pipeline/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nv_ingest-2025.11.24.dev20251124 → nv_ingest-2025.12.8.dev20251208}/nv_ingest.egg-info/SOURCES.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|