nv-ingest 2025.10.20.dev20251020__tar.gz → 2025.10.22.dev20251022__tar.gz
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/PKG-INFO +1 -1
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/api/v2/README.md +81 -8
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/api/v2/ingest.py +106 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest.egg-info/PKG-INFO +1 -1
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/LICENSE +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/MANIFEST.in +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/__init__.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/api/__init__.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/api/main.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/api/tracing.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/api/v1/__init__.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/api/v1/health.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/api/v1/ingest.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/api/v1/metrics.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/api/v2/__init__.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/__init__.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/__init__.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/execution/__init__.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/execution/helpers.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/execution/options.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/process/__init__.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/process/dependent_services.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/process/execution.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/process/lifecycle.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/process/strategies.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/process/termination.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/__init__.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/edges/__init__.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/edges/async_queue_edge.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/edges/ray_queue_edge.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/edges/threaded_queue_edge.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/examples/__init__.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/examples/pipeline_test_harness.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/examples/task_source_harness.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/examples/task_source_sink_harness.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/primitives/__init__.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/primitives/dataclasses.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/primitives/pipeline_monitor.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/primitives/pipeline_topology.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/primitives/ray_pipeline.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/primitives/ray_stat_collector.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/stages/__init__.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/stages/extractors/__init__.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/stages/extractors/audio_extractor.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/stages/extractors/chart_extractor.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/stages/extractors/docx_extractor.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/stages/extractors/html_extractor.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/stages/extractors/image_extractor.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/stages/extractors/infographic_extractor.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/stages/extractors/pdf_extractor.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/stages/extractors/pptx_extractor.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/stages/extractors/table_extractor.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/stages/injectors/__init__.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/stages/injectors/metadata_injector.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/stages/meta/__init__.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_edge_base.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_sink_stage_base.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_source_stage_base.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_stage_base.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/stages/mutate/__init__.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/stages/mutate/image_dedup.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/stages/mutate/image_filter.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/stages/sinks/__init__.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/stages/sinks/default_drain.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/stages/sinks/message_broker_task_sink.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/stages/sources/__init__.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/stages/sources/message_broker_task_source.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/stages/storage/__init__.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/stages/storage/image_storage.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/stages/storage/store_embeddings.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/stages/telemetry/__init__.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/stages/telemetry/job_counter.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/stages/telemetry/otel_meter.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/stages/telemetry/otel_tracer.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/stages/transforms/__init__.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/stages/transforms/image_caption.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/stages/transforms/text_embed.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/stages/transforms/text_splitter.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/stages/utility/__init__.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/stages/utility/throughput_monitor.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/util/__init__.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/util/env_config.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/util/pipeline/__init__.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/util/pipeline/pid_controller.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_runners.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/util/pipeline/tools.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/util/system_tools/__init__.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/util/system_tools/memory.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/orchestration/ray/util/system_tools/visualizers.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/schemas/__init__.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/schemas/framework_ingest_config_schema.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/schemas/framework_job_counter_schema.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/schemas/framework_message_broker_sink_schema.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/schemas/framework_message_broker_source_schema.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/schemas/framework_message_wrapper_schema.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/schemas/framework_metadata_injector_schema.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/schemas/framework_otel_meter_schema.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/schemas/framework_otel_tracer_schema.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/schemas/framework_processing_job_schema.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/schemas/framework_task_injection_schema.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/schemas/framework_vdb_task_sink_schema.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/util/__init__.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/util/flow_control/__init__.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/util/flow_control/filter_by_task.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/util/flow_control/udf_intercept.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/util/service/__init__.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/util/service/impl/__init__.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/util/service/impl/ingest/__init__.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/util/service/impl/ingest/redis_ingest_service.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/util/service/meta/__init__.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/util/service/meta/ingest/__init__.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/util/service/meta/ingest/ingest_service_meta.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/util/telemetry/__init__.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/framework/util/telemetry/global_stats.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/pipeline/__init__.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/pipeline/config/__init__.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/pipeline/config/loaders.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/pipeline/config/replica_resolver.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/pipeline/default_libmode_pipeline_impl.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/pipeline/default_pipeline_impl.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/pipeline/ingest_pipeline.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/pipeline/pipeline_schema.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/version.py +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest.egg-info/SOURCES.txt +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest.egg-info/dependency_links.txt +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest.egg-info/requires.txt +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest.egg-info/top_level.txt +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/pyproject.toml +0 -0
- {nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/setup.cfg +0 -0
{nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/api/v2/README.md
RENAMED
|
@@ -40,10 +40,24 @@ This behaviour matches the V1 tracing model and sets the foundation for adding W
|
|
|
40
40
|
|
|
41
41
|
The fetch endpoint returns a JSON body shaped like the following:
|
|
42
42
|
|
|
43
|
-
```
|
|
43
|
+
```json
|
|
44
44
|
{
|
|
45
45
|
"data": [...],
|
|
46
46
|
"status": "success",
|
|
47
|
+
"trace": {
|
|
48
|
+
"trace::entry::pdf_extractor": 1000,
|
|
49
|
+
"trace::exit::pdf_extractor": 2150,
|
|
50
|
+
"trace::resident_time::pdf_extractor": 250,
|
|
51
|
+
"trace::entry::table_extractor": 1200,
|
|
52
|
+
"trace::exit::table_extractor": 2300,
|
|
53
|
+
"trace::resident_time::table_extractor": 300
|
|
54
|
+
// ... parent-level aggregated traces only (clean, V1-compatible)
|
|
55
|
+
},
|
|
56
|
+
"annotations": {
|
|
57
|
+
"annotation::uuid-1": {"task_id": "pdf_extractor", "task_result": "SUCCESS"},
|
|
58
|
+
"annotation::uuid-2": {"task_id": "table_extractor", "task_result": "SUCCESS"}
|
|
59
|
+
// ... all annotations from all chunks (annotations have unique UUIDs)
|
|
60
|
+
},
|
|
47
61
|
"metadata": {
|
|
48
62
|
"parent_job_id": "<uuid>",
|
|
49
63
|
"total_pages": 320,
|
|
@@ -68,9 +82,9 @@ The fetch endpoint returns a JSON body shaped like the following:
|
|
|
68
82
|
"chunk_index": 1,
|
|
69
83
|
"start_page": 1,
|
|
70
84
|
"end_page": 32,
|
|
71
|
-
"trace": {"trace::
|
|
85
|
+
"trace": {"trace::entry::pdf_extractor": 1.7599e18, ...}
|
|
72
86
|
}
|
|
73
|
-
// ...
|
|
87
|
+
// ... per-chunk trace details
|
|
74
88
|
],
|
|
75
89
|
"annotation_segments": [
|
|
76
90
|
{
|
|
@@ -78,17 +92,76 @@ The fetch endpoint returns a JSON body shaped like the following:
|
|
|
78
92
|
"chunk_index": 1,
|
|
79
93
|
"start_page": 1,
|
|
80
94
|
"end_page": 32,
|
|
81
|
-
"annotations": {"annotation::
|
|
95
|
+
"annotations": {"annotation::uuid": {...}, ...}
|
|
96
|
+
}
|
|
97
|
+
// ... per-chunk annotation details
|
|
98
|
+
]
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
**Top-level trace and annotations** (V1 compatibility):
|
|
104
|
+
- `trace`: Contains **only parent-level aggregated traces** for clean V1 compatibility
|
|
105
|
+
- `trace::entry::<stage>` - Earliest entry time across all chunks
|
|
106
|
+
- `trace::exit::<stage>` - Latest exit time across all chunks
|
|
107
|
+
- `trace::resident_time::<stage>` - Sum of all chunk durations (total compute time)
|
|
108
|
+
- `annotations`: Merged annotations from all chunks (annotations have unique UUIDs so merge safely)
|
|
109
|
+
- These fields match V1 structure, allowing existing client code to work without modification
|
|
110
|
+
|
|
111
|
+
**Note:** Chunk-level trace details are available in `metadata.trace_segments[]` for granular analysis
|
|
112
|
+
|
|
113
|
+
**Parent-Level Trace Aggregation:**
|
|
114
|
+
|
|
115
|
+
For split PDFs, parent-level metrics are automatically computed for each stage (including nested stages):
|
|
116
|
+
|
|
117
|
+
- `trace::entry::<stage>` - Earliest entry time across all chunks (when first chunk entered stage)
|
|
118
|
+
- `trace::exit::<stage>` - Latest exit time across all chunks (when last chunk exited stage)
|
|
119
|
+
- `trace::resident_time::<stage>` - Sum of all chunk durations (total compute time in stage)
|
|
120
|
+
|
|
121
|
+
**Supports arbitrary nesting depth:**
|
|
122
|
+
- Simple: `trace::entry::pdf_extractor`
|
|
123
|
+
- Nested: `trace::entry::pdf_extractor::pdf_extraction::pdfium_pages_to_numpy_0`
|
|
124
|
+
|
|
125
|
+
**Example:**
|
|
126
|
+
```json
|
|
127
|
+
{
|
|
128
|
+
"trace": {
|
|
129
|
+
"trace::entry::pdf_extractor": 1000,
|
|
130
|
+
"trace::exit::pdf_extractor": 2150,
|
|
131
|
+
"trace::resident_time::pdf_extractor": 250
|
|
132
|
+
// ... only parent-level aggregations (clean, concise)
|
|
133
|
+
},
|
|
134
|
+
"metadata": {
|
|
135
|
+
"trace_segments": [
|
|
136
|
+
{
|
|
137
|
+
"chunk_index": 1,
|
|
138
|
+
"start_page": 1,
|
|
139
|
+
"end_page": 32,
|
|
140
|
+
"trace": {
|
|
141
|
+
"trace::entry::pdf_extractor": 1000,
|
|
142
|
+
"trace::exit::pdf_extractor": 1100
|
|
143
|
+
}
|
|
144
|
+
},
|
|
145
|
+
{
|
|
146
|
+
"chunk_index": 2,
|
|
147
|
+
"trace": {
|
|
148
|
+
"trace::entry::pdf_extractor": 2000,
|
|
149
|
+
"trace::exit::pdf_extractor": 2150
|
|
150
|
+
}
|
|
82
151
|
}
|
|
83
|
-
// ...
|
|
84
152
|
]
|
|
85
153
|
}
|
|
86
154
|
}
|
|
87
155
|
```
|
|
88
156
|
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
157
|
+
**Note:** `resident_time` represents total compute time (sum of chunk durations), while `exit - entry` shows wall-clock span.
|
|
158
|
+
|
|
159
|
+
**Detailed metadata** (V2-specific):
|
|
160
|
+
- `trace_segments`: **Chunk-level trace data** with page ranges for granular per-chunk analysis
|
|
161
|
+
- `annotation_segments`: Per-chunk annotation data with page ranges
|
|
162
|
+
- Clients can correlate chunk data by matching `job_id` or `chunk_index` across arrays
|
|
163
|
+
- Failed chunk entries remain in `failed_subjobs`; missing chunks indicate the sink did not emit telemetry
|
|
164
|
+
- **To access chunk traces:** Use `metadata.trace_segments[]` - each segment contains the full trace dict for that chunk
|
|
92
165
|
|
|
93
166
|
## Testing
|
|
94
167
|
|
{nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/api/v2/ingest.py
RENAMED
|
@@ -432,6 +432,88 @@ def _extract_ray_telemetry(result: Dict[str, Any]) -> Tuple[Optional[Dict[str, A
|
|
|
432
432
|
return trace_dict, annotations_dict
|
|
433
433
|
|
|
434
434
|
|
|
435
|
+
def _aggregate_parent_traces(chunk_traces: Dict[str, Any]) -> Dict[str, Any]:
    """
    Aggregate chunk-level traces into parent-level metrics.

    For each stage found in chunk traces:
    - trace::entry::<stage> = min(all chunk entries) - earliest start
    - trace::exit::<stage> = max(all chunk exits) - latest finish
    - trace::resident_time::<stage> = sum(chunk durations) - total compute

    Only chunks that report BOTH a numeric entry and a numeric exit for a
    stage contribute to that stage's aggregates. Malformed keys and
    non-numeric values are skipped rather than raised, so one bad telemetry
    record cannot abort aggregation for the remaining stages.

    Parameters
    ----------
    chunk_traces : Dict[str, Any]
        Trace dict with chunk-prefixed keys (chunk_N::trace::entry::stage_name).
        Stage names may themselves contain "::" (nested stages).

    Returns
    -------
    Dict[str, Any]
        Parent-level aggregated traces (trace::entry::stage_name, etc.).
        Empty when no stage has a complete entry/exit pair.
    """
    chunk_prefix = "chunk_"

    # Group by stage: {stage_name: {chunk_idx: {"entry": ts, "exit": ts}}}
    stage_data: Dict[str, Dict[int, Dict[str, Any]]] = {}

    for key, value in chunk_traces.items():
        if not key.startswith(chunk_prefix):
            continue

        parts = key.split("::")
        if len(parts) < 4:  # Minimum: chunk_N::trace::entry/exit::stage_name
            continue

        event_type = parts[2]
        if parts[1] != "trace" or event_type not in ("entry", "exit"):
            continue

        # Require the whole suffix to be ASCII digits. Splitting on "_" would
        # silently read "chunk_1_2" as chunk 1, and int("1_2") would read it
        # as 12; both mis-attribute the value, so such keys are skipped.
        idx_text = parts[0][len(chunk_prefix):]
        if not (idx_text.isascii() and idx_text.isdigit()):
            continue
        chunk_idx = int(idx_text)

        # Stage name is everything after trace::entry:: or trace::exit::
        # Handles both simple (pdf_extractor) and nested
        # (pdf_extractor::pdf_extraction::pdfium_0) stage names.
        stage_name = "::".join(parts[3:])

        stage_data.setdefault(stage_name, {}).setdefault(chunk_idx, {})[event_type] = value

    # Compute aggregated metrics
    parent_traces: Dict[str, Any] = {}

    for stage_name, chunks in stage_data.items():
        # Keep only complete, numeric (entry, exit) pairs; a non-numeric value
        # would otherwise raise TypeError in the duration subtraction below.
        pairs = [
            (chunk_data["entry"], chunk_data["exit"])
            for chunk_data in chunks.values()
            if isinstance(chunk_data.get("entry"), (int, float))
            and isinstance(chunk_data.get("exit"), (int, float))
        ]

        # Only add parent traces if we have valid data
        if not pairs:
            continue

        parent_traces[f"trace::entry::{stage_name}"] = min(entry for entry, _ in pairs)
        parent_traces[f"trace::exit::{stage_name}"] = max(exit_time for _, exit_time in pairs)
        parent_traces[f"trace::resident_time::{stage_name}"] = sum(
            exit_time - entry for entry, exit_time in pairs
        )

    return parent_traces
|
|
515
|
+
|
|
516
|
+
|
|
435
517
|
def _build_aggregated_response(
|
|
436
518
|
parent_job_id: str,
|
|
437
519
|
subjob_results: List[Optional[Dict[str, Any]]],
|
|
@@ -469,6 +551,9 @@ def _build_aggregated_response(
|
|
|
469
551
|
"description": (
|
|
470
552
|
"One or more subjobs failed to complete" if any_failed else "Aggregated result composed from subjob outputs"
|
|
471
553
|
),
|
|
554
|
+
# Top-level trace/annotations for V1 compatibility
|
|
555
|
+
"trace": {},
|
|
556
|
+
"annotations": {},
|
|
472
557
|
"metadata": {
|
|
473
558
|
"parent_job_id": parent_job_id,
|
|
474
559
|
"total_pages": metadata.get("total_pages", len(subjob_ids)),
|
|
@@ -498,6 +583,7 @@ def _build_aggregated_response(
|
|
|
498
583
|
end_page = descriptor.get("end_page")
|
|
499
584
|
|
|
500
585
|
if trace_data:
|
|
586
|
+
# Add to trace_segments (detailed, per-chunk view)
|
|
501
587
|
aggregated_result["metadata"]["trace_segments"].append(
|
|
502
588
|
{
|
|
503
589
|
"job_id": descriptor.get("job_id"),
|
|
@@ -507,8 +593,10 @@ def _build_aggregated_response(
|
|
|
507
593
|
"trace": trace_data,
|
|
508
594
|
}
|
|
509
595
|
)
|
|
596
|
+
# Chunk traces stay in metadata.trace_segments only (not in top-level)
|
|
510
597
|
|
|
511
598
|
if annotation_data:
|
|
599
|
+
# Add to annotation_segments (detailed, per-chunk view)
|
|
512
600
|
aggregated_result["metadata"]["annotation_segments"].append(
|
|
513
601
|
{
|
|
514
602
|
"job_id": descriptor.get("job_id"),
|
|
@@ -518,10 +606,28 @@ def _build_aggregated_response(
|
|
|
518
606
|
"annotations": annotation_data,
|
|
519
607
|
}
|
|
520
608
|
)
|
|
609
|
+
# Merge into top-level annotations (annotations have unique UUIDs, safe to merge)
|
|
610
|
+
aggregated_result["annotations"].update(annotation_data)
|
|
521
611
|
else:
|
|
522
612
|
# Note failed page
|
|
523
613
|
logger.warning(f"Page {page_num} failed or missing")
|
|
524
614
|
|
|
615
|
+
# Compute parent-level trace aggregations from trace_segments
|
|
616
|
+
trace_segments = aggregated_result["metadata"]["trace_segments"]
|
|
617
|
+
if trace_segments:
|
|
618
|
+
# Build a temporary chunk trace dict for aggregation
|
|
619
|
+
temp_chunk_traces = {}
|
|
620
|
+
for segment in trace_segments:
|
|
621
|
+
chunk_idx = segment.get("chunk_index")
|
|
622
|
+
chunk_trace = segment.get("trace", {})
|
|
623
|
+
for trace_key, trace_value in chunk_trace.items():
|
|
624
|
+
prefixed_key = f"chunk_{chunk_idx}::{trace_key}"
|
|
625
|
+
temp_chunk_traces[prefixed_key] = trace_value
|
|
626
|
+
|
|
627
|
+
# Aggregate and set as top-level trace (only parent traces, no chunk traces)
|
|
628
|
+
parent_level_traces = _aggregate_parent_traces(temp_chunk_traces)
|
|
629
|
+
aggregated_result["trace"] = parent_level_traces
|
|
630
|
+
|
|
525
631
|
return aggregated_result
|
|
526
632
|
|
|
527
633
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/api/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/api/tracing.py
RENAMED
|
File without changes
|
{nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/api/v1/__init__.py
RENAMED
|
File without changes
|
{nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/api/v1/health.py
RENAMED
|
File without changes
|
{nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/api/v1/ingest.py
RENAMED
|
File without changes
|
{nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/api/v1/metrics.py
RENAMED
|
File without changes
|
{nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/api/v2/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest/pipeline/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nv_ingest-2025.10.20.dev20251020 → nv_ingest-2025.10.22.dev20251022}/nv_ingest.egg-info/SOURCES.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|