cloud-dog-vdb 0.5.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cloud_dog_vdb-0.5.4/.docs-manifest.yml +6 -0
- cloud_dog_vdb-0.5.4/.gitignore +16 -0
- cloud_dog_vdb-0.5.4/AGENT-INSTRUCTION-FIX-VDB-TESTS.md +332 -0
- cloud_dog_vdb-0.5.4/AGENT-INSTRUCTION-FIX-VDB.md +188 -0
- cloud_dog_vdb-0.5.4/AGENTS.md +94 -0
- cloud_dog_vdb-0.5.4/ARCHITECTURE.md +768 -0
- cloud_dog_vdb-0.5.4/BUILD.md +76 -0
- cloud_dog_vdb-0.5.4/CHANGELOG.md +9 -0
- cloud_dog_vdb-0.5.4/DATA-MODEL.md +33 -0
- cloud_dog_vdb-0.5.4/LICENCE +190 -0
- cloud_dog_vdb-0.5.4/LICENSE +176 -0
- cloud_dog_vdb-0.5.4/NOTICE +7 -0
- cloud_dog_vdb-0.5.4/PKG-INFO +43 -0
- cloud_dog_vdb-0.5.4/PROGRAMME-0.4.0-DEVELOPMENT-BUILD-TEST.md +252 -0
- cloud_dog_vdb-0.5.4/README.md +129 -0
- cloud_dog_vdb-0.5.4/RELEASE_UPLIFT_PROPOSAL.md +115 -0
- cloud_dog_vdb-0.5.4/REQUIREMENTS.md +645 -0
- cloud_dog_vdb-0.5.4/RULES.md +120 -0
- cloud_dog_vdb-0.5.4/TESTS.md +507 -0
- cloud_dog_vdb-0.5.4/adoption_test.py +77 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/__init__.py +45 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/access/__init__.py +15 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/access/enforcement.py +32 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/access/policy.py +38 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/adapters/__init__.py +39 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/adapters/base.py +94 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/adapters/chroma.py +329 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/adapters/factory.py +51 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/adapters/infinity.py +404 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/adapters/opensearch.py +281 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/adapters/pgvector.py +300 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/adapters/qdrant.py +315 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/adapters/registry.py +38 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/adapters/vector_utils.py +35 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/adapters/weaviate.py +291 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/capabilities/__init__.py +15 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/capabilities/models.py +28 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/capabilities/planner.py +27 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/collections/__init__.py +15 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/collections/manager.py +44 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/collections/specs.py +34 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/compat/__init__.py +20 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/compat/response_normaliser.py +194 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/config/__init__.py +17 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/config/models.py +38 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/domain/__init__.py +25 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/domain/enums.py +35 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/domain/errors.py +45 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/domain/models.py +108 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/embeddings/__init__.py +18 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/embeddings/base.py +28 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/embeddings/providers.py +86 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/factory.py +27 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/__init__.py +29 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/acquire.py +35 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/checkpoints.py +34 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/chunk/__init__.py +15 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/chunk/base.py +33 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/chunk/boundary.py +23 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/chunk/fixed.py +28 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/chunk/recursive.py +33 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/chunk/semantic.py +40 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/convert/__init__.py +15 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/convert/base.py +34 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/convert/deepdoc_conv.py +26 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/convert/mineru_conv.py +25 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/convert/pandas_conv.py +32 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/embed.py +30 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/ocr/__init__.py +26 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/ocr/base.py +52 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/ocr/heuristics.py +31 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/ocr/planner.py +43 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/ocr/providers/__init__.py +23 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/ocr/providers/external_service.py +69 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/ocr/providers/llm.py +94 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/ocr/providers/local.py +78 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/ocr/registry.py +36 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/parse/__init__.py +46 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/parse/async_runner.py +215 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/parse/base.py +52 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/parse/capabilities.py +32 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/parse/ir.py +57 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/parse/planner.py +31 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/parse/providers/__init__.py +29 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/parse/providers/deepdoc.py +101 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/parse/providers/docling.py +101 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/parse/providers/internal.py +83 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/parse/providers/marker_mcp.py +643 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/parse/providers/mineru.py +703 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/parse/providers/transformers.py +176 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/parse/quality.py +21 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/parse/registry.py +36 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/pipeline.py +433 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/table/__init__.py +25 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/table/policy.py +31 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/table/renderers.py +74 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/table/schema.py +40 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/verify.py +30 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/integrations/__init__.py +15 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/integrations/langchain.py +32 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/integrations/llamaindex.py +32 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/isolation/__init__.py +15 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/isolation/manager.py +36 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/jobs/__init__.py +15 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/jobs/models.py +28 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/jobs/queue.py +45 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/jobs/status.py +32 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/jobs/worker.py +28 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/lifecycle/__init__.py +25 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/lifecycle/manager.py +53 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/lifecycle/retention.py +83 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/metadata/__init__.py +46 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/metadata/filters.py +130 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/metadata/identity.py +72 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/metadata/normalise.py +35 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/metadata/provenance.py +102 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/metadata/schema.py +166 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/observability/__init__.py +15 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/observability/audit.py +32 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/observability/metrics.py +37 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/observability/otel.py +32 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/options/__init__.py +15 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/options/chroma.py +28 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/options/common.py +35 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/options/manager.py +34 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/options/opensearch.py +28 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/options/pgvector.py +28 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/options/qdrant.py +28 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/options/weaviate.py +28 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/remote/__init__.py +20 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/remote/client.py +105 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/runtime/__init__.py +18 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/runtime/client.py +362 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/runtime/factory.py +113 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/search/__init__.py +15 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/search/engine.py +44 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/search/rerank.py +29 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/testing/__init__.py +22 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/testing/comparison.py +424 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/testing/comparison_report.py +89 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/testing/conformance.py +32 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/testing/fixtures.py +30 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/testing/mock_adapters.py +32 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/versioning/__init__.py +24 -0
- cloud_dog_vdb-0.5.4/cloud_dog_vdb/versioning/schema_version.py +151 -0
- cloud_dog_vdb-0.5.4/defaults.yaml +132 -0
- cloud_dog_vdb-0.5.4/docs/ARCHITECTURE.md +16 -0
- cloud_dog_vdb-0.5.4/docs/CONFIGURATION.md +15 -0
- cloud_dog_vdb-0.5.4/docs/EXAMPLES.md +9 -0
- cloud_dog_vdb-0.5.4/pyproject.toml +50 -0
- cloud_dog_vdb-0.5.4/scaffold/cloud_dog_vdb/__init__.py +30 -0
- cloud_dog_vdb-0.5.4/scaffold/defaults.yaml +95 -0
- cloud_dog_vdb-0.5.4/scaffold/pyproject.toml +32 -0
- cloud_dog_vdb-0.5.4/scaffold/tests/conftest.py +186 -0
- cloud_dog_vdb-0.5.4/test-data/Aon global-medical-trend-rates-report-2026.pdf +0 -0
- cloud_dog_vdb-0.5.4/test-data/BOSIB13bdde89d07f1b3711dd8e86adb477.pdf +0 -0
- cloud_dog_vdb-0.5.4/test-data/CELEX_32016R0679_EN_TXT.pdf +0 -0
- cloud_dog_vdb-0.5.4/test-data/Examples.pdf +0 -0
- cloud_dog_vdb-0.5.4/test-data/HSSD_2025-0-00.pdf +0 -0
- cloud_dog_vdb-0.5.4/test-data/Handwritten-Concern-Form-reporting-Domestic-Abuse-Good-Example.pdf +0 -0
- cloud_dog_vdb-0.5.4/test-data/IBRD-Financial-Statements-June-2025.pdf +0 -0
- cloud_dog_vdb-0.5.4/test-data/ITEM_COD-0012-0001-_-089.pdf +0 -0
- cloud_dog_vdb-0.5.4/test-data/NIST.SP.800-53r5.pdf +0 -0
- cloud_dog_vdb-0.5.4/test-data/SAMPLE OF RURAL COMPLETED FORM.pdf +0 -0
- cloud_dog_vdb-0.5.4/test-data/Z83-example.pdf +0 -0
- cloud_dog_vdb-0.5.4/test-data/a10kfy2023filing.pdf +0 -0
- cloud_dog_vdb-0.5.4/test-data/corpus-manifest.yaml +162 -0
- cloud_dog_vdb-0.5.4/test-data/fw9.pdf +0 -0
- cloud_dog_vdb-0.5.4/tests/__init__.py +13 -0
- cloud_dog_vdb-0.5.4/tests/_uplift_assertions.py +52 -0
- cloud_dog_vdb-0.5.4/tests/application/AT1.1_ServiceStartupPattern/test_service_startup.py +44 -0
- cloud_dog_vdb-0.5.4/tests/application/AT1.2_FullIngestionFlow/test_full_ingestion.py +48 -0
- cloud_dog_vdb-0.5.4/tests/application/AT1.3_SearchWithFilters/test_search_filters.py +61 -0
- cloud_dog_vdb-0.5.4/tests/application/AT1.4_ConformanceSuite/test_conformance.py +26 -0
- cloud_dog_vdb-0.5.4/tests/application/AT1.5_ClientOnlyIntegration/test_client_only_integration.py +147 -0
- cloud_dog_vdb-0.5.4/tests/application/AT2.1_ParserFirstEndToEnd/test_at2_1_parser_first_end_to_end.py +87 -0
- cloud_dog_vdb-0.5.4/tests/application/AT2.2_ParserProviderCoverageMatrix/test_at2_2_parser_provider_coverage_matrix.py +65 -0
- cloud_dog_vdb-0.5.4/tests/comparison_reports/pt3_1_corpus_small_comparison.json +417 -0
- cloud_dog_vdb-0.5.4/tests/comparison_reports/pt3_1_corpus_small_comparison.md +43 -0
- cloud_dog_vdb-0.5.4/tests/comparison_reports/pt3_2_corpus_medium_comparison.json +417 -0
- cloud_dog_vdb-0.5.4/tests/comparison_reports/pt3_2_corpus_medium_comparison.md +43 -0
- cloud_dog_vdb-0.5.4/tests/comparison_reports/pt3_3_corpus_large_comparison.json +417 -0
- cloud_dog_vdb-0.5.4/tests/comparison_reports/pt3_3_corpus_large_comparison.md +43 -0
- cloud_dog_vdb-0.5.4/tests/comparison_reports/pt3_4_marker_vs_mineru_quality.json +417 -0
- cloud_dog_vdb-0.5.4/tests/comparison_reports/pt3_4_marker_vs_mineru_quality.md +43 -0
- cloud_dog_vdb-0.5.4/tests/comparison_reports/pt3_5_provider_latency_ranking.json +417 -0
- cloud_dog_vdb-0.5.4/tests/comparison_reports/pt3_5_provider_latency_ranking.md +43 -0
- cloud_dog_vdb-0.5.4/tests/comparison_reports/pt3_6_table_extraction_comparison.json +417 -0
- cloud_dog_vdb-0.5.4/tests/comparison_reports/pt3_6_table_extraction_comparison.md +43 -0
- cloud_dog_vdb-0.5.4/tests/comparison_reports/pt3_7_image_extraction_comparison.json +417 -0
- cloud_dog_vdb-0.5.4/tests/comparison_reports/pt3_7_image_extraction_comparison.md +43 -0
- cloud_dog_vdb-0.5.4/tests/comparison_reports/pt3_8_comparison_report_generation.json +417 -0
- cloud_dog_vdb-0.5.4/tests/comparison_reports/pt3_8_comparison_report_generation.md +43 -0
- cloud_dog_vdb-0.5.4/tests/compatibility/CT1.1_PublicApiParity/test_ct1_1_public_api_parity.py +22 -0
- cloud_dog_vdb-0.5.4/tests/compatibility/CT1.2_DefaultBehaviourParity/test_ct1_2_default_behaviour_parity.py +23 -0
- cloud_dog_vdb-0.5.4/tests/compatibility/CT1.3_MetadataIdentityParity/test_ct1_3_metadata_identity_parity.py +24 -0
- cloud_dog_vdb-0.5.4/tests/compatibility/CT1.4_ConfigCompatibility/test_ct1_4_config_compatibility.py +23 -0
- cloud_dog_vdb-0.5.4/tests/compatibility/CT1.5_ErrorContractParity/test_ct1_5_error_contract_parity.py +23 -0
- cloud_dog_vdb-0.5.4/tests/compatibility/CT1.6_ResultEnvelopeParity/test_ct1_6_result_envelope_parity.py +27 -0
- cloud_dog_vdb-0.5.4/tests/compatibility/CT1.7_LegacyConverterParity/test_ct1_7_legacy_converter_parity.py +22 -0
- cloud_dog_vdb-0.5.4/tests/conftest.py +688 -0
- cloud_dog_vdb-0.5.4/tests/env-AT +29 -0
- cloud_dog_vdb-0.5.4/tests/env-CORPUS-LARGE +1 -0
- cloud_dog_vdb-0.5.4/tests/env-CORPUS-MEDIUM +1 -0
- cloud_dog_vdb-0.5.4/tests/env-CORPUS-SMALL +1 -0
- cloud_dog_vdb-0.5.4/tests/env-CT +4 -0
- cloud_dog_vdb-0.5.4/tests/env-IT +29 -0
- cloud_dog_vdb-0.5.4/tests/env-PT +53 -0
- cloud_dog_vdb-0.5.4/tests/env-PT-COMPARE +27 -0
- cloud_dog_vdb-0.5.4/tests/env-PT-PERF +44 -0
- cloud_dog_vdb-0.5.4/tests/env-QT +29 -0
- cloud_dog_vdb-0.5.4/tests/env-REQUIRE-ALL-PARSERS +13 -0
- cloud_dog_vdb-0.5.4/tests/env-ST +16 -0
- cloud_dog_vdb-0.5.4/tests/env-UT +26 -0
- cloud_dog_vdb-0.5.4/tests/integration/IT1.10_PGVectorSearch/test_pgvector_search.py +35 -0
- cloud_dog_vdb-0.5.4/tests/integration/IT1.11_CrossBackendPortable/test_cross_backend.py +58 -0
- cloud_dog_vdb-0.5.4/tests/integration/IT1.12_IngestionRealBackend/test_ingestion_real.py +47 -0
- cloud_dog_vdb-0.5.4/tests/integration/IT1.13_LifecycleRealBackend/test_lifecycle_real.py +61 -0
- cloud_dog_vdb-0.5.4/tests/integration/IT1.1_ChromaCRUD/test_chroma_crud.py +52 -0
- cloud_dog_vdb-0.5.4/tests/integration/IT1.2_ChromaSearch/test_chroma_search.py +40 -0
- cloud_dog_vdb-0.5.4/tests/integration/IT1.3_QdrantCRUD/test_qdrant_crud.py +44 -0
- cloud_dog_vdb-0.5.4/tests/integration/IT1.4_QdrantSearch/test_qdrant_search.py +37 -0
- cloud_dog_vdb-0.5.4/tests/integration/IT1.5_WeaviateCRUD/test_weaviate_crud.py +46 -0
- cloud_dog_vdb-0.5.4/tests/integration/IT1.6_WeaviateSearch/test_weaviate_search.py +38 -0
- cloud_dog_vdb-0.5.4/tests/integration/IT1.7_OpenSearchCRUD/test_opensearch_crud.py +58 -0
- cloud_dog_vdb-0.5.4/tests/integration/IT1.8_OpenSearchHybrid/test_opensearch_hybrid.py +45 -0
- cloud_dog_vdb-0.5.4/tests/integration/IT1.9_PGVectorCRUD/test_pgvector_crud.py +38 -0
- cloud_dog_vdb-0.5.4/tests/integration/IT2.10_EmbeddingDimensionValidation/test_it2_10_embedding_dimension_validation.py +63 -0
- cloud_dog_vdb-0.5.4/tests/integration/IT2.11_ParserProviderCoverageMatrix/test_it2_11_parser_provider_coverage_matrix.py +65 -0
- cloud_dog_vdb-0.5.4/tests/integration/IT2.1_ParserFirstIngest_Chroma/test_it2_1_parser_first_ingest_chroma.py +33 -0
- cloud_dog_vdb-0.5.4/tests/integration/IT2.2_ParserFirstIngest_Qdrant/test_it2_2_parser_first_ingest_qdrant.py +33 -0
- cloud_dog_vdb-0.5.4/tests/integration/IT2.3_ParserFirstIngest_Weaviate/test_it2_3_parser_first_ingest_weaviate.py +33 -0
- cloud_dog_vdb-0.5.4/tests/integration/IT2.4_ParserFirstIngest_OpenSearch/test_it2_4_parser_first_ingest_opensearch.py +33 -0
- cloud_dog_vdb-0.5.4/tests/integration/IT2.5_ParserFirstIngest_PGVector/test_it2_5_parser_first_ingest_pgvector.py +33 -0
- cloud_dog_vdb-0.5.4/tests/integration/IT2.6_ParserFirstIngest_Infinity/test_it2_6_parser_first_ingest_infinity.py +36 -0
- cloud_dog_vdb-0.5.4/tests/integration/IT2.7_DeleteByFilterPortableFallback/test_it2_7_delete_by_filter_portable_fallback.py +86 -0
- cloud_dog_vdb-0.5.4/tests/integration/IT2.8_LifecycleAndPurgeSafety/test_it2_8_lifecycle_and_purge_safety.py +71 -0
- cloud_dog_vdb-0.5.4/tests/integration/IT2.9_MetadataFilterParity/test_it2_9_metadata_filter_parity.py +74 -0
- cloud_dog_vdb-0.5.4/tests/integration/__init__.py +13 -0
- cloud_dog_vdb-0.5.4/tests/integration/_metadata_parity.py +69 -0
- cloud_dog_vdb-0.5.4/tests/integration/_parser_ingest_helpers.py +214 -0
- cloud_dog_vdb-0.5.4/tests/parser/PT1.1_MineruAdapter/test_pt1_1_mineru_adapter.py +40 -0
- cloud_dog_vdb-0.5.4/tests/parser/PT1.2_DeepdocAdapter/test_pt1_2_deepdoc_adapter.py +40 -0
- cloud_dog_vdb-0.5.4/tests/parser/PT1.3_DoclingAdapter/test_pt1_3_docling_adapter.py +40 -0
- cloud_dog_vdb-0.5.4/tests/parser/PT1.4_MarkerMcpAdapter/test_pt1_4_marker_mcp_adapter.py +40 -0
- cloud_dog_vdb-0.5.4/tests/parser/PT1.5_OcrProviders/test_pt1_5_ocr_providers.py +48 -0
- cloud_dog_vdb-0.5.4/tests/parser/PT1.6_EndpointSource_EnvFile/test_pt1_6_endpoint_source_envfile.py +33 -0
- cloud_dog_vdb-0.5.4/tests/parser/PT1.7_EndpointSource_VaultResolved/test_pt1_7_endpoint_source_vault_resolved.py +22 -0
- cloud_dog_vdb-0.5.4/tests/parser/PT1.8_TransformersAdapter/test_pt1_8_transformers_adapter.py +40 -0
- cloud_dog_vdb-0.5.4/tests/parser/PT3.1_CorpusSmallComparison/test_pt3_1_corpus_small_comparison.py +35 -0
- cloud_dog_vdb-0.5.4/tests/parser/PT3.2_CorpusMediumComparison/test_pt3_2_corpus_medium_comparison.py +35 -0
- cloud_dog_vdb-0.5.4/tests/parser/PT3.3_CorpusLargeComparison/test_pt3_3_corpus_large_comparison.py +34 -0
- cloud_dog_vdb-0.5.4/tests/parser/PT3.4_MarkerVsMineruQuality/test_pt3_4_marker_vs_mineru_quality.py +39 -0
- cloud_dog_vdb-0.5.4/tests/parser/PT3.5_ProviderLatencyRanking/test_pt3_5_provider_latency_ranking.py +42 -0
- cloud_dog_vdb-0.5.4/tests/parser/PT3.6_TableExtractionComparison/test_pt3_6_table_extraction_comparison.py +36 -0
- cloud_dog_vdb-0.5.4/tests/parser/PT3.7_ImageExtractionComparison/test_pt3_7_image_extraction_comparison.py +40 -0
- cloud_dog_vdb-0.5.4/tests/parser/PT3.8_ComparisonReportGeneration/test_pt3_8_comparison_report_generation.py +44 -0
- cloud_dog_vdb-0.5.4/tests/parser/_comparison_helpers.py +129 -0
- cloud_dog_vdb-0.5.4/tests/parser/_provider_matrix.py +717 -0
- cloud_dog_vdb-0.5.4/tests/parser_performance/PT2.1_CorpusLatencyBenchmarks/test_pt2_1_corpus_latency_benchmarks.py +36 -0
- cloud_dog_vdb-0.5.4/tests/parser_performance/PT2.2_CorpusThroughputBenchmarks/test_pt2_2_corpus_throughput_benchmarks.py +34 -0
- cloud_dog_vdb-0.5.4/tests/parser_performance/PT2.3_OcrAutoDecisionRate/test_pt2_3_ocr_auto_decision_rate.py +34 -0
- cloud_dog_vdb-0.5.4/tests/parser_performance/PT2.4_QualityInvariantPassRate/test_pt2_4_quality_invariant_pass_rate.py +57 -0
- cloud_dog_vdb-0.5.4/tests/parser_performance/PT2.5_ParserComparisonMatrix/test_pt2_5_parser_comparison_matrix.py +40 -0
- cloud_dog_vdb-0.5.4/tests/parser_performance/PT2.6_EmbeddingPipelinePerformance/test_pt2_6_embedding_pipeline_performance.py +38 -0
- cloud_dog_vdb-0.5.4/tests/quality/QT_PUBLISH_COMPLIANCE/__init__.py +2 -0
- cloud_dog_vdb-0.5.4/tests/quality/QT_PUBLISH_COMPLIANCE/test_publish_compliance.py +73 -0
- cloud_dog_vdb-0.5.4/tests/quality/__init__.py +2 -0
- cloud_dog_vdb-0.5.4/tests/reports/parser_quality_matrix_deepdoc.json +98 -0
- cloud_dog_vdb-0.5.4/tests/reports/parser_quality_matrix_docling.json +98 -0
- cloud_dog_vdb-0.5.4/tests/reports/parser_quality_matrix_marker_mcp.json +99 -0
- cloud_dog_vdb-0.5.4/tests/reports/parser_quality_matrix_mineru.json +99 -0
- cloud_dog_vdb-0.5.4/tests/reports/parser_quality_matrix_transformers.json +98 -0
- cloud_dog_vdb-0.5.4/tests/security/QT1.1_TenantIsolation/test_tenant_isolation.py +55 -0
- cloud_dog_vdb-0.5.4/tests/security/QT1.2_AccessControlEnforcement/test_access_control.py +50 -0
- cloud_dog_vdb-0.5.4/tests/security/QT1.3_SecretNeverPersisted/test_secret_handling.py +30 -0
- cloud_dog_vdb-0.5.4/tests/security/QT1.4_PurgeRequiresAdmin/test_purge_admin.py +51 -0
- cloud_dog_vdb-0.5.4/tests/security/QT2.1_CommandAllowlist/test_qt2_1_command_allowlist.py +20 -0
- cloud_dog_vdb-0.5.4/tests/security/QT2.2_CommandTimeout/test_qt2_2_command_timeout.py +20 -0
- cloud_dog_vdb-0.5.4/tests/security/QT2.3_EndpointAllowlist/test_qt2_3_endpoint_allowlist.py +20 -0
- cloud_dog_vdb-0.5.4/tests/security/QT2.4_SecretRedaction/test_qt2_4_secret_redaction.py +23 -0
- cloud_dog_vdb-0.5.4/tests/security/QT2.5_PathTraversalGuard/test_qt2_5_path_traversal_guard.py +24 -0
- cloud_dog_vdb-0.5.4/tests/security/QT2.6_OcrCostGuard/test_qt2_6_ocr_cost_guard.py +21 -0
- cloud_dog_vdb-0.5.4/tests/security/QT2.7_ConfigDelegationEnforcement/test_qt2_7_config_delegation_enforcement.py +24 -0
- cloud_dog_vdb-0.5.4/tests/system/ST1.10_ServiceStartupLocal/test_service_startup_local.py +26 -0
- cloud_dog_vdb-0.5.4/tests/system/ST1.11_FullIngestionLocal/test_full_ingestion_local.py +30 -0
- cloud_dog_vdb-0.5.4/tests/system/ST1.12_SearchWithFiltersLocal/test_search_filters_local.py +42 -0
- cloud_dog_vdb-0.5.4/tests/system/ST1.1_CRUDEndToEnd/test_crud_e2e.py +44 -0
- cloud_dog_vdb-0.5.4/tests/system/ST1.2_SearchEndToEnd/test_search_e2e.py +37 -0
- cloud_dog_vdb-0.5.4/tests/system/ST1.3_IngestionPipeline/test_ingestion_e2e.py +29 -0
- cloud_dog_vdb-0.5.4/tests/system/ST1.4_LifecycleEndToEnd/test_lifecycle_e2e.py +20 -0
- cloud_dog_vdb-0.5.4/tests/system/ST1.5_JobSubmitToComplete/test_job_e2e.py +25 -0
- cloud_dog_vdb-0.5.4/tests/system/ST1.6_IdempotentIngestion/test_idempotent.py +37 -0
- cloud_dog_vdb-0.5.4/tests/system/ST1.7_CollectionManagement/test_collection_mgmt.py +32 -0
- cloud_dog_vdb-0.5.4/tests/system/ST1.8_ConfigDelegationE2E/test_config_delegation_e2e.py +49 -0
- cloud_dog_vdb-0.5.4/tests/system/ST1.9_CrossBackendPortableLocal/test_cross_backend_local.py +43 -0
- cloud_dog_vdb-0.5.4/tests/system/ST2.1_LegacyPathParity/test_legacy_path_parity.py +90 -0
- cloud_dog_vdb-0.5.4/tests/system/ST2.2_IRPathLocal/test_ir_path_local.py +82 -0
- cloud_dog_vdb-0.5.4/tests/system/ST2.3_CheckpointResumeAcrossParse/test_checkpoint_resume_across_parse.py +22 -0
- cloud_dog_vdb-0.5.4/tests/system/ST2.4_TableChunkFlow/test_table_chunk_flow.py +24 -0
- cloud_dog_vdb-0.5.4/tests/system/ST2.5_OcrModeDisabled/test_ocr_mode_disabled.py +20 -0
- cloud_dog_vdb-0.5.4/tests/system/ST2.6_OcrModeForce/test_ocr_mode_force.py +20 -0
- cloud_dog_vdb-0.5.4/tests/system/ST2.7_OcrModeAuto/test_ocr_mode_auto.py +20 -0
- cloud_dog_vdb-0.5.4/tests/system/ST2.8_ParserFallbackExecution/test_parser_fallback_execution.py +52 -0
- cloud_dog_vdb-0.5.4/tests/system/ST3.1_MarkerMcpSyncParse/test_st3_1_marker_mcp_sync_parse.py +64 -0
- cloud_dog_vdb-0.5.4/tests/system/ST3.2_MarkerMcpAsyncParse/test_st3_2_marker_mcp_async_parse.py +75 -0
- cloud_dog_vdb-0.5.4/tests/system/ST3.3_MarkerMcpImageArtefacts/test_st3_3_marker_mcp_image_artefacts.py +76 -0
- cloud_dog_vdb-0.5.4/tests/system/ST3.4_MarkerMcpLargeDocument/test_st3_4_marker_mcp_large_document.py +78 -0
- cloud_dog_vdb-0.5.4/tests/tools/_enrichment.py +160 -0
- cloud_dog_vdb-0.5.4/tests/tools/local_deepdoc_parser.py +55 -0
- cloud_dog_vdb-0.5.4/tests/tools/local_docling_parser.py +68 -0
- cloud_dog_vdb-0.5.4/tests/tools/local_transformers_parser.py +73 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT1.10_DeterministicIdentity/test_canonical_identity_helpers.py +50 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT1.10_DeterministicIdentity/test_identity.py +23 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT1.11_SourceURINormalise/test_normalise.py +19 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT1.12_CommonIndexingOptions/test_indexing_options.py +20 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT1.13_CommonSearchOptions/test_search_options.py +20 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT1.14_BackendSpecificOptions/test_backend_options.py +21 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT1.15_OptionsManager/test_options_manager.py +19 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT1.16_LifecycleManager/test_lifecycle.py +20 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT1.17_RetentionPolicy/test_retention.py +28 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT1.18_SupersessionRules/test_supersession.py +21 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT1.19_PurgeSafety/test_purge_safety.py +23 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT1.1_AdapterInterface/test_adapter_interface.py +33 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT1.20_AccessPolicy/test_access_policy.py +21 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT1.21_TenantIsolation/test_isolation.py +20 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT1.22_FixedChunker/test_fixed_chunker.py +19 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT1.23_RecursiveChunker/test_recursive_chunker.py +19 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT1.24_SemanticChunker/test_semantic_chunker.py +19 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT1.25_PandasConverter/test_pandas_conv.py +19 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT1.26_GenericConverter/test_generic_conv.py +19 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT1.27_PipelineStages/test_pipeline_stages.py +29 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT1.28_Checkpoints/test_checkpoints.py +19 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT1.29_JobModel/test_job_model.py +20 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT1.2_ChromaAdapter/test_chroma.py +52 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT1.30_JobQueue/test_job_queue.py +24 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT1.31_ConfigDelegation/test_config_delegation.py +72 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT1.32_ErrorTaxonomy/test_errors.py +19 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT1.33_CollectionSpec/test_collection_spec.py +20 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT1.34_ConfigDelegationVerification/test_config_delegation_verification.py +63 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT1.35_ResponseNormaliser/test_response_normaliser.py +61 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT1.36_RemoteProxy/test_remote_proxy.py +69 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT1.37_SchemaVersioning/test_schema_versioning.py +48 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT1.38_LifecycleFunctions/test_lifecycle_functions.py +20 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT1.39_ConformanceMock/test_conformance_mock.py +20 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT1.3_QdrantAdapter/test_qdrant_unit.py +88 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT1.40_RemoteClientMock/test_remote_client_mock.py +55 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT1.41_TenantIsolationLogic/test_tenant_isolation_logic.py +20 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT1.42_AccessControlLogic/test_access_control_logic.py +20 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT1.43_PurgeAdminLogic/test_purge_admin_logic.py +21 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT1.44_RealEmbeddingPropagation/test_real_embedding_propagation.py +154 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT1.45_MetadataFilters/test_metadata_filters.py +101 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT1.46_ProvenanceExtensions/test_provenance.py +46 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT1.47_CrossBackendMetadataParityFixture/test_metadata_parity_fixture.py +103 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT1.4_WeaviateAdapter/test_weaviate_unit.py +21 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT1.5_OpenSearchAdapter/test_opensearch_unit.py +21 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT1.6_PGVectorAdapter/test_pgvector_unit.py +21 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT1.7_CapabilityDescriptor/test_capabilities.py +20 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT1.8_QueryPlanner/test_query_planner.py +21 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT1.9_MetadataValidator/test_canonical_metadata_schema.py +68 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT1.9_MetadataValidator/test_metadata.py +56 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT2.10_ConfigAliasCompatibility/test_config_alias_compatibility.py +30 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT2.11_ParserCommandSandboxPolicy/test_parser_command_sandbox_policy.py +26 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT2.12_ProviderHealthContract/test_provider_health_contract.py +389 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT2.13_BatchUpsertLifecycleParity/test_batch_upsert_lifecycle_parity.py +85 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT2.14_InfinityOutputShapeCompatibility/test_infinity_output_shape_compatibility.py +38 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT2.15_SourceUriFilenameMimeInference/test_source_uri_filename_mime_inference.py +185 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT2.1_ParserCapabilities/test_parser_capabilities.py +41 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT2.2_ParserPlannerSelection/test_parser_planner_selection.py +31 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT2.3_ParserFallbackPolicy/test_parser_fallback_policy.py +24 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT2.4_DocumentIRSchema/test_document_ir_schema.py +22 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT2.5_TableRenderPolicies/test_table_render_policies.py +21 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT2.6_TableJsonSchema/test_table_json_schema.py +21 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT2.7_OCRHeuristics/test_ocr_heuristics.py +24 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT2.8_OCRCostLimiter/test_ocr_cost_limiter.py +24 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT2.9_BoundaryAwareChunking/test_boundary_aware_chunking.py +25 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT3.1_MarkerResponseContract/test_ut3_1_marker_response_contract.py +33 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT3.2_MarkerImageExtraction/test_ut3_2_marker_image_extraction.py +39 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT3.3_MarkerTOCExtraction/test_ut3_3_marker_toc_extraction.py +38 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT3.4_AsyncParseRunnerSubmitPoll/test_ut3_4_async_parse_runner_submit_poll.py +77 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT3.5_AsyncParseRunnerTimeout/test_ut3_5_async_parse_runner_timeout.py +68 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT3.6_AsyncParseRunnerCancellation/test_ut3_6_async_parse_runner_cancellation.py +78 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT3.7_AsyncModeAutoTrigger/test_ut3_7_async_mode_auto_trigger.py +120 -0
- cloud_dog_vdb-0.5.4/tests/unit/UT3.8_SyncModeDefault/test_ut3_8_sync_mode_default.py +112 -0
- cloud_dog_vdb-0.5.4/working/AGENT-INSTRUCTION-FIX-VDB-TESTS-REPORT.md +77 -0
- cloud_dog_vdb-0.5.4/working/W19A-VDB-050-REPORT.md +75 -0
- cloud_dog_vdb-0.5.4/working/W23A-P2-REPORT.md +164 -0
- cloud_dog_vdb-0.5.4/working/W23A-PARSER-SETUP-REPORT.md +104 -0
- cloud_dog_vdb-0.5.4/working/W23A-PT14-RERUN-V2-REPORT.md +42 -0
- cloud_dog_vdb-0.5.4/working/W23A-PT14-RERUN-V3-REPORT.md +75 -0
- cloud_dog_vdb-0.5.4/working/W23A-PT14-RERUN-V4-REPORT.md +76 -0
- cloud_dog_vdb-0.5.4/working/W23A-PT14-RERUN-V5-REPORT.md +53 -0
- cloud_dog_vdb-0.5.4/working/W25A-75-MARKER-MCP-VALIDATE-REPORT.md +108 -0
- cloud_dog_vdb-0.5.4/working/W28A-116-FIX-VDB-REPORT.md +99 -0
- cloud_dog_vdb-0.5.4/working/W28A-124-PLATFORM-VDB-HEALTH-REPORT.md +97 -0
- cloud_dog_vdb-0.5.4/working/W28A-131-PLATFORM-VDB-ST-TIMEOUT-FIX-REPORT.md +93 -0
- cloud_dog_vdb-0.5.4/working/W28A-85-PARSER-FIX-REPORT.md +75 -0
- cloud_dog_vdb-0.5.4/working/cloud_dog_vdb_ARCHITECTURE.md +235 -0
- cloud_dog_vdb-0.5.4/working/cloud_dog_vdb_RELEASE_UPLIFT_PROPOSAL.md +98 -0
- cloud_dog_vdb-0.5.4/working/cloud_dog_vdb_REQUIREMENTS.md +254 -0
- cloud_dog_vdb-0.5.4/working/cloud_dog_vdb_TESTS.md +150 -0
|
@@ -0,0 +1,332 @@
|
|
|
1
|
+
# Agent Instruction — Fix cloud_dog_vdb Test Infrastructure
|
|
2
|
+
|
|
3
|
+
**Package:** `cloud_dog_vdb`
|
|
4
|
+
**Version:** 0.3.0
|
|
5
|
+
**Date:** 2026-02-20
|
|
6
|
+
**Status:** OPEN — CRITICAL defects in test infrastructure
|
|
7
|
+
**Scope:** Fix silent-skip test fraud, missing precondition checks, misclassified tests, env file gaps
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
## INTEGRITY WARRANTY — READ THIS FIRST
|
|
12
|
+
|
|
13
|
+
This section is copied verbatim from the platform-wide `RULES.md` Section 1. It is **NON-NEGOTIABLE**.
|
|
14
|
+
|
|
15
|
+
**I WILL NEVER:**
|
|
16
|
+
- **LIE** about test results, implementation status, or compliance
|
|
17
|
+
- **FUDGE** test data, configuration, or validation results
|
|
18
|
+
- **HACK** around problems instead of fixing root causes
|
|
19
|
+
- **FALSIFY** test outputs, logs, or status reports
|
|
20
|
+
- **STUB** functionality in IT/AT tests when real implementation is required
|
|
21
|
+
- **FAKE** success when there are errors, warnings, or failures
|
|
22
|
+
- **HIDE** failures, warnings, or non-compliance
|
|
23
|
+
- **PRETEND** tests pass when they fail
|
|
24
|
+
- **SKIP** validation steps to claim completion
|
|
25
|
+
- **BYPASS** rules or requirements for convenience
|
|
26
|
+
|
|
27
|
+
**IF I CANNOT GUARANTEE 100% COMPLIANCE, I WILL STOP AND SAY SO EXPLICITLY.**
|
|
28
|
+
|
|
29
|
+
**IF TESTS FAIL, I WILL REPORT FAILURES HONESTLY, NOT HIDE THEM.**
|
|
30
|
+
|
|
31
|
+
**IF I DON'T KNOW, I WILL ASK, NOT GUESS.**
|
|
32
|
+
|
|
33
|
+
**"ASK. DON'T GUESS. DON'T LIE. DON'T FUDGE."**
|
|
34
|
+
|
|
35
|
+
---
|
|
36
|
+
|
|
37
|
+
## ADDITIONAL RULES — ZERO TOLERANCE
|
|
38
|
+
|
|
39
|
+
These rules supplement the Integrity Warranty. Violation of ANY rule invalidates all work.
|
|
40
|
+
|
|
41
|
+
1. **100% REAL systems in IT/AT** — no mocks, no stubs, no `local_mode=True`, no `MockTransport`. If it says "Integration" it MUST integrate with a real external service.
|
|
42
|
+
2. **Silent skip is a LIE** — `pytest.skip()` when a backend is unavailable makes the test report say "0 failed". This is indistinguishable from "all passed". A skipped IT/AT test is NOT a passed test. It is an UNTESTED test.
|
|
43
|
+
3. **env files MUST be complete** — if a test tier requires `VAULT_TOKEN`, either the env file for that tier MUST contain it, or the test MUST call `pytest.fail()` (not skip) when it is absent.
|
|
44
|
+
4. **Test type MUST match reality** — a test that calls pure functions with no external service is a **UT**, not an IT/AT/QT. A test that uses `local_mode=True` is a **ST** at best, not an IT/AT.
|
|
45
|
+
5. **No decoration env files** — every variable in an env file MUST be consumed by the test code that loads it. If the test ignores the env file and hard-codes Vault, the env file is decoration and MUST be removed or the test MUST be fixed to use it.
|
|
46
|
+
6. **Write-path precondition checks** — before attempting backend writes in IT tests, probe the backend with a lightweight write-then-delete operation. If the probe fails, `pytest.fail()` with a clear message identifying the backend and error. Do NOT just crash with an opaque 500.
|
|
47
|
+
7. **Config delegation** — test fixtures MUST use the same config loading path as the application. If the app uses `cloud_dog_config` layered precedence, tests MUST NOT bypass it by shelling out to Vault directly.
|
|
48
|
+
8. **Honest reporting** — when reporting test results, ALWAYS state the skip count. "76 passed, 0 failed, 11 skipped" is NOT the same as "76 passed". If skipped tests include IT tests that should have run against real backends, this MUST be flagged as a gap, not hidden.
|
|
49
|
+
|
|
50
|
+
---
|
|
51
|
+
|
|
52
|
+
## WHY THIS INSTRUCTION EXISTS
|
|
53
|
+
|
|
54
|
+
### Audit Findings (2026-02-20)
|
|
55
|
+
|
|
56
|
+
An audit of the `cloud_dog_vdb` test infrastructure found **5 defects** (2 critical, 2 high, 1 medium):
|
|
57
|
+
|
|
58
|
+
| ID | Severity | Finding | Evidence |
|
|
59
|
+
|----|----------|---------|----------|
|
|
60
|
+
| **T-1** | CRITICAL | `env-IT` missing `VAULT_TOKEN` — all 11 real-backend IT tests silently skip | `tests/env-IT` has 3 of 4 required Vault vars. `conftest.py:59` calls `pytest.skip()` when `VAULT_TOKEN` is missing. Result: "0 failed, 11 skipped" reported as "PASS". |
|
|
61
|
+
| **T-2** | CRITICAL | 10 tests misclassified as IT/AT/QT when they are actually UT/ST | See § Misclassified Tests below. |
|
|
62
|
+
| **T-3** | HIGH | No write-path precondition check before backend operations | IT tests dive straight into `create_collection()` / `add_documents()`. When backend has resource exhaustion (e.g. Chroma FD limit), tests crash with opaque HTTP 500. |
|
|
63
|
+
| **T-4** | HIGH | `pytest.skip()` used instead of `pytest.fail()` for mandatory IT preconditions | `conftest.py:59` and all 11 IT tests use `pytest.skip()` when config/backend is missing. For IT tests that MUST run against real backends, this should be `pytest.fail()`. |
|
|
64
|
+
| **T-5** | MEDIUM | `env-UT`, `env-ST`, `env-AT` all contain identical Vault vars but no `VAULT_TOKEN` | All 4 env files have `VAULT_ADDR`, `VAULT_MOUNT_POINT`, `VAULT_CONFIG_PATH` but not `VAULT_TOKEN`. UT/ST don't need Vault (per RULES.md § 5.5). AT tests use `local_mode=True` so don't need it either. These Vault vars are decoration. |
|
|
65
|
+
|
|
66
|
+
### Misclassified Tests (T-2)
|
|
67
|
+
|
|
68
|
+
| Test | Claimed Type | Actual Type | Evidence |
|
|
69
|
+
|------|-------------|-------------|----------|
|
|
70
|
+
| IT1.11 CrossBackendPortable | Integration | ST (local) | Uses `local_mode=True` — no real backend |
|
|
71
|
+
| IT1.13 LifecycleRealBackend | Integration | **UT** | Calls `mark_deleted()` / `mark_superseded()` — pure functions, no backend |
|
|
72
|
+
| AT1.1 ServiceStartupPattern | Application | ST (local) | Uses `local_mode=True` — no real service |
|
|
73
|
+
| AT1.2 FullIngestionFlow | Application | ST (local) | Uses `local_mode=True` — no real service |
|
|
74
|
+
| AT1.3 SearchWithFilters | Application | ST (local) | Uses `local_mode=True` — no real service |
|
|
75
|
+
| AT1.4 ConformanceSuite | Application | **UT** | Uses `mock_adapter()` — mock, not real |
|
|
76
|
+
| AT1.5 ClientOnlyIntegration | Application | **UT** | Uses `httpx.MockTransport` — mock, not real |
|
|
77
|
+
| QT1.1 TenantIsolation | Security | **UT** | Calls `enforce_tenant()` — pure function, 7 lines |
|
|
78
|
+
| QT1.2 AccessControlEnforcement | Security | **UT** | Calls `can_admin()` — pure function, 7 lines |
|
|
79
|
+
| QT1.4 PurgeRequiresAdmin | Security | **UT** | Calls `can_admin()` — pure function, 8 lines |
|
|
80
|
+
|
|
81
|
+
**Correctly classified tests:** QT1.3 (static file scan — legitimate QT).
|
|
82
|
+
|
|
83
|
+
---
|
|
84
|
+
|
|
85
|
+
## HARD CONSTRAINTS
|
|
86
|
+
|
|
87
|
+
- **DO NOT** delete any test. Reclassify by moving to the correct directory.
|
|
88
|
+
- **DO NOT** add `os.environ`/`os.getenv` reads in library source code.
|
|
89
|
+
- **DO NOT** weaken any existing test assertion.
|
|
90
|
+
- **DO NOT** convert real-backend IT tests to use `local_mode=True`.
|
|
91
|
+
- **DO NOT** claim completion without running the full verification chain AND reporting skip counts.
|
|
92
|
+
- **UK English only.**
|
|
93
|
+
|
|
94
|
+
---
|
|
95
|
+
|
|
96
|
+
## PHASE 1 — Fix VAULT_TOKEN handling for env-IT (T-1)
|
|
97
|
+
|
|
98
|
+
### Step 1.1 — Decide: env-IT should NOT contain VAULT_TOKEN directly
|
|
99
|
+
|
|
100
|
+
Per RULES.md § 5.5: *"NEVER save keys, passwords, tokens, or credentials into the repository."*
|
|
101
|
+
|
|
102
|
+
`VAULT_TOKEN` is a credential. It MUST NOT be in `env-IT`.
|
|
103
|
+
|
|
104
|
+
**Solution:** The test runner MUST source `env-vault` before running IT tests. The `conftest.py` fixture must enforce this by calling `pytest.fail()` (not `pytest.skip()`) when `VAULT_TOKEN` is missing for IT-tier tests.
|
|
105
|
+
|
|
106
|
+
### Step 1.2 — Update `conftest.py`: fail instead of skip for IT/AT tests
|
|
107
|
+
|
|
108
|
+
Change `conftest.py` `vault_config()` fixture:
|
|
109
|
+
|
|
110
|
+
```python
|
|
111
|
+
# BEFORE:
|
|
112
|
+
if missing:
|
|
113
|
+
pytest.skip(f"Vault variables missing: {', '.join(missing)}")
|
|
114
|
+
|
|
115
|
+
# AFTER:
|
|
116
|
+
if missing:
|
|
117
|
+
tier = os.environ.get("TEST_ENV_TIER", "")
|
|
118
|
+
if tier in ("IT", "AT"):
|
|
119
|
+
pytest.fail(
|
|
120
|
+
f"VAULT_TOKEN and Vault variables are REQUIRED for {tier} tests. "
|
|
121
|
+
f"Missing: {', '.join(missing)}. "
|
|
122
|
+
f"Run: set -a; source /opt/iac/Development/cloud-dog-ai/env-vault; set +a"
|
|
123
|
+
)
|
|
124
|
+
pytest.skip(f"Vault variables missing (UT/ST may skip): {', '.join(missing)}")
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
### Step 1.3 — Add `TEST_ENV_TIER` to env files
|
|
128
|
+
|
|
129
|
+
Add to each env file:
|
|
130
|
+
|
|
131
|
+
| File | Add |
|
|
132
|
+
|------|-----|
|
|
133
|
+
| `env-UT` | `TEST_ENV_TIER=UT` |
|
|
134
|
+
| `env-ST` | `TEST_ENV_TIER=ST` |
|
|
135
|
+
| `env-IT` | `TEST_ENV_TIER=IT` |
|
|
136
|
+
| `env-AT` | `TEST_ENV_TIER=AT` |
|
|
137
|
+
|
|
138
|
+
### Step 1.4 — Remove decoration Vault vars from env-UT and env-ST
|
|
139
|
+
|
|
140
|
+
UT and ST tests MUST NOT require Vault (RULES.md § 5.5). Remove `VAULT_ADDR`, `VAULT_MOUNT_POINT`, `VAULT_CONFIG_PATH` from `env-UT` and `env-ST`.
|
|
141
|
+
|
|
142
|
+
---
|
|
143
|
+
|
|
144
|
+
## PHASE 2 — Add write-path precondition checks (T-3)
|
|
145
|
+
|
|
146
|
+
### Step 2.1 — Create a reusable backend probe fixture
|
|
147
|
+
|
|
148
|
+
Add to `conftest.py`:
|
|
149
|
+
|
|
150
|
+
```python
|
|
151
|
+
@pytest.fixture(scope="session")
|
|
152
|
+
def chroma_ready(vdbs: dict) -> dict:
|
|
153
|
+
"""Verify Chroma can actually handle writes, not just heartbeat."""
|
|
154
|
+
cfg = vdbs.get("chroma", {})
|
|
155
|
+
if not cfg:
|
|
156
|
+
pytest.fail("dev.vdbs.chroma missing from Vault config")
|
|
157
|
+
from cloud_dog_vdb.adapters.chroma import ChromaAdapter
|
|
158
|
+
from cloud_dog_vdb.config.models import ProviderConfig
|
|
159
|
+
from cloud_dog_vdb.domain.models import CollectionSpec
|
|
160
|
+
import asyncio
|
|
161
|
+
|
|
162
|
+
a = ChromaAdapter(
|
|
163
|
+
ProviderConfig(provider_id="chroma", base_url=cfg.get("base_url", ""), api_key=cfg.get("auth_token", "")),
|
|
164
|
+
local_mode=False,
|
|
165
|
+
)
|
|
166
|
+
probe_name = "_cloud_dog_write_probe"
|
|
167
|
+
loop = asyncio.get_event_loop()
|
|
168
|
+
try:
|
|
169
|
+
loop.run_until_complete(a.delete_collection(probe_name))
|
|
170
|
+
loop.run_until_complete(a.create_collection(CollectionSpec(name=probe_name)))
|
|
171
|
+
loop.run_until_complete(a.delete_collection(probe_name))
|
|
172
|
+
except Exception as exc:
|
|
173
|
+
pytest.fail(f"Chroma write-path probe FAILED: {exc}")
|
|
174
|
+
return cfg
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
Create equivalent `qdrant_ready`, `weaviate_ready`, `opensearch_ready`, `pgvector_ready` fixtures.
|
|
178
|
+
|
|
179
|
+
### Step 2.2 — Update IT tests to use `*_ready` fixtures instead of raw `vdbs`
|
|
180
|
+
|
|
181
|
+
Replace `vdbs` parameter with the appropriate `*_ready` fixture in each IT test.
|
|
182
|
+
|
|
183
|
+
---
|
|
184
|
+
|
|
185
|
+
## PHASE 3 — Reclassify misclassified tests (T-2)
|
|
186
|
+
|
|
187
|
+
### Step 3.1 — Move misclassified tests to correct directories
|
|
188
|
+
|
|
189
|
+
| Current Location | Move To | Reason |
|
|
190
|
+
|-----------------|---------|--------|
|
|
191
|
+
| `integration/IT1.11_CrossBackendPortable/` | `system/ST1.9_CrossBackendPortableLocal/` | Uses `local_mode=True` |
|
|
192
|
+
| `integration/IT1.13_LifecycleRealBackend/` | `unit/UT1.38_LifecycleFunctions/` | Pure function calls |
|
|
193
|
+
| `application/AT1.1_ServiceStartupPattern/` | `system/ST1.10_ServiceStartupLocal/` | Uses `local_mode=True` |
|
|
194
|
+
| `application/AT1.2_FullIngestionFlow/` | `system/ST1.11_FullIngestionLocal/` | Uses `local_mode=True` |
|
|
195
|
+
| `application/AT1.3_SearchWithFilters/` | `system/ST1.12_SearchWithFiltersLocal/` | Uses `local_mode=True` |
|
|
196
|
+
| `application/AT1.4_ConformanceSuite/` | `unit/UT1.39_ConformanceMock/` | Uses `mock_adapter()` |
|
|
197
|
+
| `application/AT1.5_ClientOnlyIntegration/` | `unit/UT1.40_RemoteClientMock/` | Uses `MockTransport` |
|
|
198
|
+
| `security/QT1.1_TenantIsolation/` | `unit/UT1.41_TenantIsolationLogic/` | Pure function |
|
|
199
|
+
| `security/QT1.2_AccessControlEnforcement/` | `unit/UT1.42_AccessControlLogic/` | Pure function |
|
|
200
|
+
| `security/QT1.4_PurgeRequiresAdmin/` | `unit/UT1.43_PurgeAdminLogic/` | Pure function |
|
|
201
|
+
|
|
202
|
+
### Step 3.2 — Write REAL IT/AT/QT replacements
|
|
203
|
+
|
|
204
|
+
For each reclassified test, write a NEW test at the original location that actually uses real backends:
|
|
205
|
+
|
|
206
|
+
| New Test | What It Must Do |
|
|
207
|
+
|----------|----------------|
|
|
208
|
+
| IT1.11 CrossBackendPortable | Run the same portable contract against BOTH Chroma AND Qdrant real backends |
|
|
209
|
+
| IT1.13 LifecycleRealBackend | Mark records as deleted/superseded IN a real Chroma collection, verify via query |
|
|
210
|
+
| AT1.1 ServiceStartupPattern | Start a real VDB client against Chroma, verify `init_backend()` succeeds against real server |
|
|
211
|
+
| AT1.2 FullIngestionFlow | Run full ingestion against real Chroma, not `local_mode` |
|
|
212
|
+
| AT1.3 SearchWithFilters | Search with metadata filters against real Chroma |
|
|
213
|
+
| AT1.4 ConformanceSuite | Run `adapter_conforms()` against a REAL adapter, not mock |
|
|
214
|
+
| AT1.5 ClientOnlyIntegration | If `RemoteVDBClient` is meant for real remote use, test against a real endpoint. If no real endpoint exists, document this as a gap and leave it as UT until a remote VDB service is deployed. |
|
|
215
|
+
| QT1.1 TenantIsolation | Ingest records with different `tenant_id` into real Chroma, verify isolation via search |
|
|
216
|
+
| QT1.2 AccessControlEnforcement | Test access control against a real adapter operation |
|
|
217
|
+
| QT1.4 PurgeRequiresAdmin | Attempt purge on a real collection, verify admin enforcement |
|
|
218
|
+
|
|
219
|
+
---
|
|
220
|
+
|
|
221
|
+
## PHASE 4 — Clean up env files (T-5)
|
|
222
|
+
|
|
223
|
+
### Step 4.1 — env-UT contents
|
|
224
|
+
|
|
225
|
+
```
|
|
226
|
+
TEST_ENV_TIER=UT
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
UT tests MUST NOT need anything else. If they do, the test has a dependency problem.
|
|
230
|
+
|
|
231
|
+
### Step 4.2 — env-ST contents
|
|
232
|
+
|
|
233
|
+
```
|
|
234
|
+
TEST_ENV_TIER=ST
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
ST tests use `local_mode=True` and in-memory backends. No external config needed.
|
|
238
|
+
|
|
239
|
+
### Step 4.3 — env-IT contents
|
|
240
|
+
|
|
241
|
+
```
|
|
242
|
+
TEST_ENV_TIER=IT
|
|
243
|
+
VAULT_ADDR=https://vault0.cloud-dog.net
|
|
244
|
+
VAULT_MOUNT_POINT=cloud_dog_ai
|
|
245
|
+
VAULT_CONFIG_PATH=config
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
`VAULT_TOKEN` comes from `env-vault` sourced before test run. The conftest will `pytest.fail()` if it is missing.
|
|
249
|
+
|
|
250
|
+
### Step 4.4 — env-AT contents
|
|
251
|
+
|
|
252
|
+
```
|
|
253
|
+
TEST_ENV_TIER=AT
|
|
254
|
+
VAULT_ADDR=https://vault0.cloud-dog.net
|
|
255
|
+
VAULT_MOUNT_POINT=cloud_dog_ai
|
|
256
|
+
VAULT_CONFIG_PATH=config
|
|
257
|
+
```
|
|
258
|
+
|
|
259
|
+
AT tests require real backends. Same Vault dependency as IT.
|
|
260
|
+
|
|
261
|
+
---
|
|
262
|
+
|
|
263
|
+
## PHASE 5 — Update TESTS.md
|
|
264
|
+
|
|
265
|
+
### Step 5.1 — Update test directory structure to reflect reclassifications
|
|
266
|
+
|
|
267
|
+
### Step 5.2 — Update test counts
|
|
268
|
+
|
|
269
|
+
After reclassification, counts should be approximately:
|
|
270
|
+
- **UT:** 37 (original) + 6 (reclassified from IT/AT/QT) = **43**
|
|
271
|
+
- **ST:** 8 (original) + 4 (reclassified from IT/AT) = **12**
|
|
272
|
+
- **IT:** 13 (original) - 2 (reclassified) + 2 (new real replacements) = **13**
|
|
273
|
+
- **AT:** 5 (original) - 5 (reclassified) + 5 (new real replacements) = **5**
|
|
274
|
+
- **QT:** 4 (original) - 3 (reclassified) + 3 (new real replacements) = **4**
|
|
275
|
+
|
|
276
|
+
### Step 5.3 — Update Test Run History
|
|
277
|
+
|
|
278
|
+
Record actual results with skip counts. Example:
|
|
279
|
+
|
|
280
|
+
```
|
|
281
|
+
| Date | Scope | Command | Passed | Failed | Skipped | Notes |
|
|
282
|
+
```
|
|
283
|
+
|
|
284
|
+
**NEVER write "PASS" without the skip count.**
|
|
285
|
+
|
|
286
|
+
---
|
|
287
|
+
|
|
288
|
+
## PHASE 6 — Verification
|
|
289
|
+
|
|
290
|
+
### Step 6.1 — Run UT + ST (no Vault required)
|
|
291
|
+
|
|
292
|
+
```bash
|
|
293
|
+
cd /opt/iac/Development/cloud-dog-ai/cloud-dog-ai-platform-standards/packages/backend/platform-vdb
|
|
294
|
+
.venv/bin/pytest tests/unit tests/system --env tests/env-UT --env tests/env-ST -v
|
|
295
|
+
```
|
|
296
|
+
|
|
297
|
+
**Expected:** All pass, 0 skipped.
|
|
298
|
+
|
|
299
|
+
### Step 6.2 — Run IT + AT (Vault required)
|
|
300
|
+
|
|
301
|
+
```bash
|
|
302
|
+
set -a; source /opt/iac/Development/cloud-dog-ai/env-vault; set +a
|
|
303
|
+
.venv/bin/pytest tests/integration tests/application --env tests/env-IT --env tests/env-AT -v
|
|
304
|
+
```
|
|
305
|
+
|
|
306
|
+
**Expected:** All pass against available backends. Any backend that is down should produce `pytest.fail()` with a clear error message, NOT a silent skip.
|
|
307
|
+
|
|
308
|
+
### Step 6.3 — Run QT (Vault required for real-backend QT tests)
|
|
309
|
+
|
|
310
|
+
```bash
|
|
311
|
+
.venv/bin/pytest tests/security --env tests/env-IT -v
|
|
312
|
+
```
|
|
313
|
+
|
|
314
|
+
### Step 6.4 — Report honestly
|
|
315
|
+
|
|
316
|
+
State exact counts: `N passed, N failed, N skipped`. If any IT/AT test was skipped, explain WHY and which backend is unavailable.
|
|
317
|
+
|
|
318
|
+
---
|
|
319
|
+
|
|
320
|
+
## COMPLETION GATE
|
|
321
|
+
|
|
322
|
+
This instruction is complete ONLY when:
|
|
323
|
+
|
|
324
|
+
1. `env-IT` no longer causes silent skips — missing `VAULT_TOKEN` produces `pytest.fail()`
|
|
325
|
+
2. All 10 misclassified tests are moved to correct directories
|
|
326
|
+
3. Real-backend replacements exist for all reclassified IT/AT/QT tests
|
|
327
|
+
4. Write-path probe fixtures exist for all 5 backends
|
|
328
|
+
5. TESTS.md updated with correct classifications and honest run history
|
|
329
|
+
6. Full test suite runs with `0 skipped` for UT/ST tier
|
|
330
|
+
7. IT/AT tier runs against real backends with honest reporting
|
|
331
|
+
|
|
332
|
+
**DO NOT claim completion without evidence for ALL 7 gates.**
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
# Agent Instruction — Fix cloud_dog_vdb (v0.2.0)
|
|
2
|
+
|
|
3
|
+
**Package:** `cloud_dog_vdb`
|
|
4
|
+
**Target version:** 0.2.0
|
|
5
|
+
**Date:** 2026-02-18 (re-review with source verification)
|
|
6
|
+
**Scope:** Config-delegation enforcement + adapter rewiring + test alignment + v0.2.0 features — **ALL DELIVERED AND VERIFIED**
|
|
7
|
+
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
## Status: ✅ COMPLETE (minor SA1 extra-files gap noted)
|
|
11
|
+
|
|
12
|
+
All 6 issues from the original instruction have been resolved or delivered; the only open item is the non-blocking SA1 documentation gap described under Issue 2. This document is retained for reference and future maintenance.
|
|
13
|
+
|
|
14
|
+
**Verified on 2026-02-18 (re-review):**
|
|
15
|
+
- 91 source files across 20+ subpackages
|
|
16
|
+
- 67 test directories present (37 UT + 8 ST + 13 IT + 5 AT + 4 QT)
|
|
17
|
+
- Zero config-delegation violations: `os.environ`/`hvac`/`overlay_secrets`/`VAULT_JSON` grep returns zero hits
|
|
18
|
+
- `secrets/` directory does NOT exist (deleted)
|
|
19
|
+
- All 5 adapters use `self.config.*` directly (no `self._runtime`, no overlay)
|
|
20
|
+
- Duplicate converter files removed (only `*_conv.py` versions remain)
|
|
21
|
+
- Test directories renamed to match TESTS.md v0.2.0 (ConfigDelegation, ConfigDelegationVerification, ConfigDelegationE2E)
|
|
22
|
+
- Old scaffold IT directories removed
|
|
23
|
+
- All 3 v0.2.0 feature files present and substantive
|
|
24
|
+
- Build produces `cloud_dog_vdb-0.2.0` wheel + sdist
|
|
25
|
+
|
|
26
|
+
**Governing documents:**
|
|
27
|
+
1. `platform-vdb/REQUIREMENTS.md` (v0.2.0) — FR1.31, FR1.33, FR1.34–FR1.36
|
|
28
|
+
2. `platform-vdb/ARCHITECTURE.md` (v0.2.0) — SA1 module layout
|
|
29
|
+
3. `platform-vdb/TESTS.md` (v0.2.0) — all test directories
|
|
30
|
+
4. `packages/backend/AGENT-INSTRUCTION.md` — Integrity Warranty and Config Delegation — ZERO TOLERANCE (MANDATORY)
|
|
31
|
+
|
|
32
|
+
---
|
|
33
|
+
|
|
34
|
+
## Delivery Summary
|
|
35
|
+
|
|
36
|
+
### Issue 1 — Config Delegation Enforcement ✅ RESOLVED
|
|
37
|
+
|
|
38
|
+
| Sub-issue | Status | Evidence |
|
|
39
|
+
|-----------|--------|----------|
|
|
40
|
+
| 1A. Delete `secrets/` module | ✅ | Directory does not exist |
|
|
41
|
+
| 1B. Rewire all 5 adapters | ✅ | Zero `overlay_secrets`, `self._runtime`, or `from cloud_dog_vdb.secrets` hits in source |
|
|
42
|
+
| 1C. Fix `observability/otel.py` | ✅ | Zero `os.environ` hits in any source file |
|
|
43
|
+
| 1D. Rename 3 test directories | ✅ | `UT1.31_ConfigDelegation`, `UT1.34_ConfigDelegationVerification`, `ST1.8_ConfigDelegationE2E` all present |
|
|
44
|
+
|
|
45
|
+
Config delegation verification command returns clean:
|
|
46
|
+
```bash
|
|
47
|
+
grep -rn "os.environ\|import hvac\|overlay_secrets\|from cloud_dog_vdb.secrets" cloud_dog_vdb/ --include="*.py" | grep -v __pycache__
|
|
48
|
+
# → zero results
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
---
|
|
52
|
+
|
|
53
|
+
### Issue 2 — SA1 Module Alignment ✅ MOSTLY RESOLVED
|
|
54
|
+
|
|
55
|
+
**Resolved items:**
|
|
56
|
+
- `secrets/` directory deleted ✅
|
|
57
|
+
- `registry/` duplicate directory removed ✅
|
|
58
|
+
- Duplicate converter files (`deepdoc.py`, `mineru.py`, `pandas.py`) removed — only `*_conv.py` versions remain ✅
|
|
59
|
+
|
|
60
|
+
**Remaining extra files (non-blocking — additive, not violations):**
|
|
61
|
+
|
|
62
|
+
| File/Directory | Purpose | Recommendation |
|
|
63
|
+
|----------------|---------|----------------|
|
|
64
|
+
| `factory.py` (top-level) | `get_vdb_client()` factory imported by `__init__.py` | Add to SA1 or fold into `runtime/factory.py` |
|
|
65
|
+
| `embeddings/` (3 files) | Standalone embedding provider helpers | Add to SA1 (supplements `ingestion/embed.py`) |
|
|
66
|
+
| `adapters/vector_utils.py` | Deterministic vector generation for testing | Move to `testing/` or add to SA1 |
|
|
67
|
+
| `runtime/` (3 files) | `VDBClient` and factory | Add to SA1 |
|
|
68
|
+
|
|
69
|
+
**Recommendation:** Update ARCHITECTURE.md SA1 to include these files. This is documentation-only — the package is functionally complete.
|
|
70
|
+
|
|
71
|
+
---
|
|
72
|
+
|
|
73
|
+
### Issue 3 — Compatibility Normaliser (FR1.34) ✅ DELIVERED
|
|
74
|
+
|
|
75
|
+
- `cloud_dog_vdb/compat/response_normaliser.py` — **178 lines**
|
|
76
|
+
- Per-backend mappings for Chroma, Qdrant, Weaviate, OpenSearch, PGVector
|
|
77
|
+
- Normalises backend responses to unified `SearchResult` / `Record` models
|
|
78
|
+
- Test directory `UT1.35_ResponseNormaliser` present
|
|
79
|
+
|
|
80
|
+
---
|
|
81
|
+
|
|
82
|
+
### Issue 4 — Client-Only Integration Mode (FR1.35) ✅ DELIVERED
|
|
83
|
+
|
|
84
|
+
- `cloud_dog_vdb/remote/client.py` — **86 lines**
|
|
85
|
+
- `RemoteVDBClient` proxy delegating all ops to remote VDB via HTTP
|
|
86
|
+
- No local backend dependency required
|
|
87
|
+
- Test directories `UT1.36_RemoteProxy` and `AT1.5_ClientOnlyIntegration` present
|
|
88
|
+
|
|
89
|
+
---
|
|
90
|
+
|
|
91
|
+
### Issue 5 — Collection Schema Versioning (FR1.36) ✅ DELIVERED
|
|
92
|
+
|
|
93
|
+
- `cloud_dog_vdb/versioning/schema_version.py` — **129 lines**
|
|
94
|
+
- Tracks dimension count, metadata fields, embedding model, version number per collection
|
|
95
|
+
- Version mismatch detection at query time
|
|
96
|
+
- Migration utility for detecting dimension changes
|
|
97
|
+
- Test directory `UT1.37_SchemaVersioning` present
|
|
98
|
+
|
|
99
|
+
---
|
|
100
|
+
|
|
101
|
+
### Issue 6 — Test Directory Alignment ✅ RESOLVED
|
|
102
|
+
|
|
103
|
+
- Old scaffold IT directories (ChromaRemoteCollection, ChromaLocalCollection, QdrantCollection, WeaviateCollection, OpenSearchIndex, PGVectorTable, EmbeddingProviders) removed
|
|
104
|
+
- All IT directories match TESTS.md v0.2.0 names (IT1.1_ChromaCRUD through IT1.13_IngestionPipelineEndToEnd)
|
|
105
|
+
- Config delegation test directories renamed per v0.2.0 spec
|
|
106
|
+
|
|
107
|
+
---
|
|
108
|
+
|
|
109
|
+
## Verification — Full Suite
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
set -a; source /opt/iac/Development/cloud-dog-ai/env-vault-admin; set +a
|
|
113
|
+
|
|
114
|
+
# 1. Config delegation check — MUST return zero hits
|
|
115
|
+
grep -rn "os.environ\|import hvac\|overlay_secrets\|from cloud_dog_vdb.secrets" cloud_dog_vdb/ --include="*.py" | grep -v __pycache__
|
|
116
|
+
|
|
117
|
+
# 2. secrets/ directory MUST NOT exist
|
|
118
|
+
test ! -d cloud_dog_vdb/secrets && echo "PASS" || echo "FAIL"
|
|
119
|
+
|
|
120
|
+
# 3. All tests pass
|
|
121
|
+
.venv/bin/pytest tests --env tests/env-UT --env tests/env-ST --env tests/env-IT --env tests/env-AT -q
|
|
122
|
+
|
|
123
|
+
# 4. Lint clean
|
|
124
|
+
.venv/bin/ruff check cloud_dog_vdb tests
|
|
125
|
+
.venv/bin/ruff format --check cloud_dog_vdb tests
|
|
126
|
+
|
|
127
|
+
# 5. Build
|
|
128
|
+
.venv/bin/python -m build --no-isolation
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
## pyproject.toml version
|
|
132
|
+
|
|
133
|
+
```toml
|
|
134
|
+
version = "0.2.0"
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
---
|
|
138
|
+
|
|
139
|
+
## MANDATORY COMPLETION REPORT
|
|
140
|
+
|
|
141
|
+
When finished, write your report to:
|
|
142
|
+
**`/opt/iac/Development/cloud-dog-ai/cloud-dog-ai-platform-standards/packages/backend/platform-vdb/working/W28A-116-FIX-VDB-REPORT.md`**
|
|
143
|
+
|
|
144
|
+
Your report MUST include ALL of the following:
|
|
145
|
+
|
|
146
|
+
### 1. Run summary
|
|
147
|
+
- List every file changed and what was changed
|
|
148
|
+
- List every test fixed and how
|
|
149
|
+
|
|
150
|
+
### 2. Test results (REAL counts from actual runs)
|
|
151
|
+
```
|
|
152
|
+
QT: Xp / Yf
|
|
153
|
+
UT: Xp / Yf
|
|
154
|
+
ST: Xp / Yf
|
|
155
|
+
IT: Xp / Yf
|
|
156
|
+
AT: Xp / Yf
|
|
157
|
+
Ruff: X issues
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
### 3. Verdict
|
|
161
|
+
State one of: **PASS** (100% green) / **PARTIAL** (some fixed, some remain) / **FAIL** (no improvement) / **BLOCKED** (cannot proceed)
|
|
162
|
+
|
|
163
|
+
If not PASS, list every remaining failure with classification: `CODE_BUG`, `ENV_CONFIG`, `INFRA_MISSING`, `EXT_SERVICE`
|
|
164
|
+
|
|
165
|
+
### 4. Evidence logs
|
|
166
|
+
All logs MUST be saved to `working/` directory:
|
|
167
|
+
```
|
|
168
|
+
working/w28a-116-qt.log
|
|
169
|
+
working/w28a-116-ut.log
|
|
170
|
+
working/w28a-116-st.log
|
|
171
|
+
working/w28a-116-it.log
|
|
172
|
+
working/w28a-116-at.log
|
|
173
|
+
working/w28a-116-ruff.log
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
### 5. RULES.md COMPLIANCE WARRANTY
|
|
177
|
+
|
|
178
|
+
Copy this EXACTLY into your report:
|
|
179
|
+
```
|
|
180
|
+
I warrant that:
|
|
181
|
+
1. I have read RULES.md IN FULL before starting work
|
|
182
|
+
2. ALL code I produced is 100% compliant with RULES.md
|
|
183
|
+
3. ALL test results reported are REAL — exact counts from actual runs
|
|
184
|
+
4. I have NOT weakened any test
|
|
185
|
+
5. I have NOT stored, copied, or exposed any credentials
|
|
186
|
+
6. ALL credentials come from Vault or git-ignored env files
|
|
187
|
+
7. I have NOT modified files outside this package
|
|
188
|
+
```
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# platform-vdb Agent Guidance
|
|
2
|
+
|
|
3
|
+
This runbook is for agents implementing or validating `cloud_dog_vdb` changes.
|
|
4
|
+
|
|
5
|
+
## Non-Negotiable Rules
|
|
6
|
+
|
|
7
|
+
- Use real dependencies for `ST/IT/AT/PT/QT` tiers.
|
|
8
|
+
- Do not add stubs, silent fallbacks, or fake success paths to satisfy tests.
|
|
9
|
+
- Preserve backward compatibility:
|
|
10
|
+
- public API parity,
|
|
11
|
+
- default behavior parity,
|
|
12
|
+
- metadata identity parity,
|
|
13
|
+
- error/result envelope parity.
|
|
14
|
+
- Load credentials from approved env/Vault sources only.
|
|
15
|
+
|
|
16
|
+
## Required Inputs
|
|
17
|
+
|
|
18
|
+
- Repository root: `/opt/iac/Development/cloud-dog-ai/cloud-dog-ai-platform-standards`
|
|
19
|
+
- Package root: `packages/backend/platform-vdb`
|
|
20
|
+
- Vault env file (not committed): `/opt/iac/Development/cloud-dog-ai/env-vault`
|
|
21
|
+
- Corpus manifest: `test-data/corpus-manifest.yaml`
|
|
22
|
+
|
|
23
|
+
## Service Configuration Expectations
|
|
24
|
+
|
|
25
|
+
- `dev.services.mineru` must resolve to `MINERU_BASE_URL` for parser tests.
|
|
26
|
+
- `dev.services.marker_mcp` (or `dev.services.markermcp`) is supported, but must be kept disabled when so instructed.
|
|
27
|
+
- `dev.vdbs.infinity` must exist for Infinity adapter integration tests.
|
|
28
|
+
- Local parser command adapters are supported (no Vault required) for:
|
|
29
|
+
- `deepdoc` via `tests/tools/local_deepdoc_parser.py`
|
|
30
|
+
- `docling` via `tests/tools/local_docling_parser.py`
|
|
31
|
+
- `transformers` via `tests/tools/local_transformers_parser.py`
|
|
32
|
+
|
|
33
|
+
## Local Parser Setup
|
|
34
|
+
|
|
35
|
+
- Install local parser dependencies in package venv:
|
|
36
|
+
- `.venv/bin/pip install transformers==4.57.1 docling-parse==5.4.0`
|
|
37
|
+
- Ensure env files used for IT/AT/PT/PT-PERF enable parser commands:
|
|
38
|
+
- `DEEPDOC_ENABLED=true`, `DOCLING_ENABLED=true`, `TRANSFORMERS_ENABLED=true`
|
|
39
|
+
- command values point to `.venv/bin/python tests/tools/local_*_parser.py`
|
|
40
|
+
- For PT/PT-PERF MinerU stability on shared GPU hosts, keep low-VRAM flags explicit:
|
|
41
|
+
- `MINERU_FORMULA_ENABLE=false`
|
|
42
|
+
- `MINERU_TABLE_ENABLE=false`
|
|
43
|
+
- `MINERU_RETURN_MIDDLE_JSON=false`
|
|
44
|
+
- `MINERU_RETURN_IMAGES=false`
|
|
45
|
+
|
|
46
|
+
## Standard Execution Order
|
|
47
|
+
|
|
48
|
+
Run from `packages/backend/platform-vdb`:
|
|
49
|
+
|
|
50
|
+
0. `.venv/bin/pip install transformers==4.57.1 docling-parse==5.4.0`
|
|
51
|
+
1. `set -a; source /opt/iac/Development/cloud-dog-ai/env-vault; set +a; .venv/bin/pytest tests/integration/IT2.11_ParserProviderCoverageMatrix --env tests/env-IT --env tests/env-REQUIRE-ALL-PARSERS -q`
|
|
52
|
+
2. `set -a; source /opt/iac/Development/cloud-dog-ai/env-vault; set +a; .venv/bin/pytest tests/application/AT2.2_ParserProviderCoverageMatrix --env tests/env-AT --env tests/env-REQUIRE-ALL-PARSERS -q`
|
|
53
|
+
3. `.venv/bin/pytest tests/parser/PT1.2_DeepdocAdapter tests/parser/PT1.3_DoclingAdapter tests/parser/PT1.8_TransformersAdapter --env tests/env-PT --env tests/env-CORPUS-SMALL -q`
|
|
54
|
+
4. `.venv/bin/ruff check`
|
|
55
|
+
5. `.venv/bin/ruff format --check`
|
|
56
|
+
6. `.venv/bin/pytest tests/unit tests/system --env tests/env-UT --env tests/env-ST -q`
|
|
57
|
+
7. `.venv/bin/pytest tests/compatibility --env tests/env-CT -q`
|
|
58
|
+
8. `set -a; source /opt/iac/Development/cloud-dog-ai/env-vault; set +a; .venv/bin/pytest tests/integration --env tests/env-IT -q`
|
|
59
|
+
9. `set -a; source /opt/iac/Development/cloud-dog-ai/env-vault; set +a; timeout 1800 .venv/bin/pytest tests/parser --env tests/env-PT --env tests/env-CORPUS-LARGE -q`
|
|
60
|
+
10. `set -a; source /opt/iac/Development/cloud-dog-ai/env-vault; set +a; timeout 1800 .venv/bin/pytest tests/parser_performance --env tests/env-PT-PERF --env tests/env-CORPUS-LARGE -q`
|
|
61
|
+
11. `set -a; source /opt/iac/Development/cloud-dog-ai/env-vault; set +a; .venv/bin/pytest tests/application --env tests/env-AT -q`
|
|
62
|
+
12. `set -a; source /opt/iac/Development/cloud-dog-ai/env-vault; set +a; .venv/bin/pytest tests/security --env tests/env-IT -q`
|
|
63
|
+
13. `.venv/bin/python -m build`
|
|
64
|
+
14. `.venv/bin/pip install --force-reinstall dist/cloud_dog_vdb-0.5.4-py3-none-any.whl`
|
|
65
|
+
15. Smoke import: `.venv/bin/python -c "import cloud_dog_vdb; print(cloud_dog_vdb.__version__)"`
|
|
66
|
+
|
|
67
|
+
For full evidence capture, mirror outputs to `/tmp/w13a_*.log` using `tee`.
|
|
68
|
+
|
|
69
|
+
## Staged Parser Validation (Recommended)
|
|
70
|
+
|
|
71
|
+
Use corpus slicing for deterministic progression:
|
|
72
|
+
|
|
73
|
+
- Small: `--env tests/env-CORPUS-SMALL`
|
|
74
|
+
- Medium: `--env tests/env-CORPUS-MEDIUM`
|
|
75
|
+
- Large: `--env tests/env-CORPUS-LARGE`
|
|
76
|
+
|
|
77
|
+
Recommended progression:
|
|
78
|
+
|
|
79
|
+
1. `tests/parser` + small
|
|
80
|
+
2. `tests/parser_performance` + small
|
|
81
|
+
3. `tests/parser` + medium
|
|
82
|
+
4. `tests/parser_performance` + medium
|
|
83
|
+
5. Large slice under explicit timeout guard
|
|
84
|
+
|
|
85
|
+
## Documentation Stage (Release Gate)
|
|
86
|
+
|
|
87
|
+
After test/build execution, update:
|
|
88
|
+
|
|
89
|
+
- `TESTS.md` run history (commands, date, pass/fail/skip, blockers).
|
|
90
|
+
- `README.md` document links and runtime prerequisites.
|
|
91
|
+
- `PROGRAMME-0.4.0-DEVELOPMENT-BUILD-TEST.md` if sequencing/gates changed.
|
|
92
|
+
- `RELEASE_UPLIFT_PROPOSAL.md` if scope or constraints changed.
|
|
93
|
+
|
|
94
|
+
Do not claim 100% completion if any parser-performance or provider enablement gate is still open.
|