cloud-dog-vdb 0.5.4__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (401)
  1. cloud_dog_vdb-0.5.4/.docs-manifest.yml +6 -0
  2. cloud_dog_vdb-0.5.4/.gitignore +16 -0
  3. cloud_dog_vdb-0.5.4/AGENT-INSTRUCTION-FIX-VDB-TESTS.md +332 -0
  4. cloud_dog_vdb-0.5.4/AGENT-INSTRUCTION-FIX-VDB.md +188 -0
  5. cloud_dog_vdb-0.5.4/AGENTS.md +94 -0
  6. cloud_dog_vdb-0.5.4/ARCHITECTURE.md +768 -0
  7. cloud_dog_vdb-0.5.4/BUILD.md +76 -0
  8. cloud_dog_vdb-0.5.4/CHANGELOG.md +9 -0
  9. cloud_dog_vdb-0.5.4/DATA-MODEL.md +33 -0
  10. cloud_dog_vdb-0.5.4/LICENCE +190 -0
  11. cloud_dog_vdb-0.5.4/LICENSE +176 -0
  12. cloud_dog_vdb-0.5.4/NOTICE +7 -0
  13. cloud_dog_vdb-0.5.4/PKG-INFO +43 -0
  14. cloud_dog_vdb-0.5.4/PROGRAMME-0.4.0-DEVELOPMENT-BUILD-TEST.md +252 -0
  15. cloud_dog_vdb-0.5.4/README.md +129 -0
  16. cloud_dog_vdb-0.5.4/RELEASE_UPLIFT_PROPOSAL.md +115 -0
  17. cloud_dog_vdb-0.5.4/REQUIREMENTS.md +645 -0
  18. cloud_dog_vdb-0.5.4/RULES.md +120 -0
  19. cloud_dog_vdb-0.5.4/TESTS.md +507 -0
  20. cloud_dog_vdb-0.5.4/adoption_test.py +77 -0
  21. cloud_dog_vdb-0.5.4/cloud_dog_vdb/__init__.py +45 -0
  22. cloud_dog_vdb-0.5.4/cloud_dog_vdb/access/__init__.py +15 -0
  23. cloud_dog_vdb-0.5.4/cloud_dog_vdb/access/enforcement.py +32 -0
  24. cloud_dog_vdb-0.5.4/cloud_dog_vdb/access/policy.py +38 -0
  25. cloud_dog_vdb-0.5.4/cloud_dog_vdb/adapters/__init__.py +39 -0
  26. cloud_dog_vdb-0.5.4/cloud_dog_vdb/adapters/base.py +94 -0
  27. cloud_dog_vdb-0.5.4/cloud_dog_vdb/adapters/chroma.py +329 -0
  28. cloud_dog_vdb-0.5.4/cloud_dog_vdb/adapters/factory.py +51 -0
  29. cloud_dog_vdb-0.5.4/cloud_dog_vdb/adapters/infinity.py +404 -0
  30. cloud_dog_vdb-0.5.4/cloud_dog_vdb/adapters/opensearch.py +281 -0
  31. cloud_dog_vdb-0.5.4/cloud_dog_vdb/adapters/pgvector.py +300 -0
  32. cloud_dog_vdb-0.5.4/cloud_dog_vdb/adapters/qdrant.py +315 -0
  33. cloud_dog_vdb-0.5.4/cloud_dog_vdb/adapters/registry.py +38 -0
  34. cloud_dog_vdb-0.5.4/cloud_dog_vdb/adapters/vector_utils.py +35 -0
  35. cloud_dog_vdb-0.5.4/cloud_dog_vdb/adapters/weaviate.py +291 -0
  36. cloud_dog_vdb-0.5.4/cloud_dog_vdb/capabilities/__init__.py +15 -0
  37. cloud_dog_vdb-0.5.4/cloud_dog_vdb/capabilities/models.py +28 -0
  38. cloud_dog_vdb-0.5.4/cloud_dog_vdb/capabilities/planner.py +27 -0
  39. cloud_dog_vdb-0.5.4/cloud_dog_vdb/collections/__init__.py +15 -0
  40. cloud_dog_vdb-0.5.4/cloud_dog_vdb/collections/manager.py +44 -0
  41. cloud_dog_vdb-0.5.4/cloud_dog_vdb/collections/specs.py +34 -0
  42. cloud_dog_vdb-0.5.4/cloud_dog_vdb/compat/__init__.py +20 -0
  43. cloud_dog_vdb-0.5.4/cloud_dog_vdb/compat/response_normaliser.py +194 -0
  44. cloud_dog_vdb-0.5.4/cloud_dog_vdb/config/__init__.py +17 -0
  45. cloud_dog_vdb-0.5.4/cloud_dog_vdb/config/models.py +38 -0
  46. cloud_dog_vdb-0.5.4/cloud_dog_vdb/domain/__init__.py +25 -0
  47. cloud_dog_vdb-0.5.4/cloud_dog_vdb/domain/enums.py +35 -0
  48. cloud_dog_vdb-0.5.4/cloud_dog_vdb/domain/errors.py +45 -0
  49. cloud_dog_vdb-0.5.4/cloud_dog_vdb/domain/models.py +108 -0
  50. cloud_dog_vdb-0.5.4/cloud_dog_vdb/embeddings/__init__.py +18 -0
  51. cloud_dog_vdb-0.5.4/cloud_dog_vdb/embeddings/base.py +28 -0
  52. cloud_dog_vdb-0.5.4/cloud_dog_vdb/embeddings/providers.py +86 -0
  53. cloud_dog_vdb-0.5.4/cloud_dog_vdb/factory.py +27 -0
  54. cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/__init__.py +29 -0
  55. cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/acquire.py +35 -0
  56. cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/checkpoints.py +34 -0
  57. cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/chunk/__init__.py +15 -0
  58. cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/chunk/base.py +33 -0
  59. cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/chunk/boundary.py +23 -0
  60. cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/chunk/fixed.py +28 -0
  61. cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/chunk/recursive.py +33 -0
  62. cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/chunk/semantic.py +40 -0
  63. cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/convert/__init__.py +15 -0
  64. cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/convert/base.py +34 -0
  65. cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/convert/deepdoc_conv.py +26 -0
  66. cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/convert/mineru_conv.py +25 -0
  67. cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/convert/pandas_conv.py +32 -0
  68. cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/embed.py +30 -0
  69. cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/ocr/__init__.py +26 -0
  70. cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/ocr/base.py +52 -0
  71. cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/ocr/heuristics.py +31 -0
  72. cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/ocr/planner.py +43 -0
  73. cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/ocr/providers/__init__.py +23 -0
  74. cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/ocr/providers/external_service.py +69 -0
  75. cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/ocr/providers/llm.py +94 -0
  76. cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/ocr/providers/local.py +78 -0
  77. cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/ocr/registry.py +36 -0
  78. cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/parse/__init__.py +46 -0
  79. cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/parse/async_runner.py +215 -0
  80. cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/parse/base.py +52 -0
  81. cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/parse/capabilities.py +32 -0
  82. cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/parse/ir.py +57 -0
  83. cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/parse/planner.py +31 -0
  84. cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/parse/providers/__init__.py +29 -0
  85. cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/parse/providers/deepdoc.py +101 -0
  86. cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/parse/providers/docling.py +101 -0
  87. cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/parse/providers/internal.py +83 -0
  88. cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/parse/providers/marker_mcp.py +643 -0
  89. cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/parse/providers/mineru.py +703 -0
  90. cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/parse/providers/transformers.py +176 -0
  91. cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/parse/quality.py +21 -0
  92. cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/parse/registry.py +36 -0
  93. cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/pipeline.py +433 -0
  94. cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/table/__init__.py +25 -0
  95. cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/table/policy.py +31 -0
  96. cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/table/renderers.py +74 -0
  97. cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/table/schema.py +40 -0
  98. cloud_dog_vdb-0.5.4/cloud_dog_vdb/ingestion/verify.py +30 -0
  99. cloud_dog_vdb-0.5.4/cloud_dog_vdb/integrations/__init__.py +15 -0
  100. cloud_dog_vdb-0.5.4/cloud_dog_vdb/integrations/langchain.py +32 -0
  101. cloud_dog_vdb-0.5.4/cloud_dog_vdb/integrations/llamaindex.py +32 -0
  102. cloud_dog_vdb-0.5.4/cloud_dog_vdb/isolation/__init__.py +15 -0
  103. cloud_dog_vdb-0.5.4/cloud_dog_vdb/isolation/manager.py +36 -0
  104. cloud_dog_vdb-0.5.4/cloud_dog_vdb/jobs/__init__.py +15 -0
  105. cloud_dog_vdb-0.5.4/cloud_dog_vdb/jobs/models.py +28 -0
  106. cloud_dog_vdb-0.5.4/cloud_dog_vdb/jobs/queue.py +45 -0
  107. cloud_dog_vdb-0.5.4/cloud_dog_vdb/jobs/status.py +32 -0
  108. cloud_dog_vdb-0.5.4/cloud_dog_vdb/jobs/worker.py +28 -0
  109. cloud_dog_vdb-0.5.4/cloud_dog_vdb/lifecycle/__init__.py +25 -0
  110. cloud_dog_vdb-0.5.4/cloud_dog_vdb/lifecycle/manager.py +53 -0
  111. cloud_dog_vdb-0.5.4/cloud_dog_vdb/lifecycle/retention.py +83 -0
  112. cloud_dog_vdb-0.5.4/cloud_dog_vdb/metadata/__init__.py +46 -0
  113. cloud_dog_vdb-0.5.4/cloud_dog_vdb/metadata/filters.py +130 -0
  114. cloud_dog_vdb-0.5.4/cloud_dog_vdb/metadata/identity.py +72 -0
  115. cloud_dog_vdb-0.5.4/cloud_dog_vdb/metadata/normalise.py +35 -0
  116. cloud_dog_vdb-0.5.4/cloud_dog_vdb/metadata/provenance.py +102 -0
  117. cloud_dog_vdb-0.5.4/cloud_dog_vdb/metadata/schema.py +166 -0
  118. cloud_dog_vdb-0.5.4/cloud_dog_vdb/observability/__init__.py +15 -0
  119. cloud_dog_vdb-0.5.4/cloud_dog_vdb/observability/audit.py +32 -0
  120. cloud_dog_vdb-0.5.4/cloud_dog_vdb/observability/metrics.py +37 -0
  121. cloud_dog_vdb-0.5.4/cloud_dog_vdb/observability/otel.py +32 -0
  122. cloud_dog_vdb-0.5.4/cloud_dog_vdb/options/__init__.py +15 -0
  123. cloud_dog_vdb-0.5.4/cloud_dog_vdb/options/chroma.py +28 -0
  124. cloud_dog_vdb-0.5.4/cloud_dog_vdb/options/common.py +35 -0
  125. cloud_dog_vdb-0.5.4/cloud_dog_vdb/options/manager.py +34 -0
  126. cloud_dog_vdb-0.5.4/cloud_dog_vdb/options/opensearch.py +28 -0
  127. cloud_dog_vdb-0.5.4/cloud_dog_vdb/options/pgvector.py +28 -0
  128. cloud_dog_vdb-0.5.4/cloud_dog_vdb/options/qdrant.py +28 -0
  129. cloud_dog_vdb-0.5.4/cloud_dog_vdb/options/weaviate.py +28 -0
  130. cloud_dog_vdb-0.5.4/cloud_dog_vdb/remote/__init__.py +20 -0
  131. cloud_dog_vdb-0.5.4/cloud_dog_vdb/remote/client.py +105 -0
  132. cloud_dog_vdb-0.5.4/cloud_dog_vdb/runtime/__init__.py +18 -0
  133. cloud_dog_vdb-0.5.4/cloud_dog_vdb/runtime/client.py +362 -0
  134. cloud_dog_vdb-0.5.4/cloud_dog_vdb/runtime/factory.py +113 -0
  135. cloud_dog_vdb-0.5.4/cloud_dog_vdb/search/__init__.py +15 -0
  136. cloud_dog_vdb-0.5.4/cloud_dog_vdb/search/engine.py +44 -0
  137. cloud_dog_vdb-0.5.4/cloud_dog_vdb/search/rerank.py +29 -0
  138. cloud_dog_vdb-0.5.4/cloud_dog_vdb/testing/__init__.py +22 -0
  139. cloud_dog_vdb-0.5.4/cloud_dog_vdb/testing/comparison.py +424 -0
  140. cloud_dog_vdb-0.5.4/cloud_dog_vdb/testing/comparison_report.py +89 -0
  141. cloud_dog_vdb-0.5.4/cloud_dog_vdb/testing/conformance.py +32 -0
  142. cloud_dog_vdb-0.5.4/cloud_dog_vdb/testing/fixtures.py +30 -0
  143. cloud_dog_vdb-0.5.4/cloud_dog_vdb/testing/mock_adapters.py +32 -0
  144. cloud_dog_vdb-0.5.4/cloud_dog_vdb/versioning/__init__.py +24 -0
  145. cloud_dog_vdb-0.5.4/cloud_dog_vdb/versioning/schema_version.py +151 -0
  146. cloud_dog_vdb-0.5.4/defaults.yaml +132 -0
  147. cloud_dog_vdb-0.5.4/docs/ARCHITECTURE.md +16 -0
  148. cloud_dog_vdb-0.5.4/docs/CONFIGURATION.md +15 -0
  149. cloud_dog_vdb-0.5.4/docs/EXAMPLES.md +9 -0
  150. cloud_dog_vdb-0.5.4/pyproject.toml +50 -0
  151. cloud_dog_vdb-0.5.4/scaffold/cloud_dog_vdb/__init__.py +30 -0
  152. cloud_dog_vdb-0.5.4/scaffold/defaults.yaml +95 -0
  153. cloud_dog_vdb-0.5.4/scaffold/pyproject.toml +32 -0
  154. cloud_dog_vdb-0.5.4/scaffold/tests/conftest.py +186 -0
  155. cloud_dog_vdb-0.5.4/test-data/Aon global-medical-trend-rates-report-2026.pdf +0 -0
  156. cloud_dog_vdb-0.5.4/test-data/BOSIB13bdde89d07f1b3711dd8e86adb477.pdf +0 -0
  157. cloud_dog_vdb-0.5.4/test-data/CELEX_32016R0679_EN_TXT.pdf +0 -0
  158. cloud_dog_vdb-0.5.4/test-data/Examples.pdf +0 -0
  159. cloud_dog_vdb-0.5.4/test-data/HSSD_2025-0-00.pdf +0 -0
  160. cloud_dog_vdb-0.5.4/test-data/Handwritten-Concern-Form-reporting-Domestic-Abuse-Good-Example.pdf +0 -0
  161. cloud_dog_vdb-0.5.4/test-data/IBRD-Financial-Statements-June-2025.pdf +0 -0
  162. cloud_dog_vdb-0.5.4/test-data/ITEM_COD-0012-0001-_-089.pdf +0 -0
  163. cloud_dog_vdb-0.5.4/test-data/NIST.SP.800-53r5.pdf +0 -0
  164. cloud_dog_vdb-0.5.4/test-data/SAMPLE OF RURAL COMPLETED FORM.pdf +0 -0
  165. cloud_dog_vdb-0.5.4/test-data/Z83-example.pdf +0 -0
  166. cloud_dog_vdb-0.5.4/test-data/a10kfy2023filing.pdf +0 -0
  167. cloud_dog_vdb-0.5.4/test-data/corpus-manifest.yaml +162 -0
  168. cloud_dog_vdb-0.5.4/test-data/fw9.pdf +0 -0
  169. cloud_dog_vdb-0.5.4/tests/__init__.py +13 -0
  170. cloud_dog_vdb-0.5.4/tests/_uplift_assertions.py +52 -0
  171. cloud_dog_vdb-0.5.4/tests/application/AT1.1_ServiceStartupPattern/test_service_startup.py +44 -0
  172. cloud_dog_vdb-0.5.4/tests/application/AT1.2_FullIngestionFlow/test_full_ingestion.py +48 -0
  173. cloud_dog_vdb-0.5.4/tests/application/AT1.3_SearchWithFilters/test_search_filters.py +61 -0
  174. cloud_dog_vdb-0.5.4/tests/application/AT1.4_ConformanceSuite/test_conformance.py +26 -0
  175. cloud_dog_vdb-0.5.4/tests/application/AT1.5_ClientOnlyIntegration/test_client_only_integration.py +147 -0
  176. cloud_dog_vdb-0.5.4/tests/application/AT2.1_ParserFirstEndToEnd/test_at2_1_parser_first_end_to_end.py +87 -0
  177. cloud_dog_vdb-0.5.4/tests/application/AT2.2_ParserProviderCoverageMatrix/test_at2_2_parser_provider_coverage_matrix.py +65 -0
  178. cloud_dog_vdb-0.5.4/tests/comparison_reports/pt3_1_corpus_small_comparison.json +417 -0
  179. cloud_dog_vdb-0.5.4/tests/comparison_reports/pt3_1_corpus_small_comparison.md +43 -0
  180. cloud_dog_vdb-0.5.4/tests/comparison_reports/pt3_2_corpus_medium_comparison.json +417 -0
  181. cloud_dog_vdb-0.5.4/tests/comparison_reports/pt3_2_corpus_medium_comparison.md +43 -0
  182. cloud_dog_vdb-0.5.4/tests/comparison_reports/pt3_3_corpus_large_comparison.json +417 -0
  183. cloud_dog_vdb-0.5.4/tests/comparison_reports/pt3_3_corpus_large_comparison.md +43 -0
  184. cloud_dog_vdb-0.5.4/tests/comparison_reports/pt3_4_marker_vs_mineru_quality.json +417 -0
  185. cloud_dog_vdb-0.5.4/tests/comparison_reports/pt3_4_marker_vs_mineru_quality.md +43 -0
  186. cloud_dog_vdb-0.5.4/tests/comparison_reports/pt3_5_provider_latency_ranking.json +417 -0
  187. cloud_dog_vdb-0.5.4/tests/comparison_reports/pt3_5_provider_latency_ranking.md +43 -0
  188. cloud_dog_vdb-0.5.4/tests/comparison_reports/pt3_6_table_extraction_comparison.json +417 -0
  189. cloud_dog_vdb-0.5.4/tests/comparison_reports/pt3_6_table_extraction_comparison.md +43 -0
  190. cloud_dog_vdb-0.5.4/tests/comparison_reports/pt3_7_image_extraction_comparison.json +417 -0
  191. cloud_dog_vdb-0.5.4/tests/comparison_reports/pt3_7_image_extraction_comparison.md +43 -0
  192. cloud_dog_vdb-0.5.4/tests/comparison_reports/pt3_8_comparison_report_generation.json +417 -0
  193. cloud_dog_vdb-0.5.4/tests/comparison_reports/pt3_8_comparison_report_generation.md +43 -0
  194. cloud_dog_vdb-0.5.4/tests/compatibility/CT1.1_PublicApiParity/test_ct1_1_public_api_parity.py +22 -0
  195. cloud_dog_vdb-0.5.4/tests/compatibility/CT1.2_DefaultBehaviourParity/test_ct1_2_default_behaviour_parity.py +23 -0
  196. cloud_dog_vdb-0.5.4/tests/compatibility/CT1.3_MetadataIdentityParity/test_ct1_3_metadata_identity_parity.py +24 -0
  197. cloud_dog_vdb-0.5.4/tests/compatibility/CT1.4_ConfigCompatibility/test_ct1_4_config_compatibility.py +23 -0
  198. cloud_dog_vdb-0.5.4/tests/compatibility/CT1.5_ErrorContractParity/test_ct1_5_error_contract_parity.py +23 -0
  199. cloud_dog_vdb-0.5.4/tests/compatibility/CT1.6_ResultEnvelopeParity/test_ct1_6_result_envelope_parity.py +27 -0
  200. cloud_dog_vdb-0.5.4/tests/compatibility/CT1.7_LegacyConverterParity/test_ct1_7_legacy_converter_parity.py +22 -0
  201. cloud_dog_vdb-0.5.4/tests/conftest.py +688 -0
  202. cloud_dog_vdb-0.5.4/tests/env-AT +29 -0
  203. cloud_dog_vdb-0.5.4/tests/env-CORPUS-LARGE +1 -0
  204. cloud_dog_vdb-0.5.4/tests/env-CORPUS-MEDIUM +1 -0
  205. cloud_dog_vdb-0.5.4/tests/env-CORPUS-SMALL +1 -0
  206. cloud_dog_vdb-0.5.4/tests/env-CT +4 -0
  207. cloud_dog_vdb-0.5.4/tests/env-IT +29 -0
  208. cloud_dog_vdb-0.5.4/tests/env-PT +53 -0
  209. cloud_dog_vdb-0.5.4/tests/env-PT-COMPARE +27 -0
  210. cloud_dog_vdb-0.5.4/tests/env-PT-PERF +44 -0
  211. cloud_dog_vdb-0.5.4/tests/env-QT +29 -0
  212. cloud_dog_vdb-0.5.4/tests/env-REQUIRE-ALL-PARSERS +13 -0
  213. cloud_dog_vdb-0.5.4/tests/env-ST +16 -0
  214. cloud_dog_vdb-0.5.4/tests/env-UT +26 -0
  215. cloud_dog_vdb-0.5.4/tests/integration/IT1.10_PGVectorSearch/test_pgvector_search.py +35 -0
  216. cloud_dog_vdb-0.5.4/tests/integration/IT1.11_CrossBackendPortable/test_cross_backend.py +58 -0
  217. cloud_dog_vdb-0.5.4/tests/integration/IT1.12_IngestionRealBackend/test_ingestion_real.py +47 -0
  218. cloud_dog_vdb-0.5.4/tests/integration/IT1.13_LifecycleRealBackend/test_lifecycle_real.py +61 -0
  219. cloud_dog_vdb-0.5.4/tests/integration/IT1.1_ChromaCRUD/test_chroma_crud.py +52 -0
  220. cloud_dog_vdb-0.5.4/tests/integration/IT1.2_ChromaSearch/test_chroma_search.py +40 -0
  221. cloud_dog_vdb-0.5.4/tests/integration/IT1.3_QdrantCRUD/test_qdrant_crud.py +44 -0
  222. cloud_dog_vdb-0.5.4/tests/integration/IT1.4_QdrantSearch/test_qdrant_search.py +37 -0
  223. cloud_dog_vdb-0.5.4/tests/integration/IT1.5_WeaviateCRUD/test_weaviate_crud.py +46 -0
  224. cloud_dog_vdb-0.5.4/tests/integration/IT1.6_WeaviateSearch/test_weaviate_search.py +38 -0
  225. cloud_dog_vdb-0.5.4/tests/integration/IT1.7_OpenSearchCRUD/test_opensearch_crud.py +58 -0
  226. cloud_dog_vdb-0.5.4/tests/integration/IT1.8_OpenSearchHybrid/test_opensearch_hybrid.py +45 -0
  227. cloud_dog_vdb-0.5.4/tests/integration/IT1.9_PGVectorCRUD/test_pgvector_crud.py +38 -0
  228. cloud_dog_vdb-0.5.4/tests/integration/IT2.10_EmbeddingDimensionValidation/test_it2_10_embedding_dimension_validation.py +63 -0
  229. cloud_dog_vdb-0.5.4/tests/integration/IT2.11_ParserProviderCoverageMatrix/test_it2_11_parser_provider_coverage_matrix.py +65 -0
  230. cloud_dog_vdb-0.5.4/tests/integration/IT2.1_ParserFirstIngest_Chroma/test_it2_1_parser_first_ingest_chroma.py +33 -0
  231. cloud_dog_vdb-0.5.4/tests/integration/IT2.2_ParserFirstIngest_Qdrant/test_it2_2_parser_first_ingest_qdrant.py +33 -0
  232. cloud_dog_vdb-0.5.4/tests/integration/IT2.3_ParserFirstIngest_Weaviate/test_it2_3_parser_first_ingest_weaviate.py +33 -0
  233. cloud_dog_vdb-0.5.4/tests/integration/IT2.4_ParserFirstIngest_OpenSearch/test_it2_4_parser_first_ingest_opensearch.py +33 -0
  234. cloud_dog_vdb-0.5.4/tests/integration/IT2.5_ParserFirstIngest_PGVector/test_it2_5_parser_first_ingest_pgvector.py +33 -0
  235. cloud_dog_vdb-0.5.4/tests/integration/IT2.6_ParserFirstIngest_Infinity/test_it2_6_parser_first_ingest_infinity.py +36 -0
  236. cloud_dog_vdb-0.5.4/tests/integration/IT2.7_DeleteByFilterPortableFallback/test_it2_7_delete_by_filter_portable_fallback.py +86 -0
  237. cloud_dog_vdb-0.5.4/tests/integration/IT2.8_LifecycleAndPurgeSafety/test_it2_8_lifecycle_and_purge_safety.py +71 -0
  238. cloud_dog_vdb-0.5.4/tests/integration/IT2.9_MetadataFilterParity/test_it2_9_metadata_filter_parity.py +74 -0
  239. cloud_dog_vdb-0.5.4/tests/integration/__init__.py +13 -0
  240. cloud_dog_vdb-0.5.4/tests/integration/_metadata_parity.py +69 -0
  241. cloud_dog_vdb-0.5.4/tests/integration/_parser_ingest_helpers.py +214 -0
  242. cloud_dog_vdb-0.5.4/tests/parser/PT1.1_MineruAdapter/test_pt1_1_mineru_adapter.py +40 -0
  243. cloud_dog_vdb-0.5.4/tests/parser/PT1.2_DeepdocAdapter/test_pt1_2_deepdoc_adapter.py +40 -0
  244. cloud_dog_vdb-0.5.4/tests/parser/PT1.3_DoclingAdapter/test_pt1_3_docling_adapter.py +40 -0
  245. cloud_dog_vdb-0.5.4/tests/parser/PT1.4_MarkerMcpAdapter/test_pt1_4_marker_mcp_adapter.py +40 -0
  246. cloud_dog_vdb-0.5.4/tests/parser/PT1.5_OcrProviders/test_pt1_5_ocr_providers.py +48 -0
  247. cloud_dog_vdb-0.5.4/tests/parser/PT1.6_EndpointSource_EnvFile/test_pt1_6_endpoint_source_envfile.py +33 -0
  248. cloud_dog_vdb-0.5.4/tests/parser/PT1.7_EndpointSource_VaultResolved/test_pt1_7_endpoint_source_vault_resolved.py +22 -0
  249. cloud_dog_vdb-0.5.4/tests/parser/PT1.8_TransformersAdapter/test_pt1_8_transformers_adapter.py +40 -0
  250. cloud_dog_vdb-0.5.4/tests/parser/PT3.1_CorpusSmallComparison/test_pt3_1_corpus_small_comparison.py +35 -0
  251. cloud_dog_vdb-0.5.4/tests/parser/PT3.2_CorpusMediumComparison/test_pt3_2_corpus_medium_comparison.py +35 -0
  252. cloud_dog_vdb-0.5.4/tests/parser/PT3.3_CorpusLargeComparison/test_pt3_3_corpus_large_comparison.py +34 -0
  253. cloud_dog_vdb-0.5.4/tests/parser/PT3.4_MarkerVsMineruQuality/test_pt3_4_marker_vs_mineru_quality.py +39 -0
  254. cloud_dog_vdb-0.5.4/tests/parser/PT3.5_ProviderLatencyRanking/test_pt3_5_provider_latency_ranking.py +42 -0
  255. cloud_dog_vdb-0.5.4/tests/parser/PT3.6_TableExtractionComparison/test_pt3_6_table_extraction_comparison.py +36 -0
  256. cloud_dog_vdb-0.5.4/tests/parser/PT3.7_ImageExtractionComparison/test_pt3_7_image_extraction_comparison.py +40 -0
  257. cloud_dog_vdb-0.5.4/tests/parser/PT3.8_ComparisonReportGeneration/test_pt3_8_comparison_report_generation.py +44 -0
  258. cloud_dog_vdb-0.5.4/tests/parser/_comparison_helpers.py +129 -0
  259. cloud_dog_vdb-0.5.4/tests/parser/_provider_matrix.py +717 -0
  260. cloud_dog_vdb-0.5.4/tests/parser_performance/PT2.1_CorpusLatencyBenchmarks/test_pt2_1_corpus_latency_benchmarks.py +36 -0
  261. cloud_dog_vdb-0.5.4/tests/parser_performance/PT2.2_CorpusThroughputBenchmarks/test_pt2_2_corpus_throughput_benchmarks.py +34 -0
  262. cloud_dog_vdb-0.5.4/tests/parser_performance/PT2.3_OcrAutoDecisionRate/test_pt2_3_ocr_auto_decision_rate.py +34 -0
  263. cloud_dog_vdb-0.5.4/tests/parser_performance/PT2.4_QualityInvariantPassRate/test_pt2_4_quality_invariant_pass_rate.py +57 -0
  264. cloud_dog_vdb-0.5.4/tests/parser_performance/PT2.5_ParserComparisonMatrix/test_pt2_5_parser_comparison_matrix.py +40 -0
  265. cloud_dog_vdb-0.5.4/tests/parser_performance/PT2.6_EmbeddingPipelinePerformance/test_pt2_6_embedding_pipeline_performance.py +38 -0
  266. cloud_dog_vdb-0.5.4/tests/quality/QT_PUBLISH_COMPLIANCE/__init__.py +2 -0
  267. cloud_dog_vdb-0.5.4/tests/quality/QT_PUBLISH_COMPLIANCE/test_publish_compliance.py +73 -0
  268. cloud_dog_vdb-0.5.4/tests/quality/__init__.py +2 -0
  269. cloud_dog_vdb-0.5.4/tests/reports/parser_quality_matrix_deepdoc.json +98 -0
  270. cloud_dog_vdb-0.5.4/tests/reports/parser_quality_matrix_docling.json +98 -0
  271. cloud_dog_vdb-0.5.4/tests/reports/parser_quality_matrix_marker_mcp.json +99 -0
  272. cloud_dog_vdb-0.5.4/tests/reports/parser_quality_matrix_mineru.json +99 -0
  273. cloud_dog_vdb-0.5.4/tests/reports/parser_quality_matrix_transformers.json +98 -0
  274. cloud_dog_vdb-0.5.4/tests/security/QT1.1_TenantIsolation/test_tenant_isolation.py +55 -0
  275. cloud_dog_vdb-0.5.4/tests/security/QT1.2_AccessControlEnforcement/test_access_control.py +50 -0
  276. cloud_dog_vdb-0.5.4/tests/security/QT1.3_SecretNeverPersisted/test_secret_handling.py +30 -0
  277. cloud_dog_vdb-0.5.4/tests/security/QT1.4_PurgeRequiresAdmin/test_purge_admin.py +51 -0
  278. cloud_dog_vdb-0.5.4/tests/security/QT2.1_CommandAllowlist/test_qt2_1_command_allowlist.py +20 -0
  279. cloud_dog_vdb-0.5.4/tests/security/QT2.2_CommandTimeout/test_qt2_2_command_timeout.py +20 -0
  280. cloud_dog_vdb-0.5.4/tests/security/QT2.3_EndpointAllowlist/test_qt2_3_endpoint_allowlist.py +20 -0
  281. cloud_dog_vdb-0.5.4/tests/security/QT2.4_SecretRedaction/test_qt2_4_secret_redaction.py +23 -0
  282. cloud_dog_vdb-0.5.4/tests/security/QT2.5_PathTraversalGuard/test_qt2_5_path_traversal_guard.py +24 -0
  283. cloud_dog_vdb-0.5.4/tests/security/QT2.6_OcrCostGuard/test_qt2_6_ocr_cost_guard.py +21 -0
  284. cloud_dog_vdb-0.5.4/tests/security/QT2.7_ConfigDelegationEnforcement/test_qt2_7_config_delegation_enforcement.py +24 -0
  285. cloud_dog_vdb-0.5.4/tests/system/ST1.10_ServiceStartupLocal/test_service_startup_local.py +26 -0
  286. cloud_dog_vdb-0.5.4/tests/system/ST1.11_FullIngestionLocal/test_full_ingestion_local.py +30 -0
  287. cloud_dog_vdb-0.5.4/tests/system/ST1.12_SearchWithFiltersLocal/test_search_filters_local.py +42 -0
  288. cloud_dog_vdb-0.5.4/tests/system/ST1.1_CRUDEndToEnd/test_crud_e2e.py +44 -0
  289. cloud_dog_vdb-0.5.4/tests/system/ST1.2_SearchEndToEnd/test_search_e2e.py +37 -0
  290. cloud_dog_vdb-0.5.4/tests/system/ST1.3_IngestionPipeline/test_ingestion_e2e.py +29 -0
  291. cloud_dog_vdb-0.5.4/tests/system/ST1.4_LifecycleEndToEnd/test_lifecycle_e2e.py +20 -0
  292. cloud_dog_vdb-0.5.4/tests/system/ST1.5_JobSubmitToComplete/test_job_e2e.py +25 -0
  293. cloud_dog_vdb-0.5.4/tests/system/ST1.6_IdempotentIngestion/test_idempotent.py +37 -0
  294. cloud_dog_vdb-0.5.4/tests/system/ST1.7_CollectionManagement/test_collection_mgmt.py +32 -0
  295. cloud_dog_vdb-0.5.4/tests/system/ST1.8_ConfigDelegationE2E/test_config_delegation_e2e.py +49 -0
  296. cloud_dog_vdb-0.5.4/tests/system/ST1.9_CrossBackendPortableLocal/test_cross_backend_local.py +43 -0
  297. cloud_dog_vdb-0.5.4/tests/system/ST2.1_LegacyPathParity/test_legacy_path_parity.py +90 -0
  298. cloud_dog_vdb-0.5.4/tests/system/ST2.2_IRPathLocal/test_ir_path_local.py +82 -0
  299. cloud_dog_vdb-0.5.4/tests/system/ST2.3_CheckpointResumeAcrossParse/test_checkpoint_resume_across_parse.py +22 -0
  300. cloud_dog_vdb-0.5.4/tests/system/ST2.4_TableChunkFlow/test_table_chunk_flow.py +24 -0
  301. cloud_dog_vdb-0.5.4/tests/system/ST2.5_OcrModeDisabled/test_ocr_mode_disabled.py +20 -0
  302. cloud_dog_vdb-0.5.4/tests/system/ST2.6_OcrModeForce/test_ocr_mode_force.py +20 -0
  303. cloud_dog_vdb-0.5.4/tests/system/ST2.7_OcrModeAuto/test_ocr_mode_auto.py +20 -0
  304. cloud_dog_vdb-0.5.4/tests/system/ST2.8_ParserFallbackExecution/test_parser_fallback_execution.py +52 -0
  305. cloud_dog_vdb-0.5.4/tests/system/ST3.1_MarkerMcpSyncParse/test_st3_1_marker_mcp_sync_parse.py +64 -0
  306. cloud_dog_vdb-0.5.4/tests/system/ST3.2_MarkerMcpAsyncParse/test_st3_2_marker_mcp_async_parse.py +75 -0
  307. cloud_dog_vdb-0.5.4/tests/system/ST3.3_MarkerMcpImageArtefacts/test_st3_3_marker_mcp_image_artefacts.py +76 -0
  308. cloud_dog_vdb-0.5.4/tests/system/ST3.4_MarkerMcpLargeDocument/test_st3_4_marker_mcp_large_document.py +78 -0
  309. cloud_dog_vdb-0.5.4/tests/tools/_enrichment.py +160 -0
  310. cloud_dog_vdb-0.5.4/tests/tools/local_deepdoc_parser.py +55 -0
  311. cloud_dog_vdb-0.5.4/tests/tools/local_docling_parser.py +68 -0
  312. cloud_dog_vdb-0.5.4/tests/tools/local_transformers_parser.py +73 -0
  313. cloud_dog_vdb-0.5.4/tests/unit/UT1.10_DeterministicIdentity/test_canonical_identity_helpers.py +50 -0
  314. cloud_dog_vdb-0.5.4/tests/unit/UT1.10_DeterministicIdentity/test_identity.py +23 -0
  315. cloud_dog_vdb-0.5.4/tests/unit/UT1.11_SourceURINormalise/test_normalise.py +19 -0
  316. cloud_dog_vdb-0.5.4/tests/unit/UT1.12_CommonIndexingOptions/test_indexing_options.py +20 -0
  317. cloud_dog_vdb-0.5.4/tests/unit/UT1.13_CommonSearchOptions/test_search_options.py +20 -0
  318. cloud_dog_vdb-0.5.4/tests/unit/UT1.14_BackendSpecificOptions/test_backend_options.py +21 -0
  319. cloud_dog_vdb-0.5.4/tests/unit/UT1.15_OptionsManager/test_options_manager.py +19 -0
  320. cloud_dog_vdb-0.5.4/tests/unit/UT1.16_LifecycleManager/test_lifecycle.py +20 -0
  321. cloud_dog_vdb-0.5.4/tests/unit/UT1.17_RetentionPolicy/test_retention.py +28 -0
  322. cloud_dog_vdb-0.5.4/tests/unit/UT1.18_SupersessionRules/test_supersession.py +21 -0
  323. cloud_dog_vdb-0.5.4/tests/unit/UT1.19_PurgeSafety/test_purge_safety.py +23 -0
  324. cloud_dog_vdb-0.5.4/tests/unit/UT1.1_AdapterInterface/test_adapter_interface.py +33 -0
  325. cloud_dog_vdb-0.5.4/tests/unit/UT1.20_AccessPolicy/test_access_policy.py +21 -0
  326. cloud_dog_vdb-0.5.4/tests/unit/UT1.21_TenantIsolation/test_isolation.py +20 -0
  327. cloud_dog_vdb-0.5.4/tests/unit/UT1.22_FixedChunker/test_fixed_chunker.py +19 -0
  328. cloud_dog_vdb-0.5.4/tests/unit/UT1.23_RecursiveChunker/test_recursive_chunker.py +19 -0
  329. cloud_dog_vdb-0.5.4/tests/unit/UT1.24_SemanticChunker/test_semantic_chunker.py +19 -0
  330. cloud_dog_vdb-0.5.4/tests/unit/UT1.25_PandasConverter/test_pandas_conv.py +19 -0
  331. cloud_dog_vdb-0.5.4/tests/unit/UT1.26_GenericConverter/test_generic_conv.py +19 -0
  332. cloud_dog_vdb-0.5.4/tests/unit/UT1.27_PipelineStages/test_pipeline_stages.py +29 -0
  333. cloud_dog_vdb-0.5.4/tests/unit/UT1.28_Checkpoints/test_checkpoints.py +19 -0
  334. cloud_dog_vdb-0.5.4/tests/unit/UT1.29_JobModel/test_job_model.py +20 -0
  335. cloud_dog_vdb-0.5.4/tests/unit/UT1.2_ChromaAdapter/test_chroma.py +52 -0
  336. cloud_dog_vdb-0.5.4/tests/unit/UT1.30_JobQueue/test_job_queue.py +24 -0
  337. cloud_dog_vdb-0.5.4/tests/unit/UT1.31_ConfigDelegation/test_config_delegation.py +72 -0
  338. cloud_dog_vdb-0.5.4/tests/unit/UT1.32_ErrorTaxonomy/test_errors.py +19 -0
  339. cloud_dog_vdb-0.5.4/tests/unit/UT1.33_CollectionSpec/test_collection_spec.py +20 -0
  340. cloud_dog_vdb-0.5.4/tests/unit/UT1.34_ConfigDelegationVerification/test_config_delegation_verification.py +63 -0
  341. cloud_dog_vdb-0.5.4/tests/unit/UT1.35_ResponseNormaliser/test_response_normaliser.py +61 -0
  342. cloud_dog_vdb-0.5.4/tests/unit/UT1.36_RemoteProxy/test_remote_proxy.py +69 -0
  343. cloud_dog_vdb-0.5.4/tests/unit/UT1.37_SchemaVersioning/test_schema_versioning.py +48 -0
  344. cloud_dog_vdb-0.5.4/tests/unit/UT1.38_LifecycleFunctions/test_lifecycle_functions.py +20 -0
  345. cloud_dog_vdb-0.5.4/tests/unit/UT1.39_ConformanceMock/test_conformance_mock.py +20 -0
  346. cloud_dog_vdb-0.5.4/tests/unit/UT1.3_QdrantAdapter/test_qdrant_unit.py +88 -0
  347. cloud_dog_vdb-0.5.4/tests/unit/UT1.40_RemoteClientMock/test_remote_client_mock.py +55 -0
  348. cloud_dog_vdb-0.5.4/tests/unit/UT1.41_TenantIsolationLogic/test_tenant_isolation_logic.py +20 -0
  349. cloud_dog_vdb-0.5.4/tests/unit/UT1.42_AccessControlLogic/test_access_control_logic.py +20 -0
  350. cloud_dog_vdb-0.5.4/tests/unit/UT1.43_PurgeAdminLogic/test_purge_admin_logic.py +21 -0
  351. cloud_dog_vdb-0.5.4/tests/unit/UT1.44_RealEmbeddingPropagation/test_real_embedding_propagation.py +154 -0
  352. cloud_dog_vdb-0.5.4/tests/unit/UT1.45_MetadataFilters/test_metadata_filters.py +101 -0
  353. cloud_dog_vdb-0.5.4/tests/unit/UT1.46_ProvenanceExtensions/test_provenance.py +46 -0
  354. cloud_dog_vdb-0.5.4/tests/unit/UT1.47_CrossBackendMetadataParityFixture/test_metadata_parity_fixture.py +103 -0
  355. cloud_dog_vdb-0.5.4/tests/unit/UT1.4_WeaviateAdapter/test_weaviate_unit.py +21 -0
  356. cloud_dog_vdb-0.5.4/tests/unit/UT1.5_OpenSearchAdapter/test_opensearch_unit.py +21 -0
  357. cloud_dog_vdb-0.5.4/tests/unit/UT1.6_PGVectorAdapter/test_pgvector_unit.py +21 -0
  358. cloud_dog_vdb-0.5.4/tests/unit/UT1.7_CapabilityDescriptor/test_capabilities.py +20 -0
  359. cloud_dog_vdb-0.5.4/tests/unit/UT1.8_QueryPlanner/test_query_planner.py +21 -0
  360. cloud_dog_vdb-0.5.4/tests/unit/UT1.9_MetadataValidator/test_canonical_metadata_schema.py +68 -0
  361. cloud_dog_vdb-0.5.4/tests/unit/UT1.9_MetadataValidator/test_metadata.py +56 -0
  362. cloud_dog_vdb-0.5.4/tests/unit/UT2.10_ConfigAliasCompatibility/test_config_alias_compatibility.py +30 -0
  363. cloud_dog_vdb-0.5.4/tests/unit/UT2.11_ParserCommandSandboxPolicy/test_parser_command_sandbox_policy.py +26 -0
  364. cloud_dog_vdb-0.5.4/tests/unit/UT2.12_ProviderHealthContract/test_provider_health_contract.py +389 -0
  365. cloud_dog_vdb-0.5.4/tests/unit/UT2.13_BatchUpsertLifecycleParity/test_batch_upsert_lifecycle_parity.py +85 -0
  366. cloud_dog_vdb-0.5.4/tests/unit/UT2.14_InfinityOutputShapeCompatibility/test_infinity_output_shape_compatibility.py +38 -0
  367. cloud_dog_vdb-0.5.4/tests/unit/UT2.15_SourceUriFilenameMimeInference/test_source_uri_filename_mime_inference.py +185 -0
  368. cloud_dog_vdb-0.5.4/tests/unit/UT2.1_ParserCapabilities/test_parser_capabilities.py +41 -0
  369. cloud_dog_vdb-0.5.4/tests/unit/UT2.2_ParserPlannerSelection/test_parser_planner_selection.py +31 -0
  370. cloud_dog_vdb-0.5.4/tests/unit/UT2.3_ParserFallbackPolicy/test_parser_fallback_policy.py +24 -0
  371. cloud_dog_vdb-0.5.4/tests/unit/UT2.4_DocumentIRSchema/test_document_ir_schema.py +22 -0
  372. cloud_dog_vdb-0.5.4/tests/unit/UT2.5_TableRenderPolicies/test_table_render_policies.py +21 -0
  373. cloud_dog_vdb-0.5.4/tests/unit/UT2.6_TableJsonSchema/test_table_json_schema.py +21 -0
  374. cloud_dog_vdb-0.5.4/tests/unit/UT2.7_OCRHeuristics/test_ocr_heuristics.py +24 -0
  375. cloud_dog_vdb-0.5.4/tests/unit/UT2.8_OCRCostLimiter/test_ocr_cost_limiter.py +24 -0
  376. cloud_dog_vdb-0.5.4/tests/unit/UT2.9_BoundaryAwareChunking/test_boundary_aware_chunking.py +25 -0
  377. cloud_dog_vdb-0.5.4/tests/unit/UT3.1_MarkerResponseContract/test_ut3_1_marker_response_contract.py +33 -0
  378. cloud_dog_vdb-0.5.4/tests/unit/UT3.2_MarkerImageExtraction/test_ut3_2_marker_image_extraction.py +39 -0
  379. cloud_dog_vdb-0.5.4/tests/unit/UT3.3_MarkerTOCExtraction/test_ut3_3_marker_toc_extraction.py +38 -0
  380. cloud_dog_vdb-0.5.4/tests/unit/UT3.4_AsyncParseRunnerSubmitPoll/test_ut3_4_async_parse_runner_submit_poll.py +77 -0
  381. cloud_dog_vdb-0.5.4/tests/unit/UT3.5_AsyncParseRunnerTimeout/test_ut3_5_async_parse_runner_timeout.py +68 -0
  382. cloud_dog_vdb-0.5.4/tests/unit/UT3.6_AsyncParseRunnerCancellation/test_ut3_6_async_parse_runner_cancellation.py +78 -0
  383. cloud_dog_vdb-0.5.4/tests/unit/UT3.7_AsyncModeAutoTrigger/test_ut3_7_async_mode_auto_trigger.py +120 -0
  384. cloud_dog_vdb-0.5.4/tests/unit/UT3.8_SyncModeDefault/test_ut3_8_sync_mode_default.py +112 -0
  385. cloud_dog_vdb-0.5.4/working/AGENT-INSTRUCTION-FIX-VDB-TESTS-REPORT.md +77 -0
  386. cloud_dog_vdb-0.5.4/working/W19A-VDB-050-REPORT.md +75 -0
  387. cloud_dog_vdb-0.5.4/working/W23A-P2-REPORT.md +164 -0
  388. cloud_dog_vdb-0.5.4/working/W23A-PARSER-SETUP-REPORT.md +104 -0
  389. cloud_dog_vdb-0.5.4/working/W23A-PT14-RERUN-V2-REPORT.md +42 -0
  390. cloud_dog_vdb-0.5.4/working/W23A-PT14-RERUN-V3-REPORT.md +75 -0
  391. cloud_dog_vdb-0.5.4/working/W23A-PT14-RERUN-V4-REPORT.md +76 -0
  392. cloud_dog_vdb-0.5.4/working/W23A-PT14-RERUN-V5-REPORT.md +53 -0
  393. cloud_dog_vdb-0.5.4/working/W25A-75-MARKER-MCP-VALIDATE-REPORT.md +108 -0
  394. cloud_dog_vdb-0.5.4/working/W28A-116-FIX-VDB-REPORT.md +99 -0
  395. cloud_dog_vdb-0.5.4/working/W28A-124-PLATFORM-VDB-HEALTH-REPORT.md +97 -0
  396. cloud_dog_vdb-0.5.4/working/W28A-131-PLATFORM-VDB-ST-TIMEOUT-FIX-REPORT.md +93 -0
  397. cloud_dog_vdb-0.5.4/working/W28A-85-PARSER-FIX-REPORT.md +75 -0
  398. cloud_dog_vdb-0.5.4/working/cloud_dog_vdb_ARCHITECTURE.md +235 -0
  399. cloud_dog_vdb-0.5.4/working/cloud_dog_vdb_RELEASE_UPLIFT_PROPOSAL.md +98 -0
  400. cloud_dog_vdb-0.5.4/working/cloud_dog_vdb_REQUIREMENTS.md +254 -0
  401. cloud_dog_vdb-0.5.4/working/cloud_dog_vdb_TESTS.md +150 -0
@@ -0,0 +1,6 @@
1
+ schema-version: 1.0
2
+ project: platform-vdb
3
+ canon-set: PACKAGE
4
+ canon-template-baseline: 1.0
5
+ conditional-docs: []
6
+ additional-docs: []
@@ -0,0 +1,16 @@
1
+ .venv/
2
+ __pycache__/
3
+ *.pyc
4
+ *.egg-info/
5
+ dist/
6
+ build/
7
+ .pytest_cache/
8
+ .ruff_cache/
9
+ .coverage
10
+ coverage.xml
11
+ htmlcov/
12
+ working/
13
+ private/
14
+ archive/
15
+ logs/
16
+ *.db
@@ -0,0 +1,332 @@
1
+ # Agent Instruction — Fix cloud_dog_vdb Test Infrastructure
2
+
3
+ **Package:** `cloud_dog_vdb`
4
+ **Version:** 0.3.0
5
+ **Date:** 2026-02-20
6
+ **Status:** OPEN — CRITICAL defects in test infrastructure
7
+ **Scope:** Fix silent-skip test fraud, missing precondition checks, misclassified tests, env file gaps
8
+
9
+ ---
10
+
11
+ ## INTEGRITY WARRANTY — READ THIS FIRST
12
+
13
+ This section is copied verbatim from the platform-wide `RULES.md` Section 1. It is **NON-NEGOTIABLE**.
14
+
15
+ **I WILL NEVER:**
16
+ - **LIE** about test results, implementation status, or compliance
17
+ - **FUDGE** test data, configuration, or validation results
18
+ - **HACK** around problems instead of fixing root causes
19
+ - **FALSIFY** test outputs, logs, or status reports
20
+ - **STUB** functionality in IT/AT tests when real implementation is required
21
+ - **FAKE** success when there are errors, warnings, or failures
22
+ - **HIDE** failures, warnings, or non-compliance
23
+ - **PRETEND** tests pass when they fail
24
+ - **SKIP** validation steps to claim completion
25
+ - **BYPASS** rules or requirements for convenience
26
+
27
+ **IF I CANNOT GUARANTEE 100% COMPLIANCE, I WILL STOP AND SAY SO EXPLICITLY.**
28
+
29
+ **IF TESTS FAIL, I WILL REPORT FAILURES HONESTLY, NOT HIDE THEM.**
30
+
31
+ **IF I DON'T KNOW, I WILL ASK, NOT GUESS.**
32
+
33
+ **"ASK. DON'T GUESS. DON'T LIE. DON'T FUDGE."**
34
+
35
+ ---
36
+
37
+ ## ADDITIONAL RULES — ZERO TOLERANCE
38
+
39
+ These rules supplement the Integrity Warranty. Violation of ANY rule invalidates all work.
40
+
41
+ 1. **100% REAL systems in IT/AT** — no mocks, no stubs, no `local_mode=True`, no `MockTransport`. If it says "Integration" it MUST integrate with a real external service.
42
+ 2. **Silent skip is a LIE** — `pytest.skip()` when a backend is unavailable makes the test report say "0 failed". This is indistinguishable from "all passed". A skipped IT/AT test is NOT a passed test. It is an UNTESTED test.
43
+ 3. **env files MUST be complete** — if a test tier requires `VAULT_TOKEN`, either the env file for that tier MUST contain it, or the test MUST `pytest.fail()` (not skip) when it is absent.
44
+ 4. **Test type MUST match reality** — a test that calls pure functions with no external service is a **UT**, not an IT/AT/QT. A test that uses `local_mode=True` is a **ST** at best, not an IT/AT.
45
+ 5. **No decoration env files** — every variable in an env file MUST be consumed by the test code that loads it. If the test ignores the env file and hard-codes Vault, the env file is decoration and MUST be removed or the test MUST be fixed to use it.
46
+ 6. **Write-path precondition checks** — before attempting backend writes in IT tests, probe the backend with a lightweight write-then-delete operation. If the probe fails, `pytest.fail()` with a clear message identifying the backend and error. Do NOT just crash with an opaque 500.
47
+ 7. **Config delegation** — test fixtures MUST use the same config loading path as the application. If the app uses `cloud_dog_config` layered precedence, tests MUST NOT bypass it by shelling out to Vault directly.
48
+ 8. **Honest reporting** — when reporting test results, ALWAYS state the skip count. "76 passed, 0 failed, 11 skipped" is NOT the same as "76 passed". If skipped tests include IT tests that should have run against real backends, this MUST be flagged as a gap, not hidden.
49
+
50
+ ---
51
+
52
+ ## WHY THIS INSTRUCTION EXISTS
53
+
54
+ ### Audit Findings (2026-02-20)
55
+
56
+ An audit of the `cloud_dog_vdb` test infrastructure found **5 defects** (2 critical, 2 high, 1 medium):
57
+
58
+ | ID | Severity | Finding | Evidence |
59
+ |----|----------|---------|----------|
60
+ | **T-1** | CRITICAL | `env-IT` missing `VAULT_TOKEN` — all 11 real-backend IT tests silently skip | `tests/env-IT` has 3 of 4 required Vault vars. `conftest.py:59` calls `pytest.skip()` when `VAULT_TOKEN` is missing. Result: "0 failed, 11 skipped" reported as "PASS". |
61
+ | **T-2** | CRITICAL | 10 tests misclassified as IT/AT/QT when they are actually UT/ST | See § Misclassified Tests below. |
62
+ | **T-3** | HIGH | No write-path precondition check before backend operations | IT tests dive straight into `create_collection()` / `add_documents()`. When backend has resource exhaustion (e.g. Chroma FD limit), tests crash with opaque HTTP 500. |
63
+ | **T-4** | HIGH | `pytest.skip()` used instead of `pytest.fail()` for mandatory IT preconditions | `conftest.py:59` and all 11 IT tests use `pytest.skip()` when config/backend is missing. For IT tests that MUST run against real backends, this should be `pytest.fail()`. |
64
+ | **T-5** | MEDIUM | `env-UT`, `env-ST` and `env-AT` contain the same Vault vars as `env-IT` but no `VAULT_TOKEN` | All 4 env files have `VAULT_ADDR`, `VAULT_MOUNT_POINT`, `VAULT_CONFIG_PATH` but not `VAULT_TOKEN`. UT/ST don't need Vault (per RULES.md § 5.5). AT tests use `local_mode=True` so don't need it either. These Vault vars are decoration. |
65
+
66
+ ### Misclassified Tests (T-2)
67
+
68
+ | Test | Claimed Type | Actual Type | Evidence |
69
+ |------|-------------|-------------|----------|
70
+ | IT1.11 CrossBackendPortable | Integration | ST (local) | Uses `local_mode=True` — no real backend |
71
+ | IT1.13 LifecycleRealBackend | Integration | **UT** | Calls `mark_deleted()` / `mark_superseded()` — pure functions, no backend |
72
+ | AT1.1 ServiceStartupPattern | Application | ST (local) | Uses `local_mode=True` — no real service |
73
+ | AT1.2 FullIngestionFlow | Application | ST (local) | Uses `local_mode=True` — no real service |
74
+ | AT1.3 SearchWithFilters | Application | ST (local) | Uses `local_mode=True` — no real service |
75
+ | AT1.4 ConformanceSuite | Application | **UT** | Uses `mock_adapter()` — mock, not real |
76
+ | AT1.5 ClientOnlyIntegration | Application | **UT** | Uses `httpx.MockTransport` — mock, not real |
77
+ | QT1.1 TenantIsolation | Security | **UT** | Calls `enforce_tenant()` — pure function, 7 lines |
78
+ | QT1.2 AccessControlEnforcement | Security | **UT** | Calls `can_admin()` — pure function, 7 lines |
79
+ | QT1.4 PurgeRequiresAdmin | Security | **UT** | Calls `can_admin()` — pure function, 8 lines |
80
+
81
+ **Correctly classified tests:** QT1.3 (static file scan — legitimate QT).
82
+
83
+ ---
84
+
85
+ ## HARD CONSTRAINTS
86
+
87
+ - **DO NOT** delete any test. Reclassify by moving to the correct directory.
88
+ - **DO NOT** add `os.environ`/`os.getenv` reads in library source code.
89
+ - **DO NOT** weaken any existing test assertion.
90
+ - **DO NOT** convert real-backend IT tests to use `local_mode=True`.
91
+ - **DO NOT** claim completion without running the full verification chain AND reporting skip counts.
92
+ - **UK English only.**
93
+
94
+ ---
95
+
96
+ ## PHASE 1 — Fix missing `VAULT_TOKEN` handling for IT tests (T-1)
97
+
98
+ ### Step 1.1 — Decide: env-IT should NOT contain VAULT_TOKEN directly
99
+
100
+ Per RULES.md § 5.5: *"NEVER save keys, passwords, tokens, or credentials into the repository."*
101
+
102
+ `VAULT_TOKEN` is a credential. It MUST NOT be in `env-IT`.
103
+
104
+ **Solution:** The test runner MUST source `env-vault` before running IT tests. The `conftest.py` fixture must enforce this by calling `pytest.fail()` (not `pytest.skip()`) when `VAULT_TOKEN` is missing for IT-tier tests.
105
+
106
+ ### Step 1.2 — Update `conftest.py`: fail instead of skip for IT/AT tests
107
+
108
+ Change `conftest.py` `vault_config()` fixture:
109
+
110
+ ```python
111
+ # BEFORE:
112
+ if missing:
113
+ pytest.skip(f"Vault variables missing: {', '.join(missing)}")
114
+
115
+ # AFTER:
116
+ if missing:
117
+ tier = os.environ.get("TEST_ENV_TIER", "")
118
+ if tier in ("IT", "AT"):
119
+ pytest.fail(
120
+ f"VAULT_TOKEN and Vault variables are REQUIRED for {tier} tests. "
121
+ f"Missing: {', '.join(missing)}. "
122
+ f"Run: set -a; source /opt/iac/Development/cloud-dog-ai/env-vault; set +a"
123
+ )
124
+ pytest.skip(f"Vault variables missing (UT/ST may skip): {', '.join(missing)}")
125
+ ```
126
+
127
+ ### Step 1.3 — Add `TEST_ENV_TIER` to env files
128
+
129
+ Add to each env file:
130
+
131
+ | File | Add |
132
+ |------|-----|
133
+ | `env-UT` | `TEST_ENV_TIER=UT` |
134
+ | `env-ST` | `TEST_ENV_TIER=ST` |
135
+ | `env-IT` | `TEST_ENV_TIER=IT` |
136
+ | `env-AT` | `TEST_ENV_TIER=AT` |
137
+
138
+ ### Step 1.4 — Remove decoration Vault vars from env-UT and env-ST
139
+
140
+ UT and ST tests MUST NOT require Vault (RULES.md § 5.5). Remove `VAULT_ADDR`, `VAULT_MOUNT_POINT`, `VAULT_CONFIG_PATH` from `env-UT` and `env-ST`.
141
+
142
+ ---
143
+
144
+ ## PHASE 2 — Add write-path precondition checks (T-3)
145
+
146
+ ### Step 2.1 — Create a reusable backend probe fixture
147
+
148
+ Add to `conftest.py`:
149
+
150
+ ```python
151
+ @pytest.fixture(scope="session")
152
+ def chroma_ready(vdbs: dict) -> dict:
153
+ """Verify Chroma can actually handle writes, not just heartbeat."""
154
+ cfg = vdbs.get("chroma", {})
155
+ if not cfg:
156
+ pytest.fail("dev.vdbs.chroma missing from Vault config")
157
+ from cloud_dog_vdb.adapters.chroma import ChromaAdapter
158
+ from cloud_dog_vdb.config.models import ProviderConfig
159
+ from cloud_dog_vdb.domain.models import CollectionSpec
160
+ import asyncio
161
+
162
+ a = ChromaAdapter(
163
+ ProviderConfig(provider_id="chroma", base_url=cfg.get("base_url", ""), api_key=cfg.get("auth_token", "")),
164
+ local_mode=False,
165
+ )
166
+ probe_name = "_cloud_dog_write_probe"
167
+ loop = asyncio.get_event_loop()
168
+ try:
169
+ loop.run_until_complete(a.delete_collection(probe_name))
170
+ loop.run_until_complete(a.create_collection(CollectionSpec(name=probe_name)))
171
+ loop.run_until_complete(a.delete_collection(probe_name))
172
+ except Exception as exc:
173
+ pytest.fail(f"Chroma write-path probe FAILED: {exc}")
174
+ return cfg
175
+ ```
176
+
177
+ Create equivalent `qdrant_ready`, `weaviate_ready`, `opensearch_ready`, `pgvector_ready` fixtures.
178
+
179
+ ### Step 2.2 — Update IT tests to use `*_ready` fixtures instead of raw `vdbs`
180
+
181
+ Replace `vdbs` parameter with the appropriate `*_ready` fixture in each IT test.
182
+
183
+ ---
184
+
185
+ ## PHASE 3 — Reclassify misclassified tests (T-2)
186
+
187
+ ### Step 3.1 — Move misclassified tests to correct directories
188
+
189
+ | Current Location | Move To | Reason |
190
+ |-----------------|---------|--------|
191
+ | `integration/IT1.11_CrossBackendPortable/` | `system/ST1.9_CrossBackendPortableLocal/` | Uses `local_mode=True` |
192
+ | `integration/IT1.13_LifecycleRealBackend/` | `unit/UT1.38_LifecycleFunctions/` | Pure function calls |
193
+ | `application/AT1.1_ServiceStartupPattern/` | `system/ST1.10_ServiceStartupLocal/` | Uses `local_mode=True` |
194
+ | `application/AT1.2_FullIngestionFlow/` | `system/ST1.11_FullIngestionLocal/` | Uses `local_mode=True` |
195
+ | `application/AT1.3_SearchWithFilters/` | `system/ST1.12_SearchWithFiltersLocal/` | Uses `local_mode=True` |
196
+ | `application/AT1.4_ConformanceSuite/` | `unit/UT1.39_ConformanceMock/` | Uses `mock_adapter()` |
197
+ | `application/AT1.5_ClientOnlyIntegration/` | `unit/UT1.40_RemoteClientMock/` | Uses `MockTransport` |
198
+ | `security/QT1.1_TenantIsolation/` | `unit/UT1.41_TenantIsolationLogic/` | Pure function |
199
+ | `security/QT1.2_AccessControlEnforcement/` | `unit/UT1.42_AccessControlLogic/` | Pure function |
200
+ | `security/QT1.4_PurgeRequiresAdmin/` | `unit/UT1.43_PurgeAdminLogic/` | Pure function |
201
+
202
+ ### Step 3.2 — Write REAL IT/AT/QT replacements
203
+
204
+ For each reclassified test, write a NEW test at the original location that actually uses real backends:
205
+
206
+ | New Test | What It Must Do |
207
+ |----------|----------------|
208
+ | IT1.11 CrossBackendPortable | Run the same portable contract against BOTH Chroma AND Qdrant real backends |
209
+ | IT1.13 LifecycleRealBackend | Mark records as deleted/superseded IN a real Chroma collection, verify via query |
210
+ | AT1.1 ServiceStartupPattern | Start a real VDB client against Chroma, verify `init_backend()` succeeds against real server |
211
+ | AT1.2 FullIngestionFlow | Run full ingestion against real Chroma, not `local_mode` |
212
+ | AT1.3 SearchWithFilters | Search with metadata filters against real Chroma |
213
+ | AT1.4 ConformanceSuite | Run `adapter_conforms()` against a REAL adapter, not mock |
214
+ | AT1.5 ClientOnlyIntegration | If `RemoteVDBClient` is meant for real remote use, test against a real endpoint. If no real endpoint exists, document this as a gap and leave it as UT until a remote VDB service is deployed. |
215
+ | QT1.1 TenantIsolation | Ingest records with different `tenant_id` into real Chroma, verify isolation via search |
216
+ | QT1.2 AccessControlEnforcement | Test access control against a real adapter operation |
217
+ | QT1.4 PurgeRequiresAdmin | Attempt purge on a real collection, verify admin enforcement |
218
+
219
+ ---
220
+
221
+ ## PHASE 4 — Clean up env files (T-5)
222
+
223
+ ### Step 4.1 — env-UT contents
224
+
225
+ ```
226
+ TEST_ENV_TIER=UT
227
+ ```
228
+
229
+ UT tests MUST NOT need anything else. If they do, the test has a dependency problem.
230
+
231
+ ### Step 4.2 — env-ST contents
232
+
233
+ ```
234
+ TEST_ENV_TIER=ST
235
+ ```
236
+
237
+ ST tests use `local_mode=True` and in-memory backends. No external config needed.
238
+
239
+ ### Step 4.3 — env-IT contents
240
+
241
+ ```
242
+ TEST_ENV_TIER=IT
243
+ VAULT_ADDR=https://vault0.cloud-dog.net
244
+ VAULT_MOUNT_POINT=cloud_dog_ai
245
+ VAULT_CONFIG_PATH=config
246
+ ```
247
+
248
+ `VAULT_TOKEN` comes from `env-vault` sourced before test run. The conftest will `pytest.fail()` if it is missing.
249
+
250
+ ### Step 4.4 — env-AT contents
251
+
252
+ ```
253
+ TEST_ENV_TIER=AT
254
+ VAULT_ADDR=https://vault0.cloud-dog.net
255
+ VAULT_MOUNT_POINT=cloud_dog_ai
256
+ VAULT_CONFIG_PATH=config
257
+ ```
258
+
259
+ AT tests require real backends. Same Vault dependency as IT.
260
+
261
+ ---
262
+
263
+ ## PHASE 5 — Update TESTS.md
264
+
265
+ ### Step 5.1 — Update test directory structure to reflect reclassifications
266
+
267
+ ### Step 5.2 — Update test counts
268
+
269
+ After reclassification, counts should be approximately:
270
+ - **UT:** 37 (original) + 6 (reclassified from IT/AT/QT) = **43**
271
+ - **ST:** 8 (original) + 4 (reclassified from IT/AT) = **12**
272
+ - **IT:** 13 (original) - 2 (reclassified) + 2 (new real replacements) = **13**
273
+ - **AT:** 5 (original) - 5 (reclassified) + 5 (new real replacements) = **5**
274
+ - **QT:** 4 (original) - 3 (reclassified) + 3 (new real replacements) = **4**
275
+
276
+ ### Step 5.3 — Update Test Run History
277
+
278
+ Record actual results with skip counts. Example:
279
+
280
+ ```
281
+ | Date | Scope | Command | Passed | Failed | Skipped | Notes |
282
+ ```
283
+
284
+ **NEVER write "PASS" without the skip count.**
285
+
286
+ ---
287
+
288
+ ## PHASE 6 — Verification
289
+
290
+ ### Step 6.1 — Run UT + ST (no Vault required)
291
+
292
+ ```bash
293
+ cd /opt/iac/Development/cloud-dog-ai/cloud-dog-ai-platform-standards/packages/backend/platform-vdb
294
+ .venv/bin/pytest tests/unit tests/system --env tests/env-UT --env tests/env-ST -v
295
+ ```
296
+
297
+ **Expected:** All pass, 0 skipped.
298
+
299
+ ### Step 6.2 — Run IT + AT (Vault required)
300
+
301
+ ```bash
302
+ set -a; source /opt/iac/Development/cloud-dog-ai/env-vault; set +a
303
+ .venv/bin/pytest tests/integration tests/application --env tests/env-IT --env tests/env-AT -v
304
+ ```
305
+
306
+ **Expected:** All pass against available backends. Any backend that is down should produce `pytest.fail()` with a clear error message, NOT a silent skip.
307
+
308
+ ### Step 6.3 — Run QT (Vault required for real-backend QT tests)
309
+
310
+ ```bash
311
+ .venv/bin/pytest tests/security --env tests/env-IT -v
312
+ ```
313
+
314
+ ### Step 6.4 — Report honestly
315
+
316
+ State exact counts: `N passed, N failed, N skipped`. If any IT/AT test was skipped, explain WHY and which backend is unavailable.
317
+
318
+ ---
319
+
320
+ ## COMPLETION GATE
321
+
322
+ This instruction is complete ONLY when:
323
+
324
+ 1. `env-IT` no longer causes silent skips — missing `VAULT_TOKEN` produces `pytest.fail()`
325
+ 2. All 10 misclassified tests are moved to correct directories
326
+ 3. Real-backend replacements exist for all reclassified IT/AT/QT tests
327
+ 4. Write-path probe fixtures exist for all 5 backends
328
+ 5. TESTS.md updated with correct classifications and honest run history
329
+ 6. Full test suite runs with `0 skipped` for UT/ST tier
330
+ 7. IT/AT tier runs against real backends with honest reporting
331
+
332
+ **DO NOT claim completion without evidence for ALL 7 gates.**
@@ -0,0 +1,188 @@
1
+ # Agent Instruction — Fix cloud_dog_vdb (v0.2.0)
2
+
3
+ **Package:** `cloud_dog_vdb`
4
+ **Target version:** 0.2.0
5
+ **Date:** 2026-02-18 (re-review with source verification)
6
+ **Scope:** Config-delegation enforcement + adapter rewiring + test alignment + v0.2.0 features — **ALL DELIVERED AND VERIFIED**
7
+
8
+ ---
9
+
10
+ ## Status: ✅ COMPLETE (minor SA1 extra-files gap noted)
11
+
12
+ All 6 issues from the original instruction have been resolved, apart from the non-blocking, documentation-only SA1 extra-files gap noted under Issue 2. This document is retained for reference and future maintenance.
13
+
14
+ **Verified on 2026-02-18 (re-review):**
15
+ - 91 source files across 20+ subpackages
16
+ - 67 test directories present (37 UT + 8 ST + 13 IT + 5 AT + 4 QT)
17
+ - Zero config-delegation violations: `os.environ`/`hvac`/`overlay_secrets`/`VAULT_JSON` grep returns zero hits
18
+ - `secrets/` directory does NOT exist (deleted)
19
+ - All 5 adapters use `self.config.*` directly (no `self._runtime`, no overlay)
20
+ - Duplicate converter files removed (only `*_conv.py` versions remain)
21
+ - Test directories renamed to match TESTS.md v0.2.0 (ConfigDelegation, ConfigDelegationVerification, ConfigDelegationE2E)
22
+ - Old scaffold IT directories removed
23
+ - All 3 v0.2.0 feature files present and substantive
24
+ - Build produces `cloud_dog_vdb-0.2.0` wheel + sdist
25
+
26
+ **Governing documents:**
27
+ 1. `platform-vdb/REQUIREMENTS.md` (v0.2.0) — FR1.31, FR1.33, FR1.34–FR1.36
28
+ 2. `platform-vdb/ARCHITECTURE.md` (v0.2.0) — SA1 module layout
29
+ 3. `platform-vdb/TESTS.md` (v0.2.0) — all test directories
30
+ 4. `packages/backend/AGENT-INSTRUCTION.md` — Integrity Warranty and Config Delegation — ZERO TOLERANCE (MANDATORY)
31
+
32
+ ---
33
+
34
+ ## Delivery Summary
35
+
36
+ ### Issue 1 — Config Delegation Enforcement ✅ RESOLVED
37
+
38
+ | Sub-issue | Status | Evidence |
39
+ |-----------|--------|----------|
40
+ | 1A. Delete `secrets/` module | ✅ | Directory does not exist |
41
+ | 1B. Rewire all 5 adapters | ✅ | Zero `overlay_secrets`, `self._runtime`, or `from cloud_dog_vdb.secrets` hits in source |
42
+ | 1C. Fix `observability/otel.py` | ✅ | Zero `os.environ` hits in any source file |
43
+ | 1D. Rename 3 test directories | ✅ | `UT1.31_ConfigDelegation`, `UT1.34_ConfigDelegationVerification`, `ST1.8_ConfigDelegationE2E` all present |
44
+
45
+ Config delegation verification command returns clean:
46
+ ```bash
47
+ grep -rn "os.environ\|import hvac\|overlay_secrets\|from cloud_dog_vdb.secrets" cloud_dog_vdb/ --include="*.py" | grep -v __pycache__
48
+ # → zero results
49
+ ```
50
+
51
+ ---
52
+
53
+ ### Issue 2 — SA1 Module Alignment ✅ MOSTLY RESOLVED
54
+
55
+ **Resolved items:**
56
+ - `secrets/` directory deleted ✅
57
+ - `registry/` duplicate directory removed ✅
58
+ - Duplicate converter files (`deepdoc.py`, `mineru.py`, `pandas.py`) removed — only `*_conv.py` versions remain ✅
59
+
60
+ **Remaining extra files (non-blocking — additive, not violations):**
61
+
62
+ | File/Directory | Purpose | Recommendation |
63
+ |----------------|---------|----------------|
64
+ | `factory.py` (top-level) | `get_vdb_client()` factory imported by `__init__.py` | Add to SA1 or fold into `runtime/factory.py` |
65
+ | `embeddings/` (3 files) | Standalone embedding provider helpers | Add to SA1 (supplements `ingestion/embed.py`) |
66
+ | `adapters/vector_utils.py` | Deterministic vector generation for testing | Move to `testing/` or add to SA1 |
67
+ | `runtime/` (3 files) | `VDBClient` and factory | Add to SA1 |
68
+
69
+ **Recommendation:** Update ARCHITECTURE.md SA1 to include these files. This is documentation-only — the package is functionally complete.
70
+
71
+ ---
72
+
73
+ ### Issue 3 — Compatibility Normaliser (FR1.34) ✅ DELIVERED
74
+
75
+ - `cloud_dog_vdb/compat/response_normaliser.py` — **178 lines**
76
+ - Per-backend mappings for Chroma, Qdrant, Weaviate, OpenSearch, PGVector
77
+ - Normalises backend responses to unified `SearchResult` / `Record` models
78
+ - Test directory `UT1.35_ResponseNormaliser` present
79
+
80
+ ---
81
+
82
+ ### Issue 4 — Client-Only Integration Mode (FR1.35) ✅ DELIVERED
83
+
84
+ - `cloud_dog_vdb/remote/client.py` — **86 lines**
85
+ - `RemoteVDBClient` proxy delegating all ops to remote VDB via HTTP
86
+ - No local backend dependency required
87
+ - Test directories `UT1.36_RemoteProxy` and `AT1.5_ClientOnlyIntegration` present
88
+
89
+ ---
90
+
91
+ ### Issue 5 — Collection Schema Versioning (FR1.36) ✅ DELIVERED
92
+
93
+ - `cloud_dog_vdb/versioning/schema_version.py` — **129 lines**
94
+ - Tracks dimension count, metadata fields, embedding model, version number per collection
95
+ - Version mismatch detection at query time
96
+ - Migration utility for detecting dimension changes
97
+ - Test directory `UT1.37_SchemaVersioning` present
98
+
99
+ ---
100
+
101
+ ### Issue 6 — Test Directory Alignment ✅ RESOLVED
102
+
103
+ - Old scaffold IT directories (ChromaRemoteCollection, ChromaLocalCollection, QdrantCollection, WeaviateCollection, OpenSearchIndex, PGVectorTable, EmbeddingProviders) removed
104
+ - All IT directories match TESTS.md v0.2.0 names (IT1.1_ChromaCRUD through IT1.13_IngestionPipelineEndToEnd)
105
+ - Config delegation test directories renamed per v0.2.0 spec
106
+
107
+ ---
108
+
109
+ ## Verification — Full Suite
110
+
111
+ ```bash
112
+ set -a; source /opt/iac/Development/cloud-dog-ai/env-vault-admin; set +a
113
+
114
+ # 1. Config delegation check — MUST return zero hits
115
+ grep -rn "os.environ\|import hvac\|overlay_secrets\|from cloud_dog_vdb.secrets" cloud_dog_vdb/ --include="*.py" | grep -v __pycache__
116
+
117
+ # 2. secrets/ directory MUST NOT exist
118
+ test ! -d cloud_dog_vdb/secrets && echo "PASS" || echo "FAIL"
119
+
120
+ # 3. All tests pass
121
+ .venv/bin/pytest tests --env tests/env-UT --env tests/env-ST --env tests/env-IT --env tests/env-AT -q
122
+
123
+ # 4. Lint clean
124
+ .venv/bin/ruff check cloud_dog_vdb tests
125
+ .venv/bin/ruff format --check cloud_dog_vdb tests
126
+
127
+ # 5. Build
128
+ .venv/bin/python -m build --no-isolation
129
+ ```
130
+
131
+ ## pyproject.toml version
132
+
133
+ ```toml
134
+ version = "0.2.0"
135
+ ```
136
+
137
+ ---
138
+
139
+ ## MANDATORY COMPLETION REPORT
140
+
141
+ When finished, write your report to:
142
+ **`/opt/iac/Development/cloud-dog-ai/cloud-dog-ai-platform-standards/packages/backend/platform-vdb/working/W28A-116-FIX-VDB-REPORT.md`**
143
+
144
+ Your report MUST include ALL of the following:
145
+
146
+ ### 1. Run summary
147
+ - List every file changed and what was changed
148
+ - List every test fixed and how
149
+
150
+ ### 2. Test results (REAL counts from actual runs)
151
+ ```
152
+ QT: Xp / Yf
153
+ UT: Xp / Yf
154
+ ST: Xp / Yf
155
+ IT: Xp / Yf
156
+ AT: Xp / Yf
157
+ Ruff: X issues
158
+ ```
159
+
160
+ ### 3. Verdict
161
+ State one of: **PASS** (100% green) / **PARTIAL** (some fixed, some remain) / **FAIL** (no improvement) / **BLOCKED** (cannot proceed)
162
+
163
+ If not PASS, list every remaining failure with classification: `CODE_BUG`, `ENV_CONFIG`, `INFRA_MISSING`, `EXT_SERVICE`
164
+
165
+ ### 4. Evidence logs
166
+ All logs MUST be saved to `working/` directory:
167
+ ```
168
+ working/w28a-116-qt.log
169
+ working/w28a-116-ut.log
170
+ working/w28a-116-st.log
171
+ working/w28a-116-it.log
172
+ working/w28a-116-at.log
173
+ working/w28a-116-ruff.log
174
+ ```
175
+
176
+ ### 5. RULES.md COMPLIANCE WARRANTY
177
+
178
+ Copy this EXACTLY into your report:
179
+ ```
180
+ I warrant that:
181
+ 1. I have read RULES.md IN FULL before starting work
182
+ 2. ALL code I produced is 100% compliant with RULES.md
183
+ 3. ALL test results reported are REAL — exact counts from actual runs
184
+ 4. I have NOT weakened any test
185
+ 5. I have NOT stored, copied, or exposed any credentials
186
+ 6. ALL credentials come from Vault or git-ignored env files
187
+ 7. I have NOT modified files outside this package
188
+ ```
@@ -0,0 +1,94 @@
1
+ # platform-vdb Agent Guidance
2
+
3
+ This runbook is for agents implementing or validating `cloud_dog_vdb` changes.
4
+
5
+ ## Non-Negotiable Rules
6
+
7
+ - Use real dependencies for `ST/IT/AT/PT/QT` tiers.
8
+ - Do not add stubs, silent fallbacks, or fake success paths to satisfy tests.
9
+ - Preserve backward compatibility:
10
+ - public API parity,
11
+ - default behaviour parity,
12
+ - metadata identity parity,
13
+ - error/result envelope parity.
14
+ - Load credentials from approved env/Vault sources only.
15
+
16
+ ## Required Inputs
17
+
18
+ - Repository root: `/opt/iac/Development/cloud-dog-ai/cloud-dog-ai-platform-standards`
19
+ - Package root: `packages/backend/platform-vdb`
20
+ - Vault env file (not committed): `/opt/iac/Development/cloud-dog-ai/env-vault`
21
+ - Corpus manifest: `test-data/corpus-manifest.yaml`
22
+
23
+ ## Service Configuration Expectations
24
+
25
+ - `dev.services.mineru` must resolve to `MINERU_BASE_URL` for parser tests.
26
+ - `dev.services.marker_mcp` (or `dev.services.markermcp`) is supported, but must be kept disabled whenever the task instructions say so.
27
+ - `dev.vdbs.infinity` must exist for Infinity adapter integration tests.
28
+ - Local parser command adapters are supported (no Vault required) for:
29
+ - `deepdoc` via `tests/tools/local_deepdoc_parser.py`
30
+ - `docling` via `tests/tools/local_docling_parser.py`
31
+ - `transformers` via `tests/tools/local_transformers_parser.py`
32
+
33
+ ## Local Parser Setup
34
+
35
+ - Install local parser dependencies in package venv:
36
+ - `.venv/bin/pip install transformers==4.57.1 docling-parse==5.4.0`
37
+ - Ensure env files used for IT/AT/PT/PT-PERF enable parser commands:
38
+ - `DEEPDOC_ENABLED=true`, `DOCLING_ENABLED=true`, `TRANSFORMERS_ENABLED=true`
39
+ - command values point to `.venv/bin/python tests/tools/local_*_parser.py`
40
+ - For PT/PT-PERF MinerU stability on shared GPU hosts, keep low-VRAM flags explicit:
41
+ - `MINERU_FORMULA_ENABLE=false`
42
+ - `MINERU_TABLE_ENABLE=false`
43
+ - `MINERU_RETURN_MIDDLE_JSON=false`
44
+ - `MINERU_RETURN_IMAGES=false`
45
+
46
+ ## Standard Execution Order
47
+
48
+ Run from `packages/backend/platform-vdb`:
49
+
50
+ 0. `.venv/bin/pip install transformers==4.57.1 docling-parse==5.4.0`
51
+ 1. `set -a; source /opt/iac/Development/cloud-dog-ai/env-vault; set +a; .venv/bin/pytest tests/integration/IT2.11_ParserProviderCoverageMatrix --env tests/env-IT --env tests/env-REQUIRE-ALL-PARSERS -q`
52
+ 2. `set -a; source /opt/iac/Development/cloud-dog-ai/env-vault; set +a; .venv/bin/pytest tests/application/AT2.2_ParserProviderCoverageMatrix --env tests/env-AT --env tests/env-REQUIRE-ALL-PARSERS -q`
53
+ 3. `.venv/bin/pytest tests/parser/PT1.2_DeepdocAdapter tests/parser/PT1.3_DoclingAdapter tests/parser/PT1.8_TransformersAdapter --env tests/env-PT --env tests/env-CORPUS-SMALL -q`
54
+ 4. `.venv/bin/ruff check`
55
+ 5. `.venv/bin/ruff format --check`
56
+ 6. `.venv/bin/pytest tests/unit tests/system --env tests/env-UT --env tests/env-ST -q`
57
+ 7. `.venv/bin/pytest tests/compatibility --env tests/env-CT -q`
58
+ 8. `set -a; source /opt/iac/Development/cloud-dog-ai/env-vault; set +a; .venv/bin/pytest tests/integration --env tests/env-IT -q`
59
+ 9. `set -a; source /opt/iac/Development/cloud-dog-ai/env-vault; set +a; timeout 1800 .venv/bin/pytest tests/parser --env tests/env-PT --env tests/env-CORPUS-LARGE -q`
60
+ 10. `set -a; source /opt/iac/Development/cloud-dog-ai/env-vault; set +a; timeout 1800 .venv/bin/pytest tests/parser_performance --env tests/env-PT-PERF --env tests/env-CORPUS-LARGE -q`
61
+ 11. `set -a; source /opt/iac/Development/cloud-dog-ai/env-vault; set +a; .venv/bin/pytest tests/application --env tests/env-AT -q`
62
+ 12. `set -a; source /opt/iac/Development/cloud-dog-ai/env-vault; set +a; .venv/bin/pytest tests/security --env tests/env-IT -q`
63
+ 13. `.venv/bin/python -m build`
64
+ 14. `.venv/bin/pip install --force-reinstall dist/cloud_dog_vdb-0.4.1-py3-none-any.whl`
65
+ 15. Smoke import: `.venv/bin/python -c "import cloud_dog_vdb; print(cloud_dog_vdb.__version__)"`
66
+
67
+ For full evidence capture, mirror outputs to `/tmp/w13a_*.log` using `tee`.
68
+
69
+ ## Staged Parser Validation (Recommended)
70
+
71
+ Use corpus slicing for deterministic progression:
72
+
73
+ - Small: `--env tests/env-CORPUS-SMALL`
74
+ - Medium: `--env tests/env-CORPUS-MEDIUM`
75
+ - Large: `--env tests/env-CORPUS-LARGE`
76
+
77
+ Recommended progression:
78
+
79
+ 1. `tests/parser` + small
80
+ 2. `tests/parser_performance` + small
81
+ 3. `tests/parser` + medium
82
+ 4. `tests/parser_performance` + medium
83
+ 5. Large slice under explicit timeout guard
84
+
85
+ ## Documentation Stage (Release Gate)
86
+
87
+ After test/build execution, update:
88
+
89
+ - `TESTS.md` run history (commands, date, pass/fail/skip, blockers).
90
+ - `README.md` document links and runtime prerequisites.
91
+ - `PROGRAMME-0.4.0-DEVELOPMENT-BUILD-TEST.md` if sequencing/gates changed.
92
+ - `RELEASE_UPLIFT_PROPOSAL.md` if scope or constraints changed.
93
+
94
+ Do not claim 100% completion if any parser-performance or provider enablement gate is still open.