ocr-provenance-mcp 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ocr-provenance-mcp might be problematic. Click here for more details.
- package/.env.example +55 -0
- package/LICENSE +78 -0
- package/README.md +1154 -0
- package/dist/bin-http.d.ts +24 -0
- package/dist/bin-http.d.ts.map +1 -0
- package/dist/bin-http.js +275 -0
- package/dist/bin-http.js.map +1 -0
- package/dist/bin-setup.d.ts +11 -0
- package/dist/bin-setup.d.ts.map +1 -0
- package/dist/bin-setup.js +610 -0
- package/dist/bin-setup.js.map +1 -0
- package/dist/bin.d.ts +16 -0
- package/dist/bin.d.ts.map +1 -0
- package/dist/bin.js +16 -0
- package/dist/bin.js.map +1 -0
- package/dist/index.d.ts +13 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +90 -0
- package/dist/index.js.map +1 -0
- package/dist/models/chunk.d.ts +136 -0
- package/dist/models/chunk.d.ts.map +1 -0
- package/dist/models/chunk.js +27 -0
- package/dist/models/chunk.js.map +1 -0
- package/dist/models/cluster.d.ts +79 -0
- package/dist/models/cluster.d.ts.map +1 -0
- package/dist/models/cluster.js +10 -0
- package/dist/models/cluster.js.map +1 -0
- package/dist/models/comparison.d.ts +62 -0
- package/dist/models/comparison.d.ts.map +1 -0
- package/dist/models/comparison.js +8 -0
- package/dist/models/comparison.js.map +1 -0
- package/dist/models/document.d.ts +104 -0
- package/dist/models/document.d.ts.map +1 -0
- package/dist/models/document.js +15 -0
- package/dist/models/document.js.map +1 -0
- package/dist/models/embedding.d.ts +87 -0
- package/dist/models/embedding.d.ts.map +1 -0
- package/dist/models/embedding.js +23 -0
- package/dist/models/embedding.js.map +1 -0
- package/dist/models/extraction.d.ts +15 -0
- package/dist/models/extraction.d.ts.map +1 -0
- package/dist/models/extraction.js +2 -0
- package/dist/models/extraction.js.map +1 -0
- package/dist/models/form-fill.d.ts +23 -0
- package/dist/models/form-fill.d.ts.map +1 -0
- package/dist/models/form-fill.js +2 -0
- package/dist/models/form-fill.js.map +1 -0
- package/dist/models/image.d.ts +177 -0
- package/dist/models/image.d.ts.map +1 -0
- package/dist/models/image.js +8 -0
- package/dist/models/image.js.map +1 -0
- package/dist/models/index.d.ts +14 -0
- package/dist/models/index.d.ts.map +1 -0
- package/dist/models/index.js +22 -0
- package/dist/models/index.js.map +1 -0
- package/dist/models/provenance.d.ts +174 -0
- package/dist/models/provenance.d.ts.map +1 -0
- package/dist/models/provenance.js +53 -0
- package/dist/models/provenance.js.map +1 -0
- package/dist/models/uploaded-file.d.ts +20 -0
- package/dist/models/uploaded-file.d.ts.map +1 -0
- package/dist/models/uploaded-file.js +2 -0
- package/dist/models/uploaded-file.js.map +1 -0
- package/dist/server/errors.d.ts +93 -0
- package/dist/server/errors.d.ts.map +1 -0
- package/dist/server/errors.js +256 -0
- package/dist/server/errors.js.map +1 -0
- package/dist/server/events.d.ts +36 -0
- package/dist/server/events.d.ts.map +1 -0
- package/dist/server/events.js +48 -0
- package/dist/server/events.js.map +1 -0
- package/dist/server/permissions.d.ts +26 -0
- package/dist/server/permissions.d.ts.map +1 -0
- package/dist/server/permissions.js +194 -0
- package/dist/server/permissions.js.map +1 -0
- package/dist/server/register-tools.d.ts +25 -0
- package/dist/server/register-tools.d.ts.map +1 -0
- package/dist/server/register-tools.js +102 -0
- package/dist/server/register-tools.js.map +1 -0
- package/dist/server/startup.d.ts +16 -0
- package/dist/server/startup.d.ts.map +1 -0
- package/dist/server/startup.js +37 -0
- package/dist/server/startup.js.map +1 -0
- package/dist/server/state.d.ts +166 -0
- package/dist/server/state.d.ts.map +1 -0
- package/dist/server/state.js +424 -0
- package/dist/server/state.js.map +1 -0
- package/dist/server/transports/http-transport.d.ts +37 -0
- package/dist/server/transports/http-transport.d.ts.map +1 -0
- package/dist/server/transports/http-transport.js +204 -0
- package/dist/server/transports/http-transport.js.map +1 -0
- package/dist/server/transports/index.d.ts +9 -0
- package/dist/server/transports/index.d.ts.map +1 -0
- package/dist/server/transports/index.js +9 -0
- package/dist/server/transports/index.js.map +1 -0
- package/dist/server/transports/session-manager.d.ts +40 -0
- package/dist/server/transports/session-manager.d.ts.map +1 -0
- package/dist/server/transports/session-manager.js +74 -0
- package/dist/server/transports/session-manager.js.map +1 -0
- package/dist/server/types.d.ts +82 -0
- package/dist/server/types.d.ts.map +1 -0
- package/dist/server/types.js +14 -0
- package/dist/server/types.js.map +1 -0
- package/dist/services/audit.d.ts +26 -0
- package/dist/services/audit.d.ts.map +1 -0
- package/dist/services/audit.js +43 -0
- package/dist/services/audit.js.map +1 -0
- package/dist/services/chunking/chunk-deduplicator.d.ts +33 -0
- package/dist/services/chunking/chunk-deduplicator.d.ts.map +1 -0
- package/dist/services/chunking/chunk-deduplicator.js +46 -0
- package/dist/services/chunking/chunk-deduplicator.js.map +1 -0
- package/dist/services/chunking/chunk-merger.d.ts +26 -0
- package/dist/services/chunking/chunk-merger.d.ts.map +1 -0
- package/dist/services/chunking/chunk-merger.js +94 -0
- package/dist/services/chunking/chunk-merger.js.map +1 -0
- package/dist/services/chunking/chunker.d.ts +62 -0
- package/dist/services/chunking/chunker.d.ts.map +1 -0
- package/dist/services/chunking/chunker.js +566 -0
- package/dist/services/chunking/chunker.js.map +1 -0
- package/dist/services/chunking/heading-normalizer.d.ts +33 -0
- package/dist/services/chunking/heading-normalizer.d.ts.map +1 -0
- package/dist/services/chunking/heading-normalizer.js +101 -0
- package/dist/services/chunking/heading-normalizer.js.map +1 -0
- package/dist/services/chunking/json-block-analyzer.d.ts +163 -0
- package/dist/services/chunking/json-block-analyzer.d.ts.map +1 -0
- package/dist/services/chunking/json-block-analyzer.js +1033 -0
- package/dist/services/chunking/json-block-analyzer.js.map +1 -0
- package/dist/services/chunking/markdown-parser.d.ts +75 -0
- package/dist/services/chunking/markdown-parser.d.ts.map +1 -0
- package/dist/services/chunking/markdown-parser.js +428 -0
- package/dist/services/chunking/markdown-parser.js.map +1 -0
- package/dist/services/chunking/text-normalizer.d.ts +20 -0
- package/dist/services/chunking/text-normalizer.d.ts.map +1 -0
- package/dist/services/chunking/text-normalizer.js +36 -0
- package/dist/services/chunking/text-normalizer.js.map +1 -0
- package/dist/services/clm/contract-schemas.d.ts +36 -0
- package/dist/services/clm/contract-schemas.d.ts.map +1 -0
- package/dist/services/clm/contract-schemas.js +92 -0
- package/dist/services/clm/contract-schemas.js.map +1 -0
- package/dist/services/clm/summarization.d.ts +46 -0
- package/dist/services/clm/summarization.d.ts.map +1 -0
- package/dist/services/clm/summarization.js +61 -0
- package/dist/services/clm/summarization.js.map +1 -0
- package/dist/services/clustering/clustering-service.d.ts +58 -0
- package/dist/services/clustering/clustering-service.d.ts.map +1 -0
- package/dist/services/clustering/clustering-service.js +467 -0
- package/dist/services/clustering/clustering-service.js.map +1 -0
- package/dist/services/comparison/diff-service.d.ts +41 -0
- package/dist/services/comparison/diff-service.d.ts.map +1 -0
- package/dist/services/comparison/diff-service.js +120 -0
- package/dist/services/comparison/diff-service.js.map +1 -0
- package/dist/services/embedding/embedder.d.ts +55 -0
- package/dist/services/embedding/embedder.d.ts.map +1 -0
- package/dist/services/embedding/embedder.js +202 -0
- package/dist/services/embedding/embedder.js.map +1 -0
- package/dist/services/embedding/nomic.d.ts +67 -0
- package/dist/services/embedding/nomic.d.ts.map +1 -0
- package/dist/services/embedding/nomic.js +280 -0
- package/dist/services/embedding/nomic.js.map +1 -0
- package/dist/services/gemini/circuit-breaker.d.ts +106 -0
- package/dist/services/gemini/circuit-breaker.d.ts.map +1 -0
- package/dist/services/gemini/circuit-breaker.js +237 -0
- package/dist/services/gemini/circuit-breaker.js.map +1 -0
- package/dist/services/gemini/client.d.ts +173 -0
- package/dist/services/gemini/client.d.ts.map +1 -0
- package/dist/services/gemini/client.js +483 -0
- package/dist/services/gemini/client.js.map +1 -0
- package/dist/services/gemini/config.d.ts +116 -0
- package/dist/services/gemini/config.d.ts.map +1 -0
- package/dist/services/gemini/config.js +118 -0
- package/dist/services/gemini/config.js.map +1 -0
- package/dist/services/gemini/index.d.ts +9 -0
- package/dist/services/gemini/index.d.ts.map +1 -0
- package/dist/services/gemini/index.js +13 -0
- package/dist/services/gemini/index.js.map +1 -0
- package/dist/services/gemini/rate-limiter.d.ts +62 -0
- package/dist/services/gemini/rate-limiter.d.ts.map +1 -0
- package/dist/services/gemini/rate-limiter.js +120 -0
- package/dist/services/gemini/rate-limiter.js.map +1 -0
- package/dist/services/images/extractor.d.ts +88 -0
- package/dist/services/images/extractor.d.ts.map +1 -0
- package/dist/services/images/extractor.js +340 -0
- package/dist/services/images/extractor.js.map +1 -0
- package/dist/services/images/optimizer.d.ts +130 -0
- package/dist/services/images/optimizer.d.ts.map +1 -0
- package/dist/services/images/optimizer.js +228 -0
- package/dist/services/images/optimizer.js.map +1 -0
- package/dist/services/ocr/datalab.d.ts +64 -0
- package/dist/services/ocr/datalab.d.ts.map +1 -0
- package/dist/services/ocr/datalab.js +425 -0
- package/dist/services/ocr/datalab.js.map +1 -0
- package/dist/services/ocr/errors.d.ts +38 -0
- package/dist/services/ocr/errors.d.ts.map +1 -0
- package/dist/services/ocr/errors.js +83 -0
- package/dist/services/ocr/errors.js.map +1 -0
- package/dist/services/ocr/file-manager.d.ts +76 -0
- package/dist/services/ocr/file-manager.d.ts.map +1 -0
- package/dist/services/ocr/file-manager.js +238 -0
- package/dist/services/ocr/file-manager.js.map +1 -0
- package/dist/services/ocr/form-fill.d.ts +48 -0
- package/dist/services/ocr/form-fill.d.ts.map +1 -0
- package/dist/services/ocr/form-fill.js +213 -0
- package/dist/services/ocr/form-fill.js.map +1 -0
- package/dist/services/ocr/processor.d.ts +95 -0
- package/dist/services/ocr/processor.d.ts.map +1 -0
- package/dist/services/ocr/processor.js +259 -0
- package/dist/services/ocr/processor.js.map +1 -0
- package/dist/services/provenance/agent-metadata.d.ts +82 -0
- package/dist/services/provenance/agent-metadata.d.ts.map +1 -0
- package/dist/services/provenance/agent-metadata.js +106 -0
- package/dist/services/provenance/agent-metadata.js.map +1 -0
- package/dist/services/provenance/chain-hash.d.ts +57 -0
- package/dist/services/provenance/chain-hash.d.ts.map +1 -0
- package/dist/services/provenance/chain-hash.js +131 -0
- package/dist/services/provenance/chain-hash.js.map +1 -0
- package/dist/services/provenance/exporter.d.ts +202 -0
- package/dist/services/provenance/exporter.d.ts.map +1 -0
- package/dist/services/provenance/exporter.js +457 -0
- package/dist/services/provenance/exporter.js.map +1 -0
- package/dist/services/provenance/index.d.ts +15 -0
- package/dist/services/provenance/index.d.ts.map +1 -0
- package/dist/services/provenance/index.js +17 -0
- package/dist/services/provenance/index.js.map +1 -0
- package/dist/services/provenance/tracker.d.ts +138 -0
- package/dist/services/provenance/tracker.d.ts.map +1 -0
- package/dist/services/provenance/tracker.js +293 -0
- package/dist/services/provenance/tracker.js.map +1 -0
- package/dist/services/provenance/verifier.d.ts +153 -0
- package/dist/services/provenance/verifier.d.ts.map +1 -0
- package/dist/services/provenance/verifier.js +536 -0
- package/dist/services/provenance/verifier.js.map +1 -0
- package/dist/services/python-pool.d.ts +70 -0
- package/dist/services/python-pool.d.ts.map +1 -0
- package/dist/services/python-pool.js +265 -0
- package/dist/services/python-pool.js.map +1 -0
- package/dist/services/search/bm25.d.ts +180 -0
- package/dist/services/search/bm25.d.ts.map +1 -0
- package/dist/services/search/bm25.js +656 -0
- package/dist/services/search/bm25.js.map +1 -0
- package/dist/services/search/fusion.d.ts +103 -0
- package/dist/services/search/fusion.d.ts.map +1 -0
- package/dist/services/search/fusion.js +122 -0
- package/dist/services/search/fusion.js.map +1 -0
- package/dist/services/search/local-reranker.d.ts +30 -0
- package/dist/services/search/local-reranker.d.ts.map +1 -0
- package/dist/services/search/local-reranker.js +123 -0
- package/dist/services/search/local-reranker.js.map +1 -0
- package/dist/services/search/quality.d.ts +11 -0
- package/dist/services/search/quality.d.ts.map +1 -0
- package/dist/services/search/quality.js +17 -0
- package/dist/services/search/quality.js.map +1 -0
- package/dist/services/search/query-classifier.d.ts +34 -0
- package/dist/services/search/query-classifier.d.ts.map +1 -0
- package/dist/services/search/query-classifier.js +114 -0
- package/dist/services/search/query-classifier.js.map +1 -0
- package/dist/services/search/query-expander.d.ts +73 -0
- package/dist/services/search/query-expander.d.ts.map +1 -0
- package/dist/services/search/query-expander.js +281 -0
- package/dist/services/search/query-expander.js.map +1 -0
- package/dist/services/search/reranker.d.ts +44 -0
- package/dist/services/search/reranker.d.ts.map +1 -0
- package/dist/services/search/reranker.js +101 -0
- package/dist/services/search/reranker.js.map +1 -0
- package/dist/services/storage/database/annotation-operations.d.ts +113 -0
- package/dist/services/storage/database/annotation-operations.d.ts.map +1 -0
- package/dist/services/storage/database/annotation-operations.js +177 -0
- package/dist/services/storage/database/annotation-operations.js.map +1 -0
- package/dist/services/storage/database/approval-operations.d.ts +132 -0
- package/dist/services/storage/database/approval-operations.d.ts.map +1 -0
- package/dist/services/storage/database/approval-operations.js +206 -0
- package/dist/services/storage/database/approval-operations.js.map +1 -0
- package/dist/services/storage/database/chunk-operations.d.ts +132 -0
- package/dist/services/storage/database/chunk-operations.d.ts.map +1 -0
- package/dist/services/storage/database/chunk-operations.js +306 -0
- package/dist/services/storage/database/chunk-operations.js.map +1 -0
- package/dist/services/storage/database/cluster-operations.d.ts +97 -0
- package/dist/services/storage/database/cluster-operations.d.ts.map +1 -0
- package/dist/services/storage/database/cluster-operations.js +258 -0
- package/dist/services/storage/database/cluster-operations.js.map +1 -0
- package/dist/services/storage/database/comparison-operations.d.ts +41 -0
- package/dist/services/storage/database/comparison-operations.d.ts.map +1 -0
- package/dist/services/storage/database/comparison-operations.js +65 -0
- package/dist/services/storage/database/comparison-operations.js.map +1 -0
- package/dist/services/storage/database/converters.d.ts +36 -0
- package/dist/services/storage/database/converters.d.ts.map +1 -0
- package/dist/services/storage/database/converters.js +244 -0
- package/dist/services/storage/database/converters.js.map +1 -0
- package/dist/services/storage/database/document-operations.d.ts +145 -0
- package/dist/services/storage/database/document-operations.d.ts.map +1 -0
- package/dist/services/storage/database/document-operations.js +498 -0
- package/dist/services/storage/database/document-operations.js.map +1 -0
- package/dist/services/storage/database/embedding-operations.d.ts +130 -0
- package/dist/services/storage/database/embedding-operations.d.ts.map +1 -0
- package/dist/services/storage/database/embedding-operations.js +315 -0
- package/dist/services/storage/database/embedding-operations.js.map +1 -0
- package/dist/services/storage/database/extraction-operations.d.ts +47 -0
- package/dist/services/storage/database/extraction-operations.d.ts.map +1 -0
- package/dist/services/storage/database/extraction-operations.js +85 -0
- package/dist/services/storage/database/extraction-operations.js.map +1 -0
- package/dist/services/storage/database/form-fill-operations.d.ts +58 -0
- package/dist/services/storage/database/form-fill-operations.d.ts.map +1 -0
- package/dist/services/storage/database/form-fill-operations.js +116 -0
- package/dist/services/storage/database/form-fill-operations.js.map +1 -0
- package/dist/services/storage/database/helpers.d.ts +29 -0
- package/dist/services/storage/database/helpers.d.ts.map +1 -0
- package/dist/services/storage/database/helpers.js +55 -0
- package/dist/services/storage/database/helpers.js.map +1 -0
- package/dist/services/storage/database/image-operations.d.ts +202 -0
- package/dist/services/storage/database/image-operations.d.ts.map +1 -0
- package/dist/services/storage/database/image-operations.js +484 -0
- package/dist/services/storage/database/image-operations.js.map +1 -0
- package/dist/services/storage/database/index.d.ts +13 -0
- package/dist/services/storage/database/index.d.ts.map +1 -0
- package/dist/services/storage/database/index.js +16 -0
- package/dist/services/storage/database/index.js.map +1 -0
- package/dist/services/storage/database/lock-operations.d.ts +59 -0
- package/dist/services/storage/database/lock-operations.d.ts.map +1 -0
- package/dist/services/storage/database/lock-operations.js +89 -0
- package/dist/services/storage/database/lock-operations.js.map +1 -0
- package/dist/services/storage/database/obligation-operations.d.ts +88 -0
- package/dist/services/storage/database/obligation-operations.d.ts.map +1 -0
- package/dist/services/storage/database/obligation-operations.js +206 -0
- package/dist/services/storage/database/obligation-operations.js.map +1 -0
- package/dist/services/storage/database/ocr-operations.d.ts +33 -0
- package/dist/services/storage/database/ocr-operations.d.ts.map +1 -0
- package/dist/services/storage/database/ocr-operations.js +70 -0
- package/dist/services/storage/database/ocr-operations.js.map +1 -0
- package/dist/services/storage/database/playbook-operations.d.ts +72 -0
- package/dist/services/storage/database/playbook-operations.d.ts.map +1 -0
- package/dist/services/storage/database/playbook-operations.js +247 -0
- package/dist/services/storage/database/playbook-operations.js.map +1 -0
- package/dist/services/storage/database/provenance-operations.d.ts +112 -0
- package/dist/services/storage/database/provenance-operations.d.ts.map +1 -0
- package/dist/services/storage/database/provenance-operations.js +251 -0
- package/dist/services/storage/database/provenance-operations.js.map +1 -0
- package/dist/services/storage/database/service.d.ts +142 -0
- package/dist/services/storage/database/service.d.ts.map +1 -0
- package/dist/services/storage/database/service.js +310 -0
- package/dist/services/storage/database/service.js.map +1 -0
- package/dist/services/storage/database/static-operations.d.ts +30 -0
- package/dist/services/storage/database/static-operations.d.ts.map +1 -0
- package/dist/services/storage/database/static-operations.js +218 -0
- package/dist/services/storage/database/static-operations.js.map +1 -0
- package/dist/services/storage/database/stats-operations.d.ts +101 -0
- package/dist/services/storage/database/stats-operations.d.ts.map +1 -0
- package/dist/services/storage/database/stats-operations.js +394 -0
- package/dist/services/storage/database/stats-operations.js.map +1 -0
- package/dist/services/storage/database/tag-operations.d.ts +76 -0
- package/dist/services/storage/database/tag-operations.d.ts.map +1 -0
- package/dist/services/storage/database/tag-operations.js +178 -0
- package/dist/services/storage/database/tag-operations.js.map +1 -0
- package/dist/services/storage/database/types.d.ts +286 -0
- package/dist/services/storage/database/types.d.ts.map +1 -0
- package/dist/services/storage/database/types.js +39 -0
- package/dist/services/storage/database/types.js.map +1 -0
- package/dist/services/storage/database/upload-operations.d.ts +71 -0
- package/dist/services/storage/database/upload-operations.d.ts.map +1 -0
- package/dist/services/storage/database/upload-operations.js +124 -0
- package/dist/services/storage/database/upload-operations.js.map +1 -0
- package/dist/services/storage/database/user-operations.d.ts +102 -0
- package/dist/services/storage/database/user-operations.d.ts.map +1 -0
- package/dist/services/storage/database/user-operations.js +151 -0
- package/dist/services/storage/database/user-operations.js.map +1 -0
- package/dist/services/storage/database/workflow-operations.d.ts +98 -0
- package/dist/services/storage/database/workflow-operations.d.ts.map +1 -0
- package/dist/services/storage/database/workflow-operations.js +157 -0
- package/dist/services/storage/database/workflow-operations.js.map +1 -0
- package/dist/services/storage/database.d.ts +16 -0
- package/dist/services/storage/database.d.ts.map +1 -0
- package/dist/services/storage/database.js +15 -0
- package/dist/services/storage/database.js.map +1 -0
- package/dist/services/storage/index.d.ts +10 -0
- package/dist/services/storage/index.d.ts.map +1 -0
- package/dist/services/storage/index.js +10 -0
- package/dist/services/storage/index.js.map +1 -0
- package/dist/services/storage/migrations/index.d.ts +16 -0
- package/dist/services/storage/migrations/index.d.ts.map +1 -0
- package/dist/services/storage/migrations/index.js +20 -0
- package/dist/services/storage/migrations/index.js.map +1 -0
- package/dist/services/storage/migrations/operations.d.ts +40 -0
- package/dist/services/storage/migrations/operations.d.ts.map +1 -0
- package/dist/services/storage/migrations/operations.js +2910 -0
- package/dist/services/storage/migrations/operations.js.map +1 -0
- package/dist/services/storage/migrations/schema-definitions.d.ts +306 -0
- package/dist/services/storage/migrations/schema-definitions.d.ts.map +1 -0
- package/dist/services/storage/migrations/schema-definitions.js +1006 -0
- package/dist/services/storage/migrations/schema-definitions.js.map +1 -0
- package/dist/services/storage/migrations/schema-helpers.d.ts +50 -0
- package/dist/services/storage/migrations/schema-helpers.d.ts.map +1 -0
- package/dist/services/storage/migrations/schema-helpers.js +176 -0
- package/dist/services/storage/migrations/schema-helpers.js.map +1 -0
- package/dist/services/storage/migrations/types.d.ts +15 -0
- package/dist/services/storage/migrations/types.d.ts.map +1 -0
- package/dist/services/storage/migrations/types.js +21 -0
- package/dist/services/storage/migrations/types.js.map +1 -0
- package/dist/services/storage/migrations/verification.d.ts +20 -0
- package/dist/services/storage/migrations/verification.d.ts.map +1 -0
- package/dist/services/storage/migrations/verification.js +78 -0
- package/dist/services/storage/migrations/verification.js.map +1 -0
- package/dist/services/storage/migrations.d.ts +16 -0
- package/dist/services/storage/migrations.d.ts.map +1 -0
- package/dist/services/storage/migrations.js +17 -0
- package/dist/services/storage/migrations.js.map +1 -0
- package/dist/services/storage/types.d.ts +12 -0
- package/dist/services/storage/types.d.ts.map +1 -0
- package/dist/services/storage/types.js +5 -0
- package/dist/services/storage/types.js.map +1 -0
- package/dist/services/storage/vector.d.ts +208 -0
- package/dist/services/storage/vector.d.ts.map +1 -0
- package/dist/services/storage/vector.js +526 -0
- package/dist/services/storage/vector.js.map +1 -0
- package/dist/services/vlm/pipeline.d.ts +194 -0
- package/dist/services/vlm/pipeline.d.ts.map +1 -0
- package/dist/services/vlm/pipeline.js +800 -0
- package/dist/services/vlm/pipeline.js.map +1 -0
- package/dist/services/vlm/prompts.d.ts +171 -0
- package/dist/services/vlm/prompts.d.ts.map +1 -0
- package/dist/services/vlm/prompts.js +229 -0
- package/dist/services/vlm/prompts.js.map +1 -0
- package/dist/services/vlm/service.d.ts +174 -0
- package/dist/services/vlm/service.d.ts.map +1 -0
- package/dist/services/vlm/service.js +256 -0
- package/dist/services/vlm/service.js.map +1 -0
- package/dist/services/webhook-delivery.d.ts +4 -0
- package/dist/services/webhook-delivery.d.ts.map +1 -0
- package/dist/services/webhook-delivery.js +140 -0
- package/dist/services/webhook-delivery.js.map +1 -0
- package/dist/tools/chunks.d.ts +19 -0
- package/dist/tools/chunks.d.ts.map +1 -0
- package/dist/tools/chunks.js +392 -0
- package/dist/tools/chunks.js.map +1 -0
- package/dist/tools/clm.d.ts +16 -0
- package/dist/tools/clm.d.ts.map +1 -0
- package/dist/tools/clm.js +668 -0
- package/dist/tools/clm.js.map +1 -0
- package/dist/tools/clustering.d.ts +13 -0
- package/dist/tools/clustering.d.ts.map +1 -0
- package/dist/tools/clustering.js +498 -0
- package/dist/tools/clustering.js.map +1 -0
- package/dist/tools/collaboration.d.ts +15 -0
- package/dist/tools/collaboration.d.ts.map +1 -0
- package/dist/tools/collaboration.js +516 -0
- package/dist/tools/collaboration.js.map +1 -0
- package/dist/tools/comparison.d.ts +13 -0
- package/dist/tools/comparison.d.ts.map +1 -0
- package/dist/tools/comparison.js +735 -0
- package/dist/tools/comparison.js.map +1 -0
- package/dist/tools/compliance.d.ts +15 -0
- package/dist/tools/compliance.d.ts.map +1 -0
- package/dist/tools/compliance.js +640 -0
- package/dist/tools/compliance.js.map +1 -0
- package/dist/tools/config.d.ts +19 -0
- package/dist/tools/config.d.ts.map +1 -0
- package/dist/tools/config.js +213 -0
- package/dist/tools/config.js.map +1 -0
- package/dist/tools/database.d.ts +62 -0
- package/dist/tools/database.d.ts.map +1 -0
- package/dist/tools/database.js +288 -0
- package/dist/tools/database.js.map +1 -0
- package/dist/tools/documents.d.ts +61 -0
- package/dist/tools/documents.d.ts.map +1 -0
- package/dist/tools/documents.js +1624 -0
- package/dist/tools/documents.js.map +1 -0
- package/dist/tools/embeddings.d.ts +14 -0
- package/dist/tools/embeddings.d.ts.map +1 -0
- package/dist/tools/embeddings.js +626 -0
- package/dist/tools/embeddings.js.map +1 -0
- package/dist/tools/evaluation.d.ts +25 -0
- package/dist/tools/evaluation.d.ts.map +1 -0
- package/dist/tools/evaluation.js +523 -0
- package/dist/tools/evaluation.js.map +1 -0
- package/dist/tools/events.d.ts +16 -0
- package/dist/tools/events.d.ts.map +1 -0
- package/dist/tools/events.js +493 -0
- package/dist/tools/events.js.map +1 -0
- package/dist/tools/extraction-structured.d.ts +13 -0
- package/dist/tools/extraction-structured.d.ts.map +1 -0
- package/dist/tools/extraction-structured.js +390 -0
- package/dist/tools/extraction-structured.js.map +1 -0
- package/dist/tools/extraction.d.ts +24 -0
- package/dist/tools/extraction.d.ts.map +1 -0
- package/dist/tools/extraction.js +424 -0
- package/dist/tools/extraction.js.map +1 -0
- package/dist/tools/file-management.d.ts +14 -0
- package/dist/tools/file-management.d.ts.map +1 -0
- package/dist/tools/file-management.js +523 -0
- package/dist/tools/file-management.js.map +1 -0
- package/dist/tools/form-fill.d.ts +13 -0
- package/dist/tools/form-fill.d.ts.map +1 -0
- package/dist/tools/form-fill.js +250 -0
- package/dist/tools/form-fill.js.map +1 -0
- package/dist/tools/health.d.ts +19 -0
- package/dist/tools/health.d.ts.map +1 -0
- package/dist/tools/health.js +229 -0
- package/dist/tools/health.js.map +1 -0
- package/dist/tools/images.d.ts +54 -0
- package/dist/tools/images.d.ts.map +1 -0
- package/dist/tools/images.js +787 -0
- package/dist/tools/images.js.map +1 -0
- package/dist/tools/ingestion.d.ts +94 -0
- package/dist/tools/ingestion.d.ts.map +1 -0
- package/dist/tools/ingestion.js +1659 -0
- package/dist/tools/ingestion.js.map +1 -0
- package/dist/tools/intelligence.d.ts +18 -0
- package/dist/tools/intelligence.d.ts.map +1 -0
- package/dist/tools/intelligence.js +1039 -0
- package/dist/tools/intelligence.js.map +1 -0
- package/dist/tools/provenance.d.ts +51 -0
- package/dist/tools/provenance.d.ts.map +1 -0
- package/dist/tools/provenance.js +691 -0
- package/dist/tools/provenance.js.map +1 -0
- package/dist/tools/reports.d.ts +41 -0
- package/dist/tools/reports.d.ts.map +1 -0
- package/dist/tools/reports.js +1394 -0
- package/dist/tools/reports.js.map +1 -0
- package/dist/tools/search.d.ts +35 -0
- package/dist/tools/search.d.ts.map +1 -0
- package/dist/tools/search.js +2528 -0
- package/dist/tools/search.js.map +1 -0
- package/dist/tools/shared.d.ts +52 -0
- package/dist/tools/shared.d.ts.map +1 -0
- package/dist/tools/shared.js +54 -0
- package/dist/tools/shared.js.map +1 -0
- package/dist/tools/tags.d.ts +15 -0
- package/dist/tools/tags.d.ts.map +1 -0
- package/dist/tools/tags.js +287 -0
- package/dist/tools/tags.js.map +1 -0
- package/dist/tools/timeline.d.ts +15 -0
- package/dist/tools/timeline.d.ts.map +1 -0
- package/dist/tools/timeline.js +14 -0
- package/dist/tools/timeline.js.map +1 -0
- package/dist/tools/users.d.ts +14 -0
- package/dist/tools/users.d.ts.map +1 -0
- package/dist/tools/users.js +257 -0
- package/dist/tools/users.js.map +1 -0
- package/dist/tools/vlm.d.ts +40 -0
- package/dist/tools/vlm.d.ts.map +1 -0
- package/dist/tools/vlm.js +475 -0
- package/dist/tools/vlm.js.map +1 -0
- package/dist/tools/workflow.d.ts +16 -0
- package/dist/tools/workflow.d.ts.map +1 -0
- package/dist/tools/workflow.js +495 -0
- package/dist/tools/workflow.js.map +1 -0
- package/dist/utils/backoff.d.ts +53 -0
- package/dist/utils/backoff.d.ts.map +1 -0
- package/dist/utils/backoff.js +78 -0
- package/dist/utils/backoff.js.map +1 -0
- package/dist/utils/config-persistence.d.ts +33 -0
- package/dist/utils/config-persistence.d.ts.map +1 -0
- package/dist/utils/config-persistence.js +61 -0
- package/dist/utils/config-persistence.js.map +1 -0
- package/dist/utils/hash.d.ts +65 -0
- package/dist/utils/hash.d.ts.map +1 -0
- package/dist/utils/hash.js +146 -0
- package/dist/utils/hash.js.map +1 -0
- package/dist/utils/math.d.ts +21 -0
- package/dist/utils/math.d.ts.map +1 -0
- package/dist/utils/math.js +39 -0
- package/dist/utils/math.js.map +1 -0
- package/dist/utils/validation.d.ts +697 -0
- package/dist/utils/validation.d.ts.map +1 -0
- package/dist/utils/validation.js +529 -0
- package/dist/utils/validation.js.map +1 -0
- package/package.json +96 -0
- package/python/.gitkeep +0 -0
- package/python/__init__.py +104 -0
- package/python/clustering_worker.py +440 -0
- package/python/docx_image_extractor.py +524 -0
- package/python/embedding_worker.py +552 -0
- package/python/file_manager_worker.py +564 -0
- package/python/form_fill_worker.py +399 -0
- package/python/gpu_utils.py +582 -0
- package/python/image_extractor.py +317 -0
- package/python/image_optimizer.py +444 -0
- package/python/ocr_worker.py +712 -0
- package/python/pyproject.toml +76 -0
- package/python/requirements.txt +51 -0
- package/python/reranker_worker.py +87 -0
|
@@ -0,0 +1,2910 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Database Migration Operations
|
|
3
|
+
*
|
|
4
|
+
* Contains the main migration functions: initializeDatabase, migrateToLatest,
|
|
5
|
+
* checkSchemaVersion, and getCurrentSchemaVersion.
|
|
6
|
+
*
|
|
7
|
+
* @module migrations/operations
|
|
8
|
+
*/
|
|
9
|
+
import { MigrationError } from './types.js';
|
|
10
|
+
import { SCHEMA_VERSION, CREATE_CHUNKS_FTS_TABLE, CREATE_FTS_TRIGGERS, CREATE_FTS_INDEX_METADATA, CREATE_VLM_FTS_TABLE, CREATE_VLM_FTS_TRIGGERS, CREATE_EXTRACTIONS_TABLE, CREATE_FORM_FILLS_TABLE, CREATE_EXTRACTIONS_FTS_TABLE, CREATE_EXTRACTIONS_FTS_TRIGGERS, CREATE_UPLOADED_FILES_TABLE, CREATE_COMPARISONS_TABLE, CREATE_CLUSTERS_TABLE, CREATE_DOCUMENT_CLUSTERS_TABLE, CREATE_TAGS_TABLE, CREATE_ENTITY_TAGS_TABLE, CREATE_DOCUMENTS_FTS_TABLE, CREATE_DOCUMENTS_FTS_TRIGGERS, CREATE_USERS_TABLE, CREATE_AUDIT_LOG_TABLE, CREATE_ANNOTATIONS_TABLE, CREATE_DOCUMENT_LOCKS_TABLE, CREATE_WORKFLOW_STATES_TABLE, CREATE_APPROVAL_CHAINS_TABLE, CREATE_APPROVAL_STEPS_TABLE, CREATE_OBLIGATIONS_TABLE, CREATE_PLAYBOOKS_TABLE, CREATE_WEBHOOKS_TABLE, } from './schema-definitions.js';
|
|
11
|
+
// ─── Legacy entity/KG table definitions (inlined for migration chain v12→v25) ───
|
|
12
|
+
// These tables were removed from schema-definitions.ts in v26 but the migration
|
|
13
|
+
// functions that originally created them (v12→v13, v14→v15, v17→v18, etc.) still
|
|
14
|
+
// reference these constants so that old databases can migrate through the full chain.
|
|
15
|
+
// The v25→v26 migration then drops all of them.
|
|
16
|
+
/**
 * Legacy DDL: creates the `entities` table if it does not already exist.
 * Rows reference `documents(id)` and `provenance(id)`; `entity_type` is
 * restricted to a fixed list by a CHECK constraint.
 */
const CREATE_ENTITIES_TABLE = `
CREATE TABLE IF NOT EXISTS entities (
  id TEXT PRIMARY KEY NOT NULL,
  document_id TEXT NOT NULL REFERENCES documents(id),
  entity_type TEXT NOT NULL CHECK (entity_type IN ('person', 'organization', 'date', 'amount', 'case_number', 'location', 'statute', 'exhibit', 'medication', 'diagnosis', 'medical_device', 'other')),
  raw_text TEXT NOT NULL,
  normalized_text TEXT NOT NULL,
  confidence REAL NOT NULL DEFAULT 0.0,
  metadata TEXT,
  provenance_id TEXT NOT NULL REFERENCES provenance(id),
  created_at TEXT NOT NULL DEFAULT (datetime('now'))
)`;
|
|
28
|
+
/**
 * Legacy DDL: creates the `entity_mentions` table if it does not already exist.
 * Each row references `entities(id)` and `documents(id)`, and optionally
 * `chunks(id)`, with nullable page/character position columns.
 */
const CREATE_ENTITY_MENTIONS_TABLE = `
CREATE TABLE IF NOT EXISTS entity_mentions (
  id TEXT PRIMARY KEY NOT NULL,
  entity_id TEXT NOT NULL REFERENCES entities(id),
  document_id TEXT NOT NULL REFERENCES documents(id),
  chunk_id TEXT REFERENCES chunks(id),
  page_number INTEGER,
  character_start INTEGER,
  character_end INTEGER,
  context_text TEXT,
  created_at TEXT NOT NULL DEFAULT (datetime('now'))
)`;
|
|
40
|
+
/**
 * Legacy DDL: creates the `knowledge_nodes` table if it does not already exist.
 * Carries per-node counters (document/mention/edge counts), an average
 * confidence, and a foreign key to `provenance(id)`.
 */
const CREATE_KNOWLEDGE_NODES_TABLE = `
  CREATE TABLE IF NOT EXISTS knowledge_nodes (
    id TEXT PRIMARY KEY,
    entity_type TEXT NOT NULL CHECK (entity_type IN ('person', 'organization', 'date', 'amount', 'case_number', 'location', 'statute', 'exhibit', 'medication', 'diagnosis', 'medical_device', 'other')),
    canonical_name TEXT NOT NULL,
    normalized_name TEXT NOT NULL,
    aliases TEXT,
    document_count INTEGER NOT NULL DEFAULT 1,
    mention_count INTEGER NOT NULL DEFAULT 0,
    edge_count INTEGER NOT NULL DEFAULT 0,
    avg_confidence REAL NOT NULL DEFAULT 0.0,
    metadata TEXT,
    provenance_id TEXT NOT NULL,
    created_at TEXT NOT NULL,
    updated_at TEXT NOT NULL,
    importance_score REAL,
    resolution_type TEXT,
    FOREIGN KEY (provenance_id) REFERENCES provenance(id)
  )
`;
|
|
60
|
+
/**
 * Legacy DDL: creates the `knowledge_edges` table if it does not already exist.
 * Each edge links two `knowledge_nodes` rows; `relationship_type` is limited
 * to a fixed list by a CHECK constraint.
 */
const CREATE_KNOWLEDGE_EDGES_TABLE = `
  CREATE TABLE IF NOT EXISTS knowledge_edges (
    id TEXT PRIMARY KEY,
    source_node_id TEXT NOT NULL,
    target_node_id TEXT NOT NULL,
    relationship_type TEXT NOT NULL CHECK (relationship_type IN ('co_mentioned', 'co_located', 'works_at', 'represents', 'located_in', 'filed_in', 'cites', 'references', 'party_to', 'related_to', 'precedes', 'occurred_at', 'treated_with', 'administered_via', 'managed_by', 'interacts_with', 'diagnosed_with', 'prescribed_by', 'admitted_to', 'supervised_by', 'filed_by', 'contraindicated_with')),
    weight REAL NOT NULL DEFAULT 1.0,
    evidence_count INTEGER NOT NULL DEFAULT 1,
    document_ids TEXT NOT NULL,
    metadata TEXT,
    provenance_id TEXT NOT NULL,
    created_at TEXT NOT NULL,
    valid_from TEXT,
    valid_until TEXT,
    normalized_weight REAL DEFAULT 0,
    contradiction_count INTEGER DEFAULT 0,
    FOREIGN KEY (source_node_id) REFERENCES knowledge_nodes(id),
    FOREIGN KEY (target_node_id) REFERENCES knowledge_nodes(id),
    FOREIGN KEY (provenance_id) REFERENCES provenance(id)
  )
`;
|
|
81
|
+
/**
 * Legacy DDL: creates the `node_entity_links` table if it does not already
 * exist. `entity_id` is UNIQUE, so an entity can appear in at most one link row.
 */
const CREATE_NODE_ENTITY_LINKS_TABLE = `
  CREATE TABLE IF NOT EXISTS node_entity_links (
    id TEXT PRIMARY KEY,
    node_id TEXT NOT NULL,
    entity_id TEXT NOT NULL UNIQUE,
    document_id TEXT NOT NULL,
    similarity_score REAL NOT NULL DEFAULT 1.0,
    resolution_method TEXT,
    created_at TEXT NOT NULL,
    FOREIGN KEY (node_id) REFERENCES knowledge_nodes(id),
    FOREIGN KEY (entity_id) REFERENCES entities(id),
    FOREIGN KEY (document_id) REFERENCES documents(id)
  )
`;
|
|
95
|
+
/**
 * Legacy DDL: creates the `knowledge_nodes_fts` FTS5 virtual table if it does
 * not already exist. It is an external-content index over
 * `knowledge_nodes.canonical_name`, using the `porter unicode61` tokenizer.
 */
const CREATE_KNOWLEDGE_NODES_FTS_TABLE = `
  CREATE VIRTUAL TABLE IF NOT EXISTS knowledge_nodes_fts USING fts5(
    canonical_name,
    content='knowledge_nodes',
    content_rowid='rowid',
    tokenize='porter unicode61'
  )
`;
|
|
103
|
+
/**
 * Legacy DDL: the three triggers (after insert / delete / update of
 * canonical_name) that mirror `knowledge_nodes` changes into
 * `knowledge_nodes_fts`. Deletes and updates first issue the FTS5 'delete'
 * command for the old row; updates then insert the new value.
 */
const CREATE_KNOWLEDGE_NODES_FTS_TRIGGERS = [
    `CREATE TRIGGER IF NOT EXISTS knowledge_nodes_fts_ai AFTER INSERT ON knowledge_nodes BEGIN
    INSERT INTO knowledge_nodes_fts(rowid, canonical_name) VALUES (new.rowid, new.canonical_name);
  END`,
    `CREATE TRIGGER IF NOT EXISTS knowledge_nodes_fts_ad AFTER DELETE ON knowledge_nodes BEGIN
    INSERT INTO knowledge_nodes_fts(knowledge_nodes_fts, rowid, canonical_name) VALUES ('delete', old.rowid, old.canonical_name);
  END`,
    `CREATE TRIGGER IF NOT EXISTS knowledge_nodes_fts_au AFTER UPDATE OF canonical_name ON knowledge_nodes BEGIN
    INSERT INTO knowledge_nodes_fts(knowledge_nodes_fts, rowid, canonical_name) VALUES ('delete', old.rowid, old.canonical_name);
    INSERT INTO knowledge_nodes_fts(rowid, canonical_name) VALUES (new.rowid, new.canonical_name);
  END`,
];
|
|
115
|
+
/**
 * Legacy DDL: creates the `entity_extraction_segments` table if it does not
 * already exist. `(document_id, segment_index)` is UNIQUE and
 * `extraction_status` is limited to pending/processing/complete/failed.
 */
const CREATE_ENTITY_EXTRACTION_SEGMENTS_TABLE = `
  CREATE TABLE IF NOT EXISTS entity_extraction_segments (
    id TEXT PRIMARY KEY,
    document_id TEXT NOT NULL REFERENCES documents(id),
    ocr_result_id TEXT NOT NULL REFERENCES ocr_results(id),
    segment_index INTEGER NOT NULL,
    text TEXT NOT NULL,
    character_start INTEGER NOT NULL,
    character_end INTEGER NOT NULL,
    text_length INTEGER NOT NULL,
    overlap_previous INTEGER NOT NULL DEFAULT 0,
    overlap_next INTEGER NOT NULL DEFAULT 0,
    extraction_status TEXT NOT NULL DEFAULT 'pending'
      CHECK (extraction_status IN ('pending', 'processing', 'complete', 'failed')),
    entity_count INTEGER DEFAULT 0,
    extracted_at TEXT,
    error_message TEXT,
    provenance_id TEXT REFERENCES provenance(id),
    created_at TEXT NOT NULL DEFAULT (datetime('now')),
    UNIQUE(document_id, segment_index)
  )
`;
|
|
137
|
+
/**
 * Legacy DDL: creates the `entity_embeddings` table if it does not already
 * exist. Rows reference `knowledge_nodes(id)`; `model_name` defaults to
 * 'nomic-embed-text-v1.5'.
 */
const CREATE_ENTITY_EMBEDDINGS_TABLE = `
  CREATE TABLE IF NOT EXISTS entity_embeddings (
    id TEXT PRIMARY KEY,
    node_id TEXT NOT NULL REFERENCES knowledge_nodes(id),
    original_text TEXT NOT NULL,
    original_text_length INTEGER NOT NULL,
    entity_type TEXT NOT NULL,
    document_count INTEGER NOT NULL DEFAULT 1,
    model_name TEXT NOT NULL DEFAULT 'nomic-embed-text-v1.5',
    content_hash TEXT NOT NULL,
    created_at TEXT NOT NULL DEFAULT (datetime('now')),
    provenance_id TEXT REFERENCES provenance(id)
  )
`;
|
|
151
|
+
/**
 * Legacy DDL: creates the `vec_entity_embeddings` vec0 virtual table if it
 * does not already exist. It stores 768-dimension float vectors compared by
 * cosine distance, keyed by `entity_embedding_id`.
 */
const CREATE_VEC_ENTITY_EMBEDDINGS_TABLE = `
  CREATE VIRTUAL TABLE IF NOT EXISTS vec_entity_embeddings USING vec0(
    entity_embedding_id TEXT PRIMARY KEY,
    vector FLOAT[768] distance_metric=cosine
  )
`;
|
|
157
|
+
/**
 * Legacy DDL: creates the `corpus_intelligence` table if it does not already
 * exist. Holds a per-database summary, key actors, themes, counts, and the
 * model name, with a required reference to `provenance(id)`.
 */
const CREATE_CORPUS_INTELLIGENCE_TABLE = `
CREATE TABLE IF NOT EXISTS corpus_intelligence (
  id TEXT PRIMARY KEY,
  database_name TEXT NOT NULL,
  corpus_summary TEXT NOT NULL,
  key_actors TEXT NOT NULL,
  themes TEXT NOT NULL,
  narrative_arcs TEXT,
  entity_count INTEGER NOT NULL,
  document_count INTEGER NOT NULL,
  model TEXT NOT NULL,
  provenance_id TEXT NOT NULL REFERENCES provenance(id),
  created_at TEXT NOT NULL DEFAULT (datetime('now')),
  updated_at TEXT NOT NULL DEFAULT (datetime('now'))
)`;
|
|
172
|
+
/**
 * Legacy DDL: creates the `document_narratives` table if it does not already
 * exist. `document_id` is UNIQUE, so there is at most one narrative row per
 * document.
 */
const CREATE_DOCUMENT_NARRATIVES_TABLE = `
CREATE TABLE IF NOT EXISTS document_narratives (
  id TEXT PRIMARY KEY,
  document_id TEXT NOT NULL UNIQUE REFERENCES documents(id),
  narrative_text TEXT NOT NULL,
  entity_roster TEXT NOT NULL,
  corpus_context TEXT,
  synthesis_count INTEGER DEFAULT 0,
  model TEXT NOT NULL,
  provenance_id TEXT NOT NULL REFERENCES provenance(id),
  created_at TEXT NOT NULL DEFAULT (datetime('now')),
  updated_at TEXT NOT NULL DEFAULT (datetime('now'))
)`;
|
|
185
|
+
/**
 * Legacy DDL: creates the `entity_roles` table if it does not already exist.
 * Rows reference `knowledge_nodes(id)` and `provenance(id)`; `scope` defaults
 * to 'database'.
 */
const CREATE_ENTITY_ROLES_TABLE = `
CREATE TABLE IF NOT EXISTS entity_roles (
  id TEXT PRIMARY KEY,
  node_id TEXT NOT NULL REFERENCES knowledge_nodes(id),
  role TEXT NOT NULL,
  theme TEXT,
  importance_rank INTEGER,
  context_summary TEXT,
  scope TEXT NOT NULL DEFAULT 'database',
  scope_id TEXT,
  model TEXT NOT NULL,
  provenance_id TEXT NOT NULL REFERENCES provenance(id),
  created_at TEXT NOT NULL DEFAULT (datetime('now'))
)`;
|
|
199
|
+
import { configurePragmas, initializeSchemaVersion, createTables, createVecTable, createIndexes, createFTSTables, initializeDatabaseMetadata, loadSqliteVecExtension, } from './schema-helpers.js';
|
|
200
|
+
import { computeFTSContentHash } from '../../search/bm25.js';
|
|
201
|
+
/**
 * Read the schema version recorded in the database.
 *
 * First looks up the `schema_version` table in `sqlite_master`; when it is
 * present, reads the `version` column of the row with `id = 1`.
 *
 * @param db - Database instance
 * @returns Stored schema version, or 0 when the table, the row, or the
 *          version value is missing
 * @throws MigrationError if either query throws
 */
export function checkSchemaVersion(db) {
    try {
        // A database that has never been initialized has no schema_version table.
        const tableExists = db
            .prepare(`
        SELECT name FROM sqlite_master
        WHERE type = 'table' AND name = 'schema_version'
      `)
            .get();
        if (!tableExists) {
            return 0;
        }
        const row = db.prepare('SELECT version FROM schema_version WHERE id = ?').get(1);
        return row?.version ?? 0;
    }
    catch (error) {
        throw new MigrationError('Failed to check schema version', 'query', 'schema_version', error);
    }
}
|
|
225
|
+
/**
 * Get the schema version this build of the code targets.
 *
 * @returns The `SCHEMA_VERSION` constant imported from the schema definitions
 */
export function getCurrentSchemaVersion() {
    return SCHEMA_VERSION;
}
|
|
232
|
+
/**
 * Initialize the database with all tables, indexes, and configuration.
 *
 * Pragmas and the sqlite-vec extension are set up first, outside any
 * transaction. Everything else runs inside a single transaction, with the
 * schema version stamped last.
 *
 * This function is documented as idempotent - safe to call multiple times.
 *
 * @param db - Database instance from better-sqlite3
 * @throws MigrationError if any operation fails
 */
export function initializeDatabase(db) {
    // Step 1: Configure pragmas (must be outside transaction)
    configurePragmas(db);
    // Step 2: Load sqlite-vec extension (must be before virtual table creation, outside transaction)
    loadSqliteVecExtension(db);
    // Steps 3-8 wrapped in a transaction so that if the process crashes mid-init,
    // the DB won't have a version stamp with missing tables (MIG-5 fix).
    // Schema version is stamped LAST so a crash before completion leaves version=0,
    // causing a clean re-init on restart.
    const initTransaction = db.transaction(() => {
        // Step 3: Create tables in dependency order
        createTables(db);
        // Step 4: Create sqlite-vec virtual table
        createVecTable(db);
        // Step 5: Create indexes
        createIndexes(db);
        // Step 6: Create FTS5 tables and triggers
        createFTSTables(db);
        // Step 7: Initialize metadata
        initializeDatabaseMetadata(db);
        // Step 8: Initialize schema version tracking (LAST - so crash before here means version=0)
        initializeSchemaVersion(db);
    });
    initTransaction();
}
|
|
266
|
+
/**
 * Migrate from schema version 1 to version 2
 *
 * Changes in v2:
 * - provenance.type: Added 'IMAGE' and 'VLM_DESCRIPTION' to CHECK constraint
 * - provenance.source_type: Added 'IMAGE_EXTRACTION' and 'VLM' to CHECK constraint
 * - images: new table (plus its indexes) supporting the IMAGE provenance type
 *
 * SQLite cannot alter a CHECK constraint in place, so the provenance table is
 * rebuilt: create `provenance_new`, copy every row, drop the old table, rename,
 * and recreate the indexes.
 *
 * On failure the transaction is rolled back and foreign key enforcement is
 * re-enabled; the two cleanup steps are attempted independently so that a
 * failed ROLLBACK cannot leave foreign keys disabled on the connection.
 *
 * @param db - Database instance from better-sqlite3
 * @throws MigrationError if migration fails
 */
function migrateV1ToV2(db) {
    try {
        // Foreign keys must be disabled during table recreation to avoid
        // constraint failures when dropping the old table (other tables reference it).
        // The pragma is issued before BEGIN, outside the transaction.
        db.exec('PRAGMA foreign_keys = OFF');
        db.exec('BEGIN TRANSACTION');
        // Step 1: Create a new table with updated CHECK constraints
        db.exec(`
      CREATE TABLE provenance_new (
        id TEXT PRIMARY KEY,
        type TEXT NOT NULL CHECK (type IN ('DOCUMENT', 'OCR_RESULT', 'CHUNK', 'IMAGE', 'VLM_DESCRIPTION', 'EMBEDDING')),
        created_at TEXT NOT NULL,
        processed_at TEXT NOT NULL,
        source_file_created_at TEXT,
        source_file_modified_at TEXT,
        source_type TEXT NOT NULL CHECK (source_type IN ('FILE', 'OCR', 'CHUNKING', 'IMAGE_EXTRACTION', 'VLM', 'EMBEDDING')),
        source_path TEXT,
        source_id TEXT,
        root_document_id TEXT NOT NULL,
        location TEXT,
        content_hash TEXT NOT NULL,
        input_hash TEXT,
        file_hash TEXT,
        processor TEXT NOT NULL,
        processor_version TEXT NOT NULL,
        processing_params TEXT NOT NULL,
        processing_duration_ms INTEGER,
        processing_quality_score REAL,
        parent_id TEXT,
        parent_ids TEXT NOT NULL,
        chain_depth INTEGER NOT NULL,
        chain_path TEXT,
        FOREIGN KEY (source_id) REFERENCES provenance_new(id),
        FOREIGN KEY (parent_id) REFERENCES provenance_new(id)
      )
    `);
        // Step 2: Copy existing data to the new table
        // (SELECT * relies on the old and new tables sharing column order.)
        db.exec(`
      INSERT INTO provenance_new
      SELECT * FROM provenance
    `);
        // Step 3: Drop the old table
        db.exec('DROP TABLE provenance');
        // Step 4: Rename the new table to the original name
        db.exec('ALTER TABLE provenance_new RENAME TO provenance');
        // Step 5: Recreate indexes for the provenance table
        db.exec('CREATE INDEX IF NOT EXISTS idx_provenance_source_id ON provenance(source_id)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_provenance_type ON provenance(type)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_provenance_root_document_id ON provenance(root_document_id)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_provenance_parent_id ON provenance(parent_id)');
        // Step 6: Create images table (new in v2 - supports IMAGE provenance type)
        db.exec(`
      CREATE TABLE IF NOT EXISTS images (
        id TEXT PRIMARY KEY,
        document_id TEXT NOT NULL,
        ocr_result_id TEXT NOT NULL,
        page_number INTEGER NOT NULL,
        bbox_x REAL NOT NULL,
        bbox_y REAL NOT NULL,
        bbox_width REAL NOT NULL,
        bbox_height REAL NOT NULL,
        image_index INTEGER NOT NULL,
        format TEXT NOT NULL,
        width INTEGER NOT NULL,
        height INTEGER NOT NULL,
        extracted_path TEXT,
        file_size INTEGER,
        vlm_status TEXT NOT NULL DEFAULT 'pending' CHECK (vlm_status IN ('pending', 'processing', 'complete', 'failed')),
        vlm_description TEXT,
        vlm_structured_data TEXT,
        vlm_embedding_id TEXT,
        vlm_model TEXT,
        vlm_confidence REAL,
        vlm_processed_at TEXT,
        vlm_tokens_used INTEGER,
        context_text TEXT,
        provenance_id TEXT,
        created_at TEXT NOT NULL,
        error_message TEXT,
        FOREIGN KEY (document_id) REFERENCES documents(id) ON DELETE CASCADE,
        FOREIGN KEY (ocr_result_id) REFERENCES ocr_results(id) ON DELETE CASCADE,
        FOREIGN KEY (vlm_embedding_id) REFERENCES embeddings(id),
        FOREIGN KEY (provenance_id) REFERENCES provenance(id)
      )
    `);
        db.exec('CREATE INDEX IF NOT EXISTS idx_images_document_id ON images(document_id)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_images_ocr_result_id ON images(ocr_result_id)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_images_page ON images(document_id, page_number)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_images_vlm_status ON images(vlm_status)');
        db.exec(`CREATE INDEX IF NOT EXISTS idx_images_pending ON images(vlm_status) WHERE vlm_status = 'pending'`);
        db.exec('CREATE INDEX IF NOT EXISTS idx_images_provenance_id ON images(provenance_id)');
        // M-5: Verify FK integrity BEFORE commit so violations cause rollback
        const fkViolations = db.pragma('foreign_key_check');
        if (fkViolations.length > 0) {
            throw new Error(`Foreign key integrity check failed after v1->v2 migration: ${fkViolations.length} violation(s). ` +
                `First: ${JSON.stringify(fkViolations[0])}`);
        }
        db.exec('COMMIT');
        db.exec('PRAGMA foreign_keys = ON');
    }
    catch (error) {
        // Rollback on error. ROLLBACK can itself throw (for example when no
        // transaction is active), so it gets its own try block and must not
        // prevent foreign keys from being re-enabled below.
        try {
            db.exec('ROLLBACK');
        }
        catch (rollbackErr) {
            console.error('[migrations] Rollback failed:', rollbackErr instanceof Error ? rollbackErr.message : String(rollbackErr));
        }
        try {
            db.exec('PRAGMA foreign_keys = ON');
        }
        catch (pragmaErr) {
            console.error('[migrations] Failed to re-enable foreign keys:', pragmaErr instanceof Error ? pragmaErr.message : String(pragmaErr));
        }
        const cause = error instanceof Error ? error.message : String(error);
        throw new MigrationError(`Failed to migrate provenance table from v1 to v2: ${cause}`, 'migrate', 'provenance', error);
    }
}
|
|
396
|
+
/**
 * Migrate from schema version 2 to version 3
 *
 * Changes in v3:
 * - embeddings.chunk_id: Changed from NOT NULL to nullable
 * - embeddings.image_id: New column (nullable) for VLM description embeddings
 * - embeddings: Added CHECK constraint (chunk_id IS NOT NULL OR image_id IS NOT NULL)
 * - embeddings: Added FOREIGN KEY (image_id) REFERENCES images(id)
 *
 * This migration allows embeddings to reference either chunks (text embeddings)
 * or images (VLM description embeddings). The table is rebuilt via
 * `embeddings_new`; existing rows are copied with `image_id` set to NULL.
 *
 * On failure the transaction is rolled back and foreign key enforcement is
 * re-enabled; the two cleanup steps are attempted independently so that a
 * failed ROLLBACK cannot leave foreign keys disabled on the connection.
 *
 * @param db - Database instance from better-sqlite3
 * @throws MigrationError if migration fails
 */
function migrateV2ToV3(db) {
    try {
        // Foreign keys must be disabled during table recreation
        db.exec('PRAGMA foreign_keys = OFF');
        db.exec('BEGIN TRANSACTION');
        // Step 1: Create new embeddings table with updated schema
        db.exec(`
      CREATE TABLE embeddings_new (
        id TEXT PRIMARY KEY,
        chunk_id TEXT,
        image_id TEXT,
        document_id TEXT NOT NULL,
        original_text TEXT NOT NULL,
        original_text_length INTEGER NOT NULL,
        source_file_path TEXT NOT NULL,
        source_file_name TEXT NOT NULL,
        source_file_hash TEXT NOT NULL,
        page_number INTEGER,
        page_range TEXT,
        character_start INTEGER NOT NULL,
        character_end INTEGER NOT NULL,
        chunk_index INTEGER NOT NULL,
        total_chunks INTEGER NOT NULL,
        model_name TEXT NOT NULL,
        model_version TEXT NOT NULL,
        task_type TEXT NOT NULL CHECK (task_type IN ('search_document', 'search_query')),
        inference_mode TEXT NOT NULL CHECK (inference_mode = 'local'),
        gpu_device TEXT,
        provenance_id TEXT NOT NULL UNIQUE,
        content_hash TEXT NOT NULL,
        created_at TEXT NOT NULL,
        generation_duration_ms INTEGER,
        FOREIGN KEY (chunk_id) REFERENCES chunks(id),
        FOREIGN KEY (image_id) REFERENCES images(id),
        FOREIGN KEY (document_id) REFERENCES documents(id),
        FOREIGN KEY (provenance_id) REFERENCES provenance(id),
        CHECK (chunk_id IS NOT NULL OR image_id IS NOT NULL)
      )
    `);
        // Step 2: Copy existing data (image_id will be NULL for existing embeddings)
        db.exec(`
      INSERT INTO embeddings_new (
        id, chunk_id, image_id, document_id, original_text, original_text_length,
        source_file_path, source_file_name, source_file_hash, page_number, page_range,
        character_start, character_end, chunk_index, total_chunks, model_name,
        model_version, task_type, inference_mode, gpu_device, provenance_id,
        content_hash, created_at, generation_duration_ms
      )
      SELECT
        id, chunk_id, NULL, document_id, original_text, original_text_length,
        source_file_path, source_file_name, source_file_hash, page_number, page_range,
        character_start, character_end, chunk_index, total_chunks, model_name,
        model_version, task_type, inference_mode, gpu_device, provenance_id,
        content_hash, created_at, generation_duration_ms
      FROM embeddings
    `);
        // Step 3: Drop old table
        db.exec('DROP TABLE embeddings');
        // Step 4: Rename new table
        db.exec('ALTER TABLE embeddings_new RENAME TO embeddings');
        // Step 5: Recreate indexes
        db.exec('CREATE INDEX IF NOT EXISTS idx_embeddings_chunk_id ON embeddings(chunk_id)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_embeddings_image_id ON embeddings(image_id)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_embeddings_document_id ON embeddings(document_id)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_embeddings_source_file ON embeddings(source_file_path)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_embeddings_page ON embeddings(page_number)');
        // M-5: Verify FK integrity BEFORE commit so violations cause rollback
        const fkViolations = db.pragma('foreign_key_check');
        if (fkViolations.length > 0) {
            throw new Error(`Foreign key integrity check failed after v2->v3 migration: ${fkViolations.length} violation(s). ` +
                `First: ${JSON.stringify(fkViolations[0])}`);
        }
        db.exec('COMMIT');
        db.exec('PRAGMA foreign_keys = ON');
    }
    catch (error) {
        // ROLLBACK can itself throw (for example when no transaction is active),
        // so it gets its own try block and must not prevent foreign keys from
        // being re-enabled below.
        try {
            db.exec('ROLLBACK');
        }
        catch (rollbackErr) {
            console.error('[migrations] Rollback failed:', rollbackErr instanceof Error ? rollbackErr.message : String(rollbackErr));
        }
        try {
            db.exec('PRAGMA foreign_keys = ON');
        }
        catch (pragmaErr) {
            console.error('[migrations] Failed to re-enable foreign keys:', pragmaErr instanceof Error ? pragmaErr.message : String(pragmaErr));
        }
        const cause = error instanceof Error ? error.message : String(error);
        throw new MigrationError(`Failed to migrate embeddings table from v2 to v3: ${cause}`, 'migrate', 'embeddings', error);
    }
}
|
|
498
|
+
/**
 * Migrate from schema version 3 to version 4
 *
 * Changes in v4:
 * - chunks_fts: FTS5 virtual table for BM25 full-text search
 * - chunks_fts_ai/ad/au: Sync triggers to keep FTS5 in sync with chunks
 * - fts_index_metadata: Audit trail for FTS index rebuilds
 *
 * After creating the objects, the index is rebuilt from the existing chunks
 * and a metadata row (id = 1) records the rebuild time, chunk count, and
 * content hash.
 *
 * @param db - Database instance from better-sqlite3
 * @throws MigrationError if migration fails
 */
function migrateV3ToV4(db) {
    try {
        db.exec('BEGIN TRANSACTION');
        // 1. Create FTS5 virtual table
        db.exec(CREATE_CHUNKS_FTS_TABLE);
        // 2. Create sync triggers
        for (const trigger of CREATE_FTS_TRIGGERS) {
            db.exec(trigger);
        }
        // 3. Create metadata table
        db.exec(CREATE_FTS_INDEX_METADATA);
        // 4. Populate FTS5 from existing chunks
        db.exec("INSERT INTO chunks_fts(chunks_fts) VALUES('rebuild')");
        // 5. Count indexed chunks and store metadata
        const count = db.prepare('SELECT COUNT(*) as cnt FROM chunks').get();
        const contentHash = computeFTSContentHash(db);
        db.prepare(`
      INSERT OR REPLACE INTO fts_index_metadata (id, last_rebuild_at, chunks_indexed, tokenizer, schema_version, content_hash)
      VALUES (1, ?, ?, 'porter unicode61', 4, ?)
    `).run(new Date().toISOString(), count.cnt, contentHash);
        db.exec('COMMIT');
    }
    catch (error) {
        try {
            db.exec('ROLLBACK');
        }
        catch (rollbackErr) {
            // Keep the original failure as the reported error; only log this one.
            console.error('[migrations] Rollback failed:', rollbackErr instanceof Error ? rollbackErr.message : String(rollbackErr));
        }
        const cause = error instanceof Error ? error.message : String(error);
        throw new MigrationError(`Failed to migrate from v3 to v4 (FTS5 setup): ${cause}`, 'migrate', 'chunks_fts', error);
    }
}
|
|
542
|
+
/**
 * Migrate from schema version 4 to version 5
 *
 * Changes in v5:
 * - images.block_type: Datalab block type (Figure, Picture, PageHeader, etc.)
 * - images.is_header_footer: Boolean flag for header/footer images
 * - images.content_hash: SHA-256 of image bytes for deduplication
 * - idx_images_content_hash: Index for fast dedup lookups
 *
 * Columns that already exist are skipped, so the migration can be retried
 * after a partial failure. Every step, including the initial pragma and the
 * column lookup, runs inside the try block so that any failure re-enables
 * foreign keys and surfaces as a MigrationError.
 *
 * @param db - Database instance from better-sqlite3
 * @throws MigrationError if migration fails
 */
function migrateV4ToV5(db) {
    try {
        db.exec('PRAGMA foreign_keys = OFF');
        // Check existing columns for idempotency (safe on retry after partial failure)
        const columns = db.prepare('PRAGMA table_info(images)').all();
        const columnNames = new Set(columns.map((c) => c.name));
        const transaction = db.transaction(() => {
            if (!columnNames.has('block_type')) {
                db.exec('ALTER TABLE images ADD COLUMN block_type TEXT');
            }
            if (!columnNames.has('is_header_footer')) {
                db.exec('ALTER TABLE images ADD COLUMN is_header_footer INTEGER NOT NULL DEFAULT 0');
            }
            if (!columnNames.has('content_hash')) {
                db.exec('ALTER TABLE images ADD COLUMN content_hash TEXT');
            }
            db.exec('CREATE INDEX IF NOT EXISTS idx_images_content_hash ON images(content_hash)');
            // M-5: FK integrity check inside transaction so violations cause rollback
            const fkViolations = db.pragma('foreign_key_check');
            if (fkViolations.length > 0) {
                throw new Error(`Foreign key integrity check failed after v4->v5 migration: ${fkViolations.length} violation(s). ` +
                    `First: ${JSON.stringify(fkViolations[0])}`);
            }
        });
        transaction();
        db.exec('PRAGMA foreign_keys = ON');
    }
    catch (error) {
        // Restore FK enforcement without letting a failure here mask the
        // error that actually aborted the migration.
        try {
            db.exec('PRAGMA foreign_keys = ON');
        }
        catch (pragmaErr) {
            console.error('[migrations] Failed to re-enable foreign keys:', pragmaErr instanceof Error ? pragmaErr.message : String(pragmaErr));
        }
        const cause = error instanceof Error ? error.message : String(error);
        throw new MigrationError(`Failed to migrate from v4 to v5 (image filtering columns): ${cause}`, 'migrate', 'images', error);
    }
}
|
|
587
|
+
/**
|
|
588
|
+
* Migrate from schema version 5 to version 6
|
|
589
|
+
*
|
|
590
|
+
* Changes in v6:
|
|
591
|
+
* - vlm_fts: FTS5 virtual table for VLM description full-text search
|
|
592
|
+
* - vlm_fts_ai/ad/au: Sync triggers on embeddings (where image_id IS NOT NULL)
|
|
593
|
+
* - fts_index_metadata: Remove CHECK (id = 1) constraint to allow id=2 row for VLM FTS
|
|
594
|
+
* - fts_index_metadata id=2: VLM FTS metadata row
|
|
595
|
+
*
|
|
596
|
+
* @param db - Database instance from better-sqlite3
|
|
597
|
+
* @throws MigrationError if migration fails
|
|
598
|
+
*/
|
|
599
|
+
function migrateV5ToV6(db) {
    // NOTE(review): expected names of the triggers created by
    // CREATE_VLM_FTS_TRIGGERS (vlm_fts_ai/ad/au, the same names the v9->v10
    // migration drops and recreates) -- confirm against the schema definitions.
    const vlmTriggerNames = ['vlm_fts_ai', 'vlm_fts_ad', 'vlm_fts_au'];
    // Returns the sqlite_master row for a schema object, or undefined when absent.
    const findSchemaObject = (type, name) => db
        .prepare('SELECT name FROM sqlite_master WHERE type = ? AND name = ?')
        .get(type, name);
    try {
        // The DDL statements below run outside a transaction, so an interrupted
        // run can leave any prefix of them applied. Every step is therefore
        // guarded by its own look at the current schema.
        const vlmFtsExists = findSchemaObject('table', 'vlm_fts');
        const newMetadataExists = findSchemaObject('table', 'fts_index_metadata');
        const oldBackupExists = findSchemaObject('table', 'fts_index_metadata_old');
        if (!vlmFtsExists) {
            // Only rename if the backup doesn't already exist from a previous interrupted run
            if (!oldBackupExists && newMetadataExists) {
                db.exec('ALTER TABLE fts_index_metadata RENAME TO fts_index_metadata_old');
            }
            // Create new metadata table (without CHECK (id = 1) constraint)
            db.exec(`
                CREATE TABLE IF NOT EXISTS fts_index_metadata (
                    id INTEGER PRIMARY KEY,
                    last_rebuild_at TEXT,
                    chunks_indexed INTEGER NOT NULL DEFAULT 0,
                    tokenizer TEXT NOT NULL DEFAULT 'porter unicode61',
                    schema_version INTEGER NOT NULL DEFAULT 7,
                    content_hash TEXT
                )
            `);
            // Create VLM FTS5 virtual table
            db.exec(CREATE_VLM_FTS_TABLE);
        }
        // The triggers are created after vlm_fts, so "vlm_fts exists" does not
        // prove they exist: a run interrupted in between would otherwise never
        // get them. Check them separately and rebuild the whole set if any is missing.
        const triggerMissing = vlmTriggerNames.some((name) => !findSchemaObject('trigger', name));
        if (triggerMissing) {
            for (const name of vlmTriggerNames) {
                db.exec(`DROP TRIGGER IF EXISTS ${name}`);
            }
            for (const trigger of CREATE_VLM_FTS_TRIGGERS) {
                db.exec(trigger);
            }
        }
        // DML phase: always safe to retry (uses INSERT OR IGNORE, checks before DROP)
        db.exec('BEGIN TRANSACTION');
        try {
            // Copy data from old table if it still exists and new table needs it
            if (findSchemaObject('table', 'fts_index_metadata_old')) {
                // Only copy if new table doesn't already have the data (id=1 row)
                const hasChunkMetadata = db.prepare('SELECT id FROM fts_index_metadata WHERE id = 1').get();
                if (!hasChunkMetadata) {
                    db.exec('INSERT OR IGNORE INTO fts_index_metadata SELECT * FROM fts_index_metadata_old');
                }
                // Safe to drop backup now that data is in the new table
                db.exec('DROP TABLE fts_index_metadata_old');
            }
            // Insert VLM FTS metadata row (id=2)
            const now = new Date().toISOString();
            db.prepare(`
                INSERT OR IGNORE INTO fts_index_metadata (id, last_rebuild_at, chunks_indexed, tokenizer, schema_version, content_hash)
                VALUES (2, ?, 0, 'porter unicode61', 6, NULL)
            `).run(now);
            // Populate vlm_fts from existing VLM embeddings
            const vlmCount = db
                .prepare('SELECT COUNT(*) as cnt FROM embeddings WHERE image_id IS NOT NULL')
                .get();
            if (vlmCount.cnt > 0) {
                // Only populate if not already done (check FTS row count)
                const ftsCount = db.prepare('SELECT COUNT(*) as cnt FROM vlm_fts').get();
                if (ftsCount.cnt === 0) {
                    db.exec(`
                        INSERT INTO vlm_fts(rowid, original_text)
                        SELECT rowid, original_text FROM embeddings WHERE image_id IS NOT NULL
                    `);
                }
                // Update VLM FTS metadata with count
                db.prepare('UPDATE fts_index_metadata SET chunks_indexed = ?, last_rebuild_at = ? WHERE id = 2').run(vlmCount.cnt, now);
            }
            db.exec('COMMIT');
        }
        catch (dmlError) {
            try {
                db.exec('ROLLBACK');
            }
            catch (rollbackErr) {
                console.error('[migrations] Rollback failed:', rollbackErr instanceof Error ? rollbackErr.message : String(rollbackErr));
            }
            throw dmlError;
        }
    }
    catch (error) {
        const cause = error instanceof Error ? error.message : String(error);
        throw new MigrationError(`Failed to migrate from v5 to v6 (VLM FTS setup): ${cause}`, 'migrate', 'vlm_fts', error);
    }
}
|
|
690
|
+
/**
 * Migrate from schema version 6 to version 7
 *
 * Changes in v7:
 * - provenance.source_type: Added 'VLM_DEDUP' to CHECK constraint
 *   This allows VLM pipeline to record deduplicated image results with
 *   a distinct source_type for provenance tracking.
 *
 * The constraint is changed by rebuilding the table inside one transaction:
 * create provenance_new, copy all rows, drop provenance, rename, recreate the
 * indexes, then run foreign_key_check before COMMIT.
 *
 * On failure the transaction is rolled back and foreign-key enforcement is
 * re-enabled as a separate guarded step, so a failing ROLLBACK (for example
 * when SQLite has already rolled the transaction back itself) cannot leave
 * enforcement switched off on this connection.
 *
 * @param db - Database instance from better-sqlite3
 * @throws MigrationError if migration fails
 */
function migrateV6ToV7(db) {
    try {
        // Must be issued before BEGIN: the pragma is a no-op inside a transaction.
        db.exec('PRAGMA foreign_keys = OFF');
        db.exec('BEGIN TRANSACTION');
        // Step 1: Create new provenance table with VLM_DEDUP in source_type CHECK
        db.exec(`
            CREATE TABLE provenance_new (
                id TEXT PRIMARY KEY,
                type TEXT NOT NULL CHECK (type IN ('DOCUMENT', 'OCR_RESULT', 'CHUNK', 'IMAGE', 'VLM_DESCRIPTION', 'EMBEDDING')),
                created_at TEXT NOT NULL,
                processed_at TEXT NOT NULL,
                source_file_created_at TEXT,
                source_file_modified_at TEXT,
                source_type TEXT NOT NULL CHECK (source_type IN ('FILE', 'OCR', 'CHUNKING', 'IMAGE_EXTRACTION', 'VLM', 'VLM_DEDUP', 'EMBEDDING')),
                source_path TEXT,
                source_id TEXT,
                root_document_id TEXT NOT NULL,
                location TEXT,
                content_hash TEXT NOT NULL,
                input_hash TEXT,
                file_hash TEXT,
                processor TEXT NOT NULL,
                processor_version TEXT NOT NULL,
                processing_params TEXT NOT NULL,
                processing_duration_ms INTEGER,
                processing_quality_score REAL,
                parent_id TEXT,
                parent_ids TEXT NOT NULL,
                chain_depth INTEGER NOT NULL,
                chain_path TEXT,
                FOREIGN KEY (source_id) REFERENCES provenance_new(id),
                FOREIGN KEY (parent_id) REFERENCES provenance_new(id)
            )
        `);
        // Step 2: Copy existing data
        db.exec(`
            INSERT INTO provenance_new
            SELECT * FROM provenance
        `);
        // Step 3: Drop old table
        db.exec('DROP TABLE provenance');
        // Step 4: Rename new table
        db.exec('ALTER TABLE provenance_new RENAME TO provenance');
        // Step 5: Recreate indexes (they were dropped together with the old table)
        db.exec('CREATE INDEX IF NOT EXISTS idx_provenance_source_id ON provenance(source_id)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_provenance_type ON provenance(type)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_provenance_root_document_id ON provenance(root_document_id)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_provenance_parent_id ON provenance(parent_id)');
        // M-5: Verify FK integrity BEFORE commit so violations cause rollback
        const fkViolations = db.pragma('foreign_key_check');
        if (fkViolations.length > 0) {
            throw new Error(`Foreign key integrity check failed after v6->v7 migration: ${fkViolations.length} violation(s). ` +
                `First: ${JSON.stringify(fkViolations[0])}`);
        }
        db.exec('COMMIT');
        db.exec('PRAGMA foreign_keys = ON');
    }
    catch (error) {
        try {
            db.exec('ROLLBACK');
        }
        catch (rollbackErr) {
            console.error('[migrations] Rollback failed:', rollbackErr instanceof Error ? rollbackErr.message : String(rollbackErr));
        }
        // Kept apart from ROLLBACK so enforcement is restored even when the
        // rollback itself throws.
        try {
            db.exec('PRAGMA foreign_keys = ON');
        }
        catch (pragmaErr) {
            console.error('[migrations] Failed to re-enable foreign keys:', pragmaErr instanceof Error ? pragmaErr.message : String(pragmaErr));
        }
        const cause = error instanceof Error ? error.message : String(error);
        throw new MigrationError(`Failed to migrate provenance table from v6 to v7: ${cause}`, 'migrate', 'provenance', error);
    }
}
|
|
770
|
+
/**
 * Migrate from schema version 9 to version 10
 *
 * Changes in v10:
 * - embeddings.extraction_id: New column for extraction-sourced embeddings
 * - embeddings CHECK: Now allows extraction_id-only rows
 * - embeddings FK: extraction_id REFERENCES extractions(id)
 * - idx_embeddings_extraction_id: New index
 *
 * The embeddings table is rebuilt inside one transaction. Rows are copied
 * together with their rowid: the VLM FTS triggers below key vlm_fts entries by
 * embeddings.rowid, so letting SQLite renumber the rows during the copy would
 * detach existing vlm_fts entries from the rows they describe.
 *
 * On failure the transaction is rolled back and foreign-key enforcement is
 * re-enabled as a separate guarded step, so a failing ROLLBACK cannot leave
 * enforcement switched off on this connection.
 *
 * @param db - Database instance from better-sqlite3
 * @throws MigrationError if migration fails
 */
function migrateV9ToV10(db) {
    // Columns that exist in both the old and the new table and are copied as-is.
    const carriedColumns = [
        'id', 'chunk_id', 'image_id', 'document_id', 'original_text', 'original_text_length',
        'source_file_path', 'source_file_name', 'source_file_hash', 'page_number', 'page_range',
        'character_start', 'character_end', 'chunk_index', 'total_chunks', 'model_name',
        'model_version', 'task_type', 'inference_mode', 'gpu_device', 'provenance_id',
        'content_hash', 'created_at', 'generation_duration_ms',
    ].join(', ');
    try {
        // Must be issued before BEGIN: the pragma is a no-op inside a transaction.
        db.exec('PRAGMA foreign_keys = OFF');
        db.exec('BEGIN TRANSACTION');
        // Step 1: Create new embeddings table with extraction_id + updated CHECK
        db.exec(`
            CREATE TABLE embeddings_new (
                id TEXT PRIMARY KEY,
                chunk_id TEXT,
                image_id TEXT,
                extraction_id TEXT,
                document_id TEXT NOT NULL,
                original_text TEXT NOT NULL,
                original_text_length INTEGER NOT NULL,
                source_file_path TEXT NOT NULL,
                source_file_name TEXT NOT NULL,
                source_file_hash TEXT NOT NULL,
                page_number INTEGER,
                page_range TEXT,
                character_start INTEGER NOT NULL,
                character_end INTEGER NOT NULL,
                chunk_index INTEGER NOT NULL,
                total_chunks INTEGER NOT NULL,
                model_name TEXT NOT NULL,
                model_version TEXT NOT NULL,
                task_type TEXT NOT NULL CHECK (task_type IN ('search_document', 'search_query')),
                inference_mode TEXT NOT NULL CHECK (inference_mode = 'local'),
                gpu_device TEXT,
                provenance_id TEXT NOT NULL UNIQUE,
                content_hash TEXT NOT NULL,
                created_at TEXT NOT NULL,
                generation_duration_ms INTEGER,
                FOREIGN KEY (chunk_id) REFERENCES chunks(id),
                FOREIGN KEY (image_id) REFERENCES images(id),
                FOREIGN KEY (extraction_id) REFERENCES extractions(id),
                FOREIGN KEY (document_id) REFERENCES documents(id),
                FOREIGN KEY (provenance_id) REFERENCES provenance(id),
                CHECK (chunk_id IS NOT NULL OR image_id IS NOT NULL OR extraction_id IS NOT NULL)
            )
        `);
        // Step 2: Copy existing data (extraction_id = NULL for all existing
        // embeddings), keeping each row's rowid so vlm_fts stays aligned.
        db.exec(`
            INSERT INTO embeddings_new (rowid, extraction_id, ${carriedColumns})
            SELECT rowid, NULL, ${carriedColumns}
            FROM embeddings
        `);
        // Step 3: Drop old table
        db.exec('DROP TABLE embeddings');
        // Step 4: Rename new table
        db.exec('ALTER TABLE embeddings_new RENAME TO embeddings');
        // Step 5: Recreate all embeddings indexes
        db.exec('CREATE INDEX IF NOT EXISTS idx_embeddings_chunk_id ON embeddings(chunk_id)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_embeddings_image_id ON embeddings(image_id)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_embeddings_extraction_id ON embeddings(extraction_id)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_embeddings_document_id ON embeddings(document_id)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_embeddings_source_file ON embeddings(source_file_path)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_embeddings_page ON embeddings(page_number)');
        // Step 6: Recreate VLM FTS triggers. They were attached to the old
        // embeddings table and were lost when it was dropped. Only done when
        // vlm_fts exists.
        const vlmFtsExists = db
            .prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='vlm_fts'")
            .get();
        if (vlmFtsExists) {
            // Drop old triggers if they exist
            db.exec('DROP TRIGGER IF EXISTS vlm_fts_ai');
            db.exec('DROP TRIGGER IF EXISTS vlm_fts_ad');
            db.exec('DROP TRIGGER IF EXISTS vlm_fts_au');
            // Recreate
            db.exec(`CREATE TRIGGER IF NOT EXISTS vlm_fts_ai AFTER INSERT ON embeddings
                WHEN new.image_id IS NOT NULL BEGIN
                INSERT INTO vlm_fts(rowid, original_text) VALUES (new.rowid, new.original_text);
                END`);
            db.exec(`CREATE TRIGGER IF NOT EXISTS vlm_fts_ad AFTER DELETE ON embeddings
                WHEN old.image_id IS NOT NULL BEGIN
                INSERT INTO vlm_fts(vlm_fts, rowid, original_text) VALUES('delete', old.rowid, old.original_text);
                END`);
            db.exec(`CREATE TRIGGER IF NOT EXISTS vlm_fts_au AFTER UPDATE OF original_text ON embeddings
                WHEN new.image_id IS NOT NULL BEGIN
                INSERT INTO vlm_fts(vlm_fts, rowid, original_text) VALUES('delete', old.rowid, old.original_text);
                INSERT INTO vlm_fts(rowid, original_text) VALUES (new.rowid, new.original_text);
                END`);
        }
        // M-5: Verify FK integrity BEFORE commit so violations cause rollback
        const fkViolations = db.pragma('foreign_key_check');
        if (fkViolations.length > 0) {
            throw new Error(`Foreign key integrity check failed after v9->v10 migration: ${fkViolations.length} violation(s). ` +
                `First: ${JSON.stringify(fkViolations[0])}`);
        }
        db.exec('COMMIT');
        db.exec('PRAGMA foreign_keys = ON');
    }
    catch (error) {
        try {
            db.exec('ROLLBACK');
        }
        catch (rollbackErr) {
            console.error('[migrations] Rollback failed:', rollbackErr instanceof Error ? rollbackErr.message : String(rollbackErr));
        }
        // Kept apart from ROLLBACK so enforcement is restored even when the
        // rollback itself throws.
        try {
            db.exec('PRAGMA foreign_keys = ON');
        }
        catch (pragmaErr) {
            console.error('[migrations] Failed to re-enable foreign keys:', pragmaErr instanceof Error ? pragmaErr.message : String(pragmaErr));
        }
        const cause = error instanceof Error ? error.message : String(error);
        throw new MigrationError(`Failed to migrate from v9 to v10 (extraction embeddings): ${cause}`, 'migrate', 'embeddings', error);
    }
}
|
|
897
|
+
/**
 * Migrate from schema version 10 to version 11
 *
 * Changes in v11:
 * - ocr_results.json_blocks: JSON block hierarchy from Datalab
 * - ocr_results.extras_json: Extra metadata (cost_breakdown, Datalab metadata)
 *
 * Uses ALTER TABLE ADD COLUMN (nullable TEXT columns, no table recreation needed).
 * Columns that already exist are skipped, so the migration can be re-run.
 *
 * Re-enabling foreign keys on the failure path is guarded: if that statement
 * throws as well, the failure is logged and the original error is still
 * reported as a MigrationError instead of being masked.
 *
 * @param db - Database instance from better-sqlite3
 * @throws MigrationError if migration fails
 */
function migrateV10ToV11(db) {
    try {
        db.exec('PRAGMA foreign_keys = OFF');
        const columns = db.prepare('PRAGMA table_info(ocr_results)').all();
        const existing = new Set(columns.map((c) => c.name));
        const addMissingColumns = db.transaction(() => {
            for (const column of ['json_blocks', 'extras_json']) {
                if (!existing.has(column)) {
                    db.exec(`ALTER TABLE ocr_results ADD COLUMN ${column} TEXT`);
                }
            }
            // M-5: FK integrity check inside transaction so violations cause rollback
            const fkViolations = db.pragma('foreign_key_check');
            if (fkViolations.length > 0) {
                throw new Error(`Foreign key integrity check failed after v10->v11 migration: ${fkViolations.length} violation(s). ` +
                    `First: ${JSON.stringify(fkViolations[0])}`);
            }
        });
        addMissingColumns();
        db.exec('PRAGMA foreign_keys = ON');
    }
    catch (error) {
        // Best effort: never let a failing pragma hide the real cause.
        try {
            db.exec('PRAGMA foreign_keys = ON');
        }
        catch (pragmaErr) {
            console.error('[migrations] Failed to re-enable foreign keys:', pragmaErr instanceof Error ? pragmaErr.message : String(pragmaErr));
        }
        const cause = error instanceof Error ? error.message : String(error);
        throw new MigrationError(`Failed to migrate from v10 to v11 (json_blocks, extras_json): ${cause}`, 'migrate', 'ocr_results', error);
    }
}
|
|
937
|
+
/**
 * Bring a database up to the newest schema version.
 *
 * Reads the stored version once and then:
 * - version 0: treats the database as fresh and initializes it in full;
 * - version equal to SCHEMA_VERSION: does nothing;
 * - version above SCHEMA_VERSION: refuses to continue;
 * - otherwise: runs every pending migration step in ascending order.
 *
 * @param db - Database instance from better-sqlite3
 * @throws MigrationError if the database is newer than this code supports, or
 *   if recording a new schema version fails; errors thrown by the individual
 *   migration steps propagate unchanged
 */
export function migrateToLatest(db) {
    const startVersion = checkSchemaVersion(db);
    if (startVersion === 0) {
        // Nothing to migrate: build the full schema from scratch.
        initializeDatabase(db);
        return;
    }
    if (startVersion === SCHEMA_VERSION) {
        return;
    }
    if (startVersion > SCHEMA_VERSION) {
        throw new MigrationError(`Database schema version (${String(startVersion)}) is newer than supported version (${String(SCHEMA_VERSION)}). ` +
            'Please update the application.', 'version_check', undefined);
    }
    // Persists the version reached right after a step succeeds, so that a crash
    // between two steps only leaves the remaining steps to re-run (MIG-1 fix).
    const bumpVersion = (targetVersion) => {
        try {
            db.prepare('UPDATE schema_version SET version = ?, updated_at = ? WHERE id = 1').run(targetVersion, new Date().toISOString());
        }
        catch (error) {
            throw new MigrationError(`Failed to update schema version to ${String(targetVersion)} after migration`, 'update', 'schema_version', error);
        }
    };
    // Ordered steps: [version reached, step runner, step records the version itself].
    // Runners are thunks so a migration function is only looked up when its step runs.
    const steps = [
        [2, () => migrateV1ToV2(db)],
        [3, () => migrateV2ToV3(db)],
        [4, () => migrateV3ToV4(db)],
        [5, () => migrateV4ToV5(db)],
        [6, () => migrateV5ToV6(db)],
        [7, () => migrateV6ToV7(db)],
        [8, () => migrateV7ToV8(db)],
        [9, () => migrateV8ToV9(db)],
        [10, () => migrateV9ToV10(db)],
        [11, () => migrateV10ToV11(db)],
        [12, () => migrateV11ToV12(db)],
        [13, () => migrateV12ToV13(db)],
        [14, () => migrateV13ToV14(db)],
        [15, () => migrateV14ToV15(db)],
        [16, () => migrateV15ToV16(db)],
        [17, () => migrateV16ToV17(db)],
        [18, () => migrateV17ToV18(db)],
        [19, () => migrateV18ToV19(db)],
        [20, () => migrateV19ToV20(db)],
        [21, () => migrateV20ToV21(db)],
        [22, () => migrateV21ToV22(db)],
        [23, () => migrateV22ToV23(db)],
        [24, () => migrateV23ToV24(db)],
        [25, () => migrateV24ToV25(db)],
        [26, () => migrateV25ToV26(db)],
        [27, () => migrateV26ToV27(db)],
        [28, () => migrateV27ToV28(db)],
        [29, () => migrateV28ToV29(db)],
        [30, () => migrateV29ToV30(db)],
        // M-6: bumpVersion is handed to migrateV30ToV31 so the version bump runs
        // inside the same transaction as the migration body, making them atomic.
        [31, () => migrateV30ToV31(db, bumpVersion), true],
        [32, () => migrateV31ToV32(db)],
    ];
    for (const [targetVersion, runStep, recordsOwnVersion = false] of steps) {
        // Always compared against the version read at the start, not a running one.
        if (startVersion < targetVersion) {
            runStep();
            if (!recordsOwnVersion) {
                bumpVersion(targetVersion);
            }
        }
    }
}
|
|
1098
|
+
/**
 * Migrate from schema version 7 to version 8
 *
 * Changes in v8:
 * - extractions: New table for structured data extracted via page_schema
 * - form_fills: New table for Datalab /fill API results
 * - documents: Added doc_title, doc_author, doc_subject columns
 * - provenance.type: Added 'EXTRACTION', 'FORM_FILL' to CHECK constraint
 * - provenance.source_type: Added 'EXTRACTION', 'FORM_FILL' to CHECK constraint
 * - New indexes: idx_extractions_document_id, idx_form_fills_status, idx_documents_doc_title
 *
 * Everything runs inside one transaction; the provenance CHECK constraints are
 * changed by rebuilding the table, and foreign_key_check is run before COMMIT.
 *
 * On failure the transaction is rolled back and foreign-key enforcement is
 * re-enabled as a separate guarded step, so a failing ROLLBACK cannot leave
 * enforcement switched off on this connection.
 *
 * @param db - Database instance from better-sqlite3
 * @throws MigrationError if migration fails
 */
function migrateV7ToV8(db) {
    try {
        // Must be issued before BEGIN: the pragma is a no-op inside a transaction.
        db.exec('PRAGMA foreign_keys = OFF');
        db.exec('BEGIN TRANSACTION');
        // Step 1: Create new tables
        db.exec(CREATE_EXTRACTIONS_TABLE);
        db.exec(CREATE_FORM_FILLS_TABLE);
        // Step 2: Add new columns to documents table (skipping any already present)
        const columns = db.prepare('PRAGMA table_info(documents)').all();
        const columnNames = new Set(columns.map((c) => c.name));
        for (const column of ['doc_title', 'doc_author', 'doc_subject']) {
            if (!columnNames.has(column)) {
                db.exec(`ALTER TABLE documents ADD COLUMN ${column} TEXT`);
            }
        }
        // Step 3: Create new indexes
        db.exec('CREATE INDEX IF NOT EXISTS idx_extractions_document_id ON extractions(document_id)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_form_fills_status ON form_fills(status)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_documents_doc_title ON documents(doc_title)');
        // Step 4: Recreate provenance table with EXTRACTION and FORM_FILL in CHECK constraints
        db.exec(`
            CREATE TABLE provenance_new (
                id TEXT PRIMARY KEY,
                type TEXT NOT NULL CHECK (type IN ('DOCUMENT', 'OCR_RESULT', 'CHUNK', 'IMAGE', 'VLM_DESCRIPTION', 'EMBEDDING', 'EXTRACTION', 'FORM_FILL')),
                created_at TEXT NOT NULL,
                processed_at TEXT NOT NULL,
                source_file_created_at TEXT,
                source_file_modified_at TEXT,
                source_type TEXT NOT NULL CHECK (source_type IN ('FILE', 'OCR', 'CHUNKING', 'IMAGE_EXTRACTION', 'VLM', 'VLM_DEDUP', 'EMBEDDING', 'EXTRACTION', 'FORM_FILL')),
                source_path TEXT,
                source_id TEXT,
                root_document_id TEXT NOT NULL,
                location TEXT,
                content_hash TEXT NOT NULL,
                input_hash TEXT,
                file_hash TEXT,
                processor TEXT NOT NULL,
                processor_version TEXT NOT NULL,
                processing_params TEXT NOT NULL,
                processing_duration_ms INTEGER,
                processing_quality_score REAL,
                parent_id TEXT,
                parent_ids TEXT NOT NULL,
                chain_depth INTEGER NOT NULL,
                chain_path TEXT,
                FOREIGN KEY (source_id) REFERENCES provenance_new(id),
                FOREIGN KEY (parent_id) REFERENCES provenance_new(id)
            )
        `);
        // Step 5: Copy existing provenance data
        db.exec(`
            INSERT INTO provenance_new
            SELECT * FROM provenance
        `);
        // Step 6: Drop old provenance table
        db.exec('DROP TABLE provenance');
        // Step 7: Rename new table
        db.exec('ALTER TABLE provenance_new RENAME TO provenance');
        // Step 8: Recreate provenance indexes (dropped together with the old table)
        db.exec('CREATE INDEX IF NOT EXISTS idx_provenance_source_id ON provenance(source_id)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_provenance_type ON provenance(type)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_provenance_root_document_id ON provenance(root_document_id)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_provenance_parent_id ON provenance(parent_id)');
        // M-5: Verify FK integrity BEFORE commit so violations cause rollback
        const fkViolations = db.pragma('foreign_key_check');
        if (fkViolations.length > 0) {
            throw new Error(`Foreign key integrity check failed after v7->v8 migration: ${fkViolations.length} violation(s). ` +
                `First: ${JSON.stringify(fkViolations[0])}`);
        }
        db.exec('COMMIT');
        db.exec('PRAGMA foreign_keys = ON');
    }
    catch (error) {
        try {
            db.exec('ROLLBACK');
        }
        catch (rollbackErr) {
            console.error('[migrations] Rollback failed:', rollbackErr instanceof Error ? rollbackErr.message : String(rollbackErr));
        }
        // Kept apart from ROLLBACK so enforcement is restored even when the
        // rollback itself throws.
        try {
            db.exec('PRAGMA foreign_keys = ON');
        }
        catch (pragmaErr) {
            console.error('[migrations] Failed to re-enable foreign keys:', pragmaErr instanceof Error ? pragmaErr.message : String(pragmaErr));
        }
        const cause = error instanceof Error ? error.message : String(error);
        throw new MigrationError(`Failed to migrate from v7 to v8 (extractions, form_fills, doc metadata): ${cause}`, 'migrate', 'provenance', error);
    }
}
|
|
1200
|
+
/**
 * Migrate from schema version 8 to version 9
 *
 * Changes in v9:
 * - extractions_fts: FTS5 virtual table for extraction content full-text search
 * - extractions_fts_ai/ad/au: Sync triggers on extractions table
 * - fts_index_metadata id=3: Extraction FTS metadata row
 * - form_fills.cost_cents: Changed from INTEGER to REAL (fractional cents)
 *
 * Everything runs inside one transaction; the cost_cents type change is done
 * by rebuilding form_fills, and foreign_key_check is run before COMMIT.
 *
 * On failure the transaction is rolled back and foreign-key enforcement is
 * re-enabled as a separate guarded step, so a failing ROLLBACK cannot leave
 * enforcement switched off on this connection.
 *
 * @param db - Database instance from better-sqlite3
 * @throws MigrationError if migration fails
 */
function migrateV8ToV9(db) {
    try {
        // Must be issued before BEGIN: the pragma is a no-op inside a transaction.
        db.exec('PRAGMA foreign_keys = OFF');
        db.exec('BEGIN TRANSACTION');
        // Step 1: Create extractions FTS5 virtual table
        db.exec(CREATE_EXTRACTIONS_FTS_TABLE);
        // Step 2: Create extractions FTS sync triggers
        for (const trigger of CREATE_EXTRACTIONS_FTS_TRIGGERS) {
            db.exec(trigger);
        }
        // Step 3: Populate FTS from existing extractions
        db.exec("INSERT INTO extractions_fts(extractions_fts) VALUES('rebuild')");
        // Step 4: Add extraction FTS metadata row (id=3); left untouched if it already exists
        const now = new Date().toISOString();
        const extractionCount = db.prepare('SELECT COUNT(*) as cnt FROM extractions').get().cnt;
        db.prepare(`
            INSERT OR IGNORE INTO fts_index_metadata (id, last_rebuild_at, chunks_indexed, tokenizer, schema_version, content_hash)
            VALUES (3, ?, ?, 'porter unicode61', 9, NULL)
        `).run(now, extractionCount);
        // Step 5: Recreate form_fills with cost_cents REAL (was INTEGER)
        db.exec(`
            CREATE TABLE form_fills_new (
                id TEXT PRIMARY KEY NOT NULL,
                source_file_path TEXT NOT NULL,
                source_file_hash TEXT NOT NULL,
                field_data_json TEXT NOT NULL,
                context TEXT,
                confidence_threshold REAL NOT NULL DEFAULT 0.5,
                output_file_path TEXT,
                output_base64 TEXT,
                fields_filled TEXT NOT NULL DEFAULT '[]',
                fields_not_found TEXT NOT NULL DEFAULT '[]',
                page_count INTEGER,
                cost_cents REAL,
                status TEXT NOT NULL CHECK(status IN ('pending', 'processing', 'complete', 'failed')),
                error_message TEXT,
                provenance_id TEXT NOT NULL REFERENCES provenance(id),
                created_at TEXT NOT NULL DEFAULT (datetime('now'))
            )
        `);
        db.exec('INSERT INTO form_fills_new SELECT * FROM form_fills');
        db.exec('DROP TABLE form_fills');
        db.exec('ALTER TABLE form_fills_new RENAME TO form_fills');
        // The index was dropped together with the old table.
        db.exec('CREATE INDEX IF NOT EXISTS idx_form_fills_status ON form_fills(status)');
        // M-5: Verify FK integrity BEFORE commit so violations cause rollback
        const fkViolations = db.pragma('foreign_key_check');
        if (fkViolations.length > 0) {
            throw new Error(`Foreign key integrity check failed after v8->v9 migration: ${fkViolations.length} violation(s). ` +
                `First: ${JSON.stringify(fkViolations[0])}`);
        }
        db.exec('COMMIT');
        db.exec('PRAGMA foreign_keys = ON');
    }
    catch (error) {
        try {
            db.exec('ROLLBACK');
        }
        catch (rollbackErr) {
            console.error('[migrations] Rollback failed:', rollbackErr instanceof Error ? rollbackErr.message : String(rollbackErr));
        }
        // Kept apart from ROLLBACK so enforcement is restored even when the
        // rollback itself throws.
        try {
            db.exec('PRAGMA foreign_keys = ON');
        }
        catch (pragmaErr) {
            console.error('[migrations] Failed to re-enable foreign keys:', pragmaErr instanceof Error ? pragmaErr.message : String(pragmaErr));
        }
        const cause = error instanceof Error ? error.message : String(error);
        throw new MigrationError(`Failed to migrate from v8 to v9 (extractions FTS, cost_cents REAL): ${cause}`, 'migrate', 'extractions_fts', error);
    }
}
|
|
1277
|
+
/**
 * Migrate from schema version 11 to version 12
 *
 * Changes in v12:
 * - uploaded_files: New table for Datalab cloud file uploads
 * - documents.datalab_file_id: New column linking documents to uploaded files
 * - 3 new indexes: idx_uploaded_files_file_hash, idx_uploaded_files_status, idx_uploaded_files_datalab_file_id
 *
 * Re-enabling foreign keys on the failure path is guarded: if that statement
 * throws as well, the failure is logged and the original error is still
 * reported as a MigrationError instead of being masked.
 *
 * @param db - Database instance from better-sqlite3
 * @throws MigrationError if migration fails
 */
function migrateV11ToV12(db) {
    try {
        db.exec('PRAGMA foreign_keys = OFF');
        const applyChanges = db.transaction(() => {
            // Create uploaded_files table
            db.exec(CREATE_UPLOADED_FILES_TABLE);
            // Create indexes
            db.exec('CREATE INDEX IF NOT EXISTS idx_uploaded_files_file_hash ON uploaded_files(file_hash)');
            db.exec('CREATE INDEX IF NOT EXISTS idx_uploaded_files_status ON uploaded_files(upload_status)');
            db.exec('CREATE INDEX IF NOT EXISTS idx_uploaded_files_datalab_file_id ON uploaded_files(datalab_file_id)');
            // Add datalab_file_id column to documents unless it is already there
            const columns = db.prepare('PRAGMA table_info(documents)').all();
            if (!columns.some((c) => c.name === 'datalab_file_id')) {
                db.exec('ALTER TABLE documents ADD COLUMN datalab_file_id TEXT');
            }
            // M-5: FK integrity check inside transaction so violations cause rollback
            const fkViolations = db.pragma('foreign_key_check');
            if (fkViolations.length > 0) {
                throw new Error(`Foreign key integrity check failed after v11->v12 migration: ${fkViolations.length} violation(s). ` +
                    `First: ${JSON.stringify(fkViolations[0])}`);
            }
        });
        applyChanges();
        db.exec('PRAGMA foreign_keys = ON');
    }
    catch (error) {
        // Best effort: never let a failing pragma hide the real cause.
        try {
            db.exec('PRAGMA foreign_keys = ON');
        }
        catch (pragmaErr) {
            console.error('[migrations] Failed to re-enable foreign keys:', pragmaErr instanceof Error ? pragmaErr.message : String(pragmaErr));
        }
        const cause = error instanceof Error ? error.message : String(error);
        throw new MigrationError(`Failed to migrate from v11 to v12 (uploaded_files): ${cause}`, 'migrate', 'uploaded_files', error);
    }
}
|
|
1319
|
+
/**
 * Migrate from schema version 12 to version 13
 *
 * Changes in v13:
 * - provenance.type: Added 'ENTITY_EXTRACTION' to CHECK constraint
 * - provenance.source_type: Added 'ENTITY_EXTRACTION' to CHECK constraint
 * - entities: New table for named entities extracted from documents
 * - entity_mentions: New table for entity occurrence tracking
 * - 4 new indexes: idx_entities_document_id, idx_entities_entity_type,
 *   idx_entities_normalized_text, idx_entity_mentions_entity_id
 *
 * The provenance table is rebuilt (create provenance_new, copy rows, drop the
 * old table, rename) with foreign key enforcement switched off. On failure the
 * transaction is rolled back and enforcement is re-enabled; the two cleanup
 * steps are independent, so a failed ROLLBACK cannot leave foreign keys off.
 *
 * @param db - Database instance from better-sqlite3
 * @throws MigrationError if migration fails
 */
function migrateV12ToV13(db) {
    try {
        db.exec('PRAGMA foreign_keys = OFF');
        db.exec('BEGIN TRANSACTION');
        // Step 1: Recreate provenance table with ENTITY_EXTRACTION in CHECK constraints
        db.exec(`
            CREATE TABLE provenance_new (
                id TEXT PRIMARY KEY,
                type TEXT NOT NULL CHECK (type IN ('DOCUMENT', 'OCR_RESULT', 'CHUNK', 'IMAGE', 'VLM_DESCRIPTION', 'EMBEDDING', 'EXTRACTION', 'FORM_FILL', 'ENTITY_EXTRACTION')),
                created_at TEXT NOT NULL,
                processed_at TEXT NOT NULL,
                source_file_created_at TEXT,
                source_file_modified_at TEXT,
                source_type TEXT NOT NULL CHECK (source_type IN ('FILE', 'OCR', 'CHUNKING', 'IMAGE_EXTRACTION', 'VLM', 'VLM_DEDUP', 'EMBEDDING', 'EXTRACTION', 'FORM_FILL', 'ENTITY_EXTRACTION')),
                source_path TEXT,
                source_id TEXT,
                root_document_id TEXT NOT NULL,
                location TEXT,
                content_hash TEXT NOT NULL,
                input_hash TEXT,
                file_hash TEXT,
                processor TEXT NOT NULL,
                processor_version TEXT NOT NULL,
                processing_params TEXT NOT NULL,
                processing_duration_ms INTEGER,
                processing_quality_score REAL,
                parent_id TEXT,
                parent_ids TEXT NOT NULL,
                chain_depth INTEGER NOT NULL,
                chain_path TEXT,
                FOREIGN KEY (source_id) REFERENCES provenance_new(id),
                FOREIGN KEY (parent_id) REFERENCES provenance_new(id)
            )
        `);
        // Step 2: Copy existing data
        // NOTE(review): SELECT * relies on the old table having the same column order as above - confirm.
        db.exec('INSERT INTO provenance_new SELECT * FROM provenance');
        // Step 3: Drop old table
        db.exec('DROP TABLE provenance');
        // Step 4: Rename new table
        db.exec('ALTER TABLE provenance_new RENAME TO provenance');
        // Step 5: Recreate provenance indexes
        db.exec('CREATE INDEX IF NOT EXISTS idx_provenance_source_id ON provenance(source_id)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_provenance_type ON provenance(type)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_provenance_root_document_id ON provenance(root_document_id)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_provenance_parent_id ON provenance(parent_id)');
        // Step 6: Create entities and entity_mentions tables
        db.exec(CREATE_ENTITIES_TABLE);
        db.exec(CREATE_ENTITY_MENTIONS_TABLE);
        // Step 7: Create indexes for new tables
        db.exec('CREATE INDEX IF NOT EXISTS idx_entities_document_id ON entities(document_id)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_entities_entity_type ON entities(entity_type)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_entities_normalized_text ON entities(normalized_text)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_entity_mentions_entity_id ON entity_mentions(entity_id)');
        // M-5: Verify FK integrity BEFORE commit so violations cause rollback
        const fkViolations = db.pragma('foreign_key_check');
        if (fkViolations.length > 0) {
            throw new Error(`Foreign key integrity check failed after v12->v13 migration: ${fkViolations.length} violation(s). ` +
                `First: ${JSON.stringify(fkViolations[0])}`);
        }
        db.exec('COMMIT');
        db.exec('PRAGMA foreign_keys = ON');
    }
    catch (error) {
        // Undo the partial rebuild. ROLLBACK can itself fail (e.g. when BEGIN
        // never succeeded and no transaction is open), so it is isolated.
        try {
            db.exec('ROLLBACK');
        }
        catch (rollbackErr) {
            console.error('[migrations] Rollback failed:', rollbackErr instanceof Error ? rollbackErr.message : String(rollbackErr));
        }
        // Re-enable FK enforcement regardless of the rollback outcome.
        try {
            db.exec('PRAGMA foreign_keys = ON');
        }
        catch (pragmaErr) {
            console.error('[migrations] Failed to re-enable foreign keys:', pragmaErr instanceof Error ? pragmaErr.message : String(pragmaErr));
        }
        const cause = error instanceof Error ? error.message : String(error);
        throw new MigrationError(`Failed to migrate from v12 to v13 (entity extraction): ${cause}`, 'migrate', 'provenance', error);
    }
}
|
|
1407
|
+
/**
 * Migrate from schema version 13 to version 14
 *
 * Changes in v14:
 * - provenance.type: Added 'COMPARISON' to CHECK constraint
 * - provenance.source_type: Added 'COMPARISON' to CHECK constraint
 * - comparisons: New table for document comparison results
 * - 3 new indexes: idx_comparisons_doc1, idx_comparisons_doc2, idx_comparisons_created
 *
 * The provenance table is rebuilt (create provenance_new, copy rows, drop the
 * old table, rename) with foreign key enforcement switched off. On failure the
 * transaction is rolled back and enforcement is re-enabled; the two cleanup
 * steps are independent, so a failed ROLLBACK cannot leave foreign keys off.
 *
 * @param db - Database instance from better-sqlite3
 * @throws MigrationError if migration fails
 */
function migrateV13ToV14(db) {
    try {
        db.exec('PRAGMA foreign_keys = OFF');
        db.exec('BEGIN TRANSACTION');
        // Step 1: Recreate provenance table with COMPARISON in CHECK constraints
        db.exec(`
            CREATE TABLE provenance_new (
                id TEXT PRIMARY KEY,
                type TEXT NOT NULL CHECK (type IN ('DOCUMENT', 'OCR_RESULT', 'CHUNK', 'IMAGE', 'VLM_DESCRIPTION', 'EMBEDDING', 'EXTRACTION', 'FORM_FILL', 'ENTITY_EXTRACTION', 'COMPARISON')),
                created_at TEXT NOT NULL,
                processed_at TEXT NOT NULL,
                source_file_created_at TEXT,
                source_file_modified_at TEXT,
                source_type TEXT NOT NULL CHECK (source_type IN ('FILE', 'OCR', 'CHUNKING', 'IMAGE_EXTRACTION', 'VLM', 'VLM_DEDUP', 'EMBEDDING', 'EXTRACTION', 'FORM_FILL', 'ENTITY_EXTRACTION', 'COMPARISON')),
                source_path TEXT,
                source_id TEXT,
                root_document_id TEXT NOT NULL,
                location TEXT,
                content_hash TEXT NOT NULL,
                input_hash TEXT,
                file_hash TEXT,
                processor TEXT NOT NULL,
                processor_version TEXT NOT NULL,
                processing_params TEXT NOT NULL,
                processing_duration_ms INTEGER,
                processing_quality_score REAL,
                parent_id TEXT,
                parent_ids TEXT NOT NULL,
                chain_depth INTEGER NOT NULL,
                chain_path TEXT,
                FOREIGN KEY (source_id) REFERENCES provenance_new(id),
                FOREIGN KEY (parent_id) REFERENCES provenance_new(id)
            )
        `);
        // Step 2: Copy existing data
        // NOTE(review): SELECT * relies on the old table having the same column order as above - confirm.
        db.exec('INSERT INTO provenance_new SELECT * FROM provenance');
        // Step 3: Drop old table
        db.exec('DROP TABLE provenance');
        // Step 4: Rename new table
        db.exec('ALTER TABLE provenance_new RENAME TO provenance');
        // Step 5: Recreate provenance indexes
        db.exec('CREATE INDEX IF NOT EXISTS idx_provenance_source_id ON provenance(source_id)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_provenance_type ON provenance(type)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_provenance_root_document_id ON provenance(root_document_id)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_provenance_parent_id ON provenance(parent_id)');
        // Step 6: Create comparisons table
        db.exec(CREATE_COMPARISONS_TABLE);
        // Step 7: Create indexes for comparisons table
        db.exec('CREATE INDEX IF NOT EXISTS idx_comparisons_doc1 ON comparisons(document_id_1)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_comparisons_doc2 ON comparisons(document_id_2)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_comparisons_created ON comparisons(created_at)');
        // M-5: Verify FK integrity BEFORE commit so violations cause rollback
        const fkViolations = db.pragma('foreign_key_check');
        if (fkViolations.length > 0) {
            throw new Error(`Foreign key integrity check failed after v13->v14 migration: ${fkViolations.length} violation(s). ` +
                `First: ${JSON.stringify(fkViolations[0])}`);
        }
        db.exec('COMMIT');
        db.exec('PRAGMA foreign_keys = ON');
    }
    catch (error) {
        // Undo the partial rebuild. ROLLBACK can itself fail (e.g. when BEGIN
        // never succeeded and no transaction is open), so it is isolated.
        try {
            db.exec('ROLLBACK');
        }
        catch (rollbackErr) {
            console.error('[migrations] Rollback failed:', rollbackErr instanceof Error ? rollbackErr.message : String(rollbackErr));
        }
        // Re-enable FK enforcement regardless of the rollback outcome.
        try {
            db.exec('PRAGMA foreign_keys = ON');
        }
        catch (pragmaErr) {
            console.error('[migrations] Failed to re-enable foreign keys:', pragmaErr instanceof Error ? pragmaErr.message : String(pragmaErr));
        }
        const cause = error instanceof Error ? error.message : String(error);
        throw new MigrationError(`Failed to migrate from v13 to v14 (document comparison): ${cause}`, 'migrate', 'provenance', error);
    }
}
|
|
1491
|
+
/**
 * Migrate from schema version 15 to version 16
 *
 * Changes in v16:
 * - provenance.type: Added 'KNOWLEDGE_GRAPH' to CHECK constraint
 * - provenance.source_type: Added 'KNOWLEDGE_GRAPH' to CHECK constraint
 * - knowledge_nodes: New table for unified entities resolved across documents
 * - knowledge_edges: New table for relationships between knowledge nodes
 * - node_entity_links: New table linking knowledge nodes to source entity extractions
 * - 8 new indexes: idx_kn_entity_type, idx_kn_normalized_name, idx_kn_document_count,
 *   idx_ke_source_node, idx_ke_target_node, idx_ke_relationship_type,
 *   idx_nel_node_id, idx_nel_document_id
 *
 * The provenance table is rebuilt (create provenance_new, copy rows, drop the
 * old table, rename) with foreign key enforcement switched off. On failure the
 * transaction is rolled back and enforcement is re-enabled; the two cleanup
 * steps are independent, so a failed ROLLBACK cannot leave foreign keys off.
 *
 * @param db - Database instance from better-sqlite3
 * @throws MigrationError if migration fails
 */
function migrateV15ToV16(db) {
    try {
        db.exec('PRAGMA foreign_keys = OFF');
        db.exec('BEGIN TRANSACTION');
        // Step 1: Recreate provenance table with KNOWLEDGE_GRAPH in CHECK constraints
        db.exec(`
            CREATE TABLE provenance_new (
                id TEXT PRIMARY KEY,
                type TEXT NOT NULL CHECK (type IN ('DOCUMENT', 'OCR_RESULT', 'CHUNK', 'IMAGE', 'VLM_DESCRIPTION', 'EMBEDDING', 'EXTRACTION', 'FORM_FILL', 'ENTITY_EXTRACTION', 'COMPARISON', 'CLUSTERING', 'KNOWLEDGE_GRAPH')),
                created_at TEXT NOT NULL,
                processed_at TEXT NOT NULL,
                source_file_created_at TEXT,
                source_file_modified_at TEXT,
                source_type TEXT NOT NULL CHECK (source_type IN ('FILE', 'OCR', 'CHUNKING', 'IMAGE_EXTRACTION', 'VLM', 'VLM_DEDUP', 'EMBEDDING', 'EXTRACTION', 'FORM_FILL', 'ENTITY_EXTRACTION', 'COMPARISON', 'CLUSTERING', 'KNOWLEDGE_GRAPH')),
                source_path TEXT,
                source_id TEXT,
                root_document_id TEXT NOT NULL,
                location TEXT,
                content_hash TEXT NOT NULL,
                input_hash TEXT,
                file_hash TEXT,
                processor TEXT NOT NULL,
                processor_version TEXT NOT NULL,
                processing_params TEXT NOT NULL,
                processing_duration_ms INTEGER,
                processing_quality_score REAL,
                parent_id TEXT,
                parent_ids TEXT NOT NULL,
                chain_depth INTEGER NOT NULL,
                chain_path TEXT,
                FOREIGN KEY (source_id) REFERENCES provenance_new(id),
                FOREIGN KEY (parent_id) REFERENCES provenance_new(id)
            )
        `);
        // Step 2: Copy existing data
        // NOTE(review): SELECT * relies on the old table having the same column order as above - confirm.
        db.exec('INSERT INTO provenance_new SELECT * FROM provenance');
        // Step 3: Drop old table
        db.exec('DROP TABLE provenance');
        // Step 4: Rename new table
        db.exec('ALTER TABLE provenance_new RENAME TO provenance');
        // Step 5: Recreate provenance indexes
        db.exec('CREATE INDEX IF NOT EXISTS idx_provenance_source_id ON provenance(source_id)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_provenance_type ON provenance(type)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_provenance_root_document_id ON provenance(root_document_id)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_provenance_parent_id ON provenance(parent_id)');
        // Step 6: Create knowledge graph tables
        db.exec(CREATE_KNOWLEDGE_NODES_TABLE);
        db.exec(CREATE_KNOWLEDGE_EDGES_TABLE);
        db.exec(CREATE_NODE_ENTITY_LINKS_TABLE);
        // Step 7: Create indexes for knowledge graph tables
        db.exec('CREATE INDEX IF NOT EXISTS idx_kn_entity_type ON knowledge_nodes(entity_type)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_kn_normalized_name ON knowledge_nodes(normalized_name)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_kn_document_count ON knowledge_nodes(document_count DESC)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_ke_source_node ON knowledge_edges(source_node_id)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_ke_target_node ON knowledge_edges(target_node_id)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_ke_relationship_type ON knowledge_edges(relationship_type)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_nel_node_id ON node_entity_links(node_id)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_nel_document_id ON node_entity_links(document_id)');
        // M-5: Verify FK integrity BEFORE commit so violations cause rollback
        const fkViolations = db.pragma('foreign_key_check');
        if (fkViolations.length > 0) {
            throw new Error(`Foreign key integrity check failed after v15->v16 migration: ${fkViolations.length} violation(s). ` +
                `First: ${JSON.stringify(fkViolations[0])}`);
        }
        db.exec('COMMIT');
        db.exec('PRAGMA foreign_keys = ON');
    }
    catch (error) {
        // Undo the partial rebuild. ROLLBACK can itself fail (e.g. when BEGIN
        // never succeeded and no transaction is open), so it is isolated.
        try {
            db.exec('ROLLBACK');
        }
        catch (rollbackErr) {
            console.error('[migrations] Rollback failed:', rollbackErr instanceof Error ? rollbackErr.message : String(rollbackErr));
        }
        // Re-enable FK enforcement regardless of the rollback outcome.
        try {
            db.exec('PRAGMA foreign_keys = ON');
        }
        catch (pragmaErr) {
            console.error('[migrations] Failed to re-enable foreign keys:', pragmaErr instanceof Error ? pragmaErr.message : String(pragmaErr));
        }
        const cause = error instanceof Error ? error.message : String(error);
        throw new MigrationError(`Failed to migrate from v15 to v16 (knowledge graph): ${cause}`, 'migrate', 'provenance', error);
    }
}
|
|
1586
|
+
/**
 * Migrate from schema version 14 to version 15
 *
 * Changes in v15:
 * - provenance.type: Added 'CLUSTERING' to CHECK constraint
 * - provenance.source_type: Added 'CLUSTERING' to CHECK constraint
 * - clusters: New table for document clustering results
 * - document_clusters: New table for document-cluster assignments
 * - 6 new indexes: idx_clusters_run_id, idx_clusters_tag, idx_clusters_created,
 *   idx_doc_clusters_document, idx_doc_clusters_cluster, idx_doc_clusters_run
 *
 * The provenance table is rebuilt (create provenance_new, copy rows, drop the
 * old table, rename) with foreign key enforcement switched off. On failure the
 * transaction is rolled back and enforcement is re-enabled; the two cleanup
 * steps are independent, so a failed ROLLBACK cannot leave foreign keys off.
 *
 * @param db - Database instance from better-sqlite3
 * @throws MigrationError if migration fails
 */
function migrateV14ToV15(db) {
    try {
        db.exec('PRAGMA foreign_keys = OFF');
        db.exec('BEGIN TRANSACTION');
        // Step 1: Recreate provenance table with CLUSTERING in CHECK constraints
        db.exec(`
            CREATE TABLE provenance_new (
                id TEXT PRIMARY KEY,
                type TEXT NOT NULL CHECK (type IN ('DOCUMENT', 'OCR_RESULT', 'CHUNK', 'IMAGE', 'VLM_DESCRIPTION', 'EMBEDDING', 'EXTRACTION', 'FORM_FILL', 'ENTITY_EXTRACTION', 'COMPARISON', 'CLUSTERING')),
                created_at TEXT NOT NULL,
                processed_at TEXT NOT NULL,
                source_file_created_at TEXT,
                source_file_modified_at TEXT,
                source_type TEXT NOT NULL CHECK (source_type IN ('FILE', 'OCR', 'CHUNKING', 'IMAGE_EXTRACTION', 'VLM', 'VLM_DEDUP', 'EMBEDDING', 'EXTRACTION', 'FORM_FILL', 'ENTITY_EXTRACTION', 'COMPARISON', 'CLUSTERING')),
                source_path TEXT,
                source_id TEXT,
                root_document_id TEXT NOT NULL,
                location TEXT,
                content_hash TEXT NOT NULL,
                input_hash TEXT,
                file_hash TEXT,
                processor TEXT NOT NULL,
                processor_version TEXT NOT NULL,
                processing_params TEXT NOT NULL,
                processing_duration_ms INTEGER,
                processing_quality_score REAL,
                parent_id TEXT,
                parent_ids TEXT NOT NULL,
                chain_depth INTEGER NOT NULL,
                chain_path TEXT,
                FOREIGN KEY (source_id) REFERENCES provenance_new(id),
                FOREIGN KEY (parent_id) REFERENCES provenance_new(id)
            )
        `);
        // Step 2: Copy existing data
        // NOTE(review): SELECT * relies on the old table having the same column order as above - confirm.
        db.exec('INSERT INTO provenance_new SELECT * FROM provenance');
        // Step 3: Drop old table
        db.exec('DROP TABLE provenance');
        // Step 4: Rename new table
        db.exec('ALTER TABLE provenance_new RENAME TO provenance');
        // Step 5: Recreate provenance indexes
        db.exec('CREATE INDEX IF NOT EXISTS idx_provenance_source_id ON provenance(source_id)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_provenance_type ON provenance(type)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_provenance_root_document_id ON provenance(root_document_id)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_provenance_parent_id ON provenance(parent_id)');
        // Step 6: Create clusters table
        db.exec(CREATE_CLUSTERS_TABLE);
        // Step 7: Create document_clusters table
        db.exec(CREATE_DOCUMENT_CLUSTERS_TABLE);
        // Step 8: Create indexes for clustering tables
        db.exec('CREATE INDEX IF NOT EXISTS idx_clusters_run_id ON clusters(run_id)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_clusters_tag ON clusters(classification_tag)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_clusters_created ON clusters(created_at DESC)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_doc_clusters_document ON document_clusters(document_id)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_doc_clusters_cluster ON document_clusters(cluster_id)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_doc_clusters_run ON document_clusters(run_id)');
        // M-5: Verify FK integrity BEFORE commit so violations cause rollback
        const fkViolations = db.pragma('foreign_key_check');
        if (fkViolations.length > 0) {
            throw new Error(`Foreign key integrity check failed after v14->v15 migration: ${fkViolations.length} violation(s). ` +
                `First: ${JSON.stringify(fkViolations[0])}`);
        }
        db.exec('COMMIT');
        db.exec('PRAGMA foreign_keys = ON');
    }
    catch (error) {
        // Undo the partial rebuild. ROLLBACK can itself fail (e.g. when BEGIN
        // never succeeded and no transaction is open), so it is isolated.
        try {
            db.exec('ROLLBACK');
        }
        catch (rollbackErr) {
            console.error('[migrations] Rollback failed:', rollbackErr instanceof Error ? rollbackErr.message : String(rollbackErr));
        }
        // Re-enable FK enforcement regardless of the rollback outcome.
        try {
            db.exec('PRAGMA foreign_keys = ON');
        }
        catch (pragmaErr) {
            console.error('[migrations] Failed to re-enable foreign keys:', pragmaErr instanceof Error ? pragmaErr.message : String(pragmaErr));
        }
        const cause = error instanceof Error ? error.message : String(error);
        throw new MigrationError(`Failed to migrate from v14 to v15 (document clustering): ${cause}`, 'migrate', 'provenance', error);
    }
}
|
|
1677
|
+
/**
 * Migrate from schema version 16 to version 17
 *
 * Changes in v17 (knowledge graph optimization):
 * - knowledge_nodes.edge_count: New column tracking edge count per node
 * - node_entity_links.resolution_method: New column tracking how entity was resolved
 * - knowledge_edges: Expanded CHECK constraint with 'precedes', 'occurred_at' relationship types
 * - knowledge_nodes_fts: New FTS5 virtual table for knowledge node full-text search
 * - knowledge_nodes_fts_ai/ad/au: FTS5 sync triggers for knowledge_nodes
 * - idx_knowledge_nodes_canonical_lower: Case-insensitive index on canonical_name
 * - idx_entity_mentions_chunk_id: Index on entity_mentions.chunk_id for chunk-based lookups
 *
 * The knowledge_edges table is rebuilt (create knowledge_edges_new, copy rows,
 * drop the old table, rename) with foreign key enforcement switched off. On
 * failure the transaction is rolled back and enforcement is re-enabled; the two
 * cleanup steps are independent, so a failed ROLLBACK cannot leave foreign keys off.
 *
 * @param db - Database instance from better-sqlite3
 * @throws MigrationError if migration fails
 */
function migrateV16ToV17(db) {
    try {
        db.exec('PRAGMA foreign_keys = OFF');
        db.exec('BEGIN TRANSACTION');
        // Step 1: Add resolution_method column to node_entity_links (if not already present from fresh schema)
        const nelColumns = db.pragma('table_info(node_entity_links)');
        if (!nelColumns.some((c) => c.name === 'resolution_method')) {
            db.exec('ALTER TABLE node_entity_links ADD COLUMN resolution_method TEXT');
        }
        // Step 2: Add edge_count column to knowledge_nodes (if not already present from fresh schema)
        const knColumns = db.pragma('table_info(knowledge_nodes)');
        if (!knColumns.some((c) => c.name === 'edge_count')) {
            db.exec('ALTER TABLE knowledge_nodes ADD COLUMN edge_count INTEGER NOT NULL DEFAULT 0');
        }
        // Step 3: Recreate knowledge_edges with expanded CHECK constraint
        // Include v20 columns (valid_from, valid_until, normalized_weight, contradiction_count)
        // so that SELECT * works regardless of whether the source table was created fresh (with v20 cols)
        // or via earlier migrations (without them).
        const keColumns = db.pragma('table_info(knowledge_edges)');
        const hasV20Cols = keColumns.some((c) => c.name === 'valid_from');
        db.exec(`
            CREATE TABLE knowledge_edges_new (
                id TEXT PRIMARY KEY,
                source_node_id TEXT NOT NULL,
                target_node_id TEXT NOT NULL,
                relationship_type TEXT NOT NULL CHECK (relationship_type IN (
                    'co_mentioned', 'co_located', 'works_at', 'represents',
                    'located_in', 'filed_in', 'cites', 'references',
                    'party_to', 'related_to', 'precedes', 'occurred_at'
                )),
                weight REAL NOT NULL DEFAULT 1.0,
                evidence_count INTEGER NOT NULL DEFAULT 1,
                document_ids TEXT NOT NULL,
                metadata TEXT,
                provenance_id TEXT NOT NULL,
                created_at TEXT NOT NULL,
                valid_from TEXT,
                valid_until TEXT,
                normalized_weight REAL DEFAULT 0,
                contradiction_count INTEGER DEFAULT 0,
                FOREIGN KEY (source_node_id) REFERENCES knowledge_nodes(id),
                FOREIGN KEY (target_node_id) REFERENCES knowledge_nodes(id),
                FOREIGN KEY (provenance_id) REFERENCES provenance(id)
            )
        `);
        // Step 4: Copy existing edges (use explicit columns if source lacks v20 columns)
        if (hasV20Cols) {
            db.exec('INSERT INTO knowledge_edges_new SELECT * FROM knowledge_edges');
        }
        else {
            db.exec(`INSERT INTO knowledge_edges_new (id, source_node_id, target_node_id, relationship_type, weight, evidence_count, document_ids, metadata, provenance_id, created_at)
                SELECT id, source_node_id, target_node_id, relationship_type, weight, evidence_count, document_ids, metadata, provenance_id, created_at FROM knowledge_edges`);
        }
        // Step 5: Drop old edges table
        db.exec('DROP TABLE knowledge_edges');
        // Step 6: Rename new table
        db.exec('ALTER TABLE knowledge_edges_new RENAME TO knowledge_edges');
        // Step 7: Recreate indexes on knowledge_edges (dropped with old table)
        db.exec('CREATE INDEX IF NOT EXISTS idx_ke_source_node ON knowledge_edges(source_node_id)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_ke_target_node ON knowledge_edges(target_node_id)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_ke_relationship_type ON knowledge_edges(relationship_type)');
        // Step 8: Create new optimization indexes
        db.exec('CREATE INDEX IF NOT EXISTS idx_knowledge_nodes_canonical_lower ON knowledge_nodes(canonical_name COLLATE NOCASE)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_entity_mentions_chunk_id ON entity_mentions(chunk_id)');
        // Step 9: Backfill edge_count from existing edges
        db.exec(`
            UPDATE knowledge_nodes SET edge_count = (
                SELECT COUNT(*) FROM knowledge_edges
                WHERE source_node_id = knowledge_nodes.id OR target_node_id = knowledge_nodes.id
            )
        `);
        // Step 10: Create knowledge_nodes_fts FTS5 table
        db.exec(CREATE_KNOWLEDGE_NODES_FTS_TABLE);
        // Step 11: Create FTS triggers
        for (const trigger of CREATE_KNOWLEDGE_NODES_FTS_TRIGGERS) {
            db.exec(trigger);
        }
        // Step 12: Populate FTS from existing knowledge_nodes
        const nodeCount = db.prepare('SELECT COUNT(*) as cnt FROM knowledge_nodes').get();
        if (nodeCount.cnt > 0) {
            db.exec(`
                INSERT INTO knowledge_nodes_fts(rowid, canonical_name)
                SELECT rowid, canonical_name FROM knowledge_nodes
            `);
        }
        // M-5: Verify FK integrity BEFORE commit so violations cause rollback
        const fkViolations = db.pragma('foreign_key_check');
        if (fkViolations.length > 0) {
            throw new Error(`Foreign key integrity check failed after v16->v17 migration: ${fkViolations.length} violation(s). ` +
                `First: ${JSON.stringify(fkViolations[0])}`);
        }
        db.exec('COMMIT');
        db.exec('PRAGMA foreign_keys = ON');
    }
    catch (error) {
        // Undo the partial rebuild. ROLLBACK can itself fail (e.g. when BEGIN
        // never succeeded and no transaction is open), so it is isolated.
        try {
            db.exec('ROLLBACK');
        }
        catch (rollbackErr) {
            console.error('[migrations] Rollback failed:', rollbackErr instanceof Error ? rollbackErr.message : String(rollbackErr));
        }
        // Re-enable FK enforcement regardless of the rollback outcome.
        try {
            db.exec('PRAGMA foreign_keys = ON');
        }
        catch (pragmaErr) {
            console.error('[migrations] Failed to re-enable foreign keys:', pragmaErr instanceof Error ? pragmaErr.message : String(pragmaErr));
        }
        const cause = error instanceof Error ? error.message : String(error);
        throw new MigrationError(`Failed to migrate from v16 to v17 (knowledge graph optimization): ${cause}`, 'migrate', 'knowledge_edges', error);
    }
}
|
|
1798
|
+
/**
|
|
1799
|
+
* Migrate from schema version 17 to version 18
|
|
1800
|
+
*
|
|
1801
|
+
* Changes in v18:
|
|
1802
|
+
* - entities.entity_type: Added 'medication', 'diagnosis' to CHECK constraint
|
|
1803
|
+
* - knowledge_nodes.entity_type: Added 'medication', 'diagnosis' to CHECK constraint
|
|
1804
|
+
*
|
|
1805
|
+
* SQLite CHECK constraints require table recreation to modify.
|
|
1806
|
+
*
|
|
1807
|
+
* @param db - Database instance from better-sqlite3
|
|
1808
|
+
* @throws MigrationError if migration fails
|
|
1809
|
+
*/
|
|
1810
|
+
function migrateV17ToV18(db) {
|
|
1811
|
+
const entityTypeCheck = `('person', 'organization', 'date', 'amount', 'case_number', 'location', 'statute', 'exhibit', 'medication', 'diagnosis', 'medical_device', 'other')`;
|
|
1812
|
+
try {
|
|
1813
|
+
db.exec('PRAGMA foreign_keys = OFF');
|
|
1814
|
+
db.exec('BEGIN TRANSACTION');
|
|
1815
|
+
// Step 1: Recreate entities table with expanded CHECK constraint
|
|
1816
|
+
db.exec(`
|
|
1817
|
+
CREATE TABLE entities_new (
|
|
1818
|
+
id TEXT PRIMARY KEY NOT NULL,
|
|
1819
|
+
document_id TEXT NOT NULL REFERENCES documents(id),
|
|
1820
|
+
entity_type TEXT NOT NULL CHECK (entity_type IN ${entityTypeCheck}),
|
|
1821
|
+
raw_text TEXT NOT NULL,
|
|
1822
|
+
normalized_text TEXT NOT NULL,
|
|
1823
|
+
confidence REAL NOT NULL DEFAULT 0.0,
|
|
1824
|
+
metadata TEXT,
|
|
1825
|
+
provenance_id TEXT NOT NULL REFERENCES provenance(id),
|
|
1826
|
+
created_at TEXT NOT NULL DEFAULT (datetime('now'))
|
|
1827
|
+
)
|
|
1828
|
+
`);
|
|
1829
|
+
db.exec('INSERT INTO entities_new SELECT * FROM entities');
|
|
1830
|
+
db.exec('DROP TABLE entities');
|
|
1831
|
+
db.exec('ALTER TABLE entities_new RENAME TO entities');
|
|
1832
|
+
// Recreate entities indexes
|
|
1833
|
+
db.exec('CREATE INDEX IF NOT EXISTS idx_entities_document_id ON entities(document_id)');
|
|
1834
|
+
db.exec('CREATE INDEX IF NOT EXISTS idx_entities_entity_type ON entities(entity_type)');
|
|
1835
|
+
db.exec('CREATE INDEX IF NOT EXISTS idx_entities_normalized_text ON entities(normalized_text)');
|
|
1836
|
+
// Step 2: Recreate knowledge_nodes table with expanded CHECK constraint
|
|
1837
|
+
// Include v20 columns (importance_score, resolution_type) so that SELECT * works
|
|
1838
|
+
// regardless of whether the source table was created fresh (with v20 cols) or via earlier migrations.
|
|
1839
|
+
const knColsV18 = db.pragma('table_info(knowledge_nodes)');
|
|
1840
|
+
const hasV20NodeCols = knColsV18.some((c) => c.name === 'importance_score');
|
|
1841
|
+
db.exec(`
|
|
1842
|
+
CREATE TABLE knowledge_nodes_new (
|
|
1843
|
+
id TEXT PRIMARY KEY,
|
|
1844
|
+
entity_type TEXT NOT NULL CHECK (entity_type IN ${entityTypeCheck}),
|
|
1845
|
+
canonical_name TEXT NOT NULL,
|
|
1846
|
+
normalized_name TEXT NOT NULL,
|
|
1847
|
+
aliases TEXT,
|
|
1848
|
+
document_count INTEGER NOT NULL DEFAULT 1,
|
|
1849
|
+
mention_count INTEGER NOT NULL DEFAULT 0,
|
|
1850
|
+
edge_count INTEGER NOT NULL DEFAULT 0,
|
|
1851
|
+
avg_confidence REAL NOT NULL DEFAULT 0.0,
|
|
1852
|
+
metadata TEXT,
|
|
1853
|
+
provenance_id TEXT NOT NULL,
|
|
1854
|
+
created_at TEXT NOT NULL,
|
|
1855
|
+
updated_at TEXT NOT NULL,
|
|
1856
|
+
importance_score REAL,
|
|
1857
|
+
resolution_type TEXT,
|
|
1858
|
+
FOREIGN KEY (provenance_id) REFERENCES provenance(id)
|
|
1859
|
+
)
|
|
1860
|
+
`);
|
|
1861
|
+
if (hasV20NodeCols) {
|
|
1862
|
+
db.exec('INSERT INTO knowledge_nodes_new SELECT * FROM knowledge_nodes');
|
|
1863
|
+
}
|
|
1864
|
+
else {
|
|
1865
|
+
db.exec(`INSERT INTO knowledge_nodes_new (id, entity_type, canonical_name, normalized_name, aliases, document_count, mention_count, edge_count, avg_confidence, metadata, provenance_id, created_at, updated_at)
|
|
1866
|
+
SELECT id, entity_type, canonical_name, normalized_name, aliases, document_count, mention_count, edge_count, avg_confidence, metadata, provenance_id, created_at, updated_at FROM knowledge_nodes`);
|
|
1867
|
+
}
|
|
1868
|
+
// Drop FTS table and triggers before dropping knowledge_nodes (FTS references it)
|
|
1869
|
+
db.exec('DROP TRIGGER IF EXISTS knowledge_nodes_fts_insert');
|
|
1870
|
+
db.exec('DROP TRIGGER IF EXISTS knowledge_nodes_fts_delete');
|
|
1871
|
+
db.exec('DROP TRIGGER IF EXISTS knowledge_nodes_fts_update');
|
|
1872
|
+
db.exec('DROP TABLE IF EXISTS knowledge_nodes_fts');
|
|
1873
|
+
db.exec('DROP TABLE knowledge_nodes');
|
|
1874
|
+
db.exec('ALTER TABLE knowledge_nodes_new RENAME TO knowledge_nodes');
|
|
1875
|
+
// Recreate knowledge_nodes indexes
|
|
1876
|
+
db.exec('CREATE INDEX IF NOT EXISTS idx_kn_entity_type ON knowledge_nodes(entity_type)');
|
|
1877
|
+
db.exec('CREATE INDEX IF NOT EXISTS idx_kn_normalized_name ON knowledge_nodes(normalized_name)');
|
|
1878
|
+
db.exec('CREATE INDEX IF NOT EXISTS idx_kn_document_count ON knowledge_nodes(document_count)');
|
|
1879
|
+
db.exec('CREATE INDEX IF NOT EXISTS idx_knowledge_nodes_canonical_lower ON knowledge_nodes(canonical_name COLLATE NOCASE)');
|
|
1880
|
+
// Recreate FTS5 table and triggers
|
|
1881
|
+
db.exec(`CREATE VIRTUAL TABLE IF NOT EXISTS knowledge_nodes_fts USING fts5(canonical_name, content='knowledge_nodes', content_rowid='rowid')`);
|
|
1882
|
+
db.exec(`CREATE TRIGGER IF NOT EXISTS knowledge_nodes_fts_insert AFTER INSERT ON knowledge_nodes BEGIN INSERT INTO knowledge_nodes_fts(rowid, canonical_name) VALUES (new.rowid, new.canonical_name); END`);
|
|
1883
|
+
db.exec(`CREATE TRIGGER IF NOT EXISTS knowledge_nodes_fts_delete AFTER DELETE ON knowledge_nodes BEGIN INSERT INTO knowledge_nodes_fts(knowledge_nodes_fts, rowid, canonical_name) VALUES ('delete', old.rowid, old.canonical_name); END`);
|
|
1884
|
+
db.exec(`CREATE TRIGGER IF NOT EXISTS knowledge_nodes_fts_update AFTER UPDATE ON knowledge_nodes BEGIN INSERT INTO knowledge_nodes_fts(knowledge_nodes_fts, rowid, canonical_name) VALUES ('delete', old.rowid, old.canonical_name); INSERT INTO knowledge_nodes_fts(rowid, canonical_name) VALUES (new.rowid, new.canonical_name); END`);
|
|
1885
|
+
// Repopulate FTS from existing data
|
|
1886
|
+
const nodeCount = db.prepare('SELECT COUNT(*) as cnt FROM knowledge_nodes').get();
|
|
1887
|
+
if (nodeCount.cnt > 0) {
|
|
1888
|
+
db.exec(`
|
|
1889
|
+
INSERT INTO knowledge_nodes_fts(rowid, canonical_name)
|
|
1890
|
+
SELECT rowid, canonical_name FROM knowledge_nodes
|
|
1891
|
+
`);
|
|
1892
|
+
}
|
|
1893
|
+
// M-5: Verify FK integrity BEFORE commit so violations cause rollback
|
|
1894
|
+
const fkViolations = db.pragma('foreign_key_check');
|
|
1895
|
+
if (fkViolations.length > 0) {
|
|
1896
|
+
throw new Error(`Foreign key integrity check failed after v17->v18 migration: ${fkViolations.length} violation(s). ` +
|
|
1897
|
+
`First: ${JSON.stringify(fkViolations[0])}`);
|
|
1898
|
+
}
|
|
1899
|
+
db.exec('COMMIT');
|
|
1900
|
+
db.exec('PRAGMA foreign_keys = ON');
|
|
1901
|
+
}
|
|
1902
|
+
catch (error) {
|
|
1903
|
+
try {
|
|
1904
|
+
db.exec('ROLLBACK');
|
|
1905
|
+
db.exec('PRAGMA foreign_keys = ON');
|
|
1906
|
+
}
|
|
1907
|
+
catch (rollbackErr) {
|
|
1908
|
+
console.error('[migrations] Rollback failed:', rollbackErr instanceof Error ? rollbackErr.message : String(rollbackErr));
|
|
1909
|
+
}
|
|
1910
|
+
const cause = error instanceof Error ? error.message : String(error);
|
|
1911
|
+
throw new MigrationError(`Failed to migrate from v17 to v18 (medical entity types): ${cause}`, 'migrate', 'entities', error);
|
|
1912
|
+
}
|
|
1913
|
+
}
|
|
1914
|
+
/**
 * Migrate from schema version 18 to version 19
 *
 * Changes in v19:
 * - entity_extraction_segments: New table for chunked entity extraction with provenance
 *   Stores 50K-character segments with 10% overlap for focused Gemini extraction.
 *   Each segment records its exact character_start/character_end in the OCR text
 *   and links to provenance for full traceability.
 * - 3 new indexes: idx_segments_document, idx_segments_status, idx_segments_doc_status
 *
 * The table and its indexes are created inside a single transaction. On any
 * failure a ROLLBACK is attempted (a failing ROLLBACK is only logged) and the
 * failure is rethrown wrapped in a MigrationError.
 *
 * @param db - Database instance from better-sqlite3
 * @throws MigrationError if migration fails
 */
function migrateV18ToV19(db) {
    // [index name, indexed column list] for each index on the new table
    const segmentIndexes = [
        ['idx_segments_document', 'document_id'],
        ['idx_segments_status', 'extraction_status'],
        ['idx_segments_doc_status', 'document_id, extraction_status'],
    ];
    try {
        db.exec('BEGIN TRANSACTION');
        // Step 1: Create entity_extraction_segments table
        db.exec(CREATE_ENTITY_EXTRACTION_SEGMENTS_TABLE);
        // Step 2: Create indexes
        for (const [indexName, columns] of segmentIndexes) {
            db.exec(`CREATE INDEX IF NOT EXISTS ${indexName} ON entity_extraction_segments(${columns})`);
        }
        db.exec('COMMIT');
    }
    catch (migrationFailure) {
        try {
            db.exec('ROLLBACK');
        }
        catch (rollbackFailure) {
            const rollbackDetail = rollbackFailure instanceof Error ? rollbackFailure.message : String(rollbackFailure);
            console.error('[migrations] Rollback failed:', rollbackDetail);
        }
        const reason = migrationFailure instanceof Error ? migrationFailure.message : String(migrationFailure);
        throw new MigrationError(`Failed to migrate from v18 to v19 (entity extraction segments): ${reason}`, 'migrate', 'entity_extraction_segments', migrationFailure);
    }
}
|
|
1949
|
+
/**
 * Migrate from schema version 19 to version 20
 *
 * Changes in v20:
 * - knowledge_edges: Added valid_from, valid_until (TEXT) for temporal bounds
 * - knowledge_edges: Added normalized_weight (REAL DEFAULT 0) for weight normalization
 * - knowledge_edges: Added contradiction_count (INTEGER DEFAULT 0) for contradiction tracking
 * - knowledge_nodes: Added importance_score (REAL) for node ranking
 * - knowledge_nodes: Added resolution_type (TEXT) for entity resolution tracking
 * - chunks: Added ocr_quality_score (REAL)
 * - entity_embeddings: New (placeholder) table for entity vector embeddings
 * - vec_entity_embeddings: New (placeholder) sqlite-vec virtual table for entity semantic search
 * - 3 new indexes: idx_entity_embeddings_entity_id, idx_entity_embeddings_node_id,
 *   idx_entity_embeddings_content_hash
 *
 * Note: knowledge_nodes.updated_at already exists from the v16 schema, so it is NOT added here.
 *
 * Each column is only added when table_info does not already list it.
 *
 * @param db - Database instance from better-sqlite3
 * @throws {Error} the underlying failure, rethrown as-is (this function does not
 *   wrap it in MigrationError). A failing ROLLBACK is logged and never replaces
 *   the error that caused it.
 */
function migrateV19ToV20(db) {
    // Adds every [column, definition] pair to `table` unless table_info already
    // reports a column of that name.
    const addMissingColumns = (table, columnDefs) => {
        const existing = new Set(db.pragma(`table_info(${table})`).map((c) => c.name));
        for (const [column, definition] of columnDefs) {
            if (!existing.has(column)) {
                db.exec(`ALTER TABLE ${table} ADD COLUMN ${column} ${definition}`);
            }
        }
    };
    // M-5: PRAGMA foreign_keys in try-finally so it is re-enabled whether the
    // migration succeeds or throws
    db.exec('PRAGMA foreign_keys = OFF');
    try {
        // M-5: Wrap all DDL in a transaction for atomicity
        db.exec('BEGIN TRANSACTION');
        try {
            // Step 1: Add new columns to knowledge_edges
            addMissingColumns('knowledge_edges', [
                ['valid_from', 'TEXT'],
                ['valid_until', 'TEXT'],
                ['normalized_weight', 'REAL DEFAULT 0'],
                ['contradiction_count', 'INTEGER DEFAULT 0'],
            ]);
            // Step 2: Add new columns to knowledge_nodes
            addMissingColumns('knowledge_nodes', [
                ['importance_score', 'REAL'],
                ['resolution_type', 'TEXT'],
            ]);
            // Step 3: Add ocr_quality_score to chunks
            addMissingColumns('chunks', [['ocr_quality_score', 'REAL']]);
            // Step 4: Create placeholder entity_embeddings table (v21 will recreate with correct schema)
            db.exec(`CREATE TABLE IF NOT EXISTS entity_embeddings (
                id TEXT PRIMARY KEY,
                entity_id TEXT NOT NULL REFERENCES entities(id),
                node_id TEXT REFERENCES knowledge_nodes(id),
                embedding_model TEXT NOT NULL,
                dimensions INTEGER NOT NULL,
                content_hash TEXT NOT NULL,
                created_at TEXT NOT NULL DEFAULT (datetime('now')),
                provenance_id TEXT REFERENCES provenance(id)
            )`);
            db.exec('CREATE INDEX IF NOT EXISTS idx_entity_embeddings_entity_id ON entity_embeddings(entity_id)');
            db.exec('CREATE INDEX IF NOT EXISTS idx_entity_embeddings_node_id ON entity_embeddings(node_id)');
            db.exec('CREATE INDEX IF NOT EXISTS idx_entity_embeddings_content_hash ON entity_embeddings(content_hash)');
            db.exec('COMMIT');
        }
        catch (error) {
            // Guard the ROLLBACK: if it throws it must not mask the error that
            // actually aborted the migration.
            try {
                db.exec('ROLLBACK');
            }
            catch (rollbackErr) {
                console.error('[migrations] Rollback failed:', rollbackErr instanceof Error ? rollbackErr.message : String(rollbackErr));
            }
            throw error;
        }
        // Step 5: Create placeholder vec_entity_embeddings virtual table (v21 will recreate with correct PK)
        // Note: Virtual table creation (vec0) is placed outside the transaction because
        // vec0 virtual tables may not support transactional DDL in all SQLite builds.
        db.exec(`CREATE VIRTUAL TABLE IF NOT EXISTS vec_entity_embeddings USING vec0(
            id TEXT PRIMARY KEY,
            embedding float[768] distance_metric=cosine
        )`);
        // FK integrity check after all DDL is committed; a violation reported
        // here throws but does not undo the changes committed above.
        const fkViolations = db.pragma('foreign_key_check');
        if (fkViolations.length > 0) {
            throw new Error(`Foreign key integrity check failed after v19->v20 migration: ${fkViolations.length} violation(s). ` +
                `First: ${JSON.stringify(fkViolations[0])}`);
        }
    }
    finally {
        db.exec('PRAGMA foreign_keys = ON');
    }
}
|
|
2043
|
+
/**
 * Migrate from schema version 20 to version 21
 *
 * Changes in v21:
 * - Rebuild entity_embeddings table with correct columns:
 *   node_id, original_text, original_text_length, entity_type, document_count, model_name
 *   (v20 table had entity_id, embedding_model, dimensions which didn't match embed_entities code)
 * - Rebuild vec_entity_embeddings with entity_embedding_id PK (was id)
 *
 * Both tables are dropped and recreated; existing rows are not copied.
 *
 * @param db - Database instance from better-sqlite3
 * @throws {Error} the underlying failure, rethrown as-is (this function does not
 *   wrap it in MigrationError). A failing ROLLBACK is logged and never replaces
 *   the error that caused it.
 */
function migrateV20ToV21(db) {
    // M-6: PRAGMA foreign_keys in try-finally so it is re-enabled whether the
    // migration succeeds or throws
    db.exec('PRAGMA foreign_keys = OFF');
    try {
        // M-6: Wrap DROP + CREATE in a transaction for atomicity
        db.exec('BEGIN TRANSACTION');
        try {
            // Step 1: Drop and recreate entity_embeddings with correct schema
            // Safe because embed_entities never succeeded with the v20 schema
            // DROP TABLE removes the table's indexes automatically
            db.exec('DROP TABLE IF EXISTS entity_embeddings');
            db.exec(CREATE_ENTITY_EMBEDDINGS_TABLE);
            db.exec('CREATE INDEX IF NOT EXISTS idx_entity_embeddings_node_id ON entity_embeddings(node_id)');
            db.exec('CREATE INDEX IF NOT EXISTS idx_entity_embeddings_content_hash ON entity_embeddings(content_hash)');
            db.exec('COMMIT');
        }
        catch (error) {
            // Guard the ROLLBACK: if it throws it must not mask the error that
            // actually aborted the migration.
            try {
                db.exec('ROLLBACK');
            }
            catch (rollbackErr) {
                console.error('[migrations] Rollback failed:', rollbackErr instanceof Error ? rollbackErr.message : String(rollbackErr));
            }
            throw error;
        }
        // Step 2: Drop and recreate vec_entity_embeddings with correct PK column name
        // Note: Virtual table operations (vec0) are placed outside the transaction because
        // vec0 virtual tables may not support transactional DDL in all SQLite builds.
        db.exec('DROP TABLE IF EXISTS vec_entity_embeddings');
        db.exec(CREATE_VEC_ENTITY_EMBEDDINGS_TABLE);
        // FK integrity check after all DDL is committed; a violation reported
        // here throws but does not undo the changes committed above.
        const fkViolations = db.pragma('foreign_key_check');
        if (fkViolations.length > 0) {
            throw new Error(`Foreign key integrity check failed after v20->v21 migration: ${fkViolations.length} violation(s). ` +
                `First: ${JSON.stringify(fkViolations[0])}`);
        }
    }
    finally {
        db.exec('PRAGMA foreign_keys = ON');
    }
}
|
|
2088
|
+
/**
 * Migrate from schema version 21 to version 22
 *
 * Fixes FTS tokenizer and trigger inconsistencies (F-S3, F-S4, F-S5):
 * - F-S3: knowledge_nodes_fts was created WITHOUT `porter unicode61` tokenizer
 *   in v18 migration (fresh DB has it). Recreated with correct tokenizer.
 * - F-S4: v18 update trigger fires on ANY column update. Fixed to fire only
 *   on `canonical_name` changes (AFTER UPDATE OF canonical_name).
 * - F-S5: v18 triggers use `_insert/_delete/_update` naming. Fixed to use
 *   `_ai/_ad/_au` naming convention matching fresh schema definitions.
 *
 * Everything runs in one transaction; on failure a ROLLBACK is attempted
 * (a failing ROLLBACK is only logged) and a MigrationError is thrown.
 *
 * @param db - Database instance from better-sqlite3
 * @throws MigrationError if migration fails
 */
function migrateV21ToV22(db) {
    // Both the v18 trigger suffixes and the fresh-schema suffixes, in drop order
    const triggerSuffixes = ['insert', 'delete', 'update', 'ai', 'ad', 'au'];
    try {
        db.exec('BEGIN TRANSACTION');
        // Step 1: Drop ALL trigger name variants, then the old FTS table
        for (const suffix of triggerSuffixes) {
            db.exec(`DROP TRIGGER IF EXISTS knowledge_nodes_fts_${suffix}`);
        }
        db.exec('DROP TABLE IF EXISTS knowledge_nodes_fts');
        // Step 2: Recreate FTS table with porter tokenizer (matching schema-definitions.ts)
        db.exec(CREATE_KNOWLEDGE_NODES_FTS_TABLE);
        // Step 3: Create triggers with correct _ai/_ad/_au naming and
        // AFTER UPDATE OF canonical_name scoping (matching schema-definitions.ts)
        for (const triggerSql of CREATE_KNOWLEDGE_NODES_FTS_TRIGGERS) {
            db.exec(triggerSql);
        }
        // Step 4: Repopulate FTS from existing knowledge_nodes data (skipped when empty)
        const { cnt: existingNodes } = db.prepare('SELECT COUNT(*) as cnt FROM knowledge_nodes').get();
        if (existingNodes > 0) {
            db.exec(`
                INSERT INTO knowledge_nodes_fts(rowid, canonical_name)
                SELECT rowid, canonical_name FROM knowledge_nodes
            `);
        }
        db.exec('COMMIT');
    }
    catch (migrationFailure) {
        try {
            db.exec('ROLLBACK');
        }
        catch (rollbackFailure) {
            const rollbackDetail = rollbackFailure instanceof Error ? rollbackFailure.message : String(rollbackFailure);
            console.error('[migrations] Rollback failed:', rollbackDetail);
        }
        const reason = migrationFailure instanceof Error ? migrationFailure.message : String(migrationFailure);
        throw new MigrationError(`Failed to migrate from v21 to v22 (FTS tokenizer/trigger fix): ${reason}`, 'migrate', 'knowledge_nodes_fts', migrationFailure);
    }
}
|
|
2142
|
+
/**
 * Migrate from schema version 22 to version 23
 *
 * Changes in v23:
 * - Add 4 medical relationship types to knowledge_edges CHECK constraint:
 *   treated_with, administered_via, managed_by, interacts_with
 *
 * Strategy: Recreate knowledge_edges table with updated CHECK constraint,
 * copy all existing data, swap tables. When knowledge_edges does not exist
 * the (empty) transaction is committed and the function returns.
 *
 * Foreign key enforcement is switched off for the rebuild and back on
 * afterwards. On failure, ROLLBACK and the re-enable are attempted
 * independently so a failing ROLLBACK cannot leave enforcement switched off.
 *
 * @param db - Database instance from better-sqlite3
 * @throws MigrationError if migration fails
 */
function migrateV22ToV23(db) {
    try {
        db.exec('PRAGMA foreign_keys = OFF');
        db.exec('BEGIN TRANSACTION');
        // Check if knowledge_edges table exists (KG tables are only created in v15+)
        const tableExists = db
            .prepare("SELECT COUNT(*) as cnt FROM sqlite_master WHERE type='table' AND name='knowledge_edges'")
            .get();
        if (tableExists.cnt === 0) {
            // No knowledge_edges table - nothing to migrate
            db.exec('COMMIT');
            db.exec('PRAGMA foreign_keys = ON');
            return;
        }
        // Step 1: Create new table with expanded CHECK constraint
        db.exec(`
            CREATE TABLE knowledge_edges_new (
                id TEXT PRIMARY KEY,
                source_node_id TEXT NOT NULL,
                target_node_id TEXT NOT NULL,
                relationship_type TEXT NOT NULL CHECK (relationship_type IN (
                    'co_mentioned', 'co_located', 'works_at', 'represents',
                    'located_in', 'filed_in', 'cites', 'references',
                    'party_to', 'related_to', 'precedes', 'occurred_at',
                    'treated_with', 'administered_via', 'managed_by', 'interacts_with'
                )),
                weight REAL NOT NULL DEFAULT 1.0,
                evidence_count INTEGER NOT NULL DEFAULT 1,
                document_ids TEXT NOT NULL,
                metadata TEXT,
                provenance_id TEXT NOT NULL,
                created_at TEXT NOT NULL,
                valid_from TEXT,
                valid_until TEXT,
                normalized_weight REAL DEFAULT 0,
                contradiction_count INTEGER DEFAULT 0,
                FOREIGN KEY (source_node_id) REFERENCES knowledge_nodes(id),
                FOREIGN KEY (target_node_id) REFERENCES knowledge_nodes(id),
                FOREIGN KEY (provenance_id) REFERENCES provenance(id)
            )
        `);
        // Step 2: Copy all existing data (columns listed in the new table's declaration order)
        db.exec(`
            INSERT INTO knowledge_edges_new
            SELECT id, source_node_id, target_node_id, relationship_type,
                weight, evidence_count, document_ids, metadata,
                provenance_id, created_at, valid_from, valid_until,
                normalized_weight, contradiction_count
            FROM knowledge_edges
        `);
        // Step 3: Drop old table and rename
        db.exec('DROP TABLE knowledge_edges');
        db.exec('ALTER TABLE knowledge_edges_new RENAME TO knowledge_edges');
        // Step 4: Recreate indexes (matching schema-definitions.ts names)
        db.exec('CREATE INDEX IF NOT EXISTS idx_ke_source_node ON knowledge_edges(source_node_id)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_ke_target_node ON knowledge_edges(target_node_id)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_ke_relationship_type ON knowledge_edges(relationship_type)');
        // M-5: Verify FK integrity BEFORE commit so violations cause rollback
        const fkViolations = db.pragma('foreign_key_check');
        if (fkViolations.length > 0) {
            throw new Error(`Foreign key integrity check failed after v22->v23 migration: ${fkViolations.length} violation(s). ` +
                `First: ${JSON.stringify(fkViolations[0])}`);
        }
        db.exec('COMMIT');
        db.exec('PRAGMA foreign_keys = ON');
    }
    catch (error) {
        try {
            db.exec('ROLLBACK');
        }
        catch (rollbackErr) {
            console.error('[migrations] Rollback failed:', rollbackErr instanceof Error ? rollbackErr.message : String(rollbackErr));
        }
        // Re-enable FK enforcement in its own guard: it must run even when the
        // ROLLBACK above threw (e.g. no transaction was active).
        try {
            db.exec('PRAGMA foreign_keys = ON');
        }
        catch (pragmaErr) {
            console.error('[migrations] Failed to re-enable foreign keys:', pragmaErr instanceof Error ? pragmaErr.message : String(pragmaErr));
        }
        const cause = error instanceof Error ? error.message : String(error);
        throw new MigrationError(`Failed to migrate from v22 to v23 (medical relationship types): ${cause}`, 'migrate', 'knowledge_edges', error);
    }
}
|
|
2232
|
+
/**
 * Migrate from schema version 23 to version 24
 *
 * Changes in v24:
 * - Add index on entity_mentions(document_id) to eliminate full table scans
 *   on queries that filter or join entity_mentions by document_id.
 *
 * The index is only created when sqlite_master lists an entity_mentions table;
 * otherwise the function does nothing. No explicit transaction is opened.
 *
 * @param db - Database instance from better-sqlite3
 * @throws MigrationError if migration fails
 */
function migrateV23ToV24(db) {
    try {
        // entity_mentions table was created in v14 — nothing to index if it is absent
        const mentionsTable = db
            .prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='entity_mentions'")
            .get();
        if (!mentionsTable) {
            return;
        }
        db.exec('CREATE INDEX IF NOT EXISTS idx_entity_mentions_document_id ON entity_mentions(document_id)');
    }
    catch (failure) {
        const reason = failure instanceof Error ? failure.message : String(failure);
        throw new MigrationError(`Failed to migrate from v23 to v24 (entity_mentions document_id index): ${reason}`, 'migrate', 'entity_mentions', failure);
    }
}
|
|
2257
|
+
/**
 * Migrate from schema version 24 to version 25
 *
 * Changes in v25 (AI Knowledge Synthesis):
 * - corpus_intelligence: New table for corpus-level AI summaries
 * - document_narratives: New table for document-level AI narratives
 * - entity_roles: New table for AI-determined entity roles
 * - knowledge_edges: 6 new relationship types added to CHECK constraint
 *   (table rebuilt only when it exists)
 * - provenance: CORPUS_INTELLIGENCE added to type and source_type CHECK constraints
 *   (table rebuilt and its 4 indexes recreated)
 * - 6 new indexes for the new tables
 *
 * Foreign key enforcement is switched off for the rebuilds and back on
 * afterwards. On failure, ROLLBACK and the re-enable are attempted
 * independently so a failing ROLLBACK cannot leave enforcement switched off.
 *
 * @param db - Database instance from better-sqlite3
 * @throws MigrationError if migration fails
 */
function migrateV24ToV25(db) {
    try {
        db.exec('PRAGMA foreign_keys = OFF');
        db.exec('BEGIN TRANSACTION');
        // Step 1: Create 3 new tables
        db.exec(CREATE_CORPUS_INTELLIGENCE_TABLE);
        db.exec(CREATE_DOCUMENT_NARRATIVES_TABLE);
        db.exec(CREATE_ENTITY_ROLES_TABLE);
        // Step 2: Create 6 new indexes
        db.exec('CREATE INDEX IF NOT EXISTS idx_corpus_intelligence_database ON corpus_intelligence(database_name)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_document_narratives_document ON document_narratives(document_id)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_entity_roles_node ON entity_roles(node_id)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_entity_roles_theme ON entity_roles(theme)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_entity_roles_role ON entity_roles(role)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_entity_roles_scope ON entity_roles(scope, scope_id)');
        // Step 3: Expand knowledge_edges CHECK constraint with 6 new relationship types
        const edgesTableExists = db.prepare("SELECT COUNT(*) as cnt FROM sqlite_master WHERE type='table' AND name='knowledge_edges'").get();
        if (edgesTableExists.cnt > 0) {
            db.exec(`
                CREATE TABLE knowledge_edges_new (
                    id TEXT PRIMARY KEY,
                    source_node_id TEXT NOT NULL,
                    target_node_id TEXT NOT NULL,
                    relationship_type TEXT NOT NULL CHECK (relationship_type IN (
                        'co_mentioned', 'co_located', 'works_at', 'represents',
                        'located_in', 'filed_in', 'cites', 'references',
                        'party_to', 'related_to', 'precedes', 'occurred_at',
                        'treated_with', 'administered_via', 'managed_by', 'interacts_with',
                        'diagnosed_with', 'prescribed_by', 'admitted_to', 'supervised_by', 'filed_by', 'contraindicated_with'
                    )),
                    weight REAL NOT NULL DEFAULT 1.0,
                    evidence_count INTEGER NOT NULL DEFAULT 1,
                    document_ids TEXT NOT NULL,
                    metadata TEXT,
                    provenance_id TEXT NOT NULL,
                    created_at TEXT NOT NULL,
                    valid_from TEXT,
                    valid_until TEXT,
                    normalized_weight REAL DEFAULT 0,
                    contradiction_count INTEGER DEFAULT 0,
                    FOREIGN KEY (source_node_id) REFERENCES knowledge_nodes(id),
                    FOREIGN KEY (target_node_id) REFERENCES knowledge_nodes(id),
                    FOREIGN KEY (provenance_id) REFERENCES provenance(id)
                )
            `);
            db.exec(`
                INSERT INTO knowledge_edges_new
                SELECT id, source_node_id, target_node_id, relationship_type,
                    weight, evidence_count, document_ids, metadata,
                    provenance_id, created_at, valid_from, valid_until,
                    normalized_weight, contradiction_count
                FROM knowledge_edges
            `);
            db.exec('DROP TABLE knowledge_edges');
            db.exec('ALTER TABLE knowledge_edges_new RENAME TO knowledge_edges');
            db.exec('CREATE INDEX IF NOT EXISTS idx_ke_source_node ON knowledge_edges(source_node_id)');
            db.exec('CREATE INDEX IF NOT EXISTS idx_ke_target_node ON knowledge_edges(target_node_id)');
            db.exec('CREATE INDEX IF NOT EXISTS idx_ke_relationship_type ON knowledge_edges(relationship_type)');
        }
        // Step 4: Add CORPUS_INTELLIGENCE to provenance type and source_type CHECK constraints
        db.exec(`
            CREATE TABLE provenance_new (
                id TEXT PRIMARY KEY,
                type TEXT NOT NULL CHECK (type IN ('DOCUMENT', 'OCR_RESULT', 'CHUNK', 'IMAGE', 'VLM_DESCRIPTION', 'EMBEDDING', 'EXTRACTION', 'FORM_FILL', 'ENTITY_EXTRACTION', 'COMPARISON', 'CLUSTERING', 'KNOWLEDGE_GRAPH', 'CORPUS_INTELLIGENCE')),
                created_at TEXT NOT NULL,
                processed_at TEXT NOT NULL,
                source_file_created_at TEXT,
                source_file_modified_at TEXT,
                source_type TEXT NOT NULL CHECK (source_type IN ('FILE', 'OCR', 'CHUNKING', 'IMAGE_EXTRACTION', 'VLM', 'VLM_DEDUP', 'EMBEDDING', 'EXTRACTION', 'FORM_FILL', 'ENTITY_EXTRACTION', 'COMPARISON', 'CLUSTERING', 'KNOWLEDGE_GRAPH', 'CORPUS_INTELLIGENCE')),
                source_path TEXT,
                source_id TEXT,
                root_document_id TEXT NOT NULL,
                location TEXT,
                content_hash TEXT NOT NULL,
                input_hash TEXT,
                file_hash TEXT,
                processor TEXT NOT NULL,
                processor_version TEXT NOT NULL,
                processing_params TEXT NOT NULL,
                processing_duration_ms INTEGER,
                processing_quality_score REAL,
                parent_id TEXT,
                parent_ids TEXT NOT NULL,
                chain_depth INTEGER NOT NULL,
                chain_path TEXT,
                FOREIGN KEY (source_id) REFERENCES provenance_new(id),
                FOREIGN KEY (parent_id) REFERENCES provenance_new(id)
            )
        `);
        // NOTE(review): SELECT * copies positionally, so this assumes the existing
        // provenance table declares its columns in the same order as provenance_new
        // — confirm against the earlier migrations that altered provenance.
        db.exec('INSERT INTO provenance_new SELECT * FROM provenance');
        db.exec('DROP TABLE provenance');
        db.exec('ALTER TABLE provenance_new RENAME TO provenance');
        db.exec('CREATE INDEX IF NOT EXISTS idx_provenance_source_id ON provenance(source_id)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_provenance_type ON provenance(type)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_provenance_root_document_id ON provenance(root_document_id)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_provenance_parent_id ON provenance(parent_id)');
        // M-5: Verify FK integrity BEFORE commit so violations cause rollback
        const fkViolations = db.pragma('foreign_key_check');
        if (fkViolations.length > 0) {
            throw new Error(`Foreign key integrity check failed after v24->v25 migration: ${fkViolations.length} violation(s). First: ${JSON.stringify(fkViolations[0])}`);
        }
        db.exec('COMMIT');
        db.exec('PRAGMA foreign_keys = ON');
    }
    catch (error) {
        try {
            db.exec('ROLLBACK');
        }
        catch (rollbackErr) {
            console.error('[migrations] Rollback failed:', rollbackErr instanceof Error ? rollbackErr.message : String(rollbackErr));
        }
        // Re-enable FK enforcement in its own guard: it must run even when the
        // ROLLBACK above threw (e.g. no transaction was active).
        try {
            db.exec('PRAGMA foreign_keys = ON');
        }
        catch (pragmaErr) {
            console.error('[migrations] Failed to re-enable foreign keys:', pragmaErr instanceof Error ? pragmaErr.message : String(pragmaErr));
        }
        const cause = error instanceof Error ? error.message : String(error);
        throw new MigrationError(`Failed to migrate from v24 to v25 (AI Knowledge Synthesis tables): ${cause}`, 'migrate', 'corpus_intelligence', error);
    }
}
|
|
2387
|
+
/**
|
|
2388
|
+
* Migrate from schema version 25 to version 26
|
|
2389
|
+
*
|
|
2390
|
+
* BREAKING CHANGE: Removes all entity extraction and knowledge graph tables.
|
|
2391
|
+
* These features are being removed entirely - no backwards compatibility.
|
|
2392
|
+
*
|
|
2393
|
+
* Drops:
|
|
2394
|
+
* - entities, entity_mentions, knowledge_nodes, knowledge_edges
|
|
2395
|
+
* - node_entity_links, entity_extraction_segments
|
|
2396
|
+
* - entity_embeddings, vec_entity_embeddings
|
|
2397
|
+
* - corpus_intelligence, document_narratives, entity_roles
|
|
2398
|
+
* - knowledge_nodes_fts (FTS5 virtual table)
|
|
2399
|
+
* - All associated triggers and indexes
|
|
2400
|
+
* - Recreates provenance table without ENTITY_EXTRACTION/KNOWLEDGE_GRAPH/CORPUS_INTELLIGENCE
|
|
2401
|
+
* - Recreates comparisons table without entity_diff_json column
|
|
2402
|
+
*/
|
|
2403
|
+
function migrateV25ToV26(db) {
    // Removes the entity-extraction / knowledge-graph schema inside one explicit
    // transaction: drops the FTS triggers, the indexes and the tables, then
    // rebuilds `provenance` (discarding rows whose type is no longer allowed)
    // and `comparisons` (without the entity_diff_json column).
    //
    // db: database handle exposing exec() and pragma().
    // Throws MigrationError when any step, or the foreign-key check, fails.
    //
    // Single source of truth for the provenance types that survive; it feeds
    // both the CHECK constraint and the row-copy filter below.
    const keptProvenanceTypes = "'DOCUMENT', 'OCR_RESULT', 'CHUNK', 'IMAGE', 'VLM_DESCRIPTION', 'EMBEDDING', 'EXTRACTION', 'FORM_FILL', 'COMPARISON', 'CLUSTERING'";
    // Tracks whether COMMIT already succeeded, so the error handler does not
    // attempt (and noisily fail) a ROLLBACK when no transaction is open.
    let committed = false;
    try {
        db.exec('PRAGMA foreign_keys = OFF');
        db.exec('BEGIN TRANSACTION');
        // Step 1: Drop entity/KG FTS triggers (must be before table drops)
        db.exec('DROP TRIGGER IF EXISTS knowledge_nodes_fts_ai');
        db.exec('DROP TRIGGER IF EXISTS knowledge_nodes_fts_ad');
        db.exec('DROP TRIGGER IF EXISTS knowledge_nodes_fts_au');
        // Step 2: Drop entity/KG indexes (IF EXISTS for safety)
        const entityKgIndexes = [
            'idx_entities_document_id', 'idx_entities_entity_type', 'idx_entities_normalized_text',
            'idx_entity_mentions_entity_id', 'idx_entity_mentions_document_id', 'idx_entity_mentions_chunk_id',
            'idx_kn_entity_type', 'idx_kn_normalized_name', 'idx_kn_document_count',
            'idx_ke_source_node', 'idx_ke_target_node', 'idx_ke_relationship_type',
            'idx_nel_node_id', 'idx_nel_document_id',
            'idx_knowledge_nodes_canonical_lower',
            'idx_segments_document', 'idx_segments_status', 'idx_segments_doc_status',
            'idx_entity_embeddings_node_id', 'idx_entity_embeddings_content_hash',
            'idx_corpus_intelligence_database',
            'idx_document_narratives_document',
            'idx_entity_roles_node', 'idx_entity_roles_theme', 'idx_entity_roles_role', 'idx_entity_roles_scope',
        ];
        for (const idx of entityKgIndexes) {
            db.exec(`DROP INDEX IF EXISTS ${idx}`);
        }
        // Step 3: Drop entity/KG tables in FK-safe order
        // Virtual tables first (no FK dependencies)
        db.exec('DROP TABLE IF EXISTS vec_entity_embeddings');
        db.exec('DROP TABLE IF EXISTS knowledge_nodes_fts');
        // Tables with outgoing FKs first
        db.exec('DROP TABLE IF EXISTS entity_roles');
        db.exec('DROP TABLE IF EXISTS document_narratives');
        db.exec('DROP TABLE IF EXISTS corpus_intelligence');
        db.exec('DROP TABLE IF EXISTS entity_embeddings');
        db.exec('DROP TABLE IF EXISTS entity_extraction_segments');
        db.exec('DROP TABLE IF EXISTS node_entity_links');
        db.exec('DROP TABLE IF EXISTS knowledge_edges');
        db.exec('DROP TABLE IF EXISTS entity_mentions');
        db.exec('DROP TABLE IF EXISTS entities');
        db.exec('DROP TABLE IF EXISTS knowledge_nodes');
        // Step 4: Recreate provenance table without entity/KG types
        db.exec(`
      CREATE TABLE provenance_new (
        id TEXT PRIMARY KEY,
        type TEXT NOT NULL CHECK (type IN (${keptProvenanceTypes})),
        created_at TEXT NOT NULL,
        processed_at TEXT NOT NULL,
        source_file_created_at TEXT,
        source_file_modified_at TEXT,
        source_type TEXT NOT NULL CHECK (source_type IN ('FILE', 'OCR', 'CHUNKING', 'IMAGE_EXTRACTION', 'VLM', 'VLM_DEDUP', 'EMBEDDING', 'EXTRACTION', 'FORM_FILL', 'COMPARISON', 'CLUSTERING')),
        source_path TEXT,
        source_id TEXT,
        root_document_id TEXT NOT NULL,
        location TEXT,
        content_hash TEXT NOT NULL,
        input_hash TEXT,
        file_hash TEXT,
        processor TEXT NOT NULL,
        processor_version TEXT NOT NULL,
        processing_params TEXT NOT NULL,
        processing_duration_ms INTEGER,
        processing_quality_score REAL,
        parent_id TEXT,
        parent_ids TEXT NOT NULL,
        chain_depth INTEGER NOT NULL,
        chain_path TEXT,
        FOREIGN KEY (source_id) REFERENCES provenance_new(id),
        FOREIGN KEY (parent_id) REFERENCES provenance_new(id)
      )
    `);
        // Only copy rows with valid types (discard entity/KG provenance records)
        db.exec(`
      INSERT INTO provenance_new SELECT * FROM provenance
      WHERE type IN (${keptProvenanceTypes})
    `);
        db.exec('DROP TABLE provenance');
        db.exec('ALTER TABLE provenance_new RENAME TO provenance');
        db.exec('CREATE INDEX IF NOT EXISTS idx_provenance_source_id ON provenance(source_id)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_provenance_type ON provenance(type)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_provenance_root_document_id ON provenance(root_document_id)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_provenance_parent_id ON provenance(parent_id)');
        // Step 5: Recreate comparisons table without entity_diff_json column
        db.exec(`
      CREATE TABLE comparisons_new (
        id TEXT PRIMARY KEY NOT NULL,
        document_id_1 TEXT NOT NULL REFERENCES documents(id),
        document_id_2 TEXT NOT NULL REFERENCES documents(id),
        similarity_ratio REAL NOT NULL,
        text_diff_json TEXT NOT NULL,
        structural_diff_json TEXT NOT NULL,
        summary TEXT NOT NULL,
        content_hash TEXT NOT NULL,
        provenance_id TEXT NOT NULL REFERENCES provenance(id),
        created_at TEXT NOT NULL DEFAULT (datetime('now')),
        processing_duration_ms INTEGER
      )
    `);
        db.exec(`
      INSERT INTO comparisons_new
      SELECT id, document_id_1, document_id_2, similarity_ratio,
        text_diff_json, structural_diff_json,
        summary, content_hash, provenance_id, created_at, processing_duration_ms
      FROM comparisons
    `);
        db.exec('DROP TABLE comparisons');
        db.exec('ALTER TABLE comparisons_new RENAME TO comparisons');
        db.exec('CREATE INDEX IF NOT EXISTS idx_comparisons_doc1 ON comparisons(document_id_1)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_comparisons_doc2 ON comparisons(document_id_2)');
        db.exec('CREATE INDEX IF NOT EXISTS idx_comparisons_created ON comparisons(created_at)');
        // M-5: Verify FK integrity BEFORE commit so violations cause rollback
        const fkViolations = db.pragma('foreign_key_check');
        if (fkViolations.length > 0) {
            console.error(`[Migration v25->v26] FK violations detected: ${JSON.stringify(fkViolations.slice(0, 5))}`);
            throw new Error(`Foreign key integrity check failed after v25->v26 migration: ${fkViolations.length} violation(s)`);
        }
        db.exec('COMMIT');
        committed = true;
        db.exec('PRAGMA foreign_keys = ON');
        console.error('[Migration] v25 -> v26: Removed entity extraction and knowledge graph tables');
    }
    catch (error) {
        if (!committed) {
            try {
                db.exec('ROLLBACK');
            }
            catch (rollbackError) {
                console.error(`[migrations] CRITICAL: Failed to rollback v25->v26 migration: ${rollbackError instanceof Error ? rollbackError.message : String(rollbackError)}`);
            }
        }
        // Restoring the pragma is best-effort: a failure here must not replace
        // the original error, which is the one callers need to see.
        try {
            db.exec('PRAGMA foreign_keys = ON');
        }
        catch (fkErr) {
            console.error('[migrations] Failed to restore foreign_keys pragma:', fkErr instanceof Error ? fkErr.message : String(fkErr));
        }
        const cause = error instanceof Error ? error.message : String(error);
        throw new MigrationError(`Failed to migrate from v25 to v26 (entity/KG removal): ${cause}`, 'migrate', 'entity_kg_removal', error);
    }
}
|
|
2534
|
+
/**
 * Migrate from schema version 26 to version 27
 *
 * Changes in v27 (Hybrid Section-Aware Chunking - Phase 1), all on `chunks`:
 * - heading_context TEXT: heading text providing context for the chunk
 * - heading_level INTEGER: heading level of the section
 * - section_path TEXT: full section path (e.g., "Introduction > Background")
 * - content_types TEXT: JSON array of content types in the chunk
 * - is_atomic INTEGER NOT NULL DEFAULT 0: chunk should not be split further
 * - chunking_strategy TEXT NOT NULL DEFAULT 'hybrid_section'
 *
 * Each column is added with ALTER TABLE ADD COLUMN only when PRAGMA table_info
 * does not already list it, so a retry after a partial failure is safe.
 * Foreign-key enforcement is switched off for the duration and switched back
 * on for both the success and the failure path.
 *
 * @param db - Database instance from better-sqlite3
 * @throws MigrationError if migration fails
 */
function migrateV26ToV27(db) {
    // [column name, column definition] in the order they are added.
    const chunkColumns = [
        ['heading_context', 'TEXT'],
        ['heading_level', 'INTEGER'],
        ['section_path', 'TEXT'],
        ['content_types', 'TEXT'],
        ['is_atomic', 'INTEGER NOT NULL DEFAULT 0'],
        ['chunking_strategy', "TEXT NOT NULL DEFAULT 'hybrid_section'"],
    ];
    try {
        // Everything that can fail sits inside the try so that FK enforcement
        // is always restored and every failure surfaces as a MigrationError.
        db.exec('PRAGMA foreign_keys = OFF');
        // Check existing columns for idempotency (safe on retry after partial failure)
        const columns = db.prepare('PRAGMA table_info(chunks)').all();
        const columnNames = new Set(columns.map((c) => c.name));
        const transaction = db.transaction(() => {
            for (const [name, definition] of chunkColumns) {
                if (!columnNames.has(name)) {
                    db.exec(`ALTER TABLE chunks ADD COLUMN ${name} ${definition}`);
                }
            }
            // M-5: FK integrity check inside transaction so violations cause rollback
            const fkViolations = db.pragma('foreign_key_check');
            if (fkViolations.length > 0) {
                throw new Error(`Foreign key integrity check failed after v26->v27 migration: ${fkViolations.length} violation(s). ` +
                    `First: ${JSON.stringify(fkViolations[0])}`);
            }
        });
        transaction();
        db.exec('PRAGMA foreign_keys = ON');
        console.error('[Migration] v26 -> v27: Added hybrid section-aware chunking columns to chunks table');
    }
    catch (error) {
        // Best-effort restore: a failure here must not hide the original error.
        try {
            db.exec('PRAGMA foreign_keys = ON');
        }
        catch (fkErr) {
            console.error('[migrations] Failed to restore foreign_keys pragma:', fkErr instanceof Error ? fkErr.message : String(fkErr));
        }
        const cause = error instanceof Error ? error.message : String(error);
        throw new MigrationError(`Failed to migrate from v26 to v27 (hybrid section-aware chunking columns): ${cause}`, 'migrate', 'chunks', error);
    }
}
|
|
2592
|
+
/**
 * Schema migration v27 -> v28: introduces persisted search results.
 *
 * Creates the `saved_searches` table (IF NOT EXISTS) together with its
 * indexes idx_saved_searches_name, idx_saved_searches_search_type and
 * idx_saved_searches_created. All statements run in one transaction so the
 * table and its indexes are created atomically.
 *
 * @param db - Database instance from better-sqlite3
 * @throws MigrationError if migration fails
 */
function migrateV27ToV28(db) {
    console.error('[MIGRATION] Applying v27 → v28: Add saved_searches table');
    const savedSearchesDdl = `
        CREATE TABLE IF NOT EXISTS saved_searches (
          id TEXT PRIMARY KEY,
          name TEXT NOT NULL,
          query TEXT NOT NULL,
          search_type TEXT NOT NULL CHECK (search_type IN ('bm25', 'semantic', 'hybrid')),
          search_params TEXT NOT NULL DEFAULT '{}',
          result_count INTEGER NOT NULL,
          result_ids TEXT NOT NULL DEFAULT '[]',
          created_at TEXT NOT NULL DEFAULT (datetime('now')),
          notes TEXT
        )
      `;
    // Executed in order inside a single transaction (L-5: atomicity).
    const statements = [
        savedSearchesDdl,
        'CREATE INDEX IF NOT EXISTS idx_saved_searches_name ON saved_searches(name)',
        'CREATE INDEX IF NOT EXISTS idx_saved_searches_search_type ON saved_searches(search_type)',
        'CREATE INDEX IF NOT EXISTS idx_saved_searches_created ON saved_searches(created_at DESC)',
    ];
    try {
        const applyAll = db.transaction(() => {
            for (const sql of statements) {
                db.exec(sql);
            }
        });
        applyAll();
        console.error('[MIGRATION] v28 migration complete: saved_searches table created');
    }
    catch (error) {
        const cause = error instanceof Error ? error.message : String(error);
        throw new MigrationError(`Failed to migrate from v27 to v28 (saved_searches table): ${cause}`, 'migrate', 'saved_searches', error);
    }
}
|
|
2632
|
+
/**
 * Schema migration v28 -> v29: tagging support.
 *
 * Executes the CREATE_TAGS_TABLE and CREATE_ENTITY_TAGS_TABLE statements and
 * then creates two indexes on entity_tags:
 * - idx_entity_tags_entity on (entity_id, entity_type)
 * - idx_entity_tags_tag on (tag_id)
 * Everything runs in one transaction so the change is atomic.
 *
 * @param db - Database instance from better-sqlite3
 * @throws MigrationError if migration fails
 */
function migrateV28ToV29(db) {
    console.error('[MIGRATION] Applying v28 → v29: Add tags and entity_tags tables');
    try {
        // L-5: one transaction for the tables and their indexes
        const applyAll = db.transaction(() => {
            const statements = [
                CREATE_TAGS_TABLE,
                CREATE_ENTITY_TAGS_TABLE,
                'CREATE INDEX IF NOT EXISTS idx_entity_tags_entity ON entity_tags(entity_id, entity_type)',
                'CREATE INDEX IF NOT EXISTS idx_entity_tags_tag ON entity_tags(tag_id)',
            ];
            for (const sql of statements) {
                db.exec(sql);
            }
        });
        applyAll();
        console.error('[MIGRATION] v29 migration complete: tags and entity_tags tables created');
    }
    catch (error) {
        const cause = error instanceof Error ? error.message : String(error);
        throw new MigrationError(`Failed to migrate from v28 to v29 (tags tables): ${cause}`, 'migrate', 'tags', error);
    }
}
|
|
2661
|
+
/**
 * Schema migration v29 -> v30: document full-text search, saved-search
 * analytics columns and chunk indexes.
 *
 * Steps, in execution order:
 * 1. Execute CREATE_DOCUMENTS_FTS_TABLE and every statement in
 *    CREATE_DOCUMENTS_FTS_TRIGGERS (outside the transaction).
 * 2. Inside one transaction:
 *    - clear documents_fts ('delete-all') and repopulate it from `documents`
 *      (doc_title, doc_author, doc_subject; NULLs stored as ''),
 *    - add saved_searches.last_executed_at TEXT and
 *      saved_searches.execution_count INTEGER DEFAULT 0 when missing,
 *    - create idx_chunks_section_path and idx_chunks_heading_level.
 *
 * @param db - Database instance from better-sqlite3
 * @throws MigrationError if migration fails
 */
function migrateV29ToV30(db) {
    console.error('[MIGRATION] Applying v29 → v30: Documents FTS5, saved search analytics, chunk indexes');
    // [column name, column definition] added to saved_searches when absent.
    const analyticsColumns = [
        ['last_executed_at', 'TEXT'],
        ['execution_count', 'INTEGER DEFAULT 0'],
    ];
    const chunkIndexes = [
        'CREATE INDEX IF NOT EXISTS idx_chunks_section_path ON chunks(section_path)',
        'CREATE INDEX IF NOT EXISTS idx_chunks_heading_level ON chunks(heading_level)',
    ];
    try {
        // FTS5 virtual table creation is kept outside the transaction because
        // virtual tables manage their own storage and may not support
        // transactional DDL.
        db.exec(CREATE_DOCUMENTS_FTS_TABLE);
        for (const triggerSql of CREATE_DOCUMENTS_FTS_TRIGGERS) {
            db.exec(triggerSql);
        }
        // L-5: the delete-all + insert pair must be atomic to avoid an empty
        // index on crash; the remaining DDL rides along in the same transaction.
        const applyRest = db.transaction(() => {
            // Clearing first keeps a crash-retry from duplicating rows.
            db.exec("INSERT INTO documents_fts(documents_fts) VALUES('delete-all')");
            db.exec(`
        INSERT INTO documents_fts(rowid, doc_title, doc_author, doc_subject)
        SELECT rowid, COALESCE(doc_title, ''), COALESCE(doc_author, ''), COALESCE(doc_subject, '')
        FROM documents
      `);
            // Idempotent column additions: consult table_info before ALTER.
            const present = new Set(db.prepare('PRAGMA table_info(saved_searches)').all().map((c) => c.name));
            for (const [name, definition] of analyticsColumns) {
                if (!present.has(name)) {
                    db.exec(`ALTER TABLE saved_searches ADD COLUMN ${name} ${definition}`);
                }
            }
            for (const indexSql of chunkIndexes) {
                db.exec(indexSql);
            }
        });
        applyRest();
        console.error('[MIGRATION] v30 migration complete: documents_fts, saved search analytics, chunk indexes');
    }
    catch (error) {
        const cause = error instanceof Error ? error.message : String(error);
        throw new MigrationError(`Failed to migrate from v29 to v30 (documents FTS, saved search analytics): ${cause}`, 'migrate', 'documents_fts', error);
    }
}
|
|
2715
|
+
/**
 * Schema migration v30 -> v31: document metadata indexes and VLM text
 * enrichment.
 *
 * Inside a single transaction this function:
 * - creates idx_documents_doc_author and idx_documents_doc_subject,
 * - appends the space-joined elements of images.vlm_structured_data
 *   `$.extractedText` (only when that value is a JSON array and the joined
 *   result is non-NULL) to embeddings.original_text for image embeddings,
 * - if a `vlm_fts` table exists, empties it and re-inserts the rows of
 *   `embeddings` whose image_id is not NULL,
 * - calls bumpVersion(31) last, so the version bump is atomic with the body
 *   (M-6): a crash rolls both back and the migration re-runs on restart.
 *
 * @param db - Database instance from better-sqlite3
 * @param bumpVersion - Callback to bump schema version (called inside transaction)
 * @throws MigrationError if migration fails
 */
function migrateV30ToV31(db, bumpVersion) {
    console.error('[MIGRATION] Applying v30 → v31: document metadata indexes, VLM text enrichment');
    // T2.10 backfill. H-3: the trailing "IS NOT NULL" subquery limits the update
    // to rows where GROUP_CONCAT yields a value, so nothing is appended when no
    // text is found. L-12: json_type(...) = 'array' is checked so json_each()
    // never iterates a non-array $.extractedText value.
    const backfillSql = `
        UPDATE embeddings SET original_text = original_text || ' ' || (
          SELECT GROUP_CONCAT(value, ' ')
          FROM images i, json_each(json_extract(i.vlm_structured_data, '$.extractedText'))
          WHERE i.id = embeddings.image_id
            AND i.vlm_structured_data IS NOT NULL
            AND json_valid(i.vlm_structured_data)
            AND json_extract(i.vlm_structured_data, '$.extractedText') IS NOT NULL
            AND json_type(json_extract(i.vlm_structured_data, '$.extractedText')) = 'array'
        )
        WHERE embeddings.image_id IS NOT NULL
          AND EXISTS (
            SELECT 1 FROM images i
            WHERE i.id = embeddings.image_id
              AND i.vlm_structured_data IS NOT NULL
              AND json_valid(i.vlm_structured_data)
              AND json_extract(i.vlm_structured_data, '$.extractedText') IS NOT NULL
              AND json_type(json_extract(i.vlm_structured_data, '$.extractedText')) = 'array'
          )
          AND (
            SELECT GROUP_CONCAT(value, ' ')
            FROM images i, json_each(json_extract(i.vlm_structured_data, '$.extractedText'))
            WHERE i.id = embeddings.image_id
              AND i.vlm_structured_data IS NOT NULL
              AND json_valid(i.vlm_structured_data)
              AND json_type(json_extract(i.vlm_structured_data, '$.extractedText')) = 'array'
          ) IS NOT NULL
      `;
    const refillVlmFtsSql = `
          INSERT INTO vlm_fts(rowid, original_text)
          SELECT rowid, original_text FROM embeddings WHERE image_id IS NOT NULL
        `;
    try {
        const runMigration = db.transaction(() => {
            db.exec('CREATE INDEX IF NOT EXISTS idx_documents_doc_author ON documents(doc_author)');
            db.exec('CREATE INDEX IF NOT EXISTS idx_documents_doc_subject ON documents(doc_subject)');
            db.exec(backfillSql);
            // H-4: look the table up first. A missing vlm_fts is skipped
            // cleanly; any other error propagates and fails the migration.
            const vlmFtsRow = db
                .prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='vlm_fts'")
                .get();
            if (!vlmFtsRow) {
                console.error('[MIGRATION] VLM FTS table does not exist yet, skipping rebuild');
            }
            else {
                // delete-all + selective re-insert is used instead of 'rebuild':
                // per the original author's note, 'rebuild' on an external-content
                // FTS5 table reads ALL embeddings rows, including chunk embeddings
                // (image_id IS NULL), which would create ghost VLM results.
                db.exec("INSERT INTO vlm_fts(vlm_fts) VALUES('delete-all')");
                db.exec(refillVlmFtsSql);
                console.error('[MIGRATION] VLM FTS index rebuilt with extracted text');
            }
            // M-6: version bump stays inside the transaction.
            bumpVersion(31);
        });
        runMigration();
        console.error('[MIGRATION] v31 migration complete: indexes + VLM text enrichment');
    }
    catch (error) {
        const cause = error instanceof Error ? error.message : String(error);
        throw new MigrationError(`Failed to migrate v30 to v31: ${cause}`, 'migrate', 'document_indexes', error);
    }
}
|
|
2807
|
+
/**
 * Schema migration v31 -> v32: multi-user, collaboration, workflow, CLM and
 * webhook support.
 *
 * With foreign-key enforcement switched off, one transaction:
 * - executes the ten CREATE_*_TABLE statements (users, audit_log, annotations,
 *   document_locks, workflow_states, approval_chains, approval_steps,
 *   obligations, playbooks, webhooks), users first because others reference it,
 * - adds provenance.user_id, agent_id, agent_metadata_json, chain_hash
 *   when missing,
 * - adds saved_searches.user_id, is_shared, alert_enabled, last_alert_at
 *   when missing,
 * - creates the indexes listed in the body.
 * Foreign-key enforcement is switched back on for success and failure alike.
 *
 * NOTE(review): this function issues 21 CREATE INDEX statements, while the
 * completion log message reports 23 - confirm which figure is intended.
 *
 * @param db - Database instance from better-sqlite3
 * @throws MigrationError if migration fails
 */
function migrateV31ToV32(db) {
    console.error('[MIGRATION] Applying v31 → v32: multi-user, collaboration, workflow, CLM, webhooks');
    // Columns to add, as [name, definition], keyed by the table they extend.
    const provenanceColumns = [
        ['user_id', 'TEXT'],
        ['agent_id', 'TEXT'],
        ['agent_metadata_json', 'TEXT'],
        ['chain_hash', 'TEXT'],
    ];
    const savedSearchColumns = [
        ['user_id', 'TEXT'],
        ['is_shared', 'INTEGER DEFAULT 0'],
        ['alert_enabled', 'INTEGER DEFAULT 0'],
        ['last_alert_at', 'TEXT'],
    ];
    // Indexes as [index name, "table(columns)"], in creation order.
    const indexes = [
        // Users
        ['idx_users_external_id', 'users(external_id)'],
        ['idx_users_role', 'users(role)'],
        // Audit log
        ['idx_audit_log_user', 'audit_log(user_id)'],
        ['idx_audit_log_action', 'audit_log(action)'],
        ['idx_audit_log_entity', 'audit_log(entity_type, entity_id)'],
        ['idx_audit_log_created', 'audit_log(created_at)'],
        // Annotations
        ['idx_annotations_document', 'annotations(document_id)'],
        ['idx_annotations_chunk', 'annotations(chunk_id)'],
        ['idx_annotations_user', 'annotations(user_id)'],
        ['idx_annotations_type', 'annotations(annotation_type)'],
        ['idx_annotations_status', 'annotations(status)'],
        // Workflow states
        ['idx_workflow_document', 'workflow_states(document_id)'],
        ['idx_workflow_state', 'workflow_states(state)'],
        ['idx_workflow_assigned', 'workflow_states(assigned_to)'],
        ['idx_workflow_due', 'workflow_states(due_date)'],
        // Approval steps
        ['idx_approval_steps_doc', 'approval_steps(document_id)'],
        ['idx_approval_steps_status', 'approval_steps(status)'],
        // Obligations
        ['idx_obligations_document', 'obligations(document_id)'],
        ['idx_obligations_type', 'obligations(obligation_type)'],
        ['idx_obligations_due', 'obligations(due_date)'],
        ['idx_obligations_status', 'obligations(status)'],
    ];
    // Reads the table's current columns, then adds only the missing ones, so a
    // retry after a partial run does not fail on duplicate columns.
    const addMissingColumns = (table, definitions) => {
        const present = new Set(db.prepare(`PRAGMA table_info(${table})`).all().map((c) => c.name));
        for (const [name, definition] of definitions) {
            if (!present.has(name)) {
                db.exec(`ALTER TABLE ${table} ADD COLUMN ${name} ${definition}`);
            }
        }
    };
    try {
        db.exec('PRAGMA foreign_keys = OFF');
        const applyAll = db.transaction(() => {
            // Step 1: new tables (users first, since others reference it)
            const tableStatements = [
                CREATE_USERS_TABLE,
                CREATE_AUDIT_LOG_TABLE,
                CREATE_ANNOTATIONS_TABLE,
                CREATE_DOCUMENT_LOCKS_TABLE,
                CREATE_WORKFLOW_STATES_TABLE,
                CREATE_APPROVAL_CHAINS_TABLE,
                CREATE_APPROVAL_STEPS_TABLE,
                CREATE_OBLIGATIONS_TABLE,
                CREATE_PLAYBOOKS_TABLE,
                CREATE_WEBHOOKS_TABLE,
            ];
            for (const ddl of tableStatements) {
                db.exec(ddl);
            }
            // Steps 2 and 3: new columns on existing tables
            addMissingColumns('provenance', provenanceColumns);
            addMissingColumns('saved_searches', savedSearchColumns);
            // Step 4: indexes
            for (const [name, target] of indexes) {
                db.exec(`CREATE INDEX IF NOT EXISTS ${name} ON ${target}`);
            }
        });
        applyAll();
        db.exec('PRAGMA foreign_keys = ON');
        console.error('[MIGRATION] v32 migration complete: 10 new tables, provenance + saved_searches columns, 23 indexes');
    }
    catch (error) {
        // Best-effort restore so a pragma failure cannot hide the real error.
        try {
            db.exec('PRAGMA foreign_keys = ON');
        }
        catch (fkErr) {
            console.error('[migrations] Failed to restore foreign_keys pragma:', fkErr instanceof Error ? fkErr.message : String(fkErr));
        }
        const cause = error instanceof Error ? error.message : String(error);
        throw new MigrationError(`Failed to migrate v31 to v32 (multi-user, collaboration, workflow, CLM, webhooks): ${cause}`, 'migrate', 'users', error);
    }
}
|
|
2910
|
+
//# sourceMappingURL=operations.js.map
|