ocr-provenance-mcp 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ocr-provenance-mcp might be problematic. Click here for more details.
- package/.env.example +55 -0
- package/LICENSE +78 -0
- package/README.md +1154 -0
- package/dist/bin-http.d.ts +24 -0
- package/dist/bin-http.d.ts.map +1 -0
- package/dist/bin-http.js +275 -0
- package/dist/bin-http.js.map +1 -0
- package/dist/bin-setup.d.ts +11 -0
- package/dist/bin-setup.d.ts.map +1 -0
- package/dist/bin-setup.js +610 -0
- package/dist/bin-setup.js.map +1 -0
- package/dist/bin.d.ts +16 -0
- package/dist/bin.d.ts.map +1 -0
- package/dist/bin.js +16 -0
- package/dist/bin.js.map +1 -0
- package/dist/index.d.ts +13 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +90 -0
- package/dist/index.js.map +1 -0
- package/dist/models/chunk.d.ts +136 -0
- package/dist/models/chunk.d.ts.map +1 -0
- package/dist/models/chunk.js +27 -0
- package/dist/models/chunk.js.map +1 -0
- package/dist/models/cluster.d.ts +79 -0
- package/dist/models/cluster.d.ts.map +1 -0
- package/dist/models/cluster.js +10 -0
- package/dist/models/cluster.js.map +1 -0
- package/dist/models/comparison.d.ts +62 -0
- package/dist/models/comparison.d.ts.map +1 -0
- package/dist/models/comparison.js +8 -0
- package/dist/models/comparison.js.map +1 -0
- package/dist/models/document.d.ts +104 -0
- package/dist/models/document.d.ts.map +1 -0
- package/dist/models/document.js +15 -0
- package/dist/models/document.js.map +1 -0
- package/dist/models/embedding.d.ts +87 -0
- package/dist/models/embedding.d.ts.map +1 -0
- package/dist/models/embedding.js +23 -0
- package/dist/models/embedding.js.map +1 -0
- package/dist/models/extraction.d.ts +15 -0
- package/dist/models/extraction.d.ts.map +1 -0
- package/dist/models/extraction.js +2 -0
- package/dist/models/extraction.js.map +1 -0
- package/dist/models/form-fill.d.ts +23 -0
- package/dist/models/form-fill.d.ts.map +1 -0
- package/dist/models/form-fill.js +2 -0
- package/dist/models/form-fill.js.map +1 -0
- package/dist/models/image.d.ts +177 -0
- package/dist/models/image.d.ts.map +1 -0
- package/dist/models/image.js +8 -0
- package/dist/models/image.js.map +1 -0
- package/dist/models/index.d.ts +14 -0
- package/dist/models/index.d.ts.map +1 -0
- package/dist/models/index.js +22 -0
- package/dist/models/index.js.map +1 -0
- package/dist/models/provenance.d.ts +174 -0
- package/dist/models/provenance.d.ts.map +1 -0
- package/dist/models/provenance.js +53 -0
- package/dist/models/provenance.js.map +1 -0
- package/dist/models/uploaded-file.d.ts +20 -0
- package/dist/models/uploaded-file.d.ts.map +1 -0
- package/dist/models/uploaded-file.js +2 -0
- package/dist/models/uploaded-file.js.map +1 -0
- package/dist/server/errors.d.ts +93 -0
- package/dist/server/errors.d.ts.map +1 -0
- package/dist/server/errors.js +256 -0
- package/dist/server/errors.js.map +1 -0
- package/dist/server/events.d.ts +36 -0
- package/dist/server/events.d.ts.map +1 -0
- package/dist/server/events.js +48 -0
- package/dist/server/events.js.map +1 -0
- package/dist/server/permissions.d.ts +26 -0
- package/dist/server/permissions.d.ts.map +1 -0
- package/dist/server/permissions.js +194 -0
- package/dist/server/permissions.js.map +1 -0
- package/dist/server/register-tools.d.ts +25 -0
- package/dist/server/register-tools.d.ts.map +1 -0
- package/dist/server/register-tools.js +102 -0
- package/dist/server/register-tools.js.map +1 -0
- package/dist/server/startup.d.ts +16 -0
- package/dist/server/startup.d.ts.map +1 -0
- package/dist/server/startup.js +37 -0
- package/dist/server/startup.js.map +1 -0
- package/dist/server/state.d.ts +166 -0
- package/dist/server/state.d.ts.map +1 -0
- package/dist/server/state.js +424 -0
- package/dist/server/state.js.map +1 -0
- package/dist/server/transports/http-transport.d.ts +37 -0
- package/dist/server/transports/http-transport.d.ts.map +1 -0
- package/dist/server/transports/http-transport.js +204 -0
- package/dist/server/transports/http-transport.js.map +1 -0
- package/dist/server/transports/index.d.ts +9 -0
- package/dist/server/transports/index.d.ts.map +1 -0
- package/dist/server/transports/index.js +9 -0
- package/dist/server/transports/index.js.map +1 -0
- package/dist/server/transports/session-manager.d.ts +40 -0
- package/dist/server/transports/session-manager.d.ts.map +1 -0
- package/dist/server/transports/session-manager.js +74 -0
- package/dist/server/transports/session-manager.js.map +1 -0
- package/dist/server/types.d.ts +82 -0
- package/dist/server/types.d.ts.map +1 -0
- package/dist/server/types.js +14 -0
- package/dist/server/types.js.map +1 -0
- package/dist/services/audit.d.ts +26 -0
- package/dist/services/audit.d.ts.map +1 -0
- package/dist/services/audit.js +43 -0
- package/dist/services/audit.js.map +1 -0
- package/dist/services/chunking/chunk-deduplicator.d.ts +33 -0
- package/dist/services/chunking/chunk-deduplicator.d.ts.map +1 -0
- package/dist/services/chunking/chunk-deduplicator.js +46 -0
- package/dist/services/chunking/chunk-deduplicator.js.map +1 -0
- package/dist/services/chunking/chunk-merger.d.ts +26 -0
- package/dist/services/chunking/chunk-merger.d.ts.map +1 -0
- package/dist/services/chunking/chunk-merger.js +94 -0
- package/dist/services/chunking/chunk-merger.js.map +1 -0
- package/dist/services/chunking/chunker.d.ts +62 -0
- package/dist/services/chunking/chunker.d.ts.map +1 -0
- package/dist/services/chunking/chunker.js +566 -0
- package/dist/services/chunking/chunker.js.map +1 -0
- package/dist/services/chunking/heading-normalizer.d.ts +33 -0
- package/dist/services/chunking/heading-normalizer.d.ts.map +1 -0
- package/dist/services/chunking/heading-normalizer.js +101 -0
- package/dist/services/chunking/heading-normalizer.js.map +1 -0
- package/dist/services/chunking/json-block-analyzer.d.ts +163 -0
- package/dist/services/chunking/json-block-analyzer.d.ts.map +1 -0
- package/dist/services/chunking/json-block-analyzer.js +1033 -0
- package/dist/services/chunking/json-block-analyzer.js.map +1 -0
- package/dist/services/chunking/markdown-parser.d.ts +75 -0
- package/dist/services/chunking/markdown-parser.d.ts.map +1 -0
- package/dist/services/chunking/markdown-parser.js +428 -0
- package/dist/services/chunking/markdown-parser.js.map +1 -0
- package/dist/services/chunking/text-normalizer.d.ts +20 -0
- package/dist/services/chunking/text-normalizer.d.ts.map +1 -0
- package/dist/services/chunking/text-normalizer.js +36 -0
- package/dist/services/chunking/text-normalizer.js.map +1 -0
- package/dist/services/clm/contract-schemas.d.ts +36 -0
- package/dist/services/clm/contract-schemas.d.ts.map +1 -0
- package/dist/services/clm/contract-schemas.js +92 -0
- package/dist/services/clm/contract-schemas.js.map +1 -0
- package/dist/services/clm/summarization.d.ts +46 -0
- package/dist/services/clm/summarization.d.ts.map +1 -0
- package/dist/services/clm/summarization.js +61 -0
- package/dist/services/clm/summarization.js.map +1 -0
- package/dist/services/clustering/clustering-service.d.ts +58 -0
- package/dist/services/clustering/clustering-service.d.ts.map +1 -0
- package/dist/services/clustering/clustering-service.js +467 -0
- package/dist/services/clustering/clustering-service.js.map +1 -0
- package/dist/services/comparison/diff-service.d.ts +41 -0
- package/dist/services/comparison/diff-service.d.ts.map +1 -0
- package/dist/services/comparison/diff-service.js +120 -0
- package/dist/services/comparison/diff-service.js.map +1 -0
- package/dist/services/embedding/embedder.d.ts +55 -0
- package/dist/services/embedding/embedder.d.ts.map +1 -0
- package/dist/services/embedding/embedder.js +202 -0
- package/dist/services/embedding/embedder.js.map +1 -0
- package/dist/services/embedding/nomic.d.ts +67 -0
- package/dist/services/embedding/nomic.d.ts.map +1 -0
- package/dist/services/embedding/nomic.js +280 -0
- package/dist/services/embedding/nomic.js.map +1 -0
- package/dist/services/gemini/circuit-breaker.d.ts +106 -0
- package/dist/services/gemini/circuit-breaker.d.ts.map +1 -0
- package/dist/services/gemini/circuit-breaker.js +237 -0
- package/dist/services/gemini/circuit-breaker.js.map +1 -0
- package/dist/services/gemini/client.d.ts +173 -0
- package/dist/services/gemini/client.d.ts.map +1 -0
- package/dist/services/gemini/client.js +483 -0
- package/dist/services/gemini/client.js.map +1 -0
- package/dist/services/gemini/config.d.ts +116 -0
- package/dist/services/gemini/config.d.ts.map +1 -0
- package/dist/services/gemini/config.js +118 -0
- package/dist/services/gemini/config.js.map +1 -0
- package/dist/services/gemini/index.d.ts +9 -0
- package/dist/services/gemini/index.d.ts.map +1 -0
- package/dist/services/gemini/index.js +13 -0
- package/dist/services/gemini/index.js.map +1 -0
- package/dist/services/gemini/rate-limiter.d.ts +62 -0
- package/dist/services/gemini/rate-limiter.d.ts.map +1 -0
- package/dist/services/gemini/rate-limiter.js +120 -0
- package/dist/services/gemini/rate-limiter.js.map +1 -0
- package/dist/services/images/extractor.d.ts +88 -0
- package/dist/services/images/extractor.d.ts.map +1 -0
- package/dist/services/images/extractor.js +340 -0
- package/dist/services/images/extractor.js.map +1 -0
- package/dist/services/images/optimizer.d.ts +130 -0
- package/dist/services/images/optimizer.d.ts.map +1 -0
- package/dist/services/images/optimizer.js +228 -0
- package/dist/services/images/optimizer.js.map +1 -0
- package/dist/services/ocr/datalab.d.ts +64 -0
- package/dist/services/ocr/datalab.d.ts.map +1 -0
- package/dist/services/ocr/datalab.js +425 -0
- package/dist/services/ocr/datalab.js.map +1 -0
- package/dist/services/ocr/errors.d.ts +38 -0
- package/dist/services/ocr/errors.d.ts.map +1 -0
- package/dist/services/ocr/errors.js +83 -0
- package/dist/services/ocr/errors.js.map +1 -0
- package/dist/services/ocr/file-manager.d.ts +76 -0
- package/dist/services/ocr/file-manager.d.ts.map +1 -0
- package/dist/services/ocr/file-manager.js +238 -0
- package/dist/services/ocr/file-manager.js.map +1 -0
- package/dist/services/ocr/form-fill.d.ts +48 -0
- package/dist/services/ocr/form-fill.d.ts.map +1 -0
- package/dist/services/ocr/form-fill.js +213 -0
- package/dist/services/ocr/form-fill.js.map +1 -0
- package/dist/services/ocr/processor.d.ts +95 -0
- package/dist/services/ocr/processor.d.ts.map +1 -0
- package/dist/services/ocr/processor.js +259 -0
- package/dist/services/ocr/processor.js.map +1 -0
- package/dist/services/provenance/agent-metadata.d.ts +82 -0
- package/dist/services/provenance/agent-metadata.d.ts.map +1 -0
- package/dist/services/provenance/agent-metadata.js +106 -0
- package/dist/services/provenance/agent-metadata.js.map +1 -0
- package/dist/services/provenance/chain-hash.d.ts +57 -0
- package/dist/services/provenance/chain-hash.d.ts.map +1 -0
- package/dist/services/provenance/chain-hash.js +131 -0
- package/dist/services/provenance/chain-hash.js.map +1 -0
- package/dist/services/provenance/exporter.d.ts +202 -0
- package/dist/services/provenance/exporter.d.ts.map +1 -0
- package/dist/services/provenance/exporter.js +457 -0
- package/dist/services/provenance/exporter.js.map +1 -0
- package/dist/services/provenance/index.d.ts +15 -0
- package/dist/services/provenance/index.d.ts.map +1 -0
- package/dist/services/provenance/index.js +17 -0
- package/dist/services/provenance/index.js.map +1 -0
- package/dist/services/provenance/tracker.d.ts +138 -0
- package/dist/services/provenance/tracker.d.ts.map +1 -0
- package/dist/services/provenance/tracker.js +293 -0
- package/dist/services/provenance/tracker.js.map +1 -0
- package/dist/services/provenance/verifier.d.ts +153 -0
- package/dist/services/provenance/verifier.d.ts.map +1 -0
- package/dist/services/provenance/verifier.js +536 -0
- package/dist/services/provenance/verifier.js.map +1 -0
- package/dist/services/python-pool.d.ts +70 -0
- package/dist/services/python-pool.d.ts.map +1 -0
- package/dist/services/python-pool.js +265 -0
- package/dist/services/python-pool.js.map +1 -0
- package/dist/services/search/bm25.d.ts +180 -0
- package/dist/services/search/bm25.d.ts.map +1 -0
- package/dist/services/search/bm25.js +656 -0
- package/dist/services/search/bm25.js.map +1 -0
- package/dist/services/search/fusion.d.ts +103 -0
- package/dist/services/search/fusion.d.ts.map +1 -0
- package/dist/services/search/fusion.js +122 -0
- package/dist/services/search/fusion.js.map +1 -0
- package/dist/services/search/local-reranker.d.ts +30 -0
- package/dist/services/search/local-reranker.d.ts.map +1 -0
- package/dist/services/search/local-reranker.js +123 -0
- package/dist/services/search/local-reranker.js.map +1 -0
- package/dist/services/search/quality.d.ts +11 -0
- package/dist/services/search/quality.d.ts.map +1 -0
- package/dist/services/search/quality.js +17 -0
- package/dist/services/search/quality.js.map +1 -0
- package/dist/services/search/query-classifier.d.ts +34 -0
- package/dist/services/search/query-classifier.d.ts.map +1 -0
- package/dist/services/search/query-classifier.js +114 -0
- package/dist/services/search/query-classifier.js.map +1 -0
- package/dist/services/search/query-expander.d.ts +73 -0
- package/dist/services/search/query-expander.d.ts.map +1 -0
- package/dist/services/search/query-expander.js +281 -0
- package/dist/services/search/query-expander.js.map +1 -0
- package/dist/services/search/reranker.d.ts +44 -0
- package/dist/services/search/reranker.d.ts.map +1 -0
- package/dist/services/search/reranker.js +101 -0
- package/dist/services/search/reranker.js.map +1 -0
- package/dist/services/storage/database/annotation-operations.d.ts +113 -0
- package/dist/services/storage/database/annotation-operations.d.ts.map +1 -0
- package/dist/services/storage/database/annotation-operations.js +177 -0
- package/dist/services/storage/database/annotation-operations.js.map +1 -0
- package/dist/services/storage/database/approval-operations.d.ts +132 -0
- package/dist/services/storage/database/approval-operations.d.ts.map +1 -0
- package/dist/services/storage/database/approval-operations.js +206 -0
- package/dist/services/storage/database/approval-operations.js.map +1 -0
- package/dist/services/storage/database/chunk-operations.d.ts +132 -0
- package/dist/services/storage/database/chunk-operations.d.ts.map +1 -0
- package/dist/services/storage/database/chunk-operations.js +306 -0
- package/dist/services/storage/database/chunk-operations.js.map +1 -0
- package/dist/services/storage/database/cluster-operations.d.ts +97 -0
- package/dist/services/storage/database/cluster-operations.d.ts.map +1 -0
- package/dist/services/storage/database/cluster-operations.js +258 -0
- package/dist/services/storage/database/cluster-operations.js.map +1 -0
- package/dist/services/storage/database/comparison-operations.d.ts +41 -0
- package/dist/services/storage/database/comparison-operations.d.ts.map +1 -0
- package/dist/services/storage/database/comparison-operations.js +65 -0
- package/dist/services/storage/database/comparison-operations.js.map +1 -0
- package/dist/services/storage/database/converters.d.ts +36 -0
- package/dist/services/storage/database/converters.d.ts.map +1 -0
- package/dist/services/storage/database/converters.js +244 -0
- package/dist/services/storage/database/converters.js.map +1 -0
- package/dist/services/storage/database/document-operations.d.ts +145 -0
- package/dist/services/storage/database/document-operations.d.ts.map +1 -0
- package/dist/services/storage/database/document-operations.js +498 -0
- package/dist/services/storage/database/document-operations.js.map +1 -0
- package/dist/services/storage/database/embedding-operations.d.ts +130 -0
- package/dist/services/storage/database/embedding-operations.d.ts.map +1 -0
- package/dist/services/storage/database/embedding-operations.js +315 -0
- package/dist/services/storage/database/embedding-operations.js.map +1 -0
- package/dist/services/storage/database/extraction-operations.d.ts +47 -0
- package/dist/services/storage/database/extraction-operations.d.ts.map +1 -0
- package/dist/services/storage/database/extraction-operations.js +85 -0
- package/dist/services/storage/database/extraction-operations.js.map +1 -0
- package/dist/services/storage/database/form-fill-operations.d.ts +58 -0
- package/dist/services/storage/database/form-fill-operations.d.ts.map +1 -0
- package/dist/services/storage/database/form-fill-operations.js +116 -0
- package/dist/services/storage/database/form-fill-operations.js.map +1 -0
- package/dist/services/storage/database/helpers.d.ts +29 -0
- package/dist/services/storage/database/helpers.d.ts.map +1 -0
- package/dist/services/storage/database/helpers.js +55 -0
- package/dist/services/storage/database/helpers.js.map +1 -0
- package/dist/services/storage/database/image-operations.d.ts +202 -0
- package/dist/services/storage/database/image-operations.d.ts.map +1 -0
- package/dist/services/storage/database/image-operations.js +484 -0
- package/dist/services/storage/database/image-operations.js.map +1 -0
- package/dist/services/storage/database/index.d.ts +13 -0
- package/dist/services/storage/database/index.d.ts.map +1 -0
- package/dist/services/storage/database/index.js +16 -0
- package/dist/services/storage/database/index.js.map +1 -0
- package/dist/services/storage/database/lock-operations.d.ts +59 -0
- package/dist/services/storage/database/lock-operations.d.ts.map +1 -0
- package/dist/services/storage/database/lock-operations.js +89 -0
- package/dist/services/storage/database/lock-operations.js.map +1 -0
- package/dist/services/storage/database/obligation-operations.d.ts +88 -0
- package/dist/services/storage/database/obligation-operations.d.ts.map +1 -0
- package/dist/services/storage/database/obligation-operations.js +206 -0
- package/dist/services/storage/database/obligation-operations.js.map +1 -0
- package/dist/services/storage/database/ocr-operations.d.ts +33 -0
- package/dist/services/storage/database/ocr-operations.d.ts.map +1 -0
- package/dist/services/storage/database/ocr-operations.js +70 -0
- package/dist/services/storage/database/ocr-operations.js.map +1 -0
- package/dist/services/storage/database/playbook-operations.d.ts +72 -0
- package/dist/services/storage/database/playbook-operations.d.ts.map +1 -0
- package/dist/services/storage/database/playbook-operations.js +247 -0
- package/dist/services/storage/database/playbook-operations.js.map +1 -0
- package/dist/services/storage/database/provenance-operations.d.ts +112 -0
- package/dist/services/storage/database/provenance-operations.d.ts.map +1 -0
- package/dist/services/storage/database/provenance-operations.js +251 -0
- package/dist/services/storage/database/provenance-operations.js.map +1 -0
- package/dist/services/storage/database/service.d.ts +142 -0
- package/dist/services/storage/database/service.d.ts.map +1 -0
- package/dist/services/storage/database/service.js +310 -0
- package/dist/services/storage/database/service.js.map +1 -0
- package/dist/services/storage/database/static-operations.d.ts +30 -0
- package/dist/services/storage/database/static-operations.d.ts.map +1 -0
- package/dist/services/storage/database/static-operations.js +218 -0
- package/dist/services/storage/database/static-operations.js.map +1 -0
- package/dist/services/storage/database/stats-operations.d.ts +101 -0
- package/dist/services/storage/database/stats-operations.d.ts.map +1 -0
- package/dist/services/storage/database/stats-operations.js +394 -0
- package/dist/services/storage/database/stats-operations.js.map +1 -0
- package/dist/services/storage/database/tag-operations.d.ts +76 -0
- package/dist/services/storage/database/tag-operations.d.ts.map +1 -0
- package/dist/services/storage/database/tag-operations.js +178 -0
- package/dist/services/storage/database/tag-operations.js.map +1 -0
- package/dist/services/storage/database/types.d.ts +286 -0
- package/dist/services/storage/database/types.d.ts.map +1 -0
- package/dist/services/storage/database/types.js +39 -0
- package/dist/services/storage/database/types.js.map +1 -0
- package/dist/services/storage/database/upload-operations.d.ts +71 -0
- package/dist/services/storage/database/upload-operations.d.ts.map +1 -0
- package/dist/services/storage/database/upload-operations.js +124 -0
- package/dist/services/storage/database/upload-operations.js.map +1 -0
- package/dist/services/storage/database/user-operations.d.ts +102 -0
- package/dist/services/storage/database/user-operations.d.ts.map +1 -0
- package/dist/services/storage/database/user-operations.js +151 -0
- package/dist/services/storage/database/user-operations.js.map +1 -0
- package/dist/services/storage/database/workflow-operations.d.ts +98 -0
- package/dist/services/storage/database/workflow-operations.d.ts.map +1 -0
- package/dist/services/storage/database/workflow-operations.js +157 -0
- package/dist/services/storage/database/workflow-operations.js.map +1 -0
- package/dist/services/storage/database.d.ts +16 -0
- package/dist/services/storage/database.d.ts.map +1 -0
- package/dist/services/storage/database.js +15 -0
- package/dist/services/storage/database.js.map +1 -0
- package/dist/services/storage/index.d.ts +10 -0
- package/dist/services/storage/index.d.ts.map +1 -0
- package/dist/services/storage/index.js +10 -0
- package/dist/services/storage/index.js.map +1 -0
- package/dist/services/storage/migrations/index.d.ts +16 -0
- package/dist/services/storage/migrations/index.d.ts.map +1 -0
- package/dist/services/storage/migrations/index.js +20 -0
- package/dist/services/storage/migrations/index.js.map +1 -0
- package/dist/services/storage/migrations/operations.d.ts +40 -0
- package/dist/services/storage/migrations/operations.d.ts.map +1 -0
- package/dist/services/storage/migrations/operations.js +2910 -0
- package/dist/services/storage/migrations/operations.js.map +1 -0
- package/dist/services/storage/migrations/schema-definitions.d.ts +306 -0
- package/dist/services/storage/migrations/schema-definitions.d.ts.map +1 -0
- package/dist/services/storage/migrations/schema-definitions.js +1006 -0
- package/dist/services/storage/migrations/schema-definitions.js.map +1 -0
- package/dist/services/storage/migrations/schema-helpers.d.ts +50 -0
- package/dist/services/storage/migrations/schema-helpers.d.ts.map +1 -0
- package/dist/services/storage/migrations/schema-helpers.js +176 -0
- package/dist/services/storage/migrations/schema-helpers.js.map +1 -0
- package/dist/services/storage/migrations/types.d.ts +15 -0
- package/dist/services/storage/migrations/types.d.ts.map +1 -0
- package/dist/services/storage/migrations/types.js +21 -0
- package/dist/services/storage/migrations/types.js.map +1 -0
- package/dist/services/storage/migrations/verification.d.ts +20 -0
- package/dist/services/storage/migrations/verification.d.ts.map +1 -0
- package/dist/services/storage/migrations/verification.js +78 -0
- package/dist/services/storage/migrations/verification.js.map +1 -0
- package/dist/services/storage/migrations.d.ts +16 -0
- package/dist/services/storage/migrations.d.ts.map +1 -0
- package/dist/services/storage/migrations.js +17 -0
- package/dist/services/storage/migrations.js.map +1 -0
- package/dist/services/storage/types.d.ts +12 -0
- package/dist/services/storage/types.d.ts.map +1 -0
- package/dist/services/storage/types.js +5 -0
- package/dist/services/storage/types.js.map +1 -0
- package/dist/services/storage/vector.d.ts +208 -0
- package/dist/services/storage/vector.d.ts.map +1 -0
- package/dist/services/storage/vector.js +526 -0
- package/dist/services/storage/vector.js.map +1 -0
- package/dist/services/vlm/pipeline.d.ts +194 -0
- package/dist/services/vlm/pipeline.d.ts.map +1 -0
- package/dist/services/vlm/pipeline.js +800 -0
- package/dist/services/vlm/pipeline.js.map +1 -0
- package/dist/services/vlm/prompts.d.ts +171 -0
- package/dist/services/vlm/prompts.d.ts.map +1 -0
- package/dist/services/vlm/prompts.js +229 -0
- package/dist/services/vlm/prompts.js.map +1 -0
- package/dist/services/vlm/service.d.ts +174 -0
- package/dist/services/vlm/service.d.ts.map +1 -0
- package/dist/services/vlm/service.js +256 -0
- package/dist/services/vlm/service.js.map +1 -0
- package/dist/services/webhook-delivery.d.ts +4 -0
- package/dist/services/webhook-delivery.d.ts.map +1 -0
- package/dist/services/webhook-delivery.js +140 -0
- package/dist/services/webhook-delivery.js.map +1 -0
- package/dist/tools/chunks.d.ts +19 -0
- package/dist/tools/chunks.d.ts.map +1 -0
- package/dist/tools/chunks.js +392 -0
- package/dist/tools/chunks.js.map +1 -0
- package/dist/tools/clm.d.ts +16 -0
- package/dist/tools/clm.d.ts.map +1 -0
- package/dist/tools/clm.js +668 -0
- package/dist/tools/clm.js.map +1 -0
- package/dist/tools/clustering.d.ts +13 -0
- package/dist/tools/clustering.d.ts.map +1 -0
- package/dist/tools/clustering.js +498 -0
- package/dist/tools/clustering.js.map +1 -0
- package/dist/tools/collaboration.d.ts +15 -0
- package/dist/tools/collaboration.d.ts.map +1 -0
- package/dist/tools/collaboration.js +516 -0
- package/dist/tools/collaboration.js.map +1 -0
- package/dist/tools/comparison.d.ts +13 -0
- package/dist/tools/comparison.d.ts.map +1 -0
- package/dist/tools/comparison.js +735 -0
- package/dist/tools/comparison.js.map +1 -0
- package/dist/tools/compliance.d.ts +15 -0
- package/dist/tools/compliance.d.ts.map +1 -0
- package/dist/tools/compliance.js +640 -0
- package/dist/tools/compliance.js.map +1 -0
- package/dist/tools/config.d.ts +19 -0
- package/dist/tools/config.d.ts.map +1 -0
- package/dist/tools/config.js +213 -0
- package/dist/tools/config.js.map +1 -0
- package/dist/tools/database.d.ts +62 -0
- package/dist/tools/database.d.ts.map +1 -0
- package/dist/tools/database.js +288 -0
- package/dist/tools/database.js.map +1 -0
- package/dist/tools/documents.d.ts +61 -0
- package/dist/tools/documents.d.ts.map +1 -0
- package/dist/tools/documents.js +1624 -0
- package/dist/tools/documents.js.map +1 -0
- package/dist/tools/embeddings.d.ts +14 -0
- package/dist/tools/embeddings.d.ts.map +1 -0
- package/dist/tools/embeddings.js +626 -0
- package/dist/tools/embeddings.js.map +1 -0
- package/dist/tools/evaluation.d.ts +25 -0
- package/dist/tools/evaluation.d.ts.map +1 -0
- package/dist/tools/evaluation.js +523 -0
- package/dist/tools/evaluation.js.map +1 -0
- package/dist/tools/events.d.ts +16 -0
- package/dist/tools/events.d.ts.map +1 -0
- package/dist/tools/events.js +493 -0
- package/dist/tools/events.js.map +1 -0
- package/dist/tools/extraction-structured.d.ts +13 -0
- package/dist/tools/extraction-structured.d.ts.map +1 -0
- package/dist/tools/extraction-structured.js +390 -0
- package/dist/tools/extraction-structured.js.map +1 -0
- package/dist/tools/extraction.d.ts +24 -0
- package/dist/tools/extraction.d.ts.map +1 -0
- package/dist/tools/extraction.js +424 -0
- package/dist/tools/extraction.js.map +1 -0
- package/dist/tools/file-management.d.ts +14 -0
- package/dist/tools/file-management.d.ts.map +1 -0
- package/dist/tools/file-management.js +523 -0
- package/dist/tools/file-management.js.map +1 -0
- package/dist/tools/form-fill.d.ts +13 -0
- package/dist/tools/form-fill.d.ts.map +1 -0
- package/dist/tools/form-fill.js +250 -0
- package/dist/tools/form-fill.js.map +1 -0
- package/dist/tools/health.d.ts +19 -0
- package/dist/tools/health.d.ts.map +1 -0
- package/dist/tools/health.js +229 -0
- package/dist/tools/health.js.map +1 -0
- package/dist/tools/images.d.ts +54 -0
- package/dist/tools/images.d.ts.map +1 -0
- package/dist/tools/images.js +787 -0
- package/dist/tools/images.js.map +1 -0
- package/dist/tools/ingestion.d.ts +94 -0
- package/dist/tools/ingestion.d.ts.map +1 -0
- package/dist/tools/ingestion.js +1659 -0
- package/dist/tools/ingestion.js.map +1 -0
- package/dist/tools/intelligence.d.ts +18 -0
- package/dist/tools/intelligence.d.ts.map +1 -0
- package/dist/tools/intelligence.js +1039 -0
- package/dist/tools/intelligence.js.map +1 -0
- package/dist/tools/provenance.d.ts +51 -0
- package/dist/tools/provenance.d.ts.map +1 -0
- package/dist/tools/provenance.js +691 -0
- package/dist/tools/provenance.js.map +1 -0
- package/dist/tools/reports.d.ts +41 -0
- package/dist/tools/reports.d.ts.map +1 -0
- package/dist/tools/reports.js +1394 -0
- package/dist/tools/reports.js.map +1 -0
- package/dist/tools/search.d.ts +35 -0
- package/dist/tools/search.d.ts.map +1 -0
- package/dist/tools/search.js +2528 -0
- package/dist/tools/search.js.map +1 -0
- package/dist/tools/shared.d.ts +52 -0
- package/dist/tools/shared.d.ts.map +1 -0
- package/dist/tools/shared.js +54 -0
- package/dist/tools/shared.js.map +1 -0
- package/dist/tools/tags.d.ts +15 -0
- package/dist/tools/tags.d.ts.map +1 -0
- package/dist/tools/tags.js +287 -0
- package/dist/tools/tags.js.map +1 -0
- package/dist/tools/timeline.d.ts +15 -0
- package/dist/tools/timeline.d.ts.map +1 -0
- package/dist/tools/timeline.js +14 -0
- package/dist/tools/timeline.js.map +1 -0
- package/dist/tools/users.d.ts +14 -0
- package/dist/tools/users.d.ts.map +1 -0
- package/dist/tools/users.js +257 -0
- package/dist/tools/users.js.map +1 -0
- package/dist/tools/vlm.d.ts +40 -0
- package/dist/tools/vlm.d.ts.map +1 -0
- package/dist/tools/vlm.js +475 -0
- package/dist/tools/vlm.js.map +1 -0
- package/dist/tools/workflow.d.ts +16 -0
- package/dist/tools/workflow.d.ts.map +1 -0
- package/dist/tools/workflow.js +495 -0
- package/dist/tools/workflow.js.map +1 -0
- package/dist/utils/backoff.d.ts +53 -0
- package/dist/utils/backoff.d.ts.map +1 -0
- package/dist/utils/backoff.js +78 -0
- package/dist/utils/backoff.js.map +1 -0
- package/dist/utils/config-persistence.d.ts +33 -0
- package/dist/utils/config-persistence.d.ts.map +1 -0
- package/dist/utils/config-persistence.js +61 -0
- package/dist/utils/config-persistence.js.map +1 -0
- package/dist/utils/hash.d.ts +65 -0
- package/dist/utils/hash.d.ts.map +1 -0
- package/dist/utils/hash.js +146 -0
- package/dist/utils/hash.js.map +1 -0
- package/dist/utils/math.d.ts +21 -0
- package/dist/utils/math.d.ts.map +1 -0
- package/dist/utils/math.js +39 -0
- package/dist/utils/math.js.map +1 -0
- package/dist/utils/validation.d.ts +697 -0
- package/dist/utils/validation.d.ts.map +1 -0
- package/dist/utils/validation.js +529 -0
- package/dist/utils/validation.js.map +1 -0
- package/package.json +96 -0
- package/python/.gitkeep +0 -0
- package/python/__init__.py +104 -0
- package/python/clustering_worker.py +440 -0
- package/python/docx_image_extractor.py +524 -0
- package/python/embedding_worker.py +552 -0
- package/python/file_manager_worker.py +564 -0
- package/python/form_fill_worker.py +399 -0
- package/python/gpu_utils.py +582 -0
- package/python/image_extractor.py +317 -0
- package/python/image_optimizer.py +444 -0
- package/python/ocr_worker.py +712 -0
- package/python/pyproject.toml +76 -0
- package/python/requirements.txt +51 -0
- package/python/reranker_worker.py +87 -0
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cluster operations for DatabaseService
|
|
3
|
+
*
|
|
4
|
+
* Handles CRUD operations for the clusters and document_clusters tables.
|
|
5
|
+
*/
|
|
6
|
+
import { v4 as uuidv4 } from 'uuid';
|
|
7
|
+
import { runWithForeignKeyCheck } from './helpers.js';
|
|
8
|
+
// --- Cluster CRUD ---
|
|
9
|
+
/**
 * Persist a single cluster record into the `clusters` table.
 *
 * The INSERT statement is prepared on `db` and then handed to
 * `runWithForeignKeyCheck` together with the bound values and a context
 * message that names the cluster's `provenance_id`.
 *
 * @param {object} db - Database handle exposing `prepare(sql)`.
 * @param {object} cluster - Record carrying one property per inserted column.
 * @returns {*} The `id` of the cluster that was passed in.
 */
export function insertCluster(db, cluster) {
  const insertStatement = db.prepare(`
INSERT INTO clusters (id, run_id, cluster_index, label, description,
classification_tag, document_count, centroid_json, top_terms_json,
coherence_score, algorithm, algorithm_params_json, silhouette_score,
content_hash, provenance_id, created_at, processing_duration_ms)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`);
  // Must stay in the same order as the column list of the INSERT above,
  // because the values are bound positionally.
  const columnOrder = [
    'id',
    'run_id',
    'cluster_index',
    'label',
    'description',
    'classification_tag',
    'document_count',
    'centroid_json',
    'top_terms_json',
    'coherence_score',
    'algorithm',
    'algorithm_params_json',
    'silhouette_score',
    'content_hash',
    'provenance_id',
    'created_at',
    'processing_duration_ms',
  ];
  const boundValues = columnOrder.map((column) => cluster[column]);
  const failureContext = `inserting cluster: FK violation for provenance_id="${cluster.provenance_id}"`;
  runWithForeignKeyCheck(insertStatement, boundValues, failureContext);
  return cluster.id;
}
|
|
41
|
+
/**
 * Look up a single cluster by primary key.
 *
 * @param db - Database handle exposing prepare()
 * @param id - Cluster id to fetch
 * @returns The matching row, or null when the query yields nothing
 */
export function getCluster(db, id) {
    const lookup = db.prepare('SELECT * FROM clusters WHERE id = ?');
    const match = lookup.get(id);
    if (match === undefined || match === null) {
        return null;
    }
    return match;
}
|
|
48
|
+
/**
 * Page through clusters, newest first, optionally narrowed by run and/or
 * classification tag.
 *
 * @param db - Database handle exposing prepare()
 * @param options - Optional filters (run_id, classification_tag) and paging
 *   (limit defaults to 50, offset defaults to 0)
 * @returns Array of matching cluster rows
 */
export function listClusters(db, options) {
    const filters = [];
    const bindings = [];
    const candidates = [
        ['run_id', options?.run_id],
        ['classification_tag', options?.classification_tag],
    ];
    for (const [column, wanted] of candidates) {
        // Falsy filter values (undefined, null, '') are treated as "no filter".
        if (wanted) {
            filters.push(`${column} = ?`);
            bindings.push(wanted);
        }
    }
    const where = filters.length === 0 ? '' : `WHERE ${filters.join(' AND ')}`;
    const pageSize = options?.limit ?? 50;
    const skip = options?.offset ?? 0;
    const sql = `SELECT * FROM clusters ${where} ORDER BY created_at DESC LIMIT ? OFFSET ?`;
    return db.prepare(sql).all(...bindings, pageSize, skip);
}
|
|
68
|
+
/**
 * Delete all clusters and their document assignments for a run.
 *
 * Steps, executed atomically inside a single transaction so a failure
 * part-way through cannot leave a run half-deleted:
 *   1. Collect the provenance ids referenced by the run's clusters.
 *   2. Delete the run's document_clusters rows.
 *   3. Delete the run's clusters rows.
 *   4. Best-effort delete of the collected provenance rows.
 *
 * @param db - Database handle exposing prepare() and transaction()
 * @param runId - Clustering run whose clusters should be removed
 * @returns The number of clusters deleted
 */
export function deleteClustersByRunId(db, runId) {
    const runInTransaction = db.transaction(() => {
        // Collect provenance IDs before deleting clusters (clusters.provenance_id NOT NULL REFERENCES provenance(id))
        const provenanceIds = db
            .prepare('SELECT provenance_id FROM clusters WHERE run_id = ?')
            .all(runId);
        db.prepare('DELETE FROM document_clusters WHERE run_id = ?').run(runId);
        const result = db.prepare('DELETE FROM clusters WHERE run_id = ?').run(runId);
        // Clean up orphaned provenance records now that the FK references are gone
        if (provenanceIds.length > 0) {
            const deleteProvStmt = db.prepare('DELETE FROM provenance WHERE id = ?');
            for (const { provenance_id } of provenanceIds) {
                try {
                    deleteProvStmt.run(provenance_id);
                }
                catch (e) {
                    // Log but don't fail if provenance record is still referenced elsewhere.
                    // The error is caught here, so it does not abort the surrounding transaction.
                    console.error(`[cluster-operations] Failed to delete provenance ${provenance_id}:`, e instanceof Error ? e.message : String(e));
                }
            }
        }
        return result.changes;
    });
    return runInTransaction();
}
|
|
95
|
+
// --- DocumentCluster CRUD ---
|
|
96
|
+
/**
 * Record that a document belongs to a cluster for a given run.
 *
 * The is_noise flag is stored as an integer (1 for truthy, 0 otherwise).
 * The statement is executed through runWithForeignKeyCheck with a context
 * message naming the document and cluster ids.
 *
 * @param db - Database handle exposing prepare()
 * @param dc - Document-cluster assignment record
 * @returns The id of the assignment that was passed in
 */
export function insertDocumentCluster(db, dc) {
    const insertStmt = db.prepare(`
    INSERT INTO document_clusters (id, document_id, cluster_id, run_id,
      similarity_to_centroid, membership_probability, is_noise, assigned_at)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
  `);
    const noiseFlag = dc.is_noise ? 1 : 0;
    // Order must match the column list in the INSERT above.
    const bindings = [
        dc.id,
        dc.document_id,
        dc.cluster_id,
        dc.run_id,
        dc.similarity_to_centroid,
        dc.membership_probability,
        noiseFlag,
        dc.assigned_at,
    ];
    const failureContext = `inserting document_cluster: FK violation for document_id="${dc.document_id}" or cluster_id="${dc.cluster_id}"`;
    runWithForeignKeyCheck(insertStmt, bindings, failureContext);
    return dc.id;
}
|
|
117
|
+
/**
 * List the members of a cluster, most similar to the centroid first.
 *
 * Joins document_clusters with documents so each row carries the file_name.
 *
 * @param db - Database handle exposing prepare()
 * @param clusterId - Cluster whose members are requested
 * @returns Rows of { document_id, file_name, similarity_to_centroid, membership_probability }
 */
export function getClusterDocuments(db, clusterId) {
    const sql = `SELECT dc.document_id, d.file_name, dc.similarity_to_centroid, dc.membership_probability
         FROM document_clusters dc
         JOIN documents d ON d.id = dc.document_id
         WHERE dc.cluster_id = ?
         ORDER BY dc.similarity_to_centroid DESC`;
    const membersQuery = db.prepare(sql);
    return membersQuery.all(clusterId);
}
|
|
129
|
+
/**
 * Fetch summary rows for every cluster in a run, ordered by cluster_index.
 *
 * Only scalar summary columns are selected; the *_json columns are left out.
 *
 * @param db - Database handle exposing prepare()
 * @param runId - Clustering run to summarize
 * @returns Array of cluster summary rows
 */
export function getClusterSummariesByRunId(db, runId) {
    const sql = `SELECT id, run_id, cluster_index, label, classification_tag, document_count,
              coherence_score, created_at
       FROM clusters
       WHERE run_id = ?
       ORDER BY cluster_index ASC`;
    const summariesQuery = db.prepare(sql);
    return summariesQuery.all(runId);
}
|
|
141
|
+
/**
 * Fetch summary rows for every cluster a document is assigned to, newest
 * cluster first.
 *
 * Resolves membership through the document_clusters join table and selects
 * only scalar summary columns from clusters.
 *
 * @param db - Database handle exposing prepare()
 * @param documentId - Document whose cluster memberships are requested
 * @returns Array of cluster summary rows
 */
export function getClusterSummariesForDocument(db, documentId) {
    const sql = `SELECT c.id, c.run_id, c.cluster_index, c.label, c.classification_tag,
              c.document_count, c.coherence_score, c.created_at
       FROM clusters c
       JOIN document_clusters dc ON dc.cluster_id = c.id
       WHERE dc.document_id = ?
       ORDER BY c.created_at DESC`;
    const summariesQuery = db.prepare(sql);
    return summariesQuery.all(documentId);
}
|
|
154
|
+
// --- Reassign & Merge ---
|
|
155
|
+
/**
 * Reassign a document from its current cluster to a different target cluster.
 *
 * Within the target cluster's run this removes the document's existing
 * document_clusters entry (if any), inserts a new assignment, and updates
 * document_count on both the old and the new cluster.
 *
 * All mutations run inside a single transaction so a failure part-way through
 * (e.g. an FK violation on the INSERT) cannot leave the document unassigned
 * or the counts out of sync.
 *
 * @param db - Database handle exposing prepare() and transaction()
 * @param documentId - Document to move
 * @param targetClusterId - Cluster that should receive the document
 * @returns Object with old_cluster_id (null if not previously assigned) and run_id
 * @throws Error if the target cluster does not exist
 */
export function reassignDocument(db, documentId, targetClusterId) {
    // Get the target cluster to know the run_id
    const targetCluster = getCluster(db, targetClusterId);
    if (!targetCluster) {
        throw new Error(`Target cluster "${targetClusterId}" not found`);
    }
    const runId = targetCluster.run_id;
    const applyReassignment = db.transaction(() => {
        // Find existing assignment for this document in this run
        const existing = db
            .prepare('SELECT id, cluster_id FROM document_clusters WHERE document_id = ? AND run_id = ?')
            .get(documentId, runId);
        const previousClusterId = existing?.cluster_id ?? null;
        if (previousClusterId === targetClusterId) {
            // Already in the target cluster, no-op
            return previousClusterId;
        }
        // Delete existing assignment for this document in this run
        if (existing) {
            db.prepare('DELETE FROM document_clusters WHERE id = ?').run(existing.id);
            // Decrement old cluster's document_count (if it was in a cluster, not noise)
            if (previousClusterId) {
                db.prepare('UPDATE clusters SET document_count = MAX(0, document_count - 1) WHERE id = ?').run(previousClusterId);
            }
        }
        // Insert new assignment. Manual reassignments are stored with
        // similarity_to_centroid = 0, membership_probability = 1.0, is_noise = 0.
        const now = new Date().toISOString();
        const dcId = uuidv4();
        db.prepare(`
      INSERT INTO document_clusters (id, document_id, cluster_id, run_id,
        similarity_to_centroid, membership_probability, is_noise, assigned_at)
      VALUES (?, ?, ?, ?, ?, ?, ?, ?)
    `).run(dcId, documentId, targetClusterId, runId, 0, 1.0, 0, now);
        // Increment target cluster's document_count
        db.prepare('UPDATE clusters SET document_count = document_count + 1 WHERE id = ?').run(targetClusterId);
        return previousClusterId;
    });
    const oldClusterId = applyReassignment();
    return { old_cluster_id: oldClusterId, run_id: runId };
}
|
|
198
|
+
/**
 * Merge two clusters into one. All documents from cluster2 are moved to cluster1.
 * cluster2 (and, best-effort, its provenance record) is deleted after the merge.
 *
 * Both clusters must belong to the same run_id and must be distinct.
 *
 * @param db - Database handle exposing prepare() and transaction()
 * @param clusterId1 - Surviving cluster that receives the documents
 * @param clusterId2 - Cluster that is emptied and deleted
 * @returns Object with merged_cluster_id and documents_moved count
 * @throws Error if both ids are the same, if either cluster is missing, or if
 *   the clusters belong to different runs
 */
export function mergeClusters(db, clusterId1, clusterId2) {
    // A self-merge would re-count every member and then delete the very
    // cluster that was supposed to survive, so reject it up front.
    if (clusterId1 === clusterId2) {
        throw new Error(`Cannot merge cluster "${clusterId1}" with itself`);
    }
    // Validation outside transaction - read-only lookups
    const cluster1 = getCluster(db, clusterId1);
    if (!cluster1) {
        throw new Error(`Cluster "${clusterId1}" not found`);
    }
    const cluster2 = getCluster(db, clusterId2);
    if (!cluster2) {
        throw new Error(`Cluster "${clusterId2}" not found`);
    }
    if (cluster1.run_id !== cluster2.run_id) {
        throw new Error(`Cannot merge clusters from different runs: "${cluster1.run_id}" vs "${cluster2.run_id}"`);
    }
    // M-8: Wrap all mutations in a transaction so a crash mid-merge
    // cannot leave cluster state inconsistent (e.g., documents moved
    // but old cluster not deleted, or count not updated).
    const cluster2ProvId = cluster2.provenance_id;
    const runInTransaction = db.transaction(() => {
        // Move all document_clusters from cluster2 to cluster1
        const moveResult = db
            .prepare('UPDATE document_clusters SET cluster_id = ? WHERE cluster_id = ?')
            .run(clusterId1, clusterId2);
        const documentsMoved = moveResult.changes;
        // Update cluster1's document_count
        db.prepare('UPDATE clusters SET document_count = document_count + ? WHERE id = ?').run(documentsMoved, clusterId1);
        // Delete cluster2 record
        db.prepare('DELETE FROM clusters WHERE id = ?').run(clusterId2);
        // Clean up cluster2's provenance record (best effort: a failure is
        // logged and does not abort the merge)
        try {
            db.prepare('DELETE FROM provenance WHERE id = ?').run(cluster2ProvId);
        }
        catch (e) {
            console.error(`[cluster-operations] Failed to delete provenance ${cluster2ProvId}:`, e instanceof Error ? e.message : String(e));
        }
        return documentsMoved;
    });
    const documentsMoved = runInTransaction();
    return { merged_cluster_id: clusterId1, documents_moved: documentsMoved };
}
|
|
245
|
+
// --- Stats ---
|
|
246
|
+
/**
 * Compute aggregate figures over the clusters table: total number of
 * clusters, number of distinct runs, and the average coherence score.
 *
 * @param db - Database handle exposing prepare()
 * @returns Single row of { total_clusters, total_runs, avg_coherence }
 */
export function getClusteringStats(db) {
    const sql = `SELECT COUNT(*) AS total_clusters,
              COUNT(DISTINCT run_id) AS total_runs,
              AVG(coherence_score) AS avg_coherence
       FROM clusters`;
    const statsQuery = db.prepare(sql);
    return statsQuery.get();
}
|
|
258
|
+
//# sourceMappingURL=cluster-operations.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cluster-operations.js","sourceRoot":"","sources":["../../../../src/services/storage/database/cluster-operations.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAGH,OAAO,EAAE,EAAE,IAAI,MAAM,EAAE,MAAM,MAAM,CAAC;AAEpC,OAAO,EAAE,sBAAsB,EAAE,MAAM,cAAc,CAAC;AAEtD,uBAAuB;AAEvB;;GAEG;AACH,MAAM,UAAU,aAAa,CAAC,EAAqB,EAAE,OAAgB;IACnE,MAAM,IAAI,GAAG,EAAE,CAAC,OAAO,CAAC;;;;;;GAMvB,CAAC,CAAC;IAEH,sBAAsB,CACpB,IAAI,EACJ;QACE,OAAO,CAAC,EAAE;QACV,OAAO,CAAC,MAAM;QACd,OAAO,CAAC,aAAa;QACrB,OAAO,CAAC,KAAK;QACb,OAAO,CAAC,WAAW;QACnB,OAAO,CAAC,kBAAkB;QAC1B,OAAO,CAAC,cAAc;QACtB,OAAO,CAAC,aAAa;QACrB,OAAO,CAAC,cAAc;QACtB,OAAO,CAAC,eAAe;QACvB,OAAO,CAAC,SAAS;QACjB,OAAO,CAAC,qBAAqB;QAC7B,OAAO,CAAC,gBAAgB;QACxB,OAAO,CAAC,YAAY;QACpB,OAAO,CAAC,aAAa;QACrB,OAAO,CAAC,UAAU;QAClB,OAAO,CAAC,sBAAsB;KAC/B,EACD,sDAAsD,OAAO,CAAC,aAAa,GAAG,CAC/E,CAAC;IAEF,OAAO,OAAO,CAAC,EAAE,CAAC;AACpB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,UAAU,CAAC,EAAqB,EAAE,EAAU;IAC1D,MAAM,GAAG,GAAG,EAAE,CAAC,OAAO,CAAC,qCAAqC,CAAC,CAAC,GAAG,CAAC,EAAE,CAAwB,CAAC;IAC7F,OAAO,GAAG,IAAI,IAAI,CAAC;AACrB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,YAAY,CAC1B,EAAqB,EACrB,OAA2F;IAE3F,MAAM,MAAM,GAAwB,EAAE,CAAC;IACvC,MAAM,UAAU,GAAa,EAAE,CAAC;IAEhC,IAAI,OAAO,EAAE,MAAM,EAAE,CAAC;QACpB,UAAU,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QAC9B,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;IAC9B,CAAC;IAED,IAAI,OAAO,EAAE,kBAAkB,EAAE,CAAC;QAChC,UAAU,CAAC,IAAI,CAAC,wBAAwB,CAAC,CAAC;QAC1C,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,kBAAkB,CAAC,CAAC;IAC1C,CAAC;IAED,MAAM,KAAK,GAAG,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;IAC/E,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,KAAK,IAAI,EAAE,EAAE,OAAO,EAAE,MAAM,IAAI,CAAC,CAAC,CAAC;IAExD,OAAO,EAAE;SACN,OAAO,CAAC,0BAA0B,KAAK,4CAA4C,CAAC;SACpF,GAAG,CAAC,GAAG,MAAM,CAAc,CAAC;AACjC,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,qBAAqB,CAAC,EAAqB,EAAE,KAAa;IACxE,8GAA8G;IAC9G,MAAM,aAAa,GAAG,EAAE;SACrB,OAAO,CAAC,qDAAqD,CAAC;SAC9D,GAAG,CAAC,KAAK,CAAgC,CAAC;IAE7C,EAAE,CAAC,OAAO,CAAC,gDAAgD,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;IACxE
,MAAM,MAAM,GAAG,EAAE,CAAC,OAAO,CAAC,uCAAuC,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;IAE9E,2EAA2E;IAC3E,IAAI,aAAa,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC7B,MAAM,cAAc,GAAG,EAAE,CAAC,OAAO,CAAC,qCAAqC,CAAC,CAAC;QACzE,KAAK,MAAM,EAAE,aAAa,EAAE,IAAI,aAAa,EAAE,CAAC;YAC9C,IAAI,CAAC;gBACH,cAAc,CAAC,GAAG,CAAC,aAAa,CAAC,CAAC;YACpC,CAAC;YAAC,OAAO,CAAU,EAAE,CAAC;gBACpB,wEAAwE;gBACxE,OAAO,CAAC,KAAK,CACX,oDAAoD,aAAa,GAAG,EACpE,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAC3C,CAAC;YACJ,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC,OAAO,CAAC;AACxB,CAAC;AAED,+BAA+B;AAE/B;;GAEG;AACH,MAAM,UAAU,qBAAqB,CAAC,EAAqB,EAAE,EAAmB;IAC9E,MAAM,IAAI,GAAG,EAAE,CAAC,OAAO,CAAC;;;;GAIvB,CAAC,CAAC;IAEH,sBAAsB,CACpB,IAAI,EACJ;QACE,EAAE,CAAC,EAAE;QACL,EAAE,CAAC,WAAW;QACd,EAAE,CAAC,UAAU;QACb,EAAE,CAAC,MAAM;QACT,EAAE,CAAC,sBAAsB;QACzB,EAAE,CAAC,sBAAsB;QACzB,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACnB,EAAE,CAAC,WAAW;KACf,EACD,6DAA6D,EAAE,CAAC,WAAW,oBAAoB,EAAE,CAAC,UAAU,GAAG,CAChH,CAAC;IAEF,OAAO,EAAE,CAAC,EAAE,CAAC;AACf,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,mBAAmB,CACjC,EAAqB,EACrB,SAAiB;IAOjB,OAAO,EAAE;SACN,OAAO,CACN;;;;6CAIuC,CACxC;SACA,GAAG,CAAC,SAAS,CAKd,CAAC;AACL,CAAC;AAkBD;;GAEG;AACH,MAAM,UAAU,0BAA0B,CAAC,EAAqB,EAAE,KAAa;IAC7E,OAAO,EAAE;SACN,OAAO,CACN;;;;gCAI0B,CAC3B;SACA,GAAG,CAAC,KAAK,CAAqB,CAAC;AACpC,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,8BAA8B,CAC5C,EAAqB,EACrB,UAAkB;IAElB,OAAO,EAAE;SACN,OAAO,CACN;;;;;gCAK0B,CAC3B;SACA,GAAG,CAAC,UAAU,CAAqB,CAAC;AACzC,CAAC;AAED,2BAA2B;AAE3B;;;;;;GAMG;AACH,MAAM,UAAU,gBAAgB,CAC9B,EAAqB,EACrB,UAAkB,EAClB,eAAuB;IAEvB,4CAA4C;IAC5C,MAAM,aAAa,GAAG,UAAU,CAAC,EAAE,EAAE,eAAe,CAAC,CAAC;IACtD,IAAI,CAAC,aAAa,EAAE,CAAC;QACnB,MAAM,IAAI,KAAK,CAAC,mBAAmB,eAAe,aAAa,CAAC,CAAC;IACnE,CAAC;IAED,MAAM,KAAK,GAAG,aAAa,CAAC,MAAM,CAAC;IAEnC,yDAAyD;IACzD,MAAM,QAAQ,GAAG,EAAE;SAChB,OAAO,CACN,mFAAmF,CACpF;SACA,GAAG,CAAC,UAAU,EAAE,KAAK,CAA0D,CAAC;IAEnF,MAAM,YAAY,GAAG,QAAQ,EAAE,UAAU,IAAI,IAAI,CAAC;IAElD,IAAI,YAAY,KAAK,eAAe,EAAE,CAAC;QACrC,uCAAuC;QACvC,OAAO,EAAE,cAAc,EAAE,YA
AY,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC;IACzD,CAAC;IAED,2DAA2D;IAC3D,IAAI,QAAQ,EAAE,CAAC;QACb,EAAE,CAAC,OAAO,CAAC,4CAA4C,CAAC,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;QAE1E,6EAA6E;QAC7E,IAAI,YAAY,EAAE,CAAC;YACjB,EAAE,CAAC,OAAO,CACR,8EAA8E,CAC/E,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC;QACtB,CAAC;IACH,CAAC;IAED,wBAAwB;IACxB,MAAM,GAAG,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IACrC,MAAM,IAAI,GAAG,MAAM,EAAE,CAAC;IACtB,EAAE,CAAC,OAAO,CAAC;;;;GAIV,CAAC,CAAC,GAAG,CAAC,IAAI,EAAE,UAAU,EAAE,eAAe,EAAE,KAAK,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,CAAC,CAAC;IAEjE,4CAA4C;IAC5C,EAAE,CAAC,OAAO,CACR,sEAAsE,CACvE,CAAC,GAAG,CAAC,eAAe,CAAC,CAAC;IAEvB,OAAO,EAAE,cAAc,EAAE,YAAY,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC;AACzD,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,aAAa,CAC3B,EAAqB,EACrB,UAAkB,EAClB,UAAkB;IAElB,qDAAqD;IACrD,MAAM,QAAQ,GAAG,UAAU,CAAC,EAAE,EAAE,UAAU,CAAC,CAAC;IAC5C,IAAI,CAAC,QAAQ,EAAE,CAAC;QACd,MAAM,IAAI,KAAK,CAAC,YAAY,UAAU,aAAa,CAAC,CAAC;IACvD,CAAC;IAED,MAAM,QAAQ,GAAG,UAAU,CAAC,EAAE,EAAE,UAAU,CAAC,CAAC;IAC5C,IAAI,CAAC,QAAQ,EAAE,CAAC;QACd,MAAM,IAAI,KAAK,CAAC,YAAY,UAAU,aAAa,CAAC,CAAC;IACvD,CAAC;IAED,IAAI,QAAQ,CAAC,MAAM,KAAK,QAAQ,CAAC,MAAM,EAAE,CAAC;QACxC,MAAM,IAAI,KAAK,CACb,+CAA+C,QAAQ,CAAC,MAAM,SAAS,QAAQ,CAAC,MAAM,GAAG,CAC1F,CAAC;IACJ,CAAC;IAED,gEAAgE;IAChE,iEAAiE;IACjE,sDAAsD;IACtD,MAAM,cAAc,GAAG,QAAQ,CAAC,aAAa,CAAC;IAE9C,MAAM,gBAAgB,GAAG,EAAE,CAAC,WAAW,CAAC,GAAG,EAAE;QAC3C,uDAAuD;QACvD,MAAM,UAAU,GAAG,EAAE;aAClB,OAAO,CAAC,kEAAkE,CAAC;aAC3E,GAAG,CAAC,UAAU,EAAE,UAAU,CAAC,CAAC;QAE/B,MAAM,cAAc,GAAG,UAAU,CAAC,OAAO,CAAC;QAE1C,mCAAmC;QACnC,EAAE,CAAC,OAAO,CACR,sEAAsE,CACvE,CAAC,GAAG,CAAC,cAAc,EAAE,UAAU,CAAC,CAAC;QAElC,yBAAyB;QACzB,EAAE,CAAC,OAAO,CAAC,mCAAmC,CAAC,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;QAEhE,wCAAwC;QACxC,IAAI,CAAC;YACH,EAAE,CAAC,OAAO,CAAC,qCAAqC,CAAC,CAAC,GAAG,CAAC,cAAc,CAAC,CAAC;QACxE,CAAC;QAAC,OAAO,CAAU,EAAE,CAAC;YACpB,OAAO,CAAC,KAAK,CACX,oDAAoD,cAAc,GAAG,EACrE,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAC3C,CAAC;QACJ,CAAC;QAED,OAAO,cAAc,CAAC;IACxB,CAAC,CAAC,CAAC;IAEH,MAAM
,cAAc,GAAG,gBAAgB,EAAE,CAAC;IAE1C,OAAO,EAAE,iBAAiB,EAAE,UAAU,EAAE,eAAe,EAAE,cAAc,EAAE,CAAC;AAC5E,CAAC;AAED,gBAAgB;AAEhB;;GAEG;AACH,MAAM,UAAU,kBAAkB,CAAC,EAAqB;IAKtD,MAAM,GAAG,GAAG,EAAE;SACX,OAAO,CACN;;;mBAGa,CACd;SACA,GAAG,EAAkF,CAAC;IAEzF,OAAO,GAAG,CAAC;AACb,CAAC"}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Comparison operations for DatabaseService
|
|
3
|
+
*
|
|
4
|
+
* Handles CRUD operations for the comparisons table.
|
|
5
|
+
*/
|
|
6
|
+
import Database from 'better-sqlite3';
|
|
7
|
+
import type { Comparison } from '../../../models/comparison.js';
|
|
8
|
+
/**
|
|
9
|
+
* Insert a comparison record
|
|
10
|
+
*/
|
|
11
|
+
export declare function insertComparison(db: Database.Database, comparison: Comparison): string;
|
|
12
|
+
/**
|
|
13
|
+
* Get a comparison by ID
|
|
14
|
+
*/
|
|
15
|
+
export declare function getComparison(db: Database.Database, id: string): Comparison | null;
|
|
16
|
+
/**
|
|
17
|
+
* List comparisons with optional document filter and pagination
|
|
18
|
+
*/
|
|
19
|
+
export declare function listComparisons(db: Database.Database, options?: {
|
|
20
|
+
document_id?: string;
|
|
21
|
+
limit?: number;
|
|
22
|
+
offset?: number;
|
|
23
|
+
}): Comparison[];
|
|
24
|
+
/**
|
|
25
|
+
* Summary of a comparison (excludes large JSON diff fields)
|
|
26
|
+
*/
|
|
27
|
+
interface ComparisonSummary {
|
|
28
|
+
id: string;
|
|
29
|
+
document_id_1: string;
|
|
30
|
+
document_id_2: string;
|
|
31
|
+
similarity_ratio: number;
|
|
32
|
+
summary: string;
|
|
33
|
+
created_at: string;
|
|
34
|
+
processing_duration_ms: number | null;
|
|
35
|
+
}
|
|
36
|
+
/**
|
|
37
|
+
* Get comparison summaries for a document (lightweight: no JSON blobs)
|
|
38
|
+
*/
|
|
39
|
+
export declare function getComparisonSummariesByDocument(db: Database.Database, documentId: string): ComparisonSummary[];
|
|
40
|
+
export {};
|
|
41
|
+
//# sourceMappingURL=comparison-operations.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"comparison-operations.d.ts","sourceRoot":"","sources":["../../../../src/services/storage/database/comparison-operations.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,QAAQ,MAAM,gBAAgB,CAAC;AACtC,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,+BAA+B,CAAC;AAGhE;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,EAAE,EAAE,QAAQ,CAAC,QAAQ,EAAE,UAAU,EAAE,UAAU,GAAG,MAAM,CA2BtF;AAED;;GAEG;AACH,wBAAgB,aAAa,CAAC,EAAE,EAAE,QAAQ,CAAC,QAAQ,EAAE,EAAE,EAAE,MAAM,GAAG,UAAU,GAAG,IAAI,CAKlF;AAED;;GAEG;AACH,wBAAgB,eAAe,CAC7B,EAAE,EAAE,QAAQ,CAAC,QAAQ,EACrB,OAAO,CAAC,EAAE;IAAE,WAAW,CAAC,EAAE,MAAM,CAAC;IAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IAAC,MAAM,CAAC,EAAE,MAAM,CAAA;CAAE,GAClE,UAAU,EAAE,CAcd;AAED;;GAEG;AACH,UAAU,iBAAiB;IACzB,EAAE,EAAE,MAAM,CAAC;IACX,aAAa,EAAE,MAAM,CAAC;IACtB,aAAa,EAAE,MAAM,CAAC;IACtB,gBAAgB,EAAE,MAAM,CAAC;IACzB,OAAO,EAAE,MAAM,CAAC;IAChB,UAAU,EAAE,MAAM,CAAC;IACnB,sBAAsB,EAAE,MAAM,GAAG,IAAI,CAAC;CACvC;AAED;;GAEG;AACH,wBAAgB,gCAAgC,CAC9C,EAAE,EAAE,QAAQ,CAAC,QAAQ,EACrB,UAAU,EAAE,MAAM,GACjB,iBAAiB,EAAE,CASrB"}
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Comparison operations for DatabaseService
|
|
3
|
+
*
|
|
4
|
+
* Handles CRUD operations for the comparisons table.
|
|
5
|
+
*/
|
|
6
|
+
import { runWithForeignKeyCheck } from './helpers.js';
|
|
7
|
+
/**
 * Persist one comparison row.
 *
 * Binds the comparison's fields, in column order, to an INSERT on the
 * comparisons table and executes it through runWithForeignKeyCheck, supplying
 * a context message that names both document ids.
 *
 * @param db - Database handle exposing prepare()
 * @param comparison - Comparison record; each bound field maps 1:1 to a column
 * @returns The id of the comparison that was passed in
 */
export function insertComparison(db, comparison) {
    const insertStmt = db.prepare(`
    INSERT INTO comparisons (id, document_id_1, document_id_2, similarity_ratio,
      text_diff_json, structural_diff_json, summary,
      content_hash, provenance_id, created_at, processing_duration_ms)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
  `);
    const {
        id, document_id_1, document_id_2, similarity_ratio, text_diff_json,
        structural_diff_json, summary, content_hash, provenance_id,
        created_at, processing_duration_ms,
    } = comparison;
    // Order must match the column list in the INSERT above.
    const bindings = [
        id, document_id_1, document_id_2, similarity_ratio, text_diff_json,
        structural_diff_json, summary, content_hash, provenance_id,
        created_at, processing_duration_ms,
    ];
    const failureContext = `inserting comparison: FK violation for document_id_1="${document_id_1}" or document_id_2="${document_id_2}"`;
    runWithForeignKeyCheck(insertStmt, bindings, failureContext);
    return comparison.id;
}
|
|
32
|
+
/**
 * Look up a single comparison by primary key.
 *
 * @param db - Database handle exposing prepare()
 * @param id - Comparison id to fetch
 * @returns The matching row, or null when the query yields nothing
 */
export function getComparison(db, id) {
    const lookup = db.prepare('SELECT * FROM comparisons WHERE id = ?');
    const match = lookup.get(id);
    if (match === undefined || match === null) {
        return null;
    }
    return match;
}
|
|
39
|
+
/**
 * Page through comparisons, newest first, optionally restricted to those
 * involving a given document on either side.
 *
 * @param db - Database handle exposing prepare()
 * @param options - Optional document_id filter and paging (limit defaults to
 *   50, offset defaults to 0)
 * @returns Array of matching comparison rows
 */
export function listComparisons(db, options) {
    const documentId = options?.document_id;
    // A falsy document_id (undefined, null, '') means "no filter".
    const hasFilter = Boolean(documentId);
    const where = hasFilter ? 'WHERE document_id_1 = ? OR document_id_2 = ?' : '';
    const filterBindings = hasFilter ? [documentId, documentId] : [];
    const pageSize = options?.limit ?? 50;
    const skip = options?.offset ?? 0;
    const sql = `SELECT * FROM comparisons ${where} ORDER BY created_at DESC LIMIT ? OFFSET ?`;
    return db.prepare(sql).all(...filterBindings, pageSize, skip);
}
|
|
54
|
+
/**
 * Fetch summary rows for every comparison that involves a document on either
 * side, newest first.
 *
 * Only scalar summary columns are selected; the *_json diff columns are left out.
 *
 * @param db - Database handle exposing prepare()
 * @param documentId - Document whose comparisons are requested
 * @returns Array of comparison summary rows
 */
export function getComparisonSummariesByDocument(db, documentId) {
    const sql = `SELECT id, document_id_1, document_id_2, similarity_ratio, summary, created_at, processing_duration_ms
       FROM comparisons
       WHERE document_id_1 = ? OR document_id_2 = ?
       ORDER BY created_at DESC`;
    const summariesQuery = db.prepare(sql);
    // The id is bound twice: once per side of the OR.
    return summariesQuery.all(documentId, documentId);
}
|
|
65
|
+
//# sourceMappingURL=comparison-operations.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"comparison-operations.js","sourceRoot":"","sources":["../../../../src/services/storage/database/comparison-operations.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAIH,OAAO,EAAE,sBAAsB,EAAE,MAAM,cAAc,CAAC;AAEtD;;GAEG;AACH,MAAM,UAAU,gBAAgB,CAAC,EAAqB,EAAE,UAAsB;IAC5E,MAAM,IAAI,GAAG,EAAE,CAAC,OAAO,CAAC;;;;;GAKvB,CAAC,CAAC;IAEH,sBAAsB,CACpB,IAAI,EACJ;QACE,UAAU,CAAC,EAAE;QACb,UAAU,CAAC,aAAa;QACxB,UAAU,CAAC,aAAa;QACxB,UAAU,CAAC,gBAAgB;QAC3B,UAAU,CAAC,cAAc;QACzB,UAAU,CAAC,oBAAoB;QAC/B,UAAU,CAAC,OAAO;QAClB,UAAU,CAAC,YAAY;QACvB,UAAU,CAAC,aAAa;QACxB,UAAU,CAAC,UAAU;QACrB,UAAU,CAAC,sBAAsB;KAClC,EACD,yDAAyD,UAAU,CAAC,aAAa,uBAAuB,UAAU,CAAC,aAAa,GAAG,CACpI,CAAC;IAEF,OAAO,UAAU,CAAC,EAAE,CAAC;AACvB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,aAAa,CAAC,EAAqB,EAAE,EAAU;IAC7D,MAAM,GAAG,GAAG,EAAE,CAAC,OAAO,CAAC,wCAAwC,CAAC,CAAC,GAAG,CAAC,EAAE,CAE1D,CAAC;IACd,OAAO,GAAG,IAAI,IAAI,CAAC;AACrB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,eAAe,CAC7B,EAAqB,EACrB,OAAmE;IAEnE,MAAM,MAAM,GAAwB,EAAE,CAAC;IACvC,IAAI,KAAK,GAAG,EAAE,CAAC;IAEf,IAAI,OAAO,EAAE,WAAW,EAAE,CAAC;QACzB,KAAK,GAAG,8CAA8C,CAAC;QACvD,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,WAAW,EAAE,OAAO,CAAC,WAAW,CAAC,CAAC;IACxD,CAAC;IAED,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,KAAK,IAAI,EAAE,EAAE,OAAO,EAAE,MAAM,IAAI,CAAC,CAAC,CAAC;IAExD,OAAO,EAAE;SACN,OAAO,CAAC,6BAA6B,KAAK,4CAA4C,CAAC;SACvF,GAAG,CAAC,GAAG,MAAM,CAAiB,CAAC;AACpC,CAAC;AAeD;;GAEG;AACH,MAAM,UAAU,gCAAgC,CAC9C,EAAqB,EACrB,UAAkB;IAElB,OAAO,EAAE;SACN,OAAO,CACN;;;8BAGwB,CACzB;SACA,GAAG,CAAC,UAAU,EAAE,UAAU,CAAwB,CAAC;AACxD,CAAC"}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Row conversion functions for DatabaseService
|
|
3
|
+
*
|
|
4
|
+
* Converts database row objects to domain model interfaces.
|
|
5
|
+
*/
|
|
6
|
+
import { Document, OCRResult } from '../../../models/document.js';
|
|
7
|
+
import { Chunk } from '../../../models/chunk.js';
|
|
8
|
+
import { Embedding } from '../../../models/embedding.js';
|
|
9
|
+
import { ProvenanceRecord } from '../../../models/provenance.js';
|
|
10
|
+
import { ImageReference } from '../../../models/image.js';
|
|
11
|
+
import { DocumentRow, OCRResultRow, ChunkRow, EmbeddingRow, ProvenanceRow, ImageRow } from './types.js';
|
|
12
|
+
/**
|
|
13
|
+
* Convert document row to Document interface
|
|
14
|
+
*/
|
|
15
|
+
export declare function rowToDocument(row: DocumentRow): Document;
|
|
16
|
+
/**
|
|
17
|
+
* Convert OCR result row to OCRResult interface
|
|
18
|
+
*/
|
|
19
|
+
export declare function rowToOCRResult(row: OCRResultRow): OCRResult;
|
|
20
|
+
/**
|
|
21
|
+
* Convert chunk row to Chunk interface
|
|
22
|
+
*/
|
|
23
|
+
export declare function rowToChunk(row: ChunkRow): Chunk;
|
|
24
|
+
/**
|
|
25
|
+
* Convert embedding row to Embedding interface (without vector)
|
|
26
|
+
*/
|
|
27
|
+
export declare function rowToEmbedding(row: EmbeddingRow): Omit<Embedding, 'vector'>;
|
|
28
|
+
/**
|
|
29
|
+
* Convert provenance row to ProvenanceRecord interface
|
|
30
|
+
*/
|
|
31
|
+
export declare function rowToProvenance(row: ProvenanceRow): ProvenanceRecord;
|
|
32
|
+
/**
|
|
33
|
+
* Convert image row to ImageReference interface
|
|
34
|
+
*/
|
|
35
|
+
export declare function rowToImage(row: ImageRow): ImageReference;
|
|
36
|
+
//# sourceMappingURL=converters.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"converters.d.ts","sourceRoot":"","sources":["../../../../src/services/storage/database/converters.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,QAAQ,EAAkB,SAAS,EAAE,MAAM,6BAA6B,CAAC;AAClF,OAAO,EAAE,KAAK,EAAE,MAAM,0BAA0B,CAAC;AACjD,OAAO,EAAE,SAAS,EAAE,MAAM,8BAA8B,CAAC;AACzD,OAAO,EACL,gBAAgB,EAIjB,MAAM,+BAA+B,CAAC;AACvC,OAAO,EAAE,cAAc,EAAgC,MAAM,0BAA0B,CAAC;AACxF,OAAO,EACL,WAAW,EACX,YAAY,EACZ,QAAQ,EACR,YAAY,EACZ,aAAa,EACb,QAAQ,EACT,MAAM,YAAY,CAAC;AAwEpB;;GAEG;AACH,wBAAgB,aAAa,CAAC,GAAG,EAAE,WAAW,GAAG,QAAQ,CAoBxD;AAED;;GAEG;AACH,wBAAgB,cAAc,CAAC,GAAG,EAAE,YAAY,GAAG,SAAS,CAmB3D;AAED;;GAEG;AACH,wBAAgB,UAAU,CAAC,GAAG,EAAE,QAAQ,GAAG,KAAK,CA0B/C;AAED;;GAEG;AACH,wBAAgB,cAAc,CAAC,GAAG,EAAE,YAAY,GAAG,IAAI,CAAC,SAAS,EAAE,QAAQ,CAAC,CA4B3E;AAED;;GAEG;AACH,wBAAgB,eAAe,CAAC,GAAG,EAAE,aAAa,GAAG,gBAAgB,CA0BpE;AAED;;GAEG;AACH,wBAAgB,UAAU,CAAC,GAAG,EAAE,QAAQ,GAAG,cAAc,CAsCxD"}
|
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Row conversion functions for DatabaseService
|
|
3
|
+
*
|
|
4
|
+
* Converts database row objects to domain model interfaces.
|
|
5
|
+
*/
|
|
6
|
+
/** Valid DocumentStatus values for runtime validation */
|
|
7
|
+
const VALID_DOCUMENT_STATUSES = ['pending', 'processing', 'complete', 'failed'];
|
|
8
|
+
/** Valid ProvenanceType values for runtime validation */
|
|
9
|
+
const VALID_PROVENANCE_TYPES = [
|
|
10
|
+
'DOCUMENT', 'OCR_RESULT', 'CHUNK', 'IMAGE', 'VLM_DESCRIPTION',
|
|
11
|
+
'EXTRACTION', 'FORM_FILL', 'COMPARISON', 'CLUSTERING', 'EMBEDDING',
|
|
12
|
+
];
|
|
13
|
+
/** Valid VLMStatus values for runtime validation */
|
|
14
|
+
const VALID_VLM_STATUSES = ['pending', 'processing', 'complete', 'failed'];
|
|
15
|
+
/**
 * Runtime membership check for enum/union-typed string columns.
 *
 * @param value - Value read from the database
 * @param validValues - Array of accepted values
 * @param fieldName - Name of the field, used in the error message
 * @param id - Id of the record being converted, used in the error message
 * @returns The value unchanged when it is one of validValues
 * @throws Error describing the field, the offending value, the record id and
 *   the accepted values when the value is not in validValues
 */
function validateEnum(value, validValues, fieldName, id) {
    const isKnown = validValues.includes(value);
    if (isKnown) {
        return value;
    }
    const allowed = validValues.join(', ');
    throw new Error(`Invalid ${fieldName} "${value}" in record ${id}. Valid values: ${allowed}`);
}
|
|
25
|
+
/**
 * Decode a provenance record's processing_params JSON.
 *
 * On a parse failure the problem is logged via console.error and a marker
 * object carrying the raw text is returned instead of throwing.
 *
 * @param id - Provenance record id, used in the log message
 * @param raw - JSON text to decode
 * @returns The parsed value, or { _parse_error: true, _raw: raw } when parsing fails
 */
function parseProcessingParams(id, raw) {
    let parsed;
    try {
        parsed = JSON.parse(raw);
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        console.error(`[converters] Corrupt processing_params in provenance ${id}: ${raw}:`, reason);
        parsed = { _parse_error: true, _raw: raw };
    }
    return parsed;
}
|
|
37
|
+
/**
 * Decode a provenance record's location JSON.
 *
 * On a parse failure the problem is logged via console.error and null is
 * returned instead of throwing.
 *
 * @param id - Provenance record id, used in the log message
 * @param raw - JSON text to decode
 * @returns The parsed value, or null when parsing fails
 */
function parseLocation(id, raw) {
    let parsed = null;
    try {
        parsed = JSON.parse(raw);
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        console.error(`[converters] Corrupt location in provenance ${id}: ${raw}:`, reason);
    }
    return parsed;
}
|
|
50
|
+
/**
 * Decode an image record's vlm_structured_data JSON.
 *
 * On a parse failure the problem is logged via console.error and null is
 * returned instead of throwing.
 *
 * @param id - Image record id, used in the log message
 * @param raw - JSON text to decode
 * @returns The parsed value, or null when parsing fails
 */
function parseVLMStructuredData(id, raw) {
    let parsed = null;
    try {
        parsed = JSON.parse(raw);
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        console.error(`[converters] Corrupt vlm_structured_data in image ${id}: ${raw}:`, reason);
    }
    return parsed;
}
|
|
63
|
+
/**
 * Map a documents-table row onto the Document shape.
 *
 * The status column is checked against VALID_DOCUMENT_STATUSES; the four
 * doc_* / datalab_file_id fields are normalized to null when absent.
 *
 * @param row - Row read from the documents table
 * @returns Document object with the same field order as the row mapping
 * @throws Error (from validateEnum) when row.status is not a known status
 */
export function rowToDocument(row) {
    const {
        id, file_path, file_name, file_hash, file_size, file_type,
        page_count, provenance_id, created_at, modified_at,
        ocr_completed_at, error_message,
    } = row;
    const status = validateEnum(row.status, VALID_DOCUMENT_STATUSES, 'DocumentStatus', row.id);
    return {
        id,
        file_path,
        file_name,
        file_hash,
        file_size,
        file_type,
        status,
        page_count,
        provenance_id,
        created_at,
        modified_at,
        ocr_completed_at,
        error_message,
        doc_title: row.doc_title ?? null,
        doc_author: row.doc_author ?? null,
        doc_subject: row.doc_subject ?? null,
        datalab_file_id: row.datalab_file_id ?? null,
    };
}
|
|
87
|
+
/**
 * Map an OCR-results row onto the OCRResult shape.
 *
 * All columns are copied through unchanged except json_blocks and
 * extras_json, which are normalized to null when absent.
 *
 * @param row - Row read from the OCR results table
 * @returns OCRResult object
 */
export function rowToOCRResult(row) {
    const {
        id, provenance_id, document_id, extracted_text, text_length,
        datalab_request_id, datalab_mode, parse_quality_score, page_count,
        cost_cents, content_hash, processing_started_at,
        processing_completed_at, processing_duration_ms,
    } = row;
    return {
        id,
        provenance_id,
        document_id,
        extracted_text,
        text_length,
        datalab_request_id,
        datalab_mode,
        parse_quality_score,
        page_count,
        cost_cents,
        content_hash,
        processing_started_at,
        processing_completed_at,
        processing_duration_ms,
        json_blocks: row.json_blocks ?? null,
        extras_json: row.extras_json ?? null,
    };
}
|
|
110
|
+
/**
 * Map a chunks-table row onto the Chunk shape.
 *
 * All columns are copied through unchanged except ocr_quality_score, which
 * is normalized to null when absent.
 *
 * @param row - Row read from the chunks table
 * @returns Chunk object
 */
export function rowToChunk(row) {
    const {
        id, document_id, ocr_result_id, text, text_hash, chunk_index,
        character_start, character_end, page_number, page_range,
        overlap_previous, overlap_next, provenance_id, created_at,
        embedding_status, embedded_at,
    } = row;
    const {
        heading_context, heading_level, section_path, content_types,
        is_atomic, chunking_strategy,
    } = row;
    return {
        id,
        document_id,
        ocr_result_id,
        text,
        text_hash,
        chunk_index,
        character_start,
        character_end,
        page_number,
        page_range,
        overlap_previous,
        overlap_next,
        provenance_id,
        created_at,
        embedding_status,
        embedded_at,
        ocr_quality_score: row.ocr_quality_score ?? null,
        heading_context,
        heading_level,
        section_path,
        content_types,
        is_atomic,
        chunking_strategy,
    };
}
|
|
140
|
+
/**
 * Map a raw embedding row onto the Embedding shape (without the vector).
 *
 * Every listed column is copied through unchanged except `gpu_device`,
 * which falls back to an empty string when null or undefined. Columns on
 * the row that are not listed here are dropped.
 *
 * @param {object} row - Raw embedding row.
 * @returns {object} Embedding object with the same field names as the row.
 */
export function rowToEmbedding(row) {
    const {
        id,
        chunk_id,
        image_id,
        extraction_id,
        document_id,
        original_text,
        original_text_length,
        source_file_path,
        source_file_name,
        source_file_hash,
        page_number,
        page_range,
        character_start,
        character_end,
        chunk_index,
        total_chunks,
        model_name,
        model_version,
        task_type,
        inference_mode,
        gpu_device,
        provenance_id,
        content_hash,
        created_at,
        generation_duration_ms,
    } = row;
    return {
        id,
        chunk_id,
        image_id,
        extraction_id,
        document_id,
        original_text,
        original_text_length,
        source_file_path,
        source_file_name,
        source_file_hash,
        page_number,
        page_range,
        character_start,
        character_end,
        chunk_index,
        total_chunks,
        model_name,
        model_version,
        task_type,
        inference_mode,
        // Unlike the other converters' optional fields, this one defaults to '' rather than null.
        gpu_device: gpu_device ?? '',
        provenance_id,
        content_hash,
        created_at,
        generation_duration_ms,
    };
}
|
|
172
|
+
/**
 * Map a raw provenance row onto the ProvenanceRecord shape.
 *
 * Three fields are derived rather than copied:
 *  - `type` is passed through `validateEnum` with the row id
 *    (NOTE(review): presumably rejects values outside
 *    VALID_PROVENANCE_TYPES - confirm against validateEnum);
 *  - `location` is handed to `parseLocation` only when the column is
 *    truthy, otherwise it is `null`;
 *  - `processing_params` is always handed to `parseProcessingParams`.
 * All remaining columns are copied through unchanged.
 *
 * @param {object} row - Raw provenance row.
 * @returns {object} ProvenanceRecord object.
 */
export function rowToProvenance(row) {
    const recordId = row.id;
    // Derived fields are computed in the same order the output object lists them.
    const type = validateEnum(row.type, VALID_PROVENANCE_TYPES, 'ProvenanceType', recordId);
    let location = null;
    if (row.location) {
        location = parseLocation(recordId, row.location);
    }
    const processing_params = parseProcessingParams(recordId, row.processing_params);
    const {
        created_at,
        processed_at,
        source_file_created_at,
        source_file_modified_at,
        source_type,
        source_path,
        source_id,
        root_document_id,
        content_hash,
        input_hash,
        file_hash,
        processor,
        processor_version,
        processing_duration_ms,
        processing_quality_score,
        parent_id,
        parent_ids,
        chain_depth,
        chain_path,
    } = row;
    return {
        id: recordId,
        type,
        created_at,
        processed_at,
        source_file_created_at,
        source_file_modified_at,
        source_type,
        source_path,
        source_id,
        root_document_id,
        location,
        content_hash,
        input_hash,
        file_hash,
        processor,
        processor_version,
        processing_params,
        processing_duration_ms,
        processing_quality_score,
        parent_id,
        parent_ids,
        chain_depth,
        chain_path,
    };
}
|
|
202
|
+
/**
 * Map a raw image row onto the ImageReference shape.
 *
 * The flat `bbox_*` columns are grouped into `bounding_box`, and `width` /
 * `height` into `dimensions`. `vlm_status` is passed through `validateEnum`
 * with the row id (NOTE(review): presumably rejects values outside
 * VALID_VLM_STATUSES - confirm against validateEnum). `vlm_structured_data`
 * is handed to `parseVLMStructuredData` only when the column is truthy,
 * otherwise it is `null`. `is_header_footer` is coerced to a boolean, and
 * `block_type` / `content_hash` are normalised to `null` when null or
 * undefined. All remaining columns are copied through unchanged.
 *
 * @param {object} row - Raw image row.
 * @returns {object} ImageReference object.
 */
export function rowToImage(row) {
    const imageId = row.id;
    const bounding_box = {
        x: row.bbox_x,
        y: row.bbox_y,
        width: row.bbox_width,
        height: row.bbox_height,
    };
    const dimensions = { width: row.width, height: row.height };
    // Validation runs before structured-data parsing, matching the output field order.
    const vlm_status = validateEnum(row.vlm_status, VALID_VLM_STATUSES, 'VLMStatus', imageId);
    let vlm_structured_data = null;
    if (row.vlm_structured_data) {
        vlm_structured_data = parseVLMStructuredData(imageId, row.vlm_structured_data);
    }
    const {
        document_id,
        ocr_result_id,
        page_number,
        image_index,
        format,
        extracted_path,
        file_size,
        vlm_description,
        vlm_embedding_id,
        vlm_model,
        vlm_confidence,
        vlm_processed_at,
        vlm_tokens_used,
        context_text,
        provenance_id,
        created_at,
        error_message,
        block_type,
        is_header_footer,
        content_hash,
    } = row;
    return {
        id: imageId,
        document_id,
        ocr_result_id,
        page_number,
        bounding_box,
        image_index,
        format,
        dimensions,
        extracted_path,
        file_size,
        vlm_status,
        vlm_description,
        vlm_structured_data,
        vlm_embedding_id,
        vlm_model,
        vlm_confidence,
        vlm_processed_at,
        vlm_tokens_used,
        context_text,
        provenance_id,
        created_at,
        error_message,
        block_type: block_type ?? null,
        is_header_footer: Boolean(is_header_footer),
        content_hash: content_hash ?? null,
    };
}
|
|
244
|
+
//# sourceMappingURL=converters.js.map
|