ocr-provenance-mcp 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ocr-provenance-mcp might be problematic. Click here for more details.
- package/.env.example +55 -0
- package/LICENSE +78 -0
- package/README.md +1154 -0
- package/dist/bin-http.d.ts +24 -0
- package/dist/bin-http.d.ts.map +1 -0
- package/dist/bin-http.js +275 -0
- package/dist/bin-http.js.map +1 -0
- package/dist/bin-setup.d.ts +11 -0
- package/dist/bin-setup.d.ts.map +1 -0
- package/dist/bin-setup.js +610 -0
- package/dist/bin-setup.js.map +1 -0
- package/dist/bin.d.ts +16 -0
- package/dist/bin.d.ts.map +1 -0
- package/dist/bin.js +16 -0
- package/dist/bin.js.map +1 -0
- package/dist/index.d.ts +13 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +90 -0
- package/dist/index.js.map +1 -0
- package/dist/models/chunk.d.ts +136 -0
- package/dist/models/chunk.d.ts.map +1 -0
- package/dist/models/chunk.js +27 -0
- package/dist/models/chunk.js.map +1 -0
- package/dist/models/cluster.d.ts +79 -0
- package/dist/models/cluster.d.ts.map +1 -0
- package/dist/models/cluster.js +10 -0
- package/dist/models/cluster.js.map +1 -0
- package/dist/models/comparison.d.ts +62 -0
- package/dist/models/comparison.d.ts.map +1 -0
- package/dist/models/comparison.js +8 -0
- package/dist/models/comparison.js.map +1 -0
- package/dist/models/document.d.ts +104 -0
- package/dist/models/document.d.ts.map +1 -0
- package/dist/models/document.js +15 -0
- package/dist/models/document.js.map +1 -0
- package/dist/models/embedding.d.ts +87 -0
- package/dist/models/embedding.d.ts.map +1 -0
- package/dist/models/embedding.js +23 -0
- package/dist/models/embedding.js.map +1 -0
- package/dist/models/extraction.d.ts +15 -0
- package/dist/models/extraction.d.ts.map +1 -0
- package/dist/models/extraction.js +2 -0
- package/dist/models/extraction.js.map +1 -0
- package/dist/models/form-fill.d.ts +23 -0
- package/dist/models/form-fill.d.ts.map +1 -0
- package/dist/models/form-fill.js +2 -0
- package/dist/models/form-fill.js.map +1 -0
- package/dist/models/image.d.ts +177 -0
- package/dist/models/image.d.ts.map +1 -0
- package/dist/models/image.js +8 -0
- package/dist/models/image.js.map +1 -0
- package/dist/models/index.d.ts +14 -0
- package/dist/models/index.d.ts.map +1 -0
- package/dist/models/index.js +22 -0
- package/dist/models/index.js.map +1 -0
- package/dist/models/provenance.d.ts +174 -0
- package/dist/models/provenance.d.ts.map +1 -0
- package/dist/models/provenance.js +53 -0
- package/dist/models/provenance.js.map +1 -0
- package/dist/models/uploaded-file.d.ts +20 -0
- package/dist/models/uploaded-file.d.ts.map +1 -0
- package/dist/models/uploaded-file.js +2 -0
- package/dist/models/uploaded-file.js.map +1 -0
- package/dist/server/errors.d.ts +93 -0
- package/dist/server/errors.d.ts.map +1 -0
- package/dist/server/errors.js +256 -0
- package/dist/server/errors.js.map +1 -0
- package/dist/server/events.d.ts +36 -0
- package/dist/server/events.d.ts.map +1 -0
- package/dist/server/events.js +48 -0
- package/dist/server/events.js.map +1 -0
- package/dist/server/permissions.d.ts +26 -0
- package/dist/server/permissions.d.ts.map +1 -0
- package/dist/server/permissions.js +194 -0
- package/dist/server/permissions.js.map +1 -0
- package/dist/server/register-tools.d.ts +25 -0
- package/dist/server/register-tools.d.ts.map +1 -0
- package/dist/server/register-tools.js +102 -0
- package/dist/server/register-tools.js.map +1 -0
- package/dist/server/startup.d.ts +16 -0
- package/dist/server/startup.d.ts.map +1 -0
- package/dist/server/startup.js +37 -0
- package/dist/server/startup.js.map +1 -0
- package/dist/server/state.d.ts +166 -0
- package/dist/server/state.d.ts.map +1 -0
- package/dist/server/state.js +424 -0
- package/dist/server/state.js.map +1 -0
- package/dist/server/transports/http-transport.d.ts +37 -0
- package/dist/server/transports/http-transport.d.ts.map +1 -0
- package/dist/server/transports/http-transport.js +204 -0
- package/dist/server/transports/http-transport.js.map +1 -0
- package/dist/server/transports/index.d.ts +9 -0
- package/dist/server/transports/index.d.ts.map +1 -0
- package/dist/server/transports/index.js +9 -0
- package/dist/server/transports/index.js.map +1 -0
- package/dist/server/transports/session-manager.d.ts +40 -0
- package/dist/server/transports/session-manager.d.ts.map +1 -0
- package/dist/server/transports/session-manager.js +74 -0
- package/dist/server/transports/session-manager.js.map +1 -0
- package/dist/server/types.d.ts +82 -0
- package/dist/server/types.d.ts.map +1 -0
- package/dist/server/types.js +14 -0
- package/dist/server/types.js.map +1 -0
- package/dist/services/audit.d.ts +26 -0
- package/dist/services/audit.d.ts.map +1 -0
- package/dist/services/audit.js +43 -0
- package/dist/services/audit.js.map +1 -0
- package/dist/services/chunking/chunk-deduplicator.d.ts +33 -0
- package/dist/services/chunking/chunk-deduplicator.d.ts.map +1 -0
- package/dist/services/chunking/chunk-deduplicator.js +46 -0
- package/dist/services/chunking/chunk-deduplicator.js.map +1 -0
- package/dist/services/chunking/chunk-merger.d.ts +26 -0
- package/dist/services/chunking/chunk-merger.d.ts.map +1 -0
- package/dist/services/chunking/chunk-merger.js +94 -0
- package/dist/services/chunking/chunk-merger.js.map +1 -0
- package/dist/services/chunking/chunker.d.ts +62 -0
- package/dist/services/chunking/chunker.d.ts.map +1 -0
- package/dist/services/chunking/chunker.js +566 -0
- package/dist/services/chunking/chunker.js.map +1 -0
- package/dist/services/chunking/heading-normalizer.d.ts +33 -0
- package/dist/services/chunking/heading-normalizer.d.ts.map +1 -0
- package/dist/services/chunking/heading-normalizer.js +101 -0
- package/dist/services/chunking/heading-normalizer.js.map +1 -0
- package/dist/services/chunking/json-block-analyzer.d.ts +163 -0
- package/dist/services/chunking/json-block-analyzer.d.ts.map +1 -0
- package/dist/services/chunking/json-block-analyzer.js +1033 -0
- package/dist/services/chunking/json-block-analyzer.js.map +1 -0
- package/dist/services/chunking/markdown-parser.d.ts +75 -0
- package/dist/services/chunking/markdown-parser.d.ts.map +1 -0
- package/dist/services/chunking/markdown-parser.js +428 -0
- package/dist/services/chunking/markdown-parser.js.map +1 -0
- package/dist/services/chunking/text-normalizer.d.ts +20 -0
- package/dist/services/chunking/text-normalizer.d.ts.map +1 -0
- package/dist/services/chunking/text-normalizer.js +36 -0
- package/dist/services/chunking/text-normalizer.js.map +1 -0
- package/dist/services/clm/contract-schemas.d.ts +36 -0
- package/dist/services/clm/contract-schemas.d.ts.map +1 -0
- package/dist/services/clm/contract-schemas.js +92 -0
- package/dist/services/clm/contract-schemas.js.map +1 -0
- package/dist/services/clm/summarization.d.ts +46 -0
- package/dist/services/clm/summarization.d.ts.map +1 -0
- package/dist/services/clm/summarization.js +61 -0
- package/dist/services/clm/summarization.js.map +1 -0
- package/dist/services/clustering/clustering-service.d.ts +58 -0
- package/dist/services/clustering/clustering-service.d.ts.map +1 -0
- package/dist/services/clustering/clustering-service.js +467 -0
- package/dist/services/clustering/clustering-service.js.map +1 -0
- package/dist/services/comparison/diff-service.d.ts +41 -0
- package/dist/services/comparison/diff-service.d.ts.map +1 -0
- package/dist/services/comparison/diff-service.js +120 -0
- package/dist/services/comparison/diff-service.js.map +1 -0
- package/dist/services/embedding/embedder.d.ts +55 -0
- package/dist/services/embedding/embedder.d.ts.map +1 -0
- package/dist/services/embedding/embedder.js +202 -0
- package/dist/services/embedding/embedder.js.map +1 -0
- package/dist/services/embedding/nomic.d.ts +67 -0
- package/dist/services/embedding/nomic.d.ts.map +1 -0
- package/dist/services/embedding/nomic.js +280 -0
- package/dist/services/embedding/nomic.js.map +1 -0
- package/dist/services/gemini/circuit-breaker.d.ts +106 -0
- package/dist/services/gemini/circuit-breaker.d.ts.map +1 -0
- package/dist/services/gemini/circuit-breaker.js +237 -0
- package/dist/services/gemini/circuit-breaker.js.map +1 -0
- package/dist/services/gemini/client.d.ts +173 -0
- package/dist/services/gemini/client.d.ts.map +1 -0
- package/dist/services/gemini/client.js +483 -0
- package/dist/services/gemini/client.js.map +1 -0
- package/dist/services/gemini/config.d.ts +116 -0
- package/dist/services/gemini/config.d.ts.map +1 -0
- package/dist/services/gemini/config.js +118 -0
- package/dist/services/gemini/config.js.map +1 -0
- package/dist/services/gemini/index.d.ts +9 -0
- package/dist/services/gemini/index.d.ts.map +1 -0
- package/dist/services/gemini/index.js +13 -0
- package/dist/services/gemini/index.js.map +1 -0
- package/dist/services/gemini/rate-limiter.d.ts +62 -0
- package/dist/services/gemini/rate-limiter.d.ts.map +1 -0
- package/dist/services/gemini/rate-limiter.js +120 -0
- package/dist/services/gemini/rate-limiter.js.map +1 -0
- package/dist/services/images/extractor.d.ts +88 -0
- package/dist/services/images/extractor.d.ts.map +1 -0
- package/dist/services/images/extractor.js +340 -0
- package/dist/services/images/extractor.js.map +1 -0
- package/dist/services/images/optimizer.d.ts +130 -0
- package/dist/services/images/optimizer.d.ts.map +1 -0
- package/dist/services/images/optimizer.js +228 -0
- package/dist/services/images/optimizer.js.map +1 -0
- package/dist/services/ocr/datalab.d.ts +64 -0
- package/dist/services/ocr/datalab.d.ts.map +1 -0
- package/dist/services/ocr/datalab.js +425 -0
- package/dist/services/ocr/datalab.js.map +1 -0
- package/dist/services/ocr/errors.d.ts +38 -0
- package/dist/services/ocr/errors.d.ts.map +1 -0
- package/dist/services/ocr/errors.js +83 -0
- package/dist/services/ocr/errors.js.map +1 -0
- package/dist/services/ocr/file-manager.d.ts +76 -0
- package/dist/services/ocr/file-manager.d.ts.map +1 -0
- package/dist/services/ocr/file-manager.js +238 -0
- package/dist/services/ocr/file-manager.js.map +1 -0
- package/dist/services/ocr/form-fill.d.ts +48 -0
- package/dist/services/ocr/form-fill.d.ts.map +1 -0
- package/dist/services/ocr/form-fill.js +213 -0
- package/dist/services/ocr/form-fill.js.map +1 -0
- package/dist/services/ocr/processor.d.ts +95 -0
- package/dist/services/ocr/processor.d.ts.map +1 -0
- package/dist/services/ocr/processor.js +259 -0
- package/dist/services/ocr/processor.js.map +1 -0
- package/dist/services/provenance/agent-metadata.d.ts +82 -0
- package/dist/services/provenance/agent-metadata.d.ts.map +1 -0
- package/dist/services/provenance/agent-metadata.js +106 -0
- package/dist/services/provenance/agent-metadata.js.map +1 -0
- package/dist/services/provenance/chain-hash.d.ts +57 -0
- package/dist/services/provenance/chain-hash.d.ts.map +1 -0
- package/dist/services/provenance/chain-hash.js +131 -0
- package/dist/services/provenance/chain-hash.js.map +1 -0
- package/dist/services/provenance/exporter.d.ts +202 -0
- package/dist/services/provenance/exporter.d.ts.map +1 -0
- package/dist/services/provenance/exporter.js +457 -0
- package/dist/services/provenance/exporter.js.map +1 -0
- package/dist/services/provenance/index.d.ts +15 -0
- package/dist/services/provenance/index.d.ts.map +1 -0
- package/dist/services/provenance/index.js +17 -0
- package/dist/services/provenance/index.js.map +1 -0
- package/dist/services/provenance/tracker.d.ts +138 -0
- package/dist/services/provenance/tracker.d.ts.map +1 -0
- package/dist/services/provenance/tracker.js +293 -0
- package/dist/services/provenance/tracker.js.map +1 -0
- package/dist/services/provenance/verifier.d.ts +153 -0
- package/dist/services/provenance/verifier.d.ts.map +1 -0
- package/dist/services/provenance/verifier.js +536 -0
- package/dist/services/provenance/verifier.js.map +1 -0
- package/dist/services/python-pool.d.ts +70 -0
- package/dist/services/python-pool.d.ts.map +1 -0
- package/dist/services/python-pool.js +265 -0
- package/dist/services/python-pool.js.map +1 -0
- package/dist/services/search/bm25.d.ts +180 -0
- package/dist/services/search/bm25.d.ts.map +1 -0
- package/dist/services/search/bm25.js +656 -0
- package/dist/services/search/bm25.js.map +1 -0
- package/dist/services/search/fusion.d.ts +103 -0
- package/dist/services/search/fusion.d.ts.map +1 -0
- package/dist/services/search/fusion.js +122 -0
- package/dist/services/search/fusion.js.map +1 -0
- package/dist/services/search/local-reranker.d.ts +30 -0
- package/dist/services/search/local-reranker.d.ts.map +1 -0
- package/dist/services/search/local-reranker.js +123 -0
- package/dist/services/search/local-reranker.js.map +1 -0
- package/dist/services/search/quality.d.ts +11 -0
- package/dist/services/search/quality.d.ts.map +1 -0
- package/dist/services/search/quality.js +17 -0
- package/dist/services/search/quality.js.map +1 -0
- package/dist/services/search/query-classifier.d.ts +34 -0
- package/dist/services/search/query-classifier.d.ts.map +1 -0
- package/dist/services/search/query-classifier.js +114 -0
- package/dist/services/search/query-classifier.js.map +1 -0
- package/dist/services/search/query-expander.d.ts +73 -0
- package/dist/services/search/query-expander.d.ts.map +1 -0
- package/dist/services/search/query-expander.js +281 -0
- package/dist/services/search/query-expander.js.map +1 -0
- package/dist/services/search/reranker.d.ts +44 -0
- package/dist/services/search/reranker.d.ts.map +1 -0
- package/dist/services/search/reranker.js +101 -0
- package/dist/services/search/reranker.js.map +1 -0
- package/dist/services/storage/database/annotation-operations.d.ts +113 -0
- package/dist/services/storage/database/annotation-operations.d.ts.map +1 -0
- package/dist/services/storage/database/annotation-operations.js +177 -0
- package/dist/services/storage/database/annotation-operations.js.map +1 -0
- package/dist/services/storage/database/approval-operations.d.ts +132 -0
- package/dist/services/storage/database/approval-operations.d.ts.map +1 -0
- package/dist/services/storage/database/approval-operations.js +206 -0
- package/dist/services/storage/database/approval-operations.js.map +1 -0
- package/dist/services/storage/database/chunk-operations.d.ts +132 -0
- package/dist/services/storage/database/chunk-operations.d.ts.map +1 -0
- package/dist/services/storage/database/chunk-operations.js +306 -0
- package/dist/services/storage/database/chunk-operations.js.map +1 -0
- package/dist/services/storage/database/cluster-operations.d.ts +97 -0
- package/dist/services/storage/database/cluster-operations.d.ts.map +1 -0
- package/dist/services/storage/database/cluster-operations.js +258 -0
- package/dist/services/storage/database/cluster-operations.js.map +1 -0
- package/dist/services/storage/database/comparison-operations.d.ts +41 -0
- package/dist/services/storage/database/comparison-operations.d.ts.map +1 -0
- package/dist/services/storage/database/comparison-operations.js +65 -0
- package/dist/services/storage/database/comparison-operations.js.map +1 -0
- package/dist/services/storage/database/converters.d.ts +36 -0
- package/dist/services/storage/database/converters.d.ts.map +1 -0
- package/dist/services/storage/database/converters.js +244 -0
- package/dist/services/storage/database/converters.js.map +1 -0
- package/dist/services/storage/database/document-operations.d.ts +145 -0
- package/dist/services/storage/database/document-operations.d.ts.map +1 -0
- package/dist/services/storage/database/document-operations.js +498 -0
- package/dist/services/storage/database/document-operations.js.map +1 -0
- package/dist/services/storage/database/embedding-operations.d.ts +130 -0
- package/dist/services/storage/database/embedding-operations.d.ts.map +1 -0
- package/dist/services/storage/database/embedding-operations.js +315 -0
- package/dist/services/storage/database/embedding-operations.js.map +1 -0
- package/dist/services/storage/database/extraction-operations.d.ts +47 -0
- package/dist/services/storage/database/extraction-operations.d.ts.map +1 -0
- package/dist/services/storage/database/extraction-operations.js +85 -0
- package/dist/services/storage/database/extraction-operations.js.map +1 -0
- package/dist/services/storage/database/form-fill-operations.d.ts +58 -0
- package/dist/services/storage/database/form-fill-operations.d.ts.map +1 -0
- package/dist/services/storage/database/form-fill-operations.js +116 -0
- package/dist/services/storage/database/form-fill-operations.js.map +1 -0
- package/dist/services/storage/database/helpers.d.ts +29 -0
- package/dist/services/storage/database/helpers.d.ts.map +1 -0
- package/dist/services/storage/database/helpers.js +55 -0
- package/dist/services/storage/database/helpers.js.map +1 -0
- package/dist/services/storage/database/image-operations.d.ts +202 -0
- package/dist/services/storage/database/image-operations.d.ts.map +1 -0
- package/dist/services/storage/database/image-operations.js +484 -0
- package/dist/services/storage/database/image-operations.js.map +1 -0
- package/dist/services/storage/database/index.d.ts +13 -0
- package/dist/services/storage/database/index.d.ts.map +1 -0
- package/dist/services/storage/database/index.js +16 -0
- package/dist/services/storage/database/index.js.map +1 -0
- package/dist/services/storage/database/lock-operations.d.ts +59 -0
- package/dist/services/storage/database/lock-operations.d.ts.map +1 -0
- package/dist/services/storage/database/lock-operations.js +89 -0
- package/dist/services/storage/database/lock-operations.js.map +1 -0
- package/dist/services/storage/database/obligation-operations.d.ts +88 -0
- package/dist/services/storage/database/obligation-operations.d.ts.map +1 -0
- package/dist/services/storage/database/obligation-operations.js +206 -0
- package/dist/services/storage/database/obligation-operations.js.map +1 -0
- package/dist/services/storage/database/ocr-operations.d.ts +33 -0
- package/dist/services/storage/database/ocr-operations.d.ts.map +1 -0
- package/dist/services/storage/database/ocr-operations.js +70 -0
- package/dist/services/storage/database/ocr-operations.js.map +1 -0
- package/dist/services/storage/database/playbook-operations.d.ts +72 -0
- package/dist/services/storage/database/playbook-operations.d.ts.map +1 -0
- package/dist/services/storage/database/playbook-operations.js +247 -0
- package/dist/services/storage/database/playbook-operations.js.map +1 -0
- package/dist/services/storage/database/provenance-operations.d.ts +112 -0
- package/dist/services/storage/database/provenance-operations.d.ts.map +1 -0
- package/dist/services/storage/database/provenance-operations.js +251 -0
- package/dist/services/storage/database/provenance-operations.js.map +1 -0
- package/dist/services/storage/database/service.d.ts +142 -0
- package/dist/services/storage/database/service.d.ts.map +1 -0
- package/dist/services/storage/database/service.js +310 -0
- package/dist/services/storage/database/service.js.map +1 -0
- package/dist/services/storage/database/static-operations.d.ts +30 -0
- package/dist/services/storage/database/static-operations.d.ts.map +1 -0
- package/dist/services/storage/database/static-operations.js +218 -0
- package/dist/services/storage/database/static-operations.js.map +1 -0
- package/dist/services/storage/database/stats-operations.d.ts +101 -0
- package/dist/services/storage/database/stats-operations.d.ts.map +1 -0
- package/dist/services/storage/database/stats-operations.js +394 -0
- package/dist/services/storage/database/stats-operations.js.map +1 -0
- package/dist/services/storage/database/tag-operations.d.ts +76 -0
- package/dist/services/storage/database/tag-operations.d.ts.map +1 -0
- package/dist/services/storage/database/tag-operations.js +178 -0
- package/dist/services/storage/database/tag-operations.js.map +1 -0
- package/dist/services/storage/database/types.d.ts +286 -0
- package/dist/services/storage/database/types.d.ts.map +1 -0
- package/dist/services/storage/database/types.js +39 -0
- package/dist/services/storage/database/types.js.map +1 -0
- package/dist/services/storage/database/upload-operations.d.ts +71 -0
- package/dist/services/storage/database/upload-operations.d.ts.map +1 -0
- package/dist/services/storage/database/upload-operations.js +124 -0
- package/dist/services/storage/database/upload-operations.js.map +1 -0
- package/dist/services/storage/database/user-operations.d.ts +102 -0
- package/dist/services/storage/database/user-operations.d.ts.map +1 -0
- package/dist/services/storage/database/user-operations.js +151 -0
- package/dist/services/storage/database/user-operations.js.map +1 -0
- package/dist/services/storage/database/workflow-operations.d.ts +98 -0
- package/dist/services/storage/database/workflow-operations.d.ts.map +1 -0
- package/dist/services/storage/database/workflow-operations.js +157 -0
- package/dist/services/storage/database/workflow-operations.js.map +1 -0
- package/dist/services/storage/database.d.ts +16 -0
- package/dist/services/storage/database.d.ts.map +1 -0
- package/dist/services/storage/database.js +15 -0
- package/dist/services/storage/database.js.map +1 -0
- package/dist/services/storage/index.d.ts +10 -0
- package/dist/services/storage/index.d.ts.map +1 -0
- package/dist/services/storage/index.js +10 -0
- package/dist/services/storage/index.js.map +1 -0
- package/dist/services/storage/migrations/index.d.ts +16 -0
- package/dist/services/storage/migrations/index.d.ts.map +1 -0
- package/dist/services/storage/migrations/index.js +20 -0
- package/dist/services/storage/migrations/index.js.map +1 -0
- package/dist/services/storage/migrations/operations.d.ts +40 -0
- package/dist/services/storage/migrations/operations.d.ts.map +1 -0
- package/dist/services/storage/migrations/operations.js +2910 -0
- package/dist/services/storage/migrations/operations.js.map +1 -0
- package/dist/services/storage/migrations/schema-definitions.d.ts +306 -0
- package/dist/services/storage/migrations/schema-definitions.d.ts.map +1 -0
- package/dist/services/storage/migrations/schema-definitions.js +1006 -0
- package/dist/services/storage/migrations/schema-definitions.js.map +1 -0
- package/dist/services/storage/migrations/schema-helpers.d.ts +50 -0
- package/dist/services/storage/migrations/schema-helpers.d.ts.map +1 -0
- package/dist/services/storage/migrations/schema-helpers.js +176 -0
- package/dist/services/storage/migrations/schema-helpers.js.map +1 -0
- package/dist/services/storage/migrations/types.d.ts +15 -0
- package/dist/services/storage/migrations/types.d.ts.map +1 -0
- package/dist/services/storage/migrations/types.js +21 -0
- package/dist/services/storage/migrations/types.js.map +1 -0
- package/dist/services/storage/migrations/verification.d.ts +20 -0
- package/dist/services/storage/migrations/verification.d.ts.map +1 -0
- package/dist/services/storage/migrations/verification.js +78 -0
- package/dist/services/storage/migrations/verification.js.map +1 -0
- package/dist/services/storage/migrations.d.ts +16 -0
- package/dist/services/storage/migrations.d.ts.map +1 -0
- package/dist/services/storage/migrations.js +17 -0
- package/dist/services/storage/migrations.js.map +1 -0
- package/dist/services/storage/types.d.ts +12 -0
- package/dist/services/storage/types.d.ts.map +1 -0
- package/dist/services/storage/types.js +5 -0
- package/dist/services/storage/types.js.map +1 -0
- package/dist/services/storage/vector.d.ts +208 -0
- package/dist/services/storage/vector.d.ts.map +1 -0
- package/dist/services/storage/vector.js +526 -0
- package/dist/services/storage/vector.js.map +1 -0
- package/dist/services/vlm/pipeline.d.ts +194 -0
- package/dist/services/vlm/pipeline.d.ts.map +1 -0
- package/dist/services/vlm/pipeline.js +800 -0
- package/dist/services/vlm/pipeline.js.map +1 -0
- package/dist/services/vlm/prompts.d.ts +171 -0
- package/dist/services/vlm/prompts.d.ts.map +1 -0
- package/dist/services/vlm/prompts.js +229 -0
- package/dist/services/vlm/prompts.js.map +1 -0
- package/dist/services/vlm/service.d.ts +174 -0
- package/dist/services/vlm/service.d.ts.map +1 -0
- package/dist/services/vlm/service.js +256 -0
- package/dist/services/vlm/service.js.map +1 -0
- package/dist/services/webhook-delivery.d.ts +4 -0
- package/dist/services/webhook-delivery.d.ts.map +1 -0
- package/dist/services/webhook-delivery.js +140 -0
- package/dist/services/webhook-delivery.js.map +1 -0
- package/dist/tools/chunks.d.ts +19 -0
- package/dist/tools/chunks.d.ts.map +1 -0
- package/dist/tools/chunks.js +392 -0
- package/dist/tools/chunks.js.map +1 -0
- package/dist/tools/clm.d.ts +16 -0
- package/dist/tools/clm.d.ts.map +1 -0
- package/dist/tools/clm.js +668 -0
- package/dist/tools/clm.js.map +1 -0
- package/dist/tools/clustering.d.ts +13 -0
- package/dist/tools/clustering.d.ts.map +1 -0
- package/dist/tools/clustering.js +498 -0
- package/dist/tools/clustering.js.map +1 -0
- package/dist/tools/collaboration.d.ts +15 -0
- package/dist/tools/collaboration.d.ts.map +1 -0
- package/dist/tools/collaboration.js +516 -0
- package/dist/tools/collaboration.js.map +1 -0
- package/dist/tools/comparison.d.ts +13 -0
- package/dist/tools/comparison.d.ts.map +1 -0
- package/dist/tools/comparison.js +735 -0
- package/dist/tools/comparison.js.map +1 -0
- package/dist/tools/compliance.d.ts +15 -0
- package/dist/tools/compliance.d.ts.map +1 -0
- package/dist/tools/compliance.js +640 -0
- package/dist/tools/compliance.js.map +1 -0
- package/dist/tools/config.d.ts +19 -0
- package/dist/tools/config.d.ts.map +1 -0
- package/dist/tools/config.js +213 -0
- package/dist/tools/config.js.map +1 -0
- package/dist/tools/database.d.ts +62 -0
- package/dist/tools/database.d.ts.map +1 -0
- package/dist/tools/database.js +288 -0
- package/dist/tools/database.js.map +1 -0
- package/dist/tools/documents.d.ts +61 -0
- package/dist/tools/documents.d.ts.map +1 -0
- package/dist/tools/documents.js +1624 -0
- package/dist/tools/documents.js.map +1 -0
- package/dist/tools/embeddings.d.ts +14 -0
- package/dist/tools/embeddings.d.ts.map +1 -0
- package/dist/tools/embeddings.js +626 -0
- package/dist/tools/embeddings.js.map +1 -0
- package/dist/tools/evaluation.d.ts +25 -0
- package/dist/tools/evaluation.d.ts.map +1 -0
- package/dist/tools/evaluation.js +523 -0
- package/dist/tools/evaluation.js.map +1 -0
- package/dist/tools/events.d.ts +16 -0
- package/dist/tools/events.d.ts.map +1 -0
- package/dist/tools/events.js +493 -0
- package/dist/tools/events.js.map +1 -0
- package/dist/tools/extraction-structured.d.ts +13 -0
- package/dist/tools/extraction-structured.d.ts.map +1 -0
- package/dist/tools/extraction-structured.js +390 -0
- package/dist/tools/extraction-structured.js.map +1 -0
- package/dist/tools/extraction.d.ts +24 -0
- package/dist/tools/extraction.d.ts.map +1 -0
- package/dist/tools/extraction.js +424 -0
- package/dist/tools/extraction.js.map +1 -0
- package/dist/tools/file-management.d.ts +14 -0
- package/dist/tools/file-management.d.ts.map +1 -0
- package/dist/tools/file-management.js +523 -0
- package/dist/tools/file-management.js.map +1 -0
- package/dist/tools/form-fill.d.ts +13 -0
- package/dist/tools/form-fill.d.ts.map +1 -0
- package/dist/tools/form-fill.js +250 -0
- package/dist/tools/form-fill.js.map +1 -0
- package/dist/tools/health.d.ts +19 -0
- package/dist/tools/health.d.ts.map +1 -0
- package/dist/tools/health.js +229 -0
- package/dist/tools/health.js.map +1 -0
- package/dist/tools/images.d.ts +54 -0
- package/dist/tools/images.d.ts.map +1 -0
- package/dist/tools/images.js +787 -0
- package/dist/tools/images.js.map +1 -0
- package/dist/tools/ingestion.d.ts +94 -0
- package/dist/tools/ingestion.d.ts.map +1 -0
- package/dist/tools/ingestion.js +1659 -0
- package/dist/tools/ingestion.js.map +1 -0
- package/dist/tools/intelligence.d.ts +18 -0
- package/dist/tools/intelligence.d.ts.map +1 -0
- package/dist/tools/intelligence.js +1039 -0
- package/dist/tools/intelligence.js.map +1 -0
- package/dist/tools/provenance.d.ts +51 -0
- package/dist/tools/provenance.d.ts.map +1 -0
- package/dist/tools/provenance.js +691 -0
- package/dist/tools/provenance.js.map +1 -0
- package/dist/tools/reports.d.ts +41 -0
- package/dist/tools/reports.d.ts.map +1 -0
- package/dist/tools/reports.js +1394 -0
- package/dist/tools/reports.js.map +1 -0
- package/dist/tools/search.d.ts +35 -0
- package/dist/tools/search.d.ts.map +1 -0
- package/dist/tools/search.js +2528 -0
- package/dist/tools/search.js.map +1 -0
- package/dist/tools/shared.d.ts +52 -0
- package/dist/tools/shared.d.ts.map +1 -0
- package/dist/tools/shared.js +54 -0
- package/dist/tools/shared.js.map +1 -0
- package/dist/tools/tags.d.ts +15 -0
- package/dist/tools/tags.d.ts.map +1 -0
- package/dist/tools/tags.js +287 -0
- package/dist/tools/tags.js.map +1 -0
- package/dist/tools/timeline.d.ts +15 -0
- package/dist/tools/timeline.d.ts.map +1 -0
- package/dist/tools/timeline.js +14 -0
- package/dist/tools/timeline.js.map +1 -0
- package/dist/tools/users.d.ts +14 -0
- package/dist/tools/users.d.ts.map +1 -0
- package/dist/tools/users.js +257 -0
- package/dist/tools/users.js.map +1 -0
- package/dist/tools/vlm.d.ts +40 -0
- package/dist/tools/vlm.d.ts.map +1 -0
- package/dist/tools/vlm.js +475 -0
- package/dist/tools/vlm.js.map +1 -0
- package/dist/tools/workflow.d.ts +16 -0
- package/dist/tools/workflow.d.ts.map +1 -0
- package/dist/tools/workflow.js +495 -0
- package/dist/tools/workflow.js.map +1 -0
- package/dist/utils/backoff.d.ts +53 -0
- package/dist/utils/backoff.d.ts.map +1 -0
- package/dist/utils/backoff.js +78 -0
- package/dist/utils/backoff.js.map +1 -0
- package/dist/utils/config-persistence.d.ts +33 -0
- package/dist/utils/config-persistence.d.ts.map +1 -0
- package/dist/utils/config-persistence.js +61 -0
- package/dist/utils/config-persistence.js.map +1 -0
- package/dist/utils/hash.d.ts +65 -0
- package/dist/utils/hash.d.ts.map +1 -0
- package/dist/utils/hash.js +146 -0
- package/dist/utils/hash.js.map +1 -0
- package/dist/utils/math.d.ts +21 -0
- package/dist/utils/math.d.ts.map +1 -0
- package/dist/utils/math.js +39 -0
- package/dist/utils/math.js.map +1 -0
- package/dist/utils/validation.d.ts +697 -0
- package/dist/utils/validation.d.ts.map +1 -0
- package/dist/utils/validation.js +529 -0
- package/dist/utils/validation.js.map +1 -0
- package/package.json +96 -0
- package/python/.gitkeep +0 -0
- package/python/__init__.py +104 -0
- package/python/clustering_worker.py +440 -0
- package/python/docx_image_extractor.py +524 -0
- package/python/embedding_worker.py +552 -0
- package/python/file_manager_worker.py +564 -0
- package/python/form_fill_worker.py +399 -0
- package/python/gpu_utils.py +582 -0
- package/python/image_extractor.py +317 -0
- package/python/image_optimizer.py +444 -0
- package/python/ocr_worker.py +712 -0
- package/python/pyproject.toml +76 -0
- package/python/requirements.txt +51 -0
- package/python/reranker_worker.py +87 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"chunk-operations.d.ts","sourceRoot":"","sources":["../../../../src/services/storage/database/chunk-operations.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,QAAQ,MAAM,gBAAgB,CAAC;AACtC,OAAO,EAAE,KAAK,EAAE,MAAM,0BAA0B,CAAC;AAKjD;;GAEG;AACH,MAAM,WAAW,kBAAkB;IACjC,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAC7B,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,mBAAmB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC/B,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,gBAAgB,CAAC,EAAE,SAAS,GAAG,UAAU,GAAG,QAAQ,CAAC;IACrD,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,UAAU,CAAC,EAAE;QAAE,QAAQ,CAAC,EAAE,MAAM,CAAC;QAAC,QAAQ,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;IACtD,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,YAAY,CAAC,EAAE,OAAO,CAAC;CACxB;AAED;;;;;;;GAOG;AACH,wBAAgB,WAAW,CACzB,EAAE,EAAE,QAAQ,CAAC,QAAQ,EACrB,KAAK,EAAE,IAAI,CAAC,KAAK,EAAE,YAAY,GAAG,kBAAkB,GAAG,aAAa,CAAC,EACrE,oBAAoB,EAAE,MAAM,IAAI,GAC/B,MAAM,CA8CR;AAED;;;;;;;;GAQG;AACH,wBAAgB,YAAY,CAC1B,EAAE,EAAE,QAAQ,CAAC,QAAQ,EACrB,MAAM,EAAE,IAAI,CAAC,KAAK,EAAE,YAAY,GAAG,kBAAkB,GAAG,aAAa,CAAC,EAAE,EACxE,oBAAoB,EAAE,MAAM,IAAI,EAChC,WAAW,EAAE,CAAC,CAAC,EAAE,EAAE,EAAE,MAAM,CAAC,KAAK,CAAC,GACjC,MAAM,EAAE,CAwDV;AAED;;;;;;GAMG;AACH,wBAAgB,QAAQ,CAAC,EAAE,EAAE,QAAQ,CAAC,QAAQ,EAAE,EAAE,EAAE,MAAM,GAAG,KAAK,GAAG,IAAI,CAIxE;AAED;;;;;;GAMG;AACH,wBAAgB,qBAAqB,CAAC,EAAE,EAAE,QAAQ,CAAC,QAAQ,EAAE,UAAU,EAAE,MAAM,GAAG,OAAO,CAGxF;AAED;;;;;;GAMG;AACH,wBAAgB,qBAAqB,CACnC,EAAE,EAAE,QAAQ,CAAC,QAAQ,EACrB,UAAU,EAAE,MAAM,EAClB,OAAO,CAAC,EAAE;IAAE,KAAK,CAAC,EAAE,MAAM,CAAC;IAAC,MAAM,CAAC,EAAE,MAAM,CAAA;CAAE,GAC5C,KAAK,EAAE,CAgBT;AAED;;;;;;GAMG;AACH,wBAAgB,sBAAsB,CACpC,EAAE,EAAE,QAAQ,CAAC,QAAQ,EACrB,WAAW,EAAE,MAAM,EACnB,OAAO,CAAC,EAAE;IAAE,KAAK,CAAC,EAAE,MAAM,CAAC;IAAC,MAAM,CAAC,EAAE,MAAM,CAAA;CAAE,GAC5C,KAAK,EAAE,CAgBT;AAED;;;;;;GAMG;AACH,wBAAgB,yBAAyB,CAAC,EAAE,EAAE,QAAQ,CAAC,QAAQ,EAAE,KAAK,CAAC,EAAE,MAAM,GAAG,KAAK,EAAE,CAQxF;AAED;;;;;;;;GAQG;AACH,wBAAgB,0BAA0B,CACxC,EAAE,EAAE,QAAQ,CAAC,QAAQ,EACrB,EAAE,EAAE,MAAM,EACV,MAAM,EAAE,SAAS,GAAG,UAAU,GAAG,QAAQ,EACzC,UAAU,EAAE,MAAM,GAAG,SAAS,EAC9B,sBAAsB,EAAE,
MAAM,IAAI,GACjC,IAAI,CAcN;AAED;;;;;;;;;;;GAWG;AACH,wBAAgB,iBAAiB,CAC/B,EAAE,EAAE,QAAQ,CAAC,QAAQ,EACrB,UAAU,EAAE,MAAM,EAClB,OAAO,EAAE,kBAAkB,GAC1B;IAAE,MAAM,EAAE,KAAK,EAAE,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,CAmEpC;AAED;;;;;;;;;;;GAWG;AACH,wBAAgB,iBAAiB,CAC/B,EAAE,EAAE,QAAQ,CAAC,QAAQ,EACrB,UAAU,EAAE,MAAM,EAClB,UAAU,EAAE,MAAM,EAClB,KAAK,EAAE,MAAM,GACZ,KAAK,EAAE,CAST"}
|
|
@@ -0,0 +1,306 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Chunk operations for DatabaseService
|
|
3
|
+
*
|
|
4
|
+
* Handles all CRUD operations for text chunks including
|
|
5
|
+
* batch inserts, embedding status updates, filtered queries,
|
|
6
|
+
* and neighbor lookups.
|
|
7
|
+
*/
|
|
8
|
+
import { DatabaseError, DatabaseErrorCode } from './types.js';
|
|
9
|
+
import { runWithForeignKeyCheck } from './helpers.js';
|
|
10
|
+
import { rowToChunk } from './converters.js';
|
|
11
|
+
/**
 * Insert a single chunk row.
 *
 * The caller supplies the chunk's identifying and positional fields; this
 * function stamps `created_at` with the current time, always writes
 * `embedding_status = 'pending'` and `embedded_at = NULL`, and fills the
 * optional columns with defaults when they are null/undefined
 * (`is_atomic` -> 0, `chunking_strategy` -> 'fixed', everything else -> NULL).
 *
 * @param db - Database connection
 * @param chunk - Chunk data (created_at, embedding_status, embedded_at are generated here)
 * @param updateMetadataCounts - Callback invoked once after the insert
 * @returns string - The chunk ID (`chunk.id`, echoed back)
 */
export function insertChunk(db, chunk, updateMetadataCounts) {
  const insertedAt = new Date().toISOString();

  const insertStmt = db.prepare(`
    INSERT INTO chunks (
      id, document_id, ocr_result_id, text, text_hash, chunk_index,
      character_start, character_end, page_number, page_range,
      overlap_previous, overlap_next, provenance_id, created_at,
      embedding_status, embedded_at, ocr_quality_score,
      heading_context, heading_level, section_path,
      content_types, is_atomic, chunking_strategy
    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
  `);

  // Columns copied from the chunk verbatim, in placeholder order.
  const requiredColumns = [
    'id',
    'document_id',
    'ocr_result_id',
    'text',
    'text_hash',
    'chunk_index',
    'character_start',
    'character_end',
    'page_number',
    'page_range',
    'overlap_previous',
    'overlap_next',
    'provenance_id',
  ];
  // Columns that fall back to NULL when the chunk does not provide them.
  const nullableColumns = [
    'ocr_quality_score',
    'heading_context',
    'heading_level',
    'section_path',
    'content_types',
  ];

  const values = [
    ...requiredColumns.map((column) => chunk[column]),
    insertedAt,
    'pending', // embedding_status
    null, // embedded_at
    ...nullableColumns.map((column) => chunk[column] ?? null),
    chunk.is_atomic ?? 0,
    chunk.chunking_strategy ?? 'fixed',
  ];

  // NOTE(review): runWithForeignKeyCheck is defined in helpers.js; presumably it
  // runs the statement and reports foreign-key failures using this context text.
  runWithForeignKeyCheck(
    insertStmt,
    values,
    'inserting chunk: document_id, ocr_result_id, or provenance_id does not exist',
  );

  updateMetadataCounts();
  return chunk.id;
}
|
|
59
|
+
/**
 * Insert several chunks inside one transaction.
 *
 * An empty input returns `[]` immediately without touching the database or
 * invoking `transaction`. Otherwise the whole batch runs inside the supplied
 * transaction wrapper: one statement is prepared, every chunk is inserted with
 * the same `created_at` timestamp, `embedding_status = 'pending'` and
 * `embedded_at = NULL`, and `updateMetadataCounts` is called once after the
 * last row.
 *
 * @param db - Database connection
 * @param chunks - Array of chunk data
 * @param updateMetadataCounts - Callback invoked once after all rows are inserted
 * @param transaction - Transaction wrapper; receives the batch function and its result is returned
 * @returns string[] - Chunk IDs in input order
 */
export function insertChunks(db, chunks, updateMetadataCounts, transaction) {
  if (chunks.length === 0) {
    return [];
  }

  // Columns copied from each chunk verbatim, in placeholder order.
  const requiredColumns = [
    'id',
    'document_id',
    'ocr_result_id',
    'text',
    'text_hash',
    'chunk_index',
    'character_start',
    'character_end',
    'page_number',
    'page_range',
    'overlap_previous',
    'overlap_next',
    'provenance_id',
  ];
  // Columns that fall back to NULL when a chunk does not provide them.
  const nullableColumns = [
    'ocr_quality_score',
    'heading_context',
    'heading_level',
    'section_path',
    'content_types',
  ];

  const insertBatch = () => {
    const insertedAt = new Date().toISOString();
    const insertedIds = [];

    const insertStmt = db.prepare(`
      INSERT INTO chunks (
        id, document_id, ocr_result_id, text, text_hash, chunk_index,
        character_start, character_end, page_number, page_range,
        overlap_previous, overlap_next, provenance_id, created_at,
        embedding_status, embedded_at, ocr_quality_score,
        heading_context, heading_level, section_path,
        content_types, is_atomic, chunking_strategy
      ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    `);

    for (const chunk of chunks) {
      const values = [
        ...requiredColumns.map((column) => chunk[column]),
        insertedAt,
        'pending', // embedding_status
        null, // embedded_at
        ...nullableColumns.map((column) => chunk[column] ?? null),
        chunk.is_atomic ?? 0,
        chunk.chunking_strategy ?? 'fixed',
      ];
      runWithForeignKeyCheck(insertStmt, values, `inserting chunk "${chunk.id}"`);
      insertedIds.push(chunk.id);
    }

    updateMetadataCounts();
    return insertedIds;
  };

  return transaction(insertBatch);
}
|
|
117
|
+
/**
 * Look up one chunk by its primary key.
 *
 * @param db - Database connection
 * @param id - Chunk ID
 * @returns Chunk | null - The converted chunk, or null when no row matches
 */
export function getChunk(db, id) {
  const match = db.prepare('SELECT * FROM chunks WHERE id = ?').get(id);
  if (!match) {
    return null;
  }
  return rowToChunk(match);
}
|
|
129
|
+
/**
 * Report whether a document owns at least one chunk (M-9).
 *
 * Uses a `SELECT 1 ... LIMIT 1` probe so no chunk rows are loaded.
 *
 * @param db - Database connection
 * @param documentId - Document ID
 * @returns boolean - true when the probe returns anything other than undefined
 */
export function hasChunksByDocumentId(db, documentId) {
  const probe = db
    .prepare('SELECT 1 FROM chunks WHERE document_id = ? LIMIT 1')
    .get(documentId);
  return probe !== undefined;
}
|
|
140
|
+
/**
 * Fetch a document's chunks ordered by chunk_index.
 *
 * A LIMIT is always applied: `options.limit` when given, otherwise 10000, so
 * documents with more chunks than that are truncated unless the caller pages.
 * OFFSET is only added when `options.offset` is not undefined.
 *
 * @param db - Database connection
 * @param documentId - Document ID
 * @param options - Optional pagination: `limit` (default 10000) and `offset`
 * @returns Chunk[] - Array of chunks ordered by chunk_index
 */
export function getChunksByDocumentId(db, documentId, options) {
  const clauses = ['SELECT * FROM chunks WHERE document_id = ? ORDER BY chunk_index', 'LIMIT ?'];
  const bindings = [documentId, options?.limit ?? 10000];

  if (options?.offset !== undefined) {
    clauses.push('OFFSET ?');
    bindings.push(options.offset);
  }

  return db
    .prepare(clauses.join(' '))
    .all(...bindings)
    .map(rowToChunk);
}
|
|
161
|
+
/**
 * Fetch the chunks produced from one OCR result, ordered by chunk_index.
 *
 * A LIMIT is always applied: `options.limit` when given, otherwise 10000.
 * OFFSET is only added when `options.offset` is not undefined.
 *
 * @param db - Database connection
 * @param ocrResultId - OCR result ID
 * @param options - Optional pagination: `limit` (default 10000) and `offset`
 * @returns Chunk[] - Array of chunks ordered by chunk_index
 */
export function getChunksByOCRResultId(db, ocrResultId, options) {
  const clauses = ['SELECT * FROM chunks WHERE ocr_result_id = ? ORDER BY chunk_index', 'LIMIT ?'];
  const bindings = [ocrResultId, options?.limit ?? 10000];

  if (options?.offset !== undefined) {
    clauses.push('OFFSET ?');
    bindings.push(options.offset);
  }

  return db
    .prepare(clauses.join(' '))
    .all(...bindings)
    .map(rowToChunk);
}
|
|
182
|
+
/**
 * List chunks whose embedding_status is 'pending', oldest created_at first.
 *
 * @param db - Database connection
 * @param limit - Optional maximum number of chunks to return (defaults to 1000 when null/undefined)
 * @returns Chunk[] - Array of pending chunks
 */
export function getPendingEmbeddingChunks(db, limit) {
  // M-15: a default cap keeps a large backlog from being loaded in one call.
  const pendingRows = db
    .prepare("SELECT * FROM chunks WHERE embedding_status = 'pending' ORDER BY created_at LIMIT ?")
    .all(limit ?? 1000);
  return pendingRows.map(rowToChunk);
}
|
|
197
|
+
/**
 * Set a chunk's embedding_status and embedded_at.
 *
 * `embedded_at` is written as NULL when `embeddedAt` is null/undefined. The
 * metadata callback only runs when a row was actually updated.
 *
 * @param db - Database connection
 * @param id - Chunk ID
 * @param status - New embedding status
 * @param embeddedAt - Optional ISO 8601 timestamp when embedded
 * @param updateMetadataModified - Callback to update metadata modified timestamp
 * @throws DatabaseError with code CHUNK_NOT_FOUND when the update changes no rows
 */
export function updateChunkEmbeddingStatus(db, id, status, embeddedAt, updateMetadataModified) {
  const updateStmt = db.prepare(`
    UPDATE chunks
    SET embedding_status = ?, embedded_at = ?
    WHERE id = ?
  `);

  const { changes } = updateStmt.run(status, embeddedAt ?? null, id);
  const chunkMissing = changes === 0;
  if (chunkMissing) {
    throw new DatabaseError(`Chunk "${id}" not found`, DatabaseErrorCode.CHUNK_NOT_FOUND);
  }

  updateMetadataModified();
}
|
|
218
|
+
/**
 * Query one document's chunks with optional filters and pagination.
 *
 * Every supplied filter becomes one parameterized condition, all joined with
 * AND after the mandatory `document_id = ?`:
 *   - section_path_filter: prefix match on section_path
 *   - heading_filter: substring match on heading_context
 *   - content_type_filter: each entry must appear, JSON-quoted, inside the
 *     content_types column (e.g. content_types LIKE '%"table"%')
 *   - min_quality_score: ocr_quality_score >= value
 *   - embedding_status: exact match
 *   - is_atomic: boolean mapped to 1/0
 *   - page_range.min_page / max_page: inclusive bounds on page_number
 *
 * The same conditions drive a COUNT(*) query, so `total` is the number of
 * matching rows before LIMIT/OFFSET (defaults 50 and 0).
 *
 * NOTE: filter values are bound as-is and no ESCAPE clause is used, so `%`
 * and `_` inside the LIKE-based filters behave as SQL wildcards.
 *
 * @param db - Database connection
 * @param documentId - Document ID to filter by
 * @param filters - Filter options
 * @returns Object with chunks array and total count
 */
export function getChunksFiltered(db, documentId, filters) {
  const clauses = [];
  const bindings = [];
  // Keeps each condition and its bound value in lockstep.
  const where = (clause, value) => {
    clauses.push(clause);
    bindings.push(value);
  };

  where('document_id = ?', documentId);

  if (filters.section_path_filter) {
    where("section_path LIKE ? || '%'", filters.section_path_filter);
  }

  if (filters.heading_filter) {
    where("heading_context LIKE '%' || ? || '%'", filters.heading_filter);
  }

  if (filters.content_type_filter?.length > 0) {
    // Every requested type must be present. The value is wrapped in JSON quotes
    // so that e.g. "text" does not match "context_text"; this mirrors
    // resolveChunkFilter in search.ts.
    for (const contentType of filters.content_type_filter) {
      where("content_types LIKE '%' || ? || '%'", `"${contentType}"`);
    }
  }

  if (filters.min_quality_score !== undefined) {
    where('ocr_quality_score >= ?', filters.min_quality_score);
  }

  if (filters.embedding_status) {
    where('embedding_status = ?', filters.embedding_status);
  }

  if (filters.is_atomic !== undefined) {
    where('is_atomic = ?', filters.is_atomic ? 1 : 0);
  }

  if (filters.page_range) {
    if (filters.page_range.min_page !== undefined) {
      where('page_number >= ?', filters.page_range.min_page);
    }
    if (filters.page_range.max_page !== undefined) {
      where('page_number <= ?', filters.page_range.max_page);
    }
  }

  const whereClause = ` WHERE ${clauses.join(' AND ')}`;

  // Total is computed with the filters but without pagination.
  const countStmt = db.prepare(`SELECT COUNT(*) as total FROM chunks${whereClause}`);
  const countRow = countStmt.get(...bindings);

  // Page of results.
  const pageSize = filters.limit ?? 50;
  const pageStart = filters.offset ?? 0;
  const pageStmt = db.prepare(
    `SELECT * FROM chunks${whereClause} ORDER BY chunk_index LIMIT ? OFFSET ?`,
  );
  const pageRows = pageStmt.all(...bindings, pageSize, pageStart);

  return {
    chunks: pageRows.map(rowToChunk),
    total: countRow.total,
  };
}
|
|
287
|
+
/**
 * Fetch the chunks surrounding a given chunk index, for context building.
 *
 * Selects chunks of the document whose chunk_index lies in the inclusive range
 * [max(0, chunkIndex - count), chunkIndex + count], ordered by chunk_index.
 * The lower bound is clamped at 0.
 *
 * @param db - Database connection
 * @param documentId - Document ID
 * @param chunkIndex - Center chunk index
 * @param count - Number of neighbors on each side
 * @returns Chunk[] - Array of neighboring chunks (including center)
 */
export function getChunkNeighbors(db, documentId, chunkIndex, count) {
  const firstIndex = Math.max(0, chunkIndex - count);
  const lastIndex = chunkIndex + count;

  return db
    .prepare('SELECT * FROM chunks WHERE document_id = ? AND chunk_index BETWEEN ? AND ? ORDER BY chunk_index')
    .all(documentId, firstIndex, lastIndex)
    .map(rowToChunk);
}
|
|
306
|
+
//# sourceMappingURL=chunk-operations.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"chunk-operations.js","sourceRoot":"","sources":["../../../../src/services/storage/database/chunk-operations.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAIH,OAAO,EAAE,aAAa,EAAE,iBAAiB,EAAY,MAAM,YAAY,CAAC;AACxE,OAAO,EAAE,sBAAsB,EAAE,MAAM,cAAc,CAAC;AACtD,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAkB7C;;;;;;;GAOG;AACH,MAAM,UAAU,WAAW,CACzB,EAAqB,EACrB,KAAqE,EACrE,oBAAgC;IAEhC,MAAM,UAAU,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IAE5C,MAAM,IAAI,GAAG,EAAE,CAAC,OAAO,CAAC;;;;;;;;;GASvB,CAAC,CAAC;IAEH,sBAAsB,CACpB,IAAI,EACJ;QACE,KAAK,CAAC,EAAE;QACR,KAAK,CAAC,WAAW;QACjB,KAAK,CAAC,aAAa;QACnB,KAAK,CAAC,IAAI;QACV,KAAK,CAAC,SAAS;QACf,KAAK,CAAC,WAAW;QACjB,KAAK,CAAC,eAAe;QACrB,KAAK,CAAC,aAAa;QACnB,KAAK,CAAC,WAAW;QACjB,KAAK,CAAC,UAAU;QAChB,KAAK,CAAC,gBAAgB;QACtB,KAAK,CAAC,YAAY;QAClB,KAAK,CAAC,aAAa;QACnB,UAAU;QACV,SAAS;QACT,IAAI;QACJ,KAAK,CAAC,iBAAiB,IAAI,IAAI;QAC/B,KAAK,CAAC,eAAe,IAAI,IAAI;QAC7B,KAAK,CAAC,aAAa,IAAI,IAAI;QAC3B,KAAK,CAAC,YAAY,IAAI,IAAI;QAC1B,KAAK,CAAC,aAAa,IAAI,IAAI;QAC3B,KAAK,CAAC,SAAS,IAAI,CAAC;QACpB,KAAK,CAAC,iBAAiB,IAAI,OAAO;KACnC,EACD,8EAA8E,CAC/E,CAAC;IAEF,oBAAoB,EAAE,CAAC;IACvB,OAAO,KAAK,CAAC,EAAE,CAAC;AAClB,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,UAAU,YAAY,CAC1B,EAAqB,EACrB,MAAwE,EACxE,oBAAgC,EAChC,WAAkC;IAElC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACxB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,OAAO,WAAW,CAAC,GAAG,EAAE;QACtB,MAAM,UAAU,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QAC5C,MAAM,GAAG,GAAa,EAAE,CAAC;QAEzB,MAAM,IAAI,GAAG,EAAE,CAAC,OAAO,CAAC;;;;;;;;;KASvB,CAAC,CAAC;QAEH,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC3B,sBAAsB,CACpB,IAAI,EACJ;gBACE,KAAK,CAAC,EAAE;gBACR,KAAK,CAAC,WAAW;gBACjB,KAAK,CAAC,aAAa;gBACnB,KAAK,CAAC,IAAI;gBACV,KAAK,CAAC,SAAS;gBACf,KAAK,CAAC,WAAW;gBACjB,KAAK,CAAC,eAAe;gBACrB,KAAK,CAAC,aAAa;gBACnB,KAAK,CAAC,WAAW;gBACjB,KAAK,CAAC,UAAU;gBAChB,KAAK,CAAC,gBAAgB;gBACtB,KAAK,CAAC,YAAY;gBAClB,KAAK,CAAC,aAAa;gBACnB,UAAU;gBACV,SAAS;gBACT,IAAI;gBACJ,KAAK,CAAC,iBAAiB,IAAI,IAAI;gBAC/B,KAAK,CAAC,eAAe,IAAI,IAAI;gBAC7B,KAAK,CAAC,aAAa,IAAI,IAAI;gBAC3B,KAAK,CAAC,YAAY,
IAAI,IAAI;gBAC1B,KAAK,CAAC,aAAa,IAAI,IAAI;gBAC3B,KAAK,CAAC,SAAS,IAAI,CAAC;gBACpB,KAAK,CAAC,iBAAiB,IAAI,OAAO;aACnC,EACD,oBAAoB,KAAK,CAAC,EAAE,GAAG,CAChC,CAAC;YACF,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;QACrB,CAAC;QAED,oBAAoB,EAAE,CAAC;QACvB,OAAO,GAAG,CAAC;IACb,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,QAAQ,CAAC,EAAqB,EAAE,EAAU;IACxD,MAAM,IAAI,GAAG,EAAE,CAAC,OAAO,CAAC,mCAAmC,CAAC,CAAC;IAC7D,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,EAAE,CAAyB,CAAC;IACjD,OAAO,GAAG,CAAC,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;AACtC,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,qBAAqB,CAAC,EAAqB,EAAE,UAAkB;IAC7E,MAAM,IAAI,GAAG,EAAE,CAAC,OAAO,CAAC,oDAAoD,CAAC,CAAC;IAC9E,OAAO,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,KAAK,SAAS,CAAC;AAC5C,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,qBAAqB,CACnC,EAAqB,EACrB,UAAkB,EAClB,OAA6C;IAE7C,IAAI,GAAG,GAAG,iEAAiE,CAAC;IAC5E,MAAM,MAAM,GAAwB,CAAC,UAAU,CAAC,CAAC;IAEjD,MAAM,KAAK,GAAG,OAAO,EAAE,KAAK,IAAI,KAAK,CAAC;IACtC,GAAG,IAAI,UAAU,CAAC;IAClB,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAEnB,IAAI,OAAO,EAAE,MAAM,KAAK,SAAS,EAAE,CAAC;QAClC,GAAG,IAAI,WAAW,CAAC;QACnB,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;IAC9B,CAAC;IAED,MAAM,IAAI,GAAG,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IAC7B,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,MAAM,CAAe,CAAC;IAC/C,OAAO,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;AAC9B,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,sBAAsB,CACpC,EAAqB,EACrB,WAAmB,EACnB,OAA6C;IAE7C,IAAI,GAAG,GAAG,mEAAmE,CAAC;IAC9E,MAAM,MAAM,GAAwB,CAAC,WAAW,CAAC,CAAC;IAElD,MAAM,KAAK,GAAG,OAAO,EAAE,KAAK,IAAI,KAAK,CAAC;IACtC,GAAG,IAAI,UAAU,CAAC;IAClB,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAEnB,IAAI,OAAO,EAAE,MAAM,KAAK,SAAS,EAAE,CAAC;QAClC,GAAG,IAAI,WAAW,CAAC;QACnB,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;IAC9B,CAAC;IAED,MAAM,IAAI,GAAG,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IAC7B,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,MAAM,CAAe,CAAC;IAC/C,OAAO,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;AAC9B,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,yBAAyB,CAAC,EAAqB,EAAE,KAAc;IAC7E,uEAAuE;IACvE,MAAM,cAAc,GAAG,KAAK,IAAI,IAAI,CAAC;IACrC,MAA
M,KAAK,GACT,qFAAqF,CAAC;IACxF,MAAM,IAAI,GAAG,EAAE,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;IAC/B,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,cAAc,CAAe,CAAC;IACpD,OAAO,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;AAC9B,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,UAAU,0BAA0B,CACxC,EAAqB,EACrB,EAAU,EACV,MAAyC,EACzC,UAA8B,EAC9B,sBAAkC;IAElC,MAAM,IAAI,GAAG,EAAE,CAAC,OAAO,CAAC;;;;GAIvB,CAAC,CAAC;IAEH,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,EAAE,UAAU,IAAI,IAAI,EAAE,EAAE,CAAC,CAAC;IAExD,IAAI,MAAM,CAAC,OAAO,KAAK,CAAC,EAAE,CAAC;QACzB,MAAM,IAAI,aAAa,CAAC,UAAU,EAAE,aAAa,EAAE,iBAAiB,CAAC,eAAe,CAAC,CAAC;IACxF,CAAC;IAED,sBAAsB,EAAE,CAAC;AAC3B,CAAC;AAED;;;;;;;;;;;GAWG;AACH,MAAM,UAAU,iBAAiB,CAC/B,EAAqB,EACrB,UAAkB,EAClB,OAA2B;IAE3B,MAAM,UAAU,GAAa,CAAC,iBAAiB,CAAC,CAAC;IACjD,MAAM,MAAM,GAAwB,CAAC,UAAU,CAAC,CAAC;IAEjD,IAAI,OAAO,CAAC,mBAAmB,EAAE,CAAC;QAChC,UAAU,CAAC,IAAI,CAAC,4BAA4B,CAAC,CAAC;QAC9C,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,mBAAmB,CAAC,CAAC;IAC3C,CAAC;IAED,IAAI,OAAO,CAAC,cAAc,EAAE,CAAC;QAC3B,UAAU,CAAC,IAAI,CAAC,sCAAsC,CAAC,CAAC;QACxD,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC;IACtC,CAAC;IAED,IAAI,OAAO,CAAC,mBAAmB,IAAI,OAAO,CAAC,mBAAmB,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC1E,8DAA8D;QAC9D,iEAAiE;QACjE,+EAA+E;QAC/E,KAAK,MAAM,EAAE,IAAI,OAAO,CAAC,mBAAmB,EAAE,CAAC;YAC7C,UAAU,CAAC,IAAI,CAAC,oCAAoC,CAAC,CAAC;YACtD,MAAM,CAAC,IAAI,CAAC,GAAG,GAAG,EAAE,GAAG,GAAG,CAAC,CAAC;QAC9B,CAAC;IACH,CAAC;IAED,IAAI,OAAO,CAAC,iBAAiB,KAAK,SAAS,EAAE,CAAC;QAC5C,UAAU,CAAC,IAAI,CAAC,wBAAwB,CAAC,CAAC;QAC1C,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,iBAAiB,CAAC,CAAC;IACzC,CAAC;IAED,IAAI,OAAO,CAAC,gBAAgB,EAAE,CAAC;QAC7B,UAAU,CAAC,IAAI,CAAC,sBAAsB,CAAC,CAAC;QACxC,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,gBAAgB,CAAC,CAAC;IACxC,CAAC;IAED,IAAI,OAAO,CAAC,SAAS,KAAK,SAAS,EAAE,CAAC;QACpC,UAAU,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;QACjC,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IACzC,CAAC;IAED,IAAI,OAAO,CAAC,UAAU,EAAE,CAAC;QACvB,IAAI,OAAO,CAAC,UAAU,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;YAC9C,UAAU,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC;YACpC,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,U
AAU,CAAC,QAAQ,CAAC,CAAC;QAC3C,CAAC;QACD,IAAI,OAAO,CAAC,UAAU,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;YAC9C,UAAU,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC;YACpC,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC;QAC3C,CAAC;IACH,CAAC;IAED,MAAM,WAAW,GAAG,SAAS,GAAG,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAEzD,oCAAoC;IACpC,MAAM,QAAQ,GAAG,EAAE;SAChB,OAAO,CAAC,uCAAuC,WAAW,EAAE,CAAC;SAC7D,GAAG,CAAC,GAAG,MAAM,CAAsB,CAAC;IAEvC,wBAAwB;IACxB,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,IAAI,EAAE,CAAC;IAClC,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,IAAI,CAAC,CAAC;IACnC,MAAM,SAAS,GAAG,uBAAuB,WAAW,wCAAwC,CAAC;IAC7F,MAAM,IAAI,GAAG,EAAE,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,GAAG,CAAC,GAAG,MAAM,EAAE,KAAK,EAAE,MAAM,CAAe,CAAC;IAE/E,OAAO;QACL,MAAM,EAAE,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC;QAC5B,KAAK,EAAE,QAAQ,CAAC,KAAK;KACtB,CAAC;AACJ,CAAC;AAED;;;;;;;;;;;GAWG;AACH,MAAM,UAAU,iBAAiB,CAC/B,EAAqB,EACrB,UAAkB,EAClB,UAAkB,EAClB,KAAa;IAEb,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,UAAU,GAAG,KAAK,CAAC,CAAC;IACjD,MAAM,QAAQ,GAAG,UAAU,GAAG,KAAK,CAAC;IAEpC,MAAM,IAAI,GAAG,EAAE,CAAC,OAAO,CACrB,iGAAiG,CAClG,CAAC;IACF,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,QAAQ,EAAE,QAAQ,CAAe,CAAC;IACpE,OAAO,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;AAC9B,CAAC"}
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cluster operations for DatabaseService
|
|
3
|
+
*
|
|
4
|
+
* Handles CRUD operations for the clusters and document_clusters tables.
|
|
5
|
+
*/
|
|
6
|
+
import Database from 'better-sqlite3';
|
|
7
|
+
import type { Cluster, DocumentCluster } from '../../../models/cluster.js';
|
|
8
|
+
/**
|
|
9
|
+
* Insert a cluster record
|
|
10
|
+
*/
|
|
11
|
+
export declare function insertCluster(db: Database.Database, cluster: Cluster): string;
|
|
12
|
+
/**
|
|
13
|
+
* Get a cluster by ID
|
|
14
|
+
*/
|
|
15
|
+
export declare function getCluster(db: Database.Database, id: string): Cluster | null;
|
|
16
|
+
/**
|
|
17
|
+
* List clusters with optional filters and pagination
|
|
18
|
+
*/
|
|
19
|
+
export declare function listClusters(db: Database.Database, options?: {
|
|
20
|
+
run_id?: string;
|
|
21
|
+
classification_tag?: string;
|
|
22
|
+
limit?: number;
|
|
23
|
+
offset?: number;
|
|
24
|
+
}): Cluster[];
|
|
25
|
+
/**
|
|
26
|
+
* Delete all clusters and their document assignments for a run.
|
|
27
|
+
* First deletes document_clusters, then clusters.
|
|
28
|
+
* Returns the number of clusters deleted.
|
|
29
|
+
*/
|
|
30
|
+
export declare function deleteClustersByRunId(db: Database.Database, runId: string): number;
|
|
31
|
+
/**
|
|
32
|
+
* Insert a document-cluster assignment
|
|
33
|
+
*/
|
|
34
|
+
export declare function insertDocumentCluster(db: Database.Database, dc: DocumentCluster): string;
|
|
35
|
+
/**
|
|
36
|
+
* Get all documents in a cluster, joined with documents for file_name
|
|
37
|
+
*/
|
|
38
|
+
export declare function getClusterDocuments(db: Database.Database, clusterId: string): Array<{
|
|
39
|
+
document_id: string;
|
|
40
|
+
file_name: string;
|
|
41
|
+
similarity_to_centroid: number;
|
|
42
|
+
membership_probability: number;
|
|
43
|
+
}>;
|
|
44
|
+
/**
|
|
45
|
+
* Lightweight cluster summary (excludes large JSON fields)
|
|
46
|
+
*/
|
|
47
|
+
interface ClusterSummary {
|
|
48
|
+
id: string;
|
|
49
|
+
run_id: string;
|
|
50
|
+
cluster_index: number;
|
|
51
|
+
label: string | null;
|
|
52
|
+
classification_tag: string | null;
|
|
53
|
+
document_count: number;
|
|
54
|
+
coherence_score: number | null;
|
|
55
|
+
created_at: string;
|
|
56
|
+
}
|
|
57
|
+
/**
|
|
58
|
+
* Get cluster summaries for a run (lightweight: no JSON blobs)
|
|
59
|
+
*/
|
|
60
|
+
export declare function getClusterSummariesByRunId(db: Database.Database, runId: string): ClusterSummary[];
|
|
61
|
+
/**
|
|
62
|
+
* Get cluster summaries for a document (via document_clusters join)
|
|
63
|
+
*/
|
|
64
|
+
export declare function getClusterSummariesForDocument(db: Database.Database, documentId: string): ClusterSummary[];
|
|
65
|
+
/**
|
|
66
|
+
* Reassign a document from its current cluster to a different target cluster.
|
|
67
|
+
* Deletes existing document_clusters entries for this document within the same run,
|
|
68
|
+
* inserts a new assignment, and updates member_count on both old and new clusters.
|
|
69
|
+
*
|
|
70
|
+
* @returns Object with old_cluster_id (null if not previously assigned) and run_id
|
|
71
|
+
*/
|
|
72
|
+
export declare function reassignDocument(db: Database.Database, documentId: string, targetClusterId: string): {
|
|
73
|
+
old_cluster_id: string | null;
|
|
74
|
+
run_id: string;
|
|
75
|
+
};
|
|
76
|
+
/**
|
|
77
|
+
* Merge two clusters into one. All documents from cluster2 are moved to cluster1.
|
|
78
|
+
* cluster2 is deleted after the merge.
|
|
79
|
+
*
|
|
80
|
+
* Both clusters must belong to the same run_id.
|
|
81
|
+
*
|
|
82
|
+
* @returns Object with merged_cluster_id and documents_moved count
|
|
83
|
+
*/
|
|
84
|
+
export declare function mergeClusters(db: Database.Database, clusterId1: string, clusterId2: string): {
|
|
85
|
+
merged_cluster_id: string;
|
|
86
|
+
documents_moved: number;
|
|
87
|
+
};
|
|
88
|
+
/**
|
|
89
|
+
* Get aggregate clustering statistics
|
|
90
|
+
*/
|
|
91
|
+
export declare function getClusteringStats(db: Database.Database): {
|
|
92
|
+
total_clusters: number;
|
|
93
|
+
total_runs: number;
|
|
94
|
+
avg_coherence: number | null;
|
|
95
|
+
};
|
|
96
|
+
export {};
|
|
97
|
+
//# sourceMappingURL=cluster-operations.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cluster-operations.d.ts","sourceRoot":"","sources":["../../../../src/services/storage/database/cluster-operations.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,QAAQ,MAAM,gBAAgB,CAAC;AAEtC,OAAO,KAAK,EAAE,OAAO,EAAE,eAAe,EAAE,MAAM,4BAA4B,CAAC;AAK3E;;GAEG;AACH,wBAAgB,aAAa,CAAC,EAAE,EAAE,QAAQ,CAAC,QAAQ,EAAE,OAAO,EAAE,OAAO,GAAG,MAAM,CAkC7E;AAED;;GAEG;AACH,wBAAgB,UAAU,CAAC,EAAE,EAAE,QAAQ,CAAC,QAAQ,EAAE,EAAE,EAAE,MAAM,GAAG,OAAO,GAAG,IAAI,CAG5E;AAED;;GAEG;AACH,wBAAgB,YAAY,CAC1B,EAAE,EAAE,QAAQ,CAAC,QAAQ,EACrB,OAAO,CAAC,EAAE;IAAE,MAAM,CAAC,EAAE,MAAM,CAAC;IAAC,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IAAC,MAAM,CAAC,EAAE,MAAM,CAAA;CAAE,GAC1F,OAAO,EAAE,CAoBX;AAED;;;;GAIG;AACH,wBAAgB,qBAAqB,CAAC,EAAE,EAAE,QAAQ,CAAC,QAAQ,EAAE,KAAK,EAAE,MAAM,GAAG,MAAM,CA0BlF;AAID;;GAEG;AACH,wBAAgB,qBAAqB,CAAC,EAAE,EAAE,QAAQ,CAAC,QAAQ,EAAE,EAAE,EAAE,eAAe,GAAG,MAAM,CAuBxF;AAED;;GAEG;AACH,wBAAgB,mBAAmB,CACjC,EAAE,EAAE,QAAQ,CAAC,QAAQ,EACrB,SAAS,EAAE,MAAM,GAChB,KAAK,CAAC;IACP,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,sBAAsB,EAAE,MAAM,CAAC;IAC/B,sBAAsB,EAAE,MAAM,CAAC;CAChC,CAAC,CAeD;AAID;;GAEG;AACH,UAAU,cAAc;IACtB,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,EAAE,MAAM,CAAC;IACf,aAAa,EAAE,MAAM,CAAC;IACtB,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;IACrB,kBAAkB,EAAE,MAAM,GAAG,IAAI,CAAC;IAClC,cAAc,EAAE,MAAM,CAAC;IACvB,eAAe,EAAE,MAAM,GAAG,IAAI,CAAC;IAC/B,UAAU,EAAE,MAAM,CAAC;CACpB;AAED;;GAEG;AACH,wBAAgB,0BAA0B,CAAC,EAAE,EAAE,QAAQ,CAAC,QAAQ,EAAE,KAAK,EAAE,MAAM,GAAG,cAAc,EAAE,CAUjG;AAED;;GAEG;AACH,wBAAgB,8BAA8B,CAC5C,EAAE,EAAE,QAAQ,CAAC,QAAQ,EACrB,UAAU,EAAE,MAAM,GACjB,cAAc,EAAE,CAWlB;AAID;;;;;;GAMG;AACH,wBAAgB,gBAAgB,CAC9B,EAAE,EAAE,QAAQ,CAAC,QAAQ,EACrB,UAAU,EAAE,MAAM,EAClB,eAAe,EAAE,MAAM,GACtB;IAAE,cAAc,EAAE,MAAM,GAAG,IAAI,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,CAkDnD;AAED;;;;;;;GAOG;AACH,wBAAgB,aAAa,CAC3B,EAAE,EAAE,QAAQ,CAAC,QAAQ,EACrB,UAAU,EAAE,MAAM,EAClB,UAAU,EAAE,MAAM,GACjB;IAAE,iBAAiB,EAAE,MAAM,CAAC;IAAC,eAAe,EAAE,MAAM,CAAA;CAAE,CAuDxD;AAID;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,EAAE,EAAE,QAAQ,CA
AC,QAAQ,GAAG;IACzD,cAAc,EAAE,MAAM,CAAC;IACvB,UAAU,EAAE,MAAM,CAAC;IACnB,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;CAC9B,CAWA"}
|