ocr-provenance-mcp 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ocr-provenance-mcp might be problematic. Click here for more details.
- package/.env.example +55 -0
- package/LICENSE +78 -0
- package/README.md +1154 -0
- package/dist/bin-http.d.ts +24 -0
- package/dist/bin-http.d.ts.map +1 -0
- package/dist/bin-http.js +275 -0
- package/dist/bin-http.js.map +1 -0
- package/dist/bin-setup.d.ts +11 -0
- package/dist/bin-setup.d.ts.map +1 -0
- package/dist/bin-setup.js +610 -0
- package/dist/bin-setup.js.map +1 -0
- package/dist/bin.d.ts +16 -0
- package/dist/bin.d.ts.map +1 -0
- package/dist/bin.js +16 -0
- package/dist/bin.js.map +1 -0
- package/dist/index.d.ts +13 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +90 -0
- package/dist/index.js.map +1 -0
- package/dist/models/chunk.d.ts +136 -0
- package/dist/models/chunk.d.ts.map +1 -0
- package/dist/models/chunk.js +27 -0
- package/dist/models/chunk.js.map +1 -0
- package/dist/models/cluster.d.ts +79 -0
- package/dist/models/cluster.d.ts.map +1 -0
- package/dist/models/cluster.js +10 -0
- package/dist/models/cluster.js.map +1 -0
- package/dist/models/comparison.d.ts +62 -0
- package/dist/models/comparison.d.ts.map +1 -0
- package/dist/models/comparison.js +8 -0
- package/dist/models/comparison.js.map +1 -0
- package/dist/models/document.d.ts +104 -0
- package/dist/models/document.d.ts.map +1 -0
- package/dist/models/document.js +15 -0
- package/dist/models/document.js.map +1 -0
- package/dist/models/embedding.d.ts +87 -0
- package/dist/models/embedding.d.ts.map +1 -0
- package/dist/models/embedding.js +23 -0
- package/dist/models/embedding.js.map +1 -0
- package/dist/models/extraction.d.ts +15 -0
- package/dist/models/extraction.d.ts.map +1 -0
- package/dist/models/extraction.js +2 -0
- package/dist/models/extraction.js.map +1 -0
- package/dist/models/form-fill.d.ts +23 -0
- package/dist/models/form-fill.d.ts.map +1 -0
- package/dist/models/form-fill.js +2 -0
- package/dist/models/form-fill.js.map +1 -0
- package/dist/models/image.d.ts +177 -0
- package/dist/models/image.d.ts.map +1 -0
- package/dist/models/image.js +8 -0
- package/dist/models/image.js.map +1 -0
- package/dist/models/index.d.ts +14 -0
- package/dist/models/index.d.ts.map +1 -0
- package/dist/models/index.js +22 -0
- package/dist/models/index.js.map +1 -0
- package/dist/models/provenance.d.ts +174 -0
- package/dist/models/provenance.d.ts.map +1 -0
- package/dist/models/provenance.js +53 -0
- package/dist/models/provenance.js.map +1 -0
- package/dist/models/uploaded-file.d.ts +20 -0
- package/dist/models/uploaded-file.d.ts.map +1 -0
- package/dist/models/uploaded-file.js +2 -0
- package/dist/models/uploaded-file.js.map +1 -0
- package/dist/server/errors.d.ts +93 -0
- package/dist/server/errors.d.ts.map +1 -0
- package/dist/server/errors.js +256 -0
- package/dist/server/errors.js.map +1 -0
- package/dist/server/events.d.ts +36 -0
- package/dist/server/events.d.ts.map +1 -0
- package/dist/server/events.js +48 -0
- package/dist/server/events.js.map +1 -0
- package/dist/server/permissions.d.ts +26 -0
- package/dist/server/permissions.d.ts.map +1 -0
- package/dist/server/permissions.js +194 -0
- package/dist/server/permissions.js.map +1 -0
- package/dist/server/register-tools.d.ts +25 -0
- package/dist/server/register-tools.d.ts.map +1 -0
- package/dist/server/register-tools.js +102 -0
- package/dist/server/register-tools.js.map +1 -0
- package/dist/server/startup.d.ts +16 -0
- package/dist/server/startup.d.ts.map +1 -0
- package/dist/server/startup.js +37 -0
- package/dist/server/startup.js.map +1 -0
- package/dist/server/state.d.ts +166 -0
- package/dist/server/state.d.ts.map +1 -0
- package/dist/server/state.js +424 -0
- package/dist/server/state.js.map +1 -0
- package/dist/server/transports/http-transport.d.ts +37 -0
- package/dist/server/transports/http-transport.d.ts.map +1 -0
- package/dist/server/transports/http-transport.js +204 -0
- package/dist/server/transports/http-transport.js.map +1 -0
- package/dist/server/transports/index.d.ts +9 -0
- package/dist/server/transports/index.d.ts.map +1 -0
- package/dist/server/transports/index.js +9 -0
- package/dist/server/transports/index.js.map +1 -0
- package/dist/server/transports/session-manager.d.ts +40 -0
- package/dist/server/transports/session-manager.d.ts.map +1 -0
- package/dist/server/transports/session-manager.js +74 -0
- package/dist/server/transports/session-manager.js.map +1 -0
- package/dist/server/types.d.ts +82 -0
- package/dist/server/types.d.ts.map +1 -0
- package/dist/server/types.js +14 -0
- package/dist/server/types.js.map +1 -0
- package/dist/services/audit.d.ts +26 -0
- package/dist/services/audit.d.ts.map +1 -0
- package/dist/services/audit.js +43 -0
- package/dist/services/audit.js.map +1 -0
- package/dist/services/chunking/chunk-deduplicator.d.ts +33 -0
- package/dist/services/chunking/chunk-deduplicator.d.ts.map +1 -0
- package/dist/services/chunking/chunk-deduplicator.js +46 -0
- package/dist/services/chunking/chunk-deduplicator.js.map +1 -0
- package/dist/services/chunking/chunk-merger.d.ts +26 -0
- package/dist/services/chunking/chunk-merger.d.ts.map +1 -0
- package/dist/services/chunking/chunk-merger.js +94 -0
- package/dist/services/chunking/chunk-merger.js.map +1 -0
- package/dist/services/chunking/chunker.d.ts +62 -0
- package/dist/services/chunking/chunker.d.ts.map +1 -0
- package/dist/services/chunking/chunker.js +566 -0
- package/dist/services/chunking/chunker.js.map +1 -0
- package/dist/services/chunking/heading-normalizer.d.ts +33 -0
- package/dist/services/chunking/heading-normalizer.d.ts.map +1 -0
- package/dist/services/chunking/heading-normalizer.js +101 -0
- package/dist/services/chunking/heading-normalizer.js.map +1 -0
- package/dist/services/chunking/json-block-analyzer.d.ts +163 -0
- package/dist/services/chunking/json-block-analyzer.d.ts.map +1 -0
- package/dist/services/chunking/json-block-analyzer.js +1033 -0
- package/dist/services/chunking/json-block-analyzer.js.map +1 -0
- package/dist/services/chunking/markdown-parser.d.ts +75 -0
- package/dist/services/chunking/markdown-parser.d.ts.map +1 -0
- package/dist/services/chunking/markdown-parser.js +428 -0
- package/dist/services/chunking/markdown-parser.js.map +1 -0
- package/dist/services/chunking/text-normalizer.d.ts +20 -0
- package/dist/services/chunking/text-normalizer.d.ts.map +1 -0
- package/dist/services/chunking/text-normalizer.js +36 -0
- package/dist/services/chunking/text-normalizer.js.map +1 -0
- package/dist/services/clm/contract-schemas.d.ts +36 -0
- package/dist/services/clm/contract-schemas.d.ts.map +1 -0
- package/dist/services/clm/contract-schemas.js +92 -0
- package/dist/services/clm/contract-schemas.js.map +1 -0
- package/dist/services/clm/summarization.d.ts +46 -0
- package/dist/services/clm/summarization.d.ts.map +1 -0
- package/dist/services/clm/summarization.js +61 -0
- package/dist/services/clm/summarization.js.map +1 -0
- package/dist/services/clustering/clustering-service.d.ts +58 -0
- package/dist/services/clustering/clustering-service.d.ts.map +1 -0
- package/dist/services/clustering/clustering-service.js +467 -0
- package/dist/services/clustering/clustering-service.js.map +1 -0
- package/dist/services/comparison/diff-service.d.ts +41 -0
- package/dist/services/comparison/diff-service.d.ts.map +1 -0
- package/dist/services/comparison/diff-service.js +120 -0
- package/dist/services/comparison/diff-service.js.map +1 -0
- package/dist/services/embedding/embedder.d.ts +55 -0
- package/dist/services/embedding/embedder.d.ts.map +1 -0
- package/dist/services/embedding/embedder.js +202 -0
- package/dist/services/embedding/embedder.js.map +1 -0
- package/dist/services/embedding/nomic.d.ts +67 -0
- package/dist/services/embedding/nomic.d.ts.map +1 -0
- package/dist/services/embedding/nomic.js +280 -0
- package/dist/services/embedding/nomic.js.map +1 -0
- package/dist/services/gemini/circuit-breaker.d.ts +106 -0
- package/dist/services/gemini/circuit-breaker.d.ts.map +1 -0
- package/dist/services/gemini/circuit-breaker.js +237 -0
- package/dist/services/gemini/circuit-breaker.js.map +1 -0
- package/dist/services/gemini/client.d.ts +173 -0
- package/dist/services/gemini/client.d.ts.map +1 -0
- package/dist/services/gemini/client.js +483 -0
- package/dist/services/gemini/client.js.map +1 -0
- package/dist/services/gemini/config.d.ts +116 -0
- package/dist/services/gemini/config.d.ts.map +1 -0
- package/dist/services/gemini/config.js +118 -0
- package/dist/services/gemini/config.js.map +1 -0
- package/dist/services/gemini/index.d.ts +9 -0
- package/dist/services/gemini/index.d.ts.map +1 -0
- package/dist/services/gemini/index.js +13 -0
- package/dist/services/gemini/index.js.map +1 -0
- package/dist/services/gemini/rate-limiter.d.ts +62 -0
- package/dist/services/gemini/rate-limiter.d.ts.map +1 -0
- package/dist/services/gemini/rate-limiter.js +120 -0
- package/dist/services/gemini/rate-limiter.js.map +1 -0
- package/dist/services/images/extractor.d.ts +88 -0
- package/dist/services/images/extractor.d.ts.map +1 -0
- package/dist/services/images/extractor.js +340 -0
- package/dist/services/images/extractor.js.map +1 -0
- package/dist/services/images/optimizer.d.ts +130 -0
- package/dist/services/images/optimizer.d.ts.map +1 -0
- package/dist/services/images/optimizer.js +228 -0
- package/dist/services/images/optimizer.js.map +1 -0
- package/dist/services/ocr/datalab.d.ts +64 -0
- package/dist/services/ocr/datalab.d.ts.map +1 -0
- package/dist/services/ocr/datalab.js +425 -0
- package/dist/services/ocr/datalab.js.map +1 -0
- package/dist/services/ocr/errors.d.ts +38 -0
- package/dist/services/ocr/errors.d.ts.map +1 -0
- package/dist/services/ocr/errors.js +83 -0
- package/dist/services/ocr/errors.js.map +1 -0
- package/dist/services/ocr/file-manager.d.ts +76 -0
- package/dist/services/ocr/file-manager.d.ts.map +1 -0
- package/dist/services/ocr/file-manager.js +238 -0
- package/dist/services/ocr/file-manager.js.map +1 -0
- package/dist/services/ocr/form-fill.d.ts +48 -0
- package/dist/services/ocr/form-fill.d.ts.map +1 -0
- package/dist/services/ocr/form-fill.js +213 -0
- package/dist/services/ocr/form-fill.js.map +1 -0
- package/dist/services/ocr/processor.d.ts +95 -0
- package/dist/services/ocr/processor.d.ts.map +1 -0
- package/dist/services/ocr/processor.js +259 -0
- package/dist/services/ocr/processor.js.map +1 -0
- package/dist/services/provenance/agent-metadata.d.ts +82 -0
- package/dist/services/provenance/agent-metadata.d.ts.map +1 -0
- package/dist/services/provenance/agent-metadata.js +106 -0
- package/dist/services/provenance/agent-metadata.js.map +1 -0
- package/dist/services/provenance/chain-hash.d.ts +57 -0
- package/dist/services/provenance/chain-hash.d.ts.map +1 -0
- package/dist/services/provenance/chain-hash.js +131 -0
- package/dist/services/provenance/chain-hash.js.map +1 -0
- package/dist/services/provenance/exporter.d.ts +202 -0
- package/dist/services/provenance/exporter.d.ts.map +1 -0
- package/dist/services/provenance/exporter.js +457 -0
- package/dist/services/provenance/exporter.js.map +1 -0
- package/dist/services/provenance/index.d.ts +15 -0
- package/dist/services/provenance/index.d.ts.map +1 -0
- package/dist/services/provenance/index.js +17 -0
- package/dist/services/provenance/index.js.map +1 -0
- package/dist/services/provenance/tracker.d.ts +138 -0
- package/dist/services/provenance/tracker.d.ts.map +1 -0
- package/dist/services/provenance/tracker.js +293 -0
- package/dist/services/provenance/tracker.js.map +1 -0
- package/dist/services/provenance/verifier.d.ts +153 -0
- package/dist/services/provenance/verifier.d.ts.map +1 -0
- package/dist/services/provenance/verifier.js +536 -0
- package/dist/services/provenance/verifier.js.map +1 -0
- package/dist/services/python-pool.d.ts +70 -0
- package/dist/services/python-pool.d.ts.map +1 -0
- package/dist/services/python-pool.js +265 -0
- package/dist/services/python-pool.js.map +1 -0
- package/dist/services/search/bm25.d.ts +180 -0
- package/dist/services/search/bm25.d.ts.map +1 -0
- package/dist/services/search/bm25.js +656 -0
- package/dist/services/search/bm25.js.map +1 -0
- package/dist/services/search/fusion.d.ts +103 -0
- package/dist/services/search/fusion.d.ts.map +1 -0
- package/dist/services/search/fusion.js +122 -0
- package/dist/services/search/fusion.js.map +1 -0
- package/dist/services/search/local-reranker.d.ts +30 -0
- package/dist/services/search/local-reranker.d.ts.map +1 -0
- package/dist/services/search/local-reranker.js +123 -0
- package/dist/services/search/local-reranker.js.map +1 -0
- package/dist/services/search/quality.d.ts +11 -0
- package/dist/services/search/quality.d.ts.map +1 -0
- package/dist/services/search/quality.js +17 -0
- package/dist/services/search/quality.js.map +1 -0
- package/dist/services/search/query-classifier.d.ts +34 -0
- package/dist/services/search/query-classifier.d.ts.map +1 -0
- package/dist/services/search/query-classifier.js +114 -0
- package/dist/services/search/query-classifier.js.map +1 -0
- package/dist/services/search/query-expander.d.ts +73 -0
- package/dist/services/search/query-expander.d.ts.map +1 -0
- package/dist/services/search/query-expander.js +281 -0
- package/dist/services/search/query-expander.js.map +1 -0
- package/dist/services/search/reranker.d.ts +44 -0
- package/dist/services/search/reranker.d.ts.map +1 -0
- package/dist/services/search/reranker.js +101 -0
- package/dist/services/search/reranker.js.map +1 -0
- package/dist/services/storage/database/annotation-operations.d.ts +113 -0
- package/dist/services/storage/database/annotation-operations.d.ts.map +1 -0
- package/dist/services/storage/database/annotation-operations.js +177 -0
- package/dist/services/storage/database/annotation-operations.js.map +1 -0
- package/dist/services/storage/database/approval-operations.d.ts +132 -0
- package/dist/services/storage/database/approval-operations.d.ts.map +1 -0
- package/dist/services/storage/database/approval-operations.js +206 -0
- package/dist/services/storage/database/approval-operations.js.map +1 -0
- package/dist/services/storage/database/chunk-operations.d.ts +132 -0
- package/dist/services/storage/database/chunk-operations.d.ts.map +1 -0
- package/dist/services/storage/database/chunk-operations.js +306 -0
- package/dist/services/storage/database/chunk-operations.js.map +1 -0
- package/dist/services/storage/database/cluster-operations.d.ts +97 -0
- package/dist/services/storage/database/cluster-operations.d.ts.map +1 -0
- package/dist/services/storage/database/cluster-operations.js +258 -0
- package/dist/services/storage/database/cluster-operations.js.map +1 -0
- package/dist/services/storage/database/comparison-operations.d.ts +41 -0
- package/dist/services/storage/database/comparison-operations.d.ts.map +1 -0
- package/dist/services/storage/database/comparison-operations.js +65 -0
- package/dist/services/storage/database/comparison-operations.js.map +1 -0
- package/dist/services/storage/database/converters.d.ts +36 -0
- package/dist/services/storage/database/converters.d.ts.map +1 -0
- package/dist/services/storage/database/converters.js +244 -0
- package/dist/services/storage/database/converters.js.map +1 -0
- package/dist/services/storage/database/document-operations.d.ts +145 -0
- package/dist/services/storage/database/document-operations.d.ts.map +1 -0
- package/dist/services/storage/database/document-operations.js +498 -0
- package/dist/services/storage/database/document-operations.js.map +1 -0
- package/dist/services/storage/database/embedding-operations.d.ts +130 -0
- package/dist/services/storage/database/embedding-operations.d.ts.map +1 -0
- package/dist/services/storage/database/embedding-operations.js +315 -0
- package/dist/services/storage/database/embedding-operations.js.map +1 -0
- package/dist/services/storage/database/extraction-operations.d.ts +47 -0
- package/dist/services/storage/database/extraction-operations.d.ts.map +1 -0
- package/dist/services/storage/database/extraction-operations.js +85 -0
- package/dist/services/storage/database/extraction-operations.js.map +1 -0
- package/dist/services/storage/database/form-fill-operations.d.ts +58 -0
- package/dist/services/storage/database/form-fill-operations.d.ts.map +1 -0
- package/dist/services/storage/database/form-fill-operations.js +116 -0
- package/dist/services/storage/database/form-fill-operations.js.map +1 -0
- package/dist/services/storage/database/helpers.d.ts +29 -0
- package/dist/services/storage/database/helpers.d.ts.map +1 -0
- package/dist/services/storage/database/helpers.js +55 -0
- package/dist/services/storage/database/helpers.js.map +1 -0
- package/dist/services/storage/database/image-operations.d.ts +202 -0
- package/dist/services/storage/database/image-operations.d.ts.map +1 -0
- package/dist/services/storage/database/image-operations.js +484 -0
- package/dist/services/storage/database/image-operations.js.map +1 -0
- package/dist/services/storage/database/index.d.ts +13 -0
- package/dist/services/storage/database/index.d.ts.map +1 -0
- package/dist/services/storage/database/index.js +16 -0
- package/dist/services/storage/database/index.js.map +1 -0
- package/dist/services/storage/database/lock-operations.d.ts +59 -0
- package/dist/services/storage/database/lock-operations.d.ts.map +1 -0
- package/dist/services/storage/database/lock-operations.js +89 -0
- package/dist/services/storage/database/lock-operations.js.map +1 -0
- package/dist/services/storage/database/obligation-operations.d.ts +88 -0
- package/dist/services/storage/database/obligation-operations.d.ts.map +1 -0
- package/dist/services/storage/database/obligation-operations.js +206 -0
- package/dist/services/storage/database/obligation-operations.js.map +1 -0
- package/dist/services/storage/database/ocr-operations.d.ts +33 -0
- package/dist/services/storage/database/ocr-operations.d.ts.map +1 -0
- package/dist/services/storage/database/ocr-operations.js +70 -0
- package/dist/services/storage/database/ocr-operations.js.map +1 -0
- package/dist/services/storage/database/playbook-operations.d.ts +72 -0
- package/dist/services/storage/database/playbook-operations.d.ts.map +1 -0
- package/dist/services/storage/database/playbook-operations.js +247 -0
- package/dist/services/storage/database/playbook-operations.js.map +1 -0
- package/dist/services/storage/database/provenance-operations.d.ts +112 -0
- package/dist/services/storage/database/provenance-operations.d.ts.map +1 -0
- package/dist/services/storage/database/provenance-operations.js +251 -0
- package/dist/services/storage/database/provenance-operations.js.map +1 -0
- package/dist/services/storage/database/service.d.ts +142 -0
- package/dist/services/storage/database/service.d.ts.map +1 -0
- package/dist/services/storage/database/service.js +310 -0
- package/dist/services/storage/database/service.js.map +1 -0
- package/dist/services/storage/database/static-operations.d.ts +30 -0
- package/dist/services/storage/database/static-operations.d.ts.map +1 -0
- package/dist/services/storage/database/static-operations.js +218 -0
- package/dist/services/storage/database/static-operations.js.map +1 -0
- package/dist/services/storage/database/stats-operations.d.ts +101 -0
- package/dist/services/storage/database/stats-operations.d.ts.map +1 -0
- package/dist/services/storage/database/stats-operations.js +394 -0
- package/dist/services/storage/database/stats-operations.js.map +1 -0
- package/dist/services/storage/database/tag-operations.d.ts +76 -0
- package/dist/services/storage/database/tag-operations.d.ts.map +1 -0
- package/dist/services/storage/database/tag-operations.js +178 -0
- package/dist/services/storage/database/tag-operations.js.map +1 -0
- package/dist/services/storage/database/types.d.ts +286 -0
- package/dist/services/storage/database/types.d.ts.map +1 -0
- package/dist/services/storage/database/types.js +39 -0
- package/dist/services/storage/database/types.js.map +1 -0
- package/dist/services/storage/database/upload-operations.d.ts +71 -0
- package/dist/services/storage/database/upload-operations.d.ts.map +1 -0
- package/dist/services/storage/database/upload-operations.js +124 -0
- package/dist/services/storage/database/upload-operations.js.map +1 -0
- package/dist/services/storage/database/user-operations.d.ts +102 -0
- package/dist/services/storage/database/user-operations.d.ts.map +1 -0
- package/dist/services/storage/database/user-operations.js +151 -0
- package/dist/services/storage/database/user-operations.js.map +1 -0
- package/dist/services/storage/database/workflow-operations.d.ts +98 -0
- package/dist/services/storage/database/workflow-operations.d.ts.map +1 -0
- package/dist/services/storage/database/workflow-operations.js +157 -0
- package/dist/services/storage/database/workflow-operations.js.map +1 -0
- package/dist/services/storage/database.d.ts +16 -0
- package/dist/services/storage/database.d.ts.map +1 -0
- package/dist/services/storage/database.js +15 -0
- package/dist/services/storage/database.js.map +1 -0
- package/dist/services/storage/index.d.ts +10 -0
- package/dist/services/storage/index.d.ts.map +1 -0
- package/dist/services/storage/index.js +10 -0
- package/dist/services/storage/index.js.map +1 -0
- package/dist/services/storage/migrations/index.d.ts +16 -0
- package/dist/services/storage/migrations/index.d.ts.map +1 -0
- package/dist/services/storage/migrations/index.js +20 -0
- package/dist/services/storage/migrations/index.js.map +1 -0
- package/dist/services/storage/migrations/operations.d.ts +40 -0
- package/dist/services/storage/migrations/operations.d.ts.map +1 -0
- package/dist/services/storage/migrations/operations.js +2910 -0
- package/dist/services/storage/migrations/operations.js.map +1 -0
- package/dist/services/storage/migrations/schema-definitions.d.ts +306 -0
- package/dist/services/storage/migrations/schema-definitions.d.ts.map +1 -0
- package/dist/services/storage/migrations/schema-definitions.js +1006 -0
- package/dist/services/storage/migrations/schema-definitions.js.map +1 -0
- package/dist/services/storage/migrations/schema-helpers.d.ts +50 -0
- package/dist/services/storage/migrations/schema-helpers.d.ts.map +1 -0
- package/dist/services/storage/migrations/schema-helpers.js +176 -0
- package/dist/services/storage/migrations/schema-helpers.js.map +1 -0
- package/dist/services/storage/migrations/types.d.ts +15 -0
- package/dist/services/storage/migrations/types.d.ts.map +1 -0
- package/dist/services/storage/migrations/types.js +21 -0
- package/dist/services/storage/migrations/types.js.map +1 -0
- package/dist/services/storage/migrations/verification.d.ts +20 -0
- package/dist/services/storage/migrations/verification.d.ts.map +1 -0
- package/dist/services/storage/migrations/verification.js +78 -0
- package/dist/services/storage/migrations/verification.js.map +1 -0
- package/dist/services/storage/migrations.d.ts +16 -0
- package/dist/services/storage/migrations.d.ts.map +1 -0
- package/dist/services/storage/migrations.js +17 -0
- package/dist/services/storage/migrations.js.map +1 -0
- package/dist/services/storage/types.d.ts +12 -0
- package/dist/services/storage/types.d.ts.map +1 -0
- package/dist/services/storage/types.js +5 -0
- package/dist/services/storage/types.js.map +1 -0
- package/dist/services/storage/vector.d.ts +208 -0
- package/dist/services/storage/vector.d.ts.map +1 -0
- package/dist/services/storage/vector.js +526 -0
- package/dist/services/storage/vector.js.map +1 -0
- package/dist/services/vlm/pipeline.d.ts +194 -0
- package/dist/services/vlm/pipeline.d.ts.map +1 -0
- package/dist/services/vlm/pipeline.js +800 -0
- package/dist/services/vlm/pipeline.js.map +1 -0
- package/dist/services/vlm/prompts.d.ts +171 -0
- package/dist/services/vlm/prompts.d.ts.map +1 -0
- package/dist/services/vlm/prompts.js +229 -0
- package/dist/services/vlm/prompts.js.map +1 -0
- package/dist/services/vlm/service.d.ts +174 -0
- package/dist/services/vlm/service.d.ts.map +1 -0
- package/dist/services/vlm/service.js +256 -0
- package/dist/services/vlm/service.js.map +1 -0
- package/dist/services/webhook-delivery.d.ts +4 -0
- package/dist/services/webhook-delivery.d.ts.map +1 -0
- package/dist/services/webhook-delivery.js +140 -0
- package/dist/services/webhook-delivery.js.map +1 -0
- package/dist/tools/chunks.d.ts +19 -0
- package/dist/tools/chunks.d.ts.map +1 -0
- package/dist/tools/chunks.js +392 -0
- package/dist/tools/chunks.js.map +1 -0
- package/dist/tools/clm.d.ts +16 -0
- package/dist/tools/clm.d.ts.map +1 -0
- package/dist/tools/clm.js +668 -0
- package/dist/tools/clm.js.map +1 -0
- package/dist/tools/clustering.d.ts +13 -0
- package/dist/tools/clustering.d.ts.map +1 -0
- package/dist/tools/clustering.js +498 -0
- package/dist/tools/clustering.js.map +1 -0
- package/dist/tools/collaboration.d.ts +15 -0
- package/dist/tools/collaboration.d.ts.map +1 -0
- package/dist/tools/collaboration.js +516 -0
- package/dist/tools/collaboration.js.map +1 -0
- package/dist/tools/comparison.d.ts +13 -0
- package/dist/tools/comparison.d.ts.map +1 -0
- package/dist/tools/comparison.js +735 -0
- package/dist/tools/comparison.js.map +1 -0
- package/dist/tools/compliance.d.ts +15 -0
- package/dist/tools/compliance.d.ts.map +1 -0
- package/dist/tools/compliance.js +640 -0
- package/dist/tools/compliance.js.map +1 -0
- package/dist/tools/config.d.ts +19 -0
- package/dist/tools/config.d.ts.map +1 -0
- package/dist/tools/config.js +213 -0
- package/dist/tools/config.js.map +1 -0
- package/dist/tools/database.d.ts +62 -0
- package/dist/tools/database.d.ts.map +1 -0
- package/dist/tools/database.js +288 -0
- package/dist/tools/database.js.map +1 -0
- package/dist/tools/documents.d.ts +61 -0
- package/dist/tools/documents.d.ts.map +1 -0
- package/dist/tools/documents.js +1624 -0
- package/dist/tools/documents.js.map +1 -0
- package/dist/tools/embeddings.d.ts +14 -0
- package/dist/tools/embeddings.d.ts.map +1 -0
- package/dist/tools/embeddings.js +626 -0
- package/dist/tools/embeddings.js.map +1 -0
- package/dist/tools/evaluation.d.ts +25 -0
- package/dist/tools/evaluation.d.ts.map +1 -0
- package/dist/tools/evaluation.js +523 -0
- package/dist/tools/evaluation.js.map +1 -0
- package/dist/tools/events.d.ts +16 -0
- package/dist/tools/events.d.ts.map +1 -0
- package/dist/tools/events.js +493 -0
- package/dist/tools/events.js.map +1 -0
- package/dist/tools/extraction-structured.d.ts +13 -0
- package/dist/tools/extraction-structured.d.ts.map +1 -0
- package/dist/tools/extraction-structured.js +390 -0
- package/dist/tools/extraction-structured.js.map +1 -0
- package/dist/tools/extraction.d.ts +24 -0
- package/dist/tools/extraction.d.ts.map +1 -0
- package/dist/tools/extraction.js +424 -0
- package/dist/tools/extraction.js.map +1 -0
- package/dist/tools/file-management.d.ts +14 -0
- package/dist/tools/file-management.d.ts.map +1 -0
- package/dist/tools/file-management.js +523 -0
- package/dist/tools/file-management.js.map +1 -0
- package/dist/tools/form-fill.d.ts +13 -0
- package/dist/tools/form-fill.d.ts.map +1 -0
- package/dist/tools/form-fill.js +250 -0
- package/dist/tools/form-fill.js.map +1 -0
- package/dist/tools/health.d.ts +19 -0
- package/dist/tools/health.d.ts.map +1 -0
- package/dist/tools/health.js +229 -0
- package/dist/tools/health.js.map +1 -0
- package/dist/tools/images.d.ts +54 -0
- package/dist/tools/images.d.ts.map +1 -0
- package/dist/tools/images.js +787 -0
- package/dist/tools/images.js.map +1 -0
- package/dist/tools/ingestion.d.ts +94 -0
- package/dist/tools/ingestion.d.ts.map +1 -0
- package/dist/tools/ingestion.js +1659 -0
- package/dist/tools/ingestion.js.map +1 -0
- package/dist/tools/intelligence.d.ts +18 -0
- package/dist/tools/intelligence.d.ts.map +1 -0
- package/dist/tools/intelligence.js +1039 -0
- package/dist/tools/intelligence.js.map +1 -0
- package/dist/tools/provenance.d.ts +51 -0
- package/dist/tools/provenance.d.ts.map +1 -0
- package/dist/tools/provenance.js +691 -0
- package/dist/tools/provenance.js.map +1 -0
- package/dist/tools/reports.d.ts +41 -0
- package/dist/tools/reports.d.ts.map +1 -0
- package/dist/tools/reports.js +1394 -0
- package/dist/tools/reports.js.map +1 -0
- package/dist/tools/search.d.ts +35 -0
- package/dist/tools/search.d.ts.map +1 -0
- package/dist/tools/search.js +2528 -0
- package/dist/tools/search.js.map +1 -0
- package/dist/tools/shared.d.ts +52 -0
- package/dist/tools/shared.d.ts.map +1 -0
- package/dist/tools/shared.js +54 -0
- package/dist/tools/shared.js.map +1 -0
- package/dist/tools/tags.d.ts +15 -0
- package/dist/tools/tags.d.ts.map +1 -0
- package/dist/tools/tags.js +287 -0
- package/dist/tools/tags.js.map +1 -0
- package/dist/tools/timeline.d.ts +15 -0
- package/dist/tools/timeline.d.ts.map +1 -0
- package/dist/tools/timeline.js +14 -0
- package/dist/tools/timeline.js.map +1 -0
- package/dist/tools/users.d.ts +14 -0
- package/dist/tools/users.d.ts.map +1 -0
- package/dist/tools/users.js +257 -0
- package/dist/tools/users.js.map +1 -0
- package/dist/tools/vlm.d.ts +40 -0
- package/dist/tools/vlm.d.ts.map +1 -0
- package/dist/tools/vlm.js +475 -0
- package/dist/tools/vlm.js.map +1 -0
- package/dist/tools/workflow.d.ts +16 -0
- package/dist/tools/workflow.d.ts.map +1 -0
- package/dist/tools/workflow.js +495 -0
- package/dist/tools/workflow.js.map +1 -0
- package/dist/utils/backoff.d.ts +53 -0
- package/dist/utils/backoff.d.ts.map +1 -0
- package/dist/utils/backoff.js +78 -0
- package/dist/utils/backoff.js.map +1 -0
- package/dist/utils/config-persistence.d.ts +33 -0
- package/dist/utils/config-persistence.d.ts.map +1 -0
- package/dist/utils/config-persistence.js +61 -0
- package/dist/utils/config-persistence.js.map +1 -0
- package/dist/utils/hash.d.ts +65 -0
- package/dist/utils/hash.d.ts.map +1 -0
- package/dist/utils/hash.js +146 -0
- package/dist/utils/hash.js.map +1 -0
- package/dist/utils/math.d.ts +21 -0
- package/dist/utils/math.d.ts.map +1 -0
- package/dist/utils/math.js +39 -0
- package/dist/utils/math.js.map +1 -0
- package/dist/utils/validation.d.ts +697 -0
- package/dist/utils/validation.d.ts.map +1 -0
- package/dist/utils/validation.js +529 -0
- package/dist/utils/validation.js.map +1 -0
- package/package.json +96 -0
- package/python/.gitkeep +0 -0
- package/python/__init__.py +104 -0
- package/python/clustering_worker.py +440 -0
- package/python/docx_image_extractor.py +524 -0
- package/python/embedding_worker.py +552 -0
- package/python/file_manager_worker.py +564 -0
- package/python/form_fill_worker.py +399 -0
- package/python/gpu_utils.py +582 -0
- package/python/image_extractor.py +317 -0
- package/python/image_optimizer.py +444 -0
- package/python/ocr_worker.py +712 -0
- package/python/pyproject.toml +76 -0
- package/python/requirements.txt +51 -0
- package/python/reranker_worker.py +87 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"converters.js","sourceRoot":"","sources":["../../../../src/services/storage/database/converters.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAqBH,yDAAyD;AACzD,MAAM,uBAAuB,GAA8B,CAAC,SAAS,EAAE,YAAY,EAAE,UAAU,EAAE,QAAQ,CAAC,CAAC;AAE3G,yDAAyD;AACzD,MAAM,sBAAsB,GAAsB;IAChD,UAAU,EAAE,YAAY,EAAE,OAAO,EAAE,OAAO,EAAE,iBAAiB;IAC7D,YAAY,EAAE,WAAW,EAAE,YAAY,EAAE,YAAY,EAAE,WAAW;CACnE,CAAC;AAEF,oDAAoD;AACpD,MAAM,kBAAkB,GAAyB,CAAC,SAAS,EAAE,YAAY,EAAE,UAAU,EAAE,QAAQ,CAAC,CAAC;AAEjG;;;GAGG;AACH,SAAS,YAAY,CAAmB,KAAa,EAAE,WAAyB,EAAE,SAAiB,EAAE,EAAU;IAC7G,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC,KAAU,CAAC,EAAE,CAAC;QACtC,MAAM,IAAI,KAAK,CAAC,WAAW,SAAS,KAAK,KAAK,eAAe,EAAE,mBAAmB,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAC9G,CAAC;IACD,OAAO,KAAU,CAAC;AACpB,CAAC;AAED;;GAEG;AACH,SAAS,qBAAqB,CAAC,EAAU,EAAE,GAAW;IACpD,IAAI,CAAC;QACH,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAA4B,CAAC;IACpD,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,CAAC,KAAK,CACX,wDAAwD,EAAE,KAAK,GAAG,GAAG,EACrE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CACvD,CAAC;QACF,OAAO,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,GAAG,EAAE,CAAC;IAC3C,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,SAAS,aAAa,CAAC,EAAU,EAAE,GAAW;IAC5C,IAAI,CAAC;QACH,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAAuB,CAAC;IAC/C,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,CAAC,KAAK,CACX,+CAA+C,EAAE,KAAK,GAAG,GAAG,EAC5D,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CACvD,CAAC;QACF,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,SAAS,sBAAsB,CAAC,EAAU,EAAE,GAAW;IACrD,IAAI,CAAC;QACH,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAAsB,CAAC;IAC9C,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,CAAC,KAAK,CACX,qDAAqD,EAAE,KAAK,GAAG,GAAG,EAClE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CACvD,CAAC;QACF,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,aAAa,CAAC,GAAgB;IAC5C,OAAO;QACL,EAAE,EAAE,GAAG,CAAC,EAAE;QACV,SAAS,EAAE,GAAG,CAAC,SAAS;QACxB,SAAS,EAAE,GAAG,CAAC,SAAS;QACxB,SAAS,EAAE,GAAG,CAAC,SAAS;QACxB,SAAS,EAAE,GAA
G,CAAC,SAAS;QACxB,SAAS,EAAE,GAAG,CAAC,SAAS;QACxB,MAAM,EAAE,YAAY,CAAC,GAAG,CAAC,MAAM,EAAE,uBAAuB,EAAE,gBAAgB,EAAE,GAAG,CAAC,EAAE,CAAC;QACnF,UAAU,EAAE,GAAG,CAAC,UAAU;QAC1B,aAAa,EAAE,GAAG,CAAC,aAAa;QAChC,UAAU,EAAE,GAAG,CAAC,UAAU;QAC1B,WAAW,EAAE,GAAG,CAAC,WAAW;QAC5B,gBAAgB,EAAE,GAAG,CAAC,gBAAgB;QACtC,aAAa,EAAE,GAAG,CAAC,aAAa;QAChC,SAAS,EAAE,GAAG,CAAC,SAAS,IAAI,IAAI;QAChC,UAAU,EAAE,GAAG,CAAC,UAAU,IAAI,IAAI;QAClC,WAAW,EAAE,GAAG,CAAC,WAAW,IAAI,IAAI;QACpC,eAAe,EAAE,GAAG,CAAC,eAAe,IAAI,IAAI;KAC7C,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,cAAc,CAAC,GAAiB;IAC9C,OAAO;QACL,EAAE,EAAE,GAAG,CAAC,EAAE;QACV,aAAa,EAAE,GAAG,CAAC,aAAa;QAChC,WAAW,EAAE,GAAG,CAAC,WAAW;QAC5B,cAAc,EAAE,GAAG,CAAC,cAAc;QAClC,WAAW,EAAE,GAAG,CAAC,WAAW;QAC5B,kBAAkB,EAAE,GAAG,CAAC,kBAAkB;QAC1C,YAAY,EAAE,GAAG,CAAC,YAAgD;QAClE,mBAAmB,EAAE,GAAG,CAAC,mBAAmB;QAC5C,UAAU,EAAE,GAAG,CAAC,UAAU;QAC1B,UAAU,EAAE,GAAG,CAAC,UAAU;QAC1B,YAAY,EAAE,GAAG,CAAC,YAAY;QAC9B,qBAAqB,EAAE,GAAG,CAAC,qBAAqB;QAChD,uBAAuB,EAAE,GAAG,CAAC,uBAAuB;QACpD,sBAAsB,EAAE,GAAG,CAAC,sBAAsB;QAClD,WAAW,EAAE,GAAG,CAAC,WAAW,IAAI,IAAI;QACpC,WAAW,EAAE,GAAG,CAAC,WAAW,IAAI,IAAI;KACrC,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,UAAU,CAAC,GAAa;IACtC,OAAO;QACL,EAAE,EAAE,GAAG,CAAC,EAAE;QACV,WAAW,EAAE,GAAG,CAAC,WAAW;QAC5B,aAAa,EAAE,GAAG,CAAC,aAAa;QAChC,IAAI,EAAE,GAAG,CAAC,IAAI;QACd,SAAS,EAAE,GAAG,CAAC,SAAS;QACxB,WAAW,EAAE,GAAG,CAAC,WAAW;QAC5B,eAAe,EAAE,GAAG,CAAC,eAAe;QACpC,aAAa,EAAE,GAAG,CAAC,aAAa;QAChC,WAAW,EAAE,GAAG,CAAC,WAAW;QAC5B,UAAU,EAAE,GAAG,CAAC,UAAU;QAC1B,gBAAgB,EAAE,GAAG,CAAC,gBAAgB;QACtC,YAAY,EAAE,GAAG,CAAC,YAAY;QAC9B,aAAa,EAAE,GAAG,CAAC,aAAa;QAChC,UAAU,EAAE,GAAG,CAAC,UAAU;QAC1B,gBAAgB,EAAE,GAAG,CAAC,gBAAqD;QAC3E,WAAW,EAAE,GAAG,CAAC,WAAW;QAC5B,iBAAiB,EAAE,GAAG,CAAC,iBAAiB,IAAI,IAAI;QAChD,eAAe,EAAE,GAAG,CAAC,eAAe;QACpC,aAAa,EAAE,GAAG,CAAC,aAAa;QAChC,YAAY,EAAE,GAAG,CAAC,YAAY;QAC9B,aAAa,EAAE,GAAG,CAAC,aAAa;QAChC,SAAS,EAAE,GAAG,CAAC,SAAS;QACxB,iBAAiB,EAAE,GAAG,CAAC,iBAAiB;KACzC,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,cAAc,CAAC,GAAiB;IAC9C,OAAO;QACL,EAAE,EAAE,GAAG,C
AAC,EAAE;QACV,QAAQ,EAAE,GAAG,CAAC,QAAQ;QACtB,QAAQ,EAAE,GAAG,CAAC,QAAQ;QACtB,aAAa,EAAE,GAAG,CAAC,aAAa;QAChC,WAAW,EAAE,GAAG,CAAC,WAAW;QAC5B,aAAa,EAAE,GAAG,CAAC,aAAa;QAChC,oBAAoB,EAAE,GAAG,CAAC,oBAAoB;QAC9C,gBAAgB,EAAE,GAAG,CAAC,gBAAgB;QACtC,gBAAgB,EAAE,GAAG,CAAC,gBAAgB;QACtC,gBAAgB,EAAE,GAAG,CAAC,gBAAgB;QACtC,WAAW,EAAE,GAAG,CAAC,WAAW;QAC5B,UAAU,EAAE,GAAG,CAAC,UAAU;QAC1B,eAAe,EAAE,GAAG,CAAC,eAAe;QACpC,aAAa,EAAE,GAAG,CAAC,aAAa;QAChC,WAAW,EAAE,GAAG,CAAC,WAAW;QAC5B,YAAY,EAAE,GAAG,CAAC,YAAY;QAC9B,UAAU,EAAE,GAAG,CAAC,UAAU;QAC1B,aAAa,EAAE,GAAG,CAAC,aAAa;QAChC,SAAS,EAAE,GAAG,CAAC,SAA+C;QAC9D,cAAc,EAAE,GAAG,CAAC,cAAyB;QAC7C,UAAU,EAAE,GAAG,CAAC,UAAU,IAAI,EAAE;QAChC,aAAa,EAAE,GAAG,CAAC,aAAa;QAChC,YAAY,EAAE,GAAG,CAAC,YAAY;QAC9B,UAAU,EAAE,GAAG,CAAC,UAAU;QAC1B,sBAAsB,EAAE,GAAG,CAAC,sBAAsB;KACnD,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,eAAe,CAAC,GAAkB;IAChD,OAAO;QACL,EAAE,EAAE,GAAG,CAAC,EAAE;QACV,IAAI,EAAE,YAAY,CAAC,GAAG,CAAC,IAAI,EAAE,sBAAsB,EAAE,gBAAgB,EAAE,GAAG,CAAC,EAAE,CAAmB;QAChG,UAAU,EAAE,GAAG,CAAC,UAAU;QAC1B,YAAY,EAAE,GAAG,CAAC,YAAY;QAC9B,sBAAsB,EAAE,GAAG,CAAC,sBAAsB;QAClD,uBAAuB,EAAE,GAAG,CAAC,uBAAuB;QACpD,WAAW,EAAE,GAAG,CAAC,WAAyB;QAC1C,WAAW,EAAE,GAAG,CAAC,WAAW;QAC5B,SAAS,EAAE,GAAG,CAAC,SAAS;QACxB,gBAAgB,EAAE,GAAG,CAAC,gBAAgB;QACtC,QAAQ,EAAE,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,aAAa,CAAC,GAAG,CAAC,EAAE,EAAE,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI;QACnE,YAAY,EAAE,GAAG,CAAC,YAAY;QAC9B,UAAU,EAAE,GAAG,CAAC,UAAU;QAC1B,SAAS,EAAE,GAAG,CAAC,SAAS;QACxB,SAAS,EAAE,GAAG,CAAC,SAAS;QACxB,iBAAiB,EAAE,GAAG,CAAC,iBAAiB;QACxC,iBAAiB,EAAE,qBAAqB,CAAC,GAAG,CAAC,EAAE,EAAE,GAAG,CAAC,iBAAiB,CAAC;QACvE,sBAAsB,EAAE,GAAG,CAAC,sBAAsB;QAClD,wBAAwB,EAAE,GAAG,CAAC,wBAAwB;QACtD,SAAS,EAAE,GAAG,CAAC,SAAS;QACxB,UAAU,EAAE,GAAG,CAAC,UAAU;QAC1B,WAAW,EAAE,GAAG,CAAC,WAAW;QAC5B,UAAU,EAAE,GAAG,CAAC,UAAU;KAC3B,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,UAAU,CAAC,GAAa;IACtC,OAAO;QACL,EAAE,EAAE,GAAG,CAAC,EAAE;QACV,WAAW,EAAE,GAAG,CAAC,WAAW;QAC5B,aAAa,EAAE,GAAG,CAAC,aAAa;QAChC,WAAW,EAAE,GAAG,CAAC,WAAW;QAC5B,YAAY,EAAE;YACZ,CAAC
,EAAE,GAAG,CAAC,MAAM;YACb,CAAC,EAAE,GAAG,CAAC,MAAM;YACb,KAAK,EAAE,GAAG,CAAC,UAAU;YACrB,MAAM,EAAE,GAAG,CAAC,WAAW;SACxB;QACD,WAAW,EAAE,GAAG,CAAC,WAAW;QAC5B,MAAM,EAAE,GAAG,CAAC,MAAM;QAClB,UAAU,EAAE;YACV,KAAK,EAAE,GAAG,CAAC,KAAK;YAChB,MAAM,EAAE,GAAG,CAAC,MAAM;SACnB;QACD,cAAc,EAAE,GAAG,CAAC,cAAc;QAClC,SAAS,EAAE,GAAG,CAAC,SAAS;QACxB,UAAU,EAAE,YAAY,CAAC,GAAG,CAAC,UAAU,EAAE,kBAAkB,EAAE,WAAW,EAAE,GAAG,CAAC,EAAE,CAAC;QACjF,eAAe,EAAE,GAAG,CAAC,eAAe;QACpC,mBAAmB,EAAE,GAAG,CAAC,mBAAmB;YAC1C,CAAC,CAAC,sBAAsB,CAAC,GAAG,CAAC,EAAE,EAAE,GAAG,CAAC,mBAAmB,CAAC;YACzD,CAAC,CAAC,IAAI;QACR,gBAAgB,EAAE,GAAG,CAAC,gBAAgB;QACtC,SAAS,EAAE,GAAG,CAAC,SAAS;QACxB,cAAc,EAAE,GAAG,CAAC,cAAc;QAClC,gBAAgB,EAAE,GAAG,CAAC,gBAAgB;QACtC,eAAe,EAAE,GAAG,CAAC,eAAe;QACpC,YAAY,EAAE,GAAG,CAAC,YAAY;QAC9B,aAAa,EAAE,GAAG,CAAC,aAAa;QAChC,UAAU,EAAE,GAAG,CAAC,UAAU;QAC1B,aAAa,EAAE,GAAG,CAAC,aAAa;QAChC,UAAU,EAAE,GAAG,CAAC,UAAU,IAAI,IAAI;QAClC,gBAAgB,EAAE,OAAO,CAAC,GAAG,CAAC,gBAAgB,CAAC;QAC/C,YAAY,EAAE,GAAG,CAAC,YAAY,IAAI,IAAI;KACvC,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Document operations for DatabaseService
|
|
3
|
+
*
|
|
4
|
+
* Handles all CRUD operations for documents including
|
|
5
|
+
* insert, get, list, update, and delete with cascade.
|
|
6
|
+
*/
|
|
7
|
+
import Database from 'better-sqlite3';
|
|
8
|
+
import { Document, DocumentStatus } from '../../../models/document.js';
|
|
9
|
+
import { ListDocumentsOptions } from './types.js';
|
|
10
|
+
/**
|
|
11
|
+
* Encode a cursor from a (created_at, id) tuple.
|
|
12
|
+
* Uses base64url encoding for URL-safe transport.
|
|
13
|
+
*
|
|
14
|
+
* @param createdAt - ISO 8601 timestamp
|
|
15
|
+
* @param id - Document UUID
|
|
16
|
+
* @returns Base64url-encoded cursor string
|
|
17
|
+
*/
|
|
18
|
+
export declare function encodeCursor(createdAt: string, id: string): string;
|
|
19
|
+
/**
|
|
20
|
+
* Decode a cursor back to a (created_at, id) tuple.
|
|
21
|
+
*
|
|
22
|
+
* @param cursor - Base64url-encoded cursor string
|
|
23
|
+
* @returns Decoded cursor with created_at and id
|
|
24
|
+
* @throws Error if cursor is invalid or malformed
|
|
25
|
+
*/
|
|
26
|
+
export declare function decodeCursor(cursor: string): {
|
|
27
|
+
created_at: string;
|
|
28
|
+
id: string;
|
|
29
|
+
};
|
|
30
|
+
/**
|
|
31
|
+
* Insert a new document
|
|
32
|
+
*
|
|
33
|
+
* @param db - Database connection
|
|
34
|
+
* @param doc - Document data (created_at will be generated)
|
|
35
|
+
* @param updateMetadataCounts - Callback to update metadata counts
|
|
36
|
+
* @returns string - The document ID
|
|
37
|
+
*/
|
|
38
|
+
export declare function insertDocument(db: Database.Database, doc: Omit<Document, 'created_at'>, updateMetadataCounts: () => void): string;
|
|
39
|
+
/**
|
|
40
|
+
* Get a document by ID
|
|
41
|
+
*
|
|
42
|
+
* @param db - Database connection
|
|
43
|
+
* @param id - Document ID
|
|
44
|
+
* @returns Document | null - The document or null if not found
|
|
45
|
+
*/
|
|
46
|
+
export declare function getDocument(db: Database.Database, id: string): Document | null;
|
|
47
|
+
/**
|
|
48
|
+
* Get a document by file path
|
|
49
|
+
*
|
|
50
|
+
* @param db - Database connection
|
|
51
|
+
* @param filePath - Full file path
|
|
52
|
+
* @returns Document | null - The document or null if not found
|
|
53
|
+
*/
|
|
54
|
+
export declare function getDocumentByPath(db: Database.Database, filePath: string): Document | null;
|
|
55
|
+
/**
|
|
56
|
+
* Get a document by file hash
|
|
57
|
+
*
|
|
58
|
+
* @param db - Database connection
|
|
59
|
+
* @param fileHash - SHA-256 file hash
|
|
60
|
+
* @returns Document | null - The document or null if not found
|
|
61
|
+
*/
|
|
62
|
+
export declare function getDocumentByHash(db: Database.Database, fileHash: string): Document | null;
|
|
63
|
+
/**
|
|
64
|
+
* Result of listDocumentsWithCursor: one page of documents plus the cursor for the next page
|
|
65
|
+
*/
|
|
66
|
+
export interface ListDocumentsResult {
|
|
67
|
+
documents: Document[];
|
|
68
|
+
/** Cursor for the next page (null if no more results) */
|
|
69
|
+
next_cursor: string | null;
|
|
70
|
+
}
|
|
71
|
+
/**
|
|
72
|
+
* List documents with optional filtering.
|
|
73
|
+
*
|
|
74
|
+
* Supports both offset-based and cursor-based pagination:
|
|
75
|
+
* - When `cursor` is provided, uses keyset pagination (WHERE created_at < cursor.created_at
|
|
76
|
+
* OR (created_at = cursor.created_at AND id < cursor.id)) and ignores offset.
|
|
77
|
+
* - When `cursor` is absent, uses traditional LIMIT/OFFSET.
|
|
78
|
+
*
|
|
79
|
+
* @param db - Database connection
|
|
80
|
+
* @param options - Optional filter options (status, limit, offset, cursor)
|
|
81
|
+
* @returns Document[] - Array of documents (backward-compatible)
|
|
82
|
+
*/
|
|
83
|
+
export declare function listDocuments(db: Database.Database, options?: ListDocumentsOptions): Document[];
|
|
84
|
+
/**
|
|
85
|
+
* List documents with cursor-based pagination support.
|
|
86
|
+
*
|
|
87
|
+
* Returns both the documents and a next_cursor for fetching the next page.
|
|
88
|
+
*
|
|
89
|
+
* @param db - Database connection
|
|
90
|
+
* @param options - Optional filter options (status, limit, offset, cursor)
|
|
91
|
+
* @returns ListDocumentsResult with documents and next_cursor
|
|
92
|
+
*/
|
|
93
|
+
export declare function listDocumentsWithCursor(db: Database.Database, options?: ListDocumentsOptions): ListDocumentsResult;
|
|
94
|
+
/**
|
|
95
|
+
* Update document status
|
|
96
|
+
*
|
|
97
|
+
* @param db - Database connection
|
|
98
|
+
* @param id - Document ID
|
|
99
|
+
* @param status - New status
|
|
100
|
+
* @param errorMessage - Optional error message (for 'failed' status)
|
|
101
|
+
* @param updateMetadataModified - Callback to update metadata modified timestamp
|
|
102
|
+
*/
|
|
103
|
+
export declare function updateDocumentStatus(db: Database.Database, id: string, status: DocumentStatus, errorMessage: string | undefined, updateMetadataModified: () => void): void;
|
|
104
|
+
/**
|
|
105
|
+
* Update document when OCR completes
|
|
106
|
+
*
|
|
107
|
+
* @param db - Database connection
|
|
108
|
+
* @param id - Document ID
|
|
109
|
+
* @param pageCount - Number of pages processed
|
|
110
|
+
* @param ocrCompletedAt - ISO 8601 completion timestamp
|
|
111
|
+
* @param updateMetadataModified - Callback to update metadata modified timestamp
|
|
112
|
+
*/
|
|
113
|
+
export declare function updateDocumentOCRComplete(db: Database.Database, id: string, pageCount: number, ocrCompletedAt: string, updateMetadataModified: () => void): void;
|
|
114
|
+
/**
|
|
115
|
+
* Update document metadata (title, author, subject) from OCR extraction
|
|
116
|
+
*
|
|
117
|
+
* @param db - Database connection
|
|
118
|
+
* @param id - Document ID
|
|
119
|
+
* @param metadata - Metadata fields to update (null values are ignored via COALESCE)
|
|
120
|
+
* @param updateMetadataModified - Callback to update metadata modified timestamp
|
|
121
|
+
*/
|
|
122
|
+
export declare function updateDocumentMetadata(db: Database.Database, id: string, metadata: {
|
|
123
|
+
docTitle?: string | null;
|
|
124
|
+
docAuthor?: string | null;
|
|
125
|
+
docSubject?: string | null;
|
|
126
|
+
}, updateMetadataModified: () => void): void;
|
|
127
|
+
/**
|
|
128
|
+
* Delete a document and all related data (CASCADE DELETE)
|
|
129
|
+
*
|
|
130
|
+
* @param db - Database connection
|
|
131
|
+
* @param id - Document ID to delete
|
|
132
|
+
* @param updateMetadataCounts - Callback to update metadata counts
|
|
133
|
+
*/
|
|
134
|
+
export declare function deleteDocument(db: Database.Database, id: string, updateMetadataCounts: () => void): void;
|
|
135
|
+
/**
|
|
136
|
+
* Clean all derived data for a document, keeping the document record and its DOCUMENT-level provenance.
|
|
137
|
+
*
|
|
138
|
+
* Deletes: vec_embeddings, embeddings, images, chunks, ocr_results, and non-root provenance records.
|
|
139
|
+
* This is used by retry_failed to reset a document to a clean "pending" state.
|
|
140
|
+
*
|
|
141
|
+
* @param db - Database connection
|
|
142
|
+
* @param documentId - Document ID to clean
|
|
143
|
+
*/
|
|
144
|
+
export declare function cleanDocumentDerivedData(db: Database.Database, documentId: string): void;
|
|
145
|
+
//# sourceMappingURL=document-operations.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"document-operations.d.ts","sourceRoot":"","sources":["../../../../src/services/storage/database/document-operations.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,QAAQ,MAAM,gBAAgB,CAAC;AAEtC,OAAO,EAAE,QAAQ,EAAE,cAAc,EAAE,MAAM,6BAA6B,CAAC;AACvE,OAAO,EAAiD,oBAAoB,EAAE,MAAM,YAAY,CAAC;AASjG;;;;;;;GAOG;AACH,wBAAgB,YAAY,CAAC,SAAS,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,GAAG,MAAM,CAElE;AAED;;;;;;GAMG;AACH,wBAAgB,YAAY,CAAC,MAAM,EAAE,MAAM,GAAG;IAAE,UAAU,EAAE,MAAM,CAAC;IAAC,EAAE,EAAE,MAAM,CAAA;CAAE,CAU/E;AAED;;;;;;;GAOG;AACH,wBAAgB,cAAc,CAC5B,EAAE,EAAE,QAAQ,CAAC,QAAQ,EACrB,GAAG,EAAE,IAAI,CAAC,QAAQ,EAAE,YAAY,CAAC,EACjC,oBAAoB,EAAE,MAAM,IAAI,GAC/B,MAAM,CAiCR;AAED;;;;;;GAMG;AACH,wBAAgB,WAAW,CAAC,EAAE,EAAE,QAAQ,CAAC,QAAQ,EAAE,EAAE,EAAE,MAAM,GAAG,QAAQ,GAAG,IAAI,CAI9E;AAED;;;;;;GAMG;AACH,wBAAgB,iBAAiB,CAAC,EAAE,EAAE,QAAQ,CAAC,QAAQ,EAAE,QAAQ,EAAE,MAAM,GAAG,QAAQ,GAAG,IAAI,CAI1F;AAED;;;;;;GAMG;AACH,wBAAgB,iBAAiB,CAAC,EAAE,EAAE,QAAQ,CAAC,QAAQ,EAAE,QAAQ,EAAE,MAAM,GAAG,QAAQ,GAAG,IAAI,CAI1F;AAED;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAClC,SAAS,EAAE,QAAQ,EAAE,CAAC;IACtB,yDAAyD;IACzD,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;CAC5B;AAED;;;;;;;;;;;GAWG;AACH,wBAAgB,aAAa,CAAC,EAAE,EAAE,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,EAAE,oBAAoB,GAAG,QAAQ,EAAE,CAG/F;AAED;;;;;;;;GAQG;AACH,wBAAgB,uBAAuB,CACrC,EAAE,EAAE,QAAQ,CAAC,QAAQ,EACrB,OAAO,CAAC,EAAE,oBAAoB,GAC7B,mBAAmB,CA6CrB;AAED;;;;;;;;GAQG;AACH,wBAAgB,oBAAoB,CAClC,EAAE,EAAE,QAAQ,CAAC,QAAQ,EACrB,EAAE,EAAE,MAAM,EACV,MAAM,EAAE,cAAc,EACtB,YAAY,EAAE,MAAM,GAAG,SAAS,EAChC,sBAAsB,EAAE,MAAM,IAAI,GACjC,IAAI,CAgBN;AAED;;;;;;;;GAQG;AACH,wBAAgB,yBAAyB,CACvC,EAAE,EAAE,QAAQ,CAAC,QAAQ,EACrB,EAAE,EAAE,MAAM,EACV,SAAS,EAAE,MAAM,EACjB,cAAc,EAAE,MAAM,EACtB,sBAAsB,EAAE,MAAM,IAAI,GACjC,IAAI,CAgBN;AAED;;;;;;;GAOG;AACH,wBAAgB,sBAAsB,CACpC,EAAE,EAAE,QAAQ,CAAC,QAAQ,EACrB,EAAE,EAAE,MAAM,EACV,QAAQ,EAAE;IAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAAC,SAAS,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAAC,UAAU,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;CAAE,EAC7F,sBAAsB,EAAE,MAAM,IAAI,GACjC,IAAI,CAkBN;AAmPD;;;;;;GAMG;AACH,w
BAAgB,cAAc,CAC5B,EAAE,EAAE,QAAQ,CAAC,QAAQ,EACrB,EAAE,EAAE,MAAM,EACV,oBAAoB,EAAE,MAAM,IAAI,GAC/B,IAAI,CA4DN;AAED;;;;;;;;GAQG;AACH,wBAAgB,wBAAwB,CAAC,EAAE,EAAE,QAAQ,CAAC,QAAQ,EAAE,UAAU,EAAE,MAAM,GAAG,IAAI,CAuCxF"}
|
|
@@ -0,0 +1,498 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Document operations for DatabaseService
|
|
3
|
+
*
|
|
4
|
+
* Handles all CRUD operations for documents including
|
|
5
|
+
* insert, get, list, update, and delete with cascade.
|
|
6
|
+
*/
|
|
7
|
+
import { v4 as uuidv4 } from 'uuid';
|
|
8
|
+
import { DatabaseError, DatabaseErrorCode } from './types.js';
|
|
9
|
+
import { runWithForeignKeyCheck } from './helpers.js';
|
|
10
|
+
import { rowToDocument } from './converters.js';
|
|
11
|
+
import { computeHash } from '../../../utils/hash.js';
|
|
12
|
+
// ═══════════════════════════════════════════════════════════════════════════════
|
|
13
|
+
// CURSOR-BASED PAGINATION HELPERS
|
|
14
|
+
// ═══════════════════════════════════════════════════════════════════════════════
|
|
15
|
+
/**
 * Build an opaque pagination cursor for a document row.
 *
 * The (created_at, id) pair is serialized as JSON and then base64url-encoded
 * so the cursor can be carried in URLs without further escaping.
 *
 * @param createdAt - ISO 8601 timestamp of the row
 * @param id - Document UUID of the row
 * @returns Base64url-encoded cursor string
 */
export function encodeCursor(createdAt, id) {
    const payload = { created_at: createdAt, id };
    const serialized = JSON.stringify(payload);
    return Buffer.from(serialized).toString('base64url');
}
|
|
26
|
+
/**
 * Decode a cursor back to a (created_at, id) tuple.
 *
 * The cursor is base64url-decoded, parsed as JSON, and validated: the payload
 * must be an object whose `created_at` and `id` are both strings. Any other
 * property on the payload is dropped from the result.
 *
 * @param cursor - Base64url-encoded cursor string
 * @returns Decoded cursor with created_at and id
 * @throws Error (message prefixed with "Invalid cursor: ") if the cursor is not
 *         valid JSON or does not have the expected shape; the underlying error
 *         is attached as `cause`
 */
export function decodeCursor(cursor) {
    try {
        const decoded = JSON.parse(Buffer.from(cursor, 'base64url').toString('utf-8'));
        // JSON.parse can legitimately return null; guard before touching properties
        // so the caller gets the format error rather than a raw TypeError.
        const isRecord = decoded !== null && typeof decoded === 'object';
        if (!isRecord || typeof decoded.created_at !== 'string' || typeof decoded.id !== 'string') {
            throw new Error('Invalid cursor format: missing created_at or id');
        }
        return { created_at: decoded.created_at, id: decoded.id };
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        throw new Error(`Invalid cursor: ${reason}`, { cause: error });
    }
}
|
|
45
|
+
/**
 * Insert a new document row.
 *
 * `created_at` is stamped here with the current time; every other column is
 * taken from `doc`. The insert goes through runWithForeignKeyCheck, which is
 * given a context message naming the provenance_id. After the insert the
 * metadata-count callback is invoked.
 *
 * @param db - Database connection
 * @param doc - Document data (created_at will be generated)
 * @param updateMetadataCounts - Callback to update metadata counts
 * @returns string - The document ID
 */
export function insertDocument(db, doc, updateMetadataCounts) {
    const createdAt = new Date().toISOString();
    const insertStmt = db.prepare(`
    INSERT INTO documents (
      id, file_path, file_name, file_hash, file_size, file_type,
      status, page_count, provenance_id, created_at, modified_at,
      ocr_completed_at, error_message
    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
  `);
    const {
        id,
        file_path,
        file_name,
        file_hash,
        file_size,
        file_type,
        status,
        page_count,
        provenance_id,
        modified_at,
        ocr_completed_at,
        error_message,
    } = doc;
    // Order must match the column list in the INSERT statement above.
    const values = [
        id,
        file_path,
        file_name,
        file_hash,
        file_size,
        file_type,
        status,
        page_count,
        provenance_id,
        createdAt,
        modified_at,
        ocr_completed_at,
        error_message,
    ];
    const failureContext = `inserting document: provenance_id "${provenance_id}" does not exist`;
    runWithForeignKeyCheck(insertStmt, values, failureContext);
    updateMetadataCounts();
    return doc.id;
}
|
|
80
|
+
/**
 * Look up a single document by its primary key.
 *
 * @param db - Database connection
 * @param id - Document ID
 * @returns Document | null - The converted row, or null when no row matches
 */
export function getDocument(db, id) {
    const match = db.prepare('SELECT * FROM documents WHERE id = ?').get(id);
    if (!match) {
        return null;
    }
    return rowToDocument(match);
}
|
|
92
|
+
/**
 * Look up a single document by the path it was ingested from.
 *
 * @param db - Database connection
 * @param filePath - Full file path
 * @returns Document | null - The converted row, or null when no row matches
 */
export function getDocumentByPath(db, filePath) {
    const match = db.prepare('SELECT * FROM documents WHERE file_path = ?').get(filePath);
    if (!match) {
        return null;
    }
    return rowToDocument(match);
}
|
|
104
|
+
/**
 * Look up a single document by the hash of its file contents.
 *
 * @param db - Database connection
 * @param fileHash - SHA-256 file hash
 * @returns Document | null - The converted row, or null when no row matches
 */
export function getDocumentByHash(db, fileHash) {
    const match = db.prepare('SELECT * FROM documents WHERE file_hash = ?').get(fileHash);
    if (!match) {
        return null;
    }
    return rowToDocument(match);
}
|
|
116
|
+
/**
 * List documents with optional filtering, returning only the rows.
 *
 * This is a thin wrapper over listDocumentsWithCursor that discards the
 * next_cursor, so it accepts the same options:
 * - When `cursor` is provided, keyset pagination is used (WHERE created_at < cursor.created_at
 *   OR (created_at = cursor.created_at AND id < cursor.id)) and offset is ignored.
 * - When `cursor` is absent, traditional LIMIT/OFFSET is used.
 *
 * @param db - Database connection
 * @param options - Optional filter options (status, limit, offset, cursor)
 * @returns Document[] - Array of documents (backward-compatible)
 */
export function listDocuments(db, options) {
    const { documents } = listDocumentsWithCursor(db, options);
    return documents;
}
|
|
132
|
+
/**
 * List documents with cursor-based pagination support.
 *
 * Rows are ordered by (created_at DESC, id DESC). Filtering and paging:
 * - `status`, when truthy, restricts rows to that status.
 * - `cursor`, when provided, applies keyset filtering (rows strictly after the
 *   cursor position in the sort order) and causes `offset` to be ignored.
 * - `offset` is applied only when no cursor is given.
 * - `limit` defaults to 10000.
 *
 * To decide whether another page exists, one extra row beyond `limit` is
 * requested and then discarded, so `next_cursor` is non-null only when more
 * rows really follow the returned page. The look-ahead is skipped for
 * negative or non-integer limits, which are passed to SQLite unchanged and
 * always yield a null `next_cursor`.
 *
 * @param db - Database connection
 * @param options - Optional filter options (status, limit, offset, cursor)
 * @returns ListDocumentsResult with documents and next_cursor
 * @throws Error if `options.cursor` cannot be decoded
 */
export function listDocumentsWithCursor(db, options) {
    const conditions = [];
    const params = [];
    if (options?.status) {
        conditions.push('status = ?');
        params.push(options.status);
    }
    // Cursor-based pagination: keyset filtering
    if (options?.cursor) {
        const decoded = decodeCursor(options.cursor);
        conditions.push('(created_at < ? OR (created_at = ? AND id < ?))');
        params.push(decoded.created_at, decoded.created_at, decoded.id);
    }
    let query = 'SELECT * FROM documents';
    if (conditions.length > 0) {
        query += ' WHERE ' + conditions.join(' AND ');
    }
    query += ' ORDER BY created_at DESC, id DESC';
    const limit = options?.limit ?? 10000;
    // Fetch one row past the page so "is there a next page?" is answered
    // exactly, instead of guessing from a full page.
    const useLookahead = Number.isInteger(limit) && limit >= 0 && Number.isSafeInteger(limit + 1);
    query += ' LIMIT ?';
    params.push(useLookahead ? limit + 1 : limit);
    // Only apply OFFSET when NOT using cursor-based pagination
    if (!options?.cursor && options?.offset !== undefined) {
        query += ' OFFSET ?';
        params.push(options.offset);
    }
    const rows = db.prepare(query).all(...params);
    const hasMore = useLookahead && rows.length > limit;
    // Drop the look-ahead row before conversion so it is never validated or returned.
    const pageRows = hasMore ? rows.slice(0, limit) : rows;
    const documents = pageRows.map(rowToDocument);
    // Compute next_cursor from the last row of the returned page
    let next_cursor = null;
    if (hasMore && documents.length > 0) {
        const lastDoc = documents[documents.length - 1];
        next_cursor = encodeCursor(lastDoc.created_at, lastDoc.id);
    }
    return { documents, next_cursor };
}
|
|
178
|
+
/**
 * Set a document's status and error message.
 *
 * `modified_at` is stamped with the current time. An omitted error message is
 * stored as NULL, which also clears any previous message.
 *
 * @param db - Database connection
 * @param id - Document ID
 * @param status - New status
 * @param errorMessage - Optional error message (for 'failed' status)
 * @param updateMetadataModified - Callback to update metadata modified timestamp
 * @throws DatabaseError (DOCUMENT_NOT_FOUND) when no row has the given id
 */
export function updateDocumentStatus(db, id, status, errorMessage, updateMetadataModified) {
    const now = new Date().toISOString();
    const updateStmt = db.prepare(`
    UPDATE documents
    SET status = ?, error_message = ?, modified_at = ?
    WHERE id = ?
  `);
    const { changes } = updateStmt.run(status, errorMessage ?? null, now, id);
    if (changes === 0) {
        throw new DatabaseError(`Document "${id}" not found`, DatabaseErrorCode.DOCUMENT_NOT_FOUND);
    }
    updateMetadataModified();
}
|
|
200
|
+
/**
 * Record the outcome of OCR on a document.
 *
 * Stores the page count and OCR completion time, stamps `modified_at` with the
 * current time, and sets the status to 'processing' (not a terminal status).
 *
 * @param db - Database connection
 * @param id - Document ID
 * @param pageCount - Number of pages processed
 * @param ocrCompletedAt - ISO 8601 completion timestamp
 * @param updateMetadataModified - Callback to update metadata modified timestamp
 * @throws DatabaseError (DOCUMENT_NOT_FOUND) when no row has the given id
 */
export function updateDocumentOCRComplete(db, id, pageCount, ocrCompletedAt, updateMetadataModified) {
    const now = new Date().toISOString();
    const updateStmt = db.prepare(`
    UPDATE documents
    SET status = 'processing', page_count = ?, ocr_completed_at = ?, modified_at = ?
    WHERE id = ?
  `);
    const { changes } = updateStmt.run(pageCount, ocrCompletedAt, now, id);
    if (changes === 0) {
        throw new DatabaseError(`Document "${id}" not found`, DatabaseErrorCode.DOCUMENT_NOT_FOUND);
    }
    updateMetadataModified();
}
|
|
222
|
+
/**
 * Update document metadata (title, author, subject) from OCR extraction.
 *
 * Each field is written through COALESCE, so a null or omitted field leaves
 * the stored value untouched. `modified_at` is stamped with the current time.
 * Unlike the status updaters in this file, a missing document is not an error
 * here: the callback is simply not invoked when no row was changed.
 *
 * @param db - Database connection
 * @param id - Document ID
 * @param metadata - Metadata fields to update (null values are ignored via COALESCE)
 * @param updateMetadataModified - Callback to update metadata modified timestamp
 */
export function updateDocumentMetadata(db, id, metadata, updateMetadataModified) {
    const now = new Date().toISOString();
    const updateStmt = db.prepare(`
    UPDATE documents
    SET doc_title = COALESCE(?, doc_title),
        doc_author = COALESCE(?, doc_author),
        doc_subject = COALESCE(?, doc_subject),
        modified_at = ?
    WHERE id = ?
  `);
    const title = metadata.docTitle ?? null;
    const author = metadata.docAuthor ?? null;
    const subject = metadata.docSubject ?? null;
    const outcome = updateStmt.run(title, author, subject, now, id);
    if (outcome.changes > 0) {
        updateMetadataModified();
    }
}
|
|
244
|
+
/**
 * Run one cleanup step, tolerating only a missing table.
 *
 * Any error whose message contains "no such table" is logged with
 * `skipMessage` and swallowed; every other error is rethrown unchanged.
 *
 * @param skipMessage - Text logged (followed by the error message) when the step is skipped
 * @param step - The cleanup work to attempt
 */
function skipIfTableMissing(skipMessage, step) {
    try {
        step();
    }
    catch (e) {
        const msg = e instanceof Error ? e.message : String(e);
        if (!msg.includes('no such table')) {
            throw e;
        }
        console.error(skipMessage, msg);
    }
}
/**
 * Shared cleanup: delete all derived records for a document.
 *
 * The document row itself and its provenance records are NOT touched here.
 *
 * Deletion order (FK-safe):
 *  1. vec_embeddings for this document's embeddings
 *  2. NULL images.vlm_embedding_id on this document's images
 *     (breaks the circular FK with embeddings)
 *  3. Re-queue other documents' images that point at this document's
 *     embeddings (VLM dedup): vlm_embedding_id = NULL, vlm_status = 'pending'
 *  4. entity_tags for the document and its chunks/images/extractions
 *     (skipped if the table does not exist)
 *  5. embeddings
 *  6. images
 *  7. Decrement clusters.document_count, then delete document_clusters rows
 *  8. comparisons referencing the document on either side
 *  9. form_fills matched by the document's file_hash
 *     (skipped if the table does not exist)
 * 10. chunks
 * 11. extractions (before ocr_results: extractions.ocr_result_id -> ocr_results)
 * 12. ocr_results
 * 13. uploaded_files tied to the document's provenance chain
 *     (skipped if the table does not exist)
 * 14. FTS metadata count updates for ids 1, 2, 3
 *     (skipped if a table does not exist)
 *
 * @param db - Database connection
 * @param documentId - ID of the document whose derived data is removed
 * @param caller - Label of the calling operation, used only in the re-queue warning
 * @returns The number of embeddings the document had before deletion (for logging)
 */
function deleteDerivedRecords(db, documentId, caller) {
    // M-3: Count embeddings first, then use subquery DELETE instead of loading all IDs
    const embeddingCount = db
        .prepare('SELECT COUNT(*) as cnt FROM embeddings WHERE document_id = ?')
        .get(documentId).cnt;
    // Delete from vec_embeddings using a single subquery
    db.prepare('DELETE FROM vec_embeddings WHERE embedding_id IN (SELECT id FROM embeddings WHERE document_id = ?)').run(documentId);
    // Break circular FK: images.vlm_embedding_id → embeddings ↔ embeddings.image_id → images
    // NULL out vlm_embedding_id on THIS document's images so embeddings can be deleted
    db.prepare('UPDATE images SET vlm_embedding_id = NULL WHERE document_id = ?').run(documentId);
    // Re-queue OTHER documents' images that shared embeddings via VLM dedup.
    // Setting vlm_status='pending' ensures they get re-processed instead of
    // silently remaining 'complete' but invisible to search (orphaned).
    const orphanedImages = db
        .prepare(`
      SELECT id, document_id FROM images
      WHERE vlm_embedding_id IN (SELECT id FROM embeddings WHERE document_id = ?)
        AND document_id != ?
    `)
        .all(documentId, documentId);
    if (orphanedImages.length > 0) {
        console.error(`[WARN] ${caller} "${documentId}": re-queuing ${orphanedImages.length} images from other documents ` +
            `that shared VLM embeddings (document_ids: ${[...new Set(orphanedImages.map((i) => i.document_id))].join(', ')})`);
        db.prepare(`
      UPDATE images SET vlm_embedding_id = NULL, vlm_status = 'pending'
      WHERE vlm_embedding_id IN (SELECT id FROM embeddings WHERE document_id = ?)
        AND document_id != ?
    `).run(documentId, documentId);
    }
    // Delete entity_tags referencing entities about to be deleted (polymorphic FK, no CASCADE).
    // Must run while chunks/images/extractions still exist, since the subqueries read them.
    skipIfTableMissing('[document-operations] entity_tags table not found, skipping:', () => {
        db.prepare(`
      DELETE FROM entity_tags WHERE
        (entity_type = 'document' AND entity_id = ?)
        OR (entity_type = 'chunk' AND entity_id IN (SELECT id FROM chunks WHERE document_id = ?))
        OR (entity_type = 'image' AND entity_id IN (SELECT id FROM images WHERE document_id = ?))
        OR (entity_type = 'extraction' AND entity_id IN (SELECT id FROM extractions WHERE document_id = ?))
    `).run(documentId, documentId, documentId, documentId);
    });
    // Delete from embeddings (safe: images.vlm_embedding_id already NULLed)
    db.prepare('DELETE FROM embeddings WHERE document_id = ?').run(documentId);
    // Delete from images (safe: embeddings.image_id references gone)
    db.prepare('DELETE FROM images WHERE document_id = ?').run(documentId);
    // Decrement cluster document_count before removing assignments
    db.prepare(`UPDATE clusters SET document_count = document_count - 1
     WHERE id IN (SELECT cluster_id FROM document_clusters WHERE document_id = ? AND cluster_id IS NOT NULL)`).run(documentId);
    // Delete document-cluster assignments
    db.prepare('DELETE FROM document_clusters WHERE document_id = ?').run(documentId);
    // Delete comparisons referencing this document
    db.prepare('DELETE FROM comparisons WHERE document_id_1 = ? OR document_id_2 = ?').run(documentId, documentId);
    // Delete form_fills linked to this document via source_file_hash
    // (form_fills has no document_id FK — it joins through source_file_hash)
    skipIfTableMissing('[document-operations] form_fills table not found, skipping:', () => {
        const docRow = db.prepare('SELECT file_hash FROM documents WHERE id = ?').get(documentId);
        if (docRow) {
            db.prepare('DELETE FROM form_fills WHERE source_file_hash = ?').run(docRow.file_hash);
        }
    });
    // Delete from chunks
    db.prepare('DELETE FROM chunks WHERE document_id = ?').run(documentId);
    // Delete from extractions (BEFORE ocr_results: extractions.ocr_result_id REFERENCES ocr_results(id))
    db.prepare('DELETE FROM extractions WHERE document_id = ?').run(documentId);
    // Delete from ocr_results (safe now that extractions are gone)
    db.prepare('DELETE FROM ocr_results WHERE document_id = ?').run(documentId);
    // Delete uploaded_files whose provenance_id references this document's provenance chain.
    // Must happen before provenance cleanup (callers delete provenance after this function).
    // uploaded_files has provenance_id NOT NULL REFERENCES provenance(id).
    skipIfTableMissing('[document-operations] uploaded_files table not found, skipping:', () => {
        const docForProv = db.prepare('SELECT provenance_id FROM documents WHERE id = ?').get(documentId);
        if (docForProv) {
            db.prepare('DELETE FROM uploaded_files WHERE provenance_id IN (SELECT id FROM provenance WHERE root_document_id = ?)').run(docForProv.provenance_id);
        }
    });
    // Update FTS metadata counts after chunk/embedding deletion.
    // Only "no such table" errors (older schemas pre-v4) are ignored.
    skipIfTableMissing('[document-operations] fts_index_metadata table not found, skipping FTS update:', () => {
        // One timestamp for all three rows so they record the same rebuild instant.
        const rebuiltAt = new Date().toISOString();
        const refreshCount = db.prepare(`
      UPDATE fts_index_metadata SET chunks_indexed = ?, last_rebuild_at = ?
      WHERE id = ?
    `);
        // id=1: chunk FTS
        const chunkCount = db.prepare('SELECT COUNT(*) as cnt FROM chunks').get().cnt;
        refreshCount.run(chunkCount, rebuiltAt, 1);
        // id=2: VLM FTS
        const vlmCount = db.prepare('SELECT COUNT(*) as cnt FROM embeddings WHERE image_id IS NOT NULL').get().cnt;
        refreshCount.run(vlmCount, rebuiltAt, 2);
        // id=3: extractions FTS
        const extCount = db.prepare('SELECT COUNT(*) as cnt FROM extractions').get().cnt;
        refreshCount.run(extCount, rebuiltAt, 3);
    });
    return embeddingCount;
}
|
|
380
|
+
/**
 * Look up the synthetic ORPHANED_ROOT provenance row, inserting it on first use.
 *
 * The row is identified by root_document_id = 'ORPHANED_ROOT' and type = 'DOCUMENT'.
 * deleteDocument uses the returned id as the new parent for provenance rows that
 * must outlive their document because clusters still reference them (P1.4).
 *
 * @param db - Database connection
 * @returns The ID of the ORPHANED_ROOT provenance record
 */
function getOrCreateOrphanedRoot(db) {
    const lookupStmt = db.prepare("SELECT id FROM provenance WHERE root_document_id = 'ORPHANED_ROOT' AND type = 'DOCUMENT' LIMIT 1");
    const found = lookupStmt.get();
    if (found) {
        return found.id;
    }
    // Nothing stored yet: build the synthetic root row.
    const orphanId = uuidv4();
    const timestamp = new Date().toISOString();
    // Values are listed in the same order as the INSERT column list below.
    const rowValues = [
        orphanId, // id
        'DOCUMENT', // type
        timestamp, // created_at
        timestamp, // processed_at
        'FILE', // source_type
        null, // source_id
        'ORPHANED_ROOT', // root_document_id
        computeHash('ORPHANED_ROOT'), // content_hash
        null, // input_hash
        'system', // processor
        '1.0.0', // processor_version
        '{}', // processing_params
        null, // parent_id
        '[]', // parent_ids
        0, // chain_depth
        '["DOCUMENT"]', // chain_path
    ];
    const insertStmt = db.prepare(`
    INSERT INTO provenance (
      id, type, created_at, processed_at, source_type, source_id,
      root_document_id, content_hash, input_hash, processor,
      processor_version, processing_params, parent_id, parent_ids,
      chain_depth, chain_path
    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
  `);
    insertStmt.run(...rowValues);
    return orphanId;
}
|
|
409
|
+
/**
 * Delete a document together with everything derived from it (cascade delete).
 *
 * Sequence, all inside one transaction:
 *   1. derived records via deleteDerivedRecords
 *   2. the documents row
 *   3. the provenance chain rooted at the document's provenance_id; rows still
 *      referenced by clusters are re-parented to ORPHANED_ROOT instead of deleted
 *   4. the updateMetadataCounts callback
 *
 * @param db - Database connection
 * @param id - Document ID to delete
 * @param updateMetadataCounts - Callback to update metadata counts
 * @throws DatabaseError with DOCUMENT_NOT_FOUND when no document has the given id
 */
export function deleteDocument(db, id, updateMetadataCounts) {
    // Existence check is a plain read, so it stays outside the transaction.
    const doc = getDocument(db, id);
    if (!doc) {
        throw new DatabaseError(`Document "${id}" not found`, DatabaseErrorCode.DOCUMENT_NOT_FOUND);
    }
    // H-5: one transaction around the whole cascade so a failure part-way
    // through cannot leave the database half-deleted.
    const cascade = db.transaction(() => {
        deleteDerivedRecords(db, id, 'deleteDocument');
        // The document row goes before its provenance because
        // documents.provenance_id is an FK into provenance.
        db.prepare('DELETE FROM documents WHERE id = ?').run(id);
        // Deepest provenance rows first (self-referential FKs on source_id and
        // parent_id). NOTE: root_document_id holds the document's provenance_id,
        // NOT the document id.
        const chainRows = db
            .prepare('SELECT id FROM provenance WHERE root_document_id = ? ORDER BY chain_depth DESC')
            .all(doc.provenance_id);
        // P1.4: target row for re-parenting survivors.
        const orphanedRootId = getOrCreateOrphanedRoot(db);
        // Rows at the same chain_depth may reference one another, so depth
        // ordering alone is not enough; NULL the self-references up front.
        const detachStmt = db.prepare('UPDATE provenance SET parent_id = NULL, source_id = NULL WHERE id = ?');
        chainRows.forEach((row) => {
            detachStmt.run(row.id);
        });
        const removeStmt = db.prepare('DELETE FROM provenance WHERE id = ?');
        const clusterRefCountStmt = db.prepare('SELECT COUNT(*) as cnt FROM clusters WHERE provenance_id = ?');
        const adoptStmt = db.prepare('UPDATE provenance SET source_id = NULL, parent_id = ?, root_document_id = ? WHERE id = ?');
        for (const row of chainRows) {
            const { cnt: referencingClusters } = clusterRefCountStmt.get(row.id);
            if (referencingClusters > 0) {
                // clusters.provenance_id is a NOT NULL FK, so this row cannot be
                // deleted yet. Hang it under the orphaned root (P1.4); it is
                // cleaned up when the cluster run is deleted.
                adoptStmt.run(orphanedRootId, 'ORPHANED_ROOT', row.id);
            }
            else {
                removeStmt.run(row.id);
            }
        }
        // Kept inside the transaction so the counts commit atomically with the delete.
        updateMetadataCounts();
    });
    cascade();
}
|
|
463
|
+
/**
 * Clean all derived data for a document, keeping the document record and its
 * DOCUMENT-level provenance (chain_depth = 0).
 *
 * Derived rows are removed through deleteDerivedRecords, then every provenance
 * row with chain_depth > 0 in the document's chain is deleted. Per the original
 * description this is used by retry_failed to reset a document to a clean
 * "pending" state.
 *
 * Before deleting, the self-referencing FKs (parent_id, source_id) of the rows
 * about to be removed are set to NULL. deleteDocument does the same: ordering by
 * chain_depth DESC does not order rows that share a depth, so a referenced row
 * could otherwise be deleted while a sibling still points at it and trip the FK.
 *
 * @param db - Database connection
 * @param documentId - Document ID to clean
 * @throws DatabaseError with DOCUMENT_NOT_FOUND when no document has the given id
 */
export function cleanDocumentDerivedData(db, documentId) {
    // Validate document exists (outside transaction - read-only)
    const doc = getDocument(db, documentId);
    if (!doc) {
        throw new DatabaseError(`Document "${documentId}" not found`, DatabaseErrorCode.DOCUMENT_NOT_FOUND);
    }
    // H-5: Wrap cleanup in a transaction so partial deletes cannot leave
    // the database in an inconsistent state.
    let embeddingCount = 0;
    let provenanceCount = 0;
    const runInTransaction = db.transaction(() => {
        embeddingCount = deleteDerivedRecords(db, documentId, 'cleanDocumentDerivedData');
        // Non-root provenance only; the chain_depth = 0 DOCUMENT row is kept.
        // root_document_id stores the document's provenance_id, NOT document id.
        const nonRootProvIds = db
            .prepare('SELECT id FROM provenance WHERE root_document_id = ? AND chain_depth > 0 ORDER BY chain_depth DESC')
            .all(doc.provenance_id);
        // Break references between the rows being deleted so that delete order
        // within one chain_depth cannot cause an FK violation. Every row touched
        // here is deleted in the next loop, so no NULLed link survives.
        const clearSelfRefStmt = db.prepare('UPDATE provenance SET parent_id = NULL, source_id = NULL WHERE id = ?');
        for (const { id: provId } of nonRootProvIds) {
            clearSelfRefStmt.run(provId);
        }
        // NOTE(review): unlike deleteDocument, rows still referenced by clusters
        // are not skipped or re-parented here — confirm whether a clustered
        // document can reach this path and, if so, how it should be handled.
        const deleteProvStmt = db.prepare('DELETE FROM provenance WHERE id = ?');
        for (const { id: provId } of nonRootProvIds) {
            deleteProvStmt.run(provId);
        }
        provenanceCount = nonRootProvIds.length;
    });
    runInTransaction();
    console.error(`[INFO] Cleaned derived data for document ${documentId}: ${embeddingCount} embeddings, ${provenanceCount} provenance records removed`);
}
|
|
498
|
+
//# sourceMappingURL=document-operations.js.map
|