ocr-provenance-mcp 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ocr-provenance-mcp might be problematic. Click here for more details.
- package/.env.example +55 -0
- package/LICENSE +78 -0
- package/README.md +1154 -0
- package/dist/bin-http.d.ts +24 -0
- package/dist/bin-http.d.ts.map +1 -0
- package/dist/bin-http.js +275 -0
- package/dist/bin-http.js.map +1 -0
- package/dist/bin-setup.d.ts +11 -0
- package/dist/bin-setup.d.ts.map +1 -0
- package/dist/bin-setup.js +610 -0
- package/dist/bin-setup.js.map +1 -0
- package/dist/bin.d.ts +16 -0
- package/dist/bin.d.ts.map +1 -0
- package/dist/bin.js +16 -0
- package/dist/bin.js.map +1 -0
- package/dist/index.d.ts +13 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +90 -0
- package/dist/index.js.map +1 -0
- package/dist/models/chunk.d.ts +136 -0
- package/dist/models/chunk.d.ts.map +1 -0
- package/dist/models/chunk.js +27 -0
- package/dist/models/chunk.js.map +1 -0
- package/dist/models/cluster.d.ts +79 -0
- package/dist/models/cluster.d.ts.map +1 -0
- package/dist/models/cluster.js +10 -0
- package/dist/models/cluster.js.map +1 -0
- package/dist/models/comparison.d.ts +62 -0
- package/dist/models/comparison.d.ts.map +1 -0
- package/dist/models/comparison.js +8 -0
- package/dist/models/comparison.js.map +1 -0
- package/dist/models/document.d.ts +104 -0
- package/dist/models/document.d.ts.map +1 -0
- package/dist/models/document.js +15 -0
- package/dist/models/document.js.map +1 -0
- package/dist/models/embedding.d.ts +87 -0
- package/dist/models/embedding.d.ts.map +1 -0
- package/dist/models/embedding.js +23 -0
- package/dist/models/embedding.js.map +1 -0
- package/dist/models/extraction.d.ts +15 -0
- package/dist/models/extraction.d.ts.map +1 -0
- package/dist/models/extraction.js +2 -0
- package/dist/models/extraction.js.map +1 -0
- package/dist/models/form-fill.d.ts +23 -0
- package/dist/models/form-fill.d.ts.map +1 -0
- package/dist/models/form-fill.js +2 -0
- package/dist/models/form-fill.js.map +1 -0
- package/dist/models/image.d.ts +177 -0
- package/dist/models/image.d.ts.map +1 -0
- package/dist/models/image.js +8 -0
- package/dist/models/image.js.map +1 -0
- package/dist/models/index.d.ts +14 -0
- package/dist/models/index.d.ts.map +1 -0
- package/dist/models/index.js +22 -0
- package/dist/models/index.js.map +1 -0
- package/dist/models/provenance.d.ts +174 -0
- package/dist/models/provenance.d.ts.map +1 -0
- package/dist/models/provenance.js +53 -0
- package/dist/models/provenance.js.map +1 -0
- package/dist/models/uploaded-file.d.ts +20 -0
- package/dist/models/uploaded-file.d.ts.map +1 -0
- package/dist/models/uploaded-file.js +2 -0
- package/dist/models/uploaded-file.js.map +1 -0
- package/dist/server/errors.d.ts +93 -0
- package/dist/server/errors.d.ts.map +1 -0
- package/dist/server/errors.js +256 -0
- package/dist/server/errors.js.map +1 -0
- package/dist/server/events.d.ts +36 -0
- package/dist/server/events.d.ts.map +1 -0
- package/dist/server/events.js +48 -0
- package/dist/server/events.js.map +1 -0
- package/dist/server/permissions.d.ts +26 -0
- package/dist/server/permissions.d.ts.map +1 -0
- package/dist/server/permissions.js +194 -0
- package/dist/server/permissions.js.map +1 -0
- package/dist/server/register-tools.d.ts +25 -0
- package/dist/server/register-tools.d.ts.map +1 -0
- package/dist/server/register-tools.js +102 -0
- package/dist/server/register-tools.js.map +1 -0
- package/dist/server/startup.d.ts +16 -0
- package/dist/server/startup.d.ts.map +1 -0
- package/dist/server/startup.js +37 -0
- package/dist/server/startup.js.map +1 -0
- package/dist/server/state.d.ts +166 -0
- package/dist/server/state.d.ts.map +1 -0
- package/dist/server/state.js +424 -0
- package/dist/server/state.js.map +1 -0
- package/dist/server/transports/http-transport.d.ts +37 -0
- package/dist/server/transports/http-transport.d.ts.map +1 -0
- package/dist/server/transports/http-transport.js +204 -0
- package/dist/server/transports/http-transport.js.map +1 -0
- package/dist/server/transports/index.d.ts +9 -0
- package/dist/server/transports/index.d.ts.map +1 -0
- package/dist/server/transports/index.js +9 -0
- package/dist/server/transports/index.js.map +1 -0
- package/dist/server/transports/session-manager.d.ts +40 -0
- package/dist/server/transports/session-manager.d.ts.map +1 -0
- package/dist/server/transports/session-manager.js +74 -0
- package/dist/server/transports/session-manager.js.map +1 -0
- package/dist/server/types.d.ts +82 -0
- package/dist/server/types.d.ts.map +1 -0
- package/dist/server/types.js +14 -0
- package/dist/server/types.js.map +1 -0
- package/dist/services/audit.d.ts +26 -0
- package/dist/services/audit.d.ts.map +1 -0
- package/dist/services/audit.js +43 -0
- package/dist/services/audit.js.map +1 -0
- package/dist/services/chunking/chunk-deduplicator.d.ts +33 -0
- package/dist/services/chunking/chunk-deduplicator.d.ts.map +1 -0
- package/dist/services/chunking/chunk-deduplicator.js +46 -0
- package/dist/services/chunking/chunk-deduplicator.js.map +1 -0
- package/dist/services/chunking/chunk-merger.d.ts +26 -0
- package/dist/services/chunking/chunk-merger.d.ts.map +1 -0
- package/dist/services/chunking/chunk-merger.js +94 -0
- package/dist/services/chunking/chunk-merger.js.map +1 -0
- package/dist/services/chunking/chunker.d.ts +62 -0
- package/dist/services/chunking/chunker.d.ts.map +1 -0
- package/dist/services/chunking/chunker.js +566 -0
- package/dist/services/chunking/chunker.js.map +1 -0
- package/dist/services/chunking/heading-normalizer.d.ts +33 -0
- package/dist/services/chunking/heading-normalizer.d.ts.map +1 -0
- package/dist/services/chunking/heading-normalizer.js +101 -0
- package/dist/services/chunking/heading-normalizer.js.map +1 -0
- package/dist/services/chunking/json-block-analyzer.d.ts +163 -0
- package/dist/services/chunking/json-block-analyzer.d.ts.map +1 -0
- package/dist/services/chunking/json-block-analyzer.js +1033 -0
- package/dist/services/chunking/json-block-analyzer.js.map +1 -0
- package/dist/services/chunking/markdown-parser.d.ts +75 -0
- package/dist/services/chunking/markdown-parser.d.ts.map +1 -0
- package/dist/services/chunking/markdown-parser.js +428 -0
- package/dist/services/chunking/markdown-parser.js.map +1 -0
- package/dist/services/chunking/text-normalizer.d.ts +20 -0
- package/dist/services/chunking/text-normalizer.d.ts.map +1 -0
- package/dist/services/chunking/text-normalizer.js +36 -0
- package/dist/services/chunking/text-normalizer.js.map +1 -0
- package/dist/services/clm/contract-schemas.d.ts +36 -0
- package/dist/services/clm/contract-schemas.d.ts.map +1 -0
- package/dist/services/clm/contract-schemas.js +92 -0
- package/dist/services/clm/contract-schemas.js.map +1 -0
- package/dist/services/clm/summarization.d.ts +46 -0
- package/dist/services/clm/summarization.d.ts.map +1 -0
- package/dist/services/clm/summarization.js +61 -0
- package/dist/services/clm/summarization.js.map +1 -0
- package/dist/services/clustering/clustering-service.d.ts +58 -0
- package/dist/services/clustering/clustering-service.d.ts.map +1 -0
- package/dist/services/clustering/clustering-service.js +467 -0
- package/dist/services/clustering/clustering-service.js.map +1 -0
- package/dist/services/comparison/diff-service.d.ts +41 -0
- package/dist/services/comparison/diff-service.d.ts.map +1 -0
- package/dist/services/comparison/diff-service.js +120 -0
- package/dist/services/comparison/diff-service.js.map +1 -0
- package/dist/services/embedding/embedder.d.ts +55 -0
- package/dist/services/embedding/embedder.d.ts.map +1 -0
- package/dist/services/embedding/embedder.js +202 -0
- package/dist/services/embedding/embedder.js.map +1 -0
- package/dist/services/embedding/nomic.d.ts +67 -0
- package/dist/services/embedding/nomic.d.ts.map +1 -0
- package/dist/services/embedding/nomic.js +280 -0
- package/dist/services/embedding/nomic.js.map +1 -0
- package/dist/services/gemini/circuit-breaker.d.ts +106 -0
- package/dist/services/gemini/circuit-breaker.d.ts.map +1 -0
- package/dist/services/gemini/circuit-breaker.js +237 -0
- package/dist/services/gemini/circuit-breaker.js.map +1 -0
- package/dist/services/gemini/client.d.ts +173 -0
- package/dist/services/gemini/client.d.ts.map +1 -0
- package/dist/services/gemini/client.js +483 -0
- package/dist/services/gemini/client.js.map +1 -0
- package/dist/services/gemini/config.d.ts +116 -0
- package/dist/services/gemini/config.d.ts.map +1 -0
- package/dist/services/gemini/config.js +118 -0
- package/dist/services/gemini/config.js.map +1 -0
- package/dist/services/gemini/index.d.ts +9 -0
- package/dist/services/gemini/index.d.ts.map +1 -0
- package/dist/services/gemini/index.js +13 -0
- package/dist/services/gemini/index.js.map +1 -0
- package/dist/services/gemini/rate-limiter.d.ts +62 -0
- package/dist/services/gemini/rate-limiter.d.ts.map +1 -0
- package/dist/services/gemini/rate-limiter.js +120 -0
- package/dist/services/gemini/rate-limiter.js.map +1 -0
- package/dist/services/images/extractor.d.ts +88 -0
- package/dist/services/images/extractor.d.ts.map +1 -0
- package/dist/services/images/extractor.js +340 -0
- package/dist/services/images/extractor.js.map +1 -0
- package/dist/services/images/optimizer.d.ts +130 -0
- package/dist/services/images/optimizer.d.ts.map +1 -0
- package/dist/services/images/optimizer.js +228 -0
- package/dist/services/images/optimizer.js.map +1 -0
- package/dist/services/ocr/datalab.d.ts +64 -0
- package/dist/services/ocr/datalab.d.ts.map +1 -0
- package/dist/services/ocr/datalab.js +425 -0
- package/dist/services/ocr/datalab.js.map +1 -0
- package/dist/services/ocr/errors.d.ts +38 -0
- package/dist/services/ocr/errors.d.ts.map +1 -0
- package/dist/services/ocr/errors.js +83 -0
- package/dist/services/ocr/errors.js.map +1 -0
- package/dist/services/ocr/file-manager.d.ts +76 -0
- package/dist/services/ocr/file-manager.d.ts.map +1 -0
- package/dist/services/ocr/file-manager.js +238 -0
- package/dist/services/ocr/file-manager.js.map +1 -0
- package/dist/services/ocr/form-fill.d.ts +48 -0
- package/dist/services/ocr/form-fill.d.ts.map +1 -0
- package/dist/services/ocr/form-fill.js +213 -0
- package/dist/services/ocr/form-fill.js.map +1 -0
- package/dist/services/ocr/processor.d.ts +95 -0
- package/dist/services/ocr/processor.d.ts.map +1 -0
- package/dist/services/ocr/processor.js +259 -0
- package/dist/services/ocr/processor.js.map +1 -0
- package/dist/services/provenance/agent-metadata.d.ts +82 -0
- package/dist/services/provenance/agent-metadata.d.ts.map +1 -0
- package/dist/services/provenance/agent-metadata.js +106 -0
- package/dist/services/provenance/agent-metadata.js.map +1 -0
- package/dist/services/provenance/chain-hash.d.ts +57 -0
- package/dist/services/provenance/chain-hash.d.ts.map +1 -0
- package/dist/services/provenance/chain-hash.js +131 -0
- package/dist/services/provenance/chain-hash.js.map +1 -0
- package/dist/services/provenance/exporter.d.ts +202 -0
- package/dist/services/provenance/exporter.d.ts.map +1 -0
- package/dist/services/provenance/exporter.js +457 -0
- package/dist/services/provenance/exporter.js.map +1 -0
- package/dist/services/provenance/index.d.ts +15 -0
- package/dist/services/provenance/index.d.ts.map +1 -0
- package/dist/services/provenance/index.js +17 -0
- package/dist/services/provenance/index.js.map +1 -0
- package/dist/services/provenance/tracker.d.ts +138 -0
- package/dist/services/provenance/tracker.d.ts.map +1 -0
- package/dist/services/provenance/tracker.js +293 -0
- package/dist/services/provenance/tracker.js.map +1 -0
- package/dist/services/provenance/verifier.d.ts +153 -0
- package/dist/services/provenance/verifier.d.ts.map +1 -0
- package/dist/services/provenance/verifier.js +536 -0
- package/dist/services/provenance/verifier.js.map +1 -0
- package/dist/services/python-pool.d.ts +70 -0
- package/dist/services/python-pool.d.ts.map +1 -0
- package/dist/services/python-pool.js +265 -0
- package/dist/services/python-pool.js.map +1 -0
- package/dist/services/search/bm25.d.ts +180 -0
- package/dist/services/search/bm25.d.ts.map +1 -0
- package/dist/services/search/bm25.js +656 -0
- package/dist/services/search/bm25.js.map +1 -0
- package/dist/services/search/fusion.d.ts +103 -0
- package/dist/services/search/fusion.d.ts.map +1 -0
- package/dist/services/search/fusion.js +122 -0
- package/dist/services/search/fusion.js.map +1 -0
- package/dist/services/search/local-reranker.d.ts +30 -0
- package/dist/services/search/local-reranker.d.ts.map +1 -0
- package/dist/services/search/local-reranker.js +123 -0
- package/dist/services/search/local-reranker.js.map +1 -0
- package/dist/services/search/quality.d.ts +11 -0
- package/dist/services/search/quality.d.ts.map +1 -0
- package/dist/services/search/quality.js +17 -0
- package/dist/services/search/quality.js.map +1 -0
- package/dist/services/search/query-classifier.d.ts +34 -0
- package/dist/services/search/query-classifier.d.ts.map +1 -0
- package/dist/services/search/query-classifier.js +114 -0
- package/dist/services/search/query-classifier.js.map +1 -0
- package/dist/services/search/query-expander.d.ts +73 -0
- package/dist/services/search/query-expander.d.ts.map +1 -0
- package/dist/services/search/query-expander.js +281 -0
- package/dist/services/search/query-expander.js.map +1 -0
- package/dist/services/search/reranker.d.ts +44 -0
- package/dist/services/search/reranker.d.ts.map +1 -0
- package/dist/services/search/reranker.js +101 -0
- package/dist/services/search/reranker.js.map +1 -0
- package/dist/services/storage/database/annotation-operations.d.ts +113 -0
- package/dist/services/storage/database/annotation-operations.d.ts.map +1 -0
- package/dist/services/storage/database/annotation-operations.js +177 -0
- package/dist/services/storage/database/annotation-operations.js.map +1 -0
- package/dist/services/storage/database/approval-operations.d.ts +132 -0
- package/dist/services/storage/database/approval-operations.d.ts.map +1 -0
- package/dist/services/storage/database/approval-operations.js +206 -0
- package/dist/services/storage/database/approval-operations.js.map +1 -0
- package/dist/services/storage/database/chunk-operations.d.ts +132 -0
- package/dist/services/storage/database/chunk-operations.d.ts.map +1 -0
- package/dist/services/storage/database/chunk-operations.js +306 -0
- package/dist/services/storage/database/chunk-operations.js.map +1 -0
- package/dist/services/storage/database/cluster-operations.d.ts +97 -0
- package/dist/services/storage/database/cluster-operations.d.ts.map +1 -0
- package/dist/services/storage/database/cluster-operations.js +258 -0
- package/dist/services/storage/database/cluster-operations.js.map +1 -0
- package/dist/services/storage/database/comparison-operations.d.ts +41 -0
- package/dist/services/storage/database/comparison-operations.d.ts.map +1 -0
- package/dist/services/storage/database/comparison-operations.js +65 -0
- package/dist/services/storage/database/comparison-operations.js.map +1 -0
- package/dist/services/storage/database/converters.d.ts +36 -0
- package/dist/services/storage/database/converters.d.ts.map +1 -0
- package/dist/services/storage/database/converters.js +244 -0
- package/dist/services/storage/database/converters.js.map +1 -0
- package/dist/services/storage/database/document-operations.d.ts +145 -0
- package/dist/services/storage/database/document-operations.d.ts.map +1 -0
- package/dist/services/storage/database/document-operations.js +498 -0
- package/dist/services/storage/database/document-operations.js.map +1 -0
- package/dist/services/storage/database/embedding-operations.d.ts +130 -0
- package/dist/services/storage/database/embedding-operations.d.ts.map +1 -0
- package/dist/services/storage/database/embedding-operations.js +315 -0
- package/dist/services/storage/database/embedding-operations.js.map +1 -0
- package/dist/services/storage/database/extraction-operations.d.ts +47 -0
- package/dist/services/storage/database/extraction-operations.d.ts.map +1 -0
- package/dist/services/storage/database/extraction-operations.js +85 -0
- package/dist/services/storage/database/extraction-operations.js.map +1 -0
- package/dist/services/storage/database/form-fill-operations.d.ts +58 -0
- package/dist/services/storage/database/form-fill-operations.d.ts.map +1 -0
- package/dist/services/storage/database/form-fill-operations.js +116 -0
- package/dist/services/storage/database/form-fill-operations.js.map +1 -0
- package/dist/services/storage/database/helpers.d.ts +29 -0
- package/dist/services/storage/database/helpers.d.ts.map +1 -0
- package/dist/services/storage/database/helpers.js +55 -0
- package/dist/services/storage/database/helpers.js.map +1 -0
- package/dist/services/storage/database/image-operations.d.ts +202 -0
- package/dist/services/storage/database/image-operations.d.ts.map +1 -0
- package/dist/services/storage/database/image-operations.js +484 -0
- package/dist/services/storage/database/image-operations.js.map +1 -0
- package/dist/services/storage/database/index.d.ts +13 -0
- package/dist/services/storage/database/index.d.ts.map +1 -0
- package/dist/services/storage/database/index.js +16 -0
- package/dist/services/storage/database/index.js.map +1 -0
- package/dist/services/storage/database/lock-operations.d.ts +59 -0
- package/dist/services/storage/database/lock-operations.d.ts.map +1 -0
- package/dist/services/storage/database/lock-operations.js +89 -0
- package/dist/services/storage/database/lock-operations.js.map +1 -0
- package/dist/services/storage/database/obligation-operations.d.ts +88 -0
- package/dist/services/storage/database/obligation-operations.d.ts.map +1 -0
- package/dist/services/storage/database/obligation-operations.js +206 -0
- package/dist/services/storage/database/obligation-operations.js.map +1 -0
- package/dist/services/storage/database/ocr-operations.d.ts +33 -0
- package/dist/services/storage/database/ocr-operations.d.ts.map +1 -0
- package/dist/services/storage/database/ocr-operations.js +70 -0
- package/dist/services/storage/database/ocr-operations.js.map +1 -0
- package/dist/services/storage/database/playbook-operations.d.ts +72 -0
- package/dist/services/storage/database/playbook-operations.d.ts.map +1 -0
- package/dist/services/storage/database/playbook-operations.js +247 -0
- package/dist/services/storage/database/playbook-operations.js.map +1 -0
- package/dist/services/storage/database/provenance-operations.d.ts +112 -0
- package/dist/services/storage/database/provenance-operations.d.ts.map +1 -0
- package/dist/services/storage/database/provenance-operations.js +251 -0
- package/dist/services/storage/database/provenance-operations.js.map +1 -0
- package/dist/services/storage/database/service.d.ts +142 -0
- package/dist/services/storage/database/service.d.ts.map +1 -0
- package/dist/services/storage/database/service.js +310 -0
- package/dist/services/storage/database/service.js.map +1 -0
- package/dist/services/storage/database/static-operations.d.ts +30 -0
- package/dist/services/storage/database/static-operations.d.ts.map +1 -0
- package/dist/services/storage/database/static-operations.js +218 -0
- package/dist/services/storage/database/static-operations.js.map +1 -0
- package/dist/services/storage/database/stats-operations.d.ts +101 -0
- package/dist/services/storage/database/stats-operations.d.ts.map +1 -0
- package/dist/services/storage/database/stats-operations.js +394 -0
- package/dist/services/storage/database/stats-operations.js.map +1 -0
- package/dist/services/storage/database/tag-operations.d.ts +76 -0
- package/dist/services/storage/database/tag-operations.d.ts.map +1 -0
- package/dist/services/storage/database/tag-operations.js +178 -0
- package/dist/services/storage/database/tag-operations.js.map +1 -0
- package/dist/services/storage/database/types.d.ts +286 -0
- package/dist/services/storage/database/types.d.ts.map +1 -0
- package/dist/services/storage/database/types.js +39 -0
- package/dist/services/storage/database/types.js.map +1 -0
- package/dist/services/storage/database/upload-operations.d.ts +71 -0
- package/dist/services/storage/database/upload-operations.d.ts.map +1 -0
- package/dist/services/storage/database/upload-operations.js +124 -0
- package/dist/services/storage/database/upload-operations.js.map +1 -0
- package/dist/services/storage/database/user-operations.d.ts +102 -0
- package/dist/services/storage/database/user-operations.d.ts.map +1 -0
- package/dist/services/storage/database/user-operations.js +151 -0
- package/dist/services/storage/database/user-operations.js.map +1 -0
- package/dist/services/storage/database/workflow-operations.d.ts +98 -0
- package/dist/services/storage/database/workflow-operations.d.ts.map +1 -0
- package/dist/services/storage/database/workflow-operations.js +157 -0
- package/dist/services/storage/database/workflow-operations.js.map +1 -0
- package/dist/services/storage/database.d.ts +16 -0
- package/dist/services/storage/database.d.ts.map +1 -0
- package/dist/services/storage/database.js +15 -0
- package/dist/services/storage/database.js.map +1 -0
- package/dist/services/storage/index.d.ts +10 -0
- package/dist/services/storage/index.d.ts.map +1 -0
- package/dist/services/storage/index.js +10 -0
- package/dist/services/storage/index.js.map +1 -0
- package/dist/services/storage/migrations/index.d.ts +16 -0
- package/dist/services/storage/migrations/index.d.ts.map +1 -0
- package/dist/services/storage/migrations/index.js +20 -0
- package/dist/services/storage/migrations/index.js.map +1 -0
- package/dist/services/storage/migrations/operations.d.ts +40 -0
- package/dist/services/storage/migrations/operations.d.ts.map +1 -0
- package/dist/services/storage/migrations/operations.js +2910 -0
- package/dist/services/storage/migrations/operations.js.map +1 -0
- package/dist/services/storage/migrations/schema-definitions.d.ts +306 -0
- package/dist/services/storage/migrations/schema-definitions.d.ts.map +1 -0
- package/dist/services/storage/migrations/schema-definitions.js +1006 -0
- package/dist/services/storage/migrations/schema-definitions.js.map +1 -0
- package/dist/services/storage/migrations/schema-helpers.d.ts +50 -0
- package/dist/services/storage/migrations/schema-helpers.d.ts.map +1 -0
- package/dist/services/storage/migrations/schema-helpers.js +176 -0
- package/dist/services/storage/migrations/schema-helpers.js.map +1 -0
- package/dist/services/storage/migrations/types.d.ts +15 -0
- package/dist/services/storage/migrations/types.d.ts.map +1 -0
- package/dist/services/storage/migrations/types.js +21 -0
- package/dist/services/storage/migrations/types.js.map +1 -0
- package/dist/services/storage/migrations/verification.d.ts +20 -0
- package/dist/services/storage/migrations/verification.d.ts.map +1 -0
- package/dist/services/storage/migrations/verification.js +78 -0
- package/dist/services/storage/migrations/verification.js.map +1 -0
- package/dist/services/storage/migrations.d.ts +16 -0
- package/dist/services/storage/migrations.d.ts.map +1 -0
- package/dist/services/storage/migrations.js +17 -0
- package/dist/services/storage/migrations.js.map +1 -0
- package/dist/services/storage/types.d.ts +12 -0
- package/dist/services/storage/types.d.ts.map +1 -0
- package/dist/services/storage/types.js +5 -0
- package/dist/services/storage/types.js.map +1 -0
- package/dist/services/storage/vector.d.ts +208 -0
- package/dist/services/storage/vector.d.ts.map +1 -0
- package/dist/services/storage/vector.js +526 -0
- package/dist/services/storage/vector.js.map +1 -0
- package/dist/services/vlm/pipeline.d.ts +194 -0
- package/dist/services/vlm/pipeline.d.ts.map +1 -0
- package/dist/services/vlm/pipeline.js +800 -0
- package/dist/services/vlm/pipeline.js.map +1 -0
- package/dist/services/vlm/prompts.d.ts +171 -0
- package/dist/services/vlm/prompts.d.ts.map +1 -0
- package/dist/services/vlm/prompts.js +229 -0
- package/dist/services/vlm/prompts.js.map +1 -0
- package/dist/services/vlm/service.d.ts +174 -0
- package/dist/services/vlm/service.d.ts.map +1 -0
- package/dist/services/vlm/service.js +256 -0
- package/dist/services/vlm/service.js.map +1 -0
- package/dist/services/webhook-delivery.d.ts +4 -0
- package/dist/services/webhook-delivery.d.ts.map +1 -0
- package/dist/services/webhook-delivery.js +140 -0
- package/dist/services/webhook-delivery.js.map +1 -0
- package/dist/tools/chunks.d.ts +19 -0
- package/dist/tools/chunks.d.ts.map +1 -0
- package/dist/tools/chunks.js +392 -0
- package/dist/tools/chunks.js.map +1 -0
- package/dist/tools/clm.d.ts +16 -0
- package/dist/tools/clm.d.ts.map +1 -0
- package/dist/tools/clm.js +668 -0
- package/dist/tools/clm.js.map +1 -0
- package/dist/tools/clustering.d.ts +13 -0
- package/dist/tools/clustering.d.ts.map +1 -0
- package/dist/tools/clustering.js +498 -0
- package/dist/tools/clustering.js.map +1 -0
- package/dist/tools/collaboration.d.ts +15 -0
- package/dist/tools/collaboration.d.ts.map +1 -0
- package/dist/tools/collaboration.js +516 -0
- package/dist/tools/collaboration.js.map +1 -0
- package/dist/tools/comparison.d.ts +13 -0
- package/dist/tools/comparison.d.ts.map +1 -0
- package/dist/tools/comparison.js +735 -0
- package/dist/tools/comparison.js.map +1 -0
- package/dist/tools/compliance.d.ts +15 -0
- package/dist/tools/compliance.d.ts.map +1 -0
- package/dist/tools/compliance.js +640 -0
- package/dist/tools/compliance.js.map +1 -0
- package/dist/tools/config.d.ts +19 -0
- package/dist/tools/config.d.ts.map +1 -0
- package/dist/tools/config.js +213 -0
- package/dist/tools/config.js.map +1 -0
- package/dist/tools/database.d.ts +62 -0
- package/dist/tools/database.d.ts.map +1 -0
- package/dist/tools/database.js +288 -0
- package/dist/tools/database.js.map +1 -0
- package/dist/tools/documents.d.ts +61 -0
- package/dist/tools/documents.d.ts.map +1 -0
- package/dist/tools/documents.js +1624 -0
- package/dist/tools/documents.js.map +1 -0
- package/dist/tools/embeddings.d.ts +14 -0
- package/dist/tools/embeddings.d.ts.map +1 -0
- package/dist/tools/embeddings.js +626 -0
- package/dist/tools/embeddings.js.map +1 -0
- package/dist/tools/evaluation.d.ts +25 -0
- package/dist/tools/evaluation.d.ts.map +1 -0
- package/dist/tools/evaluation.js +523 -0
- package/dist/tools/evaluation.js.map +1 -0
- package/dist/tools/events.d.ts +16 -0
- package/dist/tools/events.d.ts.map +1 -0
- package/dist/tools/events.js +493 -0
- package/dist/tools/events.js.map +1 -0
- package/dist/tools/extraction-structured.d.ts +13 -0
- package/dist/tools/extraction-structured.d.ts.map +1 -0
- package/dist/tools/extraction-structured.js +390 -0
- package/dist/tools/extraction-structured.js.map +1 -0
- package/dist/tools/extraction.d.ts +24 -0
- package/dist/tools/extraction.d.ts.map +1 -0
- package/dist/tools/extraction.js +424 -0
- package/dist/tools/extraction.js.map +1 -0
- package/dist/tools/file-management.d.ts +14 -0
- package/dist/tools/file-management.d.ts.map +1 -0
- package/dist/tools/file-management.js +523 -0
- package/dist/tools/file-management.js.map +1 -0
- package/dist/tools/form-fill.d.ts +13 -0
- package/dist/tools/form-fill.d.ts.map +1 -0
- package/dist/tools/form-fill.js +250 -0
- package/dist/tools/form-fill.js.map +1 -0
- package/dist/tools/health.d.ts +19 -0
- package/dist/tools/health.d.ts.map +1 -0
- package/dist/tools/health.js +229 -0
- package/dist/tools/health.js.map +1 -0
- package/dist/tools/images.d.ts +54 -0
- package/dist/tools/images.d.ts.map +1 -0
- package/dist/tools/images.js +787 -0
- package/dist/tools/images.js.map +1 -0
- package/dist/tools/ingestion.d.ts +94 -0
- package/dist/tools/ingestion.d.ts.map +1 -0
- package/dist/tools/ingestion.js +1659 -0
- package/dist/tools/ingestion.js.map +1 -0
- package/dist/tools/intelligence.d.ts +18 -0
- package/dist/tools/intelligence.d.ts.map +1 -0
- package/dist/tools/intelligence.js +1039 -0
- package/dist/tools/intelligence.js.map +1 -0
- package/dist/tools/provenance.d.ts +51 -0
- package/dist/tools/provenance.d.ts.map +1 -0
- package/dist/tools/provenance.js +691 -0
- package/dist/tools/provenance.js.map +1 -0
- package/dist/tools/reports.d.ts +41 -0
- package/dist/tools/reports.d.ts.map +1 -0
- package/dist/tools/reports.js +1394 -0
- package/dist/tools/reports.js.map +1 -0
- package/dist/tools/search.d.ts +35 -0
- package/dist/tools/search.d.ts.map +1 -0
- package/dist/tools/search.js +2528 -0
- package/dist/tools/search.js.map +1 -0
- package/dist/tools/shared.d.ts +52 -0
- package/dist/tools/shared.d.ts.map +1 -0
- package/dist/tools/shared.js +54 -0
- package/dist/tools/shared.js.map +1 -0
- package/dist/tools/tags.d.ts +15 -0
- package/dist/tools/tags.d.ts.map +1 -0
- package/dist/tools/tags.js +287 -0
- package/dist/tools/tags.js.map +1 -0
- package/dist/tools/timeline.d.ts +15 -0
- package/dist/tools/timeline.d.ts.map +1 -0
- package/dist/tools/timeline.js +14 -0
- package/dist/tools/timeline.js.map +1 -0
- package/dist/tools/users.d.ts +14 -0
- package/dist/tools/users.d.ts.map +1 -0
- package/dist/tools/users.js +257 -0
- package/dist/tools/users.js.map +1 -0
- package/dist/tools/vlm.d.ts +40 -0
- package/dist/tools/vlm.d.ts.map +1 -0
- package/dist/tools/vlm.js +475 -0
- package/dist/tools/vlm.js.map +1 -0
- package/dist/tools/workflow.d.ts +16 -0
- package/dist/tools/workflow.d.ts.map +1 -0
- package/dist/tools/workflow.js +495 -0
- package/dist/tools/workflow.js.map +1 -0
- package/dist/utils/backoff.d.ts +53 -0
- package/dist/utils/backoff.d.ts.map +1 -0
- package/dist/utils/backoff.js +78 -0
- package/dist/utils/backoff.js.map +1 -0
- package/dist/utils/config-persistence.d.ts +33 -0
- package/dist/utils/config-persistence.d.ts.map +1 -0
- package/dist/utils/config-persistence.js +61 -0
- package/dist/utils/config-persistence.js.map +1 -0
- package/dist/utils/hash.d.ts +65 -0
- package/dist/utils/hash.d.ts.map +1 -0
- package/dist/utils/hash.js +146 -0
- package/dist/utils/hash.js.map +1 -0
- package/dist/utils/math.d.ts +21 -0
- package/dist/utils/math.d.ts.map +1 -0
- package/dist/utils/math.js +39 -0
- package/dist/utils/math.js.map +1 -0
- package/dist/utils/validation.d.ts +697 -0
- package/dist/utils/validation.d.ts.map +1 -0
- package/dist/utils/validation.js +529 -0
- package/dist/utils/validation.js.map +1 -0
- package/package.json +96 -0
- package/python/.gitkeep +0 -0
- package/python/__init__.py +104 -0
- package/python/clustering_worker.py +440 -0
- package/python/docx_image_extractor.py +524 -0
- package/python/embedding_worker.py +552 -0
- package/python/file_manager_worker.py +564 -0
- package/python/form_fill_worker.py +399 -0
- package/python/gpu_utils.py +582 -0
- package/python/image_extractor.py +317 -0
- package/python/image_optimizer.py +444 -0
- package/python/ocr_worker.py +712 -0
- package/python/pyproject.toml +76 -0
- package/python/requirements.txt +51 -0
- package/python/reranker_worker.py +87 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"http-transport.js","sourceRoot":"","sources":["../../../src/server/transports/http-transport.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAkBpC,kFAAkF;AAClF,iBAAiB;AACjB,kFAAkF;AAElF,MAAM,OAAO,aAAa;IAChB,QAAQ,GAAG,IAAI,GAAG,EAAmB,CAAC;IACtC,MAAM,GAAuB,IAAI,CAAC;IAClC,MAAM,CAAsB;IAC5B,YAAY,GAA0B,IAAI,CAAC;IAC3C,cAAc,GAAuE,IAAI,CAAC;IAElG,YAAY,MAAqC;QAC/C,IAAI,CAAC,MAAM,GAAG;YACZ,IAAI,EAAE,MAAM,EAAE,IAAI,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,aAAa,CAAC,IAAI,IAAI,CAAC;YACjE,YAAY,EAAE,MAAM,EAAE,YAAY,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,eAAe,CAAC,IAAI,IAAI,CAAC,GAAG,IAAI;SAC3F,CAAC;IACJ,CAAC;IAED,4CAA4C;IAC5C,SAAS,CAAC,OAAkE;QAC1E,IAAI,CAAC,cAAc,GAAG,OAAO,CAAC;IAChC,CAAC;IAED,4BAA4B;IAC5B,KAAK,CAAC,KAAK;QACT,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,YAAY,CAAC,KAAK,EAAE,GAAG,EAAE,GAAG,EAAE,EAAE;YACjD,eAAe;YACf,GAAG,CAAC,SAAS,CAAC,6BAA6B,EAAE,GAAG,CAAC,CAAC;YAClD,GAAG,CAAC,SAAS,CAAC,8BAA8B,EAAE,oBAAoB,CAAC,CAAC;YACpE,GAAG,CAAC,SAAS,CAAC,8BAA8B,EAAE,8BAA8B,CAAC,CAAC;YAC9E,GAAG,CAAC,SAAS,CAAC,+BAA+B,EAAE,gBAAgB,CAAC,CAAC;YAEjE,IAAI,GAAG,CAAC,MAAM,KAAK,SAAS,EAAE,CAAC;gBAC7B,GAAG,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;gBACnB,GAAG,CAAC,GAAG,EAAE,CAAC;gBACV,OAAO;YACT,CAAC;YAED,IAAI,CAAC;gBACH,IAAI,GAAG,CAAC,MAAM,KAAK,MAAM,IAAI,GAAG,CAAC,GAAG,KAAK,MAAM,EAAE,CAAC;oBAChD,MAAM,IAAI,CAAC,UAAU,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;gBAClC,CAAC;qBAAM,IAAI,GAAG,CAAC,MAAM,KAAK,KAAK,IAAI,GAAG,CAAC,GAAG,KAAK,UAAU,EAAE,CAAC;oBAC1D,MAAM,IAAI,CAAC,SAAS,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;gBACjC,CAAC;qBAAM,IAAI,GAAG,CAAC,MAAM,KAAK,KAAK,IAAI,GAAG,CAAC,GAAG,KAAK,SAAS,EAAE,CAAC;oBACzD,GAAG,CAAC,SAAS,CAAC,GAAG,EAAE,EAAE,cAAc,EAAE,kBAAkB,EAAE,CAAC,CAAC;oBAC3D,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;gBAC1E,CAAC;qBAAM,CAAC;oBACN,GAAG,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;oBACnB,GAAG,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;gBACvB,CAAC;YACH,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,OA
AO,CAAC,KAAK,CACX,gCAAgC,EAChC,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CACvD,CAAC;gBACF,IAAI,CAAC,GAAG,CAAC,WAAW,EAAE,CAAC;oBACrB,GAAG,CAAC,SAAS,CAAC,GAAG,EAAE,EAAE,cAAc,EAAE,kBAAkB,EAAE,CAAC,CAAC;oBAC3D,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,KAAK,EAAE,uBAAuB,EAAE,CAAC,CAAC,CAAC;gBAC9D,CAAC;YACH,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,MAAM,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;YAC1C,IAAI,CAAC,MAAO,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,GAAG,EAAE;gBACzC,OAAO,CAAC,KAAK,CAAC,qCAAqC,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC;gBACvE,OAAO,EAAE,CAAC;YACZ,CAAC,CAAC,CAAC;YACH,IAAI,CAAC,MAAO,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;QACnC,CAAC,CAAC,CAAC;QAEH,4BAA4B;QAC5B,IAAI,CAAC,YAAY,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,sBAAsB,EAAE,EAAE,KAAK,CAAC,CAAC;QAC5E,IAAI,CAAC,YAAY,CAAC,KAAK,EAAE,CAAC;IAC5B,CAAC;IAED,2BAA2B;IAC3B,KAAK,CAAC,IAAI;QACR,IAAI,IAAI,CAAC,YAAY,EAAE,CAAC;YACtB,aAAa,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;YACjC,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC;QAC3B,CAAC;QACD,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChB,MAAM,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,EAAE;gBAClC,IAAI,CAAC,MAAO,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,OAAO,EAAE,CAAC,CAAC;YACtC,CAAC,CAAC,CAAC;YACH,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC;QACrB,CAAC;QACD,IAAI,CAAC,QAAQ,CAAC,KAAK,EAAE,CAAC;QACtB,OAAO,CAAC,KAAK,CAAC,yBAAyB,CAAC,CAAC;IAC3C,CAAC;IAEO,KAAK,CAAC,UAAU,CAAC,GAAyB,EAAE,GAAwB;QAC1E,wBAAwB;QACxB,IAAI,SAAS,GAAG,GAAG,CAAC,OAAO,CAAC,gBAAgB,CAAuB,CAAC;QAEpE,IAAI,SAAS,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,SAAS,CAAC,EAAE,CAAC;YAC/C,GAAG,CAAC,SAAS,CAAC,GAAG,EAAE,EAAE,cAAc,EAAE,kBAAkB,EAAE,CAAC,CAAC;YAC3D,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,KAAK,EAAE,oBAAoB,EAAE,CAAC,CAAC,CAAC;YACzD,OAAO;QACT,CAAC;QAED,IAAI,CAAC,SAAS,EAAE,CAAC;YACf,SAAS,GAAG,UAAU,EAAE,CAAC;YACzB,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,SAAS,EAAE;gBAC3B,EAAE,EAAE,SAAS;gBACb,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;gBACrB,YAAY,EAAE,IAAI,CAAC,GAAG,EAAE;gBACxB,WAAW,EAAE,IAAI;aAClB,CAAC,CAAC;QACL,CAAC;QAED,MAAM,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,SAAS,C
AAE,CAAC;QAC9C,OAAO,CAAC,YAAY,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAElC,YAAY;QACZ,MAAM,IAAI,GAAG,MAAM,IAAI,OAAO,CAAS,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;YACzD,IAAI,IAAI,GAAG,EAAE,CAAC;YACd,GAAG,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,KAAa,EAAE,EAAE;gBAC/B,IAAI,IAAI,KAAK,CAAC;YAChB,CAAC,CAAC,CAAC;YACH,GAAG,CAAC,EAAE,CAAC,KAAK,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC;YACnC,GAAG,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;QAC1B,CAAC,CAAC,CAAC;QAEH,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,CAAC;YACzB,GAAG,CAAC,SAAS,CAAC,GAAG,EAAE,EAAE,cAAc,EAAE,kBAAkB,EAAE,CAAC,CAAC;YAC3D,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,KAAK,EAAE,+BAA+B,EAAE,CAAC,CAAC,CAAC;YACpE,OAAO;QACT,CAAC;QAED,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QACjC,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,cAAc,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;QAE/D,GAAG,CAAC,SAAS,CAAC,GAAG,EAAE;YACjB,cAAc,EAAE,kBAAkB;YAClC,gBAAgB,EAAE,SAAS;SAC5B,CAAC,CAAC;QACH,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC,CAAC;IACpC,CAAC;IAEO,KAAK,CAAC,SAAS,CAAC,GAAyB,EAAE,GAAwB;QACzE,MAAM,SAAS,GAAG,GAAG,CAAC,OAAO,CAAC,gBAAgB,CAAuB,CAAC;QACtE,IAAI,CAAC,SAAS,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,SAAS,CAAC,EAAE,CAAC;YAChD,GAAG,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;YACnB,GAAG,CAAC,GAAG,CAAC,iBAAiB,CAAC,CAAC;YAC3B,OAAO;QACT,CAAC;QAED,MAAM,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,SAAS,CAAE,CAAC;QAC9C,OAAO,CAAC,YAAY,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAClC,OAAO,CAAC,WAAW,GAAG,GAAG,CAAC;QAE1B,GAAG,CAAC,SAAS,CAAC,GAAG,EAAE;YACjB,cAAc,EAAE,mBAAmB;YACnC,eAAe,EAAE,UAAU;YAC3B,YAAY,EAAE,YAAY;YAC1B,gBAAgB,EAAE,SAAS;SAC5B,CAAC,CAAC;QAEH,4BAA4B;QAC5B,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE;YACjC,IAAI,CAAC;gBACH,GAAG,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC;YAC1B,CAAC;YAAC,MAAM,CAAC;gBACP,aAAa,CAAC,SAAS,CAAC,CAAC;YAC3B,CAAC;QACH,CAAC,EAAE,KAAK,CAAC,CAAC;QAEV,GAAG,CAAC,EAAE,CAAC,OAAO,EAAE,GAAG,EAAE;YACnB,aAAa,CAAC,SAAS,CAAC,CAAC;YACzB,IAAI,OAAO,CAAC,WAAW,KAAK,GAAG,EAAE,CAAC;gBAChC,OAAO,CAAC,WAAW,GAAG,IAAI,CAAC;YAC7B,CAAC;QACH,CAAC,CAAC,CAAC;IACL,CAAC;IAED,kCAAkC;IAClC,SAAS,CAAC,SAAiB,EAAE,KAAa,EAAE,IAAa;QACvD,MAAM
,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;QAC7C,IAAI,OAAO,EAAE,WAAW,EAAE,CAAC;YACzB,IAAI,CAAC;gBACH,OAAO,CAAC,WAAW,CAAC,KAAK,CAAC,UAAU,KAAK,WAAW,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YAClF,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,OAAO,CAAC,KAAK,CACX,uCAAuC,SAAS,KAAK,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAC9G,CAAC;gBACF,OAAO,CAAC,WAAW,GAAG,IAAI,CAAC;YAC7B,CAAC;QACH,CAAC;IACH,CAAC;IAEO,sBAAsB;QAC5B,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QACvB,KAAK,MAAM,CAAC,EAAE,EAAE,OAAO,CAAC,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YAC1C,IAAI,GAAG,GAAG,OAAO,CAAC,YAAY,GAAG,IAAI,CAAC,MAAM,CAAC,YAAY,EAAE,CAAC;gBAC1D,OAAO,CAAC,KAAK,CAAC,oCAAoC,EAAE,EAAE,CAAC,CAAC;gBACxD,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC;oBACxB,IAAI,CAAC;wBACH,OAAO,CAAC,WAAW,CAAC,GAAG,EAAE,CAAC;oBAC5B,CAAC;oBAAC,MAAM,CAAC;wBACP,yBAAyB;oBAC3B,CAAC;gBACH,CAAC;gBACD,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;YAC3B,CAAC;QACH,CAAC;IACH,CAAC;IAED,eAAe;QACb,OAAO,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC;IAC5B,CAAC;CACF"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Transport Layer Index
|
|
3
|
+
* Re-exports transport implementations
|
|
4
|
+
*
|
|
5
|
+
* @module server/transports
|
|
6
|
+
*/
|
|
7
|
+
export { HttpTransport, type HttpTransportConfig } from './http-transport.js';
|
|
8
|
+
export { SessionManager, sessionManager, type SessionState } from './session-manager.js';
|
|
9
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/server/transports/index.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,aAAa,EAAE,KAAK,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAC9E,OAAO,EAAE,cAAc,EAAE,cAAc,EAAE,KAAK,YAAY,EAAE,MAAM,sBAAsB,CAAC"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Transport Layer Index
|
|
3
|
+
* Re-exports transport implementations
|
|
4
|
+
*
|
|
5
|
+
* @module server/transports
|
|
6
|
+
*/
|
|
7
|
+
export { HttpTransport } from './http-transport.js';
|
|
8
|
+
export { SessionManager, sessionManager } from './session-manager.js';
|
|
9
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/server/transports/index.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,aAAa,EAA4B,MAAM,qBAAqB,CAAC;AAC9E,OAAO,EAAE,cAAc,EAAE,cAAc,EAAqB,MAAM,sBAAsB,CAAC"}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Session Manager - Scoped state per MCP session
|
|
3
|
+
*
|
|
4
|
+
* Each session tracks its own currentDatabase, userId, and active operations.
|
|
5
|
+
* Stdio transport uses a default "local" session.
|
|
6
|
+
*
|
|
7
|
+
* CRITICAL: NEVER use console.log() - stdout is reserved for JSON-RPC protocol.
|
|
8
|
+
*
|
|
9
|
+
* @module server/transports/session-manager
|
|
10
|
+
*/
|
|
11
|
+
/** State record kept for a single session. */
export interface SessionState {
    /** Id the session is stored under. */
    sessionId: string;
    /** Database currently selected for this session, or null. */
    currentDatabase: string | null;
    /** User associated with this session, or null. */
    userId: string | null;
    /** Counter of active operations for this session. */
    activeOperations: number;
    /** Creation timestamp (numeric). */
    createdAt: number;
    /** Timestamp of the most recent activity (numeric). */
    lastActivity: number;
    /** Free-form extra data attached to the session. */
    metadata: Record<string, unknown>;
}
|
|
20
|
+
/** Declaration of the session registry class and its shared instance. */
export declare class SessionManager {
    private sessions;
    private static readonly LOCAL_SESSION_ID;
    /** Return the state for `sessionId`, creating it when absent. */
    getSession(sessionId: string): SessionState;
    /** Return the session used for the local (stdio) transport. */
    getLocalSession(): SessionState;
    /** Drop the session stored under `sessionId`. */
    removeSession(sessionId: string): void;
    /** Associate `userId` with the given session. */
    setSessionUser(sessionId: string, userId: string): void;
    /** Return every session currently held. */
    listSessions(): SessionState[];
    /** Return how many sessions are currently held. */
    getSessionCount(): number;
    /** Remove expired sessions for the given TTL (ms) and return how many were removed. */
    cleanupExpired(ttlMs: number): number;
}
/** Shared SessionManager instance. */
export declare const sessionManager: SessionManager;
|
|
40
|
+
//# sourceMappingURL=session-manager.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"session-manager.d.ts","sourceRoot":"","sources":["../../../src/server/transports/session-manager.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAMH,MAAM,WAAW,YAAY;IAC3B,SAAS,EAAE,MAAM,CAAC;IAClB,eAAe,EAAE,MAAM,GAAG,IAAI,CAAC;IAC/B,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACtB,gBAAgB,EAAE,MAAM,CAAC;IACzB,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;IACrB,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACnC;AAMD,qBAAa,cAAc;IACzB,OAAO,CAAC,QAAQ,CAAmC;IACnD,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,gBAAgB,CAAW;IAEnD,kCAAkC;IAClC,UAAU,CAAC,SAAS,EAAE,MAAM,GAAG,YAAY;IAkB3C,oCAAoC;IACpC,eAAe,IAAI,YAAY;IAI/B,uBAAuB;IACvB,aAAa,CAAC,SAAS,EAAE,MAAM,GAAG,IAAI;IAItC,6BAA6B;IAC7B,cAAc,CAAC,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,IAAI;IAKvD,8BAA8B;IAC9B,YAAY,IAAI,YAAY,EAAE;IAI9B,wBAAwB;IACxB,eAAe,IAAI,MAAM;IAIzB,gCAAgC;IAChC,cAAc,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM;CAWtC;AAMD,gCAAgC;AAChC,eAAO,MAAM,cAAc,gBAAuB,CAAC"}
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Session Manager - Scoped state per MCP session
|
|
3
|
+
*
|
|
4
|
+
* Each session tracks its own currentDatabase, userId, and active operations.
|
|
5
|
+
* Stdio transport uses a default "local" session.
|
|
6
|
+
*
|
|
7
|
+
* CRITICAL: NEVER use console.log() - stdout is reserved for JSON-RPC protocol.
|
|
8
|
+
*
|
|
9
|
+
* @module server/transports/session-manager
|
|
10
|
+
*/
|
|
11
|
+
// ═══════════════════════════════════════════════════════════════════════════════
|
|
12
|
+
// SESSION MANAGER
|
|
13
|
+
// ═══════════════════════════════════════════════════════════════════════════════
|
|
14
|
+
export class SessionManager {
|
|
15
|
+
sessions = new Map();
|
|
16
|
+
static LOCAL_SESSION_ID = 'local';
|
|
17
|
+
/** Get or create session state */
|
|
18
|
+
getSession(sessionId) {
|
|
19
|
+
let session = this.sessions.get(sessionId);
|
|
20
|
+
if (!session) {
|
|
21
|
+
session = {
|
|
22
|
+
sessionId,
|
|
23
|
+
currentDatabase: null,
|
|
24
|
+
userId: null,
|
|
25
|
+
activeOperations: 0,
|
|
26
|
+
createdAt: Date.now(),
|
|
27
|
+
lastActivity: Date.now(),
|
|
28
|
+
metadata: {},
|
|
29
|
+
};
|
|
30
|
+
this.sessions.set(sessionId, session);
|
|
31
|
+
}
|
|
32
|
+
session.lastActivity = Date.now();
|
|
33
|
+
return session;
|
|
34
|
+
}
|
|
35
|
+
/** Get the local (stdio) session */
|
|
36
|
+
getLocalSession() {
|
|
37
|
+
return this.getSession(SessionManager.LOCAL_SESSION_ID);
|
|
38
|
+
}
|
|
39
|
+
/** Remove a session */
|
|
40
|
+
removeSession(sessionId) {
|
|
41
|
+
this.sessions.delete(sessionId);
|
|
42
|
+
}
|
|
43
|
+
/** Set user for a session */
|
|
44
|
+
setSessionUser(sessionId, userId) {
|
|
45
|
+
const session = this.getSession(sessionId);
|
|
46
|
+
session.userId = userId;
|
|
47
|
+
}
|
|
48
|
+
/** Get all active sessions */
|
|
49
|
+
listSessions() {
|
|
50
|
+
return Array.from(this.sessions.values());
|
|
51
|
+
}
|
|
52
|
+
/** Get session count */
|
|
53
|
+
getSessionCount() {
|
|
54
|
+
return this.sessions.size;
|
|
55
|
+
}
|
|
56
|
+
/** Clean up expired sessions */
|
|
57
|
+
cleanupExpired(ttlMs) {
|
|
58
|
+
const now = Date.now();
|
|
59
|
+
let cleaned = 0;
|
|
60
|
+
for (const [id, session] of this.sessions) {
|
|
61
|
+
if (id !== SessionManager.LOCAL_SESSION_ID && now - session.lastActivity > ttlMs) {
|
|
62
|
+
this.sessions.delete(id);
|
|
63
|
+
cleaned++;
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
return cleaned;
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
// ═══════════════════════════════════════════════════════════════════════════════
|
|
70
|
+
// SINGLETON INSTANCE
|
|
71
|
+
// ═══════════════════════════════════════════════════════════════════════════════
|
|
72
|
+
/** Singleton session manager */
|
|
73
|
+
export const sessionManager = new SessionManager();
|
|
74
|
+
//# sourceMappingURL=session-manager.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"session-manager.js","sourceRoot":"","sources":["../../../src/server/transports/session-manager.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAgBH,kFAAkF;AAClF,kBAAkB;AAClB,kFAAkF;AAElF,MAAM,OAAO,cAAc;IACjB,QAAQ,GAAG,IAAI,GAAG,EAAwB,CAAC;IAC3C,MAAM,CAAU,gBAAgB,GAAG,OAAO,CAAC;IAEnD,kCAAkC;IAClC,UAAU,CAAC,SAAiB;QAC1B,IAAI,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;QAC3C,IAAI,CAAC,OAAO,EAAE,CAAC;YACb,OAAO,GAAG;gBACR,SAAS;gBACT,eAAe,EAAE,IAAI;gBACrB,MAAM,EAAE,IAAI;gBACZ,gBAAgB,EAAE,CAAC;gBACnB,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;gBACrB,YAAY,EAAE,IAAI,CAAC,GAAG,EAAE;gBACxB,QAAQ,EAAE,EAAE;aACb,CAAC;YACF,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;QACxC,CAAC;QACD,OAAO,CAAC,YAAY,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAClC,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,oCAAoC;IACpC,eAAe;QACb,OAAO,IAAI,CAAC,UAAU,CAAC,cAAc,CAAC,gBAAgB,CAAC,CAAC;IAC1D,CAAC;IAED,uBAAuB;IACvB,aAAa,CAAC,SAAiB;QAC7B,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;IAClC,CAAC;IAED,6BAA6B;IAC7B,cAAc,CAAC,SAAiB,EAAE,MAAc;QAC9C,MAAM,OAAO,GAAG,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC;QAC3C,OAAO,CAAC,MAAM,GAAG,MAAM,CAAC;IAC1B,CAAC;IAED,8BAA8B;IAC9B,YAAY;QACV,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;IAC5C,CAAC;IAED,wBAAwB;IACxB,eAAe;QACb,OAAO,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC;IAC5B,CAAC;IAED,gCAAgC;IAChC,cAAc,CAAC,KAAa;QAC1B,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QACvB,IAAI,OAAO,GAAG,CAAC,CAAC;QAChB,KAAK,MAAM,CAAC,EAAE,EAAE,OAAO,CAAC,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YAC1C,IAAI,EAAE,KAAK,cAAc,CAAC,gBAAgB,IAAI,GAAG,GAAG,OAAO,CAAC,YAAY,GAAG,KAAK,EAAE,CAAC;gBACjF,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;gBACzB,OAAO,EAAE,CAAC;YACZ,CAAC;QACH,CAAC;QACD,OAAO,OAAO,CAAC;IACjB,CAAC;;AAGH,kFAAkF;AAClF,qBAAqB;AACrB,kFAAkF;AAElF,gCAAgC;AAChC,MAAM,CAAC,MAAM,cAAc,GAAG,IAAI,cAAc,EAAE,CAAC"}
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* MCP Server Type Definitions
|
|
3
|
+
*
|
|
4
|
+
* Defines interfaces for tool results, server configuration, and state.
|
|
5
|
+
*
|
|
6
|
+
* @module server/types
|
|
7
|
+
*/
|
|
8
|
+
import type { DatabaseService } from '../services/storage/database/index.js';
|
|
9
|
+
/**
 * Result shape for a tool call that succeeded: a literal `true`
 * success flag together with the payload.
 */
interface ToolResultSuccess<T = unknown> {
    /** Always `true` for this result shape. */
    success: true;
    /** Payload of the result. */
    data: T;
}
|
|
16
|
+
/**
 * Build a success result around the given payload.
 *
 * @param data - Payload to wrap
 * @returns A `ToolResultSuccess` carrying `data`
 */
export declare function successResult<T>(data: T): ToolResultSuccess<T>;
|
|
20
|
+
/**
 * Allowed OCR processing modes.
 */
export type OCRMode = 'fast' | 'balanced' | 'accurate';
|
|
24
|
+
/**
 * Settings that control image optimization.
 */
export interface ImageOptimizationConfig {
    /** Whether image optimization is turned on (default: true) */
    enabled: boolean;
    /** Width cap applied when resizing for OCR - Datalab API limit (default: 4800) */
    ocrMaxWidth: number;
    /** Dimension cap applied when resizing for VLM, to optimize tokens (default: 2048) */
    vlmMaxDimension: number;
    /** Images smaller than this are skipped for VLM (default: 50) */
    vlmSkipBelowSize: number;
    /** Lowest relevance score that still gets VLM processing (default: 0.3) */
    vlmMinRelevance: number;
    /** Whether images predicted as logos/icons are skipped (default: true) */
    vlmSkipLogosIcons: boolean;
}
|
|
41
|
+
/**
 * Options used to configure the server.
 */
export interface ServerConfig {
    /** Path used by default for database storage */
    defaultStoragePath: string;
    /** OCR processing mode used by default */
    defaultOCRMode: OCRMode;
    /** Upper bound on concurrent OCR operations */
    maxConcurrent: number;
    /** Batch size used when generating embeddings */
    embeddingBatchSize: number;
    /** GPU device used when generating embeddings */
    embeddingDevice: string;
    /** Size of a chunk, in characters */
    chunkSize: number;
    /** Overlap between chunks as a percentage (0-50) */
    chunkOverlapPercent: number;
    /** Largest chunk size allowed for oversized sections (default: 8000) */
    maxChunkSize: number;
    /** Settings for image optimization */
    imageOptimization: ImageOptimizationConfig;
    /** Turn on auto-clustering after processing */
    autoClusterEnabled?: boolean;
    /** Document count required before auto-clustering triggers */
    autoClusterThreshold?: number;
    /** Algorithm used for auto-clustering */
    autoClusterAlgorithm?: 'hdbscan' | 'agglomerative' | 'kmeans';
}
|
|
70
|
+
/**
 * State tracked by the server.
 */
export interface ServerState {
    /** Database instance selected at the moment, or null */
    currentDatabase: DatabaseService | null;
    /** Name of the database selected at the moment, or null */
    currentDatabaseName: string | null;
    /** Configuration of the server */
    config: ServerConfig;
}
|
|
81
|
+
export {};
|
|
82
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/server/types.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,uCAAuC,CAAC;AAM7E;;GAEG;AACH,UAAU,iBAAiB,CAAC,CAAC,GAAG,OAAO;IACrC,OAAO,EAAE,IAAI,CAAC;IACd,IAAI,EAAE,CAAC,CAAC;CACT;AAED;;GAEG;AACH,wBAAgB,aAAa,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,GAAG,iBAAiB,CAAC,CAAC,CAAC,CAE9D;AAMD;;GAEG;AACH,MAAM,MAAM,OAAO,GAAG,MAAM,GAAG,UAAU,GAAG,UAAU,CAAC;AAEvD;;GAEG;AACH,MAAM,WAAW,uBAAuB;IACtC,gDAAgD;IAChD,OAAO,EAAE,OAAO,CAAC;IAEjB,uEAAuE;IACvE,WAAW,EAAE,MAAM,CAAC;IAEpB,yEAAyE;IACzE,eAAe,EAAE,MAAM,CAAC;IAExB,0DAA0D;IAC1D,gBAAgB,EAAE,MAAM,CAAC;IAEzB,gEAAgE;IAChE,eAAe,EAAE,MAAM,CAAC;IAExB,2DAA2D;IAC3D,iBAAiB,EAAE,OAAO,CAAC;CAC5B;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,wCAAwC;IACxC,kBAAkB,EAAE,MAAM,CAAC;IAE3B,kCAAkC;IAClC,cAAc,EAAE,OAAO,CAAC;IAExB,wCAAwC;IACxC,aAAa,EAAE,MAAM,CAAC;IAEtB,0CAA0C;IAC1C,kBAAkB,EAAE,MAAM,CAAC;IAE3B,0CAA0C;IAC1C,eAAe,EAAE,MAAM,CAAC;IAExB,+BAA+B;IAC/B,SAAS,EAAE,MAAM,CAAC;IAElB,sCAAsC;IACtC,mBAAmB,EAAE,MAAM,CAAC;IAE5B,gEAAgE;IAChE,YAAY,EAAE,MAAM,CAAC;IAErB,kCAAkC;IAClC,iBAAiB,EAAE,uBAAuB,CAAC;IAE3C,8CAA8C;IAC9C,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAE7B,wDAAwD;IACxD,oBAAoB,CAAC,EAAE,MAAM,CAAC;IAE9B,oCAAoC;IACpC,oBAAoB,CAAC,EAAE,SAAS,GAAG,eAAe,GAAG,QAAQ,CAAC;CAC/D;AAMD;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,2CAA2C;IAC3C,eAAe,EAAE,eAAe,GAAG,IAAI,CAAC;IAExC,8CAA8C;IAC9C,mBAAmB,EAAE,MAAM,GAAG,IAAI,CAAC;IAEnC,2BAA2B;IAC3B,MAAM,EAAE,YAAY,CAAC;CACtB"}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* MCP Server Type Definitions
|
|
3
|
+
*
|
|
4
|
+
* Defines interfaces for tool results, server configuration, and state.
|
|
5
|
+
*
|
|
6
|
+
* @module server/types
|
|
7
|
+
*/
|
|
8
|
+
/**
 * Wrap a payload in a success result object.
 *
 * @param data - Payload to wrap
 * @returns Object with `success: true` and the payload under `data`
 */
export function successResult(data) {
    const result = { success: true, data: data };
    return result;
}
|
|
14
|
+
//# sourceMappingURL=types.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../src/server/types.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAgBH;;GAEG;AACH,MAAM,UAAU,aAAa,CAAI,IAAO;IACtC,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;AACjC,CAAC"}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Audit Logger - Records user actions in audit_log table
|
|
3
|
+
*
|
|
4
|
+
* Call this from tool handlers to track mutations.
|
|
5
|
+
* Audit logging is best-effort: failures are logged but never break the main operation.
|
|
6
|
+
*
|
|
7
|
+
* CRITICAL: NEVER use console.log() - stdout is reserved for JSON-RPC protocol.
|
|
8
|
+
*
|
|
9
|
+
* @module services/audit
|
|
10
|
+
*/
|
|
11
|
+
/**
 * Record an action in the audit_log table.
 *
 * This is best-effort: when no database is selected or the insert
 * fails, the error goes to stderr and the call returns without throwing.
 * Audit logging should NEVER break the main operation.
 *
 * @param params - Fields of the audit entry; only `action` is required
 */
export declare function logAudit(params: {
    userId?: string | null;
    sessionId?: string | null;
    action: string;
    entityType?: string;
    entityId?: string;
    details?: Record<string, unknown>;
}): void;
|
|
26
|
+
//# sourceMappingURL=audit.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"audit.d.ts","sourceRoot":"","sources":["../../src/services/audit.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AASH;;;;;;GAMG;AACH,wBAAgB,QAAQ,CAAC,MAAM,EAAE;IAC/B,MAAM,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACvB,SAAS,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACnC,GAAG,IAAI,CAoBP"}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Audit Logger - Records user actions in audit_log table
|
|
3
|
+
*
|
|
4
|
+
* Call this from tool handlers to track mutations.
|
|
5
|
+
* Audit logging is best-effort: failures are logged but never break the main operation.
|
|
6
|
+
*
|
|
7
|
+
* CRITICAL: NEVER use console.log() - stdout is reserved for JSON-RPC protocol.
|
|
8
|
+
*
|
|
9
|
+
* @module services/audit
|
|
10
|
+
*/
|
|
11
|
+
import { hasDatabase, requireDatabase } from '../server/state.js';
|
|
12
|
+
import { insertAuditLog } from './storage/database/user-operations.js';
|
|
13
|
+
// ═══════════════════════════════════════════════════════════════════════════════
|
|
14
|
+
// AUDIT LOGGING
|
|
15
|
+
// ═══════════════════════════════════════════════════════════════════════════════
|
|
16
|
+
/**
 * Record an action in the audit_log table.
 *
 * Returns immediately when hasDatabase() reports no database. Otherwise the
 * entry is inserted through insertAuditLog(); optional fields that are
 * missing are stored as null and `details` is serialized to JSON
 * (an empty object when absent). Any error raised while inserting is
 * reported on stderr and swallowed, because audit logging should NEVER
 * break the main operation.
 *
 * @param params - Audit entry fields (userId, sessionId, action,
 *   entityType, entityId, details)
 */
export function logAudit(params) {
    if (hasDatabase()) {
        try {
            const connection = requireDatabase().db.getConnection();
            const { userId, sessionId, action, entityType, entityId, details } = params;
            const entry = {
                user_id: userId ?? null,
                session_id: sessionId ?? null,
                action: action,
                entity_type: entityType ?? null,
                entity_id: entityId ?? null,
                details_json: JSON.stringify(details ?? {}),
            };
            insertAuditLog(connection, entry);
        }
        catch (err) {
            // Deliberately swallowed: a failed audit write must not fail the caller
            let reason;
            if (err instanceof Error) {
                reason = err.message;
            }
            else {
                reason = String(err);
            }
            console.error('[Audit] Failed to log:', reason);
        }
    }
}
|
|
43
|
+
//# sourceMappingURL=audit.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"audit.js","sourceRoot":"","sources":["../../src/services/audit.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,EAAE,WAAW,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AAClE,OAAO,EAAE,cAAc,EAAE,MAAM,uCAAuC,CAAC;AAEvE,kFAAkF;AAClF,gBAAgB;AAChB,kFAAkF;AAElF;;;;;;GAMG;AACH,MAAM,UAAU,QAAQ,CAAC,MAOxB;IACC,IAAI,CAAC,WAAW,EAAE;QAAE,OAAO;IAC3B,IAAI,CAAC;QACH,MAAM,EAAE,EAAE,EAAE,GAAG,eAAe,EAAE,CAAC;QACjC,MAAM,IAAI,GAAG,EAAE,CAAC,aAAa,EAAE,CAAC;QAChC,cAAc,CAAC,IAAI,EAAE;YACnB,OAAO,EAAE,MAAM,CAAC,MAAM,IAAI,IAAI;YAC9B,UAAU,EAAE,MAAM,CAAC,SAAS,IAAI,IAAI;YACpC,MAAM,EAAE,MAAM,CAAC,MAAM;YACrB,WAAW,EAAE,MAAM,CAAC,UAAU,IAAI,IAAI;YACtC,SAAS,EAAE,MAAM,CAAC,QAAQ,IAAI,IAAI;YAClC,YAAY,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,OAAO,IAAI,EAAE,CAAC;SACnD,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,sDAAsD;QACtD,OAAO,CAAC,KAAK,CACX,wBAAwB,EACxB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CACvD,CAAC;IACJ,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Chunk Deduplication Service (HE-4 / Task 7.3)
|
|
3
|
+
*
|
|
4
|
+
* Identifies exact duplicate chunks by text_hash for deduplication
|
|
5
|
+
* in search results and storage analysis.
|
|
6
|
+
*
|
|
7
|
+
* @module services/chunking/chunk-deduplicator
|
|
8
|
+
*/
|
|
9
|
+
import type { DatabaseService } from '../storage/database/index.js';
|
|
10
|
+
/** Outcome of a chunk deduplication analysis */
export interface DeduplicationResult {
    /** Number of chunks examined. */
    totalChunks: number;
    /** Number of distinct text_hash values among them. */
    uniqueChunks: number;
    /** One entry per text_hash shared by more than one chunk. */
    duplicateGroups: {
        /** The shared text_hash. */
        hash: string;
        /** How many chunks carry this hash. */
        count: number;
        /** Ids of the chunks in the group. */
        chunkIds: string[];
        /** First id of `chunkIds`. */
        representativeChunkId: string;
    }[];
}
|
|
21
|
+
/**
 * Locate chunks that are exact duplicates of each other, judged by text_hash.
 *
 * Chunks with the same text_hash are grouped; a group is reported
 * when it has 2 or more members. The analysis can be limited to
 * one document.
 *
 * @param db - Database service instance
 * @param documentId - Optional document ID that limits the analysis
 * @returns DeduplicationResult holding total and unique counts plus the duplicate groups
 */
export declare function findExactDuplicateChunks(db: DatabaseService, documentId?: string): DeduplicationResult;
|
|
33
|
+
//# sourceMappingURL=chunk-deduplicator.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"chunk-deduplicator.d.ts","sourceRoot":"","sources":["../../../src/services/chunking/chunk-deduplicator.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,8BAA8B,CAAC;AAEpE,+CAA+C;AAC/C,MAAM,WAAW,mBAAmB;IAClC,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,KAAK,CAAC;QACrB,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,EAAE,MAAM,CAAC;QACd,QAAQ,EAAE,MAAM,EAAE,CAAC;QACnB,qBAAqB,EAAE,MAAM,CAAC;KAC/B,CAAC,CAAC;CACJ;AAED;;;;;;;;;;GAUG;AACH,wBAAgB,wBAAwB,CACtC,EAAE,EAAE,eAAe,EACnB,UAAU,CAAC,EAAE,MAAM,GAClB,mBAAmB,CAoCrB"}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Chunk Deduplication Service (HE-4 / Task 7.3)
|
|
3
|
+
*
|
|
4
|
+
* Identifies exact duplicate chunks by text_hash for deduplication
|
|
5
|
+
* in search results and storage analysis.
|
|
6
|
+
*
|
|
7
|
+
* @module services/chunking/chunk-deduplicator
|
|
8
|
+
*/
|
|
9
|
+
/**
|
|
10
|
+
* Find exact duplicate chunks by text_hash.
|
|
11
|
+
*
|
|
12
|
+
* Groups chunks that share the same text_hash and identifies
|
|
13
|
+
* duplicate groups (2+ chunks with the same hash). Optionally
|
|
14
|
+
* scoped to a single document.
|
|
15
|
+
*
|
|
16
|
+
* @param db - Database service instance
|
|
17
|
+
* @param documentId - Optional document ID to scope analysis
|
|
18
|
+
* @returns DeduplicationResult with total, unique counts, and duplicate groups
|
|
19
|
+
*/
|
|
20
|
+
export function findExactDuplicateChunks(db, documentId) {
|
|
21
|
+
const conn = db.getConnection();
|
|
22
|
+
const filter = documentId ? 'WHERE document_id = ?' : '';
|
|
23
|
+
const params = documentId ? [documentId] : [];
|
|
24
|
+
const groups = conn.prepare(`SELECT text_hash, COUNT(*) as count, GROUP_CONCAT(id) as chunk_ids
|
|
25
|
+
FROM chunks ${filter}
|
|
26
|
+
GROUP BY text_hash
|
|
27
|
+
HAVING COUNT(*) > 1
|
|
28
|
+
ORDER BY count DESC`).all(...params);
|
|
29
|
+
const duplicateGroups = groups.map(g => {
|
|
30
|
+
const chunkIds = g.chunk_ids.split(',');
|
|
31
|
+
return {
|
|
32
|
+
hash: g.text_hash,
|
|
33
|
+
count: g.count,
|
|
34
|
+
chunkIds,
|
|
35
|
+
representativeChunkId: chunkIds[0],
|
|
36
|
+
};
|
|
37
|
+
});
|
|
38
|
+
const totalRow = conn.prepare(`SELECT COUNT(*) as c FROM chunks ${filter}`).get(...params);
|
|
39
|
+
const uniqueRow = conn.prepare(`SELECT COUNT(DISTINCT text_hash) as c FROM chunks ${filter}`).get(...params);
|
|
40
|
+
return {
|
|
41
|
+
totalChunks: totalRow.c,
|
|
42
|
+
uniqueChunks: uniqueRow.c,
|
|
43
|
+
duplicateGroups,
|
|
44
|
+
};
|
|
45
|
+
}
|
|
46
|
+
//# sourceMappingURL=chunk-deduplicator.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"chunk-deduplicator.js","sourceRoot":"","sources":["../../../src/services/chunking/chunk-deduplicator.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAgBH;;;;;;;;;;GAUG;AACH,MAAM,UAAU,wBAAwB,CACtC,EAAmB,EACnB,UAAmB;IAEnB,MAAM,IAAI,GAAG,EAAE,CAAC,aAAa,EAAE,CAAC;IAChC,MAAM,MAAM,GAAG,UAAU,CAAC,CAAC,CAAC,uBAAuB,CAAC,CAAC,CAAC,EAAE,CAAC;IACzD,MAAM,MAAM,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IAE9C,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,CACzB;mBACe,MAAM;;;yBAGA,CACtB,CAAC,GAAG,CAAC,GAAG,MAAM,CAAmE,CAAC;IAEnF,MAAM,eAAe,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE;QACrC,MAAM,QAAQ,GAAG,CAAC,CAAC,SAAS,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QACxC,OAAO;YACL,IAAI,EAAE,CAAC,CAAC,SAAS;YACjB,KAAK,EAAE,CAAC,CAAC,KAAK;YACd,QAAQ;YACR,qBAAqB,EAAE,QAAQ,CAAC,CAAC,CAAC;SACnC,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,CAC3B,oCAAoC,MAAM,EAAE,CAC7C,CAAC,GAAG,CAAC,GAAG,MAAM,CAAkB,CAAC;IAElC,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAC5B,qDAAqD,MAAM,EAAE,CAC9D,CAAC,GAAG,CAAC,GAAG,MAAM,CAAkB,CAAC;IAElC,OAAO;QACL,WAAW,EAAE,QAAQ,CAAC,CAAC;QACvB,YAAY,EAAE,SAAS,CAAC,CAAC;QACzB,eAAe;KAChB,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Heading-Only Chunk Merger for Section-Aware Chunking
|
|
3
|
+
*
|
|
4
|
+
* Post-processing pass that merges tiny heading-only chunks with their
|
|
5
|
+
* nearest neighbor to improve embedding quality. Heading-only chunks
|
|
6
|
+
* (e.g., "## ARTICLE 5") produce poor embeddings because they lack
|
|
7
|
+
* semantic content.
|
|
8
|
+
*
|
|
9
|
+
* @module services/chunking/chunk-merger
|
|
10
|
+
*/
|
|
11
|
+
import { ChunkResult } from '../../models/chunk.js';
|
|
12
|
+
/**
 * Fold heading-only chunks that fall under the minimum size into a neighbor.
 *
 * Strategy:
 * - When a following chunk exists, the heading is merged into it (prepended)
 * - When there is none (last in array), it is merged into the previous chunk (appended)
 * - Runs of consecutive heading-only chunks cascade-merge via a while loop
 * - All chunks are re-indexed once merging is done
 *
 * @param chunks - Array of ChunkResult (not mutated; a new array is returned)
 * @param minChunkSize - Minimum character threshold (default: 100)
 * @returns New array in which heading-only chunks have been merged
 */
export declare function mergeHeadingOnlyChunks(chunks: ChunkResult[], minChunkSize?: number): ChunkResult[];
|
|
26
|
+
//# sourceMappingURL=chunk-merger.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"chunk-merger.d.ts","sourceRoot":"","sources":["../../../src/services/chunking/chunk-merger.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,EAAE,WAAW,EAAE,MAAM,uBAAuB,CAAC;AA2CpD;;;;;;;;;;;;GAYG;AACH,wBAAgB,sBAAsB,CACpC,MAAM,EAAE,WAAW,EAAE,EACrB,YAAY,GAAE,MAAY,GACzB,WAAW,EAAE,CAsCf"}
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Heading-Only Chunk Merger for Section-Aware Chunking
|
|
3
|
+
*
|
|
4
|
+
* Post-processing pass that merges tiny heading-only chunks with their
|
|
5
|
+
* nearest neighbor to improve embedding quality. Heading-only chunks
|
|
6
|
+
* (e.g., "## ARTICLE 5") produce poor embeddings because they lack
|
|
7
|
+
* semantic content.
|
|
8
|
+
*
|
|
9
|
+
* @module services/chunking/chunk-merger
|
|
10
|
+
*/
|
|
11
|
+
/**
 * Tell whether a chunk consists solely of a heading and its trimmed text
 * is shorter than `minChunkSize` characters.
 */
function isHeadingOnlyTiny(chunk, minChunkSize) {
    const kinds = chunk.contentTypes;
    if (kinds.length !== 1 || kinds[0] !== 'heading') {
        return false;
    }
    const trimmedLength = chunk.text.trim().length;
    return trimmedLength < minChunkSize;
}
|
|
19
|
+
/**
 * Fold `source` into the chunk that follows it, mutating `target` in place.
 *
 * - `source.text` is placed in front of `target.text`, separated by a blank line.
 * - The offset range is widened to cover both chunks.
 * - `headingContext`, `headingLevel`, `sectionPath` and `pageNumber` take the
 *   value from `source` unless it is null/undefined, in which case the
 *   value already on `target` is kept.
 * - `contentTypes` becomes the de-duplicated union, `source` entries first.
 *
 * @param source - Chunk being absorbed (not modified)
 * @param target - Following chunk that receives the merged data
 */
function mergeIntoNext(source, target) {
    target.text = `${source.text}\n\n${target.text}`;
    target.startOffset = Math.min(source.startOffset, target.startOffset);
    target.endOffset = Math.max(source.endOffset, target.endOffset);
    // Source wins for these fields; `??` (not `||`) so 0 and '' still count
    // as real values coming from the source.
    for (const field of ['headingContext', 'headingLevel', 'sectionPath', 'pageNumber']) {
        target[field] = source[field] ?? target[field];
    }
    // Union of content types, first occurrence order preserved by Set.
    target.contentTypes = [...new Set([...source.contentTypes, ...target.contentTypes])];
}
|
|
35
|
+
/**
 * Fold `source` into the chunk that precedes it, mutating `target` in place.
 *
 * `source.text` is added after `target.text` (separated by a blank line),
 * `endOffset` is extended to cover both chunks, and `contentTypes` becomes
 * the de-duplicated union with `target` entries first. The start offset and
 * the heading/section/page fields of `target` are left untouched.
 *
 * @param target - Preceding chunk that receives the merged data
 * @param source - Chunk being absorbed (not modified)
 */
function mergeIntoPrevious(target, source) {
    target.text = `${target.text}\n\n${source.text}`;
    target.endOffset = Math.max(target.endOffset, source.endOffset);
    // Union of content types, first occurrence order preserved by Set.
    target.contentTypes = [...new Set([...target.contentTypes, ...source.contentTypes])];
}
|
|
45
|
+
/**
 * Collapse heading-only chunks that fall below the size threshold into a
 * neighbouring chunk.
 *
 * How a qualifying chunk is handled:
 * - a following chunk exists: the heading is prepended to it
 * - it is the final chunk and a preceding chunk exists: it is appended to
 *   that preceding chunk
 * - it is the only chunk left: it is kept as-is
 *
 * After a merge the same position is examined again, so a run of
 * consecutive heading-only chunks keeps folding forward. When two or more
 * chunks were supplied, every surviving chunk gets `index` reassigned to
 * its position in the returned array.
 *
 * @param chunks - Input ChunkResult array; the array and its elements are
 *   copied (including `contentTypes`) before any merging takes place
 * @param minChunkSize - Minimum character threshold (default: 100)
 * @returns New array with heading-only chunks merged
 */
export function mergeHeadingOnlyChunks(chunks, minChunkSize = 100) {
    // Copy each chunk and its contentTypes array so merging never writes
    // through to the caller's objects.
    const cloneChunk = (chunk) => ({ ...chunk, contentTypes: [...chunk.contentTypes] });
    if (chunks.length <= 1) {
        return chunks.map(cloneChunk);
    }
    const merged = chunks.map(cloneChunk);
    let cursor = 0;
    while (cursor < merged.length) {
        const current = merged[cursor];
        if (!isHeadingOnlyTiny(current, minChunkSize)) {
            cursor += 1;
            continue;
        }
        const hasNext = cursor + 1 < merged.length;
        if (hasNext) {
            // Fold forward and stay put: the chunk now sitting at `cursor`
            // is the merged one and may itself still be heading-only.
            mergeIntoNext(current, merged[cursor + 1]);
            merged.splice(cursor, 1);
            continue;
        }
        if (cursor === 0) {
            // Sole remaining chunk - there is no neighbour to merge with.
            cursor += 1;
            continue;
        }
        // Trailing heading: fold backward, then step back so the chunk that
        // absorbed it is re-examined.
        mergeIntoPrevious(merged[cursor - 1], current);
        merged.splice(cursor, 1);
        cursor -= 1;
    }
    // Positions shifted during merging; renumber sequentially.
    merged.forEach((chunk, position) => {
        chunk.index = position;
    });
    return merged;
}
|
|
94
|
+
//# sourceMappingURL=chunk-merger.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"chunk-merger.js","sourceRoot":"","sources":["../../../src/services/chunking/chunk-merger.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAIH;;GAEG;AACH,SAAS,iBAAiB,CAAC,KAAkB,EAAE,YAAoB;IACjE,OAAO,CACL,KAAK,CAAC,YAAY,CAAC,MAAM,KAAK,CAAC;QAC/B,KAAK,CAAC,YAAY,CAAC,CAAC,CAAC,KAAK,SAAS;QACnC,KAAK,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,YAAY,CACxC,CAAC;AACJ,CAAC;AAED;;;GAGG;AACH,SAAS,aAAa,CAAC,MAAmB,EAAE,MAAmB;IAC7D,MAAM,CAAC,IAAI,GAAG,MAAM,CAAC,IAAI,GAAG,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC;IACjD,MAAM,CAAC,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,WAAW,EAAE,MAAM,CAAC,WAAW,CAAC,CAAC;IACtE,MAAM,CAAC,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,SAAS,EAAE,MAAM,CAAC,SAAS,CAAC,CAAC;IAChE,MAAM,CAAC,cAAc,GAAG,MAAM,CAAC,cAAc,IAAI,MAAM,CAAC,cAAc,CAAC;IACvE,MAAM,CAAC,YAAY,GAAG,MAAM,CAAC,YAAY,IAAI,MAAM,CAAC,YAAY,CAAC;IACjE,MAAM,CAAC,WAAW,GAAG,MAAM,CAAC,WAAW,IAAI,MAAM,CAAC,WAAW,CAAC;IAC9D,MAAM,CAAC,UAAU,GAAG,MAAM,CAAC,UAAU,IAAI,MAAM,CAAC,UAAU,CAAC;IAE3D,qCAAqC;IACrC,MAAM,KAAK,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,MAAM,CAAC,YAAY,EAAE,GAAG,MAAM,CAAC,YAAY,CAAC,CAAC,CAAC;IACxE,MAAM,CAAC,YAAY,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;AAC1C,CAAC;AAED;;GAEG;AACH,SAAS,iBAAiB,CAAC,MAAmB,EAAE,MAAmB;IACjE,MAAM,CAAC,IAAI,GAAG,MAAM,CAAC,IAAI,GAAG,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC;IACjD,MAAM,CAAC,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,SAAS,EAAE,MAAM,CAAC,SAAS,CAAC,CAAC;IAEhE,qCAAqC;IACrC,MAAM,KAAK,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,MAAM,CAAC,YAAY,EAAE,GAAG,MAAM,CAAC,YAAY,CAAC,CAAC,CAAC;IACxE,MAAM,CAAC,YAAY,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;AAC1C,CAAC;AAED;;;;;;;;;;;;GAYG;AACH,MAAM,UAAU,sBAAsB,CACpC,MAAqB,EACrB,eAAuB,GAAG;IAE1B,IAAI,MAAM,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;QACvB,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,YAAY,EAAE,CAAC,GAAG,CAAC,CAAC,YAAY,CAAC,EAAE,CAAC,CAAC,CAAC;IACxE,CAAC;IAED,yEAAyE;IACzE,MAAM,MAAM,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,YAAY,EAAE,CAAC,GAAG,CAAC,CAAC,YAAY,CAAC,EAAE,CAAC,CAAC,CAAC;IAC9E,IAAI,CAAC,GAAG,CAAC,CAAC;IAEV,O
AAO,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC;QACzB,IAAI,CAAC,iBAAiB,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,YAAY,CAAC,EAAE,CAAC;YAChD,CAAC,EAAE,CAAC;YACJ,SAAS;QACX,CAAC;QAED,IAAI,CAAC,GAAG,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC1B,wBAAwB;YACxB,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;YACxC,MAAM,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;YACpB,8DAA8D;QAChE,CAAC;aAAM,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;YACjB,mCAAmC;YACnC,iBAAiB,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;YAC5C,MAAM,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;YACpB,+DAA+D;YAC/D,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC;QACzB,CAAC;aAAM,CAAC;YACN,iDAAiD;YACjD,CAAC,EAAE,CAAC;QACN,CAAC;IACH,CAAC;IAED,sBAAsB;IACtB,KAAK,IAAI,GAAG,GAAG,CAAC,EAAE,GAAG,GAAG,MAAM,CAAC,MAAM,EAAE,GAAG,EAAE,EAAE,CAAC;QAC7C,MAAM,CAAC,GAAG,CAAC,CAAC,KAAK,GAAG,GAAG,CAAC;IAC1B,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Hybrid Section-Aware Chunking Service for OCR Provenance MCP System
|
|
3
|
+
*
|
|
4
|
+
* Uses markdown structure (headings, paragraphs, tables), JSON block data
|
|
5
|
+
* (for atomic region detection), and page offsets (for page tracking) to
|
|
6
|
+
* produce semantically coherent chunks with provenance records (chain_depth=2).
|
|
7
|
+
*
|
|
8
|
+
* @module services/chunking/chunker
|
|
9
|
+
*/
|
|
10
|
+
import { ChunkResult, ChunkingConfig } from '../../models/chunk.js';
|
|
11
|
+
import { PageOffset } from '../../models/document.js';
|
|
12
|
+
import { CreateProvenanceParams } from '../../models/provenance.js';
|
|
13
|
+
/**
 * Input bundle accepted by `createChunkProvenance` when building the
 * provenance record for a single chunk.
 */
export interface ChunkProvenanceParams {
    /** Chunk result carrying the text and position information. */
    chunk: ChunkResult;
    /** Hash of `chunk.text`, computed by the caller (format: `sha256:...`). */
    chunkTextHash: string;
    /** ID of the parent provenance record: the OCR result (chain_depth=1). */
    ocrProvenanceId: string;
    /** ID of the root provenance record: the document (chain_depth=0). */
    documentProvenanceId: string;
    /** Hash of the complete OCR text (used as input_hash). */
    ocrContentHash: string;
    /** Hash of the original file. */
    fileHash: string;
    /** How many chunks were produced in total. */
    totalChunks: number;
    /** Optional processing duration, in milliseconds. */
    processingDurationMs?: number;
    /** Optional chunking config that was used (defaults to DEFAULT_CHUNKING_CONFIG). */
    config?: ChunkingConfig;
}
|
|
36
|
+
/**
 * Hybrid section-aware chunking entry point.
 *
 * Combines three inputs to produce semantically coherent chunks: the
 * markdown structure (headings, paragraphs, tables), the JSON block data
 * (for atomic region detection), and the page offsets (for page tracking).
 *
 * @param text - Full markdown text from OCR output
 * @param pageOffsets - Page offset information used to assign page numbers
 * @param jsonBlocks - JSON block hierarchy from Datalab OCR, or null when unavailable
 * @param config - Optional chunking configuration (documented default: 2000 chars, 10% overlap)
 * @returns Array of ChunkResult with section context, content types, and page info
 */
export declare function chunkHybridSectionAware(
    text: string,
    pageOffsets: PageOffset[],
    jsonBlocks: Record<string, unknown> | null,
    config?: ChunkingConfig
): ChunkResult[];
|
|
50
|
+
/**
 * Build the provenance parameters for one chunk.
 *
 * The returned CreateProvenanceParams object is intended for creating a
 * CHUNK provenance record (chain_depth=2).
 *
 * @param params - Chunk provenance parameters (see ChunkProvenanceParams)
 * @returns CreateProvenanceParams ready to hand to insertProvenance
 */
export declare function createChunkProvenance(
    params: ChunkProvenanceParams
): CreateProvenanceParams;
|
|
60
|
+
export type { ChunkResult, ChunkingConfig } from '../../models/chunk.js';
|
|
61
|
+
export { DEFAULT_CHUNKING_CONFIG } from '../../models/chunk.js';
|
|
62
|
+
//# sourceMappingURL=chunker.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"chunker.d.ts","sourceRoot":"","sources":["../../../src/services/chunking/chunker.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,EACL,WAAW,EACX,cAAc,EAGf,MAAM,uBAAuB,CAAC;AAC/B,OAAO,EAAE,UAAU,EAAE,MAAM,0BAA0B,CAAC;AACtD,OAAO,EAIL,sBAAsB,EACvB,MAAM,4BAA4B,CAAC;AAqDpC;;GAEG;AACH,MAAM,WAAW,qBAAqB;IACpC,yDAAyD;IACzD,KAAK,EAAE,WAAW,CAAC;IACnB,mDAAmD;IACnD,aAAa,EAAE,MAAM,CAAC;IACtB,uDAAuD;IACvD,eAAe,EAAE,MAAM,CAAC;IACxB,kDAAkD;IAClD,oBAAoB,EAAE,MAAM,CAAC;IAC7B,yCAAyC;IACzC,cAAc,EAAE,MAAM,CAAC;IACvB,4BAA4B;IAC5B,QAAQ,EAAE,MAAM,CAAC;IACjB,sCAAsC;IACtC,WAAW,EAAE,MAAM,CAAC;IACpB,0CAA0C;IAC1C,oBAAoB,CAAC,EAAE,MAAM,CAAC;IAC9B,iEAAiE;IACjE,MAAM,CAAC,EAAE,cAAc,CAAC;CACzB;AA4ID;;;;;;;;;;;;GAYG;AACH,wBAAgB,uBAAuB,CACrC,IAAI,EAAE,MAAM,EACZ,WAAW,EAAE,UAAU,EAAE,EACzB,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,EAC1C,MAAM,GAAE,cAAwC,GAC/C,WAAW,EAAE,CA4Zf;AAMD;;;;;;;;GAQG;AACH,wBAAgB,qBAAqB,CAAC,MAAM,EAAE,qBAAqB,GAAG,sBAAsB,CAuD3F;AAGD,YAAY,EAAE,WAAW,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AACzE,OAAO,EAAE,uBAAuB,EAAE,MAAM,uBAAuB,CAAC"}
|