ocr-provenance-mcp 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ocr-provenance-mcp might be problematic. Click here for more details.
- package/.env.example +55 -0
- package/LICENSE +78 -0
- package/README.md +1154 -0
- package/dist/bin-http.d.ts +24 -0
- package/dist/bin-http.d.ts.map +1 -0
- package/dist/bin-http.js +275 -0
- package/dist/bin-http.js.map +1 -0
- package/dist/bin-setup.d.ts +11 -0
- package/dist/bin-setup.d.ts.map +1 -0
- package/dist/bin-setup.js +610 -0
- package/dist/bin-setup.js.map +1 -0
- package/dist/bin.d.ts +16 -0
- package/dist/bin.d.ts.map +1 -0
- package/dist/bin.js +16 -0
- package/dist/bin.js.map +1 -0
- package/dist/index.d.ts +13 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +90 -0
- package/dist/index.js.map +1 -0
- package/dist/models/chunk.d.ts +136 -0
- package/dist/models/chunk.d.ts.map +1 -0
- package/dist/models/chunk.js +27 -0
- package/dist/models/chunk.js.map +1 -0
- package/dist/models/cluster.d.ts +79 -0
- package/dist/models/cluster.d.ts.map +1 -0
- package/dist/models/cluster.js +10 -0
- package/dist/models/cluster.js.map +1 -0
- package/dist/models/comparison.d.ts +62 -0
- package/dist/models/comparison.d.ts.map +1 -0
- package/dist/models/comparison.js +8 -0
- package/dist/models/comparison.js.map +1 -0
- package/dist/models/document.d.ts +104 -0
- package/dist/models/document.d.ts.map +1 -0
- package/dist/models/document.js +15 -0
- package/dist/models/document.js.map +1 -0
- package/dist/models/embedding.d.ts +87 -0
- package/dist/models/embedding.d.ts.map +1 -0
- package/dist/models/embedding.js +23 -0
- package/dist/models/embedding.js.map +1 -0
- package/dist/models/extraction.d.ts +15 -0
- package/dist/models/extraction.d.ts.map +1 -0
- package/dist/models/extraction.js +2 -0
- package/dist/models/extraction.js.map +1 -0
- package/dist/models/form-fill.d.ts +23 -0
- package/dist/models/form-fill.d.ts.map +1 -0
- package/dist/models/form-fill.js +2 -0
- package/dist/models/form-fill.js.map +1 -0
- package/dist/models/image.d.ts +177 -0
- package/dist/models/image.d.ts.map +1 -0
- package/dist/models/image.js +8 -0
- package/dist/models/image.js.map +1 -0
- package/dist/models/index.d.ts +14 -0
- package/dist/models/index.d.ts.map +1 -0
- package/dist/models/index.js +22 -0
- package/dist/models/index.js.map +1 -0
- package/dist/models/provenance.d.ts +174 -0
- package/dist/models/provenance.d.ts.map +1 -0
- package/dist/models/provenance.js +53 -0
- package/dist/models/provenance.js.map +1 -0
- package/dist/models/uploaded-file.d.ts +20 -0
- package/dist/models/uploaded-file.d.ts.map +1 -0
- package/dist/models/uploaded-file.js +2 -0
- package/dist/models/uploaded-file.js.map +1 -0
- package/dist/server/errors.d.ts +93 -0
- package/dist/server/errors.d.ts.map +1 -0
- package/dist/server/errors.js +256 -0
- package/dist/server/errors.js.map +1 -0
- package/dist/server/events.d.ts +36 -0
- package/dist/server/events.d.ts.map +1 -0
- package/dist/server/events.js +48 -0
- package/dist/server/events.js.map +1 -0
- package/dist/server/permissions.d.ts +26 -0
- package/dist/server/permissions.d.ts.map +1 -0
- package/dist/server/permissions.js +194 -0
- package/dist/server/permissions.js.map +1 -0
- package/dist/server/register-tools.d.ts +25 -0
- package/dist/server/register-tools.d.ts.map +1 -0
- package/dist/server/register-tools.js +102 -0
- package/dist/server/register-tools.js.map +1 -0
- package/dist/server/startup.d.ts +16 -0
- package/dist/server/startup.d.ts.map +1 -0
- package/dist/server/startup.js +37 -0
- package/dist/server/startup.js.map +1 -0
- package/dist/server/state.d.ts +166 -0
- package/dist/server/state.d.ts.map +1 -0
- package/dist/server/state.js +424 -0
- package/dist/server/state.js.map +1 -0
- package/dist/server/transports/http-transport.d.ts +37 -0
- package/dist/server/transports/http-transport.d.ts.map +1 -0
- package/dist/server/transports/http-transport.js +204 -0
- package/dist/server/transports/http-transport.js.map +1 -0
- package/dist/server/transports/index.d.ts +9 -0
- package/dist/server/transports/index.d.ts.map +1 -0
- package/dist/server/transports/index.js +9 -0
- package/dist/server/transports/index.js.map +1 -0
- package/dist/server/transports/session-manager.d.ts +40 -0
- package/dist/server/transports/session-manager.d.ts.map +1 -0
- package/dist/server/transports/session-manager.js +74 -0
- package/dist/server/transports/session-manager.js.map +1 -0
- package/dist/server/types.d.ts +82 -0
- package/dist/server/types.d.ts.map +1 -0
- package/dist/server/types.js +14 -0
- package/dist/server/types.js.map +1 -0
- package/dist/services/audit.d.ts +26 -0
- package/dist/services/audit.d.ts.map +1 -0
- package/dist/services/audit.js +43 -0
- package/dist/services/audit.js.map +1 -0
- package/dist/services/chunking/chunk-deduplicator.d.ts +33 -0
- package/dist/services/chunking/chunk-deduplicator.d.ts.map +1 -0
- package/dist/services/chunking/chunk-deduplicator.js +46 -0
- package/dist/services/chunking/chunk-deduplicator.js.map +1 -0
- package/dist/services/chunking/chunk-merger.d.ts +26 -0
- package/dist/services/chunking/chunk-merger.d.ts.map +1 -0
- package/dist/services/chunking/chunk-merger.js +94 -0
- package/dist/services/chunking/chunk-merger.js.map +1 -0
- package/dist/services/chunking/chunker.d.ts +62 -0
- package/dist/services/chunking/chunker.d.ts.map +1 -0
- package/dist/services/chunking/chunker.js +566 -0
- package/dist/services/chunking/chunker.js.map +1 -0
- package/dist/services/chunking/heading-normalizer.d.ts +33 -0
- package/dist/services/chunking/heading-normalizer.d.ts.map +1 -0
- package/dist/services/chunking/heading-normalizer.js +101 -0
- package/dist/services/chunking/heading-normalizer.js.map +1 -0
- package/dist/services/chunking/json-block-analyzer.d.ts +163 -0
- package/dist/services/chunking/json-block-analyzer.d.ts.map +1 -0
- package/dist/services/chunking/json-block-analyzer.js +1033 -0
- package/dist/services/chunking/json-block-analyzer.js.map +1 -0
- package/dist/services/chunking/markdown-parser.d.ts +75 -0
- package/dist/services/chunking/markdown-parser.d.ts.map +1 -0
- package/dist/services/chunking/markdown-parser.js +428 -0
- package/dist/services/chunking/markdown-parser.js.map +1 -0
- package/dist/services/chunking/text-normalizer.d.ts +20 -0
- package/dist/services/chunking/text-normalizer.d.ts.map +1 -0
- package/dist/services/chunking/text-normalizer.js +36 -0
- package/dist/services/chunking/text-normalizer.js.map +1 -0
- package/dist/services/clm/contract-schemas.d.ts +36 -0
- package/dist/services/clm/contract-schemas.d.ts.map +1 -0
- package/dist/services/clm/contract-schemas.js +92 -0
- package/dist/services/clm/contract-schemas.js.map +1 -0
- package/dist/services/clm/summarization.d.ts +46 -0
- package/dist/services/clm/summarization.d.ts.map +1 -0
- package/dist/services/clm/summarization.js +61 -0
- package/dist/services/clm/summarization.js.map +1 -0
- package/dist/services/clustering/clustering-service.d.ts +58 -0
- package/dist/services/clustering/clustering-service.d.ts.map +1 -0
- package/dist/services/clustering/clustering-service.js +467 -0
- package/dist/services/clustering/clustering-service.js.map +1 -0
- package/dist/services/comparison/diff-service.d.ts +41 -0
- package/dist/services/comparison/diff-service.d.ts.map +1 -0
- package/dist/services/comparison/diff-service.js +120 -0
- package/dist/services/comparison/diff-service.js.map +1 -0
- package/dist/services/embedding/embedder.d.ts +55 -0
- package/dist/services/embedding/embedder.d.ts.map +1 -0
- package/dist/services/embedding/embedder.js +202 -0
- package/dist/services/embedding/embedder.js.map +1 -0
- package/dist/services/embedding/nomic.d.ts +67 -0
- package/dist/services/embedding/nomic.d.ts.map +1 -0
- package/dist/services/embedding/nomic.js +280 -0
- package/dist/services/embedding/nomic.js.map +1 -0
- package/dist/services/gemini/circuit-breaker.d.ts +106 -0
- package/dist/services/gemini/circuit-breaker.d.ts.map +1 -0
- package/dist/services/gemini/circuit-breaker.js +237 -0
- package/dist/services/gemini/circuit-breaker.js.map +1 -0
- package/dist/services/gemini/client.d.ts +173 -0
- package/dist/services/gemini/client.d.ts.map +1 -0
- package/dist/services/gemini/client.js +483 -0
- package/dist/services/gemini/client.js.map +1 -0
- package/dist/services/gemini/config.d.ts +116 -0
- package/dist/services/gemini/config.d.ts.map +1 -0
- package/dist/services/gemini/config.js +118 -0
- package/dist/services/gemini/config.js.map +1 -0
- package/dist/services/gemini/index.d.ts +9 -0
- package/dist/services/gemini/index.d.ts.map +1 -0
- package/dist/services/gemini/index.js +13 -0
- package/dist/services/gemini/index.js.map +1 -0
- package/dist/services/gemini/rate-limiter.d.ts +62 -0
- package/dist/services/gemini/rate-limiter.d.ts.map +1 -0
- package/dist/services/gemini/rate-limiter.js +120 -0
- package/dist/services/gemini/rate-limiter.js.map +1 -0
- package/dist/services/images/extractor.d.ts +88 -0
- package/dist/services/images/extractor.d.ts.map +1 -0
- package/dist/services/images/extractor.js +340 -0
- package/dist/services/images/extractor.js.map +1 -0
- package/dist/services/images/optimizer.d.ts +130 -0
- package/dist/services/images/optimizer.d.ts.map +1 -0
- package/dist/services/images/optimizer.js +228 -0
- package/dist/services/images/optimizer.js.map +1 -0
- package/dist/services/ocr/datalab.d.ts +64 -0
- package/dist/services/ocr/datalab.d.ts.map +1 -0
- package/dist/services/ocr/datalab.js +425 -0
- package/dist/services/ocr/datalab.js.map +1 -0
- package/dist/services/ocr/errors.d.ts +38 -0
- package/dist/services/ocr/errors.d.ts.map +1 -0
- package/dist/services/ocr/errors.js +83 -0
- package/dist/services/ocr/errors.js.map +1 -0
- package/dist/services/ocr/file-manager.d.ts +76 -0
- package/dist/services/ocr/file-manager.d.ts.map +1 -0
- package/dist/services/ocr/file-manager.js +238 -0
- package/dist/services/ocr/file-manager.js.map +1 -0
- package/dist/services/ocr/form-fill.d.ts +48 -0
- package/dist/services/ocr/form-fill.d.ts.map +1 -0
- package/dist/services/ocr/form-fill.js +213 -0
- package/dist/services/ocr/form-fill.js.map +1 -0
- package/dist/services/ocr/processor.d.ts +95 -0
- package/dist/services/ocr/processor.d.ts.map +1 -0
- package/dist/services/ocr/processor.js +259 -0
- package/dist/services/ocr/processor.js.map +1 -0
- package/dist/services/provenance/agent-metadata.d.ts +82 -0
- package/dist/services/provenance/agent-metadata.d.ts.map +1 -0
- package/dist/services/provenance/agent-metadata.js +106 -0
- package/dist/services/provenance/agent-metadata.js.map +1 -0
- package/dist/services/provenance/chain-hash.d.ts +57 -0
- package/dist/services/provenance/chain-hash.d.ts.map +1 -0
- package/dist/services/provenance/chain-hash.js +131 -0
- package/dist/services/provenance/chain-hash.js.map +1 -0
- package/dist/services/provenance/exporter.d.ts +202 -0
- package/dist/services/provenance/exporter.d.ts.map +1 -0
- package/dist/services/provenance/exporter.js +457 -0
- package/dist/services/provenance/exporter.js.map +1 -0
- package/dist/services/provenance/index.d.ts +15 -0
- package/dist/services/provenance/index.d.ts.map +1 -0
- package/dist/services/provenance/index.js +17 -0
- package/dist/services/provenance/index.js.map +1 -0
- package/dist/services/provenance/tracker.d.ts +138 -0
- package/dist/services/provenance/tracker.d.ts.map +1 -0
- package/dist/services/provenance/tracker.js +293 -0
- package/dist/services/provenance/tracker.js.map +1 -0
- package/dist/services/provenance/verifier.d.ts +153 -0
- package/dist/services/provenance/verifier.d.ts.map +1 -0
- package/dist/services/provenance/verifier.js +536 -0
- package/dist/services/provenance/verifier.js.map +1 -0
- package/dist/services/python-pool.d.ts +70 -0
- package/dist/services/python-pool.d.ts.map +1 -0
- package/dist/services/python-pool.js +265 -0
- package/dist/services/python-pool.js.map +1 -0
- package/dist/services/search/bm25.d.ts +180 -0
- package/dist/services/search/bm25.d.ts.map +1 -0
- package/dist/services/search/bm25.js +656 -0
- package/dist/services/search/bm25.js.map +1 -0
- package/dist/services/search/fusion.d.ts +103 -0
- package/dist/services/search/fusion.d.ts.map +1 -0
- package/dist/services/search/fusion.js +122 -0
- package/dist/services/search/fusion.js.map +1 -0
- package/dist/services/search/local-reranker.d.ts +30 -0
- package/dist/services/search/local-reranker.d.ts.map +1 -0
- package/dist/services/search/local-reranker.js +123 -0
- package/dist/services/search/local-reranker.js.map +1 -0
- package/dist/services/search/quality.d.ts +11 -0
- package/dist/services/search/quality.d.ts.map +1 -0
- package/dist/services/search/quality.js +17 -0
- package/dist/services/search/quality.js.map +1 -0
- package/dist/services/search/query-classifier.d.ts +34 -0
- package/dist/services/search/query-classifier.d.ts.map +1 -0
- package/dist/services/search/query-classifier.js +114 -0
- package/dist/services/search/query-classifier.js.map +1 -0
- package/dist/services/search/query-expander.d.ts +73 -0
- package/dist/services/search/query-expander.d.ts.map +1 -0
- package/dist/services/search/query-expander.js +281 -0
- package/dist/services/search/query-expander.js.map +1 -0
- package/dist/services/search/reranker.d.ts +44 -0
- package/dist/services/search/reranker.d.ts.map +1 -0
- package/dist/services/search/reranker.js +101 -0
- package/dist/services/search/reranker.js.map +1 -0
- package/dist/services/storage/database/annotation-operations.d.ts +113 -0
- package/dist/services/storage/database/annotation-operations.d.ts.map +1 -0
- package/dist/services/storage/database/annotation-operations.js +177 -0
- package/dist/services/storage/database/annotation-operations.js.map +1 -0
- package/dist/services/storage/database/approval-operations.d.ts +132 -0
- package/dist/services/storage/database/approval-operations.d.ts.map +1 -0
- package/dist/services/storage/database/approval-operations.js +206 -0
- package/dist/services/storage/database/approval-operations.js.map +1 -0
- package/dist/services/storage/database/chunk-operations.d.ts +132 -0
- package/dist/services/storage/database/chunk-operations.d.ts.map +1 -0
- package/dist/services/storage/database/chunk-operations.js +306 -0
- package/dist/services/storage/database/chunk-operations.js.map +1 -0
- package/dist/services/storage/database/cluster-operations.d.ts +97 -0
- package/dist/services/storage/database/cluster-operations.d.ts.map +1 -0
- package/dist/services/storage/database/cluster-operations.js +258 -0
- package/dist/services/storage/database/cluster-operations.js.map +1 -0
- package/dist/services/storage/database/comparison-operations.d.ts +41 -0
- package/dist/services/storage/database/comparison-operations.d.ts.map +1 -0
- package/dist/services/storage/database/comparison-operations.js +65 -0
- package/dist/services/storage/database/comparison-operations.js.map +1 -0
- package/dist/services/storage/database/converters.d.ts +36 -0
- package/dist/services/storage/database/converters.d.ts.map +1 -0
- package/dist/services/storage/database/converters.js +244 -0
- package/dist/services/storage/database/converters.js.map +1 -0
- package/dist/services/storage/database/document-operations.d.ts +145 -0
- package/dist/services/storage/database/document-operations.d.ts.map +1 -0
- package/dist/services/storage/database/document-operations.js +498 -0
- package/dist/services/storage/database/document-operations.js.map +1 -0
- package/dist/services/storage/database/embedding-operations.d.ts +130 -0
- package/dist/services/storage/database/embedding-operations.d.ts.map +1 -0
- package/dist/services/storage/database/embedding-operations.js +315 -0
- package/dist/services/storage/database/embedding-operations.js.map +1 -0
- package/dist/services/storage/database/extraction-operations.d.ts +47 -0
- package/dist/services/storage/database/extraction-operations.d.ts.map +1 -0
- package/dist/services/storage/database/extraction-operations.js +85 -0
- package/dist/services/storage/database/extraction-operations.js.map +1 -0
- package/dist/services/storage/database/form-fill-operations.d.ts +58 -0
- package/dist/services/storage/database/form-fill-operations.d.ts.map +1 -0
- package/dist/services/storage/database/form-fill-operations.js +116 -0
- package/dist/services/storage/database/form-fill-operations.js.map +1 -0
- package/dist/services/storage/database/helpers.d.ts +29 -0
- package/dist/services/storage/database/helpers.d.ts.map +1 -0
- package/dist/services/storage/database/helpers.js +55 -0
- package/dist/services/storage/database/helpers.js.map +1 -0
- package/dist/services/storage/database/image-operations.d.ts +202 -0
- package/dist/services/storage/database/image-operations.d.ts.map +1 -0
- package/dist/services/storage/database/image-operations.js +484 -0
- package/dist/services/storage/database/image-operations.js.map +1 -0
- package/dist/services/storage/database/index.d.ts +13 -0
- package/dist/services/storage/database/index.d.ts.map +1 -0
- package/dist/services/storage/database/index.js +16 -0
- package/dist/services/storage/database/index.js.map +1 -0
- package/dist/services/storage/database/lock-operations.d.ts +59 -0
- package/dist/services/storage/database/lock-operations.d.ts.map +1 -0
- package/dist/services/storage/database/lock-operations.js +89 -0
- package/dist/services/storage/database/lock-operations.js.map +1 -0
- package/dist/services/storage/database/obligation-operations.d.ts +88 -0
- package/dist/services/storage/database/obligation-operations.d.ts.map +1 -0
- package/dist/services/storage/database/obligation-operations.js +206 -0
- package/dist/services/storage/database/obligation-operations.js.map +1 -0
- package/dist/services/storage/database/ocr-operations.d.ts +33 -0
- package/dist/services/storage/database/ocr-operations.d.ts.map +1 -0
- package/dist/services/storage/database/ocr-operations.js +70 -0
- package/dist/services/storage/database/ocr-operations.js.map +1 -0
- package/dist/services/storage/database/playbook-operations.d.ts +72 -0
- package/dist/services/storage/database/playbook-operations.d.ts.map +1 -0
- package/dist/services/storage/database/playbook-operations.js +247 -0
- package/dist/services/storage/database/playbook-operations.js.map +1 -0
- package/dist/services/storage/database/provenance-operations.d.ts +112 -0
- package/dist/services/storage/database/provenance-operations.d.ts.map +1 -0
- package/dist/services/storage/database/provenance-operations.js +251 -0
- package/dist/services/storage/database/provenance-operations.js.map +1 -0
- package/dist/services/storage/database/service.d.ts +142 -0
- package/dist/services/storage/database/service.d.ts.map +1 -0
- package/dist/services/storage/database/service.js +310 -0
- package/dist/services/storage/database/service.js.map +1 -0
- package/dist/services/storage/database/static-operations.d.ts +30 -0
- package/dist/services/storage/database/static-operations.d.ts.map +1 -0
- package/dist/services/storage/database/static-operations.js +218 -0
- package/dist/services/storage/database/static-operations.js.map +1 -0
- package/dist/services/storage/database/stats-operations.d.ts +101 -0
- package/dist/services/storage/database/stats-operations.d.ts.map +1 -0
- package/dist/services/storage/database/stats-operations.js +394 -0
- package/dist/services/storage/database/stats-operations.js.map +1 -0
- package/dist/services/storage/database/tag-operations.d.ts +76 -0
- package/dist/services/storage/database/tag-operations.d.ts.map +1 -0
- package/dist/services/storage/database/tag-operations.js +178 -0
- package/dist/services/storage/database/tag-operations.js.map +1 -0
- package/dist/services/storage/database/types.d.ts +286 -0
- package/dist/services/storage/database/types.d.ts.map +1 -0
- package/dist/services/storage/database/types.js +39 -0
- package/dist/services/storage/database/types.js.map +1 -0
- package/dist/services/storage/database/upload-operations.d.ts +71 -0
- package/dist/services/storage/database/upload-operations.d.ts.map +1 -0
- package/dist/services/storage/database/upload-operations.js +124 -0
- package/dist/services/storage/database/upload-operations.js.map +1 -0
- package/dist/services/storage/database/user-operations.d.ts +102 -0
- package/dist/services/storage/database/user-operations.d.ts.map +1 -0
- package/dist/services/storage/database/user-operations.js +151 -0
- package/dist/services/storage/database/user-operations.js.map +1 -0
- package/dist/services/storage/database/workflow-operations.d.ts +98 -0
- package/dist/services/storage/database/workflow-operations.d.ts.map +1 -0
- package/dist/services/storage/database/workflow-operations.js +157 -0
- package/dist/services/storage/database/workflow-operations.js.map +1 -0
- package/dist/services/storage/database.d.ts +16 -0
- package/dist/services/storage/database.d.ts.map +1 -0
- package/dist/services/storage/database.js +15 -0
- package/dist/services/storage/database.js.map +1 -0
- package/dist/services/storage/index.d.ts +10 -0
- package/dist/services/storage/index.d.ts.map +1 -0
- package/dist/services/storage/index.js +10 -0
- package/dist/services/storage/index.js.map +1 -0
- package/dist/services/storage/migrations/index.d.ts +16 -0
- package/dist/services/storage/migrations/index.d.ts.map +1 -0
- package/dist/services/storage/migrations/index.js +20 -0
- package/dist/services/storage/migrations/index.js.map +1 -0
- package/dist/services/storage/migrations/operations.d.ts +40 -0
- package/dist/services/storage/migrations/operations.d.ts.map +1 -0
- package/dist/services/storage/migrations/operations.js +2910 -0
- package/dist/services/storage/migrations/operations.js.map +1 -0
- package/dist/services/storage/migrations/schema-definitions.d.ts +306 -0
- package/dist/services/storage/migrations/schema-definitions.d.ts.map +1 -0
- package/dist/services/storage/migrations/schema-definitions.js +1006 -0
- package/dist/services/storage/migrations/schema-definitions.js.map +1 -0
- package/dist/services/storage/migrations/schema-helpers.d.ts +50 -0
- package/dist/services/storage/migrations/schema-helpers.d.ts.map +1 -0
- package/dist/services/storage/migrations/schema-helpers.js +176 -0
- package/dist/services/storage/migrations/schema-helpers.js.map +1 -0
- package/dist/services/storage/migrations/types.d.ts +15 -0
- package/dist/services/storage/migrations/types.d.ts.map +1 -0
- package/dist/services/storage/migrations/types.js +21 -0
- package/dist/services/storage/migrations/types.js.map +1 -0
- package/dist/services/storage/migrations/verification.d.ts +20 -0
- package/dist/services/storage/migrations/verification.d.ts.map +1 -0
- package/dist/services/storage/migrations/verification.js +78 -0
- package/dist/services/storage/migrations/verification.js.map +1 -0
- package/dist/services/storage/migrations.d.ts +16 -0
- package/dist/services/storage/migrations.d.ts.map +1 -0
- package/dist/services/storage/migrations.js +17 -0
- package/dist/services/storage/migrations.js.map +1 -0
- package/dist/services/storage/types.d.ts +12 -0
- package/dist/services/storage/types.d.ts.map +1 -0
- package/dist/services/storage/types.js +5 -0
- package/dist/services/storage/types.js.map +1 -0
- package/dist/services/storage/vector.d.ts +208 -0
- package/dist/services/storage/vector.d.ts.map +1 -0
- package/dist/services/storage/vector.js +526 -0
- package/dist/services/storage/vector.js.map +1 -0
- package/dist/services/vlm/pipeline.d.ts +194 -0
- package/dist/services/vlm/pipeline.d.ts.map +1 -0
- package/dist/services/vlm/pipeline.js +800 -0
- package/dist/services/vlm/pipeline.js.map +1 -0
- package/dist/services/vlm/prompts.d.ts +171 -0
- package/dist/services/vlm/prompts.d.ts.map +1 -0
- package/dist/services/vlm/prompts.js +229 -0
- package/dist/services/vlm/prompts.js.map +1 -0
- package/dist/services/vlm/service.d.ts +174 -0
- package/dist/services/vlm/service.d.ts.map +1 -0
- package/dist/services/vlm/service.js +256 -0
- package/dist/services/vlm/service.js.map +1 -0
- package/dist/services/webhook-delivery.d.ts +4 -0
- package/dist/services/webhook-delivery.d.ts.map +1 -0
- package/dist/services/webhook-delivery.js +140 -0
- package/dist/services/webhook-delivery.js.map +1 -0
- package/dist/tools/chunks.d.ts +19 -0
- package/dist/tools/chunks.d.ts.map +1 -0
- package/dist/tools/chunks.js +392 -0
- package/dist/tools/chunks.js.map +1 -0
- package/dist/tools/clm.d.ts +16 -0
- package/dist/tools/clm.d.ts.map +1 -0
- package/dist/tools/clm.js +668 -0
- package/dist/tools/clm.js.map +1 -0
- package/dist/tools/clustering.d.ts +13 -0
- package/dist/tools/clustering.d.ts.map +1 -0
- package/dist/tools/clustering.js +498 -0
- package/dist/tools/clustering.js.map +1 -0
- package/dist/tools/collaboration.d.ts +15 -0
- package/dist/tools/collaboration.d.ts.map +1 -0
- package/dist/tools/collaboration.js +516 -0
- package/dist/tools/collaboration.js.map +1 -0
- package/dist/tools/comparison.d.ts +13 -0
- package/dist/tools/comparison.d.ts.map +1 -0
- package/dist/tools/comparison.js +735 -0
- package/dist/tools/comparison.js.map +1 -0
- package/dist/tools/compliance.d.ts +15 -0
- package/dist/tools/compliance.d.ts.map +1 -0
- package/dist/tools/compliance.js +640 -0
- package/dist/tools/compliance.js.map +1 -0
- package/dist/tools/config.d.ts +19 -0
- package/dist/tools/config.d.ts.map +1 -0
- package/dist/tools/config.js +213 -0
- package/dist/tools/config.js.map +1 -0
- package/dist/tools/database.d.ts +62 -0
- package/dist/tools/database.d.ts.map +1 -0
- package/dist/tools/database.js +288 -0
- package/dist/tools/database.js.map +1 -0
- package/dist/tools/documents.d.ts +61 -0
- package/dist/tools/documents.d.ts.map +1 -0
- package/dist/tools/documents.js +1624 -0
- package/dist/tools/documents.js.map +1 -0
- package/dist/tools/embeddings.d.ts +14 -0
- package/dist/tools/embeddings.d.ts.map +1 -0
- package/dist/tools/embeddings.js +626 -0
- package/dist/tools/embeddings.js.map +1 -0
- package/dist/tools/evaluation.d.ts +25 -0
- package/dist/tools/evaluation.d.ts.map +1 -0
- package/dist/tools/evaluation.js +523 -0
- package/dist/tools/evaluation.js.map +1 -0
- package/dist/tools/events.d.ts +16 -0
- package/dist/tools/events.d.ts.map +1 -0
- package/dist/tools/events.js +493 -0
- package/dist/tools/events.js.map +1 -0
- package/dist/tools/extraction-structured.d.ts +13 -0
- package/dist/tools/extraction-structured.d.ts.map +1 -0
- package/dist/tools/extraction-structured.js +390 -0
- package/dist/tools/extraction-structured.js.map +1 -0
- package/dist/tools/extraction.d.ts +24 -0
- package/dist/tools/extraction.d.ts.map +1 -0
- package/dist/tools/extraction.js +424 -0
- package/dist/tools/extraction.js.map +1 -0
- package/dist/tools/file-management.d.ts +14 -0
- package/dist/tools/file-management.d.ts.map +1 -0
- package/dist/tools/file-management.js +523 -0
- package/dist/tools/file-management.js.map +1 -0
- package/dist/tools/form-fill.d.ts +13 -0
- package/dist/tools/form-fill.d.ts.map +1 -0
- package/dist/tools/form-fill.js +250 -0
- package/dist/tools/form-fill.js.map +1 -0
- package/dist/tools/health.d.ts +19 -0
- package/dist/tools/health.d.ts.map +1 -0
- package/dist/tools/health.js +229 -0
- package/dist/tools/health.js.map +1 -0
- package/dist/tools/images.d.ts +54 -0
- package/dist/tools/images.d.ts.map +1 -0
- package/dist/tools/images.js +787 -0
- package/dist/tools/images.js.map +1 -0
- package/dist/tools/ingestion.d.ts +94 -0
- package/dist/tools/ingestion.d.ts.map +1 -0
- package/dist/tools/ingestion.js +1659 -0
- package/dist/tools/ingestion.js.map +1 -0
- package/dist/tools/intelligence.d.ts +18 -0
- package/dist/tools/intelligence.d.ts.map +1 -0
- package/dist/tools/intelligence.js +1039 -0
- package/dist/tools/intelligence.js.map +1 -0
- package/dist/tools/provenance.d.ts +51 -0
- package/dist/tools/provenance.d.ts.map +1 -0
- package/dist/tools/provenance.js +691 -0
- package/dist/tools/provenance.js.map +1 -0
- package/dist/tools/reports.d.ts +41 -0
- package/dist/tools/reports.d.ts.map +1 -0
- package/dist/tools/reports.js +1394 -0
- package/dist/tools/reports.js.map +1 -0
- package/dist/tools/search.d.ts +35 -0
- package/dist/tools/search.d.ts.map +1 -0
- package/dist/tools/search.js +2528 -0
- package/dist/tools/search.js.map +1 -0
- package/dist/tools/shared.d.ts +52 -0
- package/dist/tools/shared.d.ts.map +1 -0
- package/dist/tools/shared.js +54 -0
- package/dist/tools/shared.js.map +1 -0
- package/dist/tools/tags.d.ts +15 -0
- package/dist/tools/tags.d.ts.map +1 -0
- package/dist/tools/tags.js +287 -0
- package/dist/tools/tags.js.map +1 -0
- package/dist/tools/timeline.d.ts +15 -0
- package/dist/tools/timeline.d.ts.map +1 -0
- package/dist/tools/timeline.js +14 -0
- package/dist/tools/timeline.js.map +1 -0
- package/dist/tools/users.d.ts +14 -0
- package/dist/tools/users.d.ts.map +1 -0
- package/dist/tools/users.js +257 -0
- package/dist/tools/users.js.map +1 -0
- package/dist/tools/vlm.d.ts +40 -0
- package/dist/tools/vlm.d.ts.map +1 -0
- package/dist/tools/vlm.js +475 -0
- package/dist/tools/vlm.js.map +1 -0
- package/dist/tools/workflow.d.ts +16 -0
- package/dist/tools/workflow.d.ts.map +1 -0
- package/dist/tools/workflow.js +495 -0
- package/dist/tools/workflow.js.map +1 -0
- package/dist/utils/backoff.d.ts +53 -0
- package/dist/utils/backoff.d.ts.map +1 -0
- package/dist/utils/backoff.js +78 -0
- package/dist/utils/backoff.js.map +1 -0
- package/dist/utils/config-persistence.d.ts +33 -0
- package/dist/utils/config-persistence.d.ts.map +1 -0
- package/dist/utils/config-persistence.js +61 -0
- package/dist/utils/config-persistence.js.map +1 -0
- package/dist/utils/hash.d.ts +65 -0
- package/dist/utils/hash.d.ts.map +1 -0
- package/dist/utils/hash.js +146 -0
- package/dist/utils/hash.js.map +1 -0
- package/dist/utils/math.d.ts +21 -0
- package/dist/utils/math.d.ts.map +1 -0
- package/dist/utils/math.js +39 -0
- package/dist/utils/math.js.map +1 -0
- package/dist/utils/validation.d.ts +697 -0
- package/dist/utils/validation.d.ts.map +1 -0
- package/dist/utils/validation.js +529 -0
- package/dist/utils/validation.js.map +1 -0
- package/package.json +96 -0
- package/python/.gitkeep +0 -0
- package/python/__init__.py +104 -0
- package/python/clustering_worker.py +440 -0
- package/python/docx_image_extractor.py +524 -0
- package/python/embedding_worker.py +552 -0
- package/python/file_manager_worker.py +564 -0
- package/python/form_fill_worker.py +399 -0
- package/python/gpu_utils.py +582 -0
- package/python/image_extractor.py +317 -0
- package/python/image_optimizer.py +444 -0
- package/python/ocr_worker.py +712 -0
- package/python/pyproject.toml +76 -0
- package/python/requirements.txt +51 -0
- package/python/reranker_worker.py +87 -0
package/dist/bin.js
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* OCR Provenance MCP Server - CLI Entry Point
|
|
4
|
+
*
|
|
5
|
+
* This file serves as the bin entry point for global npm installation.
|
|
6
|
+
* It simply imports and runs the main server module.
|
|
7
|
+
*
|
|
8
|
+
* Usage:
|
|
9
|
+
* npx ocr-provenance-mcp # via npx
|
|
10
|
+
* ocr-provenance-mcp # after npm install -g
|
|
11
|
+
* node dist/index.js # direct invocation
|
|
12
|
+
*
|
|
13
|
+
* @module bin
|
|
14
|
+
*/
|
|
15
|
+
import './index.js';
|
|
16
|
+
//# sourceMappingURL=bin.js.map
|
package/dist/bin.js.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"bin.js","sourceRoot":"","sources":["../src/bin.ts"],"names":[],"mappings":";AACA;;;;;;;;;;;;GAYG;AAEH,OAAO,YAAY,CAAC"}
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OCR Provenance MCP Server
|
|
3
|
+
*
|
|
4
|
+
* Entry point for the MCP server using stdio transport.
|
|
5
|
+
* Exposes OCR, search, provenance, and clustering tools via JSON-RPC.
|
|
6
|
+
*
|
|
7
|
+
* CRITICAL: NEVER use console.log() - stdout is reserved for JSON-RPC protocol.
|
|
8
|
+
* Use console.error() for all logging.
|
|
9
|
+
*
|
|
10
|
+
* @module index
|
|
11
|
+
*/
|
|
12
|
+
export {};
|
|
13
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG"}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OCR Provenance MCP Server
|
|
3
|
+
*
|
|
4
|
+
* Entry point for the MCP server using stdio transport.
|
|
5
|
+
* Exposes OCR, search, provenance, and clustering tools via JSON-RPC.
|
|
6
|
+
*
|
|
7
|
+
* CRITICAL: NEVER use console.log() - stdout is reserved for JSON-RPC protocol.
|
|
8
|
+
* Use console.error() for all logging.
|
|
9
|
+
*
|
|
10
|
+
* @module index
|
|
11
|
+
*/
|
|
12
|
+
import dotenv from 'dotenv';
|
|
13
|
+
import path from 'path';
|
|
14
|
+
import fs from 'fs';
|
|
15
|
+
import { fileURLToPath } from 'url';
|
|
16
|
+
// Load environment variables from the first .env file that exists.
// Candidates are checked in this order:
//   1. OCR_PROVENANCE_ENV_FILE env var (explicit override)
//   2. CWD/.env (project-local)
//   3. Package root/.env (development)
// NOTE(review): static `import` declarations are hoisted in ES modules, so the
// modules imported further down this file are evaluated before this code runs.
// Confirm none of them read process.env at import time.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const envCandidates = [
  process.env.OCR_PROVENANCE_ENV_FILE,
  path.resolve(process.cwd(), '.env'),
  path.resolve(__dirname, '..', '.env'),
].filter((candidate) => typeof candidate === 'string');
// Only the first existing file is loaded; later candidates are never merged in.
const selectedEnvFile = envCandidates.find((candidate) => fs.existsSync(candidate));
if (selectedEnvFile !== undefined) {
  dotenv.config({ path: selectedEnvFile, quiet: true });
}
|
|
32
|
+
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
|
33
|
+
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
|
|
34
|
+
import { registerAllTools } from './server/register-tools.js';
|
|
35
|
+
import { validateStartupDependencies } from './server/startup.js';
|
|
36
|
+
// =============================================================================
|
|
37
|
+
// SERVER INITIALIZATION
|
|
38
|
+
// =============================================================================
|
|
39
|
+
const server = new McpServer({
|
|
40
|
+
name: 'ocr-provenance-mcp',
|
|
41
|
+
version: '1.0.0',
|
|
42
|
+
});
|
|
43
|
+
// =============================================================================
|
|
44
|
+
// TOOL REGISTRATION
|
|
45
|
+
// =============================================================================
|
|
46
|
+
const toolCount = registerAllTools(server);
|
|
47
|
+
// =============================================================================
|
|
48
|
+
// SERVER STARTUP
|
|
49
|
+
// =============================================================================
|
|
50
|
+
/**
 * Start the MCP server on the stdio transport.
 *
 * Runs the startup dependency validation first, then connects the
 * module-level `server` to a new stdio transport. Status lines go to stderr
 * because stdout is reserved for the JSON-RPC protocol.
 *
 * @returns {Promise<void>} Resolves once the server is connected.
 */
async function main() {
  validateStartupDependencies();

  const stdioTransport = new StdioServerTransport();
  await server.connect(stdioTransport);

  console.error('OCR Provenance MCP Server running on stdio');
  console.error(`Tools registered: ${toolCount}`);
}
|
|
57
|
+
// Report process memory usage to stderr every 5 minutes (stderr is safe for MCP).
// The timer is unref'd so it never keeps the process alive on its own.
setInterval(() => {
  const { rss, heapUsed, heapTotal, external } = process.memoryUsage();
  // Bytes -> megabytes, one decimal place.
  const toMb = (bytes) => (bytes / 1024 / 1024).toFixed(1);
  console.error(
    `[Memory] RSS=${toMb(rss)}MB Heap=${toMb(heapUsed)}/${toMb(heapTotal)}MB External=${toMb(external)}MB`,
  );
}, 300_000).unref();
|
|
64
|
+
/** True once the first termination signal has started the shutdown sequence. */
let shutdownInProgress = false;

/**
 * Graceful shutdown handler.
 *
 * Closes the MCP server connection and exits with code 0 on success or 1 if
 * closing fails. A 5 second watchdog forces exit code 1 if the close hangs.
 *
 * The handler is idempotent: signals received while a shutdown is already in
 * flight are logged and ignored, so `server.close()` is invoked only once and
 * only one watchdog timer is armed.
 *
 * @param {string} signal - Name of the signal that triggered the shutdown.
 * @returns {void}
 */
function handleShutdown(signal) {
  if (shutdownInProgress) {
    console.error(`[Shutdown] Received ${signal}, shutdown already in progress`);
    return;
  }
  shutdownInProgress = true;

  console.error(`[Shutdown] Received ${signal}, shutting down gracefully...`);
  // Close the MCP server connection
  server
    .close()
    .then(() => {
      console.error('[Shutdown] Server closed successfully');
      process.exit(0);
    })
    .catch((err) => {
      console.error(`[Shutdown] Error closing server: ${err}`);
      process.exit(1);
    });
  // Force exit after 5s if graceful shutdown hangs. The timer is unref'd so it
  // does not by itself keep the process alive.
  setTimeout(() => {
    console.error('[Shutdown] Forced exit after timeout');
    process.exit(1);
  }, 5000).unref();
}
|
|
84
|
+
// Route both termination signals through the shared shutdown handler.
for (const signalName of ['SIGTERM', 'SIGINT']) {
  process.on(signalName, () => handleShutdown(signalName));
}

// Start the server; any startup failure is logged to stderr and exits with code 1.
main().catch(function onStartupFailure(error) {
  console.error('Fatal error starting MCP server:', error);
  process.exit(1);
});
|
|
90
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,MAAM,MAAM,QAAQ,CAAC;AAC5B,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,MAAM,IAAI,CAAC;AACpB,OAAO,EAAE,aAAa,EAAE,MAAM,KAAK,CAAC;AAEpC,kEAAkE;AAClE,yDAAyD;AACzD,8BAA8B;AAC9B,qCAAqC;AACrC,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAC/D,MAAM,aAAa,GAAG;IACpB,OAAO,CAAC,GAAG,CAAC,uBAAuB;IACnC,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,MAAM,CAAC;IACnC,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,IAAI,EAAE,MAAM,CAAC;CACtC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAe,EAAE,CAAC,OAAO,CAAC,KAAK,QAAQ,CAAC,CAAC;AAEpD,KAAK,MAAM,OAAO,IAAI,aAAa,EAAE,CAAC;IACpC,IAAI,EAAE,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;QAC3B,MAAM,CAAC,MAAM,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;QAC9C,MAAM;IACR,CAAC;AACH,CAAC;AAED,OAAO,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAC;AACpE,OAAO,EAAE,oBAAoB,EAAE,MAAM,2CAA2C,CAAC;AACjF,OAAO,EAAE,gBAAgB,EAAE,MAAM,4BAA4B,CAAC;AAC9D,OAAO,EAAE,2BAA2B,EAAE,MAAM,qBAAqB,CAAC;AAElE,gFAAgF;AAChF,wBAAwB;AACxB,gFAAgF;AAEhF,MAAM,MAAM,GAAG,IAAI,SAAS,CAAC;IAC3B,IAAI,EAAE,oBAAoB;IAC1B,OAAO,EAAE,OAAO;CACjB,CAAC,CAAC;AAEH,gFAAgF;AAChF,oBAAoB;AACpB,gFAAgF;AAEhF,MAAM,SAAS,GAAG,gBAAgB,CAAC,MAAM,CAAC,CAAC;AAE3C,gFAAgF;AAChF,iBAAiB;AACjB,gFAAgF;AAEhF,KAAK,UAAU,IAAI;IACjB,2BAA2B,EAAE,CAAC;IAE9B,MAAM,SAAS,GAAG,IAAI,oBAAoB,EAAE,CAAC;IAC7C,MAAM,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;IAChC,OAAO,CAAC,KAAK,CAAC,4CAA4C,CAAC,CAAC;IAC5D,OAAO,CAAC,KAAK,CAAC,qBAAqB,SAAS,EAAE,CAAC,CAAC;AAClD,CAAC;AAED,kFAAkF;AAClF,WAAW,CAAC,GAAG,EAAE;IACf,MAAM,GAAG,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC;IAClC,OAAO,CAAC,KAAK,CACX,gBAAgB,CAAC,GAAG,CAAC,GAAG,GAAG,IAAI,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK;QACrD,QAAQ,CAAC,GAAG,CAAC,QAAQ,GAAG,IAAI,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,SAAS,GAAG,IAAI,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK;QAChG,YAAY,CAAC,GAAG,CAAC,QAAQ,GAAG,IAAI,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAC1D,CAAC;AACJ,CAAC,EAAE,OAAO,CAAC,CAA
C,KAAK,EAAE,CAAC;AAEpB,4BAA4B;AAC5B,SAAS,cAAc,CAAC,MAAc;IACpC,OAAO,CAAC,KAAK,CAAC,uBAAuB,MAAM,+BAA+B,CAAC,CAAC;IAC5E,kCAAkC;IAClC,MAAM;SACH,KAAK,EAAE;SACP,IAAI,CAAC,GAAG,EAAE;QACT,OAAO,CAAC,KAAK,CAAC,uCAAuC,CAAC,CAAC;QACvD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC,CAAC;SACD,KAAK,CAAC,CAAC,GAAG,EAAE,EAAE;QACb,OAAO,CAAC,KAAK,CAAC,oCAAoC,GAAG,EAAE,CAAC,CAAC;QACzD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC,CAAC,CAAC;IACL,iDAAiD;IACjD,UAAU,CAAC,GAAG,EAAE;QACd,OAAO,CAAC,KAAK,CAAC,sCAAsC,CAAC,CAAC;QACtD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC,EAAE,IAAI,CAAC,CAAC,KAAK,EAAE,CAAC;AACnB,CAAC;AAED,OAAO,CAAC,EAAE,CAAC,SAAS,EAAE,GAAG,EAAE,CAAC,cAAc,CAAC,SAAS,CAAC,CAAC,CAAC;AACvD,OAAO,CAAC,EAAE,CAAC,QAAQ,EAAE,GAAG,EAAE,CAAC,cAAc,CAAC,QAAQ,CAAC,CAAC,CAAC;AAErD,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,KAAK,EAAE,EAAE;IACrB,OAAO,CAAC,KAAK,CAAC,kCAAkC,EAAE,KAAK,CAAC,CAAC;IACzD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Chunk interfaces for OCR Provenance MCP System
|
|
3
|
+
*
|
|
4
|
+
* Represents text chunks extracted from OCR output.
|
|
5
|
+
* Provenance depth: 2
|
|
6
|
+
*/
|
|
7
|
+
/**
|
|
8
|
+
* Configuration for text chunking
|
|
9
|
+
*/
|
|
10
|
+
/** Configuration for heading level normalization */
|
|
11
|
+
export interface HeadingNormalizationConfig {
|
|
12
|
+
/** Enable heading normalization (default: false) */
|
|
13
|
+
enabled: boolean;
|
|
14
|
+
/** Minimum pattern group size to trigger normalization (default: 3) */
|
|
15
|
+
minPatternCount?: number;
|
|
16
|
+
}
|
|
17
|
+
export interface ChunkingConfig {
|
|
18
|
+
/** Maximum characters per chunk (default: 2000) */
|
|
19
|
+
chunkSize: number;
|
|
20
|
+
/** Overlap percentage between chunks (default: 10) */
|
|
21
|
+
overlapPercent: number;
|
|
22
|
+
/** Maximum chunk size for oversized sections (default: 8000) */
|
|
23
|
+
maxChunkSize: number;
|
|
24
|
+
/** Minimum chunk size - heading-only chunks below this are merged (default: 100) */
|
|
25
|
+
minChunkSize?: number;
|
|
26
|
+
/** Heading normalization configuration (default: disabled) */
|
|
27
|
+
headingNormalization?: HeadingNormalizationConfig;
|
|
28
|
+
}
|
|
29
|
+
/**
|
|
30
|
+
* Default chunking configuration per PRD
|
|
31
|
+
*/
|
|
32
|
+
export declare const DEFAULT_CHUNKING_CONFIG: ChunkingConfig;
|
|
33
|
+
/**
|
|
34
|
+
* Calculate overlap in characters
|
|
35
|
+
*/
|
|
36
|
+
export declare function getOverlapCharacters(config: ChunkingConfig): number;
|
|
37
|
+
/**
|
|
38
|
+
* Calculate step size for chunking
|
|
39
|
+
*/
|
|
40
|
+
export declare function getStepSize(config: ChunkingConfig): number;
|
|
41
|
+
/**
|
|
42
|
+
* Result of chunking operation (before database storage)
|
|
43
|
+
*/
|
|
44
|
+
export interface ChunkResult {
|
|
45
|
+
/** 0-indexed chunk position */
|
|
46
|
+
index: number;
|
|
47
|
+
/** The chunk text */
|
|
48
|
+
text: string;
|
|
49
|
+
/** Start offset in source text */
|
|
50
|
+
startOffset: number;
|
|
51
|
+
/** End offset in source text */
|
|
52
|
+
endOffset: number;
|
|
53
|
+
/** Characters overlapping with previous chunk */
|
|
54
|
+
overlapWithPrevious: number;
|
|
55
|
+
/** Characters overlapping with next chunk */
|
|
56
|
+
overlapWithNext: number;
|
|
57
|
+
/** Page number if determinable (1-indexed) */
|
|
58
|
+
pageNumber: number | null;
|
|
59
|
+
/** Page range if spanning pages (e.g., "4-5") */
|
|
60
|
+
pageRange: string | null;
|
|
61
|
+
/** Heading text that provides context for this chunk */
|
|
62
|
+
headingContext: string | null;
|
|
63
|
+
/** Heading level (1-6) of the section this chunk belongs to */
|
|
64
|
+
headingLevel: number | null;
|
|
65
|
+
/** Full section path (e.g., "Introduction > Background > History") */
|
|
66
|
+
sectionPath: string | null;
|
|
67
|
+
/** Content types present in this chunk (e.g., ["text", "table", "list"]) */
|
|
68
|
+
contentTypes: string[];
|
|
69
|
+
/** Whether this chunk is atomic (should not be split further) */
|
|
70
|
+
isAtomic: boolean;
|
|
71
|
+
/** Table metadata if this chunk contains a table */
|
|
72
|
+
tableMetadata?: {
|
|
73
|
+
columnHeaders: string[];
|
|
74
|
+
rowCount: number;
|
|
75
|
+
columnCount: number;
|
|
76
|
+
/** Human-readable summary of table content */
|
|
77
|
+
summary?: string;
|
|
78
|
+
/** Caption text from preceding block (e.g., "Table 1: Budget Summary") */
|
|
79
|
+
caption?: string;
|
|
80
|
+
/** Index of a prior table structure this continues (cross-page) */
|
|
81
|
+
continuationOf?: number;
|
|
82
|
+
} | null;
|
|
83
|
+
}
|
|
84
|
+
/**
|
|
85
|
+
* Represents a text chunk stored in database
|
|
86
|
+
* Provenance depth: 2
|
|
87
|
+
*/
|
|
88
|
+
export interface Chunk {
|
|
89
|
+
/** UUID v4 identifier */
|
|
90
|
+
id: string;
|
|
91
|
+
/** Reference to parent document */
|
|
92
|
+
document_id: string;
|
|
93
|
+
/** Reference to OCR result this chunk came from */
|
|
94
|
+
ocr_result_id: string;
|
|
95
|
+
/** The actual chunk text content */
|
|
96
|
+
text: string;
|
|
97
|
+
/** SHA-256 hash of text content */
|
|
98
|
+
text_hash: string;
|
|
99
|
+
/** 0-indexed position in document */
|
|
100
|
+
chunk_index: number;
|
|
101
|
+
/** Character offset where chunk starts in OCR text */
|
|
102
|
+
character_start: number;
|
|
103
|
+
/** Character offset where chunk ends in OCR text */
|
|
104
|
+
character_end: number;
|
|
105
|
+
/** Page number this chunk primarily belongs to (1-indexed) */
|
|
106
|
+
page_number: number | null;
|
|
107
|
+
/** Page range if chunk spans multiple pages (e.g., "4-5") */
|
|
108
|
+
page_range: string | null;
|
|
109
|
+
/** Characters overlapping with previous chunk */
|
|
110
|
+
overlap_previous: number;
|
|
111
|
+
/** Characters overlapping with next chunk */
|
|
112
|
+
overlap_next: number;
|
|
113
|
+
/** Reference to provenance record */
|
|
114
|
+
provenance_id: string;
|
|
115
|
+
/** ISO 8601 timestamp */
|
|
116
|
+
created_at: string;
|
|
117
|
+
/** Status of embedding generation */
|
|
118
|
+
embedding_status: 'pending' | 'complete' | 'failed';
|
|
119
|
+
/** ISO 8601 timestamp when embedded */
|
|
120
|
+
embedded_at: string | null;
|
|
121
|
+
/** OCR parse quality score from Datalab (0-5 range), propagated to chunk level */
|
|
122
|
+
ocr_quality_score: number | null;
|
|
123
|
+
/** Heading text that provides context for this chunk */
|
|
124
|
+
heading_context: string | null;
|
|
125
|
+
/** Heading level (1-6) of the section this chunk belongs to */
|
|
126
|
+
heading_level: number | null;
|
|
127
|
+
/** Full section path (e.g., "Introduction > Background > History") */
|
|
128
|
+
section_path: string | null;
|
|
129
|
+
/** JSON-encoded array of content types (e.g., '["text","table"]') */
|
|
130
|
+
content_types: string | null;
|
|
131
|
+
/** Whether this chunk is atomic (should not be split further) */
|
|
132
|
+
is_atomic: number;
|
|
133
|
+
/** Chunking strategy used to create this chunk (e.g., "fixed", "hybrid_section") */
|
|
134
|
+
chunking_strategy: string;
|
|
135
|
+
}
|
|
136
|
+
//# sourceMappingURL=chunk.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"chunk.d.ts","sourceRoot":"","sources":["../../src/models/chunk.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH;;GAEG;AACH,oDAAoD;AACpD,MAAM,WAAW,0BAA0B;IACzC,oDAAoD;IACpD,OAAO,EAAE,OAAO,CAAC;IACjB,uEAAuE;IACvE,eAAe,CAAC,EAAE,MAAM,CAAC;CAC1B;AAED,MAAM,WAAW,cAAc;IAC7B,mDAAmD;IACnD,SAAS,EAAE,MAAM,CAAC;IAElB,sDAAsD;IACtD,cAAc,EAAE,MAAM,CAAC;IAEvB,gEAAgE;IAChE,YAAY,EAAE,MAAM,CAAC;IAErB,oFAAoF;IACpF,YAAY,CAAC,EAAE,MAAM,CAAC;IAEtB,8DAA8D;IAC9D,oBAAoB,CAAC,EAAE,0BAA0B,CAAC;CACnD;AAED;;GAEG;AACH,eAAO,MAAM,uBAAuB,EAAE,cAIrC,CAAC;AAEF;;GAEG;AACH,wBAAgB,oBAAoB,CAAC,MAAM,EAAE,cAAc,GAAG,MAAM,CAEnE;AAED;;GAEG;AACH,wBAAgB,WAAW,CAAC,MAAM,EAAE,cAAc,GAAG,MAAM,CAE1D;AAED;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,+BAA+B;IAC/B,KAAK,EAAE,MAAM,CAAC;IAEd,qBAAqB;IACrB,IAAI,EAAE,MAAM,CAAC;IAEb,kCAAkC;IAClC,WAAW,EAAE,MAAM,CAAC;IAEpB,gCAAgC;IAChC,SAAS,EAAE,MAAM,CAAC;IAElB,iDAAiD;IACjD,mBAAmB,EAAE,MAAM,CAAC;IAE5B,6CAA6C;IAC7C,eAAe,EAAE,MAAM,CAAC;IAExB,8CAA8C;IAC9C,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAE1B,iDAAiD;IACjD,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IAEzB,wDAAwD;IACxD,cAAc,EAAE,MAAM,GAAG,IAAI,CAAC;IAE9B,+DAA+D;IAC/D,YAAY,EAAE,MAAM,GAAG,IAAI,CAAC;IAE5B,sEAAsE;IACtE,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAE3B,4EAA4E;IAC5E,YAAY,EAAE,MAAM,EAAE,CAAC;IAEvB,iEAAiE;IACjE,QAAQ,EAAE,OAAO,CAAC;IAElB,oDAAoD;IACpD,aAAa,CAAC,EAAE;QACd,aAAa,EAAE,MAAM,EAAE,CAAC;QACxB,QAAQ,EAAE,MAAM,CAAC;QACjB,WAAW,EAAE,MAAM,CAAC;QACpB,8CAA8C;QAC9C,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,0EAA0E;QAC1E,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,mEAAmE;QACnE,cAAc,CAAC,EAAE,MAAM,CAAC;KACzB,GAAG,IAAI,CAAC;CACV;AAED;;;GAGG;AACH,MAAM,WAAW,KAAK;IACpB,yBAAyB;IACzB,EAAE,EAAE,MAAM,CAAC;IAEX,mCAAmC;IACnC,WAAW,EAAE,MAAM,CAAC;IAEpB,mDAAmD;IACnD,aAAa,EAAE,MAAM,CAAC;IAEtB,oCAAoC;IACpC,IAAI,EAAE,MAAM,CAAC;IAEb,mCAAmC;IACnC,SAAS,EAAE,MAAM,CAAC;IAElB,qCAAqC;IACrC,WAAW,EAAE,MAAM,CAAC;IAEpB,sDAAsD;IACtD,eAAe,EAAE,MAAM,CAAC;IAExB,oDAAoD;IACpD,aAAa,EAAE,MAAM,CAAC;IAEtB,8DAA8D;IAC9D,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAE3B,6DAA6D;IAC7D,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAE1B,iDAAiD;IACjD
,gBAAgB,EAAE,MAAM,CAAC;IAEzB,6CAA6C;IAC7C,YAAY,EAAE,MAAM,CAAC;IAErB,qCAAqC;IACrC,aAAa,EAAE,MAAM,CAAC;IAEtB,yBAAyB;IACzB,UAAU,EAAE,MAAM,CAAC;IAEnB,qCAAqC;IACrC,gBAAgB,EAAE,SAAS,GAAG,UAAU,GAAG,QAAQ,CAAC;IAEpD,uCAAuC;IACvC,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAE3B,kFAAkF;IAClF,iBAAiB,EAAE,MAAM,GAAG,IAAI,CAAC;IAEjC,wDAAwD;IACxD,eAAe,EAAE,MAAM,GAAG,IAAI,CAAC;IAE/B,+DAA+D;IAC/D,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAE7B,sEAAsE;IACtE,YAAY,EAAE,MAAM,GAAG,IAAI,CAAC;IAE5B,qEAAqE;IACrE,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAE7B,iEAAiE;IACjE,SAAS,EAAE,MAAM,CAAC;IAElB,oFAAoF;IACpF,iBAAiB,EAAE,MAAM,CAAC;CAC3B"}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Chunk interfaces for OCR Provenance MCP System
|
|
3
|
+
*
|
|
4
|
+
* Represents text chunks extracted from OCR output.
|
|
5
|
+
* Provenance depth: 2
|
|
6
|
+
*/
|
|
7
|
+
/**
|
|
8
|
+
* Default chunking configuration per PRD
|
|
9
|
+
*/
|
|
10
|
+
export const DEFAULT_CHUNKING_CONFIG = {
|
|
11
|
+
chunkSize: 2000,
|
|
12
|
+
overlapPercent: 10,
|
|
13
|
+
maxChunkSize: 8000,
|
|
14
|
+
};
|
|
15
|
+
/**
 * Calculate the overlap between consecutive chunks, in characters.
 *
 * Takes `overlapPercent` percent of `chunkSize` and rounds down.
 *
 * @param {{chunkSize: number, overlapPercent: number}} config - Chunking configuration.
 * @returns {number} Overlap in characters.
 */
export function getOverlapCharacters(config) {
  const { chunkSize, overlapPercent } = config;
  const overlap = (chunkSize * overlapPercent) / 100;
  return Math.floor(overlap);
}
|
|
21
|
+
/**
 * Calculate the step size for chunking.
 *
 * The step is `chunkSize` minus the overlap returned by
 * `getOverlapCharacters(config)`.
 *
 * @param {{chunkSize: number, overlapPercent: number}} config - Chunking configuration.
 * @returns {number} Step size in characters.
 */
export function getStepSize(config) {
  const { chunkSize } = config;
  return chunkSize - getOverlapCharacters(config);
}
|
|
27
|
+
//# sourceMappingURL=chunk.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"chunk.js","sourceRoot":"","sources":["../../src/models/chunk.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AA8BH;;GAEG;AACH,MAAM,CAAC,MAAM,uBAAuB,GAAmB;IACrD,SAAS,EAAE,IAAI;IACf,cAAc,EAAE,EAAE;IAClB,YAAY,EAAE,IAAI;CACnB,CAAC;AAEF;;GAEG;AACH,MAAM,UAAU,oBAAoB,CAAC,MAAsB;IACzD,OAAO,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,SAAS,GAAG,MAAM,CAAC,cAAc,GAAG,GAAG,CAAC,CAAC;AACpE,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,WAAW,CAAC,MAAsB;IAChD,OAAO,MAAM,CAAC,SAAS,GAAG,oBAAoB,CAAC,MAAM,CAAC,CAAC;AACzD,CAAC"}
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cluster and Document Clustering interfaces
|
|
3
|
+
*
|
|
4
|
+
* Types for document clustering and auto-classification.
|
|
5
|
+
* Clusters group semantically similar documents using HDBSCAN or agglomerative algorithms.
|
|
6
|
+
*
|
|
7
|
+
* @module models/cluster
|
|
8
|
+
*/
|
|
9
|
+
/**
|
|
10
|
+
* A cluster of semantically similar documents
|
|
11
|
+
*/
|
|
12
|
+
export interface Cluster {
|
|
13
|
+
id: string;
|
|
14
|
+
run_id: string;
|
|
15
|
+
cluster_index: number;
|
|
16
|
+
label: string | null;
|
|
17
|
+
description: string | null;
|
|
18
|
+
classification_tag: string | null;
|
|
19
|
+
document_count: number;
|
|
20
|
+
centroid_json: string | null;
|
|
21
|
+
top_terms_json: string | null;
|
|
22
|
+
coherence_score: number | null;
|
|
23
|
+
algorithm: string;
|
|
24
|
+
algorithm_params_json: string;
|
|
25
|
+
silhouette_score: number | null;
|
|
26
|
+
content_hash: string;
|
|
27
|
+
provenance_id: string;
|
|
28
|
+
created_at: string;
|
|
29
|
+
processing_duration_ms: number | null;
|
|
30
|
+
}
|
|
31
|
+
/**
|
|
32
|
+
* Assignment of a document to a cluster within a specific run
|
|
33
|
+
*/
|
|
34
|
+
export interface DocumentCluster {
|
|
35
|
+
id: string;
|
|
36
|
+
document_id: string;
|
|
37
|
+
cluster_id: string | null;
|
|
38
|
+
run_id: string;
|
|
39
|
+
similarity_to_centroid: number;
|
|
40
|
+
membership_probability: number;
|
|
41
|
+
is_noise: boolean;
|
|
42
|
+
assigned_at: string;
|
|
43
|
+
}
|
|
44
|
+
/**
|
|
45
|
+
* Configuration for a clustering run
|
|
46
|
+
*/
|
|
47
|
+
export interface ClusterRunConfig {
|
|
48
|
+
algorithm: 'hdbscan' | 'agglomerative' | 'kmeans';
|
|
49
|
+
n_clusters: number | null;
|
|
50
|
+
min_cluster_size: number;
|
|
51
|
+
distance_threshold: number | null;
|
|
52
|
+
linkage: 'average' | 'complete' | 'single';
|
|
53
|
+
}
|
|
54
|
+
/**
|
|
55
|
+
* Result of a clustering run
|
|
56
|
+
*/
|
|
57
|
+
export interface ClusterRunResult {
|
|
58
|
+
run_id: string;
|
|
59
|
+
algorithm: string;
|
|
60
|
+
n_clusters: number;
|
|
61
|
+
total_documents: number;
|
|
62
|
+
noise_document_ids: string[];
|
|
63
|
+
silhouette_score: number;
|
|
64
|
+
clusters: ClusterResultItem[];
|
|
65
|
+
processing_duration_ms: number;
|
|
66
|
+
}
|
|
67
|
+
/**
|
|
68
|
+
* A single cluster within a run result
|
|
69
|
+
*/
|
|
70
|
+
export interface ClusterResultItem {
|
|
71
|
+
cluster_index: number;
|
|
72
|
+
document_count: number;
|
|
73
|
+
coherence_score: number;
|
|
74
|
+
centroid: number[];
|
|
75
|
+
document_ids: string[];
|
|
76
|
+
similarities: number[];
|
|
77
|
+
probabilities: number[];
|
|
78
|
+
}
|
|
79
|
+
//# sourceMappingURL=cluster.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cluster.d.ts","sourceRoot":"","sources":["../../src/models/cluster.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH;;GAEG;AACH,MAAM,WAAW,OAAO;IACtB,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,EAAE,MAAM,CAAC;IACf,aAAa,EAAE,MAAM,CAAC;IACtB,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;IACrB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,kBAAkB,EAAE,MAAM,GAAG,IAAI,CAAC;IAClC,cAAc,EAAE,MAAM,CAAC;IACvB,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,cAAc,EAAE,MAAM,GAAG,IAAI,CAAC;IAC9B,eAAe,EAAE,MAAM,GAAG,IAAI,CAAC;IAC/B,SAAS,EAAE,MAAM,CAAC;IAClB,qBAAqB,EAAE,MAAM,CAAC;IAC9B,gBAAgB,EAAE,MAAM,GAAG,IAAI,CAAC;IAChC,YAAY,EAAE,MAAM,CAAC;IACrB,aAAa,EAAE,MAAM,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,sBAAsB,EAAE,MAAM,GAAG,IAAI,CAAC;CACvC;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,EAAE,EAAE,MAAM,CAAC;IACX,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,MAAM,EAAE,MAAM,CAAC;IACf,sBAAsB,EAAE,MAAM,CAAC;IAC/B,sBAAsB,EAAE,MAAM,CAAC;IAC/B,QAAQ,EAAE,OAAO,CAAC;IAClB,WAAW,EAAE,MAAM,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,SAAS,EAAE,SAAS,GAAG,eAAe,GAAG,QAAQ,CAAC;IAClD,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,gBAAgB,EAAE,MAAM,CAAC;IACzB,kBAAkB,EAAE,MAAM,GAAG,IAAI,CAAC;IAClC,OAAO,EAAE,SAAS,GAAG,UAAU,GAAG,QAAQ,CAAC;CAC5C;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,eAAe,EAAE,MAAM,CAAC;IACxB,kBAAkB,EAAE,MAAM,EAAE,CAAC;IAC7B,gBAAgB,EAAE,MAAM,CAAC;IACzB,QAAQ,EAAE,iBAAiB,EAAE,CAAC;IAC9B,sBAAsB,EAAE,MAAM,CAAC;CAChC;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,aAAa,EAAE,MAAM,CAAC;IACtB,cAAc,EAAE,MAAM,CAAC;IACvB,eAAe,EAAE,MAAM,CAAC;IACxB,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,aAAa,EAAE,MAAM,EAAE,CAAC;CACzB"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cluster and Document Clustering interfaces
|
|
3
|
+
*
|
|
4
|
+
* Types for document clustering and auto-classification.
|
|
5
|
+
* Clusters group semantically similar documents using HDBSCAN or agglomerative algorithms.
|
|
6
|
+
*
|
|
7
|
+
* @module models/cluster
|
|
8
|
+
*/
|
|
9
|
+
export {};
|
|
10
|
+
//# sourceMappingURL=cluster.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cluster.js","sourceRoot":"","sources":["../../src/models/cluster.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG"}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Comparison interfaces for OCR Provenance MCP System
|
|
3
|
+
*
|
|
4
|
+
* Types for document comparison (text diff, structural diff).
|
|
5
|
+
* Pure types - no logic.
|
|
6
|
+
*/
|
|
7
|
+
/**
|
|
8
|
+
* A single diff operation (insert, delete, or equal)
|
|
9
|
+
*/
|
|
10
|
+
export interface TextDiffOperation {
|
|
11
|
+
type: 'insert' | 'delete' | 'equal';
|
|
12
|
+
text: string;
|
|
13
|
+
doc1_offset: number;
|
|
14
|
+
doc2_offset: number;
|
|
15
|
+
line_count: number;
|
|
16
|
+
}
|
|
17
|
+
/**
|
|
18
|
+
* Result of comparing two documents' text content
|
|
19
|
+
*/
|
|
20
|
+
export interface TextDiffResult {
|
|
21
|
+
operations: TextDiffOperation[];
|
|
22
|
+
total_operations: number;
|
|
23
|
+
truncated: boolean;
|
|
24
|
+
insertions: number;
|
|
25
|
+
deletions: number;
|
|
26
|
+
unchanged: number;
|
|
27
|
+
similarity_ratio: number;
|
|
28
|
+
doc1_length: number;
|
|
29
|
+
doc2_length: number;
|
|
30
|
+
}
|
|
31
|
+
/**
|
|
32
|
+
* Structural metadata comparison between two documents
|
|
33
|
+
*/
|
|
34
|
+
export interface StructuralDiff {
|
|
35
|
+
doc1_page_count: number | null;
|
|
36
|
+
doc2_page_count: number | null;
|
|
37
|
+
doc1_chunk_count: number;
|
|
38
|
+
doc2_chunk_count: number;
|
|
39
|
+
doc1_text_length: number;
|
|
40
|
+
doc2_text_length: number;
|
|
41
|
+
doc1_quality_score: number | null;
|
|
42
|
+
doc2_quality_score: number | null;
|
|
43
|
+
doc1_ocr_mode: string;
|
|
44
|
+
doc2_ocr_mode: string;
|
|
45
|
+
}
|
|
46
|
+
/**
|
|
47
|
+
* Stored comparison record (maps to comparisons table row)
|
|
48
|
+
*/
|
|
49
|
+
export interface Comparison {
|
|
50
|
+
id: string;
|
|
51
|
+
document_id_1: string;
|
|
52
|
+
document_id_2: string;
|
|
53
|
+
similarity_ratio: number;
|
|
54
|
+
text_diff_json: string;
|
|
55
|
+
structural_diff_json: string;
|
|
56
|
+
summary: string;
|
|
57
|
+
content_hash: string;
|
|
58
|
+
provenance_id: string;
|
|
59
|
+
created_at: string;
|
|
60
|
+
processing_duration_ms: number | null;
|
|
61
|
+
}
|
|
62
|
+
//# sourceMappingURL=comparison.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"comparison.d.ts","sourceRoot":"","sources":["../../src/models/comparison.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,QAAQ,GAAG,QAAQ,GAAG,OAAO,CAAC;IACpC,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,UAAU,EAAE,iBAAiB,EAAE,CAAC;IAChC,gBAAgB,EAAE,MAAM,CAAC;IACzB,SAAS,EAAE,OAAO,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,gBAAgB,EAAE,MAAM,CAAC;IACzB,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,MAAM,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,eAAe,EAAE,MAAM,GAAG,IAAI,CAAC;IAC/B,eAAe,EAAE,MAAM,GAAG,IAAI,CAAC;IAC/B,gBAAgB,EAAE,MAAM,CAAC;IACzB,gBAAgB,EAAE,MAAM,CAAC;IACzB,gBAAgB,EAAE,MAAM,CAAC;IACzB,gBAAgB,EAAE,MAAM,CAAC;IACzB,kBAAkB,EAAE,MAAM,GAAG,IAAI,CAAC;IAClC,kBAAkB,EAAE,MAAM,GAAG,IAAI,CAAC;IAClC,aAAa,EAAE,MAAM,CAAC;IACtB,aAAa,EAAE,MAAM,CAAC;CACvB;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB,EAAE,EAAE,MAAM,CAAC;IACX,aAAa,EAAE,MAAM,CAAC;IACtB,aAAa,EAAE,MAAM,CAAC;IACtB,gBAAgB,EAAE,MAAM,CAAC;IACzB,cAAc,EAAE,MAAM,CAAC;IACvB,oBAAoB,EAAE,MAAM,CAAC;IAC7B,OAAO,EAAE,MAAM,CAAC;IAChB,YAAY,EAAE,MAAM,CAAC;IACrB,aAAa,EAAE,MAAM,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,sBAAsB,EAAE,MAAM,GAAG,IAAI,CAAC;CACvC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"comparison.js","sourceRoot":"","sources":["../../src/models/comparison.ts"],"names":[],"mappings":"AAAA;;;;;GAKG"}
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Document interfaces for OCR Provenance MCP System
|
|
3
|
+
*
|
|
4
|
+
* Represents source documents ingested into the system.
|
|
5
|
+
* Provenance depth: 0 (root of chain)
|
|
6
|
+
*/
|
|
7
|
+
/**
|
|
8
|
+
* Document status throughout processing lifecycle
|
|
9
|
+
*/
|
|
10
|
+
export type DocumentStatus = 'pending' | 'processing' | 'complete' | 'failed';
|
|
11
|
+
/**
|
|
12
|
+
* Supported file types for OCR processing
|
|
13
|
+
*/
|
|
14
|
+
export declare const SUPPORTED_FILE_TYPES: readonly ["pdf", "docx", "doc", "pptx", "ppt", "xlsx", "xls", "png", "jpg", "jpeg", "tiff", "tif", "bmp", "gif", "webp", "txt", "csv", "md"];
|
|
15
|
+
/**
|
|
16
|
+
* Represents a source document ingested into the system
|
|
17
|
+
* Provenance depth: 0 (root of chain)
|
|
18
|
+
*/
|
|
19
|
+
export interface Document {
|
|
20
|
+
/** UUID v4 identifier */
|
|
21
|
+
id: string;
|
|
22
|
+
/** Full absolute path to source file */
|
|
23
|
+
file_path: string;
|
|
24
|
+
/** Original filename */
|
|
25
|
+
file_name: string;
|
|
26
|
+
/** SHA-256 hash of file content (format: 'sha256:...') */
|
|
27
|
+
file_hash: string;
|
|
28
|
+
/** File size in bytes */
|
|
29
|
+
file_size: number;
|
|
30
|
+
/** File type/extension (e.g., 'pdf', 'png', 'docx') */
|
|
31
|
+
file_type: string;
|
|
32
|
+
/** Current processing status */
|
|
33
|
+
status: DocumentStatus;
|
|
34
|
+
/** Number of pages (populated after OCR) */
|
|
35
|
+
page_count: number | null;
|
|
36
|
+
/** Reference to provenance record */
|
|
37
|
+
provenance_id: string;
|
|
38
|
+
/** ISO 8601 timestamp when document was ingested */
|
|
39
|
+
created_at: string;
|
|
40
|
+
/** ISO 8601 timestamp when file was last modified */
|
|
41
|
+
modified_at: string | null;
|
|
42
|
+
/** ISO 8601 timestamp when OCR completed */
|
|
43
|
+
ocr_completed_at: string | null;
|
|
44
|
+
/** Error message if status is 'failed' */
|
|
45
|
+
error_message: string | null;
|
|
46
|
+
/** Document title from metadata */
|
|
47
|
+
doc_title: string | null;
|
|
48
|
+
/** Document author from metadata */
|
|
49
|
+
doc_author: string | null;
|
|
50
|
+
/** Document subject from metadata */
|
|
51
|
+
doc_subject: string | null;
|
|
52
|
+
/** Reference to Datalab uploaded file ID */
|
|
53
|
+
datalab_file_id: string | null;
|
|
54
|
+
}
|
|
55
|
+
/**
|
|
56
|
+
* OCR result from Datalab processing
|
|
57
|
+
* Provenance depth: 1
|
|
58
|
+
*/
|
|
59
|
+
export interface OCRResult {
|
|
60
|
+
/** UUID v4 identifier */
|
|
61
|
+
id: string;
|
|
62
|
+
/** Reference to provenance record */
|
|
63
|
+
provenance_id: string;
|
|
64
|
+
/** Reference to source document */
|
|
65
|
+
document_id: string;
|
|
66
|
+
/** Extracted text content (Markdown format) */
|
|
67
|
+
extracted_text: string;
|
|
68
|
+
/** Length of extracted text */
|
|
69
|
+
text_length: number;
|
|
70
|
+
/** Datalab API request ID for tracing */
|
|
71
|
+
datalab_request_id: string;
|
|
72
|
+
/** OCR mode used: 'fast', 'balanced', 'accurate' */
|
|
73
|
+
datalab_mode: 'fast' | 'balanced' | 'accurate';
|
|
74
|
+
/** Datalab parse quality score (0-5) */
|
|
75
|
+
parse_quality_score: number | null;
|
|
76
|
+
/** Number of pages processed */
|
|
77
|
+
page_count: number;
|
|
78
|
+
/** Processing cost in cents */
|
|
79
|
+
cost_cents: number | null;
|
|
80
|
+
/** SHA-256 hash of extracted text */
|
|
81
|
+
content_hash: string;
|
|
82
|
+
/** ISO 8601 processing start timestamp */
|
|
83
|
+
processing_started_at: string;
|
|
84
|
+
/** ISO 8601 processing complete timestamp */
|
|
85
|
+
processing_completed_at: string;
|
|
86
|
+
/** Processing duration in milliseconds */
|
|
87
|
+
processing_duration_ms: number;
|
|
88
|
+
/** JSON block hierarchy from Datalab */
|
|
89
|
+
json_blocks?: string | null;
|
|
90
|
+
/** Extras metadata (cost_breakdown, Datalab metadata, etc.) */
|
|
91
|
+
extras_json?: string | null;
|
|
92
|
+
}
|
|
93
|
+
/**
|
|
94
|
+
* Page offset information for tracking text positions
|
|
95
|
+
*/
|
|
96
|
+
export interface PageOffset {
|
|
97
|
+
/** 1-indexed page number */
|
|
98
|
+
page: number;
|
|
99
|
+
/** Character offset where page starts */
|
|
100
|
+
charStart: number;
|
|
101
|
+
/** Character offset where page ends */
|
|
102
|
+
charEnd: number;
|
|
103
|
+
}
|
|
104
|
+
//# sourceMappingURL=document.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"document.d.ts","sourceRoot":"","sources":["../../src/models/document.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH;;GAEG;AACH,MAAM,MAAM,cAAc,GAAG,SAAS,GAAG,YAAY,GAAG,UAAU,GAAG,QAAQ,CAAC;AAE9E;;GAEG;AACH,eAAO,MAAM,oBAAoB,8IAIvB,CAAC;AAIX;;;GAGG;AACH,MAAM,WAAW,QAAQ;IACvB,yBAAyB;IACzB,EAAE,EAAE,MAAM,CAAC;IAEX,wCAAwC;IACxC,SAAS,EAAE,MAAM,CAAC;IAElB,wBAAwB;IACxB,SAAS,EAAE,MAAM,CAAC;IAElB,0DAA0D;IAC1D,SAAS,EAAE,MAAM,CAAC;IAElB,yBAAyB;IACzB,SAAS,EAAE,MAAM,CAAC;IAElB,uDAAuD;IACvD,SAAS,EAAE,MAAM,CAAC;IAElB,gCAAgC;IAChC,MAAM,EAAE,cAAc,CAAC;IAEvB,4CAA4C;IAC5C,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAE1B,qCAAqC;IACrC,aAAa,EAAE,MAAM,CAAC;IAEtB,oDAAoD;IACpD,UAAU,EAAE,MAAM,CAAC;IAEnB,qDAAqD;IACrD,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAE3B,4CAA4C;IAC5C,gBAAgB,EAAE,MAAM,GAAG,IAAI,CAAC;IAEhC,0CAA0C;IAC1C,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAE7B,mCAAmC;IACnC,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,oCAAoC;IACpC,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,qCAAqC;IACrC,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAE3B,4CAA4C;IAC5C,eAAe,EAAE,MAAM,GAAG,IAAI,CAAC;CAChC;AAED;;;GAGG;AACH,MAAM,WAAW,SAAS;IACxB,yBAAyB;IACzB,EAAE,EAAE,MAAM,CAAC;IAEX,qCAAqC;IACrC,aAAa,EAAE,MAAM,CAAC;IAEtB,mCAAmC;IACnC,WAAW,EAAE,MAAM,CAAC;IAEpB,+CAA+C;IAC/C,cAAc,EAAE,MAAM,CAAC;IAEvB,+BAA+B;IAC/B,WAAW,EAAE,MAAM,CAAC;IAEpB,yCAAyC;IACzC,kBAAkB,EAAE,MAAM,CAAC;IAE3B,oDAAoD;IACpD,YAAY,EAAE,MAAM,GAAG,UAAU,GAAG,UAAU,CAAC;IAE/C,wCAAwC;IACxC,mBAAmB,EAAE,MAAM,GAAG,IAAI,CAAC;IAEnC,gCAAgC;IAChC,UAAU,EAAE,MAAM,CAAC;IAEnB,+BAA+B;IAC/B,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAE1B,qCAAqC;IACrC,YAAY,EAAE,MAAM,CAAC;IAErB,0CAA0C;IAC1C,qBAAqB,EAAE,MAAM,CAAC;IAE9B,6CAA6C;IAC7C,uBAAuB,EAAE,MAAM,CAAC;IAEhC,0CAA0C;IAC1C,sBAAsB,EAAE,MAAM,CAAC;IAE/B,wCAAwC;IACxC,WAAW,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAE5B,+DAA+D;IAC/D,WAAW,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CAC7B;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB,4BAA4B;IAC5B,IAAI,EAAE,MAAM,CAAC;IACb,yCAAyC;IACzC,SAAS,EAAE,MAAM,CAAC;IAClB,uCAAuC;IACvC,OAAO,EAAE,MAAM,CAAC;CACjB"}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Document interfaces for OCR Provenance MCP System
|
|
3
|
+
*
|
|
4
|
+
* Represents source documents ingested into the system.
|
|
5
|
+
* Provenance depth: 0 (root of chain)
|
|
6
|
+
*/
|
|
7
|
+
/**
|
|
8
|
+
* Supported file types for OCR processing
|
|
9
|
+
*/
|
|
10
|
+
export const SUPPORTED_FILE_TYPES = [
|
|
11
|
+
'pdf', 'docx', 'doc', 'pptx', 'ppt', 'xlsx', 'xls',
|
|
12
|
+
'png', 'jpg', 'jpeg', 'tiff', 'tif', 'bmp', 'gif', 'webp',
|
|
13
|
+
'txt', 'csv', 'md',
|
|
14
|
+
];
|
|
15
|
+
//# sourceMappingURL=document.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"document.js","sourceRoot":"","sources":["../../src/models/document.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAOH;;GAEG;AACH,MAAM,CAAC,MAAM,oBAAoB,GAAG;IAClC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK;IAClD,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM;IACzD,KAAK,EAAE,KAAK,EAAE,IAAI;CACV,CAAC"}
|