ocr-provenance-mcp 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ocr-provenance-mcp might be problematic. Click here for more details.
- package/.env.example +55 -0
- package/LICENSE +78 -0
- package/README.md +1154 -0
- package/dist/bin-http.d.ts +24 -0
- package/dist/bin-http.d.ts.map +1 -0
- package/dist/bin-http.js +275 -0
- package/dist/bin-http.js.map +1 -0
- package/dist/bin-setup.d.ts +11 -0
- package/dist/bin-setup.d.ts.map +1 -0
- package/dist/bin-setup.js +610 -0
- package/dist/bin-setup.js.map +1 -0
- package/dist/bin.d.ts +16 -0
- package/dist/bin.d.ts.map +1 -0
- package/dist/bin.js +16 -0
- package/dist/bin.js.map +1 -0
- package/dist/index.d.ts +13 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +90 -0
- package/dist/index.js.map +1 -0
- package/dist/models/chunk.d.ts +136 -0
- package/dist/models/chunk.d.ts.map +1 -0
- package/dist/models/chunk.js +27 -0
- package/dist/models/chunk.js.map +1 -0
- package/dist/models/cluster.d.ts +79 -0
- package/dist/models/cluster.d.ts.map +1 -0
- package/dist/models/cluster.js +10 -0
- package/dist/models/cluster.js.map +1 -0
- package/dist/models/comparison.d.ts +62 -0
- package/dist/models/comparison.d.ts.map +1 -0
- package/dist/models/comparison.js +8 -0
- package/dist/models/comparison.js.map +1 -0
- package/dist/models/document.d.ts +104 -0
- package/dist/models/document.d.ts.map +1 -0
- package/dist/models/document.js +15 -0
- package/dist/models/document.js.map +1 -0
- package/dist/models/embedding.d.ts +87 -0
- package/dist/models/embedding.d.ts.map +1 -0
- package/dist/models/embedding.js +23 -0
- package/dist/models/embedding.js.map +1 -0
- package/dist/models/extraction.d.ts +15 -0
- package/dist/models/extraction.d.ts.map +1 -0
- package/dist/models/extraction.js +2 -0
- package/dist/models/extraction.js.map +1 -0
- package/dist/models/form-fill.d.ts +23 -0
- package/dist/models/form-fill.d.ts.map +1 -0
- package/dist/models/form-fill.js +2 -0
- package/dist/models/form-fill.js.map +1 -0
- package/dist/models/image.d.ts +177 -0
- package/dist/models/image.d.ts.map +1 -0
- package/dist/models/image.js +8 -0
- package/dist/models/image.js.map +1 -0
- package/dist/models/index.d.ts +14 -0
- package/dist/models/index.d.ts.map +1 -0
- package/dist/models/index.js +22 -0
- package/dist/models/index.js.map +1 -0
- package/dist/models/provenance.d.ts +174 -0
- package/dist/models/provenance.d.ts.map +1 -0
- package/dist/models/provenance.js +53 -0
- package/dist/models/provenance.js.map +1 -0
- package/dist/models/uploaded-file.d.ts +20 -0
- package/dist/models/uploaded-file.d.ts.map +1 -0
- package/dist/models/uploaded-file.js +2 -0
- package/dist/models/uploaded-file.js.map +1 -0
- package/dist/server/errors.d.ts +93 -0
- package/dist/server/errors.d.ts.map +1 -0
- package/dist/server/errors.js +256 -0
- package/dist/server/errors.js.map +1 -0
- package/dist/server/events.d.ts +36 -0
- package/dist/server/events.d.ts.map +1 -0
- package/dist/server/events.js +48 -0
- package/dist/server/events.js.map +1 -0
- package/dist/server/permissions.d.ts +26 -0
- package/dist/server/permissions.d.ts.map +1 -0
- package/dist/server/permissions.js +194 -0
- package/dist/server/permissions.js.map +1 -0
- package/dist/server/register-tools.d.ts +25 -0
- package/dist/server/register-tools.d.ts.map +1 -0
- package/dist/server/register-tools.js +102 -0
- package/dist/server/register-tools.js.map +1 -0
- package/dist/server/startup.d.ts +16 -0
- package/dist/server/startup.d.ts.map +1 -0
- package/dist/server/startup.js +37 -0
- package/dist/server/startup.js.map +1 -0
- package/dist/server/state.d.ts +166 -0
- package/dist/server/state.d.ts.map +1 -0
- package/dist/server/state.js +424 -0
- package/dist/server/state.js.map +1 -0
- package/dist/server/transports/http-transport.d.ts +37 -0
- package/dist/server/transports/http-transport.d.ts.map +1 -0
- package/dist/server/transports/http-transport.js +204 -0
- package/dist/server/transports/http-transport.js.map +1 -0
- package/dist/server/transports/index.d.ts +9 -0
- package/dist/server/transports/index.d.ts.map +1 -0
- package/dist/server/transports/index.js +9 -0
- package/dist/server/transports/index.js.map +1 -0
- package/dist/server/transports/session-manager.d.ts +40 -0
- package/dist/server/transports/session-manager.d.ts.map +1 -0
- package/dist/server/transports/session-manager.js +74 -0
- package/dist/server/transports/session-manager.js.map +1 -0
- package/dist/server/types.d.ts +82 -0
- package/dist/server/types.d.ts.map +1 -0
- package/dist/server/types.js +14 -0
- package/dist/server/types.js.map +1 -0
- package/dist/services/audit.d.ts +26 -0
- package/dist/services/audit.d.ts.map +1 -0
- package/dist/services/audit.js +43 -0
- package/dist/services/audit.js.map +1 -0
- package/dist/services/chunking/chunk-deduplicator.d.ts +33 -0
- package/dist/services/chunking/chunk-deduplicator.d.ts.map +1 -0
- package/dist/services/chunking/chunk-deduplicator.js +46 -0
- package/dist/services/chunking/chunk-deduplicator.js.map +1 -0
- package/dist/services/chunking/chunk-merger.d.ts +26 -0
- package/dist/services/chunking/chunk-merger.d.ts.map +1 -0
- package/dist/services/chunking/chunk-merger.js +94 -0
- package/dist/services/chunking/chunk-merger.js.map +1 -0
- package/dist/services/chunking/chunker.d.ts +62 -0
- package/dist/services/chunking/chunker.d.ts.map +1 -0
- package/dist/services/chunking/chunker.js +566 -0
- package/dist/services/chunking/chunker.js.map +1 -0
- package/dist/services/chunking/heading-normalizer.d.ts +33 -0
- package/dist/services/chunking/heading-normalizer.d.ts.map +1 -0
- package/dist/services/chunking/heading-normalizer.js +101 -0
- package/dist/services/chunking/heading-normalizer.js.map +1 -0
- package/dist/services/chunking/json-block-analyzer.d.ts +163 -0
- package/dist/services/chunking/json-block-analyzer.d.ts.map +1 -0
- package/dist/services/chunking/json-block-analyzer.js +1033 -0
- package/dist/services/chunking/json-block-analyzer.js.map +1 -0
- package/dist/services/chunking/markdown-parser.d.ts +75 -0
- package/dist/services/chunking/markdown-parser.d.ts.map +1 -0
- package/dist/services/chunking/markdown-parser.js +428 -0
- package/dist/services/chunking/markdown-parser.js.map +1 -0
- package/dist/services/chunking/text-normalizer.d.ts +20 -0
- package/dist/services/chunking/text-normalizer.d.ts.map +1 -0
- package/dist/services/chunking/text-normalizer.js +36 -0
- package/dist/services/chunking/text-normalizer.js.map +1 -0
- package/dist/services/clm/contract-schemas.d.ts +36 -0
- package/dist/services/clm/contract-schemas.d.ts.map +1 -0
- package/dist/services/clm/contract-schemas.js +92 -0
- package/dist/services/clm/contract-schemas.js.map +1 -0
- package/dist/services/clm/summarization.d.ts +46 -0
- package/dist/services/clm/summarization.d.ts.map +1 -0
- package/dist/services/clm/summarization.js +61 -0
- package/dist/services/clm/summarization.js.map +1 -0
- package/dist/services/clustering/clustering-service.d.ts +58 -0
- package/dist/services/clustering/clustering-service.d.ts.map +1 -0
- package/dist/services/clustering/clustering-service.js +467 -0
- package/dist/services/clustering/clustering-service.js.map +1 -0
- package/dist/services/comparison/diff-service.d.ts +41 -0
- package/dist/services/comparison/diff-service.d.ts.map +1 -0
- package/dist/services/comparison/diff-service.js +120 -0
- package/dist/services/comparison/diff-service.js.map +1 -0
- package/dist/services/embedding/embedder.d.ts +55 -0
- package/dist/services/embedding/embedder.d.ts.map +1 -0
- package/dist/services/embedding/embedder.js +202 -0
- package/dist/services/embedding/embedder.js.map +1 -0
- package/dist/services/embedding/nomic.d.ts +67 -0
- package/dist/services/embedding/nomic.d.ts.map +1 -0
- package/dist/services/embedding/nomic.js +280 -0
- package/dist/services/embedding/nomic.js.map +1 -0
- package/dist/services/gemini/circuit-breaker.d.ts +106 -0
- package/dist/services/gemini/circuit-breaker.d.ts.map +1 -0
- package/dist/services/gemini/circuit-breaker.js +237 -0
- package/dist/services/gemini/circuit-breaker.js.map +1 -0
- package/dist/services/gemini/client.d.ts +173 -0
- package/dist/services/gemini/client.d.ts.map +1 -0
- package/dist/services/gemini/client.js +483 -0
- package/dist/services/gemini/client.js.map +1 -0
- package/dist/services/gemini/config.d.ts +116 -0
- package/dist/services/gemini/config.d.ts.map +1 -0
- package/dist/services/gemini/config.js +118 -0
- package/dist/services/gemini/config.js.map +1 -0
- package/dist/services/gemini/index.d.ts +9 -0
- package/dist/services/gemini/index.d.ts.map +1 -0
- package/dist/services/gemini/index.js +13 -0
- package/dist/services/gemini/index.js.map +1 -0
- package/dist/services/gemini/rate-limiter.d.ts +62 -0
- package/dist/services/gemini/rate-limiter.d.ts.map +1 -0
- package/dist/services/gemini/rate-limiter.js +120 -0
- package/dist/services/gemini/rate-limiter.js.map +1 -0
- package/dist/services/images/extractor.d.ts +88 -0
- package/dist/services/images/extractor.d.ts.map +1 -0
- package/dist/services/images/extractor.js +340 -0
- package/dist/services/images/extractor.js.map +1 -0
- package/dist/services/images/optimizer.d.ts +130 -0
- package/dist/services/images/optimizer.d.ts.map +1 -0
- package/dist/services/images/optimizer.js +228 -0
- package/dist/services/images/optimizer.js.map +1 -0
- package/dist/services/ocr/datalab.d.ts +64 -0
- package/dist/services/ocr/datalab.d.ts.map +1 -0
- package/dist/services/ocr/datalab.js +425 -0
- package/dist/services/ocr/datalab.js.map +1 -0
- package/dist/services/ocr/errors.d.ts +38 -0
- package/dist/services/ocr/errors.d.ts.map +1 -0
- package/dist/services/ocr/errors.js +83 -0
- package/dist/services/ocr/errors.js.map +1 -0
- package/dist/services/ocr/file-manager.d.ts +76 -0
- package/dist/services/ocr/file-manager.d.ts.map +1 -0
- package/dist/services/ocr/file-manager.js +238 -0
- package/dist/services/ocr/file-manager.js.map +1 -0
- package/dist/services/ocr/form-fill.d.ts +48 -0
- package/dist/services/ocr/form-fill.d.ts.map +1 -0
- package/dist/services/ocr/form-fill.js +213 -0
- package/dist/services/ocr/form-fill.js.map +1 -0
- package/dist/services/ocr/processor.d.ts +95 -0
- package/dist/services/ocr/processor.d.ts.map +1 -0
- package/dist/services/ocr/processor.js +259 -0
- package/dist/services/ocr/processor.js.map +1 -0
- package/dist/services/provenance/agent-metadata.d.ts +82 -0
- package/dist/services/provenance/agent-metadata.d.ts.map +1 -0
- package/dist/services/provenance/agent-metadata.js +106 -0
- package/dist/services/provenance/agent-metadata.js.map +1 -0
- package/dist/services/provenance/chain-hash.d.ts +57 -0
- package/dist/services/provenance/chain-hash.d.ts.map +1 -0
- package/dist/services/provenance/chain-hash.js +131 -0
- package/dist/services/provenance/chain-hash.js.map +1 -0
- package/dist/services/provenance/exporter.d.ts +202 -0
- package/dist/services/provenance/exporter.d.ts.map +1 -0
- package/dist/services/provenance/exporter.js +457 -0
- package/dist/services/provenance/exporter.js.map +1 -0
- package/dist/services/provenance/index.d.ts +15 -0
- package/dist/services/provenance/index.d.ts.map +1 -0
- package/dist/services/provenance/index.js +17 -0
- package/dist/services/provenance/index.js.map +1 -0
- package/dist/services/provenance/tracker.d.ts +138 -0
- package/dist/services/provenance/tracker.d.ts.map +1 -0
- package/dist/services/provenance/tracker.js +293 -0
- package/dist/services/provenance/tracker.js.map +1 -0
- package/dist/services/provenance/verifier.d.ts +153 -0
- package/dist/services/provenance/verifier.d.ts.map +1 -0
- package/dist/services/provenance/verifier.js +536 -0
- package/dist/services/provenance/verifier.js.map +1 -0
- package/dist/services/python-pool.d.ts +70 -0
- package/dist/services/python-pool.d.ts.map +1 -0
- package/dist/services/python-pool.js +265 -0
- package/dist/services/python-pool.js.map +1 -0
- package/dist/services/search/bm25.d.ts +180 -0
- package/dist/services/search/bm25.d.ts.map +1 -0
- package/dist/services/search/bm25.js +656 -0
- package/dist/services/search/bm25.js.map +1 -0
- package/dist/services/search/fusion.d.ts +103 -0
- package/dist/services/search/fusion.d.ts.map +1 -0
- package/dist/services/search/fusion.js +122 -0
- package/dist/services/search/fusion.js.map +1 -0
- package/dist/services/search/local-reranker.d.ts +30 -0
- package/dist/services/search/local-reranker.d.ts.map +1 -0
- package/dist/services/search/local-reranker.js +123 -0
- package/dist/services/search/local-reranker.js.map +1 -0
- package/dist/services/search/quality.d.ts +11 -0
- package/dist/services/search/quality.d.ts.map +1 -0
- package/dist/services/search/quality.js +17 -0
- package/dist/services/search/quality.js.map +1 -0
- package/dist/services/search/query-classifier.d.ts +34 -0
- package/dist/services/search/query-classifier.d.ts.map +1 -0
- package/dist/services/search/query-classifier.js +114 -0
- package/dist/services/search/query-classifier.js.map +1 -0
- package/dist/services/search/query-expander.d.ts +73 -0
- package/dist/services/search/query-expander.d.ts.map +1 -0
- package/dist/services/search/query-expander.js +281 -0
- package/dist/services/search/query-expander.js.map +1 -0
- package/dist/services/search/reranker.d.ts +44 -0
- package/dist/services/search/reranker.d.ts.map +1 -0
- package/dist/services/search/reranker.js +101 -0
- package/dist/services/search/reranker.js.map +1 -0
- package/dist/services/storage/database/annotation-operations.d.ts +113 -0
- package/dist/services/storage/database/annotation-operations.d.ts.map +1 -0
- package/dist/services/storage/database/annotation-operations.js +177 -0
- package/dist/services/storage/database/annotation-operations.js.map +1 -0
- package/dist/services/storage/database/approval-operations.d.ts +132 -0
- package/dist/services/storage/database/approval-operations.d.ts.map +1 -0
- package/dist/services/storage/database/approval-operations.js +206 -0
- package/dist/services/storage/database/approval-operations.js.map +1 -0
- package/dist/services/storage/database/chunk-operations.d.ts +132 -0
- package/dist/services/storage/database/chunk-operations.d.ts.map +1 -0
- package/dist/services/storage/database/chunk-operations.js +306 -0
- package/dist/services/storage/database/chunk-operations.js.map +1 -0
- package/dist/services/storage/database/cluster-operations.d.ts +97 -0
- package/dist/services/storage/database/cluster-operations.d.ts.map +1 -0
- package/dist/services/storage/database/cluster-operations.js +258 -0
- package/dist/services/storage/database/cluster-operations.js.map +1 -0
- package/dist/services/storage/database/comparison-operations.d.ts +41 -0
- package/dist/services/storage/database/comparison-operations.d.ts.map +1 -0
- package/dist/services/storage/database/comparison-operations.js +65 -0
- package/dist/services/storage/database/comparison-operations.js.map +1 -0
- package/dist/services/storage/database/converters.d.ts +36 -0
- package/dist/services/storage/database/converters.d.ts.map +1 -0
- package/dist/services/storage/database/converters.js +244 -0
- package/dist/services/storage/database/converters.js.map +1 -0
- package/dist/services/storage/database/document-operations.d.ts +145 -0
- package/dist/services/storage/database/document-operations.d.ts.map +1 -0
- package/dist/services/storage/database/document-operations.js +498 -0
- package/dist/services/storage/database/document-operations.js.map +1 -0
- package/dist/services/storage/database/embedding-operations.d.ts +130 -0
- package/dist/services/storage/database/embedding-operations.d.ts.map +1 -0
- package/dist/services/storage/database/embedding-operations.js +315 -0
- package/dist/services/storage/database/embedding-operations.js.map +1 -0
- package/dist/services/storage/database/extraction-operations.d.ts +47 -0
- package/dist/services/storage/database/extraction-operations.d.ts.map +1 -0
- package/dist/services/storage/database/extraction-operations.js +85 -0
- package/dist/services/storage/database/extraction-operations.js.map +1 -0
- package/dist/services/storage/database/form-fill-operations.d.ts +58 -0
- package/dist/services/storage/database/form-fill-operations.d.ts.map +1 -0
- package/dist/services/storage/database/form-fill-operations.js +116 -0
- package/dist/services/storage/database/form-fill-operations.js.map +1 -0
- package/dist/services/storage/database/helpers.d.ts +29 -0
- package/dist/services/storage/database/helpers.d.ts.map +1 -0
- package/dist/services/storage/database/helpers.js +55 -0
- package/dist/services/storage/database/helpers.js.map +1 -0
- package/dist/services/storage/database/image-operations.d.ts +202 -0
- package/dist/services/storage/database/image-operations.d.ts.map +1 -0
- package/dist/services/storage/database/image-operations.js +484 -0
- package/dist/services/storage/database/image-operations.js.map +1 -0
- package/dist/services/storage/database/index.d.ts +13 -0
- package/dist/services/storage/database/index.d.ts.map +1 -0
- package/dist/services/storage/database/index.js +16 -0
- package/dist/services/storage/database/index.js.map +1 -0
- package/dist/services/storage/database/lock-operations.d.ts +59 -0
- package/dist/services/storage/database/lock-operations.d.ts.map +1 -0
- package/dist/services/storage/database/lock-operations.js +89 -0
- package/dist/services/storage/database/lock-operations.js.map +1 -0
- package/dist/services/storage/database/obligation-operations.d.ts +88 -0
- package/dist/services/storage/database/obligation-operations.d.ts.map +1 -0
- package/dist/services/storage/database/obligation-operations.js +206 -0
- package/dist/services/storage/database/obligation-operations.js.map +1 -0
- package/dist/services/storage/database/ocr-operations.d.ts +33 -0
- package/dist/services/storage/database/ocr-operations.d.ts.map +1 -0
- package/dist/services/storage/database/ocr-operations.js +70 -0
- package/dist/services/storage/database/ocr-operations.js.map +1 -0
- package/dist/services/storage/database/playbook-operations.d.ts +72 -0
- package/dist/services/storage/database/playbook-operations.d.ts.map +1 -0
- package/dist/services/storage/database/playbook-operations.js +247 -0
- package/dist/services/storage/database/playbook-operations.js.map +1 -0
- package/dist/services/storage/database/provenance-operations.d.ts +112 -0
- package/dist/services/storage/database/provenance-operations.d.ts.map +1 -0
- package/dist/services/storage/database/provenance-operations.js +251 -0
- package/dist/services/storage/database/provenance-operations.js.map +1 -0
- package/dist/services/storage/database/service.d.ts +142 -0
- package/dist/services/storage/database/service.d.ts.map +1 -0
- package/dist/services/storage/database/service.js +310 -0
- package/dist/services/storage/database/service.js.map +1 -0
- package/dist/services/storage/database/static-operations.d.ts +30 -0
- package/dist/services/storage/database/static-operations.d.ts.map +1 -0
- package/dist/services/storage/database/static-operations.js +218 -0
- package/dist/services/storage/database/static-operations.js.map +1 -0
- package/dist/services/storage/database/stats-operations.d.ts +101 -0
- package/dist/services/storage/database/stats-operations.d.ts.map +1 -0
- package/dist/services/storage/database/stats-operations.js +394 -0
- package/dist/services/storage/database/stats-operations.js.map +1 -0
- package/dist/services/storage/database/tag-operations.d.ts +76 -0
- package/dist/services/storage/database/tag-operations.d.ts.map +1 -0
- package/dist/services/storage/database/tag-operations.js +178 -0
- package/dist/services/storage/database/tag-operations.js.map +1 -0
- package/dist/services/storage/database/types.d.ts +286 -0
- package/dist/services/storage/database/types.d.ts.map +1 -0
- package/dist/services/storage/database/types.js +39 -0
- package/dist/services/storage/database/types.js.map +1 -0
- package/dist/services/storage/database/upload-operations.d.ts +71 -0
- package/dist/services/storage/database/upload-operations.d.ts.map +1 -0
- package/dist/services/storage/database/upload-operations.js +124 -0
- package/dist/services/storage/database/upload-operations.js.map +1 -0
- package/dist/services/storage/database/user-operations.d.ts +102 -0
- package/dist/services/storage/database/user-operations.d.ts.map +1 -0
- package/dist/services/storage/database/user-operations.js +151 -0
- package/dist/services/storage/database/user-operations.js.map +1 -0
- package/dist/services/storage/database/workflow-operations.d.ts +98 -0
- package/dist/services/storage/database/workflow-operations.d.ts.map +1 -0
- package/dist/services/storage/database/workflow-operations.js +157 -0
- package/dist/services/storage/database/workflow-operations.js.map +1 -0
- package/dist/services/storage/database.d.ts +16 -0
- package/dist/services/storage/database.d.ts.map +1 -0
- package/dist/services/storage/database.js +15 -0
- package/dist/services/storage/database.js.map +1 -0
- package/dist/services/storage/index.d.ts +10 -0
- package/dist/services/storage/index.d.ts.map +1 -0
- package/dist/services/storage/index.js +10 -0
- package/dist/services/storage/index.js.map +1 -0
- package/dist/services/storage/migrations/index.d.ts +16 -0
- package/dist/services/storage/migrations/index.d.ts.map +1 -0
- package/dist/services/storage/migrations/index.js +20 -0
- package/dist/services/storage/migrations/index.js.map +1 -0
- package/dist/services/storage/migrations/operations.d.ts +40 -0
- package/dist/services/storage/migrations/operations.d.ts.map +1 -0
- package/dist/services/storage/migrations/operations.js +2910 -0
- package/dist/services/storage/migrations/operations.js.map +1 -0
- package/dist/services/storage/migrations/schema-definitions.d.ts +306 -0
- package/dist/services/storage/migrations/schema-definitions.d.ts.map +1 -0
- package/dist/services/storage/migrations/schema-definitions.js +1006 -0
- package/dist/services/storage/migrations/schema-definitions.js.map +1 -0
- package/dist/services/storage/migrations/schema-helpers.d.ts +50 -0
- package/dist/services/storage/migrations/schema-helpers.d.ts.map +1 -0
- package/dist/services/storage/migrations/schema-helpers.js +176 -0
- package/dist/services/storage/migrations/schema-helpers.js.map +1 -0
- package/dist/services/storage/migrations/types.d.ts +15 -0
- package/dist/services/storage/migrations/types.d.ts.map +1 -0
- package/dist/services/storage/migrations/types.js +21 -0
- package/dist/services/storage/migrations/types.js.map +1 -0
- package/dist/services/storage/migrations/verification.d.ts +20 -0
- package/dist/services/storage/migrations/verification.d.ts.map +1 -0
- package/dist/services/storage/migrations/verification.js +78 -0
- package/dist/services/storage/migrations/verification.js.map +1 -0
- package/dist/services/storage/migrations.d.ts +16 -0
- package/dist/services/storage/migrations.d.ts.map +1 -0
- package/dist/services/storage/migrations.js +17 -0
- package/dist/services/storage/migrations.js.map +1 -0
- package/dist/services/storage/types.d.ts +12 -0
- package/dist/services/storage/types.d.ts.map +1 -0
- package/dist/services/storage/types.js +5 -0
- package/dist/services/storage/types.js.map +1 -0
- package/dist/services/storage/vector.d.ts +208 -0
- package/dist/services/storage/vector.d.ts.map +1 -0
- package/dist/services/storage/vector.js +526 -0
- package/dist/services/storage/vector.js.map +1 -0
- package/dist/services/vlm/pipeline.d.ts +194 -0
- package/dist/services/vlm/pipeline.d.ts.map +1 -0
- package/dist/services/vlm/pipeline.js +800 -0
- package/dist/services/vlm/pipeline.js.map +1 -0
- package/dist/services/vlm/prompts.d.ts +171 -0
- package/dist/services/vlm/prompts.d.ts.map +1 -0
- package/dist/services/vlm/prompts.js +229 -0
- package/dist/services/vlm/prompts.js.map +1 -0
- package/dist/services/vlm/service.d.ts +174 -0
- package/dist/services/vlm/service.d.ts.map +1 -0
- package/dist/services/vlm/service.js +256 -0
- package/dist/services/vlm/service.js.map +1 -0
- package/dist/services/webhook-delivery.d.ts +4 -0
- package/dist/services/webhook-delivery.d.ts.map +1 -0
- package/dist/services/webhook-delivery.js +140 -0
- package/dist/services/webhook-delivery.js.map +1 -0
- package/dist/tools/chunks.d.ts +19 -0
- package/dist/tools/chunks.d.ts.map +1 -0
- package/dist/tools/chunks.js +392 -0
- package/dist/tools/chunks.js.map +1 -0
- package/dist/tools/clm.d.ts +16 -0
- package/dist/tools/clm.d.ts.map +1 -0
- package/dist/tools/clm.js +668 -0
- package/dist/tools/clm.js.map +1 -0
- package/dist/tools/clustering.d.ts +13 -0
- package/dist/tools/clustering.d.ts.map +1 -0
- package/dist/tools/clustering.js +498 -0
- package/dist/tools/clustering.js.map +1 -0
- package/dist/tools/collaboration.d.ts +15 -0
- package/dist/tools/collaboration.d.ts.map +1 -0
- package/dist/tools/collaboration.js +516 -0
- package/dist/tools/collaboration.js.map +1 -0
- package/dist/tools/comparison.d.ts +13 -0
- package/dist/tools/comparison.d.ts.map +1 -0
- package/dist/tools/comparison.js +735 -0
- package/dist/tools/comparison.js.map +1 -0
- package/dist/tools/compliance.d.ts +15 -0
- package/dist/tools/compliance.d.ts.map +1 -0
- package/dist/tools/compliance.js +640 -0
- package/dist/tools/compliance.js.map +1 -0
- package/dist/tools/config.d.ts +19 -0
- package/dist/tools/config.d.ts.map +1 -0
- package/dist/tools/config.js +213 -0
- package/dist/tools/config.js.map +1 -0
- package/dist/tools/database.d.ts +62 -0
- package/dist/tools/database.d.ts.map +1 -0
- package/dist/tools/database.js +288 -0
- package/dist/tools/database.js.map +1 -0
- package/dist/tools/documents.d.ts +61 -0
- package/dist/tools/documents.d.ts.map +1 -0
- package/dist/tools/documents.js +1624 -0
- package/dist/tools/documents.js.map +1 -0
- package/dist/tools/embeddings.d.ts +14 -0
- package/dist/tools/embeddings.d.ts.map +1 -0
- package/dist/tools/embeddings.js +626 -0
- package/dist/tools/embeddings.js.map +1 -0
- package/dist/tools/evaluation.d.ts +25 -0
- package/dist/tools/evaluation.d.ts.map +1 -0
- package/dist/tools/evaluation.js +523 -0
- package/dist/tools/evaluation.js.map +1 -0
- package/dist/tools/events.d.ts +16 -0
- package/dist/tools/events.d.ts.map +1 -0
- package/dist/tools/events.js +493 -0
- package/dist/tools/events.js.map +1 -0
- package/dist/tools/extraction-structured.d.ts +13 -0
- package/dist/tools/extraction-structured.d.ts.map +1 -0
- package/dist/tools/extraction-structured.js +390 -0
- package/dist/tools/extraction-structured.js.map +1 -0
- package/dist/tools/extraction.d.ts +24 -0
- package/dist/tools/extraction.d.ts.map +1 -0
- package/dist/tools/extraction.js +424 -0
- package/dist/tools/extraction.js.map +1 -0
- package/dist/tools/file-management.d.ts +14 -0
- package/dist/tools/file-management.d.ts.map +1 -0
- package/dist/tools/file-management.js +523 -0
- package/dist/tools/file-management.js.map +1 -0
- package/dist/tools/form-fill.d.ts +13 -0
- package/dist/tools/form-fill.d.ts.map +1 -0
- package/dist/tools/form-fill.js +250 -0
- package/dist/tools/form-fill.js.map +1 -0
- package/dist/tools/health.d.ts +19 -0
- package/dist/tools/health.d.ts.map +1 -0
- package/dist/tools/health.js +229 -0
- package/dist/tools/health.js.map +1 -0
- package/dist/tools/images.d.ts +54 -0
- package/dist/tools/images.d.ts.map +1 -0
- package/dist/tools/images.js +787 -0
- package/dist/tools/images.js.map +1 -0
- package/dist/tools/ingestion.d.ts +94 -0
- package/dist/tools/ingestion.d.ts.map +1 -0
- package/dist/tools/ingestion.js +1659 -0
- package/dist/tools/ingestion.js.map +1 -0
- package/dist/tools/intelligence.d.ts +18 -0
- package/dist/tools/intelligence.d.ts.map +1 -0
- package/dist/tools/intelligence.js +1039 -0
- package/dist/tools/intelligence.js.map +1 -0
- package/dist/tools/provenance.d.ts +51 -0
- package/dist/tools/provenance.d.ts.map +1 -0
- package/dist/tools/provenance.js +691 -0
- package/dist/tools/provenance.js.map +1 -0
- package/dist/tools/reports.d.ts +41 -0
- package/dist/tools/reports.d.ts.map +1 -0
- package/dist/tools/reports.js +1394 -0
- package/dist/tools/reports.js.map +1 -0
- package/dist/tools/search.d.ts +35 -0
- package/dist/tools/search.d.ts.map +1 -0
- package/dist/tools/search.js +2528 -0
- package/dist/tools/search.js.map +1 -0
- package/dist/tools/shared.d.ts +52 -0
- package/dist/tools/shared.d.ts.map +1 -0
- package/dist/tools/shared.js +54 -0
- package/dist/tools/shared.js.map +1 -0
- package/dist/tools/tags.d.ts +15 -0
- package/dist/tools/tags.d.ts.map +1 -0
- package/dist/tools/tags.js +287 -0
- package/dist/tools/tags.js.map +1 -0
- package/dist/tools/timeline.d.ts +15 -0
- package/dist/tools/timeline.d.ts.map +1 -0
- package/dist/tools/timeline.js +14 -0
- package/dist/tools/timeline.js.map +1 -0
- package/dist/tools/users.d.ts +14 -0
- package/dist/tools/users.d.ts.map +1 -0
- package/dist/tools/users.js +257 -0
- package/dist/tools/users.js.map +1 -0
- package/dist/tools/vlm.d.ts +40 -0
- package/dist/tools/vlm.d.ts.map +1 -0
- package/dist/tools/vlm.js +475 -0
- package/dist/tools/vlm.js.map +1 -0
- package/dist/tools/workflow.d.ts +16 -0
- package/dist/tools/workflow.d.ts.map +1 -0
- package/dist/tools/workflow.js +495 -0
- package/dist/tools/workflow.js.map +1 -0
- package/dist/utils/backoff.d.ts +53 -0
- package/dist/utils/backoff.d.ts.map +1 -0
- package/dist/utils/backoff.js +78 -0
- package/dist/utils/backoff.js.map +1 -0
- package/dist/utils/config-persistence.d.ts +33 -0
- package/dist/utils/config-persistence.d.ts.map +1 -0
- package/dist/utils/config-persistence.js +61 -0
- package/dist/utils/config-persistence.js.map +1 -0
- package/dist/utils/hash.d.ts +65 -0
- package/dist/utils/hash.d.ts.map +1 -0
- package/dist/utils/hash.js +146 -0
- package/dist/utils/hash.js.map +1 -0
- package/dist/utils/math.d.ts +21 -0
- package/dist/utils/math.d.ts.map +1 -0
- package/dist/utils/math.js +39 -0
- package/dist/utils/math.js.map +1 -0
- package/dist/utils/validation.d.ts +697 -0
- package/dist/utils/validation.d.ts.map +1 -0
- package/dist/utils/validation.js +529 -0
- package/dist/utils/validation.js.map +1 -0
- package/package.json +96 -0
- package/python/.gitkeep +0 -0
- package/python/__init__.py +104 -0
- package/python/clustering_worker.py +440 -0
- package/python/docx_image_extractor.py +524 -0
- package/python/embedding_worker.py +552 -0
- package/python/file_manager_worker.py +564 -0
- package/python/form_fill_worker.py +399 -0
- package/python/gpu_utils.py +582 -0
- package/python/image_extractor.py +317 -0
- package/python/image_optimizer.py +444 -0
- package/python/ocr_worker.py +712 -0
- package/python/pyproject.toml +76 -0
- package/python/requirements.txt +51 -0
- package/python/reranker_worker.py +87 -0
|
@@ -0,0 +1,390 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Structured Extraction MCP Tools
|
|
3
|
+
*
|
|
4
|
+
* Tools for structured data extraction using Datalab page_schema.
|
|
5
|
+
*
|
|
6
|
+
* CRITICAL: NEVER use console.log() - stdout is reserved for JSON-RPC protocol.
|
|
7
|
+
* Use console.error() for all logging.
|
|
8
|
+
*
|
|
9
|
+
* @module tools/extraction-structured
|
|
10
|
+
*/
|
|
11
|
+
import path from 'path';
|
|
12
|
+
import { z } from 'zod';
|
|
13
|
+
import { v4 as uuidv4 } from 'uuid';
|
|
14
|
+
import { formatResponse, handleError, fetchProvenanceChain, } from './shared.js';
|
|
15
|
+
import { successResult } from '../server/types.js';
|
|
16
|
+
import { MCPError } from '../server/errors.js';
|
|
17
|
+
import { validateInput } from '../utils/validation.js';
|
|
18
|
+
import { requireDatabase } from '../server/state.js';
|
|
19
|
+
import { DatalabClient } from '../services/ocr/datalab.js';
|
|
20
|
+
import { ProvenanceType } from '../models/provenance.js';
|
|
21
|
+
import { computeHash } from '../utils/hash.js';
|
|
22
|
+
import { getEmbeddingClient, MODEL_NAME, MODEL_VERSION, EMBEDDING_DIM, } from '../services/embedding/nomic.js';
|
|
23
|
+
// ---------------------------------------------------------------------------
// Input schemas
// ---------------------------------------------------------------------------
// Small field factories so the three schemas below read as a list of
// (name, description) pairs. Each call returns a fresh zod instance.

/** Required string that must contain at least one character. */
const nonEmptyString = (description) => z.string().min(1).describe(description);

/** String field that may be omitted. */
const optionalString = (description) => z.string().optional().describe(description);

/** Boolean flag, defaulting to false, that requests the provenance chain. */
const includeProvenanceFlag = () => z.boolean().default(false).describe('Include provenance chain');

/** Arguments for `ocr_extract_structured`. */
const ExtractStructuredInput = z.object({
    document_id: nonEmptyString('Document ID (must be OCR processed)'),
    page_schema: nonEmptyString('JSON schema string for structured extraction per page'),
});

/**
 * Arguments for `ocr_extraction_list`.
 * A truthy `query` selects search mode; otherwise `document_id` is required
 * by the handler and list mode is used.
 */
const ExtractionListInput = z.object({
    document_id: optionalString('Document ID to list extractions for'),
    label: optionalString('Filter by extraction label'),
    // Search params (when query is provided, search mode is used)
    query: optionalString('Search query to match within extraction JSON content'),
    document_filter: z
        .array(z.string())
        .optional()
        .describe('Filter by document IDs (search mode)'),
    limit: z
        .number()
        .min(1)
        .max(100)
        .default(50)
        .describe('Maximum results'),
    include_provenance: includeProvenanceFlag(),
});

/** Arguments for `ocr_extraction_get`. */
const ExtractionGetInput = z.object({
    extraction_id: nonEmptyString('Extraction ID to retrieve'),
    include_provenance: includeProvenanceFlag(),
});
|
|
40
|
+
/**
 * Handle ocr_extract_structured - run a page_schema extraction for one document.
 *
 * Steps, in order:
 *  1. Validate the arguments and load the document (status must be 'complete')
 *     together with its OCR result.
 *  2. Send the file to Datalab in 'accurate' mode with the caller's page_schema.
 *  3. Store an EXTRACTION provenance record (chain depth 2) and the extraction row.
 *  4. Best effort: embed the extraction JSON, then store an EMBEDDING provenance
 *     record (chain depth 3), the embedding row and the vector. A failure in this
 *     step is logged and surfaced under `warnings`; the extraction is still returned.
 *
 * @param params Raw tool arguments; validated against ExtractStructuredInput.
 * @returns The formatted success payload, or whatever handleError() produces
 *          for any error thrown along the way.
 */
async function handleExtractStructured(params) {
    try {
        const input = validateInput(ExtractStructuredInput, params);
        const { document_id: documentId, page_schema: pageSchema } = input;
        const { db, vector } = requireDatabase();

        // --- Preconditions: document exists, is OCR-complete, has an OCR result ---
        const doc = db.getDocument(documentId);
        if (!doc) {
            throw new Error(`Document not found: ${documentId}`);
        }
        if (doc.status !== 'complete') {
            throw new Error(`Document not OCR processed yet (status: ${doc.status}). Run ocr_process_pending first.`);
        }
        const ocrResult = db.getOCRResultByDocumentId(doc.id);
        if (!ocrResult) {
            throw new Error(`No OCR result found for document ${doc.id}`);
        }

        // --- Structured extraction via Datalab ---
        const datalab = new DatalabClient();
        // Throwaway provenance id: only handed to processDocument, never stored here.
        const datalabCallProvId = uuidv4();
        const datalabResponse = await datalab.processDocument(doc.file_path, doc.id, datalabCallProvId, 'accurate', { pageSchema });
        const extractionData = datalabResponse.extractionJson;
        if (!extractionData) {
            throw new Error('No extraction data returned. Verify page_schema is valid JSON schema.');
        }

        // --- Persist extraction + provenance ---
        const extractionContent = JSON.stringify(extractionData);
        const extractionHash = computeHash(extractionContent);
        const extractionProvId = uuidv4();
        const now = new Date().toISOString();
        // Ancestors shared by both provenance records written below.
        const ancestorProvIds = [doc.provenance_id, ocrResult.provenance_id];

        // EXTRACTION provenance (depth 2, parent = OCR_RESULT)
        const extractionProvenance = {
            id: extractionProvId,
            type: ProvenanceType.EXTRACTION,
            created_at: now,
            processed_at: now,
            source_file_created_at: null,
            source_file_modified_at: null,
            source_type: 'EXTRACTION',
            source_path: doc.file_path,
            source_id: ocrResult.provenance_id,
            root_document_id: doc.provenance_id,
            location: null,
            content_hash: extractionHash,
            input_hash: ocrResult.content_hash,
            file_hash: doc.file_hash,
            processor: 'datalab-extraction',
            processor_version: '1.0.0',
            processing_params: { page_schema: pageSchema },
            processing_duration_ms: null,
            processing_quality_score: null,
            parent_id: ocrResult.provenance_id,
            parent_ids: JSON.stringify(ancestorProvIds),
            chain_depth: 2,
            chain_path: JSON.stringify(['DOCUMENT', 'OCR_RESULT', 'EXTRACTION']),
        };
        db.insertProvenance(extractionProvenance);

        const extractionId = uuidv4();
        const extractionRow = {
            id: extractionId,
            document_id: doc.id,
            ocr_result_id: ocrResult.id,
            schema_json: pageSchema,
            extraction_json: extractionContent,
            content_hash: extractionHash,
            provenance_id: extractionProvId,
            created_at: now,
        };
        db.insertExtraction(extractionRow);

        // --- Best-effort embedding of the extraction content (semantic search) ---
        // Provenance chain: DOCUMENT(0) -> OCR_RESULT(1) -> EXTRACTION(2) -> EMBEDDING(3)
        const warnings = [];
        let embeddingId = null;
        let embeddingProvId = null;
        try {
            const vectors = await getEmbeddingClient().embedChunks([extractionContent], 1);
            if (vectors.length === 0) {
                throw new Error('Embedding generation returned empty result');
            }
            const newEmbeddingId = uuidv4();
            const newEmbeddingProvId = uuidv4();

            // EMBEDDING provenance (depth 3, parent = EXTRACTION)
            const embeddingProvenance = {
                id: newEmbeddingProvId,
                type: ProvenanceType.EMBEDDING,
                created_at: now,
                processed_at: now,
                source_file_created_at: null,
                source_file_modified_at: null,
                source_type: 'EMBEDDING',
                source_path: doc.file_path,
                source_id: extractionProvId,
                root_document_id: doc.provenance_id,
                location: null,
                content_hash: extractionHash,
                input_hash: extractionHash,
                file_hash: doc.file_hash,
                processor: MODEL_NAME,
                processor_version: MODEL_VERSION,
                processing_params: { task_type: 'search_document', dimensions: EMBEDDING_DIM },
                processing_duration_ms: null,
                processing_quality_score: null,
                parent_id: extractionProvId,
                parent_ids: JSON.stringify([...ancestorProvIds, extractionProvId]),
                chain_depth: 3,
                chain_path: JSON.stringify(['DOCUMENT', 'OCR_RESULT', 'EXTRACTION', 'EMBEDDING']),
            };
            db.insertProvenance(embeddingProvenance);

            // The whole extraction JSON is embedded as a single chunk (index 0 of 1).
            // NOTE(review): inference_mode and gpu_device are written as fixed
            // literals here - confirm they match the device the client really used.
            const embeddingRow = {
                id: newEmbeddingId,
                chunk_id: null,
                image_id: null,
                extraction_id: extractionId,
                document_id: doc.id,
                original_text: extractionContent,
                original_text_length: extractionContent.length,
                source_file_path: doc.file_path,
                source_file_name: path.basename(doc.file_path),
                source_file_hash: doc.file_hash,
                page_number: null,
                page_range: null,
                character_start: 0,
                character_end: extractionContent.length,
                chunk_index: 0,
                total_chunks: 1,
                model_name: MODEL_NAME,
                model_version: MODEL_VERSION,
                task_type: 'search_document',
                inference_mode: 'local',
                gpu_device: 'cuda:0',
                provenance_id: newEmbeddingProvId,
                content_hash: extractionHash,
                generation_duration_ms: null,
            };
            db.insertEmbedding(embeddingRow);

            // Store vector in vec_embeddings
            vector.storeVector(newEmbeddingId, vectors[0]);

            // Only report the ids once every write above has succeeded.
            embeddingId = newEmbeddingId;
            embeddingProvId = newEmbeddingProvId;
        }
        catch (embError) {
            // NOTE(review): rows inserted before the failing call are not removed
            // here - confirm whether the db layer cleans them up.
            const errMsg = embError instanceof Error ? embError.message : String(embError);
            console.error(`[WARN] Extraction embedding generation failed for extraction ${extractionId}: ${errMsg}`);
            warnings.push(`Embedding generation failed: ${errMsg}. Extraction stored but not semantically searchable.`);
        }

        // Echo the schema back: as an object when it parses as JSON, else as the raw string.
        let schemaEcho;
        try {
            schemaEcho = JSON.parse(pageSchema);
        }
        catch (error) {
            console.error('[extraction-structured] page_schema JSON parse failed, keeping as string:', error instanceof Error ? error.message : String(error));
            schemaEcho = pageSchema;
        }

        const payload = {
            extraction_id: extractionId,
            document_id: doc.id,
            schema_json: schemaEcho,
            extraction_data: extractionData,
            content_hash: extractionHash,
            provenance_id: extractionProvId,
            embedding_id: embeddingId,
            embedding_provenance_id: embeddingProvId,
            cost_note: 'This call triggered a full re-OCR at standard Datalab cost. To avoid repeated costs, pass page_schema during ocr_process_pending instead.',
            // `warnings` is only present when the embedding step failed.
            ...(warnings.length > 0 ? { warnings } : {}),
            next_steps: [
                { tool: 'ocr_extraction_list', description: 'List all extractions for the document' },
                { tool: 'ocr_extraction_get', description: 'View the extraction results in detail' },
            ],
        };
        return formatResponse(successResult(payload));
    }
    catch (error) {
        return handleError(error);
    }
}
|
|
211
|
+
async function handleExtractionList(params) {
|
|
212
|
+
try {
|
|
213
|
+
const input = validateInput(ExtractionListInput, params);
|
|
214
|
+
const { db } = requireDatabase();
|
|
215
|
+
if (input.query) {
|
|
216
|
+
// ── Search mode: query extraction JSON content ──
|
|
217
|
+
const results = db.searchExtractions(input.query, {
|
|
218
|
+
document_filter: input.document_filter,
|
|
219
|
+
limit: input.limit,
|
|
220
|
+
});
|
|
221
|
+
const enrichedResults = results.map((ext) => {
|
|
222
|
+
const doc = db.getDocument(ext.document_id);
|
|
223
|
+
let parsedExtractionJson;
|
|
224
|
+
try {
|
|
225
|
+
parsedExtractionJson = JSON.parse(ext.extraction_json);
|
|
226
|
+
}
|
|
227
|
+
catch (error) {
|
|
228
|
+
console.error(`[extraction-structured] Failed to parse extraction_json for extraction ${ext.id}: ${error instanceof Error ? error.message : String(error)}`);
|
|
229
|
+
parsedExtractionJson = ext.extraction_json;
|
|
230
|
+
}
|
|
231
|
+
let parsedSchemaJson;
|
|
232
|
+
try {
|
|
233
|
+
parsedSchemaJson = JSON.parse(ext.schema_json);
|
|
234
|
+
}
|
|
235
|
+
catch (error) {
|
|
236
|
+
console.error(`[extraction-structured] Failed to parse schema_json for extraction ${ext.id}: ${error instanceof Error ? error.message : String(error)}`);
|
|
237
|
+
parsedSchemaJson = ext.schema_json;
|
|
238
|
+
}
|
|
239
|
+
const provenanceChain = input.include_provenance
|
|
240
|
+
? fetchProvenanceChain(db, ext.provenance_id, '[extraction-search]')
|
|
241
|
+
: undefined;
|
|
242
|
+
return {
|
|
243
|
+
id: ext.id,
|
|
244
|
+
document_id: ext.document_id,
|
|
245
|
+
document_file_path: doc?.file_path ?? null,
|
|
246
|
+
document_file_name: doc?.file_name ?? null,
|
|
247
|
+
schema_json: parsedSchemaJson,
|
|
248
|
+
extraction_json: parsedExtractionJson,
|
|
249
|
+
content_hash: ext.content_hash,
|
|
250
|
+
provenance_id: ext.provenance_id,
|
|
251
|
+
created_at: ext.created_at,
|
|
252
|
+
provenance_chain: provenanceChain,
|
|
253
|
+
};
|
|
254
|
+
});
|
|
255
|
+
return formatResponse(successResult({
|
|
256
|
+
mode: 'search',
|
|
257
|
+
query: input.query,
|
|
258
|
+
total: enrichedResults.length,
|
|
259
|
+
results: enrichedResults,
|
|
260
|
+
next_steps: [
|
|
261
|
+
{ tool: 'ocr_extraction_get', description: 'View a specific matched extraction' },
|
|
262
|
+
{ tool: 'ocr_extract_structured', description: 'Run a new extraction with different schema' },
|
|
263
|
+
],
|
|
264
|
+
}));
|
|
265
|
+
}
|
|
266
|
+
else {
|
|
267
|
+
// ── List mode: list extractions for a document ──
|
|
268
|
+
if (!input.document_id) {
|
|
269
|
+
throw new MCPError('VALIDATION_ERROR', 'Provide document_id for listing or query for searching', {});
|
|
270
|
+
}
|
|
271
|
+
const extractions = db.getExtractionsByDocument(input.document_id);
|
|
272
|
+
return formatResponse(successResult({
|
|
273
|
+
mode: 'list',
|
|
274
|
+
document_id: input.document_id,
|
|
275
|
+
total: extractions.length,
|
|
276
|
+
extractions: extractions.map((ext) => {
|
|
277
|
+
const provenanceChain = input.include_provenance
|
|
278
|
+
? fetchProvenanceChain(db, ext.provenance_id, '[extraction-list]')
|
|
279
|
+
: undefined;
|
|
280
|
+
let parsedExtractionJson;
|
|
281
|
+
try {
|
|
282
|
+
parsedExtractionJson = JSON.parse(ext.extraction_json);
|
|
283
|
+
}
|
|
284
|
+
catch (error) {
|
|
285
|
+
console.error(`[extraction-structured] Failed to parse extraction_json for extraction ${ext.id}: ${error instanceof Error ? error.message : String(error)}`);
|
|
286
|
+
parsedExtractionJson = ext.extraction_json;
|
|
287
|
+
}
|
|
288
|
+
let parsedSchemaJson;
|
|
289
|
+
try {
|
|
290
|
+
parsedSchemaJson = JSON.parse(ext.schema_json);
|
|
291
|
+
}
|
|
292
|
+
catch (error) {
|
|
293
|
+
console.error(`[extraction-structured] Failed to parse schema_json for extraction ${ext.id}: ${error instanceof Error ? error.message : String(error)}`);
|
|
294
|
+
parsedSchemaJson = ext.schema_json;
|
|
295
|
+
}
|
|
296
|
+
return {
|
|
297
|
+
id: ext.id,
|
|
298
|
+
schema_json: parsedSchemaJson,
|
|
299
|
+
extraction_json: parsedExtractionJson,
|
|
300
|
+
content_hash: ext.content_hash,
|
|
301
|
+
provenance_id: ext.provenance_id,
|
|
302
|
+
created_at: ext.created_at,
|
|
303
|
+
provenance_chain: provenanceChain,
|
|
304
|
+
};
|
|
305
|
+
}),
|
|
306
|
+
next_steps: [
|
|
307
|
+
{ tool: 'ocr_extraction_get', description: 'View a specific extraction in detail' },
|
|
308
|
+
{ tool: 'ocr_extract_structured', description: 'Run a new structured extraction' },
|
|
309
|
+
],
|
|
310
|
+
}));
|
|
311
|
+
}
|
|
312
|
+
}
|
|
313
|
+
catch (error) {
|
|
314
|
+
return handleError(error);
|
|
315
|
+
}
|
|
316
|
+
}
|
|
317
|
+
async function handleExtractionGet(params) {
|
|
318
|
+
try {
|
|
319
|
+
const input = validateInput(ExtractionGetInput, params);
|
|
320
|
+
const { db } = requireDatabase();
|
|
321
|
+
const extraction = db.getExtraction(input.extraction_id);
|
|
322
|
+
if (!extraction) {
|
|
323
|
+
throw new Error(`Extraction not found: ${input.extraction_id}`);
|
|
324
|
+
}
|
|
325
|
+
// Get document for context
|
|
326
|
+
const doc = db.getDocument(extraction.document_id);
|
|
327
|
+
// Check if an embedding exists for this extraction
|
|
328
|
+
const embedding = db.getEmbeddingByExtractionId(extraction.id);
|
|
329
|
+
const hasEmbedding = embedding !== null;
|
|
330
|
+
// Parse the stored JSON string back to object
|
|
331
|
+
let parsedExtractionJson;
|
|
332
|
+
try {
|
|
333
|
+
parsedExtractionJson = JSON.parse(extraction.extraction_json);
|
|
334
|
+
}
|
|
335
|
+
catch (error) {
|
|
336
|
+
console.error(`[extraction-structured] Failed to parse extraction_json for extraction ${extraction.id}: ${error instanceof Error ? error.message : String(error)}`);
|
|
337
|
+
parsedExtractionJson = extraction.extraction_json;
|
|
338
|
+
}
|
|
339
|
+
// Parse schema_json
|
|
340
|
+
let parsedSchemaJson;
|
|
341
|
+
try {
|
|
342
|
+
parsedSchemaJson = JSON.parse(extraction.schema_json);
|
|
343
|
+
}
|
|
344
|
+
catch (error) {
|
|
345
|
+
console.error(`[extraction-structured] Failed to parse schema_json for extraction ${extraction.id}: ${error instanceof Error ? error.message : String(error)}`);
|
|
346
|
+
parsedSchemaJson = extraction.schema_json;
|
|
347
|
+
}
|
|
348
|
+
// Optionally fetch provenance chain
|
|
349
|
+
const provenanceChain = input.include_provenance
|
|
350
|
+
? fetchProvenanceChain(db, extraction.provenance_id, '[extraction-get]')
|
|
351
|
+
: undefined;
|
|
352
|
+
return formatResponse(successResult({
|
|
353
|
+
id: extraction.id,
|
|
354
|
+
document_id: extraction.document_id,
|
|
355
|
+
document_file_path: doc?.file_path ?? null,
|
|
356
|
+
document_file_name: doc?.file_name ?? null,
|
|
357
|
+
ocr_result_id: extraction.ocr_result_id,
|
|
358
|
+
schema_json: parsedSchemaJson,
|
|
359
|
+
extraction_json: parsedExtractionJson,
|
|
360
|
+
content_hash: extraction.content_hash,
|
|
361
|
+
provenance_id: extraction.provenance_id,
|
|
362
|
+
created_at: extraction.created_at,
|
|
363
|
+
has_embedding: hasEmbedding,
|
|
364
|
+
embedding_id: embedding?.id ?? null,
|
|
365
|
+
provenance_chain: provenanceChain,
|
|
366
|
+
next_steps: [{ tool: 'ocr_extraction_list', description: 'Search across all extractions (pass query param)' }, { tool: 'ocr_document_get', description: 'View the source document' }],
|
|
367
|
+
}));
|
|
368
|
+
}
|
|
369
|
+
catch (error) {
|
|
370
|
+
return handleError(error);
|
|
371
|
+
}
|
|
372
|
+
}
|
|
373
|
+
/** Definition of `ocr_extract_structured` (runs a new page_schema extraction). */
const extractStructuredTool = {
    description: '[PROCESSING] Extract custom structured data from pages via JSON page_schema. WARNING: Triggers a full re-OCR at standard cost (~$6/1000 pages) because page_schema requires re-processing. To avoid repeated costs, pass page_schema during initial ocr_process_pending instead. Use ocr_form_fill for key-value forms. Requires status "complete".',
    inputSchema: ExtractStructuredInput.shape,
    handler: handleExtractStructured,
};

/** Definition of `ocr_extraction_list` (list by document or search by query). */
const extractionListTool = {
    description: '[STATUS] Use to list or search structured extractions. Filter by document_id, or search by query across all extractions.',
    inputSchema: ExtractionListInput.shape,
    handler: handleExtractionList,
};

/** Definition of `ocr_extraction_get` (fetch one extraction by id). */
const extractionGetTool = {
    description: '[STATUS] Use to retrieve full results of a specific structured extraction by ID. Returns parsed extraction JSON, schema, embedding status, and optional provenance chain.',
    inputSchema: ExtractionGetInput.shape,
    handler: handleExtractionGet,
};

/**
 * Structured-extraction tools exported by this module, keyed by tool name.
 * Each entry pairs a description with the zod shape of its input schema and
 * the handler that serves it.
 */
export const structuredExtractionTools = {
    ocr_extract_structured: extractStructuredTool,
    ocr_extraction_list: extractionListTool,
    ocr_extraction_get: extractionGetTool,
};
|
|
390
|
+
//# sourceMappingURL=extraction-structured.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"extraction-structured.js","sourceRoot":"","sources":["../../src/tools/extraction-structured.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,EAAE,IAAI,MAAM,EAAE,MAAM,MAAM,CAAC;AACpC,OAAO,EACL,cAAc,EACd,WAAW,EACX,oBAAoB,GAGrB,MAAM,aAAa,CAAC;AACrB,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AACnD,OAAO,EAAE,QAAQ,EAAE,MAAM,qBAAqB,CAAC;AAC/C,OAAO,EAAE,aAAa,EAAE,MAAM,wBAAwB,CAAC;AACvD,OAAO,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AACrD,OAAO,EAAE,aAAa,EAAE,MAAM,4BAA4B,CAAC;AAC3D,OAAO,EAAE,cAAc,EAAE,MAAM,yBAAyB,CAAC;AACzD,OAAO,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAC/C,OAAO,EACL,kBAAkB,EAClB,UAAU,EACV,aAAa,EACb,aAAa,GACd,MAAM,gCAAgC,CAAC;AAExC,MAAM,sBAAsB,GAAG,CAAC,CAAC,MAAM,CAAC;IACtC,WAAW,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,qCAAqC,CAAC;IAC9E,WAAW,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,uDAAuD,CAAC;CACjG,CAAC,CAAC;AAEH,MAAM,mBAAmB,GAAG,CAAC,CAAC,MAAM,CAAC;IACnC,WAAW,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,qCAAqC,CAAC;IAClF,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,4BAA4B,CAAC;IACnE,8DAA8D;IAC9D,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,sDAAsD,CAAC;IAC7F,eAAe,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,sCAAsC,CAAC;IAChG,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC,QAAQ,CAAC,iBAAiB,CAAC;IACzE,kBAAkB,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,QAAQ,CAAC,0BAA0B,CAAC;CACpF,CAAC,CAAC;AAEH,MAAM,kBAAkB,GAAG,CAAC,CAAC,MAAM,CAAC;IAClC,aAAa,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,2BAA2B,CAAC;IACtE,kBAAkB,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,QAAQ,CAAC,0BAA0B,CAAC;CACpF,CAAC,CAAC;AAEH,KAAK,UAAU,uBAAuB,CAAC,MAA+B;IACpE,IAAI,CAAC;QACH,MAAM,KAAK,GAAG,aAAa,CAAC,sBAAsB,EAAE,MAAM,CAAC,CAAC;QAC5D,MAAM,EAAE,EAAE,EAAE,MAAM,EAAE,GAAG,eAAe,EAAE,CAAC;QAEzC,iDA
AiD;QACjD,MAAM,GAAG,GAAG,EAAE,CAAC,WAAW,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC;QAC9C,IAAI,CAAC,GAAG,EAAE,CAAC;YACT,MAAM,IAAI,KAAK,CAAC,uBAAuB,KAAK,CAAC,WAAW,EAAE,CAAC,CAAC;QAC9D,CAAC;QACD,IAAI,GAAG,CAAC,MAAM,KAAK,UAAU,EAAE,CAAC;YAC9B,MAAM,IAAI,KAAK,CACb,2CAA2C,GAAG,CAAC,MAAM,mCAAmC,CACzF,CAAC;QACJ,CAAC;QAED,6CAA6C;QAC7C,MAAM,SAAS,GAAG,EAAE,CAAC,wBAAwB,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QACtD,IAAI,CAAC,SAAS,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CAAC,oCAAoC,GAAG,CAAC,EAAE,EAAE,CAAC,CAAC;QAChE,CAAC;QAED,6DAA6D;QAC7D,MAAM,MAAM,GAAG,IAAI,aAAa,EAAE,CAAC;QAEnC,MAAM,UAAU,GAAG,MAAM,EAAE,CAAC;QAC5B,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,eAAe,CAAC,GAAG,CAAC,SAAS,EAAE,GAAG,CAAC,EAAE,EAAE,UAAU,EAAE,UAAU,EAAE;YAC3F,UAAU,EAAE,KAAK,CAAC,WAAW;SAC9B,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,cAAc,EAAE,CAAC;YAC7B,MAAM,IAAI,KAAK,CAAC,uEAAuE,CAAC,CAAC;QAC3F,CAAC;QAED,mCAAmC;QACnC,MAAM,iBAAiB,GAAG,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,cAAc,CAAC,CAAC;QAClE,MAAM,cAAc,GAAG,WAAW,CAAC,iBAAiB,CAAC,CAAC;QACtD,MAAM,gBAAgB,GAAG,MAAM,EAAE,CAAC;QAClC,MAAM,GAAG,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QAErC,+BAA+B;QAC/B,EAAE,CAAC,gBAAgB,CAAC;YAClB,EAAE,EAAE,gBAAgB;YACpB,IAAI,EAAE,cAAc,CAAC,UAAU;YAC/B,UAAU,EAAE,GAAG;YACf,YAAY,EAAE,GAAG;YACjB,sBAAsB,EAAE,IAAI;YAC5B,uBAAuB,EAAE,IAAI;YAC7B,WAAW,EAAE,YAAY;YACzB,WAAW,EAAE,GAAG,CAAC,SAAS;YAC1B,SAAS,EAAE,SAAS,CAAC,aAAa;YAClC,gBAAgB,EAAE,GAAG,CAAC,aAAa;YACnC,QAAQ,EAAE,IAAI;YACd,YAAY,EAAE,cAAc;YAC5B,UAAU,EAAE,SAAS,CAAC,YAAY;YAClC,SAAS,EAAE,GAAG,CAAC,SAAS;YACxB,SAAS,EAAE,oBAAoB;YAC/B,iBAAiB,EAAE,OAAO;YAC1B,iBAAiB,EAAE,EAAE,WAAW,EAAE,KAAK,CAAC,WAAW,EAAE;YACrD,sBAAsB,EAAE,IAAI;YAC5B,wBAAwB,EAAE,IAAI;YAC9B,SAAS,EAAE,SAAS,CAAC,aAAa;YAClC,UAAU,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC,GAAG,CAAC,aAAa,EAAE,SAAS,CAAC,aAAa,CAAC,CAAC;YACxE,WAAW,EAAE,CAAC;YACd,UAAU,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC,UAAU,EAAE,YAAY,EAAE,YAAY,CAAC,CAAC;SACrE,CAAC,CAAC;QAEH,MAAM,YAAY,GAAG,MAAM,EAAE,CAAC;QAC9B,EAAE,CAAC,gBAAgB,CAAC;YAClB,EAAE,EAAE,YAAY;YAChB,WAAW,EAAE,GAAG,CAAC,EAAE;YACnB,aAAa,EAAE,SAAS,CAAC,EAAE;YAC3B,WAAW,EAAE,KAAK,CAAC,WAAW;YAC9B,eAAe,
EAAE,iBAAiB;YAClC,YAAY,EAAE,cAAc;YAC5B,aAAa,EAAE,gBAAgB;YAC/B,UAAU,EAAE,GAAG;SAChB,CAAC,CAAC;QAEH,8DAA8D;QAC9D,kFAAkF;QAClF,MAAM,QAAQ,GAAa,EAAE,CAAC;QAC9B,IAAI,WAAW,GAAkB,IAAI,CAAC;QACtC,IAAI,eAAe,GAAkB,IAAI,CAAC;QAC1C,IAAI,CAAC;YACH,MAAM,eAAe,GAAG,kBAAkB,EAAE,CAAC;YAC7C,MAAM,OAAO,GAAG,MAAM,eAAe,CAAC,WAAW,CAAC,CAAC,iBAAiB,CAAC,EAAE,CAAC,CAAC,CAAC;YAE1E,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACzB,MAAM,IAAI,KAAK,CAAC,4CAA4C,CAAC,CAAC;YAChE,CAAC;YAED,WAAW,GAAG,MAAM,EAAE,CAAC;YACvB,eAAe,GAAG,MAAM,EAAE,CAAC;YAE3B,sDAAsD;YACtD,EAAE,CAAC,gBAAgB,CAAC;gBAClB,EAAE,EAAE,eAAe;gBACnB,IAAI,EAAE,cAAc,CAAC,SAAS;gBAC9B,UAAU,EAAE,GAAG;gBACf,YAAY,EAAE,GAAG;gBACjB,sBAAsB,EAAE,IAAI;gBAC5B,uBAAuB,EAAE,IAAI;gBAC7B,WAAW,EAAE,WAAW;gBACxB,WAAW,EAAE,GAAG,CAAC,SAAS;gBAC1B,SAAS,EAAE,gBAAgB;gBAC3B,gBAAgB,EAAE,GAAG,CAAC,aAAa;gBACnC,QAAQ,EAAE,IAAI;gBACd,YAAY,EAAE,cAAc;gBAC5B,UAAU,EAAE,cAAc;gBAC1B,SAAS,EAAE,GAAG,CAAC,SAAS;gBACxB,SAAS,EAAE,UAAU;gBACrB,iBAAiB,EAAE,aAAa;gBAChC,iBAAiB,EAAE,EAAE,SAAS,EAAE,iBAAiB,EAAE,UAAU,EAAE,aAAa,EAAE;gBAC9E,sBAAsB,EAAE,IAAI;gBAC5B,wBAAwB,EAAE,IAAI;gBAC9B,SAAS,EAAE,gBAAgB;gBAC3B,UAAU,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC,GAAG,CAAC,aAAa,EAAE,SAAS,CAAC,aAAa,EAAE,gBAAgB,CAAC,CAAC;gBAC1F,WAAW,EAAE,CAAC;gBACd,UAAU,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC,UAAU,EAAE,YAAY,EAAE,YAAY,EAAE,WAAW,CAAC,CAAC;aAClF,CAAC,CAAC;YAEH,0BAA0B;YAC1B,EAAE,CAAC,eAAe,CAAC;gBACjB,EAAE,EAAE,WAAW;gBACf,QAAQ,EAAE,IAAI;gBACd,QAAQ,EAAE,IAAI;gBACd,aAAa,EAAE,YAAY;gBAC3B,WAAW,EAAE,GAAG,CAAC,EAAE;gBACnB,aAAa,EAAE,iBAAiB;gBAChC,oBAAoB,EAAE,iBAAiB,CAAC,MAAM;gBAC9C,gBAAgB,EAAE,GAAG,CAAC,SAAS;gBAC/B,gBAAgB,EAAE,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,SAAS,CAAC;gBAC9C,gBAAgB,EAAE,GAAG,CAAC,SAAS;gBAC/B,WAAW,EAAE,IAAI;gBACjB,UAAU,EAAE,IAAI;gBAChB,eAAe,EAAE,CAAC;gBAClB,aAAa,EAAE,iBAAiB,CAAC,MAAM;gBACvC,WAAW,EAAE,CAAC;gBACd,YAAY,EAAE,CAAC;gBACf,UAAU,EAAE,UAAU;gBACtB,aAAa,EAAE,aAAa;gBAC5B,SAAS,EAAE,iBAAiB;gBAC5B,cAAc,EAAE,OAAO;gBACvB,UAAU,EAAE,QAAQ;gBACpB,aAAa,EAAE,eAAe;gBAC9B,YAAY,EAAE,cAAc;gBAC5B,sBAAsB,EAAE,IAAI;aAC7B,CAAC,CAAC;YAEH,iC
AAiC;YACjC,MAAM,CAAC,WAAW,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC;QAC9C,CAAC;QAAC,OAAO,QAAQ,EAAE,CAAC;YAClB,MAAM,MAAM,GAAG,QAAQ,YAAY,KAAK,CAAC,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;YAC/E,OAAO,CAAC,KAAK,CACX,gEAAgE,YAAY,KAAK,MAAM,EAAE,CAC1F,CAAC;YACF,QAAQ,CAAC,IAAI,CAAC,gCAAgC,MAAM,sDAAsD,CAAC,CAAC;YAC5G,WAAW,GAAG,IAAI,CAAC;YACnB,eAAe,GAAG,IAAI,CAAC;QACzB,CAAC;QAED,iFAAiF;QACjF,IAAI,YAAY,GAAY,KAAK,CAAC,WAAW,CAAC;QAC9C,IAAI,CAAC;YACH,YAAY,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC;QAC/C,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,CACX,2EAA2E,EAC3E,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CACvD,CAAC;YACF,oBAAoB;QACtB,CAAC;QAED,OAAO,cAAc,CACnB,aAAa,CAAC;YACZ,aAAa,EAAE,YAAY;YAC3B,WAAW,EAAE,GAAG,CAAC,EAAE;YACnB,WAAW,EAAE,YAAY;YACzB,eAAe,EAAE,QAAQ,CAAC,cAAc;YACxC,YAAY,EAAE,cAAc;YAC5B,aAAa,EAAE,gBAAgB;YAC/B,YAAY,EAAE,WAAW;YACzB,uBAAuB,EAAE,eAAe;YACxC,SAAS,EAAE,2IAA2I;YACtJ,GAAG,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,QAAQ,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YAC5C,UAAU,EAAE,CAAC,EAAE,IAAI,EAAE,qBAAqB,EAAE,WAAW,EAAE,uCAAuC,EAAE,EAAE,EAAE,IAAI,EAAE,oBAAoB,EAAE,WAAW,EAAE,uCAAuC,EAAE,CAAC;SAC1L,CAAC,CACH,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,WAAW,CAAC,KAAK,CAAC,CAAC;IAC5B,CAAC;AACH,CAAC;AAED,KAAK,UAAU,oBAAoB,CAAC,MAA+B;IACjE,IAAI,CAAC;QACH,MAAM,KAAK,GAAG,aAAa,CAAC,mBAAmB,EAAE,MAAM,CAAC,CAAC;QACzD,MAAM,EAAE,EAAE,EAAE,GAAG,eAAe,EAAE,CAAC;QAEjC,IAAI,KAAK,CAAC,KAAK,EAAE,CAAC;YAChB,mDAAmD;YACnD,MAAM,OAAO,GAAG,EAAE,CAAC,iBAAiB,CAAC,KAAK,CAAC,KAAK,EAAE;gBAChD,eAAe,EAAE,KAAK,CAAC,eAAe;gBACtC,KAAK,EAAE,KAAK,CAAC,KAAK;aACnB,CAAC,CAAC;YAEH,MAAM,eAAe,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;gBAC1C,MAAM,GAAG,GAAG,EAAE,CAAC,WAAW,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;gBAE5C,IAAI,oBAA6B,CAAC;gBAClC,IAAI,CAAC;oBACH,oBAAoB,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,eAAe,CAAC,CAAC;gBACzD,CAAC;gBAAC,OAAO,KAAK,EAAE,CAAC;oBACf,OAAO,CAAC,KAAK,CAAC,0EAA0E,GAAG,CAAC,EAAE,KAAK,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,O
AAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;oBAC7J,oBAAoB,GAAG,GAAG,CAAC,eAAe,CAAC;gBAC7C,CAAC;gBAED,IAAI,gBAAyB,CAAC;gBAC9B,IAAI,CAAC;oBACH,gBAAgB,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;gBACjD,CAAC;gBAAC,OAAO,KAAK,EAAE,CAAC;oBACf,OAAO,CAAC,KAAK,CAAC,sEAAsE,GAAG,CAAC,EAAE,KAAK,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;oBACzJ,gBAAgB,GAAG,GAAG,CAAC,WAAW,CAAC;gBACrC,CAAC;gBAED,MAAM,eAAe,GAAG,KAAK,CAAC,kBAAkB;oBAC9C,CAAC,CAAC,oBAAoB,CAAC,EAAE,EAAE,GAAG,CAAC,aAAa,EAAE,qBAAqB,CAAC;oBACpE,CAAC,CAAC,SAAS,CAAC;gBAEd,OAAO;oBACL,EAAE,EAAE,GAAG,CAAC,EAAE;oBACV,WAAW,EAAE,GAAG,CAAC,WAAW;oBAC5B,kBAAkB,EAAE,GAAG,EAAE,SAAS,IAAI,IAAI;oBAC1C,kBAAkB,EAAE,GAAG,EAAE,SAAS,IAAI,IAAI;oBAC1C,WAAW,EAAE,gBAAgB;oBAC7B,eAAe,EAAE,oBAAoB;oBACrC,YAAY,EAAE,GAAG,CAAC,YAAY;oBAC9B,aAAa,EAAE,GAAG,CAAC,aAAa;oBAChC,UAAU,EAAE,GAAG,CAAC,UAAU;oBAC1B,gBAAgB,EAAE,eAAe;iBAClC,CAAC;YACJ,CAAC,CAAC,CAAC;YAEH,OAAO,cAAc,CACnB,aAAa,CAAC;gBACZ,IAAI,EAAE,QAAQ;gBACd,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,KAAK,EAAE,eAAe,CAAC,MAAM;gBAC7B,OAAO,EAAE,eAAe;gBACxB,UAAU,EAAE;oBACV,EAAE,IAAI,EAAE,oBAAoB,EAAE,WAAW,EAAE,oCAAoC,EAAE;oBACjF,EAAE,IAAI,EAAE,wBAAwB,EAAE,WAAW,EAAE,4CAA4C,EAAE;iBAC9F;aACF,CAAC,CACH,CAAC;QACJ,CAAC;aAAM,CAAC;YACN,mDAAmD;YACnD,IAAI,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC;gBACvB,MAAM,IAAI,QAAQ,CAAC,kBAAkB,EAAE,wDAAwD,EAAE,EAAE,CAAC,CAAC;YACvG,CAAC;YAED,MAAM,WAAW,GAAG,EAAE,CAAC,wBAAwB,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC;YAEnE,OAAO,cAAc,CACnB,aAAa,CAAC;gBACZ,IAAI,EAAE,MAAM;gBACZ,WAAW,EAAE,KAAK,CAAC,WAAW;gBAC9B,KAAK,EAAE,WAAW,CAAC,MAAM;gBACzB,WAAW,EAAE,WAAW,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;oBACnC,MAAM,eAAe,GAAG,KAAK,CAAC,kBAAkB;wBAC9C,CAAC,CAAC,oBAAoB,CAAC,EAAE,EAAE,GAAG,CAAC,aAAa,EAAE,mBAAmB,CAAC;wBAClE,CAAC,CAAC,SAAS,CAAC;oBAEd,IAAI,oBAA6B,CAAC;oBAClC,IAAI,CAAC;wBACH,oBAAoB,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,eAAe,CAAC,CAAC;oBACzD,CAAC;oBAAC,OAAO,KAAK,EAAE,CAAC;wBACf,OAAO,CAAC,KAAK,CAAC,0EAA0E,GAAG,CAAC,EAAE,KAAK,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,
CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;wBAC7J,oBAAoB,GAAG,GAAG,CAAC,eAAe,CAAC;oBAC7C,CAAC;oBAED,IAAI,gBAAyB,CAAC;oBAC9B,IAAI,CAAC;wBACH,gBAAgB,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;oBACjD,CAAC;oBAAC,OAAO,KAAK,EAAE,CAAC;wBACf,OAAO,CAAC,KAAK,CAAC,sEAAsE,GAAG,CAAC,EAAE,KAAK,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;wBACzJ,gBAAgB,GAAG,GAAG,CAAC,WAAW,CAAC;oBACrC,CAAC;oBAED,OAAO;wBACL,EAAE,EAAE,GAAG,CAAC,EAAE;wBACV,WAAW,EAAE,gBAAgB;wBAC7B,eAAe,EAAE,oBAAoB;wBACrC,YAAY,EAAE,GAAG,CAAC,YAAY;wBAC9B,aAAa,EAAE,GAAG,CAAC,aAAa;wBAChC,UAAU,EAAE,GAAG,CAAC,UAAU;wBAC1B,gBAAgB,EAAE,eAAe;qBAClC,CAAC;gBACJ,CAAC,CAAC;gBACF,UAAU,EAAE;oBACV,EAAE,IAAI,EAAE,oBAAoB,EAAE,WAAW,EAAE,sCAAsC,EAAE;oBACnF,EAAE,IAAI,EAAE,wBAAwB,EAAE,WAAW,EAAE,iCAAiC,EAAE;iBACnF;aACF,CAAC,CACH,CAAC;QACJ,CAAC;IACH,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,WAAW,CAAC,KAAK,CAAC,CAAC;IAC5B,CAAC;AACH,CAAC;AAED,KAAK,UAAU,mBAAmB,CAAC,MAA+B;IAChE,IAAI,CAAC;QACH,MAAM,KAAK,GAAG,aAAa,CAAC,kBAAkB,EAAE,MAAM,CAAC,CAAC;QACxD,MAAM,EAAE,EAAE,EAAE,GAAG,eAAe,EAAE,CAAC;QAEjC,MAAM,UAAU,GAAG,EAAE,CAAC,aAAa,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;QACzD,IAAI,CAAC,UAAU,EAAE,CAAC;YAChB,MAAM,IAAI,KAAK,CAAC,yBAAyB,KAAK,CAAC,aAAa,EAAE,CAAC,CAAC;QAClE,CAAC;QAED,2BAA2B;QAC3B,MAAM,GAAG,GAAG,EAAE,CAAC,WAAW,CAAC,UAAU,CAAC,WAAW,CAAC,CAAC;QAEnD,mDAAmD;QACnD,MAAM,SAAS,GAAG,EAAE,CAAC,0BAA0B,CAAC,UAAU,CAAC,EAAE,CAAC,CAAC;QAC/D,MAAM,YAAY,GAAG,SAAS,KAAK,IAAI,CAAC;QAExC,8CAA8C;QAC9C,IAAI,oBAA6B,CAAC;QAClC,IAAI,CAAC;YACH,oBAAoB,GAAG,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,eAAe,CAAC,CAAC;QAChE,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,CAAC,0EAA0E,UAAU,CAAC,EAAE,KAAK,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;YACpK,oBAAoB,GAAG,UAAU,CAAC,eAAe,CAAC;QACpD,CAAC;QAED,oBAAoB;QACpB,IAAI,gBAAyB,CAAC;QAC9B,IAAI,CAAC;YACH,gBAAgB,GAAG,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,WAAW,CAAC,CAAC;QACxD,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,CAAC,sEAAsE,UAAU,CAAC,EAAE,KA
AK,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;YAChK,gBAAgB,GAAG,UAAU,CAAC,WAAW,CAAC;QAC5C,CAAC;QAED,oCAAoC;QACpC,MAAM,eAAe,GAAG,KAAK,CAAC,kBAAkB;YAC9C,CAAC,CAAC,oBAAoB,CAAC,EAAE,EAAE,UAAU,CAAC,aAAa,EAAE,kBAAkB,CAAC;YACxE,CAAC,CAAC,SAAS,CAAC;QAEd,OAAO,cAAc,CACnB,aAAa,CAAC;YACZ,EAAE,EAAE,UAAU,CAAC,EAAE;YACjB,WAAW,EAAE,UAAU,CAAC,WAAW;YACnC,kBAAkB,EAAE,GAAG,EAAE,SAAS,IAAI,IAAI;YAC1C,kBAAkB,EAAE,GAAG,EAAE,SAAS,IAAI,IAAI;YAC1C,aAAa,EAAE,UAAU,CAAC,aAAa;YACvC,WAAW,EAAE,gBAAgB;YAC7B,eAAe,EAAE,oBAAoB;YACrC,YAAY,EAAE,UAAU,CAAC,YAAY;YACrC,aAAa,EAAE,UAAU,CAAC,aAAa;YACvC,UAAU,EAAE,UAAU,CAAC,UAAU;YACjC,aAAa,EAAE,YAAY;YAC3B,YAAY,EAAE,SAAS,EAAE,EAAE,IAAI,IAAI;YACnC,gBAAgB,EAAE,eAAe;YACjC,UAAU,EAAE,CAAC,EAAE,IAAI,EAAE,qBAAqB,EAAE,WAAW,EAAE,kDAAkD,EAAE,EAAE,EAAE,IAAI,EAAE,kBAAkB,EAAE,WAAW,EAAE,0BAA0B,EAAE,CAAC;SACtL,CAAC,CACH,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,WAAW,CAAC,KAAK,CAAC,CAAC;IAC5B,CAAC;AACH,CAAC;AAED,MAAM,CAAC,MAAM,yBAAyB,GAAmC;IACvE,sBAAsB,EAAE;QACtB,WAAW,EACT,qVAAqV;QACvV,WAAW,EAAE,sBAAsB,CAAC,KAAK;QACzC,OAAO,EAAE,uBAAuB;KACjC;IACD,mBAAmB,EAAE;QACnB,WAAW,EACT,0HAA0H;QAC5H,WAAW,EAAE,mBAAmB,CAAC,KAAK;QACtC,OAAO,EAAE,oBAAoB;KAC9B;IACD,kBAAkB,EAAE;QAClB,WAAW,EACT,2KAA2K;QAC7K,WAAW,EAAE,kBAAkB,CAAC,KAAK;QACrC,OAAO,EAAE,mBAAmB;KAC7B;CACF,CAAC"}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Image Extraction MCP Tools
|
|
3
|
+
*
|
|
4
|
+
* Tools for extracting images directly from PDFs using PyMuPDF.
|
|
5
|
+
* Independent of Datalab - gives full control over image extraction.
|
|
6
|
+
*
|
|
7
|
+
* CRITICAL: NEVER use console.log() - stdout is reserved for JSON-RPC protocol.
|
|
8
|
+
* Use console.error() for all logging.
|
|
9
|
+
*
|
|
10
|
+
* @module tools/extraction
|
|
11
|
+
*/
|
|
12
|
+
import { type ToolResponse, type ToolDefinition } from './shared.js';
|
|
13
|
+
/**
|
|
14
|
+
* Handle ocr_extract_images - Extract images from PDF/DOCX files
|
|
15
|
+
*
|
|
16
|
+
* If document_id is provided, extracts images from that single document.
|
|
17
|
+
* If document_id is omitted, batch-extracts images from all OCR-processed documents.
|
|
18
|
+
*/
|
|
19
|
+
/**
 * Handler for the ocr_extract_images tool.
 *
 * @param params - Tool arguments as an untyped, string-keyed map.
 * @returns A promise resolving to the tool's ToolResponse.
 */
export declare function handleExtractImages(params: Record<string, unknown>): Promise<ToolResponse>;
|
|
20
|
+
/**
|
|
21
|
+
* Extraction tools collection for MCP server registration
|
|
22
|
+
*/
|
|
23
|
+
/**
 * Image-extraction tool definitions as a string-keyed map of ToolDefinition.
 * NOTE(review): keys are presumably the MCP tool names - confirm against extraction.ts.
 */
export declare const extractionTools: Record<string, ToolDefinition>;
|
|
24
|
+
//# sourceMappingURL=extraction.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"extraction.d.ts","sourceRoot":"","sources":["../../src/tools/extraction.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAQH,OAAO,EAA+B,KAAK,YAAY,EAAE,KAAK,cAAc,EAAE,MAAM,aAAa,CAAC;AAuClG;;;;;GAKG;AACH,wBAAsB,mBAAmB,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,OAAO,CAAC,YAAY,CAAC,CA+XhG;AAMD;;GAEG;AACH,eAAO,MAAM,eAAe,EAAE,MAAM,CAAC,MAAM,EAAE,cAAc,CAwC1D,CAAC"}
|