ocr-provenance-mcp 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ocr-provenance-mcp might be problematic. Click here for more details.
- package/.env.example +55 -0
- package/LICENSE +78 -0
- package/README.md +1154 -0
- package/dist/bin-http.d.ts +24 -0
- package/dist/bin-http.d.ts.map +1 -0
- package/dist/bin-http.js +275 -0
- package/dist/bin-http.js.map +1 -0
- package/dist/bin-setup.d.ts +11 -0
- package/dist/bin-setup.d.ts.map +1 -0
- package/dist/bin-setup.js +610 -0
- package/dist/bin-setup.js.map +1 -0
- package/dist/bin.d.ts +16 -0
- package/dist/bin.d.ts.map +1 -0
- package/dist/bin.js +16 -0
- package/dist/bin.js.map +1 -0
- package/dist/index.d.ts +13 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +90 -0
- package/dist/index.js.map +1 -0
- package/dist/models/chunk.d.ts +136 -0
- package/dist/models/chunk.d.ts.map +1 -0
- package/dist/models/chunk.js +27 -0
- package/dist/models/chunk.js.map +1 -0
- package/dist/models/cluster.d.ts +79 -0
- package/dist/models/cluster.d.ts.map +1 -0
- package/dist/models/cluster.js +10 -0
- package/dist/models/cluster.js.map +1 -0
- package/dist/models/comparison.d.ts +62 -0
- package/dist/models/comparison.d.ts.map +1 -0
- package/dist/models/comparison.js +8 -0
- package/dist/models/comparison.js.map +1 -0
- package/dist/models/document.d.ts +104 -0
- package/dist/models/document.d.ts.map +1 -0
- package/dist/models/document.js +15 -0
- package/dist/models/document.js.map +1 -0
- package/dist/models/embedding.d.ts +87 -0
- package/dist/models/embedding.d.ts.map +1 -0
- package/dist/models/embedding.js +23 -0
- package/dist/models/embedding.js.map +1 -0
- package/dist/models/extraction.d.ts +15 -0
- package/dist/models/extraction.d.ts.map +1 -0
- package/dist/models/extraction.js +2 -0
- package/dist/models/extraction.js.map +1 -0
- package/dist/models/form-fill.d.ts +23 -0
- package/dist/models/form-fill.d.ts.map +1 -0
- package/dist/models/form-fill.js +2 -0
- package/dist/models/form-fill.js.map +1 -0
- package/dist/models/image.d.ts +177 -0
- package/dist/models/image.d.ts.map +1 -0
- package/dist/models/image.js +8 -0
- package/dist/models/image.js.map +1 -0
- package/dist/models/index.d.ts +14 -0
- package/dist/models/index.d.ts.map +1 -0
- package/dist/models/index.js +22 -0
- package/dist/models/index.js.map +1 -0
- package/dist/models/provenance.d.ts +174 -0
- package/dist/models/provenance.d.ts.map +1 -0
- package/dist/models/provenance.js +53 -0
- package/dist/models/provenance.js.map +1 -0
- package/dist/models/uploaded-file.d.ts +20 -0
- package/dist/models/uploaded-file.d.ts.map +1 -0
- package/dist/models/uploaded-file.js +2 -0
- package/dist/models/uploaded-file.js.map +1 -0
- package/dist/server/errors.d.ts +93 -0
- package/dist/server/errors.d.ts.map +1 -0
- package/dist/server/errors.js +256 -0
- package/dist/server/errors.js.map +1 -0
- package/dist/server/events.d.ts +36 -0
- package/dist/server/events.d.ts.map +1 -0
- package/dist/server/events.js +48 -0
- package/dist/server/events.js.map +1 -0
- package/dist/server/permissions.d.ts +26 -0
- package/dist/server/permissions.d.ts.map +1 -0
- package/dist/server/permissions.js +194 -0
- package/dist/server/permissions.js.map +1 -0
- package/dist/server/register-tools.d.ts +25 -0
- package/dist/server/register-tools.d.ts.map +1 -0
- package/dist/server/register-tools.js +102 -0
- package/dist/server/register-tools.js.map +1 -0
- package/dist/server/startup.d.ts +16 -0
- package/dist/server/startup.d.ts.map +1 -0
- package/dist/server/startup.js +37 -0
- package/dist/server/startup.js.map +1 -0
- package/dist/server/state.d.ts +166 -0
- package/dist/server/state.d.ts.map +1 -0
- package/dist/server/state.js +424 -0
- package/dist/server/state.js.map +1 -0
- package/dist/server/transports/http-transport.d.ts +37 -0
- package/dist/server/transports/http-transport.d.ts.map +1 -0
- package/dist/server/transports/http-transport.js +204 -0
- package/dist/server/transports/http-transport.js.map +1 -0
- package/dist/server/transports/index.d.ts +9 -0
- package/dist/server/transports/index.d.ts.map +1 -0
- package/dist/server/transports/index.js +9 -0
- package/dist/server/transports/index.js.map +1 -0
- package/dist/server/transports/session-manager.d.ts +40 -0
- package/dist/server/transports/session-manager.d.ts.map +1 -0
- package/dist/server/transports/session-manager.js +74 -0
- package/dist/server/transports/session-manager.js.map +1 -0
- package/dist/server/types.d.ts +82 -0
- package/dist/server/types.d.ts.map +1 -0
- package/dist/server/types.js +14 -0
- package/dist/server/types.js.map +1 -0
- package/dist/services/audit.d.ts +26 -0
- package/dist/services/audit.d.ts.map +1 -0
- package/dist/services/audit.js +43 -0
- package/dist/services/audit.js.map +1 -0
- package/dist/services/chunking/chunk-deduplicator.d.ts +33 -0
- package/dist/services/chunking/chunk-deduplicator.d.ts.map +1 -0
- package/dist/services/chunking/chunk-deduplicator.js +46 -0
- package/dist/services/chunking/chunk-deduplicator.js.map +1 -0
- package/dist/services/chunking/chunk-merger.d.ts +26 -0
- package/dist/services/chunking/chunk-merger.d.ts.map +1 -0
- package/dist/services/chunking/chunk-merger.js +94 -0
- package/dist/services/chunking/chunk-merger.js.map +1 -0
- package/dist/services/chunking/chunker.d.ts +62 -0
- package/dist/services/chunking/chunker.d.ts.map +1 -0
- package/dist/services/chunking/chunker.js +566 -0
- package/dist/services/chunking/chunker.js.map +1 -0
- package/dist/services/chunking/heading-normalizer.d.ts +33 -0
- package/dist/services/chunking/heading-normalizer.d.ts.map +1 -0
- package/dist/services/chunking/heading-normalizer.js +101 -0
- package/dist/services/chunking/heading-normalizer.js.map +1 -0
- package/dist/services/chunking/json-block-analyzer.d.ts +163 -0
- package/dist/services/chunking/json-block-analyzer.d.ts.map +1 -0
- package/dist/services/chunking/json-block-analyzer.js +1033 -0
- package/dist/services/chunking/json-block-analyzer.js.map +1 -0
- package/dist/services/chunking/markdown-parser.d.ts +75 -0
- package/dist/services/chunking/markdown-parser.d.ts.map +1 -0
- package/dist/services/chunking/markdown-parser.js +428 -0
- package/dist/services/chunking/markdown-parser.js.map +1 -0
- package/dist/services/chunking/text-normalizer.d.ts +20 -0
- package/dist/services/chunking/text-normalizer.d.ts.map +1 -0
- package/dist/services/chunking/text-normalizer.js +36 -0
- package/dist/services/chunking/text-normalizer.js.map +1 -0
- package/dist/services/clm/contract-schemas.d.ts +36 -0
- package/dist/services/clm/contract-schemas.d.ts.map +1 -0
- package/dist/services/clm/contract-schemas.js +92 -0
- package/dist/services/clm/contract-schemas.js.map +1 -0
- package/dist/services/clm/summarization.d.ts +46 -0
- package/dist/services/clm/summarization.d.ts.map +1 -0
- package/dist/services/clm/summarization.js +61 -0
- package/dist/services/clm/summarization.js.map +1 -0
- package/dist/services/clustering/clustering-service.d.ts +58 -0
- package/dist/services/clustering/clustering-service.d.ts.map +1 -0
- package/dist/services/clustering/clustering-service.js +467 -0
- package/dist/services/clustering/clustering-service.js.map +1 -0
- package/dist/services/comparison/diff-service.d.ts +41 -0
- package/dist/services/comparison/diff-service.d.ts.map +1 -0
- package/dist/services/comparison/diff-service.js +120 -0
- package/dist/services/comparison/diff-service.js.map +1 -0
- package/dist/services/embedding/embedder.d.ts +55 -0
- package/dist/services/embedding/embedder.d.ts.map +1 -0
- package/dist/services/embedding/embedder.js +202 -0
- package/dist/services/embedding/embedder.js.map +1 -0
- package/dist/services/embedding/nomic.d.ts +67 -0
- package/dist/services/embedding/nomic.d.ts.map +1 -0
- package/dist/services/embedding/nomic.js +280 -0
- package/dist/services/embedding/nomic.js.map +1 -0
- package/dist/services/gemini/circuit-breaker.d.ts +106 -0
- package/dist/services/gemini/circuit-breaker.d.ts.map +1 -0
- package/dist/services/gemini/circuit-breaker.js +237 -0
- package/dist/services/gemini/circuit-breaker.js.map +1 -0
- package/dist/services/gemini/client.d.ts +173 -0
- package/dist/services/gemini/client.d.ts.map +1 -0
- package/dist/services/gemini/client.js +483 -0
- package/dist/services/gemini/client.js.map +1 -0
- package/dist/services/gemini/config.d.ts +116 -0
- package/dist/services/gemini/config.d.ts.map +1 -0
- package/dist/services/gemini/config.js +118 -0
- package/dist/services/gemini/config.js.map +1 -0
- package/dist/services/gemini/index.d.ts +9 -0
- package/dist/services/gemini/index.d.ts.map +1 -0
- package/dist/services/gemini/index.js +13 -0
- package/dist/services/gemini/index.js.map +1 -0
- package/dist/services/gemini/rate-limiter.d.ts +62 -0
- package/dist/services/gemini/rate-limiter.d.ts.map +1 -0
- package/dist/services/gemini/rate-limiter.js +120 -0
- package/dist/services/gemini/rate-limiter.js.map +1 -0
- package/dist/services/images/extractor.d.ts +88 -0
- package/dist/services/images/extractor.d.ts.map +1 -0
- package/dist/services/images/extractor.js +340 -0
- package/dist/services/images/extractor.js.map +1 -0
- package/dist/services/images/optimizer.d.ts +130 -0
- package/dist/services/images/optimizer.d.ts.map +1 -0
- package/dist/services/images/optimizer.js +228 -0
- package/dist/services/images/optimizer.js.map +1 -0
- package/dist/services/ocr/datalab.d.ts +64 -0
- package/dist/services/ocr/datalab.d.ts.map +1 -0
- package/dist/services/ocr/datalab.js +425 -0
- package/dist/services/ocr/datalab.js.map +1 -0
- package/dist/services/ocr/errors.d.ts +38 -0
- package/dist/services/ocr/errors.d.ts.map +1 -0
- package/dist/services/ocr/errors.js +83 -0
- package/dist/services/ocr/errors.js.map +1 -0
- package/dist/services/ocr/file-manager.d.ts +76 -0
- package/dist/services/ocr/file-manager.d.ts.map +1 -0
- package/dist/services/ocr/file-manager.js +238 -0
- package/dist/services/ocr/file-manager.js.map +1 -0
- package/dist/services/ocr/form-fill.d.ts +48 -0
- package/dist/services/ocr/form-fill.d.ts.map +1 -0
- package/dist/services/ocr/form-fill.js +213 -0
- package/dist/services/ocr/form-fill.js.map +1 -0
- package/dist/services/ocr/processor.d.ts +95 -0
- package/dist/services/ocr/processor.d.ts.map +1 -0
- package/dist/services/ocr/processor.js +259 -0
- package/dist/services/ocr/processor.js.map +1 -0
- package/dist/services/provenance/agent-metadata.d.ts +82 -0
- package/dist/services/provenance/agent-metadata.d.ts.map +1 -0
- package/dist/services/provenance/agent-metadata.js +106 -0
- package/dist/services/provenance/agent-metadata.js.map +1 -0
- package/dist/services/provenance/chain-hash.d.ts +57 -0
- package/dist/services/provenance/chain-hash.d.ts.map +1 -0
- package/dist/services/provenance/chain-hash.js +131 -0
- package/dist/services/provenance/chain-hash.js.map +1 -0
- package/dist/services/provenance/exporter.d.ts +202 -0
- package/dist/services/provenance/exporter.d.ts.map +1 -0
- package/dist/services/provenance/exporter.js +457 -0
- package/dist/services/provenance/exporter.js.map +1 -0
- package/dist/services/provenance/index.d.ts +15 -0
- package/dist/services/provenance/index.d.ts.map +1 -0
- package/dist/services/provenance/index.js +17 -0
- package/dist/services/provenance/index.js.map +1 -0
- package/dist/services/provenance/tracker.d.ts +138 -0
- package/dist/services/provenance/tracker.d.ts.map +1 -0
- package/dist/services/provenance/tracker.js +293 -0
- package/dist/services/provenance/tracker.js.map +1 -0
- package/dist/services/provenance/verifier.d.ts +153 -0
- package/dist/services/provenance/verifier.d.ts.map +1 -0
- package/dist/services/provenance/verifier.js +536 -0
- package/dist/services/provenance/verifier.js.map +1 -0
- package/dist/services/python-pool.d.ts +70 -0
- package/dist/services/python-pool.d.ts.map +1 -0
- package/dist/services/python-pool.js +265 -0
- package/dist/services/python-pool.js.map +1 -0
- package/dist/services/search/bm25.d.ts +180 -0
- package/dist/services/search/bm25.d.ts.map +1 -0
- package/dist/services/search/bm25.js +656 -0
- package/dist/services/search/bm25.js.map +1 -0
- package/dist/services/search/fusion.d.ts +103 -0
- package/dist/services/search/fusion.d.ts.map +1 -0
- package/dist/services/search/fusion.js +122 -0
- package/dist/services/search/fusion.js.map +1 -0
- package/dist/services/search/local-reranker.d.ts +30 -0
- package/dist/services/search/local-reranker.d.ts.map +1 -0
- package/dist/services/search/local-reranker.js +123 -0
- package/dist/services/search/local-reranker.js.map +1 -0
- package/dist/services/search/quality.d.ts +11 -0
- package/dist/services/search/quality.d.ts.map +1 -0
- package/dist/services/search/quality.js +17 -0
- package/dist/services/search/quality.js.map +1 -0
- package/dist/services/search/query-classifier.d.ts +34 -0
- package/dist/services/search/query-classifier.d.ts.map +1 -0
- package/dist/services/search/query-classifier.js +114 -0
- package/dist/services/search/query-classifier.js.map +1 -0
- package/dist/services/search/query-expander.d.ts +73 -0
- package/dist/services/search/query-expander.d.ts.map +1 -0
- package/dist/services/search/query-expander.js +281 -0
- package/dist/services/search/query-expander.js.map +1 -0
- package/dist/services/search/reranker.d.ts +44 -0
- package/dist/services/search/reranker.d.ts.map +1 -0
- package/dist/services/search/reranker.js +101 -0
- package/dist/services/search/reranker.js.map +1 -0
- package/dist/services/storage/database/annotation-operations.d.ts +113 -0
- package/dist/services/storage/database/annotation-operations.d.ts.map +1 -0
- package/dist/services/storage/database/annotation-operations.js +177 -0
- package/dist/services/storage/database/annotation-operations.js.map +1 -0
- package/dist/services/storage/database/approval-operations.d.ts +132 -0
- package/dist/services/storage/database/approval-operations.d.ts.map +1 -0
- package/dist/services/storage/database/approval-operations.js +206 -0
- package/dist/services/storage/database/approval-operations.js.map +1 -0
- package/dist/services/storage/database/chunk-operations.d.ts +132 -0
- package/dist/services/storage/database/chunk-operations.d.ts.map +1 -0
- package/dist/services/storage/database/chunk-operations.js +306 -0
- package/dist/services/storage/database/chunk-operations.js.map +1 -0
- package/dist/services/storage/database/cluster-operations.d.ts +97 -0
- package/dist/services/storage/database/cluster-operations.d.ts.map +1 -0
- package/dist/services/storage/database/cluster-operations.js +258 -0
- package/dist/services/storage/database/cluster-operations.js.map +1 -0
- package/dist/services/storage/database/comparison-operations.d.ts +41 -0
- package/dist/services/storage/database/comparison-operations.d.ts.map +1 -0
- package/dist/services/storage/database/comparison-operations.js +65 -0
- package/dist/services/storage/database/comparison-operations.js.map +1 -0
- package/dist/services/storage/database/converters.d.ts +36 -0
- package/dist/services/storage/database/converters.d.ts.map +1 -0
- package/dist/services/storage/database/converters.js +244 -0
- package/dist/services/storage/database/converters.js.map +1 -0
- package/dist/services/storage/database/document-operations.d.ts +145 -0
- package/dist/services/storage/database/document-operations.d.ts.map +1 -0
- package/dist/services/storage/database/document-operations.js +498 -0
- package/dist/services/storage/database/document-operations.js.map +1 -0
- package/dist/services/storage/database/embedding-operations.d.ts +130 -0
- package/dist/services/storage/database/embedding-operations.d.ts.map +1 -0
- package/dist/services/storage/database/embedding-operations.js +315 -0
- package/dist/services/storage/database/embedding-operations.js.map +1 -0
- package/dist/services/storage/database/extraction-operations.d.ts +47 -0
- package/dist/services/storage/database/extraction-operations.d.ts.map +1 -0
- package/dist/services/storage/database/extraction-operations.js +85 -0
- package/dist/services/storage/database/extraction-operations.js.map +1 -0
- package/dist/services/storage/database/form-fill-operations.d.ts +58 -0
- package/dist/services/storage/database/form-fill-operations.d.ts.map +1 -0
- package/dist/services/storage/database/form-fill-operations.js +116 -0
- package/dist/services/storage/database/form-fill-operations.js.map +1 -0
- package/dist/services/storage/database/helpers.d.ts +29 -0
- package/dist/services/storage/database/helpers.d.ts.map +1 -0
- package/dist/services/storage/database/helpers.js +55 -0
- package/dist/services/storage/database/helpers.js.map +1 -0
- package/dist/services/storage/database/image-operations.d.ts +202 -0
- package/dist/services/storage/database/image-operations.d.ts.map +1 -0
- package/dist/services/storage/database/image-operations.js +484 -0
- package/dist/services/storage/database/image-operations.js.map +1 -0
- package/dist/services/storage/database/index.d.ts +13 -0
- package/dist/services/storage/database/index.d.ts.map +1 -0
- package/dist/services/storage/database/index.js +16 -0
- package/dist/services/storage/database/index.js.map +1 -0
- package/dist/services/storage/database/lock-operations.d.ts +59 -0
- package/dist/services/storage/database/lock-operations.d.ts.map +1 -0
- package/dist/services/storage/database/lock-operations.js +89 -0
- package/dist/services/storage/database/lock-operations.js.map +1 -0
- package/dist/services/storage/database/obligation-operations.d.ts +88 -0
- package/dist/services/storage/database/obligation-operations.d.ts.map +1 -0
- package/dist/services/storage/database/obligation-operations.js +206 -0
- package/dist/services/storage/database/obligation-operations.js.map +1 -0
- package/dist/services/storage/database/ocr-operations.d.ts +33 -0
- package/dist/services/storage/database/ocr-operations.d.ts.map +1 -0
- package/dist/services/storage/database/ocr-operations.js +70 -0
- package/dist/services/storage/database/ocr-operations.js.map +1 -0
- package/dist/services/storage/database/playbook-operations.d.ts +72 -0
- package/dist/services/storage/database/playbook-operations.d.ts.map +1 -0
- package/dist/services/storage/database/playbook-operations.js +247 -0
- package/dist/services/storage/database/playbook-operations.js.map +1 -0
- package/dist/services/storage/database/provenance-operations.d.ts +112 -0
- package/dist/services/storage/database/provenance-operations.d.ts.map +1 -0
- package/dist/services/storage/database/provenance-operations.js +251 -0
- package/dist/services/storage/database/provenance-operations.js.map +1 -0
- package/dist/services/storage/database/service.d.ts +142 -0
- package/dist/services/storage/database/service.d.ts.map +1 -0
- package/dist/services/storage/database/service.js +310 -0
- package/dist/services/storage/database/service.js.map +1 -0
- package/dist/services/storage/database/static-operations.d.ts +30 -0
- package/dist/services/storage/database/static-operations.d.ts.map +1 -0
- package/dist/services/storage/database/static-operations.js +218 -0
- package/dist/services/storage/database/static-operations.js.map +1 -0
- package/dist/services/storage/database/stats-operations.d.ts +101 -0
- package/dist/services/storage/database/stats-operations.d.ts.map +1 -0
- package/dist/services/storage/database/stats-operations.js +394 -0
- package/dist/services/storage/database/stats-operations.js.map +1 -0
- package/dist/services/storage/database/tag-operations.d.ts +76 -0
- package/dist/services/storage/database/tag-operations.d.ts.map +1 -0
- package/dist/services/storage/database/tag-operations.js +178 -0
- package/dist/services/storage/database/tag-operations.js.map +1 -0
- package/dist/services/storage/database/types.d.ts +286 -0
- package/dist/services/storage/database/types.d.ts.map +1 -0
- package/dist/services/storage/database/types.js +39 -0
- package/dist/services/storage/database/types.js.map +1 -0
- package/dist/services/storage/database/upload-operations.d.ts +71 -0
- package/dist/services/storage/database/upload-operations.d.ts.map +1 -0
- package/dist/services/storage/database/upload-operations.js +124 -0
- package/dist/services/storage/database/upload-operations.js.map +1 -0
- package/dist/services/storage/database/user-operations.d.ts +102 -0
- package/dist/services/storage/database/user-operations.d.ts.map +1 -0
- package/dist/services/storage/database/user-operations.js +151 -0
- package/dist/services/storage/database/user-operations.js.map +1 -0
- package/dist/services/storage/database/workflow-operations.d.ts +98 -0
- package/dist/services/storage/database/workflow-operations.d.ts.map +1 -0
- package/dist/services/storage/database/workflow-operations.js +157 -0
- package/dist/services/storage/database/workflow-operations.js.map +1 -0
- package/dist/services/storage/database.d.ts +16 -0
- package/dist/services/storage/database.d.ts.map +1 -0
- package/dist/services/storage/database.js +15 -0
- package/dist/services/storage/database.js.map +1 -0
- package/dist/services/storage/index.d.ts +10 -0
- package/dist/services/storage/index.d.ts.map +1 -0
- package/dist/services/storage/index.js +10 -0
- package/dist/services/storage/index.js.map +1 -0
- package/dist/services/storage/migrations/index.d.ts +16 -0
- package/dist/services/storage/migrations/index.d.ts.map +1 -0
- package/dist/services/storage/migrations/index.js +20 -0
- package/dist/services/storage/migrations/index.js.map +1 -0
- package/dist/services/storage/migrations/operations.d.ts +40 -0
- package/dist/services/storage/migrations/operations.d.ts.map +1 -0
- package/dist/services/storage/migrations/operations.js +2910 -0
- package/dist/services/storage/migrations/operations.js.map +1 -0
- package/dist/services/storage/migrations/schema-definitions.d.ts +306 -0
- package/dist/services/storage/migrations/schema-definitions.d.ts.map +1 -0
- package/dist/services/storage/migrations/schema-definitions.js +1006 -0
- package/dist/services/storage/migrations/schema-definitions.js.map +1 -0
- package/dist/services/storage/migrations/schema-helpers.d.ts +50 -0
- package/dist/services/storage/migrations/schema-helpers.d.ts.map +1 -0
- package/dist/services/storage/migrations/schema-helpers.js +176 -0
- package/dist/services/storage/migrations/schema-helpers.js.map +1 -0
- package/dist/services/storage/migrations/types.d.ts +15 -0
- package/dist/services/storage/migrations/types.d.ts.map +1 -0
- package/dist/services/storage/migrations/types.js +21 -0
- package/dist/services/storage/migrations/types.js.map +1 -0
- package/dist/services/storage/migrations/verification.d.ts +20 -0
- package/dist/services/storage/migrations/verification.d.ts.map +1 -0
- package/dist/services/storage/migrations/verification.js +78 -0
- package/dist/services/storage/migrations/verification.js.map +1 -0
- package/dist/services/storage/migrations.d.ts +16 -0
- package/dist/services/storage/migrations.d.ts.map +1 -0
- package/dist/services/storage/migrations.js +17 -0
- package/dist/services/storage/migrations.js.map +1 -0
- package/dist/services/storage/types.d.ts +12 -0
- package/dist/services/storage/types.d.ts.map +1 -0
- package/dist/services/storage/types.js +5 -0
- package/dist/services/storage/types.js.map +1 -0
- package/dist/services/storage/vector.d.ts +208 -0
- package/dist/services/storage/vector.d.ts.map +1 -0
- package/dist/services/storage/vector.js +526 -0
- package/dist/services/storage/vector.js.map +1 -0
- package/dist/services/vlm/pipeline.d.ts +194 -0
- package/dist/services/vlm/pipeline.d.ts.map +1 -0
- package/dist/services/vlm/pipeline.js +800 -0
- package/dist/services/vlm/pipeline.js.map +1 -0
- package/dist/services/vlm/prompts.d.ts +171 -0
- package/dist/services/vlm/prompts.d.ts.map +1 -0
- package/dist/services/vlm/prompts.js +229 -0
- package/dist/services/vlm/prompts.js.map +1 -0
- package/dist/services/vlm/service.d.ts +174 -0
- package/dist/services/vlm/service.d.ts.map +1 -0
- package/dist/services/vlm/service.js +256 -0
- package/dist/services/vlm/service.js.map +1 -0
- package/dist/services/webhook-delivery.d.ts +4 -0
- package/dist/services/webhook-delivery.d.ts.map +1 -0
- package/dist/services/webhook-delivery.js +140 -0
- package/dist/services/webhook-delivery.js.map +1 -0
- package/dist/tools/chunks.d.ts +19 -0
- package/dist/tools/chunks.d.ts.map +1 -0
- package/dist/tools/chunks.js +392 -0
- package/dist/tools/chunks.js.map +1 -0
- package/dist/tools/clm.d.ts +16 -0
- package/dist/tools/clm.d.ts.map +1 -0
- package/dist/tools/clm.js +668 -0
- package/dist/tools/clm.js.map +1 -0
- package/dist/tools/clustering.d.ts +13 -0
- package/dist/tools/clustering.d.ts.map +1 -0
- package/dist/tools/clustering.js +498 -0
- package/dist/tools/clustering.js.map +1 -0
- package/dist/tools/collaboration.d.ts +15 -0
- package/dist/tools/collaboration.d.ts.map +1 -0
- package/dist/tools/collaboration.js +516 -0
- package/dist/tools/collaboration.js.map +1 -0
- package/dist/tools/comparison.d.ts +13 -0
- package/dist/tools/comparison.d.ts.map +1 -0
- package/dist/tools/comparison.js +735 -0
- package/dist/tools/comparison.js.map +1 -0
- package/dist/tools/compliance.d.ts +15 -0
- package/dist/tools/compliance.d.ts.map +1 -0
- package/dist/tools/compliance.js +640 -0
- package/dist/tools/compliance.js.map +1 -0
- package/dist/tools/config.d.ts +19 -0
- package/dist/tools/config.d.ts.map +1 -0
- package/dist/tools/config.js +213 -0
- package/dist/tools/config.js.map +1 -0
- package/dist/tools/database.d.ts +62 -0
- package/dist/tools/database.d.ts.map +1 -0
- package/dist/tools/database.js +288 -0
- package/dist/tools/database.js.map +1 -0
- package/dist/tools/documents.d.ts +61 -0
- package/dist/tools/documents.d.ts.map +1 -0
- package/dist/tools/documents.js +1624 -0
- package/dist/tools/documents.js.map +1 -0
- package/dist/tools/embeddings.d.ts +14 -0
- package/dist/tools/embeddings.d.ts.map +1 -0
- package/dist/tools/embeddings.js +626 -0
- package/dist/tools/embeddings.js.map +1 -0
- package/dist/tools/evaluation.d.ts +25 -0
- package/dist/tools/evaluation.d.ts.map +1 -0
- package/dist/tools/evaluation.js +523 -0
- package/dist/tools/evaluation.js.map +1 -0
- package/dist/tools/events.d.ts +16 -0
- package/dist/tools/events.d.ts.map +1 -0
- package/dist/tools/events.js +493 -0
- package/dist/tools/events.js.map +1 -0
- package/dist/tools/extraction-structured.d.ts +13 -0
- package/dist/tools/extraction-structured.d.ts.map +1 -0
- package/dist/tools/extraction-structured.js +390 -0
- package/dist/tools/extraction-structured.js.map +1 -0
- package/dist/tools/extraction.d.ts +24 -0
- package/dist/tools/extraction.d.ts.map +1 -0
- package/dist/tools/extraction.js +424 -0
- package/dist/tools/extraction.js.map +1 -0
- package/dist/tools/file-management.d.ts +14 -0
- package/dist/tools/file-management.d.ts.map +1 -0
- package/dist/tools/file-management.js +523 -0
- package/dist/tools/file-management.js.map +1 -0
- package/dist/tools/form-fill.d.ts +13 -0
- package/dist/tools/form-fill.d.ts.map +1 -0
- package/dist/tools/form-fill.js +250 -0
- package/dist/tools/form-fill.js.map +1 -0
- package/dist/tools/health.d.ts +19 -0
- package/dist/tools/health.d.ts.map +1 -0
- package/dist/tools/health.js +229 -0
- package/dist/tools/health.js.map +1 -0
- package/dist/tools/images.d.ts +54 -0
- package/dist/tools/images.d.ts.map +1 -0
- package/dist/tools/images.js +787 -0
- package/dist/tools/images.js.map +1 -0
- package/dist/tools/ingestion.d.ts +94 -0
- package/dist/tools/ingestion.d.ts.map +1 -0
- package/dist/tools/ingestion.js +1659 -0
- package/dist/tools/ingestion.js.map +1 -0
- package/dist/tools/intelligence.d.ts +18 -0
- package/dist/tools/intelligence.d.ts.map +1 -0
- package/dist/tools/intelligence.js +1039 -0
- package/dist/tools/intelligence.js.map +1 -0
- package/dist/tools/provenance.d.ts +51 -0
- package/dist/tools/provenance.d.ts.map +1 -0
- package/dist/tools/provenance.js +691 -0
- package/dist/tools/provenance.js.map +1 -0
- package/dist/tools/reports.d.ts +41 -0
- package/dist/tools/reports.d.ts.map +1 -0
- package/dist/tools/reports.js +1394 -0
- package/dist/tools/reports.js.map +1 -0
- package/dist/tools/search.d.ts +35 -0
- package/dist/tools/search.d.ts.map +1 -0
- package/dist/tools/search.js +2528 -0
- package/dist/tools/search.js.map +1 -0
- package/dist/tools/shared.d.ts +52 -0
- package/dist/tools/shared.d.ts.map +1 -0
- package/dist/tools/shared.js +54 -0
- package/dist/tools/shared.js.map +1 -0
- package/dist/tools/tags.d.ts +15 -0
- package/dist/tools/tags.d.ts.map +1 -0
- package/dist/tools/tags.js +287 -0
- package/dist/tools/tags.js.map +1 -0
- package/dist/tools/timeline.d.ts +15 -0
- package/dist/tools/timeline.d.ts.map +1 -0
- package/dist/tools/timeline.js +14 -0
- package/dist/tools/timeline.js.map +1 -0
- package/dist/tools/users.d.ts +14 -0
- package/dist/tools/users.d.ts.map +1 -0
- package/dist/tools/users.js +257 -0
- package/dist/tools/users.js.map +1 -0
- package/dist/tools/vlm.d.ts +40 -0
- package/dist/tools/vlm.d.ts.map +1 -0
- package/dist/tools/vlm.js +475 -0
- package/dist/tools/vlm.js.map +1 -0
- package/dist/tools/workflow.d.ts +16 -0
- package/dist/tools/workflow.d.ts.map +1 -0
- package/dist/tools/workflow.js +495 -0
- package/dist/tools/workflow.js.map +1 -0
- package/dist/utils/backoff.d.ts +53 -0
- package/dist/utils/backoff.d.ts.map +1 -0
- package/dist/utils/backoff.js +78 -0
- package/dist/utils/backoff.js.map +1 -0
- package/dist/utils/config-persistence.d.ts +33 -0
- package/dist/utils/config-persistence.d.ts.map +1 -0
- package/dist/utils/config-persistence.js +61 -0
- package/dist/utils/config-persistence.js.map +1 -0
- package/dist/utils/hash.d.ts +65 -0
- package/dist/utils/hash.d.ts.map +1 -0
- package/dist/utils/hash.js +146 -0
- package/dist/utils/hash.js.map +1 -0
- package/dist/utils/math.d.ts +21 -0
- package/dist/utils/math.d.ts.map +1 -0
- package/dist/utils/math.js +39 -0
- package/dist/utils/math.js.map +1 -0
- package/dist/utils/validation.d.ts +697 -0
- package/dist/utils/validation.d.ts.map +1 -0
- package/dist/utils/validation.js +529 -0
- package/dist/utils/validation.js.map +1 -0
- package/package.json +96 -0
- package/python/.gitkeep +0 -0
- package/python/__init__.py +104 -0
- package/python/clustering_worker.py +440 -0
- package/python/docx_image_extractor.py +524 -0
- package/python/embedding_worker.py +552 -0
- package/python/file_manager_worker.py +564 -0
- package/python/form_fill_worker.py +399 -0
- package/python/gpu_utils.py +582 -0
- package/python/image_extractor.py +317 -0
- package/python/image_optimizer.py +444 -0
- package/python/ocr_worker.py +712 -0
- package/python/pyproject.toml +76 -0
- package/python/requirements.txt +51 -0
- package/python/reranker_worker.py +87 -0
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OCR Processing Orchestrator
|
|
3
|
+
*
|
|
4
|
+
* Complete pipeline: Document -> OCR -> Provenance -> Store -> Status Update
|
|
5
|
+
* FAIL-FAST: No fallbacks, errors propagate immediately
|
|
6
|
+
*/
|
|
7
|
+
import { v4 as uuidv4 } from 'uuid';
|
|
8
|
+
import { DatalabClient } from './datalab.js';
|
|
9
|
+
import { OCRError } from './errors.js';
|
|
10
|
+
import { backoffSleep } from '../../utils/backoff.js';
|
|
11
|
+
import { ProvenanceType } from '../../models/provenance.js';
|
|
12
|
+
/**
|
|
13
|
+
* SDK version for provenance - hardcoded since we can't easily get it at runtime
|
|
14
|
+
* Update this when datalab-sdk version changes
|
|
15
|
+
*/
|
|
16
|
+
// NOTE(review): no use of this constant is visible in this part of the file;
// presumably it is written into OCR provenance records further down - confirm.
const DATALAB_SDK_VERSION = '1.0.0';
|
|
17
|
+
/**
 * Resolve the concurrency limit from the DATALAB_MAX_CONCURRENT env var.
 *
 * Defaults to 3 when the variable is unset. The value must be a plain
 * positive decimal integer; surrounding whitespace is tolerated.
 *
 * FAIL-FAST: a malformed or non-positive value throws instead of being
 * silently coerced. A bare parseInt would accept "3abc" as 3, "2.5" as 2,
 * and let 0 or negative numbers through as a concurrency limit.
 *
 * @returns {number} Positive integer concurrency limit.
 * @throws {Error} If the variable is set to anything other than a positive integer.
 */
function parseMaxConcurrent() {
    const raw = process.env.DATALAB_MAX_CONCURRENT ?? '3';
    const trimmed = raw.trim();
    // Digits only: rejects signs, decimals, exponents and trailing garbage.
    if (!/^\d+$/.test(trimmed)) {
        throw new Error(`Invalid numeric env var DATALAB_MAX_CONCURRENT: "${raw}"`);
    }
    const parsed = Number.parseInt(trimmed, 10);
    // A limit of zero (or an absurdly large value) can never be a valid setting.
    if (!Number.isSafeInteger(parsed) || parsed < 1) {
        throw new Error(`DATALAB_MAX_CONCURRENT must be a positive integer, got "${raw}"`);
    }
    return parsed;
}
|
|
25
|
+
/**
 * Runs documents through the OCR pipeline and records the outcome.
 *
 * For each document it looks the record up, marks it 'processing', calls the
 * Datalab client, stores an OCR_RESULT provenance record plus the OCR result,
 * and marks the document complete. Any failure marks the document 'failed'
 * and is re-thrown to the caller.
 */
export class OCRProcessor {
    client;
    db;
    maxConcurrent;
    defaultMode;
    /**
     * @param db - Database service; this class calls getDocument,
     *   updateDocumentStatus, insertProvenance, insertOCRResult,
     *   updateDocumentOCRComplete, listDocuments and getConnection on it
     * @param config - Optional settings, also passed as-is to DatalabClient.
     *   `maxConcurrent` defaults to the DATALAB_MAX_CONCURRENT env var (or 3),
     *   `defaultMode` defaults to 'balanced'
     */
    constructor(db, config = {}) {
        this.db = db;
        this.client = new DatalabClient(config);
        this.maxConcurrent = config.maxConcurrent ?? parseMaxConcurrent();
        this.defaultMode = config.defaultMode ?? 'balanced';
    }
    /**
     * Process a single document through OCR.
     *
     * Pipeline:
     * 1. Get document from database (throws if not found)
     * 2. Update status to 'processing'
     * 3. Call the Datalab client (one retry after a timeout or rate limit)
     * 4. Create OCR_RESULT provenance record
     * 5. Store OCR result in database
     * 6. Update document status to complete
     *
     * @param documentId - ID of the document to process
     * @param mode - OCR mode; falls back to the processor's default mode
     * @param ocrOptions - Extra options forwarded unchanged to the client
     * @returns Result object with success flag, IDs, page/text counts,
     *   duration and any optional payloads returned by the client
     * @throws {OCRError} On any failure; the document is marked 'failed' first
     */
    async processDocument(documentId, mode, ocrOptions) {
        const ocrMode = mode ?? this.defaultMode;
        const startTime = Date.now();
        // 1. Get document (FAIL-FAST: throw if not found)
        const document = this.db.getDocument(documentId);
        if (!document) {
            throw new OCRError(`Document not found: ${documentId}`, 'OCR_FILE_ERROR');
        }
        // 2. Update status to 'processing'
        this.db.updateDocumentStatus(documentId, 'processing');
        try {
            // 3. Generate provenance ID and call OCR. The first attempt is retried
            //    once on a timeout or rate limit; any other error, or any error on
            //    the second attempt, propagates.
            const ocrProvenanceId = uuidv4();
            let response;
            for (let attempt = 1; attempt <= 2; attempt++) {
                try {
                    response = await this.client.processDocument(document.file_path, documentId, ocrProvenanceId, ocrMode, ocrOptions);
                    break;
                }
                catch (error) {
                    const retryable = attempt === 1 && error instanceof OCRError;
                    if (retryable && error.category === 'OCR_TIMEOUT') {
                        console.error(`[WARN] OCR timeout on attempt 1 for ${documentId}, retrying...`);
                        continue;
                    }
                    if (retryable && error.category === 'OCR_RATE_LIMIT') {
                        const retryAfter = error.retryAfter;
                        if (retryAfter !== undefined && retryAfter > 0) {
                            // Honour the server-provided delay (seconds).
                            console.error(`[WARN] OCR rate limited on attempt 1 for ${documentId}, server says wait ${retryAfter}s`);
                            await new Promise((resolve) => setTimeout(resolve, retryAfter * 1000));
                        }
                        else {
                            console.error(`[WARN] OCR rate limited on attempt 1 for ${documentId}, using exponential backoff`);
                            await backoffSleep(0);
                        }
                        continue;
                    }
                    throw error;
                }
            }
            const { result: ocrResult, images, jsonBlocks, metadata, pageOffsets, extractionJson, docTitle, docAuthor, docSubject, } = response;
            // 4. Create OCR_RESULT provenance record
            const provenance = this.createOCRProvenance(ocrProvenanceId, document, ocrResult, ocrMode);
            this.db.insertProvenance(provenance);
            // 5. Store OCR result
            this.db.insertOCRResult(ocrResult);
            // 6. Update document status
            this.db.updateDocumentOCRComplete(documentId, ocrResult.page_count, ocrResult.processing_completed_at);
            // Everything is stored at this point. Default the optional payloads so a
            // missing field cannot throw here and flip a stored document to 'failed'.
            const capturedImages = images ?? {};
            const offsets = pageOffsets ?? [];
            const imageCount = Object.keys(capturedImages).length;
            if (imageCount > 0) {
                console.error(`[INFO] Captured ${imageCount} images from Datalab`);
            }
            return {
                success: true,
                documentId,
                ocrResultId: ocrResult.id,
                provenanceId: ocrProvenanceId,
                pageCount: ocrResult.page_count,
                textLength: ocrResult.text_length,
                durationMs: Date.now() - startTime,
                images: imageCount > 0 ? capturedImages : undefined,
                jsonBlocks,
                metadata,
                pageOffsets: offsets.length > 0 ? offsets : undefined,
                extractionJson: extractionJson ?? undefined,
                docTitle: docTitle ?? undefined,
                docAuthor: docAuthor ?? undefined,
                docSubject: docSubject ?? undefined,
            };
        }
        catch (error) {
            // Update status to 'failed' and re-throw (FAIL-FAST: callers must handle)
            const errorMsg = error instanceof Error ? error.message : String(error);
            try {
                this.db.updateDocumentStatus(documentId, 'failed', errorMsg);
            }
            catch (statusError) {
                // Do not let a failed status write hide the OCR error that caused it;
                // log it (stderr only) and keep propagating the original error.
                const statusMsg = statusError instanceof Error ? statusError.message : String(statusError);
                console.error(`[ERROR] Could not mark document ${documentId} as 'failed': ${statusMsg}`);
            }
            // Re-throw as OCRError if not already one
            if (error instanceof OCRError) {
                throw error;
            }
            throw new OCRError(`OCR processing failed for ${documentId}: ${errorMsg}`, 'OCR_API_ERROR');
        }
    }
    /**
     * Process all pending documents in batches of `maxConcurrent`.
     *
     * H-2 fix: Before processing, documents stuck in 'processing' for longer
     * than 30 minutes are reset to 'pending' so they get re-processed.
     *
     * Per-document failures do not abort the run; they are reported as
     * `{ success: false, documentId, error, durationMs: 0 }` entries.
     *
     * @param mode - OCR mode; falls back to the processor's default mode
     * @returns Summary with processed/failed/remaining counts, total duration
     *   and the per-document results
     * @throws {Error} If `maxConcurrent` is not a number greater than 0
     */
    async processPending(mode) {
        // maxConcurrent is the loop step below: 0 or a negative value would loop
        // forever, and NaN would end the loop after one empty batch.
        if (!(this.maxConcurrent > 0)) {
            throw new Error(`Invalid maxConcurrent: ${String(this.maxConcurrent)} (must be a number greater than 0)`);
        }
        const startTime = Date.now();
        const ocrMode = mode ?? this.defaultMode;
        // H-2: Recover stale 'processing' documents (crashed mid-OCR)
        this.recoverStaleProcessingDocuments();
        const pending = this.db.listDocuments({ status: 'pending' });
        if (pending.length === 0) {
            return {
                processed: 0,
                failed: 0,
                remaining: 0,
                totalDurationMs: 0,
                results: [],
            };
        }
        const results = [];
        // Process in batches for concurrency control
        for (let i = 0; i < pending.length; i += this.maxConcurrent) {
            const batch = pending.slice(i, i + this.maxConcurrent);
            const batchResults = await Promise.all(batch.map(async (doc) => {
                try {
                    return await this.processDocument(doc.id, ocrMode);
                }
                catch (error) {
                    // processDocument already marks doc as 'failed' before throwing
                    const errorMsg = error instanceof Error ? error.message : String(error);
                    return {
                        success: false,
                        documentId: doc.id,
                        error: errorMsg,
                        durationMs: 0,
                    };
                }
            }));
            results.push(...batchResults);
        }
        const processed = results.filter((r) => r.success).length;
        const failed = results.length - processed;
        // Re-query: documents may have become pending while this run was in progress.
        const remaining = this.db.listDocuments({ status: 'pending' }).length;
        return {
            processed,
            failed,
            remaining,
            totalDurationMs: Date.now() - startTime,
            results,
        };
    }
    /**
     * H-2: Recover documents stuck in 'processing' status after a server crash.
     * Any document that has been 'processing' for longer than 30 minutes is
     * assumed to be orphaned from a crash and is reset to 'pending'.
     */
    recoverStaleProcessingDocuments() {
        const conn = this.db.getConnection();
        // Both sides go through datetime() so they are compared in the same
        // 'YYYY-MM-DD HH:MM:SS' form. NOTE(review): the storage format of
        // modified_at is not visible here; if it is an ISO-8601 'T'/'Z' string, a
        // raw text comparison against datetime('now', ...) would never match rows
        // from the current day.
        const staleRows = conn
            .prepare(`SELECT id FROM documents WHERE status = 'processing'
         AND datetime(modified_at) < datetime('now', '-30 minutes')`)
            .all();
        for (const row of staleRows) {
            console.error(`[WARN] Recovering stale 'processing' document ${row.id} (stuck >30min, likely server crash). Resetting to 'pending'.`);
            this.db.updateDocumentStatus(row.id, 'pending');
        }
        if (staleRows.length > 0) {
            console.error(`[INFO] Recovered ${staleRows.length} stale 'processing' document(s) to 'pending' status.`);
        }
    }
    /**
     * Create OCR_RESULT provenance record.
     *
     * The record is a direct child of the document's own provenance record
     * (chain depth 1, path DOCUMENT -> OCR_RESULT).
     *
     * @param id - Provenance ID to assign to the new record
     * @param document - Source document row
     * @param ocrResult - OCR result the record describes
     * @param mode - OCR mode that was used
     * @returns Provenance record object ready for insertion
     */
    createOCRProvenance(id, document, ocrResult, mode) {
        const now = new Date().toISOString();
        return {
            id,
            type: ProvenanceType.OCR_RESULT,
            created_at: now,
            processed_at: ocrResult.processing_completed_at,
            source_file_created_at: null,
            source_file_modified_at: document.modified_at,
            source_type: 'OCR',
            source_path: document.file_path,
            source_id: document.provenance_id,
            root_document_id: document.provenance_id,
            location: null,
            content_hash: ocrResult.content_hash,
            input_hash: document.file_hash,
            file_hash: document.file_hash,
            processor: 'datalab-ocr',
            processor_version: DATALAB_SDK_VERSION,
            processing_params: {
                mode,
                output_format: 'markdown,json',
                request_id: ocrResult.datalab_request_id,
                paginate: true,
            },
            processing_duration_ms: ocrResult.processing_duration_ms,
            processing_quality_score: ocrResult.parse_quality_score,
            parent_id: document.provenance_id,
            parent_ids: JSON.stringify([document.provenance_id]),
            chain_depth: 1,
            chain_path: JSON.stringify(['DOCUMENT', 'OCR_RESULT']),
        };
    }
}
|
|
259
|
+
//# sourceMappingURL=processor.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"processor.js","sourceRoot":"","sources":["../../../src/services/ocr/processor.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,EAAE,IAAI,MAAM,EAAE,MAAM,MAAM,CAAC;AACpC,OAAO,EAAE,aAAa,EAA4B,MAAM,cAAc,CAAC;AACvE,OAAO,EAAE,QAAQ,EAAqB,MAAM,aAAa,CAAC;AAC1D,OAAO,EAAE,YAAY,EAAE,MAAM,wBAAwB,CAAC;AAGtD,OAAO,EAAE,cAAc,EAAyB,MAAM,4BAA4B,CAAC;AA0CnF;;;GAGG;AACH,MAAM,mBAAmB,GAAG,OAAO,CAAC;AAEpC,SAAS,kBAAkB;IACzB,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC,sBAAsB,IAAI,GAAG,CAAC;IACtD,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;IACjC,IAAI,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,EAAE,CAAC;QACzB,MAAM,IAAI,KAAK,CAAC,oDAAoD,GAAG,GAAG,CAAC,CAAC;IAC9E,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,MAAM,OAAO,YAAY;IACN,MAAM,CAAgB;IACtB,EAAE,CAAkB;IACpB,aAAa,CAAS;IACtB,WAAW,CAAmC;IAE/D,YAAY,EAAmB,EAAE,SAA0B,EAAE;QAC3D,IAAI,CAAC,EAAE,GAAG,EAAE,CAAC;QACb,IAAI,CAAC,MAAM,GAAG,IAAI,aAAa,CAAC,MAAM,CAAC,CAAC;QACxC,IAAI,CAAC,aAAa,GAAG,MAAM,CAAC,aAAa,IAAI,kBAAkB,EAAE,CAAC;QAClE,IAAI,CAAC,WAAW,GAAG,MAAM,CAAC,WAAW,IAAI,UAAU,CAAC;IACtD,CAAC;IAED;;;;;;;;;;;;OAYG;IACH,KAAK,CAAC,eAAe,CACnB,UAAkB,EAClB,IAAuC,EACvC,UAQC;QAED,MAAM,OAAO,GAAG,IAAI,IAAI,IAAI,CAAC,WAAW,CAAC;QACzC,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAE7B,kDAAkD;QAClD,MAAM,QAAQ,GAAG,IAAI,CAAC,EAAE,CAAC,WAAW,CAAC,UAAU,CAAC,CAAC;QACjD,IAAI,CAAC,QAAQ,EAAE,CAAC;YACd,MAAM,IAAI,QAAQ,CAAC,uBAAuB,UAAU,EAAE,EAAE,gBAAgB,CAAC,CAAC;QAC5E,CAAC;QAED,mCAAmC;QACnC,IAAI,CAAC,EAAE,CAAC,oBAAoB,CAAC,UAAU,EAAE,YAAY,CAAC,CAAC;QAEvD,IAAI,CAAC;YACH,mEAAmE;YACnE,MAAM,eAAe,GAAG,MAAM,EAAE,CAAC;YACjC,IAAI,SAAoB,CAAC;YACzB,IAAI,MAA8B,CAAC;YACnC,IAAI,UAAU,GAAmC,IAAI,CAAC;YACtD,IAAI,QAAQ,GAAmC,IAAI,CAAC;YACpD,IAAI,WAAW,GAAiB,EAAE,CAAC;YACnC,IAAI,cAAc,GAA+C,IAAI,CAAC;YACtE,IAAI,QAAQ,GAAkB,IAAI,CAAC;YACnC,IAAI,SAAS,GAAkB,IAAI,CAAC;YACpC,IAAI,UAAU,GAAkB,IAAI,CAAC;YAErC,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,IAAI,CAAC,EAAE,OAAO,EAAE,EAAE,CAAC;gBAC9C,IAAI,CAAC;oBACH,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,eAAe,CAChD,QAAQ,CAAC,SAAS,EAClB,UAAU,EACV,eAAe,EACf,OAAO,EACP,UAAU
,CACX,CAAC;oBACF,SAAS,GAAG,QAAQ,CAAC,MAAM,CAAC;oBAC5B,MAAM,GAAG,QAAQ,CAAC,MAAM,CAAC;oBACzB,UAAU,GAAG,QAAQ,CAAC,UAAU,CAAC;oBACjC,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAC;oBAC7B,WAAW,GAAG,QAAQ,CAAC,WAAW,CAAC;oBACnC,cAAc,GAAG,QAAQ,CAAC,cAAc,CAAC;oBACzC,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAC;oBAC7B,SAAS,GAAG,QAAQ,CAAC,SAAS,CAAC;oBAC/B,UAAU,GAAG,QAAQ,CAAC,UAAU,CAAC;oBACjC,MAAM;gBACR,CAAC;gBAAC,OAAO,KAAK,EAAE,CAAC;oBACf,IAAI,OAAO,KAAK,CAAC,IAAI,KAAK,YAAY,QAAQ,IAAI,KAAK,CAAC,QAAQ,KAAK,aAAa,EAAE,CAAC;wBACnF,OAAO,CAAC,KAAK,CAAC,uCAAuC,UAAU,eAAe,CAAC,CAAC;wBAChF,SAAS;oBACX,CAAC;oBACD,IAAI,OAAO,KAAK,CAAC,IAAI,KAAK,YAAY,QAAQ,IAAI,KAAK,CAAC,QAAQ,KAAK,gBAAgB,EAAE,CAAC;wBACtF,MAAM,UAAU,GAAI,KAA2B,CAAC,UAAU,CAAC;wBAC3D,IAAI,UAAU,KAAK,SAAS,IAAI,UAAU,GAAG,CAAC,EAAE,CAAC;4BAC/C,OAAO,CAAC,KAAK,CACX,4CAA4C,UAAU,sBAAsB,UAAU,GAAG,CAC1F,CAAC;4BACF,MAAM,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,UAAU,GAAG,IAAI,CAAC,CAAC,CAAC;wBACzE,CAAC;6BAAM,CAAC;4BACN,OAAO,CAAC,KAAK,CACX,4CAA4C,UAAU,6BAA6B,CACpF,CAAC;4BACF,MAAM,YAAY,CAAC,CAAC,CAAC,CAAC;wBACxB,CAAC;wBACD,SAAS;oBACX,CAAC;oBACD,MAAM,KAAK,CAAC;gBACd,CAAC;YACH,CAAC;YAED,uDAAuD;YACvD,SAAS,GAAG,SAAU,CAAC;YACvB,MAAM,GAAG,MAAO,CAAC;YAEjB,yCAAyC;YACzC,MAAM,UAAU,GAAG,IAAI,CAAC,mBAAmB,CAAC,eAAe,EAAE,QAAQ,EAAE,SAAS,EAAE,OAAO,CAAC,CAAC;YAC3F,IAAI,CAAC,EAAE,CAAC,gBAAgB,CAAC,UAAU,CAAC,CAAC;YAErC,sBAAsB;YACtB,IAAI,CAAC,EAAE,CAAC,eAAe,CAAC,SAAS,CAAC,CAAC;YAEnC,4BAA4B;YAC5B,IAAI,CAAC,EAAE,CAAC,yBAAyB,CAC/B,UAAU,EACV,SAAS,CAAC,UAAU,EACpB,SAAS,CAAC,uBAAuB,CAClC,CAAC;YAEF,kCAAkC;YAClC,MAAM,UAAU,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC;YAC9C,IAAI,UAAU,GAAG,CAAC,EAAE,CAAC;gBACnB,OAAO,CAAC,KAAK,CAAC,mBAAmB,UAAU,sBAAsB,CAAC,CAAC;YACrE,CAAC;YAED,OAAO;gBACL,OAAO,EAAE,IAAI;gBACb,UAAU;gBACV,WAAW,EAAE,SAAS,CAAC,EAAE;gBACzB,YAAY,EAAE,eAAe;gBAC7B,SAAS,EAAE,SAAS,CAAC,UAAU;gBAC/B,UAAU,EAAE,SAAS,CAAC,WAAW;gBACjC,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;gBAClC,MAAM,EAAE,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS;gBAC3C,UAAU;gBACV,QAAQ;gBACR,WAAW,EAAE,WAAW,CA
AC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,SAAS;gBAC7D,cAAc,EAAE,cAAc,IAAI,SAAS;gBAC3C,QAAQ,EAAE,QAAQ,IAAI,SAAS;gBAC/B,SAAS,EAAE,SAAS,IAAI,SAAS;gBACjC,UAAU,EAAE,UAAU,IAAI,SAAS;aACpC,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,0EAA0E;YAC1E,MAAM,QAAQ,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YACxE,IAAI,CAAC,EAAE,CAAC,oBAAoB,CAAC,UAAU,EAAE,QAAQ,EAAE,QAAQ,CAAC,CAAC;YAE7D,0CAA0C;YAC1C,IAAI,KAAK,YAAY,QAAQ,EAAE,CAAC;gBAC9B,MAAM,KAAK,CAAC;YACd,CAAC;YACD,MAAM,IAAI,QAAQ,CAAC,6BAA6B,UAAU,KAAK,QAAQ,EAAE,EAAE,eAAe,CAAC,CAAC;QAC9F,CAAC;IACH,CAAC;IAED;;;;;;OAMG;IACH,KAAK,CAAC,cAAc,CAAC,IAAuC;QAC1D,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC7B,MAAM,OAAO,GAAG,IAAI,IAAI,IAAI,CAAC,WAAW,CAAC;QAEzC,8DAA8D;QAC9D,IAAI,CAAC,+BAA+B,EAAE,CAAC;QAEvC,MAAM,OAAO,GAAG,IAAI,CAAC,EAAE,CAAC,aAAa,CAAC,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC,CAAC;QAC7D,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACzB,OAAO;gBACL,SAAS,EAAE,CAAC;gBACZ,MAAM,EAAE,CAAC;gBACT,SAAS,EAAE,CAAC;gBACZ,eAAe,EAAE,CAAC;gBAClB,OAAO,EAAE,EAAE;aACZ,CAAC;QACJ,CAAC;QAED,MAAM,OAAO,GAAoB,EAAE,CAAC;QAEpC,6CAA6C;QAC7C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;YAC5D,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,aAAa,CAAC,CAAC;YACvD,MAAM,YAAY,GAAG,MAAM,OAAO,CAAC,GAAG,CACpC,KAAK,CAAC,GAAG,CAAC,KAAK,EAAE,GAAG,EAAE,EAAE;gBACtB,IAAI,CAAC;oBACH,OAAO,MAAM,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC,EAAE,EAAE,OAAO,CAAC,CAAC;gBACrD,CAAC;gBAAC,OAAO,KAAK,EAAE,CAAC;oBACf,gEAAgE;oBAChE,MAAM,QAAQ,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;oBACxE,OAAO;wBACL,OAAO,EAAE,KAAK;wBACd,UAAU,EAAE,GAAG,CAAC,EAAE;wBAClB,KAAK,EAAE,QAAQ;wBACf,UAAU,EAAE,CAAC;qBACG,CAAC;gBACrB,CAAC;YACH,CAAC,CAAC,CACH,CAAC;YACF,OAAO,CAAC,IAAI,CAAC,GAAG,YAAY,CAAC,CAAC;QAChC,CAAC;QAED,MAAM,SAAS,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC;QAC1D,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,SAAS,CAAC;QAE1C,MAAM,SAA
S,GAAG,IAAI,CAAC,EAAE,CAAC,aAAa,CAAC,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC,CAAC,MAAM,CAAC;QAEtE,OAAO;YACL,SAAS;YACT,MAAM;YACN,SAAS;YACT,eAAe,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;YACvC,OAAO;SACR,CAAC;IACJ,CAAC;IAED;;;;OAIG;IACK,+BAA+B;QACrC,MAAM,IAAI,GAAG,IAAI,CAAC,EAAE,CAAC,aAAa,EAAE,CAAC;QACrC,MAAM,SAAS,GAAG,IAAI;aACnB,OAAO,CACN;0DACkD,CACnD;aACA,GAAG,EAA2B,CAAC;QAElC,KAAK,MAAM,GAAG,IAAI,SAAS,EAAE,CAAC;YAC5B,OAAO,CAAC,KAAK,CACX,iDAAiD,GAAG,CAAC,EAAE,+DAA+D,CACvH,CAAC;YACF,IAAI,CAAC,EAAE,CAAC,oBAAoB,CAAC,GAAG,CAAC,EAAE,EAAE,SAAS,CAAC,CAAC;QAClD,CAAC;QAED,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACzB,OAAO,CAAC,KAAK,CACX,oBAAoB,SAAS,CAAC,MAAM,sDAAsD,CAC3F,CAAC;QACJ,CAAC;IACH,CAAC;IAED;;OAEG;IACK,mBAAmB,CACzB,EAAU,EACV,QAAkB,EAClB,SAAoB,EACpB,IAAsC;QAEtC,MAAM,GAAG,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QAErC,OAAO;YACL,EAAE;YACF,IAAI,EAAE,cAAc,CAAC,UAAU;YAC/B,UAAU,EAAE,GAAG;YACf,YAAY,EAAE,SAAS,CAAC,uBAAuB;YAC/C,sBAAsB,EAAE,IAAI;YAC5B,uBAAuB,EAAE,QAAQ,CAAC,WAAW;YAC7C,WAAW,EAAE,KAAK;YAClB,WAAW,EAAE,QAAQ,CAAC,SAAS;YAC/B,SAAS,EAAE,QAAQ,CAAC,aAAa;YACjC,gBAAgB,EAAE,QAAQ,CAAC,aAAa;YACxC,QAAQ,EAAE,IAAI;YACd,YAAY,EAAE,SAAS,CAAC,YAAY;YACpC,UAAU,EAAE,QAAQ,CAAC,SAAS;YAC9B,SAAS,EAAE,QAAQ,CAAC,SAAS;YAC7B,SAAS,EAAE,aAAa;YACxB,iBAAiB,EAAE,mBAAmB;YACtC,iBAAiB,EAAE;gBACjB,IAAI;gBACJ,aAAa,EAAE,eAAe;gBAC9B,UAAU,EAAE,SAAS,CAAC,kBAAkB;gBACxC,QAAQ,EAAE,IAAI;aACf;YACD,sBAAsB,EAAE,SAAS,CAAC,sBAAsB;YACxD,wBAAwB,EAAE,SAAS,CAAC,mBAAmB;YACvD,SAAS,EAAE,QAAQ,CAAC,aAAa;YACjC,UAAU,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC;YACpD,WAAW,EAAE,CAAC;YACd,UAAU,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC,UAAU,EAAE,YAAY,CAAC,CAAC;SACvD,CAAC;IACJ,CAAC;CACF"}
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PROV-AGENT Metadata Builder
|
|
3
|
+
*
|
|
4
|
+
* Builds agent metadata for provenance records.
|
|
5
|
+
* Records model names, versions, parameters for AI operations.
|
|
6
|
+
* Records user context for user-triggered actions.
|
|
7
|
+
*
|
|
8
|
+
* CRITICAL: NEVER use console.log() - stdout is reserved for JSON-RPC protocol.
|
|
9
|
+
*
|
|
10
|
+
* @module services/provenance/agent-metadata
|
|
11
|
+
*/
|
|
12
|
+
export interface AgentMetadata {
|
|
13
|
+
agent_type: 'ai_model' | 'user' | 'system';
|
|
14
|
+
model_name?: string;
|
|
15
|
+
model_version?: string;
|
|
16
|
+
temperature?: number;
|
|
17
|
+
max_tokens?: number;
|
|
18
|
+
token_count?: {
|
|
19
|
+
input: number;
|
|
20
|
+
output: number;
|
|
21
|
+
};
|
|
22
|
+
confidence?: number;
|
|
23
|
+
user_id?: string;
|
|
24
|
+
session_id?: string;
|
|
25
|
+
client_type?: string;
|
|
26
|
+
duration_ms?: number;
|
|
27
|
+
}
|
|
28
|
+
/**
|
|
29
|
+
* Build agent metadata for OCR processing operations.
|
|
30
|
+
*
|
|
31
|
+
* @param params - OCR operation parameters
|
|
32
|
+
* @returns Serialized JSON string of agent metadata
|
|
33
|
+
*/
|
|
34
|
+
export declare function buildOCRAgentMetadata(params: {
|
|
35
|
+
mode: string;
|
|
36
|
+
durationMs: number;
|
|
37
|
+
pageCount: number;
|
|
38
|
+
}): string;
|
|
39
|
+
/**
|
|
40
|
+
* Build agent metadata for embedding generation operations.
|
|
41
|
+
*
|
|
42
|
+
* @param params - Embedding operation parameters
|
|
43
|
+
* @returns Serialized JSON string of agent metadata
|
|
44
|
+
*/
|
|
45
|
+
export declare function buildEmbeddingAgentMetadata(params: {
|
|
46
|
+
model: string;
|
|
47
|
+
device: string;
|
|
48
|
+
batchSize: number;
|
|
49
|
+
chunkCount: number;
|
|
50
|
+
durationMs: number;
|
|
51
|
+
}): string;
|
|
52
|
+
/**
|
|
53
|
+
* Build agent metadata for VLM (Vision Language Model) operations.
|
|
54
|
+
*
|
|
55
|
+
* @param params - VLM operation parameters
|
|
56
|
+
* @returns Serialized JSON string of agent metadata
|
|
57
|
+
*/
|
|
58
|
+
export declare function buildVLMAgentMetadata(params: {
|
|
59
|
+
model: string;
|
|
60
|
+
tokensUsed?: number;
|
|
61
|
+
confidence?: number;
|
|
62
|
+
durationMs?: number;
|
|
63
|
+
}): string;
|
|
64
|
+
/**
|
|
65
|
+
* Build agent metadata for user-triggered actions.
|
|
66
|
+
*
|
|
67
|
+
* @param params - User action parameters
|
|
68
|
+
* @returns Serialized JSON string of agent metadata
|
|
69
|
+
*/
|
|
70
|
+
export declare function buildUserAgentMetadata(params: {
|
|
71
|
+
userId?: string;
|
|
72
|
+
sessionId?: string;
|
|
73
|
+
action: string;
|
|
74
|
+
}): string;
|
|
75
|
+
/**
|
|
76
|
+
* Build agent metadata for system-triggered actions (migrations, background tasks).
|
|
77
|
+
*
|
|
78
|
+
* @param action - Description of the system action
|
|
79
|
+
* @returns Serialized JSON string of agent metadata
|
|
80
|
+
*/
|
|
81
|
+
export declare function buildSystemAgentMetadata(action: string): string;
|
|
82
|
+
//# sourceMappingURL=agent-metadata.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"agent-metadata.d.ts","sourceRoot":"","sources":["../../../src/services/provenance/agent-metadata.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAMH,MAAM,WAAW,aAAa;IAC5B,UAAU,EAAE,UAAU,GAAG,MAAM,GAAG,QAAQ,CAAC;IAC3C,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,WAAW,CAAC,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE,CAAC;IAChD,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAMD;;;;;GAKG;AACH,wBAAgB,qBAAqB,CAAC,MAAM,EAAE;IAC5C,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;CACnB,GAAG,MAAM,CAQT;AAMD;;;;;GAKG;AACH,wBAAgB,2BAA2B,CAAC,MAAM,EAAE;IAClD,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;CACpB,GAAG,MAAM,CAaT;AAMD;;;;;GAKG;AACH,wBAAgB,qBAAqB,CAAC,MAAM,EAAE;IAC5C,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB,GAAG,MAAM,CAWT;AAMD;;;;;GAKG;AACH,wBAAgB,sBAAsB,CAAC,MAAM,EAAE;IAC7C,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC;CAChB,GAAG,MAAM,CAOT;AAMD;;;;;GAKG;AACH,wBAAgB,wBAAwB,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAK/D"}
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PROV-AGENT Metadata Builder
|
|
3
|
+
*
|
|
4
|
+
* Builds agent metadata for provenance records.
|
|
5
|
+
* Records model names, versions, parameters for AI operations.
|
|
6
|
+
* Records user context for user-triggered actions.
|
|
7
|
+
*
|
|
8
|
+
* CRITICAL: NEVER use console.log() - stdout is reserved for JSON-RPC protocol.
|
|
9
|
+
*
|
|
10
|
+
* @module services/provenance/agent-metadata
|
|
11
|
+
*/
|
|
12
|
+
// ═══════════════════════════════════════════════════════════════════════════════
|
|
13
|
+
// OCR AGENT METADATA
|
|
14
|
+
// ═══════════════════════════════════════════════════════════════════════════════
|
|
15
|
+
/**
 * Serialize the agent metadata for one OCR run.
 *
 * The record is tagged as an 'ai_model' agent named 'datalab-ocr'; the OCR
 * mode is stored in the model_version field.
 *
 * @param params - `mode`, `durationMs` and `pageCount` of the OCR run
 * @returns JSON string of the agent metadata
 */
export function buildOCRAgentMetadata(params) {
    const { mode, durationMs, pageCount } = params;
    return JSON.stringify({
        agent_type: 'ai_model',
        model_name: 'datalab-ocr',
        model_version: mode,
        duration_ms: durationMs,
        page_count: pageCount,
    });
}
|
|
30
|
+
// ═══════════════════════════════════════════════════════════════════════════════
|
|
31
|
+
// EMBEDDING AGENT METADATA
|
|
32
|
+
// ═══════════════════════════════════════════════════════════════════════════════
|
|
33
|
+
/**
 * Serialize the agent metadata for an embedding generation run.
 *
 * The record is tagged as an 'ai_model' agent with a fixed model_version of
 * '1.5', followed by the device, batch size and number of chunks processed.
 *
 * @param params - `model`, `device`, `batchSize`, `chunkCount`, `durationMs`
 * @returns JSON string of the agent metadata
 */
export function buildEmbeddingAgentMetadata(params) {
    const { model, device, batchSize, chunkCount, durationMs } = params;
    const record = {
        agent_type: 'ai_model',
        model_name: model,
        model_version: '1.5',
        duration_ms: durationMs,
        device,
        batch_size: batchSize,
        chunks_processed: chunkCount,
    };
    return JSON.stringify(record);
}
|
|
53
|
+
// ═══════════════════════════════════════════════════════════════════════════════
|
|
54
|
+
// VLM AGENT METADATA
|
|
55
|
+
// ═══════════════════════════════════════════════════════════════════════════════
|
|
56
|
+
/**
 * Serialize the agent metadata for a VLM (Vision Language Model) call.
 *
 * `confidence` and `durationMs` are optional and are omitted from the JSON
 * when undefined. A token_count of `{ input: 0, output: tokensUsed }` is
 * appended only when `tokensUsed` is truthy.
 *
 * @param params - `model` plus optional `tokensUsed`, `confidence`, `durationMs`
 * @returns JSON string of the agent metadata
 */
export function buildVLMAgentMetadata(params) {
    const { model, tokensUsed, confidence, durationMs } = params;
    // JSON.stringify drops undefined properties, so an undefined token_count
    // simply leaves the key out of the output.
    const tokenCount = tokensUsed ? { input: 0, output: tokensUsed } : undefined;
    return JSON.stringify({
        agent_type: 'ai_model',
        model_name: model,
        confidence,
        duration_ms: durationMs,
        token_count: tokenCount,
    });
}
|
|
74
|
+
// ═══════════════════════════════════════════════════════════════════════════════
|
|
75
|
+
// USER AGENT METADATA
|
|
76
|
+
// ═══════════════════════════════════════════════════════════════════════════════
|
|
77
|
+
/**
 * Serialize the agent metadata for an action triggered by a user.
 *
 * `userId` and `sessionId` are optional and are omitted from the JSON when
 * undefined.
 *
 * @param params - `action` plus optional `userId` and `sessionId`
 * @returns JSON string of the agent metadata
 */
export function buildUserAgentMetadata(params) {
    const { userId, sessionId, action } = params;
    const record = {
        agent_type: 'user',
        user_id: userId,
        session_id: sessionId,
        action,
    };
    return JSON.stringify(record);
}
|
|
91
|
+
// ═══════════════════════════════════════════════════════════════════════════════
|
|
92
|
+
// SYSTEM AGENT METADATA
|
|
93
|
+
// ═══════════════════════════════════════════════════════════════════════════════
|
|
94
|
+
/**
 * Serialize the agent metadata for an action triggered by the system
 * (migrations, background tasks).
 *
 * @param action - Description of the system action
 * @returns JSON string of the agent metadata
 */
export function buildSystemAgentMetadata(action) {
    const record = { agent_type: 'system', action };
    return JSON.stringify(record);
}
|
|
106
|
+
//# sourceMappingURL=agent-metadata.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"agent-metadata.js","sourceRoot":"","sources":["../../../src/services/provenance/agent-metadata.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAoBH,kFAAkF;AAClF,qBAAqB;AACrB,kFAAkF;AAElF;;;;;GAKG;AACH,MAAM,UAAU,qBAAqB,CAAC,MAIrC;IACC,MAAM,QAAQ,GAAkB;QAC9B,UAAU,EAAE,UAAU;QACtB,UAAU,EAAE,aAAa;QACzB,aAAa,EAAE,MAAM,CAAC,IAAI;QAC1B,WAAW,EAAE,MAAM,CAAC,UAAU;KAC/B,CAAC;IACF,OAAO,IAAI,CAAC,SAAS,CAAC,EAAE,GAAG,QAAQ,EAAE,UAAU,EAAE,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC;AACvE,CAAC;AAED,kFAAkF;AAClF,2BAA2B;AAC3B,kFAAkF;AAElF;;;;;GAKG;AACH,MAAM,UAAU,2BAA2B,CAAC,MAM3C;IACC,MAAM,QAAQ,GAAkB;QAC9B,UAAU,EAAE,UAAU;QACtB,UAAU,EAAE,MAAM,CAAC,KAAK;QACxB,aAAa,EAAE,KAAK;QACpB,WAAW,EAAE,MAAM,CAAC,UAAU;KAC/B,CAAC;IACF,OAAO,IAAI,CAAC,SAAS,CAAC;QACpB,GAAG,QAAQ;QACX,MAAM,EAAE,MAAM,CAAC,MAAM;QACrB,UAAU,EAAE,MAAM,CAAC,SAAS;QAC5B,gBAAgB,EAAE,MAAM,CAAC,UAAU;KACpC,CAAC,CAAC;AACL,CAAC;AAED,kFAAkF;AAClF,qBAAqB;AACrB,kFAAkF;AAElF;;;;;GAKG;AACH,MAAM,UAAU,qBAAqB,CAAC,MAKrC;IACC,MAAM,QAAQ,GAAkB;QAC9B,UAAU,EAAE,UAAU;QACtB,UAAU,EAAE,MAAM,CAAC,KAAK;QACxB,UAAU,EAAE,MAAM,CAAC,UAAU;QAC7B,WAAW,EAAE,MAAM,CAAC,UAAU;KAC/B,CAAC;IACF,IAAI,MAAM,CAAC,UAAU,EAAE,CAAC;QACtB,QAAQ,CAAC,WAAW,GAAG,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,MAAM,CAAC,UAAU,EAAE,CAAC;IACjE,CAAC;IACD,OAAO,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;AAClC,CAAC;AAED,kFAAkF;AAClF,sBAAsB;AACtB,kFAAkF;AAElF;;;;;GAKG;AACH,MAAM,UAAU,sBAAsB,CAAC,MAItC;IACC,OAAO,IAAI,CAAC,SAAS,CAAC;QACpB,UAAU,EAAE,MAAM;QAClB,OAAO,EAAE,MAAM,CAAC,MAAM;QACtB,UAAU,EAAE,MAAM,CAAC,SAAS;QAC5B,MAAM,EAAE,MAAM,CAAC,MAAM;KACtB,CAAC,CAAC;AACL,CAAC;AAED,kFAAkF;AAClF,wBAAwB;AACxB,kFAAkF;AAElF;;;;;GAKG;AACH,MAAM,UAAU,wBAAwB,CAAC,MAAc;IACrD,OAAO,IAAI,CAAC,SAAS,CAAC;QACpB,UAAU,EAAE,QAAQ;QACpB,MAAM;KACP,CAAC,CAAC;AACL,CAAC"}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Hash-Chain Verification for Provenance
|
|
3
|
+
*
|
|
4
|
+
* Creates a tamper-evident Merkle-like chain where each provenance record
|
|
5
|
+
* includes SHA-256(content_hash + parent.chain_hash).
|
|
6
|
+
*
|
|
7
|
+
* CRITICAL: NEVER use console.log() - stdout is reserved for JSON-RPC protocol.
|
|
8
|
+
*
|
|
9
|
+
* @module services/provenance/chain-hash
|
|
10
|
+
*/
|
|
11
|
+
import type Database from 'better-sqlite3';
|
|
12
|
+
export interface ChainVerificationResult {
|
|
13
|
+
valid: boolean;
|
|
14
|
+
total_records: number;
|
|
15
|
+
verified: number;
|
|
16
|
+
broken_at: string | null;
|
|
17
|
+
error?: string;
|
|
18
|
+
}
|
|
19
|
+
export interface BackfillResult {
|
|
20
|
+
updated: number;
|
|
21
|
+
errors: number;
|
|
22
|
+
}
|
|
23
|
+
/**
|
|
24
|
+
* Compute chain hash for a provenance record.
|
|
25
|
+
*
|
|
26
|
+
* Chain hash = SHA-256(content_hash + ":" + parent_chain_hash)
|
|
27
|
+
* For root records (no parent): SHA-256(content_hash)
|
|
28
|
+
*
|
|
29
|
+
* @param contentHash - The content hash of the current record
|
|
30
|
+
* @param parentChainHash - The chain hash of the parent record, or null for roots
|
|
31
|
+
* @returns Hex-encoded SHA-256 chain hash
|
|
32
|
+
*/
|
|
33
|
+
export declare function computeChainHash(contentHash: string, parentChainHash: string | null): string;
|
|
34
|
+
/**
|
|
35
|
+
* Verify the chain hash integrity for all provenance records of a root document.
|
|
36
|
+
*
|
|
37
|
+
* Walks the provenance chain in depth-first order, recomputing each chain_hash
|
|
38
|
+
* from (content_hash, parent.chain_hash) and comparing against the stored value.
|
|
39
|
+
*
|
|
40
|
+
* Pre-v32 records without chain_hash are considered valid (unverifiable).
|
|
41
|
+
*
|
|
42
|
+
* @param conn - Database connection
|
|
43
|
+
* @param rootDocumentId - The root document provenance ID to verify
|
|
44
|
+
* @returns Verification result with record counts and any breakage point
|
|
45
|
+
*/
|
|
46
|
+
export declare function verifyChainHashes(conn: Database.Database, rootDocumentId: string): ChainVerificationResult;
|
|
47
|
+
/**
|
|
48
|
+
* Backfill chain hashes for existing provenance records that don't have them.
|
|
49
|
+
*
|
|
50
|
+
* Processes records in order of increasing depth (chain_depth ASC) so that parent
|
|
51
|
+
* chain hashes are available when computing child hashes.
|
|
52
|
+
*
|
|
53
|
+
* @param conn - Database connection
|
|
54
|
+
* @returns Count of updated records and errors
|
|
55
|
+
*/
|
|
56
|
+
export declare function backfillChainHashes(conn: Database.Database): BackfillResult;
|
|
57
|
+
//# sourceMappingURL=chain-hash.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"chain-hash.d.ts","sourceRoot":"","sources":["../../../src/services/provenance/chain-hash.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAGH,OAAO,KAAK,QAAQ,MAAM,gBAAgB,CAAC;AAM3C,MAAM,WAAW,uBAAuB;IACtC,KAAK,EAAE,OAAO,CAAC;IACf,aAAa,EAAE,MAAM,CAAC;IACtB,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,cAAc;IAC7B,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;CAChB;AAMD;;;;;;;;;GASG;AACH,wBAAgB,gBAAgB,CAAC,WAAW,EAAE,MAAM,EAAE,eAAe,EAAE,MAAM,GAAG,IAAI,GAAG,MAAM,CAK5F;AAMD;;;;;;;;;;;GAWG;AACH,wBAAgB,iBAAiB,CAC/B,IAAI,EAAE,QAAQ,CAAC,QAAQ,EACvB,cAAc,EAAE,MAAM,GACrB,uBAAuB,CAkDzB;AAMD;;;;;;;;GAQG;AACH,wBAAgB,mBAAmB,CAAC,IAAI,EAAE,QAAQ,CAAC,QAAQ,GAAG,cAAc,CA6C3E"}
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Hash-Chain Verification for Provenance
|
|
3
|
+
*
|
|
4
|
+
* Creates a tamper-evident Merkle-like chain where each provenance record
|
|
5
|
+
* includes SHA-256(content_hash + ":" + parent.chain_hash).
|
|
6
|
+
*
|
|
7
|
+
* CRITICAL: NEVER use console.log() - stdout is reserved for JSON-RPC protocol.
|
|
8
|
+
*
|
|
9
|
+
* @module services/provenance/chain-hash
|
|
10
|
+
*/
|
|
11
|
+
import { createHash } from 'crypto';
|
|
12
|
+
// ═══════════════════════════════════════════════════════════════════════════════
|
|
13
|
+
// HASH COMPUTATION
|
|
14
|
+
// ═══════════════════════════════════════════════════════════════════════════════
|
|
15
|
+
/**
 * Derive the chain hash that links a provenance record to its parent.
 *
 * - With a parent chain hash: SHA-256(content_hash + ":" + parent_chain_hash)
 * - Without one (root records): SHA-256(content_hash)
 *
 * Any falsy parent value (null, undefined, empty string) selects the root form.
 *
 * @param contentHash - The content hash of the current record
 * @param parentChainHash - The chain hash of the parent record, or null for roots
 * @returns Hex-encoded SHA-256 chain hash
 */
export function computeChainHash(contentHash, parentChainHash) {
    const hasher = createHash('sha256');
    if (parentChainHash) {
        // Child record: bind this record's content to the parent's chain hash.
        hasher.update(`${contentHash}:${parentChainHash}`);
    }
    else {
        // Root record: nothing to chain to, hash the content hash alone.
        hasher.update(contentHash);
    }
    return hasher.digest('hex');
}
|
|
31
|
+
// ═══════════════════════════════════════════════════════════════════════════════
|
|
32
|
+
// CHAIN VERIFICATION
|
|
33
|
+
// ═══════════════════════════════════════════════════════════════════════════════
|
|
34
|
+
/**
 * Check the stored chain hashes of every provenance record under one root document.
 *
 * Records are loaded ordered by chain_depth ASC, then created_at ASC, so a parent
 * is always examined before its children. For each record that has a chain_hash,
 * the hash is recomputed from (content_hash, parent.chain_hash) and compared with
 * the stored value; the first mismatch ends the walk and is reported.
 *
 * Records without a chain_hash (pre-v32) cannot be recomputed and are counted as
 * verified. A parent that is absent from the result set, or that has no chain_hash
 * itself, is treated the same as "no parent" when recomputing the child's hash.
 *
 * @param conn - Database connection
 * @param rootDocumentId - The root document provenance ID to verify
 * @returns Verification result with record counts and any breakage point
 */
export function verifyChainHashes(conn, rootDocumentId) {
    const rows = conn.prepare(`
SELECT id, content_hash, chain_hash, parent_id
FROM provenance
WHERE root_document_id = ?
ORDER BY chain_depth ASC, created_at ASC
`).all(rootDocumentId);
    const total = rows.length;
    if (total === 0) {
        return { valid: true, total_records: 0, verified: 0, broken_at: null };
    }
    // Index rows by id so each parent lookup is a single Map access.
    const byId = new Map();
    for (const row of rows) {
        byId.set(row.id, row);
    }
    let checked = 0;
    for (const row of rows) {
        // Only rows that carry a chain_hash can be recomputed; the rest pass through.
        if (row.chain_hash) {
            const parent = row.parent_id ? byId.get(row.parent_id) : null;
            const parentHash = parent?.chain_hash ?? null;
            const recomputed = computeChainHash(row.content_hash, parentHash);
            if (recomputed !== row.chain_hash) {
                // `verified` reports how many records passed before the break.
                return {
                    valid: false,
                    total_records: total,
                    verified: checked,
                    broken_at: row.id,
                    error: `Chain hash mismatch at ${row.id}: expected ${recomputed}, got ${row.chain_hash}`,
                };
            }
        }
        checked += 1;
    }
    return {
        valid: true,
        total_records: total,
        verified: checked,
        broken_at: null,
    };
}
|
|
86
|
+
// ═══════════════════════════════════════════════════════════════════════════════
|
|
87
|
+
// BACKFILL
|
|
88
|
+
// ═══════════════════════════════════════════════════════════════════════════════
|
|
89
|
+
/**
 * Populate chain_hash for every provenance record that does not have one yet.
 *
 * Pending records are processed ordered by chain_depth ASC, then created_at ASC,
 * so a parent's chain hash is normally known (already stored, or computed earlier
 * in this run) by the time its children are hashed. A failure on one record is
 * logged to stderr and counted; processing then continues with the next record.
 *
 * NOTE(review): when a record has a parent_id but no chain hash is known for that
 * parent (parent missing, or its own backfill failed), the record is hashed as if
 * it were a root. Confirm this fallback is intended, since the stored hash would
 * no longer match if the parent later receives a chain hash.
 *
 * @param conn - Database connection
 * @returns Count of updated records and errors
 */
export function backfillChainHashes(conn) {
    const pending = conn.prepare(`
SELECT id, content_hash, parent_id, chain_hash
FROM provenance
WHERE chain_hash IS NULL
ORDER BY chain_depth ASC, created_at ASC
`).all();
    // Seed the lookup with hashes that are already stored so children of
    // previously hashed parents chain correctly.
    const alreadyHashed = conn.prepare('SELECT id, chain_hash FROM provenance WHERE chain_hash IS NOT NULL').all();
    const chainHashById = new Map(alreadyHashed.map((row) => [row.id, row.chain_hash]));
    const writeHash = conn.prepare('UPDATE provenance SET chain_hash = ? WHERE id = ?');
    let updated = 0;
    let errors = 0;
    for (const row of pending) {
        try {
            let parentHash = null;
            if (row.parent_id) {
                parentHash = chainHashById.get(row.parent_id) ?? null;
            }
            const digest = computeChainHash(row.content_hash, parentHash);
            writeHash.run(digest, row.id);
            // Remember the new hash so deeper records in this run can chain to it.
            chainHashById.set(row.id, digest);
            updated += 1;
        }
        catch (err) {
            // stderr only: stdout is reserved for the JSON-RPC protocol.
            const reason = err instanceof Error ? err.message : String(err);
            console.error(`[ChainHash] Failed to backfill ${row.id}: ${reason}`);
            errors += 1;
        }
    }
    return { updated, errors };
}
|
|
131
|
+
//# sourceMappingURL=chain-hash.js.map
|