ocr-provenance-mcp 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ocr-provenance-mcp might be problematic. Click here for more details.
- package/.env.example +55 -0
- package/LICENSE +78 -0
- package/README.md +1154 -0
- package/dist/bin-http.d.ts +24 -0
- package/dist/bin-http.d.ts.map +1 -0
- package/dist/bin-http.js +275 -0
- package/dist/bin-http.js.map +1 -0
- package/dist/bin-setup.d.ts +11 -0
- package/dist/bin-setup.d.ts.map +1 -0
- package/dist/bin-setup.js +610 -0
- package/dist/bin-setup.js.map +1 -0
- package/dist/bin.d.ts +16 -0
- package/dist/bin.d.ts.map +1 -0
- package/dist/bin.js +16 -0
- package/dist/bin.js.map +1 -0
- package/dist/index.d.ts +13 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +90 -0
- package/dist/index.js.map +1 -0
- package/dist/models/chunk.d.ts +136 -0
- package/dist/models/chunk.d.ts.map +1 -0
- package/dist/models/chunk.js +27 -0
- package/dist/models/chunk.js.map +1 -0
- package/dist/models/cluster.d.ts +79 -0
- package/dist/models/cluster.d.ts.map +1 -0
- package/dist/models/cluster.js +10 -0
- package/dist/models/cluster.js.map +1 -0
- package/dist/models/comparison.d.ts +62 -0
- package/dist/models/comparison.d.ts.map +1 -0
- package/dist/models/comparison.js +8 -0
- package/dist/models/comparison.js.map +1 -0
- package/dist/models/document.d.ts +104 -0
- package/dist/models/document.d.ts.map +1 -0
- package/dist/models/document.js +15 -0
- package/dist/models/document.js.map +1 -0
- package/dist/models/embedding.d.ts +87 -0
- package/dist/models/embedding.d.ts.map +1 -0
- package/dist/models/embedding.js +23 -0
- package/dist/models/embedding.js.map +1 -0
- package/dist/models/extraction.d.ts +15 -0
- package/dist/models/extraction.d.ts.map +1 -0
- package/dist/models/extraction.js +2 -0
- package/dist/models/extraction.js.map +1 -0
- package/dist/models/form-fill.d.ts +23 -0
- package/dist/models/form-fill.d.ts.map +1 -0
- package/dist/models/form-fill.js +2 -0
- package/dist/models/form-fill.js.map +1 -0
- package/dist/models/image.d.ts +177 -0
- package/dist/models/image.d.ts.map +1 -0
- package/dist/models/image.js +8 -0
- package/dist/models/image.js.map +1 -0
- package/dist/models/index.d.ts +14 -0
- package/dist/models/index.d.ts.map +1 -0
- package/dist/models/index.js +22 -0
- package/dist/models/index.js.map +1 -0
- package/dist/models/provenance.d.ts +174 -0
- package/dist/models/provenance.d.ts.map +1 -0
- package/dist/models/provenance.js +53 -0
- package/dist/models/provenance.js.map +1 -0
- package/dist/models/uploaded-file.d.ts +20 -0
- package/dist/models/uploaded-file.d.ts.map +1 -0
- package/dist/models/uploaded-file.js +2 -0
- package/dist/models/uploaded-file.js.map +1 -0
- package/dist/server/errors.d.ts +93 -0
- package/dist/server/errors.d.ts.map +1 -0
- package/dist/server/errors.js +256 -0
- package/dist/server/errors.js.map +1 -0
- package/dist/server/events.d.ts +36 -0
- package/dist/server/events.d.ts.map +1 -0
- package/dist/server/events.js +48 -0
- package/dist/server/events.js.map +1 -0
- package/dist/server/permissions.d.ts +26 -0
- package/dist/server/permissions.d.ts.map +1 -0
- package/dist/server/permissions.js +194 -0
- package/dist/server/permissions.js.map +1 -0
- package/dist/server/register-tools.d.ts +25 -0
- package/dist/server/register-tools.d.ts.map +1 -0
- package/dist/server/register-tools.js +102 -0
- package/dist/server/register-tools.js.map +1 -0
- package/dist/server/startup.d.ts +16 -0
- package/dist/server/startup.d.ts.map +1 -0
- package/dist/server/startup.js +37 -0
- package/dist/server/startup.js.map +1 -0
- package/dist/server/state.d.ts +166 -0
- package/dist/server/state.d.ts.map +1 -0
- package/dist/server/state.js +424 -0
- package/dist/server/state.js.map +1 -0
- package/dist/server/transports/http-transport.d.ts +37 -0
- package/dist/server/transports/http-transport.d.ts.map +1 -0
- package/dist/server/transports/http-transport.js +204 -0
- package/dist/server/transports/http-transport.js.map +1 -0
- package/dist/server/transports/index.d.ts +9 -0
- package/dist/server/transports/index.d.ts.map +1 -0
- package/dist/server/transports/index.js +9 -0
- package/dist/server/transports/index.js.map +1 -0
- package/dist/server/transports/session-manager.d.ts +40 -0
- package/dist/server/transports/session-manager.d.ts.map +1 -0
- package/dist/server/transports/session-manager.js +74 -0
- package/dist/server/transports/session-manager.js.map +1 -0
- package/dist/server/types.d.ts +82 -0
- package/dist/server/types.d.ts.map +1 -0
- package/dist/server/types.js +14 -0
- package/dist/server/types.js.map +1 -0
- package/dist/services/audit.d.ts +26 -0
- package/dist/services/audit.d.ts.map +1 -0
- package/dist/services/audit.js +43 -0
- package/dist/services/audit.js.map +1 -0
- package/dist/services/chunking/chunk-deduplicator.d.ts +33 -0
- package/dist/services/chunking/chunk-deduplicator.d.ts.map +1 -0
- package/dist/services/chunking/chunk-deduplicator.js +46 -0
- package/dist/services/chunking/chunk-deduplicator.js.map +1 -0
- package/dist/services/chunking/chunk-merger.d.ts +26 -0
- package/dist/services/chunking/chunk-merger.d.ts.map +1 -0
- package/dist/services/chunking/chunk-merger.js +94 -0
- package/dist/services/chunking/chunk-merger.js.map +1 -0
- package/dist/services/chunking/chunker.d.ts +62 -0
- package/dist/services/chunking/chunker.d.ts.map +1 -0
- package/dist/services/chunking/chunker.js +566 -0
- package/dist/services/chunking/chunker.js.map +1 -0
- package/dist/services/chunking/heading-normalizer.d.ts +33 -0
- package/dist/services/chunking/heading-normalizer.d.ts.map +1 -0
- package/dist/services/chunking/heading-normalizer.js +101 -0
- package/dist/services/chunking/heading-normalizer.js.map +1 -0
- package/dist/services/chunking/json-block-analyzer.d.ts +163 -0
- package/dist/services/chunking/json-block-analyzer.d.ts.map +1 -0
- package/dist/services/chunking/json-block-analyzer.js +1033 -0
- package/dist/services/chunking/json-block-analyzer.js.map +1 -0
- package/dist/services/chunking/markdown-parser.d.ts +75 -0
- package/dist/services/chunking/markdown-parser.d.ts.map +1 -0
- package/dist/services/chunking/markdown-parser.js +428 -0
- package/dist/services/chunking/markdown-parser.js.map +1 -0
- package/dist/services/chunking/text-normalizer.d.ts +20 -0
- package/dist/services/chunking/text-normalizer.d.ts.map +1 -0
- package/dist/services/chunking/text-normalizer.js +36 -0
- package/dist/services/chunking/text-normalizer.js.map +1 -0
- package/dist/services/clm/contract-schemas.d.ts +36 -0
- package/dist/services/clm/contract-schemas.d.ts.map +1 -0
- package/dist/services/clm/contract-schemas.js +92 -0
- package/dist/services/clm/contract-schemas.js.map +1 -0
- package/dist/services/clm/summarization.d.ts +46 -0
- package/dist/services/clm/summarization.d.ts.map +1 -0
- package/dist/services/clm/summarization.js +61 -0
- package/dist/services/clm/summarization.js.map +1 -0
- package/dist/services/clustering/clustering-service.d.ts +58 -0
- package/dist/services/clustering/clustering-service.d.ts.map +1 -0
- package/dist/services/clustering/clustering-service.js +467 -0
- package/dist/services/clustering/clustering-service.js.map +1 -0
- package/dist/services/comparison/diff-service.d.ts +41 -0
- package/dist/services/comparison/diff-service.d.ts.map +1 -0
- package/dist/services/comparison/diff-service.js +120 -0
- package/dist/services/comparison/diff-service.js.map +1 -0
- package/dist/services/embedding/embedder.d.ts +55 -0
- package/dist/services/embedding/embedder.d.ts.map +1 -0
- package/dist/services/embedding/embedder.js +202 -0
- package/dist/services/embedding/embedder.js.map +1 -0
- package/dist/services/embedding/nomic.d.ts +67 -0
- package/dist/services/embedding/nomic.d.ts.map +1 -0
- package/dist/services/embedding/nomic.js +280 -0
- package/dist/services/embedding/nomic.js.map +1 -0
- package/dist/services/gemini/circuit-breaker.d.ts +106 -0
- package/dist/services/gemini/circuit-breaker.d.ts.map +1 -0
- package/dist/services/gemini/circuit-breaker.js +237 -0
- package/dist/services/gemini/circuit-breaker.js.map +1 -0
- package/dist/services/gemini/client.d.ts +173 -0
- package/dist/services/gemini/client.d.ts.map +1 -0
- package/dist/services/gemini/client.js +483 -0
- package/dist/services/gemini/client.js.map +1 -0
- package/dist/services/gemini/config.d.ts +116 -0
- package/dist/services/gemini/config.d.ts.map +1 -0
- package/dist/services/gemini/config.js +118 -0
- package/dist/services/gemini/config.js.map +1 -0
- package/dist/services/gemini/index.d.ts +9 -0
- package/dist/services/gemini/index.d.ts.map +1 -0
- package/dist/services/gemini/index.js +13 -0
- package/dist/services/gemini/index.js.map +1 -0
- package/dist/services/gemini/rate-limiter.d.ts +62 -0
- package/dist/services/gemini/rate-limiter.d.ts.map +1 -0
- package/dist/services/gemini/rate-limiter.js +120 -0
- package/dist/services/gemini/rate-limiter.js.map +1 -0
- package/dist/services/images/extractor.d.ts +88 -0
- package/dist/services/images/extractor.d.ts.map +1 -0
- package/dist/services/images/extractor.js +340 -0
- package/dist/services/images/extractor.js.map +1 -0
- package/dist/services/images/optimizer.d.ts +130 -0
- package/dist/services/images/optimizer.d.ts.map +1 -0
- package/dist/services/images/optimizer.js +228 -0
- package/dist/services/images/optimizer.js.map +1 -0
- package/dist/services/ocr/datalab.d.ts +64 -0
- package/dist/services/ocr/datalab.d.ts.map +1 -0
- package/dist/services/ocr/datalab.js +425 -0
- package/dist/services/ocr/datalab.js.map +1 -0
- package/dist/services/ocr/errors.d.ts +38 -0
- package/dist/services/ocr/errors.d.ts.map +1 -0
- package/dist/services/ocr/errors.js +83 -0
- package/dist/services/ocr/errors.js.map +1 -0
- package/dist/services/ocr/file-manager.d.ts +76 -0
- package/dist/services/ocr/file-manager.d.ts.map +1 -0
- package/dist/services/ocr/file-manager.js +238 -0
- package/dist/services/ocr/file-manager.js.map +1 -0
- package/dist/services/ocr/form-fill.d.ts +48 -0
- package/dist/services/ocr/form-fill.d.ts.map +1 -0
- package/dist/services/ocr/form-fill.js +213 -0
- package/dist/services/ocr/form-fill.js.map +1 -0
- package/dist/services/ocr/processor.d.ts +95 -0
- package/dist/services/ocr/processor.d.ts.map +1 -0
- package/dist/services/ocr/processor.js +259 -0
- package/dist/services/ocr/processor.js.map +1 -0
- package/dist/services/provenance/agent-metadata.d.ts +82 -0
- package/dist/services/provenance/agent-metadata.d.ts.map +1 -0
- package/dist/services/provenance/agent-metadata.js +106 -0
- package/dist/services/provenance/agent-metadata.js.map +1 -0
- package/dist/services/provenance/chain-hash.d.ts +57 -0
- package/dist/services/provenance/chain-hash.d.ts.map +1 -0
- package/dist/services/provenance/chain-hash.js +131 -0
- package/dist/services/provenance/chain-hash.js.map +1 -0
- package/dist/services/provenance/exporter.d.ts +202 -0
- package/dist/services/provenance/exporter.d.ts.map +1 -0
- package/dist/services/provenance/exporter.js +457 -0
- package/dist/services/provenance/exporter.js.map +1 -0
- package/dist/services/provenance/index.d.ts +15 -0
- package/dist/services/provenance/index.d.ts.map +1 -0
- package/dist/services/provenance/index.js +17 -0
- package/dist/services/provenance/index.js.map +1 -0
- package/dist/services/provenance/tracker.d.ts +138 -0
- package/dist/services/provenance/tracker.d.ts.map +1 -0
- package/dist/services/provenance/tracker.js +293 -0
- package/dist/services/provenance/tracker.js.map +1 -0
- package/dist/services/provenance/verifier.d.ts +153 -0
- package/dist/services/provenance/verifier.d.ts.map +1 -0
- package/dist/services/provenance/verifier.js +536 -0
- package/dist/services/provenance/verifier.js.map +1 -0
- package/dist/services/python-pool.d.ts +70 -0
- package/dist/services/python-pool.d.ts.map +1 -0
- package/dist/services/python-pool.js +265 -0
- package/dist/services/python-pool.js.map +1 -0
- package/dist/services/search/bm25.d.ts +180 -0
- package/dist/services/search/bm25.d.ts.map +1 -0
- package/dist/services/search/bm25.js +656 -0
- package/dist/services/search/bm25.js.map +1 -0
- package/dist/services/search/fusion.d.ts +103 -0
- package/dist/services/search/fusion.d.ts.map +1 -0
- package/dist/services/search/fusion.js +122 -0
- package/dist/services/search/fusion.js.map +1 -0
- package/dist/services/search/local-reranker.d.ts +30 -0
- package/dist/services/search/local-reranker.d.ts.map +1 -0
- package/dist/services/search/local-reranker.js +123 -0
- package/dist/services/search/local-reranker.js.map +1 -0
- package/dist/services/search/quality.d.ts +11 -0
- package/dist/services/search/quality.d.ts.map +1 -0
- package/dist/services/search/quality.js +17 -0
- package/dist/services/search/quality.js.map +1 -0
- package/dist/services/search/query-classifier.d.ts +34 -0
- package/dist/services/search/query-classifier.d.ts.map +1 -0
- package/dist/services/search/query-classifier.js +114 -0
- package/dist/services/search/query-classifier.js.map +1 -0
- package/dist/services/search/query-expander.d.ts +73 -0
- package/dist/services/search/query-expander.d.ts.map +1 -0
- package/dist/services/search/query-expander.js +281 -0
- package/dist/services/search/query-expander.js.map +1 -0
- package/dist/services/search/reranker.d.ts +44 -0
- package/dist/services/search/reranker.d.ts.map +1 -0
- package/dist/services/search/reranker.js +101 -0
- package/dist/services/search/reranker.js.map +1 -0
- package/dist/services/storage/database/annotation-operations.d.ts +113 -0
- package/dist/services/storage/database/annotation-operations.d.ts.map +1 -0
- package/dist/services/storage/database/annotation-operations.js +177 -0
- package/dist/services/storage/database/annotation-operations.js.map +1 -0
- package/dist/services/storage/database/approval-operations.d.ts +132 -0
- package/dist/services/storage/database/approval-operations.d.ts.map +1 -0
- package/dist/services/storage/database/approval-operations.js +206 -0
- package/dist/services/storage/database/approval-operations.js.map +1 -0
- package/dist/services/storage/database/chunk-operations.d.ts +132 -0
- package/dist/services/storage/database/chunk-operations.d.ts.map +1 -0
- package/dist/services/storage/database/chunk-operations.js +306 -0
- package/dist/services/storage/database/chunk-operations.js.map +1 -0
- package/dist/services/storage/database/cluster-operations.d.ts +97 -0
- package/dist/services/storage/database/cluster-operations.d.ts.map +1 -0
- package/dist/services/storage/database/cluster-operations.js +258 -0
- package/dist/services/storage/database/cluster-operations.js.map +1 -0
- package/dist/services/storage/database/comparison-operations.d.ts +41 -0
- package/dist/services/storage/database/comparison-operations.d.ts.map +1 -0
- package/dist/services/storage/database/comparison-operations.js +65 -0
- package/dist/services/storage/database/comparison-operations.js.map +1 -0
- package/dist/services/storage/database/converters.d.ts +36 -0
- package/dist/services/storage/database/converters.d.ts.map +1 -0
- package/dist/services/storage/database/converters.js +244 -0
- package/dist/services/storage/database/converters.js.map +1 -0
- package/dist/services/storage/database/document-operations.d.ts +145 -0
- package/dist/services/storage/database/document-operations.d.ts.map +1 -0
- package/dist/services/storage/database/document-operations.js +498 -0
- package/dist/services/storage/database/document-operations.js.map +1 -0
- package/dist/services/storage/database/embedding-operations.d.ts +130 -0
- package/dist/services/storage/database/embedding-operations.d.ts.map +1 -0
- package/dist/services/storage/database/embedding-operations.js +315 -0
- package/dist/services/storage/database/embedding-operations.js.map +1 -0
- package/dist/services/storage/database/extraction-operations.d.ts +47 -0
- package/dist/services/storage/database/extraction-operations.d.ts.map +1 -0
- package/dist/services/storage/database/extraction-operations.js +85 -0
- package/dist/services/storage/database/extraction-operations.js.map +1 -0
- package/dist/services/storage/database/form-fill-operations.d.ts +58 -0
- package/dist/services/storage/database/form-fill-operations.d.ts.map +1 -0
- package/dist/services/storage/database/form-fill-operations.js +116 -0
- package/dist/services/storage/database/form-fill-operations.js.map +1 -0
- package/dist/services/storage/database/helpers.d.ts +29 -0
- package/dist/services/storage/database/helpers.d.ts.map +1 -0
- package/dist/services/storage/database/helpers.js +55 -0
- package/dist/services/storage/database/helpers.js.map +1 -0
- package/dist/services/storage/database/image-operations.d.ts +202 -0
- package/dist/services/storage/database/image-operations.d.ts.map +1 -0
- package/dist/services/storage/database/image-operations.js +484 -0
- package/dist/services/storage/database/image-operations.js.map +1 -0
- package/dist/services/storage/database/index.d.ts +13 -0
- package/dist/services/storage/database/index.d.ts.map +1 -0
- package/dist/services/storage/database/index.js +16 -0
- package/dist/services/storage/database/index.js.map +1 -0
- package/dist/services/storage/database/lock-operations.d.ts +59 -0
- package/dist/services/storage/database/lock-operations.d.ts.map +1 -0
- package/dist/services/storage/database/lock-operations.js +89 -0
- package/dist/services/storage/database/lock-operations.js.map +1 -0
- package/dist/services/storage/database/obligation-operations.d.ts +88 -0
- package/dist/services/storage/database/obligation-operations.d.ts.map +1 -0
- package/dist/services/storage/database/obligation-operations.js +206 -0
- package/dist/services/storage/database/obligation-operations.js.map +1 -0
- package/dist/services/storage/database/ocr-operations.d.ts +33 -0
- package/dist/services/storage/database/ocr-operations.d.ts.map +1 -0
- package/dist/services/storage/database/ocr-operations.js +70 -0
- package/dist/services/storage/database/ocr-operations.js.map +1 -0
- package/dist/services/storage/database/playbook-operations.d.ts +72 -0
- package/dist/services/storage/database/playbook-operations.d.ts.map +1 -0
- package/dist/services/storage/database/playbook-operations.js +247 -0
- package/dist/services/storage/database/playbook-operations.js.map +1 -0
- package/dist/services/storage/database/provenance-operations.d.ts +112 -0
- package/dist/services/storage/database/provenance-operations.d.ts.map +1 -0
- package/dist/services/storage/database/provenance-operations.js +251 -0
- package/dist/services/storage/database/provenance-operations.js.map +1 -0
- package/dist/services/storage/database/service.d.ts +142 -0
- package/dist/services/storage/database/service.d.ts.map +1 -0
- package/dist/services/storage/database/service.js +310 -0
- package/dist/services/storage/database/service.js.map +1 -0
- package/dist/services/storage/database/static-operations.d.ts +30 -0
- package/dist/services/storage/database/static-operations.d.ts.map +1 -0
- package/dist/services/storage/database/static-operations.js +218 -0
- package/dist/services/storage/database/static-operations.js.map +1 -0
- package/dist/services/storage/database/stats-operations.d.ts +101 -0
- package/dist/services/storage/database/stats-operations.d.ts.map +1 -0
- package/dist/services/storage/database/stats-operations.js +394 -0
- package/dist/services/storage/database/stats-operations.js.map +1 -0
- package/dist/services/storage/database/tag-operations.d.ts +76 -0
- package/dist/services/storage/database/tag-operations.d.ts.map +1 -0
- package/dist/services/storage/database/tag-operations.js +178 -0
- package/dist/services/storage/database/tag-operations.js.map +1 -0
- package/dist/services/storage/database/types.d.ts +286 -0
- package/dist/services/storage/database/types.d.ts.map +1 -0
- package/dist/services/storage/database/types.js +39 -0
- package/dist/services/storage/database/types.js.map +1 -0
- package/dist/services/storage/database/upload-operations.d.ts +71 -0
- package/dist/services/storage/database/upload-operations.d.ts.map +1 -0
- package/dist/services/storage/database/upload-operations.js +124 -0
- package/dist/services/storage/database/upload-operations.js.map +1 -0
- package/dist/services/storage/database/user-operations.d.ts +102 -0
- package/dist/services/storage/database/user-operations.d.ts.map +1 -0
- package/dist/services/storage/database/user-operations.js +151 -0
- package/dist/services/storage/database/user-operations.js.map +1 -0
- package/dist/services/storage/database/workflow-operations.d.ts +98 -0
- package/dist/services/storage/database/workflow-operations.d.ts.map +1 -0
- package/dist/services/storage/database/workflow-operations.js +157 -0
- package/dist/services/storage/database/workflow-operations.js.map +1 -0
- package/dist/services/storage/database.d.ts +16 -0
- package/dist/services/storage/database.d.ts.map +1 -0
- package/dist/services/storage/database.js +15 -0
- package/dist/services/storage/database.js.map +1 -0
- package/dist/services/storage/index.d.ts +10 -0
- package/dist/services/storage/index.d.ts.map +1 -0
- package/dist/services/storage/index.js +10 -0
- package/dist/services/storage/index.js.map +1 -0
- package/dist/services/storage/migrations/index.d.ts +16 -0
- package/dist/services/storage/migrations/index.d.ts.map +1 -0
- package/dist/services/storage/migrations/index.js +20 -0
- package/dist/services/storage/migrations/index.js.map +1 -0
- package/dist/services/storage/migrations/operations.d.ts +40 -0
- package/dist/services/storage/migrations/operations.d.ts.map +1 -0
- package/dist/services/storage/migrations/operations.js +2910 -0
- package/dist/services/storage/migrations/operations.js.map +1 -0
- package/dist/services/storage/migrations/schema-definitions.d.ts +306 -0
- package/dist/services/storage/migrations/schema-definitions.d.ts.map +1 -0
- package/dist/services/storage/migrations/schema-definitions.js +1006 -0
- package/dist/services/storage/migrations/schema-definitions.js.map +1 -0
- package/dist/services/storage/migrations/schema-helpers.d.ts +50 -0
- package/dist/services/storage/migrations/schema-helpers.d.ts.map +1 -0
- package/dist/services/storage/migrations/schema-helpers.js +176 -0
- package/dist/services/storage/migrations/schema-helpers.js.map +1 -0
- package/dist/services/storage/migrations/types.d.ts +15 -0
- package/dist/services/storage/migrations/types.d.ts.map +1 -0
- package/dist/services/storage/migrations/types.js +21 -0
- package/dist/services/storage/migrations/types.js.map +1 -0
- package/dist/services/storage/migrations/verification.d.ts +20 -0
- package/dist/services/storage/migrations/verification.d.ts.map +1 -0
- package/dist/services/storage/migrations/verification.js +78 -0
- package/dist/services/storage/migrations/verification.js.map +1 -0
- package/dist/services/storage/migrations.d.ts +16 -0
- package/dist/services/storage/migrations.d.ts.map +1 -0
- package/dist/services/storage/migrations.js +17 -0
- package/dist/services/storage/migrations.js.map +1 -0
- package/dist/services/storage/types.d.ts +12 -0
- package/dist/services/storage/types.d.ts.map +1 -0
- package/dist/services/storage/types.js +5 -0
- package/dist/services/storage/types.js.map +1 -0
- package/dist/services/storage/vector.d.ts +208 -0
- package/dist/services/storage/vector.d.ts.map +1 -0
- package/dist/services/storage/vector.js +526 -0
- package/dist/services/storage/vector.js.map +1 -0
- package/dist/services/vlm/pipeline.d.ts +194 -0
- package/dist/services/vlm/pipeline.d.ts.map +1 -0
- package/dist/services/vlm/pipeline.js +800 -0
- package/dist/services/vlm/pipeline.js.map +1 -0
- package/dist/services/vlm/prompts.d.ts +171 -0
- package/dist/services/vlm/prompts.d.ts.map +1 -0
- package/dist/services/vlm/prompts.js +229 -0
- package/dist/services/vlm/prompts.js.map +1 -0
- package/dist/services/vlm/service.d.ts +174 -0
- package/dist/services/vlm/service.d.ts.map +1 -0
- package/dist/services/vlm/service.js +256 -0
- package/dist/services/vlm/service.js.map +1 -0
- package/dist/services/webhook-delivery.d.ts +4 -0
- package/dist/services/webhook-delivery.d.ts.map +1 -0
- package/dist/services/webhook-delivery.js +140 -0
- package/dist/services/webhook-delivery.js.map +1 -0
- package/dist/tools/chunks.d.ts +19 -0
- package/dist/tools/chunks.d.ts.map +1 -0
- package/dist/tools/chunks.js +392 -0
- package/dist/tools/chunks.js.map +1 -0
- package/dist/tools/clm.d.ts +16 -0
- package/dist/tools/clm.d.ts.map +1 -0
- package/dist/tools/clm.js +668 -0
- package/dist/tools/clm.js.map +1 -0
- package/dist/tools/clustering.d.ts +13 -0
- package/dist/tools/clustering.d.ts.map +1 -0
- package/dist/tools/clustering.js +498 -0
- package/dist/tools/clustering.js.map +1 -0
- package/dist/tools/collaboration.d.ts +15 -0
- package/dist/tools/collaboration.d.ts.map +1 -0
- package/dist/tools/collaboration.js +516 -0
- package/dist/tools/collaboration.js.map +1 -0
- package/dist/tools/comparison.d.ts +13 -0
- package/dist/tools/comparison.d.ts.map +1 -0
- package/dist/tools/comparison.js +735 -0
- package/dist/tools/comparison.js.map +1 -0
- package/dist/tools/compliance.d.ts +15 -0
- package/dist/tools/compliance.d.ts.map +1 -0
- package/dist/tools/compliance.js +640 -0
- package/dist/tools/compliance.js.map +1 -0
- package/dist/tools/config.d.ts +19 -0
- package/dist/tools/config.d.ts.map +1 -0
- package/dist/tools/config.js +213 -0
- package/dist/tools/config.js.map +1 -0
- package/dist/tools/database.d.ts +62 -0
- package/dist/tools/database.d.ts.map +1 -0
- package/dist/tools/database.js +288 -0
- package/dist/tools/database.js.map +1 -0
- package/dist/tools/documents.d.ts +61 -0
- package/dist/tools/documents.d.ts.map +1 -0
- package/dist/tools/documents.js +1624 -0
- package/dist/tools/documents.js.map +1 -0
- package/dist/tools/embeddings.d.ts +14 -0
- package/dist/tools/embeddings.d.ts.map +1 -0
- package/dist/tools/embeddings.js +626 -0
- package/dist/tools/embeddings.js.map +1 -0
- package/dist/tools/evaluation.d.ts +25 -0
- package/dist/tools/evaluation.d.ts.map +1 -0
- package/dist/tools/evaluation.js +523 -0
- package/dist/tools/evaluation.js.map +1 -0
- package/dist/tools/events.d.ts +16 -0
- package/dist/tools/events.d.ts.map +1 -0
- package/dist/tools/events.js +493 -0
- package/dist/tools/events.js.map +1 -0
- package/dist/tools/extraction-structured.d.ts +13 -0
- package/dist/tools/extraction-structured.d.ts.map +1 -0
- package/dist/tools/extraction-structured.js +390 -0
- package/dist/tools/extraction-structured.js.map +1 -0
- package/dist/tools/extraction.d.ts +24 -0
- package/dist/tools/extraction.d.ts.map +1 -0
- package/dist/tools/extraction.js +424 -0
- package/dist/tools/extraction.js.map +1 -0
- package/dist/tools/file-management.d.ts +14 -0
- package/dist/tools/file-management.d.ts.map +1 -0
- package/dist/tools/file-management.js +523 -0
- package/dist/tools/file-management.js.map +1 -0
- package/dist/tools/form-fill.d.ts +13 -0
- package/dist/tools/form-fill.d.ts.map +1 -0
- package/dist/tools/form-fill.js +250 -0
- package/dist/tools/form-fill.js.map +1 -0
- package/dist/tools/health.d.ts +19 -0
- package/dist/tools/health.d.ts.map +1 -0
- package/dist/tools/health.js +229 -0
- package/dist/tools/health.js.map +1 -0
- package/dist/tools/images.d.ts +54 -0
- package/dist/tools/images.d.ts.map +1 -0
- package/dist/tools/images.js +787 -0
- package/dist/tools/images.js.map +1 -0
- package/dist/tools/ingestion.d.ts +94 -0
- package/dist/tools/ingestion.d.ts.map +1 -0
- package/dist/tools/ingestion.js +1659 -0
- package/dist/tools/ingestion.js.map +1 -0
- package/dist/tools/intelligence.d.ts +18 -0
- package/dist/tools/intelligence.d.ts.map +1 -0
- package/dist/tools/intelligence.js +1039 -0
- package/dist/tools/intelligence.js.map +1 -0
- package/dist/tools/provenance.d.ts +51 -0
- package/dist/tools/provenance.d.ts.map +1 -0
- package/dist/tools/provenance.js +691 -0
- package/dist/tools/provenance.js.map +1 -0
- package/dist/tools/reports.d.ts +41 -0
- package/dist/tools/reports.d.ts.map +1 -0
- package/dist/tools/reports.js +1394 -0
- package/dist/tools/reports.js.map +1 -0
- package/dist/tools/search.d.ts +35 -0
- package/dist/tools/search.d.ts.map +1 -0
- package/dist/tools/search.js +2528 -0
- package/dist/tools/search.js.map +1 -0
- package/dist/tools/shared.d.ts +52 -0
- package/dist/tools/shared.d.ts.map +1 -0
- package/dist/tools/shared.js +54 -0
- package/dist/tools/shared.js.map +1 -0
- package/dist/tools/tags.d.ts +15 -0
- package/dist/tools/tags.d.ts.map +1 -0
- package/dist/tools/tags.js +287 -0
- package/dist/tools/tags.js.map +1 -0
- package/dist/tools/timeline.d.ts +15 -0
- package/dist/tools/timeline.d.ts.map +1 -0
- package/dist/tools/timeline.js +14 -0
- package/dist/tools/timeline.js.map +1 -0
- package/dist/tools/users.d.ts +14 -0
- package/dist/tools/users.d.ts.map +1 -0
- package/dist/tools/users.js +257 -0
- package/dist/tools/users.js.map +1 -0
- package/dist/tools/vlm.d.ts +40 -0
- package/dist/tools/vlm.d.ts.map +1 -0
- package/dist/tools/vlm.js +475 -0
- package/dist/tools/vlm.js.map +1 -0
- package/dist/tools/workflow.d.ts +16 -0
- package/dist/tools/workflow.d.ts.map +1 -0
- package/dist/tools/workflow.js +495 -0
- package/dist/tools/workflow.js.map +1 -0
- package/dist/utils/backoff.d.ts +53 -0
- package/dist/utils/backoff.d.ts.map +1 -0
- package/dist/utils/backoff.js +78 -0
- package/dist/utils/backoff.js.map +1 -0
- package/dist/utils/config-persistence.d.ts +33 -0
- package/dist/utils/config-persistence.d.ts.map +1 -0
- package/dist/utils/config-persistence.js +61 -0
- package/dist/utils/config-persistence.js.map +1 -0
- package/dist/utils/hash.d.ts +65 -0
- package/dist/utils/hash.d.ts.map +1 -0
- package/dist/utils/hash.js +146 -0
- package/dist/utils/hash.js.map +1 -0
- package/dist/utils/math.d.ts +21 -0
- package/dist/utils/math.d.ts.map +1 -0
- package/dist/utils/math.js +39 -0
- package/dist/utils/math.js.map +1 -0
- package/dist/utils/validation.d.ts +697 -0
- package/dist/utils/validation.d.ts.map +1 -0
- package/dist/utils/validation.js +529 -0
- package/dist/utils/validation.js.map +1 -0
- package/package.json +96 -0
- package/python/.gitkeep +0 -0
- package/python/__init__.py +104 -0
- package/python/clustering_worker.py +440 -0
- package/python/docx_image_extractor.py +524 -0
- package/python/embedding_worker.py +552 -0
- package/python/file_manager_worker.py +564 -0
- package/python/form_fill_worker.py +399 -0
- package/python/gpu_utils.py +582 -0
- package/python/image_extractor.py +317 -0
- package/python/image_optimizer.py +444 -0
- package/python/ocr_worker.py +712 -0
- package/python/pyproject.toml +76 -0
- package/python/requirements.txt +51 -0
- package/python/reranker_worker.py +87 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"chain-hash.js","sourceRoot":"","sources":["../../../src/services/provenance/chain-hash.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAoBpC,kFAAkF;AAClF,mBAAmB;AACnB,kFAAkF;AAElF;;;;;;;;;GASG;AACH,MAAM,UAAU,gBAAgB,CAAC,WAAmB,EAAE,eAA8B;IAClF,MAAM,KAAK,GAAG,eAAe;QAC3B,CAAC,CAAC,GAAG,WAAW,IAAI,eAAe,EAAE;QACrC,CAAC,CAAC,WAAW,CAAC;IAChB,OAAO,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;AAC1D,CAAC;AAED,kFAAkF;AAClF,qBAAqB;AACrB,kFAAkF;AAElF;;;;;;;;;;;GAWG;AACH,MAAM,UAAU,iBAAiB,CAC/B,IAAuB,EACvB,cAAsB;IAEtB,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC;;;;;GAK5B,CAAC,CAAC,GAAG,CAAC,cAAc,CAKnB,CAAC;IAEH,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,aAAa,EAAE,CAAC,EAAE,QAAQ,EAAE,CAAC,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC;IACzE,CAAC;IAED,sCAAsC;IACtC,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;IACvD,IAAI,QAAQ,GAAG,CAAC,CAAC;IAEjB,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,IAAI,CAAC,MAAM,CAAC,UAAU,EAAE,CAAC;YACvB,oEAAoE;YACpE,QAAQ,EAAE,CAAC;YACX,SAAS;QACX,CAAC;QAED,MAAM,YAAY,GAAG,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,GAAG,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;QAC/E,MAAM,eAAe,GAAG,YAAY,EAAE,UAAU,IAAI,IAAI,CAAC;QACzD,MAAM,YAAY,GAAG,gBAAgB,CAAC,MAAM,CAAC,YAAY,EAAE,eAAe,CAAC,CAAC;QAE5E,IAAI,MAAM,CAAC,UAAU,KAAK,YAAY,EAAE,CAAC;YACvC,OAAO;gBACL,KAAK,EAAE,KAAK;gBACZ,aAAa,EAAE,OAAO,CAAC,MAAM;gBAC7B,QAAQ;gBACR,SAAS,EAAE,MAAM,CAAC,EAAE;gBACpB,KAAK,EAAE,0BAA0B,MAAM,CAAC,EAAE,cAAc,YAAY,SAAS,MAAM,CAAC,UAAU,EAAE;aACjG,CAAC;QACJ,CAAC;QACD,QAAQ,EAAE,CAAC;IACb,CAAC;IAED,OAAO;QACL,KAAK,EAAE,IAAI;QACX,aAAa,EAAE,OAAO,CAAC,MAAM;QAC7B,QAAQ;QACR,SAAS,EAAE,IAAI;KAChB,CAAC;AACJ,CAAC;AAED,kFAAkF;AAClF,WAAW;AACX,kFAAkF;AAElF;;;;;;;;GAQG;AACH,MAAM,UAAU,mBAAmB,CAAC,IAAuB;IACzD,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC;;;;;GAK5B,CAAC,CAAC,GAAG,EAKJ,CAAC;IAEH,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,MAAM,OAA
O,GAAG,IAAI,GAAG,EAAkB,CAAC;IAE1C,qDAAqD;IACrD,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,CAC3B,oEAAoE,CACrE,CAAC,GAAG,EAA+C,CAAC;IACrD,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;QACzB,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,UAAU,CAAC,CAAC;IAClC,CAAC;IAED,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,CAAC,mDAAmD,CAAC,CAAC;IAErF,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,IAAI,CAAC;YACH,MAAM,eAAe,GAAG,MAAM,CAAC,SAAS;gBACtC,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,SAAS,CAAC,IAAI,IAAI,CAAC;gBACzC,CAAC,CAAC,IAAI,CAAC;YACT,MAAM,SAAS,GAAG,gBAAgB,CAAC,MAAM,CAAC,YAAY,EAAE,eAAe,CAAC,CAAC;YACzE,UAAU,CAAC,GAAG,CAAC,SAAS,EAAE,MAAM,CAAC,EAAE,CAAC,CAAC;YACrC,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,EAAE,SAAS,CAAC,CAAC;YAClC,OAAO,EAAE,CAAC;QACZ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,CACX,kCAAkC,MAAM,CAAC,EAAE,KAAK,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CACzG,CAAC;YACF,MAAM,EAAE,CAAC;QACX,CAAC;IACH,CAAC;IAED,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC;AAC7B,CAAC"}
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ProvenanceExporter - Export provenance records in JSON, W3C PROV-JSON, and CSV
|
|
3
|
+
*
|
|
4
|
+
* Constitution Compliance:
|
|
5
|
+
* - CP-001: Complete provenance chain for every data item
|
|
6
|
+
* - CP-003: SHA-256 content hashing
|
|
7
|
+
* - CP-005: Full reproducibility via processing params
|
|
8
|
+
*
|
|
9
|
+
* FAIL FAST: All errors throw immediately with detailed error info
|
|
10
|
+
* NO MOCKS: Tests use real DatabaseService
|
|
11
|
+
*/
|
|
12
|
+
import { DatabaseService } from '../storage/database/index.js';
|
|
13
|
+
import { ProvenanceTracker } from './tracker.js';
|
|
14
|
+
import { ProvenanceRecord } from '../../models/provenance.js';
|
|
15
|
+
export type ExportScope = 'document' | 'database' | 'all';
|
|
16
|
+
export type ExportFormat = 'json' | 'w3c-prov' | 'csv';
|
|
17
|
+
/**
 * Shape of the W3C PROV-JSON document produced by the exporter.
 * Reference: https://www.w3.org/submissions/prov-json/
 *
 * Apart from `prefix`, every section is a dictionary keyed by a qualified
 * identifier whose value is the attribute bag of that node or relation.
 */
interface PROVDocument {
    /** Namespace prefix -> namespace URI. */
    prefix: Record<string, string>;
    /** Entities, one per exported provenance record. */
    entity: Record<string, Record<string, unknown>>;
    /** Activities (processing operations) that generated entities. */
    activity: Record<string, Record<string, unknown>>;
    /** Agents (processors) that entities are attributed to. */
    agent: Record<string, Record<string, unknown>>;
    /** Derivation relations: generated entity -> used (source) entity. */
    wasDerivedFrom: Record<string, Record<string, unknown>>;
    /** Generation relations: entity -> activity that produced it. */
    wasGeneratedBy: Record<string, Record<string, unknown>>;
    /** Attribution relations: entity -> responsible agent. */
    wasAttributedTo: Record<string, Record<string, unknown>>;
}
|
|
30
|
+
/**
 * Result of a JSON export: the selected provenance records plus export
 * metadata.
 */
interface JSONExportResult {
    /** Discriminator for this result type. */
    format: 'json';
    /** Scope the export was requested with. */
    scope: ExportScope;
    /** Document the export was limited to; only meaningful for scope 'document'. */
    document_id?: string;
    /** ISO-8601 timestamp taken when the result object was built. */
    exported_at: string;
    /** Number of entries in `records`. */
    record_count: number;
    /** The exported provenance records. */
    records: ProvenanceRecord[];
}
|
|
39
|
+
/**
 * Result of a W3C PROV-JSON export: the PROV document plus export metadata
 * and per-section counts.
 */
interface W3CPROVExportResult {
    /** Discriminator for this result type. */
    format: 'w3c-prov';
    /** Scope the export was requested with. */
    scope: ExportScope;
    /** Document the export was limited to; only meaningful for scope 'document'. */
    document_id?: string;
    /** ISO-8601 timestamp taken when the result object was built. */
    exported_at: string;
    /** Number of keys in `prov_document.entity`. */
    entity_count: number;
    /** Number of keys in `prov_document.activity`. */
    activity_count: number;
    /** Number of keys in `prov_document.agent`. */
    agent_count: number;
    /** The generated PROV-JSON document. */
    prov_document: PROVDocument;
}
|
|
50
|
+
/**
 * Result of a CSV export: the rendered CSV text plus export metadata.
 */
interface CSVExportResult {
    /** Discriminator for this result type. */
    format: 'csv';
    /** Scope the export was requested with. */
    scope: ExportScope;
    /** Document the export was limited to; only meaningful for scope 'document'. */
    document_id?: string;
    /** ISO-8601 timestamp taken when the result object was built. */
    exported_at: string;
    /** Number of provenance records rendered (the header row is not counted). */
    record_count: number;
    /** Header row followed by one line per record, joined with '\n'. */
    csv_content: string;
}
|
|
59
|
+
/**
 * Result of writing an export to disk.
 */
interface FileExportResult {
    /** True when the file was written. */
    success: boolean;
    /** Format that was written. */
    format: ExportFormat;
    /** Path the content was written to, as passed by the caller. */
    output_path: string;
    /** UTF-8 byte length of the written content. */
    bytes_written: number;
    /**
     * Number of exported records. For the 'w3c-prov' format this is the
     * entity count of the PROV document.
     */
    record_count: number;
}
|
|
67
|
+
/**
 * Error codes for exporter operations.
 *
 * Combines the exporter-specific codes with the codes inherited from
 * ProvenanceErrorCode.
 */
export declare const ExporterErrorCode: {
    readonly INVALID_SCOPE: "INVALID_EXPORT_SCOPE";
    readonly INVALID_FORMAT: "INVALID_EXPORT_FORMAT";
    readonly FILE_WRITE_ERROR: "FILE_WRITE_ERROR";
    readonly DOCUMENT_REQUIRED: "DOCUMENT_ID_REQUIRED_FOR_SCOPE";
    readonly NOT_FOUND: "PROVENANCE_NOT_FOUND";
    readonly CHAIN_BROKEN: "PROVENANCE_CHAIN_BROKEN";
    readonly ROOT_NOT_FOUND: "ROOT_DOCUMENT_NOT_FOUND";
    readonly INVALID_TYPE: "INVALID_PROVENANCE_TYPE";
    readonly INVALID_PARAMS: "INVALID_PROVENANCE_PARAMS";
};
/** Union of every string literal value of ExporterErrorCode. */
type ExporterErrorCodeType = (typeof ExporterErrorCode)[keyof typeof ExporterErrorCode];
|
|
80
|
+
/**
 * ExporterError - Typed error for export operations
 * FAIL FAST: Always throw with detailed error information
 */
export declare class ExporterError extends Error {
    /** Machine-readable error code (one of the ExporterErrorCode values). */
    readonly code: ExporterErrorCodeType;
    /** Optional structured context describing the failure. */
    readonly details?: Record<string, unknown> | undefined;
    /**
     * @param message - Human-readable description of the failure
     * @param code - One of the ExporterErrorCode values
     * @param details - Optional structured context attached to the error
     */
    constructor(message: string, code: ExporterErrorCodeType, details?: Record<string, unknown> | undefined);
}
|
|
89
|
+
/**
 * ProvenanceExporter - Export provenance records in multiple formats
 *
 * Formats:
 * - JSON: Internal JSON format with full ProvenanceRecord data
 * - W3C PROV-JSON: Standard W3C PROV-JSON format for interoperability
 * - CSV: Tabular format for spreadsheet/analysis tools
 *
 * Scopes:
 * - document: Export all records for a specific root document
 * - database/all: Export all records in the database
 */
export declare class ProvenanceExporter {
    private readonly tracker;
    private readonly rawDb;
    private static readonly PROV_PREFIX;
    private static readonly OCR_PREFIX;
    private static readonly OCRP_PREFIX;
    /**
     * @param db - Database service whose connection is used for whole-database reads
     * @param tracker - Tracker used to fetch the records of one root document
     */
    constructor(db: DatabaseService, tracker: ProvenanceTracker);
    /**
     * Export provenance records in the internal JSON format.
     *
     * @param scope - 'document' (requires documentId), 'database', or 'all'
     * @param documentId - Required when scope='document'
     * @returns JSONExportResult with the full ProvenanceRecord array
     * @throws ExporterError if scope invalid or documentId missing when required
     */
    exportJSON(scope: ExportScope, documentId?: string): Promise<JSONExportResult>;
    /**
     * Export provenance records in W3C PROV-JSON format.
     *
     * Reference: https://www.w3.org/submissions/prov-json/
     *
     * Mapping:
     * - ProvenanceRecord -> prov:Entity
     * - Processing operation -> prov:Activity (non-DOCUMENT types)
     * - Processor -> prov:Agent
     * - source_id relationship -> wasDerivedFrom
     * - Processing creates record -> wasGeneratedBy
     * - Processor attribution -> wasAttributedTo
     *
     * @param scope - 'document' (requires documentId), 'database', or 'all'
     * @param documentId - Required when scope='document'
     * @returns W3CPROVExportResult with the PROV-JSON document
     * @throws ExporterError if scope invalid or documentId missing when required
     */
    exportW3CPROV(scope: ExportScope, documentId?: string): Promise<W3CPROVExportResult>;
    /**
     * Export provenance records in CSV format.
     *
     * @param scope - 'document' (requires documentId), 'database', or 'all'
     * @param documentId - Required when scope='document'
     * @returns CSVExportResult with the CSV content string
     * @throws ExporterError if scope invalid or documentId missing when required
     */
    exportCSV(scope: ExportScope, documentId?: string): Promise<CSVExportResult>;
    /**
     * Export provenance records to a file.
     *
     * Creates parent directories if they don't exist.
     *
     * @param outputPath - Absolute path to output file
     * @param format - 'json', 'w3c-prov', or 'csv'
     * @param scope - 'document' (requires documentId), 'database', or 'all'
     * @param documentId - Required when scope='document'
     * @returns FileExportResult with bytes written and path
     * @throws ExporterError if format invalid, scope invalid, or write fails
     */
    exportToFile(outputPath: string, format: ExportFormat, scope: ExportScope, documentId?: string): Promise<FileExportResult>;
    /**
     * Validate scope and documentId.
     * @throws ExporterError if invalid
     */
    private validateScope;
    /**
     * Get provenance records for the given scope.
     * Note: validateScope() must be called before this to ensure documentId is present when needed
     */
    private getRecordsForScope;
    /**
     * Get all provenance records from the database,
     * ordered by chain_depth ASC, created_at ASC.
     */
    private getAllProvenance;
    /**
     * Transform provenance records to W3C PROV-JSON format.
     *
     * Mapping rules:
     * - Each ProvenanceRecord becomes an entity: ocr:<type>-<id>
     * - Non-DOCUMENT types create an activity: ocr:activity-<id>
     * - Unique agents per processor name
     * - wasDerivedFrom links entities with source_id
     * - wasGeneratedBy links entities to their activities
     * - wasAttributedTo links all entities to their processor agents
     */
    private transformToPROVJSON;
    /**
     * Sanitize a processor name for use as an agent ID.
     * Only allows alphanumeric characters and hyphens.
     */
    private sanitizeAgentId;
    /**
     * Transform provenance records to CSV format.
     */
    private transformToCSV;
    /**
     * Escape a value for CSV.
     * - Wrap in quotes if it contains a comma, quote, or newline
     * - Escape quotes by doubling them
     */
    private escapeCSV;
}
|
|
201
|
+
export {};
|
|
202
|
+
//# sourceMappingURL=exporter.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"exporter.d.ts","sourceRoot":"","sources":["../../../src/services/provenance/exporter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAMH,OAAO,EAAE,eAAe,EAAE,MAAM,8BAA8B,CAAC;AAC/D,OAAO,EAAE,iBAAiB,EAAuB,MAAM,cAAc,CAAC;AACtE,OAAO,EAAE,gBAAgB,EAAkB,MAAM,4BAA4B,CAAC;AAQ9E,MAAM,MAAM,WAAW,GAAG,UAAU,GAAG,UAAU,GAAG,KAAK,CAAC;AAC1D,MAAM,MAAM,YAAY,GAAG,MAAM,GAAG,UAAU,GAAG,KAAK,CAAC;AAEvD;;;GAGG;AACH,UAAU,YAAY;IACpB,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC/B,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC;IAChD,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC;IAClD,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC;IAC/C,cAAc,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC;IACxD,cAAc,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC;IACxD,eAAe,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC;CAC1D;AAED,4BAA4B;AAC5B,UAAU,gBAAgB;IACxB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,WAAW,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,gBAAgB,EAAE,CAAC;CAC7B;AAED,qCAAqC;AACrC,UAAU,mBAAmB;IAC3B,MAAM,EAAE,UAAU,CAAC;IACnB,KAAK,EAAE,WAAW,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,CAAC;IACvB,WAAW,EAAE,MAAM,CAAC;IACpB,aAAa,EAAE,YAAY,CAAC;CAC7B;AAED,2BAA2B;AAC3B,UAAU,eAAe;IACvB,MAAM,EAAE,KAAK,CAAC;IACd,KAAK,EAAE,WAAW,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,WAAW,EAAE,MAAM,CAAC;CACrB;AAED,4BAA4B;AAC5B,UAAU,gBAAgB;IACxB,OAAO,EAAE,OAAO,CAAC;IACjB,MAAM,EAAE,YAAY,CAAC;IACrB,WAAW,EAAE,MAAM,CAAC;IACpB,aAAa,EAAE,MAAM,CAAC;IACtB,YAAY,EAAE,MAAM,CAAC;CACtB;AAMD,wEAAwE;AACxE,eAAO,MAAM,iBAAiB;;;;;;;;;;CAMpB,CAAC;AAEX,KAAK,qBAAqB,GAAG,CAAC,OAAO,iBAAiB,CAAC,CAAC,MAAM,OAAO,iBAAiB,CAAC,CAAC;AAExF;;;GAGG;AACH,qBAAa,aAAc,SAAQ,KAAK;aAGpB,IAAI,EAAE,qBAAqB;aAC3B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC;gBAFjD
,OAAO,EAAE,MAAM,EACC,IAAI,EAAE,qBAAqB,EAC3B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,YAAA;CAMpD;AAMD;;;;;;;;;;;GAWG;AACH,qBAAa,kBAAkB;IAS3B,OAAO,CAAC,QAAQ,CAAC,OAAO;IAR1B,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAoB;IAE1C,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,WAAW,CAAgC;IACnE,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,UAAU,CAAqC;IACvE,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,WAAW,CAAuC;gBAGxE,EAAE,EAAE,eAAe,EACF,OAAO,EAAE,iBAAiB;IAS7C;;;;;;;OAOG;IACG,UAAU,CAAC,KAAK,EAAE,WAAW,EAAE,UAAU,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,CAAC;IAepF;;;;;;;;;;;;;;;;;OAiBG;IACG,aAAa,CAAC,KAAK,EAAE,WAAW,EAAE,UAAU,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,mBAAmB,CAAC;IAkB1F;;;;;;;OAOG;IACG,SAAS,CAAC,KAAK,EAAE,WAAW,EAAE,UAAU,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC;IAgBlF;;;;;;;;;;;OAWG;IACG,YAAY,CAChB,UAAU,EAAE,MAAM,EAClB,MAAM,EAAE,YAAY,EACpB,KAAK,EAAE,WAAW,EAClB,UAAU,CAAC,EAAE,MAAM,GAClB,OAAO,CAAC,gBAAgB,CAAC;IAmE5B;;;OAGG;IACH,OAAO,CAAC,aAAa;IAoBrB;;;OAGG;IACH,OAAO,CAAC,kBAAkB;IAQ1B;;;OAGG;IACH,OAAO,CAAC,gBAAgB;IAYxB;;;;;;;;;;OAUG;IACH,OAAO,CAAC,mBAAmB;IAmI3B;;;OAGG;IACH,OAAO,CAAC,eAAe;IAOvB;;OAEG;IACH,OAAO,CAAC,cAAc;IA6DtB;;;;OAIG;IACH,OAAO,CAAC,SAAS;CAWlB"}
|
|
@@ -0,0 +1,457 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ProvenanceExporter - Export provenance records in JSON, W3C PROV-JSON, and CSV
|
|
3
|
+
*
|
|
4
|
+
* Constitution Compliance:
|
|
5
|
+
* - CP-001: Complete provenance chain for every data item
|
|
6
|
+
* - CP-003: SHA-256 content hashing
|
|
7
|
+
* - CP-005: Full reproducibility via processing params
|
|
8
|
+
*
|
|
9
|
+
* FAIL FAST: All errors throw immediately with detailed error info
|
|
10
|
+
* NO MOCKS: Tests use real DatabaseService
|
|
11
|
+
*/
|
|
12
|
+
import { writeFile } from 'fs/promises';
|
|
13
|
+
import { existsSync, mkdirSync } from 'fs';
|
|
14
|
+
import { dirname } from 'path';
|
|
15
|
+
import { ProvenanceErrorCode } from './tracker.js';
|
|
16
|
+
import { ProvenanceType } from '../../models/provenance.js';
|
|
17
|
+
import { rowToProvenance } from '../storage/database/converters.js';
|
|
18
|
+
// ═══════════════════════════════════════════════════════════════════════════════
|
|
19
|
+
// ERROR HANDLING - FAIL FAST
|
|
20
|
+
// ═══════════════════════════════════════════════════════════════════════════════
|
|
21
|
+
/**
 * Error codes for exporter operations.
 *
 * Starts from every code in ProvenanceErrorCode and adds the
 * exporter-specific ones. The spread comes first, so an exporter key with
 * the same name as an inherited one would take precedence.
 */
export const ExporterErrorCode = {
    // Codes shared with the provenance tracker
    ...ProvenanceErrorCode,
    // Exporter-specific codes
    INVALID_SCOPE: 'INVALID_EXPORT_SCOPE',
    INVALID_FORMAT: 'INVALID_EXPORT_FORMAT',
    FILE_WRITE_ERROR: 'FILE_WRITE_ERROR',
    DOCUMENT_REQUIRED: 'DOCUMENT_ID_REQUIRED_FOR_SCOPE',
};
|
|
29
|
+
/**
 * ExporterError - Typed error for export operations
 * FAIL FAST: Always throw with detailed error information
 */
export class ExporterError extends Error {
    /** Machine-readable error code (one of the ExporterErrorCode values). */
    code;
    /** Optional structured context describing the failure. */
    details;
    /**
     * @param {string} message - Human-readable description of the failure
     * @param {string} code - One of the ExporterErrorCode values
     * @param {Record<string, unknown>} [details] - Structured context for the failure
     */
    constructor(message, code, details) {
        super(message);
        this.code = code;
        this.details = details;
        this.name = 'ExporterError';
        // Restore the prototype of the class that was actually constructed.
        // Using ExporterError.prototype here would strip subclasses of their
        // own prototype, making `instanceof Subclass` false.
        Object.setPrototypeOf(this, new.target.prototype);
    }
}
|
|
44
|
+
// ═══════════════════════════════════════════════════════════════════════════════
|
|
45
|
+
// MAIN CLASS
|
|
46
|
+
// ═══════════════════════════════════════════════════════════════════════════════
|
|
47
|
+
/**
 * ProvenanceExporter - Export provenance records in multiple formats
 *
 * Supports:
 * - JSON: Internal JSON format with full ProvenanceRecord data
 * - W3C PROV-JSON: Standard W3C PROV-JSON format for interoperability
 * - CSV: Tabular format for spreadsheet/analysis tools
 *
 * Scopes:
 * - document: Export all records for a specific root document
 * - database/all: Export all records in the database
 */
export class ProvenanceExporter {
    /** Tracker used to fetch the records of a single root document. */
    tracker;
    /** Connection returned by `db.getConnection()`; used for whole-database reads. */
    rawDb;
    static PROV_PREFIX = 'http://www.w3.org/ns/prov#';
    static OCR_PREFIX = 'http://ocr-provenance.local/ns#';
    static OCRP_PREFIX = 'http://ocr-provenance.local/prov#';

    /**
     * @param db - Database service; only `getConnection()` is used here
     * @param tracker - Provenance tracker; only `getProvenanceByRootDocument()` is used here
     */
    constructor(db, tracker) {
        this.tracker = tracker;
        this.rawDb = db.getConnection();
    }

    // ═══════════════════════════════════════════════════════════════════════════
    // PUBLIC API
    // ═══════════════════════════════════════════════════════════════════════════

    /**
     * Export provenance records as internal JSON format
     *
     * @param scope - 'document' (requires documentId), 'database', or 'all'
     * @param documentId - Required when scope='document'
     * @returns JSONExportResult with full ProvenanceRecord array
     * @throws ExporterError if scope invalid or documentId missing when required
     */
    async exportJSON(scope, documentId) {
        this.validateScope(scope, documentId);
        const records = this.getRecordsForScope(scope, documentId);
        return {
            format: 'json',
            scope,
            document_id: documentId,
            exported_at: new Date().toISOString(),
            record_count: records.length,
            records,
        };
    }

    /**
     * Export provenance records as W3C PROV-JSON format
     *
     * Reference: https://www.w3.org/submissions/prov-json/
     *
     * Mapping:
     * - ProvenanceRecord -> prov:Entity
     * - Processing operation -> prov:Activity (non-DOCUMENT types)
     * - Processor -> prov:Agent
     * - source_id relationship -> wasDerivedFrom
     * - Processing creates record -> wasGeneratedBy
     * - Processor attribution -> wasAttributedTo
     *
     * @param scope - 'document' (requires documentId), 'database', or 'all'
     * @param documentId - Required when scope='document'
     * @returns W3CPROVExportResult with PROV-JSON document
     * @throws ExporterError if scope invalid or documentId missing when required
     */
    async exportW3CPROV(scope, documentId) {
        this.validateScope(scope, documentId);
        const records = this.getRecordsForScope(scope, documentId);
        const provDoc = this.transformToPROVJSON(records);
        return {
            format: 'w3c-prov',
            scope,
            document_id: documentId,
            exported_at: new Date().toISOString(),
            entity_count: Object.keys(provDoc.entity).length,
            activity_count: Object.keys(provDoc.activity).length,
            agent_count: Object.keys(provDoc.agent).length,
            prov_document: provDoc,
        };
    }

    /**
     * Export provenance records as CSV format
     *
     * @param scope - 'document' (requires documentId), 'database', or 'all'
     * @param documentId - Required when scope='document'
     * @returns CSVExportResult with CSV content string
     * @throws ExporterError if scope invalid or documentId missing when required
     */
    async exportCSV(scope, documentId) {
        this.validateScope(scope, documentId);
        const records = this.getRecordsForScope(scope, documentId);
        const csvContent = this.transformToCSV(records);
        return {
            format: 'csv',
            scope,
            document_id: documentId,
            exported_at: new Date().toISOString(),
            record_count: records.length,
            csv_content: csvContent,
        };
    }

    /**
     * Export provenance records to a file
     *
     * Creates parent directories if they don't exist. Any failure while
     * creating the directory or writing the file is reported as an
     * ExporterError with code FILE_WRITE_ERROR.
     *
     * @param outputPath - Absolute path to output file
     * @param format - 'json', 'w3c-prov', or 'csv'
     * @param scope - 'document' (requires documentId), 'database', or 'all'
     * @param documentId - Required when scope='document'
     * @returns FileExportResult with bytes written and path
     * @throws ExporterError if format invalid, scope invalid, or write fails
     */
    async exportToFile(outputPath, format, scope, documentId) {
        // Get content based on format (export methods validate scope internally)
        let content;
        let recordCount;
        switch (format) {
            case 'json': {
                const result = await this.exportJSON(scope, documentId);
                content = JSON.stringify(result, null, 2);
                recordCount = result.record_count;
                break;
            }
            case 'w3c-prov': {
                const result = await this.exportW3CPROV(scope, documentId);
                content = JSON.stringify(result, null, 2);
                // PROV exports have no record list; the entity count stands in for it
                recordCount = result.entity_count;
                break;
            }
            case 'csv': {
                const result = await this.exportCSV(scope, documentId);
                content = result.csv_content;
                recordCount = result.record_count;
                break;
            }
            default:
                throw new ExporterError(`Invalid export format: ${format}. Valid formats: json, w3c-prov, csv`, ExporterErrorCode.INVALID_FORMAT, { providedFormat: format, validFormats: ['json', 'w3c-prov', 'csv'] });
        }
        try {
            // Directory creation lives inside the try block so that an mkdir
            // failure is reported the same way as a write failure.
            const dir = dirname(outputPath);
            if (!existsSync(dir)) {
                mkdirSync(dir, { recursive: true });
            }
            await writeFile(outputPath, content, 'utf-8');
        }
        catch (error) {
            throw new ExporterError(`Failed to write export file: ${outputPath}`, ExporterErrorCode.FILE_WRITE_ERROR, {
                outputPath,
                error: error instanceof Error ? error.message : 'Unknown error',
            });
        }
        return {
            success: true,
            format,
            output_path: outputPath,
            bytes_written: Buffer.byteLength(content, 'utf-8'),
            record_count: recordCount,
        };
    }

    // ═══════════════════════════════════════════════════════════════════════════
    // PRIVATE HELPERS
    // ═══════════════════════════════════════════════════════════════════════════

    /**
     * Validate scope and documentId
     * @throws ExporterError if invalid
     */
    validateScope(scope, documentId) {
        const validScopes = ['document', 'database', 'all'];
        if (!validScopes.includes(scope)) {
            throw new ExporterError(`Invalid export scope: ${scope}. Valid scopes: ${validScopes.join(', ')}`, ExporterErrorCode.INVALID_SCOPE, { providedScope: scope, validScopes });
        }
        if (scope === 'document' && !documentId) {
            throw new ExporterError('documentId is required when scope is "document"', ExporterErrorCode.DOCUMENT_REQUIRED, { scope });
        }
    }

    /**
     * Get provenance records for the given scope
     * Note: validateScope() must be called before this to ensure documentId is present when needed
     */
    getRecordsForScope(scope, documentId) {
        if (scope === 'document') {
            return this.tracker.getProvenanceByRootDocument(documentId);
        }
        // 'database' and 'all' are equivalent - return all records
        return this.getAllProvenance();
    }

    /**
     * Get all provenance records from database
     * Ordered by chain_depth ASC, created_at ASC
     */
    getAllProvenance() {
        // H-5: Use iterate() to avoid double-allocation (.all() + .map())
        const records = [];
        const stmt = this.rawDb.prepare('SELECT * FROM provenance ORDER BY chain_depth ASC, created_at ASC');
        for (const row of stmt.iterate()) {
            records.push(rowToProvenance(row));
        }
        return records;
    }

    /**
     * Transform provenance records to W3C PROV-JSON format
     *
     * Mapping rules:
     * - Each ProvenanceRecord becomes an entity: ocr:<type>-<id>
     * - Non-DOCUMENT types create an activity: ocr:activity-<id>
     * - Unique agents per processor name
     * - wasDerivedFrom links entities with source_id (only when the source
     *   record is part of the exported set)
     * - wasGeneratedBy links entities to their activities
     * - wasAttributedTo links all entities to their processor agents
     *
     * @param records - Provenance records to transform
     * @returns The populated PROV-JSON document
     */
    transformToPROVJSON(records) {
        const doc = {
            prefix: {
                prov: ProvenanceExporter.PROV_PREFIX,
                ocr: ProvenanceExporter.OCR_PREFIX,
                ocrp: ProvenanceExporter.OCRP_PREFIX,
            },
            entity: {},
            activity: {},
            agent: {},
            wasDerivedFrom: {},
            wasGeneratedBy: {},
            wasAttributedTo: {},
        };
        // M-13: Map-based lookup to avoid O(N^2) find() in wasDerivedFrom
        const recordById = new Map(records.map((record) => [record.id, record]));
        // Track unique agents: processor name -> agent ID
        const agents = new Map();
        for (const record of records) {
            // 1. Create entity
            const entityId = `ocr:${record.type.toLowerCase()}-${record.id}`;
            const entity = {
                'prov:type': `ocr:${record.type}`,
                'prov:generatedAtTime': record.created_at,
                'ocr:content_hash': record.content_hash,
                'ocr:chain_depth': record.chain_depth,
                'ocr:root_document_id': record.root_document_id,
                'ocr:processor': record.processor,
                'ocr:processor_version': record.processor_version,
            };
            doc.entity[entityId] = entity;
            // Add location if present
            if (record.location) {
                entity['ocr:location'] = record.location;
            }
            // Add source_path for DOCUMENT type
            if (record.type === ProvenanceType.DOCUMENT && record.source_path) {
                entity['ocr:source_path'] = record.source_path;
            }
            // Add specific attributes for IMAGE type
            if (record.type === ProvenanceType.IMAGE) {
                entity['prov:type'] = 'ocrp:Image';
                if (record.location?.bounding_box) {
                    entity['ocrp:bounding_box'] = record.location.bounding_box;
                }
                // Null check rather than truthiness, so a page number of 0 is kept
                if (record.location?.page_number != null) {
                    entity['ocrp:page_number'] = record.location.page_number;
                }
            }
            // Add specific attributes for VLM_DESCRIPTION type
            if (record.type === ProvenanceType.VLM_DESCRIPTION) {
                entity['prov:type'] = 'ocrp:VLMDescription';
            }
            // Add file_hash if present
            if (record.file_hash) {
                entity['ocr:file_hash'] = record.file_hash;
            }
            // 2. Create activity for non-DOCUMENT types
            if (record.type !== ProvenanceType.DOCUMENT) {
                const activityId = `ocr:activity-${record.id}`;
                const activity = {
                    'prov:type': `ocr:${record.source_type}Activity`,
                    'prov:startTime': record.created_at,
                    'prov:endTime': record.processed_at,
                    'ocr:processor': record.processor,
                    'ocr:processor_version': record.processor_version,
                    'ocr:processing_params': record.processing_params,
                };
                doc.activity[activityId] = activity;
                // Skip both null and undefined durations
                if (record.processing_duration_ms != null) {
                    activity['ocr:processing_duration_ms'] = record.processing_duration_ms;
                }
                // 3. wasGeneratedBy - link entity to activity
                doc.wasGeneratedBy[`ocr:wgb-${record.id}`] = {
                    'prov:entity': entityId,
                    'prov:activity': activityId,
                    'prov:time': record.processed_at,
                };
            }
            // 4. Create/link agent (described by the first record seen for each processor name)
            if (!agents.has(record.processor)) {
                const agentId = `ocr:agent-${this.sanitizeAgentId(record.processor)}`;
                agents.set(record.processor, agentId);
                doc.agent[agentId] = {
                    'prov:type': 'prov:SoftwareAgent',
                    'ocr:name': record.processor,
                    'ocr:version': record.processor_version,
                };
            }
            // 5. wasAttributedTo - link entity to agent
            doc.wasAttributedTo[`ocr:wat-${record.id}`] = {
                'prov:entity': entityId,
                'prov:agent': agents.get(record.processor),
            };
            // 6. wasDerivedFrom - link to source entity if present
            if (record.source_id) {
                // M-13: Use Map lookup instead of O(N) find()
                const sourceRecord = recordById.get(record.source_id);
                if (sourceRecord) {
                    doc.wasDerivedFrom[`ocr:wdf-${record.id}`] = {
                        'prov:generatedEntity': entityId,
                        'prov:usedEntity': `ocr:${sourceRecord.type.toLowerCase()}-${sourceRecord.id}`,
                    };
                }
            }
        }
        return doc;
    }

    /**
     * Sanitize processor name for use as agent ID
     * Only allows alphanumeric and hyphens; every run of other characters
     * becomes a single hyphen, and all leading/trailing hyphens are removed.
     *
     * @param processor - Processor name
     * @returns Lower-cased, sanitized identifier fragment
     */
    sanitizeAgentId(processor) {
        return processor
            .toLowerCase()
            .replace(/[^a-z0-9-]+/g, '-')
            // Strip whole runs of edge hyphens, not just one on each side
            .replace(/^-+|-+$/g, '');
    }

    /**
     * Transform provenance records to CSV format
     *
     * @param records - Provenance records to render
     * @returns Header row followed by one row per record, joined with '\n';
     *          only the header row when there are no records
     */
    transformToCSV(records) {
        const headers = [
            'id',
            'type',
            'created_at',
            'processed_at',
            'source_type',
            'source_path',
            'source_id',
            'root_document_id',
            'content_hash',
            'input_hash',
            'file_hash',
            'processor',
            'processor_version',
            'processing_params',
            'processing_duration_ms',
            'processing_quality_score',
            'parent_id',
            'parent_ids',
            'chain_depth',
            'chain_path',
            'location',
        ];
        const headerRow = headers.join(',');
        if (records.length === 0) {
            return headerRow;
        }
        // Value order must match `headers` above
        const dataRows = records.map((record) => [
            this.escapeCSV(record.id),
            this.escapeCSV(record.type),
            this.escapeCSV(record.created_at),
            this.escapeCSV(record.processed_at),
            this.escapeCSV(record.source_type),
            this.escapeCSV(record.source_path ?? ''),
            this.escapeCSV(record.source_id ?? ''),
            this.escapeCSV(record.root_document_id),
            this.escapeCSV(record.content_hash),
            this.escapeCSV(record.input_hash ?? ''),
            this.escapeCSV(record.file_hash ?? ''),
            this.escapeCSV(record.processor),
            this.escapeCSV(record.processor_version),
            this.escapeCSV(JSON.stringify(record.processing_params)),
            record.processing_duration_ms?.toString() ?? '',
            record.processing_quality_score?.toString() ?? '',
            this.escapeCSV(record.parent_id ?? ''),
            this.escapeCSV(record.parent_ids),
            record.chain_depth.toString(),
            this.escapeCSV(record.chain_path ?? ''),
            this.escapeCSV(record.location ? JSON.stringify(record.location) : ''),
        ].join(','));
        return [headerRow, ...dataRows].join('\n');
    }

    /**
     * Escape a value for CSV
     * - null/undefined become an empty field; other non-strings are stringified
     * - Wrap in quotes if contains comma, quote, or newline
     * - Escape quotes by doubling them
     *
     * @param value - Field value
     * @returns CSV-safe field text
     */
    escapeCSV(value) {
        const text = value == null ? '' : String(value);
        if (/[",\r\n]/.test(text)) {
            return '"' + text.replace(/"/g, '""') + '"';
        }
        return text;
    }
}
|
|
457
|
+
//# sourceMappingURL=exporter.js.map
|