ocr-provenance-mcp 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ocr-provenance-mcp might be problematic. Click here for more details.
- package/.env.example +55 -0
- package/LICENSE +78 -0
- package/README.md +1154 -0
- package/dist/bin-http.d.ts +24 -0
- package/dist/bin-http.d.ts.map +1 -0
- package/dist/bin-http.js +275 -0
- package/dist/bin-http.js.map +1 -0
- package/dist/bin-setup.d.ts +11 -0
- package/dist/bin-setup.d.ts.map +1 -0
- package/dist/bin-setup.js +610 -0
- package/dist/bin-setup.js.map +1 -0
- package/dist/bin.d.ts +16 -0
- package/dist/bin.d.ts.map +1 -0
- package/dist/bin.js +16 -0
- package/dist/bin.js.map +1 -0
- package/dist/index.d.ts +13 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +90 -0
- package/dist/index.js.map +1 -0
- package/dist/models/chunk.d.ts +136 -0
- package/dist/models/chunk.d.ts.map +1 -0
- package/dist/models/chunk.js +27 -0
- package/dist/models/chunk.js.map +1 -0
- package/dist/models/cluster.d.ts +79 -0
- package/dist/models/cluster.d.ts.map +1 -0
- package/dist/models/cluster.js +10 -0
- package/dist/models/cluster.js.map +1 -0
- package/dist/models/comparison.d.ts +62 -0
- package/dist/models/comparison.d.ts.map +1 -0
- package/dist/models/comparison.js +8 -0
- package/dist/models/comparison.js.map +1 -0
- package/dist/models/document.d.ts +104 -0
- package/dist/models/document.d.ts.map +1 -0
- package/dist/models/document.js +15 -0
- package/dist/models/document.js.map +1 -0
- package/dist/models/embedding.d.ts +87 -0
- package/dist/models/embedding.d.ts.map +1 -0
- package/dist/models/embedding.js +23 -0
- package/dist/models/embedding.js.map +1 -0
- package/dist/models/extraction.d.ts +15 -0
- package/dist/models/extraction.d.ts.map +1 -0
- package/dist/models/extraction.js +2 -0
- package/dist/models/extraction.js.map +1 -0
- package/dist/models/form-fill.d.ts +23 -0
- package/dist/models/form-fill.d.ts.map +1 -0
- package/dist/models/form-fill.js +2 -0
- package/dist/models/form-fill.js.map +1 -0
- package/dist/models/image.d.ts +177 -0
- package/dist/models/image.d.ts.map +1 -0
- package/dist/models/image.js +8 -0
- package/dist/models/image.js.map +1 -0
- package/dist/models/index.d.ts +14 -0
- package/dist/models/index.d.ts.map +1 -0
- package/dist/models/index.js +22 -0
- package/dist/models/index.js.map +1 -0
- package/dist/models/provenance.d.ts +174 -0
- package/dist/models/provenance.d.ts.map +1 -0
- package/dist/models/provenance.js +53 -0
- package/dist/models/provenance.js.map +1 -0
- package/dist/models/uploaded-file.d.ts +20 -0
- package/dist/models/uploaded-file.d.ts.map +1 -0
- package/dist/models/uploaded-file.js +2 -0
- package/dist/models/uploaded-file.js.map +1 -0
- package/dist/server/errors.d.ts +93 -0
- package/dist/server/errors.d.ts.map +1 -0
- package/dist/server/errors.js +256 -0
- package/dist/server/errors.js.map +1 -0
- package/dist/server/events.d.ts +36 -0
- package/dist/server/events.d.ts.map +1 -0
- package/dist/server/events.js +48 -0
- package/dist/server/events.js.map +1 -0
- package/dist/server/permissions.d.ts +26 -0
- package/dist/server/permissions.d.ts.map +1 -0
- package/dist/server/permissions.js +194 -0
- package/dist/server/permissions.js.map +1 -0
- package/dist/server/register-tools.d.ts +25 -0
- package/dist/server/register-tools.d.ts.map +1 -0
- package/dist/server/register-tools.js +102 -0
- package/dist/server/register-tools.js.map +1 -0
- package/dist/server/startup.d.ts +16 -0
- package/dist/server/startup.d.ts.map +1 -0
- package/dist/server/startup.js +37 -0
- package/dist/server/startup.js.map +1 -0
- package/dist/server/state.d.ts +166 -0
- package/dist/server/state.d.ts.map +1 -0
- package/dist/server/state.js +424 -0
- package/dist/server/state.js.map +1 -0
- package/dist/server/transports/http-transport.d.ts +37 -0
- package/dist/server/transports/http-transport.d.ts.map +1 -0
- package/dist/server/transports/http-transport.js +204 -0
- package/dist/server/transports/http-transport.js.map +1 -0
- package/dist/server/transports/index.d.ts +9 -0
- package/dist/server/transports/index.d.ts.map +1 -0
- package/dist/server/transports/index.js +9 -0
- package/dist/server/transports/index.js.map +1 -0
- package/dist/server/transports/session-manager.d.ts +40 -0
- package/dist/server/transports/session-manager.d.ts.map +1 -0
- package/dist/server/transports/session-manager.js +74 -0
- package/dist/server/transports/session-manager.js.map +1 -0
- package/dist/server/types.d.ts +82 -0
- package/dist/server/types.d.ts.map +1 -0
- package/dist/server/types.js +14 -0
- package/dist/server/types.js.map +1 -0
- package/dist/services/audit.d.ts +26 -0
- package/dist/services/audit.d.ts.map +1 -0
- package/dist/services/audit.js +43 -0
- package/dist/services/audit.js.map +1 -0
- package/dist/services/chunking/chunk-deduplicator.d.ts +33 -0
- package/dist/services/chunking/chunk-deduplicator.d.ts.map +1 -0
- package/dist/services/chunking/chunk-deduplicator.js +46 -0
- package/dist/services/chunking/chunk-deduplicator.js.map +1 -0
- package/dist/services/chunking/chunk-merger.d.ts +26 -0
- package/dist/services/chunking/chunk-merger.d.ts.map +1 -0
- package/dist/services/chunking/chunk-merger.js +94 -0
- package/dist/services/chunking/chunk-merger.js.map +1 -0
- package/dist/services/chunking/chunker.d.ts +62 -0
- package/dist/services/chunking/chunker.d.ts.map +1 -0
- package/dist/services/chunking/chunker.js +566 -0
- package/dist/services/chunking/chunker.js.map +1 -0
- package/dist/services/chunking/heading-normalizer.d.ts +33 -0
- package/dist/services/chunking/heading-normalizer.d.ts.map +1 -0
- package/dist/services/chunking/heading-normalizer.js +101 -0
- package/dist/services/chunking/heading-normalizer.js.map +1 -0
- package/dist/services/chunking/json-block-analyzer.d.ts +163 -0
- package/dist/services/chunking/json-block-analyzer.d.ts.map +1 -0
- package/dist/services/chunking/json-block-analyzer.js +1033 -0
- package/dist/services/chunking/json-block-analyzer.js.map +1 -0
- package/dist/services/chunking/markdown-parser.d.ts +75 -0
- package/dist/services/chunking/markdown-parser.d.ts.map +1 -0
- package/dist/services/chunking/markdown-parser.js +428 -0
- package/dist/services/chunking/markdown-parser.js.map +1 -0
- package/dist/services/chunking/text-normalizer.d.ts +20 -0
- package/dist/services/chunking/text-normalizer.d.ts.map +1 -0
- package/dist/services/chunking/text-normalizer.js +36 -0
- package/dist/services/chunking/text-normalizer.js.map +1 -0
- package/dist/services/clm/contract-schemas.d.ts +36 -0
- package/dist/services/clm/contract-schemas.d.ts.map +1 -0
- package/dist/services/clm/contract-schemas.js +92 -0
- package/dist/services/clm/contract-schemas.js.map +1 -0
- package/dist/services/clm/summarization.d.ts +46 -0
- package/dist/services/clm/summarization.d.ts.map +1 -0
- package/dist/services/clm/summarization.js +61 -0
- package/dist/services/clm/summarization.js.map +1 -0
- package/dist/services/clustering/clustering-service.d.ts +58 -0
- package/dist/services/clustering/clustering-service.d.ts.map +1 -0
- package/dist/services/clustering/clustering-service.js +467 -0
- package/dist/services/clustering/clustering-service.js.map +1 -0
- package/dist/services/comparison/diff-service.d.ts +41 -0
- package/dist/services/comparison/diff-service.d.ts.map +1 -0
- package/dist/services/comparison/diff-service.js +120 -0
- package/dist/services/comparison/diff-service.js.map +1 -0
- package/dist/services/embedding/embedder.d.ts +55 -0
- package/dist/services/embedding/embedder.d.ts.map +1 -0
- package/dist/services/embedding/embedder.js +202 -0
- package/dist/services/embedding/embedder.js.map +1 -0
- package/dist/services/embedding/nomic.d.ts +67 -0
- package/dist/services/embedding/nomic.d.ts.map +1 -0
- package/dist/services/embedding/nomic.js +280 -0
- package/dist/services/embedding/nomic.js.map +1 -0
- package/dist/services/gemini/circuit-breaker.d.ts +106 -0
- package/dist/services/gemini/circuit-breaker.d.ts.map +1 -0
- package/dist/services/gemini/circuit-breaker.js +237 -0
- package/dist/services/gemini/circuit-breaker.js.map +1 -0
- package/dist/services/gemini/client.d.ts +173 -0
- package/dist/services/gemini/client.d.ts.map +1 -0
- package/dist/services/gemini/client.js +483 -0
- package/dist/services/gemini/client.js.map +1 -0
- package/dist/services/gemini/config.d.ts +116 -0
- package/dist/services/gemini/config.d.ts.map +1 -0
- package/dist/services/gemini/config.js +118 -0
- package/dist/services/gemini/config.js.map +1 -0
- package/dist/services/gemini/index.d.ts +9 -0
- package/dist/services/gemini/index.d.ts.map +1 -0
- package/dist/services/gemini/index.js +13 -0
- package/dist/services/gemini/index.js.map +1 -0
- package/dist/services/gemini/rate-limiter.d.ts +62 -0
- package/dist/services/gemini/rate-limiter.d.ts.map +1 -0
- package/dist/services/gemini/rate-limiter.js +120 -0
- package/dist/services/gemini/rate-limiter.js.map +1 -0
- package/dist/services/images/extractor.d.ts +88 -0
- package/dist/services/images/extractor.d.ts.map +1 -0
- package/dist/services/images/extractor.js +340 -0
- package/dist/services/images/extractor.js.map +1 -0
- package/dist/services/images/optimizer.d.ts +130 -0
- package/dist/services/images/optimizer.d.ts.map +1 -0
- package/dist/services/images/optimizer.js +228 -0
- package/dist/services/images/optimizer.js.map +1 -0
- package/dist/services/ocr/datalab.d.ts +64 -0
- package/dist/services/ocr/datalab.d.ts.map +1 -0
- package/dist/services/ocr/datalab.js +425 -0
- package/dist/services/ocr/datalab.js.map +1 -0
- package/dist/services/ocr/errors.d.ts +38 -0
- package/dist/services/ocr/errors.d.ts.map +1 -0
- package/dist/services/ocr/errors.js +83 -0
- package/dist/services/ocr/errors.js.map +1 -0
- package/dist/services/ocr/file-manager.d.ts +76 -0
- package/dist/services/ocr/file-manager.d.ts.map +1 -0
- package/dist/services/ocr/file-manager.js +238 -0
- package/dist/services/ocr/file-manager.js.map +1 -0
- package/dist/services/ocr/form-fill.d.ts +48 -0
- package/dist/services/ocr/form-fill.d.ts.map +1 -0
- package/dist/services/ocr/form-fill.js +213 -0
- package/dist/services/ocr/form-fill.js.map +1 -0
- package/dist/services/ocr/processor.d.ts +95 -0
- package/dist/services/ocr/processor.d.ts.map +1 -0
- package/dist/services/ocr/processor.js +259 -0
- package/dist/services/ocr/processor.js.map +1 -0
- package/dist/services/provenance/agent-metadata.d.ts +82 -0
- package/dist/services/provenance/agent-metadata.d.ts.map +1 -0
- package/dist/services/provenance/agent-metadata.js +106 -0
- package/dist/services/provenance/agent-metadata.js.map +1 -0
- package/dist/services/provenance/chain-hash.d.ts +57 -0
- package/dist/services/provenance/chain-hash.d.ts.map +1 -0
- package/dist/services/provenance/chain-hash.js +131 -0
- package/dist/services/provenance/chain-hash.js.map +1 -0
- package/dist/services/provenance/exporter.d.ts +202 -0
- package/dist/services/provenance/exporter.d.ts.map +1 -0
- package/dist/services/provenance/exporter.js +457 -0
- package/dist/services/provenance/exporter.js.map +1 -0
- package/dist/services/provenance/index.d.ts +15 -0
- package/dist/services/provenance/index.d.ts.map +1 -0
- package/dist/services/provenance/index.js +17 -0
- package/dist/services/provenance/index.js.map +1 -0
- package/dist/services/provenance/tracker.d.ts +138 -0
- package/dist/services/provenance/tracker.d.ts.map +1 -0
- package/dist/services/provenance/tracker.js +293 -0
- package/dist/services/provenance/tracker.js.map +1 -0
- package/dist/services/provenance/verifier.d.ts +153 -0
- package/dist/services/provenance/verifier.d.ts.map +1 -0
- package/dist/services/provenance/verifier.js +536 -0
- package/dist/services/provenance/verifier.js.map +1 -0
- package/dist/services/python-pool.d.ts +70 -0
- package/dist/services/python-pool.d.ts.map +1 -0
- package/dist/services/python-pool.js +265 -0
- package/dist/services/python-pool.js.map +1 -0
- package/dist/services/search/bm25.d.ts +180 -0
- package/dist/services/search/bm25.d.ts.map +1 -0
- package/dist/services/search/bm25.js +656 -0
- package/dist/services/search/bm25.js.map +1 -0
- package/dist/services/search/fusion.d.ts +103 -0
- package/dist/services/search/fusion.d.ts.map +1 -0
- package/dist/services/search/fusion.js +122 -0
- package/dist/services/search/fusion.js.map +1 -0
- package/dist/services/search/local-reranker.d.ts +30 -0
- package/dist/services/search/local-reranker.d.ts.map +1 -0
- package/dist/services/search/local-reranker.js +123 -0
- package/dist/services/search/local-reranker.js.map +1 -0
- package/dist/services/search/quality.d.ts +11 -0
- package/dist/services/search/quality.d.ts.map +1 -0
- package/dist/services/search/quality.js +17 -0
- package/dist/services/search/quality.js.map +1 -0
- package/dist/services/search/query-classifier.d.ts +34 -0
- package/dist/services/search/query-classifier.d.ts.map +1 -0
- package/dist/services/search/query-classifier.js +114 -0
- package/dist/services/search/query-classifier.js.map +1 -0
- package/dist/services/search/query-expander.d.ts +73 -0
- package/dist/services/search/query-expander.d.ts.map +1 -0
- package/dist/services/search/query-expander.js +281 -0
- package/dist/services/search/query-expander.js.map +1 -0
- package/dist/services/search/reranker.d.ts +44 -0
- package/dist/services/search/reranker.d.ts.map +1 -0
- package/dist/services/search/reranker.js +101 -0
- package/dist/services/search/reranker.js.map +1 -0
- package/dist/services/storage/database/annotation-operations.d.ts +113 -0
- package/dist/services/storage/database/annotation-operations.d.ts.map +1 -0
- package/dist/services/storage/database/annotation-operations.js +177 -0
- package/dist/services/storage/database/annotation-operations.js.map +1 -0
- package/dist/services/storage/database/approval-operations.d.ts +132 -0
- package/dist/services/storage/database/approval-operations.d.ts.map +1 -0
- package/dist/services/storage/database/approval-operations.js +206 -0
- package/dist/services/storage/database/approval-operations.js.map +1 -0
- package/dist/services/storage/database/chunk-operations.d.ts +132 -0
- package/dist/services/storage/database/chunk-operations.d.ts.map +1 -0
- package/dist/services/storage/database/chunk-operations.js +306 -0
- package/dist/services/storage/database/chunk-operations.js.map +1 -0
- package/dist/services/storage/database/cluster-operations.d.ts +97 -0
- package/dist/services/storage/database/cluster-operations.d.ts.map +1 -0
- package/dist/services/storage/database/cluster-operations.js +258 -0
- package/dist/services/storage/database/cluster-operations.js.map +1 -0
- package/dist/services/storage/database/comparison-operations.d.ts +41 -0
- package/dist/services/storage/database/comparison-operations.d.ts.map +1 -0
- package/dist/services/storage/database/comparison-operations.js +65 -0
- package/dist/services/storage/database/comparison-operations.js.map +1 -0
- package/dist/services/storage/database/converters.d.ts +36 -0
- package/dist/services/storage/database/converters.d.ts.map +1 -0
- package/dist/services/storage/database/converters.js +244 -0
- package/dist/services/storage/database/converters.js.map +1 -0
- package/dist/services/storage/database/document-operations.d.ts +145 -0
- package/dist/services/storage/database/document-operations.d.ts.map +1 -0
- package/dist/services/storage/database/document-operations.js +498 -0
- package/dist/services/storage/database/document-operations.js.map +1 -0
- package/dist/services/storage/database/embedding-operations.d.ts +130 -0
- package/dist/services/storage/database/embedding-operations.d.ts.map +1 -0
- package/dist/services/storage/database/embedding-operations.js +315 -0
- package/dist/services/storage/database/embedding-operations.js.map +1 -0
- package/dist/services/storage/database/extraction-operations.d.ts +47 -0
- package/dist/services/storage/database/extraction-operations.d.ts.map +1 -0
- package/dist/services/storage/database/extraction-operations.js +85 -0
- package/dist/services/storage/database/extraction-operations.js.map +1 -0
- package/dist/services/storage/database/form-fill-operations.d.ts +58 -0
- package/dist/services/storage/database/form-fill-operations.d.ts.map +1 -0
- package/dist/services/storage/database/form-fill-operations.js +116 -0
- package/dist/services/storage/database/form-fill-operations.js.map +1 -0
- package/dist/services/storage/database/helpers.d.ts +29 -0
- package/dist/services/storage/database/helpers.d.ts.map +1 -0
- package/dist/services/storage/database/helpers.js +55 -0
- package/dist/services/storage/database/helpers.js.map +1 -0
- package/dist/services/storage/database/image-operations.d.ts +202 -0
- package/dist/services/storage/database/image-operations.d.ts.map +1 -0
- package/dist/services/storage/database/image-operations.js +484 -0
- package/dist/services/storage/database/image-operations.js.map +1 -0
- package/dist/services/storage/database/index.d.ts +13 -0
- package/dist/services/storage/database/index.d.ts.map +1 -0
- package/dist/services/storage/database/index.js +16 -0
- package/dist/services/storage/database/index.js.map +1 -0
- package/dist/services/storage/database/lock-operations.d.ts +59 -0
- package/dist/services/storage/database/lock-operations.d.ts.map +1 -0
- package/dist/services/storage/database/lock-operations.js +89 -0
- package/dist/services/storage/database/lock-operations.js.map +1 -0
- package/dist/services/storage/database/obligation-operations.d.ts +88 -0
- package/dist/services/storage/database/obligation-operations.d.ts.map +1 -0
- package/dist/services/storage/database/obligation-operations.js +206 -0
- package/dist/services/storage/database/obligation-operations.js.map +1 -0
- package/dist/services/storage/database/ocr-operations.d.ts +33 -0
- package/dist/services/storage/database/ocr-operations.d.ts.map +1 -0
- package/dist/services/storage/database/ocr-operations.js +70 -0
- package/dist/services/storage/database/ocr-operations.js.map +1 -0
- package/dist/services/storage/database/playbook-operations.d.ts +72 -0
- package/dist/services/storage/database/playbook-operations.d.ts.map +1 -0
- package/dist/services/storage/database/playbook-operations.js +247 -0
- package/dist/services/storage/database/playbook-operations.js.map +1 -0
- package/dist/services/storage/database/provenance-operations.d.ts +112 -0
- package/dist/services/storage/database/provenance-operations.d.ts.map +1 -0
- package/dist/services/storage/database/provenance-operations.js +251 -0
- package/dist/services/storage/database/provenance-operations.js.map +1 -0
- package/dist/services/storage/database/service.d.ts +142 -0
- package/dist/services/storage/database/service.d.ts.map +1 -0
- package/dist/services/storage/database/service.js +310 -0
- package/dist/services/storage/database/service.js.map +1 -0
- package/dist/services/storage/database/static-operations.d.ts +30 -0
- package/dist/services/storage/database/static-operations.d.ts.map +1 -0
- package/dist/services/storage/database/static-operations.js +218 -0
- package/dist/services/storage/database/static-operations.js.map +1 -0
- package/dist/services/storage/database/stats-operations.d.ts +101 -0
- package/dist/services/storage/database/stats-operations.d.ts.map +1 -0
- package/dist/services/storage/database/stats-operations.js +394 -0
- package/dist/services/storage/database/stats-operations.js.map +1 -0
- package/dist/services/storage/database/tag-operations.d.ts +76 -0
- package/dist/services/storage/database/tag-operations.d.ts.map +1 -0
- package/dist/services/storage/database/tag-operations.js +178 -0
- package/dist/services/storage/database/tag-operations.js.map +1 -0
- package/dist/services/storage/database/types.d.ts +286 -0
- package/dist/services/storage/database/types.d.ts.map +1 -0
- package/dist/services/storage/database/types.js +39 -0
- package/dist/services/storage/database/types.js.map +1 -0
- package/dist/services/storage/database/upload-operations.d.ts +71 -0
- package/dist/services/storage/database/upload-operations.d.ts.map +1 -0
- package/dist/services/storage/database/upload-operations.js +124 -0
- package/dist/services/storage/database/upload-operations.js.map +1 -0
- package/dist/services/storage/database/user-operations.d.ts +102 -0
- package/dist/services/storage/database/user-operations.d.ts.map +1 -0
- package/dist/services/storage/database/user-operations.js +151 -0
- package/dist/services/storage/database/user-operations.js.map +1 -0
- package/dist/services/storage/database/workflow-operations.d.ts +98 -0
- package/dist/services/storage/database/workflow-operations.d.ts.map +1 -0
- package/dist/services/storage/database/workflow-operations.js +157 -0
- package/dist/services/storage/database/workflow-operations.js.map +1 -0
- package/dist/services/storage/database.d.ts +16 -0
- package/dist/services/storage/database.d.ts.map +1 -0
- package/dist/services/storage/database.js +15 -0
- package/dist/services/storage/database.js.map +1 -0
- package/dist/services/storage/index.d.ts +10 -0
- package/dist/services/storage/index.d.ts.map +1 -0
- package/dist/services/storage/index.js +10 -0
- package/dist/services/storage/index.js.map +1 -0
- package/dist/services/storage/migrations/index.d.ts +16 -0
- package/dist/services/storage/migrations/index.d.ts.map +1 -0
- package/dist/services/storage/migrations/index.js +20 -0
- package/dist/services/storage/migrations/index.js.map +1 -0
- package/dist/services/storage/migrations/operations.d.ts +40 -0
- package/dist/services/storage/migrations/operations.d.ts.map +1 -0
- package/dist/services/storage/migrations/operations.js +2910 -0
- package/dist/services/storage/migrations/operations.js.map +1 -0
- package/dist/services/storage/migrations/schema-definitions.d.ts +306 -0
- package/dist/services/storage/migrations/schema-definitions.d.ts.map +1 -0
- package/dist/services/storage/migrations/schema-definitions.js +1006 -0
- package/dist/services/storage/migrations/schema-definitions.js.map +1 -0
- package/dist/services/storage/migrations/schema-helpers.d.ts +50 -0
- package/dist/services/storage/migrations/schema-helpers.d.ts.map +1 -0
- package/dist/services/storage/migrations/schema-helpers.js +176 -0
- package/dist/services/storage/migrations/schema-helpers.js.map +1 -0
- package/dist/services/storage/migrations/types.d.ts +15 -0
- package/dist/services/storage/migrations/types.d.ts.map +1 -0
- package/dist/services/storage/migrations/types.js +21 -0
- package/dist/services/storage/migrations/types.js.map +1 -0
- package/dist/services/storage/migrations/verification.d.ts +20 -0
- package/dist/services/storage/migrations/verification.d.ts.map +1 -0
- package/dist/services/storage/migrations/verification.js +78 -0
- package/dist/services/storage/migrations/verification.js.map +1 -0
- package/dist/services/storage/migrations.d.ts +16 -0
- package/dist/services/storage/migrations.d.ts.map +1 -0
- package/dist/services/storage/migrations.js +17 -0
- package/dist/services/storage/migrations.js.map +1 -0
- package/dist/services/storage/types.d.ts +12 -0
- package/dist/services/storage/types.d.ts.map +1 -0
- package/dist/services/storage/types.js +5 -0
- package/dist/services/storage/types.js.map +1 -0
- package/dist/services/storage/vector.d.ts +208 -0
- package/dist/services/storage/vector.d.ts.map +1 -0
- package/dist/services/storage/vector.js +526 -0
- package/dist/services/storage/vector.js.map +1 -0
- package/dist/services/vlm/pipeline.d.ts +194 -0
- package/dist/services/vlm/pipeline.d.ts.map +1 -0
- package/dist/services/vlm/pipeline.js +800 -0
- package/dist/services/vlm/pipeline.js.map +1 -0
- package/dist/services/vlm/prompts.d.ts +171 -0
- package/dist/services/vlm/prompts.d.ts.map +1 -0
- package/dist/services/vlm/prompts.js +229 -0
- package/dist/services/vlm/prompts.js.map +1 -0
- package/dist/services/vlm/service.d.ts +174 -0
- package/dist/services/vlm/service.d.ts.map +1 -0
- package/dist/services/vlm/service.js +256 -0
- package/dist/services/vlm/service.js.map +1 -0
- package/dist/services/webhook-delivery.d.ts +4 -0
- package/dist/services/webhook-delivery.d.ts.map +1 -0
- package/dist/services/webhook-delivery.js +140 -0
- package/dist/services/webhook-delivery.js.map +1 -0
- package/dist/tools/chunks.d.ts +19 -0
- package/dist/tools/chunks.d.ts.map +1 -0
- package/dist/tools/chunks.js +392 -0
- package/dist/tools/chunks.js.map +1 -0
- package/dist/tools/clm.d.ts +16 -0
- package/dist/tools/clm.d.ts.map +1 -0
- package/dist/tools/clm.js +668 -0
- package/dist/tools/clm.js.map +1 -0
- package/dist/tools/clustering.d.ts +13 -0
- package/dist/tools/clustering.d.ts.map +1 -0
- package/dist/tools/clustering.js +498 -0
- package/dist/tools/clustering.js.map +1 -0
- package/dist/tools/collaboration.d.ts +15 -0
- package/dist/tools/collaboration.d.ts.map +1 -0
- package/dist/tools/collaboration.js +516 -0
- package/dist/tools/collaboration.js.map +1 -0
- package/dist/tools/comparison.d.ts +13 -0
- package/dist/tools/comparison.d.ts.map +1 -0
- package/dist/tools/comparison.js +735 -0
- package/dist/tools/comparison.js.map +1 -0
- package/dist/tools/compliance.d.ts +15 -0
- package/dist/tools/compliance.d.ts.map +1 -0
- package/dist/tools/compliance.js +640 -0
- package/dist/tools/compliance.js.map +1 -0
- package/dist/tools/config.d.ts +19 -0
- package/dist/tools/config.d.ts.map +1 -0
- package/dist/tools/config.js +213 -0
- package/dist/tools/config.js.map +1 -0
- package/dist/tools/database.d.ts +62 -0
- package/dist/tools/database.d.ts.map +1 -0
- package/dist/tools/database.js +288 -0
- package/dist/tools/database.js.map +1 -0
- package/dist/tools/documents.d.ts +61 -0
- package/dist/tools/documents.d.ts.map +1 -0
- package/dist/tools/documents.js +1624 -0
- package/dist/tools/documents.js.map +1 -0
- package/dist/tools/embeddings.d.ts +14 -0
- package/dist/tools/embeddings.d.ts.map +1 -0
- package/dist/tools/embeddings.js +626 -0
- package/dist/tools/embeddings.js.map +1 -0
- package/dist/tools/evaluation.d.ts +25 -0
- package/dist/tools/evaluation.d.ts.map +1 -0
- package/dist/tools/evaluation.js +523 -0
- package/dist/tools/evaluation.js.map +1 -0
- package/dist/tools/events.d.ts +16 -0
- package/dist/tools/events.d.ts.map +1 -0
- package/dist/tools/events.js +493 -0
- package/dist/tools/events.js.map +1 -0
- package/dist/tools/extraction-structured.d.ts +13 -0
- package/dist/tools/extraction-structured.d.ts.map +1 -0
- package/dist/tools/extraction-structured.js +390 -0
- package/dist/tools/extraction-structured.js.map +1 -0
- package/dist/tools/extraction.d.ts +24 -0
- package/dist/tools/extraction.d.ts.map +1 -0
- package/dist/tools/extraction.js +424 -0
- package/dist/tools/extraction.js.map +1 -0
- package/dist/tools/file-management.d.ts +14 -0
- package/dist/tools/file-management.d.ts.map +1 -0
- package/dist/tools/file-management.js +523 -0
- package/dist/tools/file-management.js.map +1 -0
- package/dist/tools/form-fill.d.ts +13 -0
- package/dist/tools/form-fill.d.ts.map +1 -0
- package/dist/tools/form-fill.js +250 -0
- package/dist/tools/form-fill.js.map +1 -0
- package/dist/tools/health.d.ts +19 -0
- package/dist/tools/health.d.ts.map +1 -0
- package/dist/tools/health.js +229 -0
- package/dist/tools/health.js.map +1 -0
- package/dist/tools/images.d.ts +54 -0
- package/dist/tools/images.d.ts.map +1 -0
- package/dist/tools/images.js +787 -0
- package/dist/tools/images.js.map +1 -0
- package/dist/tools/ingestion.d.ts +94 -0
- package/dist/tools/ingestion.d.ts.map +1 -0
- package/dist/tools/ingestion.js +1659 -0
- package/dist/tools/ingestion.js.map +1 -0
- package/dist/tools/intelligence.d.ts +18 -0
- package/dist/tools/intelligence.d.ts.map +1 -0
- package/dist/tools/intelligence.js +1039 -0
- package/dist/tools/intelligence.js.map +1 -0
- package/dist/tools/provenance.d.ts +51 -0
- package/dist/tools/provenance.d.ts.map +1 -0
- package/dist/tools/provenance.js +691 -0
- package/dist/tools/provenance.js.map +1 -0
- package/dist/tools/reports.d.ts +41 -0
- package/dist/tools/reports.d.ts.map +1 -0
- package/dist/tools/reports.js +1394 -0
- package/dist/tools/reports.js.map +1 -0
- package/dist/tools/search.d.ts +35 -0
- package/dist/tools/search.d.ts.map +1 -0
- package/dist/tools/search.js +2528 -0
- package/dist/tools/search.js.map +1 -0
- package/dist/tools/shared.d.ts +52 -0
- package/dist/tools/shared.d.ts.map +1 -0
- package/dist/tools/shared.js +54 -0
- package/dist/tools/shared.js.map +1 -0
- package/dist/tools/tags.d.ts +15 -0
- package/dist/tools/tags.d.ts.map +1 -0
- package/dist/tools/tags.js +287 -0
- package/dist/tools/tags.js.map +1 -0
- package/dist/tools/timeline.d.ts +15 -0
- package/dist/tools/timeline.d.ts.map +1 -0
- package/dist/tools/timeline.js +14 -0
- package/dist/tools/timeline.js.map +1 -0
- package/dist/tools/users.d.ts +14 -0
- package/dist/tools/users.d.ts.map +1 -0
- package/dist/tools/users.js +257 -0
- package/dist/tools/users.js.map +1 -0
- package/dist/tools/vlm.d.ts +40 -0
- package/dist/tools/vlm.d.ts.map +1 -0
- package/dist/tools/vlm.js +475 -0
- package/dist/tools/vlm.js.map +1 -0
- package/dist/tools/workflow.d.ts +16 -0
- package/dist/tools/workflow.d.ts.map +1 -0
- package/dist/tools/workflow.js +495 -0
- package/dist/tools/workflow.js.map +1 -0
- package/dist/utils/backoff.d.ts +53 -0
- package/dist/utils/backoff.d.ts.map +1 -0
- package/dist/utils/backoff.js +78 -0
- package/dist/utils/backoff.js.map +1 -0
- package/dist/utils/config-persistence.d.ts +33 -0
- package/dist/utils/config-persistence.d.ts.map +1 -0
- package/dist/utils/config-persistence.js +61 -0
- package/dist/utils/config-persistence.js.map +1 -0
- package/dist/utils/hash.d.ts +65 -0
- package/dist/utils/hash.d.ts.map +1 -0
- package/dist/utils/hash.js +146 -0
- package/dist/utils/hash.js.map +1 -0
- package/dist/utils/math.d.ts +21 -0
- package/dist/utils/math.d.ts.map +1 -0
- package/dist/utils/math.js +39 -0
- package/dist/utils/math.js.map +1 -0
- package/dist/utils/validation.d.ts +697 -0
- package/dist/utils/validation.d.ts.map +1 -0
- package/dist/utils/validation.js +529 -0
- package/dist/utils/validation.js.map +1 -0
- package/package.json +96 -0
- package/python/.gitkeep +0 -0
- package/python/__init__.py +104 -0
- package/python/clustering_worker.py +440 -0
- package/python/docx_image_extractor.py +524 -0
- package/python/embedding_worker.py +552 -0
- package/python/file_manager_worker.py +564 -0
- package/python/form_fill_worker.py +399 -0
- package/python/gpu_utils.py +582 -0
- package/python/image_extractor.py +317 -0
- package/python/image_optimizer.py +444 -0
- package/python/ocr_worker.py +712 -0
- package/python/pyproject.toml +76 -0
- package/python/requirements.txt +51 -0
- package/python/reranker_worker.py +87 -0
|
@@ -0,0 +1,566 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Hybrid Section-Aware Chunking Service for OCR Provenance MCP System
|
|
3
|
+
*
|
|
4
|
+
* Uses markdown structure (headings, paragraphs, tables), JSON block data
|
|
5
|
+
* (for atomic region detection), and page offsets (for page tracking) to
|
|
6
|
+
* produce semantically coherent chunks with provenance records (chain_depth=2).
|
|
7
|
+
*
|
|
8
|
+
* @module services/chunking/chunker
|
|
9
|
+
*/
|
|
10
|
+
import { DEFAULT_CHUNKING_CONFIG, getOverlapCharacters, } from '../../models/chunk.js';
|
|
11
|
+
import { ProvenanceType, } from '../../models/provenance.js';
|
|
12
|
+
import { parseMarkdownBlocks, buildSectionHierarchy, getPageNumberForOffset, } from './markdown-parser.js';
|
|
13
|
+
import { findAtomicRegions, isOffsetInAtomicRegion, extractTableStructures, extractHeadersFromMarkdown, countTableDimensionsFromMarkdown, extractFirstDataRow, generateTableSummary, } from './json-block-analyzer.js';
|
|
14
|
+
import { normalizeHeadingLevels } from './heading-normalizer.js';
|
|
15
|
+
import { mergeHeadingOnlyChunks } from './chunk-merger.js';
|
|
16
|
+
/**
 * Strip HTML markup from text and collapse whitespace.
 * Used to clean table chunk text before FTS5 indexing.
 *
 * Only spans that are shaped like markup are removed: opening/closing tags
 * (`<td>`, `</td>`, `<br/>`), declarations/comments (`<!...>`) and processing
 * instructions (`<?...>`). A bare `<` that is followed by a digit, a space or
 * any other non-tag character is kept, so comparison operators in table cells
 * (e.g. `p <0.05 | n >10`) are no longer swallowed together with the data and
 * column separators that happen to sit between a `<` and the next `>`.
 *
 * @param {string} text - Raw chunk text, possibly containing HTML tags
 * @returns {string} Text with tags removed and every run of two or more
 *   whitespace characters replaced by a single space
 */
function stripHtmlForFTS(text) {
    // A tag must start with a letter (optionally preceded by '/'), '!' or '?'.
    const markupPattern = /<(?:\/?[A-Za-z]|[!?])[^>]*>/g;
    return text.replace(markupPattern, '').replace(/\s{2,}/g, ' ');
}
|
|
23
|
+
/**
 * Derive table metadata straight from a chunk's text by reading it as a
 * pipe-delimited markdown table.
 *
 * Serves as the fallback for chunks whose table could not be matched to a
 * table structure by offset (the original author notes this is common for
 * DOCX tables).
 *
 * @param {string} chunkText - Text of the chunk to inspect
 * @returns {{columnHeaders: *, rowCount: *, columnCount: *, summary: *} | null}
 *   Metadata object, or null when fewer than two lines contain a `|`
 */
function createTableMetadataFromText(chunkText) {
    // Cheap pre-check: a markdown table needs at least two pipe-bearing lines.
    let pipeLineCount = 0;
    for (const line of chunkText.split('\n')) {
        if (line.includes('|')) {
            pipeLineCount += 1;
        }
    }
    if (pipeLineCount < 2) {
        return null;
    }
    const columnHeaders = extractHeadersFromMarkdown(chunkText);
    const dimensions = countTableDimensionsFromMarkdown(chunkText);
    const firstRowValues = extractFirstDataRow(chunkText);
    const summary = generateTableSummary(columnHeaders, dimensions.rowCount, firstRowValues);
    // Fall back to the header count when no column count was reported.
    const columnCount = dimensions.columnCount > 0
        ? dimensions.columnCount
        : columnHeaders.length;
    return {
        columnHeaders,
        rowCount: dimensions.rowCount,
        columnCount,
        summary,
    };
}
|
|
45
|
+
/**
 * Build a fresh, empty chunk accumulator.
 *
 * @param {number} startOffset - Offset recorded as the accumulator's start
 * @returns {{text: string, blocks: Array, startOffset: number, contentTypes: Set}}
 *   Accumulator with no text, no blocks and an empty content-type set
 */
function createEmptyAccumulator(startOffset) {
    const emptyAccumulator = {
        text: '',
        blocks: [],
        startOffset: startOffset,
        contentTypes: new Set(),
    };
    return emptyAccumulator;
}
|
|
53
|
+
/**
 * Report whether an accumulator holds anything other than whitespace.
 *
 * @param {{text: string}} acc - Accumulator to inspect
 * @returns {boolean} true when `acc.text` contains a non-whitespace character
 */
function accumulatorHasContent(acc) {
    const trimmed = acc.text.trim();
    return trimmed !== '';
}
|
|
56
|
+
/**
 * Append a block to an accumulator, mutating the accumulator in place.
 *
 * The block's text is joined to any existing text with a blank line, the
 * block itself is recorded, and its mapped content type is added to the set.
 *
 * @param {{text: string, blocks: Array, contentTypes: Set}} acc - Target accumulator
 * @param {{text: string, type: string}} block - Block to append
 */
function addBlockToAccumulator(acc, block) {
    // Separate consecutive blocks with a paragraph break; nothing before the first.
    const separator = acc.text.length > 0 ? '\n\n' : '';
    acc.text = acc.text + separator + block.text;
    acc.blocks.push(block);
    const contentType = mapBlockTypeToContentType(block.type);
    acc.contentTypes.add(contentType);
}
|
|
64
|
+
/**
 * Translate a markdown block type into a chunk content type.
 *
 * `heading`, `table`, `code` and `list` map to themselves; `paragraph` and
 * every other value map to `text`.
 *
 * @param {string} blockType - Block type reported by the markdown parser
 * @returns {string} Content type label
 */
function mapBlockTypeToContentType(blockType) {
    if (blockType === 'heading' ||
        blockType === 'table' ||
        blockType === 'code' ||
        blockType === 'list') {
        return blockType;
    }
    return 'text';
}
|
|
74
|
+
// ---------------------------------------------------------------------------
|
|
75
|
+
// Sentence boundary detection
|
|
76
|
+
// ---------------------------------------------------------------------------
|
|
77
|
+
/**
 * Choose a position at which to split text, preferring natural boundaries.
 *
 * Looks backward from `maxPos` over a window of at most 500 characters and
 * tries, in order of preference:
 *   1. sentence-ending punctuation (. ? !) followed by a space or newline,
 *   2. a paragraph break (two consecutive newlines),
 *   3. a single line break,
 *   4. a space.
 * The first rule that matches anywhere in the window wins, and the result is
 * the index just after the matched character. When nothing matches, the text
 * is split at `maxPos` itself.
 *
 * @param text - The text to scan
 * @param maxPos - Position to start scanning backward from (typically chunkSize)
 * @returns Position to split at
 */
function findSentenceBoundary(text, maxPos) {
    const windowStart = Math.max(0, maxPos - 500);
    // Highest index in [lowest, maxPos] satisfying `matches`, or -1 if none.
    const lastIndexWhere = (lowest, matches) => {
        for (let idx = maxPos; idx >= lowest; idx--) {
            if (matches(idx)) {
                return idx;
            }
        }
        return -1;
    };
    const isSentenceEnd = (idx) => {
        const ch = text[idx];
        if (ch !== '.' && ch !== '?' && ch !== '!') {
            return false;
        }
        if (idx + 1 >= text.length) {
            return false;
        }
        const following = text[idx + 1];
        return following === ' ' || following === '\n';
    };
    // Rules in priority order. The paragraph rule inspects idx - 1, so its
    // scan stops one position short of the window start.
    const rules = [
        { lowest: windowStart, matches: isSentenceEnd },
        { lowest: windowStart + 1, matches: (idx) => text[idx] === '\n' && text[idx - 1] === '\n' },
        { lowest: windowStart, matches: (idx) => text[idx] === '\n' },
        { lowest: windowStart, matches: (idx) => text[idx] === ' ' },
    ];
    for (const rule of rules) {
        const hit = lastIndexWhere(rule.lowest, rule.matches);
        if (hit !== -1) {
            return hit + 1;
        }
    }
    // Last resort: force split at maxPos
    return maxPos;
}
|
|
121
|
+
// ---------------------------------------------------------------------------
|
|
122
|
+
// Page info determination
|
|
123
|
+
// ---------------------------------------------------------------------------
|
|
124
|
+
/**
 * Work out the page number and, for multi-page spans, the page range of a
 * character span.
 *
 * @param {number} startOffset - Offset of the first character of the span
 * @param {number} endOffset - Offset just past the last character of the span
 * @param {Array} pageOffsets - Page offset table passed to getPageNumberForOffset
 * @returns {{pageNumber: *, pageRange: (string|null)}} Both fields are null
 *   when there are no page offsets or the start page cannot be resolved;
 *   `pageRange` is "start-end" only when the span ends on a different page
 */
function determinePageInfoForSpan(startOffset, endOffset, pageOffsets) {
    const unknownPage = { pageNumber: null, pageRange: null };
    if (pageOffsets.length === 0) {
        return unknownPage;
    }
    // endOffset is exclusive, so the last character sits at endOffset - 1;
    // never look before the start of the span.
    const lastCharOffset = Math.max(startOffset, endOffset - 1);
    const firstPage = getPageNumberForOffset(startOffset, pageOffsets);
    const lastPage = getPageNumberForOffset(lastCharOffset, pageOffsets);
    if (firstPage === null) {
        return unknownPage;
    }
    const spansPages = lastPage !== null && lastPage !== firstPage;
    return {
        pageNumber: firstPage,
        pageRange: spansPages ? `${firstPage}-${lastPage}` : null,
    };
}
|
|
144
|
+
// ---------------------------------------------------------------------------
|
|
145
|
+
// Main hybrid chunking function
|
|
146
|
+
// ---------------------------------------------------------------------------
|
|
147
|
+
/**
 * Hybrid section-aware chunking.
 *
 * Uses markdown structure (headings, paragraphs, tables), JSON block data
 * (for atomic region detection), and page offsets (for page tracking) to
 * produce semantically coherent chunks.
 *
 * Processing outline:
 *   1. Parse the markdown into blocks (optionally normalizing heading levels).
 *   2. Build the section hierarchy, atomic regions and table structures.
 *   3. Walk the blocks: headings start a new accumulator; large tables/code
 *      and JSON-detected atomic regions are emitted as atomic chunks; all
 *      other content is accumulated and split once it exceeds
 *      `config.chunkSize`.
 *   4. Merge heading-only tiny chunks and assign overlap values.
 *
 * @param text - Full markdown text from OCR output
 * @param pageOffsets - Page offset information for page number assignment
 * @param jsonBlocks - JSON block hierarchy from Datalab OCR (may be null)
 * @param config - Chunking configuration (defaults to DEFAULT_CHUNKING_CONFIG)
 * @returns Array of ChunkResult with section context, content types, and page info
 * @throws {Error} When the markdown parser yields no blocks for non-empty text
 */
export function chunkHybridSectionAware(text, pageOffsets, jsonBlocks, config = DEFAULT_CHUNKING_CONFIG) {
    // 1. Empty text returns empty array
    if (text.length === 0) {
        return [];
    }
    // 2. Parse markdown blocks
    const blocks = parseMarkdownBlocks(text, pageOffsets);
    // 3. If blocks is empty but text is not, something is wrong
    if (blocks.length === 0) {
        throw new Error(`Markdown parser returned no blocks for non-empty text (${text.length} chars)`);
    }
    // 3.5. Normalize heading levels if configured
    if (config.headingNormalization) {
        normalizeHeadingLevels(blocks, config.headingNormalization);
    }
    // 4. Build section hierarchy
    const sections = buildSectionHierarchy(blocks);
    // 5. Find atomic regions from JSON blocks
    const atomicRegions = findAtomicRegions(jsonBlocks, text, pageOffsets);
    // 5.5. Extract table structures for column header context (Task 7.2)
    const tableStructures = extractTableStructures(jsonBlocks, text, pageOffsets);
    /**
     * Find a table structure whose offset range overlaps a given block.
     */
    function findTableStructureForBlock(block) {
        for (const ts of tableStructures) {
            // Check if block overlaps this table structure
            if (block.startOffset < ts.endOffset && block.endOffset > ts.startOffset) {
                return ts;
            }
        }
        return null;
    }
    /**
     * Find table metadata for a chunk based on its offset range.
     * Returns metadata if the chunk overlaps with a known table structure.
     */
    function findTableMetadata(offset, length) {
        const end = offset + length;
        for (const ts of tableStructures) {
            // NOTE(review): this comparison is inclusive, so ranges that merely
            // touch also match, unlike findTableStructureForBlock above, which
            // is strict. Left as-is; confirm whether the tolerance is intended.
            if (offset <= ts.endOffset && end >= ts.startOffset) {
                return {
                    columnHeaders: ts.columnHeaders,
                    rowCount: ts.rowCount,
                    columnCount: ts.columnCount,
                    summary: ts.summary,
                    caption: ts.caption,
                    continuationOf: ts.continuationOf,
                };
            }
        }
        return null;
    }
    /**
     * Offset-based table metadata, falling back to parsing the chunk text.
     */
    function resolveTableMetadata(startOff, endOff, chunkText) {
        return findTableMetadata(startOff, endOff - startOff) ?? createTableMetadataFromText(chunkText);
    }
    /**
     * Build a column header prefix string for a table chunk.
     * Format: "[Table: col1 | col2 | col3] "
     */
    function buildTableHeaderPrefix(ts) {
        if (ts.columnHeaders.length === 0)
            return '';
        return `[Table: ${ts.columnHeaders.join(' | ')}] `;
    }
    // 6. Walk blocks, accumulating into chunks
    const chunks = [];
    let accumulator = createEmptyAccumulator(0);
    let currentSectionPath = null;
    let currentHeadingText = null;
    let currentHeadingLevel = null;
    let chunkIndex = 0;
    const overlapSize = getOverlapCharacters(config);
    /**
     * Append one chunk record, stamping it with the next index, the page info
     * for its span and the heading/section context currently in effect.
     */
    function pushChunk(chunkText, startOff, endOff, contentTypes, isAtomic, tableMetadata) {
        const pageInfo = determinePageInfoForSpan(startOff, endOff, pageOffsets);
        chunks.push({
            index: chunkIndex++,
            text: chunkText,
            startOffset: startOff,
            endOffset: endOff,
            overlapWithPrevious: 0, // Set in post-processing
            overlapWithNext: 0, // Set in post-processing
            pageNumber: pageInfo.pageNumber,
            pageRange: pageInfo.pageRange,
            headingContext: currentHeadingText,
            headingLevel: currentHeadingLevel,
            sectionPath: currentSectionPath,
            contentTypes,
            isAtomic,
            tableMetadata,
        });
    }
    /**
     * Flush the accumulator as a chunk. Does nothing when the accumulator
     * holds only whitespace. The caller is responsible for replacing the
     * accumulator afterwards.
     */
    function flushAccumulator(isAtomic) {
        if (!accumulatorHasContent(accumulator)) {
            return;
        }
        const chunkText = accumulator.text;
        const startOff = accumulator.startOffset;
        const endOff = startOff + chunkText.length;
        // Only chunks that absorbed a (small) table block carry table metadata
        const tableMeta = accumulator.contentTypes.has('table')
            ? resolveTableMetadata(startOff, endOff, chunkText)
            : null;
        pushChunk(chunkText, startOff, endOff, Array.from(accumulator.contentTypes), isAtomic, tableMeta);
    }
    /**
     * While the accumulator exceeds config.chunkSize, flush its head up to a
     * sentence boundary and keep the remainder (with the same content types)
     * in a new accumulator. Shared by the regular-content branch and the
     * small table/code branch so both keep splitting until the remainder fits.
     */
    function splitOversizedAccumulator() {
        // Math.max guards keep the loop finite even for a non-positive chunkSize.
        while (accumulator.text.length > Math.max(0, config.chunkSize)) {
            const fullText = accumulator.text;
            const savedStartOffset = accumulator.startOffset;
            const savedContentTypes = new Set(accumulator.contentTypes);
            // Always advance by at least one character.
            const splitPos = Math.max(1, findSentenceBoundary(fullText, config.chunkSize));
            // Truncate accumulator text to split point and flush
            accumulator.text = fullText.slice(0, splitPos);
            flushAccumulator(false);
            // Keep the remainder in a new accumulator
            accumulator = createEmptyAccumulator(savedStartOffset + splitPos);
            accumulator.text = fullText.slice(splitPos);
            accumulator.contentTypes = savedContentTypes;
        }
    }
    /**
     * Emit a single block as an atomic chunk (table, code, or JSON-detected region).
     * For table blocks, prepends column header context if available (Task 7.2).
     */
    function emitAtomicChunk(block) {
        // Guard: reject empty blocks (same as flushAccumulator's accumulatorHasContent)
        if (block.text.trim().length === 0) {
            console.error(`[chunker] Skipping empty atomic block at offset ${block.startOffset}-${block.endOffset} ` +
                `(type=${block.type}, raw length=${block.text.length})`);
            return;
        }
        const startOff = block.startOffset;
        const endOff = block.endOffset;
        const isTable = block.type === 'table';
        // Task 7.2: Prepend column header context for table chunks
        let chunkText = block.text;
        if (isTable) {
            // Strip HTML tags from table text for clean FTS indexing
            chunkText = stripHtmlForFTS(chunkText);
            const ts = findTableStructureForBlock(block);
            if (ts) {
                const prefix = buildTableHeaderPrefix(ts);
                if (prefix.length > 0) {
                    chunkText = prefix + chunkText;
                }
            }
        }
        // Post-processing guard: HTML stripping may leave empty text
        if (chunkText.trim().length === 0) {
            console.error(`[chunker] Atomic chunk became empty after processing at offset ${startOff}-${endOff} ` +
                `(type=${block.type}, original length=${block.text.length})`);
            return;
        }
        const tableMeta = isTable ? resolveTableMetadata(startOff, endOff, chunkText) : null;
        pushChunk(chunkText, startOff, endOff, [mapBlockTypeToContentType(block.type)], true, tableMeta);
    }
    /**
     * Emit an atomic block with size awareness: if the block exceeds
     * maxChunkSize, split it at line boundaries (row breaks for tables,
     * line breaks for code). Each sub-chunk inherits atomic status.
     * For table blocks, prepends column header context to each sub-chunk (Task 7.2).
     */
    function emitSizedAtomicChunk(block) {
        if (block.text.length <= config.maxChunkSize) {
            emitAtomicChunk(block);
            return;
        }
        const isTable = block.type === 'table';
        // Task 7.2: Get table header prefix for table blocks
        let tablePrefix = '';
        if (isTable) {
            const ts = findTableStructureForBlock(block);
            if (ts) {
                tablePrefix = buildTableHeaderPrefix(ts);
            }
        }
        // Split oversized atomic block at line boundaries.
        // Strip HTML tags from table text for clean FTS indexing.
        // NOTE(review): sub-chunk offsets below are computed from positions in
        // the stripped text, so for tables they may drift from the offsets in
        // the original markdown.
        const blockText = isTable ? stripHtmlForFTS(block.text) : block.text;
        let pos = 0;
        while (pos < blockText.length) {
            let endPos;
            if (blockText.length - pos <= config.maxChunkSize) {
                // Remaining text fits in one chunk
                endPos = blockText.length;
            }
            else {
                // Find last newline before maxChunkSize boundary
                endPos = blockText.lastIndexOf('\n', pos + config.maxChunkSize);
                if (endPos <= pos) {
                    // No newline found within range, force split at maxChunkSize
                    endPos = pos + config.maxChunkSize;
                }
            }
            let chunkText = blockText.slice(pos, endPos);
            if (chunkText.trim().length > 0) {
                // Task 7.2: Prepend column header context to each table sub-chunk
                if (tablePrefix.length > 0) {
                    chunkText = tablePrefix + chunkText;
                }
                const startOff = block.startOffset + pos;
                const endOff = block.startOffset + endPos;
                const tableMeta = isTable ? resolveTableMetadata(startOff, endOff, chunkText) : null;
                pushChunk(chunkText, startOff, endOff, [mapBlockTypeToContentType(block.type)], true, tableMeta);
            }
            // Advance past the split point (skip newline if present)
            pos = endPos < blockText.length && blockText[endPos] === '\n' ? endPos + 1 : endPos;
        }
    }
    for (let blockIdx = 0; blockIdx < blocks.length; blockIdx++) {
        const block = blocks[blockIdx];
        // Skip empty and page_marker blocks
        if (block.type === 'empty' || block.type === 'page_marker') {
            continue;
        }
        const isHeading = block.type === 'heading';
        if (isHeading) {
            // Flush the previous section's content BEFORE any context is
            // updated, so the flushed chunk keeps the section path and heading
            // that were in effect while its content was accumulated.
            flushAccumulator(false);
        }
        // Get section info for this block
        const sectionNode = sections.get(blockIdx);
        if (sectionNode) {
            currentSectionPath = sectionNode.path;
        }
        if (isHeading) {
            // Update heading context
            currentHeadingText = block.headingText;
            currentHeadingLevel = block.headingLevel;
            // Start new accumulator with the heading
            accumulator = createEmptyAccumulator(block.startOffset);
            addBlockToAccumulator(accumulator, block);
        }
        else if (block.type === 'table' || block.type === 'code') {
            // Size-aware atomic treatment: only emit as atomic if block is large enough
            // to produce meaningful standalone embeddings. Small tables/code blocks are
            // merged into surrounding content for better embedding quality.
            const minAtomicSize = Math.floor(config.chunkSize / 4);
            if (block.text.length >= minAtomicSize) {
                // Large table/code -> atomic chunk (with oversized splitting)
                flushAccumulator(false);
                emitSizedAtomicChunk(block);
                accumulator = createEmptyAccumulator(block.endOffset);
            }
            else {
                // Small table/code -> treat as regular content, merge into accumulator
                addBlockToAccumulator(accumulator, block);
                splitOversizedAccumulator();
            }
        }
        else {
            // Regular content (paragraph, list)
            // Check if this block starts inside an atomic region from JSON blocks
            const atomicRegion = isOffsetInAtomicRegion(block.startOffset, atomicRegions);
            if (atomicRegion) {
                // This was detected as part of an atomic region by JSON analysis
                flushAccumulator(false);
                emitAtomicChunk(block);
                accumulator = createEmptyAccumulator(block.endOffset);
            }
            else {
                addBlockToAccumulator(accumulator, block);
                splitOversizedAccumulator();
            }
        }
    }
    // 7. Flush any remaining content
    flushAccumulator(false);
    // 8. Merge heading-only tiny chunks with neighbors
    const mergedChunks = mergeHeadingOnlyChunks(chunks, config.minChunkSize ?? 100);
    // Replace chunks array contents with merged results
    chunks.length = 0;
    chunks.push(...mergedChunks);
    // 9. Set overlap values for non-atomic chunks
    for (let i = 0; i < chunks.length; i++) {
        const chunk = chunks[i];
        if (chunk.isAtomic) {
            // Atomic chunks never participate in overlap
            chunk.overlapWithPrevious = 0;
            chunk.overlapWithNext = 0;
            continue;
        }
        // Set overlapWithPrevious for non-first chunks whose predecessor is non-atomic
        if (i > 0 && !chunks[i - 1].isAtomic) {
            chunk.overlapWithPrevious = overlapSize;
        }
        // Set overlapWithNext for non-last chunks whose successor is non-atomic
        if (i < chunks.length - 1 && !chunks[i + 1].isAtomic) {
            chunk.overlapWithNext = overlapSize;
        }
    }
    return chunks;
}
|
|
510
|
+
// ---------------------------------------------------------------------------
|
|
511
|
+
// Provenance creation
|
|
512
|
+
// ---------------------------------------------------------------------------
|
|
513
|
+
/**
 * Assemble the provenance parameters for a single chunk.
 *
 * Produces a CreateProvenanceParams object for a CHUNK provenance record
 * (chain_depth=2, per the module's own description). Page fields are added
 * to the location only when the chunk carries them.
 *
 * @param params - Chunk provenance parameters (chunk, hashes, parent
 *   provenance ids, total chunk count, optional duration and config)
 * @returns CreateProvenanceParams ready for insertProvenance
 */
export function createChunkProvenance(params) {
    const chunk = params.chunk;
    // Mirror a destructuring default: only an undefined config falls back.
    const settings = params.config === undefined ? DEFAULT_CHUNKING_CONFIG : params.config;
    // Build location information; page info only if available
    const location = {
        chunk_index: chunk.index,
        character_start: chunk.startOffset,
        character_end: chunk.endOffset,
        ...(chunk.pageNumber !== null ? { page_number: chunk.pageNumber } : {}),
        ...(chunk.pageRange !== null ? { page_range: chunk.pageRange } : {}),
    };
    const processingParams = {
        chunk_size: settings.chunkSize,
        overlap_percent: settings.overlapPercent,
        max_chunk_size: settings.maxChunkSize,
        strategy: 'hybrid_section',
        chunk_index: chunk.index,
        total_chunks: params.totalChunks,
        character_start: chunk.startOffset,
        character_end: chunk.endOffset,
        heading_context: chunk.headingContext ?? null,
        section_path: chunk.sectionPath ?? null,
        is_atomic: chunk.isAtomic,
        content_types: chunk.contentTypes,
    };
    return {
        type: ProvenanceType.CHUNK,
        source_type: 'CHUNKING',
        source_id: params.ocrProvenanceId,
        root_document_id: params.documentProvenanceId,
        content_hash: params.chunkTextHash,
        input_hash: params.ocrContentHash,
        file_hash: params.fileHash,
        processor: 'chunker',
        processor_version: '2.0.0',
        processing_params: processingParams,
        processing_duration_ms: params.processingDurationMs ?? null,
        location,
    };
}
|
|
565
|
+
export { DEFAULT_CHUNKING_CONFIG } from '../../models/chunk.js';
|
|
566
|
+
//# sourceMappingURL=chunker.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"chunker.js","sourceRoot":"","sources":["../../../src/services/chunking/chunker.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,EAGL,uBAAuB,EACvB,oBAAoB,GACrB,MAAM,uBAAuB,CAAC;AAE/B,OAAO,EACL,cAAc,GAIf,MAAM,4BAA4B,CAAC;AACpC,OAAO,EACL,mBAAmB,EACnB,qBAAqB,EACrB,sBAAsB,GAEvB,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EACL,iBAAiB,EACjB,sBAAsB,EACtB,sBAAsB,EACtB,0BAA0B,EAC1B,gCAAgC,EAChC,mBAAmB,EACnB,oBAAoB,GAErB,MAAM,0BAA0B,CAAC;AAClC,OAAO,EAAE,sBAAsB,EAAE,MAAM,yBAAyB,CAAC;AACjE,OAAO,EAAE,sBAAsB,EAAE,MAAM,mBAAmB,CAAC;AAE3D;;;GAGG;AACH,SAAS,eAAe,CAAC,IAAY;IACnC,OAAO,IAAI,CAAC,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC;AAC9D,CAAC;AAED;;;;GAIG;AACH,SAAS,2BAA2B,CAAC,SAAiB;IACpD,8DAA8D;IAC9D,MAAM,SAAS,GAAG,SAAS,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC;IACrE,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACzB,OAAO,IAAI,CAAC;IACd,CAAC;IAED,MAAM,aAAa,GAAG,0BAA0B,CAAC,SAAS,CAAC,CAAC;IAC5D,MAAM,EAAE,QAAQ,EAAE,WAAW,EAAE,GAAG,gCAAgC,CAAC,SAAS,CAAC,CAAC;IAC9E,MAAM,cAAc,GAAG,mBAAmB,CAAC,SAAS,CAAC,CAAC;IACtD,MAAM,OAAO,GAAG,oBAAoB,CAAC,aAAa,EAAE,QAAQ,EAAE,cAAc,CAAC,CAAC;IAE9E,OAAO;QACL,aAAa;QACb,QAAQ;QACR,WAAW,EAAE,WAAW,GAAG,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,aAAa,CAAC,MAAM;QACjE,OAAO;KACR,CAAC;AACJ,CAAC;AAqCD,SAAS,sBAAsB,CAAC,WAAmB;IACjD,OAAO;QACL,IAAI,EAAE,EAAE;QACR,MAAM,EAAE,EAAE;QACV,WAAW;QACX,YAAY,EAAE,IAAI,GAAG,EAAE;KACxB,CAAC;AACJ,CAAC;AAED,SAAS,qBAAqB,CAAC,GAAgB;IAC7C,OAAO,GAAG,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC;AACpC,CAAC;AAED,SAAS,qBAAqB,CAAC,GAAgB,EAAE,KAAoB;IACnE,IAAI,GAAG,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACxB,GAAG,CAAC,IAAI,IAAI,MAAM,CAAC;IACrB,CAAC;IACD,GAAG,CAAC,IAAI,IAAI,KAAK,CAAC,IAAI,CAAC;IACvB,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACvB,GAAG,CAAC,YAAY,CAAC,GAAG,CAAC,yBAAyB,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC;AAC9D,CAAC;AAED,SAAS,yBAAyB,CAAC,SAAiB;IAClD,QAAQ,SAAS,EAAE,CAAC;QAClB,KAAK,SAAS,CAAC,CAAC,OAAO,SAAS,CAAC;QACjC,KAAK,OAAO,CAAC,CAAC,OAAO
,OAAO,CAAC;QAC7B,KAAK,MAAM,CAAC,CAAC,OAAO,MAAM,CAAC;QAC3B,KAAK,MAAM,CAAC,CAAC,OAAO,MAAM,CAAC;QAC3B,KAAK,WAAW,CAAC,CAAC,OAAO,MAAM,CAAC;QAChC,OAAO,CAAC,CAAC,OAAO,MAAM,CAAC;IACzB,CAAC;AACH,CAAC;AAED,8EAA8E;AAC9E,8BAA8B;AAC9B,8EAA8E;AAE9E;;;;;;;;;;GAUG;AACH,SAAS,oBAAoB,CAAC,IAAY,EAAE,MAAc;IACxD,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,MAAM,GAAG,GAAG,CAAC,CAAC;IAE9C,8DAA8D;IAC9D,KAAK,IAAI,CAAC,GAAG,MAAM,EAAE,CAAC,IAAI,WAAW,EAAE,CAAC,EAAE,EAAE,CAAC;QAC3C,MAAM,EAAE,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QACnB,IAAI,CAAC,EAAE,KAAK,GAAG,IAAI,EAAE,KAAK,GAAG,IAAI,EAAE,KAAK,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;YACpE,MAAM,IAAI,GAAG,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;YACzB,IAAI,IAAI,KAAK,GAAG,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;gBAClC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,8BAA8B;YAC9C,CAAC;QACH,CAAC;IACH,CAAC;IAED,qCAAqC;IACrC,KAAK,IAAI,CAAC,GAAG,MAAM,EAAE,CAAC,IAAI,WAAW,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAC/C,IAAI,IAAI,CAAC,CAAC,CAAC,KAAK,IAAI,IAAI,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;YAC7C,OAAO,CAAC,GAAG,CAAC,CAAC;QACf,CAAC;IACH,CAAC;IAED,8BAA8B;IAC9B,KAAK,IAAI,CAAC,GAAG,MAAM,EAAE,CAAC,IAAI,WAAW,EAAE,CAAC,EAAE,EAAE,CAAC;QAC3C,IAAI,IAAI,CAAC,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;YACrB,OAAO,CAAC,GAAG,CAAC,CAAC;QACf,CAAC;IACH,CAAC;IAED,wBAAwB;IACxB,KAAK,IAAI,CAAC,GAAG,MAAM,EAAE,CAAC,IAAI,WAAW,EAAE,CAAC,EAAE,EAAE,CAAC;QAC3C,IAAI,IAAI,CAAC,CAAC,CAAC,KAAK,GAAG,EAAE,CAAC;YACpB,OAAO,CAAC,GAAG,CAAC,CAAC;QACf,CAAC;IACH,CAAC;IAED,qCAAqC;IACrC,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,8EAA8E;AAC9E,0BAA0B;AAC1B,8EAA8E;AAE9E;;GAEG;AACH,SAAS,wBAAwB,CAC/B,WAAmB,EACnB,SAAiB,EACjB,WAAyB;IAEzB,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC7B,OAAO,EAAE,UAAU,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC;IAC/C,CAAC;IAED,MAAM,SAAS,GAAG,sBAAsB,CAAC,WAAW,EAAE,WAAW,CAAC,CAAC;IACnE,MAAM,OAAO,GAAG,sBAAsB,CACpC,IAAI,CAAC,GAAG,CAAC,WAAW,EAAE,SAAS,GAAG,CAAC,CAAC,EACpC,WAAW,CACZ,CAAC;IAEF,IAAI,SAAS,KAAK,IAAI,EAAE,CAAC;QACvB,OAAO,EAAE,UAAU,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC;IAC/C,CAAC;IAED,IAAI,OAAO,KAAK,IAAI,IAAI,S
AAS,KAAK,OAAO,EAAE,CAAC;QAC9C,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC;IACpD,CAAC;IAED,OAAO;QACL,UAAU,EAAE,SAAS;QACrB,SAAS,EAAE,GAAG,SAAS,IAAI,OAAO,EAAE;KACrC,CAAC;AACJ,CAAC;AAED,8EAA8E;AAC9E,gCAAgC;AAChC,8EAA8E;AAE9E;;;;;;;;;;;;GAYG;AACH,MAAM,UAAU,uBAAuB,CACrC,IAAY,EACZ,WAAyB,EACzB,UAA0C,EAC1C,SAAyB,uBAAuB;IAEhD,oCAAoC;IACpC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,2BAA2B;IAC3B,MAAM,MAAM,GAAG,mBAAmB,CAAC,IAAI,EAAE,WAAW,CAAC,CAAC;IAEtD,4DAA4D;IAC5D,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACxB,MAAM,IAAI,KAAK,CACb,0DAA0D,IAAI,CAAC,MAAM,SAAS,CAC/E,CAAC;IACJ,CAAC;IAED,8CAA8C;IAC9C,IAAI,MAAM,CAAC,oBAAoB,EAAE,CAAC;QAChC,sBAAsB,CAAC,MAAM,EAAE,MAAM,CAAC,oBAAoB,CAAC,CAAC;IAC9D,CAAC;IAED,6BAA6B;IAC7B,MAAM,QAAQ,GAAG,qBAAqB,CAAC,MAAM,CAAC,CAAC;IAE/C,0CAA0C;IAC1C,MAAM,aAAa,GAAG,iBAAiB,CAAC,UAAU,EAAE,IAAI,EAAE,WAAW,CAAC,CAAC;IAEvE,qEAAqE;IACrE,MAAM,eAAe,GAAG,sBAAsB,CAAC,UAAU,EAAE,IAAI,EAAE,WAAW,CAAC,CAAC;IAE9E;;OAEG;IACH,SAAS,0BAA0B,CAAC,KAAoB;QACtD,KAAK,MAAM,EAAE,IAAI,eAAe,EAAE,CAAC;YACjC,+CAA+C;YAC/C,IAAI,KAAK,CAAC,WAAW,GAAG,EAAE,CAAC,SAAS,IAAI,KAAK,CAAC,SAAS,GAAG,EAAE,CAAC,WAAW,EAAE,CAAC;gBACzE,OAAO,EAAE,CAAC;YACZ,CAAC;QACH,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;IAED;;;OAGG;IACH,SAAS,iBAAiB,CACxB,MAAc,EACd,MAAc;QAEd,MAAM,GAAG,GAAG,MAAM,GAAG,MAAM,CAAC;QAC5B,KAAK,MAAM,EAAE,IAAI,eAAe,EAAE,CAAC;YACjC,+CAA+C;YAC/C,IAAI,MAAM,IAAI,EAAE,CAAC,SAAS,IAAI,GAAG,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;gBACpD,OAAO;oBACL,aAAa,EAAE,EAAE,CAAC,aAAa;oBAC/B,QAAQ,EAAE,EAAE,CAAC,QAAQ;oBACrB,WAAW,EAAE,EAAE,CAAC,WAAW;oBAC3B,OAAO,EAAE,EAAE,CAAC,OAAO;oBACnB,OAAO,EAAE,EAAE,CAAC,OAAO;oBACnB,cAAc,EAAE,EAAE,CAAC,cAAc;iBAClC,CAAC;YACJ,CAAC;QACH,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;IAED;;;OAGG;IACH,SAAS,sBAAsB,CAAC,EAAkB;QAChD,IAAI,EAAE,CAAC,aAAa,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,EAAE,CAAC;QAC7C,OAAO,WAAW,EAAE,CAAC,aAAa,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC;IACrD,CAAC;IAED,2CAA2C;IAC3C,MAAM,MAAM,GAAkB,EAAE,CAAC;IACjC,IAAI,WAAW,GAAG,sBAAsB,CAAC,CAAC,CAAC,CAAC;IAC5C,IAAI,kBAAkB,GAAkB,
IAAI,CAAC;IAC7C,IAAI,kBAAkB,GAAkB,IAAI,CAAC;IAC7C,IAAI,mBAAmB,GAAkB,IAAI,CAAC;IAC9C,IAAI,UAAU,GAAG,CAAC,CAAC;IACnB,MAAM,WAAW,GAAG,oBAAoB,CAAC,MAAM,CAAC,CAAC;IAEjD;;OAEG;IACH,SAAS,gBAAgB,CAAC,QAAiB;QACzC,IAAI,CAAC,qBAAqB,CAAC,WAAW,CAAC,EAAE,CAAC;YACxC,OAAO;QACT,CAAC;QAED,MAAM,SAAS,GAAG,WAAW,CAAC,IAAI,CAAC;QACnC,MAAM,QAAQ,GAAG,WAAW,CAAC,WAAW,CAAC;QACzC,MAAM,MAAM,GAAG,QAAQ,GAAG,SAAS,CAAC,MAAM,CAAC;QAE3C,MAAM,QAAQ,GAAG,wBAAwB,CAAC,QAAQ,EAAE,MAAM,EAAE,WAAW,CAAC,CAAC;QAEzE,8DAA8D;QAC9D,MAAM,eAAe,GAAG,WAAW,CAAC,YAAY,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;QAC9D,MAAM,mBAAmB,GAAG,eAAe;YACzC,CAAC,CAAC,CAAC,iBAAiB,CAAC,QAAQ,EAAE,MAAM,GAAG,QAAQ,CAAC,IAAI,2BAA2B,CAAC,SAAS,CAAC,CAAC;YAC5F,CAAC,CAAC,IAAI,CAAC;QAET,MAAM,CAAC,IAAI,CAAC;YACV,KAAK,EAAE,UAAU,EAAE;YACnB,IAAI,EAAE,SAAS;YACf,WAAW,EAAE,QAAQ;YACrB,SAAS,EAAE,MAAM;YACjB,mBAAmB,EAAE,CAAC,EAAE,yBAAyB;YACjD,eAAe,EAAE,CAAC,EAAM,yBAAyB;YACjD,UAAU,EAAE,QAAQ,CAAC,UAAU;YAC/B,SAAS,EAAE,QAAQ,CAAC,SAAS;YAC7B,cAAc,EAAE,kBAAkB;YAClC,YAAY,EAAE,mBAAmB;YACjC,WAAW,EAAE,kBAAkB;YAC/B,YAAY,EAAE,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,YAAY,CAAC;YAClD,QAAQ;YACR,aAAa,EAAE,mBAAmB;SACnC,CAAC,CAAC;IACL,CAAC;IAED;;;OAGG;IACH,SAAS,eAAe,CAAC,KAAoB;QAC3C,gFAAgF;QAChF,IAAI,KAAK,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACnC,OAAO,CAAC,KAAK,CACX,mDAAmD,KAAK,CAAC,WAAW,IAAI,KAAK,CAAC,SAAS,GAAG;gBAC1F,SAAS,KAAK,CAAC,IAAI,gBAAgB,KAAK,CAAC,IAAI,CAAC,MAAM,GAAG,CACxD,CAAC;YACF,OAAO;QACT,CAAC;QAED,MAAM,QAAQ,GAAG,KAAK,CAAC,WAAW,CAAC;QACnC,MAAM,MAAM,GAAG,KAAK,CAAC,SAAS,CAAC;QAC/B,MAAM,QAAQ,GAAG,wBAAwB,CAAC,QAAQ,EAAE,MAAM,EAAE,WAAW,CAAC,CAAC;QAEzE,2DAA2D;QAC3D,IAAI,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC;QAC3B,IAAI,KAAK,CAAC,IAAI,KAAK,OAAO,EAAE,CAAC;YAC3B,yDAAyD;YACzD,SAAS,GAAG,eAAe,CAAC,SAAS,CAAC,CAAC;YACvC,MAAM,EAAE,GAAG,0BAA0B,CAAC,KAAK,CAAC,CAAC;YAC7C,IAAI,EAAE,EAAE,CAAC;gBACP,MAAM,MAAM,GAAG,sBAAsB,CAAC,EAAE,CAAC,CAAC;gBAC1C,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBACtB,SAAS,GAAG,MAAM,GAAG,SAAS,CAAC;gBACjC,CAAC;YACH,CAAC;QACH,CAAC;QAED,uEAAuE;QACvE,IAAI,SAAS,CAAC,IAAI,EAAE,CAAC,M
AAM,KAAK,CAAC,EAAE,CAAC;YAClC,OAAO,CAAC,KAAK,CACX,kEAAkE,QAAQ,IAAI,MAAM,GAAG;gBACvF,SAAS,KAAK,CAAC,IAAI,qBAAqB,KAAK,CAAC,IAAI,CAAC,MAAM,GAAG,CAC7D,CAAC;YACF,OAAO;QACT,CAAC;QAED,MAAM,uBAAuB,GAAG,KAAK,CAAC,IAAI,KAAK,OAAO;YACpD,CAAC,CAAC,CAAC,iBAAiB,CAAC,QAAQ,EAAE,MAAM,GAAG,QAAQ,CAAC,IAAI,2BAA2B,CAAC,SAAS,CAAC,CAAC;YAC5F,CAAC,CAAC,IAAI,CAAC;QAET,MAAM,CAAC,IAAI,CAAC;YACV,KAAK,EAAE,UAAU,EAAE;YACnB,IAAI,EAAE,SAAS;YACf,WAAW,EAAE,QAAQ;YACrB,SAAS,EAAE,MAAM;YACjB,mBAAmB,EAAE,CAAC;YACtB,eAAe,EAAE,CAAC;YAClB,UAAU,EAAE,QAAQ,CAAC,UAAU;YAC/B,SAAS,EAAE,QAAQ,CAAC,SAAS;YAC7B,cAAc,EAAE,kBAAkB;YAClC,YAAY,EAAE,mBAAmB;YACjC,WAAW,EAAE,kBAAkB;YAC/B,YAAY,EAAE,CAAC,yBAAyB,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YACrD,QAAQ,EAAE,IAAI;YACd,aAAa,EAAE,uBAAuB;SACvC,CAAC,CAAC;IACL,CAAC;IAED;;;;;OAKG;IACH,SAAS,oBAAoB,CAAC,KAAoB;QAChD,IAAI,KAAK,CAAC,IAAI,CAAC,MAAM,IAAI,MAAM,CAAC,YAAY,EAAE,CAAC;YAC7C,eAAe,CAAC,KAAK,CAAC,CAAC;YACvB,OAAO;QACT,CAAC;QAED,qDAAqD;QACrD,IAAI,WAAW,GAAG,EAAE,CAAC;QACrB,IAAI,KAAK,CAAC,IAAI,KAAK,OAAO,EAAE,CAAC;YAC3B,MAAM,EAAE,GAAG,0BAA0B,CAAC,KAAK,CAAC,CAAC;YAC7C,IAAI,EAAE,EAAE,CAAC;gBACP,WAAW,GAAG,sBAAsB,CAAC,EAAE,CAAC,CAAC;YAC3C,CAAC;QACH,CAAC;QAED,kDAAkD;QAClD,yDAAyD;QACzD,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,KAAK,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC;QACpF,IAAI,GAAG,GAAG,CAAC,CAAC;QAEZ,OAAO,GAAG,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC;YAC9B,IAAI,MAAc,CAAC;YAEnB,IAAI,SAAS,CAAC,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,YAAY,EAAE,CAAC;gBAClD,mCAAmC;gBACnC,MAAM,GAAG,SAAS,CAAC,MAAM,CAAC;YAC5B,CAAC;iBAAM,CAAC;gBACN,iDAAiD;gBACjD,MAAM,GAAG,SAAS,CAAC,WAAW,CAAC,IAAI,EAAE,GAAG,GAAG,MAAM,CAAC,YAAY,CAAC,CAAC;gBAChE,IAAI,MAAM,IAAI,GAAG,EAAE,CAAC;oBAClB,6DAA6D;oBAC7D,MAAM,GAAG,GAAG,GAAG,MAAM,CAAC,YAAY,CAAC;gBACrC,CAAC;YACH,CAAC;YAED,IAAI,SAAS,GAAG,SAAS,CAAC,KAAK,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC;YAC7C,IAAI,SAAS,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAChC,kEAAkE;gBAClE,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBAC3B,SAAS,GAAG,WAAW,GAAG,SAAS,CAAC;gBACtC,CAAC;gBAED,MAAM,QAAQ,GAAG,
KAAK,CAAC,WAAW,GAAG,GAAG,CAAC;gBACzC,MAAM,MAAM,GAAG,KAAK,CAAC,WAAW,GAAG,MAAM,CAAC;gBAC1C,MAAM,QAAQ,GAAG,wBAAwB,CAAC,QAAQ,EAAE,MAAM,EAAE,WAAW,CAAC,CAAC;gBAEzE,MAAM,oBAAoB,GAAG,KAAK,CAAC,IAAI,KAAK,OAAO;oBACjD,CAAC,CAAC,CAAC,iBAAiB,CAAC,QAAQ,EAAE,MAAM,GAAG,QAAQ,CAAC,IAAI,2BAA2B,CAAC,SAAS,CAAC,CAAC;oBAC5F,CAAC,CAAC,IAAI,CAAC;gBAET,MAAM,CAAC,IAAI,CAAC;oBACV,KAAK,EAAE,UAAU,EAAE;oBACnB,IAAI,EAAE,SAAS;oBACf,WAAW,EAAE,QAAQ;oBACrB,SAAS,EAAE,MAAM;oBACjB,mBAAmB,EAAE,CAAC;oBACtB,eAAe,EAAE,CAAC;oBAClB,UAAU,EAAE,QAAQ,CAAC,UAAU;oBAC/B,SAAS,EAAE,QAAQ,CAAC,SAAS;oBAC7B,cAAc,EAAE,kBAAkB;oBAClC,YAAY,EAAE,mBAAmB;oBACjC,WAAW,EAAE,kBAAkB;oBAC/B,YAAY,EAAE,CAAC,yBAAyB,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;oBACrD,QAAQ,EAAE,IAAI;oBACd,aAAa,EAAE,oBAAoB;iBACpC,CAAC,CAAC;YACL,CAAC;YAED,yDAAyD;YACzD,GAAG,GAAG,MAAM,GAAG,SAAS,CAAC,MAAM,IAAI,SAAS,CAAC,MAAM,CAAC,KAAK,IAAI,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;QACtF,CAAC;IACH,CAAC;IAED,KAAK,IAAI,QAAQ,GAAG,CAAC,EAAE,QAAQ,GAAG,MAAM,CAAC,MAAM,EAAE,QAAQ,EAAE,EAAE,CAAC;QAC5D,MAAM,KAAK,GAAG,MAAM,CAAC,QAAQ,CAAC,CAAC;QAE/B,oCAAoC;QACpC,IAAI,KAAK,CAAC,IAAI,KAAK,OAAO,IAAI,KAAK,CAAC,IAAI,KAAK,aAAa,EAAE,CAAC;YAC3D,SAAS;QACX,CAAC;QAED,kCAAkC;QAClC,MAAM,WAAW,GAAG,QAAQ,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QAC3C,IAAI,WAAW,EAAE,CAAC;YAChB,kBAAkB,GAAG,WAAW,CAAC,IAAI,CAAC;QACxC,CAAC;QAED,IAAI,KAAK,CAAC,IAAI,KAAK,SAAS,EAAE,CAAC;YAC7B,gDAAgD;YAChD,gBAAgB,CAAC,KAAK,CAAC,CAAC;YAExB,yBAAyB;YACzB,kBAAkB,GAAG,KAAK,CAAC,WAAW,CAAC;YACvC,mBAAmB,GAAG,KAAK,CAAC,YAAY,CAAC;YAEzC,yCAAyC;YACzC,WAAW,GAAG,sBAAsB,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC;YACxD,qBAAqB,CAAC,WAAW,EAAE,KAAK,CAAC,CAAC;QAE5C,CAAC;aAAM,IAAI,KAAK,CAAC,IAAI,KAAK,OAAO,IAAI,KAAK,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;YAC3D,4EAA4E;YAC5E,4EAA4E;YAC5E,gEAAgE;YAChE,MAAM,aAAa,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC;YAEvD,IAAI,KAAK,CAAC,IAAI,CAAC,MAAM,IAAI,aAAa,EAAE,CAAC;gBACvC,6DAA6D;gBAC7D,gBAAgB,CAAC,KAAK,CAAC,CAAC;gBACxB,oBAAoB,CAAC,KAAK,CAAC,CAAC;gBAC5B,WAAW,GAAG,sBAAsB,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;YACxD,CAAC;iBAAM,
CAAC;gBACN,sEAAsE;gBACtE,qBAAqB,CAAC,WAAW,EAAE,KAAK,CAAC,CAAC;gBAE1C,2EAA2E;gBAC3E,IAAI,WAAW,CAAC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,SAAS,EAAE,CAAC;oBAC/C,MAAM,QAAQ,GAAG,oBAAoB,CAAC,WAAW,CAAC,IAAI,EAAE,MAAM,CAAC,SAAS,CAAC,CAAC;oBAC1E,MAAM,QAAQ,GAAG,WAAW,CAAC,IAAI,CAAC;oBAClC,MAAM,gBAAgB,GAAG,WAAW,CAAC,WAAW,CAAC;oBACjD,MAAM,iBAAiB,GAAG,IAAI,GAAG,CAAC,WAAW,CAAC,YAAY,CAAC,CAAC;oBAE5D,WAAW,CAAC,IAAI,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC;oBAC/C,gBAAgB,CAAC,KAAK,CAAC,CAAC;oBAExB,MAAM,SAAS,GAAG,QAAQ,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;oBAC3C,WAAW,GAAG,sBAAsB,CAAC,gBAAgB,GAAG,QAAQ,CAAC,CAAC;oBAClE,WAAW,CAAC,IAAI,GAAG,SAAS,CAAC;oBAC7B,WAAW,CAAC,YAAY,GAAG,iBAAiB,CAAC;gBAC/C,CAAC;YACH,CAAC;QAEH,CAAC;aAAM,CAAC;YACN,oCAAoC;YACpC,iEAAiE;YACjE,MAAM,YAAY,GAAG,sBAAsB,CAAC,KAAK,CAAC,WAAW,EAAE,aAAa,CAAC,CAAC;YAC9E,IAAI,YAAY,EAAE,CAAC;gBACjB,iEAAiE;gBACjE,gBAAgB,CAAC,KAAK,CAAC,CAAC;gBACxB,eAAe,CAAC,KAAK,CAAC,CAAC;gBACvB,WAAW,GAAG,sBAAsB,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;YACxD,CAAC;iBAAM,CAAC;gBACN,qBAAqB;gBACrB,qBAAqB,CAAC,WAAW,EAAE,KAAK,CAAC,CAAC;gBAE1C,0CAA0C;gBAC1C,IAAI,WAAW,CAAC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,SAAS,EAAE,CAAC;oBAC/C,yCAAyC;oBACzC,MAAM,QAAQ,GAAG,oBAAoB,CAAC,WAAW,CAAC,IAAI,EAAE,MAAM,CAAC,SAAS,CAAC,CAAC;oBAE1E,0BAA0B;oBAC1B,MAAM,QAAQ,GAAG,WAAW,CAAC,IAAI,CAAC;oBAClC,MAAM,gBAAgB,GAAG,WAAW,CAAC,WAAW,CAAC;oBACjD,MAAM,iBAAiB,GAAG,IAAI,GAAG,CAAC,WAAW,CAAC,YAAY,CAAC,CAAC;oBAE5D,qDAAqD;oBACrD,WAAW,CAAC,IAAI,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC;oBAC/C,gBAAgB,CAAC,KAAK,CAAC,CAAC;oBAExB,0CAA0C;oBAC1C,MAAM,SAAS,GAAG,QAAQ,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;oBAC3C,WAAW,GAAG,sBAAsB,CAAC,gBAAgB,GAAG,QAAQ,CAAC,CAAC;oBAClE,WAAW,CAAC,IAAI,GAAG,SAAS,CAAC;oBAC7B,WAAW,CAAC,YAAY,GAAG,iBAAiB,CAAC;oBAE7C,0DAA0D;oBAC1D,OAAO,WAAW,CAAC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,SAAS,EAAE,CAAC;wBAClD,MAAM,aAAa,GAAG,oBAAoB,CACxC,WAAW,CAAC,IAAI,EAChB,MAAM,CAAC,SAAS,CACjB,CAAC;wBACF,MAAM,aAAa,GAAG,WAAW,CAAC,IAAI,CAAC;wBACvC,MAAM,gBAAgB,GAAG,WAAW,CAAC,WAAW,CAAC;wBACjD,MAAM,iBAAiB,GAAG,IAAI,GAAG,CAAC,WAAW,CAAC,Y
AAY,CAAC,CAAC;wBAE5D,WAAW,CAAC,IAAI,GAAG,aAAa,CAAC,KAAK,CAAC,CAAC,EAAE,aAAa,CAAC,CAAC;wBACzD,gBAAgB,CAAC,KAAK,CAAC,CAAC;wBAExB,MAAM,cAAc,GAAG,aAAa,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;wBAC1D,WAAW,GAAG,sBAAsB,CAAC,gBAAgB,GAAG,aAAa,CAAC,CAAC;wBACvE,WAAW,CAAC,IAAI,GAAG,cAAc,CAAC;wBAClC,WAAW,CAAC,YAAY,GAAG,iBAAiB,CAAC;oBAC/C,CAAC;gBACH,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,8BAA8B;IAC9B,gBAAgB,CAAC,KAAK,CAAC,CAAC;IAExB,qDAAqD;IACrD,MAAM,YAAY,GAAG,sBAAsB,CAAC,MAAM,EAAE,MAAM,CAAC,YAAY,IAAI,GAAG,CAAC,CAAC;IAChF,oDAAoD;IACpD,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC;IAClB,MAAM,CAAC,IAAI,CAAC,GAAG,YAAY,CAAC,CAAC;IAE7B,8CAA8C;IAC9C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACvC,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;QAExB,IAAI,KAAK,CAAC,QAAQ,EAAE,CAAC;YACnB,6CAA6C;YAC7C,KAAK,CAAC,mBAAmB,GAAG,CAAC,CAAC;YAC9B,KAAK,CAAC,eAAe,GAAG,CAAC,CAAC;YAC1B,SAAS;QACX,CAAC;QAED,2DAA2D;QAC3D,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,QAAQ,EAAE,CAAC;YACrC,KAAK,CAAC,mBAAmB,GAAG,WAAW,CAAC;QAC1C,CAAC;QAED,sDAAsD;QACtD,IAAI,CAAC,GAAG,MAAM,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,QAAQ,EAAE,CAAC;YACrD,KAAK,CAAC,eAAe,GAAG,WAAW,CAAC;QACtC,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,8EAA8E;AAC9E,sBAAsB;AACtB,8EAA8E;AAE9E;;;;;;;;GAQG;AACH,MAAM,UAAU,qBAAqB,CAAC,MAA6B;IACjE,MAAM,EACJ,KAAK,EACL,aAAa,EACb,eAAe,EACf,oBAAoB,EACpB,cAAc,EACd,QAAQ,EACR,WAAW,EACX,oBAAoB,EACpB,MAAM,GAAG,uBAAuB,GACjC,GAAG,MAAM,CAAC;IAEX,6BAA6B;IAC7B,MAAM,QAAQ,GAAuB;QACnC,WAAW,EAAE,KAAK,CAAC,KAAK;QACxB,eAAe,EAAE,KAAK,CAAC,WAAW;QAClC,aAAa,EAAE,KAAK,CAAC,SAAS;KAC/B,CAAC;IAEF,kCAAkC;IAClC,IAAI,KAAK,CAAC,UAAU,KAAK,IAAI,EAAE,CAAC;QAC9B,QAAQ,CAAC,WAAW,GAAG,KAAK,CAAC,UAAU,CAAC;IAC1C,CAAC;IACD,IAAI,KAAK,CAAC,SAAS,KAAK,IAAI,EAAE,CAAC;QAC7B,QAAQ,CAAC,UAAU,GAAG,KAAK,CAAC,SAAS,CAAC;IACxC,CAAC;IAED,OAAO;QACL,IAAI,EAAE,cAAc,CAAC,KAAK;QAC1B,WAAW,EAAE,UAAwB;QACrC,SAAS,EAAE,eAAe;QAC1B,gBAAgB,EAAE,oBAAoB;QACtC,YAAY,EAAE,aAAa;QAC3B,UAAU,EAAE,cAAc;QAC1B,SAAS,EAAE,QAAQ;QACnB,SAAS
,EAAE,SAAS;QACpB,iBAAiB,EAAE,OAAO;QAC1B,iBAAiB,EAAE;YACjB,UAAU,EAAE,MAAM,CAAC,SAAS;YAC5B,eAAe,EAAE,MAAM,CAAC,cAAc;YACtC,cAAc,EAAE,MAAM,CAAC,YAAY;YACnC,QAAQ,EAAE,gBAAgB;YAC1B,WAAW,EAAE,KAAK,CAAC,KAAK;YACxB,YAAY,EAAE,WAAW;YACzB,eAAe,EAAE,KAAK,CAAC,WAAW;YAClC,aAAa,EAAE,KAAK,CAAC,SAAS;YAC9B,eAAe,EAAE,KAAK,CAAC,cAAc,IAAI,IAAI;YAC7C,YAAY,EAAE,KAAK,CAAC,WAAW,IAAI,IAAI;YACvC,SAAS,EAAE,KAAK,CAAC,QAAQ;YACzB,aAAa,EAAE,KAAK,CAAC,YAAY;SAClC;QACD,sBAAsB,EAAE,oBAAoB,IAAI,IAAI;QACpD,QAAQ;KACT,CAAC;AACJ,CAAC;AAID,OAAO,EAAE,uBAAuB,EAAE,MAAM,uBAAuB,CAAC"}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Heading Level Normalizer for Section-Aware Chunking
|
|
3
|
+
*
|
|
4
|
+
* Fixes inconsistent heading levels from Datalab OCR by detecting
|
|
5
|
+
* repeating heading patterns (e.g., "ARTICLE N") and normalizing
|
|
6
|
+
* their heading levels to the mode (most common) level within each group.
|
|
7
|
+
*
|
|
8
|
+
* @module services/chunking/heading-normalizer
|
|
9
|
+
*/
|
|
10
|
+
import { MarkdownBlock } from './markdown-parser.js';
|
|
11
|
+
/** Configuration for heading normalization */
|
|
12
|
+
export interface HeadingNormalizationConfig {
|
|
13
|
+
/** Enable heading normalization (default: false) */
|
|
14
|
+
enabled: boolean;
|
|
15
|
+
/** Minimum pattern group size to trigger normalization (default: 3) */
|
|
16
|
+
minPatternCount?: number;
|
|
17
|
+
}
|
|
18
|
+
/**
|
|
19
|
+
* Normalize heading levels in-place for consistent section hierarchy.
|
|
20
|
+
*
|
|
21
|
+
* Groups headings by structural patterns (ARTICLE N, Section N.N, etc.),
|
|
22
|
+
* then normalizes each group to use the mode heading level. This fixes
|
|
23
|
+
* Datalab OCR inconsistencies where identical structural headings get
|
|
24
|
+
* assigned different levels (e.g., ARTICLE 1 as H1 but ARTICLE 5 as H3).
|
|
25
|
+
*
|
|
26
|
+
* Only mutates `block.headingLevel` - never modifies `block.text`.
|
|
27
|
+
*
|
|
28
|
+
* @param blocks - Parsed markdown blocks (mutated in-place)
|
|
29
|
+
* @param config - Normalization configuration
|
|
30
|
+
* @returns The same blocks array (for chaining convenience)
|
|
31
|
+
*/
|
|
32
|
+
export declare function normalizeHeadingLevels(blocks: MarkdownBlock[], config: HeadingNormalizationConfig): MarkdownBlock[];
|
|
33
|
+
//# sourceMappingURL=heading-normalizer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"heading-normalizer.d.ts","sourceRoot":"","sources":["../../../src/services/chunking/heading-normalizer.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AAErD,8CAA8C;AAC9C,MAAM,WAAW,0BAA0B;IACzC,oDAAoD;IACpD,OAAO,EAAE,OAAO,CAAC;IACjB,uEAAuE;IACvE,eAAe,CAAC,EAAE,MAAM,CAAC;CAC1B;AAwDD;;;;;;;;;;;;;GAaG;AACH,wBAAgB,sBAAsB,CACpC,MAAM,EAAE,aAAa,EAAE,EACvB,MAAM,EAAE,0BAA0B,GACjC,aAAa,EAAE,CA6CjB"}
|