rag-lite-ts 2.1.1 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +88 -5
- package/dist/{cli → cjs/cli}/indexer.js +73 -15
- package/dist/cjs/cli/ui-server.d.ts +5 -0
- package/dist/cjs/cli/ui-server.js +152 -0
- package/dist/{cli.js → cjs/cli.js} +25 -6
- package/dist/{core → cjs/core}/binary-index-format.js +6 -3
- package/dist/{core → cjs/core}/db.d.ts +56 -0
- package/dist/{core → cjs/core}/db.js +105 -0
- package/dist/{core → cjs/core}/ingestion.js +3 -0
- package/dist/cjs/core/knowledge-base-manager.d.ts +109 -0
- package/dist/cjs/core/knowledge-base-manager.js +256 -0
- package/dist/{core → cjs/core}/model-validator.js +1 -1
- package/dist/{core → cjs/core}/search-pipeline.js +1 -1
- package/dist/{core → cjs/core}/search.js +1 -1
- package/dist/cjs/core/vector-index-messages.d.ts +52 -0
- package/dist/cjs/core/vector-index-messages.js +5 -0
- package/dist/cjs/core/vector-index-worker.d.ts +6 -0
- package/dist/cjs/core/vector-index-worker.js +304 -0
- package/dist/cjs/core/vector-index.d.ts +107 -0
- package/dist/cjs/core/vector-index.js +344 -0
- package/dist/{factories → cjs/factories}/ingestion-factory.js +3 -7
- package/dist/{factories → cjs/factories}/search-factory.js +11 -0
- package/dist/{index-manager.d.ts → cjs/index-manager.d.ts} +23 -3
- package/dist/{index-manager.js → cjs/index-manager.js} +84 -15
- package/dist/{index.d.ts → cjs/index.d.ts} +2 -1
- package/dist/{index.js → cjs/index.js} +3 -1
- package/dist/esm/api-errors.d.ts +90 -0
- package/dist/esm/api-errors.js +320 -0
- package/dist/esm/cli/indexer.d.ts +11 -0
- package/dist/esm/cli/indexer.js +529 -0
- package/dist/esm/cli/search.d.ts +7 -0
- package/dist/esm/cli/search.js +332 -0
- package/dist/esm/cli/ui-server.d.ts +5 -0
- package/dist/esm/cli/ui-server.js +152 -0
- package/dist/esm/cli.d.ts +3 -0
- package/dist/esm/cli.js +548 -0
- package/dist/esm/config.d.ts +51 -0
- package/dist/esm/config.js +79 -0
- package/dist/esm/core/abstract-embedder.d.ts +125 -0
- package/dist/esm/core/abstract-embedder.js +264 -0
- package/dist/esm/core/actionable-error-messages.d.ts +60 -0
- package/dist/esm/core/actionable-error-messages.js +397 -0
- package/dist/esm/core/adapters.d.ts +93 -0
- package/dist/esm/core/adapters.js +139 -0
- package/dist/esm/core/batch-processing-optimizer.d.ts +155 -0
- package/dist/esm/core/batch-processing-optimizer.js +536 -0
- package/dist/esm/core/binary-index-format.d.ts +78 -0
- package/dist/esm/core/binary-index-format.js +294 -0
- package/dist/esm/core/chunker.d.ts +119 -0
- package/dist/esm/core/chunker.js +73 -0
- package/dist/esm/core/cli-database-utils.d.ts +53 -0
- package/dist/esm/core/cli-database-utils.js +239 -0
- package/dist/esm/core/config.d.ts +102 -0
- package/dist/esm/core/config.js +247 -0
- package/dist/esm/core/content-errors.d.ts +111 -0
- package/dist/esm/core/content-errors.js +362 -0
- package/dist/esm/core/content-manager.d.ts +335 -0
- package/dist/esm/core/content-manager.js +1476 -0
- package/dist/esm/core/content-performance-optimizer.d.ts +150 -0
- package/dist/esm/core/content-performance-optimizer.js +516 -0
- package/dist/esm/core/content-resolver.d.ts +104 -0
- package/dist/esm/core/content-resolver.js +285 -0
- package/dist/esm/core/cross-modal-search.d.ts +164 -0
- package/dist/esm/core/cross-modal-search.js +342 -0
- package/dist/esm/core/database-connection-manager.d.ts +109 -0
- package/dist/esm/core/database-connection-manager.js +310 -0
- package/dist/esm/core/db.d.ts +269 -0
- package/dist/esm/core/db.js +1000 -0
- package/dist/esm/core/embedder-factory.d.ts +154 -0
- package/dist/esm/core/embedder-factory.js +311 -0
- package/dist/esm/core/error-handler.d.ts +112 -0
- package/dist/esm/core/error-handler.js +239 -0
- package/dist/esm/core/index.d.ts +59 -0
- package/dist/esm/core/index.js +69 -0
- package/dist/esm/core/ingestion.d.ts +202 -0
- package/dist/esm/core/ingestion.js +904 -0
- package/dist/esm/core/interfaces.d.ts +408 -0
- package/dist/esm/core/interfaces.js +106 -0
- package/dist/esm/core/knowledge-base-manager.d.ts +109 -0
- package/dist/esm/core/knowledge-base-manager.js +256 -0
- package/dist/esm/core/lazy-dependency-loader.d.ts +147 -0
- package/dist/esm/core/lazy-dependency-loader.js +435 -0
- package/dist/esm/core/mode-detection-service.d.ts +150 -0
- package/dist/esm/core/mode-detection-service.js +565 -0
- package/dist/esm/core/mode-model-validator.d.ts +92 -0
- package/dist/esm/core/mode-model-validator.js +203 -0
- package/dist/esm/core/model-registry.d.ts +116 -0
- package/dist/esm/core/model-registry.js +411 -0
- package/dist/esm/core/model-validator.d.ts +217 -0
- package/dist/esm/core/model-validator.js +782 -0
- package/dist/esm/core/path-manager.d.ts +47 -0
- package/dist/esm/core/path-manager.js +71 -0
- package/dist/esm/core/raglite-paths.d.ts +121 -0
- package/dist/esm/core/raglite-paths.js +145 -0
- package/dist/esm/core/reranking-config.d.ts +42 -0
- package/dist/esm/core/reranking-config.js +147 -0
- package/dist/esm/core/reranking-factory.d.ts +92 -0
- package/dist/esm/core/reranking-factory.js +410 -0
- package/dist/esm/core/reranking-strategies.d.ts +310 -0
- package/dist/esm/core/reranking-strategies.js +650 -0
- package/dist/esm/core/resource-cleanup.d.ts +163 -0
- package/dist/esm/core/resource-cleanup.js +371 -0
- package/dist/esm/core/resource-manager.d.ts +212 -0
- package/dist/esm/core/resource-manager.js +564 -0
- package/dist/esm/core/search-pipeline.d.ts +111 -0
- package/dist/esm/core/search-pipeline.js +287 -0
- package/dist/esm/core/search.d.ts +141 -0
- package/dist/esm/core/search.js +320 -0
- package/dist/esm/core/streaming-operations.d.ts +145 -0
- package/dist/esm/core/streaming-operations.js +409 -0
- package/dist/esm/core/types.d.ts +66 -0
- package/dist/esm/core/types.js +6 -0
- package/dist/esm/core/universal-embedder.d.ts +177 -0
- package/dist/esm/core/universal-embedder.js +139 -0
- package/dist/esm/core/validation-messages.d.ts +99 -0
- package/dist/esm/core/validation-messages.js +334 -0
- package/dist/esm/core/vector-index-messages.d.ts +52 -0
- package/dist/esm/core/vector-index-messages.js +5 -0
- package/dist/esm/core/vector-index-worker.d.ts +6 -0
- package/dist/esm/core/vector-index-worker.js +304 -0
- package/dist/esm/core/vector-index.d.ts +107 -0
- package/dist/esm/core/vector-index.js +344 -0
- package/dist/esm/dom-polyfills.d.ts +6 -0
- package/dist/esm/dom-polyfills.js +37 -0
- package/dist/esm/factories/index.d.ts +27 -0
- package/dist/esm/factories/index.js +29 -0
- package/dist/esm/factories/ingestion-factory.d.ts +200 -0
- package/dist/esm/factories/ingestion-factory.js +473 -0
- package/dist/esm/factories/search-factory.d.ts +154 -0
- package/dist/esm/factories/search-factory.js +355 -0
- package/dist/esm/file-processor.d.ts +147 -0
- package/dist/esm/file-processor.js +963 -0
- package/dist/esm/index-manager.d.ts +136 -0
- package/dist/esm/index-manager.js +667 -0
- package/dist/esm/index.d.ts +76 -0
- package/dist/esm/index.js +112 -0
- package/dist/esm/indexer.d.ts +7 -0
- package/dist/esm/indexer.js +54 -0
- package/dist/esm/ingestion.d.ts +63 -0
- package/dist/esm/ingestion.js +124 -0
- package/dist/esm/mcp-server.d.ts +46 -0
- package/dist/esm/mcp-server.js +1820 -0
- package/dist/esm/multimodal/clip-embedder.d.ts +327 -0
- package/dist/esm/multimodal/clip-embedder.js +996 -0
- package/dist/esm/multimodal/index.d.ts +6 -0
- package/dist/esm/multimodal/index.js +6 -0
- package/dist/esm/preprocess.d.ts +19 -0
- package/dist/esm/preprocess.js +203 -0
- package/dist/esm/preprocessors/index.d.ts +17 -0
- package/dist/esm/preprocessors/index.js +38 -0
- package/dist/esm/preprocessors/mdx.d.ts +25 -0
- package/dist/esm/preprocessors/mdx.js +101 -0
- package/dist/esm/preprocessors/mermaid.d.ts +68 -0
- package/dist/esm/preprocessors/mermaid.js +329 -0
- package/dist/esm/preprocessors/registry.d.ts +56 -0
- package/dist/esm/preprocessors/registry.js +179 -0
- package/dist/esm/run-error-recovery-tests.d.ts +7 -0
- package/dist/esm/run-error-recovery-tests.js +101 -0
- package/dist/esm/search-standalone.d.ts +7 -0
- package/dist/esm/search-standalone.js +117 -0
- package/dist/esm/search.d.ts +99 -0
- package/dist/esm/search.js +177 -0
- package/dist/esm/test-utils.d.ts +18 -0
- package/dist/esm/test-utils.js +27 -0
- package/dist/esm/text/chunker.d.ts +33 -0
- package/dist/esm/text/chunker.js +279 -0
- package/dist/esm/text/embedder.d.ts +111 -0
- package/dist/esm/text/embedder.js +386 -0
- package/dist/esm/text/index.d.ts +8 -0
- package/dist/esm/text/index.js +9 -0
- package/dist/esm/text/preprocessors/index.d.ts +17 -0
- package/dist/esm/text/preprocessors/index.js +38 -0
- package/dist/esm/text/preprocessors/mdx.d.ts +25 -0
- package/dist/esm/text/preprocessors/mdx.js +101 -0
- package/dist/esm/text/preprocessors/mermaid.d.ts +68 -0
- package/dist/esm/text/preprocessors/mermaid.js +330 -0
- package/dist/esm/text/preprocessors/registry.d.ts +56 -0
- package/dist/esm/text/preprocessors/registry.js +180 -0
- package/dist/esm/text/reranker.d.ts +49 -0
- package/dist/esm/text/reranker.js +274 -0
- package/dist/esm/text/sentence-transformer-embedder.d.ts +96 -0
- package/dist/esm/text/sentence-transformer-embedder.js +340 -0
- package/dist/esm/text/tokenizer.d.ts +22 -0
- package/dist/esm/text/tokenizer.js +64 -0
- package/dist/esm/types.d.ts +83 -0
- package/dist/esm/types.js +3 -0
- package/dist/esm/utils/vector-math.d.ts +31 -0
- package/dist/esm/utils/vector-math.js +70 -0
- package/package.json +39 -14
- package/dist/core/vector-index.d.ts +0 -72
- package/dist/core/vector-index.js +0 -331
- /package/dist/{api-errors.d.ts → cjs/api-errors.d.ts} +0 -0
- /package/dist/{api-errors.js → cjs/api-errors.js} +0 -0
- /package/dist/{cli → cjs/cli}/indexer.d.ts +0 -0
- /package/dist/{cli → cjs/cli}/search.d.ts +0 -0
- /package/dist/{cli → cjs/cli}/search.js +0 -0
- /package/dist/{cli.d.ts → cjs/cli.d.ts} +0 -0
- /package/dist/{config.d.ts → cjs/config.d.ts} +0 -0
- /package/dist/{config.js → cjs/config.js} +0 -0
- /package/dist/{core → cjs/core}/abstract-embedder.d.ts +0 -0
- /package/dist/{core → cjs/core}/abstract-embedder.js +0 -0
- /package/dist/{core → cjs/core}/actionable-error-messages.d.ts +0 -0
- /package/dist/{core → cjs/core}/actionable-error-messages.js +0 -0
- /package/dist/{core → cjs/core}/adapters.d.ts +0 -0
- /package/dist/{core → cjs/core}/adapters.js +0 -0
- /package/dist/{core → cjs/core}/batch-processing-optimizer.d.ts +0 -0
- /package/dist/{core → cjs/core}/batch-processing-optimizer.js +0 -0
- /package/dist/{core → cjs/core}/binary-index-format.d.ts +0 -0
- /package/dist/{core → cjs/core}/chunker.d.ts +0 -0
- /package/dist/{core → cjs/core}/chunker.js +0 -0
- /package/dist/{core → cjs/core}/cli-database-utils.d.ts +0 -0
- /package/dist/{core → cjs/core}/cli-database-utils.js +0 -0
- /package/dist/{core → cjs/core}/config.d.ts +0 -0
- /package/dist/{core → cjs/core}/config.js +0 -0
- /package/dist/{core → cjs/core}/content-errors.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-errors.js +0 -0
- /package/dist/{core → cjs/core}/content-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-manager.js +0 -0
- /package/dist/{core → cjs/core}/content-performance-optimizer.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-performance-optimizer.js +0 -0
- /package/dist/{core → cjs/core}/content-resolver.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-resolver.js +0 -0
- /package/dist/{core → cjs/core}/cross-modal-search.d.ts +0 -0
- /package/dist/{core → cjs/core}/cross-modal-search.js +0 -0
- /package/dist/{core → cjs/core}/database-connection-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/database-connection-manager.js +0 -0
- /package/dist/{core → cjs/core}/embedder-factory.d.ts +0 -0
- /package/dist/{core → cjs/core}/embedder-factory.js +0 -0
- /package/dist/{core → cjs/core}/error-handler.d.ts +0 -0
- /package/dist/{core → cjs/core}/error-handler.js +0 -0
- /package/dist/{core → cjs/core}/index.d.ts +0 -0
- /package/dist/{core → cjs/core}/index.js +0 -0
- /package/dist/{core → cjs/core}/ingestion.d.ts +0 -0
- /package/dist/{core → cjs/core}/interfaces.d.ts +0 -0
- /package/dist/{core → cjs/core}/interfaces.js +0 -0
- /package/dist/{core → cjs/core}/lazy-dependency-loader.d.ts +0 -0
- /package/dist/{core → cjs/core}/lazy-dependency-loader.js +0 -0
- /package/dist/{core → cjs/core}/mode-detection-service.d.ts +0 -0
- /package/dist/{core → cjs/core}/mode-detection-service.js +0 -0
- /package/dist/{core → cjs/core}/mode-model-validator.d.ts +0 -0
- /package/dist/{core → cjs/core}/mode-model-validator.js +0 -0
- /package/dist/{core → cjs/core}/model-registry.d.ts +0 -0
- /package/dist/{core → cjs/core}/model-registry.js +0 -0
- /package/dist/{core → cjs/core}/model-validator.d.ts +0 -0
- /package/dist/{core → cjs/core}/path-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/path-manager.js +0 -0
- /package/dist/{core → cjs/core}/raglite-paths.d.ts +0 -0
- /package/dist/{core → cjs/core}/raglite-paths.js +0 -0
- /package/dist/{core → cjs/core}/reranking-config.d.ts +0 -0
- /package/dist/{core → cjs/core}/reranking-config.js +0 -0
- /package/dist/{core → cjs/core}/reranking-factory.d.ts +0 -0
- /package/dist/{core → cjs/core}/reranking-factory.js +0 -0
- /package/dist/{core → cjs/core}/reranking-strategies.d.ts +0 -0
- /package/dist/{core → cjs/core}/reranking-strategies.js +0 -0
- /package/dist/{core → cjs/core}/resource-cleanup.d.ts +0 -0
- /package/dist/{core → cjs/core}/resource-cleanup.js +0 -0
- /package/dist/{core → cjs/core}/resource-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/resource-manager.js +0 -0
- /package/dist/{core → cjs/core}/search-pipeline.d.ts +0 -0
- /package/dist/{core → cjs/core}/search.d.ts +0 -0
- /package/dist/{core → cjs/core}/streaming-operations.d.ts +0 -0
- /package/dist/{core → cjs/core}/streaming-operations.js +0 -0
- /package/dist/{core → cjs/core}/types.d.ts +0 -0
- /package/dist/{core → cjs/core}/types.js +0 -0
- /package/dist/{core → cjs/core}/universal-embedder.d.ts +0 -0
- /package/dist/{core → cjs/core}/universal-embedder.js +0 -0
- /package/dist/{core → cjs/core}/validation-messages.d.ts +0 -0
- /package/dist/{core → cjs/core}/validation-messages.js +0 -0
- /package/dist/{dom-polyfills.d.ts → cjs/dom-polyfills.d.ts} +0 -0
- /package/dist/{dom-polyfills.js → cjs/dom-polyfills.js} +0 -0
- /package/dist/{factories → cjs/factories}/index.d.ts +0 -0
- /package/dist/{factories → cjs/factories}/index.js +0 -0
- /package/dist/{factories → cjs/factories}/ingestion-factory.d.ts +0 -0
- /package/dist/{factories → cjs/factories}/search-factory.d.ts +0 -0
- /package/dist/{file-processor.d.ts → cjs/file-processor.d.ts} +0 -0
- /package/dist/{file-processor.js → cjs/file-processor.js} +0 -0
- /package/dist/{indexer.d.ts → cjs/indexer.d.ts} +0 -0
- /package/dist/{indexer.js → cjs/indexer.js} +0 -0
- /package/dist/{ingestion.d.ts → cjs/ingestion.d.ts} +0 -0
- /package/dist/{ingestion.js → cjs/ingestion.js} +0 -0
- /package/dist/{mcp-server.d.ts → cjs/mcp-server.d.ts} +0 -0
- /package/dist/{mcp-server.js → cjs/mcp-server.js} +0 -0
- /package/dist/{multimodal → cjs/multimodal}/clip-embedder.d.ts +0 -0
- /package/dist/{multimodal → cjs/multimodal}/clip-embedder.js +0 -0
- /package/dist/{multimodal → cjs/multimodal}/index.d.ts +0 -0
- /package/dist/{multimodal → cjs/multimodal}/index.js +0 -0
- /package/dist/{preprocess.d.ts → cjs/preprocess.d.ts} +0 -0
- /package/dist/{preprocess.js → cjs/preprocess.js} +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/index.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/index.js +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mdx.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mdx.js +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mermaid.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mermaid.js +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/registry.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/registry.js +0 -0
- /package/dist/{run-error-recovery-tests.d.ts → cjs/run-error-recovery-tests.d.ts} +0 -0
- /package/dist/{run-error-recovery-tests.js → cjs/run-error-recovery-tests.js} +0 -0
- /package/dist/{search-standalone.d.ts → cjs/search-standalone.d.ts} +0 -0
- /package/dist/{search-standalone.js → cjs/search-standalone.js} +0 -0
- /package/dist/{search.d.ts → cjs/search.d.ts} +0 -0
- /package/dist/{search.js → cjs/search.js} +0 -0
- /package/dist/{test-utils.d.ts → cjs/test-utils.d.ts} +0 -0
- /package/dist/{test-utils.js → cjs/test-utils.js} +0 -0
- /package/dist/{text → cjs/text}/chunker.d.ts +0 -0
- /package/dist/{text → cjs/text}/chunker.js +0 -0
- /package/dist/{text → cjs/text}/embedder.d.ts +0 -0
- /package/dist/{text → cjs/text}/embedder.js +0 -0
- /package/dist/{text → cjs/text}/index.d.ts +0 -0
- /package/dist/{text → cjs/text}/index.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/index.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/index.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mdx.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mdx.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mermaid.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mermaid.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/registry.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/registry.js +0 -0
- /package/dist/{text → cjs/text}/reranker.d.ts +0 -0
- /package/dist/{text → cjs/text}/reranker.js +0 -0
- /package/dist/{text → cjs/text}/sentence-transformer-embedder.d.ts +0 -0
- /package/dist/{text → cjs/text}/sentence-transformer-embedder.js +0 -0
- /package/dist/{text → cjs/text}/tokenizer.d.ts +0 -0
- /package/dist/{text → cjs/text}/tokenizer.js +0 -0
- /package/dist/{types.d.ts → cjs/types.d.ts} +0 -0
- /package/dist/{types.js → cjs/types.js} +0 -0
- /package/dist/{utils → cjs/utils}/vector-math.d.ts +0 -0
- /package/dist/{utils → cjs/utils}/vector-math.js +0 -0
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Binary Index Format Module
|
|
3
|
+
*
|
|
4
|
+
* Provides efficient binary serialization for HNSW vector indices.
|
|
5
|
+
*
|
|
6
|
+
* Format Specification:
|
|
7
|
+
* - Header: 24 bytes (6 × uint32)
|
|
8
|
+
* - Vectors: N × (4 + D × 4) bytes
|
|
9
|
+
* - Little-endian encoding for cross-platform compatibility
|
|
10
|
+
* - 4-byte alignment for Float32Array zero-copy views
|
|
11
|
+
*
|
|
12
|
+
* Performance:
|
|
13
|
+
* - 3.66x smaller than JSON format
|
|
14
|
+
* - 3.5x faster loading
|
|
15
|
+
* - Zero-copy Float32Array views
|
|
16
|
+
*/
|
|
17
|
+
/**
 * In-memory form of a serialized vector index: six numeric header fields
 * plus the stored vectors, optionally split into text and image groups.
 */
export interface BinaryIndexData {
    /** Number of float32 components in every vector. */
    dimensions: number;
    /** Capacity value stored in the header. */
    maxElements: number;
    /** `M` parameter stored in the header. */
    M: number;
    /** `efConstruction` parameter stored in the header. */
    efConstruction: number;
    /** Seed value stored in the header. */
    seed: number;
    /** Element count stored in the header. */
    currentSize: number;
    /** Every stored vector, each paired with its numeric id. */
    vectors: { id: number; vector: Float32Array }[];
    /** Set when `textVectors` and `imageVectors` carry the grouped layout. */
    hasContentTypeGroups?: boolean;
    /** Vectors belonging to the text group (grouped layout only). */
    textVectors?: { id: number; vector: Float32Array }[];
    /** Vectors belonging to the image group (grouped layout only). */
    imageVectors?: { id: number; vector: Float32Array }[];
}
|
|
38
|
+
export declare class BinaryIndexFormat {
    /**
     * Save index data to binary format (original format for backward compatibility)
     *
     * File structure (all fields little-endian):
     * - Header (24 bytes): dimensions, maxElements, M, efConstruction, seed, currentSize
     * - Vectors: For each vector: id (4 bytes) + vector data (dimensions × 4 bytes)
     *
     * The implementation writes `data.vectors.length` into the currentSize
     * header field, so the header count always matches the records in the file.
     *
     * @param indexPath Path to save the binary index file
     * @param data Index data to serialize
     */
    static save(indexPath: string, data: BinaryIndexData): Promise<void>;
    /**
     * Save index data to grouped binary format
     *
     * Falls back to the original format written by `save` when
     * `hasContentTypeGroups` is not set or either group array is missing.
     *
     * File structure (all fields little-endian):
     * - Extended Header (44 bytes):
     *   - Original 6 fields (24 bytes)
     *   - hasGroups flag (4 bytes)
     *   - textOffset (4 bytes)
     *   - textCount (4 bytes)
     *   - imageOffset (4 bytes)
     *   - imageCount (4 bytes)
     * - Data section: [text vectors...][image vectors...]
     *
     * @param indexPath Path to save the binary index file
     * @param data Index data to serialize
     */
    static saveGrouped(indexPath: string, data: BinaryIndexData): Promise<void>;
    /**
     * Load index data from binary format (supports both original and grouped formats)
     *
     * Each vector is read through a Float32Array view over the file buffer
     * and then copied, so the returned vectors do not alias that buffer.
     *
     * @param indexPath Path to the binary index file
     * @returns Deserialized index data
     */
    static load(indexPath: string): Promise<BinaryIndexData>;
}
|
|
78
|
+
//# sourceMappingURL=binary-index-format.d.ts.map
|
|
@@ -0,0 +1,294 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Binary Index Format Module
|
|
3
|
+
*
|
|
4
|
+
* Provides efficient binary serialization for HNSW vector indices.
|
|
5
|
+
*
|
|
6
|
+
* Format Specification:
|
|
7
|
+
* - Header: 24 bytes (6 × uint32)
|
|
8
|
+
* - Vectors: N × (4 + D × 4) bytes
|
|
9
|
+
* - Little-endian encoding for cross-platform compatibility
|
|
10
|
+
* - 4-byte alignment for Float32Array zero-copy views
|
|
11
|
+
*
|
|
12
|
+
* Performance:
|
|
13
|
+
* - 3.66x smaller than JSON format
|
|
14
|
+
* - 3.5x faster loading
|
|
15
|
+
* - Zero-copy Float32Array views
|
|
16
|
+
*/
|
|
17
|
+
import { readFileSync, writeFileSync } from 'fs';
|
|
18
|
+
export class BinaryIndexFormat {
|
|
19
|
+
/**
|
|
20
|
+
* Save index data to binary format (original format for backward compatibility)
|
|
21
|
+
*
|
|
22
|
+
* File structure:
|
|
23
|
+
* - Header (24 bytes): dimensions, maxElements, M, efConstruction, seed, currentSize
|
|
24
|
+
* - Vectors: For each vector: id (4 bytes) + vector data (dimensions × 4 bytes)
|
|
25
|
+
*
|
|
26
|
+
* @param indexPath Path to save the binary index file
|
|
27
|
+
* @param data Index data to serialize
|
|
28
|
+
*/
|
|
29
|
+
static async save(indexPath, data) {
|
|
30
|
+
// Use actual vector count to ensure accurate file size
|
|
31
|
+
const actualVectorCount = data.vectors.length;
|
|
32
|
+
// Calculate total size based on actual vectors
|
|
33
|
+
const headerSize = 24; // 6 uint32 fields
|
|
34
|
+
const vectorSize = 4 + (data.dimensions * 4); // id + vector
|
|
35
|
+
const totalSize = headerSize + (actualVectorCount * vectorSize);
|
|
36
|
+
const buffer = new ArrayBuffer(totalSize);
|
|
37
|
+
const view = new DataView(buffer);
|
|
38
|
+
let offset = 0;
|
|
39
|
+
// Write header (24 bytes, all little-endian)
|
|
40
|
+
view.setUint32(offset, data.dimensions, true);
|
|
41
|
+
offset += 4;
|
|
42
|
+
view.setUint32(offset, data.maxElements, true);
|
|
43
|
+
offset += 4;
|
|
44
|
+
view.setUint32(offset, data.M, true);
|
|
45
|
+
offset += 4;
|
|
46
|
+
view.setUint32(offset, data.efConstruction, true);
|
|
47
|
+
offset += 4;
|
|
48
|
+
view.setUint32(offset, data.seed, true);
|
|
49
|
+
offset += 4;
|
|
50
|
+
// Write actual vector count in header
|
|
51
|
+
view.setUint32(offset, actualVectorCount, true);
|
|
52
|
+
offset += 4;
|
|
53
|
+
// Write vectors
|
|
54
|
+
for (const item of data.vectors) {
|
|
55
|
+
// Ensure 4-byte alignment (should always be true with our format)
|
|
56
|
+
if (offset % 4 !== 0) {
|
|
57
|
+
throw new Error(`Offset ${offset} is not 4-byte aligned`);
|
|
58
|
+
}
|
|
59
|
+
// Write vector ID
|
|
60
|
+
view.setUint32(offset, item.id, true);
|
|
61
|
+
offset += 4;
|
|
62
|
+
// Write vector data
|
|
63
|
+
for (let i = 0; i < item.vector.length; i++) {
|
|
64
|
+
view.setFloat32(offset, item.vector[i], true);
|
|
65
|
+
offset += 4;
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
// Write to file
|
|
69
|
+
writeFileSync(indexPath, Buffer.from(buffer));
|
|
70
|
+
}
|
|
71
|
+
/**
|
|
72
|
+
* Save index data to grouped binary format
|
|
73
|
+
*
|
|
74
|
+
* File structure:
|
|
75
|
+
* - Extended Header (44 bytes):
|
|
76
|
+
* - Original 6 fields (24 bytes)
|
|
77
|
+
* - hasGroups flag (4 bytes)
|
|
78
|
+
* - textOffset (4 bytes)
|
|
79
|
+
* - textCount (4 bytes)
|
|
80
|
+
* - imageOffset (4 bytes)
|
|
81
|
+
* - imageCount (4 bytes)
|
|
82
|
+
* - Data section: [text vectors...][image vectors...]
|
|
83
|
+
*
|
|
84
|
+
* @param indexPath Path to save the binary index file
|
|
85
|
+
* @param data Index data to serialize
|
|
86
|
+
*/
|
|
87
|
+
static async saveGrouped(indexPath, data) {
|
|
88
|
+
if (!data.hasContentTypeGroups || !data.textVectors || !data.imageVectors) {
|
|
89
|
+
// Fallback to original format
|
|
90
|
+
return this.save(indexPath, data);
|
|
91
|
+
}
|
|
92
|
+
const headerSize = 44; // Extended header: 24 + 20 bytes (hasGroups + textOffset + textCount + imageOffset + imageCount)
|
|
93
|
+
const vectorSize = 4 + (data.dimensions * 4); // id + vector
|
|
94
|
+
// Calculate offsets and total size
|
|
95
|
+
const textOffset = headerSize;
|
|
96
|
+
const imageOffset = textOffset + (data.textVectors.length * vectorSize);
|
|
97
|
+
const totalSize = imageOffset + (data.imageVectors.length * vectorSize);
|
|
98
|
+
const buffer = new ArrayBuffer(totalSize);
|
|
99
|
+
const view = new DataView(buffer);
|
|
100
|
+
let offset = 0;
|
|
101
|
+
// Write extended header (40 bytes, all little-endian)
|
|
102
|
+
if (offset + 40 > buffer.byteLength) {
|
|
103
|
+
throw new Error(`Header write would exceed buffer bounds: offset=${offset}, headerSize=40, bufferSize=${buffer.byteLength}`);
|
|
104
|
+
}
|
|
105
|
+
view.setUint32(offset, data.dimensions, true);
|
|
106
|
+
offset += 4;
|
|
107
|
+
view.setUint32(offset, data.maxElements, true);
|
|
108
|
+
offset += 4;
|
|
109
|
+
view.setUint32(offset, data.M, true);
|
|
110
|
+
offset += 4;
|
|
111
|
+
view.setUint32(offset, data.efConstruction, true);
|
|
112
|
+
offset += 4;
|
|
113
|
+
view.setUint32(offset, data.seed, true);
|
|
114
|
+
offset += 4;
|
|
115
|
+
view.setUint32(offset, data.currentSize, true);
|
|
116
|
+
offset += 4;
|
|
117
|
+
// Extended fields
|
|
118
|
+
view.setUint32(offset, 1, true);
|
|
119
|
+
offset += 4; // hasGroups = 1
|
|
120
|
+
view.setUint32(offset, textOffset, true);
|
|
121
|
+
offset += 4;
|
|
122
|
+
view.setUint32(offset, data.textVectors.length, true);
|
|
123
|
+
offset += 4;
|
|
124
|
+
view.setUint32(offset, imageOffset, true);
|
|
125
|
+
offset += 4;
|
|
126
|
+
view.setUint32(offset, data.imageVectors.length, true);
|
|
127
|
+
offset += 4;
|
|
128
|
+
// Write text vectors
|
|
129
|
+
for (const item of data.textVectors) {
|
|
130
|
+
// Ensure 4-byte alignment
|
|
131
|
+
if (offset % 4 !== 0) {
|
|
132
|
+
throw new Error(`Offset ${offset} is not 4-byte aligned`);
|
|
133
|
+
}
|
|
134
|
+
// Check bounds before writing
|
|
135
|
+
if (offset + 4 > buffer.byteLength) {
|
|
136
|
+
throw new Error(`ID write would exceed buffer bounds: offset=${offset}, bufferSize=${buffer.byteLength}`);
|
|
137
|
+
}
|
|
138
|
+
// Write vector ID
|
|
139
|
+
view.setUint32(offset, item.id, true);
|
|
140
|
+
offset += 4;
|
|
141
|
+
// Check bounds for vector data
|
|
142
|
+
const vectorDataSize = item.vector.length * 4;
|
|
143
|
+
if (offset + vectorDataSize > buffer.byteLength) {
|
|
144
|
+
throw new Error(`Vector data write would exceed buffer bounds: offset=${offset}, dataSize=${vectorDataSize}, bufferSize=${buffer.byteLength}`);
|
|
145
|
+
}
|
|
146
|
+
// Write vector data
|
|
147
|
+
for (let i = 0; i < item.vector.length; i++) {
|
|
148
|
+
view.setFloat32(offset, item.vector[i], true);
|
|
149
|
+
offset += 4;
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
// Write image vectors
|
|
153
|
+
for (const item of data.imageVectors) {
|
|
154
|
+
// Ensure 4-byte alignment
|
|
155
|
+
if (offset % 4 !== 0) {
|
|
156
|
+
throw new Error(`Offset ${offset} is not 4-byte aligned`);
|
|
157
|
+
}
|
|
158
|
+
// Check bounds before writing
|
|
159
|
+
if (offset + 4 > buffer.byteLength) {
|
|
160
|
+
throw new Error(`ID write would exceed buffer bounds: offset=${offset}, bufferSize=${buffer.byteLength}`);
|
|
161
|
+
}
|
|
162
|
+
// Write vector ID
|
|
163
|
+
view.setUint32(offset, item.id, true);
|
|
164
|
+
offset += 4;
|
|
165
|
+
// Check bounds for vector data
|
|
166
|
+
const vectorDataSize = item.vector.length * 4;
|
|
167
|
+
if (offset + vectorDataSize > buffer.byteLength) {
|
|
168
|
+
throw new Error(`Vector data write would exceed buffer bounds: offset=${offset}, dataSize=${vectorDataSize}, bufferSize=${buffer.byteLength}`);
|
|
169
|
+
}
|
|
170
|
+
// Write vector data
|
|
171
|
+
for (let i = 0; i < item.vector.length; i++) {
|
|
172
|
+
view.setFloat32(offset, item.vector[i], true);
|
|
173
|
+
offset += 4;
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
// Write to file
|
|
177
|
+
writeFileSync(indexPath, Buffer.from(buffer));
|
|
178
|
+
}
|
|
179
|
+
/**
|
|
180
|
+
* Load index data from binary format (supports both original and grouped formats)
|
|
181
|
+
*
|
|
182
|
+
* Uses zero-copy Float32Array views for efficient loading.
|
|
183
|
+
* Copies the views to ensure data persistence after buffer lifecycle.
|
|
184
|
+
*
|
|
185
|
+
* @param indexPath Path to the binary index file
|
|
186
|
+
* @returns Deserialized index data
|
|
187
|
+
*/
|
|
188
|
+
static async load(indexPath) {
|
|
189
|
+
const buffer = readFileSync(indexPath);
|
|
190
|
+
const view = new DataView(buffer.buffer, buffer.byteOffset, buffer.byteLength);
|
|
191
|
+
let offset = 0;
|
|
192
|
+
// Read basic header (24 bytes, all little-endian)
|
|
193
|
+
const dimensions = view.getUint32(offset, true);
|
|
194
|
+
offset += 4;
|
|
195
|
+
const maxElements = view.getUint32(offset, true);
|
|
196
|
+
offset += 4;
|
|
197
|
+
const M = view.getUint32(offset, true);
|
|
198
|
+
offset += 4;
|
|
199
|
+
const efConstruction = view.getUint32(offset, true);
|
|
200
|
+
offset += 4;
|
|
201
|
+
const seed = view.getUint32(offset, true);
|
|
202
|
+
offset += 4;
|
|
203
|
+
const currentSize = view.getUint32(offset, true);
|
|
204
|
+
offset += 4;
|
|
205
|
+
// Check if this is the extended grouped format (40+ bytes header)
|
|
206
|
+
const hasGroups = buffer.byteLength >= 40 ? view.getUint32(offset, true) : 0;
|
|
207
|
+
if (hasGroups === 1 && buffer.byteLength >= 40) {
|
|
208
|
+
// Load grouped format
|
|
209
|
+
const textOffset = view.getUint32(offset + 4, true);
|
|
210
|
+
const textCount = view.getUint32(offset + 8, true);
|
|
211
|
+
const imageOffset = view.getUint32(offset + 12, true);
|
|
212
|
+
const imageCount = view.getUint32(offset + 16, true);
|
|
213
|
+
// Load text vectors
|
|
214
|
+
const textVectors = [];
|
|
215
|
+
offset = textOffset;
|
|
216
|
+
for (let i = 0; i < textCount; i++) {
|
|
217
|
+
// Ensure 4-byte alignment
|
|
218
|
+
if (offset % 4 !== 0) {
|
|
219
|
+
throw new Error(`Offset ${offset} is not 4-byte aligned`);
|
|
220
|
+
}
|
|
221
|
+
// Read vector ID
|
|
222
|
+
const id = view.getUint32(offset, true);
|
|
223
|
+
offset += 4;
|
|
224
|
+
// Zero-copy Float32Array view
|
|
225
|
+
const vectorView = new Float32Array(buffer.buffer, buffer.byteOffset + offset, dimensions);
|
|
226
|
+
// Copy to avoid buffer lifecycle issues
|
|
227
|
+
const vector = new Float32Array(vectorView);
|
|
228
|
+
offset += dimensions * 4;
|
|
229
|
+
textVectors.push({ id, vector });
|
|
230
|
+
}
|
|
231
|
+
// Load image vectors
|
|
232
|
+
const imageVectors = [];
|
|
233
|
+
offset = imageOffset;
|
|
234
|
+
for (let i = 0; i < imageCount; i++) {
|
|
235
|
+
// Ensure 4-byte alignment
|
|
236
|
+
if (offset % 4 !== 0) {
|
|
237
|
+
throw new Error(`Offset ${offset} is not 4-byte aligned`);
|
|
238
|
+
}
|
|
239
|
+
// Read vector ID
|
|
240
|
+
const id = view.getUint32(offset, true);
|
|
241
|
+
offset += 4;
|
|
242
|
+
// Zero-copy Float32Array view
|
|
243
|
+
const vectorView = new Float32Array(buffer.buffer, buffer.byteOffset + offset, dimensions);
|
|
244
|
+
// Copy to avoid buffer lifecycle issues
|
|
245
|
+
const vector = new Float32Array(vectorView);
|
|
246
|
+
offset += dimensions * 4;
|
|
247
|
+
imageVectors.push({ id, vector });
|
|
248
|
+
}
|
|
249
|
+
// Combine all vectors for backward compatibility
|
|
250
|
+
const allVectors = [...textVectors, ...imageVectors];
|
|
251
|
+
return {
|
|
252
|
+
dimensions,
|
|
253
|
+
maxElements,
|
|
254
|
+
M,
|
|
255
|
+
efConstruction,
|
|
256
|
+
seed,
|
|
257
|
+
currentSize,
|
|
258
|
+
vectors: allVectors,
|
|
259
|
+
hasContentTypeGroups: true,
|
|
260
|
+
textVectors,
|
|
261
|
+
imageVectors
|
|
262
|
+
};
|
|
263
|
+
}
|
|
264
|
+
else {
|
|
265
|
+
// Load original format
|
|
266
|
+
const vectors = [];
|
|
267
|
+
for (let i = 0; i < currentSize; i++) {
|
|
268
|
+
// Ensure 4-byte alignment (should always be true with our format)
|
|
269
|
+
if (offset % 4 !== 0) {
|
|
270
|
+
throw new Error(`Offset ${offset} is not 4-byte aligned`);
|
|
271
|
+
}
|
|
272
|
+
// Read vector ID
|
|
273
|
+
const id = view.getUint32(offset, true);
|
|
274
|
+
offset += 4;
|
|
275
|
+
// Zero-copy Float32Array view (fast!)
|
|
276
|
+
const vectorView = new Float32Array(buffer.buffer, buffer.byteOffset + offset, dimensions);
|
|
277
|
+
// Copy to avoid buffer lifecycle issues
|
|
278
|
+
const vector = new Float32Array(vectorView);
|
|
279
|
+
offset += dimensions * 4;
|
|
280
|
+
vectors.push({ id, vector });
|
|
281
|
+
}
|
|
282
|
+
return {
|
|
283
|
+
dimensions,
|
|
284
|
+
maxElements,
|
|
285
|
+
M,
|
|
286
|
+
efConstruction,
|
|
287
|
+
seed,
|
|
288
|
+
currentSize,
|
|
289
|
+
vectors
|
|
290
|
+
};
|
|
291
|
+
}
|
|
292
|
+
}
|
|
293
|
+
}
|
|
294
|
+
//# sourceMappingURL=binary-index-format.js.map
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CORE MODULE — Shared between text-only (rag-lite-ts) and future multimodal (rag-lite-mm)
|
|
3
|
+
* Model-agnostic. No transformer or modality-specific logic.
|
|
4
|
+
*/
|
|
5
|
+
/**
 * Settings that control how a document is split into chunks.
 * Both values are expressed in tokens.
 */
export interface ChunkConfig {
    /**
     * Target size of each chunk, in tokens.
     * A value in the 200-300 range is recommended.
     */
    chunkSize: number;
    /**
     * Number of tokens shared between neighbouring chunks.
     * A value of 50 is recommended.
     */
    chunkOverlap: number;
}
|
|
14
|
+
/**
 * Content-type-agnostic description of a document to be chunked.
 */
export interface GenericDocument {
    /** Path or other identifier the document came from. */
    source: string;
    /** Title of the document. */
    title: string;
    /** The document content (text, an image path, etc.). */
    content: string;
    /** Identifier of the content type (text, image, etc.). */
    contentType: string;
    /** Free-form metadata attached to the document; may be omitted. */
    metadata?: Record<string, any>;
}
|
|
29
|
+
/**
 * Content-type-agnostic description of one chunk produced from a document.
 */
export interface GenericChunk {
    /** The chunk content (text, an image path, etc.). */
    content: string;
    /** Identifier of the content type (text, image, etc.). */
    contentType: string;
    /** Position of this chunk within its document. */
    chunkIndex: number;
    /** Free-form metadata attached to the chunk; may be omitted. */
    metadata?: Record<string, any>;
}
|
|
42
|
+
/**
 * Contract implemented by every content-type-specific chunker.
 */
export interface ChunkingStrategy {
    /**
     * Report whether this strategy handles the given content type.
     *
     * @param contentType - Content type identifier to test.
     * @returns `true` when the strategy applies to `contentType`.
     */
    appliesTo(contentType: string): boolean;
    /**
     * Split a document into chunks according to this strategy.
     *
     * @param document - The document to chunk.
     * @param config - Chunk size and overlap settings.
     * @returns A promise resolving to the chunks that were produced.
     */
    chunk(document: GenericDocument, config: ChunkConfig): Promise<GenericChunk[]>;
}
|
|
55
|
+
/**
 * Holds the chunking strategies that have been registered and looks them up
 * by content type.
 */
export declare class ChunkingStrategyRegistry {
    private strategies;
    /**
     * Add a chunking strategy to the registry.
     *
     * @param strategy - The strategy to register.
     */
    register(strategy: ChunkingStrategy): void;
    /**
     * Look up the strategy that handles a content type.
     *
     * @param contentType - Content type identifier to look up.
     * @returns The matching strategy, or `undefined` when there is none.
     */
    findStrategy(contentType: string): ChunkingStrategy | undefined;
    /**
     * List every strategy that has been registered.
     *
     * @returns The registered strategies.
     */
    getStrategies(): ChunkingStrategy[];
}
|
|
73
|
+
/**
 * Chunking configuration used when a caller does not supply one.
 */
export declare const DEFAULT_CHUNK_CONFIG: ChunkConfig;
|
|
77
|
+
/**
 * Shared, module-wide registry of chunking strategies.
 */
export declare const chunkingRegistry: ChunkingStrategyRegistry;
|
|
81
|
+
/**
 * Chunk a document of any content type by delegating to a registered strategy.
 *
 * @param document - The document to chunk.
 * @param config - Optional chunk size and overlap settings.
 * @returns A promise resolving to the chunks that were produced.
 */
export declare function chunkGenericDocument(document: GenericDocument, config?: ChunkConfig): Promise<GenericChunk[]>;
|
|
85
|
+
/**
 * A text document handed to the text chunker.
 */
export interface Document {
    /** Path or other identifier the document came from. */
    source: string;
    /** Title of the document. */
    title: string;
    /** The complete text of the document. */
    content: string;
    /** Free-form metadata; may be omitted. */
    metadata?: Record<string, any>;
}
|
|
98
|
+
/**
 * One chunk produced by text chunking.
 */
export interface Chunk {
    /** Text contained in the chunk. */
    text: string;
    /** Position of this chunk within its document. */
    chunkIndex: number;
    /** How many tokens the chunk contains. */
    tokenCount: number;
}
|
|
109
|
+
/**
 * Chunk a text document.
 * Relies on the text chunking strategy provided by the text implementation layer.
 *
 * @param document - The text document to chunk.
 * @param config - Optional chunk size and overlap settings.
 * @returns A promise resolving to the text chunks.
 */
export declare function chunkDocument(document: Document, config?: ChunkConfig): Promise<Chunk[]>;
|
|
114
|
+
/**
 * Add the text chunking strategy to the global registry.
 * Intended to be called while the application is initializing.
 *
 * @returns A promise that settles once registration has finished.
 */
export declare function registerTextChunkingStrategy(): Promise<void>;
|
|
119
|
+
//# sourceMappingURL=chunker.d.ts.map
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CORE MODULE — Shared between text-only (rag-lite-ts) and future multimodal (rag-lite-mm)
|
|
3
|
+
* Model-agnostic. No transformer or modality-specific logic.
|
|
4
|
+
*/
|
|
5
|
+
/**
 * Keeps the registered chunking strategies, in registration order, and
 * resolves the one responsible for a given content type.
 */
export class ChunkingStrategyRegistry {
    strategies = [];
    /**
     * Append a chunking strategy to the registry.
     *
     * @param strategy - Object exposing `appliesTo(contentType)`.
     */
    register(strategy) {
        this.strategies.push(strategy);
    }
    /**
     * Return the earliest-registered strategy whose `appliesTo` accepts the
     * content type, or `undefined` when none does.
     *
     * @param contentType - Content type identifier to look up.
     */
    findStrategy(contentType) {
        for (const candidate of this.strategies) {
            if (candidate.appliesTo(contentType)) {
                return candidate;
            }
        }
        return undefined;
    }
    /**
     * Return a shallow copy of the registered strategies, so that mutating
     * the result does not affect the registry.
     */
    getStrategies() {
        return this.strategies.slice();
    }
}
|
|
29
|
+
/**
 * Chunking configuration applied when a caller does not pass one.
 */
export const DEFAULT_CHUNK_CONFIG = {
    // Aim for chunks in the 200-300 token range.
    chunkSize: 250,
    // Tokens shared between neighbouring chunks.
    chunkOverlap: 50,
};
|
|
36
|
+
/**
 * Module-wide strategy registry; starts out empty when the module is evaluated.
 */
export const chunkingRegistry = new ChunkingStrategyRegistry();
|
|
40
|
+
/**
 * Chunk a document by delegating to the strategy registered for its
 * content type in the global registry.
 *
 * @param document - Document to chunk; its `contentType` selects the strategy.
 * @param config - Chunking settings; defaults to `DEFAULT_CHUNK_CONFIG`.
 * @returns Whatever the selected strategy's `chunk` call produces.
 * @throws Error when no registered strategy applies to the content type.
 */
export async function chunkGenericDocument(document, config = DEFAULT_CHUNK_CONFIG) {
    const contentType = document.contentType;
    const strategy = chunkingRegistry.findStrategy(contentType);
    if (strategy) {
        return strategy.chunk(document, config);
    }
    throw new Error(`No chunking strategy found for content type: ${contentType}`);
}
|
|
50
|
+
/**
 * Chunk a text document by forwarding to `chunkDocument` from the text
 * implementation layer.
 *
 * @param document - The text document to chunk.
 * @param config - Chunking settings; defaults to `DEFAULT_CHUNK_CONFIG`.
 * @returns The result of the text layer's `chunkDocument`.
 */
export async function chunkDocument(document, config = DEFAULT_CHUNK_CONFIG) {
    // Loaded lazily rather than through a static import, to avoid circular dependencies.
    const textLayer = await import('../text/chunker.js');
    const textChunkDocument = textLayer.chunkDocument;
    return textChunkDocument(document, config);
}
|
|
59
|
+
/**
 * Create a `TextChunkingStrategy` and add it to the global registry.
 * Intended to be called while the application is initializing.
 * Every call registers a new strategy instance.
 */
export async function registerTextChunkingStrategy() {
    const textLayer = await import('../text/chunker.js');
    chunkingRegistry.register(new textLayer.TextChunkingStrategy());
}
|
|
68
|
+
// Register the text strategy as soon as this module is loaded, so that text
// chunking works without explicit setup. Registration is asynchronous and is
// not awaited here; a failure is logged as a warning instead of being thrown.
registerTextChunkingStrategy().then(undefined, (registrationError) => {
    console.warn('Failed to register text chunking strategy:', registrationError);
});
|
|
73
|
+
//# sourceMappingURL=chunker.js.map
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CLI Database Utilities - Database access helpers for CLI commands
|
|
3
|
+
* Provides database locking detection and retry mechanisms for CLI operations
|
|
4
|
+
* Prevents conflicts between CLI commands and long-running processes like MCP server
|
|
5
|
+
*/
|
|
6
|
+
/**
 * Options that tune how CLI commands wait for database access.
 * Every field is optional.
 */
export interface CLIDatabaseOptions {
    /** Longest time to wait for database access, in milliseconds. */
    maxWaitMs?: number;
    /** Interval between retries, in milliseconds. */
    retryIntervalMs?: number;
    /** Whether progress messages are shown to the user. */
    showProgress?: boolean;
    /** Name of the command, used to make error messages clearer. */
    commandName?: string;
}
|
|
19
|
+
/**
 * Wait until the database can be used by a CLI operation.
 * Offers user-friendly progress messages and error handling.
 *
 * @param dbPath - Path of the database.
 * @param options - Optional wait, retry and messaging settings.
 * @returns A promise that resolves with no value.
 */
export declare function waitForCLIDatabaseAccess(dbPath: string, options?: CLIDatabaseOptions): Promise<void>;
|
|
24
|
+
/**
 * Run a CLI operation with database access protection.
 * Database locking is handled automatically and feedback is given to the user.
 *
 * @param dbPath - Path of the database.
 * @param operation - Asynchronous operation to run.
 * @param options - Optional wait, retry and messaging settings.
 * @returns A promise of type `T`, the same type `operation` resolves to.
 */
export declare function withCLIDatabaseAccess<T>(dbPath: string, operation: () => Promise<T>, options?: CLIDatabaseOptions): Promise<T>;
|
|
29
|
+
/**
 * Non-blocking check of whether the database is busy right now.
 * Handy for displaying warnings or status information.
 *
 * @param dbPath - Path of the database.
 * @returns A promise resolving to the busy flag, with an optional reason and
 * optional suggestions.
 */
export declare function isDatabaseBusy(dbPath: string): Promise<{
    isBusy: boolean;
    reason?: string;
    suggestions?: string[];
}>;
|
|
38
|
+
/**
 * Display status information about the database for debugging.
 * Helpful when troubleshooting CLI problems.
 *
 * @param dbPath - Path of the database.
 */
export declare function showDatabaseStatus(dbPath: string): Promise<void>;
|
|
43
|
+
/**
 * Forcibly clean up database connections. Emergency use only.
 * Use with caution, and only to recover from a stuck state.
 *
 * @param dbPath - Path of the database.
 */
export declare function forceCleanupDatabase(dbPath: string): Promise<void>;
|
|
48
|
+
/**
 * Graceful-shutdown helper for CLI commands.
 * Makes sure cleanup happens properly when a CLI command is interrupted.
 *
 * @param dbPath - Optional path of the database.
 */
export declare function setupCLICleanup(dbPath?: string): void;
|
|
53
|
+
//# sourceMappingURL=cli-database-utils.d.ts.map
|