rag-lite-ts 2.1.1 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +88 -5
- package/dist/{cli → cjs/cli}/indexer.js +73 -15
- package/dist/cjs/cli/ui-server.d.ts +5 -0
- package/dist/cjs/cli/ui-server.js +152 -0
- package/dist/{cli.js → cjs/cli.js} +25 -6
- package/dist/{core → cjs/core}/binary-index-format.js +6 -3
- package/dist/{core → cjs/core}/db.d.ts +56 -0
- package/dist/{core → cjs/core}/db.js +105 -0
- package/dist/{core → cjs/core}/ingestion.js +3 -0
- package/dist/cjs/core/knowledge-base-manager.d.ts +109 -0
- package/dist/cjs/core/knowledge-base-manager.js +256 -0
- package/dist/{core → cjs/core}/model-validator.js +1 -1
- package/dist/{core → cjs/core}/search-pipeline.js +1 -1
- package/dist/{core → cjs/core}/search.js +1 -1
- package/dist/cjs/core/vector-index-messages.d.ts +52 -0
- package/dist/cjs/core/vector-index-messages.js +5 -0
- package/dist/cjs/core/vector-index-worker.d.ts +6 -0
- package/dist/cjs/core/vector-index-worker.js +304 -0
- package/dist/cjs/core/vector-index.d.ts +107 -0
- package/dist/cjs/core/vector-index.js +344 -0
- package/dist/{factories → cjs/factories}/ingestion-factory.js +3 -7
- package/dist/{factories → cjs/factories}/search-factory.js +11 -0
- package/dist/{index-manager.d.ts → cjs/index-manager.d.ts} +23 -3
- package/dist/{index-manager.js → cjs/index-manager.js} +84 -15
- package/dist/{index.d.ts → cjs/index.d.ts} +2 -1
- package/dist/{index.js → cjs/index.js} +3 -1
- package/dist/esm/api-errors.d.ts +90 -0
- package/dist/esm/api-errors.js +320 -0
- package/dist/esm/cli/indexer.d.ts +11 -0
- package/dist/esm/cli/indexer.js +529 -0
- package/dist/esm/cli/search.d.ts +7 -0
- package/dist/esm/cli/search.js +332 -0
- package/dist/esm/cli/ui-server.d.ts +5 -0
- package/dist/esm/cli/ui-server.js +152 -0
- package/dist/esm/cli.d.ts +3 -0
- package/dist/esm/cli.js +548 -0
- package/dist/esm/config.d.ts +51 -0
- package/dist/esm/config.js +79 -0
- package/dist/esm/core/abstract-embedder.d.ts +125 -0
- package/dist/esm/core/abstract-embedder.js +264 -0
- package/dist/esm/core/actionable-error-messages.d.ts +60 -0
- package/dist/esm/core/actionable-error-messages.js +397 -0
- package/dist/esm/core/adapters.d.ts +93 -0
- package/dist/esm/core/adapters.js +139 -0
- package/dist/esm/core/batch-processing-optimizer.d.ts +155 -0
- package/dist/esm/core/batch-processing-optimizer.js +536 -0
- package/dist/esm/core/binary-index-format.d.ts +78 -0
- package/dist/esm/core/binary-index-format.js +294 -0
- package/dist/esm/core/chunker.d.ts +119 -0
- package/dist/esm/core/chunker.js +73 -0
- package/dist/esm/core/cli-database-utils.d.ts +53 -0
- package/dist/esm/core/cli-database-utils.js +239 -0
- package/dist/esm/core/config.d.ts +102 -0
- package/dist/esm/core/config.js +247 -0
- package/dist/esm/core/content-errors.d.ts +111 -0
- package/dist/esm/core/content-errors.js +362 -0
- package/dist/esm/core/content-manager.d.ts +335 -0
- package/dist/esm/core/content-manager.js +1476 -0
- package/dist/esm/core/content-performance-optimizer.d.ts +150 -0
- package/dist/esm/core/content-performance-optimizer.js +516 -0
- package/dist/esm/core/content-resolver.d.ts +104 -0
- package/dist/esm/core/content-resolver.js +285 -0
- package/dist/esm/core/cross-modal-search.d.ts +164 -0
- package/dist/esm/core/cross-modal-search.js +342 -0
- package/dist/esm/core/database-connection-manager.d.ts +109 -0
- package/dist/esm/core/database-connection-manager.js +310 -0
- package/dist/esm/core/db.d.ts +269 -0
- package/dist/esm/core/db.js +1000 -0
- package/dist/esm/core/embedder-factory.d.ts +154 -0
- package/dist/esm/core/embedder-factory.js +311 -0
- package/dist/esm/core/error-handler.d.ts +112 -0
- package/dist/esm/core/error-handler.js +239 -0
- package/dist/esm/core/index.d.ts +59 -0
- package/dist/esm/core/index.js +69 -0
- package/dist/esm/core/ingestion.d.ts +202 -0
- package/dist/esm/core/ingestion.js +904 -0
- package/dist/esm/core/interfaces.d.ts +408 -0
- package/dist/esm/core/interfaces.js +106 -0
- package/dist/esm/core/knowledge-base-manager.d.ts +109 -0
- package/dist/esm/core/knowledge-base-manager.js +256 -0
- package/dist/esm/core/lazy-dependency-loader.d.ts +147 -0
- package/dist/esm/core/lazy-dependency-loader.js +435 -0
- package/dist/esm/core/mode-detection-service.d.ts +150 -0
- package/dist/esm/core/mode-detection-service.js +565 -0
- package/dist/esm/core/mode-model-validator.d.ts +92 -0
- package/dist/esm/core/mode-model-validator.js +203 -0
- package/dist/esm/core/model-registry.d.ts +116 -0
- package/dist/esm/core/model-registry.js +411 -0
- package/dist/esm/core/model-validator.d.ts +217 -0
- package/dist/esm/core/model-validator.js +782 -0
- package/dist/esm/core/path-manager.d.ts +47 -0
- package/dist/esm/core/path-manager.js +71 -0
- package/dist/esm/core/raglite-paths.d.ts +121 -0
- package/dist/esm/core/raglite-paths.js +145 -0
- package/dist/esm/core/reranking-config.d.ts +42 -0
- package/dist/esm/core/reranking-config.js +147 -0
- package/dist/esm/core/reranking-factory.d.ts +92 -0
- package/dist/esm/core/reranking-factory.js +410 -0
- package/dist/esm/core/reranking-strategies.d.ts +310 -0
- package/dist/esm/core/reranking-strategies.js +650 -0
- package/dist/esm/core/resource-cleanup.d.ts +163 -0
- package/dist/esm/core/resource-cleanup.js +371 -0
- package/dist/esm/core/resource-manager.d.ts +212 -0
- package/dist/esm/core/resource-manager.js +564 -0
- package/dist/esm/core/search-pipeline.d.ts +111 -0
- package/dist/esm/core/search-pipeline.js +287 -0
- package/dist/esm/core/search.d.ts +141 -0
- package/dist/esm/core/search.js +320 -0
- package/dist/esm/core/streaming-operations.d.ts +145 -0
- package/dist/esm/core/streaming-operations.js +409 -0
- package/dist/esm/core/types.d.ts +66 -0
- package/dist/esm/core/types.js +6 -0
- package/dist/esm/core/universal-embedder.d.ts +177 -0
- package/dist/esm/core/universal-embedder.js +139 -0
- package/dist/esm/core/validation-messages.d.ts +99 -0
- package/dist/esm/core/validation-messages.js +334 -0
- package/dist/esm/core/vector-index-messages.d.ts +52 -0
- package/dist/esm/core/vector-index-messages.js +5 -0
- package/dist/esm/core/vector-index-worker.d.ts +6 -0
- package/dist/esm/core/vector-index-worker.js +304 -0
- package/dist/esm/core/vector-index.d.ts +107 -0
- package/dist/esm/core/vector-index.js +344 -0
- package/dist/esm/dom-polyfills.d.ts +6 -0
- package/dist/esm/dom-polyfills.js +37 -0
- package/dist/esm/factories/index.d.ts +27 -0
- package/dist/esm/factories/index.js +29 -0
- package/dist/esm/factories/ingestion-factory.d.ts +200 -0
- package/dist/esm/factories/ingestion-factory.js +473 -0
- package/dist/esm/factories/search-factory.d.ts +154 -0
- package/dist/esm/factories/search-factory.js +355 -0
- package/dist/esm/file-processor.d.ts +147 -0
- package/dist/esm/file-processor.js +963 -0
- package/dist/esm/index-manager.d.ts +136 -0
- package/dist/esm/index-manager.js +667 -0
- package/dist/esm/index.d.ts +76 -0
- package/dist/esm/index.js +112 -0
- package/dist/esm/indexer.d.ts +7 -0
- package/dist/esm/indexer.js +54 -0
- package/dist/esm/ingestion.d.ts +63 -0
- package/dist/esm/ingestion.js +124 -0
- package/dist/esm/mcp-server.d.ts +46 -0
- package/dist/esm/mcp-server.js +1820 -0
- package/dist/esm/multimodal/clip-embedder.d.ts +327 -0
- package/dist/esm/multimodal/clip-embedder.js +996 -0
- package/dist/esm/multimodal/index.d.ts +6 -0
- package/dist/esm/multimodal/index.js +6 -0
- package/dist/esm/preprocess.d.ts +19 -0
- package/dist/esm/preprocess.js +203 -0
- package/dist/esm/preprocessors/index.d.ts +17 -0
- package/dist/esm/preprocessors/index.js +38 -0
- package/dist/esm/preprocessors/mdx.d.ts +25 -0
- package/dist/esm/preprocessors/mdx.js +101 -0
- package/dist/esm/preprocessors/mermaid.d.ts +68 -0
- package/dist/esm/preprocessors/mermaid.js +329 -0
- package/dist/esm/preprocessors/registry.d.ts +56 -0
- package/dist/esm/preprocessors/registry.js +179 -0
- package/dist/esm/run-error-recovery-tests.d.ts +7 -0
- package/dist/esm/run-error-recovery-tests.js +101 -0
- package/dist/esm/search-standalone.d.ts +7 -0
- package/dist/esm/search-standalone.js +117 -0
- package/dist/esm/search.d.ts +99 -0
- package/dist/esm/search.js +177 -0
- package/dist/esm/test-utils.d.ts +18 -0
- package/dist/esm/test-utils.js +27 -0
- package/dist/esm/text/chunker.d.ts +33 -0
- package/dist/esm/text/chunker.js +279 -0
- package/dist/esm/text/embedder.d.ts +111 -0
- package/dist/esm/text/embedder.js +386 -0
- package/dist/esm/text/index.d.ts +8 -0
- package/dist/esm/text/index.js +9 -0
- package/dist/esm/text/preprocessors/index.d.ts +17 -0
- package/dist/esm/text/preprocessors/index.js +38 -0
- package/dist/esm/text/preprocessors/mdx.d.ts +25 -0
- package/dist/esm/text/preprocessors/mdx.js +101 -0
- package/dist/esm/text/preprocessors/mermaid.d.ts +68 -0
- package/dist/esm/text/preprocessors/mermaid.js +330 -0
- package/dist/esm/text/preprocessors/registry.d.ts +56 -0
- package/dist/esm/text/preprocessors/registry.js +180 -0
- package/dist/esm/text/reranker.d.ts +49 -0
- package/dist/esm/text/reranker.js +274 -0
- package/dist/esm/text/sentence-transformer-embedder.d.ts +96 -0
- package/dist/esm/text/sentence-transformer-embedder.js +340 -0
- package/dist/esm/text/tokenizer.d.ts +22 -0
- package/dist/esm/text/tokenizer.js +64 -0
- package/dist/esm/types.d.ts +83 -0
- package/dist/esm/types.js +3 -0
- package/dist/esm/utils/vector-math.d.ts +31 -0
- package/dist/esm/utils/vector-math.js +70 -0
- package/package.json +39 -14
- package/dist/core/vector-index.d.ts +0 -72
- package/dist/core/vector-index.js +0 -331
- /package/dist/{api-errors.d.ts → cjs/api-errors.d.ts} +0 -0
- /package/dist/{api-errors.js → cjs/api-errors.js} +0 -0
- /package/dist/{cli → cjs/cli}/indexer.d.ts +0 -0
- /package/dist/{cli → cjs/cli}/search.d.ts +0 -0
- /package/dist/{cli → cjs/cli}/search.js +0 -0
- /package/dist/{cli.d.ts → cjs/cli.d.ts} +0 -0
- /package/dist/{config.d.ts → cjs/config.d.ts} +0 -0
- /package/dist/{config.js → cjs/config.js} +0 -0
- /package/dist/{core → cjs/core}/abstract-embedder.d.ts +0 -0
- /package/dist/{core → cjs/core}/abstract-embedder.js +0 -0
- /package/dist/{core → cjs/core}/actionable-error-messages.d.ts +0 -0
- /package/dist/{core → cjs/core}/actionable-error-messages.js +0 -0
- /package/dist/{core → cjs/core}/adapters.d.ts +0 -0
- /package/dist/{core → cjs/core}/adapters.js +0 -0
- /package/dist/{core → cjs/core}/batch-processing-optimizer.d.ts +0 -0
- /package/dist/{core → cjs/core}/batch-processing-optimizer.js +0 -0
- /package/dist/{core → cjs/core}/binary-index-format.d.ts +0 -0
- /package/dist/{core → cjs/core}/chunker.d.ts +0 -0
- /package/dist/{core → cjs/core}/chunker.js +0 -0
- /package/dist/{core → cjs/core}/cli-database-utils.d.ts +0 -0
- /package/dist/{core → cjs/core}/cli-database-utils.js +0 -0
- /package/dist/{core → cjs/core}/config.d.ts +0 -0
- /package/dist/{core → cjs/core}/config.js +0 -0
- /package/dist/{core → cjs/core}/content-errors.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-errors.js +0 -0
- /package/dist/{core → cjs/core}/content-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-manager.js +0 -0
- /package/dist/{core → cjs/core}/content-performance-optimizer.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-performance-optimizer.js +0 -0
- /package/dist/{core → cjs/core}/content-resolver.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-resolver.js +0 -0
- /package/dist/{core → cjs/core}/cross-modal-search.d.ts +0 -0
- /package/dist/{core → cjs/core}/cross-modal-search.js +0 -0
- /package/dist/{core → cjs/core}/database-connection-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/database-connection-manager.js +0 -0
- /package/dist/{core → cjs/core}/embedder-factory.d.ts +0 -0
- /package/dist/{core → cjs/core}/embedder-factory.js +0 -0
- /package/dist/{core → cjs/core}/error-handler.d.ts +0 -0
- /package/dist/{core → cjs/core}/error-handler.js +0 -0
- /package/dist/{core → cjs/core}/index.d.ts +0 -0
- /package/dist/{core → cjs/core}/index.js +0 -0
- /package/dist/{core → cjs/core}/ingestion.d.ts +0 -0
- /package/dist/{core → cjs/core}/interfaces.d.ts +0 -0
- /package/dist/{core → cjs/core}/interfaces.js +0 -0
- /package/dist/{core → cjs/core}/lazy-dependency-loader.d.ts +0 -0
- /package/dist/{core → cjs/core}/lazy-dependency-loader.js +0 -0
- /package/dist/{core → cjs/core}/mode-detection-service.d.ts +0 -0
- /package/dist/{core → cjs/core}/mode-detection-service.js +0 -0
- /package/dist/{core → cjs/core}/mode-model-validator.d.ts +0 -0
- /package/dist/{core → cjs/core}/mode-model-validator.js +0 -0
- /package/dist/{core → cjs/core}/model-registry.d.ts +0 -0
- /package/dist/{core → cjs/core}/model-registry.js +0 -0
- /package/dist/{core → cjs/core}/model-validator.d.ts +0 -0
- /package/dist/{core → cjs/core}/path-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/path-manager.js +0 -0
- /package/dist/{core → cjs/core}/raglite-paths.d.ts +0 -0
- /package/dist/{core → cjs/core}/raglite-paths.js +0 -0
- /package/dist/{core → cjs/core}/reranking-config.d.ts +0 -0
- /package/dist/{core → cjs/core}/reranking-config.js +0 -0
- /package/dist/{core → cjs/core}/reranking-factory.d.ts +0 -0
- /package/dist/{core → cjs/core}/reranking-factory.js +0 -0
- /package/dist/{core → cjs/core}/reranking-strategies.d.ts +0 -0
- /package/dist/{core → cjs/core}/reranking-strategies.js +0 -0
- /package/dist/{core → cjs/core}/resource-cleanup.d.ts +0 -0
- /package/dist/{core → cjs/core}/resource-cleanup.js +0 -0
- /package/dist/{core → cjs/core}/resource-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/resource-manager.js +0 -0
- /package/dist/{core → cjs/core}/search-pipeline.d.ts +0 -0
- /package/dist/{core → cjs/core}/search.d.ts +0 -0
- /package/dist/{core → cjs/core}/streaming-operations.d.ts +0 -0
- /package/dist/{core → cjs/core}/streaming-operations.js +0 -0
- /package/dist/{core → cjs/core}/types.d.ts +0 -0
- /package/dist/{core → cjs/core}/types.js +0 -0
- /package/dist/{core → cjs/core}/universal-embedder.d.ts +0 -0
- /package/dist/{core → cjs/core}/universal-embedder.js +0 -0
- /package/dist/{core → cjs/core}/validation-messages.d.ts +0 -0
- /package/dist/{core → cjs/core}/validation-messages.js +0 -0
- /package/dist/{dom-polyfills.d.ts → cjs/dom-polyfills.d.ts} +0 -0
- /package/dist/{dom-polyfills.js → cjs/dom-polyfills.js} +0 -0
- /package/dist/{factories → cjs/factories}/index.d.ts +0 -0
- /package/dist/{factories → cjs/factories}/index.js +0 -0
- /package/dist/{factories → cjs/factories}/ingestion-factory.d.ts +0 -0
- /package/dist/{factories → cjs/factories}/search-factory.d.ts +0 -0
- /package/dist/{file-processor.d.ts → cjs/file-processor.d.ts} +0 -0
- /package/dist/{file-processor.js → cjs/file-processor.js} +0 -0
- /package/dist/{indexer.d.ts → cjs/indexer.d.ts} +0 -0
- /package/dist/{indexer.js → cjs/indexer.js} +0 -0
- /package/dist/{ingestion.d.ts → cjs/ingestion.d.ts} +0 -0
- /package/dist/{ingestion.js → cjs/ingestion.js} +0 -0
- /package/dist/{mcp-server.d.ts → cjs/mcp-server.d.ts} +0 -0
- /package/dist/{mcp-server.js → cjs/mcp-server.js} +0 -0
- /package/dist/{multimodal → cjs/multimodal}/clip-embedder.d.ts +0 -0
- /package/dist/{multimodal → cjs/multimodal}/clip-embedder.js +0 -0
- /package/dist/{multimodal → cjs/multimodal}/index.d.ts +0 -0
- /package/dist/{multimodal → cjs/multimodal}/index.js +0 -0
- /package/dist/{preprocess.d.ts → cjs/preprocess.d.ts} +0 -0
- /package/dist/{preprocess.js → cjs/preprocess.js} +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/index.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/index.js +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mdx.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mdx.js +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mermaid.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mermaid.js +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/registry.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/registry.js +0 -0
- /package/dist/{run-error-recovery-tests.d.ts → cjs/run-error-recovery-tests.d.ts} +0 -0
- /package/dist/{run-error-recovery-tests.js → cjs/run-error-recovery-tests.js} +0 -0
- /package/dist/{search-standalone.d.ts → cjs/search-standalone.d.ts} +0 -0
- /package/dist/{search-standalone.js → cjs/search-standalone.js} +0 -0
- /package/dist/{search.d.ts → cjs/search.d.ts} +0 -0
- /package/dist/{search.js → cjs/search.js} +0 -0
- /package/dist/{test-utils.d.ts → cjs/test-utils.d.ts} +0 -0
- /package/dist/{test-utils.js → cjs/test-utils.js} +0 -0
- /package/dist/{text → cjs/text}/chunker.d.ts +0 -0
- /package/dist/{text → cjs/text}/chunker.js +0 -0
- /package/dist/{text → cjs/text}/embedder.d.ts +0 -0
- /package/dist/{text → cjs/text}/embedder.js +0 -0
- /package/dist/{text → cjs/text}/index.d.ts +0 -0
- /package/dist/{text → cjs/text}/index.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/index.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/index.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mdx.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mdx.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mermaid.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mermaid.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/registry.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/registry.js +0 -0
- /package/dist/{text → cjs/text}/reranker.d.ts +0 -0
- /package/dist/{text → cjs/text}/reranker.js +0 -0
- /package/dist/{text → cjs/text}/sentence-transformer-embedder.d.ts +0 -0
- /package/dist/{text → cjs/text}/sentence-transformer-embedder.js +0 -0
- /package/dist/{text → cjs/text}/tokenizer.d.ts +0 -0
- /package/dist/{text → cjs/text}/tokenizer.js +0 -0
- /package/dist/{types.d.ts → cjs/types.d.ts} +0 -0
- /package/dist/{types.js → cjs/types.js} +0 -0
- /package/dist/{utils → cjs/utils}/vector-math.d.ts +0 -0
- /package/dist/{utils → cjs/utils}/vector-math.js +0 -0
|
@@ -0,0 +1,408 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CORE MODULE — Shared between text-only (rag-lite-ts) and future multimodal (rag-lite-mm)
|
|
3
|
+
* Model-agnostic. No transformer or modality-specific logic.
|
|
4
|
+
*
|
|
5
|
+
* This module defines the core interfaces for dependency injection in the search engine.
|
|
6
|
+
* These interfaces enable:
|
|
7
|
+
* 1. Different embedding implementations (text-only, multimodal, etc.)
|
|
8
|
+
* 2. Different reranking strategies (cross-encoder, neural, etc.)
|
|
9
|
+
* 3. Support for multiple content types (text, image, etc.)
|
|
10
|
+
* 4. Different embedding dimensions (384, 512, 768, etc.)
|
|
11
|
+
*
|
|
12
|
+
* DEPENDENCY INJECTION PATTERNS:
|
|
13
|
+
*
|
|
14
|
+
* 1. Direct Function Injection (Advanced Users):
|
|
15
|
+
* ```typescript
|
|
16
|
+
* // Text-only implementation
|
|
17
|
+
* const textEmbedFn: EmbedFunction = async (query) => textEmbedder.embedSingle(query);
|
|
18
|
+
* const textRerankFn: RerankFunction = async (query, results) => textReranker.rerank(query, results);
|
|
19
|
+
* const search = new SearchEngine(textEmbedFn, indexManager, db, textRerankFn);
|
|
20
|
+
*
|
|
21
|
+
* // Custom implementation
|
|
22
|
+
* const customEmbedFn: EmbedFunction = async (query) => ({
|
|
23
|
+
* embedding_id: generateId(),
|
|
24
|
+
* vector: await myCustomModel.embed(query)
|
|
25
|
+
* });
|
|
26
|
+
* const search = new SearchEngine(customEmbedFn, indexManager, db);
|
|
27
|
+
* ```
|
|
28
|
+
*
|
|
29
|
+
* 2. Factory Pattern (Recommended for Common Use Cases):
|
|
30
|
+
* ```typescript
|
|
31
|
+
* // Using factory for convenience
|
|
32
|
+
* const search = await TextSearchFactory.create('./index.bin', './db.sqlite', {
|
|
33
|
+
* embeddingModel: 'all-MiniLM-L6-v2',
|
|
34
|
+
* enableReranking: true
|
|
35
|
+
* });
|
|
36
|
+
*
|
|
37
|
+
* // Factory with custom configuration
|
|
38
|
+
* const ingestion = await IngestionFactory.create('./db.sqlite', './index.bin', {
|
|
39
|
+
* chunkSize: 300,
|
|
40
|
+
* chunkOverlap: 50
|
|
41
|
+
* });
|
|
42
|
+
* ```
|
|
43
|
+
*
|
|
44
|
+
* 3. Interface-Based Implementation (Plugin Architecture):
|
|
45
|
+
* ```typescript
|
|
46
|
+
* // Implement interfaces for custom behavior
|
|
47
|
+
* class CustomEmbeddingInterface implements EmbeddingQueryInterface {
|
|
48
|
+
* async embedQuery(query: string): Promise<EmbeddingResult> {
|
|
49
|
+
* return { embedding_id: generateId(), vector: await this.model.embed(query) };
|
|
50
|
+
* }
|
|
51
|
+
* supportedContentTypes = ['text', 'code'];
|
|
52
|
+
* embeddingDimensions = 384;
|
|
53
|
+
* modelIdentifier = 'custom-model-v1';
|
|
54
|
+
* }
|
|
55
|
+
*
|
|
56
|
+
* const customInterface = new CustomEmbeddingInterface();
|
|
57
|
+
* const embedFn = customInterface.embedQuery.bind(customInterface);
|
|
58
|
+
* const search = new SearchEngine(embedFn, indexManager, db);
|
|
59
|
+
* ```
|
|
60
|
+
*
|
|
61
|
+
* 4. Multimodal Implementation (Future):
|
|
62
|
+
* ```typescript
|
|
63
|
+
* // Multimodal embedding function
|
|
64
|
+
* const multimodalEmbedFn: EmbedFunction = async (query, contentType) => {
|
|
65
|
+
* if (contentType === 'image') return clipEmbedder.embedImage(query);
|
|
66
|
+
* return clipEmbedder.embedText(query);
|
|
67
|
+
* };
|
|
68
|
+
*
|
|
69
|
+
* // Multimodal reranking function
|
|
70
|
+
* const multimodalRerankFn: RerankFunction = async (query, results, contentType) => {
|
|
71
|
+
* return multimodalReranker.rerank(query, results, contentType);
|
|
72
|
+
* };
|
|
73
|
+
*
|
|
74
|
+
* const search = new SearchEngine(multimodalEmbedFn, indexManager, db, multimodalRerankFn);
|
|
75
|
+
* ```
|
|
76
|
+
*/
|
|
77
|
+
import type { SearchResult, EmbeddingResult } from './types.js';
|
|
78
|
+
/**
|
|
79
|
+
* Core embedding function interface for dependency injection
|
|
80
|
+
* Supports different content types and embedding dimensions (384, 512, 768, etc.)
|
|
81
|
+
*/
|
|
82
|
+
export type EmbedFunction = (
    // Query to turn into an embedding.
    query: string,
    // Optional content type of the query.
    contentType?: string,
) => Promise<EmbeddingResult>;
|
|
83
|
+
/**
|
|
84
|
+
* Core reranking function interface for dependency injection
|
|
85
|
+
* Supports different content types and query-result pairs
|
|
86
|
+
*/
|
|
87
|
+
export type RerankFunction = (
    // Query the results were retrieved for.
    query: string,
    // Initial results to be reranked.
    results: SearchResult[],
    // Optional content type of the query/results.
    contentType?: string,
) => Promise<SearchResult[]>;
|
|
88
|
+
/**
|
|
89
|
+
* Interface for embedding query operations
|
|
90
|
+
* Enables dependency injection of different embedding implementations
|
|
91
|
+
*
|
|
92
|
+
* This interface provides a standardized way to interact with different
|
|
93
|
+
* embedding models while maintaining compatibility checking and metadata.
|
|
94
|
+
* Implementations can be text-only, multimodal, or custom models.
|
|
95
|
+
*
|
|
96
|
+
* @example
|
|
97
|
+
* ```typescript
|
|
98
|
+
* // Text embedding implementation
|
|
99
|
+
* class TextEmbeddingInterface implements EmbeddingQueryInterface {
|
|
100
|
+
* embedQuery = async (query: string) => textEmbedder.embedSingle(query);
|
|
101
|
+
* supportedContentTypes = ['text', 'code'];
|
|
102
|
+
* embeddingDimensions = 384;
|
|
103
|
+
* modelIdentifier = 'all-MiniLM-L6-v2';
|
|
104
|
+
* }
|
|
105
|
+
*
|
|
106
|
+
* // Use with SearchEngine
|
|
107
|
+
* const embeddingInterface = new TextEmbeddingInterface();
|
|
108
|
+
* const embedFn = embeddingInterface.embedQuery.bind(embeddingInterface);
|
|
109
|
+
* const search = new SearchEngine(embedFn, indexManager, db);
|
|
110
|
+
* ```
|
|
111
|
+
*/
|
|
112
|
+
export interface EmbeddingQueryInterface {
    /**
     * Identifier (name) of the underlying model.
     * Compared for compatibility so the same model is used across sessions.
     */
    modelIdentifier: string;
    /**
     * Size of the vectors this embedder emits.
     * Has to equal the dimensions of the vector index to be compatible.
     */
    embeddingDimensions: number;
    /**
     * Content types accepted by this embedder, e.g. ['text'] or ['text', 'image'].
     * Consulted when validating and routing content of different types.
     */
    supportedContentTypes: string[];
    /**
     * Embeds a query string into a vector.
     * Expected to handle each content type listed in supportedContentTypes.
     */
    embedQuery: EmbedFunction;
}
|
|
134
|
+
/**
|
|
135
|
+
* Interface for reranking operations
|
|
136
|
+
* Enables dependency injection of different reranking implementations
|
|
137
|
+
*
|
|
138
|
+
* This interface provides a standardized way to interact with different
|
|
139
|
+
* reranking models. Reranking improves search quality by re-scoring
|
|
140
|
+
* initial search results using more sophisticated models.
|
|
141
|
+
*
|
|
142
|
+
* @example
|
|
143
|
+
* ```typescript
|
|
144
|
+
* // Text reranking implementation
|
|
145
|
+
* class TextRerankingInterface implements RerankingInterface {
|
|
146
|
+
* rerankResults = async (query: string, results: SearchResult[]) =>
|
|
147
|
+
* textReranker.rerank(query, results);
|
|
148
|
+
* supportedContentTypes = ['text'];
|
|
149
|
+
* isEnabled = true;
|
|
150
|
+
* modelIdentifier = 'cross-encoder/ms-marco-MiniLM-L-6-v2';
|
|
151
|
+
* }
|
|
152
|
+
*
|
|
153
|
+
* // Use with SearchEngine
|
|
154
|
+
* const rerankingInterface = new TextRerankingInterface();
|
|
155
|
+
* const rerankFn = rerankingInterface.rerankResults.bind(rerankingInterface);
|
|
156
|
+
* const search = new SearchEngine(embedFn, indexManager, db, rerankFn);
|
|
157
|
+
* ```
|
|
158
|
+
*/
|
|
159
|
+
export interface RerankingInterface {
    /**
     * Identifier (name) of the reranking model.
     * Kept for logging and compatibility tracking.
     */
    modelIdentifier: string;
    /**
     * Flag telling whether reranking is currently enabled and available.
     * Lets reranking be switched off gracefully when models fail to load.
     */
    isEnabled: boolean;
    /**
     * Content types accepted by this reranker.
     * Should equal, or be a subset of, the embedding interface content types.
     */
    supportedContentTypes: string[];
    /**
     * Reranks search results.
     * Receives a query plus the initial results and yields the results
     * reordered with updated scores.
     */
    rerankResults: RerankFunction;
}
|
|
181
|
+
/**
|
|
182
|
+
* Configuration interface for search engine dependency injection
|
|
183
|
+
* Allows different implementations to be plugged in with optional settings
|
|
184
|
+
*
|
|
185
|
+
* This interface provides a way to configure SearchEngine instances with
|
|
186
|
+
* different embedding and reranking implementations, along with default
|
|
187
|
+
* behaviors for content type handling and initialization.
|
|
188
|
+
*
|
|
189
|
+
* @example
|
|
190
|
+
* ```typescript
|
|
191
|
+
* // Configuration with custom interfaces
|
|
192
|
+
* const config: SearchEngineConfig = {
|
|
193
|
+
* embeddingInterface: new CustomEmbeddingInterface(),
|
|
194
|
+
* rerankingInterface: new CustomRerankingInterface(),
|
|
195
|
+
* defaultContentType: 'text',
|
|
196
|
+
* autoInitialize: true
|
|
197
|
+
* };
|
|
198
|
+
*
|
|
199
|
+
* // Use configuration (implementation-specific)
|
|
200
|
+
* const search = new ConfigurableSearchEngine(config);
|
|
201
|
+
* ```
|
|
202
|
+
*/
|
|
203
|
+
export interface SearchEngineConfig {
    /**
     * Embedding interface to inject (optional).
     * When supplied it takes the place of direct function injection.
     */
    embeddingInterface?: EmbeddingQueryInterface;

    /**
     * Reranking interface to inject (optional).
     * When supplied it is used to rerank results.
     */
    rerankingInterface?: RerankingInterface;

    /**
     * Content type assumed for queries that do not specify one,
     * i.e. when it cannot be inferred from context.
     */
    defaultContentType?: string;

    /**
     * Toggles automatic initialization.
     * If true, models and resources are loaded lazily on first use.
     */
    autoInitialize?: boolean;
}
|
|
225
|
+
/**
|
|
226
|
+
* Interface for content type strategies
|
|
227
|
+
* Enables different handling for different content types
|
|
228
|
+
*/
|
|
229
|
+
export interface ContentTypeStrategy {
    /** The content type handled by this strategy. */
    contentType: string;

    /** Reports whether this strategy is able to handle `contentType`. */
    canHandle(contentType: string): boolean;

    /** Optional hook: transforms the query for this content type before it is embedded. */
    preprocessQuery?(query: string): string;

    /** Optional hook: post-processes the search results for this content type. */
    postprocessResults?(results: SearchResult[]): SearchResult[];
}
|
|
247
|
+
/**
|
|
248
|
+
* Generic interface for model-agnostic operations
|
|
249
|
+
* Base interface that all model-specific implementations should extend
|
|
250
|
+
*/
|
|
251
|
+
export interface ModelAgnosticInterface {
    /** Initializes the interface: loading models, setting up resources, and so on. */
    initialize(): Promise<void>;

    /** Releases the resources held by the interface. */
    cleanup(): Promise<void>;

    /** Tells whether the interface is ready to be used. */
    isReady(): boolean;

    /** Returns metadata describing the interface. */
    getMetadata(): {
        name: string;
        version: string;
        supportedContentTypes: string[];
    };
}
|
|
273
|
+
/**
|
|
274
|
+
* Extended embedding interface that includes model-agnostic operations
|
|
275
|
+
*/
|
|
276
|
+
export interface ExtendedEmbeddingInterface
    extends EmbeddingQueryInterface, ModelAgnosticInterface {
    /** Optional: embeds several queries in one call for efficiency. */
    embedBatch?(
        queries: string[],
        contentType?: string,
    ): Promise<EmbeddingResult[]>;
}
|
|
282
|
+
/**
|
|
283
|
+
* Extended reranking interface that includes model-agnostic operations
|
|
284
|
+
*/
|
|
285
|
+
export interface ExtendedRerankingInterface
    extends RerankingInterface, ModelAgnosticInterface {
    /** Optional: reranks several query/result-set pairs in one call for efficiency. */
    rerankBatch?(
        queries: string[],
        resultSets: SearchResult[][],
        contentType?: string,
    ): Promise<SearchResult[][]>;
}
|
|
291
|
+
/**
 * Coordination contract for the search pipeline.
 * Lists the core, model-agnostic pipeline steps in the order they are numbered below.
 */
export interface SearchPipelineInterface {
    /** Step 1: process the query and produce its embedding. */
    embedQuery(query: string, contentType?: string): Promise<EmbeddingResult>;
    /** Step 2: run the vector search for the query vector, requesting `topK` results. */
    vectorSearch(queryVector: Float32Array, topK: number): Promise<{
        embeddingIds: string[];
        distances: number[];
    }>;
    /** Step 3: fetch metadata from the database for the given embedding ids. */
    retrieveMetadata(embeddingIds: string[]): Promise<any[]>;
    /** Step 4: build the initial search results from chunks, distances and embedding ids. */
    formatResults(chunks: any[], distances: number[], embeddingIds: string[]): SearchResult[];
    /** Step 5 (optional): rerank the formatted results. */
    rerankResults?(query: string, results: SearchResult[], contentType?: string): Promise<SearchResult[]>;
}
|
|
320
|
+
/**
 * Factory interface for creating embedding and reranking functions
 * Enables clean dependency injection patterns and simplifies common use cases
 *
 * FACTORY PATTERN BENEFITS:
 * - Handles complex initialization logic (model loading, configuration)
 * - Provides simple API for common use cases
 * - Maintains access to underlying dependency injection architecture
 * - Supports different content types and embedding models
 *
 * USAGE EXAMPLES:
 * ```typescript
 * // Text factory implementation
 * class TextSearchDependencyFactory implements SearchDependencyFactory {
 *   createEmbedFunction(contentType = 'text'): EmbedFunction {
 *     const embedder = new TextEmbeddingEngine();
 *     return async (query) => embedder.embedSingle(query);
 *   }
 *
 *   createRerankFunction(contentType = 'text'): RerankFunction {
 *     const reranker = new CrossEncoderReranker();
 *     return async (query, results) => reranker.rerank(query, results);
 *   }
 *
 *   getSupportedContentTypes(): string[] {
 *     return ['text'];
 *   }
 *
 *   getEmbeddingDimensions(): number {
 *     return 384; // illustrative value: report the dimension of the model in use
 *   }
 * }
 *
 * // Factory usage in practice
 * const factory = new TextSearchDependencyFactory();
 * const embedFn = factory.createEmbedFunction();
 * const rerankFn = factory.createRerankFunction();
 * const search = new SearchEngine(embedFn, indexManager, db, rerankFn);
 *
 * // Multimodal factory (future)
 * class MultimodalSearchDependencyFactory implements SearchDependencyFactory {
 *   createEmbedFunction(contentType = 'text'): EmbedFunction {
 *     const clipModel = new CLIPEmbeddingEngine();
 *     return async (query, type) => {
 *       if (type === 'image') return clipModel.embedImage(query);
 *       return clipModel.embedText(query);
 *     };
 *   }
 *
 *   createRerankFunction(): RerankFunction | undefined {
 *     return undefined; // this example factory offers no reranker
 *   }
 *
 *   getSupportedContentTypes(): string[] {
 *     return ['text', 'image'];
 *   }
 *
 *   getEmbeddingDimensions(): number {
 *     return 512; // illustrative value: report the dimension of the model in use
 *   }
 * }
 * ```
 */
export interface SearchDependencyFactory {
    /**
     * Create an embedding function for the specified content type
     * @param contentType - Content type to create embedder for ('text', 'image', etc.)
     * @returns EmbedFunction that can handle the specified content type
     */
    createEmbedFunction(contentType?: string): EmbedFunction;
    /**
     * Create a reranking function for the specified content type
     * @param contentType - Content type to create reranker for ('text', 'image', etc.)
     * @returns RerankFunction for the content type, or undefined if not supported
     */
    createRerankFunction(contentType?: string): RerankFunction | undefined;
    /**
     * Get supported content types for this factory
     * @returns Array of supported content type strings
     */
    getSupportedContentTypes(): string[];
    /**
     * Get embedding dimensions for compatibility checking
     * @returns Number of dimensions in embedding vectors produced by this factory
     */
    getEmbeddingDimensions(): number;
}
|
|
387
|
+
/**
 * Static helpers for checking interface compatibility.
 */
export declare class InterfaceValidator {
    /**
     * Checks that the supplied embed function is callable.
     * @param embedFn - Candidate embedding function
     * @returns true when `embedFn` is a function
     */
    static validateEmbedFunction(embedFn: EmbedFunction): boolean;
    /**
     * Checks that the supplied rerank function is callable.
     * @param rerankFn - Candidate reranking function
     * @returns true when `rerankFn` is a function
     */
    static validateRerankFunction(rerankFn: RerankFunction): boolean;
    /**
     * Checks that two embedding dimension counts agree.
     * @param expected - Dimension count that is required
     * @param actual - Dimension count that was found
     * @returns true when both values are strictly equal
     */
    static validateEmbeddingDimensions(expected: number, actual: number): boolean;
    /**
     * Checks whether a content type is among the supported ones.
     * @param supportedTypes - Content types that are supported
     * @param requestedType - Content type being requested
     * @returns true when `requestedType` is listed in `supportedTypes`
     */
    static validateContentTypeSupport(supportedTypes: string[], requestedType: string): boolean;
}
|
|
408
|
+
//# sourceMappingURL=interfaces.d.ts.map
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CORE MODULE — Shared between text-only (rag-lite-ts) and future multimodal (rag-lite-mm)
|
|
3
|
+
* Model-agnostic. No transformer or modality-specific logic.
|
|
4
|
+
*
|
|
5
|
+
* This module defines the core interfaces for dependency injection in the search engine.
|
|
6
|
+
* These interfaces enable:
|
|
7
|
+
* 1. Different embedding implementations (text-only, multimodal, etc.)
|
|
8
|
+
* 2. Different reranking strategies (cross-encoder, neural, etc.)
|
|
9
|
+
* 3. Support for multiple content types (text, image, etc.)
|
|
10
|
+
* 4. Different embedding dimensions (384, 512, 768, etc.)
|
|
11
|
+
*
|
|
12
|
+
* DEPENDENCY INJECTION PATTERNS:
|
|
13
|
+
*
|
|
14
|
+
* 1. Direct Function Injection (Advanced Users):
|
|
15
|
+
* ```typescript
|
|
16
|
+
* // Text-only implementation
|
|
17
|
+
* const textEmbedFn: EmbedFunction = async (query) => textEmbedder.embedSingle(query);
|
|
18
|
+
* const textRerankFn: RerankFunction = async (query, results) => textReranker.rerank(query, results);
|
|
19
|
+
* const search = new SearchEngine(textEmbedFn, indexManager, db, textRerankFn);
|
|
20
|
+
*
|
|
21
|
+
* // Custom implementation
|
|
22
|
+
* const customEmbedFn: EmbedFunction = async (query) => ({
|
|
23
|
+
* embedding_id: generateId(),
|
|
24
|
+
* vector: await myCustomModel.embed(query)
|
|
25
|
+
* });
|
|
26
|
+
* const customSearch = new SearchEngine(customEmbedFn, indexManager, db);
|
|
27
|
+
* ```
|
|
28
|
+
*
|
|
29
|
+
* 2. Factory Pattern (Recommended for Common Use Cases):
|
|
30
|
+
* ```typescript
|
|
31
|
+
* // Using factory for convenience
|
|
32
|
+
* const search = await TextSearchFactory.create('./index.bin', './db.sqlite', {
|
|
33
|
+
* embeddingModel: 'all-MiniLM-L6-v2',
|
|
34
|
+
* enableReranking: true
|
|
35
|
+
* });
|
|
36
|
+
*
|
|
37
|
+
* // Factory with custom configuration
|
|
38
|
+
* const ingestion = await IngestionFactory.create('./db.sqlite', './index.bin', {
|
|
39
|
+
* chunkSize: 300,
|
|
40
|
+
* chunkOverlap: 50
|
|
41
|
+
* });
|
|
42
|
+
* ```
|
|
43
|
+
*
|
|
44
|
+
* 3. Interface-Based Implementation (Plugin Architecture):
|
|
45
|
+
* ```typescript
|
|
46
|
+
* // Implement interfaces for custom behavior
|
|
47
|
+
* class CustomEmbeddingInterface implements EmbeddingQueryInterface {
|
|
48
|
+
* async embedQuery(query: string): Promise<EmbeddingResult> {
|
|
49
|
+
* return { embedding_id: generateId(), vector: await this.model.embed(query) };
|
|
50
|
+
* }
|
|
51
|
+
* supportedContentTypes = ['text', 'code'];
|
|
52
|
+
* embeddingDimensions = 384;
|
|
53
|
+
* modelIdentifier = 'custom-model-v1';
|
|
54
|
+
* }
|
|
55
|
+
*
|
|
56
|
+
* const customInterface = new CustomEmbeddingInterface();
|
|
57
|
+
* const embedFn = customInterface.embedQuery.bind(customInterface);
|
|
58
|
+
* const search = new SearchEngine(embedFn, indexManager, db);
|
|
59
|
+
* ```
|
|
60
|
+
*
|
|
61
|
+
* 4. Multimodal Implementation (Future):
|
|
62
|
+
* ```typescript
|
|
63
|
+
* // Multimodal embedding function
|
|
64
|
+
* const multimodalEmbedFn: EmbedFunction = async (query, contentType) => {
|
|
65
|
+
* if (contentType === 'image') return clipEmbedder.embedImage(query);
|
|
66
|
+
* return clipEmbedder.embedText(query);
|
|
67
|
+
* };
|
|
68
|
+
*
|
|
69
|
+
* // Multimodal reranking function
|
|
70
|
+
* const multimodalRerankFn: RerankFunction = async (query, results, contentType) => {
|
|
71
|
+
* return multimodalReranker.rerank(query, results, contentType);
|
|
72
|
+
* };
|
|
73
|
+
*
|
|
74
|
+
* const search = new SearchEngine(multimodalEmbedFn, indexManager, db, multimodalRerankFn);
|
|
75
|
+
* ```
|
|
76
|
+
*/
|
|
77
|
+
/**
 * Validation utilities for interface compatibility.
 *
 * Every helper is a pure predicate: it returns a boolean and never throws,
 * so callers can use the result directly in guard conditions.
 */
export class InterfaceValidator {
    /**
     * Validate that an EmbedFunction is compatible with expected interface.
     * Only callability is checked; the function is not invoked.
     *
     * @param embedFn - Candidate embedding function
     * @returns true when `embedFn` is a function
     */
    static validateEmbedFunction(embedFn) {
        return typeof embedFn === 'function';
    }
    /**
     * Validate that a RerankFunction is compatible with expected interface.
     * Only callability is checked; the function is not invoked.
     *
     * @param rerankFn - Candidate reranking function
     * @returns true when `rerankFn` is a function
     */
    static validateRerankFunction(rerankFn) {
        return typeof rerankFn === 'function';
    }
    /**
     * Validate embedding dimensions compatibility.
     *
     * @param expected - Dimension count that is required
     * @param actual - Dimension count that was found
     * @returns true when both values are strictly equal
     */
    static validateEmbeddingDimensions(expected, actual) {
        return expected === actual;
    }
    /**
     * Validate content type support.
     *
     * @param supportedTypes - Array of supported content type strings
     * @param requestedType - Content type being requested
     * @returns true when `supportedTypes` is an array containing `requestedType`
     */
    static validateContentTypeSupport(supportedTypes, requestedType) {
        // Guard against a missing list (would throw on `.includes`) and against a
        // plain string (whose `.includes` does substring matching, not membership).
        return Array.isArray(supportedTypes) && supportedTypes.includes(requestedType);
    }
}
|
|
106
|
+
//# sourceMappingURL=interfaces.js.map
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Knowledge Base Manager
|
|
3
|
+
*
|
|
4
|
+
* Provides a unified API for managing the knowledge base (database + vector index).
|
|
5
|
+
* This module is designed to solve file locking issues on Windows by using
|
|
6
|
+
* in-place reset operations instead of file deletion.
|
|
7
|
+
*
|
|
8
|
+
* Key Features:
|
|
9
|
+
* - Reset database and index without file deletion (avoids EBUSY/EACCES errors)
|
|
10
|
+
* - Coordinated reset of both database and index in a single operation
|
|
11
|
+
* - Connection management to prevent orphaned handles
|
|
12
|
+
* - Cross-platform compatibility (especially Windows)
|
|
13
|
+
*
|
|
14
|
+
* @module knowledge-base-manager
|
|
15
|
+
*/
|
|
16
|
+
import { type DatabaseResetResult } from './db.js';
|
|
17
|
+
/**
 * Outcome of a knowledge base reset operation.
 */
export interface KnowledgeBaseResetResult {
    /** True when the reset as a whole was successful. */
    success: boolean;
    /** Result reported for the database reset. */
    database: DatabaseResetResult;
    /** Statistics for the index reset. */
    index: {
        /** How many vectors were cleared. */
        vectorsCleared: number;
        /** Duration of the index reset, in milliseconds. */
        resetTimeMs: number;
    };
    /** Total time taken by the complete reset operation. */
    totalTimeMs: number;
    /** Warnings raised during the reset, if any. */
    warnings: string[];
}
|
|
37
|
+
/**
 * Settings accepted by the knowledge base reset operation.
 */
export interface KnowledgeBaseResetOptions {
    /** Keep system_info (mode, model configuration) across the reset. Default: false. */
    preserveSystemInfo?: boolean;
    /** Run VACUUM once the database has been reset. Default: true. */
    runVacuum?: boolean;
    /** Model name used when recreating the index. Default: taken from config. */
    modelName?: string;
}
|
|
48
|
+
/**
 * Knowledge Base Manager
 *
 * Manages the complete knowledge base lifecycle including database and vector index.
 * Provides safe reset operations that avoid file locking issues on Windows.
 *
 * @example
 * ```typescript
 * // Reset knowledge base for force rebuild
 * const result = await KnowledgeBaseManager.reset('./db.sqlite', './index.bin');
 * console.log(`Reset ${result.database.documentsDeleted} documents and ${result.index.vectorsCleared} vectors`);
 *
 * // Reset with options
 * const resultWithOptions = await KnowledgeBaseManager.reset('./db.sqlite', './index.bin', {
 *   preserveSystemInfo: true,  // Keep mode/model configuration
 *   modelName: 'all-MiniLM-L6-v2'  // Specify model for index
 * });
 * ```
 */
export declare class KnowledgeBaseManager {
    /**
     * Reset the knowledge base by clearing all data while keeping files intact.
     * This is a safer alternative to file deletion that avoids file locking issues on Windows.
     *
     * The reset operation:
     * 1. Closes any existing connections via DatabaseConnectionManager
     * 2. Opens a fresh connection to the database
     * 3. Deletes all rows from documents, chunks, content_metadata tables
     * 4. Optionally runs VACUUM to reclaim disk space
     * 5. Reinitializes the vector index (clears all vectors)
     * 6. Saves the empty index to disk (overwrites existing file content)
     *
     * This approach works because:
     * - We don't delete files, so no EBUSY/EACCES errors
     * - The same file handles can be reused or replaced safely
     * - SQLite transactions ensure data integrity
     * - Index overwrite uses standard file write operations
     *
     * @param dbPath - Path to the SQLite database file
     * @param indexPath - Path to the vector index file
     * @param options - Reset options
     * @returns Promise resolving to reset result statistics
     *
     * @throws Error if database or index reset fails
     */
    static reset(dbPath: string, indexPath: string, options?: KnowledgeBaseResetOptions): Promise<KnowledgeBaseResetResult>;
    /**
     * Check if the knowledge base has any data
     *
     * @param dbPath - Path to the SQLite database file
     * @returns Promise resolving to true if database has data, false if empty
     */
    static hasData(dbPath: string): Promise<boolean>;
    /**
     * Close all connections to the knowledge base
     * Useful before operations that might conflict with open handles
     *
     * @param dbPath - Path to the SQLite database file
     */
    static closeAllConnections(dbPath: string): Promise<void>;
}
|
|
109
|
+
//# sourceMappingURL=knowledge-base-manager.d.ts.map
|