rag-lite-ts 2.1.0 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{cli → cjs/cli}/indexer.js +1 -1
- package/dist/{cli → cjs/cli}/search.js +5 -10
- package/dist/{core → cjs/core}/binary-index-format.d.ts +28 -2
- package/dist/cjs/core/binary-index-format.js +291 -0
- package/dist/{core → cjs/core}/ingestion.d.ts +5 -1
- package/dist/{core → cjs/core}/ingestion.js +76 -9
- package/dist/{core → cjs/core}/model-validator.js +1 -1
- package/dist/{core → cjs/core}/reranking-strategies.js +4 -5
- package/dist/{core → cjs/core}/search.js +2 -1
- package/dist/{core → cjs/core}/types.d.ts +1 -1
- package/dist/{core → cjs/core}/vector-index.d.ts +4 -0
- package/dist/{core → cjs/core}/vector-index.js +10 -2
- package/dist/{file-processor.d.ts → cjs/file-processor.d.ts} +2 -0
- package/dist/{file-processor.js → cjs/file-processor.js} +20 -0
- package/dist/{index-manager.d.ts → cjs/index-manager.d.ts} +17 -1
- package/dist/{index-manager.js → cjs/index-manager.js} +148 -7
- package/dist/{multimodal → cjs/multimodal}/clip-embedder.js +71 -66
- package/dist/esm/api-errors.d.ts +90 -0
- package/dist/esm/api-errors.js +320 -0
- package/dist/esm/cli/indexer.d.ts +11 -0
- package/dist/esm/cli/indexer.js +471 -0
- package/dist/esm/cli/search.d.ts +7 -0
- package/dist/esm/cli/search.js +332 -0
- package/dist/esm/cli.d.ts +3 -0
- package/dist/esm/cli.js +529 -0
- package/dist/esm/config.d.ts +51 -0
- package/dist/esm/config.js +79 -0
- package/dist/esm/core/abstract-embedder.d.ts +125 -0
- package/dist/esm/core/abstract-embedder.js +264 -0
- package/dist/esm/core/actionable-error-messages.d.ts +60 -0
- package/dist/esm/core/actionable-error-messages.js +397 -0
- package/dist/esm/core/adapters.d.ts +93 -0
- package/dist/esm/core/adapters.js +139 -0
- package/dist/esm/core/batch-processing-optimizer.d.ts +155 -0
- package/dist/esm/core/batch-processing-optimizer.js +536 -0
- package/dist/esm/core/binary-index-format.d.ts +78 -0
- package/dist/esm/core/binary-index-format.js +291 -0
- package/dist/esm/core/chunker.d.ts +119 -0
- package/dist/esm/core/chunker.js +73 -0
- package/dist/esm/core/cli-database-utils.d.ts +53 -0
- package/dist/esm/core/cli-database-utils.js +239 -0
- package/dist/esm/core/config.d.ts +102 -0
- package/dist/esm/core/config.js +247 -0
- package/dist/esm/core/content-errors.d.ts +111 -0
- package/dist/esm/core/content-errors.js +362 -0
- package/dist/esm/core/content-manager.d.ts +335 -0
- package/dist/esm/core/content-manager.js +1476 -0
- package/dist/esm/core/content-performance-optimizer.d.ts +150 -0
- package/dist/esm/core/content-performance-optimizer.js +516 -0
- package/dist/esm/core/content-resolver.d.ts +104 -0
- package/dist/esm/core/content-resolver.js +285 -0
- package/dist/esm/core/cross-modal-search.d.ts +164 -0
- package/dist/esm/core/cross-modal-search.js +342 -0
- package/dist/esm/core/database-connection-manager.d.ts +109 -0
- package/dist/esm/core/database-connection-manager.js +310 -0
- package/dist/esm/core/db.d.ts +213 -0
- package/dist/esm/core/db.js +895 -0
- package/dist/esm/core/embedder-factory.d.ts +154 -0
- package/dist/esm/core/embedder-factory.js +311 -0
- package/dist/esm/core/error-handler.d.ts +112 -0
- package/dist/esm/core/error-handler.js +239 -0
- package/dist/esm/core/index.d.ts +59 -0
- package/dist/esm/core/index.js +69 -0
- package/dist/esm/core/ingestion.d.ts +202 -0
- package/dist/esm/core/ingestion.js +901 -0
- package/dist/esm/core/interfaces.d.ts +408 -0
- package/dist/esm/core/interfaces.js +106 -0
- package/dist/esm/core/lazy-dependency-loader.d.ts +147 -0
- package/dist/esm/core/lazy-dependency-loader.js +435 -0
- package/dist/esm/core/mode-detection-service.d.ts +150 -0
- package/dist/esm/core/mode-detection-service.js +565 -0
- package/dist/esm/core/mode-model-validator.d.ts +92 -0
- package/dist/esm/core/mode-model-validator.js +203 -0
- package/dist/esm/core/model-registry.d.ts +116 -0
- package/dist/esm/core/model-registry.js +411 -0
- package/dist/esm/core/model-validator.d.ts +217 -0
- package/dist/esm/core/model-validator.js +782 -0
- package/dist/esm/core/path-manager.d.ts +47 -0
- package/dist/esm/core/path-manager.js +71 -0
- package/dist/esm/core/raglite-paths.d.ts +121 -0
- package/dist/esm/core/raglite-paths.js +145 -0
- package/dist/esm/core/reranking-config.d.ts +42 -0
- package/dist/esm/core/reranking-config.js +147 -0
- package/dist/esm/core/reranking-factory.d.ts +92 -0
- package/dist/esm/core/reranking-factory.js +410 -0
- package/dist/esm/core/reranking-strategies.d.ts +310 -0
- package/dist/esm/core/reranking-strategies.js +650 -0
- package/dist/esm/core/resource-cleanup.d.ts +163 -0
- package/dist/esm/core/resource-cleanup.js +371 -0
- package/dist/esm/core/resource-manager.d.ts +212 -0
- package/dist/esm/core/resource-manager.js +564 -0
- package/dist/esm/core/search-pipeline.d.ts +111 -0
- package/dist/esm/core/search-pipeline.js +287 -0
- package/dist/esm/core/search.d.ts +141 -0
- package/dist/esm/core/search.js +320 -0
- package/dist/esm/core/streaming-operations.d.ts +145 -0
- package/dist/esm/core/streaming-operations.js +409 -0
- package/dist/esm/core/types.d.ts +66 -0
- package/dist/esm/core/types.js +6 -0
- package/dist/esm/core/universal-embedder.d.ts +177 -0
- package/dist/esm/core/universal-embedder.js +139 -0
- package/dist/esm/core/validation-messages.d.ts +99 -0
- package/dist/esm/core/validation-messages.js +334 -0
- package/dist/esm/core/vector-index.d.ts +72 -0
- package/dist/esm/core/vector-index.js +333 -0
- package/dist/esm/dom-polyfills.d.ts +6 -0
- package/dist/esm/dom-polyfills.js +37 -0
- package/dist/esm/factories/index.d.ts +27 -0
- package/dist/esm/factories/index.js +29 -0
- package/dist/esm/factories/ingestion-factory.d.ts +200 -0
- package/dist/esm/factories/ingestion-factory.js +477 -0
- package/dist/esm/factories/search-factory.d.ts +154 -0
- package/dist/esm/factories/search-factory.js +344 -0
- package/dist/esm/file-processor.d.ts +147 -0
- package/dist/esm/file-processor.js +963 -0
- package/dist/esm/index-manager.d.ts +116 -0
- package/dist/esm/index-manager.js +598 -0
- package/dist/esm/index.d.ts +75 -0
- package/dist/esm/index.js +110 -0
- package/dist/esm/indexer.d.ts +7 -0
- package/dist/esm/indexer.js +54 -0
- package/dist/esm/ingestion.d.ts +63 -0
- package/dist/esm/ingestion.js +124 -0
- package/dist/esm/mcp-server.d.ts +46 -0
- package/dist/esm/mcp-server.js +1820 -0
- package/dist/esm/multimodal/clip-embedder.d.ts +327 -0
- package/dist/esm/multimodal/clip-embedder.js +996 -0
- package/dist/esm/multimodal/index.d.ts +6 -0
- package/dist/esm/multimodal/index.js +6 -0
- package/dist/esm/preprocess.d.ts +19 -0
- package/dist/esm/preprocess.js +203 -0
- package/dist/esm/preprocessors/index.d.ts +17 -0
- package/dist/esm/preprocessors/index.js +38 -0
- package/dist/esm/preprocessors/mdx.d.ts +25 -0
- package/dist/esm/preprocessors/mdx.js +101 -0
- package/dist/esm/preprocessors/mermaid.d.ts +68 -0
- package/dist/esm/preprocessors/mermaid.js +329 -0
- package/dist/esm/preprocessors/registry.d.ts +56 -0
- package/dist/esm/preprocessors/registry.js +179 -0
- package/dist/esm/run-error-recovery-tests.d.ts +7 -0
- package/dist/esm/run-error-recovery-tests.js +101 -0
- package/dist/esm/search-standalone.d.ts +7 -0
- package/dist/esm/search-standalone.js +117 -0
- package/dist/esm/search.d.ts +99 -0
- package/dist/esm/search.js +177 -0
- package/dist/esm/test-utils.d.ts +18 -0
- package/dist/esm/test-utils.js +27 -0
- package/dist/esm/text/chunker.d.ts +33 -0
- package/dist/esm/text/chunker.js +279 -0
- package/dist/esm/text/embedder.d.ts +111 -0
- package/dist/esm/text/embedder.js +386 -0
- package/dist/esm/text/index.d.ts +8 -0
- package/dist/esm/text/index.js +9 -0
- package/dist/esm/text/preprocessors/index.d.ts +17 -0
- package/dist/esm/text/preprocessors/index.js +38 -0
- package/dist/esm/text/preprocessors/mdx.d.ts +25 -0
- package/dist/esm/text/preprocessors/mdx.js +101 -0
- package/dist/esm/text/preprocessors/mermaid.d.ts +68 -0
- package/dist/esm/text/preprocessors/mermaid.js +330 -0
- package/dist/esm/text/preprocessors/registry.d.ts +56 -0
- package/dist/esm/text/preprocessors/registry.js +180 -0
- package/dist/esm/text/reranker.d.ts +49 -0
- package/dist/esm/text/reranker.js +274 -0
- package/dist/esm/text/sentence-transformer-embedder.d.ts +96 -0
- package/dist/esm/text/sentence-transformer-embedder.js +340 -0
- package/dist/esm/text/tokenizer.d.ts +22 -0
- package/dist/esm/text/tokenizer.js +64 -0
- package/dist/esm/types.d.ts +83 -0
- package/dist/esm/types.js +3 -0
- package/dist/esm/utils/vector-math.d.ts +31 -0
- package/dist/esm/utils/vector-math.js +70 -0
- package/package.json +30 -12
- package/dist/core/binary-index-format.js +0 -122
- /package/dist/{api-errors.d.ts → cjs/api-errors.d.ts} +0 -0
- /package/dist/{api-errors.js → cjs/api-errors.js} +0 -0
- /package/dist/{cli → cjs/cli}/indexer.d.ts +0 -0
- /package/dist/{cli → cjs/cli}/search.d.ts +0 -0
- /package/dist/{cli.d.ts → cjs/cli.d.ts} +0 -0
- /package/dist/{cli.js → cjs/cli.js} +0 -0
- /package/dist/{config.d.ts → cjs/config.d.ts} +0 -0
- /package/dist/{config.js → cjs/config.js} +0 -0
- /package/dist/{core → cjs/core}/abstract-embedder.d.ts +0 -0
- /package/dist/{core → cjs/core}/abstract-embedder.js +0 -0
- /package/dist/{core → cjs/core}/actionable-error-messages.d.ts +0 -0
- /package/dist/{core → cjs/core}/actionable-error-messages.js +0 -0
- /package/dist/{core → cjs/core}/adapters.d.ts +0 -0
- /package/dist/{core → cjs/core}/adapters.js +0 -0
- /package/dist/{core → cjs/core}/batch-processing-optimizer.d.ts +0 -0
- /package/dist/{core → cjs/core}/batch-processing-optimizer.js +0 -0
- /package/dist/{core → cjs/core}/chunker.d.ts +0 -0
- /package/dist/{core → cjs/core}/chunker.js +0 -0
- /package/dist/{core → cjs/core}/cli-database-utils.d.ts +0 -0
- /package/dist/{core → cjs/core}/cli-database-utils.js +0 -0
- /package/dist/{core → cjs/core}/config.d.ts +0 -0
- /package/dist/{core → cjs/core}/config.js +0 -0
- /package/dist/{core → cjs/core}/content-errors.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-errors.js +0 -0
- /package/dist/{core → cjs/core}/content-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-manager.js +0 -0
- /package/dist/{core → cjs/core}/content-performance-optimizer.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-performance-optimizer.js +0 -0
- /package/dist/{core → cjs/core}/content-resolver.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-resolver.js +0 -0
- /package/dist/{core → cjs/core}/cross-modal-search.d.ts +0 -0
- /package/dist/{core → cjs/core}/cross-modal-search.js +0 -0
- /package/dist/{core → cjs/core}/database-connection-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/database-connection-manager.js +0 -0
- /package/dist/{core → cjs/core}/db.d.ts +0 -0
- /package/dist/{core → cjs/core}/db.js +0 -0
- /package/dist/{core → cjs/core}/embedder-factory.d.ts +0 -0
- /package/dist/{core → cjs/core}/embedder-factory.js +0 -0
- /package/dist/{core → cjs/core}/error-handler.d.ts +0 -0
- /package/dist/{core → cjs/core}/error-handler.js +0 -0
- /package/dist/{core → cjs/core}/index.d.ts +0 -0
- /package/dist/{core → cjs/core}/index.js +0 -0
- /package/dist/{core → cjs/core}/interfaces.d.ts +0 -0
- /package/dist/{core → cjs/core}/interfaces.js +0 -0
- /package/dist/{core → cjs/core}/lazy-dependency-loader.d.ts +0 -0
- /package/dist/{core → cjs/core}/lazy-dependency-loader.js +0 -0
- /package/dist/{core → cjs/core}/mode-detection-service.d.ts +0 -0
- /package/dist/{core → cjs/core}/mode-detection-service.js +0 -0
- /package/dist/{core → cjs/core}/mode-model-validator.d.ts +0 -0
- /package/dist/{core → cjs/core}/mode-model-validator.js +0 -0
- /package/dist/{core → cjs/core}/model-registry.d.ts +0 -0
- /package/dist/{core → cjs/core}/model-registry.js +0 -0
- /package/dist/{core → cjs/core}/model-validator.d.ts +0 -0
- /package/dist/{core → cjs/core}/path-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/path-manager.js +0 -0
- /package/dist/{core → cjs/core}/raglite-paths.d.ts +0 -0
- /package/dist/{core → cjs/core}/raglite-paths.js +0 -0
- /package/dist/{core → cjs/core}/reranking-config.d.ts +0 -0
- /package/dist/{core → cjs/core}/reranking-config.js +0 -0
- /package/dist/{core → cjs/core}/reranking-factory.d.ts +0 -0
- /package/dist/{core → cjs/core}/reranking-factory.js +0 -0
- /package/dist/{core → cjs/core}/reranking-strategies.d.ts +0 -0
- /package/dist/{core → cjs/core}/resource-cleanup.d.ts +0 -0
- /package/dist/{core → cjs/core}/resource-cleanup.js +0 -0
- /package/dist/{core → cjs/core}/resource-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/resource-manager.js +0 -0
- /package/dist/{core → cjs/core}/search-pipeline.d.ts +0 -0
- /package/dist/{core → cjs/core}/search-pipeline.js +0 -0
- /package/dist/{core → cjs/core}/search.d.ts +0 -0
- /package/dist/{core → cjs/core}/streaming-operations.d.ts +0 -0
- /package/dist/{core → cjs/core}/streaming-operations.js +0 -0
- /package/dist/{core → cjs/core}/types.js +0 -0
- /package/dist/{core → cjs/core}/universal-embedder.d.ts +0 -0
- /package/dist/{core → cjs/core}/universal-embedder.js +0 -0
- /package/dist/{core → cjs/core}/validation-messages.d.ts +0 -0
- /package/dist/{core → cjs/core}/validation-messages.js +0 -0
- /package/dist/{dom-polyfills.d.ts → cjs/dom-polyfills.d.ts} +0 -0
- /package/dist/{dom-polyfills.js → cjs/dom-polyfills.js} +0 -0
- /package/dist/{factories → cjs/factories}/index.d.ts +0 -0
- /package/dist/{factories → cjs/factories}/index.js +0 -0
- /package/dist/{factories → cjs/factories}/ingestion-factory.d.ts +0 -0
- /package/dist/{factories → cjs/factories}/ingestion-factory.js +0 -0
- /package/dist/{factories → cjs/factories}/search-factory.d.ts +0 -0
- /package/dist/{factories → cjs/factories}/search-factory.js +0 -0
- /package/dist/{index.d.ts → cjs/index.d.ts} +0 -0
- /package/dist/{index.js → cjs/index.js} +0 -0
- /package/dist/{indexer.d.ts → cjs/indexer.d.ts} +0 -0
- /package/dist/{indexer.js → cjs/indexer.js} +0 -0
- /package/dist/{ingestion.d.ts → cjs/ingestion.d.ts} +0 -0
- /package/dist/{ingestion.js → cjs/ingestion.js} +0 -0
- /package/dist/{mcp-server.d.ts → cjs/mcp-server.d.ts} +0 -0
- /package/dist/{mcp-server.js → cjs/mcp-server.js} +0 -0
- /package/dist/{multimodal → cjs/multimodal}/clip-embedder.d.ts +0 -0
- /package/dist/{multimodal → cjs/multimodal}/index.d.ts +0 -0
- /package/dist/{multimodal → cjs/multimodal}/index.js +0 -0
- /package/dist/{preprocess.d.ts → cjs/preprocess.d.ts} +0 -0
- /package/dist/{preprocess.js → cjs/preprocess.js} +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/index.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/index.js +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mdx.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mdx.js +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mermaid.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mermaid.js +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/registry.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/registry.js +0 -0
- /package/dist/{run-error-recovery-tests.d.ts → cjs/run-error-recovery-tests.d.ts} +0 -0
- /package/dist/{run-error-recovery-tests.js → cjs/run-error-recovery-tests.js} +0 -0
- /package/dist/{search-standalone.d.ts → cjs/search-standalone.d.ts} +0 -0
- /package/dist/{search-standalone.js → cjs/search-standalone.js} +0 -0
- /package/dist/{search.d.ts → cjs/search.d.ts} +0 -0
- /package/dist/{search.js → cjs/search.js} +0 -0
- /package/dist/{test-utils.d.ts → cjs/test-utils.d.ts} +0 -0
- /package/dist/{test-utils.js → cjs/test-utils.js} +0 -0
- /package/dist/{text → cjs/text}/chunker.d.ts +0 -0
- /package/dist/{text → cjs/text}/chunker.js +0 -0
- /package/dist/{text → cjs/text}/embedder.d.ts +0 -0
- /package/dist/{text → cjs/text}/embedder.js +0 -0
- /package/dist/{text → cjs/text}/index.d.ts +0 -0
- /package/dist/{text → cjs/text}/index.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/index.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/index.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mdx.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mdx.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mermaid.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mermaid.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/registry.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/registry.js +0 -0
- /package/dist/{text → cjs/text}/reranker.d.ts +0 -0
- /package/dist/{text → cjs/text}/reranker.js +0 -0
- /package/dist/{text → cjs/text}/sentence-transformer-embedder.d.ts +0 -0
- /package/dist/{text → cjs/text}/sentence-transformer-embedder.js +0 -0
- /package/dist/{text → cjs/text}/tokenizer.d.ts +0 -0
- /package/dist/{text → cjs/text}/tokenizer.js +0 -0
- /package/dist/{types.d.ts → cjs/types.d.ts} +0 -0
- /package/dist/{types.js → cjs/types.js} +0 -0
- /package/dist/{utils → cjs/utils}/vector-math.d.ts +0 -0
- /package/dist/{utils → cjs/utils}/vector-math.js +0 -0
|
@@ -0,0 +1,320 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CORE MODULE — Shared between text-only (rag-lite-ts) and future multimodal (rag-lite-mm)
|
|
3
|
+
* Model-agnostic. No transformer or modality-specific logic.
|
|
4
|
+
*/
|
|
5
|
+
import { getChunksByEmbeddingIds } from './db.js';
|
|
6
|
+
import { config } from './config.js';
|
|
7
|
+
import { createMissingDependencyError } from './actionable-error-messages.js';
|
|
8
|
+
/**
|
|
9
|
+
* Search engine that provides semantic search capabilities
|
|
10
|
+
* Implements the core search pipeline: query embedding → vector search → metadata retrieval → optional reranking
|
|
11
|
+
* Uses explicit dependency injection for clean architecture
|
|
12
|
+
*/
|
|
13
|
+
export class SearchEngine {
|
|
14
|
+
embedFn;
|
|
15
|
+
indexManager;
|
|
16
|
+
db;
|
|
17
|
+
rerankFn;
|
|
18
|
+
contentResolver;
|
|
19
|
+
/**
|
|
20
|
+
* Creates a new SearchEngine with explicit dependency injection
|
|
21
|
+
*
|
|
22
|
+
* DEPENDENCY INJECTION PATTERN:
|
|
23
|
+
* This constructor requires all dependencies to be explicitly provided, enabling:
|
|
24
|
+
* - Clean separation between core logic and implementation-specific components
|
|
25
|
+
* - Support for different embedding models (text-only, multimodal, custom)
|
|
26
|
+
* - Testability through mock injection
|
|
27
|
+
* - Future extensibility without core changes
|
|
28
|
+
*
|
|
29
|
+
* @param embedFn - Function to embed queries into vectors
|
|
30
|
+
* - Signature: (query: string, contentType?: string) => Promise<EmbeddingResult>
|
|
31
|
+
* - Examples:
|
|
32
|
+
* - Text: const embedFn = (query) => textEmbedder.embedSingle(query)
|
|
33
|
+
* - Multimodal: const embedFn = (query, type) => type === 'image' ? clipEmbedder.embedImage(query) : clipEmbedder.embedText(query)
|
|
34
|
+
* - Custom: const embedFn = (query) => customModel.embed(query)
|
|
35
|
+
*
|
|
36
|
+
* @param indexManager - Vector index manager for similarity search
|
|
37
|
+
* - Handles vector storage and retrieval operations
|
|
38
|
+
* - Works with any embedding dimensions (384, 512, 768, etc.)
|
|
39
|
+
* - Example: new IndexManager('./index.bin')
|
|
40
|
+
*
|
|
41
|
+
* @param db - Database connection for metadata retrieval
|
|
42
|
+
* - Provides access to document and chunk metadata
|
|
43
|
+
* - Supports different content types through metadata fields
|
|
44
|
+
* - Example: await openDatabase('./db.sqlite')
|
|
45
|
+
*
|
|
46
|
+
* @param rerankFn - Optional function to rerank search results
|
|
47
|
+
* - Signature: (query: string, results: SearchResult[], contentType?: string) => Promise<SearchResult[]>
|
|
48
|
+
* - Examples:
|
|
49
|
+
* - Text: const rerankFn = (query, results) => textReranker.rerank(query, results)
|
|
50
|
+
* - Custom: const rerankFn = (query, results) => customReranker.rerank(query, results)
|
|
51
|
+
* - Disabled: undefined (no reranking)
|
|
52
|
+
*
|
|
53
|
+
* USAGE EXAMPLES:
|
|
54
|
+
* ```typescript
|
|
55
|
+
* // Text-only search engine
|
|
56
|
+
* const textEmbedFn = createTextEmbedFunction();
|
|
57
|
+
* const textRerankFn = createTextRerankFunction();
|
|
58
|
+
* const indexManager = new IndexManager('./index.bin');
|
|
59
|
+
* const db = await openDatabase('./db.sqlite');
|
|
60
|
+
* const search = new SearchEngine(textEmbedFn, indexManager, db, textRerankFn);
|
|
61
|
+
*
|
|
62
|
+
* // Search engine without reranking
|
|
63
|
+
* const search = new SearchEngine(textEmbedFn, indexManager, db);
|
|
64
|
+
*
|
|
65
|
+
* // Custom embedding implementation
|
|
66
|
+
* const customEmbedFn = async (query) => ({
|
|
67
|
+
* embedding_id: generateId(),
|
|
68
|
+
* vector: await myCustomModel.embed(query)
|
|
69
|
+
* });
|
|
70
|
+
* const search = new SearchEngine(customEmbedFn, indexManager, db);
|
|
71
|
+
* ```
|
|
72
|
+
*/
|
|
73
|
+
constructor(embedFn, indexManager, db, rerankFn, contentResolver) {
|
|
74
|
+
this.embedFn = embedFn;
|
|
75
|
+
this.indexManager = indexManager;
|
|
76
|
+
this.db = db;
|
|
77
|
+
this.rerankFn = rerankFn;
|
|
78
|
+
// Validate required dependencies
|
|
79
|
+
if (!embedFn || typeof embedFn !== 'function') {
|
|
80
|
+
throw createMissingDependencyError('embedFn', 'function', {
|
|
81
|
+
operationContext: 'SearchEngine constructor'
|
|
82
|
+
});
|
|
83
|
+
}
|
|
84
|
+
if (!indexManager) {
|
|
85
|
+
throw createMissingDependencyError('indexManager', 'object', {
|
|
86
|
+
operationContext: 'SearchEngine constructor'
|
|
87
|
+
});
|
|
88
|
+
}
|
|
89
|
+
if (!db) {
|
|
90
|
+
throw createMissingDependencyError('db', 'object', {
|
|
91
|
+
operationContext: 'SearchEngine constructor'
|
|
92
|
+
});
|
|
93
|
+
}
|
|
94
|
+
// Initialize ContentResolver if provided, or create lazily when needed
|
|
95
|
+
this.contentResolver = contentResolver;
|
|
96
|
+
}
|
|
97
|
+
/**
|
|
98
|
+
* Perform semantic search on the indexed documents
|
|
99
|
+
* Implements the core search pipeline: query embedding → vector search → metadata retrieval → optional reranking
|
|
100
|
+
* @param query - Search query string
|
|
101
|
+
* @param options - Search options including top_k and rerank settings
|
|
102
|
+
* @returns Promise resolving to array of search results
|
|
103
|
+
*/
|
|
104
|
+
async search(query, options = {}) {
|
|
105
|
+
if (!query || query.trim().length === 0) {
|
|
106
|
+
return [];
|
|
107
|
+
}
|
|
108
|
+
const startTime = performance.now();
|
|
109
|
+
try {
|
|
110
|
+
// Step 1: Build query embedding using injected embed function
|
|
111
|
+
const embeddingStartTime = performance.now();
|
|
112
|
+
const queryEmbedding = await this.embedFn(query);
|
|
113
|
+
const embeddingTime = performance.now() - embeddingStartTime;
|
|
114
|
+
// Step 2: Search with the vector
|
|
115
|
+
const results = await this.searchWithVector(queryEmbedding.vector, options, query, embeddingTime);
|
|
116
|
+
return results;
|
|
117
|
+
}
|
|
118
|
+
catch (error) {
|
|
119
|
+
throw new Error(`Search failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
/**
|
|
123
|
+
* Perform semantic search using a pre-computed embedding vector
|
|
124
|
+
* Useful for image-based search or when embedding is computed externally
|
|
125
|
+
* @param queryVector - Pre-computed query embedding vector
|
|
126
|
+
* @param options - Search options including top_k and rerank settings
|
|
127
|
+
* @param originalQuery - Optional original query for reranking (text or image path)
|
|
128
|
+
* @param embeddingTime - Optional embedding time for logging
|
|
129
|
+
* @returns Promise resolving to array of search results
|
|
130
|
+
*/
|
|
131
|
+
async searchWithVector(queryVector, options = {}, originalQuery, embeddingTime) {
|
|
132
|
+
const startTime = performance.now();
|
|
133
|
+
const topK = options.top_k || config.top_k || 10;
|
|
134
|
+
// Phase 1: Disable reranking by default for better performance
|
|
135
|
+
// Users must explicitly opt-in with --rerank flag
|
|
136
|
+
const shouldRerank = options.rerank === true;
|
|
137
|
+
try {
|
|
138
|
+
// Step 1: Search using IndexManager (which handles hash mapping properly)
|
|
139
|
+
const searchStartTime = performance.now();
|
|
140
|
+
let searchResult;
|
|
141
|
+
try {
|
|
142
|
+
const contentType = options.contentType;
|
|
143
|
+
searchResult = this.indexManager.search(queryVector, topK, contentType);
|
|
144
|
+
}
|
|
145
|
+
catch (error) {
|
|
146
|
+
if (error instanceof Error && error.message.includes('No embedding ID found for hash')) {
|
|
147
|
+
console.warn(`Hash mapping issue detected: ${error.message}`);
|
|
148
|
+
console.warn('This may indicate index/database synchronization issues. Consider running: raglite rebuild');
|
|
149
|
+
return [];
|
|
150
|
+
}
|
|
151
|
+
throw error;
|
|
152
|
+
}
|
|
153
|
+
const vectorSearchTime = performance.now() - searchStartTime;
|
|
154
|
+
if (searchResult.embeddingIds.length === 0) {
|
|
155
|
+
const totalTime = performance.now() - startTime;
|
|
156
|
+
console.log(`No similar documents found (${totalTime.toFixed(2)}ms total)`);
|
|
157
|
+
return [];
|
|
158
|
+
}
|
|
159
|
+
// Step 2: Retrieve chunks from database using embedding IDs
|
|
160
|
+
const retrievalStartTime = performance.now();
|
|
161
|
+
const chunks = await getChunksByEmbeddingIds(this.db, searchResult.embeddingIds);
|
|
162
|
+
const retrievalTime = performance.now() - retrievalStartTime;
|
|
163
|
+
// Step 3: Format results as JSON with text, score, and document metadata
|
|
164
|
+
let results = this.formatSearchResults(chunks, searchResult.distances, searchResult.embeddingIds);
|
|
165
|
+
// Step 4: Optional reranking with injected rerank function
|
|
166
|
+
let rerankTime = 0;
|
|
167
|
+
if (shouldRerank && this.rerankFn && results.length > 1 && originalQuery) {
|
|
168
|
+
try {
|
|
169
|
+
const rerankStartTime = performance.now();
|
|
170
|
+
results = await this.rerankFn(originalQuery, results);
|
|
171
|
+
rerankTime = performance.now() - rerankStartTime;
|
|
172
|
+
}
|
|
173
|
+
catch (error) {
|
|
174
|
+
// Fallback to vector search results and log the error
|
|
175
|
+
console.warn(`Reranking failed, using vector search results: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
176
|
+
}
|
|
177
|
+
}
|
|
178
|
+
const totalTime = performance.now() - startTime;
|
|
179
|
+
// Measure latency without premature optimization - just log for monitoring
|
|
180
|
+
const embedTimeStr = embeddingTime !== undefined ? `embed: ${embeddingTime.toFixed(2)}ms, ` : '';
|
|
181
|
+
console.log(`Search completed: ${results.length} results in ${totalTime.toFixed(2)}ms ` +
|
|
182
|
+
`(${embedTimeStr}vector: ${vectorSearchTime.toFixed(2)}ms, ` +
|
|
183
|
+
`retrieval: ${retrievalTime.toFixed(2)}ms${rerankTime > 0 ? `, rerank: ${rerankTime.toFixed(2)}ms` : ''})`);
|
|
184
|
+
return results;
|
|
185
|
+
}
|
|
186
|
+
catch (error) {
|
|
187
|
+
throw new Error(`Vector search failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
188
|
+
}
|
|
189
|
+
}
|
|
190
|
+
/**
|
|
191
|
+
* Format search results with proper structure
|
|
192
|
+
* @param chunks - Database chunks with metadata
|
|
193
|
+
* @param distances - Similarity distances from vector search
|
|
194
|
+
* @param embeddingIds - Embedding IDs in search result order
|
|
195
|
+
* @returns Formatted search results
|
|
196
|
+
*/
|
|
197
|
+
formatSearchResults(chunks, distances, embeddingIds) {
|
|
198
|
+
const results = [];
|
|
199
|
+
// Create a map for quick chunk lookup by embedding_id
|
|
200
|
+
const chunkMap = new Map();
|
|
201
|
+
chunks.forEach(chunk => {
|
|
202
|
+
chunkMap.set(chunk.embedding_id, chunk);
|
|
203
|
+
});
|
|
204
|
+
// Build results in the order of search results
|
|
205
|
+
for (let i = 0; i < embeddingIds.length; i++) {
|
|
206
|
+
const embeddingId = embeddingIds[i];
|
|
207
|
+
const chunk = chunkMap.get(embeddingId);
|
|
208
|
+
if (chunk) {
|
|
209
|
+
// Convert cosine distance to similarity score (1 - distance)
|
|
210
|
+
// hnswlib-wasm returns cosine distance, we want similarity
|
|
211
|
+
const score = Math.max(0, 1 - distances[i]);
|
|
212
|
+
results.push({
|
|
213
|
+
content: chunk.content,
|
|
214
|
+
score: score,
|
|
215
|
+
contentType: chunk.content_type || 'text',
|
|
216
|
+
document: {
|
|
217
|
+
id: chunk.document_id,
|
|
218
|
+
source: chunk.document_source,
|
|
219
|
+
title: chunk.document_title,
|
|
220
|
+
contentType: chunk.document_content_type || 'text',
|
|
221
|
+
contentId: chunk.document_content_id || undefined
|
|
222
|
+
}
|
|
223
|
+
});
|
|
224
|
+
}
|
|
225
|
+
}
|
|
226
|
+
return results;
|
|
227
|
+
}
|
|
228
|
+
/**
|
|
229
|
+
* Get search engine statistics
|
|
230
|
+
* @returns Object with current search engine stats
|
|
231
|
+
*/
|
|
232
|
+
async getStats() {
|
|
233
|
+
const indexStats = await this.indexManager.getStats();
|
|
234
|
+
return {
|
|
235
|
+
totalChunks: indexStats.totalVectors,
|
|
236
|
+
indexSize: indexStats.totalVectors,
|
|
237
|
+
rerankingEnabled: this.rerankFn !== undefined
|
|
238
|
+
};
|
|
239
|
+
}
|
|
240
|
+
/**
|
|
241
|
+
* Retrieve content by ID in the specified format
|
|
242
|
+
* @param contentId - Content ID to retrieve
|
|
243
|
+
* @param format - Format to return ('file' for CLI clients, 'base64' for MCP clients)
|
|
244
|
+
* @returns Promise that resolves to content in requested format
|
|
245
|
+
*/
|
|
246
|
+
async getContent(contentId, format = 'file') {
|
|
247
|
+
// Lazy initialization of ContentResolver
|
|
248
|
+
if (!this.contentResolver) {
|
|
249
|
+
const { ContentResolver } = await import('./content-resolver.js');
|
|
250
|
+
this.contentResolver = new ContentResolver(this.db);
|
|
251
|
+
}
|
|
252
|
+
return this.contentResolver.getContent(contentId, format);
|
|
253
|
+
}
|
|
254
|
+
/**
|
|
255
|
+
* Retrieve multiple content items efficiently in batch
|
|
256
|
+
* @param contentIds - Array of content IDs to retrieve
|
|
257
|
+
* @param format - Format to return ('file' for CLI clients, 'base64' for MCP clients)
|
|
258
|
+
* @returns Promise that resolves to array of content in requested format
|
|
259
|
+
*/
|
|
260
|
+
async getContentBatch(contentIds, format = 'file') {
|
|
261
|
+
// Lazy initialization of ContentResolver
|
|
262
|
+
if (!this.contentResolver) {
|
|
263
|
+
const { ContentResolver } = await import('./content-resolver.js');
|
|
264
|
+
this.contentResolver = new ContentResolver(this.db);
|
|
265
|
+
}
|
|
266
|
+
// Convert contentIds array to ContentRequest array
|
|
267
|
+
const requests = contentIds.map(contentId => ({ contentId, format }));
|
|
268
|
+
const results = await this.contentResolver.getContentBatch(requests);
|
|
269
|
+
// Extract content from results, maintaining order and handling errors
|
|
270
|
+
return results.map(result => {
|
|
271
|
+
if (!result.success) {
|
|
272
|
+
throw new Error(`Failed to retrieve content ${result.contentId}: ${result.error}`);
|
|
273
|
+
}
|
|
274
|
+
return result.content;
|
|
275
|
+
});
|
|
276
|
+
}
|
|
277
|
+
/**
|
|
278
|
+
* Retrieve content metadata for result enhancement
|
|
279
|
+
* @param contentId - Content ID to get metadata for
|
|
280
|
+
* @returns Promise that resolves to content metadata
|
|
281
|
+
*/
|
|
282
|
+
async getContentMetadata(contentId) {
|
|
283
|
+
// Lazy initialization of ContentResolver
|
|
284
|
+
if (!this.contentResolver) {
|
|
285
|
+
const { ContentResolver } = await import('./content-resolver.js');
|
|
286
|
+
this.contentResolver = new ContentResolver(this.db);
|
|
287
|
+
}
|
|
288
|
+
return this.contentResolver.getContentMetadata(contentId);
|
|
289
|
+
}
|
|
290
|
+
/**
|
|
291
|
+
* Verify that content exists and is accessible
|
|
292
|
+
* @param contentId - Content ID to verify
|
|
293
|
+
* @returns Promise that resolves to true if content exists, false otherwise
|
|
294
|
+
*/
|
|
295
|
+
async verifyContentExists(contentId) {
|
|
296
|
+
// Lazy initialization of ContentResolver
|
|
297
|
+
if (!this.contentResolver) {
|
|
298
|
+
const { ContentResolver } = await import('./content-resolver.js');
|
|
299
|
+
this.contentResolver = new ContentResolver(this.db);
|
|
300
|
+
}
|
|
301
|
+
return this.contentResolver.verifyContentExists(contentId);
|
|
302
|
+
}
|
|
303
|
+
/**
|
|
304
|
+
* Clean up resources - explicit cleanup method
|
|
305
|
+
*/
|
|
306
|
+
async cleanup() {
|
|
307
|
+
try {
|
|
308
|
+
// Clean up ContentResolver to prevent resource leaks
|
|
309
|
+
if (this.contentResolver && typeof this.contentResolver.cleanup === 'function') {
|
|
310
|
+
this.contentResolver.cleanup();
|
|
311
|
+
}
|
|
312
|
+
await this.db.close();
|
|
313
|
+
await this.indexManager.close();
|
|
314
|
+
}
|
|
315
|
+
catch (error) {
|
|
316
|
+
console.error('Error during SearchEngine cleanup:', error instanceof Error ? error.message : String(error));
|
|
317
|
+
}
|
|
318
|
+
}
|
|
319
|
+
}
|
|
320
|
+
//# sourceMappingURL=search.js.map
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Streaming Operations for Large Content - Task 9.1 Implementation
|
|
3
|
+
* Provides memory-efficient streaming operations for content ingestion and retrieval
|
|
4
|
+
* Minimizes memory usage for large files through streaming algorithms
|
|
5
|
+
*/
|
|
6
|
+
/**
 * Callback used by long-running operations to report progress.
 */
export interface ProgressCallback {
    /**
     * @param bytesProcessed - Bytes handled so far (presumably a running total; confirm against the implementation)
     * @param totalBytes - Overall size, when the caller of the callback knows it
     */
    (bytesProcessed: number, totalBytes?: number): void;
}
|
|
12
|
+
/**
 * Outcome of a streaming hash calculation.
 */
export interface StreamingHashResult {
    /** Computed hash as a string (string encoding not visible here; presumably hex, confirm). */
    hash: string;
    /** Number of bytes that were processed. */
    bytesProcessed: number;
    /** Processing time in milliseconds. */
    processingTimeMs: number;
}
|
|
20
|
+
/**
 * Outcome of a streaming file copy or buffer write.
 */
export interface StreamingCopyResult {
    /** Number of bytes written to the destination. */
    bytesWritten: number;
    /** Processing time in milliseconds. */
    processingTimeMs: number;
    /** Hash of the content, when one was computed (presumably tied to `enableHashing`; confirm). */
    hash?: string;
}
|
|
28
|
+
/**
 * Settings that control streaming operations.
 */
export interface StreamingConfig {
    /** Size of each streamed chunk (presumably in bytes; confirm against the implementation). */
    chunkSize: number;
    /** Whether progress reporting is enabled. */
    enableProgress: boolean;
    /** Whether hashing is enabled. */
    enableHashing: boolean;
    /** Operation timeout (presumably in milliseconds; confirm against the implementation). */
    timeout: number;
}
|
|
37
|
+
/**
 * Memory-efficient operations for large content: hashing, copying, writing,
 * base64 reading and integrity validation, all performed via streaming.
 */
export declare class StreamingOperations {
    /** Configuration held by the instance. */
    private config;
    /**
     * @param config - Optional partial configuration (presumably merged over defaults; confirm in the implementation)
     */
    constructor(config?: Partial<StreamingConfig>);
    /**
     * Computes the SHA-256 hash of a file by streaming it, keeping memory usage low.
     * @param filePath - Path of the file to hash
     * @param progressCallback - Optional progress reporter
     * @returns Promise resolving to the hash result
     */
    calculateFileHashStreaming(filePath: string, progressCallback?: ProgressCallback): Promise<StreamingHashResult>;
    /**
     * Computes the SHA-256 hash of a buffer by streaming it, keeping memory usage low.
     * @param content - Buffer to hash
     * @param progressCallback - Optional progress reporter
     * @returns Promise resolving to the hash result
     */
    calculateBufferHashStreaming(content: Buffer, progressCallback?: ProgressCallback): Promise<StreamingHashResult>;
    /**
     * Copies a file with streaming operations, optionally hashing it.
     * @param sourcePath - File to copy from
     * @param destinationPath - File to copy to
     * @param progressCallback - Optional progress reporter
     * @returns Promise resolving to the copy result
     */
    copyFileStreaming(sourcePath: string, destinationPath: string, progressCallback?: ProgressCallback): Promise<StreamingCopyResult>;
    /**
     * Writes the contents of a buffer to a file with streaming operations.
     * @param content - Buffer to write
     * @param destinationPath - File to write to
     * @param progressCallback - Optional progress reporter
     * @returns Promise resolving to the write result
     */
    writeBufferStreaming(content: Buffer, destinationPath: string, progressCallback?: ProgressCallback): Promise<StreamingCopyResult>;
    /**
     * Reads a file and converts it to base64, streaming to keep memory usage low.
     * @param filePath - Path of the file to read
     * @param progressCallback - Optional progress reporter
     * @returns Promise resolving to the base64 string
     */
    readFileAsBase64Streaming(filePath: string, progressCallback?: ProgressCallback): Promise<string>;
    /**
     * Validates a file's integrity by comparing its streaming hash with an expected hash.
     * @param filePath - Path of the file to validate
     * @param expectedHash - Expected SHA-256 hash
     * @param progressCallback - Optional progress reporter
     * @returns Promise resolving to the validation outcome, the actual hash and the bytes processed
     */
    validateFileIntegrityStreaming(filePath: string, expectedHash: string, progressCallback?: ProgressCallback): Promise<{
        isValid: boolean;
        actualHash: string;
        bytesProcessed: number;
    }>;
    /**
     * Reports information about a file without loading its content into memory.
     * @param filePath - Path of the file
     * @returns Promise resolving to the size, type, modification date and read/write access flags
     */
    getFileInfo(filePath: string): Promise<{
        size: number;
        isFile: boolean;
        isDirectory: boolean;
        lastModified: Date;
        canRead: boolean;
        canWrite: boolean;
    }>;
    /**
     * Splits a buffer into chunks for streaming.
     * @param buffer - Buffer to chunk
     * @returns Generator that yields buffer chunks
     */
    private bufferToChunks;
    /**
     * Wraps a promise so that it is subject to a timeout.
     * @param promise - Promise to wrap
     * @param timeoutMs - Timeout in milliseconds
     * @param errorMessage - Error message used on timeout
     * @returns Promise that rejects if the timeout is reached
     */
    private withTimeout;
}
|
|
120
|
+
/**
 * Factory for a StreamingOperations instance that uses the default configuration.
 * @param config - Optional overrides for individual configuration fields
 * @returns The new StreamingOperations instance
 */
export declare function createStreamingOperations(config?: Partial<StreamingConfig>): StreamingOperations;
|
|
126
|
+
/**
 * Formats a byte count as text for progress reporting.
 * @param bytes - Byte count to format
 * @returns Human-readable string (e.g., "1.5 MB")
 */
export declare function formatBytes(bytes: number): string;
|
|
132
|
+
/**
 * Formats a processing duration as text.
 * @param milliseconds - Duration in milliseconds
 * @returns Human-readable string (e.g., "1.5s" or "150ms")
 */
export declare function formatProcessingTime(milliseconds: number): string;
|
|
138
|
+
/**
 * Computes the processing speed for a given amount of data and elapsed time.
 * @param bytes - Number of bytes processed
 * @param milliseconds - Elapsed processing time in milliseconds
 * @returns Speed in MB/s
 */
export declare function calculateProcessingSpeed(bytes: number, milliseconds: number): number;
|
|
145
|
+
//# sourceMappingURL=streaming-operations.d.ts.map
|