rag-lite-ts 2.1.0 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{cli → cjs/cli}/indexer.js +1 -1
- package/dist/{cli → cjs/cli}/search.js +5 -10
- package/dist/{core → cjs/core}/binary-index-format.d.ts +28 -2
- package/dist/cjs/core/binary-index-format.js +291 -0
- package/dist/{core → cjs/core}/ingestion.d.ts +5 -1
- package/dist/{core → cjs/core}/ingestion.js +76 -9
- package/dist/{core → cjs/core}/model-validator.js +1 -1
- package/dist/{core → cjs/core}/reranking-strategies.js +4 -5
- package/dist/{core → cjs/core}/search.js +2 -1
- package/dist/{core → cjs/core}/types.d.ts +1 -1
- package/dist/{core → cjs/core}/vector-index.d.ts +4 -0
- package/dist/{core → cjs/core}/vector-index.js +10 -2
- package/dist/{file-processor.d.ts → cjs/file-processor.d.ts} +2 -0
- package/dist/{file-processor.js → cjs/file-processor.js} +20 -0
- package/dist/{index-manager.d.ts → cjs/index-manager.d.ts} +17 -1
- package/dist/{index-manager.js → cjs/index-manager.js} +148 -7
- package/dist/{multimodal → cjs/multimodal}/clip-embedder.js +71 -66
- package/dist/esm/api-errors.d.ts +90 -0
- package/dist/esm/api-errors.js +320 -0
- package/dist/esm/cli/indexer.d.ts +11 -0
- package/dist/esm/cli/indexer.js +471 -0
- package/dist/esm/cli/search.d.ts +7 -0
- package/dist/esm/cli/search.js +332 -0
- package/dist/esm/cli.d.ts +3 -0
- package/dist/esm/cli.js +529 -0
- package/dist/esm/config.d.ts +51 -0
- package/dist/esm/config.js +79 -0
- package/dist/esm/core/abstract-embedder.d.ts +125 -0
- package/dist/esm/core/abstract-embedder.js +264 -0
- package/dist/esm/core/actionable-error-messages.d.ts +60 -0
- package/dist/esm/core/actionable-error-messages.js +397 -0
- package/dist/esm/core/adapters.d.ts +93 -0
- package/dist/esm/core/adapters.js +139 -0
- package/dist/esm/core/batch-processing-optimizer.d.ts +155 -0
- package/dist/esm/core/batch-processing-optimizer.js +536 -0
- package/dist/esm/core/binary-index-format.d.ts +78 -0
- package/dist/esm/core/binary-index-format.js +291 -0
- package/dist/esm/core/chunker.d.ts +119 -0
- package/dist/esm/core/chunker.js +73 -0
- package/dist/esm/core/cli-database-utils.d.ts +53 -0
- package/dist/esm/core/cli-database-utils.js +239 -0
- package/dist/esm/core/config.d.ts +102 -0
- package/dist/esm/core/config.js +247 -0
- package/dist/esm/core/content-errors.d.ts +111 -0
- package/dist/esm/core/content-errors.js +362 -0
- package/dist/esm/core/content-manager.d.ts +335 -0
- package/dist/esm/core/content-manager.js +1476 -0
- package/dist/esm/core/content-performance-optimizer.d.ts +150 -0
- package/dist/esm/core/content-performance-optimizer.js +516 -0
- package/dist/esm/core/content-resolver.d.ts +104 -0
- package/dist/esm/core/content-resolver.js +285 -0
- package/dist/esm/core/cross-modal-search.d.ts +164 -0
- package/dist/esm/core/cross-modal-search.js +342 -0
- package/dist/esm/core/database-connection-manager.d.ts +109 -0
- package/dist/esm/core/database-connection-manager.js +310 -0
- package/dist/esm/core/db.d.ts +213 -0
- package/dist/esm/core/db.js +895 -0
- package/dist/esm/core/embedder-factory.d.ts +154 -0
- package/dist/esm/core/embedder-factory.js +311 -0
- package/dist/esm/core/error-handler.d.ts +112 -0
- package/dist/esm/core/error-handler.js +239 -0
- package/dist/esm/core/index.d.ts +59 -0
- package/dist/esm/core/index.js +69 -0
- package/dist/esm/core/ingestion.d.ts +202 -0
- package/dist/esm/core/ingestion.js +901 -0
- package/dist/esm/core/interfaces.d.ts +408 -0
- package/dist/esm/core/interfaces.js +106 -0
- package/dist/esm/core/lazy-dependency-loader.d.ts +147 -0
- package/dist/esm/core/lazy-dependency-loader.js +435 -0
- package/dist/esm/core/mode-detection-service.d.ts +150 -0
- package/dist/esm/core/mode-detection-service.js +565 -0
- package/dist/esm/core/mode-model-validator.d.ts +92 -0
- package/dist/esm/core/mode-model-validator.js +203 -0
- package/dist/esm/core/model-registry.d.ts +116 -0
- package/dist/esm/core/model-registry.js +411 -0
- package/dist/esm/core/model-validator.d.ts +217 -0
- package/dist/esm/core/model-validator.js +782 -0
- package/dist/esm/core/path-manager.d.ts +47 -0
- package/dist/esm/core/path-manager.js +71 -0
- package/dist/esm/core/raglite-paths.d.ts +121 -0
- package/dist/esm/core/raglite-paths.js +145 -0
- package/dist/esm/core/reranking-config.d.ts +42 -0
- package/dist/esm/core/reranking-config.js +147 -0
- package/dist/esm/core/reranking-factory.d.ts +92 -0
- package/dist/esm/core/reranking-factory.js +410 -0
- package/dist/esm/core/reranking-strategies.d.ts +310 -0
- package/dist/esm/core/reranking-strategies.js +650 -0
- package/dist/esm/core/resource-cleanup.d.ts +163 -0
- package/dist/esm/core/resource-cleanup.js +371 -0
- package/dist/esm/core/resource-manager.d.ts +212 -0
- package/dist/esm/core/resource-manager.js +564 -0
- package/dist/esm/core/search-pipeline.d.ts +111 -0
- package/dist/esm/core/search-pipeline.js +287 -0
- package/dist/esm/core/search.d.ts +141 -0
- package/dist/esm/core/search.js +320 -0
- package/dist/esm/core/streaming-operations.d.ts +145 -0
- package/dist/esm/core/streaming-operations.js +409 -0
- package/dist/esm/core/types.d.ts +66 -0
- package/dist/esm/core/types.js +6 -0
- package/dist/esm/core/universal-embedder.d.ts +177 -0
- package/dist/esm/core/universal-embedder.js +139 -0
- package/dist/esm/core/validation-messages.d.ts +99 -0
- package/dist/esm/core/validation-messages.js +334 -0
- package/dist/esm/core/vector-index.d.ts +72 -0
- package/dist/esm/core/vector-index.js +333 -0
- package/dist/esm/dom-polyfills.d.ts +6 -0
- package/dist/esm/dom-polyfills.js +37 -0
- package/dist/esm/factories/index.d.ts +27 -0
- package/dist/esm/factories/index.js +29 -0
- package/dist/esm/factories/ingestion-factory.d.ts +200 -0
- package/dist/esm/factories/ingestion-factory.js +477 -0
- package/dist/esm/factories/search-factory.d.ts +154 -0
- package/dist/esm/factories/search-factory.js +344 -0
- package/dist/esm/file-processor.d.ts +147 -0
- package/dist/esm/file-processor.js +963 -0
- package/dist/esm/index-manager.d.ts +116 -0
- package/dist/esm/index-manager.js +598 -0
- package/dist/esm/index.d.ts +75 -0
- package/dist/esm/index.js +110 -0
- package/dist/esm/indexer.d.ts +7 -0
- package/dist/esm/indexer.js +54 -0
- package/dist/esm/ingestion.d.ts +63 -0
- package/dist/esm/ingestion.js +124 -0
- package/dist/esm/mcp-server.d.ts +46 -0
- package/dist/esm/mcp-server.js +1820 -0
- package/dist/esm/multimodal/clip-embedder.d.ts +327 -0
- package/dist/esm/multimodal/clip-embedder.js +996 -0
- package/dist/esm/multimodal/index.d.ts +6 -0
- package/dist/esm/multimodal/index.js +6 -0
- package/dist/esm/preprocess.d.ts +19 -0
- package/dist/esm/preprocess.js +203 -0
- package/dist/esm/preprocessors/index.d.ts +17 -0
- package/dist/esm/preprocessors/index.js +38 -0
- package/dist/esm/preprocessors/mdx.d.ts +25 -0
- package/dist/esm/preprocessors/mdx.js +101 -0
- package/dist/esm/preprocessors/mermaid.d.ts +68 -0
- package/dist/esm/preprocessors/mermaid.js +329 -0
- package/dist/esm/preprocessors/registry.d.ts +56 -0
- package/dist/esm/preprocessors/registry.js +179 -0
- package/dist/esm/run-error-recovery-tests.d.ts +7 -0
- package/dist/esm/run-error-recovery-tests.js +101 -0
- package/dist/esm/search-standalone.d.ts +7 -0
- package/dist/esm/search-standalone.js +117 -0
- package/dist/esm/search.d.ts +99 -0
- package/dist/esm/search.js +177 -0
- package/dist/esm/test-utils.d.ts +18 -0
- package/dist/esm/test-utils.js +27 -0
- package/dist/esm/text/chunker.d.ts +33 -0
- package/dist/esm/text/chunker.js +279 -0
- package/dist/esm/text/embedder.d.ts +111 -0
- package/dist/esm/text/embedder.js +386 -0
- package/dist/esm/text/index.d.ts +8 -0
- package/dist/esm/text/index.js +9 -0
- package/dist/esm/text/preprocessors/index.d.ts +17 -0
- package/dist/esm/text/preprocessors/index.js +38 -0
- package/dist/esm/text/preprocessors/mdx.d.ts +25 -0
- package/dist/esm/text/preprocessors/mdx.js +101 -0
- package/dist/esm/text/preprocessors/mermaid.d.ts +68 -0
- package/dist/esm/text/preprocessors/mermaid.js +330 -0
- package/dist/esm/text/preprocessors/registry.d.ts +56 -0
- package/dist/esm/text/preprocessors/registry.js +180 -0
- package/dist/esm/text/reranker.d.ts +49 -0
- package/dist/esm/text/reranker.js +274 -0
- package/dist/esm/text/sentence-transformer-embedder.d.ts +96 -0
- package/dist/esm/text/sentence-transformer-embedder.js +340 -0
- package/dist/esm/text/tokenizer.d.ts +22 -0
- package/dist/esm/text/tokenizer.js +64 -0
- package/dist/esm/types.d.ts +83 -0
- package/dist/esm/types.js +3 -0
- package/dist/esm/utils/vector-math.d.ts +31 -0
- package/dist/esm/utils/vector-math.js +70 -0
- package/package.json +30 -12
- package/dist/core/binary-index-format.js +0 -122
- /package/dist/{api-errors.d.ts → cjs/api-errors.d.ts} +0 -0
- /package/dist/{api-errors.js → cjs/api-errors.js} +0 -0
- /package/dist/{cli → cjs/cli}/indexer.d.ts +0 -0
- /package/dist/{cli → cjs/cli}/search.d.ts +0 -0
- /package/dist/{cli.d.ts → cjs/cli.d.ts} +0 -0
- /package/dist/{cli.js → cjs/cli.js} +0 -0
- /package/dist/{config.d.ts → cjs/config.d.ts} +0 -0
- /package/dist/{config.js → cjs/config.js} +0 -0
- /package/dist/{core → cjs/core}/abstract-embedder.d.ts +0 -0
- /package/dist/{core → cjs/core}/abstract-embedder.js +0 -0
- /package/dist/{core → cjs/core}/actionable-error-messages.d.ts +0 -0
- /package/dist/{core → cjs/core}/actionable-error-messages.js +0 -0
- /package/dist/{core → cjs/core}/adapters.d.ts +0 -0
- /package/dist/{core → cjs/core}/adapters.js +0 -0
- /package/dist/{core → cjs/core}/batch-processing-optimizer.d.ts +0 -0
- /package/dist/{core → cjs/core}/batch-processing-optimizer.js +0 -0
- /package/dist/{core → cjs/core}/chunker.d.ts +0 -0
- /package/dist/{core → cjs/core}/chunker.js +0 -0
- /package/dist/{core → cjs/core}/cli-database-utils.d.ts +0 -0
- /package/dist/{core → cjs/core}/cli-database-utils.js +0 -0
- /package/dist/{core → cjs/core}/config.d.ts +0 -0
- /package/dist/{core → cjs/core}/config.js +0 -0
- /package/dist/{core → cjs/core}/content-errors.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-errors.js +0 -0
- /package/dist/{core → cjs/core}/content-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-manager.js +0 -0
- /package/dist/{core → cjs/core}/content-performance-optimizer.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-performance-optimizer.js +0 -0
- /package/dist/{core → cjs/core}/content-resolver.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-resolver.js +0 -0
- /package/dist/{core → cjs/core}/cross-modal-search.d.ts +0 -0
- /package/dist/{core → cjs/core}/cross-modal-search.js +0 -0
- /package/dist/{core → cjs/core}/database-connection-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/database-connection-manager.js +0 -0
- /package/dist/{core → cjs/core}/db.d.ts +0 -0
- /package/dist/{core → cjs/core}/db.js +0 -0
- /package/dist/{core → cjs/core}/embedder-factory.d.ts +0 -0
- /package/dist/{core → cjs/core}/embedder-factory.js +0 -0
- /package/dist/{core → cjs/core}/error-handler.d.ts +0 -0
- /package/dist/{core → cjs/core}/error-handler.js +0 -0
- /package/dist/{core → cjs/core}/index.d.ts +0 -0
- /package/dist/{core → cjs/core}/index.js +0 -0
- /package/dist/{core → cjs/core}/interfaces.d.ts +0 -0
- /package/dist/{core → cjs/core}/interfaces.js +0 -0
- /package/dist/{core → cjs/core}/lazy-dependency-loader.d.ts +0 -0
- /package/dist/{core → cjs/core}/lazy-dependency-loader.js +0 -0
- /package/dist/{core → cjs/core}/mode-detection-service.d.ts +0 -0
- /package/dist/{core → cjs/core}/mode-detection-service.js +0 -0
- /package/dist/{core → cjs/core}/mode-model-validator.d.ts +0 -0
- /package/dist/{core → cjs/core}/mode-model-validator.js +0 -0
- /package/dist/{core → cjs/core}/model-registry.d.ts +0 -0
- /package/dist/{core → cjs/core}/model-registry.js +0 -0
- /package/dist/{core → cjs/core}/model-validator.d.ts +0 -0
- /package/dist/{core → cjs/core}/path-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/path-manager.js +0 -0
- /package/dist/{core → cjs/core}/raglite-paths.d.ts +0 -0
- /package/dist/{core → cjs/core}/raglite-paths.js +0 -0
- /package/dist/{core → cjs/core}/reranking-config.d.ts +0 -0
- /package/dist/{core → cjs/core}/reranking-config.js +0 -0
- /package/dist/{core → cjs/core}/reranking-factory.d.ts +0 -0
- /package/dist/{core → cjs/core}/reranking-factory.js +0 -0
- /package/dist/{core → cjs/core}/reranking-strategies.d.ts +0 -0
- /package/dist/{core → cjs/core}/resource-cleanup.d.ts +0 -0
- /package/dist/{core → cjs/core}/resource-cleanup.js +0 -0
- /package/dist/{core → cjs/core}/resource-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/resource-manager.js +0 -0
- /package/dist/{core → cjs/core}/search-pipeline.d.ts +0 -0
- /package/dist/{core → cjs/core}/search-pipeline.js +0 -0
- /package/dist/{core → cjs/core}/search.d.ts +0 -0
- /package/dist/{core → cjs/core}/streaming-operations.d.ts +0 -0
- /package/dist/{core → cjs/core}/streaming-operations.js +0 -0
- /package/dist/{core → cjs/core}/types.js +0 -0
- /package/dist/{core → cjs/core}/universal-embedder.d.ts +0 -0
- /package/dist/{core → cjs/core}/universal-embedder.js +0 -0
- /package/dist/{core → cjs/core}/validation-messages.d.ts +0 -0
- /package/dist/{core → cjs/core}/validation-messages.js +0 -0
- /package/dist/{dom-polyfills.d.ts → cjs/dom-polyfills.d.ts} +0 -0
- /package/dist/{dom-polyfills.js → cjs/dom-polyfills.js} +0 -0
- /package/dist/{factories → cjs/factories}/index.d.ts +0 -0
- /package/dist/{factories → cjs/factories}/index.js +0 -0
- /package/dist/{factories → cjs/factories}/ingestion-factory.d.ts +0 -0
- /package/dist/{factories → cjs/factories}/ingestion-factory.js +0 -0
- /package/dist/{factories → cjs/factories}/search-factory.d.ts +0 -0
- /package/dist/{factories → cjs/factories}/search-factory.js +0 -0
- /package/dist/{index.d.ts → cjs/index.d.ts} +0 -0
- /package/dist/{index.js → cjs/index.js} +0 -0
- /package/dist/{indexer.d.ts → cjs/indexer.d.ts} +0 -0
- /package/dist/{indexer.js → cjs/indexer.js} +0 -0
- /package/dist/{ingestion.d.ts → cjs/ingestion.d.ts} +0 -0
- /package/dist/{ingestion.js → cjs/ingestion.js} +0 -0
- /package/dist/{mcp-server.d.ts → cjs/mcp-server.d.ts} +0 -0
- /package/dist/{mcp-server.js → cjs/mcp-server.js} +0 -0
- /package/dist/{multimodal → cjs/multimodal}/clip-embedder.d.ts +0 -0
- /package/dist/{multimodal → cjs/multimodal}/index.d.ts +0 -0
- /package/dist/{multimodal → cjs/multimodal}/index.js +0 -0
- /package/dist/{preprocess.d.ts → cjs/preprocess.d.ts} +0 -0
- /package/dist/{preprocess.js → cjs/preprocess.js} +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/index.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/index.js +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mdx.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mdx.js +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mermaid.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mermaid.js +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/registry.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/registry.js +0 -0
- /package/dist/{run-error-recovery-tests.d.ts → cjs/run-error-recovery-tests.d.ts} +0 -0
- /package/dist/{run-error-recovery-tests.js → cjs/run-error-recovery-tests.js} +0 -0
- /package/dist/{search-standalone.d.ts → cjs/search-standalone.d.ts} +0 -0
- /package/dist/{search-standalone.js → cjs/search-standalone.js} +0 -0
- /package/dist/{search.d.ts → cjs/search.d.ts} +0 -0
- /package/dist/{search.js → cjs/search.js} +0 -0
- /package/dist/{test-utils.d.ts → cjs/test-utils.d.ts} +0 -0
- /package/dist/{test-utils.js → cjs/test-utils.js} +0 -0
- /package/dist/{text → cjs/text}/chunker.d.ts +0 -0
- /package/dist/{text → cjs/text}/chunker.js +0 -0
- /package/dist/{text → cjs/text}/embedder.d.ts +0 -0
- /package/dist/{text → cjs/text}/embedder.js +0 -0
- /package/dist/{text → cjs/text}/index.d.ts +0 -0
- /package/dist/{text → cjs/text}/index.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/index.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/index.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mdx.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mdx.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mermaid.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mermaid.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/registry.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/registry.js +0 -0
- /package/dist/{text → cjs/text}/reranker.d.ts +0 -0
- /package/dist/{text → cjs/text}/reranker.js +0 -0
- /package/dist/{text → cjs/text}/sentence-transformer-embedder.d.ts +0 -0
- /package/dist/{text → cjs/text}/sentence-transformer-embedder.js +0 -0
- /package/dist/{text → cjs/text}/tokenizer.d.ts +0 -0
- /package/dist/{text → cjs/text}/tokenizer.js +0 -0
- /package/dist/{types.d.ts → cjs/types.d.ts} +0 -0
- /package/dist/{types.js → cjs/types.js} +0 -0
- /package/dist/{utils → cjs/utils}/vector-math.d.ts +0 -0
- /package/dist/{utils → cjs/utils}/vector-math.js +0 -0
|
@@ -0,0 +1,287 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CORE MODULE — Shared between text-only (rag-lite-ts) and future multimodal (rag-lite-mm)
|
|
3
|
+
* Model-agnostic. No transformer or modality-specific logic.
|
|
4
|
+
*/
|
|
5
|
+
/**
|
|
6
|
+
* Core search pipeline coordinator
|
|
7
|
+
* Orchestrates the search pipeline: query processing → vector search → metadata retrieval → optional reranking
|
|
8
|
+
* Remains completely independent of specific embedding models or transformer libraries
|
|
9
|
+
*/
|
|
10
|
+
export class SearchPipelineCoordinator {
|
|
11
|
+
embedQueryFn = null;
|
|
12
|
+
rerankResultsFn = null;
|
|
13
|
+
indexManager = null;
|
|
14
|
+
dbConnection = null;
|
|
15
|
+
defaultContentType = 'text';
|
|
16
|
+
/**
|
|
17
|
+
* Set the embedding function for query processing
|
|
18
|
+
*/
|
|
19
|
+
setEmbedFunction(embedFn) {
|
|
20
|
+
this.embedQueryFn = embedFn;
|
|
21
|
+
}
|
|
22
|
+
/**
|
|
23
|
+
* Set the reranking function for result reranking
|
|
24
|
+
*/
|
|
25
|
+
setRerankFunction(rerankFn) {
|
|
26
|
+
this.rerankResultsFn = rerankFn;
|
|
27
|
+
}
|
|
28
|
+
/**
|
|
29
|
+
* Set the index manager for vector search
|
|
30
|
+
*/
|
|
31
|
+
setIndexManager(indexManager) {
|
|
32
|
+
this.indexManager = indexManager;
|
|
33
|
+
}
|
|
34
|
+
/**
|
|
35
|
+
* Set the database connection for metadata retrieval
|
|
36
|
+
*/
|
|
37
|
+
setDatabaseConnection(dbConnection) {
|
|
38
|
+
this.dbConnection = dbConnection;
|
|
39
|
+
}
|
|
40
|
+
/**
|
|
41
|
+
* Set the default content type
|
|
42
|
+
*/
|
|
43
|
+
setDefaultContentType(contentType) {
|
|
44
|
+
this.defaultContentType = contentType;
|
|
45
|
+
}
|
|
46
|
+
/**
|
|
47
|
+
* Execute the complete search pipeline
|
|
48
|
+
* Coordinates all steps without knowledge of specific embedding models
|
|
49
|
+
*/
|
|
50
|
+
async executeSearchPipeline(query, options = {}) {
|
|
51
|
+
if (!query || query.trim().length === 0) {
|
|
52
|
+
return [];
|
|
53
|
+
}
|
|
54
|
+
const startTime = performance.now();
|
|
55
|
+
const topK = options.top_k || 10;
|
|
56
|
+
const shouldRerank = options.rerank !== undefined ? options.rerank : (this.rerankResultsFn !== null);
|
|
57
|
+
const contentType = options.contentType || this.defaultContentType;
|
|
58
|
+
// Validate dependencies
|
|
59
|
+
this.validateDependencies();
|
|
60
|
+
try {
|
|
61
|
+
// Step 1: Query processing and embedding
|
|
62
|
+
const embeddingStartTime = performance.now();
|
|
63
|
+
const queryEmbedding = await this.embedQuery(query, contentType);
|
|
64
|
+
const embeddingTime = performance.now() - embeddingStartTime;
|
|
65
|
+
// Step 2: Vector search
|
|
66
|
+
const searchStartTime = performance.now();
|
|
67
|
+
const searchResult = await this.vectorSearch(queryEmbedding.vector, topK);
|
|
68
|
+
const vectorSearchTime = performance.now() - searchStartTime;
|
|
69
|
+
if (searchResult.embeddingIds.length === 0) {
|
|
70
|
+
const totalTime = performance.now() - startTime;
|
|
71
|
+
console.log(`No similar documents found (${totalTime.toFixed(2)}ms total)`);
|
|
72
|
+
return [];
|
|
73
|
+
}
|
|
74
|
+
// Step 3: Metadata retrieval
|
|
75
|
+
const retrievalStartTime = performance.now();
|
|
76
|
+
const chunks = await this.retrieveMetadata(searchResult.embeddingIds);
|
|
77
|
+
const retrievalTime = performance.now() - retrievalStartTime;
|
|
78
|
+
// Step 4: Format initial results
|
|
79
|
+
let results = this.formatResults(chunks, searchResult.distances, searchResult.embeddingIds);
|
|
80
|
+
// Step 5: Optional reranking
|
|
81
|
+
let rerankTime = 0;
|
|
82
|
+
if (shouldRerank && this.rerankResultsFn && results.length > 1) {
|
|
83
|
+
try {
|
|
84
|
+
const rerankStartTime = performance.now();
|
|
85
|
+
results = await this.rerankResults(query, results, contentType);
|
|
86
|
+
rerankTime = performance.now() - rerankStartTime;
|
|
87
|
+
}
|
|
88
|
+
catch (error) {
|
|
89
|
+
console.warn(`Reranking failed, using vector search results: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
const totalTime = performance.now() - startTime;
|
|
93
|
+
console.log(`Search pipeline completed: ${results.length} results in ${totalTime.toFixed(2)}ms ` +
|
|
94
|
+
`(embed: ${embeddingTime.toFixed(2)}ms, vector: ${vectorSearchTime.toFixed(2)}ms, ` +
|
|
95
|
+
`retrieval: ${retrievalTime.toFixed(2)}ms${rerankTime > 0 ? `, rerank: ${rerankTime.toFixed(2)}ms` : ''})`);
|
|
96
|
+
return results;
|
|
97
|
+
}
|
|
98
|
+
catch (error) {
|
|
99
|
+
throw new Error(`Search pipeline failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
/**
|
|
103
|
+
* Step 1: Process and embed the query
|
|
104
|
+
* Uses injected embedding function without knowledge of specific models
|
|
105
|
+
*/
|
|
106
|
+
async embedQuery(query, contentType) {
|
|
107
|
+
if (!this.embedQueryFn) {
|
|
108
|
+
throw new Error('No embedding function provided. Set embedding function before executing pipeline.');
|
|
109
|
+
}
|
|
110
|
+
try {
|
|
111
|
+
return await this.embedQueryFn(query, contentType);
|
|
112
|
+
}
|
|
113
|
+
catch (error) {
|
|
114
|
+
throw new Error(`Query embedding failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
115
|
+
}
|
|
116
|
+
}
|
|
117
|
+
/**
|
|
118
|
+
* Step 2: Perform vector search
|
|
119
|
+
* Uses index manager without knowledge of specific embedding models
|
|
120
|
+
*/
|
|
121
|
+
async vectorSearch(queryVector, topK) {
|
|
122
|
+
if (!this.indexManager) {
|
|
123
|
+
throw new Error('Index manager not set. Set index manager before executing pipeline.');
|
|
124
|
+
}
|
|
125
|
+
try {
|
|
126
|
+
return this.indexManager.search(queryVector, topK);
|
|
127
|
+
}
|
|
128
|
+
catch (error) {
|
|
129
|
+
if (error instanceof Error && error.message.includes('No embedding ID found for hash')) {
|
|
130
|
+
console.warn(`Hash mapping issue detected: ${error.message}`);
|
|
131
|
+
console.warn('This may indicate index/database synchronization issues. Consider running: raglite rebuild');
|
|
132
|
+
return { embeddingIds: [], distances: [] };
|
|
133
|
+
}
|
|
134
|
+
throw new Error(`Vector search failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
/**
|
|
138
|
+
* Step 3: Retrieve metadata from database
|
|
139
|
+
* Uses database connection without knowledge of specific data formats
|
|
140
|
+
*/
|
|
141
|
+
async retrieveMetadata(embeddingIds) {
|
|
142
|
+
if (!this.dbConnection) {
|
|
143
|
+
throw new Error('Database connection not set. Set database connection before executing pipeline.');
|
|
144
|
+
}
|
|
145
|
+
try {
|
|
146
|
+
const { getChunksByEmbeddingIds } = await import('./db.js');
|
|
147
|
+
return await getChunksByEmbeddingIds(this.dbConnection, embeddingIds);
|
|
148
|
+
}
|
|
149
|
+
catch (error) {
|
|
150
|
+
throw new Error(`Metadata retrieval failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
/**
|
|
154
|
+
* Step 4: Format initial results
|
|
155
|
+
* Formats results in core format without knowledge of specific content types
|
|
156
|
+
*/
|
|
157
|
+
formatResults(chunks, distances, embeddingIds) {
|
|
158
|
+
const results = [];
|
|
159
|
+
// Create a map for quick chunk lookup by embedding_id
|
|
160
|
+
const chunkMap = new Map();
|
|
161
|
+
chunks.forEach(chunk => {
|
|
162
|
+
chunkMap.set(chunk.embedding_id, chunk);
|
|
163
|
+
});
|
|
164
|
+
// Build results in the order of search results
|
|
165
|
+
for (let i = 0; i < embeddingIds.length; i++) {
|
|
166
|
+
const embeddingId = embeddingIds[i];
|
|
167
|
+
const chunk = chunkMap.get(embeddingId);
|
|
168
|
+
if (chunk) {
|
|
169
|
+
// Convert cosine distance to similarity score (1 - distance)
|
|
170
|
+
const score = Math.max(0, 1 - distances[i]);
|
|
171
|
+
results.push({
|
|
172
|
+
content: chunk.text,
|
|
173
|
+
score: score,
|
|
174
|
+
contentType: chunk.content_type || this.defaultContentType,
|
|
175
|
+
document: {
|
|
176
|
+
id: chunk.document_id,
|
|
177
|
+
source: chunk.document_source,
|
|
178
|
+
title: chunk.document_title,
|
|
179
|
+
contentType: chunk.document_content_type || this.defaultContentType
|
|
180
|
+
},
|
|
181
|
+
metadata: chunk.metadata ? this.parseMetadata(chunk.metadata) : undefined
|
|
182
|
+
});
|
|
183
|
+
}
|
|
184
|
+
}
|
|
185
|
+
return results;
|
|
186
|
+
}
|
|
187
|
+
/**
|
|
188
|
+
* Step 5: Optional reranking
|
|
189
|
+
* Uses injected reranking function without knowledge of specific models
|
|
190
|
+
*/
|
|
191
|
+
async rerankResults(query, results, contentType) {
|
|
192
|
+
if (!this.rerankResultsFn) {
|
|
193
|
+
return results; // No reranking function available
|
|
194
|
+
}
|
|
195
|
+
try {
|
|
196
|
+
return await this.rerankResultsFn(query, results, contentType);
|
|
197
|
+
}
|
|
198
|
+
catch (error) {
|
|
199
|
+
console.warn(`Reranking failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
200
|
+
return results; // Return original results on reranking failure
|
|
201
|
+
}
|
|
202
|
+
}
|
|
203
|
+
/**
|
|
204
|
+
* Validate that all required dependencies are set
|
|
205
|
+
*/
|
|
206
|
+
validateDependencies() {
|
|
207
|
+
const missing = [];
|
|
208
|
+
if (!this.embedQueryFn) {
|
|
209
|
+
missing.push('embedding function');
|
|
210
|
+
}
|
|
211
|
+
if (!this.indexManager) {
|
|
212
|
+
missing.push('index manager');
|
|
213
|
+
}
|
|
214
|
+
if (!this.dbConnection) {
|
|
215
|
+
missing.push('database connection');
|
|
216
|
+
}
|
|
217
|
+
if (missing.length > 0) {
|
|
218
|
+
throw new Error(`Missing required dependencies: ${missing.join(', ')}. Set all dependencies before executing search pipeline.`);
|
|
219
|
+
}
|
|
220
|
+
}
|
|
221
|
+
/**
|
|
222
|
+
* Parse metadata safely
|
|
223
|
+
*/
|
|
224
|
+
parseMetadata(metadata) {
|
|
225
|
+
try {
|
|
226
|
+
return JSON.parse(metadata);
|
|
227
|
+
}
|
|
228
|
+
catch (error) {
|
|
229
|
+
console.warn(`Failed to parse metadata: ${metadata}`);
|
|
230
|
+
return undefined;
|
|
231
|
+
}
|
|
232
|
+
}
|
|
233
|
+
/**
|
|
234
|
+
* Check if the pipeline is ready to execute
|
|
235
|
+
*/
|
|
236
|
+
isReady() {
|
|
237
|
+
return !!(this.embedQueryFn && this.indexManager && this.dbConnection);
|
|
238
|
+
}
|
|
239
|
+
/**
|
|
240
|
+
* Get pipeline status information
|
|
241
|
+
*/
|
|
242
|
+
getStatus() {
|
|
243
|
+
return {
|
|
244
|
+
hasEmbedFunction: this.embedQueryFn !== null,
|
|
245
|
+
hasRerankFunction: this.rerankResultsFn !== null,
|
|
246
|
+
hasIndexManager: this.indexManager !== null,
|
|
247
|
+
hasDatabaseConnection: this.dbConnection !== null,
|
|
248
|
+
isReady: this.isReady()
|
|
249
|
+
};
|
|
250
|
+
}
|
|
251
|
+
/**
|
|
252
|
+
* Reset all dependencies (useful for testing or reconfiguration)
|
|
253
|
+
*/
|
|
254
|
+
reset() {
|
|
255
|
+
this.embedQueryFn = null;
|
|
256
|
+
this.rerankResultsFn = null;
|
|
257
|
+
this.indexManager = null;
|
|
258
|
+
this.dbConnection = null;
|
|
259
|
+
this.defaultContentType = 'text';
|
|
260
|
+
}
|
|
261
|
+
}
|
|
262
|
+
/**
 * Factory helpers for building SearchPipelineCoordinator instances.
 */
export class SearchPipelineFactory {
    /**
     * Build a coordinator that is wired with every dependency up front.
     *
     * @param embedFn - Embedding function for query processing.
     * @param indexManager - Index manager used for vector search.
     * @param dbConnection - Database connection used for metadata retrieval.
     * @param rerankFn - Optional reranking function; only registered when truthy.
     * @param defaultContentType - Fallback content type (defaults to 'text').
     * @returns The configured coordinator.
     */
    static create(embedFn, indexManager, dbConnection, rerankFn, defaultContentType = 'text') {
        const pipeline = new SearchPipelineCoordinator();
        pipeline.setEmbedFunction(embedFn);
        pipeline.setIndexManager(indexManager);
        pipeline.setDatabaseConnection(dbConnection);
        pipeline.setDefaultContentType(defaultContentType);
        // Reranking is optional: leave the coordinator without it when none is given
        if (!rerankFn) {
            return pipeline;
        }
        pipeline.setRerankFunction(rerankFn);
        return pipeline;
    }
    /**
     * Build a coordinator with no dependencies set, for manual configuration
     * through its setters.
     */
    static createEmpty() {
        const blankPipeline = new SearchPipelineCoordinator();
        return blankPipeline;
    }
}
|
|
287
|
+
//# sourceMappingURL=search-pipeline.js.map
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CORE MODULE — Shared between text-only (rag-lite-ts) and future multimodal (rag-lite-mm)
|
|
3
|
+
* Model-agnostic. No transformer or modality-specific logic.
|
|
4
|
+
*/
|
|
5
|
+
import { IndexManager } from '../index-manager.js';
|
|
6
|
+
import { DatabaseConnection } from './db.js';
|
|
7
|
+
import type { SearchResult, SearchOptions } from './types.js';
|
|
8
|
+
import type { EmbedFunction, RerankFunction } from './interfaces.js';
|
|
9
|
+
/**
 * Semantic search engine over indexed documents.
 *
 * Core pipeline: query embedding → vector search → metadata retrieval → optional reranking.
 * Every collaborator is passed in explicitly (dependency injection).
 */
export declare class SearchEngine {
    private embedFn;
    private indexManager;
    private db;
    private rerankFn?;
    private contentResolver?;
    /**
     * Construct a SearchEngine from explicitly injected dependencies.
     *
     * Why dependency injection:
     * - keeps core logic separate from implementation-specific components
     * - allows different embedding models (text-only, multimodal, custom)
     * - allows mocks to be injected in tests
     * - allows future extension without touching the core
     *
     * @param embedFn - Embeds a query into a vector.
     *   Signature: (query: string, contentType?: string) => Promise<EmbeddingResult>
     *   - Text:       (query) => textEmbedder.embedSingle(query)
     *   - Multimodal: (query, type) => type === 'image' ? clipEmbedder.embedImage(query) : clipEmbedder.embedText(query)
     *   - Custom:     (query) => customModel.embed(query)
     *
     * @param indexManager - Vector index manager used for similarity search.
     *   Handles vector storage and retrieval and works with any embedding
     *   dimensionality (384, 512, 768, etc.). Example: new IndexManager('./index.bin')
     *
     * @param db - Database connection used for metadata retrieval.
     *   Gives access to document and chunk metadata; content types are
     *   distinguished through metadata fields. Example: await openDatabase('./db.sqlite')
     *
     * @param rerankFn - Optional reranker for search results; pass undefined to disable reranking.
     *   Signature: (query: string, results: SearchResult[], contentType?: string) => Promise<SearchResult[]>
     *   - Text:   (query, results) => textReranker.rerank(query, results)
     *   - Custom: (query, results) => customReranker.rerank(query, results)
     *
     * @param contentResolver - Optional content resolver.
     *   NOTE(review): presumably backs the content retrieval methods on this
     *   class — confirm against the implementation.
     *
     * @example
     * ```typescript
     * // Text-only search engine with reranking
     * const textEmbedFn = createTextEmbedFunction();
     * const textRerankFn = createTextRerankFunction();
     * const indexManager = new IndexManager('./index.bin');
     * const db = await openDatabase('./db.sqlite');
     * const rerankingSearch = new SearchEngine(textEmbedFn, indexManager, db, textRerankFn);
     *
     * // Same engine without reranking
     * const plainSearch = new SearchEngine(textEmbedFn, indexManager, db);
     *
     * // Custom embedding implementation
     * const customEmbedFn = async (query) => ({
     *   embedding_id: generateId(),
     *   vector: await myCustomModel.embed(query)
     * });
     * const customSearch = new SearchEngine(customEmbedFn, indexManager, db);
     * ```
     */
    constructor(embedFn: EmbedFunction, indexManager: IndexManager, db: DatabaseConnection, rerankFn?: RerankFunction | undefined, contentResolver?: import('./content-resolver.js').ContentResolver);
    /**
     * Run a semantic search for a text query.
     * Pipeline: query embedding → vector search → metadata retrieval → optional reranking.
     *
     * @param query - Search query string
     * @param options - Search options, including top_k and rerank settings
     * @returns Promise resolving to the array of search results
     */
    search(query: string, options?: SearchOptions): Promise<SearchResult[]>;
    /**
     * Run a semantic search from an embedding vector that was computed beforehand.
     * Intended for image-based search or for embeddings produced outside the engine.
     *
     * @param queryVector - Pre-computed query embedding vector
     * @param options - Search options, including top_k and rerank settings
     * @param originalQuery - Optional original query (text or image path), used for reranking
     * @param embeddingTime - Optional embedding time, used for logging
     * @returns Promise resolving to the array of search results
     */
    searchWithVector(queryVector: Float32Array, options?: SearchOptions, originalQuery?: string, embeddingTime?: number): Promise<SearchResult[]>;
    /**
     * Shape raw lookup output into structured search results.
     *
     * @param chunks - Database chunks with their metadata
     * @param distances - Similarity distances returned by the vector search
     * @param embeddingIds - Embedding IDs, in search-result order
     * @returns Formatted search results
     */
    private formatSearchResults;
    /**
     * Report search engine statistics.
     *
     * @returns Promise resolving to the current stats: total chunk count,
     *   index size, and whether reranking is enabled
     */
    getStats(): Promise<{
        totalChunks: number;
        indexSize: number;
        rerankingEnabled: boolean;
    }>;
    /**
     * Fetch a single content item by ID in the requested format.
     *
     * @param contentId - ID of the content to retrieve
     * @param format - Output format: 'file' for CLI clients, 'base64' for MCP clients
     * @returns Promise resolving to the content in the requested format
     */
    getContent(contentId: string, format?: 'file' | 'base64'): Promise<string>;
    /**
     * Fetch several content items in one batch.
     *
     * @param contentIds - IDs of the content items to retrieve
     * @param format - Output format: 'file' for CLI clients, 'base64' for MCP clients
     * @returns Promise resolving to the array of content in the requested format
     */
    getContentBatch(contentIds: string[], format?: 'file' | 'base64'): Promise<string[]>;
    /**
     * Fetch the metadata of a content item, for enhancing results.
     *
     * @param contentId - ID of the content whose metadata is wanted
     * @returns Promise resolving to the content metadata
     */
    getContentMetadata(contentId: string): Promise<import('./content-resolver.js').ContentMetadata>;
    /**
     * Check whether a content item exists and is accessible.
     *
     * @param contentId - ID of the content to verify
     * @returns Promise resolving to true when the content exists, false otherwise
     */
    verifyContentExists(contentId: string): Promise<boolean>;
    /**
     * Release resources held by the engine. Must be called explicitly.
     */
    cleanup(): Promise<void>;
}
|
|
141
|
+
//# sourceMappingURL=search.d.ts.map
|