rag-lite-ts 2.1.0 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{cli → cjs/cli}/indexer.js +1 -1
- package/dist/{cli → cjs/cli}/search.js +5 -10
- package/dist/{core → cjs/core}/binary-index-format.d.ts +28 -2
- package/dist/cjs/core/binary-index-format.js +291 -0
- package/dist/{core → cjs/core}/ingestion.d.ts +5 -1
- package/dist/{core → cjs/core}/ingestion.js +76 -9
- package/dist/{core → cjs/core}/model-validator.js +1 -1
- package/dist/{core → cjs/core}/reranking-strategies.js +4 -5
- package/dist/{core → cjs/core}/search.js +2 -1
- package/dist/{core → cjs/core}/types.d.ts +1 -1
- package/dist/{core → cjs/core}/vector-index.d.ts +4 -0
- package/dist/{core → cjs/core}/vector-index.js +10 -2
- package/dist/{file-processor.d.ts → cjs/file-processor.d.ts} +2 -0
- package/dist/{file-processor.js → cjs/file-processor.js} +20 -0
- package/dist/{index-manager.d.ts → cjs/index-manager.d.ts} +17 -1
- package/dist/{index-manager.js → cjs/index-manager.js} +148 -7
- package/dist/{multimodal → cjs/multimodal}/clip-embedder.js +71 -66
- package/dist/esm/api-errors.d.ts +90 -0
- package/dist/esm/api-errors.js +320 -0
- package/dist/esm/cli/indexer.d.ts +11 -0
- package/dist/esm/cli/indexer.js +471 -0
- package/dist/esm/cli/search.d.ts +7 -0
- package/dist/esm/cli/search.js +332 -0
- package/dist/esm/cli.d.ts +3 -0
- package/dist/esm/cli.js +529 -0
- package/dist/esm/config.d.ts +51 -0
- package/dist/esm/config.js +79 -0
- package/dist/esm/core/abstract-embedder.d.ts +125 -0
- package/dist/esm/core/abstract-embedder.js +264 -0
- package/dist/esm/core/actionable-error-messages.d.ts +60 -0
- package/dist/esm/core/actionable-error-messages.js +397 -0
- package/dist/esm/core/adapters.d.ts +93 -0
- package/dist/esm/core/adapters.js +139 -0
- package/dist/esm/core/batch-processing-optimizer.d.ts +155 -0
- package/dist/esm/core/batch-processing-optimizer.js +536 -0
- package/dist/esm/core/binary-index-format.d.ts +78 -0
- package/dist/esm/core/binary-index-format.js +291 -0
- package/dist/esm/core/chunker.d.ts +119 -0
- package/dist/esm/core/chunker.js +73 -0
- package/dist/esm/core/cli-database-utils.d.ts +53 -0
- package/dist/esm/core/cli-database-utils.js +239 -0
- package/dist/esm/core/config.d.ts +102 -0
- package/dist/esm/core/config.js +247 -0
- package/dist/esm/core/content-errors.d.ts +111 -0
- package/dist/esm/core/content-errors.js +362 -0
- package/dist/esm/core/content-manager.d.ts +335 -0
- package/dist/esm/core/content-manager.js +1476 -0
- package/dist/esm/core/content-performance-optimizer.d.ts +150 -0
- package/dist/esm/core/content-performance-optimizer.js +516 -0
- package/dist/esm/core/content-resolver.d.ts +104 -0
- package/dist/esm/core/content-resolver.js +285 -0
- package/dist/esm/core/cross-modal-search.d.ts +164 -0
- package/dist/esm/core/cross-modal-search.js +342 -0
- package/dist/esm/core/database-connection-manager.d.ts +109 -0
- package/dist/esm/core/database-connection-manager.js +310 -0
- package/dist/esm/core/db.d.ts +213 -0
- package/dist/esm/core/db.js +895 -0
- package/dist/esm/core/embedder-factory.d.ts +154 -0
- package/dist/esm/core/embedder-factory.js +311 -0
- package/dist/esm/core/error-handler.d.ts +112 -0
- package/dist/esm/core/error-handler.js +239 -0
- package/dist/esm/core/index.d.ts +59 -0
- package/dist/esm/core/index.js +69 -0
- package/dist/esm/core/ingestion.d.ts +202 -0
- package/dist/esm/core/ingestion.js +901 -0
- package/dist/esm/core/interfaces.d.ts +408 -0
- package/dist/esm/core/interfaces.js +106 -0
- package/dist/esm/core/lazy-dependency-loader.d.ts +147 -0
- package/dist/esm/core/lazy-dependency-loader.js +435 -0
- package/dist/esm/core/mode-detection-service.d.ts +150 -0
- package/dist/esm/core/mode-detection-service.js +565 -0
- package/dist/esm/core/mode-model-validator.d.ts +92 -0
- package/dist/esm/core/mode-model-validator.js +203 -0
- package/dist/esm/core/model-registry.d.ts +116 -0
- package/dist/esm/core/model-registry.js +411 -0
- package/dist/esm/core/model-validator.d.ts +217 -0
- package/dist/esm/core/model-validator.js +782 -0
- package/dist/esm/core/path-manager.d.ts +47 -0
- package/dist/esm/core/path-manager.js +71 -0
- package/dist/esm/core/raglite-paths.d.ts +121 -0
- package/dist/esm/core/raglite-paths.js +145 -0
- package/dist/esm/core/reranking-config.d.ts +42 -0
- package/dist/esm/core/reranking-config.js +147 -0
- package/dist/esm/core/reranking-factory.d.ts +92 -0
- package/dist/esm/core/reranking-factory.js +410 -0
- package/dist/esm/core/reranking-strategies.d.ts +310 -0
- package/dist/esm/core/reranking-strategies.js +650 -0
- package/dist/esm/core/resource-cleanup.d.ts +163 -0
- package/dist/esm/core/resource-cleanup.js +371 -0
- package/dist/esm/core/resource-manager.d.ts +212 -0
- package/dist/esm/core/resource-manager.js +564 -0
- package/dist/esm/core/search-pipeline.d.ts +111 -0
- package/dist/esm/core/search-pipeline.js +287 -0
- package/dist/esm/core/search.d.ts +141 -0
- package/dist/esm/core/search.js +320 -0
- package/dist/esm/core/streaming-operations.d.ts +145 -0
- package/dist/esm/core/streaming-operations.js +409 -0
- package/dist/esm/core/types.d.ts +66 -0
- package/dist/esm/core/types.js +6 -0
- package/dist/esm/core/universal-embedder.d.ts +177 -0
- package/dist/esm/core/universal-embedder.js +139 -0
- package/dist/esm/core/validation-messages.d.ts +99 -0
- package/dist/esm/core/validation-messages.js +334 -0
- package/dist/esm/core/vector-index.d.ts +72 -0
- package/dist/esm/core/vector-index.js +333 -0
- package/dist/esm/dom-polyfills.d.ts +6 -0
- package/dist/esm/dom-polyfills.js +37 -0
- package/dist/esm/factories/index.d.ts +27 -0
- package/dist/esm/factories/index.js +29 -0
- package/dist/esm/factories/ingestion-factory.d.ts +200 -0
- package/dist/esm/factories/ingestion-factory.js +477 -0
- package/dist/esm/factories/search-factory.d.ts +154 -0
- package/dist/esm/factories/search-factory.js +344 -0
- package/dist/esm/file-processor.d.ts +147 -0
- package/dist/esm/file-processor.js +963 -0
- package/dist/esm/index-manager.d.ts +116 -0
- package/dist/esm/index-manager.js +598 -0
- package/dist/esm/index.d.ts +75 -0
- package/dist/esm/index.js +110 -0
- package/dist/esm/indexer.d.ts +7 -0
- package/dist/esm/indexer.js +54 -0
- package/dist/esm/ingestion.d.ts +63 -0
- package/dist/esm/ingestion.js +124 -0
- package/dist/esm/mcp-server.d.ts +46 -0
- package/dist/esm/mcp-server.js +1820 -0
- package/dist/esm/multimodal/clip-embedder.d.ts +327 -0
- package/dist/esm/multimodal/clip-embedder.js +996 -0
- package/dist/esm/multimodal/index.d.ts +6 -0
- package/dist/esm/multimodal/index.js +6 -0
- package/dist/esm/preprocess.d.ts +19 -0
- package/dist/esm/preprocess.js +203 -0
- package/dist/esm/preprocessors/index.d.ts +17 -0
- package/dist/esm/preprocessors/index.js +38 -0
- package/dist/esm/preprocessors/mdx.d.ts +25 -0
- package/dist/esm/preprocessors/mdx.js +101 -0
- package/dist/esm/preprocessors/mermaid.d.ts +68 -0
- package/dist/esm/preprocessors/mermaid.js +329 -0
- package/dist/esm/preprocessors/registry.d.ts +56 -0
- package/dist/esm/preprocessors/registry.js +179 -0
- package/dist/esm/run-error-recovery-tests.d.ts +7 -0
- package/dist/esm/run-error-recovery-tests.js +101 -0
- package/dist/esm/search-standalone.d.ts +7 -0
- package/dist/esm/search-standalone.js +117 -0
- package/dist/esm/search.d.ts +99 -0
- package/dist/esm/search.js +177 -0
- package/dist/esm/test-utils.d.ts +18 -0
- package/dist/esm/test-utils.js +27 -0
- package/dist/esm/text/chunker.d.ts +33 -0
- package/dist/esm/text/chunker.js +279 -0
- package/dist/esm/text/embedder.d.ts +111 -0
- package/dist/esm/text/embedder.js +386 -0
- package/dist/esm/text/index.d.ts +8 -0
- package/dist/esm/text/index.js +9 -0
- package/dist/esm/text/preprocessors/index.d.ts +17 -0
- package/dist/esm/text/preprocessors/index.js +38 -0
- package/dist/esm/text/preprocessors/mdx.d.ts +25 -0
- package/dist/esm/text/preprocessors/mdx.js +101 -0
- package/dist/esm/text/preprocessors/mermaid.d.ts +68 -0
- package/dist/esm/text/preprocessors/mermaid.js +330 -0
- package/dist/esm/text/preprocessors/registry.d.ts +56 -0
- package/dist/esm/text/preprocessors/registry.js +180 -0
- package/dist/esm/text/reranker.d.ts +49 -0
- package/dist/esm/text/reranker.js +274 -0
- package/dist/esm/text/sentence-transformer-embedder.d.ts +96 -0
- package/dist/esm/text/sentence-transformer-embedder.js +340 -0
- package/dist/esm/text/tokenizer.d.ts +22 -0
- package/dist/esm/text/tokenizer.js +64 -0
- package/dist/esm/types.d.ts +83 -0
- package/dist/esm/types.js +3 -0
- package/dist/esm/utils/vector-math.d.ts +31 -0
- package/dist/esm/utils/vector-math.js +70 -0
- package/package.json +30 -12
- package/dist/core/binary-index-format.js +0 -122
- /package/dist/{api-errors.d.ts → cjs/api-errors.d.ts} +0 -0
- /package/dist/{api-errors.js → cjs/api-errors.js} +0 -0
- /package/dist/{cli → cjs/cli}/indexer.d.ts +0 -0
- /package/dist/{cli → cjs/cli}/search.d.ts +0 -0
- /package/dist/{cli.d.ts → cjs/cli.d.ts} +0 -0
- /package/dist/{cli.js → cjs/cli.js} +0 -0
- /package/dist/{config.d.ts → cjs/config.d.ts} +0 -0
- /package/dist/{config.js → cjs/config.js} +0 -0
- /package/dist/{core → cjs/core}/abstract-embedder.d.ts +0 -0
- /package/dist/{core → cjs/core}/abstract-embedder.js +0 -0
- /package/dist/{core → cjs/core}/actionable-error-messages.d.ts +0 -0
- /package/dist/{core → cjs/core}/actionable-error-messages.js +0 -0
- /package/dist/{core → cjs/core}/adapters.d.ts +0 -0
- /package/dist/{core → cjs/core}/adapters.js +0 -0
- /package/dist/{core → cjs/core}/batch-processing-optimizer.d.ts +0 -0
- /package/dist/{core → cjs/core}/batch-processing-optimizer.js +0 -0
- /package/dist/{core → cjs/core}/chunker.d.ts +0 -0
- /package/dist/{core → cjs/core}/chunker.js +0 -0
- /package/dist/{core → cjs/core}/cli-database-utils.d.ts +0 -0
- /package/dist/{core → cjs/core}/cli-database-utils.js +0 -0
- /package/dist/{core → cjs/core}/config.d.ts +0 -0
- /package/dist/{core → cjs/core}/config.js +0 -0
- /package/dist/{core → cjs/core}/content-errors.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-errors.js +0 -0
- /package/dist/{core → cjs/core}/content-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-manager.js +0 -0
- /package/dist/{core → cjs/core}/content-performance-optimizer.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-performance-optimizer.js +0 -0
- /package/dist/{core → cjs/core}/content-resolver.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-resolver.js +0 -0
- /package/dist/{core → cjs/core}/cross-modal-search.d.ts +0 -0
- /package/dist/{core → cjs/core}/cross-modal-search.js +0 -0
- /package/dist/{core → cjs/core}/database-connection-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/database-connection-manager.js +0 -0
- /package/dist/{core → cjs/core}/db.d.ts +0 -0
- /package/dist/{core → cjs/core}/db.js +0 -0
- /package/dist/{core → cjs/core}/embedder-factory.d.ts +0 -0
- /package/dist/{core → cjs/core}/embedder-factory.js +0 -0
- /package/dist/{core → cjs/core}/error-handler.d.ts +0 -0
- /package/dist/{core → cjs/core}/error-handler.js +0 -0
- /package/dist/{core → cjs/core}/index.d.ts +0 -0
- /package/dist/{core → cjs/core}/index.js +0 -0
- /package/dist/{core → cjs/core}/interfaces.d.ts +0 -0
- /package/dist/{core → cjs/core}/interfaces.js +0 -0
- /package/dist/{core → cjs/core}/lazy-dependency-loader.d.ts +0 -0
- /package/dist/{core → cjs/core}/lazy-dependency-loader.js +0 -0
- /package/dist/{core → cjs/core}/mode-detection-service.d.ts +0 -0
- /package/dist/{core → cjs/core}/mode-detection-service.js +0 -0
- /package/dist/{core → cjs/core}/mode-model-validator.d.ts +0 -0
- /package/dist/{core → cjs/core}/mode-model-validator.js +0 -0
- /package/dist/{core → cjs/core}/model-registry.d.ts +0 -0
- /package/dist/{core → cjs/core}/model-registry.js +0 -0
- /package/dist/{core → cjs/core}/model-validator.d.ts +0 -0
- /package/dist/{core → cjs/core}/path-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/path-manager.js +0 -0
- /package/dist/{core → cjs/core}/raglite-paths.d.ts +0 -0
- /package/dist/{core → cjs/core}/raglite-paths.js +0 -0
- /package/dist/{core → cjs/core}/reranking-config.d.ts +0 -0
- /package/dist/{core → cjs/core}/reranking-config.js +0 -0
- /package/dist/{core → cjs/core}/reranking-factory.d.ts +0 -0
- /package/dist/{core → cjs/core}/reranking-factory.js +0 -0
- /package/dist/{core → cjs/core}/reranking-strategies.d.ts +0 -0
- /package/dist/{core → cjs/core}/resource-cleanup.d.ts +0 -0
- /package/dist/{core → cjs/core}/resource-cleanup.js +0 -0
- /package/dist/{core → cjs/core}/resource-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/resource-manager.js +0 -0
- /package/dist/{core → cjs/core}/search-pipeline.d.ts +0 -0
- /package/dist/{core → cjs/core}/search-pipeline.js +0 -0
- /package/dist/{core → cjs/core}/search.d.ts +0 -0
- /package/dist/{core → cjs/core}/streaming-operations.d.ts +0 -0
- /package/dist/{core → cjs/core}/streaming-operations.js +0 -0
- /package/dist/{core → cjs/core}/types.js +0 -0
- /package/dist/{core → cjs/core}/universal-embedder.d.ts +0 -0
- /package/dist/{core → cjs/core}/universal-embedder.js +0 -0
- /package/dist/{core → cjs/core}/validation-messages.d.ts +0 -0
- /package/dist/{core → cjs/core}/validation-messages.js +0 -0
- /package/dist/{dom-polyfills.d.ts → cjs/dom-polyfills.d.ts} +0 -0
- /package/dist/{dom-polyfills.js → cjs/dom-polyfills.js} +0 -0
- /package/dist/{factories → cjs/factories}/index.d.ts +0 -0
- /package/dist/{factories → cjs/factories}/index.js +0 -0
- /package/dist/{factories → cjs/factories}/ingestion-factory.d.ts +0 -0
- /package/dist/{factories → cjs/factories}/ingestion-factory.js +0 -0
- /package/dist/{factories → cjs/factories}/search-factory.d.ts +0 -0
- /package/dist/{factories → cjs/factories}/search-factory.js +0 -0
- /package/dist/{index.d.ts → cjs/index.d.ts} +0 -0
- /package/dist/{index.js → cjs/index.js} +0 -0
- /package/dist/{indexer.d.ts → cjs/indexer.d.ts} +0 -0
- /package/dist/{indexer.js → cjs/indexer.js} +0 -0
- /package/dist/{ingestion.d.ts → cjs/ingestion.d.ts} +0 -0
- /package/dist/{ingestion.js → cjs/ingestion.js} +0 -0
- /package/dist/{mcp-server.d.ts → cjs/mcp-server.d.ts} +0 -0
- /package/dist/{mcp-server.js → cjs/mcp-server.js} +0 -0
- /package/dist/{multimodal → cjs/multimodal}/clip-embedder.d.ts +0 -0
- /package/dist/{multimodal → cjs/multimodal}/index.d.ts +0 -0
- /package/dist/{multimodal → cjs/multimodal}/index.js +0 -0
- /package/dist/{preprocess.d.ts → cjs/preprocess.d.ts} +0 -0
- /package/dist/{preprocess.js → cjs/preprocess.js} +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/index.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/index.js +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mdx.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mdx.js +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mermaid.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mermaid.js +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/registry.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/registry.js +0 -0
- /package/dist/{run-error-recovery-tests.d.ts → cjs/run-error-recovery-tests.d.ts} +0 -0
- /package/dist/{run-error-recovery-tests.js → cjs/run-error-recovery-tests.js} +0 -0
- /package/dist/{search-standalone.d.ts → cjs/search-standalone.d.ts} +0 -0
- /package/dist/{search-standalone.js → cjs/search-standalone.js} +0 -0
- /package/dist/{search.d.ts → cjs/search.d.ts} +0 -0
- /package/dist/{search.js → cjs/search.js} +0 -0
- /package/dist/{test-utils.d.ts → cjs/test-utils.d.ts} +0 -0
- /package/dist/{test-utils.js → cjs/test-utils.js} +0 -0
- /package/dist/{text → cjs/text}/chunker.d.ts +0 -0
- /package/dist/{text → cjs/text}/chunker.js +0 -0
- /package/dist/{text → cjs/text}/embedder.d.ts +0 -0
- /package/dist/{text → cjs/text}/embedder.js +0 -0
- /package/dist/{text → cjs/text}/index.d.ts +0 -0
- /package/dist/{text → cjs/text}/index.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/index.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/index.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mdx.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mdx.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mermaid.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mermaid.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/registry.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/registry.js +0 -0
- /package/dist/{text → cjs/text}/reranker.d.ts +0 -0
- /package/dist/{text → cjs/text}/reranker.js +0 -0
- /package/dist/{text → cjs/text}/sentence-transformer-embedder.d.ts +0 -0
- /package/dist/{text → cjs/text}/sentence-transformer-embedder.js +0 -0
- /package/dist/{text → cjs/text}/tokenizer.d.ts +0 -0
- /package/dist/{text → cjs/text}/tokenizer.js +0 -0
- /package/dist/{types.d.ts → cjs/types.d.ts} +0 -0
- /package/dist/{types.js → cjs/types.js} +0 -0
- /package/dist/{utils → cjs/utils}/vector-math.d.ts +0 -0
- /package/dist/{utils → cjs/utils}/vector-math.js +0 -0
|
@@ -0,0 +1,598 @@
|
|
|
1
|
+
import { VectorIndex } from './core/vector-index.js';
|
|
2
|
+
import { BinaryIndexFormat } from './core/binary-index-format.js';
|
|
3
|
+
import { openDatabase, getSystemInfo, setSystemInfo } from './core/db.js';
|
|
4
|
+
import { config, getModelDefaults } from './core/config.js';
|
|
5
|
+
export class IndexManager {
|
|
6
|
+
modelName;
|
|
7
|
+
vectorIndex;
|
|
8
|
+
textIndex;
|
|
9
|
+
imageIndex;
|
|
10
|
+
db = null;
|
|
11
|
+
indexPath;
|
|
12
|
+
dbPath;
|
|
13
|
+
isInitialized = false;
|
|
14
|
+
hashToEmbeddingId = new Map();
|
|
15
|
+
embeddingIdToHash = new Map();
|
|
16
|
+
groupedEmbeddings;
|
|
17
|
+
vectorIndexOptions;
|
|
18
|
+
constructor(indexPath, dbPath, dimensions, modelName) {
|
|
19
|
+
this.modelName = modelName;
|
|
20
|
+
this.indexPath = indexPath;
|
|
21
|
+
this.dbPath = dbPath;
|
|
22
|
+
// Store options for creating specialized indexes
|
|
23
|
+
this.vectorIndexOptions = {
|
|
24
|
+
dimensions: dimensions,
|
|
25
|
+
maxElements: 100000, // Start with 100k capacity
|
|
26
|
+
efConstruction: 200,
|
|
27
|
+
M: 16
|
|
28
|
+
};
|
|
29
|
+
// Initialize with provided dimensions from config
|
|
30
|
+
this.vectorIndex = new VectorIndex(indexPath, this.vectorIndexOptions);
|
|
31
|
+
}
|
|
32
|
+
/**
|
|
33
|
+
* Initialize the index manager and load existing index if available
|
|
34
|
+
* @param skipModelCheck - Skip model compatibility check (used for rebuilds)
|
|
35
|
+
* @param forceRecreate - Force recreation of index (used for model changes)
|
|
36
|
+
*/
|
|
37
|
+
async initialize(skipModelCheck = false, forceRecreate = false) {
|
|
38
|
+
if (this.isInitialized) {
|
|
39
|
+
return;
|
|
40
|
+
}
|
|
41
|
+
try {
|
|
42
|
+
// Open database connection
|
|
43
|
+
this.db = await openDatabase(this.dbPath);
|
|
44
|
+
// Check model compatibility BEFORE trying to load the vector index
|
|
45
|
+
// This prevents WebAssembly exceptions when dimensions don't match
|
|
46
|
+
if (!skipModelCheck && !forceRecreate) {
|
|
47
|
+
await this.checkModelCompatibility();
|
|
48
|
+
}
|
|
49
|
+
if (forceRecreate || !this.vectorIndex.indexExists()) {
|
|
50
|
+
console.log('Creating new vector index...');
|
|
51
|
+
await this.vectorIndex.initialize();
|
|
52
|
+
}
|
|
53
|
+
else {
|
|
54
|
+
// Only try to load existing index if not forcing recreation
|
|
55
|
+
console.log('Loading existing vector index...');
|
|
56
|
+
await this.vectorIndex.loadIndex();
|
|
57
|
+
// Check if the loaded index has grouped data and create specialized indexes
|
|
58
|
+
await this.createSpecializedIndexes();
|
|
59
|
+
}
|
|
60
|
+
// Always populate the embedding ID mapping from existing database entries
|
|
61
|
+
// This is needed both for new and existing indexes
|
|
62
|
+
const existingChunks = await this.db.all('SELECT embedding_id FROM chunks ORDER BY id');
|
|
63
|
+
for (const chunk of existingChunks) {
|
|
64
|
+
this.hashEmbeddingId(chunk.embedding_id); // This will populate the mapping
|
|
65
|
+
}
|
|
66
|
+
this.isInitialized = true;
|
|
67
|
+
const vectorCount = this.vectorIndex.getCurrentCount();
|
|
68
|
+
console.log(`Index manager initialized with ${vectorCount} vectors${this.textIndex && this.imageIndex ? ' (multi-graph mode)' : ''}`);
|
|
69
|
+
}
|
|
70
|
+
catch (error) {
|
|
71
|
+
throw new Error(`Failed to initialize index manager: ${error}`);
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
/**
|
|
75
|
+
* Check model compatibility between stored and current configuration
|
|
76
|
+
* Requirements: 2.1, 2.2, 2.4, 5.1, 5.2, 5.3, 5.4
|
|
77
|
+
*/
|
|
78
|
+
async checkModelCompatibility() {
|
|
79
|
+
if (!this.db) {
|
|
80
|
+
throw new Error('Database not initialized');
|
|
81
|
+
}
|
|
82
|
+
try {
|
|
83
|
+
// Get stored model information
|
|
84
|
+
const systemInfo = await getSystemInfo(this.db);
|
|
85
|
+
const currentModel = this.modelName || config.embedding_model;
|
|
86
|
+
const currentDefaults = getModelDefaults(currentModel);
|
|
87
|
+
if (systemInfo && systemInfo.modelName && systemInfo.modelDimensions) {
|
|
88
|
+
// Check if models match
|
|
89
|
+
if (systemInfo.modelName !== currentModel) {
|
|
90
|
+
throw new Error(`Model mismatch detected!\n` +
|
|
91
|
+
`Current model: ${currentModel} (${currentDefaults.dimensions} dimensions)\n` +
|
|
92
|
+
`Index model: ${systemInfo.modelName} (${systemInfo.modelDimensions} dimensions)\n` +
|
|
93
|
+
`\n` +
|
|
94
|
+
`The embedding model has changed since the index was created.\n` +
|
|
95
|
+
`This requires a full index rebuild to maintain consistency.\n` +
|
|
96
|
+
`\n` +
|
|
97
|
+
`To fix this issue:\n` +
|
|
98
|
+
`1. Run: npm run rebuild\n` +
|
|
99
|
+
`2. Or run: node dist/cli.js rebuild\n` +
|
|
100
|
+
`\n` +
|
|
101
|
+
`This will regenerate all embeddings with the new model.`);
|
|
102
|
+
}
|
|
103
|
+
// Check if dimensions match (additional safety check)
|
|
104
|
+
if (systemInfo.modelDimensions !== currentDefaults.dimensions) {
|
|
105
|
+
throw new Error(`Model dimension mismatch detected!\n` +
|
|
106
|
+
`Current model dimensions: ${currentDefaults.dimensions}\n` +
|
|
107
|
+
`Index model dimensions: ${systemInfo.modelDimensions}\n` +
|
|
108
|
+
`\n` +
|
|
109
|
+
`This indicates a configuration inconsistency.\n` +
|
|
110
|
+
`Please run: npm run rebuild`);
|
|
111
|
+
}
|
|
112
|
+
console.log(`Model compatibility verified: ${currentModel} (${currentDefaults.dimensions} dimensions)`);
|
|
113
|
+
}
|
|
114
|
+
else {
|
|
115
|
+
// First run - store the model info
|
|
116
|
+
console.log(`No model info stored yet - storing current model info: ${currentModel}`);
|
|
117
|
+
await setSystemInfo(this.db, {
|
|
118
|
+
modelName: currentModel,
|
|
119
|
+
modelDimensions: currentDefaults.dimensions
|
|
120
|
+
});
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
catch (error) {
|
|
124
|
+
if (error instanceof Error) {
|
|
125
|
+
throw error; // Re-throw our formatted errors
|
|
126
|
+
}
|
|
127
|
+
throw new Error(`Failed to check model compatibility: ${error}`);
|
|
128
|
+
}
|
|
129
|
+
}
|
|
130
|
+
/**
|
|
131
|
+
* Add vectors to the index with corresponding metadata (incremental addition)
|
|
132
|
+
* Requirements: 5.3 - When new documents are added THEN system SHALL append new chunks and vectors without rebuilding existing index
|
|
133
|
+
*/
|
|
134
|
+
async addVectors(embeddings) {
|
|
135
|
+
if (!this.isInitialized) {
|
|
136
|
+
throw new Error('Index manager not initialized');
|
|
137
|
+
}
|
|
138
|
+
if (embeddings.length === 0) {
|
|
139
|
+
return;
|
|
140
|
+
}
|
|
141
|
+
try {
|
|
142
|
+
// Convert embedding IDs to numeric IDs for hnswlib
|
|
143
|
+
const vectors = embeddings.map((embedding) => ({
|
|
144
|
+
id: this.hashEmbeddingId(embedding.embedding_id),
|
|
145
|
+
vector: embedding.vector
|
|
146
|
+
}));
|
|
147
|
+
// Check if we need to resize the index before adding
|
|
148
|
+
const currentCount = this.vectorIndex.getCurrentCount();
|
|
149
|
+
const newCount = currentCount + vectors.length;
|
|
150
|
+
const currentCapacity = 100000; // This should match the initial capacity
|
|
151
|
+
if (newCount > currentCapacity * 0.9) {
|
|
152
|
+
const newCapacity = Math.ceil(newCount * 1.5);
|
|
153
|
+
console.log(`Resizing index from ${currentCapacity} to ${newCapacity} to accommodate new vectors`);
|
|
154
|
+
this.vectorIndex.resizeIndex(newCapacity);
|
|
155
|
+
}
|
|
156
|
+
// Add vectors incrementally (this is the key requirement - no rebuild needed)
|
|
157
|
+
this.vectorIndex.addVectors(vectors);
|
|
158
|
+
console.log(`Incrementally added ${embeddings.length} vectors to index (total: ${this.vectorIndex.getCurrentCount()})`);
|
|
159
|
+
// Save the updated index
|
|
160
|
+
await this.saveIndex();
|
|
161
|
+
}
|
|
162
|
+
catch (error) {
|
|
163
|
+
throw new Error(`Failed to add vectors to index: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
164
|
+
}
|
|
165
|
+
}
|
|
166
|
+
/**
|
|
167
|
+
* Add grouped embeddings by content type (for new grouped format)
|
|
168
|
+
*/
|
|
169
|
+
async addGroupedEmbeddings(textEmbeddings, imageEmbeddings) {
|
|
170
|
+
if (!this.isInitialized) {
|
|
171
|
+
throw new Error('Index manager not initialized');
|
|
172
|
+
}
|
|
173
|
+
console.log(`addGroupedEmbeddings: text=${textEmbeddings.length}, image=${imageEmbeddings.length}`);
|
|
174
|
+
const allEmbeddings = [...textEmbeddings, ...imageEmbeddings];
|
|
175
|
+
if (allEmbeddings.length === 0) {
|
|
176
|
+
return;
|
|
177
|
+
}
|
|
178
|
+
try {
|
|
179
|
+
// Store grouped information for later saving
|
|
180
|
+
this.groupedEmbeddings = { text: textEmbeddings, image: imageEmbeddings };
|
|
181
|
+
console.log('addGroupedEmbeddings: stored grouped embeddings');
|
|
182
|
+
// Add all embeddings to the index (maintains current behavior)
|
|
183
|
+
await this.addVectors(allEmbeddings);
|
|
184
|
+
console.log('addGroupedEmbeddings: addVectors completed');
|
|
185
|
+
// The saveIndex method will now use grouped format if groupedEmbeddings exists
|
|
186
|
+
}
|
|
187
|
+
catch (error) {
|
|
188
|
+
throw new Error(`Failed to add grouped embeddings to index: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
189
|
+
}
|
|
190
|
+
}
|
|
191
|
+
/**
|
|
192
|
+
* Rebuild the entire index from scratch
|
|
193
|
+
* Requirements: 5.2, 5.4 - Create full index rebuild functionality for model changes or document deletions
|
|
194
|
+
*/
|
|
195
|
+
async rebuildIndex(newModelVersion) {
|
|
196
|
+
if (!this.db) {
|
|
197
|
+
throw new Error('Database not initialized');
|
|
198
|
+
}
|
|
199
|
+
console.log('Starting full index rebuild...');
|
|
200
|
+
try {
|
|
201
|
+
// Initialize new empty index (this will overwrite existing index)
|
|
202
|
+
await this.vectorIndex.initialize();
|
|
203
|
+
// Get all chunk embedding IDs from database (we'll need to regenerate embeddings)
|
|
204
|
+
const chunkData = await this.getAllChunksFromDB();
|
|
205
|
+
if (chunkData.length === 0) {
|
|
206
|
+
console.log('No chunks found in database - index rebuild complete with 0 vectors');
|
|
207
|
+
// Update model version if provided
|
|
208
|
+
if (newModelVersion) {
|
|
209
|
+
await this.updateModelVersion(newModelVersion);
|
|
210
|
+
}
|
|
211
|
+
await this.saveIndex();
|
|
212
|
+
return;
|
|
213
|
+
}
|
|
214
|
+
console.log(`Found ${chunkData.length} chunks in database that need re-embedding`);
|
|
215
|
+
// Note: In a complete implementation, we would need to:
|
|
216
|
+
// 1. Re-generate embeddings for all chunks using the new model
|
|
217
|
+
// 2. Add the new vectors to the index
|
|
218
|
+
// For now, we'll create a placeholder implementation that shows the structure
|
|
219
|
+
console.warn('WARNING: Full rebuild requires re-generating embeddings for all chunks.');
|
|
220
|
+
console.warn('This implementation requires integration with the EmbeddingEngine.');
|
|
221
|
+
console.warn('The index has been reset but vectors need to be regenerated.');
|
|
222
|
+
// Check if we need to resize index based on chunk count
|
|
223
|
+
const currentCapacity = 100000; // Default capacity
|
|
224
|
+
if (chunkData.length > currentCapacity * 0.8) {
|
|
225
|
+
const newCapacity = Math.ceil(chunkData.length * 1.5);
|
|
226
|
+
this.vectorIndex.resizeIndex(newCapacity);
|
|
227
|
+
console.log(`Resized index capacity to ${newCapacity} for ${chunkData.length} chunks`);
|
|
228
|
+
}
|
|
229
|
+
// Update model version if provided
|
|
230
|
+
if (newModelVersion) {
|
|
231
|
+
await this.updateModelVersion(newModelVersion);
|
|
232
|
+
}
|
|
233
|
+
// Save the (empty) rebuilt index structure
|
|
234
|
+
await this.saveIndex();
|
|
235
|
+
console.log(`Index rebuild structure complete. ${chunkData.length} chunks need re-embedding.`);
|
|
236
|
+
}
|
|
237
|
+
catch (error) {
|
|
238
|
+
throw new Error(`Failed to rebuild index: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
239
|
+
}
|
|
240
|
+
}
|
|
241
|
+
/**
|
|
242
|
+
* Trigger a full rebuild when documents are modified or deleted
|
|
243
|
+
* Requirements: 5.4 - When documents are modified or deleted THEN system SHALL trigger full index rebuild
|
|
244
|
+
*/
|
|
245
|
+
async triggerRebuildForDocumentChanges(reason) {
|
|
246
|
+
console.log(`Triggering index rebuild due to: ${reason}`);
|
|
247
|
+
await this.rebuildIndex();
|
|
248
|
+
}
|
|
249
|
+
/**
|
|
250
|
+
* Complete rebuild workflow with embedding regeneration
|
|
251
|
+
* This method should be called by higher-level components that have access to the EmbeddingEngine
|
|
252
|
+
* Requirements: 5.2, 5.4 - Full index rebuild functionality
|
|
253
|
+
*/
|
|
254
|
+
async rebuildWithEmbeddings(embeddingEngine) {
|
|
255
|
+
if (!this.db) {
|
|
256
|
+
throw new Error('Database not initialized');
|
|
257
|
+
}
|
|
258
|
+
console.log('Starting complete index rebuild with embedding regeneration...');
|
|
259
|
+
try {
|
|
260
|
+
// Get all chunks that need re-embedding
|
|
261
|
+
const chunkData = await this.getAllChunksFromDB();
|
|
262
|
+
if (chunkData.length === 0) {
|
|
263
|
+
console.log('No chunks found - initializing empty index');
|
|
264
|
+
await this.vectorIndex.initialize();
|
|
265
|
+
await this.updateModelVersion(embeddingEngine.getModelVersion());
|
|
266
|
+
// Store model info for the new model
|
|
267
|
+
const currentModel = this.modelName || config.embedding_model;
|
|
268
|
+
const currentDefaults = getModelDefaults(currentModel);
|
|
269
|
+
await setSystemInfo(this.db, {
|
|
270
|
+
modelName: currentModel,
|
|
271
|
+
modelDimensions: currentDefaults.dimensions
|
|
272
|
+
});
|
|
273
|
+
await this.saveIndex();
|
|
274
|
+
return;
|
|
275
|
+
}
|
|
276
|
+
// Initialize new empty index
|
|
277
|
+
await this.vectorIndex.initialize();
|
|
278
|
+
// Check if we need to resize index
|
|
279
|
+
const currentCapacity = 100000;
|
|
280
|
+
if (chunkData.length > currentCapacity * 0.8) {
|
|
281
|
+
const newCapacity = Math.ceil(chunkData.length * 1.5);
|
|
282
|
+
this.vectorIndex.resizeIndex(newCapacity);
|
|
283
|
+
console.log(`Resized index capacity to ${newCapacity}`);
|
|
284
|
+
}
|
|
285
|
+
// Re-generate embeddings for all chunks
|
|
286
|
+
console.log(`Re-generating embeddings for ${chunkData.length} chunks...`);
|
|
287
|
+
const texts = chunkData.map(chunk => chunk.text);
|
|
288
|
+
const newEmbeddings = await embeddingEngine.embedDocumentBatch(texts);
|
|
289
|
+
if (newEmbeddings.length === 0) {
|
|
290
|
+
throw new Error('Failed to generate any embeddings during rebuild');
|
|
291
|
+
}
|
|
292
|
+
// Add all vectors to the new index
|
|
293
|
+
const vectors = newEmbeddings.map((embedding) => ({
|
|
294
|
+
id: this.hashEmbeddingId(embedding.embedding_id),
|
|
295
|
+
vector: embedding.vector
|
|
296
|
+
}));
|
|
297
|
+
this.vectorIndex.addVectors(vectors);
|
|
298
|
+
console.log(`Added ${vectors.length} vectors to rebuilt index`);
|
|
299
|
+
// Update model version
|
|
300
|
+
await this.updateModelVersion(embeddingEngine.getModelVersion());
|
|
301
|
+
// Store model info for the new model
|
|
302
|
+
const currentModel = this.modelName || config.embedding_model;
|
|
303
|
+
const currentDefaults = getModelDefaults(currentModel);
|
|
304
|
+
await setSystemInfo(this.db, {
|
|
305
|
+
modelName: currentModel,
|
|
306
|
+
modelDimensions: currentDefaults.dimensions
|
|
307
|
+
});
|
|
308
|
+
// Save the rebuilt index
|
|
309
|
+
await this.saveIndex();
|
|
310
|
+
console.log(`Index rebuild complete: ${vectors.length} vectors with new model version`);
|
|
311
|
+
}
|
|
312
|
+
catch (error) {
|
|
313
|
+
throw new Error(`Failed to rebuild index with embeddings: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
314
|
+
}
|
|
315
|
+
}
|
|
316
|
+
/**
|
|
317
|
+
* Check if the current model version matches stored version
|
|
318
|
+
* Requirements: 5.1, 5.2 - Compare current embedding model version with stored version
|
|
319
|
+
*/
|
|
320
|
+
async checkModelVersion(currentVersion) {
|
|
321
|
+
if (!this.db) {
|
|
322
|
+
throw new Error('Database not initialized');
|
|
323
|
+
}
|
|
324
|
+
try {
|
|
325
|
+
const systemInfo = await getSystemInfo(this.db);
|
|
326
|
+
const storedVersion = systemInfo?.modelVersion;
|
|
327
|
+
if (!storedVersion || storedVersion === "") {
|
|
328
|
+
// No version stored yet, this is first run - store current version
|
|
329
|
+
await setSystemInfo(this.db, { modelVersion: currentVersion });
|
|
330
|
+
console.log(`Stored initial model version: ${currentVersion}`);
|
|
331
|
+
return true;
|
|
332
|
+
}
|
|
333
|
+
const matches = storedVersion === currentVersion;
|
|
334
|
+
if (!matches) {
|
|
335
|
+
console.error(`Model version mismatch detected!`);
|
|
336
|
+
console.error(`Stored version: ${storedVersion}`);
|
|
337
|
+
console.error(`Current version: ${currentVersion}`);
|
|
338
|
+
console.error(`A full index rebuild is required before the system can continue.`);
|
|
339
|
+
}
|
|
340
|
+
return matches;
|
|
341
|
+
}
|
|
342
|
+
catch (error) {
|
|
343
|
+
throw new Error(`Failed to check model version: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
344
|
+
}
|
|
345
|
+
}
|
|
346
|
+
/**
|
|
347
|
+
* Update the stored model version after successful rebuild
|
|
348
|
+
* Requirements: 5.5 - Save model name and hash in SQLite for version tracking
|
|
349
|
+
*/
|
|
350
|
+
async updateModelVersion(version) {
|
|
351
|
+
if (!this.db) {
|
|
352
|
+
throw new Error('Database not initialized');
|
|
353
|
+
}
|
|
354
|
+
try {
|
|
355
|
+
await setSystemInfo(this.db, { modelVersion: version });
|
|
356
|
+
console.log(`Updated model version to: ${version}`);
|
|
357
|
+
}
|
|
358
|
+
catch (error) {
|
|
359
|
+
throw new Error(`Failed to update model version: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
360
|
+
}
|
|
361
|
+
}
|
|
362
|
+
/**
|
|
363
|
+
* Validate model version and exit if mismatch detected
|
|
364
|
+
* Requirements: 5.2 - System SHALL exit with error message until full index rebuild is completed
|
|
365
|
+
*/
|
|
366
|
+
async validateModelVersionOrExit(currentVersion) {
|
|
367
|
+
const isValid = await this.checkModelVersion(currentVersion);
|
|
368
|
+
if (!isValid) {
|
|
369
|
+
console.error('\n=== MODEL VERSION MISMATCH ===');
|
|
370
|
+
console.error('The embedding model version has changed since the last index build.');
|
|
371
|
+
console.error('This requires a full index rebuild to maintain consistency.');
|
|
372
|
+
console.error('\nTo rebuild the index, run:');
|
|
373
|
+
console.error(' npm run rebuild-index');
|
|
374
|
+
console.error(' # or');
|
|
375
|
+
console.error(' node dist/cli.js rebuild');
|
|
376
|
+
console.error('\nThe system will now exit.');
|
|
377
|
+
process.exit(1);
|
|
378
|
+
}
|
|
379
|
+
}
|
|
380
|
+
/**
|
|
381
|
+
* Save the vector index to disk
|
|
382
|
+
*/
|
|
383
|
+
async saveIndex() {
|
|
384
|
+
if (!this.isInitialized) {
|
|
385
|
+
throw new Error('Index manager not initialized');
|
|
386
|
+
}
|
|
387
|
+
// If we have grouped embeddings, save in grouped format
|
|
388
|
+
if (this.groupedEmbeddings) {
|
|
389
|
+
console.log('IndexManager: Saving in grouped format');
|
|
390
|
+
await this.saveGroupedIndex(this.groupedEmbeddings.text, this.groupedEmbeddings.image);
|
|
391
|
+
// Clear grouped data after saving
|
|
392
|
+
this.groupedEmbeddings = undefined;
|
|
393
|
+
}
|
|
394
|
+
else {
|
|
395
|
+
console.log('IndexManager: Saving in standard format');
|
|
396
|
+
await this.vectorIndex.saveIndex();
|
|
397
|
+
}
|
|
398
|
+
}
|
|
399
|
+
/**
|
|
400
|
+
* Create specialized indexes for text and image content when grouped data is available
|
|
401
|
+
*/
|
|
402
|
+
async createSpecializedIndexes() {
|
|
403
|
+
try {
|
|
404
|
+
// Load the index data to check if it has grouped information
|
|
405
|
+
const indexData = await BinaryIndexFormat.load(this.indexPath);
|
|
406
|
+
if (indexData.hasContentTypeGroups && indexData.textVectors && indexData.imageVectors) {
|
|
407
|
+
// Only create specialized indexes if we have both text and image vectors
|
|
408
|
+
// In text-only mode, textVectors would be populated but imageVectors empty
|
|
409
|
+
// In multimodal mode, both would be populated
|
|
410
|
+
const hasTextVectors = indexData.textVectors.length > 0;
|
|
411
|
+
const hasImageVectors = indexData.imageVectors.length > 0;
|
|
412
|
+
if (hasTextVectors && hasImageVectors) {
|
|
413
|
+
console.log('Creating specialized indexes for content type filtering...');
|
|
414
|
+
// Create text-only index
|
|
415
|
+
this.textIndex = new VectorIndex(`${this.indexPath}.text`, this.vectorIndexOptions);
|
|
416
|
+
await this.textIndex.initialize();
|
|
417
|
+
this.textIndex.addVectors(indexData.textVectors);
|
|
418
|
+
console.log(`✓ Text index created with ${indexData.textVectors.length} vectors`);
|
|
419
|
+
// Create image-only index
|
|
420
|
+
this.imageIndex = new VectorIndex(`${this.indexPath}.image`, this.vectorIndexOptions);
|
|
421
|
+
await this.imageIndex.initialize();
|
|
422
|
+
this.imageIndex.addVectors(indexData.imageVectors);
|
|
423
|
+
console.log(`✓ Image index created with ${indexData.imageVectors.length} vectors`);
|
|
424
|
+
console.log('✓ Specialized indexes ready for content type filtering');
|
|
425
|
+
}
|
|
426
|
+
else if (hasTextVectors) {
|
|
427
|
+
console.log('Text-only index detected - using combined index for all searches');
|
|
428
|
+
// In text-only mode, we don't need specialized indexes
|
|
429
|
+
// The combined index (vectorIndex) already contains all text vectors
|
|
430
|
+
}
|
|
431
|
+
}
|
|
432
|
+
}
|
|
433
|
+
catch (error) {
|
|
434
|
+
console.warn('Failed to create specialized indexes, falling back to combined index:', error);
|
|
435
|
+
// Continue without specialized indexes - search will still work with combined index
|
|
436
|
+
}
|
|
437
|
+
}
|
|
438
|
+
/**
|
|
439
|
+
* Save index with content type grouping (for new grouped format)
|
|
440
|
+
*/
|
|
441
|
+
async saveGroupedIndex(textEmbeddings, imageEmbeddings) {
|
|
442
|
+
if (!this.isInitialized) {
|
|
443
|
+
throw new Error('Index manager not initialized');
|
|
444
|
+
}
|
|
445
|
+
console.log(`saveGroupedIndex: text=${textEmbeddings.length}, image=${imageEmbeddings.length}`);
|
|
446
|
+
// Group vectors by content type
|
|
447
|
+
const textVectors = textEmbeddings.map((embedding) => ({
|
|
448
|
+
id: this.hashEmbeddingId(embedding.embedding_id),
|
|
449
|
+
vector: embedding.vector
|
|
450
|
+
}));
|
|
451
|
+
const imageVectors = imageEmbeddings.map((embedding) => ({
|
|
452
|
+
id: this.hashEmbeddingId(embedding.embedding_id),
|
|
453
|
+
vector: embedding.vector
|
|
454
|
+
}));
|
|
455
|
+
// Get index parameters
|
|
456
|
+
const options = this.vectorIndex.getOptions();
|
|
457
|
+
const allVectors = [...textVectors, ...imageVectors];
|
|
458
|
+
console.log(`saveGroupedIndex: dimensions=${options.dimensions}, totalVectors=${allVectors.length}`);
|
|
459
|
+
const indexData = {
|
|
460
|
+
dimensions: options.dimensions,
|
|
461
|
+
maxElements: options.maxElements,
|
|
462
|
+
M: options.M || 16,
|
|
463
|
+
efConstruction: options.efConstruction || 200,
|
|
464
|
+
seed: options.seed || 100,
|
|
465
|
+
currentSize: textVectors.length + imageVectors.length,
|
|
466
|
+
vectors: allVectors, // Required for backward compatibility
|
|
467
|
+
hasContentTypeGroups: true,
|
|
468
|
+
textVectors,
|
|
469
|
+
imageVectors
|
|
470
|
+
};
|
|
471
|
+
console.log('saveGroupedIndex: Calling BinaryIndexFormat.saveGrouped');
|
|
472
|
+
// Save using grouped format
|
|
473
|
+
await BinaryIndexFormat.saveGrouped(this.indexPath, indexData);
|
|
474
|
+
console.log(`✓ Saved grouped index with ${textVectors.length} text and ${imageVectors.length} image vectors`);
|
|
475
|
+
}
|
|
476
|
+
/**
|
|
477
|
+
* Search for similar vectors
|
|
478
|
+
*/
|
|
479
|
+
search(queryVector, k = 5, contentType) {
|
|
480
|
+
if (!this.isInitialized) {
|
|
481
|
+
throw new Error('Index manager not initialized');
|
|
482
|
+
}
|
|
483
|
+
// Select the appropriate index based on content type
|
|
484
|
+
let targetIndex;
|
|
485
|
+
// If we have specialized indexes (multimodal mode), use them for filtering
|
|
486
|
+
if (this.textIndex && this.imageIndex) {
|
|
487
|
+
if (contentType === 'text') {
|
|
488
|
+
targetIndex = this.textIndex;
|
|
489
|
+
}
|
|
490
|
+
else if (contentType === 'image') {
|
|
491
|
+
targetIndex = this.imageIndex;
|
|
492
|
+
}
|
|
493
|
+
else {
|
|
494
|
+
// 'combined' or undefined
|
|
495
|
+
targetIndex = this.vectorIndex;
|
|
496
|
+
}
|
|
497
|
+
}
|
|
498
|
+
else {
|
|
499
|
+
// No specialized indexes (text-only mode) - ignore contentType and use combined index
|
|
500
|
+
targetIndex = this.vectorIndex;
|
|
501
|
+
}
|
|
502
|
+
const results = targetIndex.search(queryVector, k);
|
|
503
|
+
// Convert numeric IDs back to embedding IDs
|
|
504
|
+
const embeddingIds = results.neighbors.map(id => this.unhashEmbeddingId(id));
|
|
505
|
+
return {
|
|
506
|
+
embeddingIds,
|
|
507
|
+
distances: results.distances
|
|
508
|
+
};
|
|
509
|
+
}
|
|
510
|
+
/**
|
|
511
|
+
* Get index statistics
|
|
512
|
+
*/
|
|
513
|
+
async getStats() {
|
|
514
|
+
if (!this.db) {
|
|
515
|
+
throw new Error('Database not initialized');
|
|
516
|
+
}
|
|
517
|
+
const totalVectors = this.vectorIndex.getCurrentCount();
|
|
518
|
+
try {
|
|
519
|
+
const systemInfo = await getSystemInfo(this.db);
|
|
520
|
+
const modelVersion = systemInfo?.modelVersion || null;
|
|
521
|
+
return {
|
|
522
|
+
totalVectors,
|
|
523
|
+
modelVersion: modelVersion || 'unknown',
|
|
524
|
+
lastUpdated: new Date() // Could be enhanced to track actual last update time
|
|
525
|
+
};
|
|
526
|
+
}
|
|
527
|
+
catch (error) {
|
|
528
|
+
throw new Error(`Failed to get stats: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
529
|
+
}
|
|
530
|
+
}
|
|
531
|
+
/**
|
|
532
|
+
* Get all chunks from database for rebuild (returns chunk data, not embeddings)
|
|
533
|
+
* Note: Embeddings need to be regenerated during rebuild since we don't store vectors in DB
|
|
534
|
+
*/
|
|
535
|
+
async getAllChunksFromDB() {
|
|
536
|
+
if (!this.db) {
|
|
537
|
+
throw new Error('Database not initialized');
|
|
538
|
+
}
|
|
539
|
+
try {
|
|
540
|
+
const rows = await this.db.all('SELECT embedding_id, content as text, document_id FROM chunks ORDER BY id');
|
|
541
|
+
return rows.map(row => ({
|
|
542
|
+
embedding_id: row.embedding_id,
|
|
543
|
+
text: row.text,
|
|
544
|
+
document_id: row.document_id
|
|
545
|
+
}));
|
|
546
|
+
}
|
|
547
|
+
catch (error) {
|
|
548
|
+
throw new Error(`Failed to get chunks from DB: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
549
|
+
}
|
|
550
|
+
}
|
|
551
|
+
/**
|
|
552
|
+
* Convert embedding ID string to numeric ID for hnswlib with collision handling
|
|
553
|
+
*/
|
|
554
|
+
hashEmbeddingId(embeddingId) {
|
|
555
|
+
// Check if we already have a mapping for this embedding ID
|
|
556
|
+
if (this.embeddingIdToHash.has(embeddingId)) {
|
|
557
|
+
return this.embeddingIdToHash.get(embeddingId);
|
|
558
|
+
}
|
|
559
|
+
let hash = 0;
|
|
560
|
+
for (let i = 0; i < embeddingId.length; i++) {
|
|
561
|
+
const char = embeddingId.charCodeAt(i);
|
|
562
|
+
hash = ((hash << 5) - hash) + char;
|
|
563
|
+
hash = hash & hash; // Convert to 32-bit integer
|
|
564
|
+
}
|
|
565
|
+
hash = Math.abs(hash);
|
|
566
|
+
// Handle hash collisions by incrementing until we find an unused hash
|
|
567
|
+
let finalHash = hash;
|
|
568
|
+
while (this.hashToEmbeddingId.has(finalHash) && this.hashToEmbeddingId.get(finalHash) !== embeddingId) {
|
|
569
|
+
finalHash = (finalHash + 1) & 0x7FFFFFFF; // Keep it positive
|
|
570
|
+
}
|
|
571
|
+
// Store the bidirectional mapping
|
|
572
|
+
this.embeddingIdToHash.set(embeddingId, finalHash);
|
|
573
|
+
this.hashToEmbeddingId.set(finalHash, embeddingId);
|
|
574
|
+
return finalHash;
|
|
575
|
+
}
|
|
576
|
+
/**
|
|
577
|
+
* Convert numeric ID back to embedding ID using the maintained mapping
|
|
578
|
+
*/
|
|
579
|
+
unhashEmbeddingId(numericId) {
|
|
580
|
+
const embeddingId = this.hashToEmbeddingId.get(numericId);
|
|
581
|
+
if (!embeddingId) {
|
|
582
|
+
console.warn(`Warning: No embedding ID found for hash ${numericId}. This may indicate index/database synchronization issues.`);
|
|
583
|
+
console.warn('Consider running "raglite rebuild" to fix synchronization problems.');
|
|
584
|
+
throw new Error(`No embedding ID found for hash ${numericId}`);
|
|
585
|
+
}
|
|
586
|
+
return embeddingId;
|
|
587
|
+
}
|
|
588
|
+
/**
|
|
589
|
+
* Close database connection
|
|
590
|
+
*/
|
|
591
|
+
async close() {
|
|
592
|
+
if (this.db) {
|
|
593
|
+
await this.db.close();
|
|
594
|
+
this.db = null;
|
|
595
|
+
}
|
|
596
|
+
}
|
|
597
|
+
}
|
|
598
|
+
//# sourceMappingURL=index-manager.js.map
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* rag-lite-ts - Clean Architecture with Factory Pattern
|
|
3
|
+
*
|
|
4
|
+
* Quick Start (Recommended):
|
|
5
|
+
* ```typescript
|
|
6
|
+
* import { SearchFactory, IngestionFactory } from 'rag-lite-ts';
|
|
7
|
+
*
|
|
8
|
+
* // Simple search - just works!
|
|
9
|
+
* const search = await SearchFactory.create('./index.bin', './db.sqlite');
|
|
10
|
+
* const results = await search.search('your query');
|
|
11
|
+
*
|
|
12
|
+
* // Simple ingestion - just works!
|
|
13
|
+
* const ingestion = await IngestionFactory.create('./db.sqlite', './index.bin');
|
|
14
|
+
* await ingestion.ingestDirectory('./documents');
|
|
15
|
+
* ```
|
|
16
|
+
*
|
|
17
|
+
* With Configuration:
|
|
18
|
+
* ```typescript
|
|
19
|
+
* const search = await SearchFactory.create('./index.bin', './db.sqlite', {
|
|
20
|
+
* embeddingModel: 'Xenova/all-mpnet-base-v2',
|
|
21
|
+
* enableReranking: true
|
|
22
|
+
* });
|
|
23
|
+
* ```
|
|
24
|
+
*
|
|
25
|
+
* Complete RAG System:
* ```typescript
* import { SearchFactory, IngestionFactory } from 'rag-lite-ts';
*
* const ingestionPipeline = await IngestionFactory.create('./db.sqlite', './index.bin');
* const searchEngine = await SearchFactory.create('./index.bin', './db.sqlite');
* ```
|
|
34
|
+
*
|
|
35
|
+
* Advanced Usage (Direct Dependency Injection):
|
|
36
|
+
* ```typescript
|
|
37
|
+
* import { CoreSearchEngine, createTextEmbedFunction } from 'rag-lite-ts';
|
|
38
|
+
*
|
|
39
|
+
* const embedFn = await createTextEmbedFunction();
|
|
40
|
+
* const search = new CoreSearchEngine(embedFn, indexManager, db);
|
|
41
|
+
* ```
|
|
42
|
+
*/
|
|
43
|
+
export { IngestionFactory, SearchFactory } from './factories/index.js';
|
|
44
|
+
export type { IngestionFactoryOptions } from './factories/index.js';
|
|
45
|
+
export { SearchEngine as CoreSearchEngine } from './core/search.js';
|
|
46
|
+
export { IngestionPipeline as CoreIngestionPipeline } from './core/ingestion.js';
|
|
47
|
+
export { SearchEngine } from './search.js';
|
|
48
|
+
export { IngestionPipeline } from './ingestion.js';
|
|
49
|
+
export { LazyEmbedderLoader, LazyRerankerLoader, LazyMultimodalLoader, LazyDependencyManager } from './core/lazy-dependency-loader.js';
|
|
50
|
+
export type { EmbedFunction, RerankFunction, EmbeddingQueryInterface, RerankingInterface, SearchEngineConfig, ContentTypeStrategy, ModelAgnosticInterface, ExtendedEmbeddingInterface, ExtendedRerankingInterface, SearchPipelineInterface, SearchDependencyFactory } from './core/interfaces.js';
|
|
51
|
+
export { InterfaceValidator } from './core/interfaces.js';
|
|
52
|
+
export { validateModeModelCompatibility, validateModeModelCompatibilityOrThrow, getRecommendedModelsForMode, isModeModelCompatible, getCompatibleModelsForMode, type ModeModelValidationResult } from './core/mode-model-validator.js';
|
|
53
|
+
export { createMissingFileError, createInvalidPathError, createModelLoadingError, createDimensionMismatchError, createModeMismatchError, createInvalidContentError, createMissingDependencyError, createFactoryCreationError, enhanceError, createContextualError, type ActionableErrorConfig } from './core/actionable-error-messages.js';
|
|
54
|
+
export { EmbeddingEngine, getEmbeddingEngine, initializeEmbeddingEngine, createTextEmbedFunction } from './text/embedder.js';
|
|
55
|
+
export type { UniversalEmbedder } from './core/universal-embedder.js';
|
|
56
|
+
export { CLIPEmbedder } from './multimodal/clip-embedder.js';
|
|
57
|
+
export { createEmbedder } from './core/embedder-factory.js';
|
|
58
|
+
export { CrossEncoderReranker, createTextRerankFunction } from './text/reranker.js';
|
|
59
|
+
export { countTokens } from './text/tokenizer.js';
|
|
60
|
+
export type { RerankingStrategyType, RerankingConfig } from './core/reranking-config.js';
|
|
61
|
+
export { validateRerankingStrategy, validateRerankingConfig, getDefaultRerankingConfig, isStrategySupported, getSupportedStrategies, RerankingConfigBuilder, DEFAULT_TEXT_RERANKING_CONFIG, DEFAULT_MULTIMODAL_RERANKING_CONFIG } from './core/reranking-config.js';
|
|
62
|
+
export { openDatabase, initializeSchema, insertDocument, insertChunk, upsertDocument, getChunksByEmbeddingIds, type DatabaseConnection } from './core/db.js';
|
|
63
|
+
export { IndexManager } from './index-manager.js';
|
|
64
|
+
export { VectorIndex } from './core/vector-index.js';
|
|
65
|
+
export { config, getModelDefaults, type CoreConfig, type ExtensibleConfig, type ModelDefaults, EXIT_CODES, ConfigurationError, getDefaultModelCachePath, handleUnrecoverableError, logError } from './core/config.js';
|
|
66
|
+
export { discoverFiles, processFiles, discoverAndProcessFiles, DEFAULT_FILE_PROCESSOR_OPTIONS, type FileProcessorOptions, type FileDiscoveryResult, type DocumentProcessingResult } from './file-processor.js';
|
|
67
|
+
export { chunkDocument, type ChunkConfig } from './core/chunker.js';
|
|
68
|
+
export { DocumentPathManager } from './core/path-manager.js';
|
|
69
|
+
export { resolveRagLitePaths, ensureRagLiteStructure, migrateToRagLiteStructure, getStandardRagLitePaths, type RagLiteConfig, type RagLitePaths } from './core/raglite-paths.js';
|
|
70
|
+
export type { SearchResult, SearchOptions, Document, EmbeddingResult, ContentDocument, ContentChunk } from './core/types.js';
|
|
71
|
+
export type { Chunk, Preprocessor, PreprocessorOptions, PreprocessingConfig } from './types.js';
|
|
72
|
+
export type { IngestionOptions, IngestionResult } from './core/ingestion.js';
|
|
73
|
+
export { handleError, safeExecute, ErrorCategory, ErrorSeverity, createError, type ErrorContext } from './core/error-handler.js';
|
|
74
|
+
export { APIError, IngestionError, SearchError, ResourceError, ModelCompatibilityError, ErrorFactory, CommonErrors, handleAPIError } from './api-errors.js';
|
|
75
|
+
//# sourceMappingURL=index.d.ts.map
|