rag-lite-ts 2.1.1 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +88 -5
- package/dist/{cli → cjs/cli}/indexer.js +73 -15
- package/dist/cjs/cli/ui-server.d.ts +5 -0
- package/dist/cjs/cli/ui-server.js +152 -0
- package/dist/{cli.js → cjs/cli.js} +25 -6
- package/dist/{core → cjs/core}/binary-index-format.js +6 -3
- package/dist/{core → cjs/core}/db.d.ts +56 -0
- package/dist/{core → cjs/core}/db.js +105 -0
- package/dist/{core → cjs/core}/ingestion.js +3 -0
- package/dist/cjs/core/knowledge-base-manager.d.ts +109 -0
- package/dist/cjs/core/knowledge-base-manager.js +256 -0
- package/dist/{core → cjs/core}/model-validator.js +1 -1
- package/dist/{core → cjs/core}/search-pipeline.js +1 -1
- package/dist/{core → cjs/core}/search.js +1 -1
- package/dist/cjs/core/vector-index-messages.d.ts +52 -0
- package/dist/cjs/core/vector-index-messages.js +5 -0
- package/dist/cjs/core/vector-index-worker.d.ts +6 -0
- package/dist/cjs/core/vector-index-worker.js +304 -0
- package/dist/cjs/core/vector-index.d.ts +107 -0
- package/dist/cjs/core/vector-index.js +344 -0
- package/dist/{factories → cjs/factories}/ingestion-factory.js +3 -7
- package/dist/{factories → cjs/factories}/search-factory.js +11 -0
- package/dist/{index-manager.d.ts → cjs/index-manager.d.ts} +23 -3
- package/dist/{index-manager.js → cjs/index-manager.js} +84 -15
- package/dist/{index.d.ts → cjs/index.d.ts} +2 -1
- package/dist/{index.js → cjs/index.js} +3 -1
- package/dist/esm/api-errors.d.ts +90 -0
- package/dist/esm/api-errors.js +320 -0
- package/dist/esm/cli/indexer.d.ts +11 -0
- package/dist/esm/cli/indexer.js +529 -0
- package/dist/esm/cli/search.d.ts +7 -0
- package/dist/esm/cli/search.js +332 -0
- package/dist/esm/cli/ui-server.d.ts +5 -0
- package/dist/esm/cli/ui-server.js +152 -0
- package/dist/esm/cli.d.ts +3 -0
- package/dist/esm/cli.js +548 -0
- package/dist/esm/config.d.ts +51 -0
- package/dist/esm/config.js +79 -0
- package/dist/esm/core/abstract-embedder.d.ts +125 -0
- package/dist/esm/core/abstract-embedder.js +264 -0
- package/dist/esm/core/actionable-error-messages.d.ts +60 -0
- package/dist/esm/core/actionable-error-messages.js +397 -0
- package/dist/esm/core/adapters.d.ts +93 -0
- package/dist/esm/core/adapters.js +139 -0
- package/dist/esm/core/batch-processing-optimizer.d.ts +155 -0
- package/dist/esm/core/batch-processing-optimizer.js +536 -0
- package/dist/esm/core/binary-index-format.d.ts +78 -0
- package/dist/esm/core/binary-index-format.js +294 -0
- package/dist/esm/core/chunker.d.ts +119 -0
- package/dist/esm/core/chunker.js +73 -0
- package/dist/esm/core/cli-database-utils.d.ts +53 -0
- package/dist/esm/core/cli-database-utils.js +239 -0
- package/dist/esm/core/config.d.ts +102 -0
- package/dist/esm/core/config.js +247 -0
- package/dist/esm/core/content-errors.d.ts +111 -0
- package/dist/esm/core/content-errors.js +362 -0
- package/dist/esm/core/content-manager.d.ts +335 -0
- package/dist/esm/core/content-manager.js +1476 -0
- package/dist/esm/core/content-performance-optimizer.d.ts +150 -0
- package/dist/esm/core/content-performance-optimizer.js +516 -0
- package/dist/esm/core/content-resolver.d.ts +104 -0
- package/dist/esm/core/content-resolver.js +285 -0
- package/dist/esm/core/cross-modal-search.d.ts +164 -0
- package/dist/esm/core/cross-modal-search.js +342 -0
- package/dist/esm/core/database-connection-manager.d.ts +109 -0
- package/dist/esm/core/database-connection-manager.js +310 -0
- package/dist/esm/core/db.d.ts +269 -0
- package/dist/esm/core/db.js +1000 -0
- package/dist/esm/core/embedder-factory.d.ts +154 -0
- package/dist/esm/core/embedder-factory.js +311 -0
- package/dist/esm/core/error-handler.d.ts +112 -0
- package/dist/esm/core/error-handler.js +239 -0
- package/dist/esm/core/index.d.ts +59 -0
- package/dist/esm/core/index.js +69 -0
- package/dist/esm/core/ingestion.d.ts +202 -0
- package/dist/esm/core/ingestion.js +904 -0
- package/dist/esm/core/interfaces.d.ts +408 -0
- package/dist/esm/core/interfaces.js +106 -0
- package/dist/esm/core/knowledge-base-manager.d.ts +109 -0
- package/dist/esm/core/knowledge-base-manager.js +256 -0
- package/dist/esm/core/lazy-dependency-loader.d.ts +147 -0
- package/dist/esm/core/lazy-dependency-loader.js +435 -0
- package/dist/esm/core/mode-detection-service.d.ts +150 -0
- package/dist/esm/core/mode-detection-service.js +565 -0
- package/dist/esm/core/mode-model-validator.d.ts +92 -0
- package/dist/esm/core/mode-model-validator.js +203 -0
- package/dist/esm/core/model-registry.d.ts +116 -0
- package/dist/esm/core/model-registry.js +411 -0
- package/dist/esm/core/model-validator.d.ts +217 -0
- package/dist/esm/core/model-validator.js +782 -0
- package/dist/esm/core/path-manager.d.ts +47 -0
- package/dist/esm/core/path-manager.js +71 -0
- package/dist/esm/core/raglite-paths.d.ts +121 -0
- package/dist/esm/core/raglite-paths.js +145 -0
- package/dist/esm/core/reranking-config.d.ts +42 -0
- package/dist/esm/core/reranking-config.js +147 -0
- package/dist/esm/core/reranking-factory.d.ts +92 -0
- package/dist/esm/core/reranking-factory.js +410 -0
- package/dist/esm/core/reranking-strategies.d.ts +310 -0
- package/dist/esm/core/reranking-strategies.js +650 -0
- package/dist/esm/core/resource-cleanup.d.ts +163 -0
- package/dist/esm/core/resource-cleanup.js +371 -0
- package/dist/esm/core/resource-manager.d.ts +212 -0
- package/dist/esm/core/resource-manager.js +564 -0
- package/dist/esm/core/search-pipeline.d.ts +111 -0
- package/dist/esm/core/search-pipeline.js +287 -0
- package/dist/esm/core/search.d.ts +141 -0
- package/dist/esm/core/search.js +320 -0
- package/dist/esm/core/streaming-operations.d.ts +145 -0
- package/dist/esm/core/streaming-operations.js +409 -0
- package/dist/esm/core/types.d.ts +66 -0
- package/dist/esm/core/types.js +6 -0
- package/dist/esm/core/universal-embedder.d.ts +177 -0
- package/dist/esm/core/universal-embedder.js +139 -0
- package/dist/esm/core/validation-messages.d.ts +99 -0
- package/dist/esm/core/validation-messages.js +334 -0
- package/dist/esm/core/vector-index-messages.d.ts +52 -0
- package/dist/esm/core/vector-index-messages.js +5 -0
- package/dist/esm/core/vector-index-worker.d.ts +6 -0
- package/dist/esm/core/vector-index-worker.js +304 -0
- package/dist/esm/core/vector-index.d.ts +107 -0
- package/dist/esm/core/vector-index.js +344 -0
- package/dist/esm/dom-polyfills.d.ts +6 -0
- package/dist/esm/dom-polyfills.js +37 -0
- package/dist/esm/factories/index.d.ts +27 -0
- package/dist/esm/factories/index.js +29 -0
- package/dist/esm/factories/ingestion-factory.d.ts +200 -0
- package/dist/esm/factories/ingestion-factory.js +473 -0
- package/dist/esm/factories/search-factory.d.ts +154 -0
- package/dist/esm/factories/search-factory.js +355 -0
- package/dist/esm/file-processor.d.ts +147 -0
- package/dist/esm/file-processor.js +963 -0
- package/dist/esm/index-manager.d.ts +136 -0
- package/dist/esm/index-manager.js +667 -0
- package/dist/esm/index.d.ts +76 -0
- package/dist/esm/index.js +112 -0
- package/dist/esm/indexer.d.ts +7 -0
- package/dist/esm/indexer.js +54 -0
- package/dist/esm/ingestion.d.ts +63 -0
- package/dist/esm/ingestion.js +124 -0
- package/dist/esm/mcp-server.d.ts +46 -0
- package/dist/esm/mcp-server.js +1820 -0
- package/dist/esm/multimodal/clip-embedder.d.ts +327 -0
- package/dist/esm/multimodal/clip-embedder.js +996 -0
- package/dist/esm/multimodal/index.d.ts +6 -0
- package/dist/esm/multimodal/index.js +6 -0
- package/dist/esm/preprocess.d.ts +19 -0
- package/dist/esm/preprocess.js +203 -0
- package/dist/esm/preprocessors/index.d.ts +17 -0
- package/dist/esm/preprocessors/index.js +38 -0
- package/dist/esm/preprocessors/mdx.d.ts +25 -0
- package/dist/esm/preprocessors/mdx.js +101 -0
- package/dist/esm/preprocessors/mermaid.d.ts +68 -0
- package/dist/esm/preprocessors/mermaid.js +329 -0
- package/dist/esm/preprocessors/registry.d.ts +56 -0
- package/dist/esm/preprocessors/registry.js +179 -0
- package/dist/esm/run-error-recovery-tests.d.ts +7 -0
- package/dist/esm/run-error-recovery-tests.js +101 -0
- package/dist/esm/search-standalone.d.ts +7 -0
- package/dist/esm/search-standalone.js +117 -0
- package/dist/esm/search.d.ts +99 -0
- package/dist/esm/search.js +177 -0
- package/dist/esm/test-utils.d.ts +18 -0
- package/dist/esm/test-utils.js +27 -0
- package/dist/esm/text/chunker.d.ts +33 -0
- package/dist/esm/text/chunker.js +279 -0
- package/dist/esm/text/embedder.d.ts +111 -0
- package/dist/esm/text/embedder.js +386 -0
- package/dist/esm/text/index.d.ts +8 -0
- package/dist/esm/text/index.js +9 -0
- package/dist/esm/text/preprocessors/index.d.ts +17 -0
- package/dist/esm/text/preprocessors/index.js +38 -0
- package/dist/esm/text/preprocessors/mdx.d.ts +25 -0
- package/dist/esm/text/preprocessors/mdx.js +101 -0
- package/dist/esm/text/preprocessors/mermaid.d.ts +68 -0
- package/dist/esm/text/preprocessors/mermaid.js +330 -0
- package/dist/esm/text/preprocessors/registry.d.ts +56 -0
- package/dist/esm/text/preprocessors/registry.js +180 -0
- package/dist/esm/text/reranker.d.ts +49 -0
- package/dist/esm/text/reranker.js +274 -0
- package/dist/esm/text/sentence-transformer-embedder.d.ts +96 -0
- package/dist/esm/text/sentence-transformer-embedder.js +340 -0
- package/dist/esm/text/tokenizer.d.ts +22 -0
- package/dist/esm/text/tokenizer.js +64 -0
- package/dist/esm/types.d.ts +83 -0
- package/dist/esm/types.js +3 -0
- package/dist/esm/utils/vector-math.d.ts +31 -0
- package/dist/esm/utils/vector-math.js +70 -0
- package/package.json +39 -14
- package/dist/core/vector-index.d.ts +0 -72
- package/dist/core/vector-index.js +0 -331
- /package/dist/{api-errors.d.ts → cjs/api-errors.d.ts} +0 -0
- /package/dist/{api-errors.js → cjs/api-errors.js} +0 -0
- /package/dist/{cli → cjs/cli}/indexer.d.ts +0 -0
- /package/dist/{cli → cjs/cli}/search.d.ts +0 -0
- /package/dist/{cli → cjs/cli}/search.js +0 -0
- /package/dist/{cli.d.ts → cjs/cli.d.ts} +0 -0
- /package/dist/{config.d.ts → cjs/config.d.ts} +0 -0
- /package/dist/{config.js → cjs/config.js} +0 -0
- /package/dist/{core → cjs/core}/abstract-embedder.d.ts +0 -0
- /package/dist/{core → cjs/core}/abstract-embedder.js +0 -0
- /package/dist/{core → cjs/core}/actionable-error-messages.d.ts +0 -0
- /package/dist/{core → cjs/core}/actionable-error-messages.js +0 -0
- /package/dist/{core → cjs/core}/adapters.d.ts +0 -0
- /package/dist/{core → cjs/core}/adapters.js +0 -0
- /package/dist/{core → cjs/core}/batch-processing-optimizer.d.ts +0 -0
- /package/dist/{core → cjs/core}/batch-processing-optimizer.js +0 -0
- /package/dist/{core → cjs/core}/binary-index-format.d.ts +0 -0
- /package/dist/{core → cjs/core}/chunker.d.ts +0 -0
- /package/dist/{core → cjs/core}/chunker.js +0 -0
- /package/dist/{core → cjs/core}/cli-database-utils.d.ts +0 -0
- /package/dist/{core → cjs/core}/cli-database-utils.js +0 -0
- /package/dist/{core → cjs/core}/config.d.ts +0 -0
- /package/dist/{core → cjs/core}/config.js +0 -0
- /package/dist/{core → cjs/core}/content-errors.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-errors.js +0 -0
- /package/dist/{core → cjs/core}/content-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-manager.js +0 -0
- /package/dist/{core → cjs/core}/content-performance-optimizer.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-performance-optimizer.js +0 -0
- /package/dist/{core → cjs/core}/content-resolver.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-resolver.js +0 -0
- /package/dist/{core → cjs/core}/cross-modal-search.d.ts +0 -0
- /package/dist/{core → cjs/core}/cross-modal-search.js +0 -0
- /package/dist/{core → cjs/core}/database-connection-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/database-connection-manager.js +0 -0
- /package/dist/{core → cjs/core}/embedder-factory.d.ts +0 -0
- /package/dist/{core → cjs/core}/embedder-factory.js +0 -0
- /package/dist/{core → cjs/core}/error-handler.d.ts +0 -0
- /package/dist/{core → cjs/core}/error-handler.js +0 -0
- /package/dist/{core → cjs/core}/index.d.ts +0 -0
- /package/dist/{core → cjs/core}/index.js +0 -0
- /package/dist/{core → cjs/core}/ingestion.d.ts +0 -0
- /package/dist/{core → cjs/core}/interfaces.d.ts +0 -0
- /package/dist/{core → cjs/core}/interfaces.js +0 -0
- /package/dist/{core → cjs/core}/lazy-dependency-loader.d.ts +0 -0
- /package/dist/{core → cjs/core}/lazy-dependency-loader.js +0 -0
- /package/dist/{core → cjs/core}/mode-detection-service.d.ts +0 -0
- /package/dist/{core → cjs/core}/mode-detection-service.js +0 -0
- /package/dist/{core → cjs/core}/mode-model-validator.d.ts +0 -0
- /package/dist/{core → cjs/core}/mode-model-validator.js +0 -0
- /package/dist/{core → cjs/core}/model-registry.d.ts +0 -0
- /package/dist/{core → cjs/core}/model-registry.js +0 -0
- /package/dist/{core → cjs/core}/model-validator.d.ts +0 -0
- /package/dist/{core → cjs/core}/path-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/path-manager.js +0 -0
- /package/dist/{core → cjs/core}/raglite-paths.d.ts +0 -0
- /package/dist/{core → cjs/core}/raglite-paths.js +0 -0
- /package/dist/{core → cjs/core}/reranking-config.d.ts +0 -0
- /package/dist/{core → cjs/core}/reranking-config.js +0 -0
- /package/dist/{core → cjs/core}/reranking-factory.d.ts +0 -0
- /package/dist/{core → cjs/core}/reranking-factory.js +0 -0
- /package/dist/{core → cjs/core}/reranking-strategies.d.ts +0 -0
- /package/dist/{core → cjs/core}/reranking-strategies.js +0 -0
- /package/dist/{core → cjs/core}/resource-cleanup.d.ts +0 -0
- /package/dist/{core → cjs/core}/resource-cleanup.js +0 -0
- /package/dist/{core → cjs/core}/resource-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/resource-manager.js +0 -0
- /package/dist/{core → cjs/core}/search-pipeline.d.ts +0 -0
- /package/dist/{core → cjs/core}/search.d.ts +0 -0
- /package/dist/{core → cjs/core}/streaming-operations.d.ts +0 -0
- /package/dist/{core → cjs/core}/streaming-operations.js +0 -0
- /package/dist/{core → cjs/core}/types.d.ts +0 -0
- /package/dist/{core → cjs/core}/types.js +0 -0
- /package/dist/{core → cjs/core}/universal-embedder.d.ts +0 -0
- /package/dist/{core → cjs/core}/universal-embedder.js +0 -0
- /package/dist/{core → cjs/core}/validation-messages.d.ts +0 -0
- /package/dist/{core → cjs/core}/validation-messages.js +0 -0
- /package/dist/{dom-polyfills.d.ts → cjs/dom-polyfills.d.ts} +0 -0
- /package/dist/{dom-polyfills.js → cjs/dom-polyfills.js} +0 -0
- /package/dist/{factories → cjs/factories}/index.d.ts +0 -0
- /package/dist/{factories → cjs/factories}/index.js +0 -0
- /package/dist/{factories → cjs/factories}/ingestion-factory.d.ts +0 -0
- /package/dist/{factories → cjs/factories}/search-factory.d.ts +0 -0
- /package/dist/{file-processor.d.ts → cjs/file-processor.d.ts} +0 -0
- /package/dist/{file-processor.js → cjs/file-processor.js} +0 -0
- /package/dist/{indexer.d.ts → cjs/indexer.d.ts} +0 -0
- /package/dist/{indexer.js → cjs/indexer.js} +0 -0
- /package/dist/{ingestion.d.ts → cjs/ingestion.d.ts} +0 -0
- /package/dist/{ingestion.js → cjs/ingestion.js} +0 -0
- /package/dist/{mcp-server.d.ts → cjs/mcp-server.d.ts} +0 -0
- /package/dist/{mcp-server.js → cjs/mcp-server.js} +0 -0
- /package/dist/{multimodal → cjs/multimodal}/clip-embedder.d.ts +0 -0
- /package/dist/{multimodal → cjs/multimodal}/clip-embedder.js +0 -0
- /package/dist/{multimodal → cjs/multimodal}/index.d.ts +0 -0
- /package/dist/{multimodal → cjs/multimodal}/index.js +0 -0
- /package/dist/{preprocess.d.ts → cjs/preprocess.d.ts} +0 -0
- /package/dist/{preprocess.js → cjs/preprocess.js} +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/index.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/index.js +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mdx.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mdx.js +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mermaid.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mermaid.js +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/registry.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/registry.js +0 -0
- /package/dist/{run-error-recovery-tests.d.ts → cjs/run-error-recovery-tests.d.ts} +0 -0
- /package/dist/{run-error-recovery-tests.js → cjs/run-error-recovery-tests.js} +0 -0
- /package/dist/{search-standalone.d.ts → cjs/search-standalone.d.ts} +0 -0
- /package/dist/{search-standalone.js → cjs/search-standalone.js} +0 -0
- /package/dist/{search.d.ts → cjs/search.d.ts} +0 -0
- /package/dist/{search.js → cjs/search.js} +0 -0
- /package/dist/{test-utils.d.ts → cjs/test-utils.d.ts} +0 -0
- /package/dist/{test-utils.js → cjs/test-utils.js} +0 -0
- /package/dist/{text → cjs/text}/chunker.d.ts +0 -0
- /package/dist/{text → cjs/text}/chunker.js +0 -0
- /package/dist/{text → cjs/text}/embedder.d.ts +0 -0
- /package/dist/{text → cjs/text}/embedder.js +0 -0
- /package/dist/{text → cjs/text}/index.d.ts +0 -0
- /package/dist/{text → cjs/text}/index.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/index.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/index.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mdx.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mdx.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mermaid.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mermaid.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/registry.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/registry.js +0 -0
- /package/dist/{text → cjs/text}/reranker.d.ts +0 -0
- /package/dist/{text → cjs/text}/reranker.js +0 -0
- /package/dist/{text → cjs/text}/sentence-transformer-embedder.d.ts +0 -0
- /package/dist/{text → cjs/text}/sentence-transformer-embedder.js +0 -0
- /package/dist/{text → cjs/text}/tokenizer.d.ts +0 -0
- /package/dist/{text → cjs/text}/tokenizer.js +0 -0
- /package/dist/{types.d.ts → cjs/types.d.ts} +0 -0
- /package/dist/{types.js → cjs/types.js} +0 -0
- /package/dist/{utils → cjs/utils}/vector-math.d.ts +0 -0
- /package/dist/{utils → cjs/utils}/vector-math.js +0 -0
|
@@ -0,0 +1,344 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CORE MODULE — Shared between text-only (rag-lite-ts) and future multimodal (rag-lite-mm)
|
|
3
|
+
* Model-agnostic. No transformer or modality-specific logic.
|
|
4
|
+
*
|
|
5
|
+
* Worker-based implementation to prevent WebAssembly memory accumulation.
|
|
6
|
+
*/
|
|
7
|
+
import { Worker } from 'worker_threads';
|
|
8
|
+
import { existsSync } from 'fs';
|
|
9
|
+
import { fileURLToPath } from 'url';
|
|
10
|
+
import { dirname, join } from 'path';
|
|
11
|
+
import { handleError, ErrorCategory, ErrorSeverity, createError } from './error-handler.js';
|
|
12
|
+
import { createMissingFileError, createDimensionMismatchError } from './actionable-error-messages.js';
|
|
13
|
+
/**
 * Client-side handle for an HNSW vector index that lives in a worker thread.
 *
 * Every index operation is forwarded to the worker as a `{ id, type, payload }`
 * request; the worker's reply (matched by `id`) settles the promise returned to
 * the caller. Terminating the worker in `cleanup()` releases the memory held by
 * the worker.
 */
export class VectorIndex {
    /** Active worker thread, or null when no worker is running. */
    worker = null;
    /** Path of the index file on disk. */
    indexPath;
    /** Index options: caller-supplied values merged over the defaults. */
    options;
    /** Pending requests keyed by message id; each value is `{ resolve, reject }`. */
    messageQueue = new Map();
    /** Monotonically increasing id used to correlate requests and responses. */
    messageId = 0;
    /** True once `initialize()` or `loadIndex()` has succeeded. */
    isInitialized = false;
    /**
     * @param {string} indexPath - Path of the index file on disk.
     * @param {object} options - Index options (e.g. `dimensions`, `maxElements`);
     *   `efConstruction`, `M` and `seed` default to 200, 16 and 100.
     */
    constructor(indexPath, options) {
        this.indexPath = indexPath;
        this.options = {
            efConstruction: 200,
            M: 16,
            seed: 100,
            ...options
        };
    }
    /**
     * Get the path to the worker script.
     * Always uses compiled .js files - workers cannot execute TypeScript directly.
     *
     * Candidates are probed in order: next to this file, then (when running from
     * a `src` tree) the project's dist/esm and dist/cjs output, then (when running
     * from `node_modules`) the installed package's dist/esm and dist/cjs output.
     *
     * @returns {string} Path of the first candidate that exists.
     * @throws {Error} If none of the candidates exists; the message lists every
     *   path that was checked.
     */
    getWorkerPath() {
        const workerFile = 'vector-index-worker.js';
        const currentDir = dirname(fileURLToPath(import.meta.url));
        const jsPath = join(currentDir, workerFile);
        const candidates = [jsPath];
        // If running from src/ (development), try dist/ paths
        if (currentDir.includes('src')) {
            // Find project root (go up from src/core)
            const projectRoot = currentDir.replace(/[\\/]src[\\/]core.*$/, '');
            candidates.push(join(projectRoot, 'dist', 'esm', 'core', workerFile), join(projectRoot, 'dist', 'cjs', 'core', workerFile));
        }
        // If running from node_modules (installed package), try dist paths
        if (currentDir.includes('node_modules')) {
            const packageRoot = currentDir.split('node_modules')[0];
            candidates.push(join(packageRoot, 'node_modules', 'rag-lite-ts', 'dist', 'esm', 'core', workerFile), join(packageRoot, 'node_modules', 'rag-lite-ts', 'dist', 'cjs', 'core', workerFile));
        }
        const found = candidates.find((candidate) => existsSync(candidate));
        if (found !== undefined) {
            return found;
        }
        // Final fallback - fail with a clear error naming everything we probed
        throw new Error(`Worker file not found. Expected: ${jsPath}\n` +
            'Please run "npm run build" to compile the vector-index-worker.ts file.\n' +
            `Current directory: ${currentDir}\n` +
            `Checked paths: ${candidates.join(', ')}`);
    }
    /**
     * Reject every pending request with the given error and empty the queue.
     *
     * @param {Error} error - Rejection reason handed to each pending request.
     */
    rejectAllPending(error) {
        for (const handler of this.messageQueue.values()) {
            handler.reject(error);
        }
        this.messageQueue.clear();
    }
    /**
     * Ensure worker is created and ready. No-op when a worker already exists.
     */
    async ensureWorker() {
        if (this.worker) {
            return;
        }
        const worker = new Worker(this.getWorkerPath());
        this.worker = worker;
        // Route each response to the request that is waiting on its id
        worker.on('message', (response) => {
            const handler = this.messageQueue.get(response.id);
            if (!handler) {
                return;
            }
            this.messageQueue.delete(response.id);
            if (response.type === 'error') {
                handler.reject(new Error(response.error || 'Unknown error'));
            }
            else {
                handler.resolve(response.payload);
            }
        });
        // Handle worker errors: nobody will answer the pending requests anymore
        worker.on('error', (error) => {
            console.error('VectorIndex worker error:', error);
            this.rejectAllPending(error);
        });
        // Handle worker exit: fail pending requests and drop the dead worker so
        // the next request spawns a fresh one
        worker.on('exit', (code) => {
            if (code !== 0) {
                console.error(`VectorIndex worker exited with code ${code}`);
            }
            this.rejectAllPending(new Error(`Worker exited with code ${code}`));
            this.worker = null;
            this.isInitialized = false;
        });
    }
    /**
     * Send a message to the worker and wait for response.
     *
     * @param {string} type - Request type understood by the worker.
     * @param {*} [payload] - Request payload.
     * @returns {Promise<*>} Resolves with the response payload; rejects when the
     *   worker answers with an error, errors, exits, or the message cannot be posted.
     */
    async sendMessage(type, payload) {
        await this.ensureWorker();
        const worker = this.worker;
        return new Promise((resolve, reject) => {
            const id = this.messageId++;
            this.messageQueue.set(id, { resolve, reject });
            try {
                worker.postMessage({ id, type, payload });
            }
            catch (error) {
                // The request never left this thread: do not leave a dead entry behind
                this.messageQueue.delete(id);
                reject(error);
            }
        });
    }
    /**
     * Convert Float32Array to ArrayBuffer for transfer.
     *
     * Always returns a standalone ArrayBuffer holding exactly the elements of
     * `vector`, whether the view covers only part of its buffer or is backed by
     * a SharedArrayBuffer.
     *
     * @param {Float32Array} vector - Vector to copy.
     * @returns {ArrayBuffer} Fresh buffer of `vector.byteLength` bytes.
     */
    float32ArrayToBuffer(vector) {
        // Copy through a new typed array: slicing a SharedArrayBuffer would yield
        // another SharedArrayBuffer rather than an ArrayBuffer.
        const copy = new Float32Array(vector.length);
        copy.set(vector);
        return copy.buffer;
    }
    /**
     * Initialize the HNSW index with cosine similarity using hnswlib-wasm.
     *
     * @throws Re-throws the underlying failure after reporting it through handleError.
     */
    async initialize() {
        try {
            const { dimensions, maxElements, M, efConstruction, seed } = this.options;
            const payload = {
                dimensions,
                maxElements,
                M,
                efConstruction,
                seed,
                indexPath: this.indexPath // Pass indexPath to worker for saveIndex operations
            };
            await this.sendMessage('init', payload);
            this.isInitialized = true;
            console.log(`Initialized HNSW index with ${this.options.dimensions} dimensions using hnswlib-wasm (worker)`);
        }
        catch (error) {
            handleError(createError.index(`Failed to initialize vector index: ${error instanceof Error ? error.message : String(error)}`), 'Vector Index Initialization', {
                category: ErrorCategory.INDEX,
                severity: ErrorSeverity.FATAL
            });
            throw error;
        }
    }
    /**
     * Load existing index from file using hnswlib-wasm.
     *
     * @throws If the index file does not exist, or if the worker fails to load it
     *   (the original failure is attached as `cause`).
     */
    async loadIndex() {
        if (!existsSync(this.indexPath)) {
            throw createMissingFileError(this.indexPath, 'index', {
                operationContext: 'VectorIndex.loadIndex'
            });
        }
        try {
            const result = await this.sendMessage('loadIndex', { indexPath: this.indexPath });
            this.isInitialized = true;
            console.log(`✓ Loaded HNSW index with ${result.count} vectors from ${this.indexPath} (worker)`);
        }
        catch (error) {
            throw new Error(`Failed to load index from ${this.indexPath}: ${error}`, { cause: error });
        }
    }
    /**
     * Save index to binary format.
     *
     * @throws {Error} If the index is not initialized, or if the worker fails to
     *   save it (the original failure is attached as `cause`).
     */
    async saveIndex() {
        if (!this.isInitialized) {
            throw new Error('Index not initialized');
        }
        try {
            const result = await this.sendMessage('saveIndex');
            const actualSize = result.count;
            // 4 bytes per float32 component
            console.log(`✓ Saved HNSW index with ${actualSize} vectors (${(actualSize * this.options.dimensions * 4 / 1024).toFixed(2)} KB of vector data) to ${this.indexPath} (worker)`);
        }
        catch (error) {
            throw new Error(`Failed to save index to ${this.indexPath}: ${error}`, { cause: error });
        }
    }
    /**
     * Add a single vector to the HNSW index.
     * Now async due to worker-based implementation.
     *
     * @param {number} embeddingId - Id under which the vector is stored.
     * @param {Float32Array} vector - Vector with `options.dimensions` components.
     * @throws If the index is not initialized or the vector length does not match.
     */
    async addVector(embeddingId, vector) {
        if (!this.isInitialized) {
            throw new Error('Index not initialized');
        }
        if (vector.length !== this.options.dimensions) {
            throw createDimensionMismatchError(this.options.dimensions, vector.length, 'vector addition', { operationContext: 'VectorIndex.addVector' });
        }
        const payload = {
            id: embeddingId,
            vector: this.float32ArrayToBuffer(vector),
            dimensions: vector.length
        };
        await this.sendMessage('addVector', payload);
    }
    /**
     * Add multiple vectors to the index in batch.
     * Now async due to worker-based implementation.
     *
     * @param {Array<{id: number, vector: Float32Array}>} vectors - Vectors to add.
     * @throws If the index is not initialized or any vector length does not match
     *   `options.dimensions` (checked before anything is sent to the worker).
     */
    async addVectors(vectors) {
        if (!this.isInitialized) {
            throw new Error('Index not initialized');
        }
        // Same validation as addVector, applied up front so a bad batch is rejected whole
        for (const { vector } of vectors) {
            if (vector.length !== this.options.dimensions) {
                throw createDimensionMismatchError(this.options.dimensions, vector.length, 'vector addition', { operationContext: 'VectorIndex.addVectors' });
            }
        }
        const payload = {
            vectors: vectors.map(({ id, vector }) => ({
                id,
                vector: this.float32ArrayToBuffer(vector),
                dimensions: vector.length
            }))
        };
        await this.sendMessage('addVectors', payload);
    }
    /**
     * Search for k nearest neighbors using hnswlib-wasm.
     * Now async due to worker-based implementation.
     *
     * @param {Float32Array} queryVector - Query with `options.dimensions` components.
     * @param {number} [k=5] - Number of neighbors requested.
     * @returns {Promise<{neighbors: Array, distances: Array}>} The worker's result.
     * @throws If the index is not initialized or the query length does not match.
     */
    async search(queryVector, k = 5) {
        if (!this.isInitialized) {
            throw new Error('Index not initialized');
        }
        if (queryVector.length !== this.options.dimensions) {
            throw createDimensionMismatchError(this.options.dimensions, queryVector.length, 'vector search', { operationContext: 'VectorIndex.search' });
        }
        const payload = {
            queryVector: this.float32ArrayToBuffer(queryVector),
            dimensions: queryVector.length,
            k
        };
        const result = await this.sendMessage('search', payload);
        // Normalize an empty result to a plain empty shape
        if (result.neighbors.length === 0 && result.distances.length === 0) {
            return { neighbors: [], distances: [] };
        }
        return result;
    }
    /**
     * Get current number of vectors in the index.
     * Now async due to worker-based implementation.
     *
     * @returns {Promise<number>} 0 when the index is not initialized, otherwise
     *   the count reported by the worker.
     */
    async getCurrentCount() {
        if (!this.isInitialized) {
            return 0;
        }
        const result = await this.sendMessage('getCurrentCount');
        return result.count;
    }
    /**
     * Check if index exists on disk.
     *
     * @returns {boolean} True when a file exists at `indexPath`.
     */
    indexExists() {
        // This can be synchronous since it's just a file system check
        return existsSync(this.indexPath);
    }
    /**
     * Set search parameters for query time.
     * Now async due to worker-based implementation.
     *
     * Best-effort: a failure reported by the worker is logged, not thrown.
     *
     * @param {number} ef - Value forwarded to the worker's `setEf` request.
     * @throws {Error} If the index is not initialized.
     */
    async setEf(ef) {
        if (!this.isInitialized) {
            throw new Error('Index not initialized');
        }
        try {
            await this.sendMessage('setEf', { ef });
        }
        catch (error) {
            console.log(`Failed to set ef: ${error}`);
        }
    }
    /**
     * Resize index to accommodate more vectors.
     * Now async due to worker-based implementation.
     *
     * @param {number} newMaxElements - New capacity; must exceed the current one.
     * @throws {Error} If the index is not initialized or the capacity does not grow.
     */
    async resizeIndex(newMaxElements) {
        if (!this.isInitialized) {
            throw new Error('Index not initialized');
        }
        if (newMaxElements <= this.options.maxElements) {
            throw new Error(`New max elements (${newMaxElements}) must be greater than current (${this.options.maxElements})`);
        }
        await this.sendMessage('resizeIndex', { newMaxElements });
        // Record the new capacity only after the worker accepted it
        this.options.maxElements = newMaxElements;
        console.log(`Resized index to accommodate ${newMaxElements} vectors`);
    }
    /**
     * Reset the vector index to an empty state by sending a `reset` request to
     * the worker. The options held by this instance are left untouched.
     */
    async reset() {
        console.log('🔄 VectorIndex: Resetting to empty state...');
        await this.sendMessage('reset');
        console.log('✓ VectorIndex reset: cleared all vectors');
    }
    /**
     * Get index options (for external access to configuration).
     *
     * @returns {object} Shallow copy of the options, so callers cannot mutate
     *   this instance's configuration.
     */
    getOptions() {
        return { ...this.options };
    }
    /**
     * Cleanup: terminate worker and free all WebAssembly memory.
     * No-op when no worker is running.
     */
    async cleanup() {
        // Hold our own reference: the 'exit' handler nulls this.worker if the
        // worker goes away while the cleanup message is in flight.
        const worker = this.worker;
        if (!worker) {
            return;
        }
        try {
            // Send cleanup message (worker will acknowledge)
            await this.sendMessage('cleanup');
        }
        catch (error) {
            // Best-effort: the worker is terminated below regardless
        }
        try {
            // Terminate worker - this frees ALL WebAssembly memory
            await worker.terminate();
        }
        finally {
            this.worker = null;
            this.isInitialized = false;
            this.messageQueue.clear();
        }
    }
}
|
|
344
|
+
//# sourceMappingURL=vector-index.js.map
|
|
@@ -296,8 +296,7 @@ export class IngestionFactory {
|
|
|
296
296
|
// Preserve custom error messages for model mismatch and mode mismatch
|
|
297
297
|
if (error instanceof Error && (error.message.includes('Model mismatch') ||
|
|
298
298
|
error.message.includes('Mode mismatch') ||
|
|
299
|
-
error.message.includes('--force-rebuild')
|
|
300
|
-
error.message.includes('--rebuild-if-needed'))) {
|
|
299
|
+
error.message.includes('--force-rebuild'))) {
|
|
301
300
|
throw error; // Re-throw custom validation errors as-is
|
|
302
301
|
}
|
|
303
302
|
throw createFactoryCreationError('IngestionFactory', error instanceof Error ? error.message : 'Unknown error', { operationContext: 'ingestion pipeline creation' });
|
|
@@ -366,13 +365,10 @@ export class IngestionFactory {
|
|
|
366
365
|
`❌ Model mismatch: Database is configured for '${existingSystemInfo.modelName}', but '${effectiveModel}' was requested.`,
|
|
367
366
|
'',
|
|
368
367
|
'🛠️ How to fix this:',
|
|
369
|
-
' 1. Use --force-rebuild to
|
|
368
|
+
' 1. Use --force-rebuild to rebuild from scratch:',
|
|
370
369
|
' raglite ingest <path> --model ' + effectiveModel + ' --force-rebuild',
|
|
371
370
|
'',
|
|
372
|
-
' 2. Or
|
|
373
|
-
' raglite ingest <path> --model ' + effectiveModel + ' --rebuild-if-needed',
|
|
374
|
-
'',
|
|
375
|
-
' 3. Or continue using the existing model:',
|
|
371
|
+
' 2. Or continue using the existing model:',
|
|
376
372
|
' raglite ingest <path> # Uses ' + existingSystemInfo.modelName,
|
|
377
373
|
'',
|
|
378
374
|
'🔍 Model switching requires rebuilding the vector index because different models',
|
|
@@ -281,6 +281,17 @@ export class SearchFactory {
|
|
|
281
281
|
enhancedMessage += '\n The index was created with a different model. Rebuild the index:';
|
|
282
282
|
enhancedMessage += '\n raglite ingest <directory> --force-rebuild';
|
|
283
283
|
}
|
|
284
|
+
else if (error.message.includes('Cannot enlarge memory') ||
|
|
285
|
+
error.message.includes('WebAssembly memory limit') ||
|
|
286
|
+
error.message.includes('memory limit exceeded')) {
|
|
287
|
+
enhancedMessage += '\n\n💡 WebAssembly memory limit exceeded.';
|
|
288
|
+
enhancedMessage += '\n Your vector index is too large for the 2GB WebAssembly memory limit.';
|
|
289
|
+
enhancedMessage += '\n Solutions:';
|
|
290
|
+
enhancedMessage += '\n 1. Increase Node.js memory: node --max-old-space-size=4096 ...';
|
|
291
|
+
enhancedMessage += '\n 2. Split your data into smaller indexes';
|
|
292
|
+
enhancedMessage += '\n 3. Use a smaller embedding model (fewer dimensions)';
|
|
293
|
+
enhancedMessage += '\n 4. Rebuild the index with fewer vectors';
|
|
294
|
+
}
|
|
284
295
|
return new Error(enhancedMessage);
|
|
285
296
|
}
|
|
286
297
|
return new Error(`SearchFactory.create failed: Unknown error`);
|
|
@@ -86,11 +86,12 @@ export declare class IndexManager {
|
|
|
86
86
|
saveGroupedIndex(textEmbeddings: EmbeddingResult[], imageEmbeddings: EmbeddingResult[]): Promise<void>;
|
|
87
87
|
/**
|
|
88
88
|
* Search for similar vectors
|
|
89
|
+
* Now async due to worker-based VectorIndex implementation
|
|
89
90
|
*/
|
|
90
|
-
search(queryVector: Float32Array, k?: number, contentType?: 'text' | 'image' | 'combined'): {
|
|
91
|
+
search(queryVector: Float32Array, k?: number, contentType?: 'text' | 'image' | 'combined'): Promise<{
|
|
91
92
|
embeddingIds: string[];
|
|
92
93
|
distances: number[];
|
|
93
|
-
}
|
|
94
|
+
}>;
|
|
94
95
|
/**
|
|
95
96
|
* Get index statistics
|
|
96
97
|
*/
|
|
@@ -109,8 +110,27 @@ export declare class IndexManager {
|
|
|
109
110
|
*/
|
|
110
111
|
private unhashEmbeddingId;
|
|
111
112
|
/**
|
|
112
|
-
* Close database connection
|
|
113
|
+
* Close database connection and cleanup vector index worker
|
|
113
114
|
*/
|
|
114
115
|
close(): Promise<void>;
|
|
116
|
+
/**
|
|
117
|
+
* Reset the vector index by clearing all vectors while keeping the index structure.
|
|
118
|
+
* This is a safer alternative to file deletion that avoids file locking issues on Windows.
|
|
119
|
+
*
|
|
120
|
+
* The reset operation:
|
|
121
|
+
* 1. Clears in-memory HNSW index
|
|
122
|
+
* 2. Clears in-memory vector storage and ID mappings
|
|
123
|
+
* 3. Reinitializes an empty index with the same parameters
|
|
124
|
+
* 4. Saves the empty index to disk (overwrites existing file)
|
|
125
|
+
*
|
|
126
|
+
* @returns Promise that resolves when reset is complete
|
|
127
|
+
*/
|
|
128
|
+
reset(): Promise<void>;
|
|
129
|
+
/**
|
|
130
|
+
* Check if the index has any vectors
|
|
131
|
+
* @returns true if the index contains vectors, false if empty
|
|
132
|
+
* Now async due to worker-based VectorIndex implementation
|
|
133
|
+
*/
|
|
134
|
+
hasVectors(): Promise<boolean>;
|
|
115
135
|
}
|
|
116
136
|
//# sourceMappingURL=index-manager.d.ts.map
|
|
@@ -64,7 +64,7 @@ export class IndexManager {
|
|
|
64
64
|
this.hashEmbeddingId(chunk.embedding_id); // This will populate the mapping
|
|
65
65
|
}
|
|
66
66
|
this.isInitialized = true;
|
|
67
|
-
const vectorCount = this.vectorIndex.getCurrentCount();
|
|
67
|
+
const vectorCount = await this.vectorIndex.getCurrentCount();
|
|
68
68
|
console.log(`Index manager initialized with ${vectorCount} vectors${this.textIndex && this.imageIndex ? ' (multi-graph mode)' : ''}`);
|
|
69
69
|
}
|
|
70
70
|
catch (error) {
|
|
@@ -145,17 +145,18 @@ export class IndexManager {
|
|
|
145
145
|
vector: embedding.vector
|
|
146
146
|
}));
|
|
147
147
|
// Check if we need to resize the index before adding
|
|
148
|
-
const
|
|
149
|
-
const newCount =
|
|
148
|
+
const initialCount = await this.vectorIndex.getCurrentCount();
|
|
149
|
+
const newCount = initialCount + vectors.length;
|
|
150
150
|
const currentCapacity = 100000; // This should match the initial capacity
|
|
151
151
|
if (newCount > currentCapacity * 0.9) {
|
|
152
152
|
const newCapacity = Math.ceil(newCount * 1.5);
|
|
153
153
|
console.log(`Resizing index from ${currentCapacity} to ${newCapacity} to accommodate new vectors`);
|
|
154
|
-
this.vectorIndex.resizeIndex(newCapacity);
|
|
154
|
+
await this.vectorIndex.resizeIndex(newCapacity);
|
|
155
155
|
}
|
|
156
156
|
// Add vectors incrementally (this is the key requirement - no rebuild needed)
|
|
157
|
-
this.vectorIndex.addVectors(vectors);
|
|
158
|
-
|
|
157
|
+
await this.vectorIndex.addVectors(vectors);
|
|
158
|
+
const finalCount = await this.vectorIndex.getCurrentCount();
|
|
159
|
+
console.log(`Incrementally added ${embeddings.length} vectors to index (total: ${finalCount})`);
|
|
159
160
|
// Save the updated index
|
|
160
161
|
await this.saveIndex();
|
|
161
162
|
}
|
|
@@ -223,7 +224,7 @@ export class IndexManager {
|
|
|
223
224
|
const currentCapacity = 100000; // Default capacity
|
|
224
225
|
if (chunkData.length > currentCapacity * 0.8) {
|
|
225
226
|
const newCapacity = Math.ceil(chunkData.length * 1.5);
|
|
226
|
-
this.vectorIndex.resizeIndex(newCapacity);
|
|
227
|
+
await this.vectorIndex.resizeIndex(newCapacity);
|
|
227
228
|
console.log(`Resized index capacity to ${newCapacity} for ${chunkData.length} chunks`);
|
|
228
229
|
}
|
|
229
230
|
// Update model version if provided
|
|
@@ -279,7 +280,7 @@ export class IndexManager {
|
|
|
279
280
|
const currentCapacity = 100000;
|
|
280
281
|
if (chunkData.length > currentCapacity * 0.8) {
|
|
281
282
|
const newCapacity = Math.ceil(chunkData.length * 1.5);
|
|
282
|
-
this.vectorIndex.resizeIndex(newCapacity);
|
|
283
|
+
await this.vectorIndex.resizeIndex(newCapacity);
|
|
283
284
|
console.log(`Resized index capacity to ${newCapacity}`);
|
|
284
285
|
}
|
|
285
286
|
// Re-generate embeddings for all chunks
|
|
@@ -294,7 +295,7 @@ export class IndexManager {
|
|
|
294
295
|
id: this.hashEmbeddingId(embedding.embedding_id),
|
|
295
296
|
vector: embedding.vector
|
|
296
297
|
}));
|
|
297
|
-
this.vectorIndex.addVectors(vectors);
|
|
298
|
+
await this.vectorIndex.addVectors(vectors);
|
|
298
299
|
console.log(`Added ${vectors.length} vectors to rebuilt index`);
|
|
299
300
|
// Update model version
|
|
300
301
|
await this.updateModelVersion(embeddingEngine.getModelVersion());
|
|
@@ -414,12 +415,12 @@ export class IndexManager {
|
|
|
414
415
|
// Create text-only index
|
|
415
416
|
this.textIndex = new VectorIndex(`${this.indexPath}.text`, this.vectorIndexOptions);
|
|
416
417
|
await this.textIndex.initialize();
|
|
417
|
-
this.textIndex.addVectors(indexData.textVectors);
|
|
418
|
+
await this.textIndex.addVectors(indexData.textVectors);
|
|
418
419
|
console.log(`✓ Text index created with ${indexData.textVectors.length} vectors`);
|
|
419
420
|
// Create image-only index
|
|
420
421
|
this.imageIndex = new VectorIndex(`${this.indexPath}.image`, this.vectorIndexOptions);
|
|
421
422
|
await this.imageIndex.initialize();
|
|
422
|
-
this.imageIndex.addVectors(indexData.imageVectors);
|
|
423
|
+
await this.imageIndex.addVectors(indexData.imageVectors);
|
|
423
424
|
console.log(`✓ Image index created with ${indexData.imageVectors.length} vectors`);
|
|
424
425
|
console.log('✓ Specialized indexes ready for content type filtering');
|
|
425
426
|
}
|
|
@@ -475,8 +476,9 @@ export class IndexManager {
|
|
|
475
476
|
}
|
|
476
477
|
/**
|
|
477
478
|
* Search for similar vectors
|
|
479
|
+
* Now async due to worker-based VectorIndex implementation
|
|
478
480
|
*/
|
|
479
|
-
search(queryVector, k = 5, contentType) {
|
|
481
|
+
async search(queryVector, k = 5, contentType) {
|
|
480
482
|
if (!this.isInitialized) {
|
|
481
483
|
throw new Error('Index manager not initialized');
|
|
482
484
|
}
|
|
@@ -499,7 +501,7 @@ export class IndexManager {
|
|
|
499
501
|
// No specialized indexes (text-only mode) - ignore contentType and use combined index
|
|
500
502
|
targetIndex = this.vectorIndex;
|
|
501
503
|
}
|
|
502
|
-
const results = targetIndex.search(queryVector, k);
|
|
504
|
+
const results = await targetIndex.search(queryVector, k);
|
|
503
505
|
// Convert numeric IDs back to embedding IDs
|
|
504
506
|
const embeddingIds = results.neighbors.map(id => this.unhashEmbeddingId(id));
|
|
505
507
|
return {
|
|
@@ -514,7 +516,7 @@ export class IndexManager {
|
|
|
514
516
|
if (!this.db) {
|
|
515
517
|
throw new Error('Database not initialized');
|
|
516
518
|
}
|
|
517
|
-
const totalVectors = this.vectorIndex.getCurrentCount();
|
|
519
|
+
const totalVectors = await this.vectorIndex.getCurrentCount();
|
|
518
520
|
try {
|
|
519
521
|
const systemInfo = await getSystemInfo(this.db);
|
|
520
522
|
const modelVersion = systemInfo?.modelVersion || null;
|
|
@@ -586,13 +588,80 @@ export class IndexManager {
|
|
|
586
588
|
return embeddingId;
|
|
587
589
|
}
|
|
588
590
|
/**
|
|
589
|
-
* Close database connection
|
|
591
|
+
* Close database connection and cleanup vector index worker
|
|
590
592
|
*/
|
|
591
593
|
async close() {
|
|
592
594
|
if (this.db) {
|
|
593
595
|
await this.db.close();
|
|
594
596
|
this.db = null;
|
|
595
597
|
}
|
|
598
|
+
// Clean up vector index worker to free WebAssembly memory
|
|
599
|
+
if (this.vectorIndex && typeof this.vectorIndex.cleanup === 'function') {
|
|
600
|
+
await this.vectorIndex.cleanup();
|
|
601
|
+
}
|
|
602
|
+
// Also clean up specialized indexes
|
|
603
|
+
if (this.textIndex && typeof this.textIndex.cleanup === 'function') {
|
|
604
|
+
await this.textIndex.cleanup();
|
|
605
|
+
}
|
|
606
|
+
if (this.imageIndex && typeof this.imageIndex.cleanup === 'function') {
|
|
607
|
+
await this.imageIndex.cleanup();
|
|
608
|
+
}
|
|
609
|
+
}
|
|
610
|
+
/**
|
|
611
|
+
* Reset the vector index by clearing all vectors while keeping the index structure.
|
|
612
|
+
* This is a safer alternative to file deletion that avoids file locking issues on Windows.
|
|
613
|
+
*
|
|
614
|
+
* The reset operation:
|
|
615
|
+
* 1. Clears in-memory HNSW index
|
|
616
|
+
* 2. Clears in-memory vector storage and ID mappings
|
|
617
|
+
* 3. Reinitializes an empty index with the same parameters
|
|
618
|
+
* 4. Saves the empty index to disk (overwrites existing file)
|
|
619
|
+
*
|
|
620
|
+
* @returns Promise that resolves when reset is complete
|
|
621
|
+
*/
|
|
622
|
+
async reset() {
|
|
623
|
+
console.log('🔄 Starting index reset...');
|
|
624
|
+
const startTime = Date.now();
|
|
625
|
+
try {
|
|
626
|
+
// Clear in-memory mappings
|
|
627
|
+
const previousVectorCount = await this.vectorIndex.getCurrentCount();
|
|
628
|
+
this.hashToEmbeddingId.clear();
|
|
629
|
+
this.embeddingIdToHash.clear();
|
|
630
|
+
// Clear grouped embeddings if any
|
|
631
|
+
this.groupedEmbeddings = undefined;
|
|
632
|
+
// Clear specialized indexes if they exist
|
|
633
|
+
if (this.textIndex) {
|
|
634
|
+
this.textIndex = undefined;
|
|
635
|
+
}
|
|
636
|
+
if (this.imageIndex) {
|
|
637
|
+
this.imageIndex = undefined;
|
|
638
|
+
}
|
|
639
|
+
// Reset the vector index (clears all vectors and reinitializes empty HNSW graph)
|
|
640
|
+
console.log(' Resetting HNSW index...');
|
|
641
|
+
await this.vectorIndex.reset();
|
|
642
|
+
// Save the empty index to disk (this overwrites the existing file)
|
|
643
|
+
console.log(' Saving empty index to disk...');
|
|
644
|
+
await this.vectorIndex.saveIndex();
|
|
645
|
+
const resetTimeMs = Date.now() - startTime;
|
|
646
|
+
const currentCount = await this.vectorIndex.getCurrentCount();
|
|
647
|
+
console.log(`✓ Index reset complete in ${resetTimeMs}ms`);
|
|
648
|
+
console.log(` Vectors cleared: ${previousVectorCount}`);
|
|
649
|
+
console.log(` Current vector count: ${currentCount}`);
|
|
650
|
+
}
|
|
651
|
+
catch (error) {
|
|
652
|
+
const resetTimeMs = Date.now() - startTime;
|
|
653
|
+
console.error(`❌ Index reset failed after ${resetTimeMs}ms:`, error);
|
|
654
|
+
throw new Error(`Failed to reset index: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
655
|
+
}
|
|
656
|
+
}
|
|
657
|
+
/**
|
|
658
|
+
* Check if the index has any vectors
|
|
659
|
+
* @returns true if the index contains vectors, false if empty
|
|
660
|
+
* Now async due to worker-based VectorIndex implementation
|
|
661
|
+
*/
|
|
662
|
+
async hasVectors() {
|
|
663
|
+
const count = await this.vectorIndex.getCurrentCount();
|
|
664
|
+
return count > 0;
|
|
596
665
|
}
|
|
597
666
|
}
|
|
598
667
|
//# sourceMappingURL=index-manager.js.map
|
|
@@ -59,7 +59,8 @@ export { CrossEncoderReranker, createTextRerankFunction } from './text/reranker.
|
|
|
59
59
|
export { countTokens } from './text/tokenizer.js';
|
|
60
60
|
export type { RerankingStrategyType, RerankingConfig } from './core/reranking-config.js';
|
|
61
61
|
export { validateRerankingStrategy, validateRerankingConfig, getDefaultRerankingConfig, isStrategySupported, getSupportedStrategies, RerankingConfigBuilder, DEFAULT_TEXT_RERANKING_CONFIG, DEFAULT_MULTIMODAL_RERANKING_CONFIG } from './core/reranking-config.js';
|
|
62
|
-
export { openDatabase, initializeSchema, insertDocument, insertChunk, upsertDocument, getChunksByEmbeddingIds, type DatabaseConnection } from './core/db.js';
|
|
62
|
+
export { openDatabase, initializeSchema, insertDocument, insertChunk, upsertDocument, getChunksByEmbeddingIds, resetDatabase, hasDatabaseData, type DatabaseConnection, type DatabaseResetOptions, type DatabaseResetResult } from './core/db.js';
|
|
63
|
+
export { KnowledgeBaseManager, type KnowledgeBaseResetOptions, type KnowledgeBaseResetResult } from './core/knowledge-base-manager.js';
|
|
63
64
|
export { IndexManager } from './index-manager.js';
|
|
64
65
|
export { VectorIndex } from './core/vector-index.js';
|
|
65
66
|
export { config, getModelDefaults, type CoreConfig, type ExtensibleConfig, type ModelDefaults, EXIT_CODES, ConfigurationError, getDefaultModelCachePath, handleUnrecoverableError, logError } from './core/config.js';
|
|
@@ -83,7 +83,9 @@ export { validateRerankingStrategy, validateRerankingConfig, getDefaultReranking
|
|
|
83
83
|
// CORE INFRASTRUCTURE (FOR ADVANCED USERS)
|
|
84
84
|
// =============================================================================
|
|
85
85
|
// Database operations
|
|
86
|
-
export { openDatabase, initializeSchema, insertDocument, insertChunk, upsertDocument, getChunksByEmbeddingIds } from './core/db.js';
|
|
86
|
+
export { openDatabase, initializeSchema, insertDocument, insertChunk, upsertDocument, getChunksByEmbeddingIds, resetDatabase, hasDatabaseData } from './core/db.js';
|
|
87
|
+
// Knowledge Base Manager (for reset operations)
|
|
88
|
+
export { KnowledgeBaseManager } from './core/knowledge-base-manager.js';
|
|
87
89
|
// Vector index management
|
|
88
90
|
export { IndexManager } from './index-manager.js';
|
|
89
91
|
export { VectorIndex } from './core/vector-index.js';
|