rag-lite-ts 2.1.1 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +88 -5
- package/dist/{cli → cjs/cli}/indexer.js +73 -15
- package/dist/cjs/cli/ui-server.d.ts +5 -0
- package/dist/cjs/cli/ui-server.js +152 -0
- package/dist/{cli.js → cjs/cli.js} +25 -6
- package/dist/{core → cjs/core}/binary-index-format.js +6 -3
- package/dist/{core → cjs/core}/db.d.ts +56 -0
- package/dist/{core → cjs/core}/db.js +105 -0
- package/dist/{core → cjs/core}/ingestion.js +3 -0
- package/dist/cjs/core/knowledge-base-manager.d.ts +109 -0
- package/dist/cjs/core/knowledge-base-manager.js +256 -0
- package/dist/{core → cjs/core}/model-validator.js +1 -1
- package/dist/{core → cjs/core}/search-pipeline.js +1 -1
- package/dist/{core → cjs/core}/search.js +1 -1
- package/dist/cjs/core/vector-index-messages.d.ts +52 -0
- package/dist/cjs/core/vector-index-messages.js +5 -0
- package/dist/cjs/core/vector-index-worker.d.ts +6 -0
- package/dist/cjs/core/vector-index-worker.js +304 -0
- package/dist/cjs/core/vector-index.d.ts +107 -0
- package/dist/cjs/core/vector-index.js +344 -0
- package/dist/{factories → cjs/factories}/ingestion-factory.js +3 -7
- package/dist/{factories → cjs/factories}/search-factory.js +11 -0
- package/dist/{index-manager.d.ts → cjs/index-manager.d.ts} +23 -3
- package/dist/{index-manager.js → cjs/index-manager.js} +84 -15
- package/dist/{index.d.ts → cjs/index.d.ts} +2 -1
- package/dist/{index.js → cjs/index.js} +3 -1
- package/dist/esm/api-errors.d.ts +90 -0
- package/dist/esm/api-errors.js +320 -0
- package/dist/esm/cli/indexer.d.ts +11 -0
- package/dist/esm/cli/indexer.js +529 -0
- package/dist/esm/cli/search.d.ts +7 -0
- package/dist/esm/cli/search.js +332 -0
- package/dist/esm/cli/ui-server.d.ts +5 -0
- package/dist/esm/cli/ui-server.js +152 -0
- package/dist/esm/cli.d.ts +3 -0
- package/dist/esm/cli.js +548 -0
- package/dist/esm/config.d.ts +51 -0
- package/dist/esm/config.js +79 -0
- package/dist/esm/core/abstract-embedder.d.ts +125 -0
- package/dist/esm/core/abstract-embedder.js +264 -0
- package/dist/esm/core/actionable-error-messages.d.ts +60 -0
- package/dist/esm/core/actionable-error-messages.js +397 -0
- package/dist/esm/core/adapters.d.ts +93 -0
- package/dist/esm/core/adapters.js +139 -0
- package/dist/esm/core/batch-processing-optimizer.d.ts +155 -0
- package/dist/esm/core/batch-processing-optimizer.js +536 -0
- package/dist/esm/core/binary-index-format.d.ts +78 -0
- package/dist/esm/core/binary-index-format.js +294 -0
- package/dist/esm/core/chunker.d.ts +119 -0
- package/dist/esm/core/chunker.js +73 -0
- package/dist/esm/core/cli-database-utils.d.ts +53 -0
- package/dist/esm/core/cli-database-utils.js +239 -0
- package/dist/esm/core/config.d.ts +102 -0
- package/dist/esm/core/config.js +247 -0
- package/dist/esm/core/content-errors.d.ts +111 -0
- package/dist/esm/core/content-errors.js +362 -0
- package/dist/esm/core/content-manager.d.ts +335 -0
- package/dist/esm/core/content-manager.js +1476 -0
- package/dist/esm/core/content-performance-optimizer.d.ts +150 -0
- package/dist/esm/core/content-performance-optimizer.js +516 -0
- package/dist/esm/core/content-resolver.d.ts +104 -0
- package/dist/esm/core/content-resolver.js +285 -0
- package/dist/esm/core/cross-modal-search.d.ts +164 -0
- package/dist/esm/core/cross-modal-search.js +342 -0
- package/dist/esm/core/database-connection-manager.d.ts +109 -0
- package/dist/esm/core/database-connection-manager.js +310 -0
- package/dist/esm/core/db.d.ts +269 -0
- package/dist/esm/core/db.js +1000 -0
- package/dist/esm/core/embedder-factory.d.ts +154 -0
- package/dist/esm/core/embedder-factory.js +311 -0
- package/dist/esm/core/error-handler.d.ts +112 -0
- package/dist/esm/core/error-handler.js +239 -0
- package/dist/esm/core/index.d.ts +59 -0
- package/dist/esm/core/index.js +69 -0
- package/dist/esm/core/ingestion.d.ts +202 -0
- package/dist/esm/core/ingestion.js +904 -0
- package/dist/esm/core/interfaces.d.ts +408 -0
- package/dist/esm/core/interfaces.js +106 -0
- package/dist/esm/core/knowledge-base-manager.d.ts +109 -0
- package/dist/esm/core/knowledge-base-manager.js +256 -0
- package/dist/esm/core/lazy-dependency-loader.d.ts +147 -0
- package/dist/esm/core/lazy-dependency-loader.js +435 -0
- package/dist/esm/core/mode-detection-service.d.ts +150 -0
- package/dist/esm/core/mode-detection-service.js +565 -0
- package/dist/esm/core/mode-model-validator.d.ts +92 -0
- package/dist/esm/core/mode-model-validator.js +203 -0
- package/dist/esm/core/model-registry.d.ts +116 -0
- package/dist/esm/core/model-registry.js +411 -0
- package/dist/esm/core/model-validator.d.ts +217 -0
- package/dist/esm/core/model-validator.js +782 -0
- package/dist/esm/core/path-manager.d.ts +47 -0
- package/dist/esm/core/path-manager.js +71 -0
- package/dist/esm/core/raglite-paths.d.ts +121 -0
- package/dist/esm/core/raglite-paths.js +145 -0
- package/dist/esm/core/reranking-config.d.ts +42 -0
- package/dist/esm/core/reranking-config.js +147 -0
- package/dist/esm/core/reranking-factory.d.ts +92 -0
- package/dist/esm/core/reranking-factory.js +410 -0
- package/dist/esm/core/reranking-strategies.d.ts +310 -0
- package/dist/esm/core/reranking-strategies.js +650 -0
- package/dist/esm/core/resource-cleanup.d.ts +163 -0
- package/dist/esm/core/resource-cleanup.js +371 -0
- package/dist/esm/core/resource-manager.d.ts +212 -0
- package/dist/esm/core/resource-manager.js +564 -0
- package/dist/esm/core/search-pipeline.d.ts +111 -0
- package/dist/esm/core/search-pipeline.js +287 -0
- package/dist/esm/core/search.d.ts +141 -0
- package/dist/esm/core/search.js +320 -0
- package/dist/esm/core/streaming-operations.d.ts +145 -0
- package/dist/esm/core/streaming-operations.js +409 -0
- package/dist/esm/core/types.d.ts +66 -0
- package/dist/esm/core/types.js +6 -0
- package/dist/esm/core/universal-embedder.d.ts +177 -0
- package/dist/esm/core/universal-embedder.js +139 -0
- package/dist/esm/core/validation-messages.d.ts +99 -0
- package/dist/esm/core/validation-messages.js +334 -0
- package/dist/esm/core/vector-index-messages.d.ts +52 -0
- package/dist/esm/core/vector-index-messages.js +5 -0
- package/dist/esm/core/vector-index-worker.d.ts +6 -0
- package/dist/esm/core/vector-index-worker.js +304 -0
- package/dist/esm/core/vector-index.d.ts +107 -0
- package/dist/esm/core/vector-index.js +344 -0
- package/dist/esm/dom-polyfills.d.ts +6 -0
- package/dist/esm/dom-polyfills.js +37 -0
- package/dist/esm/factories/index.d.ts +27 -0
- package/dist/esm/factories/index.js +29 -0
- package/dist/esm/factories/ingestion-factory.d.ts +200 -0
- package/dist/esm/factories/ingestion-factory.js +473 -0
- package/dist/esm/factories/search-factory.d.ts +154 -0
- package/dist/esm/factories/search-factory.js +355 -0
- package/dist/esm/file-processor.d.ts +147 -0
- package/dist/esm/file-processor.js +963 -0
- package/dist/esm/index-manager.d.ts +136 -0
- package/dist/esm/index-manager.js +667 -0
- package/dist/esm/index.d.ts +76 -0
- package/dist/esm/index.js +112 -0
- package/dist/esm/indexer.d.ts +7 -0
- package/dist/esm/indexer.js +54 -0
- package/dist/esm/ingestion.d.ts +63 -0
- package/dist/esm/ingestion.js +124 -0
- package/dist/esm/mcp-server.d.ts +46 -0
- package/dist/esm/mcp-server.js +1820 -0
- package/dist/esm/multimodal/clip-embedder.d.ts +327 -0
- package/dist/esm/multimodal/clip-embedder.js +996 -0
- package/dist/esm/multimodal/index.d.ts +6 -0
- package/dist/esm/multimodal/index.js +6 -0
- package/dist/esm/preprocess.d.ts +19 -0
- package/dist/esm/preprocess.js +203 -0
- package/dist/esm/preprocessors/index.d.ts +17 -0
- package/dist/esm/preprocessors/index.js +38 -0
- package/dist/esm/preprocessors/mdx.d.ts +25 -0
- package/dist/esm/preprocessors/mdx.js +101 -0
- package/dist/esm/preprocessors/mermaid.d.ts +68 -0
- package/dist/esm/preprocessors/mermaid.js +329 -0
- package/dist/esm/preprocessors/registry.d.ts +56 -0
- package/dist/esm/preprocessors/registry.js +179 -0
- package/dist/esm/run-error-recovery-tests.d.ts +7 -0
- package/dist/esm/run-error-recovery-tests.js +101 -0
- package/dist/esm/search-standalone.d.ts +7 -0
- package/dist/esm/search-standalone.js +117 -0
- package/dist/esm/search.d.ts +99 -0
- package/dist/esm/search.js +177 -0
- package/dist/esm/test-utils.d.ts +18 -0
- package/dist/esm/test-utils.js +27 -0
- package/dist/esm/text/chunker.d.ts +33 -0
- package/dist/esm/text/chunker.js +279 -0
- package/dist/esm/text/embedder.d.ts +111 -0
- package/dist/esm/text/embedder.js +386 -0
- package/dist/esm/text/index.d.ts +8 -0
- package/dist/esm/text/index.js +9 -0
- package/dist/esm/text/preprocessors/index.d.ts +17 -0
- package/dist/esm/text/preprocessors/index.js +38 -0
- package/dist/esm/text/preprocessors/mdx.d.ts +25 -0
- package/dist/esm/text/preprocessors/mdx.js +101 -0
- package/dist/esm/text/preprocessors/mermaid.d.ts +68 -0
- package/dist/esm/text/preprocessors/mermaid.js +330 -0
- package/dist/esm/text/preprocessors/registry.d.ts +56 -0
- package/dist/esm/text/preprocessors/registry.js +180 -0
- package/dist/esm/text/reranker.d.ts +49 -0
- package/dist/esm/text/reranker.js +274 -0
- package/dist/esm/text/sentence-transformer-embedder.d.ts +96 -0
- package/dist/esm/text/sentence-transformer-embedder.js +340 -0
- package/dist/esm/text/tokenizer.d.ts +22 -0
- package/dist/esm/text/tokenizer.js +64 -0
- package/dist/esm/types.d.ts +83 -0
- package/dist/esm/types.js +3 -0
- package/dist/esm/utils/vector-math.d.ts +31 -0
- package/dist/esm/utils/vector-math.js +70 -0
- package/package.json +39 -14
- package/dist/core/vector-index.d.ts +0 -72
- package/dist/core/vector-index.js +0 -331
- /package/dist/{api-errors.d.ts → cjs/api-errors.d.ts} +0 -0
- /package/dist/{api-errors.js → cjs/api-errors.js} +0 -0
- /package/dist/{cli → cjs/cli}/indexer.d.ts +0 -0
- /package/dist/{cli → cjs/cli}/search.d.ts +0 -0
- /package/dist/{cli → cjs/cli}/search.js +0 -0
- /package/dist/{cli.d.ts → cjs/cli.d.ts} +0 -0
- /package/dist/{config.d.ts → cjs/config.d.ts} +0 -0
- /package/dist/{config.js → cjs/config.js} +0 -0
- /package/dist/{core → cjs/core}/abstract-embedder.d.ts +0 -0
- /package/dist/{core → cjs/core}/abstract-embedder.js +0 -0
- /package/dist/{core → cjs/core}/actionable-error-messages.d.ts +0 -0
- /package/dist/{core → cjs/core}/actionable-error-messages.js +0 -0
- /package/dist/{core → cjs/core}/adapters.d.ts +0 -0
- /package/dist/{core → cjs/core}/adapters.js +0 -0
- /package/dist/{core → cjs/core}/batch-processing-optimizer.d.ts +0 -0
- /package/dist/{core → cjs/core}/batch-processing-optimizer.js +0 -0
- /package/dist/{core → cjs/core}/binary-index-format.d.ts +0 -0
- /package/dist/{core → cjs/core}/chunker.d.ts +0 -0
- /package/dist/{core → cjs/core}/chunker.js +0 -0
- /package/dist/{core → cjs/core}/cli-database-utils.d.ts +0 -0
- /package/dist/{core → cjs/core}/cli-database-utils.js +0 -0
- /package/dist/{core → cjs/core}/config.d.ts +0 -0
- /package/dist/{core → cjs/core}/config.js +0 -0
- /package/dist/{core → cjs/core}/content-errors.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-errors.js +0 -0
- /package/dist/{core → cjs/core}/content-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-manager.js +0 -0
- /package/dist/{core → cjs/core}/content-performance-optimizer.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-performance-optimizer.js +0 -0
- /package/dist/{core → cjs/core}/content-resolver.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-resolver.js +0 -0
- /package/dist/{core → cjs/core}/cross-modal-search.d.ts +0 -0
- /package/dist/{core → cjs/core}/cross-modal-search.js +0 -0
- /package/dist/{core → cjs/core}/database-connection-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/database-connection-manager.js +0 -0
- /package/dist/{core → cjs/core}/embedder-factory.d.ts +0 -0
- /package/dist/{core → cjs/core}/embedder-factory.js +0 -0
- /package/dist/{core → cjs/core}/error-handler.d.ts +0 -0
- /package/dist/{core → cjs/core}/error-handler.js +0 -0
- /package/dist/{core → cjs/core}/index.d.ts +0 -0
- /package/dist/{core → cjs/core}/index.js +0 -0
- /package/dist/{core → cjs/core}/ingestion.d.ts +0 -0
- /package/dist/{core → cjs/core}/interfaces.d.ts +0 -0
- /package/dist/{core → cjs/core}/interfaces.js +0 -0
- /package/dist/{core → cjs/core}/lazy-dependency-loader.d.ts +0 -0
- /package/dist/{core → cjs/core}/lazy-dependency-loader.js +0 -0
- /package/dist/{core → cjs/core}/mode-detection-service.d.ts +0 -0
- /package/dist/{core → cjs/core}/mode-detection-service.js +0 -0
- /package/dist/{core → cjs/core}/mode-model-validator.d.ts +0 -0
- /package/dist/{core → cjs/core}/mode-model-validator.js +0 -0
- /package/dist/{core → cjs/core}/model-registry.d.ts +0 -0
- /package/dist/{core → cjs/core}/model-registry.js +0 -0
- /package/dist/{core → cjs/core}/model-validator.d.ts +0 -0
- /package/dist/{core → cjs/core}/path-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/path-manager.js +0 -0
- /package/dist/{core → cjs/core}/raglite-paths.d.ts +0 -0
- /package/dist/{core → cjs/core}/raglite-paths.js +0 -0
- /package/dist/{core → cjs/core}/reranking-config.d.ts +0 -0
- /package/dist/{core → cjs/core}/reranking-config.js +0 -0
- /package/dist/{core → cjs/core}/reranking-factory.d.ts +0 -0
- /package/dist/{core → cjs/core}/reranking-factory.js +0 -0
- /package/dist/{core → cjs/core}/reranking-strategies.d.ts +0 -0
- /package/dist/{core → cjs/core}/reranking-strategies.js +0 -0
- /package/dist/{core → cjs/core}/resource-cleanup.d.ts +0 -0
- /package/dist/{core → cjs/core}/resource-cleanup.js +0 -0
- /package/dist/{core → cjs/core}/resource-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/resource-manager.js +0 -0
- /package/dist/{core → cjs/core}/search-pipeline.d.ts +0 -0
- /package/dist/{core → cjs/core}/search.d.ts +0 -0
- /package/dist/{core → cjs/core}/streaming-operations.d.ts +0 -0
- /package/dist/{core → cjs/core}/streaming-operations.js +0 -0
- /package/dist/{core → cjs/core}/types.d.ts +0 -0
- /package/dist/{core → cjs/core}/types.js +0 -0
- /package/dist/{core → cjs/core}/universal-embedder.d.ts +0 -0
- /package/dist/{core → cjs/core}/universal-embedder.js +0 -0
- /package/dist/{core → cjs/core}/validation-messages.d.ts +0 -0
- /package/dist/{core → cjs/core}/validation-messages.js +0 -0
- /package/dist/{dom-polyfills.d.ts → cjs/dom-polyfills.d.ts} +0 -0
- /package/dist/{dom-polyfills.js → cjs/dom-polyfills.js} +0 -0
- /package/dist/{factories → cjs/factories}/index.d.ts +0 -0
- /package/dist/{factories → cjs/factories}/index.js +0 -0
- /package/dist/{factories → cjs/factories}/ingestion-factory.d.ts +0 -0
- /package/dist/{factories → cjs/factories}/search-factory.d.ts +0 -0
- /package/dist/{file-processor.d.ts → cjs/file-processor.d.ts} +0 -0
- /package/dist/{file-processor.js → cjs/file-processor.js} +0 -0
- /package/dist/{indexer.d.ts → cjs/indexer.d.ts} +0 -0
- /package/dist/{indexer.js → cjs/indexer.js} +0 -0
- /package/dist/{ingestion.d.ts → cjs/ingestion.d.ts} +0 -0
- /package/dist/{ingestion.js → cjs/ingestion.js} +0 -0
- /package/dist/{mcp-server.d.ts → cjs/mcp-server.d.ts} +0 -0
- /package/dist/{mcp-server.js → cjs/mcp-server.js} +0 -0
- /package/dist/{multimodal → cjs/multimodal}/clip-embedder.d.ts +0 -0
- /package/dist/{multimodal → cjs/multimodal}/clip-embedder.js +0 -0
- /package/dist/{multimodal → cjs/multimodal}/index.d.ts +0 -0
- /package/dist/{multimodal → cjs/multimodal}/index.js +0 -0
- /package/dist/{preprocess.d.ts → cjs/preprocess.d.ts} +0 -0
- /package/dist/{preprocess.js → cjs/preprocess.js} +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/index.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/index.js +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mdx.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mdx.js +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mermaid.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mermaid.js +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/registry.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/registry.js +0 -0
- /package/dist/{run-error-recovery-tests.d.ts → cjs/run-error-recovery-tests.d.ts} +0 -0
- /package/dist/{run-error-recovery-tests.js → cjs/run-error-recovery-tests.js} +0 -0
- /package/dist/{search-standalone.d.ts → cjs/search-standalone.d.ts} +0 -0
- /package/dist/{search-standalone.js → cjs/search-standalone.js} +0 -0
- /package/dist/{search.d.ts → cjs/search.d.ts} +0 -0
- /package/dist/{search.js → cjs/search.js} +0 -0
- /package/dist/{test-utils.d.ts → cjs/test-utils.d.ts} +0 -0
- /package/dist/{test-utils.js → cjs/test-utils.js} +0 -0
- /package/dist/{text → cjs/text}/chunker.d.ts +0 -0
- /package/dist/{text → cjs/text}/chunker.js +0 -0
- /package/dist/{text → cjs/text}/embedder.d.ts +0 -0
- /package/dist/{text → cjs/text}/embedder.js +0 -0
- /package/dist/{text → cjs/text}/index.d.ts +0 -0
- /package/dist/{text → cjs/text}/index.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/index.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/index.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mdx.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mdx.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mermaid.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mermaid.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/registry.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/registry.js +0 -0
- /package/dist/{text → cjs/text}/reranker.d.ts +0 -0
- /package/dist/{text → cjs/text}/reranker.js +0 -0
- /package/dist/{text → cjs/text}/sentence-transformer-embedder.d.ts +0 -0
- /package/dist/{text → cjs/text}/sentence-transformer-embedder.js +0 -0
- /package/dist/{text → cjs/text}/tokenizer.d.ts +0 -0
- /package/dist/{text → cjs/text}/tokenizer.js +0 -0
- /package/dist/{types.d.ts → cjs/types.d.ts} +0 -0
- /package/dist/{types.js → cjs/types.js} +0 -0
- /package/dist/{utils → cjs/utils}/vector-math.d.ts +0 -0
- /package/dist/{utils → cjs/utils}/vector-math.js +0 -0
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Worker thread for VectorIndex operations
|
|
3
|
+
* Isolates hnswlib-wasm WebAssembly memory to prevent accumulation
|
|
4
|
+
*/
|
|
5
|
+
import { parentPort } from 'worker_threads';
|
|
6
|
+
import { existsSync } from 'fs';
|
|
7
|
+
import { BinaryIndexFormat } from './binary-index-format.js';
|
|
8
|
+
// Set up browser-like environment for hnswlib-wasm (same as current vector-index.ts)
|
|
9
|
+
if (typeof window === 'undefined') {
    // No browser globals in this thread: build an emulated document with jsdom
    // so hnswlib-wasm finds the browser-like environment it is set up for.
    const jsdom = await import('jsdom');
    const virtualDom = new jsdom.JSDOM('<!DOCTYPE html><html><body></body></html>', {
        url: 'http://localhost',
        pretendToBeVisual: true,
        resources: 'usable'
    });
    const browserWindow = virtualDom.window;
    // Publish the emulated browser objects as Node globals.
    global.window = browserWindow;
    global.document = browserWindow.document;
    global.XMLHttpRequest = browserWindow.XMLHttpRequest;
    // Pin indexedDB to undefined on both the Node global and the jsdom window.
    global.indexedDB = undefined;
    Object.defineProperty(browserWindow, 'indexedDB', {
        value: undefined,
        writable: false,
        configurable: true
    });
}
|
|
26
|
+
// Worker state
|
|
27
|
+
/** Loaded hnswlib-wasm module; null until loadHnswlibModule() has completed once. */
let hnswlib = null;
/** Current HierarchicalNSW instance; null until init or loadIndex has run. */
let index = null;
/** Vectors added to the index keyed by id; this is what saveIndex writes out. */
const vectorStorage = new Map();
/** Vector count reported back to the main thread. */
let currentSize = 0;
/** Index parameters (dimensions, maxElements, M, efConstruction, seed); null until configured. */
let options = null;
/** Path of the index file; set during init or loadIndex. */
let indexPath = null;
|
|
33
|
+
// Helper: Load hnswlib module (only once per worker)
|
|
34
|
+
// Substrings that mark stderr / console.error output to be dropped while hnswlib-wasm loads.
const HNSWLIB_LOAD_NOISE_MARKERS = [
    'IDBFS',
    'indexedDB not supported',
    'EmscriptenFileSystemManager',
    'Aborted',
    'jsFS Error',
    'syncing FS',
    'RuntimeError: unreachable',
    '___trap',
    'abort',
    'assert',
    'hnswlib-wasm/dist/hnswlib'
];
/**
 * Tell whether a diagnostic message should be suppressed during module loading.
 *
 * @param message Text about to be written to stderr or console.error.
 * @returns true when the text contains any of the known noise markers.
 */
function isHnswlibLoadNoise(message) {
    return HNSWLIB_LOAD_NOISE_MARKERS.some((marker) => message.includes(marker));
}
/**
 * Load the hnswlib-wasm module once per worker and cache it in `hnswlib`.
 *
 * While the import is in flight, process.stderr.write and console.error are
 * temporarily replaced with filters that drop messages matching the noise
 * markers; both are restored in `finally`, whether loading succeeds or throws.
 *
 * @returns The loaded hnswlib module (the cached instance on repeat calls).
 */
async function loadHnswlibModule() {
    if (hnswlib) {
        return hnswlib;
    }
    const originalStderrWrite = process.stderr.write;
    const originalConsoleError = console.error;
    process.stderr.write = function (chunk, encoding, callback) {
        if (isHnswlibLoadNoise(chunk.toString())) {
            // write(chunk, callback) puts the callback in the `encoding` slot;
            // honour it so callers waiting on completion are still notified.
            const done = typeof encoding === 'function' ? encoding : callback;
            if (done)
                done();
            return true;
        }
        return originalStderrWrite.call(this, chunk, encoding, callback);
    };
    console.error = (...args) => {
        if (isHnswlibLoadNoise(args.join(' '))) {
            return;
        }
        originalConsoleError.apply(console, args);
    };
    try {
        const { loadHnswlib } = await import('hnswlib-wasm/dist/hnswlib.js');
        hnswlib = await loadHnswlib();
        return hnswlib;
    }
    finally {
        // Always put the real writers back, even if loading failed.
        process.stderr.write = originalStderrWrite;
        console.error = originalConsoleError;
    }
}
|
|
78
|
+
// Helper: Convert ArrayBuffer to Float32Array
|
|
79
|
+
/**
 * View the start of an ArrayBuffer as 32-bit floats.
 *
 * @param buffer Buffer holding the float data, starting at byte offset 0.
 * @param dimensions Number of float elements the view should expose.
 * @returns A Float32Array backed by `buffer` (no copy is made).
 */
function bufferToFloat32Array(buffer, dimensions) {
    const floatView = new Float32Array(buffer, 0, dimensions);
    return floatView;
}
|
|
82
|
+
// Message handlers
|
|
83
|
+
/**
 * Handle an 'init' request: create a fresh, empty cosine-distance index.
 *
 * @param payload Carries indexPath, dimensions, maxElements and the optional
 *   M / efConstruction / seed (falsy values fall back to 16 / 200 / 100).
 */
async function handleInit(payload) {
    await loadHnswlibModule();
    // Remembered so a later saveIndex knows where to write.
    indexPath = payload.indexPath;
    const { dimensions, maxElements } = payload;
    options = {
        dimensions,
        maxElements,
        M: payload.M || 16,
        efConstruction: payload.efConstruction || 200,
        seed: payload.seed || 100
    };
    index = new hnswlib.HierarchicalNSW('cosine', dimensions, '');
    index.initIndex(maxElements, options.M, options.efConstruction, options.seed);
    // A newly initialised index holds no vectors.
    currentSize = 0;
    vectorStorage.clear();
}
|
|
99
|
+
/**
 * Handle a 'loadIndex' request: read the binary index file and rebuild the
 * in-memory HNSW index and vectorStorage from its vectors.
 *
 * @param payload Carries indexPath, the file to load.
 * @throws Error when the file is missing, the stored dimensions differ from
 *   the configured ones, or adding a vector fails (memory-related failures
 *   are rethrown with a descriptive message).
 */
async function handleLoadIndex(payload) {
    const sourcePath = payload.indexPath;
    if (!existsSync(sourcePath)) {
        throw new Error(`Index file not found: ${sourcePath}`);
    }
    await loadHnswlibModule();
    const stored = await BinaryIndexFormat.load(sourcePath);
    indexPath = sourcePath;
    // Without a prior init, adopt the parameters recorded in the file.
    if (!options) {
        options = {
            dimensions: stored.dimensions,
            maxElements: stored.maxElements,
            M: stored.M || 16,
            efConstruction: stored.efConstruction || 200,
            seed: stored.seed || 100
        };
    }
    if (stored.dimensions !== options.dimensions) {
        throw new Error(`Dimension mismatch: stored ${stored.dimensions}, expected ${options.dimensions}`);
    }
    index = new hnswlib.HierarchicalNSW('cosine', options.dimensions, '');
    index.initIndex(options.maxElements, options.M, options.efConstruction, options.seed);
    // Start from an empty state, then re-add every stored vector.
    vectorStorage.clear();
    currentSize = 0;
    const batchSize = 1000;
    const totalVectors = stored.vectors.length;
    for (let start = 0; start < totalVectors; start += batchSize) {
        const end = start + batchSize;
        for (const entry of stored.vectors.slice(start, end)) {
            try {
                index.addPoint(entry.vector, entry.id, false);
                vectorStorage.set(entry.id, entry.vector);
                currentSize++;
            }
            catch (error) {
                const text = error?.message;
                const memoryRelated = text?.includes('Cannot enlarge memory') ||
                    text?.includes('memory') ||
                    (error?.name === 'WebAssembly.Exception' && text?.includes('memory'));
                if (memoryRelated) {
                    throw new Error(`WebAssembly memory limit exceeded while loading vector index. ` +
                        `Index contains ${totalVectors} vectors which requires more than 2GB of memory. ` +
                        `Consider: 1) Rebuilding the index with fewer vectors, 2) Using a smaller embedding model, ` +
                        `3) Splitting your data into multiple smaller indexes, or 4) Increasing Node.js memory with --max-old-space-size=4096`);
                }
                throw error;
            }
        }
        // Progress output for large indexes, once per 10000 vectors.
        if (totalVectors > 10000 && end % 10000 === 0) {
            console.log(` Loaded ${Math.min(end, totalVectors)}/${totalVectors} vectors...`);
        }
    }
}
|
|
153
|
+
/**
 * Handle a 'saveIndex' request: write the index parameters and every vector in
 * vectorStorage to the remembered indexPath in the binary index format.
 *
 * @throws Error when no index exists yet or no indexPath has been recorded.
 */
async function handleSaveIndex() {
    if (!(index && indexPath)) {
        throw new Error('Index not initialized or indexPath not set');
    }
    const vectors = [];
    for (const [id, vector] of vectorStorage) {
        vectors.push({ id, vector });
    }
    const { dimensions, maxElements, M, efConstruction, seed } = options;
    await BinaryIndexFormat.save(indexPath, {
        dimensions,
        maxElements,
        M,
        efConstruction,
        seed,
        // The persisted count is the number of vectors actually written.
        currentSize: vectors.length,
        vectors
    });
}
|
|
171
|
+
/**
 * Handle an 'addVector' request: insert one vector into the HNSW index and
 * keep a private copy in vectorStorage for later saving.
 *
 * @param payload Carries id, vector (an ArrayBuffer of floats) and dimensions
 *   (the number of floats to read from the buffer).
 * @throws Error when the index is not initialised or the vector length does
 *   not match the configured dimensions.
 */
function handleAddVector(payload) {
    if (!index || !options) {
        throw new Error('Index not initialized');
    }
    const vector = bufferToFloat32Array(payload.vector, payload.dimensions);
    if (vector.length !== options.dimensions) {
        throw new Error(`Vector dimension mismatch: ${vector.length} vs ${options.dimensions}`);
    }
    // Re-adding an id overwrites its vectorStorage entry instead of creating a
    // new one, so only a previously unseen id may grow the count. This keeps
    // currentSize equal to the number of vectors saveIndex will persist.
    // NOTE(review): presumably hnswlib also updates the existing point for a
    // known label rather than adding a second one - confirm.
    const isNewId = !vectorStorage.has(payload.id);
    index.addPoint(vector, payload.id, false);
    // Store a copy so the entry does not alias the transferred buffer.
    vectorStorage.set(payload.id, new Float32Array(vector));
    if (isNewId) {
        currentSize++;
    }
}
|
|
183
|
+
/**
 * Handle an 'addVectors' request by adding each entry in order through
 * handleAddVector; the first failure propagates and stops the batch.
 *
 * @param payload Carries vectors, a list of { id, vector, dimensions } entries.
 */
function handleAddVectors(payload) {
    for (const entry of payload.vectors) {
        const single = {
            id: entry.id,
            vector: entry.vector,
            dimensions: entry.dimensions
        };
        handleAddVector(single);
    }
}
|
|
192
|
+
/**
 * Handle a 'search' request: find the nearest neighbours of a query vector.
 *
 * @param payload Carries queryVector (ArrayBuffer of floats), dimensions and k.
 * @returns { neighbors, distances } from the index; both lists are empty when
 *   the index holds no vectors.
 * @throws Error when the index is not initialised or the query length does not
 *   match the configured dimensions.
 */
function handleSearch(payload) {
    if (!(index && options)) {
        throw new Error('Index not initialized');
    }
    const queryVector = bufferToFloat32Array(payload.queryVector, payload.dimensions);
    const expectedDimensions = options.dimensions;
    if (queryVector.length !== expectedDimensions) {
        throw new Error(`Query vector dimension mismatch: ${queryVector.length} vs ${expectedDimensions}`);
    }
    if (currentSize === 0) {
        return { neighbors: [], distances: [] };
    }
    // Never ask for more neighbours than the index is known to contain.
    const neighbourCount = Math.min(payload.k, currentSize);
    const { neighbors, distances } = index.searchKnn(queryVector, neighbourCount, undefined);
    return { neighbors, distances };
}
|
|
209
|
+
/**
 * Report how many vectors the worker currently counts in the index.
 *
 * @returns The value of the worker-level currentSize counter.
 */
function handleGetCurrentCount() {
    const count = currentSize;
    return count;
}
|
|
212
|
+
/**
 * Handle a 'resizeIndex' request: grow the index capacity.
 *
 * @param payload Carries newMaxElements, which must exceed the current maxElements.
 * @throws Error when the index is not initialised or the new capacity is not larger.
 */
function handleResizeIndex(payload) {
    if (!(index && options)) {
        throw new Error('Index not initialized');
    }
    const requestedCapacity = payload.newMaxElements;
    if (requestedCapacity <= options.maxElements) {
        throw new Error(`New max elements must be greater than current`);
    }
    index.resizeIndex(requestedCapacity);
    // Keep the recorded options in step with the resized index.
    options.maxElements = requestedCapacity;
}
|
|
222
|
+
/**
 * Handle a 'reset' request: drop all stored vectors and, when an index already
 * exists, replace it with an empty one built from the same options.
 */
async function handleReset() {
    vectorStorage.clear();
    currentSize = 0;
    // Nothing to rebuild unless an index, its options and the module are all present.
    if (!(index && options && hnswlib)) {
        return;
    }
    index = new hnswlib.HierarchicalNSW('cosine', options.dimensions, '');
    index.initIndex(options.maxElements, options.M, options.efConstruction, options.seed);
    // Apply the query-time ef value when the binding exposes a setter.
    if (typeof index.setEfSearch === 'function') {
        index.setEfSearch(50);
    }
}
|
|
234
|
+
/**
 * Handle a 'setEf' request: forward the query-time ef parameter to the index.
 * Silently does nothing when the index object has no setEfSearch method.
 *
 * @param payload Carries ef, the value to apply.
 * @throws Error when no index has been created yet.
 */
function handleSetEf(payload) {
    if (!index) {
        throw new Error('Index not initialized');
    }
    const supportsEfSearch = typeof index.setEfSearch === 'function';
    if (supportsEfSearch) {
        index.setEfSearch(payload.ef);
    }
}
|
|
242
|
+
/**
 * Handle an 'indexExists' request.
 *
 * @param payload Carries indexPath, the filesystem path to probe.
 * @returns true when something exists at that path, false otherwise.
 */
function handleIndexExists(payload) {
    const { indexPath: candidatePath } = payload;
    return existsSync(candidatePath);
}
|
|
245
|
+
// Main message handler
|
|
246
|
+
// Dispatch each request from the main thread to its handler and always answer
// with exactly one message carrying the request id: type 'success' (with an
// optional payload) or type 'error' (with the error text).
parentPort.on('message', async (request) => {
    try {
        const { type, payload } = request;
        const reply = { id: request.id, type: 'success' };
        // Several operations answer with the vector count after they finish.
        const attachCount = () => {
            reply.payload = { count: currentSize };
        };
        switch (type) {
            case 'init':
                await handleInit(payload);
                break;
            case 'loadIndex':
                await handleLoadIndex(payload);
                attachCount();
                break;
            case 'saveIndex':
                await handleSaveIndex();
                attachCount();
                break;
            case 'addVector':
                handleAddVector(payload);
                attachCount();
                break;
            case 'addVectors':
                handleAddVectors(payload);
                attachCount();
                break;
            case 'search':
                reply.payload = handleSearch(payload);
                break;
            case 'getCurrentCount':
                attachCount();
                break;
            case 'resizeIndex':
                handleResizeIndex(payload);
                break;
            case 'reset':
                await handleReset();
                reply.payload = { count: 0 };
                break;
            case 'setEf':
                handleSetEf(payload);
                break;
            case 'indexExists':
                reply.payload = { exists: handleIndexExists(payload) };
                break;
            case 'cleanup':
                // The main thread terminates the worker; an acknowledgement is enough.
                break;
            default:
                throw new Error(`Unknown request type: ${type}`);
        }
        parentPort.postMessage(reply);
    }
    catch (error) {
        // Only the message text is sent back to the main thread.
        const text = error instanceof Error ? error.message : String(error);
        parentPort.postMessage({
            id: request.id,
            type: 'error',
            error: text
        });
    }
});
|
|
304
|
+
//# sourceMappingURL=vector-index-worker.js.map
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CORE MODULE — Shared between text-only (rag-lite-ts) and future multimodal (rag-lite-mm)
|
|
3
|
+
* Model-agnostic. No transformer or modality-specific logic.
|
|
4
|
+
*
|
|
5
|
+
* Worker-based implementation to prevent WebAssembly memory accumulation.
|
|
6
|
+
*/
|
|
7
|
+
/**
 * Construction parameters for a vector index.
 */
export interface VectorIndexOptions {
    /** Number of float components in every vector stored in the index. */
    dimensions: number;
    /** Capacity the index is created with. */
    maxElements: number;
    /** Optional M parameter for index construction. */
    M?: number;
    /** Optional efConstruction parameter for index construction. */
    efConstruction?: number;
    /** Optional seed passed when the index is created. */
    seed?: number;
}
|
|
14
|
+
/**
 * Outcome of a nearest-neighbour query.
 */
export interface SearchResult {
    /** Neighbours found for the query, as numeric identifiers. */
    neighbors: number[];
    /** Distance values reported alongside the neighbours. */
    distances: number[];
}
|
|
18
|
+
/**
 * Worker-backed vector index. Except for indexExists() and getOptions(),
 * every public method is asynchronous and resolves through a Promise.
 */
export declare class VectorIndex {
    private worker;
    private indexPath;
    private options;
    private messageQueue;
    private messageId;
    private isInitialized;
    constructor(indexPath: string, options: VectorIndexOptions);
    /**
     * Resolves the location of the worker script.
     * Only compiled .js files are used, since workers cannot run TypeScript directly.
     */
    private getWorkerPath;
    /**
     * Makes sure a worker has been created and is ready.
     */
    private ensureWorker;
    /**
     * Posts a message to the worker and waits for its response.
     */
    private sendMessage;
    /**
     * Turns a Float32Array into an ArrayBuffer suitable for transfer.
     */
    private float32ArrayToBuffer;
    /**
     * Creates the HNSW index (cosine similarity, hnswlib-wasm).
     */
    initialize(): Promise<void>;
    /**
     * Loads a previously saved index from file.
     */
    loadIndex(): Promise<void>;
    /**
     * Writes the index out in binary format.
     */
    saveIndex(): Promise<void>;
    /**
     * Adds one vector to the HNSW index.
     * Asynchronous because the work happens in the worker.
     */
    addVector(embeddingId: number, vector: Float32Array): Promise<void>;
    /**
     * Adds several vectors to the index as one batch.
     * Asynchronous because the work happens in the worker.
     */
    addVectors(vectors: {
        id: number;
        vector: Float32Array;
    }[]): Promise<void>;
    /**
     * Finds the k nearest neighbours of a query vector.
     * Asynchronous because the work happens in the worker.
     */
    search(queryVector: Float32Array, k?: number): Promise<SearchResult>;
    /**
     * Reports how many vectors the index currently holds.
     * Asynchronous because the work happens in the worker.
     */
    getCurrentCount(): Promise<number>;
    /**
     * Tells whether the index exists on disk. Unlike most methods, this one is synchronous.
     */
    indexExists(): boolean;
    /**
     * Sets the query-time search parameter.
     * Asynchronous because the work happens in the worker.
     */
    setEf(ef: number): Promise<void>;
    /**
     * Enlarges the index so it can hold more vectors.
     * Asynchronous because the work happens in the worker.
     */
    resizeIndex(newMaxElements: number): Promise<void>;
    /**
     * Returns the index to an empty state: all vectors are removed from the
     * HNSW graph and from vectorStorage, while the index parameters
     * (dimensions, M, efConstruction) are kept.
     */
    reset(): Promise<void>;
    /**
     * Exposes the index options so callers can read the configuration.
     */
    getOptions(): VectorIndexOptions;
    /**
     * Terminates the worker and frees all WebAssembly memory.
     */
    cleanup(): Promise<void>;
}
|
|
107
|
+
//# sourceMappingURL=vector-index.d.ts.map
|