rag-lite-ts 2.1.1 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +88 -5
- package/dist/{cli → cjs/cli}/indexer.js +73 -15
- package/dist/cjs/cli/ui-server.d.ts +5 -0
- package/dist/cjs/cli/ui-server.js +152 -0
- package/dist/{cli.js → cjs/cli.js} +25 -6
- package/dist/{core → cjs/core}/binary-index-format.js +6 -3
- package/dist/{core → cjs/core}/db.d.ts +56 -0
- package/dist/{core → cjs/core}/db.js +105 -0
- package/dist/{core → cjs/core}/ingestion.js +3 -0
- package/dist/cjs/core/knowledge-base-manager.d.ts +109 -0
- package/dist/cjs/core/knowledge-base-manager.js +256 -0
- package/dist/{core → cjs/core}/model-validator.js +1 -1
- package/dist/{core → cjs/core}/search-pipeline.js +1 -1
- package/dist/{core → cjs/core}/search.js +1 -1
- package/dist/cjs/core/vector-index-messages.d.ts +52 -0
- package/dist/cjs/core/vector-index-messages.js +5 -0
- package/dist/cjs/core/vector-index-worker.d.ts +6 -0
- package/dist/cjs/core/vector-index-worker.js +304 -0
- package/dist/cjs/core/vector-index.d.ts +107 -0
- package/dist/cjs/core/vector-index.js +344 -0
- package/dist/{factories → cjs/factories}/ingestion-factory.js +3 -7
- package/dist/{factories → cjs/factories}/search-factory.js +11 -0
- package/dist/{index-manager.d.ts → cjs/index-manager.d.ts} +23 -3
- package/dist/{index-manager.js → cjs/index-manager.js} +84 -15
- package/dist/{index.d.ts → cjs/index.d.ts} +2 -1
- package/dist/{index.js → cjs/index.js} +3 -1
- package/dist/esm/api-errors.d.ts +90 -0
- package/dist/esm/api-errors.js +320 -0
- package/dist/esm/cli/indexer.d.ts +11 -0
- package/dist/esm/cli/indexer.js +529 -0
- package/dist/esm/cli/search.d.ts +7 -0
- package/dist/esm/cli/search.js +332 -0
- package/dist/esm/cli/ui-server.d.ts +5 -0
- package/dist/esm/cli/ui-server.js +152 -0
- package/dist/esm/cli.d.ts +3 -0
- package/dist/esm/cli.js +548 -0
- package/dist/esm/config.d.ts +51 -0
- package/dist/esm/config.js +79 -0
- package/dist/esm/core/abstract-embedder.d.ts +125 -0
- package/dist/esm/core/abstract-embedder.js +264 -0
- package/dist/esm/core/actionable-error-messages.d.ts +60 -0
- package/dist/esm/core/actionable-error-messages.js +397 -0
- package/dist/esm/core/adapters.d.ts +93 -0
- package/dist/esm/core/adapters.js +139 -0
- package/dist/esm/core/batch-processing-optimizer.d.ts +155 -0
- package/dist/esm/core/batch-processing-optimizer.js +536 -0
- package/dist/esm/core/binary-index-format.d.ts +78 -0
- package/dist/esm/core/binary-index-format.js +294 -0
- package/dist/esm/core/chunker.d.ts +119 -0
- package/dist/esm/core/chunker.js +73 -0
- package/dist/esm/core/cli-database-utils.d.ts +53 -0
- package/dist/esm/core/cli-database-utils.js +239 -0
- package/dist/esm/core/config.d.ts +102 -0
- package/dist/esm/core/config.js +247 -0
- package/dist/esm/core/content-errors.d.ts +111 -0
- package/dist/esm/core/content-errors.js +362 -0
- package/dist/esm/core/content-manager.d.ts +335 -0
- package/dist/esm/core/content-manager.js +1476 -0
- package/dist/esm/core/content-performance-optimizer.d.ts +150 -0
- package/dist/esm/core/content-performance-optimizer.js +516 -0
- package/dist/esm/core/content-resolver.d.ts +104 -0
- package/dist/esm/core/content-resolver.js +285 -0
- package/dist/esm/core/cross-modal-search.d.ts +164 -0
- package/dist/esm/core/cross-modal-search.js +342 -0
- package/dist/esm/core/database-connection-manager.d.ts +109 -0
- package/dist/esm/core/database-connection-manager.js +310 -0
- package/dist/esm/core/db.d.ts +269 -0
- package/dist/esm/core/db.js +1000 -0
- package/dist/esm/core/embedder-factory.d.ts +154 -0
- package/dist/esm/core/embedder-factory.js +311 -0
- package/dist/esm/core/error-handler.d.ts +112 -0
- package/dist/esm/core/error-handler.js +239 -0
- package/dist/esm/core/index.d.ts +59 -0
- package/dist/esm/core/index.js +69 -0
- package/dist/esm/core/ingestion.d.ts +202 -0
- package/dist/esm/core/ingestion.js +904 -0
- package/dist/esm/core/interfaces.d.ts +408 -0
- package/dist/esm/core/interfaces.js +106 -0
- package/dist/esm/core/knowledge-base-manager.d.ts +109 -0
- package/dist/esm/core/knowledge-base-manager.js +256 -0
- package/dist/esm/core/lazy-dependency-loader.d.ts +147 -0
- package/dist/esm/core/lazy-dependency-loader.js +435 -0
- package/dist/esm/core/mode-detection-service.d.ts +150 -0
- package/dist/esm/core/mode-detection-service.js +565 -0
- package/dist/esm/core/mode-model-validator.d.ts +92 -0
- package/dist/esm/core/mode-model-validator.js +203 -0
- package/dist/esm/core/model-registry.d.ts +116 -0
- package/dist/esm/core/model-registry.js +411 -0
- package/dist/esm/core/model-validator.d.ts +217 -0
- package/dist/esm/core/model-validator.js +782 -0
- package/dist/esm/core/path-manager.d.ts +47 -0
- package/dist/esm/core/path-manager.js +71 -0
- package/dist/esm/core/raglite-paths.d.ts +121 -0
- package/dist/esm/core/raglite-paths.js +145 -0
- package/dist/esm/core/reranking-config.d.ts +42 -0
- package/dist/esm/core/reranking-config.js +147 -0
- package/dist/esm/core/reranking-factory.d.ts +92 -0
- package/dist/esm/core/reranking-factory.js +410 -0
- package/dist/esm/core/reranking-strategies.d.ts +310 -0
- package/dist/esm/core/reranking-strategies.js +650 -0
- package/dist/esm/core/resource-cleanup.d.ts +163 -0
- package/dist/esm/core/resource-cleanup.js +371 -0
- package/dist/esm/core/resource-manager.d.ts +212 -0
- package/dist/esm/core/resource-manager.js +564 -0
- package/dist/esm/core/search-pipeline.d.ts +111 -0
- package/dist/esm/core/search-pipeline.js +287 -0
- package/dist/esm/core/search.d.ts +141 -0
- package/dist/esm/core/search.js +320 -0
- package/dist/esm/core/streaming-operations.d.ts +145 -0
- package/dist/esm/core/streaming-operations.js +409 -0
- package/dist/esm/core/types.d.ts +66 -0
- package/dist/esm/core/types.js +6 -0
- package/dist/esm/core/universal-embedder.d.ts +177 -0
- package/dist/esm/core/universal-embedder.js +139 -0
- package/dist/esm/core/validation-messages.d.ts +99 -0
- package/dist/esm/core/validation-messages.js +334 -0
- package/dist/esm/core/vector-index-messages.d.ts +52 -0
- package/dist/esm/core/vector-index-messages.js +5 -0
- package/dist/esm/core/vector-index-worker.d.ts +6 -0
- package/dist/esm/core/vector-index-worker.js +304 -0
- package/dist/esm/core/vector-index.d.ts +107 -0
- package/dist/esm/core/vector-index.js +344 -0
- package/dist/esm/dom-polyfills.d.ts +6 -0
- package/dist/esm/dom-polyfills.js +37 -0
- package/dist/esm/factories/index.d.ts +27 -0
- package/dist/esm/factories/index.js +29 -0
- package/dist/esm/factories/ingestion-factory.d.ts +200 -0
- package/dist/esm/factories/ingestion-factory.js +473 -0
- package/dist/esm/factories/search-factory.d.ts +154 -0
- package/dist/esm/factories/search-factory.js +355 -0
- package/dist/esm/file-processor.d.ts +147 -0
- package/dist/esm/file-processor.js +963 -0
- package/dist/esm/index-manager.d.ts +136 -0
- package/dist/esm/index-manager.js +667 -0
- package/dist/esm/index.d.ts +76 -0
- package/dist/esm/index.js +112 -0
- package/dist/esm/indexer.d.ts +7 -0
- package/dist/esm/indexer.js +54 -0
- package/dist/esm/ingestion.d.ts +63 -0
- package/dist/esm/ingestion.js +124 -0
- package/dist/esm/mcp-server.d.ts +46 -0
- package/dist/esm/mcp-server.js +1820 -0
- package/dist/esm/multimodal/clip-embedder.d.ts +327 -0
- package/dist/esm/multimodal/clip-embedder.js +996 -0
- package/dist/esm/multimodal/index.d.ts +6 -0
- package/dist/esm/multimodal/index.js +6 -0
- package/dist/esm/preprocess.d.ts +19 -0
- package/dist/esm/preprocess.js +203 -0
- package/dist/esm/preprocessors/index.d.ts +17 -0
- package/dist/esm/preprocessors/index.js +38 -0
- package/dist/esm/preprocessors/mdx.d.ts +25 -0
- package/dist/esm/preprocessors/mdx.js +101 -0
- package/dist/esm/preprocessors/mermaid.d.ts +68 -0
- package/dist/esm/preprocessors/mermaid.js +329 -0
- package/dist/esm/preprocessors/registry.d.ts +56 -0
- package/dist/esm/preprocessors/registry.js +179 -0
- package/dist/esm/run-error-recovery-tests.d.ts +7 -0
- package/dist/esm/run-error-recovery-tests.js +101 -0
- package/dist/esm/search-standalone.d.ts +7 -0
- package/dist/esm/search-standalone.js +117 -0
- package/dist/esm/search.d.ts +99 -0
- package/dist/esm/search.js +177 -0
- package/dist/esm/test-utils.d.ts +18 -0
- package/dist/esm/test-utils.js +27 -0
- package/dist/esm/text/chunker.d.ts +33 -0
- package/dist/esm/text/chunker.js +279 -0
- package/dist/esm/text/embedder.d.ts +111 -0
- package/dist/esm/text/embedder.js +386 -0
- package/dist/esm/text/index.d.ts +8 -0
- package/dist/esm/text/index.js +9 -0
- package/dist/esm/text/preprocessors/index.d.ts +17 -0
- package/dist/esm/text/preprocessors/index.js +38 -0
- package/dist/esm/text/preprocessors/mdx.d.ts +25 -0
- package/dist/esm/text/preprocessors/mdx.js +101 -0
- package/dist/esm/text/preprocessors/mermaid.d.ts +68 -0
- package/dist/esm/text/preprocessors/mermaid.js +330 -0
- package/dist/esm/text/preprocessors/registry.d.ts +56 -0
- package/dist/esm/text/preprocessors/registry.js +180 -0
- package/dist/esm/text/reranker.d.ts +49 -0
- package/dist/esm/text/reranker.js +274 -0
- package/dist/esm/text/sentence-transformer-embedder.d.ts +96 -0
- package/dist/esm/text/sentence-transformer-embedder.js +340 -0
- package/dist/esm/text/tokenizer.d.ts +22 -0
- package/dist/esm/text/tokenizer.js +64 -0
- package/dist/esm/types.d.ts +83 -0
- package/dist/esm/types.js +3 -0
- package/dist/esm/utils/vector-math.d.ts +31 -0
- package/dist/esm/utils/vector-math.js +70 -0
- package/package.json +39 -14
- package/dist/core/vector-index.d.ts +0 -72
- package/dist/core/vector-index.js +0 -331
- /package/dist/{api-errors.d.ts → cjs/api-errors.d.ts} +0 -0
- /package/dist/{api-errors.js → cjs/api-errors.js} +0 -0
- /package/dist/{cli → cjs/cli}/indexer.d.ts +0 -0
- /package/dist/{cli → cjs/cli}/search.d.ts +0 -0
- /package/dist/{cli → cjs/cli}/search.js +0 -0
- /package/dist/{cli.d.ts → cjs/cli.d.ts} +0 -0
- /package/dist/{config.d.ts → cjs/config.d.ts} +0 -0
- /package/dist/{config.js → cjs/config.js} +0 -0
- /package/dist/{core → cjs/core}/abstract-embedder.d.ts +0 -0
- /package/dist/{core → cjs/core}/abstract-embedder.js +0 -0
- /package/dist/{core → cjs/core}/actionable-error-messages.d.ts +0 -0
- /package/dist/{core → cjs/core}/actionable-error-messages.js +0 -0
- /package/dist/{core → cjs/core}/adapters.d.ts +0 -0
- /package/dist/{core → cjs/core}/adapters.js +0 -0
- /package/dist/{core → cjs/core}/batch-processing-optimizer.d.ts +0 -0
- /package/dist/{core → cjs/core}/batch-processing-optimizer.js +0 -0
- /package/dist/{core → cjs/core}/binary-index-format.d.ts +0 -0
- /package/dist/{core → cjs/core}/chunker.d.ts +0 -0
- /package/dist/{core → cjs/core}/chunker.js +0 -0
- /package/dist/{core → cjs/core}/cli-database-utils.d.ts +0 -0
- /package/dist/{core → cjs/core}/cli-database-utils.js +0 -0
- /package/dist/{core → cjs/core}/config.d.ts +0 -0
- /package/dist/{core → cjs/core}/config.js +0 -0
- /package/dist/{core → cjs/core}/content-errors.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-errors.js +0 -0
- /package/dist/{core → cjs/core}/content-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-manager.js +0 -0
- /package/dist/{core → cjs/core}/content-performance-optimizer.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-performance-optimizer.js +0 -0
- /package/dist/{core → cjs/core}/content-resolver.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-resolver.js +0 -0
- /package/dist/{core → cjs/core}/cross-modal-search.d.ts +0 -0
- /package/dist/{core → cjs/core}/cross-modal-search.js +0 -0
- /package/dist/{core → cjs/core}/database-connection-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/database-connection-manager.js +0 -0
- /package/dist/{core → cjs/core}/embedder-factory.d.ts +0 -0
- /package/dist/{core → cjs/core}/embedder-factory.js +0 -0
- /package/dist/{core → cjs/core}/error-handler.d.ts +0 -0
- /package/dist/{core → cjs/core}/error-handler.js +0 -0
- /package/dist/{core → cjs/core}/index.d.ts +0 -0
- /package/dist/{core → cjs/core}/index.js +0 -0
- /package/dist/{core → cjs/core}/ingestion.d.ts +0 -0
- /package/dist/{core → cjs/core}/interfaces.d.ts +0 -0
- /package/dist/{core → cjs/core}/interfaces.js +0 -0
- /package/dist/{core → cjs/core}/lazy-dependency-loader.d.ts +0 -0
- /package/dist/{core → cjs/core}/lazy-dependency-loader.js +0 -0
- /package/dist/{core → cjs/core}/mode-detection-service.d.ts +0 -0
- /package/dist/{core → cjs/core}/mode-detection-service.js +0 -0
- /package/dist/{core → cjs/core}/mode-model-validator.d.ts +0 -0
- /package/dist/{core → cjs/core}/mode-model-validator.js +0 -0
- /package/dist/{core → cjs/core}/model-registry.d.ts +0 -0
- /package/dist/{core → cjs/core}/model-registry.js +0 -0
- /package/dist/{core → cjs/core}/model-validator.d.ts +0 -0
- /package/dist/{core → cjs/core}/path-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/path-manager.js +0 -0
- /package/dist/{core → cjs/core}/raglite-paths.d.ts +0 -0
- /package/dist/{core → cjs/core}/raglite-paths.js +0 -0
- /package/dist/{core → cjs/core}/reranking-config.d.ts +0 -0
- /package/dist/{core → cjs/core}/reranking-config.js +0 -0
- /package/dist/{core → cjs/core}/reranking-factory.d.ts +0 -0
- /package/dist/{core → cjs/core}/reranking-factory.js +0 -0
- /package/dist/{core → cjs/core}/reranking-strategies.d.ts +0 -0
- /package/dist/{core → cjs/core}/reranking-strategies.js +0 -0
- /package/dist/{core → cjs/core}/resource-cleanup.d.ts +0 -0
- /package/dist/{core → cjs/core}/resource-cleanup.js +0 -0
- /package/dist/{core → cjs/core}/resource-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/resource-manager.js +0 -0
- /package/dist/{core → cjs/core}/search-pipeline.d.ts +0 -0
- /package/dist/{core → cjs/core}/search.d.ts +0 -0
- /package/dist/{core → cjs/core}/streaming-operations.d.ts +0 -0
- /package/dist/{core → cjs/core}/streaming-operations.js +0 -0
- /package/dist/{core → cjs/core}/types.d.ts +0 -0
- /package/dist/{core → cjs/core}/types.js +0 -0
- /package/dist/{core → cjs/core}/universal-embedder.d.ts +0 -0
- /package/dist/{core → cjs/core}/universal-embedder.js +0 -0
- /package/dist/{core → cjs/core}/validation-messages.d.ts +0 -0
- /package/dist/{core → cjs/core}/validation-messages.js +0 -0
- /package/dist/{dom-polyfills.d.ts → cjs/dom-polyfills.d.ts} +0 -0
- /package/dist/{dom-polyfills.js → cjs/dom-polyfills.js} +0 -0
- /package/dist/{factories → cjs/factories}/index.d.ts +0 -0
- /package/dist/{factories → cjs/factories}/index.js +0 -0
- /package/dist/{factories → cjs/factories}/ingestion-factory.d.ts +0 -0
- /package/dist/{factories → cjs/factories}/search-factory.d.ts +0 -0
- /package/dist/{file-processor.d.ts → cjs/file-processor.d.ts} +0 -0
- /package/dist/{file-processor.js → cjs/file-processor.js} +0 -0
- /package/dist/{indexer.d.ts → cjs/indexer.d.ts} +0 -0
- /package/dist/{indexer.js → cjs/indexer.js} +0 -0
- /package/dist/{ingestion.d.ts → cjs/ingestion.d.ts} +0 -0
- /package/dist/{ingestion.js → cjs/ingestion.js} +0 -0
- /package/dist/{mcp-server.d.ts → cjs/mcp-server.d.ts} +0 -0
- /package/dist/{mcp-server.js → cjs/mcp-server.js} +0 -0
- /package/dist/{multimodal → cjs/multimodal}/clip-embedder.d.ts +0 -0
- /package/dist/{multimodal → cjs/multimodal}/clip-embedder.js +0 -0
- /package/dist/{multimodal → cjs/multimodal}/index.d.ts +0 -0
- /package/dist/{multimodal → cjs/multimodal}/index.js +0 -0
- /package/dist/{preprocess.d.ts → cjs/preprocess.d.ts} +0 -0
- /package/dist/{preprocess.js → cjs/preprocess.js} +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/index.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/index.js +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mdx.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mdx.js +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mermaid.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mermaid.js +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/registry.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/registry.js +0 -0
- /package/dist/{run-error-recovery-tests.d.ts → cjs/run-error-recovery-tests.d.ts} +0 -0
- /package/dist/{run-error-recovery-tests.js → cjs/run-error-recovery-tests.js} +0 -0
- /package/dist/{search-standalone.d.ts → cjs/search-standalone.d.ts} +0 -0
- /package/dist/{search-standalone.js → cjs/search-standalone.js} +0 -0
- /package/dist/{search.d.ts → cjs/search.d.ts} +0 -0
- /package/dist/{search.js → cjs/search.js} +0 -0
- /package/dist/{test-utils.d.ts → cjs/test-utils.d.ts} +0 -0
- /package/dist/{test-utils.js → cjs/test-utils.js} +0 -0
- /package/dist/{text → cjs/text}/chunker.d.ts +0 -0
- /package/dist/{text → cjs/text}/chunker.js +0 -0
- /package/dist/{text → cjs/text}/embedder.d.ts +0 -0
- /package/dist/{text → cjs/text}/embedder.js +0 -0
- /package/dist/{text → cjs/text}/index.d.ts +0 -0
- /package/dist/{text → cjs/text}/index.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/index.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/index.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mdx.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mdx.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mermaid.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mermaid.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/registry.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/registry.js +0 -0
- /package/dist/{text → cjs/text}/reranker.d.ts +0 -0
- /package/dist/{text → cjs/text}/reranker.js +0 -0
- /package/dist/{text → cjs/text}/sentence-transformer-embedder.d.ts +0 -0
- /package/dist/{text → cjs/text}/sentence-transformer-embedder.js +0 -0
- /package/dist/{text → cjs/text}/tokenizer.d.ts +0 -0
- /package/dist/{text → cjs/text}/tokenizer.js +0 -0
- /package/dist/{types.d.ts → cjs/types.d.ts} +0 -0
- /package/dist/{types.js → cjs/types.js} +0 -0
- /package/dist/{utils → cjs/utils}/vector-math.d.ts +0 -0
- /package/dist/{utils → cjs/utils}/vector-math.js +0 -0
|
@@ -0,0 +1,344 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CORE MODULE — Shared between text-only (rag-lite-ts) and future multimodal (rag-lite-mm)
|
|
3
|
+
* Model-agnostic. No transformer or modality-specific logic.
|
|
4
|
+
*
|
|
5
|
+
* Worker-based implementation to prevent WebAssembly memory accumulation.
|
|
6
|
+
*/
|
|
7
|
+
import { Worker } from 'worker_threads';
|
|
8
|
+
import { existsSync } from 'fs';
|
|
9
|
+
import { fileURLToPath } from 'url';
|
|
10
|
+
import { dirname, join } from 'path';
|
|
11
|
+
import { handleError, ErrorCategory, ErrorSeverity, createError } from './error-handler.js';
|
|
12
|
+
import { createMissingFileError, createDimensionMismatchError } from './actionable-error-messages.js';
|
|
13
|
+
/**
 * Main-thread facade for an HNSW vector index that lives inside a worker thread.
 *
 * Every index operation is forwarded to `vector-index-worker.js` as a
 * `{ id, type, payload }` request and resolved when the worker answers with a
 * message carrying the same `id`. Running the index in a worker means that
 * terminating the worker (see `cleanup`) releases the WebAssembly memory it used.
 */
export class VectorIndex {
    /** Active worker thread, or `null` when no worker is running. */
    worker = null;
    /** Path of the index file on disk. */
    indexPath;
    /** Index options: caller-supplied values merged over the defaults set in the constructor. */
    options;
    /** Pending requests keyed by message id; each value is `{ resolve, reject }`. */
    messageQueue = new Map();
    /** Monotonically increasing id used to correlate requests with worker responses. */
    messageId = 0;
    /** True once `initialize()` or `loadIndex()` has succeeded on the current worker. */
    isInitialized = false;
    /**
     * @param indexPath Path of the index file on disk.
     * @param options Index options. `efConstruction`, `M` and `seed` default to
     *   200, 16 and 100; `dimensions` and `maxElements` are read from here by
     *   other methods and have no default.
     */
    constructor(indexPath, options) {
        this.indexPath = indexPath;
        this.options = {
            efConstruction: 200,
            M: 16,
            seed: 100,
            ...options
        };
    }
    /**
     * Get the path to the worker script.
     * Always uses compiled .js files - workers cannot execute TypeScript directly.
     *
     * Probe order: next to this module, then (when the module path contains
     * `src`) the project's `dist/esm` and `dist/cjs` output, then (when the
     * module path contains `node_modules`) the installed package's `dist/esm`
     * and `dist/cjs` output.
     *
     * @returns Path of the first candidate that exists on disk.
     * @throws Error listing every probed path when none of them exists.
     */
    getWorkerPath() {
        const currentFile = fileURLToPath(import.meta.url);
        const currentDir = dirname(currentFile);
        const workerFile = 'vector-index-worker.js';
        // Always prefer the compiled .js sitting next to this module.
        const jsPath = join(currentDir, workerFile);
        const candidates = [jsPath];
        // If running from src/ (development), try dist/ paths.
        if (currentDir.includes('src')) {
            // Find project root (go up from src/core).
            const projectRoot = currentDir.replace(/[\\/]src[\\/]core.*$/, '');
            candidates.push(join(projectRoot, 'dist', 'esm', 'core', workerFile), join(projectRoot, 'dist', 'cjs', 'core', workerFile));
        }
        // If running from node_modules (installed package), try dist paths.
        if (currentDir.includes('node_modules')) {
            const packageRoot = currentDir.split('node_modules')[0];
            candidates.push(join(packageRoot, 'node_modules', 'rag-lite-ts', 'dist', 'esm', 'core', workerFile), join(packageRoot, 'node_modules', 'rag-lite-ts', 'dist', 'cjs', 'core', workerFile));
        }
        const found = candidates.find((candidate) => existsSync(candidate));
        if (found !== undefined) {
            return found;
        }
        // Final fallback - fail with a clear error that names everything we probed.
        throw new Error(`Worker file not found. Expected: ${jsPath}\n` +
            'Please run "npm run build" to compile the vector-index-worker.ts file.\n' +
            `Current directory: ${currentDir}\n` +
            `Checked paths: ${candidates.join(', ')}`);
    }
    /**
     * Reject every pending request with `error` and empty the queue.
     * Used whenever the worker can no longer answer outstanding messages.
     */
    rejectAllPending(error) {
        for (const handler of this.messageQueue.values()) {
            handler.reject(error);
        }
        this.messageQueue.clear();
    }
    /**
     * Ensure worker is created and ready.
     * No-op when a worker already exists; otherwise spawns one and wires up
     * its `message`, `error` and `exit` handlers.
     */
    async ensureWorker() {
        if (this.worker) {
            return;
        }
        const workerPath = this.getWorkerPath();
        this.worker = new Worker(workerPath);
        // Route each response to the promise that is waiting on its id.
        this.worker.on('message', (response) => {
            const handler = this.messageQueue.get(response.id);
            if (handler) {
                this.messageQueue.delete(response.id);
                if (response.type === 'error') {
                    handler.reject(new Error(response.error || 'Unknown error'));
                }
                else {
                    handler.resolve(response.payload);
                }
            }
        });
        // Handle worker errors: nothing pending can complete any more.
        this.worker.on('error', (error) => {
            console.error('VectorIndex worker error:', error);
            this.rejectAllPending(error);
        });
        // Handle worker exit: fail pending requests and forget the dead worker
        // so the next request spawns a fresh one.
        this.worker.on('exit', (code) => {
            if (code !== 0) {
                console.error(`VectorIndex worker exited with code ${code}`);
            }
            this.rejectAllPending(new Error(`Worker exited with code ${code}`));
            this.worker = null;
            this.isInitialized = false;
        });
    }
    /**
     * Send a message to the worker and wait for response.
     *
     * @param type Request type understood by the worker (e.g. 'init', 'search').
     * @param payload Optional request data.
     * @returns Promise resolved with the worker's response payload, or rejected
     *   when the worker reports an error, crashes, exits, or the message cannot
     *   be posted.
     */
    async sendMessage(type, payload) {
        await this.ensureWorker();
        return new Promise((resolve, reject) => {
            const id = this.messageId++;
            this.messageQueue.set(id, { resolve, reject });
            const request = { id, type, payload };
            try {
                this.worker.postMessage(request);
            }
            catch (error) {
                // postMessage can throw synchronously (e.g. uncloneable payload);
                // drop the queue entry so it does not linger forever.
                this.messageQueue.delete(id);
                reject(error);
            }
        });
    }
    /**
     * Convert Float32Array to ArrayBuffer for transfer.
     *
     * Always returns a fresh ArrayBuffer holding exactly the elements of
     * `vector`, regardless of whether the view covers only part of its
     * underlying buffer or is backed by a SharedArrayBuffer.
     */
    float32ArrayToBuffer(vector) {
        // Constructing a typed array from another typed array copies the
        // elements into a new, non-shared ArrayBuffer.
        return new Float32Array(vector).buffer;
    }
    /**
     * Initialize the HNSW index with cosine similarity using hnswlib-wasm.
     *
     * @throws Re-throws the underlying error after reporting it through
     *   `handleError` as a fatal index error.
     */
    async initialize() {
        try {
            const payload = {
                dimensions: this.options.dimensions,
                maxElements: this.options.maxElements,
                M: this.options.M,
                efConstruction: this.options.efConstruction,
                seed: this.options.seed,
                indexPath: this.indexPath // Pass indexPath to worker for saveIndex operations
            };
            await this.sendMessage('init', payload);
            this.isInitialized = true;
            console.log(`Initialized HNSW index with ${this.options.dimensions} dimensions using hnswlib-wasm (worker)`);
        }
        catch (error) {
            handleError(createError.index(`Failed to initialize vector index: ${error instanceof Error ? error.message : String(error)}`), 'Vector Index Initialization', {
                category: ErrorCategory.INDEX,
                severity: ErrorSeverity.FATAL
            });
            throw error;
        }
    }
    /**
     * Load existing index from file using hnswlib-wasm.
     *
     * @throws The error built by `createMissingFileError` when the index file
     *   does not exist; a wrapping Error when the worker fails to load it.
     */
    async loadIndex() {
        if (!existsSync(this.indexPath)) {
            throw createMissingFileError(this.indexPath, 'index', {
                operationContext: 'VectorIndex.loadIndex'
            });
        }
        try {
            const payload = {
                indexPath: this.indexPath
            };
            const result = await this.sendMessage('loadIndex', payload);
            this.isInitialized = true;
            console.log(`✓ Loaded HNSW index with ${result.count} vectors from ${this.indexPath} (worker)`);
        }
        catch (error) {
            throw new Error(`Failed to load index from ${this.indexPath}: ${error}`);
        }
    }
    /**
     * Save index to binary format.
     *
     * @throws Error when the index is not initialized or the worker fails to save.
     */
    async saveIndex() {
        if (!this.isInitialized) {
            throw new Error('Index not initialized');
        }
        try {
            const result = await this.sendMessage('saveIndex');
            const actualSize = result.count;
            console.log(`✓ Saved HNSW index with ${actualSize} vectors (${(actualSize * this.options.dimensions * 4 / 1024).toFixed(2)} KB of vector data) to ${this.indexPath} (worker)`);
        }
        catch (error) {
            throw new Error(`Failed to save index to ${this.indexPath}: ${error}`);
        }
    }
    /**
     * Add a single vector to the HNSW index.
     * Async because the work happens in the worker.
     *
     * @param embeddingId Id sent to the worker as the vector's `id`.
     * @param vector Float32Array whose length must equal `options.dimensions`.
     * @throws Error when the index is not initialized; the error built by
     *   `createDimensionMismatchError` when the length is wrong.
     */
    async addVector(embeddingId, vector) {
        if (!this.isInitialized) {
            throw new Error('Index not initialized');
        }
        if (vector.length !== this.options.dimensions) {
            throw createDimensionMismatchError(this.options.dimensions, vector.length, 'vector addition', { operationContext: 'VectorIndex.addVector' });
        }
        const payload = {
            id: embeddingId,
            vector: this.float32ArrayToBuffer(vector),
            dimensions: vector.length
        };
        await this.sendMessage('addVector', payload);
    }
    /**
     * Add multiple vectors to the index in batch.
     * Async because the work happens in the worker.
     *
     * @param vectors Array of `{ id, vector }` entries; every `vector` must have
     *   `options.dimensions` elements.
     * @throws Error when the index is not initialized; the error built by
     *   `createDimensionMismatchError` for the first entry with a wrong length
     *   (checked before anything is sent, matching `addVector`).
     */
    async addVectors(vectors) {
        if (!this.isInitialized) {
            throw new Error('Index not initialized');
        }
        for (const entry of vectors) {
            if (entry.vector.length !== this.options.dimensions) {
                throw createDimensionMismatchError(this.options.dimensions, entry.vector.length, 'vector addition', { operationContext: 'VectorIndex.addVectors' });
            }
        }
        const payload = {
            vectors: vectors.map(v => ({
                id: v.id,
                vector: this.float32ArrayToBuffer(v.vector),
                dimensions: v.vector.length
            }))
        };
        await this.sendMessage('addVectors', payload);
    }
    /**
     * Search for k nearest neighbors using hnswlib-wasm.
     * Async because the work happens in the worker.
     *
     * @param queryVector Float32Array whose length must equal `options.dimensions`.
     * @param k Number of neighbors requested (default 5).
     * @returns The worker's `{ neighbors, distances }` result.
     * @throws Error when the index is not initialized; the error built by
     *   `createDimensionMismatchError` when the query length is wrong.
     */
    async search(queryVector, k = 5) {
        if (!this.isInitialized) {
            throw new Error('Index not initialized');
        }
        if (queryVector.length !== this.options.dimensions) {
            throw createDimensionMismatchError(this.options.dimensions, queryVector.length, 'vector search', { operationContext: 'VectorIndex.search' });
        }
        const payload = {
            queryVector: this.float32ArrayToBuffer(queryVector),
            dimensions: queryVector.length,
            k
        };
        const result = await this.sendMessage('search', payload);
        // Normalize an empty result to a plain two-field object.
        if (result.neighbors.length === 0 && result.distances.length === 0) {
            return { neighbors: [], distances: [] };
        }
        return result;
    }
    /**
     * Get current number of vectors in the index.
     * Async because the work happens in the worker.
     *
     * @returns 0 when the index is not initialized, otherwise the count
     *   reported by the worker.
     */
    async getCurrentCount() {
        if (!this.isInitialized) {
            return 0;
        }
        const result = await this.sendMessage('getCurrentCount');
        return result.count;
    }
    /**
     * Check if index exists on disk.
     * Synchronous since it is just a file system check.
     */
    indexExists() {
        return existsSync(this.indexPath);
    }
    /**
     * Set search parameters for query time.
     * Best effort: a worker failure is logged and not re-thrown.
     *
     * @throws Error when the index is not initialized.
     */
    async setEf(ef) {
        if (!this.isInitialized) {
            throw new Error('Index not initialized');
        }
        const payload = { ef };
        try {
            await this.sendMessage('setEf', payload);
        }
        catch (error) {
            console.log(`Failed to set ef: ${error}`);
        }
    }
    /**
     * Resize index to accommodate more vectors.
     *
     * @param newMaxElements New capacity; must be strictly greater than the
     *   current `options.maxElements`.
     * @throws Error when the index is not initialized or the new capacity is
     *   not larger than the current one.
     */
    async resizeIndex(newMaxElements) {
        if (!this.isInitialized) {
            throw new Error('Index not initialized');
        }
        if (newMaxElements <= this.options.maxElements) {
            throw new Error(`New max elements (${newMaxElements}) must be greater than current (${this.options.maxElements})`);
        }
        const payload = { newMaxElements };
        await this.sendMessage('resizeIndex', payload);
        // Only record the new capacity once the worker has accepted it.
        this.options.maxElements = newMaxElements;
        console.log(`Resized index to accommodate ${newMaxElements} vectors`);
    }
    /**
     * Reset the vector index to an empty state by sending a 'reset' request
     * to the worker. The options held by this object are left untouched.
     */
    async reset() {
        console.log('🔄 VectorIndex: Resetting to empty state...');
        await this.sendMessage('reset');
        console.log('✓ VectorIndex reset: cleared all vectors');
    }
    /**
     * Get index options (for external access to configuration).
     *
     * @returns A shallow copy, so callers cannot mutate the live options.
     */
    getOptions() {
        return { ...this.options };
    }
    /**
     * Cleanup: terminate worker and free all WebAssembly memory.
     * No-op when no worker is running. Local state is reset even if
     * terminating the worker fails, and any request still pending afterwards
     * is rejected rather than left unresolved.
     */
    async cleanup() {
        // Capture the reference now: the 'exit' handler nulls this.worker, which
        // can happen while we are awaiting the cleanup message below.
        const worker = this.worker;
        if (!worker) {
            return;
        }
        try {
            // Send cleanup message (worker will acknowledge).
            await this.sendMessage('cleanup');
        }
        catch (error) {
            // Ignore errors during cleanup - the worker is terminated regardless.
        }
        try {
            // Terminate worker - this frees ALL WebAssembly memory.
            await worker.terminate();
        }
        finally {
            // Do not clobber a replacement worker spawned in the meantime.
            if (this.worker === worker) {
                this.worker = null;
            }
            this.isInitialized = false;
            this.rejectAllPending(new Error('VectorIndex worker terminated during cleanup'));
        }
    }
}
|
|
344
|
+
//# sourceMappingURL=vector-index.js.map
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DOM polyfills for Node.js environment
|
|
3
|
+
* Required for transformers.js and other browser-dependent libraries
|
|
4
|
+
*/
|
|
5
|
+
import { JSDOM } from 'jsdom';
|
|
6
|
+
// Only set up polyfills if we're in Node.js (not browser)
|
|
7
|
+
// Install the polyfills only when no global `window` exists (i.e. not in a browser).
if (typeof window === 'undefined') {
    console.log('Setting up DOM polyfills for Node.js environment...');
    // Smallest possible document: an empty <body> is enough to obtain window/document/navigator.
    const jsdomOptions = {
        pretendToBeVisual: true,
        resources: 'usable'
    };
    const dom = new JSDOM('<!DOCTYPE html><html><body></body></html>', jsdomOptions);
    // transformers.js expects a global `self`; alias it to the global object when absent.
    if (typeof globalThis.self === 'undefined')
        globalThis.self = globalThis;
    // Same alias through the legacy `global` name, for older Node.js versions.
    if (typeof global.self === 'undefined')
        global.self = global;
    const selfState = typeof self !== 'undefined' ? 'defined' : 'undefined';
    console.log('DOM polyfills set up successfully. self is now:', selfState);
    // Each remaining global is filled in only if nothing has defined it already.
    if (typeof globalThis.window === 'undefined')
        globalThis.window = dom.window;
    if (typeof globalThis.document === 'undefined')
        globalThis.document = dom.window.document;
    if (typeof globalThis.navigator === 'undefined')
        globalThis.navigator = dom.window.navigator;
    // createImageBitmap is intentionally left undefined: RawImage.fromURL() loads
    // images correctly without it, whereas a stub that throws breaks image loading.
}
|
|
37
|
+
//# sourceMappingURL=dom-polyfills.js.map
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Factory exports for creating RAG instances
|
|
3
|
+
* Provides convenient factory functions for common use cases
|
|
4
|
+
*
|
|
5
|
+
* This module serves as the main entry point for factory functions that
|
|
6
|
+
* simplify the creation of search and ingestion systems.
|
|
7
|
+
* The factories handle complex initialization while providing clean APIs.
|
|
8
|
+
*
|
|
9
|
+
* MAIN FACTORY CLASSES:
|
|
10
|
+
* - IngestionFactory: Creates IngestionPipeline instances for document ingestion
|
|
11
|
+
* - SearchFactory: Creates SearchEngine with automatic mode detection (recommended)
|
|
12
|
+
*
|
|
13
|
+
* @example
|
|
14
|
+
* ```typescript
|
|
15
|
+
* import { IngestionFactory, SearchFactory } from './factories';
|
|
16
|
+
*
|
|
17
|
+
* // Create ingestion pipeline
|
|
18
|
+
* const ingestion = await IngestionFactory.create('./db.sqlite', './index.bin');
|
|
19
|
+
*
|
|
20
|
+
* // Create search engine with automatic mode detection
|
|
21
|
+
* const search = await SearchFactory.create('./index.bin', './db.sqlite');
|
|
22
|
+
* ```
|
|
23
|
+
*/
|
|
24
|
+
export { IngestionFactory } from './ingestion-factory.js';
|
|
25
|
+
export { SearchFactory } from './search-factory.js';
|
|
26
|
+
export type { IngestionFactoryOptions, ContentSystemConfig } from './ingestion-factory.js';
|
|
27
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Factory exports for creating RAG instances
|
|
3
|
+
* Provides convenient factory functions for common use cases
|
|
4
|
+
*
|
|
5
|
+
* This module serves as the main entry point for factory functions that
|
|
6
|
+
* simplify the creation of search and ingestion systems.
|
|
7
|
+
* The factories handle complex initialization while providing clean APIs.
|
|
8
|
+
*
|
|
9
|
+
* MAIN FACTORY CLASSES:
|
|
10
|
+
* - IngestionFactory: Creates IngestionPipeline instances for document ingestion
|
|
11
|
+
* - SearchFactory: Creates SearchEngine with automatic mode detection (recommended)
|
|
12
|
+
*
|
|
13
|
+
* @example
|
|
14
|
+
* ```typescript
|
|
15
|
+
* import { IngestionFactory, SearchFactory } from './factories';
|
|
16
|
+
*
|
|
17
|
+
* // Create ingestion pipeline
|
|
18
|
+
* const ingestion = await IngestionFactory.create('./db.sqlite', './index.bin');
|
|
19
|
+
*
|
|
20
|
+
* // Create search engine with automatic mode detection
|
|
21
|
+
* const search = await SearchFactory.create('./index.bin', './db.sqlite');
|
|
22
|
+
* ```
|
|
23
|
+
*/
|
|
24
|
+
// Main factory classes
|
|
25
|
+
export { IngestionFactory } from './ingestion-factory.js';
|
|
26
|
+
// Polymorphic search factory (recommended for automatic mode detection)
|
|
27
|
+
// Re-exported from ./search-factory.js for convenience
|
|
28
|
+
export { SearchFactory } from './search-factory.js';
|
|
29
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Factory functions for creating text-specific search and ingestion instances
|
|
3
|
+
* Handles complex initialization logic while providing clean API for common use cases
|
|
4
|
+
*
|
|
5
|
+
* FACTORY PATTERN BENEFITS:
|
|
6
|
+
* - Abstracts complex initialization (model loading, database setup, index initialization)
|
|
7
|
+
* - Provides simple API for common use cases while preserving access to dependency injection
|
|
8
|
+
* - Clear validation and error handling without fallback mechanisms
|
|
9
|
+
* - Supports different embedding models and configurations
|
|
10
|
+
* - Enables clean separation between simple usage and advanced customization
|
|
11
|
+
*
|
|
12
|
+
* MODE SELECTION GUIDE:
|
|
13
|
+
* - Text Mode (default): Optimized for text-only content
|
|
14
|
+
* - Uses sentence-transformer models (fast, accurate for text)
|
|
15
|
+
* - Images converted to text descriptions
|
|
16
|
+
* - Best for: document search, text clustering, semantic similarity
|
|
17
|
+
*
|
|
18
|
+
* - Multimodal Mode: Optimized for mixed text/image content
|
|
19
|
+
* - Uses CLIP models (unified embedding space)
|
|
20
|
+
* - True cross-modal search (text finds images, images find text)
|
|
21
|
+
* - Best for: image search, visual QA, multimodal retrieval
|
|
22
|
+
*
|
|
23
|
+
* USAGE PATTERNS:
|
|
24
|
+
*
|
|
25
|
+
* 1. Mode Selection:
|
|
26
|
+
* ```typescript
|
|
27
|
+
* // Text mode (default) - optimized for text-only content
|
|
28
|
+
* const textIngestion = await IngestionFactory.create('./db.sqlite', './index.bin', {
|
|
29
|
+
* mode: 'text',
|
|
30
|
+
* embeddingModel: 'sentence-transformers/all-MiniLM-L6-v2'
|
|
31
|
+
* });
|
|
32
|
+
*
|
|
33
|
+
* // Multimodal mode - enables cross-modal search
|
|
34
|
+
* const multimodalIngestion = await IngestionFactory.create('./db.sqlite', './index.bin', {
|
|
35
|
+
* mode: 'multimodal',
|
|
36
|
+
* embeddingModel: 'Xenova/clip-vit-base-patch32',
|
|
37
|
+
* rerankingStrategy: 'text-derived'
|
|
38
|
+
* });
|
|
39
|
+
* ```
|
|
40
|
+
*/
|
|
41
|
+
import { IngestionPipeline } from '../core/ingestion.js';
|
|
42
|
+
/**
|
|
43
|
+
* Content system configuration options
|
|
44
|
+
*/
|
|
45
|
+
export interface ContentSystemConfig {
    /**
     * Path of the content directory.
     * Defaults to '.raglite/content'.
     */
    contentDir?: string;
    /**
     * Upper bound for a single file, in bytes.
     * Defaults to 50MB.
     */
    maxFileSize?: number;
    /**
     * Upper bound for the whole content directory, in bytes.
     * Defaults to 2GB.
     */
    maxContentDirSize?: number;
    /**
     * Turns content deduplication on or off.
     * Defaults to true.
     */
    enableDeduplication?: boolean;
    /**
     * Turns storage tracking on or off.
     * Defaults to true.
     */
    enableStorageTracking?: boolean;
}
|
|
57
|
+
/**
|
|
58
|
+
* Options for text ingestion factory
|
|
59
|
+
*/
|
|
60
|
+
export interface IngestionFactoryOptions {
    /** Name of the embedding model to use instead of the configured one. */
    embeddingModel?: string;
    /** Embedding batch size to use instead of the configured one. */
    batchSize?: number;
    /** Chunk size to use instead of the configured one. */
    chunkSize?: number;
    /** Chunk overlap to use instead of the configured one. */
    chunkOverlap?: number;
    /** When set, forces the index to be rebuilt. */
    forceRebuild?: boolean;
    /** Ingestion pipeline mode: text or multimodal. */
    mode?:
        | 'text'
        | 'multimodal';
    /** Reranking strategy, used in multimodal mode. */
    rerankingStrategy?:
        | 'cross-encoder'
        | 'text-derived'
        | 'metadata'
        | 'hybrid'
        | 'disabled';
    /** Settings for the content system. */
    contentSystemConfig?: ContentSystemConfig;
}
|
|
78
|
+
/**
|
|
79
|
+
* Factory for creating text-based IngestionPipeline instances
|
|
80
|
+
* Handles model loading, database initialization, and index setup
|
|
81
|
+
*
|
|
82
|
+
* This factory abstracts the complex initialization process required for text ingestion:
|
|
83
|
+
* 1. Creates necessary directories if they don't exist
|
|
84
|
+
* 2. Validates mode-model compatibility (no fallback mechanisms)
|
|
85
|
+
* 3. Loads and validates embedding models with clear error reporting
|
|
86
|
+
* 4. Establishes database connections and initializes schema
|
|
87
|
+
* 5. Stores mode configuration in database for automatic detection
|
|
88
|
+
* 6. Creates or loads vector indexes with proper configuration
|
|
89
|
+
* 7. Creates IngestionPipeline with proper dependency injection
|
|
90
|
+
*
|
|
91
|
+
* Mode Configuration:
|
|
92
|
+
* - Text Mode (default): Uses sentence-transformer models for text-only content
|
|
93
|
+
* - Multimodal Mode: Uses CLIP models for mixed text/image content
|
|
94
|
+
* - Mode is stored in database and auto-detected during search
|
|
95
|
+
* - Clear validation prevents mode-model mismatches
|
|
96
|
+
*
|
|
97
|
+
* @example
|
|
98
|
+
* ```typescript
|
|
99
|
+
* // Basic usage
|
|
100
|
+
* const ingestion = await IngestionFactory.create('./db.sqlite', './index.bin');
|
|
101
|
+
* await ingestion.ingestDirectory('./documents');
|
|
102
|
+
*
|
|
103
|
+
* // With custom configuration
|
|
104
|
+
* const ingestion = await IngestionFactory.create('./db.sqlite', './index.bin', {
|
|
105
|
+
* embeddingModel: 'all-MiniLM-L6-v2',
|
|
106
|
+
* chunkSize: 512,
|
|
107
|
+
* chunkOverlap: 50,
|
|
108
|
+
* forceRebuild: true
|
|
109
|
+
* });
|
|
110
|
+
*
|
|
111
|
+
* // With defaults
|
|
112
|
+
* const ingestion = await IngestionFactory.createWithDefaults({
|
|
113
|
+
* batchSize: 32 // Faster processing
|
|
114
|
+
* });
|
|
115
|
+
* ```
|
|
116
|
+
*/
|
|
117
|
+
export declare class IngestionFactory {
    /**
     * Build an IngestionPipeline for the given database and index files.
     *
     * The full initialization sequence is carried out on the caller's behalf:
     * - missing directories are created
     * - the text embedding model is loaded (lazy initialization)
     * - the database connection is opened and its schema initialized
     * - the vector index is created or loaded (honoring the force-rebuild option)
     * - the IngestionPipeline is assembled via dependency injection
     * - the complete setup is validated
     *
     * @param dbPath - SQLite database file; created if it does not exist
     * @param indexPath - Vector index file; created if it does not exist
     * @param options - Optional configuration overrides
     * @param options.embeddingModel - Embedding model override (default: from config)
     * @param options.batchSize - Embedding batch size override (default: from config)
     * @param options.chunkSize - Chunk size override (default: from config)
     * @param options.chunkOverlap - Chunk overlap override (default: from config)
     * @param options.forceRebuild - Rebuild an existing index (default: false)
     * @param options.mode - Pipeline mode, 'text' or 'multimodal'
     * @param options.rerankingStrategy - Reranking strategy for multimodal mode
     * @param options.contentSystemConfig - Content system settings
     * @param options.contentSystemConfig.contentDir - Content directory path (default: '.raglite/content')
     * @param options.contentSystemConfig.maxFileSize - Largest allowed file, in bytes (default: 50MB)
     * @param options.contentSystemConfig.maxContentDirSize - Largest allowed content directory size (default: 2GB)
     * @param options.contentSystemConfig.enableDeduplication - Toggle content deduplication (default: true)
     * @param options.contentSystemConfig.enableStorageTracking - Toggle storage tracking (default: true)
     * @returns Promise that resolves to the configured IngestionPipeline
     * @throws {Error} If initialization fails
     *
     * @example
     * ```typescript
     * // Default content system
     * const ingestion = await IngestionFactory.create('./my-db.sqlite', './my-index.bin');
     *
     * // Custom content system configuration
     * const ingestion = await IngestionFactory.create('./my-db.sqlite', './my-index.bin', {
     *   contentSystemConfig: {
     *     contentDir: './custom-content',
     *     maxFileSize: 100 * 1024 * 1024, // 100MB
     *     maxContentDirSize: 5 * 1024 * 1024 * 1024, // 5GB
     *     enableDeduplication: true
     *   }
     * });
     *
     * // Ingest a directory of documents
     * const result = await ingestion.ingestDirectory('./documents');
     * console.log(`Processed ${result.documentsProcessed} documents`);
     *
     * // Ingest content held in memory (MCP integration)
     * const contentId = await ingestion.ingestFromMemory(buffer, {
     *   displayName: 'uploaded-file.pdf',
     *   contentType: 'application/pdf'
     * });
     *
     * // Release resources when finished
     * await ingestion.cleanup();
     * ```
     */
    static create(dbPath: string, indexPath: string, options?: IngestionFactoryOptions): Promise<IngestionPipeline>;
    /**
     * Build an IngestionPipeline without explicit paths.
     * Default paths are resolved from the current working directory.
     *
     * @param options - Optional configuration overrides
     * @returns Promise that resolves to the configured IngestionPipeline
     */
    static createWithDefaults(options?: IngestionFactoryOptions): Promise<IngestionPipeline>;
    /**
     * Stores the mode during ingestion: creates or validates system info
     * from the supplied mode and options.
     * @private
     */
    private static handleModeStorage;
    /**
     * Writes updated system info to the database.
     * @private
     */
    private static updateSystemInfo;
    /**
     * Validates the content system configuration and prepares it for use.
     * @private
     */
    private static validateAndPrepareContentSystemConfig;
}
|
|
200
|
+
//# sourceMappingURL=ingestion-factory.d.ts.map
|