rag-lite-ts 2.1.0 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{cli → cjs/cli}/indexer.js +1 -1
- package/dist/{cli → cjs/cli}/search.js +5 -10
- package/dist/{core → cjs/core}/binary-index-format.d.ts +28 -2
- package/dist/cjs/core/binary-index-format.js +291 -0
- package/dist/{core → cjs/core}/ingestion.d.ts +5 -1
- package/dist/{core → cjs/core}/ingestion.js +76 -9
- package/dist/{core → cjs/core}/model-validator.js +1 -1
- package/dist/{core → cjs/core}/reranking-strategies.js +4 -5
- package/dist/{core → cjs/core}/search.js +2 -1
- package/dist/{core → cjs/core}/types.d.ts +1 -1
- package/dist/{core → cjs/core}/vector-index.d.ts +4 -0
- package/dist/{core → cjs/core}/vector-index.js +10 -2
- package/dist/{file-processor.d.ts → cjs/file-processor.d.ts} +2 -0
- package/dist/{file-processor.js → cjs/file-processor.js} +20 -0
- package/dist/{index-manager.d.ts → cjs/index-manager.d.ts} +17 -1
- package/dist/{index-manager.js → cjs/index-manager.js} +148 -7
- package/dist/{multimodal → cjs/multimodal}/clip-embedder.js +71 -66
- package/dist/esm/api-errors.d.ts +90 -0
- package/dist/esm/api-errors.js +320 -0
- package/dist/esm/cli/indexer.d.ts +11 -0
- package/dist/esm/cli/indexer.js +471 -0
- package/dist/esm/cli/search.d.ts +7 -0
- package/dist/esm/cli/search.js +332 -0
- package/dist/esm/cli.d.ts +3 -0
- package/dist/esm/cli.js +529 -0
- package/dist/esm/config.d.ts +51 -0
- package/dist/esm/config.js +79 -0
- package/dist/esm/core/abstract-embedder.d.ts +125 -0
- package/dist/esm/core/abstract-embedder.js +264 -0
- package/dist/esm/core/actionable-error-messages.d.ts +60 -0
- package/dist/esm/core/actionable-error-messages.js +397 -0
- package/dist/esm/core/adapters.d.ts +93 -0
- package/dist/esm/core/adapters.js +139 -0
- package/dist/esm/core/batch-processing-optimizer.d.ts +155 -0
- package/dist/esm/core/batch-processing-optimizer.js +536 -0
- package/dist/esm/core/binary-index-format.d.ts +78 -0
- package/dist/esm/core/binary-index-format.js +291 -0
- package/dist/esm/core/chunker.d.ts +119 -0
- package/dist/esm/core/chunker.js +73 -0
- package/dist/esm/core/cli-database-utils.d.ts +53 -0
- package/dist/esm/core/cli-database-utils.js +239 -0
- package/dist/esm/core/config.d.ts +102 -0
- package/dist/esm/core/config.js +247 -0
- package/dist/esm/core/content-errors.d.ts +111 -0
- package/dist/esm/core/content-errors.js +362 -0
- package/dist/esm/core/content-manager.d.ts +335 -0
- package/dist/esm/core/content-manager.js +1476 -0
- package/dist/esm/core/content-performance-optimizer.d.ts +150 -0
- package/dist/esm/core/content-performance-optimizer.js +516 -0
- package/dist/esm/core/content-resolver.d.ts +104 -0
- package/dist/esm/core/content-resolver.js +285 -0
- package/dist/esm/core/cross-modal-search.d.ts +164 -0
- package/dist/esm/core/cross-modal-search.js +342 -0
- package/dist/esm/core/database-connection-manager.d.ts +109 -0
- package/dist/esm/core/database-connection-manager.js +310 -0
- package/dist/esm/core/db.d.ts +213 -0
- package/dist/esm/core/db.js +895 -0
- package/dist/esm/core/embedder-factory.d.ts +154 -0
- package/dist/esm/core/embedder-factory.js +311 -0
- package/dist/esm/core/error-handler.d.ts +112 -0
- package/dist/esm/core/error-handler.js +239 -0
- package/dist/esm/core/index.d.ts +59 -0
- package/dist/esm/core/index.js +69 -0
- package/dist/esm/core/ingestion.d.ts +202 -0
- package/dist/esm/core/ingestion.js +901 -0
- package/dist/esm/core/interfaces.d.ts +408 -0
- package/dist/esm/core/interfaces.js +106 -0
- package/dist/esm/core/lazy-dependency-loader.d.ts +147 -0
- package/dist/esm/core/lazy-dependency-loader.js +435 -0
- package/dist/esm/core/mode-detection-service.d.ts +150 -0
- package/dist/esm/core/mode-detection-service.js +565 -0
- package/dist/esm/core/mode-model-validator.d.ts +92 -0
- package/dist/esm/core/mode-model-validator.js +203 -0
- package/dist/esm/core/model-registry.d.ts +116 -0
- package/dist/esm/core/model-registry.js +411 -0
- package/dist/esm/core/model-validator.d.ts +217 -0
- package/dist/esm/core/model-validator.js +782 -0
- package/dist/esm/core/path-manager.d.ts +47 -0
- package/dist/esm/core/path-manager.js +71 -0
- package/dist/esm/core/raglite-paths.d.ts +121 -0
- package/dist/esm/core/raglite-paths.js +145 -0
- package/dist/esm/core/reranking-config.d.ts +42 -0
- package/dist/esm/core/reranking-config.js +147 -0
- package/dist/esm/core/reranking-factory.d.ts +92 -0
- package/dist/esm/core/reranking-factory.js +410 -0
- package/dist/esm/core/reranking-strategies.d.ts +310 -0
- package/dist/esm/core/reranking-strategies.js +650 -0
- package/dist/esm/core/resource-cleanup.d.ts +163 -0
- package/dist/esm/core/resource-cleanup.js +371 -0
- package/dist/esm/core/resource-manager.d.ts +212 -0
- package/dist/esm/core/resource-manager.js +564 -0
- package/dist/esm/core/search-pipeline.d.ts +111 -0
- package/dist/esm/core/search-pipeline.js +287 -0
- package/dist/esm/core/search.d.ts +141 -0
- package/dist/esm/core/search.js +320 -0
- package/dist/esm/core/streaming-operations.d.ts +145 -0
- package/dist/esm/core/streaming-operations.js +409 -0
- package/dist/esm/core/types.d.ts +66 -0
- package/dist/esm/core/types.js +6 -0
- package/dist/esm/core/universal-embedder.d.ts +177 -0
- package/dist/esm/core/universal-embedder.js +139 -0
- package/dist/esm/core/validation-messages.d.ts +99 -0
- package/dist/esm/core/validation-messages.js +334 -0
- package/dist/esm/core/vector-index.d.ts +72 -0
- package/dist/esm/core/vector-index.js +333 -0
- package/dist/esm/dom-polyfills.d.ts +6 -0
- package/dist/esm/dom-polyfills.js +37 -0
- package/dist/esm/factories/index.d.ts +27 -0
- package/dist/esm/factories/index.js +29 -0
- package/dist/esm/factories/ingestion-factory.d.ts +200 -0
- package/dist/esm/factories/ingestion-factory.js +477 -0
- package/dist/esm/factories/search-factory.d.ts +154 -0
- package/dist/esm/factories/search-factory.js +344 -0
- package/dist/esm/file-processor.d.ts +147 -0
- package/dist/esm/file-processor.js +963 -0
- package/dist/esm/index-manager.d.ts +116 -0
- package/dist/esm/index-manager.js +598 -0
- package/dist/esm/index.d.ts +75 -0
- package/dist/esm/index.js +110 -0
- package/dist/esm/indexer.d.ts +7 -0
- package/dist/esm/indexer.js +54 -0
- package/dist/esm/ingestion.d.ts +63 -0
- package/dist/esm/ingestion.js +124 -0
- package/dist/esm/mcp-server.d.ts +46 -0
- package/dist/esm/mcp-server.js +1820 -0
- package/dist/esm/multimodal/clip-embedder.d.ts +327 -0
- package/dist/esm/multimodal/clip-embedder.js +996 -0
- package/dist/esm/multimodal/index.d.ts +6 -0
- package/dist/esm/multimodal/index.js +6 -0
- package/dist/esm/preprocess.d.ts +19 -0
- package/dist/esm/preprocess.js +203 -0
- package/dist/esm/preprocessors/index.d.ts +17 -0
- package/dist/esm/preprocessors/index.js +38 -0
- package/dist/esm/preprocessors/mdx.d.ts +25 -0
- package/dist/esm/preprocessors/mdx.js +101 -0
- package/dist/esm/preprocessors/mermaid.d.ts +68 -0
- package/dist/esm/preprocessors/mermaid.js +329 -0
- package/dist/esm/preprocessors/registry.d.ts +56 -0
- package/dist/esm/preprocessors/registry.js +179 -0
- package/dist/esm/run-error-recovery-tests.d.ts +7 -0
- package/dist/esm/run-error-recovery-tests.js +101 -0
- package/dist/esm/search-standalone.d.ts +7 -0
- package/dist/esm/search-standalone.js +117 -0
- package/dist/esm/search.d.ts +99 -0
- package/dist/esm/search.js +177 -0
- package/dist/esm/test-utils.d.ts +18 -0
- package/dist/esm/test-utils.js +27 -0
- package/dist/esm/text/chunker.d.ts +33 -0
- package/dist/esm/text/chunker.js +279 -0
- package/dist/esm/text/embedder.d.ts +111 -0
- package/dist/esm/text/embedder.js +386 -0
- package/dist/esm/text/index.d.ts +8 -0
- package/dist/esm/text/index.js +9 -0
- package/dist/esm/text/preprocessors/index.d.ts +17 -0
- package/dist/esm/text/preprocessors/index.js +38 -0
- package/dist/esm/text/preprocessors/mdx.d.ts +25 -0
- package/dist/esm/text/preprocessors/mdx.js +101 -0
- package/dist/esm/text/preprocessors/mermaid.d.ts +68 -0
- package/dist/esm/text/preprocessors/mermaid.js +330 -0
- package/dist/esm/text/preprocessors/registry.d.ts +56 -0
- package/dist/esm/text/preprocessors/registry.js +180 -0
- package/dist/esm/text/reranker.d.ts +49 -0
- package/dist/esm/text/reranker.js +274 -0
- package/dist/esm/text/sentence-transformer-embedder.d.ts +96 -0
- package/dist/esm/text/sentence-transformer-embedder.js +340 -0
- package/dist/esm/text/tokenizer.d.ts +22 -0
- package/dist/esm/text/tokenizer.js +64 -0
- package/dist/esm/types.d.ts +83 -0
- package/dist/esm/types.js +3 -0
- package/dist/esm/utils/vector-math.d.ts +31 -0
- package/dist/esm/utils/vector-math.js +70 -0
- package/package.json +30 -12
- package/dist/core/binary-index-format.js +0 -122
- /package/dist/{api-errors.d.ts → cjs/api-errors.d.ts} +0 -0
- /package/dist/{api-errors.js → cjs/api-errors.js} +0 -0
- /package/dist/{cli → cjs/cli}/indexer.d.ts +0 -0
- /package/dist/{cli → cjs/cli}/search.d.ts +0 -0
- /package/dist/{cli.d.ts → cjs/cli.d.ts} +0 -0
- /package/dist/{cli.js → cjs/cli.js} +0 -0
- /package/dist/{config.d.ts → cjs/config.d.ts} +0 -0
- /package/dist/{config.js → cjs/config.js} +0 -0
- /package/dist/{core → cjs/core}/abstract-embedder.d.ts +0 -0
- /package/dist/{core → cjs/core}/abstract-embedder.js +0 -0
- /package/dist/{core → cjs/core}/actionable-error-messages.d.ts +0 -0
- /package/dist/{core → cjs/core}/actionable-error-messages.js +0 -0
- /package/dist/{core → cjs/core}/adapters.d.ts +0 -0
- /package/dist/{core → cjs/core}/adapters.js +0 -0
- /package/dist/{core → cjs/core}/batch-processing-optimizer.d.ts +0 -0
- /package/dist/{core → cjs/core}/batch-processing-optimizer.js +0 -0
- /package/dist/{core → cjs/core}/chunker.d.ts +0 -0
- /package/dist/{core → cjs/core}/chunker.js +0 -0
- /package/dist/{core → cjs/core}/cli-database-utils.d.ts +0 -0
- /package/dist/{core → cjs/core}/cli-database-utils.js +0 -0
- /package/dist/{core → cjs/core}/config.d.ts +0 -0
- /package/dist/{core → cjs/core}/config.js +0 -0
- /package/dist/{core → cjs/core}/content-errors.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-errors.js +0 -0
- /package/dist/{core → cjs/core}/content-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-manager.js +0 -0
- /package/dist/{core → cjs/core}/content-performance-optimizer.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-performance-optimizer.js +0 -0
- /package/dist/{core → cjs/core}/content-resolver.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-resolver.js +0 -0
- /package/dist/{core → cjs/core}/cross-modal-search.d.ts +0 -0
- /package/dist/{core → cjs/core}/cross-modal-search.js +0 -0
- /package/dist/{core → cjs/core}/database-connection-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/database-connection-manager.js +0 -0
- /package/dist/{core → cjs/core}/db.d.ts +0 -0
- /package/dist/{core → cjs/core}/db.js +0 -0
- /package/dist/{core → cjs/core}/embedder-factory.d.ts +0 -0
- /package/dist/{core → cjs/core}/embedder-factory.js +0 -0
- /package/dist/{core → cjs/core}/error-handler.d.ts +0 -0
- /package/dist/{core → cjs/core}/error-handler.js +0 -0
- /package/dist/{core → cjs/core}/index.d.ts +0 -0
- /package/dist/{core → cjs/core}/index.js +0 -0
- /package/dist/{core → cjs/core}/interfaces.d.ts +0 -0
- /package/dist/{core → cjs/core}/interfaces.js +0 -0
- /package/dist/{core → cjs/core}/lazy-dependency-loader.d.ts +0 -0
- /package/dist/{core → cjs/core}/lazy-dependency-loader.js +0 -0
- /package/dist/{core → cjs/core}/mode-detection-service.d.ts +0 -0
- /package/dist/{core → cjs/core}/mode-detection-service.js +0 -0
- /package/dist/{core → cjs/core}/mode-model-validator.d.ts +0 -0
- /package/dist/{core → cjs/core}/mode-model-validator.js +0 -0
- /package/dist/{core → cjs/core}/model-registry.d.ts +0 -0
- /package/dist/{core → cjs/core}/model-registry.js +0 -0
- /package/dist/{core → cjs/core}/model-validator.d.ts +0 -0
- /package/dist/{core → cjs/core}/path-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/path-manager.js +0 -0
- /package/dist/{core → cjs/core}/raglite-paths.d.ts +0 -0
- /package/dist/{core → cjs/core}/raglite-paths.js +0 -0
- /package/dist/{core → cjs/core}/reranking-config.d.ts +0 -0
- /package/dist/{core → cjs/core}/reranking-config.js +0 -0
- /package/dist/{core → cjs/core}/reranking-factory.d.ts +0 -0
- /package/dist/{core → cjs/core}/reranking-factory.js +0 -0
- /package/dist/{core → cjs/core}/reranking-strategies.d.ts +0 -0
- /package/dist/{core → cjs/core}/resource-cleanup.d.ts +0 -0
- /package/dist/{core → cjs/core}/resource-cleanup.js +0 -0
- /package/dist/{core → cjs/core}/resource-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/resource-manager.js +0 -0
- /package/dist/{core → cjs/core}/search-pipeline.d.ts +0 -0
- /package/dist/{core → cjs/core}/search-pipeline.js +0 -0
- /package/dist/{core → cjs/core}/search.d.ts +0 -0
- /package/dist/{core → cjs/core}/streaming-operations.d.ts +0 -0
- /package/dist/{core → cjs/core}/streaming-operations.js +0 -0
- /package/dist/{core → cjs/core}/types.js +0 -0
- /package/dist/{core → cjs/core}/universal-embedder.d.ts +0 -0
- /package/dist/{core → cjs/core}/universal-embedder.js +0 -0
- /package/dist/{core → cjs/core}/validation-messages.d.ts +0 -0
- /package/dist/{core → cjs/core}/validation-messages.js +0 -0
- /package/dist/{dom-polyfills.d.ts → cjs/dom-polyfills.d.ts} +0 -0
- /package/dist/{dom-polyfills.js → cjs/dom-polyfills.js} +0 -0
- /package/dist/{factories → cjs/factories}/index.d.ts +0 -0
- /package/dist/{factories → cjs/factories}/index.js +0 -0
- /package/dist/{factories → cjs/factories}/ingestion-factory.d.ts +0 -0
- /package/dist/{factories → cjs/factories}/ingestion-factory.js +0 -0
- /package/dist/{factories → cjs/factories}/search-factory.d.ts +0 -0
- /package/dist/{factories → cjs/factories}/search-factory.js +0 -0
- /package/dist/{index.d.ts → cjs/index.d.ts} +0 -0
- /package/dist/{index.js → cjs/index.js} +0 -0
- /package/dist/{indexer.d.ts → cjs/indexer.d.ts} +0 -0
- /package/dist/{indexer.js → cjs/indexer.js} +0 -0
- /package/dist/{ingestion.d.ts → cjs/ingestion.d.ts} +0 -0
- /package/dist/{ingestion.js → cjs/ingestion.js} +0 -0
- /package/dist/{mcp-server.d.ts → cjs/mcp-server.d.ts} +0 -0
- /package/dist/{mcp-server.js → cjs/mcp-server.js} +0 -0
- /package/dist/{multimodal → cjs/multimodal}/clip-embedder.d.ts +0 -0
- /package/dist/{multimodal → cjs/multimodal}/index.d.ts +0 -0
- /package/dist/{multimodal → cjs/multimodal}/index.js +0 -0
- /package/dist/{preprocess.d.ts → cjs/preprocess.d.ts} +0 -0
- /package/dist/{preprocess.js → cjs/preprocess.js} +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/index.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/index.js +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mdx.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mdx.js +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mermaid.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mermaid.js +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/registry.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/registry.js +0 -0
- /package/dist/{run-error-recovery-tests.d.ts → cjs/run-error-recovery-tests.d.ts} +0 -0
- /package/dist/{run-error-recovery-tests.js → cjs/run-error-recovery-tests.js} +0 -0
- /package/dist/{search-standalone.d.ts → cjs/search-standalone.d.ts} +0 -0
- /package/dist/{search-standalone.js → cjs/search-standalone.js} +0 -0
- /package/dist/{search.d.ts → cjs/search.d.ts} +0 -0
- /package/dist/{search.js → cjs/search.js} +0 -0
- /package/dist/{test-utils.d.ts → cjs/test-utils.d.ts} +0 -0
- /package/dist/{test-utils.js → cjs/test-utils.js} +0 -0
- /package/dist/{text → cjs/text}/chunker.d.ts +0 -0
- /package/dist/{text → cjs/text}/chunker.js +0 -0
- /package/dist/{text → cjs/text}/embedder.d.ts +0 -0
- /package/dist/{text → cjs/text}/embedder.js +0 -0
- /package/dist/{text → cjs/text}/index.d.ts +0 -0
- /package/dist/{text → cjs/text}/index.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/index.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/index.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mdx.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mdx.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mermaid.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mermaid.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/registry.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/registry.js +0 -0
- /package/dist/{text → cjs/text}/reranker.d.ts +0 -0
- /package/dist/{text → cjs/text}/reranker.js +0 -0
- /package/dist/{text → cjs/text}/sentence-transformer-embedder.d.ts +0 -0
- /package/dist/{text → cjs/text}/sentence-transformer-embedder.js +0 -0
- /package/dist/{text → cjs/text}/tokenizer.d.ts +0 -0
- /package/dist/{text → cjs/text}/tokenizer.js +0 -0
- /package/dist/{types.d.ts → cjs/types.d.ts} +0 -0
- /package/dist/{types.js → cjs/types.js} +0 -0
- /package/dist/{utils → cjs/utils}/vector-math.d.ts +0 -0
- /package/dist/{utils → cjs/utils}/vector-math.js +0 -0
|
@@ -0,0 +1,333 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CORE MODULE — Shared between text-only (rag-lite-ts) and future multimodal (rag-lite-mm)
|
|
3
|
+
* Model-agnostic. No transformer or modality-specific logic.
|
|
4
|
+
*/
|
|
5
|
+
import { existsSync } from 'fs';
|
|
6
|
+
import { JSDOM } from 'jsdom';
|
|
7
|
+
import { ErrorCategory, ErrorSeverity, safeExecute } from './error-handler.js';
|
|
8
|
+
import { createMissingFileError, createDimensionMismatchError } from './actionable-error-messages.js';
|
|
9
|
+
import { BinaryIndexFormat } from './binary-index-format.js';
|
|
10
|
+
// hnswlib-wasm is written against browser globals; when no global `window`
// exists, back the ones assigned below with a jsdom instance.
if (typeof window === 'undefined') {
    const { window: jsdomWindow } = new JSDOM('<!DOCTYPE html><html><body></body></html>', {
        url: 'http://localhost',
        pretendToBeVisual: true,
        resources: 'usable'
    });
    // Publish the jsdom window, its document and its XMLHttpRequest as Node globals.
    global.window = jsdomWindow;
    global.document = jsdomWindow.document;
    global.XMLHttpRequest = jsdomWindow.XMLHttpRequest;
    // IndexedDB is blanked out on the Node global and on the jsdom window so
    // that hnswlib-wasm does not try to use it.
    global.indexedDB = undefined;
    Object.defineProperty(jsdomWindow, 'indexedDB', {
        value: undefined,
        writable: false,
        configurable: true
    });
}
|
|
30
|
+
export class VectorIndex {
|
|
31
|
+
index = null;
|
|
32
|
+
hnswlib = null;
|
|
33
|
+
indexPath;
|
|
34
|
+
options;
|
|
35
|
+
currentSize = 0;
|
|
36
|
+
vectorStorage = new Map(); // For persistence
|
|
37
|
+
constructor(indexPath, options) {
|
|
38
|
+
this.indexPath = indexPath;
|
|
39
|
+
this.options = {
|
|
40
|
+
efConstruction: 200,
|
|
41
|
+
M: 16,
|
|
42
|
+
seed: 100,
|
|
43
|
+
...options
|
|
44
|
+
};
|
|
45
|
+
}
|
|
46
|
+
/**
|
|
47
|
+
* Initialize the HNSW index with cosine similarity using hnswlib-wasm
|
|
48
|
+
*/
|
|
49
|
+
async initialize() {
|
|
50
|
+
await safeExecute(async () => {
|
|
51
|
+
// Load the hnswlib module
|
|
52
|
+
if (!this.hnswlib) {
|
|
53
|
+
// Temporarily suppress stderr output during hnswlib loading to avoid IndexedDB warnings
|
|
54
|
+
const originalStderrWrite = process.stderr.write;
|
|
55
|
+
const originalConsoleError = console.error;
|
|
56
|
+
process.stderr.write = function (chunk, encoding, callback) {
|
|
57
|
+
const message = chunk.toString();
|
|
58
|
+
// Suppress specific IndexedDB/IDBFS related errors and WebAssembly errors
|
|
59
|
+
if (message.includes('IDBFS') || message.includes('indexedDB not supported') ||
|
|
60
|
+
message.includes('EmscriptenFileSystemManager') || message.includes('Aborted') ||
|
|
61
|
+
message.includes('jsFS Error') || message.includes('syncing FS') ||
|
|
62
|
+
message.includes('RuntimeError: unreachable') || message.includes('___trap') ||
|
|
63
|
+
message.includes('abort') || message.includes('assert') ||
|
|
64
|
+
message.includes('hnswlib-wasm/dist/hnswlib')) {
|
|
65
|
+
if (callback)
|
|
66
|
+
callback();
|
|
67
|
+
return true;
|
|
68
|
+
}
|
|
69
|
+
return originalStderrWrite.call(this, chunk, encoding, callback);
|
|
70
|
+
};
|
|
71
|
+
console.error = (...args) => {
|
|
72
|
+
const message = args.join(' ');
|
|
73
|
+
if (message.includes('IDBFS') || message.includes('indexedDB not supported') ||
|
|
74
|
+
message.includes('EmscriptenFileSystemManager') || message.includes('Aborted') ||
|
|
75
|
+
message.includes('jsFS Error') || message.includes('syncing FS') ||
|
|
76
|
+
message.includes('RuntimeError: unreachable') || message.includes('___trap') ||
|
|
77
|
+
message.includes('abort') || message.includes('assert') ||
|
|
78
|
+
message.includes('hnswlib-wasm/dist/hnswlib')) {
|
|
79
|
+
return;
|
|
80
|
+
}
|
|
81
|
+
originalConsoleError.apply(console, args);
|
|
82
|
+
};
|
|
83
|
+
try {
|
|
84
|
+
const hnswlibModule = await import('hnswlib-wasm/dist/hnswlib.js');
|
|
85
|
+
const { loadHnswlib } = hnswlibModule;
|
|
86
|
+
this.hnswlib = await loadHnswlib();
|
|
87
|
+
}
|
|
88
|
+
finally {
|
|
89
|
+
// Restore original output streams
|
|
90
|
+
process.stderr.write = originalStderrWrite;
|
|
91
|
+
console.error = originalConsoleError;
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
// Create new HNSW index (third parameter is autoSaveFilename, but we'll handle persistence manually)
|
|
95
|
+
this.index = new this.hnswlib.HierarchicalNSW('cosine', this.options.dimensions, '');
|
|
96
|
+
this.index.initIndex(this.options.maxElements, this.options.M || 16, this.options.efConstruction || 200, this.options.seed || 100);
|
|
97
|
+
this.currentSize = 0;
|
|
98
|
+
console.log(`Initialized HNSW index with ${this.options.dimensions} dimensions using hnswlib-wasm`);
|
|
99
|
+
}, 'Vector Index Initialization', {
|
|
100
|
+
category: ErrorCategory.INDEX,
|
|
101
|
+
severity: ErrorSeverity.FATAL
|
|
102
|
+
});
|
|
103
|
+
}
|
|
104
|
+
/**
|
|
105
|
+
* Load existing index from file using hnswlib-wasm
|
|
106
|
+
*/
|
|
107
|
+
async loadIndex() {
|
|
108
|
+
if (!existsSync(this.indexPath)) {
|
|
109
|
+
throw createMissingFileError(this.indexPath, 'index', {
|
|
110
|
+
operationContext: 'VectorIndex.loadIndex'
|
|
111
|
+
});
|
|
112
|
+
}
|
|
113
|
+
try {
|
|
114
|
+
// Load the hnswlib module
|
|
115
|
+
if (!this.hnswlib) {
|
|
116
|
+
// Temporarily suppress stderr output during hnswlib loading to avoid IndexedDB warnings
|
|
117
|
+
const originalStderrWrite = process.stderr.write;
|
|
118
|
+
const originalConsoleError = console.error;
|
|
119
|
+
process.stderr.write = function (chunk, encoding, callback) {
|
|
120
|
+
const message = chunk.toString();
|
|
121
|
+
// Suppress specific IndexedDB/IDBFS related errors and WebAssembly errors
|
|
122
|
+
if (message.includes('IDBFS') || message.includes('indexedDB not supported') ||
|
|
123
|
+
message.includes('EmscriptenFileSystemManager') || message.includes('Aborted') ||
|
|
124
|
+
message.includes('jsFS Error') || message.includes('syncing FS') ||
|
|
125
|
+
message.includes('RuntimeError: unreachable') || message.includes('___trap') ||
|
|
126
|
+
message.includes('abort') || message.includes('assert') ||
|
|
127
|
+
message.includes('hnswlib-wasm/dist/hnswlib')) {
|
|
128
|
+
if (callback)
|
|
129
|
+
callback();
|
|
130
|
+
return true;
|
|
131
|
+
}
|
|
132
|
+
return originalStderrWrite.call(this, chunk, encoding, callback);
|
|
133
|
+
};
|
|
134
|
+
console.error = (...args) => {
|
|
135
|
+
const message = args.join(' ');
|
|
136
|
+
if (message.includes('IDBFS') || message.includes('indexedDB not supported') ||
|
|
137
|
+
message.includes('EmscriptenFileSystemManager') || message.includes('Aborted') ||
|
|
138
|
+
message.includes('jsFS Error') || message.includes('syncing FS') ||
|
|
139
|
+
message.includes('RuntimeError: unreachable') || message.includes('___trap') ||
|
|
140
|
+
message.includes('abort') || message.includes('assert') ||
|
|
141
|
+
message.includes('hnswlib-wasm/dist/hnswlib')) {
|
|
142
|
+
return;
|
|
143
|
+
}
|
|
144
|
+
originalConsoleError.apply(console, args);
|
|
145
|
+
};
|
|
146
|
+
try {
|
|
147
|
+
const hnswlibModule = await import('hnswlib-wasm/dist/hnswlib.js');
|
|
148
|
+
const { loadHnswlib } = hnswlibModule;
|
|
149
|
+
this.hnswlib = await loadHnswlib();
|
|
150
|
+
}
|
|
151
|
+
finally {
|
|
152
|
+
// Restore original output streams
|
|
153
|
+
process.stderr.write = originalStderrWrite;
|
|
154
|
+
console.error = originalConsoleError;
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
// Create new HNSW index (third parameter is autoSaveFilename, but we'll handle persistence manually)
|
|
158
|
+
this.index = new this.hnswlib.HierarchicalNSW('cosine', this.options.dimensions, '');
|
|
159
|
+
// Load from binary format
|
|
160
|
+
const data = await BinaryIndexFormat.load(this.indexPath);
|
|
161
|
+
// Validate dimensions
|
|
162
|
+
if (data.dimensions !== this.options.dimensions) {
|
|
163
|
+
console.log(`⚠️ Dimension mismatch detected:`);
|
|
164
|
+
console.log(` Stored dimensions: ${data.dimensions}`);
|
|
165
|
+
console.log(` Expected dimensions: ${this.options.dimensions}`);
|
|
166
|
+
console.log(` Number of vectors: ${data.vectors.length}`);
|
|
167
|
+
if (data.vectors.length > 0) {
|
|
168
|
+
console.log(` Actual vector length: ${data.vectors[0].vector.length}`);
|
|
169
|
+
}
|
|
170
|
+
throw createDimensionMismatchError(this.options.dimensions, data.dimensions, 'vector index loading', { operationContext: 'VectorIndex.loadIndex' });
|
|
171
|
+
}
|
|
172
|
+
// Update options from stored data
|
|
173
|
+
this.options.maxElements = data.maxElements;
|
|
174
|
+
this.options.M = data.M;
|
|
175
|
+
this.options.efConstruction = data.efConstruction;
|
|
176
|
+
this.options.seed = data.seed;
|
|
177
|
+
// Initialize HNSW index
|
|
178
|
+
this.index.initIndex(this.options.maxElements, this.options.M, this.options.efConstruction, this.options.seed);
|
|
179
|
+
// Clear and repopulate vector storage
|
|
180
|
+
this.vectorStorage.clear();
|
|
181
|
+
// Add all stored vectors to HNSW index
|
|
182
|
+
for (const item of data.vectors) {
|
|
183
|
+
this.index.addPoint(item.vector, item.id, false);
|
|
184
|
+
this.vectorStorage.set(item.id, item.vector);
|
|
185
|
+
}
|
|
186
|
+
this.currentSize = data.currentSize;
|
|
187
|
+
console.log(`✓ Loaded HNSW index with ${this.currentSize} vectors from ${this.indexPath}`);
|
|
188
|
+
}
|
|
189
|
+
catch (error) {
|
|
190
|
+
throw new Error(`Failed to load index from ${this.indexPath}: ${error}`);
|
|
191
|
+
}
|
|
192
|
+
}
|
|
193
|
+
/**
|
|
194
|
+
* Save index to binary format
|
|
195
|
+
*/
|
|
196
|
+
async saveIndex() {
|
|
197
|
+
if (!this.index) {
|
|
198
|
+
throw new Error('Index not initialized');
|
|
199
|
+
}
|
|
200
|
+
try {
|
|
201
|
+
// Collect all vectors from storage
|
|
202
|
+
const vectors = Array.from(this.vectorStorage.entries()).map(([id, vector]) => ({
|
|
203
|
+
id,
|
|
204
|
+
vector
|
|
205
|
+
}));
|
|
206
|
+
// Save to binary format
|
|
207
|
+
await BinaryIndexFormat.save(this.indexPath, {
|
|
208
|
+
dimensions: this.options.dimensions,
|
|
209
|
+
maxElements: this.options.maxElements,
|
|
210
|
+
M: this.options.M || 16,
|
|
211
|
+
efConstruction: this.options.efConstruction || 200,
|
|
212
|
+
seed: this.options.seed || 100,
|
|
213
|
+
currentSize: this.currentSize,
|
|
214
|
+
vectors
|
|
215
|
+
});
|
|
216
|
+
console.log(`✓ Saved HNSW index with ${this.currentSize} vectors to ${this.indexPath}`);
|
|
217
|
+
}
|
|
218
|
+
catch (error) {
|
|
219
|
+
throw new Error(`Failed to save index to ${this.indexPath}: ${error}`);
|
|
220
|
+
}
|
|
221
|
+
}
|
|
222
|
+
/**
|
|
223
|
+
* Add a single vector to the HNSW index
|
|
224
|
+
*/
|
|
225
|
+
addVector(embeddingId, vector) {
|
|
226
|
+
if (!this.index) {
|
|
227
|
+
throw new Error('Index not initialized');
|
|
228
|
+
}
|
|
229
|
+
if (vector.length !== this.options.dimensions) {
|
|
230
|
+
throw createDimensionMismatchError(this.options.dimensions, vector.length, 'vector addition', { operationContext: 'VectorIndex.addVector' });
|
|
231
|
+
}
|
|
232
|
+
try {
|
|
233
|
+
this.index.addPoint(vector, embeddingId, false);
|
|
234
|
+
// Store vector for persistence
|
|
235
|
+
this.vectorStorage.set(embeddingId, new Float32Array(vector));
|
|
236
|
+
this.currentSize++;
|
|
237
|
+
}
|
|
238
|
+
catch (error) {
|
|
239
|
+
throw new Error(`Failed to add vector ${embeddingId}: ${error}`);
|
|
240
|
+
}
|
|
241
|
+
}
|
|
242
|
+
/**
|
|
243
|
+
* Add multiple vectors to the index in batch
|
|
244
|
+
*/
|
|
245
|
+
addVectors(vectors) {
|
|
246
|
+
for (const { id, vector } of vectors) {
|
|
247
|
+
this.addVector(id, vector);
|
|
248
|
+
}
|
|
249
|
+
}
|
|
250
|
+
/**
|
|
251
|
+
* Search for k nearest neighbors using hnswlib-wasm
|
|
252
|
+
*/
|
|
253
|
+
search(queryVector, k = 5) {
|
|
254
|
+
if (!this.index) {
|
|
255
|
+
throw new Error('Index not initialized');
|
|
256
|
+
}
|
|
257
|
+
if (queryVector.length !== this.options.dimensions) {
|
|
258
|
+
throw createDimensionMismatchError(this.options.dimensions, queryVector.length, 'vector search', { operationContext: 'VectorIndex.search' });
|
|
259
|
+
}
|
|
260
|
+
if (this.currentSize === 0) {
|
|
261
|
+
return { neighbors: [], distances: [] };
|
|
262
|
+
}
|
|
263
|
+
try {
|
|
264
|
+
const result = this.index.searchKnn(queryVector, Math.min(k, this.currentSize), undefined);
|
|
265
|
+
return {
|
|
266
|
+
neighbors: result.neighbors,
|
|
267
|
+
distances: result.distances
|
|
268
|
+
};
|
|
269
|
+
}
|
|
270
|
+
catch (error) {
|
|
271
|
+
throw new Error(`Search failed: ${error}`);
|
|
272
|
+
}
|
|
273
|
+
}
|
|
274
|
+
/**
|
|
275
|
+
* Get current number of vectors in the index
|
|
276
|
+
*/
|
|
277
|
+
getCurrentCount() {
|
|
278
|
+
return this.currentSize;
|
|
279
|
+
}
|
|
280
|
+
/**
|
|
281
|
+
* Check if index exists on disk
|
|
282
|
+
*/
|
|
283
|
+
indexExists() {
|
|
284
|
+
return existsSync(this.indexPath);
|
|
285
|
+
}
|
|
286
|
+
/**
|
|
287
|
+
* Set search parameters for query time
|
|
288
|
+
*/
|
|
289
|
+
setEf(ef) {
|
|
290
|
+
if (!this.index) {
|
|
291
|
+
throw new Error('Index not initialized');
|
|
292
|
+
}
|
|
293
|
+
try {
|
|
294
|
+
// hnswlib-wasm might not have setEf method, check if it exists
|
|
295
|
+
if (typeof this.index.setEfSearch === 'function') {
|
|
296
|
+
this.index.setEfSearch(ef);
|
|
297
|
+
console.log(`Set efSearch to ${ef}`);
|
|
298
|
+
}
|
|
299
|
+
else {
|
|
300
|
+
console.log(`setEfSearch not available in hnswlib-wasm`);
|
|
301
|
+
}
|
|
302
|
+
}
|
|
303
|
+
catch (error) {
|
|
304
|
+
console.log(`Failed to set ef: ${error}`);
|
|
305
|
+
}
|
|
306
|
+
}
|
|
307
|
+
/**
|
|
308
|
+
* Resize index to accommodate more vectors
|
|
309
|
+
*/
|
|
310
|
+
resizeIndex(newMaxElements) {
|
|
311
|
+
if (!this.index) {
|
|
312
|
+
throw new Error('Index not initialized');
|
|
313
|
+
}
|
|
314
|
+
if (newMaxElements <= this.options.maxElements) {
|
|
315
|
+
throw new Error(`New max elements (${newMaxElements}) must be greater than current (${this.options.maxElements})`);
|
|
316
|
+
}
|
|
317
|
+
try {
|
|
318
|
+
this.index.resizeIndex(newMaxElements);
|
|
319
|
+
this.options.maxElements = newMaxElements;
|
|
320
|
+
console.log(`Resized index to accommodate ${newMaxElements} vectors`);
|
|
321
|
+
}
|
|
322
|
+
catch (error) {
|
|
323
|
+
throw new Error(`Failed to resize index: ${error}`);
|
|
324
|
+
}
|
|
325
|
+
}
|
|
326
|
+
/**
|
|
327
|
+
* Get index options (for external access to configuration)
|
|
328
|
+
*/
|
|
329
|
+
getOptions() {
|
|
330
|
+
return { ...this.options };
|
|
331
|
+
}
|
|
332
|
+
}
|
|
333
|
+
//# sourceMappingURL=vector-index.js.map
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DOM polyfills for Node.js environment
|
|
3
|
+
* Required for transformers.js and other browser-dependent libraries
|
|
4
|
+
*/
|
|
5
|
+
import { JSDOM } from 'jsdom';
|
|
6
|
+
// Install the polyfills only when no global `window` exists (Node.js rather than a browser)
if (typeof window === 'undefined') {
    console.log('Setting up DOM polyfills for Node.js environment...');
    // Assigns `supply()` to `host[key]` only when nothing is defined there yet;
    // the supplier is evaluated lazily so existing globals are never touched.
    const installIfAbsent = (host, key, supply) => {
        if (typeof host[key] === 'undefined') {
            host[key] = supply();
        }
    };
    // Minimal DOM environment backing the window/document/navigator globals
    const dom = new JSDOM('<!DOCTYPE html><html><body></body></html>', {
        pretendToBeVisual: true,
        resources: 'usable'
    });
    // Global objects that transformers.js expects
    installIfAbsent(globalThis, 'self', () => globalThis);
    // Also set on `global` for older Node.js versions
    installIfAbsent(global, 'self', () => global);
    console.log('DOM polyfills set up successfully. self is now:', typeof self !== 'undefined' ? 'defined' : 'undefined');
    installIfAbsent(globalThis, 'window', () => dom.window);
    installIfAbsent(globalThis, 'document', () => dom.window.document);
    // Additional polyfill that might be needed
    installIfAbsent(globalThis, 'navigator', () => dom.window.navigator);
    // Note: createImageBitmap is intentionally NOT polyfilled with a fake implementation.
    // RawImage.fromURL() handles image loading correctly without it, and a fake
    // createImageBitmap that throws errors breaks image loading.
}
|
|
37
|
+
//# sourceMappingURL=dom-polyfills.js.map
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Factory exports for creating RAG instances
|
|
3
|
+
* Provides convenient factory functions for common use cases
|
|
4
|
+
*
|
|
5
|
+
* This module serves as the main entry point for factory functions that
|
|
6
|
+
* simplify the creation of search and ingestion systems.
|
|
7
|
+
* The factories handle complex initialization while providing clean APIs.
|
|
8
|
+
*
|
|
9
|
+
* MAIN FACTORY CLASSES:
|
|
10
|
+
* - IngestionFactory: Creates IngestionPipeline instances for document ingestion
|
|
11
|
+
* - SearchFactory: Creates SearchEngine with automatic mode detection (recommended)
|
|
12
|
+
*
|
|
13
|
+
* @example
|
|
14
|
+
* ```typescript
|
|
15
|
+
* import { IngestionFactory, SearchFactory } from './factories';
|
|
16
|
+
*
|
|
17
|
+
* // Create ingestion pipeline
|
|
18
|
+
* const ingestion = await IngestionFactory.create('./db.sqlite', './index.bin');
|
|
19
|
+
*
|
|
20
|
+
* // Create search engine with automatic mode detection
|
|
21
|
+
* const search = await SearchFactory.create('./index.bin', './db.sqlite');
|
|
22
|
+
* ```
|
|
23
|
+
*/
|
|
24
|
+
export { IngestionFactory } from './ingestion-factory.js';
|
|
25
|
+
export { SearchFactory } from './search-factory.js';
|
|
26
|
+
export type { IngestionFactoryOptions, ContentSystemConfig } from './ingestion-factory.js';
|
|
27
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Factory exports for creating RAG instances
|
|
3
|
+
* Provides convenient factory functions for common use cases
|
|
4
|
+
*
|
|
5
|
+
* This module serves as the main entry point for factory functions that
|
|
6
|
+
* simplify the creation of search and ingestion systems.
|
|
7
|
+
* The factories handle complex initialization while providing clean APIs.
|
|
8
|
+
*
|
|
9
|
+
* MAIN FACTORY CLASSES:
|
|
10
|
+
* - IngestionFactory: Creates IngestionPipeline instances for document ingestion
|
|
11
|
+
* - SearchFactory: Creates SearchEngine with automatic mode detection (recommended)
|
|
12
|
+
*
|
|
13
|
+
* @example
|
|
14
|
+
* ```typescript
|
|
15
|
+
* import { IngestionFactory, SearchFactory } from './factories';
|
|
16
|
+
*
|
|
17
|
+
* // Create ingestion pipeline
|
|
18
|
+
* const ingestion = await IngestionFactory.create('./db.sqlite', './index.bin');
|
|
19
|
+
*
|
|
20
|
+
* // Create search engine with automatic mode detection
|
|
21
|
+
* const search = await SearchFactory.create('./index.bin', './db.sqlite');
|
|
22
|
+
* ```
|
|
23
|
+
*/
|
|
24
|
+
// Main factory classes
|
|
25
|
+
export { IngestionFactory } from './ingestion-factory.js';
|
|
26
|
+
// Polymorphic search factory (recommended for automatic mode detection)
|
|
27
|
+
// Re-exported from core for convenience
|
|
28
|
+
export { SearchFactory } from './search-factory.js';
|
|
29
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Factory functions for creating text-specific search and ingestion instances
|
|
3
|
+
* Handles complex initialization logic while providing clean API for common use cases
|
|
4
|
+
*
|
|
5
|
+
* FACTORY PATTERN BENEFITS:
|
|
6
|
+
* - Abstracts complex initialization (model loading, database setup, index initialization)
|
|
7
|
+
* - Provides simple API for common use cases while preserving access to dependency injection
|
|
8
|
+
* - Clear validation and error handling without fallback mechanisms
|
|
9
|
+
* - Supports different embedding models and configurations
|
|
10
|
+
* - Enables clean separation between simple usage and advanced customization
|
|
11
|
+
*
|
|
12
|
+
* MODE SELECTION GUIDE:
|
|
13
|
+
* - Text Mode (default): Optimized for text-only content
|
|
14
|
+
* - Uses sentence-transformer models (fast, accurate for text)
|
|
15
|
+
* - Images converted to text descriptions
|
|
16
|
+
* - Best for: document search, text clustering, semantic similarity
|
|
17
|
+
*
|
|
18
|
+
* - Multimodal Mode: Optimized for mixed text/image content
|
|
19
|
+
* - Uses CLIP models (unified embedding space)
|
|
20
|
+
* - True cross-modal search (text finds images, images find text)
|
|
21
|
+
* - Best for: image search, visual QA, multimodal retrieval
|
|
22
|
+
*
|
|
23
|
+
* USAGE PATTERNS:
|
|
24
|
+
*
|
|
25
|
+
* 1. Mode Selection:
|
|
26
|
+
* ```typescript
|
|
27
|
+
* // Text mode (default) - optimized for text-only content
|
|
28
|
+
* const textIngestion = await IngestionFactory.create('./db.sqlite', './index.bin', {
|
|
29
|
+
* mode: 'text',
|
|
30
|
+
* embeddingModel: 'sentence-transformers/all-MiniLM-L6-v2'
|
|
31
|
+
* });
|
|
32
|
+
*
|
|
33
|
+
* // Multimodal mode - enables cross-modal search
|
|
34
|
+
* const multimodalIngestion = await IngestionFactory.create('./db.sqlite', './index.bin', {
|
|
35
|
+
* mode: 'multimodal',
|
|
36
|
+
* embeddingModel: 'Xenova/clip-vit-base-patch32',
|
|
37
|
+
* rerankingStrategy: 'text-derived'
|
|
38
|
+
* });
|
|
39
|
+
* ```
|
|
40
|
+
*/
|
|
41
|
+
import { IngestionPipeline } from '../core/ingestion.js';
|
|
42
|
+
/**
 * Settings for the content system. Every field is optional.
 */
export interface ContentSystemConfig {
    /** Path of the content directory. Default: '.raglite/content'. */
    contentDir?: string;
    /** Upper bound on a single file's size, in bytes. Default: 50MB. */
    maxFileSize?: number;
    /** Upper bound on the content directory's total size, in bytes. Default: 2GB. */
    maxContentDirSize?: number;
    /** Turns content deduplication on or off. Default: true. */
    enableDeduplication?: boolean;
    /** Turns storage tracking on or off. Default: true. */
    enableStorageTracking?: boolean;
}
|
|
57
|
+
/**
 * Optional overrides accepted by the ingestion factory. Every field is optional.
 */
export interface IngestionFactoryOptions {
    /** Overrides the embedding model name. */
    embeddingModel?: string;
    /** Overrides the embedding batch size. */
    batchSize?: number;
    /** Overrides the chunk size. */
    chunkSize?: number;
    /** Overrides the chunk overlap. */
    chunkOverlap?: number;
    /** When set, forces the index to be rebuilt. */
    forceRebuild?: boolean;
    /** Ingestion pipeline mode: text or multimodal. */
    mode?: 'text' | 'multimodal';
    /** Reranking strategy used in multimodal mode. */
    rerankingStrategy?: 'cross-encoder' | 'text-derived' | 'metadata' | 'hybrid' | 'disabled';
    /** Settings passed to the content system. */
    contentSystemConfig?: ContentSystemConfig;
}
|
|
78
|
+
/**
 * Factory that builds ready-to-use IngestionPipeline instances, taking care of
 * model loading, database initialization, and index setup.
 *
 * Initialization steps performed by the factory:
 * 1. Creates necessary directories if they don't exist
 * 2. Validates mode-model compatibility (no fallback mechanisms)
 * 3. Loads and validates embedding models with clear error reporting
 * 4. Establishes database connections and initializes schema
 * 5. Stores mode configuration in database for automatic detection
 * 6. Creates or loads vector indexes with proper configuration
 * 7. Creates IngestionPipeline with proper dependency injection
 *
 * Mode configuration:
 * - Text mode (default): sentence-transformer models for text-only content
 * - Multimodal mode: CLIP models for mixed text/image content
 * - The mode is stored in the database and auto-detected during search
 * - Validation prevents mode-model mismatches
 *
 * @example
 * ```typescript
 * // Basic usage
 * const ingestion = await IngestionFactory.create('./db.sqlite', './index.bin');
 * await ingestion.ingestDirectory('./documents');
 *
 * // With custom configuration
 * const ingestion = await IngestionFactory.create('./db.sqlite', './index.bin', {
 *   embeddingModel: 'all-MiniLM-L6-v2',
 *   chunkSize: 512,
 *   chunkOverlap: 50,
 *   forceRebuild: true
 * });
 *
 * // With defaults
 * const ingestion = await IngestionFactory.createWithDefaults({
 *   batchSize: 32 // Faster processing
 * });
 * ```
 */
export declare class IngestionFactory {
    /**
     * Build an IngestionPipeline for the given database and index paths.
     *
     * The complete initialization process is handled here:
     * - Creates necessary directories if they don't exist
     * - Loads the embedding model (with lazy initialization)
     * - Opens the database connection and initializes the schema
     * - Creates or loads the vector index (with force rebuild option)
     * - Creates the IngestionPipeline with dependency injection
     * - Validates the complete setup
     *
     * @param dbPath - Path to the SQLite database file (will be created if doesn't exist)
     * @param indexPath - Path to the vector index file (will be created if doesn't exist)
     * @param options - Optional configuration overrides
     * @param options.embeddingModel - Override embedding model (default: from config)
     * @param options.batchSize - Override embedding batch size (default: from config)
     * @param options.chunkSize - Override chunk size (default: from config)
     * @param options.chunkOverlap - Override chunk overlap (default: from config)
     * @param options.forceRebuild - Force rebuild of existing index (default: false)
     * @param options.mode - Mode for the ingestion pipeline ('text' or 'multimodal')
     * @param options.rerankingStrategy - Reranking strategy for multimodal mode
     * @param options.contentSystemConfig - Content system configuration options
     * @param options.contentSystemConfig.contentDir - Content directory path (default: '.raglite/content')
     * @param options.contentSystemConfig.maxFileSize - Maximum file size in bytes (default: 50MB)
     * @param options.contentSystemConfig.maxContentDirSize - Maximum content directory size (default: 2GB)
     * @param options.contentSystemConfig.enableDeduplication - Enable content deduplication (default: true)
     * @param options.contentSystemConfig.enableStorageTracking - Enable storage tracking (default: true)
     * @returns Promise resolving to configured IngestionPipeline
     * @throws {Error} If initialization fails
     *
     * @example
     * ```typescript
     * // Create ingestion pipeline with default content system
     * const ingestion = await IngestionFactory.create('./my-db.sqlite', './my-index.bin');
     *
     * // Create with custom content system configuration
     * const ingestion = await IngestionFactory.create('./my-db.sqlite', './my-index.bin', {
     *   contentSystemConfig: {
     *     contentDir: './custom-content',
     *     maxFileSize: 100 * 1024 * 1024, // 100MB
     *     maxContentDirSize: 5 * 1024 * 1024 * 1024, // 5GB
     *     enableDeduplication: true
     *   }
     * });
     *
     * // Ingest documents from directory
     * const result = await ingestion.ingestDirectory('./documents');
     * console.log(`Processed ${result.documentsProcessed} documents`);
     *
     * // Ingest content from memory (MCP integration)
     * const contentId = await ingestion.ingestFromMemory(buffer, {
     *   displayName: 'uploaded-file.pdf',
     *   contentType: 'application/pdf'
     * });
     *
     * // Clean up when done
     * await ingestion.cleanup();
     * ```
     */
    static create(dbPath: string, indexPath: string, options?: IngestionFactoryOptions): Promise<IngestionPipeline>;
    /**
     * Build an IngestionPipeline using automatic path resolution: default
     * paths are derived from the current working directory.
     *
     * @param options - Optional configuration overrides
     * @returns Promise resolving to configured IngestionPipeline
     */
    static createWithDefaults(options?: IngestionFactoryOptions): Promise<IngestionPipeline>;
    /**
     * Handles mode storage during ingestion: creates or validates system info
     * based on the provided mode and options.
     * @private
     */
    private static handleModeStorage;
    /**
     * Updates system info in the database.
     * @private
     */
    private static updateSystemInfo;
    /**
     * Validates and prepares the content system configuration.
     * @private
     */
    private static validateAndPrepareContentSystemConfig;
}
|
|
200
|
+
//# sourceMappingURL=ingestion-factory.d.ts.map
|