rag-lite-ts 2.1.0 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{cli → cjs/cli}/indexer.js +1 -1
- package/dist/{cli → cjs/cli}/search.js +5 -10
- package/dist/{core → cjs/core}/binary-index-format.d.ts +28 -2
- package/dist/cjs/core/binary-index-format.js +291 -0
- package/dist/{core → cjs/core}/ingestion.d.ts +5 -1
- package/dist/{core → cjs/core}/ingestion.js +76 -9
- package/dist/{core → cjs/core}/model-validator.js +1 -1
- package/dist/{core → cjs/core}/reranking-strategies.js +4 -5
- package/dist/{core → cjs/core}/search.js +2 -1
- package/dist/{core → cjs/core}/types.d.ts +1 -1
- package/dist/{core → cjs/core}/vector-index.d.ts +4 -0
- package/dist/{core → cjs/core}/vector-index.js +10 -2
- package/dist/{file-processor.d.ts → cjs/file-processor.d.ts} +2 -0
- package/dist/{file-processor.js → cjs/file-processor.js} +20 -0
- package/dist/{index-manager.d.ts → cjs/index-manager.d.ts} +17 -1
- package/dist/{index-manager.js → cjs/index-manager.js} +148 -7
- package/dist/{multimodal → cjs/multimodal}/clip-embedder.js +71 -66
- package/dist/esm/api-errors.d.ts +90 -0
- package/dist/esm/api-errors.js +320 -0
- package/dist/esm/cli/indexer.d.ts +11 -0
- package/dist/esm/cli/indexer.js +471 -0
- package/dist/esm/cli/search.d.ts +7 -0
- package/dist/esm/cli/search.js +332 -0
- package/dist/esm/cli.d.ts +3 -0
- package/dist/esm/cli.js +529 -0
- package/dist/esm/config.d.ts +51 -0
- package/dist/esm/config.js +79 -0
- package/dist/esm/core/abstract-embedder.d.ts +125 -0
- package/dist/esm/core/abstract-embedder.js +264 -0
- package/dist/esm/core/actionable-error-messages.d.ts +60 -0
- package/dist/esm/core/actionable-error-messages.js +397 -0
- package/dist/esm/core/adapters.d.ts +93 -0
- package/dist/esm/core/adapters.js +139 -0
- package/dist/esm/core/batch-processing-optimizer.d.ts +155 -0
- package/dist/esm/core/batch-processing-optimizer.js +536 -0
- package/dist/esm/core/binary-index-format.d.ts +78 -0
- package/dist/esm/core/binary-index-format.js +291 -0
- package/dist/esm/core/chunker.d.ts +119 -0
- package/dist/esm/core/chunker.js +73 -0
- package/dist/esm/core/cli-database-utils.d.ts +53 -0
- package/dist/esm/core/cli-database-utils.js +239 -0
- package/dist/esm/core/config.d.ts +102 -0
- package/dist/esm/core/config.js +247 -0
- package/dist/esm/core/content-errors.d.ts +111 -0
- package/dist/esm/core/content-errors.js +362 -0
- package/dist/esm/core/content-manager.d.ts +335 -0
- package/dist/esm/core/content-manager.js +1476 -0
- package/dist/esm/core/content-performance-optimizer.d.ts +150 -0
- package/dist/esm/core/content-performance-optimizer.js +516 -0
- package/dist/esm/core/content-resolver.d.ts +104 -0
- package/dist/esm/core/content-resolver.js +285 -0
- package/dist/esm/core/cross-modal-search.d.ts +164 -0
- package/dist/esm/core/cross-modal-search.js +342 -0
- package/dist/esm/core/database-connection-manager.d.ts +109 -0
- package/dist/esm/core/database-connection-manager.js +310 -0
- package/dist/esm/core/db.d.ts +213 -0
- package/dist/esm/core/db.js +895 -0
- package/dist/esm/core/embedder-factory.d.ts +154 -0
- package/dist/esm/core/embedder-factory.js +311 -0
- package/dist/esm/core/error-handler.d.ts +112 -0
- package/dist/esm/core/error-handler.js +239 -0
- package/dist/esm/core/index.d.ts +59 -0
- package/dist/esm/core/index.js +69 -0
- package/dist/esm/core/ingestion.d.ts +202 -0
- package/dist/esm/core/ingestion.js +901 -0
- package/dist/esm/core/interfaces.d.ts +408 -0
- package/dist/esm/core/interfaces.js +106 -0
- package/dist/esm/core/lazy-dependency-loader.d.ts +147 -0
- package/dist/esm/core/lazy-dependency-loader.js +435 -0
- package/dist/esm/core/mode-detection-service.d.ts +150 -0
- package/dist/esm/core/mode-detection-service.js +565 -0
- package/dist/esm/core/mode-model-validator.d.ts +92 -0
- package/dist/esm/core/mode-model-validator.js +203 -0
- package/dist/esm/core/model-registry.d.ts +116 -0
- package/dist/esm/core/model-registry.js +411 -0
- package/dist/esm/core/model-validator.d.ts +217 -0
- package/dist/esm/core/model-validator.js +782 -0
- package/dist/esm/core/path-manager.d.ts +47 -0
- package/dist/esm/core/path-manager.js +71 -0
- package/dist/esm/core/raglite-paths.d.ts +121 -0
- package/dist/esm/core/raglite-paths.js +145 -0
- package/dist/esm/core/reranking-config.d.ts +42 -0
- package/dist/esm/core/reranking-config.js +147 -0
- package/dist/esm/core/reranking-factory.d.ts +92 -0
- package/dist/esm/core/reranking-factory.js +410 -0
- package/dist/esm/core/reranking-strategies.d.ts +310 -0
- package/dist/esm/core/reranking-strategies.js +650 -0
- package/dist/esm/core/resource-cleanup.d.ts +163 -0
- package/dist/esm/core/resource-cleanup.js +371 -0
- package/dist/esm/core/resource-manager.d.ts +212 -0
- package/dist/esm/core/resource-manager.js +564 -0
- package/dist/esm/core/search-pipeline.d.ts +111 -0
- package/dist/esm/core/search-pipeline.js +287 -0
- package/dist/esm/core/search.d.ts +141 -0
- package/dist/esm/core/search.js +320 -0
- package/dist/esm/core/streaming-operations.d.ts +145 -0
- package/dist/esm/core/streaming-operations.js +409 -0
- package/dist/esm/core/types.d.ts +66 -0
- package/dist/esm/core/types.js +6 -0
- package/dist/esm/core/universal-embedder.d.ts +177 -0
- package/dist/esm/core/universal-embedder.js +139 -0
- package/dist/esm/core/validation-messages.d.ts +99 -0
- package/dist/esm/core/validation-messages.js +334 -0
- package/dist/esm/core/vector-index.d.ts +72 -0
- package/dist/esm/core/vector-index.js +333 -0
- package/dist/esm/dom-polyfills.d.ts +6 -0
- package/dist/esm/dom-polyfills.js +37 -0
- package/dist/esm/factories/index.d.ts +27 -0
- package/dist/esm/factories/index.js +29 -0
- package/dist/esm/factories/ingestion-factory.d.ts +200 -0
- package/dist/esm/factories/ingestion-factory.js +477 -0
- package/dist/esm/factories/search-factory.d.ts +154 -0
- package/dist/esm/factories/search-factory.js +344 -0
- package/dist/esm/file-processor.d.ts +147 -0
- package/dist/esm/file-processor.js +963 -0
- package/dist/esm/index-manager.d.ts +116 -0
- package/dist/esm/index-manager.js +598 -0
- package/dist/esm/index.d.ts +75 -0
- package/dist/esm/index.js +110 -0
- package/dist/esm/indexer.d.ts +7 -0
- package/dist/esm/indexer.js +54 -0
- package/dist/esm/ingestion.d.ts +63 -0
- package/dist/esm/ingestion.js +124 -0
- package/dist/esm/mcp-server.d.ts +46 -0
- package/dist/esm/mcp-server.js +1820 -0
- package/dist/esm/multimodal/clip-embedder.d.ts +327 -0
- package/dist/esm/multimodal/clip-embedder.js +996 -0
- package/dist/esm/multimodal/index.d.ts +6 -0
- package/dist/esm/multimodal/index.js +6 -0
- package/dist/esm/preprocess.d.ts +19 -0
- package/dist/esm/preprocess.js +203 -0
- package/dist/esm/preprocessors/index.d.ts +17 -0
- package/dist/esm/preprocessors/index.js +38 -0
- package/dist/esm/preprocessors/mdx.d.ts +25 -0
- package/dist/esm/preprocessors/mdx.js +101 -0
- package/dist/esm/preprocessors/mermaid.d.ts +68 -0
- package/dist/esm/preprocessors/mermaid.js +329 -0
- package/dist/esm/preprocessors/registry.d.ts +56 -0
- package/dist/esm/preprocessors/registry.js +179 -0
- package/dist/esm/run-error-recovery-tests.d.ts +7 -0
- package/dist/esm/run-error-recovery-tests.js +101 -0
- package/dist/esm/search-standalone.d.ts +7 -0
- package/dist/esm/search-standalone.js +117 -0
- package/dist/esm/search.d.ts +99 -0
- package/dist/esm/search.js +177 -0
- package/dist/esm/test-utils.d.ts +18 -0
- package/dist/esm/test-utils.js +27 -0
- package/dist/esm/text/chunker.d.ts +33 -0
- package/dist/esm/text/chunker.js +279 -0
- package/dist/esm/text/embedder.d.ts +111 -0
- package/dist/esm/text/embedder.js +386 -0
- package/dist/esm/text/index.d.ts +8 -0
- package/dist/esm/text/index.js +9 -0
- package/dist/esm/text/preprocessors/index.d.ts +17 -0
- package/dist/esm/text/preprocessors/index.js +38 -0
- package/dist/esm/text/preprocessors/mdx.d.ts +25 -0
- package/dist/esm/text/preprocessors/mdx.js +101 -0
- package/dist/esm/text/preprocessors/mermaid.d.ts +68 -0
- package/dist/esm/text/preprocessors/mermaid.js +330 -0
- package/dist/esm/text/preprocessors/registry.d.ts +56 -0
- package/dist/esm/text/preprocessors/registry.js +180 -0
- package/dist/esm/text/reranker.d.ts +49 -0
- package/dist/esm/text/reranker.js +274 -0
- package/dist/esm/text/sentence-transformer-embedder.d.ts +96 -0
- package/dist/esm/text/sentence-transformer-embedder.js +340 -0
- package/dist/esm/text/tokenizer.d.ts +22 -0
- package/dist/esm/text/tokenizer.js +64 -0
- package/dist/esm/types.d.ts +83 -0
- package/dist/esm/types.js +3 -0
- package/dist/esm/utils/vector-math.d.ts +31 -0
- package/dist/esm/utils/vector-math.js +70 -0
- package/package.json +30 -12
- package/dist/core/binary-index-format.js +0 -122
- /package/dist/{api-errors.d.ts → cjs/api-errors.d.ts} +0 -0
- /package/dist/{api-errors.js → cjs/api-errors.js} +0 -0
- /package/dist/{cli → cjs/cli}/indexer.d.ts +0 -0
- /package/dist/{cli → cjs/cli}/search.d.ts +0 -0
- /package/dist/{cli.d.ts → cjs/cli.d.ts} +0 -0
- /package/dist/{cli.js → cjs/cli.js} +0 -0
- /package/dist/{config.d.ts → cjs/config.d.ts} +0 -0
- /package/dist/{config.js → cjs/config.js} +0 -0
- /package/dist/{core → cjs/core}/abstract-embedder.d.ts +0 -0
- /package/dist/{core → cjs/core}/abstract-embedder.js +0 -0
- /package/dist/{core → cjs/core}/actionable-error-messages.d.ts +0 -0
- /package/dist/{core → cjs/core}/actionable-error-messages.js +0 -0
- /package/dist/{core → cjs/core}/adapters.d.ts +0 -0
- /package/dist/{core → cjs/core}/adapters.js +0 -0
- /package/dist/{core → cjs/core}/batch-processing-optimizer.d.ts +0 -0
- /package/dist/{core → cjs/core}/batch-processing-optimizer.js +0 -0
- /package/dist/{core → cjs/core}/chunker.d.ts +0 -0
- /package/dist/{core → cjs/core}/chunker.js +0 -0
- /package/dist/{core → cjs/core}/cli-database-utils.d.ts +0 -0
- /package/dist/{core → cjs/core}/cli-database-utils.js +0 -0
- /package/dist/{core → cjs/core}/config.d.ts +0 -0
- /package/dist/{core → cjs/core}/config.js +0 -0
- /package/dist/{core → cjs/core}/content-errors.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-errors.js +0 -0
- /package/dist/{core → cjs/core}/content-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-manager.js +0 -0
- /package/dist/{core → cjs/core}/content-performance-optimizer.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-performance-optimizer.js +0 -0
- /package/dist/{core → cjs/core}/content-resolver.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-resolver.js +0 -0
- /package/dist/{core → cjs/core}/cross-modal-search.d.ts +0 -0
- /package/dist/{core → cjs/core}/cross-modal-search.js +0 -0
- /package/dist/{core → cjs/core}/database-connection-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/database-connection-manager.js +0 -0
- /package/dist/{core → cjs/core}/db.d.ts +0 -0
- /package/dist/{core → cjs/core}/db.js +0 -0
- /package/dist/{core → cjs/core}/embedder-factory.d.ts +0 -0
- /package/dist/{core → cjs/core}/embedder-factory.js +0 -0
- /package/dist/{core → cjs/core}/error-handler.d.ts +0 -0
- /package/dist/{core → cjs/core}/error-handler.js +0 -0
- /package/dist/{core → cjs/core}/index.d.ts +0 -0
- /package/dist/{core → cjs/core}/index.js +0 -0
- /package/dist/{core → cjs/core}/interfaces.d.ts +0 -0
- /package/dist/{core → cjs/core}/interfaces.js +0 -0
- /package/dist/{core → cjs/core}/lazy-dependency-loader.d.ts +0 -0
- /package/dist/{core → cjs/core}/lazy-dependency-loader.js +0 -0
- /package/dist/{core → cjs/core}/mode-detection-service.d.ts +0 -0
- /package/dist/{core → cjs/core}/mode-detection-service.js +0 -0
- /package/dist/{core → cjs/core}/mode-model-validator.d.ts +0 -0
- /package/dist/{core → cjs/core}/mode-model-validator.js +0 -0
- /package/dist/{core → cjs/core}/model-registry.d.ts +0 -0
- /package/dist/{core → cjs/core}/model-registry.js +0 -0
- /package/dist/{core → cjs/core}/model-validator.d.ts +0 -0
- /package/dist/{core → cjs/core}/path-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/path-manager.js +0 -0
- /package/dist/{core → cjs/core}/raglite-paths.d.ts +0 -0
- /package/dist/{core → cjs/core}/raglite-paths.js +0 -0
- /package/dist/{core → cjs/core}/reranking-config.d.ts +0 -0
- /package/dist/{core → cjs/core}/reranking-config.js +0 -0
- /package/dist/{core → cjs/core}/reranking-factory.d.ts +0 -0
- /package/dist/{core → cjs/core}/reranking-factory.js +0 -0
- /package/dist/{core → cjs/core}/reranking-strategies.d.ts +0 -0
- /package/dist/{core → cjs/core}/resource-cleanup.d.ts +0 -0
- /package/dist/{core → cjs/core}/resource-cleanup.js +0 -0
- /package/dist/{core → cjs/core}/resource-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/resource-manager.js +0 -0
- /package/dist/{core → cjs/core}/search-pipeline.d.ts +0 -0
- /package/dist/{core → cjs/core}/search-pipeline.js +0 -0
- /package/dist/{core → cjs/core}/search.d.ts +0 -0
- /package/dist/{core → cjs/core}/streaming-operations.d.ts +0 -0
- /package/dist/{core → cjs/core}/streaming-operations.js +0 -0
- /package/dist/{core → cjs/core}/types.js +0 -0
- /package/dist/{core → cjs/core}/universal-embedder.d.ts +0 -0
- /package/dist/{core → cjs/core}/universal-embedder.js +0 -0
- /package/dist/{core → cjs/core}/validation-messages.d.ts +0 -0
- /package/dist/{core → cjs/core}/validation-messages.js +0 -0
- /package/dist/{dom-polyfills.d.ts → cjs/dom-polyfills.d.ts} +0 -0
- /package/dist/{dom-polyfills.js → cjs/dom-polyfills.js} +0 -0
- /package/dist/{factories → cjs/factories}/index.d.ts +0 -0
- /package/dist/{factories → cjs/factories}/index.js +0 -0
- /package/dist/{factories → cjs/factories}/ingestion-factory.d.ts +0 -0
- /package/dist/{factories → cjs/factories}/ingestion-factory.js +0 -0
- /package/dist/{factories → cjs/factories}/search-factory.d.ts +0 -0
- /package/dist/{factories → cjs/factories}/search-factory.js +0 -0
- /package/dist/{index.d.ts → cjs/index.d.ts} +0 -0
- /package/dist/{index.js → cjs/index.js} +0 -0
- /package/dist/{indexer.d.ts → cjs/indexer.d.ts} +0 -0
- /package/dist/{indexer.js → cjs/indexer.js} +0 -0
- /package/dist/{ingestion.d.ts → cjs/ingestion.d.ts} +0 -0
- /package/dist/{ingestion.js → cjs/ingestion.js} +0 -0
- /package/dist/{mcp-server.d.ts → cjs/mcp-server.d.ts} +0 -0
- /package/dist/{mcp-server.js → cjs/mcp-server.js} +0 -0
- /package/dist/{multimodal → cjs/multimodal}/clip-embedder.d.ts +0 -0
- /package/dist/{multimodal → cjs/multimodal}/index.d.ts +0 -0
- /package/dist/{multimodal → cjs/multimodal}/index.js +0 -0
- /package/dist/{preprocess.d.ts → cjs/preprocess.d.ts} +0 -0
- /package/dist/{preprocess.js → cjs/preprocess.js} +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/index.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/index.js +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mdx.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mdx.js +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mermaid.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mermaid.js +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/registry.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/registry.js +0 -0
- /package/dist/{run-error-recovery-tests.d.ts → cjs/run-error-recovery-tests.d.ts} +0 -0
- /package/dist/{run-error-recovery-tests.js → cjs/run-error-recovery-tests.js} +0 -0
- /package/dist/{search-standalone.d.ts → cjs/search-standalone.d.ts} +0 -0
- /package/dist/{search-standalone.js → cjs/search-standalone.js} +0 -0
- /package/dist/{search.d.ts → cjs/search.d.ts} +0 -0
- /package/dist/{search.js → cjs/search.js} +0 -0
- /package/dist/{test-utils.d.ts → cjs/test-utils.d.ts} +0 -0
- /package/dist/{test-utils.js → cjs/test-utils.js} +0 -0
- /package/dist/{text → cjs/text}/chunker.d.ts +0 -0
- /package/dist/{text → cjs/text}/chunker.js +0 -0
- /package/dist/{text → cjs/text}/embedder.d.ts +0 -0
- /package/dist/{text → cjs/text}/embedder.js +0 -0
- /package/dist/{text → cjs/text}/index.d.ts +0 -0
- /package/dist/{text → cjs/text}/index.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/index.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/index.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mdx.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mdx.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mermaid.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mermaid.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/registry.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/registry.js +0 -0
- /package/dist/{text → cjs/text}/reranker.d.ts +0 -0
- /package/dist/{text → cjs/text}/reranker.js +0 -0
- /package/dist/{text → cjs/text}/sentence-transformer-embedder.d.ts +0 -0
- /package/dist/{text → cjs/text}/sentence-transformer-embedder.js +0 -0
- /package/dist/{text → cjs/text}/tokenizer.d.ts +0 -0
- /package/dist/{text → cjs/text}/tokenizer.js +0 -0
- /package/dist/{types.d.ts → cjs/types.d.ts} +0 -0
- /package/dist/{types.js → cjs/types.js} +0 -0
- /package/dist/{utils → cjs/utils}/vector-math.d.ts +0 -0
- /package/dist/{utils → cjs/utils}/vector-math.js +0 -0
|
@@ -0,0 +1,239 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CORE MODULE — Shared between text-only (rag-lite-ts) and future multimodal (rag-lite-mm)
|
|
3
|
+
* Model-agnostic. No transformer or modality-specific logic.
|
|
4
|
+
*/
|
|
5
|
+
import { EXIT_CODES } from './config.js';
|
|
6
|
+
/**
 * Labels used to classify a failure by the subsystem it belongs to.
 * Every key maps to a human-readable string value.
 */
export var ErrorCategory;
ErrorCategory = Object.assign(ErrorCategory || {}, {
    CONFIGURATION: "Configuration",
    DATABASE: "Database",
    MODEL: "Model",
    INDEX: "Index",
    FILE_SYSTEM: "File System",
    EMBEDDING: "Embedding",
    NETWORK: "Network",
    VALIDATION: "Validation",
    GENERAL: "General"
});
|
|
21
|
+
/**
 * Severity levels attached to an error. Each key maps to the identical
 * upper-case string value.
 */
export var ErrorSeverity;
ErrorSeverity = Object.assign(ErrorSeverity || {}, {
    FATAL: "FATAL",
    ERROR: "ERROR",
    WARNING: "WARNING",
    // Informational message
    INFO: "INFO"
});
|
|
31
|
+
/**
 * Error subtype that carries classification metadata next to its message.
 *
 * @param message - Description forwarded to the base `Error` constructor
 * @param category - Category label describing where the failure belongs
 * @param severity - Severity level; falls back to `ErrorSeverity.ERROR`
 * @param exitCode - Exit code tied to the failure; falls back to `EXIT_CODES.GENERAL_ERROR`
 * @param originalError - Optional underlying error that led to this one
 */
export class CategorizedError extends Error {
    category;
    severity;
    exitCode;
    originalError;
    constructor(message, category, severity = ErrorSeverity.ERROR, exitCode = EXIT_CODES.GENERAL_ERROR, originalError) {
        super(message);
        // Assigned in one pass, in the same order the fields are declared.
        Object.assign(this, {
            category,
            severity,
            exitCode,
            originalError,
            name: 'CategorizedError'
        });
    }
}
|
|
48
|
+
/**
 * Fallback error context used when no implementation-specific context is
 * supplied. It echoes the error's own message and offers generic advice.
 */
class DefaultErrorContext {
    /**
     * @param error - Error whose message is reported
     * @param context - Unused by the default implementation
     * @returns The error's `message` unchanged
     */
    getErrorMessage(error, context) {
        const { message } = error;
        return message;
    }
    /**
     * @param error - Unused by the default implementation
     * @param context - Unused by the default implementation
     * @returns A freshly created list of generic troubleshooting hints
     */
    getSuggestions(error, context) {
        const hints = [];
        hints.push('Check the error message above for specific details');
        hints.push('Try running the command again');
        hints.push('If the problem persists, please report it as a bug');
        return hints;
    }
}
|
|
63
|
+
// Module-wide error context. Starts as the generic default and can be
// replaced by an implementation through setErrorContext().
let globalErrorContext = new DefaultErrorContext();
|
|
65
|
+
/**
 * Replace the module-wide error context (dependency injection hook).
 * @param errorContext - Implementation-specific error context to install
 */
export function setErrorContext(errorContext) {
    globalErrorContext = errorContext;
}
|
|
72
|
+
/**
 * Read the module-wide error context currently in effect.
 * @returns The error context last installed, or the default one
 */
export function getErrorContext() {
    return globalErrorContext;
}
|
|
79
|
+
/**
 * Log an error in a uniform format and, for fatal severity, terminate the process.
 *
 * The message text comes from the error context (injected through `options.errorContext`
 * or the module-wide one), so implementations can customise the wording.
 *
 * @param error - Thrown value; non-Error values are wrapped via `new Error(String(error))`
 * @param context - Description of where the error occurred (included in the log line)
 * @param options - Optional `category`, `severity`, `exitCode`, `skipError`, `showStack`, `errorContext`
 */
export function handleError(error, context, options = {}) {
    const {
        category = ErrorCategory.GENERAL,
        severity = ErrorSeverity.ERROR,
        exitCode = EXIT_CODES.GENERAL_ERROR,
        skipError = false,
        showStack = false,
        errorContext = globalErrorContext
    } = options;
    const errorObj = error instanceof Error ? error : new Error(String(error));
    const errorMessage = errorContext.getErrorMessage(errorObj, context);
    const timestamp = new Date().toISOString();
    // Pick the icon that matches the severity; anything unrecognised is treated as informational
    let severityPrefix;
    switch (severity) {
        case ErrorSeverity.FATAL:
            severityPrefix = '🚨';
            break;
        case ErrorSeverity.ERROR:
            severityPrefix = '❌';
            break;
        case ErrorSeverity.WARNING:
            severityPrefix = '⚠️';
            break;
        default:
            severityPrefix = 'ℹ️';
    }
    const isFatal = severity === ErrorSeverity.FATAL;
    const isError = severity === ErrorSeverity.ERROR;
    const logMessage = `${severityPrefix} [${timestamp}] ${severity} in ${context} (${category}): ${errorMessage}`;
    // Fatal and error lines go to stderr, everything else to stdout
    if (!isFatal && !isError) {
        console.log(logMessage);
    }
    else {
        console.error(logMessage);
    }
    // Optional stack trace for debugging
    if (showStack && errorObj.stack) {
        console.error('Stack trace:', errorObj.stack);
    }
    if (isFatal) {
        // Fatal: print guidance, then exit with the requested code
        console.error('\nThe system cannot continue and will exit immediately.');
        provideContextualGuidance(category, exitCode, errorObj, context, errorContext);
        process.exit(exitCode);
        return;
    }
    if (isError && !skipError) {
        console.error('Operation failed. See error details above.');
    }
}
|
|
116
|
+
/**
 * Print troubleshooting hints to stderr.
 *
 * Suggestions returned by the error context take precedence; the generic,
 * category-based hints are printed only when the context returns none.
 *
 * @param category - Error category used to choose the generic hints
 * @param exitCode - Exit code of the failure (not used by the current logic)
 * @param error - Error passed through to the error context
 * @param context - Description of where the error occurred
 * @param errorContext - Provider of implementation-specific suggestions
 */
function provideContextualGuidance(category, exitCode, error, context, errorContext) {
    console.error('\nTroubleshooting guidance:');
    // Implementation-specific suggestions come first
    const customSuggestions = errorContext.getSuggestions(error, context);
    if (customSuggestions.length > 0) {
        const printBullet = (suggestion) => {
            console.error(`- ${suggestion}`);
        };
        customSuggestions.forEach(printBullet);
        return;
    }
    // Generic hints, keyed by category
    const guidanceByCategory = new Map([
        [ErrorCategory.CONFIGURATION, [
                '- Check your configuration file for syntax errors',
                '- Ensure all required fields are present and valid',
                '- Verify numeric values are positive numbers'
            ]],
        [ErrorCategory.DATABASE, [
                '- Try running "raglite rebuild" to fix database issues',
                '- Check that the database file is not locked by another process',
                '- Ensure you have write permissions in the current directory',
                '- Verify sufficient disk space is available'
            ]],
        [ErrorCategory.MODEL, [
                '- Ensure you have internet connection for model download',
                '- Check available disk space in the models directory',
                '- Try clearing the model cache and downloading again',
                '- Verify your system has sufficient memory (2GB+ recommended)'
            ]],
        [ErrorCategory.INDEX, [
                '- Try running "raglite rebuild" to recreate the vector index',
                '- Check available disk space for index files',
                '- Ensure the index file is not corrupted or locked'
            ]],
        [ErrorCategory.FILE_SYSTEM, [
                '- Check that files and directories exist and are accessible',
                '- Verify you have read/write permissions',
                '- Ensure paths are spelled correctly'
            ]],
        [ErrorCategory.EMBEDDING, [
                '- Check that the embedding model is properly loaded',
                '- Verify input text is not empty or malformed',
                '- Ensure sufficient memory for batch processing'
            ]],
        [ErrorCategory.NETWORK, [
                '- Check your internet connection',
                '- Verify firewall settings allow model downloads',
                '- Try again later if servers are temporarily unavailable'
            ]]
    ]);
    // Categories without a dedicated entry get the generic advice
    const fallbackGuidance = [
        '- Check the error message above for specific details',
        '- Try running the command again',
        '- If the problem persists, please report it as a bug'
    ];
    const lines = guidanceByCategory.get(category) ?? fallbackGuidance;
    for (const line of lines) {
        console.error(line);
    }
}
|
|
175
|
+
/**
 * Run an async operation and route any failure through `handleError`.
 *
 * After the error is handled, the outcome is decided in this order:
 * a defined `options.fallbackValue` is returned; a truthy `options.skipError`
 * yields `undefined`; otherwise the error is re-thrown unless the severity is FATAL.
 *
 * @param operation - Function to execute and await
 * @param context - Description of the operation, passed to `handleError`
 * @param options - Error handling options, also forwarded to `handleError`
 * @returns The operation's result, the fallback value, or `undefined`
 */
export async function safeExecute(operation, context, options = {}) {
    try {
        const result = await operation();
        return result;
    }
    catch (error) {
        handleError(error, context, options);
        if (options.fallbackValue !== undefined) {
            return options.fallbackValue;
        }
        // Skipped errors resolve to undefined; the FATAL branch is only a
        // formality because handleError calls process.exit for that severity.
        if (options.skipError || options.severity === ErrorSeverity.FATAL) {
            return undefined;
        }
        // No fallback and not skipping: propagate to the caller
        throw error;
    }
}
|
|
200
|
+
/**
 * Guard helper: throws a FATAL `CategorizedError` when `condition` is falsy.
 *
 * @param condition - Value that must be truthy
 * @param message - Error message used when the check fails
 * @param category - Error category; defaults to `ErrorCategory.VALIDATION`
 * @param exitCode - Exit code stored on the error; defaults to `EXIT_CODES.GENERAL_ERROR`
 */
export function assert(condition, message, category = ErrorCategory.VALIDATION, exitCode = EXIT_CODES.GENERAL_ERROR) {
    if (condition) {
        return;
    }
    throw new CategorizedError(message, category, ErrorSeverity.FATAL, exitCode);
}
|
|
212
|
+
/**
|
|
213
|
+
* Log progress with error context
|
|
214
|
+
* @param message - Progress message
|
|
215
|
+
* @param current - Current progress value
|
|
216
|
+
* @param total - Total progress value
|
|
217
|
+
*/
|
|
218
|
+
export function logProgress(message, current, total) {
|
|
219
|
+
const timestamp = new Date().toISOString();
|
|
220
|
+
let progressMsg = `ℹ️ [${timestamp}] ${message}`;
|
|
221
|
+
if (current !== undefined && total !== undefined) {
|
|
222
|
+
const percentage = Math.round((current / total) * 100);
|
|
223
|
+
progressMsg += ` (${current}/${total} - ${percentage}%)`;
|
|
224
|
+
}
|
|
225
|
+
console.log(progressMsg);
|
|
226
|
+
}
|
|
227
|
+
/**
 * Factory helpers that build a `CategorizedError` for common failure kinds.
 *
 * Every helper except `embedding` produces a FATAL error and accepts an
 * optional exit code override; `embedding` accepts a severity instead and
 * defaults to `ErrorSeverity.ERROR`.
 */
export const createError = {
    configuration(message, exitCode = EXIT_CODES.CONFIGURATION_ERROR) {
        return new CategorizedError(message, ErrorCategory.CONFIGURATION, ErrorSeverity.FATAL, exitCode);
    },
    database(message, exitCode = EXIT_CODES.DATABASE_ERROR) {
        return new CategorizedError(message, ErrorCategory.DATABASE, ErrorSeverity.FATAL, exitCode);
    },
    model(message, exitCode = EXIT_CODES.MODEL_ERROR) {
        return new CategorizedError(message, ErrorCategory.MODEL, ErrorSeverity.FATAL, exitCode);
    },
    index(message, exitCode = EXIT_CODES.INDEX_ERROR) {
        return new CategorizedError(message, ErrorCategory.INDEX, ErrorSeverity.FATAL, exitCode);
    },
    fileSystem(message, exitCode = EXIT_CODES.FILE_NOT_FOUND) {
        return new CategorizedError(message, ErrorCategory.FILE_SYSTEM, ErrorSeverity.FATAL, exitCode);
    },
    embedding(message, severity = ErrorSeverity.ERROR) {
        return new CategorizedError(message, ErrorCategory.EMBEDDING, severity);
    },
    validation(message, exitCode = EXIT_CODES.INVALID_ARGUMENTS) {
        return new CategorizedError(message, ErrorCategory.VALIDATION, ErrorSeverity.FATAL, exitCode);
    }
};
|
|
239
|
+
//# sourceMappingURL=error-handler.js.map
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CORE MODULE — Shared between text-only (rag-lite-ts) and future multimodal (rag-lite-mm)
|
|
3
|
+
* Model-agnostic. No transformer or modality-specific logic.
|
|
4
|
+
*
|
|
5
|
+
* This module provides the clean re-export surface for the core layer, enabling
|
|
6
|
+
* dependency injection patterns for different implementations (text-only, multimodal, etc.).
|
|
7
|
+
*
|
|
8
|
+
* DEPENDENCY INJECTION ARCHITECTURE:
|
|
9
|
+
*
|
|
10
|
+
* The core layer uses explicit dependency injection to maintain clean separation between
|
|
11
|
+
* model-agnostic logic and implementation-specific components:
|
|
12
|
+
*
|
|
13
|
+
* 1. Core Classes (SearchEngine, IngestionPipeline):
|
|
14
|
+
* - Accept injected functions (EmbedFunction, RerankFunction) in constructors
|
|
15
|
+
* - Coordinate model-agnostic operations (database, vector index, search pipeline)
|
|
16
|
+
* - No knowledge of specific embedding models or transformers
|
|
17
|
+
*
|
|
18
|
+
* 2. Dependency Injection Interfaces:
|
|
19
|
+
* - EmbedFunction: (query: string, contentType?: string) => Promise<EmbeddingResult>
|
|
20
|
+
* - RerankFunction: (query: string, results: SearchResult[], contentType?: string) => Promise<SearchResult[]>
|
|
21
|
+
* - Support different content types (text, image, etc.) and embedding dimensions
|
|
22
|
+
*
|
|
23
|
+
* 3. Usage Patterns:
|
|
24
|
+
*
|
|
25
|
+
* // Direct dependency injection (advanced users)
|
|
26
|
+
* const embedFn = createTextEmbedFunction();
|
|
27
|
+
* const rerankFn = createTextRerankFunction();
|
|
28
|
+
* const indexManager = new IndexManager('./index.bin');
|
|
29
|
+
* const db = await openDatabase('./db.sqlite');
|
|
30
|
+
* const search = new SearchEngine(embedFn, indexManager, db, rerankFn);
|
|
31
|
+
*
|
|
32
|
+
* // Factory pattern (recommended for common use cases)
|
|
33
|
+
* const search = await TextSearchFactory.create('./index.bin', './db.sqlite');
|
|
34
|
+
*
|
|
35
|
+
* 4. Extension Points:
|
|
36
|
+
* - New implementations (multimodal, custom models) implement the same interfaces
|
|
37
|
+
* - Core classes remain unchanged when adding new modalities
|
|
38
|
+
* - Plugin architecture enabled through interface-based design
|
|
39
|
+
*
|
|
40
|
+
* 5. Benefits:
|
|
41
|
+
* - Clean separation of concerns
|
|
42
|
+
* - Testability through mock injection
|
|
43
|
+
* - Future extensibility without core changes
|
|
44
|
+
* - Support for different embedding dimensions and content types
|
|
45
|
+
*/
|
|
46
|
+
export { type ContentDocument, type ContentChunk, type Document, type Chunk, type EmbeddingResult, type SearchResult, type SearchOptions, } from './types.js';
|
|
47
|
+
export { type EmbedFunction, type RerankFunction, type EmbeddingQueryInterface, type RerankingInterface, type SearchEngineConfig, type ContentTypeStrategy, type ModelAgnosticInterface, type ExtendedEmbeddingInterface, type ExtendedRerankingInterface, type SearchPipelineInterface, type SearchDependencyFactory, InterfaceValidator } from './interfaces.js';
|
|
48
|
+
export * from './adapters.js';
|
|
49
|
+
export * from './config.js';
|
|
50
|
+
export { type DatabaseConnection, type ContentMetadata, openDatabase, initializeSchema, insertDocument, insertChunk, upsertDocument, getChunksByEmbeddingIds, insertContentMetadata, getContentMetadata, getContentMetadataByHash, getContentMetadataByStorageType, deleteContentMetadata, getStorageStats, updateStorageStats } from './db.js';
|
|
51
|
+
export { type VectorIndexOptions, VectorIndex } from './vector-index.js';
|
|
52
|
+
export { type ChunkConfig, type GenericDocument, type GenericChunk, type ChunkingStrategy, ChunkingStrategyRegistry, DEFAULT_CHUNK_CONFIG, chunkingRegistry, chunkGenericDocument, registerTextChunkingStrategy } from './chunker.js';
|
|
53
|
+
export * from './search.js';
|
|
54
|
+
export * from './ingestion.js';
|
|
55
|
+
export * from './path-manager.js';
|
|
56
|
+
export { ContentManager, type MemoryContentMetadata, type ContentIngestionResult, type ContentManagerConfig } from './content-manager.js';
|
|
57
|
+
export { ContentResolver, type ContentRequest, type ContentResult } from './content-resolver.js';
|
|
58
|
+
export * from './error-handler.js';
|
|
59
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CORE MODULE — Shared between text-only (rag-lite-ts) and future multimodal (rag-lite-mm)
|
|
3
|
+
* Model-agnostic. No transformer or modality-specific logic.
|
|
4
|
+
*
|
|
5
|
+
* This module provides the clean re-export surface for the core layer, enabling
|
|
6
|
+
* dependency injection patterns for different implementations (text-only, multimodal, etc.).
|
|
7
|
+
*
|
|
8
|
+
* DEPENDENCY INJECTION ARCHITECTURE:
|
|
9
|
+
*
|
|
10
|
+
* The core layer uses explicit dependency injection to maintain clean separation between
|
|
11
|
+
* model-agnostic logic and implementation-specific components:
|
|
12
|
+
*
|
|
13
|
+
* 1. Core Classes (SearchEngine, IngestionPipeline):
|
|
14
|
+
* - Accept injected functions (EmbedFunction, RerankFunction) in constructors
|
|
15
|
+
* - Coordinate model-agnostic operations (database, vector index, search pipeline)
|
|
16
|
+
* - No knowledge of specific embedding models or transformers
|
|
17
|
+
*
|
|
18
|
+
* 2. Dependency Injection Interfaces:
|
|
19
|
+
* - EmbedFunction: (query: string, contentType?: string) => Promise<EmbeddingResult>
|
|
20
|
+
* - RerankFunction: (query: string, results: SearchResult[], contentType?: string) => Promise<SearchResult[]>
|
|
21
|
+
* - Support different content types (text, image, etc.) and embedding dimensions
|
|
22
|
+
*
|
|
23
|
+
* 3. Usage Patterns:
|
|
24
|
+
*
|
|
25
|
+
* // Direct dependency injection (advanced users)
|
|
26
|
+
* const embedFn = createTextEmbedFunction();
|
|
27
|
+
* const rerankFn = createTextRerankFunction();
|
|
28
|
+
* const indexManager = new IndexManager('./index.bin');
|
|
29
|
+
* const db = await openDatabase('./db.sqlite');
|
|
30
|
+
* const search = new SearchEngine(embedFn, indexManager, db, rerankFn);
|
|
31
|
+
*
|
|
32
|
+
* // Factory pattern (recommended for common use cases)
|
|
33
|
+
* const search = await TextSearchFactory.create('./index.bin', './db.sqlite');
|
|
34
|
+
*
|
|
35
|
+
* 4. Extension Points:
|
|
36
|
+
* - New implementations (multimodal, custom models) implement the same interfaces
|
|
37
|
+
* - Core classes remain unchanged when adding new modalities
|
|
38
|
+
* - Plugin architecture enabled through interface-based design
|
|
39
|
+
*
|
|
40
|
+
* 5. Benefits:
|
|
41
|
+
* - Clean separation of concerns
|
|
42
|
+
* - Testability through mock injection
|
|
43
|
+
* - Future extensibility without core changes
|
|
44
|
+
* - Support for different embedding dimensions and content types
|
|
45
|
+
*/
|
|
46
|
+
// Dependency injection interfaces and utilities
|
|
47
|
+
export { InterfaceValidator } from './interfaces.js';
|
|
48
|
+
// Adapter utilities for converting implementations to dependency injection
|
|
49
|
+
export * from './adapters.js';
|
|
50
|
+
// Core configuration management - model-agnostic settings
|
|
51
|
+
export * from './config.js';
|
|
52
|
+
// Database operations - supports different content types through metadata
|
|
53
|
+
export { openDatabase, initializeSchema, insertDocument, insertChunk, upsertDocument, getChunksByEmbeddingIds, insertContentMetadata, getContentMetadata, getContentMetadataByHash, getContentMetadataByStorageType, deleteContentMetadata, getStorageStats, updateStorageStats } from './db.js';
|
|
54
|
+
// Vector index operations - works with any embedding dimensions
|
|
55
|
+
export { VectorIndex } from './vector-index.js';
|
|
56
|
+
// Generic chunking interfaces and strategies - supports text, image metadata, etc.
|
|
57
|
+
export { ChunkingStrategyRegistry, DEFAULT_CHUNK_CONFIG, chunkingRegistry, chunkGenericDocument, registerTextChunkingStrategy } from './chunker.js';
|
|
58
|
+
// Core search engine - uses dependency injection for embedding and reranking
|
|
59
|
+
export * from './search.js';
|
|
60
|
+
// Core ingestion pipeline - uses dependency injection for embedding
|
|
61
|
+
export * from './ingestion.js';
|
|
62
|
+
// Path management utilities - content-type agnostic
|
|
63
|
+
export * from './path-manager.js';
|
|
64
|
+
// Unified content system - handles both filesystem and memory content
|
|
65
|
+
export { ContentManager } from './content-manager.js';
|
|
66
|
+
export { ContentResolver } from './content-resolver.js';
|
|
67
|
+
// Error handling framework - supports implementation-specific error contexts
|
|
68
|
+
export * from './error-handler.js';
|
|
69
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CORE MODULE — Shared between text-only (rag-lite-ts) and future multimodal (rag-lite-mm)
|
|
3
|
+
* Model-agnostic. No transformer or modality-specific logic.
|
|
4
|
+
*/
|
|
5
|
+
import { type FileProcessorOptions } from '../file-processor.js';
|
|
6
|
+
import { type ChunkConfig } from './chunker.js';
|
|
7
|
+
import { IndexManager } from '../index-manager.js';
|
|
8
|
+
import { type DatabaseConnection } from './db.js';
|
|
9
|
+
import type { EmbedFunction } from './interfaces.js';
|
|
10
|
+
import { ContentManager, type MemoryContentMetadata } from './content-manager.js';
|
|
11
|
+
/**
|
|
12
|
+
* Options accepted by the IngestionPipeline ingest methods (ingestDirectory, ingestFile, ingestFromMemory, ingestPath); every field is optional
|
|
13
|
+
*/
|
|
14
|
+
export interface IngestionOptions {
|
|
15
|
+
/** File processing options (FileProcessorOptions from '../file-processor.js') */
|
|
16
|
+
fileOptions?: FileProcessorOptions;
|
|
17
|
+
/** Chunking configuration (ChunkConfig from './chunker.js') */
|
|
18
|
+
chunkConfig?: ChunkConfig;
|
|
19
|
+
/** Whether to force a rebuild of the index */
|
|
20
|
+
forceRebuild?: boolean;
|
|
21
|
+
/** Ingestion mode: 'text' or 'multimodal' */
|
|
22
|
+
mode?: 'text' | 'multimodal';
|
|
23
|
+
/** Content type for the ingested content; presumably a simple type such as 'text' or 'image' - TODO confirm against ingestion.js */
|
|
24
|
+
contentType?: string;
|
|
25
|
+
}
|
|
26
|
+
/**
|
|
27
|
+
* Summary returned by ingestDirectory, ingestFile and ingestPath (ingestFromMemory resolves to a content ID string instead)
|
|
28
|
+
*/
|
|
29
|
+
export interface IngestionResult {
|
|
30
|
+
/** Total documents processed (NOTE(review): this declaration does not show whether failed documents are included - confirm) */
|
|
31
|
+
documentsProcessed: number;
|
|
32
|
+
/** Total chunks created */
|
|
33
|
+
chunksCreated: number;
|
|
34
|
+
/** Total embeddings generated */
|
|
35
|
+
embeddingsGenerated: number;
|
|
36
|
+
/** Number of documents that failed processing */
|
|
37
|
+
documentErrors: number;
|
|
38
|
+
/** Number of chunks that failed embedding */
|
|
39
|
+
embeddingErrors: number;
|
|
40
|
+
/** Total processing time, in milliseconds */
|
|
41
|
+
processingTimeMs: number;
|
|
42
|
+
/** Content IDs of successfully ingested documents */
|
|
43
|
+
contentIds: string[];
|
|
44
|
+
}
|
|
45
|
+
/**
|
|
46
|
+
* Main ingestion pipeline class
|
|
47
|
+
* Coordinates the entire process from file discovery to vector storage
|
|
48
|
+
* Uses explicit dependency injection for clean architecture
|
|
49
|
+
*/
|
|
50
|
+
export declare class IngestionPipeline {
|
|
51
|
+
private embedFn;
|
|
52
|
+
private indexManager;
|
|
53
|
+
private db;
|
|
54
|
+
private defaultChunkConfig?;
|
|
55
|
+
private pathManager;
|
|
56
|
+
private contentManager;
|
|
57
|
+
/**
|
|
58
|
+
* Creates a new IngestionPipeline with explicit dependency injection
|
|
59
|
+
* Enhanced with ContentManager integration for unified content system
|
|
60
|
+
*
|
|
61
|
+
* DEPENDENCY INJECTION PATTERN:
|
|
62
|
+
* This constructor requires all dependencies to be explicitly provided, enabling:
|
|
63
|
+
* - Clean separation between core ingestion logic and implementation-specific components
|
|
64
|
+
* - Support for different embedding models and content types
|
|
65
|
+
* - Testability through mock injection
|
|
66
|
+
* - Future extensibility for multimodal content processing
|
|
67
|
+
* - Unified content management for both filesystem and memory-based ingestion
|
|
68
|
+
*
|
|
69
|
+
* @param embedFn - Function to embed document chunks into vectors
|
|
70
|
+
* - Signature: (query: string, contentType?: string) => Promise<EmbeddingResult>
|
|
71
|
+
* - Must handle chunk text and return consistent embedding format
|
|
72
|
+
* - Examples:
|
|
73
|
+
* - Text: const embedFn = (text) => textEmbedder.embedSingle(text)
|
|
74
|
+
* - Multimodal: const embedFn = (content, type) => type === 'image' ? clipEmbedder.embedImage(content) : clipEmbedder.embedText(content)
|
|
75
|
+
* - Custom: const embedFn = (text) => customModel.embed(text)
|
|
76
|
+
*
|
|
77
|
+
* @param indexManager - Vector index manager for storing embeddings
|
|
78
|
+
* - Handles vector storage and indexing operations
|
|
79
|
+
* - Must support the embedding dimensions produced by embedFn
|
|
80
|
+
* - Example: new IndexManager('./index.bin')
|
|
81
|
+
*
|
|
82
|
+
* @param db - Database connection for metadata storage
|
|
83
|
+
* - Stores document and chunk metadata with content type support
|
|
84
|
+
* - Supports different content types through metadata fields
|
|
85
|
+
* - Example: await openDatabase('./db.sqlite')
|
|
86
|
+
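* @param defaultChunkConfig - Optional default chunking configuration (ChunkConfig); pass undefined to omit it when also supplying contentManager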
|
|
87
|
+
* @param contentManager - Optional ContentManager for unified content system
|
|
88
|
+
* - Handles content storage routing and deduplication
|
|
89
|
+
* - If not provided, creates default instance with standard configuration
|
|
90
|
+
* - Example: new ContentManager(db, { contentDir: '.raglite/content' })
|
|
91
|
+
*
|
|
92
|
+
* USAGE EXAMPLES:
|
|
93
|
+
* ```typescript
|
|
94
|
+
* // Text-only ingestion pipeline with unified content system
|
|
95
|
+
* const textEmbedFn = createTextEmbedFunction();
|
|
96
|
+
* const indexManager = new IndexManager('./index.bin');
|
|
97
|
+
* const db = await openDatabase('./db.sqlite');
|
|
98
|
+
* const contentManager = new ContentManager(db);
|
|
99
|
+
* const ingestion = new IngestionPipeline(textEmbedFn, indexManager, db, undefined, contentManager); // 4th argument (defaultChunkConfig) left undefined
|
|
100
|
+
*
|
|
101
|
+
* // Simple usage (ContentManager created automatically)
|
|
102
|
+
* const ingestion = new IngestionPipeline(textEmbedFn, indexManager, db);
|
|
103
|
+
*
|
|
104
|
+
* // Custom embedding implementation with memory ingestion
|
|
105
|
+
* const customEmbedFn = async (text) => ({
|
|
106
|
+
* embedding_id: generateId(),
|
|
107
|
+
* vector: await myCustomModel.embed(text)
|
|
108
|
+
* });
|
|
109
|
+
* const ingestion = new IngestionPipeline(customEmbedFn, indexManager, db);
|
|
110
|
+
* await ingestion.ingestFromMemory(buffer, { displayName: 'file.txt' });
|
|
111
|
+
* ```
|
|
112
|
+
*/
|
|
113
|
+
constructor(embedFn: EmbedFunction, indexManager: IndexManager, db: DatabaseConnection, defaultChunkConfig?: ChunkConfig | undefined, contentManager?: ContentManager);
|
|
114
|
+
/**
|
|
115
|
+
* Ingest documents from a directory
|
|
116
|
+
* @param directoryPath - Path to directory containing documents
|
|
117
|
+
* @param options - Optional ingestion configuration
|
|
118
|
+
* @returns Promise resolving to ingestion results
|
|
119
|
+
*/
|
|
120
|
+
ingestDirectory(directoryPath: string, options?: IngestionOptions): Promise<IngestionResult>;
|
|
121
|
+
/**
|
|
122
|
+
* Ingest a single file
|
|
123
|
+
* @param filePath - Path to the file to ingest
|
|
124
|
+
* @param options - Optional ingestion configuration
|
|
125
|
+
* @returns Promise resolving to ingestion results
|
|
126
|
+
*/
|
|
127
|
+
ingestFile(filePath: string, options?: IngestionOptions): Promise<IngestionResult>;
|
|
128
|
+
/**
|
|
129
|
+
* Ingest content from memory buffer
|
|
130
|
+
* Enables MCP integration and real-time content processing
|
|
131
|
+
* @param content - Buffer containing the content to ingest
|
|
132
|
+
* @param metadata - Memory content metadata including display name and content type
|
|
133
|
+
* @param options - Optional ingestion configuration
|
|
134
|
+
* @returns Promise resolving to content ID for the ingested content
|
|
135
|
+
*/
|
|
136
|
+
ingestFromMemory(content: Buffer, metadata: MemoryContentMetadata, options?: IngestionOptions): Promise<string>;
|
|
137
|
+
/**
|
|
138
|
+
* Ingest documents from a path (file or directory)
|
|
139
|
+
* Implements the complete pipeline: file processing → chunking → embedding → storage
|
|
140
|
+
* Enhanced to handle mixed content types (text and images) in multimodal mode
|
|
141
|
+
*/
|
|
142
|
+
ingestPath(path: string, options?: IngestionOptions): Promise<IngestionResult>;
|
|
143
|
+
/**
|
|
144
|
+
* Analyze content types in the document collection
|
|
145
|
+
* @private
|
|
146
|
+
*/
|
|
147
|
+
private analyzeContentTypes;
|
|
148
|
+
/**
|
|
149
|
+
* Chunk all documents and organize results with content-type awareness
|
|
150
|
+
* Enhanced to handle different content types appropriately
|
|
151
|
+
*/
|
|
152
|
+
private chunkDocumentsWithContentTypes;
|
|
153
|
+
/**
|
|
154
|
+
* Generate embeddings for all chunks with content-type support
|
|
155
|
+
* Enhanced to handle different content types and pass metadata to embedding function
|
|
156
|
+
*/
|
|
157
|
+
private generateEmbeddingsWithContentTypes;
|
|
158
|
+
/**
|
|
159
|
+
* Store documents and chunks in database with content-type support
|
|
160
|
+
* Enhanced to handle content type metadata and multimodal content
|
|
161
|
+
* @returns Array of content IDs for successfully stored documents
|
|
162
|
+
*/
|
|
163
|
+
private storeDocumentsAndChunksWithContentTypes;
|
|
164
|
+
/**
|
|
165
|
+
* Update vector index with new embeddings (supports grouped content type storage)
|
|
166
|
+
*/
|
|
167
|
+
private updateVectorIndex;
|
|
168
|
+
/**
|
|
169
|
+
* Filter documents based on ingestion mode to avoid processing incompatible content types
|
|
170
|
+
*/
|
|
171
|
+
private filterDocumentsByMode;
|
|
172
|
+
/**
|
|
173
|
+
* Converts MIME type to simple content type for embedding function
|
|
174
|
+
* @param mimeType - MIME type string (e.g., 'text/plain', 'image/jpeg')
|
|
175
|
+
* @returns Simple content type ('text', 'image', etc.)
|
|
176
|
+
*/
|
|
177
|
+
private getContentTypeForEmbedding;
|
|
178
|
+
/**
|
|
179
|
+
* Save the vector index to disk
|
|
180
|
+
*/
|
|
181
|
+
saveIndex(): Promise<void>;
|
|
182
|
+
/**
|
|
183
|
+
* Process image content from memory using the existing image processing pipeline
|
|
184
|
+
* @private
|
|
185
|
+
*/
|
|
186
|
+
private processImageFromMemory;
|
|
187
|
+
/**
|
|
188
|
+
* Process PDF content from memory using the existing PDF processing pipeline
|
|
189
|
+
* @private
|
|
190
|
+
*/
|
|
191
|
+
private processPDFFromMemory;
|
|
192
|
+
/**
|
|
193
|
+
* Process DOCX content from memory using the existing DOCX processing pipeline
|
|
194
|
+
* @private
|
|
195
|
+
*/
|
|
196
|
+
private processDOCXFromMemory;
|
|
197
|
+
/**
|
|
198
|
+
* Clean up resources - explicit cleanup method
|
|
199
|
+
*/
|
|
200
|
+
cleanup(): Promise<void>;
|
|
201
|
+
}
|
|
202
|
+
//# sourceMappingURL=ingestion.d.ts.map
|