rag-lite-ts 1.0.1 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +651 -109
- package/dist/cli/indexer.js +262 -46
- package/dist/cli/search.js +54 -32
- package/dist/cli.js +185 -28
- package/dist/config.d.ts +34 -73
- package/dist/config.js +50 -255
- package/dist/core/abstract-embedder.d.ts +125 -0
- package/dist/core/abstract-embedder.js +264 -0
- package/dist/core/actionable-error-messages.d.ts +60 -0
- package/dist/core/actionable-error-messages.js +397 -0
- package/dist/core/adapters.d.ts +93 -0
- package/dist/core/adapters.js +139 -0
- package/dist/core/batch-processing-optimizer.d.ts +155 -0
- package/dist/core/batch-processing-optimizer.js +541 -0
- package/dist/core/chunker.d.ts +119 -0
- package/dist/core/chunker.js +73 -0
- package/dist/core/cli-database-utils.d.ts +53 -0
- package/dist/core/cli-database-utils.js +239 -0
- package/dist/core/config.d.ts +102 -0
- package/dist/core/config.js +247 -0
- package/dist/core/content-errors.d.ts +111 -0
- package/dist/core/content-errors.js +362 -0
- package/dist/core/content-manager.d.ts +343 -0
- package/dist/core/content-manager.js +1504 -0
- package/dist/core/content-performance-optimizer.d.ts +150 -0
- package/dist/core/content-performance-optimizer.js +516 -0
- package/dist/core/content-resolver.d.ts +104 -0
- package/dist/core/content-resolver.js +285 -0
- package/dist/core/cross-modal-search.d.ts +164 -0
- package/dist/core/cross-modal-search.js +342 -0
- package/dist/core/database-connection-manager.d.ts +109 -0
- package/dist/core/database-connection-manager.js +304 -0
- package/dist/core/db.d.ts +245 -0
- package/dist/core/db.js +952 -0
- package/dist/core/embedder-factory.d.ts +176 -0
- package/dist/core/embedder-factory.js +338 -0
- package/dist/{error-handler.d.ts → core/error-handler.d.ts} +23 -2
- package/dist/{error-handler.js → core/error-handler.js} +51 -8
- package/dist/core/index.d.ts +59 -0
- package/dist/core/index.js +69 -0
- package/dist/core/ingestion.d.ts +213 -0
- package/dist/core/ingestion.js +812 -0
- package/dist/core/interfaces.d.ts +408 -0
- package/dist/core/interfaces.js +106 -0
- package/dist/core/lazy-dependency-loader.d.ts +152 -0
- package/dist/core/lazy-dependency-loader.js +453 -0
- package/dist/core/mode-detection-service.d.ts +150 -0
- package/dist/core/mode-detection-service.js +565 -0
- package/dist/core/mode-model-validator.d.ts +92 -0
- package/dist/core/mode-model-validator.js +203 -0
- package/dist/core/model-registry.d.ts +120 -0
- package/dist/core/model-registry.js +415 -0
- package/dist/core/model-validator.d.ts +217 -0
- package/dist/core/model-validator.js +782 -0
- package/dist/{path-manager.d.ts → core/path-manager.d.ts} +5 -0
- package/dist/{path-manager.js → core/path-manager.js} +5 -0
- package/dist/core/polymorphic-search-factory.d.ts +154 -0
- package/dist/core/polymorphic-search-factory.js +344 -0
- package/dist/core/raglite-paths.d.ts +121 -0
- package/dist/core/raglite-paths.js +145 -0
- package/dist/core/reranking-config.d.ts +42 -0
- package/dist/core/reranking-config.js +156 -0
- package/dist/core/reranking-factory.d.ts +92 -0
- package/dist/core/reranking-factory.js +591 -0
- package/dist/core/reranking-strategies.d.ts +325 -0
- package/dist/core/reranking-strategies.js +720 -0
- package/dist/core/resource-cleanup.d.ts +163 -0
- package/dist/core/resource-cleanup.js +371 -0
- package/dist/core/resource-manager.d.ts +212 -0
- package/dist/core/resource-manager.js +564 -0
- package/dist/core/search-pipeline.d.ts +111 -0
- package/dist/core/search-pipeline.js +287 -0
- package/dist/core/search.d.ts +131 -0
- package/dist/core/search.js +296 -0
- package/dist/core/streaming-operations.d.ts +145 -0
- package/dist/core/streaming-operations.js +409 -0
- package/dist/core/types.d.ts +66 -0
- package/dist/core/types.js +6 -0
- package/dist/core/universal-embedder.d.ts +177 -0
- package/dist/core/universal-embedder.js +139 -0
- package/dist/core/validation-messages.d.ts +99 -0
- package/dist/core/validation-messages.js +334 -0
- package/dist/{vector-index.d.ts → core/vector-index.d.ts} +4 -0
- package/dist/{vector-index.js → core/vector-index.js} +21 -3
- package/dist/dom-polyfills.d.ts +6 -0
- package/dist/dom-polyfills.js +40 -0
- package/dist/factories/index.d.ts +43 -0
- package/dist/factories/index.js +44 -0
- package/dist/factories/text-factory.d.ts +560 -0
- package/dist/factories/text-factory.js +968 -0
- package/dist/file-processor.d.ts +90 -4
- package/dist/file-processor.js +723 -20
- package/dist/index-manager.d.ts +3 -2
- package/dist/index-manager.js +13 -11
- package/dist/index.d.ts +72 -8
- package/dist/index.js +102 -16
- package/dist/indexer.js +1 -1
- package/dist/ingestion.d.ts +44 -154
- package/dist/ingestion.js +75 -671
- package/dist/mcp-server.d.ts +35 -3
- package/dist/mcp-server.js +1186 -79
- package/dist/multimodal/clip-embedder.d.ts +314 -0
- package/dist/multimodal/clip-embedder.js +945 -0
- package/dist/multimodal/index.d.ts +6 -0
- package/dist/multimodal/index.js +6 -0
- package/dist/preprocess.js +1 -1
- package/dist/run-error-recovery-tests.d.ts +7 -0
- package/dist/run-error-recovery-tests.js +101 -0
- package/dist/search-standalone.js +1 -1
- package/dist/search.d.ts +51 -69
- package/dist/search.js +117 -412
- package/dist/test-utils.d.ts +8 -26
- package/dist/text/chunker.d.ts +33 -0
- package/dist/{chunker.js → text/chunker.js} +98 -75
- package/dist/{embedder.d.ts → text/embedder.d.ts} +22 -1
- package/dist/{embedder.js → text/embedder.js} +84 -10
- package/dist/text/index.d.ts +8 -0
- package/dist/text/index.js +9 -0
- package/dist/text/preprocessors/index.d.ts +17 -0
- package/dist/text/preprocessors/index.js +38 -0
- package/dist/text/preprocessors/mdx.d.ts +25 -0
- package/dist/text/preprocessors/mdx.js +101 -0
- package/dist/text/preprocessors/mermaid.d.ts +68 -0
- package/dist/text/preprocessors/mermaid.js +330 -0
- package/dist/text/preprocessors/registry.d.ts +56 -0
- package/dist/text/preprocessors/registry.js +180 -0
- package/dist/text/reranker.d.ts +59 -0
- package/dist/{reranker.js → text/reranker.js} +138 -53
- package/dist/text/sentence-transformer-embedder.d.ts +96 -0
- package/dist/text/sentence-transformer-embedder.js +340 -0
- package/dist/{tokenizer.d.ts → text/tokenizer.d.ts} +1 -0
- package/dist/{tokenizer.js → text/tokenizer.js} +7 -2
- package/dist/types.d.ts +40 -1
- package/dist/utils/vector-math.d.ts +31 -0
- package/dist/utils/vector-math.js +70 -0
- package/package.json +16 -4
- package/dist/api-errors.d.ts.map +0 -1
- package/dist/api-errors.js.map +0 -1
- package/dist/chunker.d.ts +0 -47
- package/dist/chunker.d.ts.map +0 -1
- package/dist/chunker.js.map +0 -1
- package/dist/cli/indexer.d.ts.map +0 -1
- package/dist/cli/indexer.js.map +0 -1
- package/dist/cli/search.d.ts.map +0 -1
- package/dist/cli/search.js.map +0 -1
- package/dist/cli.d.ts.map +0 -1
- package/dist/cli.js.map +0 -1
- package/dist/config.d.ts.map +0 -1
- package/dist/config.js.map +0 -1
- package/dist/db.d.ts +0 -90
- package/dist/db.d.ts.map +0 -1
- package/dist/db.js +0 -340
- package/dist/db.js.map +0 -1
- package/dist/embedder.d.ts.map +0 -1
- package/dist/embedder.js.map +0 -1
- package/dist/error-handler.d.ts.map +0 -1
- package/dist/error-handler.js.map +0 -1
- package/dist/file-processor.d.ts.map +0 -1
- package/dist/file-processor.js.map +0 -1
- package/dist/index-manager.d.ts.map +0 -1
- package/dist/index-manager.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/indexer.d.ts.map +0 -1
- package/dist/indexer.js.map +0 -1
- package/dist/ingestion.d.ts.map +0 -1
- package/dist/ingestion.js.map +0 -1
- package/dist/mcp-server.d.ts.map +0 -1
- package/dist/mcp-server.js.map +0 -1
- package/dist/path-manager.d.ts.map +0 -1
- package/dist/path-manager.js.map +0 -1
- package/dist/preprocess.d.ts.map +0 -1
- package/dist/preprocess.js.map +0 -1
- package/dist/preprocessors/index.d.ts.map +0 -1
- package/dist/preprocessors/index.js.map +0 -1
- package/dist/preprocessors/mdx.d.ts.map +0 -1
- package/dist/preprocessors/mdx.js.map +0 -1
- package/dist/preprocessors/mermaid.d.ts.map +0 -1
- package/dist/preprocessors/mermaid.js.map +0 -1
- package/dist/preprocessors/registry.d.ts.map +0 -1
- package/dist/preprocessors/registry.js.map +0 -1
- package/dist/reranker.d.ts +0 -40
- package/dist/reranker.d.ts.map +0 -1
- package/dist/reranker.js.map +0 -1
- package/dist/resource-manager-demo.d.ts +0 -7
- package/dist/resource-manager-demo.d.ts.map +0 -1
- package/dist/resource-manager-demo.js +0 -52
- package/dist/resource-manager-demo.js.map +0 -1
- package/dist/resource-manager.d.ts +0 -129
- package/dist/resource-manager.d.ts.map +0 -1
- package/dist/resource-manager.js +0 -389
- package/dist/resource-manager.js.map +0 -1
- package/dist/search-standalone.d.ts.map +0 -1
- package/dist/search-standalone.js.map +0 -1
- package/dist/search.d.ts.map +0 -1
- package/dist/search.js.map +0 -1
- package/dist/test-utils.d.ts.map +0 -1
- package/dist/test-utils.js.map +0 -1
- package/dist/tokenizer.d.ts.map +0 -1
- package/dist/tokenizer.js.map +0 -1
- package/dist/types.d.ts.map +0 -1
- package/dist/types.js.map +0 -1
- package/dist/vector-index.d.ts.map +0 -1
- package/dist/vector-index.js.map +0 -1
package/dist/cli.js
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
import { readFileSync } from 'fs';
|
|
3
3
|
import { fileURLToPath } from 'url';
|
|
4
4
|
import { dirname, join } from 'path';
|
|
5
|
-
import { EXIT_CODES, ConfigurationError } from './config.js';
|
|
5
|
+
import { EXIT_CODES, ConfigurationError } from './core/config.js';
|
|
6
6
|
// Get package.json for version info
|
|
7
7
|
const __filename = fileURLToPath(import.meta.url);
|
|
8
8
|
const __dirname = dirname(__filename);
|
|
@@ -29,26 +29,40 @@ Examples:
|
|
|
29
29
|
raglite ingest ./docs/ # Ingest all .md/.txt files in docs/
|
|
30
30
|
raglite ingest ./readme.md # Ingest single file
|
|
31
31
|
raglite ingest ./docs/ --model Xenova/all-mpnet-base-v2 # Use higher quality model
|
|
32
|
+
raglite ingest ./docs/ --mode multimodal # Enable multimodal processing
|
|
33
|
+
raglite ingest ./docs/ --mode multimodal --rerank-strategy metadata # Use metadata reranking
|
|
32
34
|
raglite ingest ./docs/ --path-strategy relative --path-base /project # Use relative paths
|
|
33
35
|
raglite search "machine learning" # Search for documents about machine learning
|
|
34
36
|
raglite search "API documentation" --top-k 10 # Get top 10 results
|
|
37
|
+
raglite search "red car" --content-type image # Search only image results
|
|
35
38
|
|
|
36
39
|
raglite rebuild # Rebuild the entire index
|
|
37
40
|
|
|
38
41
|
Options for search:
|
|
39
|
-
--top-k <number>
|
|
40
|
-
--rerank
|
|
41
|
-
--no-rerank
|
|
42
|
+
--top-k <number> Number of results to return (default: 10)
|
|
43
|
+
--rerank Enable reranking for better results
|
|
44
|
+
--no-rerank Disable reranking
|
|
45
|
+
--content-type <type> Filter results by content type: 'text', 'image', or 'all' (default: all)
|
|
42
46
|
|
|
43
47
|
Options for ingest:
|
|
44
48
|
--model <name> Use specific embedding model
|
|
49
|
+
--mode <mode> Processing mode: 'text' (default) or 'multimodal'
|
|
50
|
+
--rerank-strategy <strategy> Reranking strategy for multimodal mode
|
|
45
51
|
--rebuild-if-needed Automatically rebuild if model mismatch detected (WARNING: rebuilds entire index)
|
|
46
52
|
--path-strategy <strategy> Path storage strategy: 'relative' (default) or 'absolute'
|
|
47
53
|
--path-base <path> Base directory for relative paths (defaults to current directory)
|
|
48
54
|
|
|
49
55
|
Available models:
|
|
50
|
-
|
|
51
|
-
|
|
56
|
+
Text mode:
|
|
57
|
+
sentence-transformers/all-MiniLM-L6-v2 (384 dim, fast, default)
|
|
58
|
+
Xenova/all-mpnet-base-v2 (768 dim, higher quality)
|
|
59
|
+
Multimodal mode:
|
|
60
|
+
Xenova/clip-vit-base-patch32 (512 dim, text + image support)
|
|
61
|
+
|
|
62
|
+
Available reranking strategies (multimodal mode):
|
|
63
|
+
text-derived Use image-to-text conversion + cross-encoder (default)
|
|
64
|
+
metadata Use filename and metadata-based scoring
|
|
65
|
+
disabled No reranking, use vector similarity only
|
|
52
66
|
|
|
53
67
|
For more information, visit: https://github.com/your-repo/rag-lite-ts
|
|
54
68
|
`);
|
|
@@ -115,9 +129,13 @@ function validateArgs(command, args, options) {
|
|
|
115
129
|
console.error(' raglite ingest ./docs/ # Ingest all .md/.txt files in docs/');
|
|
116
130
|
console.error(' raglite ingest ./readme.md # Ingest single file');
|
|
117
131
|
console.error(' raglite ingest ./docs/ --model Xenova/all-mpnet-base-v2 # Use higher quality model');
|
|
132
|
+
console.error(' raglite ingest ./docs/ --mode multimodal # Enable multimodal processing');
|
|
133
|
+
console.error(' raglite ingest ./docs/ --mode multimodal --rerank-strategy metadata # Use metadata reranking');
|
|
118
134
|
console.error('');
|
|
119
135
|
console.error('Options:');
|
|
120
136
|
console.error(' --model <name> Use specific embedding model');
|
|
137
|
+
console.error(' --mode <mode> Processing mode: text (default) or multimodal');
|
|
138
|
+
console.error(' --rerank-strategy <strategy> Reranking strategy for multimodal mode');
|
|
121
139
|
console.error(' --rebuild-if-needed Automatically rebuild if model mismatch detected');
|
|
122
140
|
console.error('');
|
|
123
141
|
console.error('The path can be either a file (.md or .txt) or a directory.');
|
|
@@ -134,11 +152,13 @@ function validateArgs(command, args, options) {
|
|
|
134
152
|
console.error(' raglite search "machine learning"');
|
|
135
153
|
console.error(' raglite search "API documentation" --top-k 10');
|
|
136
154
|
console.error(' raglite search "tutorial" --rerank');
|
|
155
|
+
console.error(' raglite search "red car" --content-type image');
|
|
137
156
|
console.error('');
|
|
138
157
|
console.error('Options:');
|
|
139
|
-
console.error(' --top-k <number>
|
|
140
|
-
console.error(' --rerank
|
|
141
|
-
console.error(' --no-rerank
|
|
158
|
+
console.error(' --top-k <number> Number of results to return (default: 10)');
|
|
159
|
+
console.error(' --rerank Enable reranking for better results');
|
|
160
|
+
console.error(' --no-rerank Disable reranking');
|
|
161
|
+
console.error(' --content-type <type> Filter by content type: text, image, or all (default: all)');
|
|
142
162
|
process.exit(EXIT_CODES.INVALID_ARGUMENTS);
|
|
143
163
|
}
|
|
144
164
|
break;
|
|
@@ -173,6 +193,96 @@ function validateArgs(command, args, options) {
|
|
|
173
193
|
}
|
|
174
194
|
options['top-k'] = topK;
|
|
175
195
|
}
|
|
196
|
+
// Validate content-type option (only for search command)
|
|
197
|
+
if (options['content-type'] !== undefined) {
|
|
198
|
+
if (command !== 'search') {
|
|
199
|
+
console.error(`Error: --content-type option is only available for the 'search' command`);
|
|
200
|
+
console.error('');
|
|
201
|
+
console.error('Use this option to filter search results by content type.');
|
|
202
|
+
console.error('');
|
|
203
|
+
console.error('Examples:');
|
|
204
|
+
console.error(' raglite search "query" --content-type text # Only text results');
|
|
205
|
+
console.error(' raglite search "query" --content-type image # Only image results');
|
|
206
|
+
process.exit(EXIT_CODES.INVALID_ARGUMENTS);
|
|
207
|
+
}
|
|
208
|
+
const supportedTypes = ['text', 'image', 'all'];
|
|
209
|
+
if (!supportedTypes.includes(options['content-type'])) {
|
|
210
|
+
console.error(`Error: Unsupported content type '${options['content-type']}'`);
|
|
211
|
+
console.error('');
|
|
212
|
+
console.error('Supported content types:');
|
|
213
|
+
console.error(' text Filter to show only text results');
|
|
214
|
+
console.error(' image Filter to show only image results');
|
|
215
|
+
console.error(' all Show all results (default)');
|
|
216
|
+
console.error('');
|
|
217
|
+
console.error('Examples:');
|
|
218
|
+
console.error(' --content-type text');
|
|
219
|
+
console.error(' --content-type image');
|
|
220
|
+
console.error(' --content-type all');
|
|
221
|
+
process.exit(EXIT_CODES.INVALID_ARGUMENTS);
|
|
222
|
+
}
|
|
223
|
+
}
|
|
224
|
+
// Validate mode option (only for ingest command)
|
|
225
|
+
if (options.mode !== undefined) {
|
|
226
|
+
if (command !== 'ingest') {
|
|
227
|
+
console.error(`Error: --mode option is only available for the 'ingest' command`);
|
|
228
|
+
console.error('');
|
|
229
|
+
console.error('The search command automatically detects the mode from the database.');
|
|
230
|
+
console.error('Mode is set once during ingestion and persists for all searches.');
|
|
231
|
+
console.error('');
|
|
232
|
+
console.error('Examples:');
|
|
233
|
+
console.error(' raglite ingest ./docs/ --mode multimodal');
|
|
234
|
+
console.error(' raglite search "your query" # Uses mode from ingestion');
|
|
235
|
+
process.exit(EXIT_CODES.INVALID_ARGUMENTS);
|
|
236
|
+
}
|
|
237
|
+
const supportedModes = ['text', 'multimodal'];
|
|
238
|
+
if (!supportedModes.includes(options.mode)) {
|
|
239
|
+
console.error(`Error: Unsupported mode '${options.mode}'`);
|
|
240
|
+
console.error('');
|
|
241
|
+
console.error('Supported modes:');
|
|
242
|
+
console.error(' text Process text documents only (default)');
|
|
243
|
+
console.error(' multimodal Process text and image documents');
|
|
244
|
+
console.error('');
|
|
245
|
+
console.error('Examples:');
|
|
246
|
+
console.error(' --mode text');
|
|
247
|
+
console.error(' --mode multimodal');
|
|
248
|
+
process.exit(EXIT_CODES.INVALID_ARGUMENTS);
|
|
249
|
+
}
|
|
250
|
+
}
|
|
251
|
+
// Validate rerank-strategy option (only for ingest command with multimodal mode)
|
|
252
|
+
if (options['rerank-strategy'] !== undefined) {
|
|
253
|
+
if (command !== 'ingest') {
|
|
254
|
+
console.error(`Error: --rerank-strategy option is only available for the 'ingest' command`);
|
|
255
|
+
console.error('');
|
|
256
|
+
console.error('Reranking strategy is configured during ingestion and used automatically during search.');
|
|
257
|
+
process.exit(EXIT_CODES.INVALID_ARGUMENTS);
|
|
258
|
+
}
|
|
259
|
+
const mode = options.mode || 'text';
|
|
260
|
+
if (mode !== 'multimodal') {
|
|
261
|
+
console.error(`Error: --rerank-strategy option is only available in multimodal mode`);
|
|
262
|
+
console.error('');
|
|
263
|
+
console.error('To use reranking strategies, specify --mode multimodal');
|
|
264
|
+
console.error('');
|
|
265
|
+
console.error('Examples:');
|
|
266
|
+
console.error(' raglite ingest ./docs/ --mode multimodal --rerank-strategy text-derived');
|
|
267
|
+
console.error(' raglite ingest ./docs/ --mode multimodal --rerank-strategy metadata');
|
|
268
|
+
process.exit(EXIT_CODES.INVALID_ARGUMENTS);
|
|
269
|
+
}
|
|
270
|
+
const supportedStrategies = ['text-derived', 'metadata', 'disabled'];
|
|
271
|
+
if (!supportedStrategies.includes(options['rerank-strategy'])) {
|
|
272
|
+
console.error(`Error: Unsupported reranking strategy '${options['rerank-strategy']}'`);
|
|
273
|
+
console.error('');
|
|
274
|
+
console.error('Supported strategies for multimodal mode:');
|
|
275
|
+
console.error(' text-derived Convert images to text, then use cross-encoder (default)');
|
|
276
|
+
console.error(' metadata Use filename and metadata-based scoring');
|
|
277
|
+
console.error(' disabled No reranking, use vector similarity only');
|
|
278
|
+
console.error('');
|
|
279
|
+
console.error('Examples:');
|
|
280
|
+
console.error(' --rerank-strategy text-derived');
|
|
281
|
+
console.error(' --rerank-strategy metadata');
|
|
282
|
+
console.error(' --rerank-strategy disabled');
|
|
283
|
+
process.exit(EXIT_CODES.INVALID_ARGUMENTS);
|
|
284
|
+
}
|
|
285
|
+
}
|
|
176
286
|
// Validate model option (only for ingest command)
|
|
177
287
|
if (options.model !== undefined) {
|
|
178
288
|
if (command !== 'ingest') {
|
|
@@ -186,20 +296,45 @@ function validateArgs(command, args, options) {
|
|
|
186
296
|
console.error(' raglite search "your query" # Uses the model from ingestion');
|
|
187
297
|
process.exit(EXIT_CODES.INVALID_ARGUMENTS);
|
|
188
298
|
}
|
|
189
|
-
const
|
|
299
|
+
const mode = options.mode || 'text';
|
|
300
|
+
const textModels = [
|
|
190
301
|
'sentence-transformers/all-MiniLM-L6-v2',
|
|
191
302
|
'Xenova/all-mpnet-base-v2'
|
|
192
303
|
];
|
|
304
|
+
const multimodalModels = [
|
|
305
|
+
'Xenova/clip-vit-base-patch32'
|
|
306
|
+
];
|
|
307
|
+
let supportedModels;
|
|
308
|
+
let modelTypeDescription;
|
|
309
|
+
if (mode === 'multimodal') {
|
|
310
|
+
supportedModels = multimodalModels;
|
|
311
|
+
modelTypeDescription = 'multimodal models';
|
|
312
|
+
}
|
|
313
|
+
else {
|
|
314
|
+
supportedModels = textModels;
|
|
315
|
+
modelTypeDescription = 'text models';
|
|
316
|
+
}
|
|
193
317
|
if (!supportedModels.includes(options.model)) {
|
|
194
|
-
console.error(`Error:
|
|
318
|
+
console.error(`Error: Model '${options.model}' is not supported for ${mode} mode`);
|
|
195
319
|
console.error('');
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
320
|
+
if (mode === 'text') {
|
|
321
|
+
console.error('Supported models for text mode:');
|
|
322
|
+
console.error(' sentence-transformers/all-MiniLM-L6-v2 (384 dim, fast, default)');
|
|
323
|
+
console.error(' Xenova/all-mpnet-base-v2 (768 dim, higher quality)');
|
|
324
|
+
}
|
|
325
|
+
else {
|
|
326
|
+
console.error('Supported models for multimodal mode:');
|
|
327
|
+
console.error(' Xenova/clip-vit-base-patch32 (512 dim, text + image support)');
|
|
328
|
+
}
|
|
199
329
|
console.error('');
|
|
200
330
|
console.error('Examples:');
|
|
201
|
-
|
|
202
|
-
|
|
331
|
+
if (mode === 'text') {
|
|
332
|
+
console.error(' --model sentence-transformers/all-MiniLM-L6-v2');
|
|
333
|
+
console.error(' --model Xenova/all-mpnet-base-v2');
|
|
334
|
+
}
|
|
335
|
+
else {
|
|
336
|
+
console.error(' --model Xenova/clip-vit-base-patch32 --mode multimodal');
|
|
337
|
+
}
|
|
203
338
|
process.exit(EXIT_CODES.INVALID_ARGUMENTS);
|
|
204
339
|
}
|
|
205
340
|
}
|
|
@@ -215,7 +350,7 @@ function validateArgs(command, args, options) {
|
|
|
215
350
|
console.error('');
|
|
216
351
|
console.error('Supported strategies:');
|
|
217
352
|
console.error(' relative Store paths relative to base directory (default, portable)');
|
|
218
|
-
console.error(' absolute Store absolute paths
|
|
353
|
+
console.error(' absolute Store absolute paths');
|
|
219
354
|
console.error('');
|
|
220
355
|
console.error('Examples:');
|
|
221
356
|
console.error(' --path-strategy relative');
|
|
@@ -243,6 +378,8 @@ function validateArgs(command, args, options) {
|
|
|
243
378
|
* Main CLI entry point
|
|
244
379
|
*/
|
|
245
380
|
async function main() {
|
|
381
|
+
// Set CLI mode to prevent database connection manager from starting timers
|
|
382
|
+
process.env.RAG_CLI_MODE = 'true';
|
|
246
383
|
try {
|
|
247
384
|
const { command, args, options } = parseArgs();
|
|
248
385
|
// Validate arguments
|
|
@@ -340,23 +477,43 @@ process.on('uncaughtException', (error) => {
|
|
|
340
477
|
process.exit(EXIT_CODES.GENERAL_ERROR);
|
|
341
478
|
});
|
|
342
479
|
// Handle process termination signals gracefully
|
|
343
|
-
process.on('SIGINT', () => {
|
|
480
|
+
process.on('SIGINT', async () => {
|
|
344
481
|
console.log('\n\nReceived SIGINT (Ctrl+C). Shutting down gracefully...');
|
|
345
482
|
console.log('If you need to force quit, press Ctrl+C again.');
|
|
483
|
+
// Clean up database connections before exit
|
|
484
|
+
try {
|
|
485
|
+
const { DatabaseConnectionManager } = await import('./core/database-connection-manager.js');
|
|
486
|
+
await DatabaseConnectionManager.closeAllConnections();
|
|
487
|
+
}
|
|
488
|
+
catch (error) {
|
|
489
|
+
// Ignore cleanup errors during shutdown
|
|
490
|
+
}
|
|
346
491
|
process.exit(EXIT_CODES.SUCCESS);
|
|
347
492
|
});
|
|
348
|
-
process.on('SIGTERM', () => {
|
|
493
|
+
process.on('SIGTERM', async () => {
|
|
349
494
|
console.log('\n\nReceived SIGTERM. Shutting down gracefully...');
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
console.error('Fatal error:', error instanceof Error ? error.message : String(error));
|
|
355
|
-
if (error instanceof ConfigurationError) {
|
|
356
|
-
process.exit(error.exitCode);
|
|
495
|
+
// Clean up database connections before exit
|
|
496
|
+
try {
|
|
497
|
+
const { DatabaseConnectionManager } = await import('./core/database-connection-manager.js');
|
|
498
|
+
await DatabaseConnectionManager.closeAllConnections();
|
|
357
499
|
}
|
|
358
|
-
|
|
359
|
-
|
|
500
|
+
catch (error) {
|
|
501
|
+
// Ignore cleanup errors during shutdown
|
|
360
502
|
}
|
|
503
|
+
process.exit(EXIT_CODES.SUCCESS);
|
|
361
504
|
});
|
|
505
|
+
// Run the CLI only if this file is executed directly
|
|
506
|
+
// In ES modules, we need to check import.meta.url instead of require.main
|
|
507
|
+
// Check if this file is being run directly
|
|
508
|
+
if (process.argv[1] === __filename || process.argv[1].endsWith('cli.js')) {
|
|
509
|
+
main().catch((error) => {
|
|
510
|
+
console.error('Fatal error:', error instanceof Error ? error.message : String(error));
|
|
511
|
+
if (error instanceof ConfigurationError) {
|
|
512
|
+
process.exit(error.exitCode);
|
|
513
|
+
}
|
|
514
|
+
else {
|
|
515
|
+
process.exit(EXIT_CODES.GENERAL_ERROR);
|
|
516
|
+
}
|
|
517
|
+
});
|
|
518
|
+
}
|
|
362
519
|
//# sourceMappingURL=cli.js.map
|
package/dist/config.d.ts
CHANGED
|
@@ -1,90 +1,51 @@
|
|
|
1
|
-
import { PreprocessingConfig } from './types.js';
|
|
2
|
-
export interface Config {
|
|
3
|
-
embedding_model: string;
|
|
4
|
-
chunk_size: number;
|
|
5
|
-
chunk_overlap: number;
|
|
6
|
-
batch_size: number;
|
|
7
|
-
top_k: number;
|
|
8
|
-
db_file: string;
|
|
9
|
-
index_file: string;
|
|
10
|
-
rerank_enabled: boolean;
|
|
11
|
-
preprocessing: PreprocessingConfig;
|
|
12
|
-
model_cache_path?: string;
|
|
13
|
-
path_storage_strategy: 'absolute' | 'relative';
|
|
14
|
-
}
|
|
15
|
-
export interface ModelDefaults {
|
|
16
|
-
dimensions: number;
|
|
17
|
-
chunk_size: number;
|
|
18
|
-
chunk_overlap: number;
|
|
19
|
-
batch_size: number;
|
|
20
|
-
}
|
|
21
1
|
/**
|
|
22
|
-
*
|
|
2
|
+
* Main configuration file with text-specific settings
|
|
3
|
+
* Extends core configuration with implementation-specific properties
|
|
23
4
|
*/
|
|
24
|
-
|
|
5
|
+
import { CoreConfig } from './core/config.js';
|
|
25
6
|
/**
|
|
26
|
-
*
|
|
27
|
-
* @param modelName - The embedding model name
|
|
28
|
-
* @returns Model-specific defaults for dimensions, chunk_size, chunk_overlap, and batch_size
|
|
7
|
+
* Text-specific configuration properties
|
|
29
8
|
*/
|
|
30
|
-
export
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
readonly FILE_NOT_FOUND: 4;
|
|
41
|
-
readonly DATABASE_ERROR: 5;
|
|
42
|
-
readonly MODEL_ERROR: 6;
|
|
43
|
-
readonly INDEX_ERROR: 7;
|
|
44
|
-
readonly PERMISSION_ERROR: 8;
|
|
45
|
-
};
|
|
46
|
-
/**
|
|
47
|
-
* Configuration validation error with specific exit code
|
|
48
|
-
*/
|
|
49
|
-
export declare class ConfigurationError extends Error {
|
|
50
|
-
exitCode: number;
|
|
51
|
-
constructor(message: string, exitCode?: number);
|
|
9
|
+
export interface TextConfig {
|
|
10
|
+
embedding_model: string;
|
|
11
|
+
rerank_enabled: boolean;
|
|
12
|
+
rerank_model: string;
|
|
13
|
+
preprocessing: {
|
|
14
|
+
enabled: boolean;
|
|
15
|
+
mdx: boolean;
|
|
16
|
+
mermaid: boolean;
|
|
17
|
+
code_blocks: boolean;
|
|
18
|
+
};
|
|
52
19
|
}
|
|
53
20
|
/**
|
|
54
|
-
*
|
|
55
|
-
* @param config - Preprocessing configuration to validate
|
|
56
|
-
* @throws {ConfigurationError} If preprocessing configuration is invalid
|
|
21
|
+
* Complete configuration interface combining core and text-specific settings
|
|
57
22
|
*/
|
|
58
|
-
export
|
|
23
|
+
export interface Config extends CoreConfig {
|
|
24
|
+
embedding_model: string;
|
|
25
|
+
rerank_enabled: boolean;
|
|
26
|
+
rerank_model: string;
|
|
27
|
+
preprocessing: {
|
|
28
|
+
enabled: boolean;
|
|
29
|
+
mdx: boolean;
|
|
30
|
+
mermaid: boolean;
|
|
31
|
+
code_blocks: boolean;
|
|
32
|
+
};
|
|
33
|
+
}
|
|
59
34
|
/**
|
|
60
|
-
*
|
|
61
|
-
* @param config - Base preprocessing configuration
|
|
62
|
-
* @returns Resolved preprocessing options for each content type
|
|
35
|
+
* Default configuration object with both core and text-specific settings
|
|
63
36
|
*/
|
|
64
|
-
export declare
|
|
65
|
-
|
|
66
|
-
mermaid: 'strip' | 'extract' | 'placeholder';
|
|
67
|
-
code: 'strip' | 'keep' | 'placeholder';
|
|
68
|
-
};
|
|
37
|
+
export declare const config: Config;
|
|
38
|
+
export * from './core/config.js';
|
|
69
39
|
/**
|
|
70
|
-
*
|
|
71
|
-
* @param config - Configuration object to validate
|
|
72
|
-
* @throws {ConfigurationError} If configuration is invalid
|
|
40
|
+
* Validate complete configuration including text-specific settings
|
|
73
41
|
*/
|
|
74
42
|
export declare function validateConfig(config: any): asserts config is Config;
|
|
75
43
|
/**
|
|
76
|
-
*
|
|
77
|
-
* Logs error and exits immediately with appropriate exit code
|
|
78
|
-
* @param error - Error object or message
|
|
79
|
-
* @param context - Context where the error occurred
|
|
80
|
-
* @param exitCode - Exit code to use (defaults to GENERAL_ERROR)
|
|
44
|
+
* Validate preprocessing configuration
|
|
81
45
|
*/
|
|
82
|
-
export declare function
|
|
46
|
+
export declare function validatePreprocessingConfig(config: any): boolean;
|
|
83
47
|
/**
|
|
84
|
-
*
|
|
85
|
-
* @param error - Error to log
|
|
86
|
-
* @param context - Context where error occurred
|
|
87
|
-
* @param skipError - Whether to skip this error and continue (default: false)
|
|
48
|
+
* Merge preprocessing configurations
|
|
88
49
|
*/
|
|
89
|
-
export declare function
|
|
50
|
+
export declare function mergePreprocessingConfig(base: any, override: any): any;
|
|
90
51
|
//# sourceMappingURL=config.d.ts.map
|