rag-lite-ts 2.0.2 → 2.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +27 -0
- package/dist/cli/indexer.js +25 -6
- package/dist/cli/search.js +3 -3
- package/dist/cli.js +33 -6
- package/dist/core/actionable-error-messages.js +3 -3
- package/dist/core/content-manager.d.ts +0 -8
- package/dist/core/content-manager.js +2 -30
- package/dist/core/database-connection-manager.js +10 -0
- package/dist/core/db.d.ts +0 -32
- package/dist/core/db.js +11 -68
- package/dist/core/embedder-factory.d.ts +0 -22
- package/dist/core/embedder-factory.js +8 -35
- package/dist/core/index.d.ts +3 -3
- package/dist/core/index.js +3 -3
- package/dist/core/ingestion.d.ts +1 -16
- package/dist/core/ingestion.js +1 -30
- package/dist/core/interfaces.d.ts +1 -1
- package/dist/core/interfaces.js +1 -1
- package/dist/core/model-registry.d.ts +0 -4
- package/dist/core/model-registry.js +5 -9
- package/dist/core/search.d.ts +2 -2
- package/dist/core/search.js +2 -2
- package/dist/factories/index.d.ts +11 -29
- package/dist/factories/index.js +12 -29
- package/dist/factories/ingestion-factory.d.ts +200 -0
- package/dist/factories/ingestion-factory.js +475 -0
- package/dist/{core/polymorphic-search-factory.d.ts → factories/search-factory.d.ts} +7 -7
- package/dist/{core/polymorphic-search-factory.js → factories/search-factory.js} +22 -22
- package/dist/index-manager.js +25 -14
- package/dist/index.d.ts +5 -30
- package/dist/index.js +9 -24
- package/dist/indexer.js +5 -2
- package/dist/ingestion.d.ts +2 -4
- package/dist/ingestion.js +2 -2
- package/dist/mcp-server.js +31 -25
- package/dist/search.js +2 -2
- package/dist/text/embedder.d.ts +0 -11
- package/dist/text/embedder.js +11 -22
- package/dist/text/index.d.ts +2 -2
- package/dist/text/index.js +2 -2
- package/dist/text/reranker.d.ts +0 -10
- package/dist/text/reranker.js +10 -33
- package/package.json +7 -3
- package/dist/factories/polymorphic-factory.d.ts +0 -50
- package/dist/factories/polymorphic-factory.js +0 -159
- package/dist/factories/text-factory.d.ts +0 -560
- package/dist/factories/text-factory.js +0 -968
package/README.md
CHANGED
|
@@ -438,6 +438,33 @@ Now Claude can search your docs directly! Works with any MCP-compatible AI tool.
|
|
|
438
438
|
</tr>
|
|
439
439
|
</table>
|
|
440
440
|
|
|
441
|
+
### 📁 Supported File Formats
|
|
442
|
+
|
|
443
|
+
RAG-lite TS supports the following file formats with full processing implementations:
|
|
444
|
+
|
|
445
|
+
**Text Mode:**
|
|
446
|
+
- Markdown: `.md`, `.mdx`
|
|
447
|
+
- Plain text: `.txt`
|
|
448
|
+
- Documents: `.pdf`, `.docx`
|
|
449
|
+
|
|
450
|
+
**Multimodal Mode** (includes all text formats plus):
|
|
451
|
+
- Images: `.jpg`, `.jpeg`, `.png`, `.gif`, `.webp`, `.bmp`
|
|
452
|
+
|
|
453
|
+
All formats work seamlessly with both single file and directory ingestion:
|
|
454
|
+
|
|
455
|
+
```bash
|
|
456
|
+
# Single file ingestion
|
|
457
|
+
raglite ingest ./document.pdf
|
|
458
|
+
raglite ingest ./readme.md
|
|
459
|
+
raglite ingest ./notes.txt
|
|
460
|
+
|
|
461
|
+
# Directory ingestion (processes all supported formats)
|
|
462
|
+
raglite ingest ./docs/
|
|
463
|
+
|
|
464
|
+
# Multimodal ingestion (includes images)
|
|
465
|
+
raglite ingest ./mixed-content/ --mode multimodal
|
|
466
|
+
```
|
|
467
|
+
|
|
441
468
|
## 🔧 How It Works
|
|
442
469
|
|
|
443
470
|
RAG-lite TS follows a clean, efficient pipeline:
|
package/dist/cli/indexer.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { existsSync, statSync } from 'fs';
|
|
2
2
|
import { resolve } from 'path';
|
|
3
|
-
import {
|
|
3
|
+
import { IngestionFactory } from '../factories/ingestion-factory.js';
|
|
4
4
|
import { withCLIDatabaseAccess, setupCLICleanup, isDatabaseBusy } from '../core/cli-database-utils.js';
|
|
5
5
|
import { EXIT_CODES, ConfigurationError } from '../core/config.js';
|
|
6
6
|
/**
|
|
@@ -148,12 +148,31 @@ export async function runIngest(path, options = {}) {
|
|
|
148
148
|
const pathType = stats.isDirectory() ? 'directory' : 'file';
|
|
149
149
|
// Validate file type for single files
|
|
150
150
|
if (stats.isFile()) {
|
|
151
|
-
const
|
|
151
|
+
const mode = options.mode || 'text';
|
|
152
|
+
// Only formats with actual processing implementations
|
|
153
|
+
const textExtensions = ['.md', '.txt', '.mdx', '.pdf', '.docx'];
|
|
154
|
+
const imageExtensions = ['.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp'];
|
|
155
|
+
const validExtensions = mode === 'multimodal'
|
|
156
|
+
? [...textExtensions, ...imageExtensions]
|
|
157
|
+
: textExtensions;
|
|
152
158
|
const hasValidExtension = validExtensions.some(ext => path.toLowerCase().endsWith(ext));
|
|
153
159
|
if (!hasValidExtension) {
|
|
154
160
|
console.error(`Error: Unsupported file type: ${path}`);
|
|
155
161
|
console.error('');
|
|
156
|
-
|
|
162
|
+
if (mode === 'multimodal') {
|
|
163
|
+
console.error('Supported file types in multimodal mode:');
|
|
164
|
+
console.error(' Text: .md, .txt, .mdx');
|
|
165
|
+
console.error(' Documents: .pdf, .docx');
|
|
166
|
+
console.error(' Images: .jpg, .jpeg, .png, .gif, .webp, .bmp');
|
|
167
|
+
}
|
|
168
|
+
else {
|
|
169
|
+
console.error('Supported file types in text mode:');
|
|
170
|
+
console.error(' Text: .md, .txt, .mdx');
|
|
171
|
+
console.error(' Documents: .pdf, .docx');
|
|
172
|
+
console.error('');
|
|
173
|
+
console.error('For image files, use --mode multimodal:');
|
|
174
|
+
console.error(' raglite ingest <path> --mode multimodal');
|
|
175
|
+
}
|
|
157
176
|
console.error('');
|
|
158
177
|
console.error('If you want to ingest multiple files, provide a directory path instead.');
|
|
159
178
|
process.exit(EXIT_CODES.INVALID_ARGUMENTS);
|
|
@@ -209,8 +228,8 @@ export async function runIngest(path, options = {}) {
|
|
|
209
228
|
// Create ingestion pipeline using factory
|
|
210
229
|
let pipeline;
|
|
211
230
|
try {
|
|
212
|
-
// Create ingestion pipeline using
|
|
213
|
-
pipeline = await withCLIDatabaseAccess(dbPath, () =>
|
|
231
|
+
// Create ingestion pipeline using IngestionFactory with database protection
|
|
232
|
+
pipeline = await withCLIDatabaseAccess(dbPath, () => IngestionFactory.create(dbPath, indexPath, factoryOptions), {
|
|
214
233
|
commandName: 'Ingestion command',
|
|
215
234
|
showProgress: true,
|
|
216
235
|
maxWaitMs: 15000 // Longer timeout for ingestion
|
|
@@ -360,7 +379,7 @@ export async function runRebuild() {
|
|
|
360
379
|
}
|
|
361
380
|
}
|
|
362
381
|
// Create ingestion pipeline with force rebuild using factory
|
|
363
|
-
const pipeline = await
|
|
382
|
+
const pipeline = await IngestionFactory.create(dbPath, indexPath, rebuildOptions);
|
|
364
383
|
try {
|
|
365
384
|
// Get all documents from database and re-ingest them
|
|
366
385
|
const { openDatabase } = await import('../core/db.js');
|
package/dist/cli/search.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { existsSync } from 'fs';
|
|
2
|
-
import {
|
|
2
|
+
import { SearchFactory } from '../factories/search-factory.js';
|
|
3
3
|
import { withCLIDatabaseAccess, setupCLICleanup } from '../core/cli-database-utils.js';
|
|
4
4
|
import { config, EXIT_CODES, ConfigurationError } from '../core/config.js';
|
|
5
5
|
/**
|
|
@@ -60,8 +60,8 @@ export async function runSearch(query, options = {}) {
|
|
|
60
60
|
// Initialize search engine using polymorphic factory with database protection
|
|
61
61
|
let searchEngine;
|
|
62
62
|
try {
|
|
63
|
-
// Create search engine using
|
|
64
|
-
searchEngine = await withCLIDatabaseAccess(effectiveConfig.db_file, () =>
|
|
63
|
+
// Create search engine using SearchFactory (auto-detects mode)
|
|
64
|
+
searchEngine = await withCLIDatabaseAccess(effectiveConfig.db_file, () => SearchFactory.create(effectiveConfig.index_file, effectiveConfig.db_file), {
|
|
65
65
|
commandName: 'Search command',
|
|
66
66
|
showProgress: true
|
|
67
67
|
});
|
package/dist/cli.js
CHANGED
|
@@ -8,6 +8,12 @@ const __filename = fileURLToPath(import.meta.url);
|
|
|
8
8
|
const __dirname = dirname(__filename);
|
|
9
9
|
const packageJsonPath = join(__dirname, '..', 'package.json');
|
|
10
10
|
const packageJson = JSON.parse(readFileSync(packageJsonPath, 'utf-8'));
|
|
11
|
+
/**
|
|
12
|
+
* Display version information
|
|
13
|
+
*/
|
|
14
|
+
function showVersion() {
|
|
15
|
+
console.log(`RAG-lite TS v${packageJson.version}`);
|
|
16
|
+
}
|
|
11
17
|
/**
|
|
12
18
|
* Display help information
|
|
13
19
|
*/
|
|
@@ -23,10 +29,11 @@ Commands:
|
|
|
23
29
|
ingest <path> Ingest documents from file or directory
|
|
24
30
|
search <query> Search indexed documents
|
|
25
31
|
rebuild Rebuild the vector index
|
|
32
|
+
version Show version information
|
|
26
33
|
help Show this help message
|
|
27
34
|
|
|
28
35
|
Examples:
|
|
29
|
-
raglite ingest ./docs/ # Ingest all .md/.txt files in docs/
|
|
36
|
+
raglite ingest ./docs/ # Ingest all .md/.txt/.docx/.pdf files in docs/
|
|
30
37
|
raglite ingest ./readme.md # Ingest single file
|
|
31
38
|
raglite ingest ./docs/ --model Xenova/all-mpnet-base-v2 # Use higher quality model
|
|
32
39
|
raglite ingest ./docs/ --mode multimodal # Enable multimodal processing
|
|
@@ -57,7 +64,8 @@ Available models:
|
|
|
57
64
|
sentence-transformers/all-MiniLM-L6-v2 (384 dim, fast, default)
|
|
58
65
|
Xenova/all-mpnet-base-v2 (768 dim, higher quality)
|
|
59
66
|
Multimodal mode:
|
|
60
|
-
Xenova/clip-vit-base-patch32 (512 dim,
|
|
67
|
+
Xenova/clip-vit-base-patch32 (512 dim, faster, default)
|
|
68
|
+
Xenova/clip-vit-base-patch16 (512 dim, more accurate, slower)
|
|
61
69
|
|
|
62
70
|
Available reranking strategies (multimodal mode):
|
|
63
71
|
text-derived Use image-to-text conversion + cross-encoder (default)
|
|
@@ -75,6 +83,13 @@ function parseArgs() {
|
|
|
75
83
|
if (args.length === 0) {
|
|
76
84
|
return { command: 'help', args: [], options: {} };
|
|
77
85
|
}
|
|
86
|
+
// Handle --version and --help flags at the top level
|
|
87
|
+
if (args[0] === '--version' || args[0] === '-v') {
|
|
88
|
+
return { command: 'version', args: [], options: {} };
|
|
89
|
+
}
|
|
90
|
+
if (args[0] === '--help' || args[0] === '-h') {
|
|
91
|
+
return { command: 'help', args: [], options: {} };
|
|
92
|
+
}
|
|
78
93
|
const command = args[0];
|
|
79
94
|
const remainingArgs = [];
|
|
80
95
|
const options = {};
|
|
@@ -96,6 +111,9 @@ function parseArgs() {
|
|
|
96
111
|
else if (optionName === 'help') {
|
|
97
112
|
return { command: 'help', args: [], options: {} };
|
|
98
113
|
}
|
|
114
|
+
else if (optionName === 'version') {
|
|
115
|
+
return { command: 'version', args: [], options: {} };
|
|
116
|
+
}
|
|
99
117
|
else {
|
|
100
118
|
// Handle options with values
|
|
101
119
|
const nextArg = args[i + 1];
|
|
@@ -126,7 +144,7 @@ function validateArgs(command, args, options) {
|
|
|
126
144
|
console.error('Usage: raglite ingest <path>');
|
|
127
145
|
console.error('');
|
|
128
146
|
console.error('Examples:');
|
|
129
|
-
console.error(' raglite ingest ./docs/ # Ingest all .md/.txt files in docs/');
|
|
147
|
+
console.error(' raglite ingest ./docs/ # Ingest all .md/.txt/.docx/.pdf files in docs/');
|
|
130
148
|
console.error(' raglite ingest ./readme.md # Ingest single file');
|
|
131
149
|
console.error(' raglite ingest ./docs/ --model Xenova/all-mpnet-base-v2 # Use higher quality model');
|
|
132
150
|
console.error(' raglite ingest ./docs/ --mode multimodal # Enable multimodal processing');
|
|
@@ -165,13 +183,16 @@ function validateArgs(command, args, options) {
|
|
|
165
183
|
case 'rebuild':
|
|
166
184
|
// No arguments required
|
|
167
185
|
break;
|
|
186
|
+
case 'version':
|
|
187
|
+
// No validation needed
|
|
188
|
+
break;
|
|
168
189
|
case 'help':
|
|
169
190
|
// No validation needed
|
|
170
191
|
break;
|
|
171
192
|
default:
|
|
172
193
|
console.error(`Error: Unknown command '${command}'`);
|
|
173
194
|
console.error('');
|
|
174
|
-
console.error('Available commands: ingest, search, rebuild, help');
|
|
195
|
+
console.error('Available commands: ingest, search, rebuild, version, help');
|
|
175
196
|
console.error('Run "raglite help" for detailed usage information');
|
|
176
197
|
process.exit(EXIT_CODES.INVALID_ARGUMENTS);
|
|
177
198
|
}
|
|
@@ -302,7 +323,8 @@ function validateArgs(command, args, options) {
|
|
|
302
323
|
'Xenova/all-mpnet-base-v2'
|
|
303
324
|
];
|
|
304
325
|
const multimodalModels = [
|
|
305
|
-
'Xenova/clip-vit-base-patch32'
|
|
326
|
+
'Xenova/clip-vit-base-patch32',
|
|
327
|
+
'Xenova/clip-vit-base-patch16'
|
|
306
328
|
];
|
|
307
329
|
let supportedModels;
|
|
308
330
|
let modelTypeDescription;
|
|
@@ -324,7 +346,8 @@ function validateArgs(command, args, options) {
|
|
|
324
346
|
}
|
|
325
347
|
else {
|
|
326
348
|
console.error('Supported models for multimodal mode:');
|
|
327
|
-
console.error(' Xenova/clip-vit-base-patch32 (512 dim,
|
|
349
|
+
console.error(' Xenova/clip-vit-base-patch32 (512 dim, faster, default)');
|
|
350
|
+
console.error(' Xenova/clip-vit-base-patch16 (512 dim, more accurate, slower)');
|
|
328
351
|
}
|
|
329
352
|
console.error('');
|
|
330
353
|
console.error('Examples:');
|
|
@@ -334,6 +357,7 @@ function validateArgs(command, args, options) {
|
|
|
334
357
|
}
|
|
335
358
|
else {
|
|
336
359
|
console.error(' --model Xenova/clip-vit-base-patch32 --mode multimodal');
|
|
360
|
+
console.error(' --model Xenova/clip-vit-base-patch16 --mode multimodal');
|
|
337
361
|
}
|
|
338
362
|
process.exit(EXIT_CODES.INVALID_ARGUMENTS);
|
|
339
363
|
}
|
|
@@ -386,6 +410,9 @@ async function main() {
|
|
|
386
410
|
validateArgs(command, args, options);
|
|
387
411
|
// Handle commands
|
|
388
412
|
switch (command) {
|
|
413
|
+
case 'version':
|
|
414
|
+
showVersion();
|
|
415
|
+
break;
|
|
389
416
|
case 'help':
|
|
390
417
|
showHelp();
|
|
391
418
|
break;
|
|
@@ -56,8 +56,8 @@ export function createMissingFileError(filePath, fileType, config = {}) {
|
|
|
56
56
|
messages.push(' 2. Or create an ingestion pipeline programmatically:');
|
|
57
57
|
if (cfg.includeExamples) {
|
|
58
58
|
messages.push(' ```typescript');
|
|
59
|
-
messages.push(' import {
|
|
60
|
-
messages.push(' const pipeline = await
|
|
59
|
+
messages.push(' import { IngestionFactory } from "rag-lite-ts";');
|
|
60
|
+
messages.push(' const pipeline = await IngestionFactory.create(');
|
|
61
61
|
messages.push(` "${filePath.endsWith('.bin') ? filePath.replace('.bin', '.sqlite') : filePath}",`);
|
|
62
62
|
messages.push(` "${filePath.endsWith('.sqlite') ? filePath.replace('.sqlite', '.bin') : filePath}"`);
|
|
63
63
|
messages.push(' );');
|
|
@@ -216,7 +216,7 @@ export function createModeMismatchError(expectedMode, actualMode, config = {}) {
|
|
|
216
216
|
messages.push(' 3. Or create a new database for the different mode:');
|
|
217
217
|
if (cfg.includeExamples) {
|
|
218
218
|
messages.push(' ```typescript');
|
|
219
|
-
messages.push(' const pipeline = await
|
|
219
|
+
messages.push(' const pipeline = await IngestionFactory.create(');
|
|
220
220
|
messages.push(' "./new-database.sqlite",');
|
|
221
221
|
messages.push(' "./new-index.bin",');
|
|
222
222
|
messages.push(` { mode: "${actualMode}" }`);
|
|
@@ -189,14 +189,6 @@ export declare class ContentManager {
|
|
|
189
189
|
* @returns Promise that resolves when directory is created
|
|
190
190
|
*/
|
|
191
191
|
private ensureContentDirectory;
|
|
192
|
-
/**
|
|
193
|
-
* Writes content to file atomically to prevent partial writes
|
|
194
|
-
* @param filePath - Path to write to
|
|
195
|
-
* @param content - Content to write
|
|
196
|
-
* @returns Promise that resolves when write is complete
|
|
197
|
-
* @deprecated Use writeFileAtomic from resource-cleanup.ts for better resource management
|
|
198
|
-
*/
|
|
199
|
-
private writeContentAtomic;
|
|
200
192
|
/**
|
|
201
193
|
* Gets comprehensive storage statistics for monitoring and reporting
|
|
202
194
|
* @returns Promise that resolves to detailed storage statistics
|
|
@@ -5,9 +5,9 @@
|
|
|
5
5
|
*/
|
|
6
6
|
import { createHash } from 'crypto';
|
|
7
7
|
import { promises as fs } from 'fs';
|
|
8
|
-
import { join,
|
|
8
|
+
import { join, extname, basename } from 'path';
|
|
9
9
|
import { insertContentMetadata, getContentMetadataByHash, getStorageStats, updateStorageStats, getContentMetadataByStorageType, deleteContentMetadata } from './db.js';
|
|
10
|
-
import { ContentIngestionError,
|
|
10
|
+
import { ContentIngestionError, StorageLimitExceededError, InvalidContentFormatError, ContentErrorHandler } from './content-errors.js';
|
|
11
11
|
import { globalResourceCleanup, withResourceCleanup, writeFileAtomic, withTimeout, SafeBuffer } from './resource-cleanup.js';
|
|
12
12
|
import { createStreamingOperations, formatBytes, formatProcessingTime } from './streaming-operations.js';
|
|
13
13
|
import { createContentPerformanceOptimizer, formatCacheHitRate } from './content-performance-optimizer.js';
|
|
@@ -1008,34 +1008,6 @@ export class ContentManager {
|
|
|
1008
1008
|
throw new Error(`Failed to create content directory: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
1009
1009
|
}
|
|
1010
1010
|
}
|
|
1011
|
-
/**
|
|
1012
|
-
* Writes content to file atomically to prevent partial writes
|
|
1013
|
-
* @param filePath - Path to write to
|
|
1014
|
-
* @param content - Content to write
|
|
1015
|
-
* @returns Promise that resolves when write is complete
|
|
1016
|
-
* @deprecated Use writeFileAtomic from resource-cleanup.ts for better resource management
|
|
1017
|
-
*/
|
|
1018
|
-
async writeContentAtomic(filePath, content) {
|
|
1019
|
-
const tempPath = `${filePath}.tmp.${Date.now()}`;
|
|
1020
|
-
try {
|
|
1021
|
-
// Ensure directory exists
|
|
1022
|
-
await fs.mkdir(dirname(filePath), { recursive: true });
|
|
1023
|
-
// Write to temporary file first
|
|
1024
|
-
await fs.writeFile(tempPath, content);
|
|
1025
|
-
// Atomically move to final location
|
|
1026
|
-
await fs.rename(tempPath, filePath);
|
|
1027
|
-
}
|
|
1028
|
-
catch (error) {
|
|
1029
|
-
// Clean up temporary file if it exists
|
|
1030
|
-
try {
|
|
1031
|
-
await fs.unlink(tempPath);
|
|
1032
|
-
}
|
|
1033
|
-
catch {
|
|
1034
|
-
// Ignore cleanup errors
|
|
1035
|
-
}
|
|
1036
|
-
throw new ContentDirectoryError('atomic write', `Failed to write content atomically: ${error instanceof Error ? error.message : 'Unknown error'}`, 'file_write');
|
|
1037
|
-
}
|
|
1038
|
-
}
|
|
1039
1011
|
// =============================================================================
|
|
1040
1012
|
// CONTENT DIRECTORY MANAGEMENT METHODS
|
|
1041
1013
|
// =============================================================================
|
|
@@ -20,6 +20,16 @@ export class DatabaseConnectionManager {
|
|
|
20
20
|
static async getConnection(dbPath) {
|
|
21
21
|
const normalizedPath = this.normalizePath(dbPath);
|
|
22
22
|
let connectionInfo = this.connections.get(normalizedPath);
|
|
23
|
+
// Check if cached connection exists but database file was deleted
|
|
24
|
+
if (connectionInfo && !connectionInfo.isClosing) {
|
|
25
|
+
const { existsSync } = await import('fs');
|
|
26
|
+
if (!existsSync(normalizedPath)) {
|
|
27
|
+
// Database file was deleted - invalidate cached connection
|
|
28
|
+
console.log(`🔄 Database file deleted, invalidating cached connection: ${normalizedPath}`);
|
|
29
|
+
await this.forceCloseConnection(normalizedPath);
|
|
30
|
+
connectionInfo = undefined; // Force creation of new connection
|
|
31
|
+
}
|
|
32
|
+
}
|
|
23
33
|
if (!connectionInfo || connectionInfo.isClosing) {
|
|
24
34
|
// Create new connection
|
|
25
35
|
const connection = await openDatabase(dbPath);
|
package/dist/core/db.d.ts
CHANGED
|
@@ -91,38 +91,6 @@ export declare function getSystemInfo(connection: DatabaseConnection): Promise<S
|
|
|
91
91
|
* @param systemInfo - SystemInfo object to store
|
|
92
92
|
*/
|
|
93
93
|
export declare function setSystemInfo(connection: DatabaseConnection, systemInfo: Partial<SystemInfo>): Promise<void>;
|
|
94
|
-
/**
|
|
95
|
-
* @deprecated Use getSystemInfo() instead. This function is kept for existing code compatibility.
|
|
96
|
-
* Gets the current model version from system_info table
|
|
97
|
-
* @param connection - Database connection object
|
|
98
|
-
* @returns Promise that resolves to the model version string or null if not set
|
|
99
|
-
*/
|
|
100
|
-
export declare function getModelVersion(connection: DatabaseConnection): Promise<string | null>;
|
|
101
|
-
/**
|
|
102
|
-
* @deprecated Use setSystemInfo() instead. This function is kept for existing code compatibility.
|
|
103
|
-
* Sets the model version in system_info table
|
|
104
|
-
* @param connection - Database connection object
|
|
105
|
-
* @param modelVersion - Model version string to store
|
|
106
|
-
*/
|
|
107
|
-
export declare function setModelVersion(connection: DatabaseConnection, modelVersion: string): Promise<void>;
|
|
108
|
-
/**
|
|
109
|
-
* @deprecated Use getSystemInfo() instead. This function is kept for existing code compatibility.
|
|
110
|
-
* Gets the stored model information from system_info table
|
|
111
|
-
* @param connection - Database connection object
|
|
112
|
-
* @returns Promise that resolves to model info object or null if not set
|
|
113
|
-
*/
|
|
114
|
-
export declare function getStoredModelInfo(connection: DatabaseConnection): Promise<{
|
|
115
|
-
modelName: string;
|
|
116
|
-
dimensions: number;
|
|
117
|
-
} | null>;
|
|
118
|
-
/**
|
|
119
|
-
* @deprecated Use setSystemInfo() instead. This function is kept for existing code compatibility.
|
|
120
|
-
* Sets the model information in system_info table
|
|
121
|
-
* @param connection - Database connection object
|
|
122
|
-
* @param modelName - Name of the embedding model
|
|
123
|
-
* @param dimensions - Number of dimensions for the model
|
|
124
|
-
*/
|
|
125
|
-
export declare function setStoredModelInfo(connection: DatabaseConnection, modelName: string, dimensions: number): Promise<void>;
|
|
126
94
|
/**
|
|
127
95
|
* Retrieves documents by content type
|
|
128
96
|
* @param connection - Database connection object
|
package/dist/core/db.js
CHANGED
|
@@ -516,74 +516,17 @@ export async function setSystemInfo(connection, systemInfo) {
|
|
|
516
516
|
throw new Error(`Failed to set system info: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
517
517
|
}
|
|
518
518
|
}
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
catch (error) {
|
|
531
|
-
throw new Error(`Failed to get model version: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
532
|
-
}
|
|
533
|
-
}
|
|
534
|
-
/**
|
|
535
|
-
* @deprecated Use setSystemInfo() instead. This function is kept for existing code compatibility.
|
|
536
|
-
* Sets the model version in system_info table
|
|
537
|
-
* @param connection - Database connection object
|
|
538
|
-
* @param modelVersion - Model version string to store
|
|
539
|
-
*/
|
|
540
|
-
export async function setModelVersion(connection, modelVersion) {
|
|
541
|
-
try {
|
|
542
|
-
await setSystemInfo(connection, { modelVersion });
|
|
543
|
-
}
|
|
544
|
-
catch (error) {
|
|
545
|
-
throw new Error(`Failed to set model version: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
546
|
-
}
|
|
547
|
-
}
|
|
548
|
-
/**
|
|
549
|
-
* @deprecated Use getSystemInfo() instead. This function is kept for existing code compatibility.
|
|
550
|
-
* Gets the stored model information from system_info table
|
|
551
|
-
* @param connection - Database connection object
|
|
552
|
-
* @returns Promise that resolves to model info object or null if not set
|
|
553
|
-
*/
|
|
554
|
-
export async function getStoredModelInfo(connection) {
|
|
555
|
-
try {
|
|
556
|
-
const systemInfo = await getSystemInfo(connection);
|
|
557
|
-
if (!systemInfo || !systemInfo.modelName || !systemInfo.modelDimensions) {
|
|
558
|
-
return null;
|
|
559
|
-
}
|
|
560
|
-
return {
|
|
561
|
-
modelName: systemInfo.modelName,
|
|
562
|
-
dimensions: systemInfo.modelDimensions
|
|
563
|
-
};
|
|
564
|
-
}
|
|
565
|
-
catch (error) {
|
|
566
|
-
throw new Error(`Failed to get stored model info: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
567
|
-
}
|
|
568
|
-
}
|
|
569
|
-
/**
|
|
570
|
-
* @deprecated Use setSystemInfo() instead. This function is kept for existing code compatibility.
|
|
571
|
-
* Sets the model information in system_info table
|
|
572
|
-
* @param connection - Database connection object
|
|
573
|
-
* @param modelName - Name of the embedding model
|
|
574
|
-
* @param dimensions - Number of dimensions for the model
|
|
575
|
-
*/
|
|
576
|
-
export async function setStoredModelInfo(connection, modelName, dimensions) {
|
|
577
|
-
try {
|
|
578
|
-
await setSystemInfo(connection, {
|
|
579
|
-
modelName,
|
|
580
|
-
modelDimensions: dimensions
|
|
581
|
-
});
|
|
582
|
-
}
|
|
583
|
-
catch (error) {
|
|
584
|
-
throw new Error(`Failed to set stored model info: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
585
|
-
}
|
|
586
|
-
}
|
|
519
|
+
// =============================================================================
|
|
520
|
+
// REMOVED IN v3.0.0: Legacy database functions
|
|
521
|
+
// =============================================================================
|
|
522
|
+
// The following functions have been removed. Use getSystemInfo() and setSystemInfo() instead:
|
|
523
|
+
//
|
|
524
|
+
// - getModelVersion() → Use: const systemInfo = await getSystemInfo(db); const version = systemInfo?.modelVersion;
|
|
525
|
+
// - setModelVersion() → Use: await setSystemInfo(db, { modelVersion: 'version' });
|
|
526
|
+
// - getStoredModelInfo() → Use: const systemInfo = await getSystemInfo(db); access systemInfo.modelName and systemInfo.modelDimensions
|
|
527
|
+
// - setStoredModelInfo() → Use: await setSystemInfo(db, { modelName: 'name', modelDimensions: 384 });
|
|
528
|
+
//
|
|
529
|
+
// Migration guide: See CHANGELOG.md for v3.0.0 breaking changes
|
|
587
530
|
/**
|
|
588
531
|
* Retrieves documents by content type
|
|
589
532
|
* @param connection - Database connection object
|
|
@@ -151,26 +151,4 @@ export declare function listAvailableModels(): Array<{
|
|
|
151
151
|
supportedContentTypes: readonly string[];
|
|
152
152
|
memoryRequirement: number | undefined;
|
|
153
153
|
}>;
|
|
154
|
-
/**
|
|
155
|
-
* @deprecated Use createEmbedder() instead
|
|
156
|
-
* Legacy factory-style interface for backward compatibility
|
|
157
|
-
*/
|
|
158
|
-
export declare const UniversalEmbedderFactory: {
|
|
159
|
-
/**
|
|
160
|
-
* @deprecated Use createEmbedder() instead
|
|
161
|
-
*/
|
|
162
|
-
readonly create: (modelName: string, options?: EmbedderCreationOptions) => Promise<UniversalEmbedder>;
|
|
163
|
-
/**
|
|
164
|
-
* @deprecated Use ModelRegistry.validateModel() instead
|
|
165
|
-
*/
|
|
166
|
-
readonly validateModel: (modelName: string) => import("./universal-embedder.js").ModelValidationResult;
|
|
167
|
-
/**
|
|
168
|
-
* @deprecated Use ModelRegistry.getModelInfo() instead
|
|
169
|
-
*/
|
|
170
|
-
readonly getModelInfo: (modelName: string) => import("./universal-embedder.js").ModelInfo | null;
|
|
171
|
-
/**
|
|
172
|
-
* @deprecated Use ModelRegistry.getSupportedModels() instead
|
|
173
|
-
*/
|
|
174
|
-
readonly getSupportedModels: (modelType?: ModelType) => string[];
|
|
175
|
-
};
|
|
176
154
|
//# sourceMappingURL=embedder-factory.d.ts.map
|
|
@@ -299,40 +299,13 @@ export function listAvailableModels() {
|
|
|
299
299
|
});
|
|
300
300
|
}
|
|
301
301
|
// =============================================================================
|
|
302
|
-
// BACKWARD COMPATIBILITY
|
|
302
|
+
// BACKWARD COMPATIBILITY - REMOVED
|
|
303
303
|
// =============================================================================
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
*/
|
|
312
|
-
async create(modelName, options) {
|
|
313
|
-
console.warn('UniversalEmbedderFactory.create() is deprecated. Use createEmbedder() instead.');
|
|
314
|
-
return createEmbedder(modelName, options);
|
|
315
|
-
},
|
|
316
|
-
/**
|
|
317
|
-
* @deprecated Use ModelRegistry.validateModel() instead
|
|
318
|
-
*/
|
|
319
|
-
validateModel(modelName) {
|
|
320
|
-
console.warn('UniversalEmbedderFactory.validateModel() is deprecated. Use ModelRegistry.validateModel() instead.');
|
|
321
|
-
return ModelRegistry.validateModel(modelName);
|
|
322
|
-
},
|
|
323
|
-
/**
|
|
324
|
-
* @deprecated Use ModelRegistry.getModelInfo() instead
|
|
325
|
-
*/
|
|
326
|
-
getModelInfo(modelName) {
|
|
327
|
-
console.warn('UniversalEmbedderFactory.getModelInfo() is deprecated. Use ModelRegistry.getModelInfo() instead.');
|
|
328
|
-
return ModelRegistry.getModelInfo(modelName);
|
|
329
|
-
},
|
|
330
|
-
/**
|
|
331
|
-
* @deprecated Use ModelRegistry.getSupportedModels() instead
|
|
332
|
-
*/
|
|
333
|
-
getSupportedModels(modelType) {
|
|
334
|
-
console.warn('UniversalEmbedderFactory.getSupportedModels() is deprecated. Use ModelRegistry.getSupportedModels() instead.');
|
|
335
|
-
return ModelRegistry.getSupportedModels(modelType);
|
|
336
|
-
}
|
|
337
|
-
};
|
|
304
|
+
// The UniversalEmbedderFactory object has been removed as it was only a thin
|
|
305
|
+
// wrapper around the new API with deprecation warnings. Use the following instead:
|
|
306
|
+
//
|
|
307
|
+
// - UniversalEmbedderFactory.create() → createEmbedder()
|
|
308
|
+
// - UniversalEmbedderFactory.validateModel() → ModelRegistry.validateModel()
|
|
309
|
+
// - UniversalEmbedderFactory.getModelInfo() → ModelRegistry.getModelInfo()
|
|
310
|
+
// - UniversalEmbedderFactory.getSupportedModels() → ModelRegistry.getSupportedModels()
|
|
338
311
|
//# sourceMappingURL=embedder-factory.js.map
|
package/dist/core/index.d.ts
CHANGED
|
@@ -23,8 +23,8 @@
|
|
|
23
23
|
* 3. Usage Patterns:
|
|
24
24
|
*
|
|
25
25
|
* // Direct dependency injection (advanced users)
|
|
26
|
-
* const embedFn =
|
|
27
|
-
* const rerankFn =
|
|
26
|
+
* const embedFn = createTextEmbedFunction();
|
|
27
|
+
* const rerankFn = createTextRerankFunction();
|
|
28
28
|
* const indexManager = new IndexManager('./index.bin');
|
|
29
29
|
* const db = await openDatabase('./db.sqlite');
|
|
30
30
|
* const search = new SearchEngine(embedFn, indexManager, db, rerankFn);
|
|
@@ -47,7 +47,7 @@ export { type ContentDocument, type ContentChunk, type Document, type Chunk, typ
|
|
|
47
47
|
export { type EmbedFunction, type RerankFunction, type EmbeddingQueryInterface, type RerankingInterface, type SearchEngineConfig, type ContentTypeStrategy, type ModelAgnosticInterface, type ExtendedEmbeddingInterface, type ExtendedRerankingInterface, type SearchPipelineInterface, type SearchDependencyFactory, InterfaceValidator } from './interfaces.js';
|
|
48
48
|
export * from './adapters.js';
|
|
49
49
|
export * from './config.js';
|
|
50
|
-
export { type DatabaseConnection, type ContentMetadata, openDatabase, initializeSchema, insertDocument, insertChunk, upsertDocument, getChunksByEmbeddingIds,
|
|
50
|
+
export { type DatabaseConnection, type ContentMetadata, openDatabase, initializeSchema, insertDocument, insertChunk, upsertDocument, getChunksByEmbeddingIds, insertContentMetadata, getContentMetadata, getContentMetadataByHash, getContentMetadataByStorageType, deleteContentMetadata, getStorageStats, updateStorageStats } from './db.js';
|
|
51
51
|
export { type VectorIndexOptions, VectorIndex } from './vector-index.js';
|
|
52
52
|
export { type ChunkConfig, type GenericDocument, type GenericChunk, type ChunkingStrategy, ChunkingStrategyRegistry, DEFAULT_CHUNK_CONFIG, chunkingRegistry, chunkGenericDocument, registerTextChunkingStrategy } from './chunker.js';
|
|
53
53
|
export * from './search.js';
|
package/dist/core/index.js
CHANGED
|
@@ -23,8 +23,8 @@
|
|
|
23
23
|
* 3. Usage Patterns:
|
|
24
24
|
*
|
|
25
25
|
* // Direct dependency injection (advanced users)
|
|
26
|
-
* const embedFn =
|
|
27
|
-
* const rerankFn =
|
|
26
|
+
* const embedFn = createTextEmbedFunction();
|
|
27
|
+
* const rerankFn = createTextRerankFunction();
|
|
28
28
|
* const indexManager = new IndexManager('./index.bin');
|
|
29
29
|
* const db = await openDatabase('./db.sqlite');
|
|
30
30
|
* const search = new SearchEngine(embedFn, indexManager, db, rerankFn);
|
|
@@ -50,7 +50,7 @@ export * from './adapters.js';
|
|
|
50
50
|
// Core configuration management - model-agnostic settings
|
|
51
51
|
export * from './config.js';
|
|
52
52
|
// Database operations - supports different content types through metadata
|
|
53
|
-
export { openDatabase, initializeSchema, insertDocument, insertChunk, upsertDocument, getChunksByEmbeddingIds,
|
|
53
|
+
export { openDatabase, initializeSchema, insertDocument, insertChunk, upsertDocument, getChunksByEmbeddingIds, insertContentMetadata, getContentMetadata, getContentMetadataByHash, getContentMetadataByStorageType, deleteContentMetadata, getStorageStats, updateStorageStats } from './db.js';
|
|
54
54
|
// Vector index operations - works with any embedding dimensions
|
|
55
55
|
export { VectorIndex } from './vector-index.js';
|
|
56
56
|
// Generic chunking interfaces and strategies - supports text, image metadata, etc.
|
package/dist/core/ingestion.d.ts
CHANGED
|
@@ -92,7 +92,7 @@ export declare class IngestionPipeline {
|
|
|
92
92
|
* USAGE EXAMPLES:
|
|
93
93
|
* ```typescript
|
|
94
94
|
* // Text-only ingestion pipeline with unified content system
|
|
95
|
-
* const textEmbedFn =
|
|
95
|
+
* const textEmbedFn = createTextEmbedFunction();
|
|
96
96
|
* const indexManager = new IndexManager('./index.bin');
|
|
97
97
|
* const db = await openDatabase('./db.sqlite');
|
|
98
98
|
* const contentManager = new ContentManager(db);
|
|
@@ -150,32 +150,17 @@ export declare class IngestionPipeline {
|
|
|
150
150
|
* Enhanced to handle different content types appropriately
|
|
151
151
|
*/
|
|
152
152
|
private chunkDocumentsWithContentTypes;
|
|
153
|
-
/**
|
|
154
|
-
* Chunk all documents and organize results (legacy method for backward compatibility)
|
|
155
|
-
* @deprecated Use chunkDocumentsWithContentTypes for multimodal support
|
|
156
|
-
*/
|
|
157
|
-
private chunkDocuments;
|
|
158
153
|
/**
|
|
159
154
|
* Generate embeddings for all chunks with content-type support
|
|
160
155
|
* Enhanced to handle different content types and pass metadata to embedding function
|
|
161
156
|
*/
|
|
162
157
|
private generateEmbeddingsWithContentTypes;
|
|
163
|
-
/**
|
|
164
|
-
* Generate embeddings for all chunks with error handling (legacy method for backward compatibility)
|
|
165
|
-
* @deprecated Use generateEmbeddingsWithContentTypes for multimodal support
|
|
166
|
-
*/
|
|
167
|
-
private generateEmbeddings;
|
|
168
158
|
/**
|
|
169
159
|
* Store documents and chunks in database with content-type support
|
|
170
160
|
* Enhanced to handle content type metadata and multimodal content
|
|
171
161
|
* @returns Array of content IDs for successfully stored documents
|
|
172
162
|
*/
|
|
173
163
|
private storeDocumentsAndChunksWithContentTypes;
|
|
174
|
-
/**
|
|
175
|
-
* Store documents and chunks in database (legacy method for backward compatibility)
|
|
176
|
-
* @deprecated Use storeDocumentsAndChunksWithContentTypes for multimodal support
|
|
177
|
-
*/
|
|
178
|
-
private storeDocumentsAndChunks;
|
|
179
164
|
/**
|
|
180
165
|
* Update vector index with new embeddings
|
|
181
166
|
*/
|