rag-lite-ts 2.0.3 → 2.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/indexer.js +4 -4
- package/dist/cli/search.js +3 -3
- package/dist/cli.js +31 -4
- package/dist/config.js +3 -0
- package/dist/core/actionable-error-messages.js +3 -3
- package/dist/core/content-manager.d.ts +0 -8
- package/dist/core/content-manager.js +2 -30
- package/dist/core/database-connection-manager.js +15 -9
- package/dist/core/db.d.ts +0 -32
- package/dist/core/db.js +11 -68
- package/dist/core/embedder-factory.d.ts +0 -22
- package/dist/core/embedder-factory.js +8 -35
- package/dist/core/index.d.ts +3 -3
- package/dist/core/index.js +3 -3
- package/dist/core/ingestion.d.ts +1 -16
- package/dist/core/ingestion.js +4 -30
- package/dist/core/interfaces.d.ts +1 -1
- package/dist/core/interfaces.js +1 -1
- package/dist/core/model-registry.d.ts +0 -4
- package/dist/core/model-registry.js +5 -9
- package/dist/core/search.d.ts +2 -2
- package/dist/core/search.js +2 -2
- package/dist/factories/index.d.ts +11 -29
- package/dist/factories/index.js +12 -29
- package/dist/factories/ingestion-factory.d.ts +200 -0
- package/dist/factories/ingestion-factory.js +475 -0
- package/dist/{core/polymorphic-search-factory.d.ts → factories/search-factory.d.ts} +7 -7
- package/dist/{core/polymorphic-search-factory.js → factories/search-factory.js} +22 -22
- package/dist/index-manager.js +25 -14
- package/dist/index.d.ts +5 -30
- package/dist/index.js +9 -24
- package/dist/ingestion.d.ts +2 -4
- package/dist/ingestion.js +2 -2
- package/dist/mcp-server.js +34 -30
- package/dist/search.js +2 -2
- package/dist/text/embedder.d.ts +0 -11
- package/dist/text/embedder.js +11 -22
- package/dist/text/index.d.ts +2 -2
- package/dist/text/index.js +2 -2
- package/dist/text/reranker.d.ts +0 -10
- package/dist/text/reranker.js +10 -33
- package/package.json +105 -101
- package/dist/factories/polymorphic-factory.d.ts +0 -50
- package/dist/factories/polymorphic-factory.js +0 -159
- package/dist/factories/text-factory.d.ts +0 -560
- package/dist/factories/text-factory.js +0 -982
package/dist/cli/indexer.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { existsSync, statSync } from 'fs';
|
|
2
2
|
import { resolve } from 'path';
|
|
3
|
-
import {
|
|
3
|
+
import { IngestionFactory } from '../factories/ingestion-factory.js';
|
|
4
4
|
import { withCLIDatabaseAccess, setupCLICleanup, isDatabaseBusy } from '../core/cli-database-utils.js';
|
|
5
5
|
import { EXIT_CODES, ConfigurationError } from '../core/config.js';
|
|
6
6
|
/**
|
|
@@ -228,8 +228,8 @@ export async function runIngest(path, options = {}) {
|
|
|
228
228
|
// Create ingestion pipeline using factory
|
|
229
229
|
let pipeline;
|
|
230
230
|
try {
|
|
231
|
-
// Create ingestion pipeline using
|
|
232
|
-
pipeline = await withCLIDatabaseAccess(dbPath, () =>
|
|
231
|
+
// Create ingestion pipeline using IngestionFactory with database protection
|
|
232
|
+
pipeline = await withCLIDatabaseAccess(dbPath, () => IngestionFactory.create(dbPath, indexPath, factoryOptions), {
|
|
233
233
|
commandName: 'Ingestion command',
|
|
234
234
|
showProgress: true,
|
|
235
235
|
maxWaitMs: 15000 // Longer timeout for ingestion
|
|
@@ -379,7 +379,7 @@ export async function runRebuild() {
|
|
|
379
379
|
}
|
|
380
380
|
}
|
|
381
381
|
// Create ingestion pipeline with force rebuild using factory
|
|
382
|
-
const pipeline = await
|
|
382
|
+
const pipeline = await IngestionFactory.create(dbPath, indexPath, rebuildOptions);
|
|
383
383
|
try {
|
|
384
384
|
// Get all documents from database and re-ingest them
|
|
385
385
|
const { openDatabase } = await import('../core/db.js');
|
package/dist/cli/search.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { existsSync } from 'fs';
|
|
2
|
-
import {
|
|
2
|
+
import { SearchFactory } from '../factories/search-factory.js';
|
|
3
3
|
import { withCLIDatabaseAccess, setupCLICleanup } from '../core/cli-database-utils.js';
|
|
4
4
|
import { config, EXIT_CODES, ConfigurationError } from '../core/config.js';
|
|
5
5
|
/**
|
|
@@ -60,8 +60,8 @@ export async function runSearch(query, options = {}) {
|
|
|
60
60
|
// Initialize search engine using polymorphic factory with database protection
|
|
61
61
|
let searchEngine;
|
|
62
62
|
try {
|
|
63
|
-
// Create search engine using
|
|
64
|
-
searchEngine = await withCLIDatabaseAccess(effectiveConfig.db_file, () =>
|
|
63
|
+
// Create search engine using SearchFactory (auto-detects mode)
|
|
64
|
+
searchEngine = await withCLIDatabaseAccess(effectiveConfig.db_file, () => SearchFactory.create(effectiveConfig.index_file, effectiveConfig.db_file), {
|
|
65
65
|
commandName: 'Search command',
|
|
66
66
|
showProgress: true
|
|
67
67
|
});
|
package/dist/cli.js
CHANGED
|
@@ -8,6 +8,12 @@ const __filename = fileURLToPath(import.meta.url);
|
|
|
8
8
|
const __dirname = dirname(__filename);
|
|
9
9
|
const packageJsonPath = join(__dirname, '..', 'package.json');
|
|
10
10
|
const packageJson = JSON.parse(readFileSync(packageJsonPath, 'utf-8'));
|
|
11
|
+
/**
|
|
12
|
+
* Display version information
|
|
13
|
+
*/
|
|
14
|
+
function showVersion() {
|
|
15
|
+
console.log(`RAG-lite TS v${packageJson.version}`);
|
|
16
|
+
}
|
|
11
17
|
/**
|
|
12
18
|
* Display help information
|
|
13
19
|
*/
|
|
@@ -23,6 +29,7 @@ Commands:
|
|
|
23
29
|
ingest <path> Ingest documents from file or directory
|
|
24
30
|
search <query> Search indexed documents
|
|
25
31
|
rebuild Rebuild the vector index
|
|
32
|
+
version Show version information
|
|
26
33
|
help Show this help message
|
|
27
34
|
|
|
28
35
|
Examples:
|
|
@@ -57,7 +64,8 @@ Available models:
|
|
|
57
64
|
sentence-transformers/all-MiniLM-L6-v2 (384 dim, fast, default)
|
|
58
65
|
Xenova/all-mpnet-base-v2 (768 dim, higher quality)
|
|
59
66
|
Multimodal mode:
|
|
60
|
-
Xenova/clip-vit-base-patch32 (512 dim,
|
|
67
|
+
Xenova/clip-vit-base-patch32 (512 dim, faster, default)
|
|
68
|
+
Xenova/clip-vit-base-patch16 (512 dim, more accurate, slower)
|
|
61
69
|
|
|
62
70
|
Available reranking strategies (multimodal mode):
|
|
63
71
|
text-derived Use image-to-text conversion + cross-encoder (default)
|
|
@@ -75,6 +83,13 @@ function parseArgs() {
|
|
|
75
83
|
if (args.length === 0) {
|
|
76
84
|
return { command: 'help', args: [], options: {} };
|
|
77
85
|
}
|
|
86
|
+
// Handle --version and --help flags at the top level
|
|
87
|
+
if (args[0] === '--version' || args[0] === '-v') {
|
|
88
|
+
return { command: 'version', args: [], options: {} };
|
|
89
|
+
}
|
|
90
|
+
if (args[0] === '--help' || args[0] === '-h') {
|
|
91
|
+
return { command: 'help', args: [], options: {} };
|
|
92
|
+
}
|
|
78
93
|
const command = args[0];
|
|
79
94
|
const remainingArgs = [];
|
|
80
95
|
const options = {};
|
|
@@ -96,6 +111,9 @@ function parseArgs() {
|
|
|
96
111
|
else if (optionName === 'help') {
|
|
97
112
|
return { command: 'help', args: [], options: {} };
|
|
98
113
|
}
|
|
114
|
+
else if (optionName === 'version') {
|
|
115
|
+
return { command: 'version', args: [], options: {} };
|
|
116
|
+
}
|
|
99
117
|
else {
|
|
100
118
|
// Handle options with values
|
|
101
119
|
const nextArg = args[i + 1];
|
|
@@ -165,13 +183,16 @@ function validateArgs(command, args, options) {
|
|
|
165
183
|
case 'rebuild':
|
|
166
184
|
// No arguments required
|
|
167
185
|
break;
|
|
186
|
+
case 'version':
|
|
187
|
+
// No validation needed
|
|
188
|
+
break;
|
|
168
189
|
case 'help':
|
|
169
190
|
// No validation needed
|
|
170
191
|
break;
|
|
171
192
|
default:
|
|
172
193
|
console.error(`Error: Unknown command '${command}'`);
|
|
173
194
|
console.error('');
|
|
174
|
-
console.error('Available commands: ingest, search, rebuild, help');
|
|
195
|
+
console.error('Available commands: ingest, search, rebuild, version, help');
|
|
175
196
|
console.error('Run "raglite help" for detailed usage information');
|
|
176
197
|
process.exit(EXIT_CODES.INVALID_ARGUMENTS);
|
|
177
198
|
}
|
|
@@ -302,7 +323,8 @@ function validateArgs(command, args, options) {
|
|
|
302
323
|
'Xenova/all-mpnet-base-v2'
|
|
303
324
|
];
|
|
304
325
|
const multimodalModels = [
|
|
305
|
-
'Xenova/clip-vit-base-patch32'
|
|
326
|
+
'Xenova/clip-vit-base-patch32',
|
|
327
|
+
'Xenova/clip-vit-base-patch16'
|
|
306
328
|
];
|
|
307
329
|
let supportedModels;
|
|
308
330
|
let modelTypeDescription;
|
|
@@ -324,7 +346,8 @@ function validateArgs(command, args, options) {
|
|
|
324
346
|
}
|
|
325
347
|
else {
|
|
326
348
|
console.error('Supported models for multimodal mode:');
|
|
327
|
-
console.error(' Xenova/clip-vit-base-patch32 (512 dim,
|
|
349
|
+
console.error(' Xenova/clip-vit-base-patch32 (512 dim, faster, default)');
|
|
350
|
+
console.error(' Xenova/clip-vit-base-patch16 (512 dim, more accurate, slower)');
|
|
328
351
|
}
|
|
329
352
|
console.error('');
|
|
330
353
|
console.error('Examples:');
|
|
@@ -334,6 +357,7 @@ function validateArgs(command, args, options) {
|
|
|
334
357
|
}
|
|
335
358
|
else {
|
|
336
359
|
console.error(' --model Xenova/clip-vit-base-patch32 --mode multimodal');
|
|
360
|
+
console.error(' --model Xenova/clip-vit-base-patch16 --mode multimodal');
|
|
337
361
|
}
|
|
338
362
|
process.exit(EXIT_CODES.INVALID_ARGUMENTS);
|
|
339
363
|
}
|
|
@@ -386,6 +410,9 @@ async function main() {
|
|
|
386
410
|
validateArgs(command, args, options);
|
|
387
411
|
// Handle commands
|
|
388
412
|
switch (command) {
|
|
413
|
+
case 'version':
|
|
414
|
+
showVersion();
|
|
415
|
+
break;
|
|
389
416
|
case 'help':
|
|
390
417
|
showHelp();
|
|
391
418
|
break;
|
package/dist/config.js
CHANGED
|
@@ -3,6 +3,9 @@
|
|
|
3
3
|
* Extends core configuration with implementation-specific properties
|
|
4
4
|
*/
|
|
5
5
|
import { getDefaultModelCachePath } from './core/config.js';
|
|
6
|
+
import { createRequire } from 'module';
|
|
7
|
+
// Create require for CommonJS modules in ES module context
|
|
8
|
+
const require = createRequire(import.meta.url);
|
|
6
9
|
/**
|
|
7
10
|
* Default configuration object with both core and text-specific settings
|
|
8
11
|
*/
|
|
@@ -56,8 +56,8 @@ export function createMissingFileError(filePath, fileType, config = {}) {
|
|
|
56
56
|
messages.push(' 2. Or create an ingestion pipeline programmatically:');
|
|
57
57
|
if (cfg.includeExamples) {
|
|
58
58
|
messages.push(' ```typescript');
|
|
59
|
-
messages.push(' import {
|
|
60
|
-
messages.push(' const pipeline = await
|
|
59
|
+
messages.push(' import { IngestionFactory } from "rag-lite-ts";');
|
|
60
|
+
messages.push(' const pipeline = await IngestionFactory.create(');
|
|
61
61
|
messages.push(` "${filePath.endsWith('.bin') ? filePath.replace('.bin', '.sqlite') : filePath}",`);
|
|
62
62
|
messages.push(` "${filePath.endsWith('.sqlite') ? filePath.replace('.sqlite', '.bin') : filePath}"`);
|
|
63
63
|
messages.push(' );');
|
|
@@ -216,7 +216,7 @@ export function createModeMismatchError(expectedMode, actualMode, config = {}) {
|
|
|
216
216
|
messages.push(' 3. Or create a new database for the different mode:');
|
|
217
217
|
if (cfg.includeExamples) {
|
|
218
218
|
messages.push(' ```typescript');
|
|
219
|
-
messages.push(' const pipeline = await
|
|
219
|
+
messages.push(' const pipeline = await IngestionFactory.create(');
|
|
220
220
|
messages.push(' "./new-database.sqlite",');
|
|
221
221
|
messages.push(' "./new-index.bin",');
|
|
222
222
|
messages.push(` { mode: "${actualMode}" }`);
|
|
@@ -189,14 +189,6 @@ export declare class ContentManager {
|
|
|
189
189
|
* @returns Promise that resolves when directory is created
|
|
190
190
|
*/
|
|
191
191
|
private ensureContentDirectory;
|
|
192
|
-
/**
|
|
193
|
-
* Writes content to file atomically to prevent partial writes
|
|
194
|
-
* @param filePath - Path to write to
|
|
195
|
-
* @param content - Content to write
|
|
196
|
-
* @returns Promise that resolves when write is complete
|
|
197
|
-
* @deprecated Use writeFileAtomic from resource-cleanup.ts for better resource management
|
|
198
|
-
*/
|
|
199
|
-
private writeContentAtomic;
|
|
200
192
|
/**
|
|
201
193
|
* Gets comprehensive storage statistics for monitoring and reporting
|
|
202
194
|
* @returns Promise that resolves to detailed storage statistics
|
|
@@ -5,9 +5,9 @@
|
|
|
5
5
|
*/
|
|
6
6
|
import { createHash } from 'crypto';
|
|
7
7
|
import { promises as fs } from 'fs';
|
|
8
|
-
import { join,
|
|
8
|
+
import { join, extname, basename } from 'path';
|
|
9
9
|
import { insertContentMetadata, getContentMetadataByHash, getStorageStats, updateStorageStats, getContentMetadataByStorageType, deleteContentMetadata } from './db.js';
|
|
10
|
-
import { ContentIngestionError,
|
|
10
|
+
import { ContentIngestionError, StorageLimitExceededError, InvalidContentFormatError, ContentErrorHandler } from './content-errors.js';
|
|
11
11
|
import { globalResourceCleanup, withResourceCleanup, writeFileAtomic, withTimeout, SafeBuffer } from './resource-cleanup.js';
|
|
12
12
|
import { createStreamingOperations, formatBytes, formatProcessingTime } from './streaming-operations.js';
|
|
13
13
|
import { createContentPerformanceOptimizer, formatCacheHitRate } from './content-performance-optimizer.js';
|
|
@@ -1008,34 +1008,6 @@ export class ContentManager {
|
|
|
1008
1008
|
throw new Error(`Failed to create content directory: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
1009
1009
|
}
|
|
1010
1010
|
}
|
|
1011
|
-
/**
|
|
1012
|
-
* Writes content to file atomically to prevent partial writes
|
|
1013
|
-
* @param filePath - Path to write to
|
|
1014
|
-
* @param content - Content to write
|
|
1015
|
-
* @returns Promise that resolves when write is complete
|
|
1016
|
-
* @deprecated Use writeFileAtomic from resource-cleanup.ts for better resource management
|
|
1017
|
-
*/
|
|
1018
|
-
async writeContentAtomic(filePath, content) {
|
|
1019
|
-
const tempPath = `${filePath}.tmp.${Date.now()}`;
|
|
1020
|
-
try {
|
|
1021
|
-
// Ensure directory exists
|
|
1022
|
-
await fs.mkdir(dirname(filePath), { recursive: true });
|
|
1023
|
-
// Write to temporary file first
|
|
1024
|
-
await fs.writeFile(tempPath, content);
|
|
1025
|
-
// Atomically move to final location
|
|
1026
|
-
await fs.rename(tempPath, filePath);
|
|
1027
|
-
}
|
|
1028
|
-
catch (error) {
|
|
1029
|
-
// Clean up temporary file if it exists
|
|
1030
|
-
try {
|
|
1031
|
-
await fs.unlink(tempPath);
|
|
1032
|
-
}
|
|
1033
|
-
catch {
|
|
1034
|
-
// Ignore cleanup errors
|
|
1035
|
-
}
|
|
1036
|
-
throw new ContentDirectoryError('atomic write', `Failed to write content atomically: ${error instanceof Error ? error.message : 'Unknown error'}`, 'file_write');
|
|
1037
|
-
}
|
|
1038
|
-
}
|
|
1039
1011
|
// =============================================================================
|
|
1040
1012
|
// CONTENT DIRECTORY MANAGEMENT METHODS
|
|
1041
1013
|
// =============================================================================
|
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
* Addresses production issues with MCP server + CLI concurrent usage
|
|
5
5
|
*/
|
|
6
6
|
import { openDatabase } from './db.js';
|
|
7
|
+
import { resolve as pathResolve } from 'node:path';
|
|
7
8
|
/**
|
|
8
9
|
* Database Connection Manager
|
|
9
10
|
* Manages shared database connections to prevent locking issues
|
|
@@ -20,6 +21,16 @@ export class DatabaseConnectionManager {
|
|
|
20
21
|
static async getConnection(dbPath) {
|
|
21
22
|
const normalizedPath = this.normalizePath(dbPath);
|
|
22
23
|
let connectionInfo = this.connections.get(normalizedPath);
|
|
24
|
+
// Check if cached connection exists but database file was deleted
|
|
25
|
+
if (connectionInfo && !connectionInfo.isClosing) {
|
|
26
|
+
const { existsSync } = await import('fs');
|
|
27
|
+
if (!existsSync(normalizedPath)) {
|
|
28
|
+
// Database file was deleted - invalidate cached connection
|
|
29
|
+
console.log(`🔄 Database file deleted, invalidating cached connection: ${normalizedPath}`);
|
|
30
|
+
await this.forceCloseConnection(normalizedPath);
|
|
31
|
+
connectionInfo = undefined; // Force creation of new connection
|
|
32
|
+
}
|
|
33
|
+
}
|
|
23
34
|
if (!connectionInfo || connectionInfo.isClosing) {
|
|
24
35
|
// Create new connection
|
|
25
36
|
const connection = await openDatabase(dbPath);
|
|
@@ -176,15 +187,10 @@ export class DatabaseConnectionManager {
|
|
|
176
187
|
*/
|
|
177
188
|
static normalizePath(dbPath) {
|
|
178
189
|
// Convert to absolute path and normalize separators
|
|
179
|
-
// Use
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
}
|
|
184
|
-
catch {
|
|
185
|
-
// Fallback: simple normalization
|
|
186
|
-
return dbPath.replace(/\\/g, '/');
|
|
187
|
-
}
|
|
190
|
+
// Use Node.js path.resolve for proper relative path handling
|
|
191
|
+
const absolutePath = pathResolve(dbPath);
|
|
192
|
+
// Normalize separators for cross-platform consistency
|
|
193
|
+
return absolutePath.replace(/\\/g, '/');
|
|
188
194
|
}
|
|
189
195
|
/**
|
|
190
196
|
* Start the cleanup timer for idle connections
|
package/dist/core/db.d.ts
CHANGED
|
@@ -91,38 +91,6 @@ export declare function getSystemInfo(connection: DatabaseConnection): Promise<S
|
|
|
91
91
|
* @param systemInfo - SystemInfo object to store
|
|
92
92
|
*/
|
|
93
93
|
export declare function setSystemInfo(connection: DatabaseConnection, systemInfo: Partial<SystemInfo>): Promise<void>;
|
|
94
|
-
/**
|
|
95
|
-
* @deprecated Use getSystemInfo() instead. This function is kept for existing code compatibility.
|
|
96
|
-
* Gets the current model version from system_info table
|
|
97
|
-
* @param connection - Database connection object
|
|
98
|
-
* @returns Promise that resolves to the model version string or null if not set
|
|
99
|
-
*/
|
|
100
|
-
export declare function getModelVersion(connection: DatabaseConnection): Promise<string | null>;
|
|
101
|
-
/**
|
|
102
|
-
* @deprecated Use setSystemInfo() instead. This function is kept for existing code compatibility.
|
|
103
|
-
* Sets the model version in system_info table
|
|
104
|
-
* @param connection - Database connection object
|
|
105
|
-
* @param modelVersion - Model version string to store
|
|
106
|
-
*/
|
|
107
|
-
export declare function setModelVersion(connection: DatabaseConnection, modelVersion: string): Promise<void>;
|
|
108
|
-
/**
|
|
109
|
-
* @deprecated Use getSystemInfo() instead. This function is kept for existing code compatibility.
|
|
110
|
-
* Gets the stored model information from system_info table
|
|
111
|
-
* @param connection - Database connection object
|
|
112
|
-
* @returns Promise that resolves to model info object or null if not set
|
|
113
|
-
*/
|
|
114
|
-
export declare function getStoredModelInfo(connection: DatabaseConnection): Promise<{
|
|
115
|
-
modelName: string;
|
|
116
|
-
dimensions: number;
|
|
117
|
-
} | null>;
|
|
118
|
-
/**
|
|
119
|
-
* @deprecated Use setSystemInfo() instead. This function is kept for existing code compatibility.
|
|
120
|
-
* Sets the model information in system_info table
|
|
121
|
-
* @param connection - Database connection object
|
|
122
|
-
* @param modelName - Name of the embedding model
|
|
123
|
-
* @param dimensions - Number of dimensions for the model
|
|
124
|
-
*/
|
|
125
|
-
export declare function setStoredModelInfo(connection: DatabaseConnection, modelName: string, dimensions: number): Promise<void>;
|
|
126
94
|
/**
|
|
127
95
|
* Retrieves documents by content type
|
|
128
96
|
* @param connection - Database connection object
|
package/dist/core/db.js
CHANGED
|
@@ -516,74 +516,17 @@ export async function setSystemInfo(connection, systemInfo) {
|
|
|
516
516
|
throw new Error(`Failed to set system info: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
517
517
|
}
|
|
518
518
|
}
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
catch (error) {
|
|
531
|
-
throw new Error(`Failed to get model version: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
532
|
-
}
|
|
533
|
-
}
|
|
534
|
-
/**
|
|
535
|
-
* @deprecated Use setSystemInfo() instead. This function is kept for existing code compatibility.
|
|
536
|
-
* Sets the model version in system_info table
|
|
537
|
-
* @param connection - Database connection object
|
|
538
|
-
* @param modelVersion - Model version string to store
|
|
539
|
-
*/
|
|
540
|
-
export async function setModelVersion(connection, modelVersion) {
|
|
541
|
-
try {
|
|
542
|
-
await setSystemInfo(connection, { modelVersion });
|
|
543
|
-
}
|
|
544
|
-
catch (error) {
|
|
545
|
-
throw new Error(`Failed to set model version: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
546
|
-
}
|
|
547
|
-
}
|
|
548
|
-
/**
|
|
549
|
-
* @deprecated Use getSystemInfo() instead. This function is kept for existing code compatibility.
|
|
550
|
-
* Gets the stored model information from system_info table
|
|
551
|
-
* @param connection - Database connection object
|
|
552
|
-
* @returns Promise that resolves to model info object or null if not set
|
|
553
|
-
*/
|
|
554
|
-
export async function getStoredModelInfo(connection) {
|
|
555
|
-
try {
|
|
556
|
-
const systemInfo = await getSystemInfo(connection);
|
|
557
|
-
if (!systemInfo || !systemInfo.modelName || !systemInfo.modelDimensions) {
|
|
558
|
-
return null;
|
|
559
|
-
}
|
|
560
|
-
return {
|
|
561
|
-
modelName: systemInfo.modelName,
|
|
562
|
-
dimensions: systemInfo.modelDimensions
|
|
563
|
-
};
|
|
564
|
-
}
|
|
565
|
-
catch (error) {
|
|
566
|
-
throw new Error(`Failed to get stored model info: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
567
|
-
}
|
|
568
|
-
}
|
|
569
|
-
/**
|
|
570
|
-
* @deprecated Use setSystemInfo() instead. This function is kept for existing code compatibility.
|
|
571
|
-
* Sets the model information in system_info table
|
|
572
|
-
* @param connection - Database connection object
|
|
573
|
-
* @param modelName - Name of the embedding model
|
|
574
|
-
* @param dimensions - Number of dimensions for the model
|
|
575
|
-
*/
|
|
576
|
-
export async function setStoredModelInfo(connection, modelName, dimensions) {
|
|
577
|
-
try {
|
|
578
|
-
await setSystemInfo(connection, {
|
|
579
|
-
modelName,
|
|
580
|
-
modelDimensions: dimensions
|
|
581
|
-
});
|
|
582
|
-
}
|
|
583
|
-
catch (error) {
|
|
584
|
-
throw new Error(`Failed to set stored model info: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
585
|
-
}
|
|
586
|
-
}
|
|
519
|
+
// =============================================================================
|
|
520
|
+
// REMOVED IN v3.0.0: Legacy database functions
|
|
521
|
+
// =============================================================================
|
|
522
|
+
// The following functions have been removed. Use getSystemInfo() and setSystemInfo() instead:
|
|
523
|
+
//
|
|
524
|
+
// - getModelVersion() → Use: const systemInfo = await getSystemInfo(db); const version = systemInfo?.modelVersion;
|
|
525
|
+
// - setModelVersion() → Use: await setSystemInfo(db, { modelVersion: 'version' });
|
|
526
|
+
// - getStoredModelInfo() → Use: const systemInfo = await getSystemInfo(db); access systemInfo.modelName and systemInfo.modelDimensions
|
|
527
|
+
// - setStoredModelInfo() → Use: await setSystemInfo(db, { modelName: 'name', modelDimensions: 384 });
|
|
528
|
+
//
|
|
529
|
+
// Migration guide: See CHANGELOG.md for v3.0.0 breaking changes
|
|
587
530
|
/**
|
|
588
531
|
* Retrieves documents by content type
|
|
589
532
|
* @param connection - Database connection object
|
|
@@ -151,26 +151,4 @@ export declare function listAvailableModels(): Array<{
|
|
|
151
151
|
supportedContentTypes: readonly string[];
|
|
152
152
|
memoryRequirement: number | undefined;
|
|
153
153
|
}>;
|
|
154
|
-
/**
|
|
155
|
-
* @deprecated Use createEmbedder() instead
|
|
156
|
-
* Legacy factory-style interface for backward compatibility
|
|
157
|
-
*/
|
|
158
|
-
export declare const UniversalEmbedderFactory: {
|
|
159
|
-
/**
|
|
160
|
-
* @deprecated Use createEmbedder() instead
|
|
161
|
-
*/
|
|
162
|
-
readonly create: (modelName: string, options?: EmbedderCreationOptions) => Promise<UniversalEmbedder>;
|
|
163
|
-
/**
|
|
164
|
-
* @deprecated Use ModelRegistry.validateModel() instead
|
|
165
|
-
*/
|
|
166
|
-
readonly validateModel: (modelName: string) => import("./universal-embedder.js").ModelValidationResult;
|
|
167
|
-
/**
|
|
168
|
-
* @deprecated Use ModelRegistry.getModelInfo() instead
|
|
169
|
-
*/
|
|
170
|
-
readonly getModelInfo: (modelName: string) => import("./universal-embedder.js").ModelInfo | null;
|
|
171
|
-
/**
|
|
172
|
-
* @deprecated Use ModelRegistry.getSupportedModels() instead
|
|
173
|
-
*/
|
|
174
|
-
readonly getSupportedModels: (modelType?: ModelType) => string[];
|
|
175
|
-
};
|
|
176
154
|
//# sourceMappingURL=embedder-factory.d.ts.map
|
|
@@ -299,40 +299,13 @@ export function listAvailableModels() {
|
|
|
299
299
|
});
|
|
300
300
|
}
|
|
301
301
|
// =============================================================================
|
|
302
|
-
// BACKWARD COMPATIBILITY
|
|
302
|
+
// BACKWARD COMPATIBILITY - REMOVED
|
|
303
303
|
// =============================================================================
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
*/
|
|
312
|
-
async create(modelName, options) {
|
|
313
|
-
console.warn('UniversalEmbedderFactory.create() is deprecated. Use createEmbedder() instead.');
|
|
314
|
-
return createEmbedder(modelName, options);
|
|
315
|
-
},
|
|
316
|
-
/**
|
|
317
|
-
* @deprecated Use ModelRegistry.validateModel() instead
|
|
318
|
-
*/
|
|
319
|
-
validateModel(modelName) {
|
|
320
|
-
console.warn('UniversalEmbedderFactory.validateModel() is deprecated. Use ModelRegistry.validateModel() instead.');
|
|
321
|
-
return ModelRegistry.validateModel(modelName);
|
|
322
|
-
},
|
|
323
|
-
/**
|
|
324
|
-
* @deprecated Use ModelRegistry.getModelInfo() instead
|
|
325
|
-
*/
|
|
326
|
-
getModelInfo(modelName) {
|
|
327
|
-
console.warn('UniversalEmbedderFactory.getModelInfo() is deprecated. Use ModelRegistry.getModelInfo() instead.');
|
|
328
|
-
return ModelRegistry.getModelInfo(modelName);
|
|
329
|
-
},
|
|
330
|
-
/**
|
|
331
|
-
* @deprecated Use ModelRegistry.getSupportedModels() instead
|
|
332
|
-
*/
|
|
333
|
-
getSupportedModels(modelType) {
|
|
334
|
-
console.warn('UniversalEmbedderFactory.getSupportedModels() is deprecated. Use ModelRegistry.getSupportedModels() instead.');
|
|
335
|
-
return ModelRegistry.getSupportedModels(modelType);
|
|
336
|
-
}
|
|
337
|
-
};
|
|
304
|
+
// The UniversalEmbedderFactory object has been removed as it was only a thin
|
|
305
|
+
// wrapper around the new API with deprecation warnings. Use the following instead:
|
|
306
|
+
//
|
|
307
|
+
// - UniversalEmbedderFactory.create() → createEmbedder()
|
|
308
|
+
// - UniversalEmbedderFactory.validateModel() → ModelRegistry.validateModel()
|
|
309
|
+
// - UniversalEmbedderFactory.getModelInfo() → ModelRegistry.getModelInfo()
|
|
310
|
+
// - UniversalEmbedderFactory.getSupportedModels() → ModelRegistry.getSupportedModels()
|
|
338
311
|
//# sourceMappingURL=embedder-factory.js.map
|
package/dist/core/index.d.ts
CHANGED
|
@@ -23,8 +23,8 @@
|
|
|
23
23
|
* 3. Usage Patterns:
|
|
24
24
|
*
|
|
25
25
|
* // Direct dependency injection (advanced users)
|
|
26
|
-
* const embedFn =
|
|
27
|
-
* const rerankFn =
|
|
26
|
+
* const embedFn = createTextEmbedFunction();
|
|
27
|
+
* const rerankFn = createTextRerankFunction();
|
|
28
28
|
* const indexManager = new IndexManager('./index.bin');
|
|
29
29
|
* const db = await openDatabase('./db.sqlite');
|
|
30
30
|
* const search = new SearchEngine(embedFn, indexManager, db, rerankFn);
|
|
@@ -47,7 +47,7 @@ export { type ContentDocument, type ContentChunk, type Document, type Chunk, typ
|
|
|
47
47
|
export { type EmbedFunction, type RerankFunction, type EmbeddingQueryInterface, type RerankingInterface, type SearchEngineConfig, type ContentTypeStrategy, type ModelAgnosticInterface, type ExtendedEmbeddingInterface, type ExtendedRerankingInterface, type SearchPipelineInterface, type SearchDependencyFactory, InterfaceValidator } from './interfaces.js';
|
|
48
48
|
export * from './adapters.js';
|
|
49
49
|
export * from './config.js';
|
|
50
|
-
export { type DatabaseConnection, type ContentMetadata, openDatabase, initializeSchema, insertDocument, insertChunk, upsertDocument, getChunksByEmbeddingIds,
|
|
50
|
+
export { type DatabaseConnection, type ContentMetadata, openDatabase, initializeSchema, insertDocument, insertChunk, upsertDocument, getChunksByEmbeddingIds, insertContentMetadata, getContentMetadata, getContentMetadataByHash, getContentMetadataByStorageType, deleteContentMetadata, getStorageStats, updateStorageStats } from './db.js';
|
|
51
51
|
export { type VectorIndexOptions, VectorIndex } from './vector-index.js';
|
|
52
52
|
export { type ChunkConfig, type GenericDocument, type GenericChunk, type ChunkingStrategy, ChunkingStrategyRegistry, DEFAULT_CHUNK_CONFIG, chunkingRegistry, chunkGenericDocument, registerTextChunkingStrategy } from './chunker.js';
|
|
53
53
|
export * from './search.js';
|
package/dist/core/index.js
CHANGED
|
@@ -23,8 +23,8 @@
|
|
|
23
23
|
* 3. Usage Patterns:
|
|
24
24
|
*
|
|
25
25
|
* // Direct dependency injection (advanced users)
|
|
26
|
-
* const embedFn =
|
|
27
|
-
* const rerankFn =
|
|
26
|
+
* const embedFn = createTextEmbedFunction();
|
|
27
|
+
* const rerankFn = createTextRerankFunction();
|
|
28
28
|
* const indexManager = new IndexManager('./index.bin');
|
|
29
29
|
* const db = await openDatabase('./db.sqlite');
|
|
30
30
|
* const search = new SearchEngine(embedFn, indexManager, db, rerankFn);
|
|
@@ -50,7 +50,7 @@ export * from './adapters.js';
|
|
|
50
50
|
// Core configuration management - model-agnostic settings
|
|
51
51
|
export * from './config.js';
|
|
52
52
|
// Database operations - supports different content types through metadata
|
|
53
|
-
export { openDatabase, initializeSchema, insertDocument, insertChunk, upsertDocument, getChunksByEmbeddingIds,
|
|
53
|
+
export { openDatabase, initializeSchema, insertDocument, insertChunk, upsertDocument, getChunksByEmbeddingIds, insertContentMetadata, getContentMetadata, getContentMetadataByHash, getContentMetadataByStorageType, deleteContentMetadata, getStorageStats, updateStorageStats } from './db.js';
|
|
54
54
|
// Vector index operations - works with any embedding dimensions
|
|
55
55
|
export { VectorIndex } from './vector-index.js';
|
|
56
56
|
// Generic chunking interfaces and strategies - supports text, image metadata, etc.
|
package/dist/core/ingestion.d.ts
CHANGED
|
@@ -92,7 +92,7 @@ export declare class IngestionPipeline {
|
|
|
92
92
|
* USAGE EXAMPLES:
|
|
93
93
|
* ```typescript
|
|
94
94
|
* // Text-only ingestion pipeline with unified content system
|
|
95
|
-
* const textEmbedFn =
|
|
95
|
+
* const textEmbedFn = createTextEmbedFunction();
|
|
96
96
|
* const indexManager = new IndexManager('./index.bin');
|
|
97
97
|
* const db = await openDatabase('./db.sqlite');
|
|
98
98
|
* const contentManager = new ContentManager(db);
|
|
@@ -150,32 +150,17 @@ export declare class IngestionPipeline {
|
|
|
150
150
|
* Enhanced to handle different content types appropriately
|
|
151
151
|
*/
|
|
152
152
|
private chunkDocumentsWithContentTypes;
|
|
153
|
-
/**
|
|
154
|
-
* Chunk all documents and organize results (legacy method for backward compatibility)
|
|
155
|
-
* @deprecated Use chunkDocumentsWithContentTypes for multimodal support
|
|
156
|
-
*/
|
|
157
|
-
private chunkDocuments;
|
|
158
153
|
/**
|
|
159
154
|
* Generate embeddings for all chunks with content-type support
|
|
160
155
|
* Enhanced to handle different content types and pass metadata to embedding function
|
|
161
156
|
*/
|
|
162
157
|
private generateEmbeddingsWithContentTypes;
|
|
163
|
-
/**
|
|
164
|
-
* Generate embeddings for all chunks with error handling (legacy method for backward compatibility)
|
|
165
|
-
* @deprecated Use generateEmbeddingsWithContentTypes for multimodal support
|
|
166
|
-
*/
|
|
167
|
-
private generateEmbeddings;
|
|
168
158
|
/**
|
|
169
159
|
* Store documents and chunks in database with content-type support
|
|
170
160
|
* Enhanced to handle content type metadata and multimodal content
|
|
171
161
|
* @returns Array of content IDs for successfully stored documents
|
|
172
162
|
*/
|
|
173
163
|
private storeDocumentsAndChunksWithContentTypes;
|
|
174
|
-
/**
|
|
175
|
-
* Store documents and chunks in database (legacy method for backward compatibility)
|
|
176
|
-
* @deprecated Use storeDocumentsAndChunksWithContentTypes for multimodal support
|
|
177
|
-
*/
|
|
178
|
-
private storeDocumentsAndChunks;
|
|
179
164
|
/**
|
|
180
165
|
* Update vector index with new embeddings
|
|
181
166
|
*/
|
package/dist/core/ingestion.js
CHANGED
|
@@ -9,6 +9,9 @@ import { config } from './config.js';
|
|
|
9
9
|
import { DocumentPathManager } from './path-manager.js';
|
|
10
10
|
import { existsSync } from 'fs';
|
|
11
11
|
import { ContentManager } from './content-manager.js';
|
|
12
|
+
import { createRequire } from 'module';
|
|
13
|
+
// Create require for CommonJS modules in ES module context
|
|
14
|
+
const require = createRequire(import.meta.url);
|
|
12
15
|
/**
|
|
13
16
|
* Main ingestion pipeline class
|
|
14
17
|
* Coordinates the entire process from file discovery to vector storage
|
|
@@ -59,7 +62,7 @@ export class IngestionPipeline {
|
|
|
59
62
|
* USAGE EXAMPLES:
|
|
60
63
|
* ```typescript
|
|
61
64
|
* // Text-only ingestion pipeline with unified content system
|
|
62
|
-
* const textEmbedFn =
|
|
65
|
+
* const textEmbedFn = createTextEmbedFunction();
|
|
63
66
|
* const indexManager = new IndexManager('./index.bin');
|
|
64
67
|
* const db = await openDatabase('./db.sqlite');
|
|
65
68
|
* const contentManager = new ContentManager(db);
|
|
@@ -428,19 +431,6 @@ export class IngestionPipeline {
|
|
|
428
431
|
console.log(`✓ Chunking complete: Created ${totalChunks} chunks from ${documentChunks.length} documents`);
|
|
429
432
|
return { documentChunks, allChunks, totalChunks };
|
|
430
433
|
}
|
|
431
|
-
/**
|
|
432
|
-
* Chunk all documents and organize results (legacy method for backward compatibility)
|
|
433
|
-
* @deprecated Use chunkDocumentsWithContentTypes for multimodal support
|
|
434
|
-
*/
|
|
435
|
-
async chunkDocuments(documents, chunkConfig) {
|
|
436
|
-
const result = await this.chunkDocumentsWithContentTypes(documents, chunkConfig);
|
|
437
|
-
// Convert to legacy format for backward compatibility
|
|
438
|
-
return {
|
|
439
|
-
documentChunks: result.documentChunks,
|
|
440
|
-
allChunks: result.allChunks.map(chunk => chunk.text),
|
|
441
|
-
totalChunks: result.totalChunks
|
|
442
|
-
};
|
|
443
|
-
}
|
|
444
434
|
/**
|
|
445
435
|
* Generate embeddings for all chunks with content-type support
|
|
446
436
|
* Enhanced to handle different content types and pass metadata to embedding function
|
|
@@ -488,15 +478,6 @@ export class IngestionPipeline {
|
|
|
488
478
|
throw new Error(`Embedding generation failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
489
479
|
}
|
|
490
480
|
}
|
|
491
|
-
/**
|
|
492
|
-
* Generate embeddings for all chunks with error handling (legacy method for backward compatibility)
|
|
493
|
-
* @deprecated Use generateEmbeddingsWithContentTypes for multimodal support
|
|
494
|
-
*/
|
|
495
|
-
async generateEmbeddings(chunkTexts) {
|
|
496
|
-
// Convert to new format for backward compatibility
|
|
497
|
-
const chunks = chunkTexts.map(text => ({ text, contentType: 'text' }));
|
|
498
|
-
return this.generateEmbeddingsWithContentTypes(chunks);
|
|
499
|
-
}
|
|
500
481
|
/**
|
|
501
482
|
* Store documents and chunks in database with content-type support
|
|
502
483
|
* Enhanced to handle content type metadata and multimodal content
|
|
@@ -584,13 +565,6 @@ export class IngestionPipeline {
|
|
|
584
565
|
console.log(`✓ Storage complete: ${documentsStored} documents, ${totalChunksStored} chunks saved to database`);
|
|
585
566
|
return contentIds;
|
|
586
567
|
}
|
|
587
|
-
/**
|
|
588
|
-
* Store documents and chunks in database (legacy method for backward compatibility)
|
|
589
|
-
* @deprecated Use storeDocumentsAndChunksWithContentTypes for multimodal support
|
|
590
|
-
*/
|
|
591
|
-
async storeDocumentsAndChunks(documentChunks, embeddings) {
|
|
592
|
-
await this.storeDocumentsAndChunksWithContentTypes(documentChunks, embeddings);
|
|
593
|
-
}
|
|
594
568
|
/**
|
|
595
569
|
* Update vector index with new embeddings
|
|
596
570
|
*/
|