npm - rag-lite-ts - Versions diffs - 2.2.0 → 2.3.1 - Mend

rag-lite-ts 2.2.0 → 2.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (100) hide show

package/README.md +88 -5
package/dist/cjs/cli/indexer.js +73 -15
package/dist/cjs/cli/search.js +77 -2
package/dist/cjs/cli/ui-server.d.ts +5 -0
package/dist/cjs/cli/ui-server.js +152 -0
package/dist/cjs/cli.js +53 -7
package/dist/cjs/core/abstract-generator.d.ts +97 -0
package/dist/cjs/core/abstract-generator.js +222 -0
package/dist/cjs/core/binary-index-format.js +53 -10
package/dist/cjs/core/db.d.ts +56 -0
package/dist/cjs/core/db.js +105 -0
package/dist/cjs/core/generator-registry.d.ts +114 -0
package/dist/cjs/core/generator-registry.js +280 -0
package/dist/cjs/core/index.d.ts +4 -0
package/dist/cjs/core/index.js +11 -0
package/dist/cjs/core/ingestion.js +3 -0
package/dist/cjs/core/knowledge-base-manager.d.ts +109 -0
package/dist/cjs/core/knowledge-base-manager.js +256 -0
package/dist/cjs/core/lazy-dependency-loader.d.ts +43 -0
package/dist/cjs/core/lazy-dependency-loader.js +111 -2
package/dist/cjs/core/prompt-templates.d.ts +138 -0
package/dist/cjs/core/prompt-templates.js +225 -0
package/dist/cjs/core/response-generator.d.ts +132 -0
package/dist/cjs/core/response-generator.js +69 -0
package/dist/cjs/core/search-pipeline.js +1 -1
package/dist/cjs/core/search.d.ts +72 -1
package/dist/cjs/core/search.js +80 -7
package/dist/cjs/core/types.d.ts +1 -0
package/dist/cjs/core/vector-index-messages.d.ts +52 -0
package/dist/cjs/core/vector-index-messages.js +5 -0
package/dist/cjs/core/vector-index-worker.d.ts +6 -0
package/dist/cjs/core/vector-index-worker.js +314 -0
package/dist/cjs/core/vector-index.d.ts +45 -10
package/dist/cjs/core/vector-index.js +279 -218
package/dist/cjs/factories/generator-factory.d.ts +88 -0
package/dist/cjs/factories/generator-factory.js +151 -0
package/dist/cjs/factories/index.d.ts +1 -0
package/dist/cjs/factories/index.js +5 -0
package/dist/cjs/factories/ingestion-factory.js +3 -7
package/dist/cjs/factories/search-factory.js +11 -0
package/dist/cjs/index-manager.d.ts +23 -3
package/dist/cjs/index-manager.js +84 -15
package/dist/cjs/index.d.ts +11 -1
package/dist/cjs/index.js +19 -1
package/dist/cjs/text/generators/causal-lm-generator.d.ts +65 -0
package/dist/cjs/text/generators/causal-lm-generator.js +197 -0
package/dist/cjs/text/generators/index.d.ts +10 -0
package/dist/cjs/text/generators/index.js +10 -0
package/dist/cjs/text/generators/instruct-generator.d.ts +62 -0
package/dist/cjs/text/generators/instruct-generator.js +192 -0
package/dist/esm/cli/indexer.js +73 -15
package/dist/esm/cli/search.js +77 -2
package/dist/esm/cli/ui-server.d.ts +5 -0
package/dist/esm/cli/ui-server.js +152 -0
package/dist/esm/cli.js +53 -7
package/dist/esm/core/abstract-generator.d.ts +97 -0
package/dist/esm/core/abstract-generator.js +222 -0
package/dist/esm/core/binary-index-format.js +53 -10
package/dist/esm/core/db.d.ts +56 -0
package/dist/esm/core/db.js +105 -0
package/dist/esm/core/generator-registry.d.ts +114 -0
package/dist/esm/core/generator-registry.js +280 -0
package/dist/esm/core/index.d.ts +4 -0
package/dist/esm/core/index.js +11 -0
package/dist/esm/core/ingestion.js +3 -0
package/dist/esm/core/knowledge-base-manager.d.ts +109 -0
package/dist/esm/core/knowledge-base-manager.js +256 -0
package/dist/esm/core/lazy-dependency-loader.d.ts +43 -0
package/dist/esm/core/lazy-dependency-loader.js +111 -2
package/dist/esm/core/prompt-templates.d.ts +138 -0
package/dist/esm/core/prompt-templates.js +225 -0
package/dist/esm/core/response-generator.d.ts +132 -0
package/dist/esm/core/response-generator.js +69 -0
package/dist/esm/core/search-pipeline.js +1 -1
package/dist/esm/core/search.d.ts +72 -1
package/dist/esm/core/search.js +80 -7
package/dist/esm/core/types.d.ts +1 -0
package/dist/esm/core/vector-index-messages.d.ts +52 -0
package/dist/esm/core/vector-index-messages.js +5 -0
package/dist/esm/core/vector-index-worker.d.ts +6 -0
package/dist/esm/core/vector-index-worker.js +314 -0
package/dist/esm/core/vector-index.d.ts +45 -10
package/dist/esm/core/vector-index.js +279 -218
package/dist/esm/factories/generator-factory.d.ts +88 -0
package/dist/esm/factories/generator-factory.js +151 -0
package/dist/esm/factories/index.d.ts +1 -0
package/dist/esm/factories/index.js +5 -0
package/dist/esm/factories/ingestion-factory.js +3 -7
package/dist/esm/factories/search-factory.js +11 -0
package/dist/esm/index-manager.d.ts +23 -3
package/dist/esm/index-manager.js +84 -15
package/dist/esm/index.d.ts +11 -1
package/dist/esm/index.js +19 -1
package/dist/esm/text/generators/causal-lm-generator.d.ts +65 -0
package/dist/esm/text/generators/causal-lm-generator.js +197 -0
package/dist/esm/text/generators/index.d.ts +10 -0
package/dist/esm/text/generators/index.js +10 -0
package/dist/esm/text/generators/instruct-generator.d.ts +62 -0
package/dist/esm/text/generators/instruct-generator.js +192 -0
package/package.json +14 -7

package/dist/esm/core/abstract-generator.js ADDED Viewed

@@ -0,0 +1,222 @@
+/**
+ * CORE MODULE — Abstract Base Generator
+ *
+ * Provides model-agnostic base functionality for all generator implementations.
+ * This is an abstract base class, not a concrete implementation.
+ *
+ * ARCHITECTURAL NOTE:
+ * Similar to BaseUniversalEmbedder, this class provides shared infrastructure:
+ * - Model lifecycle management (loading, cleanup, disposal)
+ * - Token budget management
+ * - Error handling with helpful messages
+ * - Common utility methods
+ *
+ * IMPLEMENTATION LAYERS:
+ * - Text: InstructGenerator extends this class (SmolLM2-Instruct)
+ * - Text: CausalLMGenerator extends this class (DistilGPT2)
+ *
+ * @experimental This feature is experimental and may change in future versions.
+ */
+import { GenerationError } from './response-generator.js';
+import { GeneratorRegistry } from './generator-registry.js';
+import { buildPrompt, getDefaultStopSequences } from './prompt-templates.js';
+// =============================================================================
+// BASE GENERATOR ABSTRACT CLASS
+// =============================================================================
+/**
+ * Abstract base class for response generators.
+ *
+ * Looks up the model's registry entry at construction time and implements the
+ * shared generation pipeline: chunk limiting, prompt building, text generation
+ * and response clean-up. `generate()` calls `this.loadModel()` and
+ * `this.generateText()`, neither of which is defined here, so subclasses are
+ * expected to supply them.
+ */
+export class BaseResponseGenerator {
+    modelName;
+    _isLoaded = false;
+    _modelInfo;
+    _options;
+    /**
+     * @param modelName - Name of a generator model known to GeneratorRegistry
+     * @param options - Generator options; stored unchanged on the instance
+     * @throws Error if GeneratorRegistry has no entry for `modelName`
+     */
+    constructor(modelName, options = {}) {
+        this.modelName = modelName;
+        const modelInfo = GeneratorRegistry.getGeneratorInfo(modelName);
+        if (!modelInfo) {
+            throw new Error(`Generator model '${modelName}' is not supported. ` +
+                `Supported models: ${GeneratorRegistry.getSupportedGenerators().join(', ')}`);
+        }
+        this._modelInfo = modelInfo;
+        this._options = options;
+    }
+    // =============================================================================
+    // PUBLIC INTERFACE IMPLEMENTATION
+    // =============================================================================
+    /** Model type taken from the registry entry */
+    get modelType() {
+        return this._modelInfo.type;
+    }
+    /** Maximum context length taken from the registry entry's capabilities */
+    get maxContextLength() {
+        return this._modelInfo.capabilities.maxContextLength;
+    }
+    /** Default maximum number of output tokens taken from the registry entry's capabilities */
+    get maxOutputLength() {
+        return this._modelInfo.capabilities.defaultMaxOutputTokens;
+    }
+    /** Current value of the internal loaded flag */
+    isLoaded() {
+        return this._isLoaded;
+    }
+    /** Registry entry for this model, returned as a copy */
+    getModelInfo() {
+        // Shallow copy: top-level fields cannot be overwritten by the caller, but nested
+        // objects (e.g. `capabilities`) are still shared with the instance
+        return { ...this._modelInfo };
+    }
+    // =============================================================================
+    // DEFAULT IMPLEMENTATION
+    // =============================================================================
+    /**
+     * Generate a response based on query and retrieved chunks.
+     * Loads the model on first use, then orchestrates the generation pipeline.
+     *
+     * @param request - Generation request. `query` and `chunks` are read directly;
+     *   sampling parameters, `maxTokens` and `maxChunksForContext` are optional and fall
+     *   back to the model's registry defaults or the built-in values below
+     * @returns Cleaned response text plus token counts, timing and context metadata
+     * @throws GenerationError when the pipeline fails after loading: an existing
+     *   GenerationError is rethrown unchanged, anything else is wrapped
+     */
+    async generate(request) {
+        if (!this._isLoaded) {
+            await this.loadModel();
+        }
+        const startTime = Date.now();
+        try {
+            // Get generation parameters with defaults
+            const maxTokens = request.maxTokens ?? this._modelInfo.capabilities.defaultMaxOutputTokens;
+            const temperature = request.temperature ?? this._modelInfo.capabilities.recommendedTemperature;
+            const topP = request.topP ?? 0.9;
+            const topK = request.topK ?? 50;
+            const repetitionPenalty = request.repetitionPenalty ?? 1.1;
+            const stopSequences = request.stopSequences ?? getDefaultStopSequences(this.modelType);
+            // Get max chunks for context (configurable, with model-specific default)
+            const maxChunksForContext = request.maxChunksForContext ??
+                this._modelInfo.capabilities.defaultMaxChunksForContext;
+            // Limit chunks to maxChunksForContext (assumes chunks are already reranked)
+            const totalChunks = request.chunks.length;
+            const limitedChunks = request.chunks.slice(0, maxChunksForContext);
+            if (totalChunks > maxChunksForContext) {
+                console.log(`📊 Using top ${maxChunksForContext} of ${totalChunks} reranked chunks for generation`);
+            }
+            // Build the prompt with context
+            const builtPrompt = buildPrompt({
+                query: request.query,
+                chunks: limitedChunks,
+                modelType: this.modelType,
+                systemPrompt: request.systemPrompt,
+                maxContextLength: this.maxContextLength,
+                reservedOutputTokens: maxTokens,
+                includeSourceAttribution: request.includeSourceAttribution
+            });
+            // Log context info
+            if (builtPrompt.contextInfo.truncated) {
+                console.warn(`⚠️  Context truncated: Only ${builtPrompt.contextInfo.chunksIncluded} of ` +
+                    `${builtPrompt.contextInfo.totalChunks} chunks fit in context window`);
+            }
+            // Generate response
+            const result = await this.generateText(builtPrompt.prompt, {
+                maxTokens,
+                temperature,
+                topP,
+                topK,
+                repetitionPenalty,
+                stopSequences
+            });
+            const generationTimeMs = Date.now() - startTime;
+            // Clean up the response text
+            const cleanedResponse = this.cleanResponseText(result.text);
+            return {
+                response: cleanedResponse,
+                tokensUsed: result.promptTokens + result.completionTokens,
+                truncated: builtPrompt.contextInfo.truncated,
+                modelName: this.modelName,
+                generationTimeMs,
+                metadata: {
+                    promptTokens: result.promptTokens,
+                    completionTokens: result.completionTokens,
+                    chunksIncluded: builtPrompt.contextInfo.chunksIncluded,
+                    totalChunks: totalChunks, // Report original total, not limited
+                    finishReason: result.finishReason
+                }
+            };
+        }
+        catch (error) {
+            if (error instanceof GenerationError) {
+                throw error;
+            }
+            throw new GenerationError(this.modelName, 'generation', `Generation failed: ${error instanceof Error ? error.message : 'Unknown error'}`, error instanceof Error ? error : undefined);
+        }
+    }
+    // =============================================================================
+    // PROTECTED HELPER METHODS
+    // =============================================================================
+    /**
+     * Validate that the model is loaded before operations
+     *
+     * @throws GenerationError if the loaded flag is not set
+     */
+    ensureLoaded() {
+        if (!this._isLoaded) {
+            throw new GenerationError(this.modelName, 'generation', `Model '${this.modelName}' is not loaded. Call loadModel() first.`);
+        }
+    }
+    /**
+     * Clean up response text by removing artifacts.
+     *
+     * The text is cut at the first special-token marker. If what remains does not end on
+     * a sentence boundary (e.g. generation stopped at the token limit), the trailing
+     * fragment is dropped, but only when the last sentence end lies past the halfway
+     * point, so a mostly unpunctuated answer is kept whole. Closing quotes and brackets
+     * directly after the punctuation count as part of the sentence.
+     *
+     * @param text - Raw generated text
+     * @returns Trimmed text without markers or a dangling fragment
+     */
+    cleanResponseText(text) {
+        let cleaned = text.trim();
+        // Remove common artifacts
+        const artifactsToRemove = [
+            '<|im_end|>',
+            '<|im_start|>',
+            '<|endoftext|>',
+            '<|assistant|>',
+            '<|user|>',
+            '<|system|>'
+        ];
+        for (const artifact of artifactsToRemove) {
+            cleaned = cleaned.split(artifact)[0];
+        }
+        // Remove trailing incomplete sentences (if cut off at max tokens).
+        // A finished sentence may end with closers after the punctuation, e.g. `."` or `!)`.
+        const closers = '"\'\u201D\u2019)]';
+        if (cleaned.length > 0 && !/[.!?]["'\u201D\u2019)\]]*$/.test(cleaned)) {
+            const lastSentenceEnd = Math.max(cleaned.lastIndexOf('.'), cleaned.lastIndexOf('!'), cleaned.lastIndexOf('?'));
+            if (lastSentenceEnd > cleaned.length * 0.5) {
+                // Keep closers that belong to the final sentence
+                let cut = lastSentenceEnd + 1;
+                while (cut < cleaned.length && closers.includes(cleaned[cut])) {
+                    cut++;
+                }
+                cleaned = cleaned.substring(0, cut);
+            }
+        }
+        return cleaned.trim();
+    }
+    /**
+     * Log model loading progress
+     *
+     * @param stage - Short stage label, printed after the model name
+     * @param details - Optional extra text appended after a colon
+     */
+    logModelLoading(stage, details) {
+        const message = `[${this.modelName}] ${stage}`;
+        if (details) {
+            console.log(`${message}: ${details}`);
+        }
+        else {
+            console.log(message);
+        }
+    }
+    /**
+     * Handle model loading errors with helpful messages
+     *
+     * @param error - Whatever the loader threw; non-Error values are stringified
+     * @returns A GenerationError for the 'loading' stage (returned, not thrown)
+     */
+    handleLoadingError(error) {
+        // A thrown value is not guaranteed to be an Error, so never read `.message` blindly
+        const detail = error instanceof Error ? error.message : String(error);
+        const cause = error instanceof Error ? error : undefined;
+        const baseMessage = `Failed to load generator model '${this.modelName}': ${detail}`;
+        // Keyword checks ignore case ("Network request failed"); 'OOM' stays case-sensitive
+        // because a case-insensitive match would also hit words such as "room"
+        const lowered = detail.toLowerCase();
+        // Provide specific guidance based on error type
+        if (lowered.includes('network') || lowered.includes('fetch')) {
+            return new GenerationError(this.modelName, 'loading', `${baseMessage}\n` +
+                `This appears to be a network error. Please check your internet connection ` +
+                `and ensure the model repository is accessible.`, cause);
+        }
+        if (lowered.includes('memory') || detail.includes('OOM')) {
+            return new GenerationError(this.modelName, 'loading', `${baseMessage}\n` +
+                `This appears to be a memory error. The model requires ` +
+                `${this._modelInfo.requirements.minimumMemory}MB. Try closing other applications ` +
+                `or using a smaller model like 'Xenova/distilgpt2'.`, cause);
+        }
+        return new GenerationError(this.modelName, 'loading', baseMessage, cause);
+    }
+}
+/**
+ * Build a generator options object, filling in defaults for anything not supplied.
+ *
+ * @param options - Caller overrides; every key given here wins over the default
+ * @returns New object holding the defaults merged with `options`
+ */
+export function createGeneratorOptions(options = {}) {
+    const defaults = {
+        timeout: 60000, // 60 seconds
+        enableGPU: false,
+        logLevel: 'info'
+    };
+    return { ...defaults, ...options };
+}
+//# sourceMappingURL=abstract-generator.js.map

package/dist/esm/core/binary-index-format.js CHANGED Viewed

@@ -27,10 +27,12 @@ export class BinaryIndexFormat {
      * @param data Index data to serialize
      */
     static async save(indexPath, data) {
-        // Calculate total size
+        // Use actual vector count to ensure accurate file size
+        const actualVectorCount = data.vectors.length;
+        // Calculate total size based on actual vectors
         const headerSize = 24; // 6 uint32 fields
         const vectorSize = 4 + (data.dimensions * 4); // id + vector
-        const totalSize = headerSize + (data.currentSize * vectorSize);
+        const totalSize = headerSize + (actualVectorCount * vectorSize);
         const buffer = new ArrayBuffer(totalSize);
         const view = new DataView(buffer);
         let offset = 0;
@@ -45,7 +47,8 @@ export class BinaryIndexFormat {
         offset += 4;
         view.setUint32(offset, data.seed, true);
         offset += 4;
-        view.setUint32(offset, data.currentSize, true);
+        // Write actual vector count in header
+        view.setUint32(offset, actualVectorCount, true);
         offset += 4;
         // Write vectors
         for (const item of data.vectors) {
@@ -187,6 +190,9 @@ export class BinaryIndexFormat {
         const view = new DataView(buffer.buffer, buffer.byteOffset, buffer.byteLength);
         let offset = 0;
         // Read basic header (24 bytes, all little-endian)
+        if (buffer.byteLength < 24) {
+            throw new Error(`Index file too small: expected at least 24 bytes, got ${buffer.byteLength}`);
+        }
         const dimensions = view.getUint32(offset, true);
         offset += 4;
         const maxElements = view.getUint32(offset, true);
@@ -199,10 +205,20 @@ export class BinaryIndexFormat {
         offset += 4;
         const currentSize = view.getUint32(offset, true);
         offset += 4;
-        // Check if this is the extended grouped format (40+ bytes header)
-        const hasGroups = buffer.byteLength >= 40 ? view.getUint32(offset, true) : 0;
-        if (hasGroups === 1 && buffer.byteLength >= 40) {
-            // Load grouped format
+        // Calculate expected size for original format
+        const vectorSize = 4 + (dimensions * 4); // id + vector
+        const expectedOriginalSize = 24 + (currentSize * vectorSize);
+        // Check if this is the extended grouped format (44 bytes header)
+        // Extended header has: 24 bytes basic + 4 bytes hasGroups + 16 bytes for offsets/counts = 44 bytes
+        // Only check for grouped format if file is larger than expected original format size
+        const hasGroups = buffer.byteLength > expectedOriginalSize && buffer.byteLength >= 44 && offset + 4 <= buffer.byteLength
+            ? view.getUint32(offset, true)
+            : 0;
+        if (hasGroups === 1 && buffer.byteLength >= 44) {
+            // Load grouped format - ensure we have enough bytes for extended header
+            if (offset + 20 > buffer.byteLength) {
+                throw new Error(`Index file too small for grouped format: expected at least ${offset + 20} bytes, got ${buffer.byteLength}`);
+            }
             const textOffset = view.getUint32(offset + 4, true);
             const textCount = view.getUint32(offset + 8, true);
             const imageOffset = view.getUint32(offset + 12, true);
@@ -215,14 +231,23 @@ export class BinaryIndexFormat {
                 if (offset % 4 !== 0) {
                     throw new Error(`Offset ${offset} is not 4-byte aligned`);
                 }
+                // Check bounds before reading vector ID
+                if (offset + 4 > buffer.byteLength) {
+                    throw new Error(`Text vector ID at offset ${offset} is outside the bounds of the DataView (buffer size: ${buffer.byteLength})`);
+                }
                 // Read vector ID
                 const id = view.getUint32(offset, true);
                 offset += 4;
+                // Check bounds before reading vector data
+                const vectorDataSize = dimensions * 4;
+                if (offset + vectorDataSize > buffer.byteLength) {
+                    throw new Error(`Text vector data at offset ${offset} would exceed buffer bounds (buffer size: ${buffer.byteLength}, required: ${offset + vectorDataSize})`);
+                }
                 // Zero-copy Float32Array view
                 const vectorView = new Float32Array(buffer.buffer, buffer.byteOffset + offset, dimensions);
                 // Copy to avoid buffer lifecycle issues
                 const vector = new Float32Array(vectorView);
-                offset += dimensions * 4;
+                offset += vectorDataSize;
                 textVectors.push({ id, vector });
             }
             // Load image vectors
@@ -233,14 +258,23 @@ export class BinaryIndexFormat {
                 if (offset % 4 !== 0) {
                     throw new Error(`Offset ${offset} is not 4-byte aligned`);
                 }
+                // Check bounds before reading vector ID
+                if (offset + 4 > buffer.byteLength) {
+                    throw new Error(`Image vector ID at offset ${offset} is outside the bounds of the DataView (buffer size: ${buffer.byteLength})`);
+                }
                 // Read vector ID
                 const id = view.getUint32(offset, true);
                 offset += 4;
+                // Check bounds before reading vector data
+                const vectorDataSize = dimensions * 4;
+                if (offset + vectorDataSize > buffer.byteLength) {
+                    throw new Error(`Image vector data at offset ${offset} would exceed buffer bounds (buffer size: ${buffer.byteLength}, required: ${offset + vectorDataSize})`);
+                }
                 // Zero-copy Float32Array view
                 const vectorView = new Float32Array(buffer.buffer, buffer.byteOffset + offset, dimensions);
                 // Copy to avoid buffer lifecycle issues
                 const vector = new Float32Array(vectorView);
-                offset += dimensions * 4;
+                offset += vectorDataSize;
                 imageVectors.push({ id, vector });
             }
             // Combine all vectors for backward compatibility
@@ -266,14 +300,23 @@ export class BinaryIndexFormat {
                 if (offset % 4 !== 0) {
                     throw new Error(`Offset ${offset} is not 4-byte aligned`);
                 }
+                // Check bounds before reading vector ID
+                if (offset + 4 > buffer.byteLength) {
+                    throw new Error(`Offset ${offset} is outside the bounds of the DataView (buffer size: ${buffer.byteLength})`);
+                }
                 // Read vector ID
                 const id = view.getUint32(offset, true);
                 offset += 4;
+                // Check bounds before reading vector data
+                const vectorDataSize = dimensions * 4;
+                if (offset + vectorDataSize > buffer.byteLength) {
+                    throw new Error(`Vector data at offset ${offset} would exceed buffer bounds (buffer size: ${buffer.byteLength}, required: ${offset + vectorDataSize})`);
+                }
                 // Zero-copy Float32Array view (fast!)
                 const vectorView = new Float32Array(buffer.buffer, buffer.byteOffset + offset, dimensions);
                 // Copy to avoid buffer lifecycle issues
                 const vector = new Float32Array(vectorView);
-                offset += dimensions * 4;
+                offset += vectorDataSize;
                 vectors.push({ id, vector });
             }
             return {

package/dist/esm/core/db.d.ts CHANGED Viewed

@@ -210,4 +210,60 @@ export declare function updateStorageStats(connection: DatabaseConnection, stats
     filesystemRefs?: number;
     lastCleanup?: Date;
 }): Promise<void>;
+/**
+ * Result of a database reset operation
+ */
+export interface DatabaseResetResult {
+    /** Whether the reset was successful */
+    success: boolean;
+    /** Number of documents deleted */
+    documentsDeleted: number;
+    /** Number of chunks deleted */
+    chunksDeleted: number;
+    /** Number of content metadata entries deleted */
+    contentMetadataDeleted: number;
+    /** True when system_info was cleared, false when it was preserved */
+    systemInfoCleared: boolean;
+    /** Time taken for the reset operation in milliseconds */
+    resetTimeMs: number;
+}
+/**
+ * Options for database reset operation
+ */
+export interface DatabaseResetOptions {
+    /** Keep system_info (mode, model configuration) instead of clearing it - default: false */
+    preserveSystemInfo?: boolean;
+    /** Run VACUUM once the deletions are done, to reclaim disk space - default: true */
+    runVacuum?: boolean;
+}
+/**
+ * Reset the database by deleting all data while keeping the schema intact.
+ * This is a safer alternative to file deletion that avoids file locking issues on Windows.
+ *
+ * This function:
+ * 1. Deletes all rows from chunks, documents, content_metadata tables
+ * 2. Resets storage_stats counters
+ * 3. Optionally clears system_info (mode/model configuration)
+ * 4. Optionally runs VACUUM to reclaim disk space
+ *
+ * @param connection - Database connection object
+ * @param options - Reset options
+ * @returns Promise resolving to reset result statistics
+ * @throws Error if any step fails; the message embeds the underlying error's message
+ *
+ * @example
+ * ```typescript
+ * const db = await openDatabase('./db.sqlite');
+ * const result = await resetDatabase(db, { preserveSystemInfo: false });
+ * console.log(`Deleted ${result.documentsDeleted} documents and ${result.chunksDeleted} chunks`);
+ * ```
+ */
+export declare function resetDatabase(connection: DatabaseConnection, options?: DatabaseResetOptions): Promise<DatabaseResetResult>;
+/**
+ * Check if the database has any stored data.
+ * Useful for determining if a reset is needed.
+ * Never rejects: a failing query (e.g. the tables do not exist) is reported as "empty".
+ *
+ * @param connection - Database connection object
+ * @returns Promise resolving to true if database has data, false if empty or if the check fails
+ */
+export declare function hasDatabaseData(connection: DatabaseConnection): Promise<boolean>;
 //# sourceMappingURL=db.d.ts.map

package/dist/esm/core/db.js CHANGED Viewed

@@ -892,4 +892,109 @@ export async function updateStorageStats(connection, stats) {
         throw new Error(`Failed to update storage stats: ${error instanceof Error ? error.message : 'Unknown error'}`);
     }
 }
+/**
+ * Empty every data table while leaving the schema in place.
+ * Preferred over deleting the database file, which can fail on Windows because of file locks.
+ *
+ * Steps, in order:
+ * 1. Count the rows in documents, chunks and content_metadata (for the report)
+ * 2. Delete all rows from chunks, then documents, then content_metadata
+ * 3. Zero the storage_stats counters
+ * 4. Delete the system_info row unless `preserveSystemInfo` is set
+ * 5. Run VACUUM unless `runVacuum` is turned off
+ *
+ * @param connection - Database connection object
+ * @param options - Reset options
+ * @returns Promise resolving to reset result statistics
+ * @throws Error if any step fails; the message embeds the underlying error's message
+ *
+ * @example
+ * ```typescript
+ * const db = await openDatabase('./db.sqlite');
+ * const result = await resetDatabase(db, { preserveSystemInfo: false });
+ * console.log(`Deleted ${result.documentsDeleted} documents and ${result.chunksDeleted} chunks`);
+ * ```
+ */
+export async function resetDatabase(connection, options = {}) {
+    const startedAt = Date.now();
+    const { preserveSystemInfo: keepSystemInfo = false, runVacuum: shouldVacuum = true } = options;
+    // Row count of one table; a missing result row and a zero count both yield 0
+    const countRows = async (table) => {
+        const row = await connection.get(`SELECT COUNT(*) as count FROM ${table}`);
+        return row?.count || 0;
+    };
+    try {
+        console.log('🔄 Starting database reset...');
+        // Counts are taken up front and reported as the "deleted" figures
+        const documentsDeleted = await countRows('documents');
+        const chunksDeleted = await countRows('chunks');
+        const contentMetadataDeleted = await countRows('content_metadata');
+        // Referencing tables go first so foreign key constraints hold:
+        // chunks reference documents, documents reference content_metadata
+        for (const table of ['chunks', 'documents', 'content_metadata']) {
+            console.log(`  Deleting ${table}...`);
+            await connection.run(`DELETE FROM ${table}`);
+        }
+        console.log('  Resetting storage_stats...');
+        await connection.run(`
+      UPDATE storage_stats SET
+        content_dir_files = 0,
+        content_dir_size = 0,
+        filesystem_refs = 0,
+        updated_at = CURRENT_TIMESTAMP
+      WHERE id = 1
+    `);
+        // system_info is dropped unless the caller asked to keep it
+        const systemInfoCleared = !keepSystemInfo;
+        if (systemInfoCleared) {
+            console.log('  Clearing system_info...');
+            await connection.run('DELETE FROM system_info WHERE id = 1');
+        }
+        else {
+            console.log('  Preserving system_info (mode/model configuration)');
+        }
+        if (shouldVacuum) {
+            console.log('  Running VACUUM to reclaim disk space...');
+            await connection.run('VACUUM');
+        }
+        const resetTimeMs = Date.now() - startedAt;
+        const summary = [
+            `✓ Database reset complete in ${resetTimeMs}ms`,
+            `  Documents deleted: ${documentsDeleted}`,
+            `  Chunks deleted: ${chunksDeleted}`,
+            `  Content metadata deleted: ${contentMetadataDeleted}`,
+            `  System info cleared: ${systemInfoCleared}`
+        ];
+        for (const line of summary) {
+            console.log(line);
+        }
+        return {
+            success: true,
+            documentsDeleted,
+            chunksDeleted,
+            contentMetadataDeleted,
+            systemInfoCleared,
+            resetTimeMs
+        };
+    }
+    catch (error) {
+        const elapsedMs = Date.now() - startedAt;
+        console.error(`❌ Database reset failed after ${elapsedMs}ms:`, error);
+        const reason = error instanceof Error ? error.message : 'Unknown error';
+        throw new Error(`Failed to reset database: ${reason}`);
+    }
+}
+/**
+ * Check if the database has any data (documents, chunks, or content metadata).
+ * Useful for determining if a reset is needed.
+ *
+ * Tables are checked in the order documents, chunks, content_metadata, and the check
+ * stops at the first table that has rows. A failing query is not propagated: the
+ * database is reported as empty.
+ *
+ * @param connection - Database connection object
+ * @returns Promise resolving to true if database has data, false if empty
+ */
+export async function hasDatabaseData(connection) {
+    try {
+        // content_metadata comes last so a database without that table still reports
+        // true for existing documents or chunks before the query can fail
+        for (const table of ['documents', 'chunks', 'content_metadata']) {
+            const row = await connection.get(`SELECT COUNT(*) as count FROM ${table}`);
+            if ((row?.count || 0) > 0) {
+                return true;
+            }
+        }
+        return false;
+    }
+    catch (error) {
+        // If tables don't exist, consider it empty
+        return false;
+    }
+}
 //# sourceMappingURL=db.js.map

package/dist/esm/core/generator-registry.d.ts ADDED Viewed

@@ -0,0 +1,114 @@
+/**
+ * CORE MODULE — Generator Registry for RAG Response Generation
+ *
+ * Centralized registry of supported generator models with validation and
+ * compatibility checking. Follows the same patterns as model-registry.ts.
+ *
+ * SUPPORTED MODELS:
+ * - HuggingFaceTB/SmolLM2-135M-Instruct: Balanced instruct model (DEFAULT, 3 chunks)
+ * - HuggingFaceTB/SmolLM2-360M-Instruct: Higher quality instruct model (5 chunks)
+ *
+ * PREREQUISITES:
+ * - Reranking must be enabled for response generation to ensure quality context
+ *
+ * @experimental This feature is experimental and may change in future versions.
+ */
+import type { GeneratorModelInfo, GeneratorModelType, GeneratorValidationResult, GeneratorCapabilities } from './response-generator.js';
+/**
+ * Registry of supported generator models with their metadata and capabilities
+ */
+export declare const SUPPORTED_GENERATORS: Record<string, GeneratorModelInfo>;
+/** Default generator model name */
+export declare const DEFAULT_GENERATOR_MODEL = "HuggingFaceTB/SmolLM2-135M-Instruct";
+/**
+ * Generator registry class providing validation and model information services
+ */
+export declare class GeneratorRegistry {
+    /**
+     * Gets generator model information for a given model name
+     * @param modelName - Name of the generator model
+     * @returns Generator model information or null if not supported
+     */
+    static getGeneratorInfo(modelName: string): GeneratorModelInfo | null;
+    /**
+     * Validates a generator model name and returns compatibility information
+     * @param modelName - Name of the model to validate
+     * @returns Validation result with errors, warnings, and suggestions
+     */
+    static validateGenerator(modelName: string): GeneratorValidationResult;
+    /**
+     * Lists all supported generator models
+     * @param modelType - Optional filter by model type
+     * @returns Array of supported generator model names
+     */
+    static getSupportedGenerators(modelType?: GeneratorModelType): string[];
+    /**
+     * Gets the default generator model name
+     * @returns Default generator model name
+     */
+    static getDefaultGenerator(): string;
+    /**
+     * Gets generators by type
+     * @param type - Generator type ('causal-lm' or 'instruct')
+     * @returns Array of model names matching the type
+     */
+    static getGeneratorsByType(type: GeneratorModelType): string[];
+    /**
+     * Checks if a generator model supports a specific capability
+     * @param modelName - Name of the model
+     * @param capability - Capability to check
+     * @returns True if the model supports the capability
+     */
+    static supportsCapability(modelName: string, capability: keyof GeneratorCapabilities): boolean;
+    /**
+     * Gets generators similar to the given model name (for suggestions)
+     * @private
+     */
+    private static getSimilarGenerators;
+    /**
+     * Validates system compatibility for a generator model
+     * @param modelName - Name of the model
+     * @param systemCapabilities - System capabilities to check against
+     * @returns Validation result with compatibility information
+     */
+    static validateSystemCompatibility(modelName: string, systemCapabilities: {
+        availableMemory?: number;
+        platform?: string;
+        transformersJsVersion?: string;
+    }): GeneratorValidationResult;
+}
+/**
+ * Gets the generator type for a given model name
+ * @param modelName - Name of the model
+ * @returns Generator type or null if model not supported
+ */
+export declare function getGeneratorType(modelName: string): GeneratorModelType | null;
+/**
+ * Checks if a model is an instruction-tuned model
+ * @param modelName - Name of the model
+ * @returns True if the model is instruction-tuned
+ */
+export declare function isInstructionTunedModel(modelName: string): boolean;
+/**
+ * Gets the maximum context length for a generator model
+ * @param modelName - Name of the model
+ * @returns Maximum context length or null if model not supported
+ */
+export declare function getMaxContextLength(modelName: string): number | null;
+/**
+ * Gets recommended generation settings for a model
+ * @param modelName - Name of the model
+ * @returns Recommended settings or null if model not supported
+ */
+export declare function getRecommendedSettings(modelName: string): {
+    temperature: number;
+    maxTokens: number;
+    maxChunksForContext: number;
+} | null;
+/**
+ * Gets the default maximum chunks for context for a generator model
+ * @param modelName - Name of the model
+ * @returns Default max chunks for context or null if model not supported
+ */
+export declare function getDefaultMaxChunksForContext(modelName: string): number | null;
+//# sourceMappingURL=generator-registry.d.ts.map