npm - viberag - Versions diffs - 0.3.1 → 0.3.2 - Mend

viberag 0.3.1 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

package/dist/cli/app.js +8 -1
package/dist/cli/commands/handlers.d.ts +1 -1
package/dist/cli/commands/useRagCommands.js +8 -1
package/dist/common/components/StatusBar.js +14 -1
package/dist/common/types.d.ts +4 -0
package/dist/rag/embeddings/gemini.d.ts +10 -0
package/dist/rag/embeddings/gemini.js +82 -6
package/dist/rag/embeddings/mistral.d.ts +10 -0
package/dist/rag/embeddings/mistral.js +82 -6
package/dist/rag/embeddings/openai.d.ts +10 -0
package/dist/rag/embeddings/openai.js +82 -6
package/dist/rag/gitignore/index.js +55 -2
package/dist/rag/indexer/indexer.d.ts +3 -0
package/dist/rag/indexer/indexer.js +113 -60
package/dist/rag/indexer/types.d.ts +6 -1
package/package.json +1 -1

package/dist/cli/app.js CHANGED Viewed

@@ -148,7 +148,14 @@ export default function App() {
                 total: 0,
                 stage: 'Indexing',
             });
-            const stats = await runIndex(projectRoot, true, (current, total, stage) => setAppStatus({ state: 'indexing', current, total, stage }));
+            const stats = await runIndex(projectRoot, true, (current, total, stage, throttleMessage, chunksProcessed) => setAppStatus({
+                state: 'indexing',
+                current,
+                total,
+                stage,
+                throttleMessage,
+                chunksProcessed,
+            }));
             addOutput('system', formatIndexStats(stats));
             // Reload stats after indexing
             const newStats = await loadIndexStats(projectRoot);

package/dist/cli/commands/handlers.d.ts CHANGED Viewed

@@ -29,7 +29,7 @@ export declare function runInit(projectRoot: string, isReinit?: boolean, wizardC
  * When force=true, also updates config dimensions to match current PROVIDER_CONFIGS
  * (handles dimension changes after viberag upgrades).
  */
-export declare function runIndex(projectRoot: string, force?: boolean, onProgress?: (current: number, total: number, stage: string) => void): Promise<IndexStats>;
+export declare function runIndex(projectRoot: string, force?: boolean, onProgress?: (current: number, total: number, stage: string, throttleMessage?: string | null, chunksProcessed?: number) => void): Promise<IndexStats>;
 /**
  * Format index stats for display.
  */

package/dist/cli/commands/useRagCommands.js CHANGED Viewed

@@ -55,7 +55,14 @@ Manual MCP Setup:
         const action = force ? 'Reindexing' : 'Indexing';
         addOutput('system', `${action} codebase...`);
         setAppStatus({ state: 'indexing', current: 0, total: 0, stage: action });
-        runIndex(projectRoot, force, (current, total, stage) => setAppStatus({ state: 'indexing', current, total, stage }))
+        runIndex(projectRoot, force, (current, total, stage, throttleMessage, chunksProcessed) => setAppStatus({
+            state: 'indexing',
+            current,
+            total,
+            stage,
+            throttleMessage,
+            chunksProcessed,
+        }))
             .then(async (stats) => {
             addOutput('system', formatIndexStats(stats));
             // Reload stats after indexing

package/dist/common/components/StatusBar.js CHANGED Viewed

@@ -25,12 +25,25 @@ function formatStatus(status) {
         case 'ready':
             return { text: 'Ready', color: 'green', showSpinner: false };
         case 'indexing': {
+            // Throttle status takes precedence - show in yellow
+            if (status.throttleMessage) {
+                return {
+                    text: status.throttleMessage,
+                    color: 'yellow',
+                    showSpinner: true,
+                };
+            }
+            // Normal indexing display
             if (status.total === 0) {
                 return { text: `${status.stage}`, color: 'cyan', showSpinner: true };
             }
             const percent = Math.round((status.current / status.total) * 100);
+            // Include chunk count if available
+            const chunkInfo = status.chunksProcessed !== undefined
+                ? ` · ${status.chunksProcessed} chunks`
+                : '';
             return {
-                text: `${status.stage} ${status.current}/${status.total} (${percent}%)`,
+                text: `${status.stage} ${status.current}/${status.total} (${percent}%)${chunkInfo}`,
                 color: 'cyan',
                 showSpinner: true,
             };

package/dist/common/types.d.ts CHANGED Viewed

@@ -61,6 +61,10 @@ export type AppStatus = {
     current: number;
     total: number;
     stage: string;
+    /** Rate limit message (shown in yellow when set) */
+    throttleMessage?: string | null;
+    /** Number of chunks embedded so far */
+    chunksProcessed?: number;
 } | {
     state: 'searching';
 } | {

package/dist/rag/embeddings/gemini.d.ts CHANGED Viewed

@@ -17,9 +17,19 @@ export declare class GeminiEmbeddingProvider implements EmbeddingProvider {
     readonly dimensions = 1536;
     private apiKey;
     private initialized;
+    onThrottle?: (message: string | null) => void;
+    onBatchProgress?: (processed: number, total: number) => void;
     constructor(apiKey?: string);
     initialize(_onProgress?: ModelProgressCallback): Promise<void>;
     embed(texts: string[]): Promise<number[][]>;
+    /**
+     * Embed a batch with exponential backoff retry on rate limit errors.
+     */
+    private embedBatchWithRetry;
+    /**
+     * Check if an error is a rate limit error (429 or quota exceeded).
+     */
+    private isRateLimitError;
     private embedBatch;
     embedSingle(text: string): Promise<number[]>;
     close(): void;

package/dist/rag/embeddings/gemini.js CHANGED Viewed

@@ -10,7 +10,17 @@
  */
 const GEMINI_API_BASE = 'https://generativelanguage.googleapis.com/v1beta/models';
 const MODEL = 'gemini-embedding-001';
-const BATCH_SIZE = 100; // Gemini supports up to 100 texts per request
+// Gemini limits: 2,048 tokens/text, 20,000 tokens/batch, 100-250 texts/batch
+// With avg ~1000 tokens/chunk, safe limit is 20 texts.
+const BATCH_SIZE = 20;
+// Concurrency and rate limiting
+const CONCURRENCY = 5; // Max concurrent API requests
+const MAX_RETRIES = 12; // Max retry attempts on rate limit
+const INITIAL_BACKOFF_MS = 1000; // Start at 1s
+const MAX_BACKOFF_MS = 60000; // Cap at 60s (1 min)
+function sleep(ms) {
+    return new Promise(resolve => setTimeout(resolve, ms));
+}
 /**
  * Gemini embedding provider.
  * Uses gemini-embedding-001 model via Google's Generative AI API.
@@ -35,6 +45,20 @@ export class GeminiEmbeddingProvider {
             writable: true,
             value: false
         });
+        // Callback for rate limit throttling - message or null to clear
+        Object.defineProperty(this, "onThrottle", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        // Callback for batch progress - (processed, total) chunks
+        Object.defineProperty(this, "onBatchProgress", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
         // Trim the key to remove any accidental whitespace
         this.apiKey = (apiKey ?? '').trim();
     }
@@ -51,15 +75,67 @@ export class GeminiEmbeddingProvider {
         if (texts.length === 0) {
             return [];
         }
-        const results = [];
-        // Process in batches
+        // Split into batches
+        const batches = [];
         for (let i = 0; i < texts.length; i += BATCH_SIZE) {
-            const batch = texts.slice(i, i + BATCH_SIZE);
-            const batchResults = await this.embedBatch(batch);
-            results.push(...batchResults);
+            batches.push(texts.slice(i, i + BATCH_SIZE));
+        }
+        // Process batches with limited concurrency
+        const results = [];
+        let completed = 0;
+        for (let i = 0; i < batches.length; i += CONCURRENCY) {
+            const concurrentBatches = batches.slice(i, i + CONCURRENCY);
+            // Fire concurrent requests
+            const batchResults = await Promise.all(concurrentBatches.map(batch => this.embedBatchWithRetry(batch)));
+            // Flatten and collect results (Promise.all preserves order)
+            for (const result of batchResults) {
+                results.push(...result);
+            }
+            // Report progress after concurrent group completes
+            completed += concurrentBatches.length;
+            const processed = Math.min(completed * BATCH_SIZE, texts.length);
+            this.onBatchProgress?.(processed, texts.length);
         }
         return results;
     }
+    /**
+     * Embed a batch with exponential backoff retry on rate limit errors.
+     */
+    async embedBatchWithRetry(batch) {
+        let attempt = 0;
+        let backoffMs = INITIAL_BACKOFF_MS;
+        while (true) {
+            try {
+                const result = await this.embedBatch(batch);
+                // Clear throttle message on success (if was throttling)
+                if (attempt > 0)
+                    this.onThrottle?.(null);
+                return result;
+            }
+            catch (error) {
+                if (this.isRateLimitError(error) && attempt < MAX_RETRIES) {
+                    attempt++;
+                    const secs = Math.round(backoffMs / 1000);
+                    this.onThrottle?.(`Rate limited - retry ${attempt}/${MAX_RETRIES} in ${secs}s`);
+                    await sleep(backoffMs);
+                    backoffMs = Math.min(backoffMs * 2, MAX_BACKOFF_MS);
+                }
+                else {
+                    throw error;
+                }
+            }
+        }
+    }
+    /**
+     * Check if an error is a rate limit error (429 or quota exceeded).
+     */
+    isRateLimitError(error) {
+        if (error instanceof Error) {
+            const msg = error.message.toLowerCase();
+            return (msg.includes('429') || msg.includes('rate') || msg.includes('quota'));
+        }
+        return false;
+    }
     async embedBatch(texts) {
         const url = `${GEMINI_API_BASE}/${MODEL}:batchEmbedContents`;
         const response = await fetch(url, {

package/dist/rag/embeddings/mistral.d.ts CHANGED Viewed

@@ -13,9 +13,19 @@ export declare class MistralEmbeddingProvider implements EmbeddingProvider {
     readonly dimensions = 1536;
     private apiKey;
     private initialized;
+    onThrottle?: (message: string | null) => void;
+    onBatchProgress?: (processed: number, total: number) => void;
     constructor(apiKey?: string);
     initialize(_onProgress?: ModelProgressCallback): Promise<void>;
     embed(texts: string[]): Promise<number[][]>;
+    /**
+     * Embed a batch with exponential backoff retry on rate limit errors.
+     */
+    private embedBatchWithRetry;
+    /**
+     * Check if an error is a rate limit error (429 or quota exceeded).
+     */
+    private isRateLimitError;
     private embedBatch;
     embedSingle(text: string): Promise<number[]>;
     close(): void;

package/dist/rag/embeddings/mistral.js CHANGED Viewed

@@ -6,7 +6,17 @@
  */
 const MISTRAL_API_BASE = 'https://api.mistral.ai/v1';
 const MODEL = 'codestral-embed';
-const BATCH_SIZE = 64; // Mistral supports batching
+// Mistral limits: 8,192 tokens/text, 16,000 tokens/batch TOTAL
+// With avg ~500 tokens/chunk, can fit ~32. Use 24 for safety margin.
+const BATCH_SIZE = 24;
+// Concurrency and rate limiting
+const CONCURRENCY = 5; // Max concurrent API requests
+const MAX_RETRIES = 12; // Max retry attempts on rate limit
+const INITIAL_BACKOFF_MS = 1000; // Start at 1s
+const MAX_BACKOFF_MS = 60000; // Cap at 60s (1 min)
+function sleep(ms) {
+    return new Promise(resolve => setTimeout(resolve, ms));
+}
 /**
  * Mistral embedding provider.
  * Uses codestral-embed model via Mistral AI API.
@@ -31,6 +41,20 @@ export class MistralEmbeddingProvider {
             writable: true,
             value: false
         });
+        // Callback for rate limit throttling - message or null to clear
+        Object.defineProperty(this, "onThrottle", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        // Callback for batch progress - (processed, total) chunks
+        Object.defineProperty(this, "onBatchProgress", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
         // Trim the key to remove any accidental whitespace
         this.apiKey = (apiKey ?? '').trim();
     }
@@ -47,15 +71,67 @@ export class MistralEmbeddingProvider {
         if (texts.length === 0) {
             return [];
         }
-        const results = [];
-        // Process in batches
+        // Split into batches
+        const batches = [];
         for (let i = 0; i < texts.length; i += BATCH_SIZE) {
-            const batch = texts.slice(i, i + BATCH_SIZE);
-            const batchResults = await this.embedBatch(batch);
-            results.push(...batchResults);
+            batches.push(texts.slice(i, i + BATCH_SIZE));
+        }
+        // Process batches with limited concurrency
+        const results = [];
+        let completed = 0;
+        for (let i = 0; i < batches.length; i += CONCURRENCY) {
+            const concurrentBatches = batches.slice(i, i + CONCURRENCY);
+            // Fire concurrent requests
+            const batchResults = await Promise.all(concurrentBatches.map(batch => this.embedBatchWithRetry(batch)));
+            // Flatten and collect results (Promise.all preserves order)
+            for (const result of batchResults) {
+                results.push(...result);
+            }
+            // Report progress after concurrent group completes
+            completed += concurrentBatches.length;
+            const processed = Math.min(completed * BATCH_SIZE, texts.length);
+            this.onBatchProgress?.(processed, texts.length);
         }
         return results;
     }
+    /**
+     * Embed a batch with exponential backoff retry on rate limit errors.
+     */
+    async embedBatchWithRetry(batch) {
+        let attempt = 0;
+        let backoffMs = INITIAL_BACKOFF_MS;
+        while (true) {
+            try {
+                const result = await this.embedBatch(batch);
+                // Clear throttle message on success (if was throttling)
+                if (attempt > 0)
+                    this.onThrottle?.(null);
+                return result;
+            }
+            catch (error) {
+                if (this.isRateLimitError(error) && attempt < MAX_RETRIES) {
+                    attempt++;
+                    const secs = Math.round(backoffMs / 1000);
+                    this.onThrottle?.(`Rate limited - retry ${attempt}/${MAX_RETRIES} in ${secs}s`);
+                    await sleep(backoffMs);
+                    backoffMs = Math.min(backoffMs * 2, MAX_BACKOFF_MS);
+                }
+                else {
+                    throw error;
+                }
+            }
+        }
+    }
+    /**
+     * Check if an error is a rate limit error (429 or quota exceeded).
+     */
+    isRateLimitError(error) {
+        if (error instanceof Error) {
+            const msg = error.message.toLowerCase();
+            return (msg.includes('429') || msg.includes('rate') || msg.includes('quota'));
+        }
+        return false;
+    }
     async embedBatch(texts) {
         const response = await fetch(`${MISTRAL_API_BASE}/embeddings`, {
             method: 'POST',

package/dist/rag/embeddings/openai.d.ts CHANGED Viewed

@@ -13,9 +13,19 @@ export declare class OpenAIEmbeddingProvider implements EmbeddingProvider {
     readonly dimensions = 1536;
     private apiKey;
     private initialized;
+    onThrottle?: (message: string | null) => void;
+    onBatchProgress?: (processed: number, total: number) => void;
     constructor(apiKey?: string);
     initialize(_onProgress?: ModelProgressCallback): Promise<void>;
     embed(texts: string[]): Promise<number[][]>;
+    /**
+     * Embed a batch with exponential backoff retry on rate limit errors.
+     */
+    private embedBatchWithRetry;
+    /**
+     * Check if an error is a rate limit error (429 or quota exceeded).
+     */
+    private isRateLimitError;
     private embedBatch;
     embedSingle(text: string): Promise<number[]>;
     close(): void;

package/dist/rag/embeddings/openai.js CHANGED Viewed

@@ -6,7 +6,17 @@
  */
 const OPENAI_API_BASE = 'https://api.openai.com/v1';
 const MODEL = 'text-embedding-3-small';
-const BATCH_SIZE = 2048; // OpenAI supports up to 2048 texts per request
+// OpenAI limits: 8,191 tokens/text, 300,000 tokens/batch, 2,048 texts/batch
+// With avg ~1000 tokens/chunk, safe limit is 300 texts. Use 256 for margin.
+const BATCH_SIZE = 256;
+// Concurrency and rate limiting
+const CONCURRENCY = 5; // Max concurrent API requests
+const MAX_RETRIES = 12; // Max retry attempts on rate limit
+const INITIAL_BACKOFF_MS = 1000; // Start at 1s
+const MAX_BACKOFF_MS = 60000; // Cap at 60s (1 min)
+function sleep(ms) {
+    return new Promise(resolve => setTimeout(resolve, ms));
+}
 /**
  * OpenAI embedding provider.
  * Uses text-embedding-3-small model via OpenAI API.
@@ -31,6 +41,20 @@ export class OpenAIEmbeddingProvider {
             writable: true,
             value: false
         });
+        // Callback for rate limit throttling - message or null to clear
+        Object.defineProperty(this, "onThrottle", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        // Callback for batch progress - (processed, total) chunks
+        Object.defineProperty(this, "onBatchProgress", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
         // Trim the key to remove any accidental whitespace
         this.apiKey = (apiKey ?? '').trim();
     }
@@ -51,15 +75,67 @@ export class OpenAIEmbeddingProvider {
         if (texts.length === 0) {
             return [];
         }
-        const results = [];
-        // Process in batches
+        // Split into batches
+        const batches = [];
         for (let i = 0; i < texts.length; i += BATCH_SIZE) {
-            const batch = texts.slice(i, i + BATCH_SIZE);
-            const batchResults = await this.embedBatch(batch);
-            results.push(...batchResults);
+            batches.push(texts.slice(i, i + BATCH_SIZE));
+        }
+        // Process batches with limited concurrency
+        const results = [];
+        let completed = 0;
+        for (let i = 0; i < batches.length; i += CONCURRENCY) {
+            const concurrentBatches = batches.slice(i, i + CONCURRENCY);
+            // Fire concurrent requests
+            const batchResults = await Promise.all(concurrentBatches.map(batch => this.embedBatchWithRetry(batch)));
+            // Flatten and collect results (Promise.all preserves order)
+            for (const result of batchResults) {
+                results.push(...result);
+            }
+            // Report progress after concurrent group completes
+            completed += concurrentBatches.length;
+            const processed = Math.min(completed * BATCH_SIZE, texts.length);
+            this.onBatchProgress?.(processed, texts.length);
         }
         return results;
     }
+    /**
+     * Embed a batch with exponential backoff retry on rate limit errors.
+     */
+    async embedBatchWithRetry(batch) {
+        let attempt = 0;
+        let backoffMs = INITIAL_BACKOFF_MS;
+        while (true) {
+            try {
+                const result = await this.embedBatch(batch);
+                // Clear throttle message on success (if was throttling)
+                if (attempt > 0)
+                    this.onThrottle?.(null);
+                return result;
+            }
+            catch (error) {
+                if (this.isRateLimitError(error) && attempt < MAX_RETRIES) {
+                    attempt++;
+                    const secs = Math.round(backoffMs / 1000);
+                    this.onThrottle?.(`Rate limited - retry ${attempt}/${MAX_RETRIES} in ${secs}s`);
+                    await sleep(backoffMs);
+                    backoffMs = Math.min(backoffMs * 2, MAX_BACKOFF_MS);
+                }
+                else {
+                    throw error;
+                }
+            }
+        }
+    }
+    /**
+     * Check if an error is a rate limit error (429 or quota exceeded).
+     */
+    isRateLimitError(error) {
+        if (error instanceof Error) {
+            const msg = error.message.toLowerCase();
+            return (msg.includes('429') || msg.includes('rate') || msg.includes('quota'));
+        }
+        return false;
+    }
     async embedBatch(texts) {
         const response = await fetch(`${OPENAI_API_BASE}/embeddings`, {
             method: 'POST',

package/dist/rag/gitignore/index.js CHANGED Viewed

@@ -19,6 +19,47 @@ const ALWAYS_IGNORED = [
     '.viberag',
     'node_modules', // Fallback in case not in .gitignore
 ];
+/**
+ * Lock files that should always be ignored.
+ * These are machine-generated and provide no value for code search.
+ */
+const ALWAYS_IGNORED_FILES = [
+    // JavaScript/TypeScript
+    'package-lock.json', // npm
+    'yarn.lock', // Yarn
+    'pnpm-lock.yaml', // pnpm
+    'bun.lockb', // Bun
+    // Python
+    'uv.lock', // UV
+    'poetry.lock', // Poetry
+    'Pipfile.lock', // Pipenv
+    // Ruby
+    'Gemfile.lock', // Bundler
+    // PHP
+    'composer.lock', // Composer
+    // Rust
+    'Cargo.lock', // Cargo
+    // Go
+    'go.sum', // Go modules
+    // Java/Kotlin
+    'gradle.lockfile', // Gradle
+    // C#/.NET
+    'packages.lock.json', // NuGet
+    // Dart
+    'pubspec.lock', // Pub
+    // Swift
+    'Package.resolved', // Swift PM
+];
+/**
+ * File patterns that should always be ignored.
+ * These are build artifacts with no semantic value for code search.
+ */
+const ALWAYS_IGNORED_PATTERNS = [
+    '*.min.js', // Minified JavaScript
+    '*.min.css', // Minified CSS
+    '*.map', // Source maps
+    '*.d.ts.map', // TypeScript declaration maps
+];
 /**
  * Cache of Ignore instances per project root.
  */
@@ -37,8 +78,12 @@ export async function loadGitignore(projectRoot) {
         return cached;
     }
     const ig = ignore();
-    // Add always-ignored patterns
+    // Add always-ignored patterns (directories)
     ig.add(ALWAYS_IGNORED);
+    // Add always-ignored files (lock files)
+    ig.add(ALWAYS_IGNORED_FILES);
+    // Add always-ignored file patterns (minified, maps)
+    ig.add(ALWAYS_IGNORED_PATTERNS);
     // Try to load .gitignore
     const gitignorePath = path.join(projectRoot, '.gitignore');
     try {
@@ -99,8 +144,16 @@ export function clearAllGitignoreCache() {
  */
 export async function getGlobIgnorePatterns(projectRoot) {
     const patterns = [];
-    // Always exclude these (same as ALWAYS_IGNORED)
+    // Always exclude these directories (same as ALWAYS_IGNORED)
     patterns.push('**/.git/**', '**/.viberag/**', '**/node_modules/**');
+    // Always exclude lock files (same as ALWAYS_IGNORED_FILES)
+    for (const file of ALWAYS_IGNORED_FILES) {
+        patterns.push(`**/${file}`);
+    }
+    // Always exclude file patterns (minified, maps)
+    for (const pattern of ALWAYS_IGNORED_PATTERNS) {
+        patterns.push(`**/${pattern}`);
+    }
     // Try to load .gitignore
     const gitignorePath = path.join(projectRoot, '.gitignore');
     try {

package/dist/rag/indexer/indexer.d.ts CHANGED Viewed

@@ -55,6 +55,9 @@ export declare class Indexer {
     /**
      * Process a batch of files: read, chunk, embed, and prepare CodeChunks.
      *
+     * Strategy: Collect all chunks from all files first, then embed them
+     * together with full concurrency for maximum throughput.
+     *
      * Error handling strategy:
      * - File read/parse errors: Log and continue (file-specific, recoverable)
      * - Embedding/storage errors: Let propagate (fatal, affects all files)

package/dist/rag/indexer/indexer.js CHANGED Viewed

@@ -142,6 +142,17 @@ export class Indexer {
             // 6. Process new and modified files
             const filesToProcess = [...diff.new, ...diff.modified];
             const totalFiles = filesToProcess.length;
+            // Track cumulative chunks for progress display
+            let totalChunksProcessed = 0;
+            let lastProgress = 0;
+            // Wire throttle callback for rate limit feedback (API providers only)
+            if ('onThrottle' in embeddings) {
+                embeddings.onThrottle =
+                    message => {
+                        // Pass throttle message to UI - shown in yellow when set
+                        progressCallback?.(lastProgress, totalFiles, 'Indexing files', message, totalChunksProcessed);
+                    };
+            }
             if (totalFiles > 0) {
                 this.log('info', `Processing ${totalFiles} files`);
                 // First, delete existing chunks for modified files
@@ -153,7 +164,15 @@ export class Indexer {
                 const batchSize = 10;
                 for (let i = 0; i < filesToProcess.length; i += batchSize) {
                     const batch = filesToProcess.slice(i, i + batchSize);
-                    const batchChunks = await this.processFileBatch(batch, chunker, embeddings, storage, stats);
+                    const batchChunks = await this.processFileBatch(batch, chunker, embeddings, storage, stats, {
+                        totalFiles,
+                        currentFileOffset: i,
+                        progressCallback,
+                        onChunksProcessed: (count) => {
+                            totalChunksProcessed += count;
+                            progressCallback?.(i, totalFiles, 'Indexing files', null, totalChunksProcessed);
+                        },
+                    });
                     if (batchChunks.length > 0) {
                         // Use addChunks after table reset to avoid schema mismatch,
                         // upsertChunks for normal incremental updates
@@ -166,7 +185,8 @@ export class Indexer {
                         stats.chunksAdded += batchChunks.length;
                     }
                     const progress = Math.round(((i + batch.length) / totalFiles) * 100);
-                    progressCallback?.(i + batch.length, totalFiles, 'Indexing files');
+                    lastProgress = i + batch.length;
+                    progressCallback?.(i + batch.length, totalFiles, 'Indexing files', null, totalChunksProcessed);
                     this.log('debug', `Progress: ${progress}% (${i + batch.length}/${totalFiles})`);
                 }
             }
@@ -218,79 +238,112 @@ export class Indexer {
     /**
      * Process a batch of files: read, chunk, embed, and prepare CodeChunks.
      *
+     * Strategy: Collect all chunks from all files first, then embed them
+     * together with full concurrency for maximum throughput.
+     *
      * Error handling strategy:
      * - File read/parse errors: Log and continue (file-specific, recoverable)
      * - Embedding/storage errors: Let propagate (fatal, affects all files)
      */
-    async processFileBatch(filepaths, chunker, embeddings, storage, stats) {
-        const allChunks = [];
+    async processFileBatch(filepaths, chunker, embeddings, storage, stats, progressContext) {
+        const fileDataList = [];
         for (const filepath of filepaths) {
-            // Phase 1: File reading and chunking (recoverable errors)
-            let content;
-            let fileHash;
-            let chunks;
             try {
                 const absolutePath = path.join(this.projectRoot, filepath);
-                content = await fs.readFile(absolutePath, 'utf-8');
-                fileHash = (await import('../merkle/hash.js')).computeStringHash(content);
-                // Chunk the file (with size limits from config)
-                chunks = await chunker.chunkFile(filepath, content, this.config.chunkMaxSize);
+                const content = await fs.readFile(absolutePath, 'utf-8');
+                const fileHash = (await import('../merkle/hash.js')).computeStringHash(content);
+                const chunks = await chunker.chunkFile(filepath, content, this.config.chunkMaxSize);
+                fileDataList.push({ filepath, fileHash, chunks });
             }
             catch (error) {
-                // File-specific error (read/parse) - log and continue with other files
                 this.log('warn', `Failed to read/parse file: ${filepath}`, error);
                 continue;
             }
-            // Phase 2: Embedding and storage (fatal errors - let propagate)
-            // NO try-catch here - API/storage errors should stop indexing
-            // Check embedding cache for each chunk
-            const contentHashes = chunks.map(c => c.contentHash);
-            const cachedEmbeddings = await storage.getCachedEmbeddings(contentHashes);
-            // Compute embeddings for cache misses
-            const missingChunks = chunks.filter(c => !cachedEmbeddings.has(c.contentHash));
-            if (missingChunks.length > 0) {
-                // Embed contextHeader + text for semantic relevance
-                const texts = missingChunks.map(c => c.contextHeader ? `${c.contextHeader}\n${c.text}` : c.text);
-                const newEmbeddings = await embeddings.embed(texts);
-                stats.embeddingsComputed += missingChunks.length;
-                // Cache the new embeddings
-                const cacheEntries = missingChunks.map((chunk, i) => ({
-                    contentHash: chunk.contentHash,
-                    vector: newEmbeddings[i],
-                    createdAt: new Date().toISOString(),
-                }));
-                await storage.cacheEmbeddings(cacheEntries);
-                // Add to cachedEmbeddings map
-                missingChunks.forEach((chunk, i) => {
-                    cachedEmbeddings.set(chunk.contentHash, newEmbeddings[i]);
+        }
+        const allChunksWithContext = [];
+        for (const fd of fileDataList) {
+            for (const chunk of fd.chunks) {
+                allChunksWithContext.push({
+                    chunk,
+                    filepath: fd.filepath,
+                    fileHash: fd.fileHash,
                 });
             }
-            stats.embeddingsCached += chunks.length - missingChunks.length;
-            // Build CodeChunk objects
-            const filename = path.basename(filepath);
-            const extension = path.extname(filepath);
-            for (const chunk of chunks) {
-                const vector = cachedEmbeddings.get(chunk.contentHash);
-                allChunks.push({
-                    id: `${filepath}:${chunk.startLine}`,
-                    vector,
-                    text: chunk.text,
-                    contentHash: chunk.contentHash,
-                    filepath,
-                    filename,
-                    extension,
-                    type: chunk.type,
-                    name: chunk.name,
-                    startLine: chunk.startLine,
-                    endLine: chunk.endLine,
-                    fileHash,
-                    // New metadata fields from schema v2
-                    signature: chunk.signature,
-                    docstring: chunk.docstring,
-                    isExported: chunk.isExported,
-                    decoratorNames: chunk.decoratorNames,
-                });
+        }
+        if (allChunksWithContext.length === 0) {
+            return [];
+        }
+        // Phase 2: Check embedding cache for ALL chunks at once
+        const contentHashes = allChunksWithContext.map(c => c.chunk.contentHash);
+        const cachedEmbeddings = await storage.getCachedEmbeddings(contentHashes);
+        // Find all cache misses
+        const missingChunksWithContext = allChunksWithContext.filter(c => !cachedEmbeddings.has(c.chunk.contentHash));
+        stats.embeddingsCached +=
+            allChunksWithContext.length - missingChunksWithContext.length;
+        // Phase 3: Embed ALL missing chunks together (with full concurrency)
+        if (missingChunksWithContext.length > 0) {
+            // Track chunks processed for progress updates
+            let lastReportedChunks = 0;
+            // Wire batch progress callback to report incremental chunks
+            if (progressContext?.onChunksProcessed && 'onBatchProgress' in embeddings) {
+                embeddings.onBatchProgress = (processed, _total) => {
+                    // Report only the delta since last update
+                    const delta = processed - lastReportedChunks;
+                    if (delta > 0) {
+                        progressContext.onChunksProcessed(delta);
+                        lastReportedChunks = processed;
+                    }
+                };
+            }
+            // Embed all chunks together
+            const texts = missingChunksWithContext.map(c => c.chunk.contextHeader
+                ? `${c.chunk.contextHeader}\n${c.chunk.text}`
+                : c.chunk.text);
+            const newEmbeddings = await embeddings.embed(texts);
+            stats.embeddingsComputed += missingChunksWithContext.length;
+            // Report any remaining chunks not yet reported
+            const remainingDelta = missingChunksWithContext.length - lastReportedChunks;
+            if (remainingDelta > 0 && progressContext?.onChunksProcessed) {
+                progressContext.onChunksProcessed(remainingDelta);
             }
+            // Clear batch progress callback
+            if ('onBatchProgress' in embeddings) {
+                embeddings.onBatchProgress = undefined;
+            }
+            // Cache the new embeddings
+            const cacheEntries = missingChunksWithContext.map((c, i) => ({
+                contentHash: c.chunk.contentHash,
+                vector: newEmbeddings[i],
+                createdAt: new Date().toISOString(),
+            }));
+            await storage.cacheEmbeddings(cacheEntries);
+            // Add to cachedEmbeddings map
+            missingChunksWithContext.forEach((c, i) => {
+                cachedEmbeddings.set(c.chunk.contentHash, newEmbeddings[i]);
+            });
+        }
+        // Phase 4: Build CodeChunk objects
+        const allChunks = [];
+        for (const { chunk, filepath, fileHash } of allChunksWithContext) {
+            const vector = cachedEmbeddings.get(chunk.contentHash);
+            allChunks.push({
+                id: `${filepath}:${chunk.startLine}`,
+                vector,
+                text: chunk.text,
+                contentHash: chunk.contentHash,
+                filepath,
+                filename: path.basename(filepath),
+                extension: path.extname(filepath),
+                type: chunk.type,
+                name: chunk.name,
+                startLine: chunk.startLine,
+                endLine: chunk.endLine,
+                fileHash,
+                signature: chunk.signature,
+                docstring: chunk.docstring,
+                isExported: chunk.isExported,
+                decoratorNames: chunk.decoratorNames,
+            });
         }
         return allChunks;
     }

package/dist/rag/indexer/types.d.ts CHANGED Viewed

@@ -60,8 +60,13 @@ export interface IndexStats {
 }
 /**
  * Progress callback for indexing operations.
+ * @param current - Current progress count
+ * @param total - Total items (0 for indeterminate)
+ * @param stage - Human-readable stage name
+ * @param throttleMessage - Rate limit message (shown in yellow) or null to clear
+ * @param chunksProcessed - Number of chunks embedded so far
  */
-export type ProgressCallback = (current: number, total: number, stage: string) => void;
+export type ProgressCallback = (current: number, total: number, stage: string, throttleMessage?: string | null, chunksProcessed?: number) => void;
 /**
  * Create empty index stats.
  */

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
 	"name": "viberag",
-	"version": "0.3.1",
+	"version": "0.3.2",
 	"description": "Local code RAG for AI coding assistants - semantic search via MCP server",
 	"license": "AGPL-3.0",
 	"keywords": [