npm - viberag - Versions diffs - 0.3.0 → 0.3.2 - Mend

viberag 0.3.0 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20)

package/README.md +1 -1
package/dist/cli/app.js +8 -1
package/dist/cli/commands/handlers.d.ts +1 -1
package/dist/cli/commands/useRagCommands.js +8 -1
package/dist/common/components/StatusBar.js +14 -1
package/dist/common/types.d.ts +4 -0
package/dist/mcp/server.js +15 -25
package/dist/rag/embeddings/gemini.d.ts +10 -0
package/dist/rag/embeddings/gemini.js +82 -6
package/dist/rag/embeddings/mistral.d.ts +10 -0
package/dist/rag/embeddings/mistral.js +82 -6
package/dist/rag/embeddings/openai.d.ts +10 -0
package/dist/rag/embeddings/openai.js +82 -6
package/dist/rag/gitignore/index.js +55 -2
package/dist/rag/indexer/indexer.d.ts +3 -0
package/dist/rag/indexer/indexer.js +113 -60
package/dist/rag/indexer/types.d.ts +6 -1
package/dist/rag/search/index.js +1 -9
package/dist/rag/search/types.d.ts +0 -4
package/package.json +1 -1

package/README.md CHANGED Viewed

@@ -60,7 +60,7 @@ Your coding agent would normally use Search / Grep / Find and guess search terms
 When searching for "authentication", VibeRAG will find all code snippets that are relevant to authentication, such as "login", "logout", "register", and names of functions and classes like `AuthDependency`, `APIKeyCache`, etc.
-This ensures a more exhaustive search of your codebase so you don't miss important files and features that are relevant to your changes or refactor.
+This ensures comprehensive search of your codebase so you don't miss important files and features that are relevant to your changes or refactor.
 ### Great for Monorepos

package/dist/cli/app.js CHANGED Viewed

@@ -148,7 +148,14 @@ export default function App() {
                 total: 0,
                 stage: 'Indexing',
             });
-            const stats = await runIndex(projectRoot, true, (current, total, stage) => setAppStatus({ state: 'indexing', current, total, stage }));
+            const stats = await runIndex(projectRoot, true, (current, total, stage, throttleMessage, chunksProcessed) => setAppStatus({
+                state: 'indexing',
+                current,
+                total,
+                stage,
+                throttleMessage,
+                chunksProcessed,
+            }));
             addOutput('system', formatIndexStats(stats));
             // Reload stats after indexing
             const newStats = await loadIndexStats(projectRoot);

package/dist/cli/commands/handlers.d.ts CHANGED Viewed

@@ -29,7 +29,7 @@ export declare function runInit(projectRoot: string, isReinit?: boolean, wizardC
  * When force=true, also updates config dimensions to match current PROVIDER_CONFIGS
  * (handles dimension changes after viberag upgrades).
  */
-export declare function runIndex(projectRoot: string, force?: boolean, onProgress?: (current: number, total: number, stage: string) => void): Promise<IndexStats>;
+export declare function runIndex(projectRoot: string, force?: boolean, onProgress?: (current: number, total: number, stage: string, throttleMessage?: string | null, chunksProcessed?: number) => void): Promise<IndexStats>;
 /**
  * Format index stats for display.
  */

package/dist/cli/commands/useRagCommands.js CHANGED Viewed

@@ -55,7 +55,14 @@ Manual MCP Setup:
         const action = force ? 'Reindexing' : 'Indexing';
         addOutput('system', `${action} codebase...`);
         setAppStatus({ state: 'indexing', current: 0, total: 0, stage: action });
-        runIndex(projectRoot, force, (current, total, stage) => setAppStatus({ state: 'indexing', current, total, stage }))
+        runIndex(projectRoot, force, (current, total, stage, throttleMessage, chunksProcessed) => setAppStatus({
+            state: 'indexing',
+            current,
+            total,
+            stage,
+            throttleMessage,
+            chunksProcessed,
+        }))
             .then(async (stats) => {
             addOutput('system', formatIndexStats(stats));
             // Reload stats after indexing

package/dist/common/components/StatusBar.js CHANGED Viewed

@@ -25,12 +25,25 @@ function formatStatus(status) {
         case 'ready':
             return { text: 'Ready', color: 'green', showSpinner: false };
         case 'indexing': {
+            // Throttle status takes precedence - show in yellow
+            if (status.throttleMessage) {
+                return {
+                    text: status.throttleMessage,
+                    color: 'yellow',
+                    showSpinner: true,
+                };
+            }
+            // Normal indexing display
             if (status.total === 0) {
                 return { text: `${status.stage}`, color: 'cyan', showSpinner: true };
             }
             const percent = Math.round((status.current / status.total) * 100);
+            // Include chunk count if available
+            const chunkInfo = status.chunksProcessed !== undefined
+                ? ` · ${status.chunksProcessed} chunks`
+                : '';
             return {
-                text: `${status.stage} ${status.current}/${status.total} (${percent}%)`,
+                text: `${status.stage} ${status.current}/${status.total} (${percent}%)${chunkInfo}`,
                 color: 'cyan',
                 showSpinner: true,
             };

package/dist/common/types.d.ts CHANGED Viewed

@@ -61,6 +61,10 @@ export type AppStatus = {
     current: number;
     total: number;
     stage: string;
+    /** Rate limit message (shown in yellow when set) */
+    throttleMessage?: string | null;
+    /** Number of chunks embedded so far */
+    chunksProcessed?: number;
 } | {
     state: 'searching';
 } | {

package/dist/mcp/server.js CHANGED Viewed

@@ -34,14 +34,16 @@ async function ensureInitialized(projectRoot) {
     }
 }
 /**
- * Default maximum response size in bytes (100KB).
+ * Default maximum response size in bytes (50KB).
  * Reduces result count to fit; does NOT truncate text.
+ * Note: Claude Code has ~25K token limit (~100KB), so 50KB default leaves headroom.
  */
-const DEFAULT_MAX_RESPONSE_SIZE = 100 * 1024;
+const DEFAULT_MAX_RESPONSE_SIZE = 50 * 1024;
 /**
- * Maximum allowed response size (500KB).
+ * Maximum allowed response size (100KB).
+ * Hard cap to prevent token overflow in AI tools.
  */
-const MAX_RESPONSE_SIZE = 500 * 1024;
+const MAX_RESPONSE_SIZE = 100 * 1024;
 /**
  * Overhead per result in JSON (metadata fields, formatting).
  */
@@ -129,10 +131,6 @@ function formatSearchResults(results, includeDebug = false, maxResponseSize = DE
         response['originalResultCount'] = results.results.length;
         response['reducedForSize'] = true;
     }
-    // Add totalMatches for exhaustive mode
-    if (results.totalMatches !== undefined) {
-        response['totalMatches'] = results.totalMatches;
-    }
     // Add debug info for AI evaluation
     if (includeDebug && results.debug) {
         response['debug'] = formatDebugInfo(results.debug);
@@ -241,9 +239,9 @@ export function createMcpServer(projectRoot) {
     server.addTool({
         name: 'codebase_search',
         description: `
-Codebase search: semantic search, keyword search, and hybrid search options.
-Use this when you need to find code that matches semantic meaning and keyword patterns.
-This tool helps you perform exhaustive searches of the codebase and get the best
+Codebase search: semantic search, keyword search, and hybrid search options.
+Use this when you need to find code that matches semantic meaning and keyword patterns.
+This tool helps you perform comprehensive searches of the codebase and get the best
 context and understanding when exploring and searching the codebase, docs, etc.
 USE FOR CODEBASE EXPLORATION:
@@ -299,8 +297,7 @@ For thorough searches, consider:
 1. Start with hybrid mode, default weights
 2. Check debug info to evaluate search quality
 3. If maxVectorScore < 0.3, try exact mode or higher bm25_weight
-4. If results seem incomplete, try codebase_parallel_search for comparison
-5. Use exhaustive=true for refactoring tasks needing ALL matches
+4. If results seem incomplete, run more searches and try codebase_parallel_search for comparison
 RESULT INTERPRETATION:
 - score: Combined relevance (higher = better)
@@ -351,11 +348,6 @@ Production code: { path_not_contains: ["test", "mock", "fixture"], is_exported:
                 .optional()
                 .default(10)
                 .describe('Maximum number of results (1-100, default: 10)'),
-            exhaustive: z
-                .boolean()
-                .optional()
-                .default(false)
-                .describe('Return all matches (for refactoring/auditing)'),
             min_score: z
                 .number()
                 .min(0)
@@ -398,9 +390,8 @@ Production code: { path_not_contains: ["test", "mock", "fixture"], is_exported:
                 .max(MAX_RESPONSE_SIZE)
                 .optional()
                 .default(DEFAULT_MAX_RESPONSE_SIZE)
-                .describe('Maximum response size in bytes (default: 100KB, max: 500KB). ' +
-                'Reduces result count to fit within limit; does NOT truncate text content. ' +
-                'Use a larger value for exhaustive searches.'),
+                .describe('Maximum response size in bytes (default: 50KB, max: 100KB). ' +
+                'Reduces result count to fit within limit; does NOT truncate text content.'),
         }),
         execute: async (args) => {
             await ensureInitialized(projectRoot);
@@ -425,7 +416,6 @@ Production code: { path_not_contains: ["test", "mock", "fixture"], is_exported:
             const results = await engine.search(args.query, {
                 mode: args.mode,
                 limit: args.limit,
-                exhaustive: args.exhaustive,
                 minScore: args.min_score,
                 filters,
                 codeSnippet: args.code_snippet,
@@ -545,9 +535,9 @@ Production code: { path_not_contains: ["test", "mock", "fixture"], is_exported:
     server.addTool({
         name: 'codebase_parallel_search',
         description: `
-Codebase Parallel Search: run multiple semantic search, keyword search, and hybrid searches in parallel and compare results.
-Use this when you need to run multiple searches at once to find code that matches semantic meaning and keyword patterns.
-This tool helps you perform exhaustive searches of the codebase and get the best
+Codebase Parallel Search: run multiple semantic search, keyword search, and hybrid searches in parallel and compare results.
+Use this when you need to run multiple searches at once to find code that matches semantic meaning and keyword patterns.
+This tool helps you perform comprehensive searches of the codebase and get the best
 context and understanding when exploring and searching the codebase, docs, etc.
 NOTE: This is for narrower sets of queries. Parallel searches may return a large number of results,

package/dist/rag/embeddings/gemini.d.ts CHANGED Viewed

@@ -17,9 +17,19 @@ export declare class GeminiEmbeddingProvider implements EmbeddingProvider {
     readonly dimensions = 1536;
     private apiKey;
     private initialized;
+    onThrottle?: (message: string | null) => void;
+    onBatchProgress?: (processed: number, total: number) => void;
     constructor(apiKey?: string);
     initialize(_onProgress?: ModelProgressCallback): Promise<void>;
     embed(texts: string[]): Promise<number[][]>;
+    /**
+     * Embed a batch with exponential backoff retry on rate limit errors.
+     */
+    private embedBatchWithRetry;
+    /**
+     * Check if an error is a rate limit error (429 or quota exceeded).
+     */
+    private isRateLimitError;
     private embedBatch;
     embedSingle(text: string): Promise<number[]>;
     close(): void;

package/dist/rag/embeddings/gemini.js CHANGED Viewed

@@ -10,7 +10,17 @@
  */
 const GEMINI_API_BASE = 'https://generativelanguage.googleapis.com/v1beta/models';
 const MODEL = 'gemini-embedding-001';
-const BATCH_SIZE = 100; // Gemini supports up to 100 texts per request
+// Gemini limits: 2,048 tokens/text, 20,000 tokens/batch, 100-250 texts/batch
+// With avg ~1000 tokens/chunk, safe limit is 20 texts.
+const BATCH_SIZE = 20;
+// Concurrency and rate limiting
+const CONCURRENCY = 5; // Max concurrent API requests
+const MAX_RETRIES = 12; // Max retry attempts on rate limit
+const INITIAL_BACKOFF_MS = 1000; // Start at 1s
+const MAX_BACKOFF_MS = 60000; // Cap at 60s (1 min)
+function sleep(ms) {
+    return new Promise(resolve => setTimeout(resolve, ms));
+}
 /**
  * Gemini embedding provider.
  * Uses gemini-embedding-001 model via Google's Generative AI API.
@@ -35,6 +45,20 @@ export class GeminiEmbeddingProvider {
             writable: true,
             value: false
         });
+        // Callback for rate limit throttling - message or null to clear
+        Object.defineProperty(this, "onThrottle", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        // Callback for batch progress - (processed, total) chunks
+        Object.defineProperty(this, "onBatchProgress", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
         // Trim the key to remove any accidental whitespace
         this.apiKey = (apiKey ?? '').trim();
     }
@@ -51,15 +75,67 @@ export class GeminiEmbeddingProvider {
         if (texts.length === 0) {
             return [];
         }
-        const results = [];
-        // Process in batches
+        // Split into batches
+        const batches = [];
         for (let i = 0; i < texts.length; i += BATCH_SIZE) {
-            const batch = texts.slice(i, i + BATCH_SIZE);
-            const batchResults = await this.embedBatch(batch);
-            results.push(...batchResults);
+            batches.push(texts.slice(i, i + BATCH_SIZE));
+        }
+        // Process batches with limited concurrency
+        const results = [];
+        let completed = 0;
+        for (let i = 0; i < batches.length; i += CONCURRENCY) {
+            const concurrentBatches = batches.slice(i, i + CONCURRENCY);
+            // Fire concurrent requests
+            const batchResults = await Promise.all(concurrentBatches.map(batch => this.embedBatchWithRetry(batch)));
+            // Flatten and collect results (Promise.all preserves order)
+            for (const result of batchResults) {
+                results.push(...result);
+            }
+            // Report progress after concurrent group completes
+            completed += concurrentBatches.length;
+            const processed = Math.min(completed * BATCH_SIZE, texts.length);
+            this.onBatchProgress?.(processed, texts.length);
         }
         return results;
     }
+    /**
+     * Embed a batch with exponential backoff retry on rate limit errors.
+     */
+    async embedBatchWithRetry(batch) {
+        let attempt = 0;
+        let backoffMs = INITIAL_BACKOFF_MS;
+        while (true) {
+            try {
+                const result = await this.embedBatch(batch);
+                // Clear throttle message on success (if was throttling)
+                if (attempt > 0)
+                    this.onThrottle?.(null);
+                return result;
+            }
+            catch (error) {
+                if (this.isRateLimitError(error) && attempt < MAX_RETRIES) {
+                    attempt++;
+                    const secs = Math.round(backoffMs / 1000);
+                    this.onThrottle?.(`Rate limited - retry ${attempt}/${MAX_RETRIES} in ${secs}s`);
+                    await sleep(backoffMs);
+                    backoffMs = Math.min(backoffMs * 2, MAX_BACKOFF_MS);
+                }
+                else {
+                    throw error;
+                }
+            }
+        }
+    }
+    /**
+     * Check if an error is a rate limit error (429 or quota exceeded).
+     */
+    isRateLimitError(error) {
+        if (error instanceof Error) {
+            const msg = error.message.toLowerCase();
+            return (msg.includes('429') || msg.includes('rate') || msg.includes('quota'));
+        }
+        return false;
+    }
     async embedBatch(texts) {
         const url = `${GEMINI_API_BASE}/${MODEL}:batchEmbedContents`;
         const response = await fetch(url, {

package/dist/rag/embeddings/mistral.d.ts CHANGED Viewed

@@ -13,9 +13,19 @@ export declare class MistralEmbeddingProvider implements EmbeddingProvider {
     readonly dimensions = 1536;
     private apiKey;
     private initialized;
+    onThrottle?: (message: string | null) => void;
+    onBatchProgress?: (processed: number, total: number) => void;
     constructor(apiKey?: string);
     initialize(_onProgress?: ModelProgressCallback): Promise<void>;
     embed(texts: string[]): Promise<number[][]>;
+    /**
+     * Embed a batch with exponential backoff retry on rate limit errors.
+     */
+    private embedBatchWithRetry;
+    /**
+     * Check if an error is a rate limit error (429 or quota exceeded).
+     */
+    private isRateLimitError;
     private embedBatch;
     embedSingle(text: string): Promise<number[]>;
     close(): void;

package/dist/rag/embeddings/mistral.js CHANGED Viewed

@@ -6,7 +6,17 @@
  */
 const MISTRAL_API_BASE = 'https://api.mistral.ai/v1';
 const MODEL = 'codestral-embed';
-const BATCH_SIZE = 64; // Mistral supports batching
+// Mistral limits: 8,192 tokens/text, 16,000 tokens/batch TOTAL
+// With avg ~500 tokens/chunk, can fit ~32. Use 24 for safety margin.
+const BATCH_SIZE = 24;
+// Concurrency and rate limiting
+const CONCURRENCY = 5; // Max concurrent API requests
+const MAX_RETRIES = 12; // Max retry attempts on rate limit
+const INITIAL_BACKOFF_MS = 1000; // Start at 1s
+const MAX_BACKOFF_MS = 60000; // Cap at 60s (1 min)
+function sleep(ms) {
+    return new Promise(resolve => setTimeout(resolve, ms));
+}
 /**
  * Mistral embedding provider.
  * Uses codestral-embed model via Mistral AI API.
@@ -31,6 +41,20 @@ export class MistralEmbeddingProvider {
             writable: true,
             value: false
         });
+        // Callback for rate limit throttling - message or null to clear
+        Object.defineProperty(this, "onThrottle", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        // Callback for batch progress - (processed, total) chunks
+        Object.defineProperty(this, "onBatchProgress", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
         // Trim the key to remove any accidental whitespace
         this.apiKey = (apiKey ?? '').trim();
     }
@@ -47,15 +71,67 @@ export class MistralEmbeddingProvider {
         if (texts.length === 0) {
             return [];
         }
-        const results = [];
-        // Process in batches
+        // Split into batches
+        const batches = [];
         for (let i = 0; i < texts.length; i += BATCH_SIZE) {
-            const batch = texts.slice(i, i + BATCH_SIZE);
-            const batchResults = await this.embedBatch(batch);
-            results.push(...batchResults);
+            batches.push(texts.slice(i, i + BATCH_SIZE));
+        }
+        // Process batches with limited concurrency
+        const results = [];
+        let completed = 0;
+        for (let i = 0; i < batches.length; i += CONCURRENCY) {
+            const concurrentBatches = batches.slice(i, i + CONCURRENCY);
+            // Fire concurrent requests
+            const batchResults = await Promise.all(concurrentBatches.map(batch => this.embedBatchWithRetry(batch)));
+            // Flatten and collect results (Promise.all preserves order)
+            for (const result of batchResults) {
+                results.push(...result);
+            }
+            // Report progress after concurrent group completes
+            completed += concurrentBatches.length;
+            const processed = Math.min(completed * BATCH_SIZE, texts.length);
+            this.onBatchProgress?.(processed, texts.length);
         }
         return results;
     }
+    /**
+     * Embed a batch with exponential backoff retry on rate limit errors.
+     */
+    async embedBatchWithRetry(batch) {
+        let attempt = 0;
+        let backoffMs = INITIAL_BACKOFF_MS;
+        while (true) {
+            try {
+                const result = await this.embedBatch(batch);
+                // Clear throttle message on success (if was throttling)
+                if (attempt > 0)
+                    this.onThrottle?.(null);
+                return result;
+            }
+            catch (error) {
+                if (this.isRateLimitError(error) && attempt < MAX_RETRIES) {
+                    attempt++;
+                    const secs = Math.round(backoffMs / 1000);
+                    this.onThrottle?.(`Rate limited - retry ${attempt}/${MAX_RETRIES} in ${secs}s`);
+                    await sleep(backoffMs);
+                    backoffMs = Math.min(backoffMs * 2, MAX_BACKOFF_MS);
+                }
+                else {
+                    throw error;
+                }
+            }
+        }
+    }
+    /**
+     * Check if an error is a rate limit error (429 or quota exceeded).
+     */
+    isRateLimitError(error) {
+        if (error instanceof Error) {
+            const msg = error.message.toLowerCase();
+            return (msg.includes('429') || msg.includes('rate') || msg.includes('quota'));
+        }
+        return false;
+    }
     async embedBatch(texts) {
         const response = await fetch(`${MISTRAL_API_BASE}/embeddings`, {
             method: 'POST',

package/dist/rag/embeddings/openai.d.ts CHANGED Viewed

@@ -13,9 +13,19 @@ export declare class OpenAIEmbeddingProvider implements EmbeddingProvider {
     readonly dimensions = 1536;
     private apiKey;
     private initialized;
+    onThrottle?: (message: string | null) => void;
+    onBatchProgress?: (processed: number, total: number) => void;
     constructor(apiKey?: string);
     initialize(_onProgress?: ModelProgressCallback): Promise<void>;
     embed(texts: string[]): Promise<number[][]>;
+    /**
+     * Embed a batch with exponential backoff retry on rate limit errors.
+     */
+    private embedBatchWithRetry;
+    /**
+     * Check if an error is a rate limit error (429 or quota exceeded).
+     */
+    private isRateLimitError;
     private embedBatch;
     embedSingle(text: string): Promise<number[]>;
     close(): void;

package/dist/rag/embeddings/openai.js CHANGED Viewed

@@ -6,7 +6,17 @@
  */
 const OPENAI_API_BASE = 'https://api.openai.com/v1';
 const MODEL = 'text-embedding-3-small';
-const BATCH_SIZE = 2048; // OpenAI supports up to 2048 texts per request
+// OpenAI limits: 8,191 tokens/text, 300,000 tokens/batch, 2,048 texts/batch
+// With avg ~1000 tokens/chunk, safe limit is 300 texts. Use 256 for margin.
+const BATCH_SIZE = 256;
+// Concurrency and rate limiting
+const CONCURRENCY = 5; // Max concurrent API requests
+const MAX_RETRIES = 12; // Max retry attempts on rate limit
+const INITIAL_BACKOFF_MS = 1000; // Start at 1s
+const MAX_BACKOFF_MS = 60000; // Cap at 60s (1 min)
+function sleep(ms) {
+    return new Promise(resolve => setTimeout(resolve, ms));
+}
 /**
  * OpenAI embedding provider.
  * Uses text-embedding-3-small model via OpenAI API.
@@ -31,6 +41,20 @@ export class OpenAIEmbeddingProvider {
             writable: true,
             value: false
         });
+        // Callback for rate limit throttling - message or null to clear
+        Object.defineProperty(this, "onThrottle", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        // Callback for batch progress - (processed, total) chunks
+        Object.defineProperty(this, "onBatchProgress", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
         // Trim the key to remove any accidental whitespace
         this.apiKey = (apiKey ?? '').trim();
     }
@@ -51,15 +75,67 @@ export class OpenAIEmbeddingProvider {
         if (texts.length === 0) {
             return [];
         }
-        const results = [];
-        // Process in batches
+        // Split into batches
+        const batches = [];
         for (let i = 0; i < texts.length; i += BATCH_SIZE) {
-            const batch = texts.slice(i, i + BATCH_SIZE);
-            const batchResults = await this.embedBatch(batch);
-            results.push(...batchResults);
+            batches.push(texts.slice(i, i + BATCH_SIZE));
+        }
+        // Process batches with limited concurrency
+        const results = [];
+        let completed = 0;
+        for (let i = 0; i < batches.length; i += CONCURRENCY) {
+            const concurrentBatches = batches.slice(i, i + CONCURRENCY);
+            // Fire concurrent requests
+            const batchResults = await Promise.all(concurrentBatches.map(batch => this.embedBatchWithRetry(batch)));
+            // Flatten and collect results (Promise.all preserves order)
+            for (const result of batchResults) {
+                results.push(...result);
+            }
+            // Report progress after concurrent group completes
+            completed += concurrentBatches.length;
+            const processed = Math.min(completed * BATCH_SIZE, texts.length);
+            this.onBatchProgress?.(processed, texts.length);
         }
         return results;
     }
+    /**
+     * Embed a batch with exponential backoff retry on rate limit errors.
+     */
+    async embedBatchWithRetry(batch) {
+        let attempt = 0;
+        let backoffMs = INITIAL_BACKOFF_MS;
+        while (true) {
+            try {
+                const result = await this.embedBatch(batch);
+                // Clear throttle message on success (if was throttling)
+                if (attempt > 0)
+                    this.onThrottle?.(null);
+                return result;
+            }
+            catch (error) {
+                if (this.isRateLimitError(error) && attempt < MAX_RETRIES) {
+                    attempt++;
+                    const secs = Math.round(backoffMs / 1000);
+                    this.onThrottle?.(`Rate limited - retry ${attempt}/${MAX_RETRIES} in ${secs}s`);
+                    await sleep(backoffMs);
+                    backoffMs = Math.min(backoffMs * 2, MAX_BACKOFF_MS);
+                }
+                else {
+                    throw error;
+                }
+            }
+        }
+    }
+    /**
+     * Check if an error is a rate limit error (429 or quota exceeded).
+     */
+    isRateLimitError(error) {
+        if (error instanceof Error) {
+            const msg = error.message.toLowerCase();
+            return (msg.includes('429') || msg.includes('rate') || msg.includes('quota'));
+        }
+        return false;
+    }
     async embedBatch(texts) {
         const response = await fetch(`${OPENAI_API_BASE}/embeddings`, {
             method: 'POST',

package/dist/rag/gitignore/index.js CHANGED Viewed

@@ -19,6 +19,47 @@ const ALWAYS_IGNORED = [
     '.viberag',
     'node_modules', // Fallback in case not in .gitignore
 ];
+/**
+ * Lock files that should always be ignored.
+ * These are machine-generated and provide no value for code search.
+ */
+const ALWAYS_IGNORED_FILES = [
+    // JavaScript/TypeScript
+    'package-lock.json', // npm
+    'yarn.lock', // Yarn
+    'pnpm-lock.yaml', // pnpm
+    'bun.lockb', // Bun
+    // Python
+    'uv.lock', // UV
+    'poetry.lock', // Poetry
+    'Pipfile.lock', // Pipenv
+    // Ruby
+    'Gemfile.lock', // Bundler
+    // PHP
+    'composer.lock', // Composer
+    // Rust
+    'Cargo.lock', // Cargo
+    // Go
+    'go.sum', // Go modules
+    // Java/Kotlin
+    'gradle.lockfile', // Gradle
+    // C#/.NET
+    'packages.lock.json', // NuGet
+    // Dart
+    'pubspec.lock', // Pub
+    // Swift
+    'Package.resolved', // Swift PM
+];
+/**
+ * File patterns that should always be ignored.
+ * These are build artifacts with no semantic value for code search.
+ */
+const ALWAYS_IGNORED_PATTERNS = [
+    '*.min.js', // Minified JavaScript
+    '*.min.css', // Minified CSS
+    '*.map', // Source maps
+    '*.d.ts.map', // TypeScript declaration maps
+];
 /**
  * Cache of Ignore instances per project root.
  */
@@ -37,8 +78,12 @@ export async function loadGitignore(projectRoot) {
         return cached;
     }
     const ig = ignore();
-    // Add always-ignored patterns
+    // Add always-ignored patterns (directories)
     ig.add(ALWAYS_IGNORED);
+    // Add always-ignored files (lock files)
+    ig.add(ALWAYS_IGNORED_FILES);
+    // Add always-ignored file patterns (minified, maps)
+    ig.add(ALWAYS_IGNORED_PATTERNS);
     // Try to load .gitignore
     const gitignorePath = path.join(projectRoot, '.gitignore');
     try {
@@ -99,8 +144,16 @@ export function clearAllGitignoreCache() {
  */
 export async function getGlobIgnorePatterns(projectRoot) {
     const patterns = [];
-    // Always exclude these (same as ALWAYS_IGNORED)
+    // Always exclude these directories (same as ALWAYS_IGNORED)
     patterns.push('**/.git/**', '**/.viberag/**', '**/node_modules/**');
+    // Always exclude lock files (same as ALWAYS_IGNORED_FILES)
+    for (const file of ALWAYS_IGNORED_FILES) {
+        patterns.push(`**/${file}`);
+    }
+    // Always exclude file patterns (minified, maps)
+    for (const pattern of ALWAYS_IGNORED_PATTERNS) {
+        patterns.push(`**/${pattern}`);
+    }
     // Try to load .gitignore
     const gitignorePath = path.join(projectRoot, '.gitignore');
     try {

package/dist/rag/indexer/indexer.d.ts CHANGED Viewed

@@ -55,6 +55,9 @@ export declare class Indexer {
     /**
      * Process a batch of files: read, chunk, embed, and prepare CodeChunks.
      *
+     * Strategy: Collect all chunks from all files first, then embed them
+     * together with full concurrency for maximum throughput.
+     *
      * Error handling strategy:
      * - File read/parse errors: Log and continue (file-specific, recoverable)
      * - Embedding/storage errors: Let propagate (fatal, affects all files)

package/dist/rag/indexer/indexer.js CHANGED Viewed

@@ -142,6 +142,17 @@ export class Indexer {
             // 6. Process new and modified files
             const filesToProcess = [...diff.new, ...diff.modified];
             const totalFiles = filesToProcess.length;
+            // Track cumulative chunks for progress display
+            let totalChunksProcessed = 0;
+            let lastProgress = 0;
+            // Wire throttle callback for rate limit feedback (API providers only)
+            if ('onThrottle' in embeddings) {
+                embeddings.onThrottle =
+                    message => {
+                        // Pass throttle message to UI - shown in yellow when set
+                        progressCallback?.(lastProgress, totalFiles, 'Indexing files', message, totalChunksProcessed);
+                    };
+            }
             if (totalFiles > 0) {
                 this.log('info', `Processing ${totalFiles} files`);
                 // First, delete existing chunks for modified files
@@ -153,7 +164,15 @@ export class Indexer {
                 const batchSize = 10;
                 for (let i = 0; i < filesToProcess.length; i += batchSize) {
                     const batch = filesToProcess.slice(i, i + batchSize);
-                    const batchChunks = await this.processFileBatch(batch, chunker, embeddings, storage, stats);
+                    const batchChunks = await this.processFileBatch(batch, chunker, embeddings, storage, stats, {
+                        totalFiles,
+                        currentFileOffset: i,
+                        progressCallback,
+                        onChunksProcessed: (count) => {
+                            totalChunksProcessed += count;
+                            progressCallback?.(i, totalFiles, 'Indexing files', null, totalChunksProcessed);
+                        },
+                    });
                     if (batchChunks.length > 0) {
                         // Use addChunks after table reset to avoid schema mismatch,
                         // upsertChunks for normal incremental updates
@@ -166,7 +185,8 @@ export class Indexer {
                         stats.chunksAdded += batchChunks.length;
                     }
                     const progress = Math.round(((i + batch.length) / totalFiles) * 100);
-                    progressCallback?.(i + batch.length, totalFiles, 'Indexing files');
+                    lastProgress = i + batch.length;
+                    progressCallback?.(i + batch.length, totalFiles, 'Indexing files', null, totalChunksProcessed);
                     this.log('debug', `Progress: ${progress}% (${i + batch.length}/${totalFiles})`);
                 }
             }
@@ -218,79 +238,112 @@ export class Indexer {
     /**
      * Process a batch of files: read, chunk, embed, and prepare CodeChunks.
      *
+     * Strategy: Collect all chunks from all files first, then embed them
+     * together with full concurrency for maximum throughput.
+     *
      * Error handling strategy:
      * - File read/parse errors: Log and continue (file-specific, recoverable)
      * - Embedding/storage errors: Let propagate (fatal, affects all files)
      */
-    async processFileBatch(filepaths, chunker, embeddings, storage, stats) {
-        const allChunks = [];
+    async processFileBatch(filepaths, chunker, embeddings, storage, stats, progressContext) {
+        const fileDataList = [];
         for (const filepath of filepaths) {
-            // Phase 1: File reading and chunking (recoverable errors)
-            let content;
-            let fileHash;
-            let chunks;
             try {
                 const absolutePath = path.join(this.projectRoot, filepath);
-                content = await fs.readFile(absolutePath, 'utf-8');
-                fileHash = (await import('../merkle/hash.js')).computeStringHash(content);
-                // Chunk the file (with size limits from config)
-                chunks = await chunker.chunkFile(filepath, content, this.config.chunkMaxSize);
+                const content = await fs.readFile(absolutePath, 'utf-8');
+                const fileHash = (await import('../merkle/hash.js')).computeStringHash(content);
+                const chunks = await chunker.chunkFile(filepath, content, this.config.chunkMaxSize);
+                fileDataList.push({ filepath, fileHash, chunks });
             }
             catch (error) {
-                // File-specific error (read/parse) - log and continue with other files
                 this.log('warn', `Failed to read/parse file: ${filepath}`, error);
                 continue;
             }
-            // Phase 2: Embedding and storage (fatal errors - let propagate)
-            // NO try-catch here - API/storage errors should stop indexing
-            // Check embedding cache for each chunk
-            const contentHashes = chunks.map(c => c.contentHash);
-            const cachedEmbeddings = await storage.getCachedEmbeddings(contentHashes);
-            // Compute embeddings for cache misses
-            const missingChunks = chunks.filter(c => !cachedEmbeddings.has(c.contentHash));
-            if (missingChunks.length > 0) {
-                // Embed contextHeader + text for semantic relevance
-                const texts = missingChunks.map(c => c.contextHeader ? `${c.contextHeader}\n${c.text}` : c.text);
-                const newEmbeddings = await embeddings.embed(texts);
-                stats.embeddingsComputed += missingChunks.length;
-                // Cache the new embeddings
-                const cacheEntries = missingChunks.map((chunk, i) => ({
-                    contentHash: chunk.contentHash,
-                    vector: newEmbeddings[i],
-                    createdAt: new Date().toISOString(),
-                }));
-                await storage.cacheEmbeddings(cacheEntries);
-                // Add to cachedEmbeddings map
-                missingChunks.forEach((chunk, i) => {
-                    cachedEmbeddings.set(chunk.contentHash, newEmbeddings[i]);
+        }
+        const allChunksWithContext = [];
+        for (const fd of fileDataList) {
+            for (const chunk of fd.chunks) {
+                allChunksWithContext.push({
+                    chunk,
+                    filepath: fd.filepath,
+                    fileHash: fd.fileHash,
                 });
             }
-            stats.embeddingsCached += chunks.length - missingChunks.length;
-            // Build CodeChunk objects
-            const filename = path.basename(filepath);
-            const extension = path.extname(filepath);
-            for (const chunk of chunks) {
-                const vector = cachedEmbeddings.get(chunk.contentHash);
-                allChunks.push({
-                    id: `${filepath}:${chunk.startLine}`,
-                    vector,
-                    text: chunk.text,
-                    contentHash: chunk.contentHash,
-                    filepath,
-                    filename,
-                    extension,
-                    type: chunk.type,
-                    name: chunk.name,
-                    startLine: chunk.startLine,
-                    endLine: chunk.endLine,
-                    fileHash,
-                    // New metadata fields from schema v2
-                    signature: chunk.signature,
-                    docstring: chunk.docstring,
-                    isExported: chunk.isExported,
-                    decoratorNames: chunk.decoratorNames,
-                });
+        }
+        if (allChunksWithContext.length === 0) {
+            return [];
+        }
+        // Phase 2: Check embedding cache for ALL chunks at once
+        const contentHashes = allChunksWithContext.map(c => c.chunk.contentHash);
+        const cachedEmbeddings = await storage.getCachedEmbeddings(contentHashes);
+        // Find all cache misses
+        const missingChunksWithContext = allChunksWithContext.filter(c => !cachedEmbeddings.has(c.chunk.contentHash));
+        stats.embeddingsCached +=
+            allChunksWithContext.length - missingChunksWithContext.length;
+        // Phase 3: Embed ALL missing chunks together (with full concurrency)
+        if (missingChunksWithContext.length > 0) {
+            // Track chunks processed for progress updates
+            let lastReportedChunks = 0;
+            // Wire batch progress callback to report incremental chunks
+            if (progressContext?.onChunksProcessed && 'onBatchProgress' in embeddings) {
+                embeddings.onBatchProgress = (processed, _total) => {
+                    // Report only the delta since last update
+                    const delta = processed - lastReportedChunks;
+                    if (delta > 0) {
+                        progressContext.onChunksProcessed(delta);
+                        lastReportedChunks = processed;
+                    }
+                };
+            }
+            // Embed all chunks together
+            const texts = missingChunksWithContext.map(c => c.chunk.contextHeader
+                ? `${c.chunk.contextHeader}\n${c.chunk.text}`
+                : c.chunk.text);
+            const newEmbeddings = await embeddings.embed(texts);
+            stats.embeddingsComputed += missingChunksWithContext.length;
+            // Report any remaining chunks not yet reported
+            const remainingDelta = missingChunksWithContext.length - lastReportedChunks;
+            if (remainingDelta > 0 && progressContext?.onChunksProcessed) {
+                progressContext.onChunksProcessed(remainingDelta);
             }
+            // Clear batch progress callback
+            if ('onBatchProgress' in embeddings) {
+                embeddings.onBatchProgress = undefined;
+            }
+            // Cache the new embeddings
+            const cacheEntries = missingChunksWithContext.map((c, i) => ({
+                contentHash: c.chunk.contentHash,
+                vector: newEmbeddings[i],
+                createdAt: new Date().toISOString(),
+            }));
+            await storage.cacheEmbeddings(cacheEntries);
+            // Add to cachedEmbeddings map
+            missingChunksWithContext.forEach((c, i) => {
+                cachedEmbeddings.set(c.chunk.contentHash, newEmbeddings[i]);
+            });
+        }
+        // Phase 4: Build CodeChunk objects
+        const allChunks = [];
+        for (const { chunk, filepath, fileHash } of allChunksWithContext) {
+            const vector = cachedEmbeddings.get(chunk.contentHash);
+            allChunks.push({
+                id: `${filepath}:${chunk.startLine}`,
+                vector,
+                text: chunk.text,
+                contentHash: chunk.contentHash,
+                filepath,
+                filename: path.basename(filepath),
+                extension: path.extname(filepath),
+                type: chunk.type,
+                name: chunk.name,
+                startLine: chunk.startLine,
+                endLine: chunk.endLine,
+                fileHash,
+                signature: chunk.signature,
+                docstring: chunk.docstring,
+                isExported: chunk.isExported,
+                decoratorNames: chunk.decoratorNames,
+            });
         }
         return allChunks;
     }

package/dist/rag/indexer/types.d.ts CHANGED Viewed

@@ -60,8 +60,13 @@ export interface IndexStats {
 }
 /**
  * Progress callback for indexing operations.
+ * @param current - Current progress count
+ * @param total - Total items (0 for indeterminate)
+ * @param stage - Human-readable stage name
+ * @param throttleMessage - Rate limit message (shown in yellow) or null to clear
+ * @param chunksProcessed - Number of chunks embedded so far
  */
-export type ProgressCallback = (current: number, total: number, stage: string) => void;
+export type ProgressCallback = (current: number, total: number, stage: string, throttleMessage?: string | null, chunksProcessed?: number) => void;
 /**
  * Create empty index stats.
  */

package/dist/rag/search/index.js CHANGED Viewed

@@ -20,8 +20,6 @@ export { ftsSearch, ensureFtsIndex } from './fts.js';
 export { hybridRerank } from './hybrid.js';
 /** Default search limit */
 const DEFAULT_LIMIT = 10;
-/** Exhaustive mode limit (high but bounded) */
-const EXHAUSTIVE_LIMIT = 500;
 /** Default BM25 weight for hybrid search */
 const DEFAULT_BM25_WEIGHT = 0.3;
 /** Default oversample multiplier for hybrid search */
@@ -79,9 +77,7 @@ export class SearchEngine {
     async search(query, options = {}) {
         const start = Date.now();
         const mode = options.mode ?? 'hybrid';
-        const limit = options.exhaustive
-            ? EXHAUSTIVE_LIMIT
-            : (options.limit ?? DEFAULT_LIMIT);
+        const limit = options.limit ?? DEFAULT_LIMIT;
         const filterClause = buildFilterClause(options.filters);
         await this.ensureInitialized();
         const table = await this.getTable();
@@ -104,10 +100,6 @@ export class SearchEngine {
                 results = await this.searchHybrid(table, query, limit, options.bm25Weight ?? DEFAULT_BM25_WEIGHT, filterClause, options.minScore, options.autoBoost ?? true, options.autoBoostThreshold ?? 0.3, options.returnDebug ?? false);
                 break;
         }
-        // Add total matches for exhaustive mode
-        if (options.exhaustive) {
-            results.totalMatches = results.results.length;
-        }
         results.elapsedMs = Date.now() - start;
         return results;
     }

package/dist/rag/search/types.d.ts CHANGED Viewed

@@ -74,8 +74,6 @@ export interface SearchResults {
     searchType: SearchMode;
     /** Time taken in milliseconds */
     elapsedMs: number;
-    /** Total matches (when exhaustive=true) */
-    totalMatches?: number;
     /** Debug info for hybrid search (when return_debug=true) */
     debug?: SearchDebugInfo;
 }
@@ -111,8 +109,6 @@ export interface SearchOptions {
     limit?: number;
     /** Weight for BM25 in hybrid search (0.0-1.0, default: 0.3) */
     bm25Weight?: number;
-    /** Return all matches above threshold (default: false) */
-    exhaustive?: boolean;
     /** Minimum score threshold 0-1 (default: 0) */
     minScore?: number;
     /** Transparent filters */

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
 	"name": "viberag",
-	"version": "0.3.0",
+	"version": "0.3.2",
 	"description": "Local code RAG for AI coding assistants - semantic search via MCP server",
 	"license": "AGPL-3.0",
 	"keywords": [