viberag 0.3.0 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -60,7 +60,7 @@ Your coding agent would normally use Search / Grep / Find and guess search terms
60
60
 
61
61
  When searching for "authentication", VibeRAG will find all code snippets that are relevant to authentication, such as "login", "logout", "register", and names of functions and classes like `AuthDependency`, `APIKeyCache`, etc.
62
62
 
63
- This ensures a more exhaustive search of your codebase so you don't miss important files and features that are relevant to your changes or refactor.
63
+ This ensures comprehensive search of your codebase so you don't miss important files and features that are relevant to your changes or refactor.
64
64
 
65
65
  ### Great for Monorepos
66
66
 
package/dist/cli/app.js CHANGED
@@ -148,7 +148,14 @@ export default function App() {
148
148
  total: 0,
149
149
  stage: 'Indexing',
150
150
  });
151
- const stats = await runIndex(projectRoot, true, (current, total, stage) => setAppStatus({ state: 'indexing', current, total, stage }));
151
+ const stats = await runIndex(projectRoot, true, (current, total, stage, throttleMessage, chunksProcessed) => setAppStatus({
152
+ state: 'indexing',
153
+ current,
154
+ total,
155
+ stage,
156
+ throttleMessage,
157
+ chunksProcessed,
158
+ }));
152
159
  addOutput('system', formatIndexStats(stats));
153
160
  // Reload stats after indexing
154
161
  const newStats = await loadIndexStats(projectRoot);
@@ -29,7 +29,7 @@ export declare function runInit(projectRoot: string, isReinit?: boolean, wizardC
29
29
  * When force=true, also updates config dimensions to match current PROVIDER_CONFIGS
30
30
  * (handles dimension changes after viberag upgrades).
31
31
  */
32
- export declare function runIndex(projectRoot: string, force?: boolean, onProgress?: (current: number, total: number, stage: string) => void): Promise<IndexStats>;
32
+ export declare function runIndex(projectRoot: string, force?: boolean, onProgress?: (current: number, total: number, stage: string, throttleMessage?: string | null, chunksProcessed?: number) => void): Promise<IndexStats>;
33
33
  /**
34
34
  * Format index stats for display.
35
35
  */
@@ -55,7 +55,14 @@ Manual MCP Setup:
55
55
  const action = force ? 'Reindexing' : 'Indexing';
56
56
  addOutput('system', `${action} codebase...`);
57
57
  setAppStatus({ state: 'indexing', current: 0, total: 0, stage: action });
58
- runIndex(projectRoot, force, (current, total, stage) => setAppStatus({ state: 'indexing', current, total, stage }))
58
+ runIndex(projectRoot, force, (current, total, stage, throttleMessage, chunksProcessed) => setAppStatus({
59
+ state: 'indexing',
60
+ current,
61
+ total,
62
+ stage,
63
+ throttleMessage,
64
+ chunksProcessed,
65
+ }))
59
66
  .then(async (stats) => {
60
67
  addOutput('system', formatIndexStats(stats));
61
68
  // Reload stats after indexing
@@ -25,12 +25,25 @@ function formatStatus(status) {
25
25
  case 'ready':
26
26
  return { text: 'Ready', color: 'green', showSpinner: false };
27
27
  case 'indexing': {
28
+ // Throttle status takes precedence - show in yellow
29
+ if (status.throttleMessage) {
30
+ return {
31
+ text: status.throttleMessage,
32
+ color: 'yellow',
33
+ showSpinner: true,
34
+ };
35
+ }
36
+ // Normal indexing display
28
37
  if (status.total === 0) {
29
38
  return { text: `${status.stage}`, color: 'cyan', showSpinner: true };
30
39
  }
31
40
  const percent = Math.round((status.current / status.total) * 100);
41
+ // Include chunk count if available
42
+ const chunkInfo = status.chunksProcessed !== undefined
43
+ ? ` · ${status.chunksProcessed} chunks`
44
+ : '';
32
45
  return {
33
- text: `${status.stage} ${status.current}/${status.total} (${percent}%)`,
46
+ text: `${status.stage} ${status.current}/${status.total} (${percent}%)${chunkInfo}`,
34
47
  color: 'cyan',
35
48
  showSpinner: true,
36
49
  };
@@ -61,6 +61,10 @@ export type AppStatus = {
61
61
  current: number;
62
62
  total: number;
63
63
  stage: string;
64
+ /** Rate limit message (shown in yellow when set) */
65
+ throttleMessage?: string | null;
66
+ /** Number of chunks embedded so far */
67
+ chunksProcessed?: number;
64
68
  } | {
65
69
  state: 'searching';
66
70
  } | {
@@ -34,14 +34,16 @@ async function ensureInitialized(projectRoot) {
34
34
  }
35
35
  }
36
36
  /**
37
- * Default maximum response size in bytes (100KB).
37
+ * Default maximum response size in bytes (50KB).
38
38
  * Reduces result count to fit; does NOT truncate text.
39
+ * Note: Claude Code has ~25K token limit (~100KB), so 50KB default leaves headroom.
39
40
  */
40
- const DEFAULT_MAX_RESPONSE_SIZE = 100 * 1024;
41
+ const DEFAULT_MAX_RESPONSE_SIZE = 50 * 1024;
41
42
  /**
42
- * Maximum allowed response size (500KB).
43
+ * Maximum allowed response size (100KB).
44
+ * Hard cap to prevent token overflow in AI tools.
43
45
  */
44
- const MAX_RESPONSE_SIZE = 500 * 1024;
46
+ const MAX_RESPONSE_SIZE = 100 * 1024;
45
47
  /**
46
48
  * Overhead per result in JSON (metadata fields, formatting).
47
49
  */
@@ -129,10 +131,6 @@ function formatSearchResults(results, includeDebug = false, maxResponseSize = DE
129
131
  response['originalResultCount'] = results.results.length;
130
132
  response['reducedForSize'] = true;
131
133
  }
132
- // Add totalMatches for exhaustive mode
133
- if (results.totalMatches !== undefined) {
134
- response['totalMatches'] = results.totalMatches;
135
- }
136
134
  // Add debug info for AI evaluation
137
135
  if (includeDebug && results.debug) {
138
136
  response['debug'] = formatDebugInfo(results.debug);
@@ -241,9 +239,9 @@ export function createMcpServer(projectRoot) {
241
239
  server.addTool({
242
240
  name: 'codebase_search',
243
241
  description: `
244
- Codebase search: semantic search, keyword search, and hybrid search options.
245
- Use this when you need to find code that matches semantic meaning and keyword patterns.
246
- This tool helps you perform exhaustive searches of the codebase and get the best
242
+ Codebase search: semantic search, keyword search, and hybrid search options.
243
+ Use this when you need to find code that matches semantic meaning and keyword patterns.
244
+ This tool helps you perform comprehensive searches of the codebase and get the best
247
245
  context and understanding when exploring and searching the codebase, docs, etc.
248
246
 
249
247
  USE FOR CODEBASE EXPLORATION:
@@ -299,8 +297,7 @@ For thorough searches, consider:
299
297
  1. Start with hybrid mode, default weights
300
298
  2. Check debug info to evaluate search quality
301
299
  3. If maxVectorScore < 0.3, try exact mode or higher bm25_weight
302
- 4. If results seem incomplete, try codebase_parallel_search for comparison
303
- 5. Use exhaustive=true for refactoring tasks needing ALL matches
300
+ 4. If results seem incomplete, run more searches and try codebase_parallel_search for comparison
304
301
 
305
302
  RESULT INTERPRETATION:
306
303
  - score: Combined relevance (higher = better)
@@ -351,11 +348,6 @@ Production code: { path_not_contains: ["test", "mock", "fixture"], is_exported:
351
348
  .optional()
352
349
  .default(10)
353
350
  .describe('Maximum number of results (1-100, default: 10)'),
354
- exhaustive: z
355
- .boolean()
356
- .optional()
357
- .default(false)
358
- .describe('Return all matches (for refactoring/auditing)'),
359
351
  min_score: z
360
352
  .number()
361
353
  .min(0)
@@ -398,9 +390,8 @@ Production code: { path_not_contains: ["test", "mock", "fixture"], is_exported:
398
390
  .max(MAX_RESPONSE_SIZE)
399
391
  .optional()
400
392
  .default(DEFAULT_MAX_RESPONSE_SIZE)
401
- .describe('Maximum response size in bytes (default: 100KB, max: 500KB). ' +
402
- 'Reduces result count to fit within limit; does NOT truncate text content. ' +
403
- 'Use a larger value for exhaustive searches.'),
393
+ .describe('Maximum response size in bytes (default: 50KB, max: 100KB). ' +
394
+ 'Reduces result count to fit within limit; does NOT truncate text content.'),
404
395
  }),
405
396
  execute: async (args) => {
406
397
  await ensureInitialized(projectRoot);
@@ -425,7 +416,6 @@ Production code: { path_not_contains: ["test", "mock", "fixture"], is_exported:
425
416
  const results = await engine.search(args.query, {
426
417
  mode: args.mode,
427
418
  limit: args.limit,
428
- exhaustive: args.exhaustive,
429
419
  minScore: args.min_score,
430
420
  filters,
431
421
  codeSnippet: args.code_snippet,
@@ -545,9 +535,9 @@ Production code: { path_not_contains: ["test", "mock", "fixture"], is_exported:
545
535
  server.addTool({
546
536
  name: 'codebase_parallel_search',
547
537
  description: `
548
- Codebase Parallel Search: run multiple semantic search, keyword search, and hybrid searches in parallel and compare results.
549
- Use this when you need to run multiple searches at once to find code that matches semantic meaning and keyword patterns.
550
- This tool helps you perform exhaustive searches of the codebase and get the best
538
+ Codebase Parallel Search: run multiple semantic search, keyword search, and hybrid searches in parallel and compare results.
539
+ Use this when you need to run multiple searches at once to find code that matches semantic meaning and keyword patterns.
540
+ This tool helps you perform comprehensive searches of the codebase and get the best
551
541
  context and understanding when exploring and searching the codebase, docs, etc.
552
542
 
553
543
  NOTE: This is for narrower sets of queries. Parallel searches may return a large number of results,
@@ -17,9 +17,19 @@ export declare class GeminiEmbeddingProvider implements EmbeddingProvider {
17
17
  readonly dimensions = 1536;
18
18
  private apiKey;
19
19
  private initialized;
20
+ onThrottle?: (message: string | null) => void;
21
+ onBatchProgress?: (processed: number, total: number) => void;
20
22
  constructor(apiKey?: string);
21
23
  initialize(_onProgress?: ModelProgressCallback): Promise<void>;
22
24
  embed(texts: string[]): Promise<number[][]>;
25
+ /**
26
+ * Embed a batch with exponential backoff retry on rate limit errors.
27
+ */
28
+ private embedBatchWithRetry;
29
+ /**
30
+ * Check if an error is a rate limit error (429 or quota exceeded).
31
+ */
32
+ private isRateLimitError;
23
33
  private embedBatch;
24
34
  embedSingle(text: string): Promise<number[]>;
25
35
  close(): void;
@@ -10,7 +10,17 @@
10
10
  */
11
11
  const GEMINI_API_BASE = 'https://generativelanguage.googleapis.com/v1beta/models';
12
12
  const MODEL = 'gemini-embedding-001';
13
- const BATCH_SIZE = 100; // Gemini supports up to 100 texts per request
13
+ // Gemini limits: 2,048 tokens/text, 20,000 tokens/batch, 100-250 texts/batch
14
+ // With avg ~1000 tokens/chunk, safe limit is 20 texts.
15
+ const BATCH_SIZE = 20;
16
+ // Concurrency and rate limiting
17
+ const CONCURRENCY = 5; // Max concurrent API requests
18
+ const MAX_RETRIES = 12; // Max retry attempts on rate limit
19
+ const INITIAL_BACKOFF_MS = 1000; // Start at 1s
20
+ const MAX_BACKOFF_MS = 60000; // Cap at 60s (1 min)
21
+ function sleep(ms) {
22
+ return new Promise(resolve => setTimeout(resolve, ms));
23
+ }
14
24
  /**
15
25
  * Gemini embedding provider.
16
26
  * Uses gemini-embedding-001 model via Google's Generative AI API.
@@ -35,6 +45,20 @@ export class GeminiEmbeddingProvider {
35
45
  writable: true,
36
46
  value: false
37
47
  });
48
+ // Callback for rate limit throttling - message or null to clear
49
+ Object.defineProperty(this, "onThrottle", {
50
+ enumerable: true,
51
+ configurable: true,
52
+ writable: true,
53
+ value: void 0
54
+ });
55
+ // Callback for batch progress - (processed, total) chunks
56
+ Object.defineProperty(this, "onBatchProgress", {
57
+ enumerable: true,
58
+ configurable: true,
59
+ writable: true,
60
+ value: void 0
61
+ });
38
62
  // Trim the key to remove any accidental whitespace
39
63
  this.apiKey = (apiKey ?? '').trim();
40
64
  }
@@ -51,15 +75,67 @@ export class GeminiEmbeddingProvider {
51
75
  if (texts.length === 0) {
52
76
  return [];
53
77
  }
54
- const results = [];
55
- // Process in batches
78
+ // Split into batches
79
+ const batches = [];
56
80
  for (let i = 0; i < texts.length; i += BATCH_SIZE) {
57
- const batch = texts.slice(i, i + BATCH_SIZE);
58
- const batchResults = await this.embedBatch(batch);
59
- results.push(...batchResults);
81
+ batches.push(texts.slice(i, i + BATCH_SIZE));
82
+ }
83
+ // Process batches with limited concurrency
84
+ const results = [];
85
+ let completed = 0;
86
+ for (let i = 0; i < batches.length; i += CONCURRENCY) {
87
+ const concurrentBatches = batches.slice(i, i + CONCURRENCY);
88
+ // Fire concurrent requests
89
+ const batchResults = await Promise.all(concurrentBatches.map(batch => this.embedBatchWithRetry(batch)));
90
+ // Flatten and collect results (Promise.all preserves order)
91
+ for (const result of batchResults) {
92
+ results.push(...result);
93
+ }
94
+ // Report progress after concurrent group completes
95
+ completed += concurrentBatches.length;
96
+ const processed = Math.min(completed * BATCH_SIZE, texts.length);
97
+ this.onBatchProgress?.(processed, texts.length);
60
98
  }
61
99
  return results;
62
100
  }
101
+ /**
102
+ * Embed a batch with exponential backoff retry on rate limit errors.
103
+ */
104
+ async embedBatchWithRetry(batch) {
105
+ let attempt = 0;
106
+ let backoffMs = INITIAL_BACKOFF_MS;
107
+ while (true) {
108
+ try {
109
+ const result = await this.embedBatch(batch);
110
+ // Clear throttle message on success (if was throttling)
111
+ if (attempt > 0)
112
+ this.onThrottle?.(null);
113
+ return result;
114
+ }
115
+ catch (error) {
116
+ if (this.isRateLimitError(error) && attempt < MAX_RETRIES) {
117
+ attempt++;
118
+ const secs = Math.round(backoffMs / 1000);
119
+ this.onThrottle?.(`Rate limited - retry ${attempt}/${MAX_RETRIES} in ${secs}s`);
120
+ await sleep(backoffMs);
121
+ backoffMs = Math.min(backoffMs * 2, MAX_BACKOFF_MS);
122
+ }
123
+ else {
124
+ throw error;
125
+ }
126
+ }
127
+ }
128
+ }
129
+ /**
130
+ * Check if an error is a rate limit error (429 or quota exceeded).
131
+ */
132
+ isRateLimitError(error) {
133
+ if (error instanceof Error) {
134
+ const msg = error.message.toLowerCase();
135
+ return (msg.includes('429') || msg.includes('rate') || msg.includes('quota'));
136
+ }
137
+ return false;
138
+ }
63
139
  async embedBatch(texts) {
64
140
  const url = `${GEMINI_API_BASE}/${MODEL}:batchEmbedContents`;
65
141
  const response = await fetch(url, {
@@ -13,9 +13,19 @@ export declare class MistralEmbeddingProvider implements EmbeddingProvider {
13
13
  readonly dimensions = 1536;
14
14
  private apiKey;
15
15
  private initialized;
16
+ onThrottle?: (message: string | null) => void;
17
+ onBatchProgress?: (processed: number, total: number) => void;
16
18
  constructor(apiKey?: string);
17
19
  initialize(_onProgress?: ModelProgressCallback): Promise<void>;
18
20
  embed(texts: string[]): Promise<number[][]>;
21
+ /**
22
+ * Embed a batch with exponential backoff retry on rate limit errors.
23
+ */
24
+ private embedBatchWithRetry;
25
+ /**
26
+ * Check if an error is a rate limit error (429 or quota exceeded).
27
+ */
28
+ private isRateLimitError;
19
29
  private embedBatch;
20
30
  embedSingle(text: string): Promise<number[]>;
21
31
  close(): void;
@@ -6,7 +6,17 @@
6
6
  */
7
7
  const MISTRAL_API_BASE = 'https://api.mistral.ai/v1';
8
8
  const MODEL = 'codestral-embed';
9
- const BATCH_SIZE = 64; // Mistral supports batching
9
+ // Mistral limits: 8,192 tokens/text, 16,000 tokens/batch TOTAL
10
+ // With avg ~500 tokens/chunk, can fit ~32. Use 24 for safety margin.
11
+ const BATCH_SIZE = 24;
12
+ // Concurrency and rate limiting
13
+ const CONCURRENCY = 5; // Max concurrent API requests
14
+ const MAX_RETRIES = 12; // Max retry attempts on rate limit
15
+ const INITIAL_BACKOFF_MS = 1000; // Start at 1s
16
+ const MAX_BACKOFF_MS = 60000; // Cap at 60s (1 min)
17
+ function sleep(ms) {
18
+ return new Promise(resolve => setTimeout(resolve, ms));
19
+ }
10
20
  /**
11
21
  * Mistral embedding provider.
12
22
  * Uses codestral-embed model via Mistral AI API.
@@ -31,6 +41,20 @@ export class MistralEmbeddingProvider {
31
41
  writable: true,
32
42
  value: false
33
43
  });
44
+ // Callback for rate limit throttling - message or null to clear
45
+ Object.defineProperty(this, "onThrottle", {
46
+ enumerable: true,
47
+ configurable: true,
48
+ writable: true,
49
+ value: void 0
50
+ });
51
+ // Callback for batch progress - (processed, total) chunks
52
+ Object.defineProperty(this, "onBatchProgress", {
53
+ enumerable: true,
54
+ configurable: true,
55
+ writable: true,
56
+ value: void 0
57
+ });
34
58
  // Trim the key to remove any accidental whitespace
35
59
  this.apiKey = (apiKey ?? '').trim();
36
60
  }
@@ -47,15 +71,67 @@ export class MistralEmbeddingProvider {
47
71
  if (texts.length === 0) {
48
72
  return [];
49
73
  }
50
- const results = [];
51
- // Process in batches
74
+ // Split into batches
75
+ const batches = [];
52
76
  for (let i = 0; i < texts.length; i += BATCH_SIZE) {
53
- const batch = texts.slice(i, i + BATCH_SIZE);
54
- const batchResults = await this.embedBatch(batch);
55
- results.push(...batchResults);
77
+ batches.push(texts.slice(i, i + BATCH_SIZE));
78
+ }
79
+ // Process batches with limited concurrency
80
+ const results = [];
81
+ let completed = 0;
82
+ for (let i = 0; i < batches.length; i += CONCURRENCY) {
83
+ const concurrentBatches = batches.slice(i, i + CONCURRENCY);
84
+ // Fire concurrent requests
85
+ const batchResults = await Promise.all(concurrentBatches.map(batch => this.embedBatchWithRetry(batch)));
86
+ // Flatten and collect results (Promise.all preserves order)
87
+ for (const result of batchResults) {
88
+ results.push(...result);
89
+ }
90
+ // Report progress after concurrent group completes
91
+ completed += concurrentBatches.length;
92
+ const processed = Math.min(completed * BATCH_SIZE, texts.length);
93
+ this.onBatchProgress?.(processed, texts.length);
56
94
  }
57
95
  return results;
58
96
  }
97
+ /**
98
+ * Embed a batch with exponential backoff retry on rate limit errors.
99
+ */
100
+ async embedBatchWithRetry(batch) {
101
+ let attempt = 0;
102
+ let backoffMs = INITIAL_BACKOFF_MS;
103
+ while (true) {
104
+ try {
105
+ const result = await this.embedBatch(batch);
106
+ // Clear throttle message on success (if was throttling)
107
+ if (attempt > 0)
108
+ this.onThrottle?.(null);
109
+ return result;
110
+ }
111
+ catch (error) {
112
+ if (this.isRateLimitError(error) && attempt < MAX_RETRIES) {
113
+ attempt++;
114
+ const secs = Math.round(backoffMs / 1000);
115
+ this.onThrottle?.(`Rate limited - retry ${attempt}/${MAX_RETRIES} in ${secs}s`);
116
+ await sleep(backoffMs);
117
+ backoffMs = Math.min(backoffMs * 2, MAX_BACKOFF_MS);
118
+ }
119
+ else {
120
+ throw error;
121
+ }
122
+ }
123
+ }
124
+ }
125
+ /**
126
+ * Check if an error is a rate limit error (429 or quota exceeded).
127
+ */
128
+ isRateLimitError(error) {
129
+ if (error instanceof Error) {
130
+ const msg = error.message.toLowerCase();
131
+ return (msg.includes('429') || msg.includes('rate') || msg.includes('quota'));
132
+ }
133
+ return false;
134
+ }
59
135
  async embedBatch(texts) {
60
136
  const response = await fetch(`${MISTRAL_API_BASE}/embeddings`, {
61
137
  method: 'POST',
@@ -13,9 +13,19 @@ export declare class OpenAIEmbeddingProvider implements EmbeddingProvider {
13
13
  readonly dimensions = 1536;
14
14
  private apiKey;
15
15
  private initialized;
16
+ onThrottle?: (message: string | null) => void;
17
+ onBatchProgress?: (processed: number, total: number) => void;
16
18
  constructor(apiKey?: string);
17
19
  initialize(_onProgress?: ModelProgressCallback): Promise<void>;
18
20
  embed(texts: string[]): Promise<number[][]>;
21
+ /**
22
+ * Embed a batch with exponential backoff retry on rate limit errors.
23
+ */
24
+ private embedBatchWithRetry;
25
+ /**
26
+ * Check if an error is a rate limit error (429 or quota exceeded).
27
+ */
28
+ private isRateLimitError;
19
29
  private embedBatch;
20
30
  embedSingle(text: string): Promise<number[]>;
21
31
  close(): void;
@@ -6,7 +6,17 @@
6
6
  */
7
7
  const OPENAI_API_BASE = 'https://api.openai.com/v1';
8
8
  const MODEL = 'text-embedding-3-small';
9
- const BATCH_SIZE = 2048; // OpenAI supports up to 2048 texts per request
9
+ // OpenAI limits: 8,191 tokens/text, 300,000 tokens/batch, 2,048 texts/batch
10
+ // With avg ~1000 tokens/chunk, safe limit is 300 texts. Use 256 for margin.
11
+ const BATCH_SIZE = 256;
12
+ // Concurrency and rate limiting
13
+ const CONCURRENCY = 5; // Max concurrent API requests
14
+ const MAX_RETRIES = 12; // Max retry attempts on rate limit
15
+ const INITIAL_BACKOFF_MS = 1000; // Start at 1s
16
+ const MAX_BACKOFF_MS = 60000; // Cap at 60s (1 min)
17
+ function sleep(ms) {
18
+ return new Promise(resolve => setTimeout(resolve, ms));
19
+ }
10
20
  /**
11
21
  * OpenAI embedding provider.
12
22
  * Uses text-embedding-3-small model via OpenAI API.
@@ -31,6 +41,20 @@ export class OpenAIEmbeddingProvider {
31
41
  writable: true,
32
42
  value: false
33
43
  });
44
+ // Callback for rate limit throttling - message or null to clear
45
+ Object.defineProperty(this, "onThrottle", {
46
+ enumerable: true,
47
+ configurable: true,
48
+ writable: true,
49
+ value: void 0
50
+ });
51
+ // Callback for batch progress - (processed, total) chunks
52
+ Object.defineProperty(this, "onBatchProgress", {
53
+ enumerable: true,
54
+ configurable: true,
55
+ writable: true,
56
+ value: void 0
57
+ });
34
58
  // Trim the key to remove any accidental whitespace
35
59
  this.apiKey = (apiKey ?? '').trim();
36
60
  }
@@ -51,15 +75,67 @@ export class OpenAIEmbeddingProvider {
51
75
  if (texts.length === 0) {
52
76
  return [];
53
77
  }
54
- const results = [];
55
- // Process in batches
78
+ // Split into batches
79
+ const batches = [];
56
80
  for (let i = 0; i < texts.length; i += BATCH_SIZE) {
57
- const batch = texts.slice(i, i + BATCH_SIZE);
58
- const batchResults = await this.embedBatch(batch);
59
- results.push(...batchResults);
81
+ batches.push(texts.slice(i, i + BATCH_SIZE));
82
+ }
83
+ // Process batches with limited concurrency
84
+ const results = [];
85
+ let completed = 0;
86
+ for (let i = 0; i < batches.length; i += CONCURRENCY) {
87
+ const concurrentBatches = batches.slice(i, i + CONCURRENCY);
88
+ // Fire concurrent requests
89
+ const batchResults = await Promise.all(concurrentBatches.map(batch => this.embedBatchWithRetry(batch)));
90
+ // Flatten and collect results (Promise.all preserves order)
91
+ for (const result of batchResults) {
92
+ results.push(...result);
93
+ }
94
+ // Report progress after concurrent group completes
95
+ completed += concurrentBatches.length;
96
+ const processed = Math.min(completed * BATCH_SIZE, texts.length);
97
+ this.onBatchProgress?.(processed, texts.length);
60
98
  }
61
99
  return results;
62
100
  }
101
+ /**
102
+ * Embed a batch with exponential backoff retry on rate limit errors.
103
+ */
104
+ async embedBatchWithRetry(batch) {
105
+ let attempt = 0;
106
+ let backoffMs = INITIAL_BACKOFF_MS;
107
+ while (true) {
108
+ try {
109
+ const result = await this.embedBatch(batch);
110
+ // Clear throttle message on success (if was throttling)
111
+ if (attempt > 0)
112
+ this.onThrottle?.(null);
113
+ return result;
114
+ }
115
+ catch (error) {
116
+ if (this.isRateLimitError(error) && attempt < MAX_RETRIES) {
117
+ attempt++;
118
+ const secs = Math.round(backoffMs / 1000);
119
+ this.onThrottle?.(`Rate limited - retry ${attempt}/${MAX_RETRIES} in ${secs}s`);
120
+ await sleep(backoffMs);
121
+ backoffMs = Math.min(backoffMs * 2, MAX_BACKOFF_MS);
122
+ }
123
+ else {
124
+ throw error;
125
+ }
126
+ }
127
+ }
128
+ }
129
+ /**
130
+ * Check if an error is a rate limit error (429 or quota exceeded).
131
+ */
132
+ isRateLimitError(error) {
133
+ if (error instanceof Error) {
134
+ const msg = error.message.toLowerCase();
135
+ return (msg.includes('429') || msg.includes('rate') || msg.includes('quota'));
136
+ }
137
+ return false;
138
+ }
63
139
  async embedBatch(texts) {
64
140
  const response = await fetch(`${OPENAI_API_BASE}/embeddings`, {
65
141
  method: 'POST',
@@ -19,6 +19,47 @@ const ALWAYS_IGNORED = [
19
19
  '.viberag',
20
20
  'node_modules', // Fallback in case not in .gitignore
21
21
  ];
22
+ /**
23
+ * Lock files that should always be ignored.
24
+ * These are machine-generated and provide no value for code search.
25
+ */
26
+ const ALWAYS_IGNORED_FILES = [
27
+ // JavaScript/TypeScript
28
+ 'package-lock.json', // npm
29
+ 'yarn.lock', // Yarn
30
+ 'pnpm-lock.yaml', // pnpm
31
+ 'bun.lockb', // Bun
32
+ // Python
33
+ 'uv.lock', // UV
34
+ 'poetry.lock', // Poetry
35
+ 'Pipfile.lock', // Pipenv
36
+ // Ruby
37
+ 'Gemfile.lock', // Bundler
38
+ // PHP
39
+ 'composer.lock', // Composer
40
+ // Rust
41
+ 'Cargo.lock', // Cargo
42
+ // Go
43
+ 'go.sum', // Go modules
44
+ // Java/Kotlin
45
+ 'gradle.lockfile', // Gradle
46
+ // C#/.NET
47
+ 'packages.lock.json', // NuGet
48
+ // Dart
49
+ 'pubspec.lock', // Pub
50
+ // Swift
51
+ 'Package.resolved', // Swift PM
52
+ ];
53
+ /**
54
+ * File patterns that should always be ignored.
55
+ * These are build artifacts with no semantic value for code search.
56
+ */
57
+ const ALWAYS_IGNORED_PATTERNS = [
58
+ '*.min.js', // Minified JavaScript
59
+ '*.min.css', // Minified CSS
60
+ '*.map', // Source maps
61
+ '*.d.ts.map', // TypeScript declaration maps
62
+ ];
22
63
  /**
23
64
  * Cache of Ignore instances per project root.
24
65
  */
@@ -37,8 +78,12 @@ export async function loadGitignore(projectRoot) {
37
78
  return cached;
38
79
  }
39
80
  const ig = ignore();
40
- // Add always-ignored patterns
81
+ // Add always-ignored patterns (directories)
41
82
  ig.add(ALWAYS_IGNORED);
83
+ // Add always-ignored files (lock files)
84
+ ig.add(ALWAYS_IGNORED_FILES);
85
+ // Add always-ignored file patterns (minified, maps)
86
+ ig.add(ALWAYS_IGNORED_PATTERNS);
42
87
  // Try to load .gitignore
43
88
  const gitignorePath = path.join(projectRoot, '.gitignore');
44
89
  try {
@@ -99,8 +144,16 @@ export function clearAllGitignoreCache() {
99
144
  */
100
145
  export async function getGlobIgnorePatterns(projectRoot) {
101
146
  const patterns = [];
102
- // Always exclude these (same as ALWAYS_IGNORED)
147
+ // Always exclude these directories (same as ALWAYS_IGNORED)
103
148
  patterns.push('**/.git/**', '**/.viberag/**', '**/node_modules/**');
149
+ // Always exclude lock files (same as ALWAYS_IGNORED_FILES)
150
+ for (const file of ALWAYS_IGNORED_FILES) {
151
+ patterns.push(`**/${file}`);
152
+ }
153
+ // Always exclude file patterns (minified, maps)
154
+ for (const pattern of ALWAYS_IGNORED_PATTERNS) {
155
+ patterns.push(`**/${pattern}`);
156
+ }
104
157
  // Try to load .gitignore
105
158
  const gitignorePath = path.join(projectRoot, '.gitignore');
106
159
  try {
@@ -55,6 +55,9 @@ export declare class Indexer {
55
55
  /**
56
56
  * Process a batch of files: read, chunk, embed, and prepare CodeChunks.
57
57
  *
58
+ * Strategy: Collect all chunks from all files first, then embed them
59
+ * together with full concurrency for maximum throughput.
60
+ *
58
61
  * Error handling strategy:
59
62
  * - File read/parse errors: Log and continue (file-specific, recoverable)
60
63
  * - Embedding/storage errors: Let propagate (fatal, affects all files)
@@ -142,6 +142,17 @@ export class Indexer {
142
142
  // 6. Process new and modified files
143
143
  const filesToProcess = [...diff.new, ...diff.modified];
144
144
  const totalFiles = filesToProcess.length;
145
+ // Track cumulative chunks for progress display
146
+ let totalChunksProcessed = 0;
147
+ let lastProgress = 0;
148
+ // Wire throttle callback for rate limit feedback (API providers only)
149
+ if ('onThrottle' in embeddings) {
150
+ embeddings.onThrottle =
151
+ message => {
152
+ // Pass throttle message to UI - shown in yellow when set
153
+ progressCallback?.(lastProgress, totalFiles, 'Indexing files', message, totalChunksProcessed);
154
+ };
155
+ }
145
156
  if (totalFiles > 0) {
146
157
  this.log('info', `Processing ${totalFiles} files`);
147
158
  // First, delete existing chunks for modified files
@@ -153,7 +164,15 @@ export class Indexer {
153
164
  const batchSize = 10;
154
165
  for (let i = 0; i < filesToProcess.length; i += batchSize) {
155
166
  const batch = filesToProcess.slice(i, i + batchSize);
156
- const batchChunks = await this.processFileBatch(batch, chunker, embeddings, storage, stats);
167
+ const batchChunks = await this.processFileBatch(batch, chunker, embeddings, storage, stats, {
168
+ totalFiles,
169
+ currentFileOffset: i,
170
+ progressCallback,
171
+ onChunksProcessed: (count) => {
172
+ totalChunksProcessed += count;
173
+ progressCallback?.(i, totalFiles, 'Indexing files', null, totalChunksProcessed);
174
+ },
175
+ });
157
176
  if (batchChunks.length > 0) {
158
177
  // Use addChunks after table reset to avoid schema mismatch,
159
178
  // upsertChunks for normal incremental updates
@@ -166,7 +185,8 @@ export class Indexer {
166
185
  stats.chunksAdded += batchChunks.length;
167
186
  }
168
187
  const progress = Math.round(((i + batch.length) / totalFiles) * 100);
169
- progressCallback?.(i + batch.length, totalFiles, 'Indexing files');
188
+ lastProgress = i + batch.length;
189
+ progressCallback?.(i + batch.length, totalFiles, 'Indexing files', null, totalChunksProcessed);
170
190
  this.log('debug', `Progress: ${progress}% (${i + batch.length}/${totalFiles})`);
171
191
  }
172
192
  }
@@ -218,79 +238,112 @@ export class Indexer {
218
238
  /**
219
239
  * Process a batch of files: read, chunk, embed, and prepare CodeChunks.
220
240
  *
241
+ * Strategy: Collect all chunks from all files first, then embed them
242
+ * together with full concurrency for maximum throughput.
243
+ *
221
244
  * Error handling strategy:
222
245
  * - File read/parse errors: Log and continue (file-specific, recoverable)
223
246
  * - Embedding/storage errors: Let propagate (fatal, affects all files)
224
247
  */
225
- async processFileBatch(filepaths, chunker, embeddings, storage, stats) {
226
- const allChunks = [];
248
+ async processFileBatch(filepaths, chunker, embeddings, storage, stats, progressContext) {
249
+ const fileDataList = [];
227
250
  for (const filepath of filepaths) {
228
- // Phase 1: File reading and chunking (recoverable errors)
229
- let content;
230
- let fileHash;
231
- let chunks;
232
251
  try {
233
252
  const absolutePath = path.join(this.projectRoot, filepath);
234
- content = await fs.readFile(absolutePath, 'utf-8');
235
- fileHash = (await import('../merkle/hash.js')).computeStringHash(content);
236
- // Chunk the file (with size limits from config)
237
- chunks = await chunker.chunkFile(filepath, content, this.config.chunkMaxSize);
253
+ const content = await fs.readFile(absolutePath, 'utf-8');
254
+ const fileHash = (await import('../merkle/hash.js')).computeStringHash(content);
255
+ const chunks = await chunker.chunkFile(filepath, content, this.config.chunkMaxSize);
256
+ fileDataList.push({ filepath, fileHash, chunks });
238
257
  }
239
258
  catch (error) {
240
- // File-specific error (read/parse) - log and continue with other files
241
259
  this.log('warn', `Failed to read/parse file: ${filepath}`, error);
242
260
  continue;
243
261
  }
244
- // Phase 2: Embedding and storage (fatal errors - let propagate)
245
- // NO try-catch here - API/storage errors should stop indexing
246
- // Check embedding cache for each chunk
247
- const contentHashes = chunks.map(c => c.contentHash);
248
- const cachedEmbeddings = await storage.getCachedEmbeddings(contentHashes);
249
- // Compute embeddings for cache misses
250
- const missingChunks = chunks.filter(c => !cachedEmbeddings.has(c.contentHash));
251
- if (missingChunks.length > 0) {
252
- // Embed contextHeader + text for semantic relevance
253
- const texts = missingChunks.map(c => c.contextHeader ? `${c.contextHeader}\n${c.text}` : c.text);
254
- const newEmbeddings = await embeddings.embed(texts);
255
- stats.embeddingsComputed += missingChunks.length;
256
- // Cache the new embeddings
257
- const cacheEntries = missingChunks.map((chunk, i) => ({
258
- contentHash: chunk.contentHash,
259
- vector: newEmbeddings[i],
260
- createdAt: new Date().toISOString(),
261
- }));
262
- await storage.cacheEmbeddings(cacheEntries);
263
- // Add to cachedEmbeddings map
264
- missingChunks.forEach((chunk, i) => {
265
- cachedEmbeddings.set(chunk.contentHash, newEmbeddings[i]);
262
+ }
263
+ const allChunksWithContext = [];
264
+ for (const fd of fileDataList) {
265
+ for (const chunk of fd.chunks) {
266
+ allChunksWithContext.push({
267
+ chunk,
268
+ filepath: fd.filepath,
269
+ fileHash: fd.fileHash,
266
270
  });
267
271
  }
268
- stats.embeddingsCached += chunks.length - missingChunks.length;
269
- // Build CodeChunk objects
270
- const filename = path.basename(filepath);
271
- const extension = path.extname(filepath);
272
- for (const chunk of chunks) {
273
- const vector = cachedEmbeddings.get(chunk.contentHash);
274
- allChunks.push({
275
- id: `${filepath}:${chunk.startLine}`,
276
- vector,
277
- text: chunk.text,
278
- contentHash: chunk.contentHash,
279
- filepath,
280
- filename,
281
- extension,
282
- type: chunk.type,
283
- name: chunk.name,
284
- startLine: chunk.startLine,
285
- endLine: chunk.endLine,
286
- fileHash,
287
- // New metadata fields from schema v2
288
- signature: chunk.signature,
289
- docstring: chunk.docstring,
290
- isExported: chunk.isExported,
291
- decoratorNames: chunk.decoratorNames,
292
- });
272
+ }
273
+ if (allChunksWithContext.length === 0) {
274
+ return [];
275
+ }
276
+ // Phase 2: Check embedding cache for ALL chunks at once
277
+ const contentHashes = allChunksWithContext.map(c => c.chunk.contentHash);
278
+ const cachedEmbeddings = await storage.getCachedEmbeddings(contentHashes);
279
+ // Find all cache misses
280
+ const missingChunksWithContext = allChunksWithContext.filter(c => !cachedEmbeddings.has(c.chunk.contentHash));
281
+ stats.embeddingsCached +=
282
+ allChunksWithContext.length - missingChunksWithContext.length;
283
+ // Phase 3: Embed ALL missing chunks together (with full concurrency)
284
+ if (missingChunksWithContext.length > 0) {
285
+ // Track chunks processed for progress updates
286
+ let lastReportedChunks = 0;
287
+ // Wire batch progress callback to report incremental chunks
288
+ if (progressContext?.onChunksProcessed && 'onBatchProgress' in embeddings) {
289
+ embeddings.onBatchProgress = (processed, _total) => {
290
+ // Report only the delta since last update
291
+ const delta = processed - lastReportedChunks;
292
+ if (delta > 0) {
293
+ progressContext.onChunksProcessed(delta);
294
+ lastReportedChunks = processed;
295
+ }
296
+ };
297
+ }
298
+ // Embed all chunks together
299
+ const texts = missingChunksWithContext.map(c => c.chunk.contextHeader
300
+ ? `${c.chunk.contextHeader}\n${c.chunk.text}`
301
+ : c.chunk.text);
302
+ const newEmbeddings = await embeddings.embed(texts);
303
+ stats.embeddingsComputed += missingChunksWithContext.length;
304
+ // Report any remaining chunks not yet reported
305
+ const remainingDelta = missingChunksWithContext.length - lastReportedChunks;
306
+ if (remainingDelta > 0 && progressContext?.onChunksProcessed) {
307
+ progressContext.onChunksProcessed(remainingDelta);
293
308
  }
309
+ // Clear batch progress callback
310
+ if ('onBatchProgress' in embeddings) {
311
+ embeddings.onBatchProgress = undefined;
312
+ }
313
+ // Cache the new embeddings
314
+ const cacheEntries = missingChunksWithContext.map((c, i) => ({
315
+ contentHash: c.chunk.contentHash,
316
+ vector: newEmbeddings[i],
317
+ createdAt: new Date().toISOString(),
318
+ }));
319
+ await storage.cacheEmbeddings(cacheEntries);
320
+ // Add to cachedEmbeddings map
321
+ missingChunksWithContext.forEach((c, i) => {
322
+ cachedEmbeddings.set(c.chunk.contentHash, newEmbeddings[i]);
323
+ });
324
+ }
325
+ // Phase 4: Build CodeChunk objects
326
+ const allChunks = [];
327
+ for (const { chunk, filepath, fileHash } of allChunksWithContext) {
328
+ const vector = cachedEmbeddings.get(chunk.contentHash);
329
+ allChunks.push({
330
+ id: `${filepath}:${chunk.startLine}`,
331
+ vector,
332
+ text: chunk.text,
333
+ contentHash: chunk.contentHash,
334
+ filepath,
335
+ filename: path.basename(filepath),
336
+ extension: path.extname(filepath),
337
+ type: chunk.type,
338
+ name: chunk.name,
339
+ startLine: chunk.startLine,
340
+ endLine: chunk.endLine,
341
+ fileHash,
342
+ signature: chunk.signature,
343
+ docstring: chunk.docstring,
344
+ isExported: chunk.isExported,
345
+ decoratorNames: chunk.decoratorNames,
346
+ });
294
347
  }
295
348
  return allChunks;
296
349
  }
@@ -60,8 +60,13 @@ export interface IndexStats {
60
60
  }
61
61
  /**
62
62
  * Progress callback for indexing operations.
63
+ * @param current - Current progress count
64
+ * @param total - Total items (0 for indeterminate)
65
+ * @param stage - Human-readable stage name
66
+ * @param throttleMessage - Rate limit message (shown in yellow) or null to clear
67
+ * @param chunksProcessed - Number of chunks embedded so far
63
68
  */
64
- export type ProgressCallback = (current: number, total: number, stage: string) => void;
69
+ export type ProgressCallback = (current: number, total: number, stage: string, throttleMessage?: string | null, chunksProcessed?: number) => void;
65
70
  /**
66
71
  * Create empty index stats.
67
72
  */
@@ -20,8 +20,6 @@ export { ftsSearch, ensureFtsIndex } from './fts.js';
20
20
  export { hybridRerank } from './hybrid.js';
21
21
  /** Default search limit */
22
22
  const DEFAULT_LIMIT = 10;
23
- /** Exhaustive mode limit (high but bounded) */
24
- const EXHAUSTIVE_LIMIT = 500;
25
23
  /** Default BM25 weight for hybrid search */
26
24
  const DEFAULT_BM25_WEIGHT = 0.3;
27
25
  /** Default oversample multiplier for hybrid search */
@@ -79,9 +77,7 @@ export class SearchEngine {
79
77
  async search(query, options = {}) {
80
78
  const start = Date.now();
81
79
  const mode = options.mode ?? 'hybrid';
82
- const limit = options.exhaustive
83
- ? EXHAUSTIVE_LIMIT
84
- : (options.limit ?? DEFAULT_LIMIT);
80
+ const limit = options.limit ?? DEFAULT_LIMIT;
85
81
  const filterClause = buildFilterClause(options.filters);
86
82
  await this.ensureInitialized();
87
83
  const table = await this.getTable();
@@ -104,10 +100,6 @@ export class SearchEngine {
104
100
  results = await this.searchHybrid(table, query, limit, options.bm25Weight ?? DEFAULT_BM25_WEIGHT, filterClause, options.minScore, options.autoBoost ?? true, options.autoBoostThreshold ?? 0.3, options.returnDebug ?? false);
105
101
  break;
106
102
  }
107
- // Add total matches for exhaustive mode
108
- if (options.exhaustive) {
109
- results.totalMatches = results.results.length;
110
- }
111
103
  results.elapsedMs = Date.now() - start;
112
104
  return results;
113
105
  }
@@ -74,8 +74,6 @@ export interface SearchResults {
74
74
  searchType: SearchMode;
75
75
  /** Time taken in milliseconds */
76
76
  elapsedMs: number;
77
- /** Total matches (when exhaustive=true) */
78
- totalMatches?: number;
79
77
  /** Debug info for hybrid search (when return_debug=true) */
80
78
  debug?: SearchDebugInfo;
81
79
  }
@@ -111,8 +109,6 @@ export interface SearchOptions {
111
109
  limit?: number;
112
110
  /** Weight for BM25 in hybrid search (0.0-1.0, default: 0.3) */
113
111
  bm25Weight?: number;
114
- /** Return all matches above threshold (default: false) */
115
- exhaustive?: boolean;
116
112
  /** Minimum score threshold 0-1 (default: 0) */
117
113
  minScore?: number;
118
114
  /** Transparent filters */
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "viberag",
3
- "version": "0.3.0",
3
+ "version": "0.3.2",
4
4
  "description": "Local code RAG for AI coding assistants - semantic search via MCP server",
5
5
  "license": "AGPL-3.0",
6
6
  "keywords": [