viberag 0.3.1 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/app.js CHANGED
@@ -148,7 +148,14 @@ export default function App() {
148
148
  total: 0,
149
149
  stage: 'Indexing',
150
150
  });
151
- const stats = await runIndex(projectRoot, true, (current, total, stage) => setAppStatus({ state: 'indexing', current, total, stage }));
151
+ const stats = await runIndex(projectRoot, true, (current, total, stage, throttleMessage, chunksProcessed) => setAppStatus({
152
+ state: 'indexing',
153
+ current,
154
+ total,
155
+ stage,
156
+ throttleMessage,
157
+ chunksProcessed,
158
+ }));
152
159
  addOutput('system', formatIndexStats(stats));
153
160
  // Reload stats after indexing
154
161
  const newStats = await loadIndexStats(projectRoot);
@@ -29,7 +29,7 @@ export declare function runInit(projectRoot: string, isReinit?: boolean, wizardC
29
29
  * When force=true, also updates config dimensions to match current PROVIDER_CONFIGS
30
30
  * (handles dimension changes after viberag upgrades).
31
31
  */
32
- export declare function runIndex(projectRoot: string, force?: boolean, onProgress?: (current: number, total: number, stage: string) => void): Promise<IndexStats>;
32
+ export declare function runIndex(projectRoot: string, force?: boolean, onProgress?: (current: number, total: number, stage: string, throttleMessage?: string | null, chunksProcessed?: number) => void): Promise<IndexStats>;
33
33
  /**
34
34
  * Format index stats for display.
35
35
  */
@@ -55,7 +55,14 @@ Manual MCP Setup:
55
55
  const action = force ? 'Reindexing' : 'Indexing';
56
56
  addOutput('system', `${action} codebase...`);
57
57
  setAppStatus({ state: 'indexing', current: 0, total: 0, stage: action });
58
- runIndex(projectRoot, force, (current, total, stage) => setAppStatus({ state: 'indexing', current, total, stage }))
58
+ runIndex(projectRoot, force, (current, total, stage, throttleMessage, chunksProcessed) => setAppStatus({
59
+ state: 'indexing',
60
+ current,
61
+ total,
62
+ stage,
63
+ throttleMessage,
64
+ chunksProcessed,
65
+ }))
59
66
  .then(async (stats) => {
60
67
  addOutput('system', formatIndexStats(stats));
61
68
  // Reload stats after indexing
@@ -25,12 +25,25 @@ function formatStatus(status) {
25
25
  case 'ready':
26
26
  return { text: 'Ready', color: 'green', showSpinner: false };
27
27
  case 'indexing': {
28
+ // Throttle status takes precedence - show in yellow
29
+ if (status.throttleMessage) {
30
+ return {
31
+ text: status.throttleMessage,
32
+ color: 'yellow',
33
+ showSpinner: true,
34
+ };
35
+ }
36
+ // Normal indexing display
28
37
  if (status.total === 0) {
29
38
  return { text: `${status.stage}`, color: 'cyan', showSpinner: true };
30
39
  }
31
40
  const percent = Math.round((status.current / status.total) * 100);
41
+ // Include chunk count if available
42
+ const chunkInfo = status.chunksProcessed !== undefined
43
+ ? ` · ${status.chunksProcessed} chunks`
44
+ : '';
32
45
  return {
33
- text: `${status.stage} ${status.current}/${status.total} (${percent}%)`,
46
+ text: `${status.stage} ${status.current}/${status.total} (${percent}%)${chunkInfo}`,
34
47
  color: 'cyan',
35
48
  showSpinner: true,
36
49
  };
@@ -61,6 +61,10 @@ export type AppStatus = {
61
61
  current: number;
62
62
  total: number;
63
63
  stage: string;
64
+ /** Rate limit message (shown in yellow when set) */
65
+ throttleMessage?: string | null;
66
+ /** Number of chunks embedded so far */
67
+ chunksProcessed?: number;
64
68
  } | {
65
69
  state: 'searching';
66
70
  } | {
@@ -17,9 +17,19 @@ export declare class GeminiEmbeddingProvider implements EmbeddingProvider {
17
17
  readonly dimensions = 1536;
18
18
  private apiKey;
19
19
  private initialized;
20
+ onThrottle?: (message: string | null) => void;
21
+ onBatchProgress?: (processed: number, total: number) => void;
20
22
  constructor(apiKey?: string);
21
23
  initialize(_onProgress?: ModelProgressCallback): Promise<void>;
22
24
  embed(texts: string[]): Promise<number[][]>;
25
+ /**
26
+ * Embed a batch with exponential backoff retry on rate limit errors.
27
+ */
28
+ private embedBatchWithRetry;
29
+ /**
30
+ * Check if an error is a rate limit error (429 or quota exceeded).
31
+ */
32
+ private isRateLimitError;
23
33
  private embedBatch;
24
34
  embedSingle(text: string): Promise<number[]>;
25
35
  close(): void;
@@ -10,7 +10,17 @@
10
10
  */
11
11
  const GEMINI_API_BASE = 'https://generativelanguage.googleapis.com/v1beta/models';
12
12
  const MODEL = 'gemini-embedding-001';
13
- const BATCH_SIZE = 100; // Gemini supports up to 100 texts per request
13
+ // Gemini limits: 2,048 tokens/text, 20,000 tokens/batch, 100-250 texts/batch
14
+ // With avg ~1000 tokens/chunk, safe limit is 20 texts.
15
+ const BATCH_SIZE = 20;
16
+ // Concurrency and rate limiting
17
+ const CONCURRENCY = 5; // Max concurrent API requests
18
+ const MAX_RETRIES = 12; // Max retry attempts on rate limit
19
+ const INITIAL_BACKOFF_MS = 1000; // Start at 1s
20
+ const MAX_BACKOFF_MS = 60000; // Cap at 60s (1 min)
21
+ function sleep(ms) {
22
+ return new Promise(resolve => setTimeout(resolve, ms));
23
+ }
14
24
  /**
15
25
  * Gemini embedding provider.
16
26
  * Uses gemini-embedding-001 model via Google's Generative AI API.
@@ -35,6 +45,20 @@ export class GeminiEmbeddingProvider {
35
45
  writable: true,
36
46
  value: false
37
47
  });
48
+ // Callback for rate limit throttling - message or null to clear
49
+ Object.defineProperty(this, "onThrottle", {
50
+ enumerable: true,
51
+ configurable: true,
52
+ writable: true,
53
+ value: void 0
54
+ });
55
+ // Callback for batch progress - (processed, total) chunks
56
+ Object.defineProperty(this, "onBatchProgress", {
57
+ enumerable: true,
58
+ configurable: true,
59
+ writable: true,
60
+ value: void 0
61
+ });
38
62
  // Trim the key to remove any accidental whitespace
39
63
  this.apiKey = (apiKey ?? '').trim();
40
64
  }
@@ -51,15 +75,67 @@ export class GeminiEmbeddingProvider {
51
75
  if (texts.length === 0) {
52
76
  return [];
53
77
  }
54
- const results = [];
55
- // Process in batches
78
+ // Split into batches
79
+ const batches = [];
56
80
  for (let i = 0; i < texts.length; i += BATCH_SIZE) {
57
- const batch = texts.slice(i, i + BATCH_SIZE);
58
- const batchResults = await this.embedBatch(batch);
59
- results.push(...batchResults);
81
+ batches.push(texts.slice(i, i + BATCH_SIZE));
82
+ }
83
+ // Process batches with limited concurrency
84
+ const results = [];
85
+ let completed = 0;
86
+ for (let i = 0; i < batches.length; i += CONCURRENCY) {
87
+ const concurrentBatches = batches.slice(i, i + CONCURRENCY);
88
+ // Fire concurrent requests
89
+ const batchResults = await Promise.all(concurrentBatches.map(batch => this.embedBatchWithRetry(batch)));
90
+ // Flatten and collect results (Promise.all preserves order)
91
+ for (const result of batchResults) {
92
+ results.push(...result);
93
+ }
94
+ // Report progress after concurrent group completes
95
+ completed += concurrentBatches.length;
96
+ const processed = Math.min(completed * BATCH_SIZE, texts.length);
97
+ this.onBatchProgress?.(processed, texts.length);
60
98
  }
61
99
  return results;
62
100
  }
101
+ /**
102
+ * Embed a batch with exponential backoff retry on rate limit errors.
103
+ */
104
+ async embedBatchWithRetry(batch) {
105
+ let attempt = 0;
106
+ let backoffMs = INITIAL_BACKOFF_MS;
107
+ while (true) {
108
+ try {
109
+ const result = await this.embedBatch(batch);
110
+ // Clear throttle message on success (if was throttling)
111
+ if (attempt > 0)
112
+ this.onThrottle?.(null);
113
+ return result;
114
+ }
115
+ catch (error) {
116
+ if (this.isRateLimitError(error) && attempt < MAX_RETRIES) {
117
+ attempt++;
118
+ const secs = Math.round(backoffMs / 1000);
119
+ this.onThrottle?.(`Rate limited - retry ${attempt}/${MAX_RETRIES} in ${secs}s`);
120
+ await sleep(backoffMs);
121
+ backoffMs = Math.min(backoffMs * 2, MAX_BACKOFF_MS);
122
+ }
123
+ else {
124
+ throw error;
125
+ }
126
+ }
127
+ }
128
+ }
129
+ /**
130
+ * Check if an error is a rate limit error (429 or quota exceeded).
131
+ */
132
+ isRateLimitError(error) {
133
+ if (error instanceof Error) {
134
+ const msg = error.message.toLowerCase();
135
+ return (msg.includes('429') || msg.includes('rate') || msg.includes('quota'));
136
+ }
137
+ return false;
138
+ }
63
139
  async embedBatch(texts) {
64
140
  const url = `${GEMINI_API_BASE}/${MODEL}:batchEmbedContents`;
65
141
  const response = await fetch(url, {
@@ -13,9 +13,19 @@ export declare class MistralEmbeddingProvider implements EmbeddingProvider {
13
13
  readonly dimensions = 1536;
14
14
  private apiKey;
15
15
  private initialized;
16
+ onThrottle?: (message: string | null) => void;
17
+ onBatchProgress?: (processed: number, total: number) => void;
16
18
  constructor(apiKey?: string);
17
19
  initialize(_onProgress?: ModelProgressCallback): Promise<void>;
18
20
  embed(texts: string[]): Promise<number[][]>;
21
+ /**
22
+ * Embed a batch with exponential backoff retry on rate limit errors.
23
+ */
24
+ private embedBatchWithRetry;
25
+ /**
26
+ * Check if an error is a rate limit error (429 or quota exceeded).
27
+ */
28
+ private isRateLimitError;
19
29
  private embedBatch;
20
30
  embedSingle(text: string): Promise<number[]>;
21
31
  close(): void;
@@ -6,7 +6,17 @@
6
6
  */
7
7
  const MISTRAL_API_BASE = 'https://api.mistral.ai/v1';
8
8
  const MODEL = 'codestral-embed';
9
- const BATCH_SIZE = 64; // Mistral supports batching
9
+ // Mistral limits: 8,192 tokens/text, 16,000 tokens/batch TOTAL
10
+ // With avg ~500 tokens/chunk, can fit ~32. Use 24 for safety margin.
11
+ const BATCH_SIZE = 24;
12
+ // Concurrency and rate limiting
13
+ const CONCURRENCY = 5; // Max concurrent API requests
14
+ const MAX_RETRIES = 12; // Max retry attempts on rate limit
15
+ const INITIAL_BACKOFF_MS = 1000; // Start at 1s
16
+ const MAX_BACKOFF_MS = 60000; // Cap at 60s (1 min)
17
+ function sleep(ms) {
18
+ return new Promise(resolve => setTimeout(resolve, ms));
19
+ }
10
20
  /**
11
21
  * Mistral embedding provider.
12
22
  * Uses codestral-embed model via Mistral AI API.
@@ -31,6 +41,20 @@ export class MistralEmbeddingProvider {
31
41
  writable: true,
32
42
  value: false
33
43
  });
44
+ // Callback for rate limit throttling - message or null to clear
45
+ Object.defineProperty(this, "onThrottle", {
46
+ enumerable: true,
47
+ configurable: true,
48
+ writable: true,
49
+ value: void 0
50
+ });
51
+ // Callback for batch progress - (processed, total) chunks
52
+ Object.defineProperty(this, "onBatchProgress", {
53
+ enumerable: true,
54
+ configurable: true,
55
+ writable: true,
56
+ value: void 0
57
+ });
34
58
  // Trim the key to remove any accidental whitespace
35
59
  this.apiKey = (apiKey ?? '').trim();
36
60
  }
@@ -47,15 +71,67 @@ export class MistralEmbeddingProvider {
47
71
  if (texts.length === 0) {
48
72
  return [];
49
73
  }
50
- const results = [];
51
- // Process in batches
74
+ // Split into batches
75
+ const batches = [];
52
76
  for (let i = 0; i < texts.length; i += BATCH_SIZE) {
53
- const batch = texts.slice(i, i + BATCH_SIZE);
54
- const batchResults = await this.embedBatch(batch);
55
- results.push(...batchResults);
77
+ batches.push(texts.slice(i, i + BATCH_SIZE));
78
+ }
79
+ // Process batches with limited concurrency
80
+ const results = [];
81
+ let completed = 0;
82
+ for (let i = 0; i < batches.length; i += CONCURRENCY) {
83
+ const concurrentBatches = batches.slice(i, i + CONCURRENCY);
84
+ // Fire concurrent requests
85
+ const batchResults = await Promise.all(concurrentBatches.map(batch => this.embedBatchWithRetry(batch)));
86
+ // Flatten and collect results (Promise.all preserves order)
87
+ for (const result of batchResults) {
88
+ results.push(...result);
89
+ }
90
+ // Report progress after concurrent group completes
91
+ completed += concurrentBatches.length;
92
+ const processed = Math.min(completed * BATCH_SIZE, texts.length);
93
+ this.onBatchProgress?.(processed, texts.length);
56
94
  }
57
95
  return results;
58
96
  }
97
+ /**
98
+ * Embed a batch with exponential backoff retry on rate limit errors.
99
+ */
100
+ async embedBatchWithRetry(batch) {
101
+ let attempt = 0;
102
+ let backoffMs = INITIAL_BACKOFF_MS;
103
+ while (true) {
104
+ try {
105
+ const result = await this.embedBatch(batch);
106
+ // Clear throttle message on success (if was throttling)
107
+ if (attempt > 0)
108
+ this.onThrottle?.(null);
109
+ return result;
110
+ }
111
+ catch (error) {
112
+ if (this.isRateLimitError(error) && attempt < MAX_RETRIES) {
113
+ attempt++;
114
+ const secs = Math.round(backoffMs / 1000);
115
+ this.onThrottle?.(`Rate limited - retry ${attempt}/${MAX_RETRIES} in ${secs}s`);
116
+ await sleep(backoffMs);
117
+ backoffMs = Math.min(backoffMs * 2, MAX_BACKOFF_MS);
118
+ }
119
+ else {
120
+ throw error;
121
+ }
122
+ }
123
+ }
124
+ }
125
+ /**
126
+ * Check if an error is a rate limit error (429 or quota exceeded).
127
+ */
128
+ isRateLimitError(error) {
129
+ if (error instanceof Error) {
130
+ const msg = error.message.toLowerCase();
131
+ return (msg.includes('429') || msg.includes('rate') || msg.includes('quota'));
132
+ }
133
+ return false;
134
+ }
59
135
  async embedBatch(texts) {
60
136
  const response = await fetch(`${MISTRAL_API_BASE}/embeddings`, {
61
137
  method: 'POST',
@@ -13,9 +13,19 @@ export declare class OpenAIEmbeddingProvider implements EmbeddingProvider {
13
13
  readonly dimensions = 1536;
14
14
  private apiKey;
15
15
  private initialized;
16
+ onThrottle?: (message: string | null) => void;
17
+ onBatchProgress?: (processed: number, total: number) => void;
16
18
  constructor(apiKey?: string);
17
19
  initialize(_onProgress?: ModelProgressCallback): Promise<void>;
18
20
  embed(texts: string[]): Promise<number[][]>;
21
+ /**
22
+ * Embed a batch with exponential backoff retry on rate limit errors.
23
+ */
24
+ private embedBatchWithRetry;
25
+ /**
26
+ * Check if an error is a rate limit error (429 or quota exceeded).
27
+ */
28
+ private isRateLimitError;
19
29
  private embedBatch;
20
30
  embedSingle(text: string): Promise<number[]>;
21
31
  close(): void;
@@ -6,7 +6,17 @@
6
6
  */
7
7
  const OPENAI_API_BASE = 'https://api.openai.com/v1';
8
8
  const MODEL = 'text-embedding-3-small';
9
- const BATCH_SIZE = 2048; // OpenAI supports up to 2048 texts per request
9
+ // OpenAI limits: 8,191 tokens/text, 300,000 tokens/batch, 2,048 texts/batch
10
+ // With avg ~1000 tokens/chunk, safe limit is 300 texts. Use 256 for margin.
11
+ const BATCH_SIZE = 256;
12
+ // Concurrency and rate limiting
13
+ const CONCURRENCY = 5; // Max concurrent API requests
14
+ const MAX_RETRIES = 12; // Max retry attempts on rate limit
15
+ const INITIAL_BACKOFF_MS = 1000; // Start at 1s
16
+ const MAX_BACKOFF_MS = 60000; // Cap at 60s (1 min)
17
+ function sleep(ms) {
18
+ return new Promise(resolve => setTimeout(resolve, ms));
19
+ }
10
20
  /**
11
21
  * OpenAI embedding provider.
12
22
  * Uses text-embedding-3-small model via OpenAI API.
@@ -31,6 +41,20 @@ export class OpenAIEmbeddingProvider {
31
41
  writable: true,
32
42
  value: false
33
43
  });
44
+ // Callback for rate limit throttling - message or null to clear
45
+ Object.defineProperty(this, "onThrottle", {
46
+ enumerable: true,
47
+ configurable: true,
48
+ writable: true,
49
+ value: void 0
50
+ });
51
+ // Callback for batch progress - (processed, total) chunks
52
+ Object.defineProperty(this, "onBatchProgress", {
53
+ enumerable: true,
54
+ configurable: true,
55
+ writable: true,
56
+ value: void 0
57
+ });
34
58
  // Trim the key to remove any accidental whitespace
35
59
  this.apiKey = (apiKey ?? '').trim();
36
60
  }
@@ -51,15 +75,67 @@ export class OpenAIEmbeddingProvider {
51
75
  if (texts.length === 0) {
52
76
  return [];
53
77
  }
54
- const results = [];
55
- // Process in batches
78
+ // Split into batches
79
+ const batches = [];
56
80
  for (let i = 0; i < texts.length; i += BATCH_SIZE) {
57
- const batch = texts.slice(i, i + BATCH_SIZE);
58
- const batchResults = await this.embedBatch(batch);
59
- results.push(...batchResults);
81
+ batches.push(texts.slice(i, i + BATCH_SIZE));
82
+ }
83
+ // Process batches with limited concurrency
84
+ const results = [];
85
+ let completed = 0;
86
+ for (let i = 0; i < batches.length; i += CONCURRENCY) {
87
+ const concurrentBatches = batches.slice(i, i + CONCURRENCY);
88
+ // Fire concurrent requests
89
+ const batchResults = await Promise.all(concurrentBatches.map(batch => this.embedBatchWithRetry(batch)));
90
+ // Flatten and collect results (Promise.all preserves order)
91
+ for (const result of batchResults) {
92
+ results.push(...result);
93
+ }
94
+ // Report progress after concurrent group completes
95
+ completed += concurrentBatches.length;
96
+ const processed = Math.min(completed * BATCH_SIZE, texts.length);
97
+ this.onBatchProgress?.(processed, texts.length);
60
98
  }
61
99
  return results;
62
100
  }
101
+ /**
102
+ * Embed a batch with exponential backoff retry on rate limit errors.
103
+ */
104
+ async embedBatchWithRetry(batch) {
105
+ let attempt = 0;
106
+ let backoffMs = INITIAL_BACKOFF_MS;
107
+ while (true) {
108
+ try {
109
+ const result = await this.embedBatch(batch);
110
+ // Clear throttle message on success (if was throttling)
111
+ if (attempt > 0)
112
+ this.onThrottle?.(null);
113
+ return result;
114
+ }
115
+ catch (error) {
116
+ if (this.isRateLimitError(error) && attempt < MAX_RETRIES) {
117
+ attempt++;
118
+ const secs = Math.round(backoffMs / 1000);
119
+ this.onThrottle?.(`Rate limited - retry ${attempt}/${MAX_RETRIES} in ${secs}s`);
120
+ await sleep(backoffMs);
121
+ backoffMs = Math.min(backoffMs * 2, MAX_BACKOFF_MS);
122
+ }
123
+ else {
124
+ throw error;
125
+ }
126
+ }
127
+ }
128
+ }
129
+ /**
130
+ * Check if an error is a rate limit error (429 or quota exceeded).
131
+ */
132
+ isRateLimitError(error) {
133
+ if (error instanceof Error) {
134
+ const msg = error.message.toLowerCase();
135
+ return (msg.includes('429') || msg.includes('rate') || msg.includes('quota'));
136
+ }
137
+ return false;
138
+ }
63
139
  async embedBatch(texts) {
64
140
  const response = await fetch(`${OPENAI_API_BASE}/embeddings`, {
65
141
  method: 'POST',
@@ -19,6 +19,47 @@ const ALWAYS_IGNORED = [
19
19
  '.viberag',
20
20
  'node_modules', // Fallback in case not in .gitignore
21
21
  ];
22
+ /**
23
+ * Lock files that should always be ignored.
24
+ * These are machine-generated and provide no value for code search.
25
+ */
26
+ const ALWAYS_IGNORED_FILES = [
27
+ // JavaScript/TypeScript
28
+ 'package-lock.json', // npm
29
+ 'yarn.lock', // Yarn
30
+ 'pnpm-lock.yaml', // pnpm
31
+ 'bun.lockb', // Bun
32
+ // Python
33
+ 'uv.lock', // UV
34
+ 'poetry.lock', // Poetry
35
+ 'Pipfile.lock', // Pipenv
36
+ // Ruby
37
+ 'Gemfile.lock', // Bundler
38
+ // PHP
39
+ 'composer.lock', // Composer
40
+ // Rust
41
+ 'Cargo.lock', // Cargo
42
+ // Go
43
+ 'go.sum', // Go modules
44
+ // Java/Kotlin
45
+ 'gradle.lockfile', // Gradle
46
+ // C#/.NET
47
+ 'packages.lock.json', // NuGet
48
+ // Dart
49
+ 'pubspec.lock', // Pub
50
+ // Swift
51
+ 'Package.resolved', // Swift PM
52
+ ];
53
+ /**
54
+ * File patterns that should always be ignored.
55
+ * These are build artifacts with no semantic value for code search.
56
+ */
57
+ const ALWAYS_IGNORED_PATTERNS = [
58
+ '*.min.js', // Minified JavaScript
59
+ '*.min.css', // Minified CSS
60
+ '*.map', // Source maps
61
+ '*.d.ts.map', // TypeScript declaration maps
62
+ ];
22
63
  /**
23
64
  * Cache of Ignore instances per project root.
24
65
  */
@@ -37,8 +78,12 @@ export async function loadGitignore(projectRoot) {
37
78
  return cached;
38
79
  }
39
80
  const ig = ignore();
40
- // Add always-ignored patterns
81
+ // Add always-ignored patterns (directories)
41
82
  ig.add(ALWAYS_IGNORED);
83
+ // Add always-ignored files (lock files)
84
+ ig.add(ALWAYS_IGNORED_FILES);
85
+ // Add always-ignored file patterns (minified, maps)
86
+ ig.add(ALWAYS_IGNORED_PATTERNS);
42
87
  // Try to load .gitignore
43
88
  const gitignorePath = path.join(projectRoot, '.gitignore');
44
89
  try {
@@ -99,8 +144,16 @@ export function clearAllGitignoreCache() {
99
144
  */
100
145
  export async function getGlobIgnorePatterns(projectRoot) {
101
146
  const patterns = [];
102
- // Always exclude these (same as ALWAYS_IGNORED)
147
+ // Always exclude these directories (same as ALWAYS_IGNORED)
103
148
  patterns.push('**/.git/**', '**/.viberag/**', '**/node_modules/**');
149
+ // Always exclude lock files (same as ALWAYS_IGNORED_FILES)
150
+ for (const file of ALWAYS_IGNORED_FILES) {
151
+ patterns.push(`**/${file}`);
152
+ }
153
+ // Always exclude file patterns (minified, maps)
154
+ for (const pattern of ALWAYS_IGNORED_PATTERNS) {
155
+ patterns.push(`**/${pattern}`);
156
+ }
104
157
  // Try to load .gitignore
105
158
  const gitignorePath = path.join(projectRoot, '.gitignore');
106
159
  try {
@@ -55,6 +55,9 @@ export declare class Indexer {
55
55
  /**
56
56
  * Process a batch of files: read, chunk, embed, and prepare CodeChunks.
57
57
  *
58
+ * Strategy: Collect all chunks from all files first, then embed them
59
+ * together with full concurrency for maximum throughput.
60
+ *
58
61
  * Error handling strategy:
59
62
  * - File read/parse errors: Log and continue (file-specific, recoverable)
60
63
  * - Embedding/storage errors: Let propagate (fatal, affects all files)
@@ -142,6 +142,17 @@ export class Indexer {
142
142
  // 6. Process new and modified files
143
143
  const filesToProcess = [...diff.new, ...diff.modified];
144
144
  const totalFiles = filesToProcess.length;
145
+ // Track cumulative chunks for progress display
146
+ let totalChunksProcessed = 0;
147
+ let lastProgress = 0;
148
+ // Wire throttle callback for rate limit feedback (API providers only)
149
+ if ('onThrottle' in embeddings) {
150
+ embeddings.onThrottle =
151
+ message => {
152
+ // Pass throttle message to UI - shown in yellow when set
153
+ progressCallback?.(lastProgress, totalFiles, 'Indexing files', message, totalChunksProcessed);
154
+ };
155
+ }
145
156
  if (totalFiles > 0) {
146
157
  this.log('info', `Processing ${totalFiles} files`);
147
158
  // First, delete existing chunks for modified files
@@ -153,7 +164,15 @@ export class Indexer {
153
164
  const batchSize = 10;
154
165
  for (let i = 0; i < filesToProcess.length; i += batchSize) {
155
166
  const batch = filesToProcess.slice(i, i + batchSize);
156
- const batchChunks = await this.processFileBatch(batch, chunker, embeddings, storage, stats);
167
+ const batchChunks = await this.processFileBatch(batch, chunker, embeddings, storage, stats, {
168
+ totalFiles,
169
+ currentFileOffset: i,
170
+ progressCallback,
171
+ onChunksProcessed: (count) => {
172
+ totalChunksProcessed += count;
173
+ progressCallback?.(i, totalFiles, 'Indexing files', null, totalChunksProcessed);
174
+ },
175
+ });
157
176
  if (batchChunks.length > 0) {
158
177
  // Use addChunks after table reset to avoid schema mismatch,
159
178
  // upsertChunks for normal incremental updates
@@ -166,7 +185,8 @@ export class Indexer {
166
185
  stats.chunksAdded += batchChunks.length;
167
186
  }
168
187
  const progress = Math.round(((i + batch.length) / totalFiles) * 100);
169
- progressCallback?.(i + batch.length, totalFiles, 'Indexing files');
188
+ lastProgress = i + batch.length;
189
+ progressCallback?.(i + batch.length, totalFiles, 'Indexing files', null, totalChunksProcessed);
170
190
  this.log('debug', `Progress: ${progress}% (${i + batch.length}/${totalFiles})`);
171
191
  }
172
192
  }
@@ -218,79 +238,112 @@ export class Indexer {
218
238
  /**
219
239
  * Process a batch of files: read, chunk, embed, and prepare CodeChunks.
220
240
  *
241
+ * Strategy: Collect all chunks from all files first, then embed them
242
+ * together with full concurrency for maximum throughput.
243
+ *
221
244
  * Error handling strategy:
222
245
  * - File read/parse errors: Log and continue (file-specific, recoverable)
223
246
  * - Embedding/storage errors: Let propagate (fatal, affects all files)
224
247
  */
225
- async processFileBatch(filepaths, chunker, embeddings, storage, stats) {
226
- const allChunks = [];
248
+ async processFileBatch(filepaths, chunker, embeddings, storage, stats, progressContext) {
249
+ const fileDataList = [];
227
250
  for (const filepath of filepaths) {
228
- // Phase 1: File reading and chunking (recoverable errors)
229
- let content;
230
- let fileHash;
231
- let chunks;
232
251
  try {
233
252
  const absolutePath = path.join(this.projectRoot, filepath);
234
- content = await fs.readFile(absolutePath, 'utf-8');
235
- fileHash = (await import('../merkle/hash.js')).computeStringHash(content);
236
- // Chunk the file (with size limits from config)
237
- chunks = await chunker.chunkFile(filepath, content, this.config.chunkMaxSize);
253
+ const content = await fs.readFile(absolutePath, 'utf-8');
254
+ const fileHash = (await import('../merkle/hash.js')).computeStringHash(content);
255
+ const chunks = await chunker.chunkFile(filepath, content, this.config.chunkMaxSize);
256
+ fileDataList.push({ filepath, fileHash, chunks });
238
257
  }
239
258
  catch (error) {
240
- // File-specific error (read/parse) - log and continue with other files
241
259
  this.log('warn', `Failed to read/parse file: ${filepath}`, error);
242
260
  continue;
243
261
  }
244
- // Phase 2: Embedding and storage (fatal errors - let propagate)
245
- // NO try-catch here - API/storage errors should stop indexing
246
- // Check embedding cache for each chunk
247
- const contentHashes = chunks.map(c => c.contentHash);
248
- const cachedEmbeddings = await storage.getCachedEmbeddings(contentHashes);
249
- // Compute embeddings for cache misses
250
- const missingChunks = chunks.filter(c => !cachedEmbeddings.has(c.contentHash));
251
- if (missingChunks.length > 0) {
252
- // Embed contextHeader + text for semantic relevance
253
- const texts = missingChunks.map(c => c.contextHeader ? `${c.contextHeader}\n${c.text}` : c.text);
254
- const newEmbeddings = await embeddings.embed(texts);
255
- stats.embeddingsComputed += missingChunks.length;
256
- // Cache the new embeddings
257
- const cacheEntries = missingChunks.map((chunk, i) => ({
258
- contentHash: chunk.contentHash,
259
- vector: newEmbeddings[i],
260
- createdAt: new Date().toISOString(),
261
- }));
262
- await storage.cacheEmbeddings(cacheEntries);
263
- // Add to cachedEmbeddings map
264
- missingChunks.forEach((chunk, i) => {
265
- cachedEmbeddings.set(chunk.contentHash, newEmbeddings[i]);
262
+ }
263
+ const allChunksWithContext = [];
264
+ for (const fd of fileDataList) {
265
+ for (const chunk of fd.chunks) {
266
+ allChunksWithContext.push({
267
+ chunk,
268
+ filepath: fd.filepath,
269
+ fileHash: fd.fileHash,
266
270
  });
267
271
  }
268
- stats.embeddingsCached += chunks.length - missingChunks.length;
269
- // Build CodeChunk objects
270
- const filename = path.basename(filepath);
271
- const extension = path.extname(filepath);
272
- for (const chunk of chunks) {
273
- const vector = cachedEmbeddings.get(chunk.contentHash);
274
- allChunks.push({
275
- id: `${filepath}:${chunk.startLine}`,
276
- vector,
277
- text: chunk.text,
278
- contentHash: chunk.contentHash,
279
- filepath,
280
- filename,
281
- extension,
282
- type: chunk.type,
283
- name: chunk.name,
284
- startLine: chunk.startLine,
285
- endLine: chunk.endLine,
286
- fileHash,
287
- // New metadata fields from schema v2
288
- signature: chunk.signature,
289
- docstring: chunk.docstring,
290
- isExported: chunk.isExported,
291
- decoratorNames: chunk.decoratorNames,
292
- });
272
+ }
273
+ if (allChunksWithContext.length === 0) {
274
+ return [];
275
+ }
276
+ // Phase 2: Check embedding cache for ALL chunks at once
277
+ const contentHashes = allChunksWithContext.map(c => c.chunk.contentHash);
278
+ const cachedEmbeddings = await storage.getCachedEmbeddings(contentHashes);
279
+ // Find all cache misses
280
+ const missingChunksWithContext = allChunksWithContext.filter(c => !cachedEmbeddings.has(c.chunk.contentHash));
281
+ stats.embeddingsCached +=
282
+ allChunksWithContext.length - missingChunksWithContext.length;
283
+ // Phase 3: Embed ALL missing chunks together (with full concurrency)
284
+ if (missingChunksWithContext.length > 0) {
285
+ // Track chunks processed for progress updates
286
+ let lastReportedChunks = 0;
287
+ // Wire batch progress callback to report incremental chunks
288
+ if (progressContext?.onChunksProcessed && 'onBatchProgress' in embeddings) {
289
+ embeddings.onBatchProgress = (processed, _total) => {
290
+ // Report only the delta since last update
291
+ const delta = processed - lastReportedChunks;
292
+ if (delta > 0) {
293
+ progressContext.onChunksProcessed(delta);
294
+ lastReportedChunks = processed;
295
+ }
296
+ };
297
+ }
298
+ // Embed all chunks together
299
+ const texts = missingChunksWithContext.map(c => c.chunk.contextHeader
300
+ ? `${c.chunk.contextHeader}\n${c.chunk.text}`
301
+ : c.chunk.text);
302
+ const newEmbeddings = await embeddings.embed(texts);
303
+ stats.embeddingsComputed += missingChunksWithContext.length;
304
+ // Report any remaining chunks not yet reported
305
+ const remainingDelta = missingChunksWithContext.length - lastReportedChunks;
306
+ if (remainingDelta > 0 && progressContext?.onChunksProcessed) {
307
+ progressContext.onChunksProcessed(remainingDelta);
293
308
  }
309
+ // Clear batch progress callback
310
+ if ('onBatchProgress' in embeddings) {
311
+ embeddings.onBatchProgress = undefined;
312
+ }
313
+ // Cache the new embeddings
314
+ const cacheEntries = missingChunksWithContext.map((c, i) => ({
315
+ contentHash: c.chunk.contentHash,
316
+ vector: newEmbeddings[i],
317
+ createdAt: new Date().toISOString(),
318
+ }));
319
+ await storage.cacheEmbeddings(cacheEntries);
320
+ // Add to cachedEmbeddings map
321
+ missingChunksWithContext.forEach((c, i) => {
322
+ cachedEmbeddings.set(c.chunk.contentHash, newEmbeddings[i]);
323
+ });
324
+ }
325
+ // Phase 4: Build CodeChunk objects
326
+ const allChunks = [];
327
+ for (const { chunk, filepath, fileHash } of allChunksWithContext) {
328
+ const vector = cachedEmbeddings.get(chunk.contentHash);
329
+ allChunks.push({
330
+ id: `${filepath}:${chunk.startLine}`,
331
+ vector,
332
+ text: chunk.text,
333
+ contentHash: chunk.contentHash,
334
+ filepath,
335
+ filename: path.basename(filepath),
336
+ extension: path.extname(filepath),
337
+ type: chunk.type,
338
+ name: chunk.name,
339
+ startLine: chunk.startLine,
340
+ endLine: chunk.endLine,
341
+ fileHash,
342
+ signature: chunk.signature,
343
+ docstring: chunk.docstring,
344
+ isExported: chunk.isExported,
345
+ decoratorNames: chunk.decoratorNames,
346
+ });
294
347
  }
295
348
  return allChunks;
296
349
  }
@@ -60,8 +60,13 @@ export interface IndexStats {
60
60
  }
61
61
  /**
62
62
  * Progress callback for indexing operations.
63
+ * @param current - Current progress count
64
+ * @param total - Total items (0 for indeterminate)
65
+ * @param stage - Human-readable stage name
66
+ * @param throttleMessage - Rate limit message (shown in yellow) or null to clear
67
+ * @param chunksProcessed - Number of chunks embedded so far
63
68
  */
64
- export type ProgressCallback = (current: number, total: number, stage: string) => void;
69
+ export type ProgressCallback = (current: number, total: number, stage: string, throttleMessage?: string | null, chunksProcessed?: number) => void;
65
70
  /**
66
71
  * Create empty index stats.
67
72
  */
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "viberag",
3
- "version": "0.3.1",
3
+ "version": "0.3.2",
4
4
  "description": "Local code RAG for AI coding assistants - semantic search via MCP server",
5
5
  "license": "AGPL-3.0",
6
6
  "keywords": [