viberag 0.3.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/cli/app.js +8 -1
- package/dist/cli/commands/handlers.d.ts +1 -1
- package/dist/cli/commands/useRagCommands.js +8 -1
- package/dist/common/components/StatusBar.js +14 -1
- package/dist/common/types.d.ts +4 -0
- package/dist/mcp/server.js +15 -25
- package/dist/rag/embeddings/gemini.d.ts +10 -0
- package/dist/rag/embeddings/gemini.js +82 -6
- package/dist/rag/embeddings/mistral.d.ts +10 -0
- package/dist/rag/embeddings/mistral.js +82 -6
- package/dist/rag/embeddings/openai.d.ts +10 -0
- package/dist/rag/embeddings/openai.js +82 -6
- package/dist/rag/gitignore/index.js +55 -2
- package/dist/rag/indexer/indexer.d.ts +3 -0
- package/dist/rag/indexer/indexer.js +113 -60
- package/dist/rag/indexer/types.d.ts +6 -1
- package/dist/rag/search/index.js +1 -9
- package/dist/rag/search/types.d.ts +0 -4
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -60,7 +60,7 @@ Your coding agent would normally use Search / Grep / Find and guess search terms
|
|
|
60
60
|
|
|
61
61
|
When searching for "authentication", VibeRAG will find all code snippets that are relevant to authentication, such as "login", "logout", "register", and names of functions and classes like `AuthDependency`, `APIKeyCache`, etc.
|
|
62
62
|
|
|
63
|
-
This ensures
|
|
63
|
+
This ensures comprehensive search of your codebase so you don't miss important files and features that are relevant to your changes or refactor.
|
|
64
64
|
|
|
65
65
|
### Great for Monorepos
|
|
66
66
|
|
package/dist/cli/app.js
CHANGED
|
@@ -148,7 +148,14 @@ export default function App() {
|
|
|
148
148
|
total: 0,
|
|
149
149
|
stage: 'Indexing',
|
|
150
150
|
});
|
|
151
|
-
const stats = await runIndex(projectRoot, true, (current, total, stage) => setAppStatus({
|
|
151
|
+
const stats = await runIndex(projectRoot, true, (current, total, stage, throttleMessage, chunksProcessed) => setAppStatus({
|
|
152
|
+
state: 'indexing',
|
|
153
|
+
current,
|
|
154
|
+
total,
|
|
155
|
+
stage,
|
|
156
|
+
throttleMessage,
|
|
157
|
+
chunksProcessed,
|
|
158
|
+
}));
|
|
152
159
|
addOutput('system', formatIndexStats(stats));
|
|
153
160
|
// Reload stats after indexing
|
|
154
161
|
const newStats = await loadIndexStats(projectRoot);
|
|
@@ -29,7 +29,7 @@ export declare function runInit(projectRoot: string, isReinit?: boolean, wizardC
|
|
|
29
29
|
* When force=true, also updates config dimensions to match current PROVIDER_CONFIGS
|
|
30
30
|
* (handles dimension changes after viberag upgrades).
|
|
31
31
|
*/
|
|
32
|
-
export declare function runIndex(projectRoot: string, force?: boolean, onProgress?: (current: number, total: number, stage: string) => void): Promise<IndexStats>;
|
|
32
|
+
export declare function runIndex(projectRoot: string, force?: boolean, onProgress?: (current: number, total: number, stage: string, throttleMessage?: string | null, chunksProcessed?: number) => void): Promise<IndexStats>;
|
|
33
33
|
/**
|
|
34
34
|
* Format index stats for display.
|
|
35
35
|
*/
|
|
@@ -55,7 +55,14 @@ Manual MCP Setup:
|
|
|
55
55
|
const action = force ? 'Reindexing' : 'Indexing';
|
|
56
56
|
addOutput('system', `${action} codebase...`);
|
|
57
57
|
setAppStatus({ state: 'indexing', current: 0, total: 0, stage: action });
|
|
58
|
-
runIndex(projectRoot, force, (current, total, stage) => setAppStatus({
|
|
58
|
+
runIndex(projectRoot, force, (current, total, stage, throttleMessage, chunksProcessed) => setAppStatus({
|
|
59
|
+
state: 'indexing',
|
|
60
|
+
current,
|
|
61
|
+
total,
|
|
62
|
+
stage,
|
|
63
|
+
throttleMessage,
|
|
64
|
+
chunksProcessed,
|
|
65
|
+
}))
|
|
59
66
|
.then(async (stats) => {
|
|
60
67
|
addOutput('system', formatIndexStats(stats));
|
|
61
68
|
// Reload stats after indexing
|
|
@@ -25,12 +25,25 @@ function formatStatus(status) {
|
|
|
25
25
|
case 'ready':
|
|
26
26
|
return { text: 'Ready', color: 'green', showSpinner: false };
|
|
27
27
|
case 'indexing': {
|
|
28
|
+
// Throttle status takes precedence - show in yellow
|
|
29
|
+
if (status.throttleMessage) {
|
|
30
|
+
return {
|
|
31
|
+
text: status.throttleMessage,
|
|
32
|
+
color: 'yellow',
|
|
33
|
+
showSpinner: true,
|
|
34
|
+
};
|
|
35
|
+
}
|
|
36
|
+
// Normal indexing display
|
|
28
37
|
if (status.total === 0) {
|
|
29
38
|
return { text: `${status.stage}`, color: 'cyan', showSpinner: true };
|
|
30
39
|
}
|
|
31
40
|
const percent = Math.round((status.current / status.total) * 100);
|
|
41
|
+
// Include chunk count if available
|
|
42
|
+
const chunkInfo = status.chunksProcessed !== undefined
|
|
43
|
+
? ` · ${status.chunksProcessed} chunks`
|
|
44
|
+
: '';
|
|
32
45
|
return {
|
|
33
|
-
text: `${status.stage} ${status.current}/${status.total} (${percent}%)`,
|
|
46
|
+
text: `${status.stage} ${status.current}/${status.total} (${percent}%)${chunkInfo}`,
|
|
34
47
|
color: 'cyan',
|
|
35
48
|
showSpinner: true,
|
|
36
49
|
};
|
package/dist/common/types.d.ts
CHANGED
|
@@ -61,6 +61,10 @@ export type AppStatus = {
|
|
|
61
61
|
current: number;
|
|
62
62
|
total: number;
|
|
63
63
|
stage: string;
|
|
64
|
+
/** Rate limit message (shown in yellow when set) */
|
|
65
|
+
throttleMessage?: string | null;
|
|
66
|
+
/** Number of chunks embedded so far */
|
|
67
|
+
chunksProcessed?: number;
|
|
64
68
|
} | {
|
|
65
69
|
state: 'searching';
|
|
66
70
|
} | {
|
package/dist/mcp/server.js
CHANGED
|
@@ -34,14 +34,16 @@ async function ensureInitialized(projectRoot) {
|
|
|
34
34
|
}
|
|
35
35
|
}
|
|
36
36
|
/**
|
|
37
|
-
* Default maximum response size in bytes (
|
|
37
|
+
* Default maximum response size in bytes (50KB).
|
|
38
38
|
* Reduces result count to fit; does NOT truncate text.
|
|
39
|
+
* Note: Claude Code has ~25K token limit (~100KB), so 50KB default leaves headroom.
|
|
39
40
|
*/
|
|
40
|
-
const DEFAULT_MAX_RESPONSE_SIZE =
|
|
41
|
+
const DEFAULT_MAX_RESPONSE_SIZE = 50 * 1024;
|
|
41
42
|
/**
|
|
42
|
-
* Maximum allowed response size (
|
|
43
|
+
* Maximum allowed response size (100KB).
|
|
44
|
+
* Hard cap to prevent token overflow in AI tools.
|
|
43
45
|
*/
|
|
44
|
-
const MAX_RESPONSE_SIZE =
|
|
46
|
+
const MAX_RESPONSE_SIZE = 100 * 1024;
|
|
45
47
|
/**
|
|
46
48
|
* Overhead per result in JSON (metadata fields, formatting).
|
|
47
49
|
*/
|
|
@@ -129,10 +131,6 @@ function formatSearchResults(results, includeDebug = false, maxResponseSize = DE
|
|
|
129
131
|
response['originalResultCount'] = results.results.length;
|
|
130
132
|
response['reducedForSize'] = true;
|
|
131
133
|
}
|
|
132
|
-
// Add totalMatches for exhaustive mode
|
|
133
|
-
if (results.totalMatches !== undefined) {
|
|
134
|
-
response['totalMatches'] = results.totalMatches;
|
|
135
|
-
}
|
|
136
134
|
// Add debug info for AI evaluation
|
|
137
135
|
if (includeDebug && results.debug) {
|
|
138
136
|
response['debug'] = formatDebugInfo(results.debug);
|
|
@@ -241,9 +239,9 @@ export function createMcpServer(projectRoot) {
|
|
|
241
239
|
server.addTool({
|
|
242
240
|
name: 'codebase_search',
|
|
243
241
|
description: `
|
|
244
|
-
Codebase search: semantic search, keyword search, and hybrid search options.
|
|
245
|
-
Use this when you need to find code that matches semantic meaning and keyword patterns.
|
|
246
|
-
This tool helps you perform
|
|
242
|
+
Codebase search: semantic search, keyword search, and hybrid search options.
|
|
243
|
+
Use this when you need to find code that matches semantic meaning and keyword patterns.
|
|
244
|
+
This tool helps you perform comprehensive searches of the codebase and get the best
|
|
247
245
|
context and understanding when exploring and searching the codebase, docs, etc.
|
|
248
246
|
|
|
249
247
|
USE FOR CODEBASE EXPLORATION:
|
|
@@ -299,8 +297,7 @@ For thorough searches, consider:
|
|
|
299
297
|
1. Start with hybrid mode, default weights
|
|
300
298
|
2. Check debug info to evaluate search quality
|
|
301
299
|
3. If maxVectorScore < 0.3, try exact mode or higher bm25_weight
|
|
302
|
-
4. If results seem incomplete, try codebase_parallel_search for comparison
|
|
303
|
-
5. Use exhaustive=true for refactoring tasks needing ALL matches
|
|
300
|
+
4. If results seem incomplete, run more searches and try codebase_parallel_search for comparison
|
|
304
301
|
|
|
305
302
|
RESULT INTERPRETATION:
|
|
306
303
|
- score: Combined relevance (higher = better)
|
|
@@ -351,11 +348,6 @@ Production code: { path_not_contains: ["test", "mock", "fixture"], is_exported:
|
|
|
351
348
|
.optional()
|
|
352
349
|
.default(10)
|
|
353
350
|
.describe('Maximum number of results (1-100, default: 10)'),
|
|
354
|
-
exhaustive: z
|
|
355
|
-
.boolean()
|
|
356
|
-
.optional()
|
|
357
|
-
.default(false)
|
|
358
|
-
.describe('Return all matches (for refactoring/auditing)'),
|
|
359
351
|
min_score: z
|
|
360
352
|
.number()
|
|
361
353
|
.min(0)
|
|
@@ -398,9 +390,8 @@ Production code: { path_not_contains: ["test", "mock", "fixture"], is_exported:
|
|
|
398
390
|
.max(MAX_RESPONSE_SIZE)
|
|
399
391
|
.optional()
|
|
400
392
|
.default(DEFAULT_MAX_RESPONSE_SIZE)
|
|
401
|
-
.describe('Maximum response size in bytes (default:
|
|
402
|
-
'Reduces result count to fit within limit; does NOT truncate text content.
|
|
403
|
-
'Use a larger value for exhaustive searches.'),
|
|
393
|
+
.describe('Maximum response size in bytes (default: 50KB, max: 100KB). ' +
|
|
394
|
+
'Reduces result count to fit within limit; does NOT truncate text content.'),
|
|
404
395
|
}),
|
|
405
396
|
execute: async (args) => {
|
|
406
397
|
await ensureInitialized(projectRoot);
|
|
@@ -425,7 +416,6 @@ Production code: { path_not_contains: ["test", "mock", "fixture"], is_exported:
|
|
|
425
416
|
const results = await engine.search(args.query, {
|
|
426
417
|
mode: args.mode,
|
|
427
418
|
limit: args.limit,
|
|
428
|
-
exhaustive: args.exhaustive,
|
|
429
419
|
minScore: args.min_score,
|
|
430
420
|
filters,
|
|
431
421
|
codeSnippet: args.code_snippet,
|
|
@@ -545,9 +535,9 @@ Production code: { path_not_contains: ["test", "mock", "fixture"], is_exported:
|
|
|
545
535
|
server.addTool({
|
|
546
536
|
name: 'codebase_parallel_search',
|
|
547
537
|
description: `
|
|
548
|
-
Codebase Parallel Search: run multiple semantic search, keyword search, and hybrid searches in parallel and compare results.
|
|
549
|
-
Use this when you need to run multiple searches at once to find code that matches semantic meaning and keyword patterns.
|
|
550
|
-
This tool helps you perform
|
|
538
|
+
Codebase Parallel Search: run multiple semantic search, keyword search, and hybrid searches in parallel and compare results.
|
|
539
|
+
Use this when you need to run multiple searches at once to find code that matches semantic meaning and keyword patterns.
|
|
540
|
+
This tool helps you perform comprehensive searches of the codebase and get the best
|
|
551
541
|
context and understanding when exploring and searching the codebase, docs, etc.
|
|
552
542
|
|
|
553
543
|
NOTE: This is for narrower sets of queries. Parallel searches may return a large number of results,
|
|
@@ -17,9 +17,19 @@ export declare class GeminiEmbeddingProvider implements EmbeddingProvider {
|
|
|
17
17
|
readonly dimensions = 1536;
|
|
18
18
|
private apiKey;
|
|
19
19
|
private initialized;
|
|
20
|
+
onThrottle?: (message: string | null) => void;
|
|
21
|
+
onBatchProgress?: (processed: number, total: number) => void;
|
|
20
22
|
constructor(apiKey?: string);
|
|
21
23
|
initialize(_onProgress?: ModelProgressCallback): Promise<void>;
|
|
22
24
|
embed(texts: string[]): Promise<number[][]>;
|
|
25
|
+
/**
|
|
26
|
+
* Embed a batch with exponential backoff retry on rate limit errors.
|
|
27
|
+
*/
|
|
28
|
+
private embedBatchWithRetry;
|
|
29
|
+
/**
|
|
30
|
+
* Check if an error is a rate limit error (429 or quota exceeded).
|
|
31
|
+
*/
|
|
32
|
+
private isRateLimitError;
|
|
23
33
|
private embedBatch;
|
|
24
34
|
embedSingle(text: string): Promise<number[]>;
|
|
25
35
|
close(): void;
|
|
@@ -10,7 +10,17 @@
|
|
|
10
10
|
*/
|
|
11
11
|
const GEMINI_API_BASE = 'https://generativelanguage.googleapis.com/v1beta/models';
|
|
12
12
|
const MODEL = 'gemini-embedding-001';
|
|
13
|
-
|
|
13
|
+
// Gemini limits: 2,048 tokens/text, 20,000 tokens/batch, 100-250 texts/batch
|
|
14
|
+
// With avg ~1000 tokens/chunk, safe limit is 20 texts.
|
|
15
|
+
const BATCH_SIZE = 20;
|
|
16
|
+
// Concurrency and rate limiting
|
|
17
|
+
const CONCURRENCY = 5; // Max concurrent API requests
|
|
18
|
+
const MAX_RETRIES = 12; // Max retry attempts on rate limit
|
|
19
|
+
const INITIAL_BACKOFF_MS = 1000; // Start at 1s
|
|
20
|
+
const MAX_BACKOFF_MS = 60000; // Cap at 60s (1 min)
|
|
21
|
+
function sleep(ms) {
|
|
22
|
+
return new Promise(resolve => setTimeout(resolve, ms));
|
|
23
|
+
}
|
|
14
24
|
/**
|
|
15
25
|
* Gemini embedding provider.
|
|
16
26
|
* Uses gemini-embedding-001 model via Google's Generative AI API.
|
|
@@ -35,6 +45,20 @@ export class GeminiEmbeddingProvider {
|
|
|
35
45
|
writable: true,
|
|
36
46
|
value: false
|
|
37
47
|
});
|
|
48
|
+
// Callback for rate limit throttling - message or null to clear
|
|
49
|
+
Object.defineProperty(this, "onThrottle", {
|
|
50
|
+
enumerable: true,
|
|
51
|
+
configurable: true,
|
|
52
|
+
writable: true,
|
|
53
|
+
value: void 0
|
|
54
|
+
});
|
|
55
|
+
// Callback for batch progress - (processed, total) chunks
|
|
56
|
+
Object.defineProperty(this, "onBatchProgress", {
|
|
57
|
+
enumerable: true,
|
|
58
|
+
configurable: true,
|
|
59
|
+
writable: true,
|
|
60
|
+
value: void 0
|
|
61
|
+
});
|
|
38
62
|
// Trim the key to remove any accidental whitespace
|
|
39
63
|
this.apiKey = (apiKey ?? '').trim();
|
|
40
64
|
}
|
|
@@ -51,15 +75,67 @@ export class GeminiEmbeddingProvider {
|
|
|
51
75
|
if (texts.length === 0) {
|
|
52
76
|
return [];
|
|
53
77
|
}
|
|
54
|
-
|
|
55
|
-
|
|
78
|
+
// Split into batches
|
|
79
|
+
const batches = [];
|
|
56
80
|
for (let i = 0; i < texts.length; i += BATCH_SIZE) {
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
81
|
+
batches.push(texts.slice(i, i + BATCH_SIZE));
|
|
82
|
+
}
|
|
83
|
+
// Process batches with limited concurrency
|
|
84
|
+
const results = [];
|
|
85
|
+
let completed = 0;
|
|
86
|
+
for (let i = 0; i < batches.length; i += CONCURRENCY) {
|
|
87
|
+
const concurrentBatches = batches.slice(i, i + CONCURRENCY);
|
|
88
|
+
// Fire concurrent requests
|
|
89
|
+
const batchResults = await Promise.all(concurrentBatches.map(batch => this.embedBatchWithRetry(batch)));
|
|
90
|
+
// Flatten and collect results (Promise.all preserves order)
|
|
91
|
+
for (const result of batchResults) {
|
|
92
|
+
results.push(...result);
|
|
93
|
+
}
|
|
94
|
+
// Report progress after concurrent group completes
|
|
95
|
+
completed += concurrentBatches.length;
|
|
96
|
+
const processed = Math.min(completed * BATCH_SIZE, texts.length);
|
|
97
|
+
this.onBatchProgress?.(processed, texts.length);
|
|
60
98
|
}
|
|
61
99
|
return results;
|
|
62
100
|
}
|
|
101
|
+
/**
|
|
102
|
+
* Embed a batch with exponential backoff retry on rate limit errors.
|
|
103
|
+
*/
|
|
104
|
+
async embedBatchWithRetry(batch) {
|
|
105
|
+
let attempt = 0;
|
|
106
|
+
let backoffMs = INITIAL_BACKOFF_MS;
|
|
107
|
+
while (true) {
|
|
108
|
+
try {
|
|
109
|
+
const result = await this.embedBatch(batch);
|
|
110
|
+
// Clear throttle message on success (if was throttling)
|
|
111
|
+
if (attempt > 0)
|
|
112
|
+
this.onThrottle?.(null);
|
|
113
|
+
return result;
|
|
114
|
+
}
|
|
115
|
+
catch (error) {
|
|
116
|
+
if (this.isRateLimitError(error) && attempt < MAX_RETRIES) {
|
|
117
|
+
attempt++;
|
|
118
|
+
const secs = Math.round(backoffMs / 1000);
|
|
119
|
+
this.onThrottle?.(`Rate limited - retry ${attempt}/${MAX_RETRIES} in ${secs}s`);
|
|
120
|
+
await sleep(backoffMs);
|
|
121
|
+
backoffMs = Math.min(backoffMs * 2, MAX_BACKOFF_MS);
|
|
122
|
+
}
|
|
123
|
+
else {
|
|
124
|
+
throw error;
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
}
|
|
129
|
+
/**
|
|
130
|
+
* Check if an error is a rate limit error (429 or quota exceeded).
|
|
131
|
+
*/
|
|
132
|
+
isRateLimitError(error) {
|
|
133
|
+
if (error instanceof Error) {
|
|
134
|
+
const msg = error.message.toLowerCase();
|
|
135
|
+
return (msg.includes('429') || msg.includes('rate') || msg.includes('quota'));
|
|
136
|
+
}
|
|
137
|
+
return false;
|
|
138
|
+
}
|
|
63
139
|
async embedBatch(texts) {
|
|
64
140
|
const url = `${GEMINI_API_BASE}/${MODEL}:batchEmbedContents`;
|
|
65
141
|
const response = await fetch(url, {
|
|
@@ -13,9 +13,19 @@ export declare class MistralEmbeddingProvider implements EmbeddingProvider {
|
|
|
13
13
|
readonly dimensions = 1536;
|
|
14
14
|
private apiKey;
|
|
15
15
|
private initialized;
|
|
16
|
+
onThrottle?: (message: string | null) => void;
|
|
17
|
+
onBatchProgress?: (processed: number, total: number) => void;
|
|
16
18
|
constructor(apiKey?: string);
|
|
17
19
|
initialize(_onProgress?: ModelProgressCallback): Promise<void>;
|
|
18
20
|
embed(texts: string[]): Promise<number[][]>;
|
|
21
|
+
/**
|
|
22
|
+
* Embed a batch with exponential backoff retry on rate limit errors.
|
|
23
|
+
*/
|
|
24
|
+
private embedBatchWithRetry;
|
|
25
|
+
/**
|
|
26
|
+
* Check if an error is a rate limit error (429 or quota exceeded).
|
|
27
|
+
*/
|
|
28
|
+
private isRateLimitError;
|
|
19
29
|
private embedBatch;
|
|
20
30
|
embedSingle(text: string): Promise<number[]>;
|
|
21
31
|
close(): void;
|
|
@@ -6,7 +6,17 @@
|
|
|
6
6
|
*/
|
|
7
7
|
const MISTRAL_API_BASE = 'https://api.mistral.ai/v1';
|
|
8
8
|
const MODEL = 'codestral-embed';
|
|
9
|
-
|
|
9
|
+
// Mistral limits: 8,192 tokens/text, 16,000 tokens/batch TOTAL
|
|
10
|
+
// With avg ~500 tokens/chunk, can fit ~32. Use 24 for safety margin.
|
|
11
|
+
const BATCH_SIZE = 24;
|
|
12
|
+
// Concurrency and rate limiting
|
|
13
|
+
const CONCURRENCY = 5; // Max concurrent API requests
|
|
14
|
+
const MAX_RETRIES = 12; // Max retry attempts on rate limit
|
|
15
|
+
const INITIAL_BACKOFF_MS = 1000; // Start at 1s
|
|
16
|
+
const MAX_BACKOFF_MS = 60000; // Cap at 60s (1 min)
|
|
17
|
+
function sleep(ms) {
|
|
18
|
+
return new Promise(resolve => setTimeout(resolve, ms));
|
|
19
|
+
}
|
|
10
20
|
/**
|
|
11
21
|
* Mistral embedding provider.
|
|
12
22
|
* Uses codestral-embed model via Mistral AI API.
|
|
@@ -31,6 +41,20 @@ export class MistralEmbeddingProvider {
|
|
|
31
41
|
writable: true,
|
|
32
42
|
value: false
|
|
33
43
|
});
|
|
44
|
+
// Callback for rate limit throttling - message or null to clear
|
|
45
|
+
Object.defineProperty(this, "onThrottle", {
|
|
46
|
+
enumerable: true,
|
|
47
|
+
configurable: true,
|
|
48
|
+
writable: true,
|
|
49
|
+
value: void 0
|
|
50
|
+
});
|
|
51
|
+
// Callback for batch progress - (processed, total) chunks
|
|
52
|
+
Object.defineProperty(this, "onBatchProgress", {
|
|
53
|
+
enumerable: true,
|
|
54
|
+
configurable: true,
|
|
55
|
+
writable: true,
|
|
56
|
+
value: void 0
|
|
57
|
+
});
|
|
34
58
|
// Trim the key to remove any accidental whitespace
|
|
35
59
|
this.apiKey = (apiKey ?? '').trim();
|
|
36
60
|
}
|
|
@@ -47,15 +71,67 @@ export class MistralEmbeddingProvider {
|
|
|
47
71
|
if (texts.length === 0) {
|
|
48
72
|
return [];
|
|
49
73
|
}
|
|
50
|
-
|
|
51
|
-
|
|
74
|
+
// Split into batches
|
|
75
|
+
const batches = [];
|
|
52
76
|
for (let i = 0; i < texts.length; i += BATCH_SIZE) {
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
77
|
+
batches.push(texts.slice(i, i + BATCH_SIZE));
|
|
78
|
+
}
|
|
79
|
+
// Process batches with limited concurrency
|
|
80
|
+
const results = [];
|
|
81
|
+
let completed = 0;
|
|
82
|
+
for (let i = 0; i < batches.length; i += CONCURRENCY) {
|
|
83
|
+
const concurrentBatches = batches.slice(i, i + CONCURRENCY);
|
|
84
|
+
// Fire concurrent requests
|
|
85
|
+
const batchResults = await Promise.all(concurrentBatches.map(batch => this.embedBatchWithRetry(batch)));
|
|
86
|
+
// Flatten and collect results (Promise.all preserves order)
|
|
87
|
+
for (const result of batchResults) {
|
|
88
|
+
results.push(...result);
|
|
89
|
+
}
|
|
90
|
+
// Report progress after concurrent group completes
|
|
91
|
+
completed += concurrentBatches.length;
|
|
92
|
+
const processed = Math.min(completed * BATCH_SIZE, texts.length);
|
|
93
|
+
this.onBatchProgress?.(processed, texts.length);
|
|
56
94
|
}
|
|
57
95
|
return results;
|
|
58
96
|
}
|
|
97
|
+
/**
|
|
98
|
+
* Embed a batch with exponential backoff retry on rate limit errors.
|
|
99
|
+
*/
|
|
100
|
+
async embedBatchWithRetry(batch) {
|
|
101
|
+
let attempt = 0;
|
|
102
|
+
let backoffMs = INITIAL_BACKOFF_MS;
|
|
103
|
+
while (true) {
|
|
104
|
+
try {
|
|
105
|
+
const result = await this.embedBatch(batch);
|
|
106
|
+
// Clear throttle message on success (if was throttling)
|
|
107
|
+
if (attempt > 0)
|
|
108
|
+
this.onThrottle?.(null);
|
|
109
|
+
return result;
|
|
110
|
+
}
|
|
111
|
+
catch (error) {
|
|
112
|
+
if (this.isRateLimitError(error) && attempt < MAX_RETRIES) {
|
|
113
|
+
attempt++;
|
|
114
|
+
const secs = Math.round(backoffMs / 1000);
|
|
115
|
+
this.onThrottle?.(`Rate limited - retry ${attempt}/${MAX_RETRIES} in ${secs}s`);
|
|
116
|
+
await sleep(backoffMs);
|
|
117
|
+
backoffMs = Math.min(backoffMs * 2, MAX_BACKOFF_MS);
|
|
118
|
+
}
|
|
119
|
+
else {
|
|
120
|
+
throw error;
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
/**
|
|
126
|
+
* Check if an error is a rate limit error (429 or quota exceeded).
|
|
127
|
+
*/
|
|
128
|
+
isRateLimitError(error) {
|
|
129
|
+
if (error instanceof Error) {
|
|
130
|
+
const msg = error.message.toLowerCase();
|
|
131
|
+
return (msg.includes('429') || msg.includes('rate') || msg.includes('quota'));
|
|
132
|
+
}
|
|
133
|
+
return false;
|
|
134
|
+
}
|
|
59
135
|
async embedBatch(texts) {
|
|
60
136
|
const response = await fetch(`${MISTRAL_API_BASE}/embeddings`, {
|
|
61
137
|
method: 'POST',
|
|
@@ -13,9 +13,19 @@ export declare class OpenAIEmbeddingProvider implements EmbeddingProvider {
|
|
|
13
13
|
readonly dimensions = 1536;
|
|
14
14
|
private apiKey;
|
|
15
15
|
private initialized;
|
|
16
|
+
onThrottle?: (message: string | null) => void;
|
|
17
|
+
onBatchProgress?: (processed: number, total: number) => void;
|
|
16
18
|
constructor(apiKey?: string);
|
|
17
19
|
initialize(_onProgress?: ModelProgressCallback): Promise<void>;
|
|
18
20
|
embed(texts: string[]): Promise<number[][]>;
|
|
21
|
+
/**
|
|
22
|
+
* Embed a batch with exponential backoff retry on rate limit errors.
|
|
23
|
+
*/
|
|
24
|
+
private embedBatchWithRetry;
|
|
25
|
+
/**
|
|
26
|
+
* Check if an error is a rate limit error (429 or quota exceeded).
|
|
27
|
+
*/
|
|
28
|
+
private isRateLimitError;
|
|
19
29
|
private embedBatch;
|
|
20
30
|
embedSingle(text: string): Promise<number[]>;
|
|
21
31
|
close(): void;
|
|
@@ -6,7 +6,17 @@
|
|
|
6
6
|
*/
|
|
7
7
|
const OPENAI_API_BASE = 'https://api.openai.com/v1';
|
|
8
8
|
const MODEL = 'text-embedding-3-small';
|
|
9
|
-
|
|
9
|
+
// OpenAI limits: 8,191 tokens/text, 300,000 tokens/batch, 2,048 texts/batch
|
|
10
|
+
// With avg ~1000 tokens/chunk, safe limit is 300 texts. Use 256 for margin.
|
|
11
|
+
const BATCH_SIZE = 256;
|
|
12
|
+
// Concurrency and rate limiting
|
|
13
|
+
const CONCURRENCY = 5; // Max concurrent API requests
|
|
14
|
+
const MAX_RETRIES = 12; // Max retry attempts on rate limit
|
|
15
|
+
const INITIAL_BACKOFF_MS = 1000; // Start at 1s
|
|
16
|
+
const MAX_BACKOFF_MS = 60000; // Cap at 60s (1 min)
|
|
17
|
+
function sleep(ms) {
|
|
18
|
+
return new Promise(resolve => setTimeout(resolve, ms));
|
|
19
|
+
}
|
|
10
20
|
/**
|
|
11
21
|
* OpenAI embedding provider.
|
|
12
22
|
* Uses text-embedding-3-small model via OpenAI API.
|
|
@@ -31,6 +41,20 @@ export class OpenAIEmbeddingProvider {
|
|
|
31
41
|
writable: true,
|
|
32
42
|
value: false
|
|
33
43
|
});
|
|
44
|
+
// Callback for rate limit throttling - message or null to clear
|
|
45
|
+
Object.defineProperty(this, "onThrottle", {
|
|
46
|
+
enumerable: true,
|
|
47
|
+
configurable: true,
|
|
48
|
+
writable: true,
|
|
49
|
+
value: void 0
|
|
50
|
+
});
|
|
51
|
+
// Callback for batch progress - (processed, total) chunks
|
|
52
|
+
Object.defineProperty(this, "onBatchProgress", {
|
|
53
|
+
enumerable: true,
|
|
54
|
+
configurable: true,
|
|
55
|
+
writable: true,
|
|
56
|
+
value: void 0
|
|
57
|
+
});
|
|
34
58
|
// Trim the key to remove any accidental whitespace
|
|
35
59
|
this.apiKey = (apiKey ?? '').trim();
|
|
36
60
|
}
|
|
@@ -51,15 +75,67 @@ export class OpenAIEmbeddingProvider {
|
|
|
51
75
|
if (texts.length === 0) {
|
|
52
76
|
return [];
|
|
53
77
|
}
|
|
54
|
-
|
|
55
|
-
|
|
78
|
+
// Split into batches
|
|
79
|
+
const batches = [];
|
|
56
80
|
for (let i = 0; i < texts.length; i += BATCH_SIZE) {
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
81
|
+
batches.push(texts.slice(i, i + BATCH_SIZE));
|
|
82
|
+
}
|
|
83
|
+
// Process batches with limited concurrency
|
|
84
|
+
const results = [];
|
|
85
|
+
let completed = 0;
|
|
86
|
+
for (let i = 0; i < batches.length; i += CONCURRENCY) {
|
|
87
|
+
const concurrentBatches = batches.slice(i, i + CONCURRENCY);
|
|
88
|
+
// Fire concurrent requests
|
|
89
|
+
const batchResults = await Promise.all(concurrentBatches.map(batch => this.embedBatchWithRetry(batch)));
|
|
90
|
+
// Flatten and collect results (Promise.all preserves order)
|
|
91
|
+
for (const result of batchResults) {
|
|
92
|
+
results.push(...result);
|
|
93
|
+
}
|
|
94
|
+
// Report progress after concurrent group completes
|
|
95
|
+
completed += concurrentBatches.length;
|
|
96
|
+
const processed = Math.min(completed * BATCH_SIZE, texts.length);
|
|
97
|
+
this.onBatchProgress?.(processed, texts.length);
|
|
60
98
|
}
|
|
61
99
|
return results;
|
|
62
100
|
}
|
|
101
|
+
/**
|
|
102
|
+
* Embed a batch with exponential backoff retry on rate limit errors.
|
|
103
|
+
*/
|
|
104
|
+
async embedBatchWithRetry(batch) {
|
|
105
|
+
let attempt = 0;
|
|
106
|
+
let backoffMs = INITIAL_BACKOFF_MS;
|
|
107
|
+
while (true) {
|
|
108
|
+
try {
|
|
109
|
+
const result = await this.embedBatch(batch);
|
|
110
|
+
// Clear throttle message on success (if was throttling)
|
|
111
|
+
if (attempt > 0)
|
|
112
|
+
this.onThrottle?.(null);
|
|
113
|
+
return result;
|
|
114
|
+
}
|
|
115
|
+
catch (error) {
|
|
116
|
+
if (this.isRateLimitError(error) && attempt < MAX_RETRIES) {
|
|
117
|
+
attempt++;
|
|
118
|
+
const secs = Math.round(backoffMs / 1000);
|
|
119
|
+
this.onThrottle?.(`Rate limited - retry ${attempt}/${MAX_RETRIES} in ${secs}s`);
|
|
120
|
+
await sleep(backoffMs);
|
|
121
|
+
backoffMs = Math.min(backoffMs * 2, MAX_BACKOFF_MS);
|
|
122
|
+
}
|
|
123
|
+
else {
|
|
124
|
+
throw error;
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
}
|
|
129
|
+
/**
|
|
130
|
+
* Check if an error is a rate limit error (429 or quota exceeded).
|
|
131
|
+
*/
|
|
132
|
+
isRateLimitError(error) {
|
|
133
|
+
if (error instanceof Error) {
|
|
134
|
+
const msg = error.message.toLowerCase();
|
|
135
|
+
return (msg.includes('429') || msg.includes('rate') || msg.includes('quota'));
|
|
136
|
+
}
|
|
137
|
+
return false;
|
|
138
|
+
}
|
|
63
139
|
async embedBatch(texts) {
|
|
64
140
|
const response = await fetch(`${OPENAI_API_BASE}/embeddings`, {
|
|
65
141
|
method: 'POST',
|
|
@@ -19,6 +19,47 @@ const ALWAYS_IGNORED = [
|
|
|
19
19
|
'.viberag',
|
|
20
20
|
'node_modules', // Fallback in case not in .gitignore
|
|
21
21
|
];
|
|
22
|
+
/**
|
|
23
|
+
* Lock files that should always be ignored.
|
|
24
|
+
* These are machine-generated and provide no value for code search.
|
|
25
|
+
*/
|
|
26
|
+
const ALWAYS_IGNORED_FILES = [
|
|
27
|
+
// JavaScript/TypeScript
|
|
28
|
+
'package-lock.json', // npm
|
|
29
|
+
'yarn.lock', // Yarn
|
|
30
|
+
'pnpm-lock.yaml', // pnpm
|
|
31
|
+
'bun.lockb', // Bun
|
|
32
|
+
// Python
|
|
33
|
+
'uv.lock', // UV
|
|
34
|
+
'poetry.lock', // Poetry
|
|
35
|
+
'Pipfile.lock', // Pipenv
|
|
36
|
+
// Ruby
|
|
37
|
+
'Gemfile.lock', // Bundler
|
|
38
|
+
// PHP
|
|
39
|
+
'composer.lock', // Composer
|
|
40
|
+
// Rust
|
|
41
|
+
'Cargo.lock', // Cargo
|
|
42
|
+
// Go
|
|
43
|
+
'go.sum', // Go modules
|
|
44
|
+
// Java/Kotlin
|
|
45
|
+
'gradle.lockfile', // Gradle
|
|
46
|
+
// C#/.NET
|
|
47
|
+
'packages.lock.json', // NuGet
|
|
48
|
+
// Dart
|
|
49
|
+
'pubspec.lock', // Pub
|
|
50
|
+
// Swift
|
|
51
|
+
'Package.resolved', // Swift PM
|
|
52
|
+
];
|
|
53
|
+
/**
|
|
54
|
+
* File patterns that should always be ignored.
|
|
55
|
+
* These are build artifacts with no semantic value for code search.
|
|
56
|
+
*/
|
|
57
|
+
const ALWAYS_IGNORED_PATTERNS = [
|
|
58
|
+
'*.min.js', // Minified JavaScript
|
|
59
|
+
'*.min.css', // Minified CSS
|
|
60
|
+
'*.map', // Source maps
|
|
61
|
+
'*.d.ts.map', // TypeScript declaration maps
|
|
62
|
+
];
|
|
22
63
|
/**
|
|
23
64
|
* Cache of Ignore instances per project root.
|
|
24
65
|
*/
|
|
@@ -37,8 +78,12 @@ export async function loadGitignore(projectRoot) {
|
|
|
37
78
|
return cached;
|
|
38
79
|
}
|
|
39
80
|
const ig = ignore();
|
|
40
|
-
// Add always-ignored patterns
|
|
81
|
+
// Add always-ignored patterns (directories)
|
|
41
82
|
ig.add(ALWAYS_IGNORED);
|
|
83
|
+
// Add always-ignored files (lock files)
|
|
84
|
+
ig.add(ALWAYS_IGNORED_FILES);
|
|
85
|
+
// Add always-ignored file patterns (minified, maps)
|
|
86
|
+
ig.add(ALWAYS_IGNORED_PATTERNS);
|
|
42
87
|
// Try to load .gitignore
|
|
43
88
|
const gitignorePath = path.join(projectRoot, '.gitignore');
|
|
44
89
|
try {
|
|
@@ -99,8 +144,16 @@ export function clearAllGitignoreCache() {
|
|
|
99
144
|
*/
|
|
100
145
|
export async function getGlobIgnorePatterns(projectRoot) {
|
|
101
146
|
const patterns = [];
|
|
102
|
-
// Always exclude these (same as ALWAYS_IGNORED)
|
|
147
|
+
// Always exclude these directories (same as ALWAYS_IGNORED)
|
|
103
148
|
patterns.push('**/.git/**', '**/.viberag/**', '**/node_modules/**');
|
|
149
|
+
// Always exclude lock files (same as ALWAYS_IGNORED_FILES)
|
|
150
|
+
for (const file of ALWAYS_IGNORED_FILES) {
|
|
151
|
+
patterns.push(`**/${file}`);
|
|
152
|
+
}
|
|
153
|
+
// Always exclude file patterns (minified, maps)
|
|
154
|
+
for (const pattern of ALWAYS_IGNORED_PATTERNS) {
|
|
155
|
+
patterns.push(`**/${pattern}`);
|
|
156
|
+
}
|
|
104
157
|
// Try to load .gitignore
|
|
105
158
|
const gitignorePath = path.join(projectRoot, '.gitignore');
|
|
106
159
|
try {
|
|
@@ -55,6 +55,9 @@ export declare class Indexer {
|
|
|
55
55
|
/**
|
|
56
56
|
* Process a batch of files: read, chunk, embed, and prepare CodeChunks.
|
|
57
57
|
*
|
|
58
|
+
* Strategy: Collect all chunks from all files first, then embed them
|
|
59
|
+
* together with full concurrency for maximum throughput.
|
|
60
|
+
*
|
|
58
61
|
* Error handling strategy:
|
|
59
62
|
* - File read/parse errors: Log and continue (file-specific, recoverable)
|
|
60
63
|
* - Embedding/storage errors: Let propagate (fatal, affects all files)
|
|
@@ -142,6 +142,17 @@ export class Indexer {
|
|
|
142
142
|
// 6. Process new and modified files
|
|
143
143
|
const filesToProcess = [...diff.new, ...diff.modified];
|
|
144
144
|
const totalFiles = filesToProcess.length;
|
|
145
|
+
// Track cumulative chunks for progress display
|
|
146
|
+
let totalChunksProcessed = 0;
|
|
147
|
+
let lastProgress = 0;
|
|
148
|
+
// Wire throttle callback for rate limit feedback (API providers only)
|
|
149
|
+
if ('onThrottle' in embeddings) {
|
|
150
|
+
embeddings.onThrottle =
|
|
151
|
+
message => {
|
|
152
|
+
// Pass throttle message to UI - shown in yellow when set
|
|
153
|
+
progressCallback?.(lastProgress, totalFiles, 'Indexing files', message, totalChunksProcessed);
|
|
154
|
+
};
|
|
155
|
+
}
|
|
145
156
|
if (totalFiles > 0) {
|
|
146
157
|
this.log('info', `Processing ${totalFiles} files`);
|
|
147
158
|
// First, delete existing chunks for modified files
|
|
@@ -153,7 +164,15 @@ export class Indexer {
|
|
|
153
164
|
const batchSize = 10;
|
|
154
165
|
for (let i = 0; i < filesToProcess.length; i += batchSize) {
|
|
155
166
|
const batch = filesToProcess.slice(i, i + batchSize);
|
|
156
|
-
const batchChunks = await this.processFileBatch(batch, chunker, embeddings, storage, stats
|
|
167
|
+
const batchChunks = await this.processFileBatch(batch, chunker, embeddings, storage, stats, {
|
|
168
|
+
totalFiles,
|
|
169
|
+
currentFileOffset: i,
|
|
170
|
+
progressCallback,
|
|
171
|
+
onChunksProcessed: (count) => {
|
|
172
|
+
totalChunksProcessed += count;
|
|
173
|
+
progressCallback?.(i, totalFiles, 'Indexing files', null, totalChunksProcessed);
|
|
174
|
+
},
|
|
175
|
+
});
|
|
157
176
|
if (batchChunks.length > 0) {
|
|
158
177
|
// Use addChunks after table reset to avoid schema mismatch,
|
|
159
178
|
// upsertChunks for normal incremental updates
|
|
@@ -166,7 +185,8 @@ export class Indexer {
|
|
|
166
185
|
stats.chunksAdded += batchChunks.length;
|
|
167
186
|
}
|
|
168
187
|
const progress = Math.round(((i + batch.length) / totalFiles) * 100);
|
|
169
|
-
|
|
188
|
+
lastProgress = i + batch.length;
|
|
189
|
+
progressCallback?.(i + batch.length, totalFiles, 'Indexing files', null, totalChunksProcessed);
|
|
170
190
|
this.log('debug', `Progress: ${progress}% (${i + batch.length}/${totalFiles})`);
|
|
171
191
|
}
|
|
172
192
|
}
|
|
@@ -218,79 +238,112 @@ export class Indexer {
|
|
|
218
238
|
/**
|
|
219
239
|
* Process a batch of files: read, chunk, embed, and prepare CodeChunks.
|
|
220
240
|
*
|
|
241
|
+
* Strategy: Collect all chunks from all files first, then embed them
|
|
242
|
+
* together with full concurrency for maximum throughput.
|
|
243
|
+
*
|
|
221
244
|
* Error handling strategy:
|
|
222
245
|
* - File read/parse errors: Log and continue (file-specific, recoverable)
|
|
223
246
|
* - Embedding/storage errors: Let propagate (fatal, affects all files)
|
|
224
247
|
*/
|
|
225
|
-
async processFileBatch(filepaths, chunker, embeddings, storage, stats) {
|
|
226
|
-
const
|
|
248
|
+
async processFileBatch(filepaths, chunker, embeddings, storage, stats, progressContext) {
|
|
249
|
+
const fileDataList = [];
|
|
227
250
|
for (const filepath of filepaths) {
|
|
228
|
-
// Phase 1: File reading and chunking (recoverable errors)
|
|
229
|
-
let content;
|
|
230
|
-
let fileHash;
|
|
231
|
-
let chunks;
|
|
232
251
|
try {
|
|
233
252
|
const absolutePath = path.join(this.projectRoot, filepath);
|
|
234
|
-
content = await fs.readFile(absolutePath, 'utf-8');
|
|
235
|
-
fileHash = (await import('../merkle/hash.js')).computeStringHash(content);
|
|
236
|
-
|
|
237
|
-
|
|
253
|
+
const content = await fs.readFile(absolutePath, 'utf-8');
|
|
254
|
+
const fileHash = (await import('../merkle/hash.js')).computeStringHash(content);
|
|
255
|
+
const chunks = await chunker.chunkFile(filepath, content, this.config.chunkMaxSize);
|
|
256
|
+
fileDataList.push({ filepath, fileHash, chunks });
|
|
238
257
|
}
|
|
239
258
|
catch (error) {
|
|
240
|
-
// File-specific error (read/parse) - log and continue with other files
|
|
241
259
|
this.log('warn', `Failed to read/parse file: ${filepath}`, error);
|
|
242
260
|
continue;
|
|
243
261
|
}
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
const
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
// Embed contextHeader + text for semantic relevance
|
|
253
|
-
const texts = missingChunks.map(c => c.contextHeader ? `${c.contextHeader}\n${c.text}` : c.text);
|
|
254
|
-
const newEmbeddings = await embeddings.embed(texts);
|
|
255
|
-
stats.embeddingsComputed += missingChunks.length;
|
|
256
|
-
// Cache the new embeddings
|
|
257
|
-
const cacheEntries = missingChunks.map((chunk, i) => ({
|
|
258
|
-
contentHash: chunk.contentHash,
|
|
259
|
-
vector: newEmbeddings[i],
|
|
260
|
-
createdAt: new Date().toISOString(),
|
|
261
|
-
}));
|
|
262
|
-
await storage.cacheEmbeddings(cacheEntries);
|
|
263
|
-
// Add to cachedEmbeddings map
|
|
264
|
-
missingChunks.forEach((chunk, i) => {
|
|
265
|
-
cachedEmbeddings.set(chunk.contentHash, newEmbeddings[i]);
|
|
262
|
+
}
|
|
263
|
+
const allChunksWithContext = [];
|
|
264
|
+
for (const fd of fileDataList) {
|
|
265
|
+
for (const chunk of fd.chunks) {
|
|
266
|
+
allChunksWithContext.push({
|
|
267
|
+
chunk,
|
|
268
|
+
filepath: fd.filepath,
|
|
269
|
+
fileHash: fd.fileHash,
|
|
266
270
|
});
|
|
267
271
|
}
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
}
|
|
272
|
+
}
|
|
273
|
+
if (allChunksWithContext.length === 0) {
|
|
274
|
+
return [];
|
|
275
|
+
}
|
|
276
|
+
// Phase 2: Check embedding cache for ALL chunks at once
|
|
277
|
+
const contentHashes = allChunksWithContext.map(c => c.chunk.contentHash);
|
|
278
|
+
const cachedEmbeddings = await storage.getCachedEmbeddings(contentHashes);
|
|
279
|
+
// Find all cache misses
|
|
280
|
+
const missingChunksWithContext = allChunksWithContext.filter(c => !cachedEmbeddings.has(c.chunk.contentHash));
|
|
281
|
+
stats.embeddingsCached +=
|
|
282
|
+
allChunksWithContext.length - missingChunksWithContext.length;
|
|
283
|
+
// Phase 3: Embed ALL missing chunks together (with full concurrency)
|
|
284
|
+
if (missingChunksWithContext.length > 0) {
|
|
285
|
+
// Track chunks processed for progress updates
|
|
286
|
+
let lastReportedChunks = 0;
|
|
287
|
+
// Wire batch progress callback to report incremental chunks
|
|
288
|
+
if (progressContext?.onChunksProcessed && 'onBatchProgress' in embeddings) {
|
|
289
|
+
embeddings.onBatchProgress = (processed, _total) => {
|
|
290
|
+
// Report only the delta since last update
|
|
291
|
+
const delta = processed - lastReportedChunks;
|
|
292
|
+
if (delta > 0) {
|
|
293
|
+
progressContext.onChunksProcessed(delta);
|
|
294
|
+
lastReportedChunks = processed;
|
|
295
|
+
}
|
|
296
|
+
};
|
|
297
|
+
}
|
|
298
|
+
// Embed all chunks together
|
|
299
|
+
const texts = missingChunksWithContext.map(c => c.chunk.contextHeader
|
|
300
|
+
? `${c.chunk.contextHeader}\n${c.chunk.text}`
|
|
301
|
+
: c.chunk.text);
|
|
302
|
+
const newEmbeddings = await embeddings.embed(texts);
|
|
303
|
+
stats.embeddingsComputed += missingChunksWithContext.length;
|
|
304
|
+
// Report any remaining chunks not yet reported
|
|
305
|
+
const remainingDelta = missingChunksWithContext.length - lastReportedChunks;
|
|
306
|
+
if (remainingDelta > 0 && progressContext?.onChunksProcessed) {
|
|
307
|
+
progressContext.onChunksProcessed(remainingDelta);
|
|
293
308
|
}
|
|
309
|
+
// Clear batch progress callback
|
|
310
|
+
if ('onBatchProgress' in embeddings) {
|
|
311
|
+
embeddings.onBatchProgress = undefined;
|
|
312
|
+
}
|
|
313
|
+
// Cache the new embeddings
|
|
314
|
+
const cacheEntries = missingChunksWithContext.map((c, i) => ({
|
|
315
|
+
contentHash: c.chunk.contentHash,
|
|
316
|
+
vector: newEmbeddings[i],
|
|
317
|
+
createdAt: new Date().toISOString(),
|
|
318
|
+
}));
|
|
319
|
+
await storage.cacheEmbeddings(cacheEntries);
|
|
320
|
+
// Add to cachedEmbeddings map
|
|
321
|
+
missingChunksWithContext.forEach((c, i) => {
|
|
322
|
+
cachedEmbeddings.set(c.chunk.contentHash, newEmbeddings[i]);
|
|
323
|
+
});
|
|
324
|
+
}
|
|
325
|
+
// Phase 4: Build CodeChunk objects
|
|
326
|
+
const allChunks = [];
|
|
327
|
+
for (const { chunk, filepath, fileHash } of allChunksWithContext) {
|
|
328
|
+
const vector = cachedEmbeddings.get(chunk.contentHash);
|
|
329
|
+
allChunks.push({
|
|
330
|
+
id: `${filepath}:${chunk.startLine}`,
|
|
331
|
+
vector,
|
|
332
|
+
text: chunk.text,
|
|
333
|
+
contentHash: chunk.contentHash,
|
|
334
|
+
filepath,
|
|
335
|
+
filename: path.basename(filepath),
|
|
336
|
+
extension: path.extname(filepath),
|
|
337
|
+
type: chunk.type,
|
|
338
|
+
name: chunk.name,
|
|
339
|
+
startLine: chunk.startLine,
|
|
340
|
+
endLine: chunk.endLine,
|
|
341
|
+
fileHash,
|
|
342
|
+
signature: chunk.signature,
|
|
343
|
+
docstring: chunk.docstring,
|
|
344
|
+
isExported: chunk.isExported,
|
|
345
|
+
decoratorNames: chunk.decoratorNames,
|
|
346
|
+
});
|
|
294
347
|
}
|
|
295
348
|
return allChunks;
|
|
296
349
|
}
|
|
@@ -60,8 +60,13 @@ export interface IndexStats {
|
|
|
60
60
|
}
|
|
61
61
|
/**
|
|
62
62
|
* Progress callback for indexing operations.
|
|
63
|
+
* @param current - Current progress count
|
|
64
|
+
* @param total - Total items (0 for indeterminate)
|
|
65
|
+
* @param stage - Human-readable stage name
|
|
66
|
+
* @param throttleMessage - Rate limit message (shown in yellow) or null to clear
|
|
67
|
+
* @param chunksProcessed - Number of chunks embedded so far
|
|
63
68
|
*/
|
|
64
|
-
export type ProgressCallback = (current: number, total: number, stage: string) => void;
|
|
69
|
+
export type ProgressCallback = (current: number, total: number, stage: string, throttleMessage?: string | null, chunksProcessed?: number) => void;
|
|
65
70
|
/**
|
|
66
71
|
* Create empty index stats.
|
|
67
72
|
*/
|
package/dist/rag/search/index.js
CHANGED
|
@@ -20,8 +20,6 @@ export { ftsSearch, ensureFtsIndex } from './fts.js';
|
|
|
20
20
|
export { hybridRerank } from './hybrid.js';
|
|
21
21
|
/** Default search limit */
|
|
22
22
|
const DEFAULT_LIMIT = 10;
|
|
23
|
-
/** Exhaustive mode limit (high but bounded) */
|
|
24
|
-
const EXHAUSTIVE_LIMIT = 500;
|
|
25
23
|
/** Default BM25 weight for hybrid search */
|
|
26
24
|
const DEFAULT_BM25_WEIGHT = 0.3;
|
|
27
25
|
/** Default oversample multiplier for hybrid search */
|
|
@@ -79,9 +77,7 @@ export class SearchEngine {
|
|
|
79
77
|
async search(query, options = {}) {
|
|
80
78
|
const start = Date.now();
|
|
81
79
|
const mode = options.mode ?? 'hybrid';
|
|
82
|
-
const limit = options.
|
|
83
|
-
? EXHAUSTIVE_LIMIT
|
|
84
|
-
: (options.limit ?? DEFAULT_LIMIT);
|
|
80
|
+
const limit = options.limit ?? DEFAULT_LIMIT;
|
|
85
81
|
const filterClause = buildFilterClause(options.filters);
|
|
86
82
|
await this.ensureInitialized();
|
|
87
83
|
const table = await this.getTable();
|
|
@@ -104,10 +100,6 @@ export class SearchEngine {
|
|
|
104
100
|
results = await this.searchHybrid(table, query, limit, options.bm25Weight ?? DEFAULT_BM25_WEIGHT, filterClause, options.minScore, options.autoBoost ?? true, options.autoBoostThreshold ?? 0.3, options.returnDebug ?? false);
|
|
105
101
|
break;
|
|
106
102
|
}
|
|
107
|
-
// Add total matches for exhaustive mode
|
|
108
|
-
if (options.exhaustive) {
|
|
109
|
-
results.totalMatches = results.results.length;
|
|
110
|
-
}
|
|
111
103
|
results.elapsedMs = Date.now() - start;
|
|
112
104
|
return results;
|
|
113
105
|
}
|
|
@@ -74,8 +74,6 @@ export interface SearchResults {
|
|
|
74
74
|
searchType: SearchMode;
|
|
75
75
|
/** Time taken in milliseconds */
|
|
76
76
|
elapsedMs: number;
|
|
77
|
-
/** Total matches (when exhaustive=true) */
|
|
78
|
-
totalMatches?: number;
|
|
79
77
|
/** Debug info for hybrid search (when return_debug=true) */
|
|
80
78
|
debug?: SearchDebugInfo;
|
|
81
79
|
}
|
|
@@ -111,8 +109,6 @@ export interface SearchOptions {
|
|
|
111
109
|
limit?: number;
|
|
112
110
|
/** Weight for BM25 in hybrid search (0.0-1.0, default: 0.3) */
|
|
113
111
|
bm25Weight?: number;
|
|
114
|
-
/** Return all matches above threshold (default: false) */
|
|
115
|
-
exhaustive?: boolean;
|
|
116
112
|
/** Minimum score threshold 0-1 (default: 0) */
|
|
117
113
|
minScore?: number;
|
|
118
114
|
/** Transparent filters */
|