viberag 0.3.2 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/commands/handlers.js +9 -0
- package/dist/cli/components/InitWizard.js +55 -4
- package/dist/common/types.d.ts +6 -0
- package/dist/rag/config/index.d.ts +2 -0
- package/dist/rag/embeddings/api-utils.d.ts +81 -0
- package/dist/rag/embeddings/api-utils.js +150 -0
- package/dist/rag/embeddings/gemini.d.ts +0 -8
- package/dist/rag/embeddings/gemini.js +10 -70
- package/dist/rag/embeddings/index.d.ts +3 -1
- package/dist/rag/embeddings/index.js +4 -1
- package/dist/rag/embeddings/local.d.ts +6 -1
- package/dist/rag/embeddings/local.js +45 -12
- package/dist/rag/embeddings/mistral.d.ts +0 -8
- package/dist/rag/embeddings/mistral.js +10 -70
- package/dist/rag/embeddings/mock.d.ts +35 -0
- package/dist/rag/embeddings/mock.js +69 -0
- package/dist/rag/embeddings/openai.d.ts +7 -9
- package/dist/rag/embeddings/openai.js +25 -73
- package/dist/rag/indexer/indexer.js +3 -2
- package/dist/rag/search/index.js +1 -1
- package/package.json +4 -1
package/dist/cli/commands/handlers.js
CHANGED
@@ -41,12 +41,21 @@ export async function runInit(projectRoot, isReinit = false, wizardConfig) {
     // Build config from wizard choices
     const provider = wizardConfig?.provider ?? 'gemini';
     const { model, dimensions } = PROVIDER_CONFIGS[provider];
+    // Map OpenAI region to base URL
+    const openaiBaseUrl = wizardConfig?.openaiRegion
+        ? {
+            default: undefined,
+            us: 'https://us.api.openai.com/v1',
+            eu: 'https://eu.api.openai.com/v1',
+        }[wizardConfig.openaiRegion]
+        : undefined;
     const config = {
         ...DEFAULT_CONFIG,
         embeddingProvider: provider,
         embeddingModel: model,
         embeddingDimensions: dimensions,
         ...(wizardConfig?.apiKey && { apiKey: wizardConfig.apiKey }),
+        ...(openaiBaseUrl && { openaiBaseUrl }),
     };
     // Save config
     await saveConfig(projectRoot, config);
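Note that the region is only persisted when it maps to a non-default endpoint. A sketch of the config this produces when the wizard choices are { provider: 'openai', openaiRegion: 'eu' } (DEFAULT_CONFIG fields elided; the API key is a placeholder):

```ts
// Sketch only - field values follow the diff above and openai.js defaults.
const config = {
    embeddingProvider: 'openai',
    embeddingModel: 'text-embedding-3-small', // from PROVIDER_CONFIGS for openai
    embeddingDimensions: 1536,
    apiKey: 'sk-...', // placeholder
    openaiBaseUrl: 'https://eu.api.openai.com/v1', // omitted entirely for 'default'
};
```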
package/dist/cli/components/InitWizard.js
CHANGED
@@ -176,6 +176,21 @@ const API_KEY_ACTION_ITEMS = [
     { label: 'Keep existing API key', value: 'keep' },
     { label: 'Enter new API key', value: 'new' },
 ];
+// OpenAI region options for data residency
+const OPENAI_REGION_ITEMS = [
+    {
+        label: 'Default (api.openai.com) - Recommended',
+        value: 'default',
+    },
+    {
+        label: 'US (us.api.openai.com) - US Data Residency',
+        value: 'us',
+    },
+    {
+        label: 'EU (eu.api.openai.com) - EU Data Residency',
+        value: 'eu',
+    },
+];
 /**
  * Simple text input component for API key entry.
  * Uses a ref to accumulate input, which handles paste better than
@@ -227,6 +242,8 @@ export function InitWizard({ step, config, isReinit, existingApiKey, existingPro
     // State for API key input
     const [apiKeyInput, setApiKeyInput] = useState('');
     const [apiKeyAction, setApiKeyAction] = useState(null);
+    // State for OpenAI region selection (shown after API key for OpenAI)
+    const [showRegionSelect, setShowRegionSelect] = useState(false);
     // Handle Escape to cancel
     useInput((input, key) => {
         if (key.escape || (key.ctrl && input === 'c')) {
@@ -277,9 +294,10 @@ export function InitWizard({ step, config, isReinit, existingApiKey, existingPro
         React.createElement(Text, { bold: true }, "Choose Embedding Provider"),
         React.createElement(Box, { marginTop: 1 },
             React.createElement(SelectInput, { items: PROVIDER_ITEMS, onSelect: item => {
-                    // Reset API key state when provider changes
+                    // Reset API key and region state when provider changes
                     setApiKeyInput('');
                     setApiKeyAction(null);
+                    setShowRegionSelect(false);
                     // Use relative increment: step + 1
                     onStepChange(normalizedStep + 1, { provider: item.value });
                 } })),
@@ -298,6 +316,21 @@ export function InitWizard({ step, config, isReinit, existingApiKey, existingPro
     const provider = currentProvider;
     const info = PROVIDER_CONFIG[provider];
     const apiKeyUrl = API_KEY_URLS[provider];
+    const isOpenAI = provider === 'openai';
+    // Show OpenAI region selection after API key is entered
+    if (isOpenAI && showRegionSelect) {
+        return (React.createElement(Box, { flexDirection: "column", borderStyle: "round", paddingX: 2, paddingY: 1 },
+            React.createElement(Text, { bold: true }, "Select OpenAI API Region"),
+            React.createElement(Box, { marginTop: 1, flexDirection: "column" },
+                React.createElement(Text, { dimColor: true }, "Corporate accounts with data residency require regional endpoints."),
+                React.createElement(Text, { dimColor: true }, "Most users should select Default.")),
+            React.createElement(Box, { marginTop: 1 },
+                React.createElement(SelectInput, { items: OPENAI_REGION_ITEMS, onSelect: item => {
+                        onStepChange(normalizedStep + 1, { openaiRegion: item.value });
+                    } })),
+            React.createElement(Box, { marginTop: 1 },
+                React.createElement(Text, { dimColor: true }, "\u2191/\u2193 navigate, Enter select, Esc cancel"))));
+    }
     return (React.createElement(Box, { flexDirection: "column", borderStyle: "round", paddingX: 2, paddingY: 1 },
         React.createElement(Text, { bold: true },
             "Configure ",
@@ -316,8 +349,18 @@ export function InitWizard({ step, config, isReinit, existingApiKey, existingPro
     React.createElement(Box, { marginTop: 1 },
         React.createElement(SelectInput, { items: API_KEY_ACTION_ITEMS, onSelect: item => {
                 if (item.value === 'keep') {
-                    // Keep existing key
-                    onStepChange(normalizedStep
+                    // Keep existing key
+                    onStepChange(normalizedStep, { apiKey: existingApiKey });
+                    if (isOpenAI) {
+                        // Show region selection for OpenAI
+                        setShowRegionSelect(true);
+                    }
+                    else {
+                        // Advance to confirmation for other providers
+                        onStepChange(normalizedStep + 1, {
+                            apiKey: existingApiKey,
+                        });
+                    }
                 }
                 else {
                     // Show text input for new key
@@ -325,7 +368,15 @@ export function InitWizard({ step, config, isReinit, existingApiKey, existingPro
                 }
             } })))) : (React.createElement(ApiKeyInputStep, { providerName: info.name, apiKeyInput: apiKeyInput, setApiKeyInput: setApiKeyInput, onSubmit: key => {
             if (key.trim()) {
-                onStepChange(normalizedStep
+                onStepChange(normalizedStep, { apiKey: key.trim() });
+                if (isOpenAI) {
+                    // Show region selection for OpenAI
+                    setShowRegionSelect(true);
+                }
+                else {
+                    // Advance to confirmation for other providers
+                    onStepChange(normalizedStep + 1, { apiKey: key.trim() });
+                }
             }
         } })),
     React.createElement(Box, { marginTop: 1 },
package/dist/common/types.d.ts
CHANGED
@@ -91,6 +91,10 @@ export type IndexDisplayStats = {
  * - openai: text-embedding-3-small (1536d) - Fast API
  */
 export type EmbeddingProviderType = 'local' | 'local-4b' | 'gemini' | 'mistral' | 'openai';
+/**
+ * OpenAI API regional endpoints for data residency.
+ */
+export type OpenAIRegion = 'default' | 'us' | 'eu';
 /**
  * Configuration collected from the init wizard.
  */
@@ -98,6 +102,8 @@ export type InitWizardConfig = {
     provider: EmbeddingProviderType;
     /** API key for cloud providers (gemini, mistral, openai) */
     apiKey?: string;
+    /** OpenAI regional endpoint (for corporate accounts with data residency) */
+    openaiRegion?: OpenAIRegion;
 };
 /**
  * MCP editor identifiers.
package/dist/rag/config/index.d.ts
CHANGED
@@ -20,6 +20,8 @@ export interface ViberagConfig {
     embeddingDimensions: number;
     /** API key for cloud providers (gemini, mistral, openai) */
     apiKey?: string;
+    /** OpenAI API base URL (for corporate accounts with data residency) */
+    openaiBaseUrl?: string;
     extensions: string[];
     excludePatterns: string[];
     chunkMaxSize: number;
package/dist/rag/embeddings/api-utils.d.ts
ADDED
@@ -0,0 +1,81 @@
+/**
+ * Shared utilities for API-based embedding providers.
+ * Provides common retry logic, rate limiting, and concurrency patterns.
+ */
+/** Max concurrent API requests */
+export declare const CONCURRENCY = 5;
+/** Delay (ms) between batch completion and next batch start (per slot) */
+export declare const BATCH_DELAY_MS = 200;
+/** Max retry attempts on rate limit */
+export declare const MAX_RETRIES = 12;
+/** Initial backoff (ms) */
+export declare const INITIAL_BACKOFF_MS = 1000;
+/** Maximum backoff (ms) */
+export declare const MAX_BACKOFF_MS = 60000;
+/**
+ * Sleep for a specified duration.
+ */
+export declare function sleep(ms: number): Promise<void>;
+/**
+ * Check if an error is a rate limit error (429 or quota exceeded).
+ */
+export declare function isRateLimitError(error: unknown): boolean;
+/**
+ * Check if an error is a known transient API error that should be retried.
+ *
+ * GEMINI TRANSIENT BUG:
+ * The Gemini API has a known server-side bug where it intermittently returns
+ * a 400 "API key expired" error even when the key is valid. This is NOT an
+ * actual authentication failure - it's a transient error that resolves on retry.
+ *
+ * Evidence:
+ * - Users report: "if I try the same request again a few times, it usually works fine"
+ * - New API keys don't fix it
+ * - Same key works in curl but fails randomly via API clients
+ * - Google has acknowledged this as a P1/P2 bug
+ *
+ * GitHub issues documenting this bug:
+ * - https://github.com/google-gemini/gemini-cli/issues/4430
+ * - https://github.com/google-gemini/gemini-cli/issues/1712
+ * - https://github.com/google-gemini/gemini-cli/issues/8675
+ *
+ * We detect this specific error and retry it rather than failing immediately.
+ */
+export declare function isTransientApiError(error: unknown): boolean;
+/**
+ * Check if an error should trigger a retry (rate limit OR transient error).
+ */
+export declare function isRetriableError(error: unknown): boolean;
+/**
+ * Callbacks for rate limiting and progress reporting.
+ */
+export interface ApiProviderCallbacks {
+    onThrottle?: (message: string | null) => void;
+    onBatchProgress?: (processed: number, total: number) => void;
+}
+/**
+ * Execute an async function with exponential backoff retry on retriable errors.
+ *
+ * Retries on:
+ * - Rate limit errors (429, quota exceeded)
+ * - Transient API errors (e.g., Gemini's spurious "API key expired" bug)
+ *
+ * @param fn - The async function to execute
+ * @param callbacks - Optional callbacks for throttle notifications
+ * @returns The result of the function
+ */
+export declare function withRetry<T>(fn: () => Promise<T>, callbacks?: ApiProviderCallbacks): Promise<T>;
+/**
+ * Process batches with p-limit sliding window concurrency and inter-batch delay.
+ * Reports progress per-batch (more granular than group-based).
+ *
+ * @param batches - Array of batches to process
+ * @param processBatch - Function to process a single batch
+ * @param callbacks - Optional callbacks for progress reporting
+ * @returns Flattened array of results
+ */
+export declare function processBatchesWithLimit<T>(batches: T[][], processBatch: (batch: T[]) => Promise<number[][]>, callbacks?: ApiProviderCallbacks): Promise<number[][]>;
+/**
+ * Split an array into batches of a specified size.
+ */
+export declare function chunk<T>(array: T[], size: number): T[][];
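A quick worked check of the backoff schedule these constants imply: waits double from 1 s and cap at 60 s, so the 12 retries wait 1, 2, 4, 8, 16, 32 s and then six times 60 s, roughly 423 s of waiting in the worst case. A sketch:

```ts
// Sketch: worst-case total wait with MAX_RETRIES = 12,
// INITIAL_BACKOFF_MS = 1000, MAX_BACKOFF_MS = 60000 (values from api-utils).
let backoffMs = 1000;
let totalMs = 0;
for (let attempt = 1; attempt <= 12; attempt++) {
    totalMs += backoffMs;
    backoffMs = Math.min(backoffMs * 2, 60000); // same doubling and cap as withRetry
}
console.log(totalMs / 1000); // 1+2+4+8+16+32 + 6*60 = 423 seconds ≈ 7 minutes
```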
package/dist/rag/embeddings/api-utils.js
ADDED
@@ -0,0 +1,150 @@
+/**
+ * Shared utilities for API-based embedding providers.
+ * Provides common retry logic, rate limiting, and concurrency patterns.
+ */
+import pLimit from 'p-limit';
+// ============================================================================
+// Constants
+// ============================================================================
+/** Max concurrent API requests */
+export const CONCURRENCY = 5;
+/** Delay (ms) between batch completion and next batch start (per slot) */
+export const BATCH_DELAY_MS = 200;
+/** Max retry attempts on rate limit */
+export const MAX_RETRIES = 12;
+/** Initial backoff (ms) */
+export const INITIAL_BACKOFF_MS = 1000;
+/** Maximum backoff (ms) */
+export const MAX_BACKOFF_MS = 60000;
+// ============================================================================
+// Utility Functions
+// ============================================================================
+/**
+ * Sleep for a specified duration.
+ */
+export function sleep(ms) {
+    return new Promise(resolve => setTimeout(resolve, ms));
+}
+/**
+ * Check if an error is a rate limit error (429 or quota exceeded).
+ */
+export function isRateLimitError(error) {
+    if (error instanceof Error) {
+        const msg = error.message.toLowerCase();
+        return msg.includes('429') || msg.includes('rate') || msg.includes('quota');
+    }
+    return false;
+}
+/**
+ * Check if an error is a known transient API error that should be retried.
+ *
+ * GEMINI TRANSIENT BUG:
+ * The Gemini API has a known server-side bug where it intermittently returns
+ * a 400 "API key expired" error even when the key is valid. This is NOT an
+ * actual authentication failure - it's a transient error that resolves on retry.
+ *
+ * Evidence:
+ * - Users report: "if I try the same request again a few times, it usually works fine"
+ * - New API keys don't fix it
+ * - Same key works in curl but fails randomly via API clients
+ * - Google has acknowledged this as a P1/P2 bug
+ *
+ * GitHub issues documenting this bug:
+ * - https://github.com/google-gemini/gemini-cli/issues/4430
+ * - https://github.com/google-gemini/gemini-cli/issues/1712
+ * - https://github.com/google-gemini/gemini-cli/issues/8675
+ *
+ * We detect this specific error and retry it rather than failing immediately.
+ */
+export function isTransientApiError(error) {
+    if (error instanceof Error) {
+        const msg = error.message.toLowerCase();
+        // Gemini transient "API key expired" bug (400 status)
+        // The specific message is: "API key expired. Please renew the API key."
+        // We check for this specific pattern to avoid retrying actual auth failures
+        if (msg.includes('api key expired') &&
+            (msg.includes('400') || msg.includes('invalid_argument'))) {
+            return true;
+        }
+    }
+    return false;
+}
+/**
+ * Check if an error should trigger a retry (rate limit OR transient error).
+ */
+export function isRetriableError(error) {
+    return isRateLimitError(error) || isTransientApiError(error);
+}
+/**
+ * Execute an async function with exponential backoff retry on retriable errors.
+ *
+ * Retries on:
+ * - Rate limit errors (429, quota exceeded)
+ * - Transient API errors (e.g., Gemini's spurious "API key expired" bug)
+ *
+ * @param fn - The async function to execute
+ * @param callbacks - Optional callbacks for throttle notifications
+ * @returns The result of the function
+ */
+export async function withRetry(fn, callbacks) {
+    let attempt = 0;
+    let backoffMs = INITIAL_BACKOFF_MS;
+    while (true) {
+        try {
+            const result = await fn();
+            // Clear throttle message on success (if was throttling)
+            if (attempt > 0)
+                callbacks?.onThrottle?.(null);
+            return result;
+        }
+        catch (error) {
+            if (isRetriableError(error) && attempt < MAX_RETRIES) {
+                attempt++;
+                const secs = Math.round(backoffMs / 1000);
+                // Provide context-appropriate message
+                const isTransient = isTransientApiError(error);
+                const reason = isTransient ? 'Transient API error' : 'Rate limited';
+                callbacks?.onThrottle?.(`${reason} - retry ${attempt}/${MAX_RETRIES} in ${secs}s`);
+                await sleep(backoffMs);
+                backoffMs = Math.min(backoffMs * 2, MAX_BACKOFF_MS);
+            }
+            else {
+                throw error;
+            }
+        }
+    }
+}
+/**
+ * Process batches with p-limit sliding window concurrency and inter-batch delay.
+ * Reports progress per-batch (more granular than group-based).
+ *
+ * @param batches - Array of batches to process
+ * @param processBatch - Function to process a single batch
+ * @param callbacks - Optional callbacks for progress reporting
+ * @returns Flattened array of results
+ */
+export async function processBatchesWithLimit(batches, processBatch, callbacks) {
+    const limit = pLimit(CONCURRENCY);
+    let processedItems = 0;
+    const totalItems = batches.reduce((sum, batch) => sum + batch.length, 0);
+    const batchResults = await Promise.all(batches.map(batch => limit(async () => {
+        const result = await processBatch(batch);
+        // Delay before releasing the slot (rate limit protection)
+        await sleep(BATCH_DELAY_MS);
+        // Report progress per-batch
+        processedItems += batch.length;
+        callbacks?.onBatchProgress?.(processedItems, totalItems);
+        return result;
+    })));
+    return batchResults.flat();
+}
+/**
+ * Split an array into batches of a specified size.
+ */
+export function chunk(array, size) {
+    const batches = [];
+    for (let i = 0; i < array.length; i += size) {
+        batches.push(array.slice(i, i + size));
+    }
+    return batches;
+}
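With these utilities, each API provider's embed() reduces to one pattern: chunk texts into size-capped batches, run them through five concurrent slots, and wrap each batch call in the shared retry. A minimal usage sketch; embedBatch here is a hypothetical stand-in for a provider's real API call:

```ts
import { chunk, processBatchesWithLimit, withRetry } from './api-utils.js';

// Hypothetical per-batch API call (a real provider POSTs to its embeddings endpoint).
async function embedBatch(batch: string[]): Promise<number[][]> {
    return batch.map(() => [0.1, 0.2, 0.3]); // placeholder vectors
}

const texts = ['function add(a, b) { return a + b; }', 'const x = 1;'];
const callbacks = {
    onThrottle: (msg: string | null) => console.log(msg ?? 'recovered'),
    onBatchProgress: (done: number, total: number) => console.log(`${done}/${total}`),
};
const batches = chunk(texts, 16); // e.g., Gemini's BATCH_SIZE
const vectors = await processBatchesWithLimit(
    batches,
    batch => withRetry(() => embedBatch(batch), callbacks),
    callbacks,
);
```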
package/dist/rag/embeddings/gemini.d.ts
CHANGED
@@ -22,14 +22,6 @@ export declare class GeminiEmbeddingProvider implements EmbeddingProvider {
     constructor(apiKey?: string);
     initialize(_onProgress?: ModelProgressCallback): Promise<void>;
     embed(texts: string[]): Promise<number[][]>;
-    /**
-     * Embed a batch with exponential backoff retry on rate limit errors.
-     */
-    private embedBatchWithRetry;
-    /**
-     * Check if an error is a rate limit error (429 or quota exceeded).
-     */
-    private isRateLimitError;
     private embedBatch;
     embedSingle(text: string): Promise<number[]>;
     close(): void;
package/dist/rag/embeddings/gemini.js
CHANGED
@@ -8,19 +8,13 @@
  *
  * Free tier available with generous limits.
  */
+import { chunk, processBatchesWithLimit, withRetry, } from './api-utils.js';
 const GEMINI_API_BASE = 'https://generativelanguage.googleapis.com/v1beta/models';
 const MODEL = 'gemini-embedding-001';
 // Gemini limits: 2,048 tokens/text, 20,000 tokens/batch, 100-250 texts/batch
-//
-
-
-const CONCURRENCY = 5; // Max concurrent API requests
-const MAX_RETRIES = 12; // Max retry attempts on rate limit
-const INITIAL_BACKOFF_MS = 1000; // Start at 1s
-const MAX_BACKOFF_MS = 60000; // Cap at 60s (1 min)
-function sleep(ms) {
-    return new Promise(resolve => setTimeout(resolve, ms));
-}
+// Chunks are ~2000 chars + context header ≈ 800-1000 tokens each
+// 16 chunks × 1000 tokens = 16,000 tokens (safe margin under 20k limit)
+const BATCH_SIZE = 16;
 /**
  * Gemini embedding provider.
  * Uses gemini-embedding-001 model via Google's Generative AI API.
@@ -75,66 +69,12 @@ export class GeminiEmbeddingProvider {
         if (texts.length === 0) {
             return [];
         }
-
-        const
-
-
-        }
-
-        const results = [];
-        let completed = 0;
-        for (let i = 0; i < batches.length; i += CONCURRENCY) {
-            const concurrentBatches = batches.slice(i, i + CONCURRENCY);
-            // Fire concurrent requests
-            const batchResults = await Promise.all(concurrentBatches.map(batch => this.embedBatchWithRetry(batch)));
-            // Flatten and collect results (Promise.all preserves order)
-            for (const result of batchResults) {
-                results.push(...result);
-            }
-            // Report progress after concurrent group completes
-            completed += concurrentBatches.length;
-            const processed = Math.min(completed * BATCH_SIZE, texts.length);
-            this.onBatchProgress?.(processed, texts.length);
-        }
-        return results;
-    }
-    /**
-     * Embed a batch with exponential backoff retry on rate limit errors.
-     */
-    async embedBatchWithRetry(batch) {
-        let attempt = 0;
-        let backoffMs = INITIAL_BACKOFF_MS;
-        while (true) {
-            try {
-                const result = await this.embedBatch(batch);
-                // Clear throttle message on success (if was throttling)
-                if (attempt > 0)
-                    this.onThrottle?.(null);
-                return result;
-            }
-            catch (error) {
-                if (this.isRateLimitError(error) && attempt < MAX_RETRIES) {
-                    attempt++;
-                    const secs = Math.round(backoffMs / 1000);
-                    this.onThrottle?.(`Rate limited - retry ${attempt}/${MAX_RETRIES} in ${secs}s`);
-                    await sleep(backoffMs);
-                    backoffMs = Math.min(backoffMs * 2, MAX_BACKOFF_MS);
-                }
-                else {
-                    throw error;
-                }
-            }
-        }
-    }
-    /**
-     * Check if an error is a rate limit error (429 or quota exceeded).
-     */
-    isRateLimitError(error) {
-        if (error instanceof Error) {
-            const msg = error.message.toLowerCase();
-            return (msg.includes('429') || msg.includes('rate') || msg.includes('quota'));
-        }
-        return false;
+        const batches = chunk(texts, BATCH_SIZE);
+        const callbacks = {
+            onThrottle: this.onThrottle,
+            onBatchProgress: this.onBatchProgress,
+        };
+        return processBatchesWithLimit(batches, batch => withRetry(() => this.embedBatch(batch), callbacks), callbacks);
     }
     async embedBatch(texts) {
         const url = `${GEMINI_API_BASE}/${MODEL}:batchEmbedContents`;
package/dist/rag/embeddings/index.d.ts
CHANGED
@@ -4,8 +4,10 @@
  */
 export { GeminiEmbeddingProvider } from './gemini.js';
 export { Local4BEmbeddingProvider } from './local-4b.js';
-export { LocalEmbeddingProvider } from './local.js';
+export { LocalEmbeddingProvider, clearCachedPipeline } from './local.js';
 export { MistralEmbeddingProvider } from './mistral.js';
+export { MockEmbeddingProvider } from './mock.js';
 export { OpenAIEmbeddingProvider } from './openai.js';
 export { validateApiKey, type ValidationResult } from './validate.js';
 export type { EmbeddingProvider, ModelProgressCallback } from './types.js';
+export { CONCURRENCY, BATCH_DELAY_MS, MAX_RETRIES, INITIAL_BACKOFF_MS, MAX_BACKOFF_MS, sleep, isRateLimitError, isTransientApiError, isRetriableError, withRetry, processBatchesWithLimit, chunk, type ApiProviderCallbacks, } from './api-utils.js';
package/dist/rag/embeddings/index.js
CHANGED
@@ -4,7 +4,10 @@
  */
 export { GeminiEmbeddingProvider } from './gemini.js';
 export { Local4BEmbeddingProvider } from './local-4b.js';
-export { LocalEmbeddingProvider } from './local.js';
+export { LocalEmbeddingProvider, clearCachedPipeline } from './local.js';
 export { MistralEmbeddingProvider } from './mistral.js';
+export { MockEmbeddingProvider } from './mock.js';
 export { OpenAIEmbeddingProvider } from './openai.js';
 export { validateApiKey } from './validate.js';
+// Shared utilities for API-based providers
+export { CONCURRENCY, BATCH_DELAY_MS, MAX_RETRIES, INITIAL_BACKOFF_MS, MAX_BACKOFF_MS, sleep, isRateLimitError, isTransientApiError, isRetriableError, withRetry, processBatchesWithLimit, chunk, } from './api-utils.js';
package/dist/rag/embeddings/local.d.ts
CHANGED
@@ -14,14 +14,19 @@
  * - Data never leaves your machine
  */
 import type { EmbeddingProvider, ModelProgressCallback } from './types.js';
+/**
+ * Clear the cached pipeline.
+ * Useful for tests that need to reset state between runs.
+ */
+export declare function clearCachedPipeline(): void;
 /**
  * Local embedding provider using Qwen3-Embedding-0.6B Q8.
  */
 export declare class LocalEmbeddingProvider implements EmbeddingProvider {
     readonly dimensions = 1024;
-    private extractor;
     private initialized;
     initialize(onProgress?: ModelProgressCallback): Promise<void>;
+    private loadModel;
     embed(texts: string[]): Promise<number[][]>;
     private embedBatch;
     embedSingle(text: string): Promise<number[]>;
package/dist/rag/embeddings/local.js
CHANGED
@@ -17,6 +17,19 @@ import { pipeline } from '@huggingface/transformers';
 const MODEL_NAME = 'onnx-community/Qwen3-Embedding-0.6B-ONNX';
 const DIMENSIONS = 1024;
 const BATCH_SIZE = 8;
+// Module-level cache for the ONNX pipeline
+// Shared across all LocalEmbeddingProvider instances to avoid reloading the model
+// eslint-disable-next-line @typescript-eslint/no-explicit-any -- HuggingFace pipeline type is too complex
+let cachedExtractor = null;
+let initPromise = null;
+/**
+ * Clear the cached pipeline.
+ * Useful for tests that need to reset state between runs.
+ */
+export function clearCachedPipeline() {
+    cachedExtractor = null;
+    initPromise = null;
+}
 /**
  * Local embedding provider using Qwen3-Embedding-0.6B Q8.
  */
@@ -28,13 +41,6 @@ export class LocalEmbeddingProvider {
         writable: true,
         value: DIMENSIONS
     });
-    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- HuggingFace pipeline type is too complex
-    Object.defineProperty(this, "extractor", {
-        enumerable: true,
-        configurable: true,
-        writable: true,
-        value: null
-    });
     Object.defineProperty(this, "initialized", {
         enumerable: true,
         configurable: true,
@@ -45,6 +51,33 @@ export class LocalEmbeddingProvider {
     async initialize(onProgress) {
         if (this.initialized)
             return;
+        // Reuse cached model if available
+        if (cachedExtractor) {
+            this.initialized = true;
+            onProgress?.('ready');
+            return;
+        }
+        // If another instance is already loading, wait for it
+        if (initPromise) {
+            await initPromise;
+            this.initialized = true;
+            onProgress?.('ready');
+            return;
+        }
+        // First load - this instance will load the model and cache it
+        initPromise = this.loadModel(onProgress);
+        try {
+            await initPromise;
+            this.initialized = true;
+        }
+        catch (error) {
+            // Clear the cached promise so future calls can retry
+            // (e.g., after network recovery or freeing memory)
+            initPromise = null;
+            throw error;
+        }
+    }
+    async loadModel(onProgress) {
         // Track download progress for the model files
         let lastProgress = 0;
         const progressCallback = onProgress
@@ -67,12 +100,11 @@ export class LocalEmbeddingProvider {
         onProgress?.('loading');
         // Load the model with q8 (int8) quantization for smaller size and faster inference
         // First load will download the model (~700MB)
-
+        cachedExtractor = await pipeline('feature-extraction', MODEL_NAME, {
             dtype: 'q8', // int8 quantization
             progress_callback: progressCallback,
         });
         onProgress?.('ready');
-        this.initialized = true;
     }
     async embed(texts) {
         if (!this.initialized) {
@@ -93,7 +125,7 @@ export class LocalEmbeddingProvider {
     async embedBatch(texts) {
         const results = [];
         for (const text of texts) {
-            const output = await
+            const output = await cachedExtractor(text, {
                 pooling: 'mean',
                 normalize: true,
             });
@@ -107,14 +139,15 @@ export class LocalEmbeddingProvider {
         if (!this.initialized) {
             await this.initialize();
         }
-        const output = await
+        const output = await cachedExtractor(text, {
             pooling: 'mean',
             normalize: true,
         });
         return Array.from(output.data);
     }
     close() {
-        this
+        // Mark this instance as uninitialized, but don't clear the cached model
+        // Other instances may still be using it
         this.initialized = false;
     }
 }
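The local.js changes above are the usual promise-cached singleton: the first initialize() stores a loading promise, concurrent callers await that same promise, and a failed load clears it so a later call can retry. A generic sketch of the pattern (names here are illustrative, not from the package):

```ts
// Generic promise-cached singleton, in the shape local.js uses for the ONNX pipeline.
let cached: unknown = null;
let loading: Promise<unknown> | null = null;

async function getOnce(load: () => Promise<unknown>): Promise<unknown> {
    if (cached) return cached;               // already loaded: reuse
    if (!loading) {
        loading = load()
            .then(model => (cached = model)) // publish on success
            .catch(err => {
                loading = null;              // allow a later retry
                throw err;
            });
    }
    return loading;                          // concurrent callers share one load
}
```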
package/dist/rag/embeddings/mistral.d.ts
CHANGED
@@ -18,14 +18,6 @@ export declare class MistralEmbeddingProvider implements EmbeddingProvider {
     constructor(apiKey?: string);
     initialize(_onProgress?: ModelProgressCallback): Promise<void>;
     embed(texts: string[]): Promise<number[][]>;
-    /**
-     * Embed a batch with exponential backoff retry on rate limit errors.
-     */
-    private embedBatchWithRetry;
-    /**
-     * Check if an error is a rate limit error (429 or quota exceeded).
-     */
-    private isRateLimitError;
     private embedBatch;
     embedSingle(text: string): Promise<number[]>;
     close(): void;
package/dist/rag/embeddings/mistral.js
CHANGED
@@ -4,19 +4,13 @@
  * Uses codestral-embed model (1536 dimensions).
  * Optimized for code and technical content.
 */
+import { chunk, processBatchesWithLimit, withRetry, } from './api-utils.js';
 const MISTRAL_API_BASE = 'https://api.mistral.ai/v1';
 const MODEL = 'codestral-embed';
 // Mistral limits: 8,192 tokens/text, 16,000 tokens/batch TOTAL
-//
-
-
-const CONCURRENCY = 5; // Max concurrent API requests
-const MAX_RETRIES = 12; // Max retry attempts on rate limit
-const INITIAL_BACKOFF_MS = 1000; // Start at 1s
-const MAX_BACKOFF_MS = 60000; // Cap at 60s (1 min)
-function sleep(ms) {
-    return new Promise(resolve => setTimeout(resolve, ms));
-}
+// Chunks are ~2000 chars + context header ≈ 800-1000 tokens each
+// 12 chunks × 1000 tokens = 12,000 tokens (safe margin under 16k limit)
+const BATCH_SIZE = 12;
 /**
  * Mistral embedding provider.
  * Uses codestral-embed model via Mistral AI API.
@@ -71,66 +65,12 @@ export class MistralEmbeddingProvider {
         if (texts.length === 0) {
             return [];
         }
-
-        const
-
-
-        }
-
-        const results = [];
-        let completed = 0;
-        for (let i = 0; i < batches.length; i += CONCURRENCY) {
-            const concurrentBatches = batches.slice(i, i + CONCURRENCY);
-            // Fire concurrent requests
-            const batchResults = await Promise.all(concurrentBatches.map(batch => this.embedBatchWithRetry(batch)));
-            // Flatten and collect results (Promise.all preserves order)
-            for (const result of batchResults) {
-                results.push(...result);
-            }
-            // Report progress after concurrent group completes
-            completed += concurrentBatches.length;
-            const processed = Math.min(completed * BATCH_SIZE, texts.length);
-            this.onBatchProgress?.(processed, texts.length);
-        }
-        return results;
-    }
-    /**
-     * Embed a batch with exponential backoff retry on rate limit errors.
-     */
-    async embedBatchWithRetry(batch) {
-        let attempt = 0;
-        let backoffMs = INITIAL_BACKOFF_MS;
-        while (true) {
-            try {
-                const result = await this.embedBatch(batch);
-                // Clear throttle message on success (if was throttling)
-                if (attempt > 0)
-                    this.onThrottle?.(null);
-                return result;
-            }
-            catch (error) {
-                if (this.isRateLimitError(error) && attempt < MAX_RETRIES) {
-                    attempt++;
-                    const secs = Math.round(backoffMs / 1000);
-                    this.onThrottle?.(`Rate limited - retry ${attempt}/${MAX_RETRIES} in ${secs}s`);
-                    await sleep(backoffMs);
-                    backoffMs = Math.min(backoffMs * 2, MAX_BACKOFF_MS);
-                }
-                else {
-                    throw error;
-                }
-            }
-        }
-    }
-    /**
-     * Check if an error is a rate limit error (429 or quota exceeded).
-     */
-    isRateLimitError(error) {
-        if (error instanceof Error) {
-            const msg = error.message.toLowerCase();
-            return (msg.includes('429') || msg.includes('rate') || msg.includes('quota'));
-        }
-        return false;
+        const batches = chunk(texts, BATCH_SIZE);
+        const callbacks = {
+            onThrottle: this.onThrottle,
+            onBatchProgress: this.onBatchProgress,
+        };
+        return processBatchesWithLimit(batches, batch => withRetry(() => this.embedBatch(batch), callbacks), callbacks);
     }
     async embedBatch(texts) {
         const response = await fetch(`${MISTRAL_API_BASE}/embeddings`, {
package/dist/rag/embeddings/mock.d.ts
ADDED
@@ -0,0 +1,35 @@
+/**
+ * Mock embedding provider for testing.
+ *
+ * Generates deterministic hash-based embeddings that:
+ * - Run instantly (no model loading)
+ * - Are deterministic (same input = same output)
+ * - Normalized to unit length
+ * - Support any dimension count
+ *
+ * Usage:
+ * - Unit tests that need embeddings but don't need semantic quality
+ * - Testing search infrastructure without ONNX overhead
+ * - CI pipeline fast checks
+ */
+import type { EmbeddingProvider, ModelProgressCallback } from './types.js';
+/**
+ * Mock embedding provider using deterministic hash-based vectors.
+ */
+export declare class MockEmbeddingProvider implements EmbeddingProvider {
+    readonly dimensions: number;
+    constructor(dimensions?: number);
+    initialize(_onProgress?: ModelProgressCallback): Promise<void>;
+    embed(texts: string[]): Promise<number[][]>;
+    embedSingle(text: string): Promise<number[]>;
+    /**
+     * Convert text to a deterministic unit vector.
+     * Uses a simple hash-based approach to generate pseudo-random but repeatable values.
+     */
+    private hashToVector;
+    /**
+     * Simple string hash function (djb2).
+     */
+    private hash;
+    close(): void;
+}
package/dist/rag/embeddings/mock.js
ADDED
@@ -0,0 +1,69 @@
+/**
+ * Mock embedding provider for testing.
+ *
+ * Generates deterministic hash-based embeddings that:
+ * - Run instantly (no model loading)
+ * - Are deterministic (same input = same output)
+ * - Normalized to unit length
+ * - Support any dimension count
+ *
+ * Usage:
+ * - Unit tests that need embeddings but don't need semantic quality
+ * - Testing search infrastructure without ONNX overhead
+ * - CI pipeline fast checks
+ */
+const DEFAULT_DIMENSIONS = 1024;
+/**
+ * Mock embedding provider using deterministic hash-based vectors.
+ */
+export class MockEmbeddingProvider {
+    constructor(dimensions = DEFAULT_DIMENSIONS) {
+        Object.defineProperty(this, "dimensions", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        this.dimensions = dimensions;
+    }
+    async initialize(_onProgress) {
+        // No initialization needed - instant startup
+    }
+    async embed(texts) {
+        return texts.map(t => this.hashToVector(t));
+    }
+    async embedSingle(text) {
+        return this.hashToVector(text);
+    }
+    /**
+     * Convert text to a deterministic unit vector.
+     * Uses a simple hash-based approach to generate pseudo-random but repeatable values.
+     */
+    hashToVector(text) {
+        const seed = this.hash(text);
+        // Generate deterministic pseudo-random values
+        const vec = new Array(this.dimensions).fill(0).map((_, i) => {
+            // LCG-like pseudo-random based on seed and index
+            const state = (((seed * (i + 1) * 1103515245 + 12345) >>> 0) % 0x7fffffff) /
+                0x7fffffff;
+            return state * 2 - 1; // Range [-1, 1]
+        });
+        // Normalize to unit length
+        const magnitude = Math.sqrt(vec.reduce((sum, v) => sum + v * v, 0));
+        return vec.map(v => (magnitude > 0 ? v / magnitude : 0));
+    }
+    /**
+     * Simple string hash function (djb2).
+     */
+    hash(str) {
+        let h = 5381;
+        for (let i = 0; i < str.length; i++) {
+            h = (h * 33) ^ str.charCodeAt(i);
+            h = h >>> 0; // Convert to unsigned 32-bit
+        }
+        return h;
+    }
+    close() {
+        // Nothing to close
+    }
+}
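A sketch of how the mock might be exercised in a test; vitest matches the package's test runner (see package.json below), but the assertions themselves are illustrative:

```ts
import { describe, expect, it } from 'vitest';
import { MockEmbeddingProvider } from './mock.js';

describe('MockEmbeddingProvider', () => {
    it('returns deterministic unit vectors', async () => {
        const provider = new MockEmbeddingProvider(64); // any dimension count
        const [a, b] = await provider.embed(['hello', 'hello']);
        expect(a).toEqual(b); // same input, same output
        const norm = Math.sqrt(a.reduce((s, v) => s + v * v, 0));
        expect(norm).toBeCloseTo(1); // normalized to unit length
    });
});
```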
package/dist/rag/embeddings/openai.d.ts
CHANGED
@@ -8,24 +8,22 @@ import type { EmbeddingProvider, ModelProgressCallback } from './types.js';
 /**
  * OpenAI embedding provider.
  * Uses text-embedding-3-small model via OpenAI API.
+ *
+ * Supports regional endpoints for corporate accounts with data residency:
+ * - Default: https://api.openai.com/v1
+ * - US: https://us.api.openai.com/v1
+ * - EU: https://eu.api.openai.com/v1
 */
 export declare class OpenAIEmbeddingProvider implements EmbeddingProvider {
     readonly dimensions = 1536;
     private apiKey;
+    private apiBase;
     private initialized;
     onThrottle?: (message: string | null) => void;
     onBatchProgress?: (processed: number, total: number) => void;
-    constructor(apiKey?: string);
+    constructor(apiKey?: string, baseUrl?: string);
     initialize(_onProgress?: ModelProgressCallback): Promise<void>;
     embed(texts: string[]): Promise<number[][]>;
-    /**
-     * Embed a batch with exponential backoff retry on rate limit errors.
-     */
-    private embedBatchWithRetry;
-    /**
-     * Check if an error is a rate limit error (429 or quota exceeded).
-     */
-    private isRateLimitError;
     private embedBatch;
     embedSingle(text: string): Promise<number[]>;
     close(): void;
package/dist/rag/embeddings/openai.js
CHANGED
@@ -4,25 +4,24 @@
  * Uses text-embedding-3-small model (1536 dimensions).
  * Good quality with fast API responses and low cost ($0.02/1M tokens).
 */
-
+import { chunk, processBatchesWithLimit, withRetry, } from './api-utils.js';
+const DEFAULT_API_BASE = 'https://api.openai.com/v1';
 const MODEL = 'text-embedding-3-small';
 // OpenAI limits: 8,191 tokens/text, 300,000 tokens/batch, 2,048 texts/batch
-//
-
-
-const CONCURRENCY = 5; // Max concurrent API requests
-const MAX_RETRIES = 12; // Max retry attempts on rate limit
-const INITIAL_BACKOFF_MS = 1000; // Start at 1s
-const MAX_BACKOFF_MS = 60000; // Cap at 60s (1 min)
-function sleep(ms) {
-    return new Promise(resolve => setTimeout(resolve, ms));
-}
+// Chunks are ~2000 chars + context header ≈ 800-1000 tokens each
+// 200 chunks × 1000 tokens = 200,000 tokens (safe margin under 300k limit)
+const BATCH_SIZE = 200;
 /**
  * OpenAI embedding provider.
  * Uses text-embedding-3-small model via OpenAI API.
+ *
+ * Supports regional endpoints for corporate accounts with data residency:
+ * - Default: https://api.openai.com/v1
+ * - US: https://us.api.openai.com/v1
+ * - EU: https://eu.api.openai.com/v1
 */
 export class OpenAIEmbeddingProvider {
-    constructor(apiKey) {
+    constructor(apiKey, baseUrl) {
         Object.defineProperty(this, "dimensions", {
             enumerable: true,
             configurable: true,
@@ -35,6 +34,12 @@ export class OpenAIEmbeddingProvider {
             writable: true,
             value: void 0
         });
+        Object.defineProperty(this, "apiBase", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
         Object.defineProperty(this, "initialized", {
             enumerable: true,
             configurable: true,
@@ -57,6 +62,7 @@ export class OpenAIEmbeddingProvider {
         });
         // Trim the key to remove any accidental whitespace
        this.apiKey = (apiKey ?? '').trim();
+        this.apiBase = baseUrl ?? DEFAULT_API_BASE;
     }
     async initialize(_onProgress) {
         if (!this.apiKey) {
@@ -75,69 +81,15 @@ export class OpenAIEmbeddingProvider {
         if (texts.length === 0) {
             return [];
         }
-
-        const
-
-
-        }
-
-        const results = [];
-        let completed = 0;
-        for (let i = 0; i < batches.length; i += CONCURRENCY) {
-            const concurrentBatches = batches.slice(i, i + CONCURRENCY);
-            // Fire concurrent requests
-            const batchResults = await Promise.all(concurrentBatches.map(batch => this.embedBatchWithRetry(batch)));
-            // Flatten and collect results (Promise.all preserves order)
-            for (const result of batchResults) {
-                results.push(...result);
-            }
-            // Report progress after concurrent group completes
-            completed += concurrentBatches.length;
-            const processed = Math.min(completed * BATCH_SIZE, texts.length);
-            this.onBatchProgress?.(processed, texts.length);
-        }
-        return results;
-    }
-    /**
-     * Embed a batch with exponential backoff retry on rate limit errors.
-     */
-    async embedBatchWithRetry(batch) {
-        let attempt = 0;
-        let backoffMs = INITIAL_BACKOFF_MS;
-        while (true) {
-            try {
-                const result = await this.embedBatch(batch);
-                // Clear throttle message on success (if was throttling)
-                if (attempt > 0)
-                    this.onThrottle?.(null);
-                return result;
-            }
-            catch (error) {
-                if (this.isRateLimitError(error) && attempt < MAX_RETRIES) {
-                    attempt++;
-                    const secs = Math.round(backoffMs / 1000);
-                    this.onThrottle?.(`Rate limited - retry ${attempt}/${MAX_RETRIES} in ${secs}s`);
-                    await sleep(backoffMs);
-                    backoffMs = Math.min(backoffMs * 2, MAX_BACKOFF_MS);
-                }
-                else {
-                    throw error;
-                }
-            }
-        }
-    }
-    /**
-     * Check if an error is a rate limit error (429 or quota exceeded).
-     */
-    isRateLimitError(error) {
-        if (error instanceof Error) {
-            const msg = error.message.toLowerCase();
-            return (msg.includes('429') || msg.includes('rate') || msg.includes('quota'));
-        }
-        return false;
+        const batches = chunk(texts, BATCH_SIZE);
+        const callbacks = {
+            onThrottle: this.onThrottle,
+            onBatchProgress: this.onBatchProgress,
+        };
+        return processBatchesWithLimit(batches, batch => withRetry(() => this.embedBatch(batch), callbacks), callbacks);
     }
     async embedBatch(texts) {
-        const response = await fetch(`${
+        const response = await fetch(`${this.apiBase}/embeddings`, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
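For reference, the new constructor parameter in use; the environment variable name here is a common convention, not taken from the package:

```ts
import { OpenAIEmbeddingProvider } from './openai.js';

// Default endpoint (api.openai.com) - behavior unchanged from 0.3.2:
const standard = new OpenAIEmbeddingProvider(process.env.OPENAI_API_KEY);

// EU data residency endpoint, as the wizard stores it in config.openaiBaseUrl:
const eu = new OpenAIEmbeddingProvider(process.env.OPENAI_API_KEY, 'https://eu.api.openai.com/v1');
```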
package/dist/rag/indexer/indexer.js
CHANGED
@@ -285,7 +285,8 @@ export class Indexer {
         // Track chunks processed for progress updates
         let lastReportedChunks = 0;
         // Wire batch progress callback to report incremental chunks
-        if (progressContext?.onChunksProcessed &&
+        if (progressContext?.onChunksProcessed &&
+            'onBatchProgress' in embeddings) {
             embeddings.onBatchProgress = (processed, _total) => {
                 // Report only the delta since last update
                 const delta = processed - lastReportedChunks;
@@ -414,7 +415,7 @@ export class Indexer {
             case 'mistral':
                 return new MistralEmbeddingProvider(apiKey);
             case 'openai':
-                return new OpenAIEmbeddingProvider(apiKey);
+                return new OpenAIEmbeddingProvider(apiKey, config.openaiBaseUrl);
             default:
                 throw new Error(`Unknown embedding provider: ${config.embeddingProvider}`);
         }
package/dist/rag/search/index.js
CHANGED
@@ -362,7 +362,7 @@ export class SearchEngine {
         case 'mistral':
             return new MistralEmbeddingProvider(apiKey);
         case 'openai':
-            return new OpenAIEmbeddingProvider(apiKey);
+            return new OpenAIEmbeddingProvider(apiKey, config.openaiBaseUrl);
         default:
             throw new Error(`Unknown embedding provider: ${config.embeddingProvider}`);
     }
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
     "name": "viberag",
-    "version": "0.3.
+    "version": "0.3.3",
     "description": "Local code RAG for AI coding assistants - semantic search via MCP server",
     "license": "AGPL-3.0",
     "keywords": [
@@ -38,6 +38,8 @@
     "build": "tsc",
     "dev": "tsc --watch",
     "test": "prettier --check . && eslint . && vitest run",
+    "test:fast": "vitest run --project=fast",
+    "test:rag": "vitest run --project=rag",
     "test:smoke": "vitest run --testNamePattern='Grammar Smoke'",
     "lint": "eslint .",
     "lint:fix": "eslint . --fix",
@@ -62,6 +64,7 @@
     ],
     "dependencies": {
         "@huggingface/transformers": "^3.8.1",
+        "p-limit": "^6.2.0",
         "@lancedb/lancedb": "^0.23.0",
         "apache-arrow": "^18.1.0",
         "chalk": "^5.6.2",