viberag 0.3.2 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -41,12 +41,21 @@ export async function runInit(projectRoot, isReinit = false, wizardConfig) {
41
41
  // Build config from wizard choices
42
42
  const provider = wizardConfig?.provider ?? 'gemini';
43
43
  const { model, dimensions } = PROVIDER_CONFIGS[provider];
44
+ // Map OpenAI region to base URL
45
+ const openaiBaseUrl = wizardConfig?.openaiRegion
46
+ ? {
47
+ default: undefined,
48
+ us: 'https://us.api.openai.com/v1',
49
+ eu: 'https://eu.api.openai.com/v1',
50
+ }[wizardConfig.openaiRegion]
51
+ : undefined;
44
52
  const config = {
45
53
  ...DEFAULT_CONFIG,
46
54
  embeddingProvider: provider,
47
55
  embeddingModel: model,
48
56
  embeddingDimensions: dimensions,
49
57
  ...(wizardConfig?.apiKey && { apiKey: wizardConfig.apiKey }),
58
+ ...(openaiBaseUrl && { openaiBaseUrl }),
50
59
  };
51
60
  // Save config
52
61
  await saveConfig(projectRoot, config);
@@ -176,6 +176,21 @@ const API_KEY_ACTION_ITEMS = [
176
176
  { label: 'Keep existing API key', value: 'keep' },
177
177
  { label: 'Enter new API key', value: 'new' },
178
178
  ];
179
+ // OpenAI region options for data residency
180
+ const OPENAI_REGION_ITEMS = [
181
+ {
182
+ label: 'Default (api.openai.com) - Recommended',
183
+ value: 'default',
184
+ },
185
+ {
186
+ label: 'US (us.api.openai.com) - US Data Residency',
187
+ value: 'us',
188
+ },
189
+ {
190
+ label: 'EU (eu.api.openai.com) - EU Data Residency',
191
+ value: 'eu',
192
+ },
193
+ ];
179
194
  /**
180
195
  * Simple text input component for API key entry.
181
196
  * Uses a ref to accumulate input, which handles paste better than
@@ -227,6 +242,8 @@ export function InitWizard({ step, config, isReinit, existingApiKey, existingPro
227
242
  // State for API key input
228
243
  const [apiKeyInput, setApiKeyInput] = useState('');
229
244
  const [apiKeyAction, setApiKeyAction] = useState(null);
245
+ // State for OpenAI region selection (shown after API key for OpenAI)
246
+ const [showRegionSelect, setShowRegionSelect] = useState(false);
230
247
  // Handle Escape to cancel
231
248
  useInput((input, key) => {
232
249
  if (key.escape || (key.ctrl && input === 'c')) {
@@ -277,9 +294,10 @@ export function InitWizard({ step, config, isReinit, existingApiKey, existingPro
277
294
  React.createElement(Text, { bold: true }, "Choose Embedding Provider"),
278
295
  React.createElement(Box, { marginTop: 1 },
279
296
  React.createElement(SelectInput, { items: PROVIDER_ITEMS, onSelect: item => {
280
- // Reset API key state when provider changes
297
+ // Reset API key and region state when provider changes
281
298
  setApiKeyInput('');
282
299
  setApiKeyAction(null);
300
+ setShowRegionSelect(false);
283
301
  // Use relative increment: step + 1
284
302
  onStepChange(normalizedStep + 1, { provider: item.value });
285
303
  } })),
@@ -298,6 +316,21 @@ export function InitWizard({ step, config, isReinit, existingApiKey, existingPro
298
316
  const provider = currentProvider;
299
317
  const info = PROVIDER_CONFIG[provider];
300
318
  const apiKeyUrl = API_KEY_URLS[provider];
319
+ const isOpenAI = provider === 'openai';
320
+ // Show OpenAI region selection after API key is entered
321
+ if (isOpenAI && showRegionSelect) {
322
+ return (React.createElement(Box, { flexDirection: "column", borderStyle: "round", paddingX: 2, paddingY: 1 },
323
+ React.createElement(Text, { bold: true }, "Select OpenAI API Region"),
324
+ React.createElement(Box, { marginTop: 1, flexDirection: "column" },
325
+ React.createElement(Text, { dimColor: true }, "Corporate accounts with data residency require regional endpoints."),
326
+ React.createElement(Text, { dimColor: true }, "Most users should select Default.")),
327
+ React.createElement(Box, { marginTop: 1 },
328
+ React.createElement(SelectInput, { items: OPENAI_REGION_ITEMS, onSelect: item => {
329
+ onStepChange(normalizedStep + 1, { openaiRegion: item.value });
330
+ } })),
331
+ React.createElement(Box, { marginTop: 1 },
332
+ React.createElement(Text, { dimColor: true }, "\u2191/\u2193 navigate, Enter select, Esc cancel"))));
333
+ }
301
334
  return (React.createElement(Box, { flexDirection: "column", borderStyle: "round", paddingX: 2, paddingY: 1 },
302
335
  React.createElement(Text, { bold: true },
303
336
  "Configure ",
@@ -316,8 +349,18 @@ export function InitWizard({ step, config, isReinit, existingApiKey, existingPro
316
349
  React.createElement(Box, { marginTop: 1 },
317
350
  React.createElement(SelectInput, { items: API_KEY_ACTION_ITEMS, onSelect: item => {
318
351
  if (item.value === 'keep') {
319
- // Keep existing key, advance to confirmation
320
- onStepChange(normalizedStep + 1, { apiKey: existingApiKey });
352
+ // Keep existing key
353
+ onStepChange(normalizedStep, { apiKey: existingApiKey });
354
+ if (isOpenAI) {
355
+ // Show region selection for OpenAI
356
+ setShowRegionSelect(true);
357
+ }
358
+ else {
359
+ // Advance to confirmation for other providers
360
+ onStepChange(normalizedStep + 1, {
361
+ apiKey: existingApiKey,
362
+ });
363
+ }
321
364
  }
322
365
  else {
323
366
  // Show text input for new key
@@ -325,7 +368,15 @@ export function InitWizard({ step, config, isReinit, existingApiKey, existingPro
325
368
  }
326
369
  } })))) : (React.createElement(ApiKeyInputStep, { providerName: info.name, apiKeyInput: apiKeyInput, setApiKeyInput: setApiKeyInput, onSubmit: key => {
327
370
  if (key.trim()) {
328
- onStepChange(normalizedStep + 1, { apiKey: key.trim() });
371
+ onStepChange(normalizedStep, { apiKey: key.trim() });
372
+ if (isOpenAI) {
373
+ // Show region selection for OpenAI
374
+ setShowRegionSelect(true);
375
+ }
376
+ else {
377
+ // Advance to confirmation for other providers
378
+ onStepChange(normalizedStep + 1, { apiKey: key.trim() });
379
+ }
329
380
  }
330
381
  } })),
331
382
  React.createElement(Box, { marginTop: 1 },
@@ -91,6 +91,10 @@ export type IndexDisplayStats = {
91
91
  * - openai: text-embedding-3-small (1536d) - Fast API
92
92
  */
93
93
  export type EmbeddingProviderType = 'local' | 'local-4b' | 'gemini' | 'mistral' | 'openai';
94
+ /**
95
+ * OpenAI API regional endpoints for data residency.
96
+ */
97
+ export type OpenAIRegion = 'default' | 'us' | 'eu';
94
98
  /**
95
99
  * Configuration collected from the init wizard.
96
100
  */
@@ -98,6 +102,8 @@ export type InitWizardConfig = {
98
102
  provider: EmbeddingProviderType;
99
103
  /** API key for cloud providers (gemini, mistral, openai) */
100
104
  apiKey?: string;
105
+ /** OpenAI regional endpoint (for corporate accounts with data residency) */
106
+ openaiRegion?: OpenAIRegion;
101
107
  };
102
108
  /**
103
109
  * MCP editor identifiers.
@@ -20,6 +20,8 @@ export interface ViberagConfig {
20
20
  embeddingDimensions: number;
21
21
  /** API key for cloud providers (gemini, mistral, openai) */
22
22
  apiKey?: string;
23
+ /** OpenAI API base URL (for corporate accounts with data residency) */
24
+ openaiBaseUrl?: string;
23
25
  extensions: string[];
24
26
  excludePatterns: string[];
25
27
  chunkMaxSize: number;
@@ -0,0 +1,81 @@
1
+ /**
2
+ * Shared utilities for API-based embedding providers.
3
+ * Provides common retry logic, rate limiting, and concurrency patterns.
4
+ */
5
+ /** Max concurrent API requests */
6
+ export declare const CONCURRENCY = 5;
7
+ /** Delay (ms) between batch completion and next batch start (per slot) */
8
+ export declare const BATCH_DELAY_MS = 200;
9
+ /** Max retry attempts on rate limit */
10
+ export declare const MAX_RETRIES = 12;
11
+ /** Initial backoff (ms) */
12
+ export declare const INITIAL_BACKOFF_MS = 1000;
13
+ /** Maximum backoff (ms) */
14
+ export declare const MAX_BACKOFF_MS = 60000;
15
+ /**
16
+ * Sleep for a specified duration.
17
+ */
18
+ export declare function sleep(ms: number): Promise<void>;
19
+ /**
20
+ * Check if an error is a rate limit error (429 or quota exceeded).
21
+ */
22
+ export declare function isRateLimitError(error: unknown): boolean;
23
+ /**
24
+ * Check if an error is a known transient API error that should be retried.
25
+ *
26
+ * GEMINI TRANSIENT BUG:
27
+ * The Gemini API has a known server-side bug where it intermittently returns
28
+ * a 400 "API key expired" error even when the key is valid. This is NOT an
29
+ * actual authentication failure - it's a transient error that resolves on retry.
30
+ *
31
+ * Evidence:
32
+ * - Users report: "if I try the same request again a few times, it usually works fine"
33
+ * - New API keys don't fix it
34
+ * - Same key works in curl but fails randomly via API clients
35
+ * - Google has acknowledged this as a P1/P2 bug
36
+ *
37
+ * GitHub issues documenting this bug:
38
+ * - https://github.com/google-gemini/gemini-cli/issues/4430
39
+ * - https://github.com/google-gemini/gemini-cli/issues/1712
40
+ * - https://github.com/google-gemini/gemini-cli/issues/8675
41
+ *
42
+ * We detect this specific error and retry it rather than failing immediately.
43
+ */
44
+ export declare function isTransientApiError(error: unknown): boolean;
45
+ /**
46
+ * Check if an error should trigger a retry (rate limit OR transient error).
47
+ */
48
+ export declare function isRetriableError(error: unknown): boolean;
49
+ /**
50
+ * Callbacks for rate limiting and progress reporting.
51
+ */
52
+ export interface ApiProviderCallbacks {
53
+ onThrottle?: (message: string | null) => void;
54
+ onBatchProgress?: (processed: number, total: number) => void;
55
+ }
56
+ /**
57
+ * Execute an async function with exponential backoff retry on retriable errors.
58
+ *
59
+ * Retries on:
60
+ * - Rate limit errors (429, quota exceeded)
61
+ * - Transient API errors (e.g., Gemini's spurious "API key expired" bug)
62
+ *
63
+ * @param fn - The async function to execute
64
+ * @param callbacks - Optional callbacks for throttle notifications
65
+ * @returns The result of the function
66
+ */
67
+ export declare function withRetry<T>(fn: () => Promise<T>, callbacks?: ApiProviderCallbacks): Promise<T>;
68
+ /**
69
+ * Process batches with p-limit sliding window concurrency and inter-batch delay.
70
+ * Reports progress per-batch (more granular than group-based).
71
+ *
72
+ * @param batches - Array of batches to process
73
+ * @param processBatch - Function to process a single batch
74
+ * @param callbacks - Optional callbacks for progress reporting
75
+ * @returns Flattened array of results
76
+ */
77
+ export declare function processBatchesWithLimit<T>(batches: T[][], processBatch: (batch: T[]) => Promise<number[][]>, callbacks?: ApiProviderCallbacks): Promise<number[][]>;
78
+ /**
79
+ * Split an array into batches of a specified size.
80
+ */
81
+ export declare function chunk<T>(array: T[], size: number): T[][];
@@ -0,0 +1,150 @@
1
/**
 * Shared utilities for API-based embedding providers.
 * Provides common retry logic, rate limiting, and concurrency patterns.
 */
import pLimit from 'p-limit';
// ============================================================================
// Constants
// ============================================================================
/** Maximum number of API requests allowed in flight at once. */
export const CONCURRENCY = 5;
/** Per-slot delay (ms) between finishing one batch and starting the next. */
export const BATCH_DELAY_MS = 200;
/** How many times a retriable (rate-limit/transient) failure is retried. */
export const MAX_RETRIES = 12;
/** First backoff interval (ms); doubles on each subsequent retry. */
export const INITIAL_BACKOFF_MS = 1000;
/** Ceiling (ms) that the exponential backoff never exceeds. */
export const MAX_BACKOFF_MS = 60000;
// ============================================================================
// Utility Functions
// ============================================================================
22
/**
 * Sleep for a specified duration.
 *
 * @param ms - Delay in milliseconds.
 * @returns A promise that resolves once the delay has elapsed.
 */
export function sleep(ms) {
    return new Promise(resolve => {
        setTimeout(resolve, ms);
    });
}
28
/**
 * Check if an error is a rate limit error (429 or quota exceeded).
 *
 * Matches on the lowercased error message; non-Error values are never
 * considered rate-limit errors.
 */
export function isRateLimitError(error) {
    if (!(error instanceof Error)) {
        return false;
    }
    const msg = error.message.toLowerCase();
    // NOTE(review): 'rate' is a broad substring match (e.g. would also hit
    // "moderate"); kept as-is to preserve existing matching behavior.
    return ['429', 'rate', 'quota'].some(token => msg.includes(token));
}
38
/**
 * Check if an error is a known transient API error that should be retried.
 *
 * GEMINI TRANSIENT BUG:
 * The Gemini API has a known server-side bug where it intermittently returns
 * a 400 "API key expired" error even when the key is valid. This is NOT an
 * actual authentication failure - it's a transient error that resolves on retry.
 *
 * Evidence:
 * - Users report: "if I try the same request again a few times, it usually works fine"
 * - New API keys don't fix it
 * - Same key works in curl but fails randomly via API clients
 * - Google has acknowledged this as a P1/P2 bug
 *
 * GitHub issues documenting this bug:
 * - https://github.com/google-gemini/gemini-cli/issues/4430
 * - https://github.com/google-gemini/gemini-cli/issues/1712
 * - https://github.com/google-gemini/gemini-cli/issues/8675
 *
 * We detect this specific error and retry it rather than failing immediately.
 */
export function isTransientApiError(error) {
    if (!(error instanceof Error)) {
        return false;
    }
    const msg = error.message.toLowerCase();
    // Only the spurious "API key expired" variant that arrives with a 400 /
    // INVALID_ARGUMENT marker is treated as transient, so genuine
    // authentication failures still fail fast instead of being retried.
    const looksLikeExpiredKey = msg.includes('api key expired');
    const looksLikeBadRequest = msg.includes('400') || msg.includes('invalid_argument');
    return looksLikeExpiredKey && looksLikeBadRequest;
}
72
/**
 * Check if an error should trigger a retry (rate limit OR transient error).
 */
export function isRetriableError(error) {
    if (isRateLimitError(error)) {
        return true;
    }
    return isTransientApiError(error);
}
78
/**
 * Execute an async function with exponential backoff retry on retriable errors.
 *
 * Retries on:
 * - Rate limit errors (429, quota exceeded)
 * - Transient API errors (e.g., Gemini's spurious "API key expired" bug)
 *
 * @param fn - The async function to execute
 * @param callbacks - Optional callbacks for throttle notifications
 * @returns The result of the function
 */
export async function withRetry(fn, callbacks) {
    let backoffMs = INITIAL_BACKOFF_MS;
    for (let attempt = 0; ;) {
        try {
            const result = await fn();
            // If we had been throttling, clear the throttle message on success.
            if (attempt > 0) {
                callbacks?.onThrottle?.(null);
            }
            return result;
        }
        catch (error) {
            // Give up on non-retriable errors or once the retry budget is spent.
            if (attempt >= MAX_RETRIES || !isRetriableError(error)) {
                throw error;
            }
            attempt += 1;
            // Pick a message that matches why we are backing off.
            const reason = isTransientApiError(error)
                ? 'Transient API error'
                : 'Rate limited';
            const secs = Math.round(backoffMs / 1000);
            callbacks?.onThrottle?.(`${reason} - retry ${attempt}/${MAX_RETRIES} in ${secs}s`);
            await sleep(backoffMs);
            backoffMs = Math.min(backoffMs * 2, MAX_BACKOFF_MS);
        }
    }
}
117
/**
 * Process batches with p-limit sliding window concurrency and inter-batch delay.
 * Reports progress per-batch (more granular than group-based).
 *
 * @param batches - Array of batches to process
 * @param processBatch - Function to process a single batch
 * @param callbacks - Optional callbacks for progress reporting
 * @returns Flattened array of results
 */
export async function processBatchesWithLimit(batches, processBatch, callbacks) {
    const limit = pLimit(CONCURRENCY);
    const totalItems = batches.reduce((sum, b) => sum + b.length, 0);
    let processedItems = 0;
    const runOne = async (batch) => {
        const embeddings = await processBatch(batch);
        // Keep holding the concurrency slot briefly before releasing it,
        // spacing out consecutive requests as rate-limit protection.
        await sleep(BATCH_DELAY_MS);
        // Per-batch progress (single-threaded event loop, so the shared
        // counter update is safe even with concurrent batches).
        processedItems += batch.length;
        callbacks?.onBatchProgress?.(processedItems, totalItems);
        return embeddings;
    };
    const perBatch = await Promise.all(batches.map(batch => limit(() => runOne(batch))));
    return perBatch.flat();
}
141
/**
 * Split an array into batches of a specified size.
 *
 * The final batch may be shorter than `size` when the array length is not
 * an exact multiple of it.
 */
export function chunk(array, size) {
    const batches = [];
    let start = 0;
    while (start < array.length) {
        batches.push(array.slice(start, start + size));
        start += size;
    }
    return batches;
}
@@ -22,14 +22,6 @@ export declare class GeminiEmbeddingProvider implements EmbeddingProvider {
22
22
  constructor(apiKey?: string);
23
23
  initialize(_onProgress?: ModelProgressCallback): Promise<void>;
24
24
  embed(texts: string[]): Promise<number[][]>;
25
- /**
26
- * Embed a batch with exponential backoff retry on rate limit errors.
27
- */
28
- private embedBatchWithRetry;
29
- /**
30
- * Check if an error is a rate limit error (429 or quota exceeded).
31
- */
32
- private isRateLimitError;
33
25
  private embedBatch;
34
26
  embedSingle(text: string): Promise<number[]>;
35
27
  close(): void;
@@ -8,19 +8,13 @@
8
8
  *
9
9
  * Free tier available with generous limits.
10
10
  */
11
+ import { chunk, processBatchesWithLimit, withRetry, } from './api-utils.js';
11
12
  const GEMINI_API_BASE = 'https://generativelanguage.googleapis.com/v1beta/models';
12
13
  const MODEL = 'gemini-embedding-001';
13
14
  // Gemini limits: 2,048 tokens/text, 20,000 tokens/batch, 100-250 texts/batch
14
- // With avg ~1000 tokens/chunk, safe limit is 20 texts.
15
- const BATCH_SIZE = 20;
16
- // Concurrency and rate limiting
17
- const CONCURRENCY = 5; // Max concurrent API requests
18
- const MAX_RETRIES = 12; // Max retry attempts on rate limit
19
- const INITIAL_BACKOFF_MS = 1000; // Start at 1s
20
- const MAX_BACKOFF_MS = 60000; // Cap at 60s (1 min)
21
- function sleep(ms) {
22
- return new Promise(resolve => setTimeout(resolve, ms));
23
- }
15
+ // Chunks are ~2000 chars + context header 800-1000 tokens each
16
+ // 16 chunks × 1000 tokens = 16,000 tokens (safe margin under 20k limit)
17
+ const BATCH_SIZE = 16;
24
18
  /**
25
19
  * Gemini embedding provider.
26
20
  * Uses gemini-embedding-001 model via Google's Generative AI API.
@@ -75,66 +69,12 @@ export class GeminiEmbeddingProvider {
75
69
  if (texts.length === 0) {
76
70
  return [];
77
71
  }
78
- // Split into batches
79
- const batches = [];
80
- for (let i = 0; i < texts.length; i += BATCH_SIZE) {
81
- batches.push(texts.slice(i, i + BATCH_SIZE));
82
- }
83
- // Process batches with limited concurrency
84
- const results = [];
85
- let completed = 0;
86
- for (let i = 0; i < batches.length; i += CONCURRENCY) {
87
- const concurrentBatches = batches.slice(i, i + CONCURRENCY);
88
- // Fire concurrent requests
89
- const batchResults = await Promise.all(concurrentBatches.map(batch => this.embedBatchWithRetry(batch)));
90
- // Flatten and collect results (Promise.all preserves order)
91
- for (const result of batchResults) {
92
- results.push(...result);
93
- }
94
- // Report progress after concurrent group completes
95
- completed += concurrentBatches.length;
96
- const processed = Math.min(completed * BATCH_SIZE, texts.length);
97
- this.onBatchProgress?.(processed, texts.length);
98
- }
99
- return results;
100
- }
101
- /**
102
- * Embed a batch with exponential backoff retry on rate limit errors.
103
- */
104
- async embedBatchWithRetry(batch) {
105
- let attempt = 0;
106
- let backoffMs = INITIAL_BACKOFF_MS;
107
- while (true) {
108
- try {
109
- const result = await this.embedBatch(batch);
110
- // Clear throttle message on success (if was throttling)
111
- if (attempt > 0)
112
- this.onThrottle?.(null);
113
- return result;
114
- }
115
- catch (error) {
116
- if (this.isRateLimitError(error) && attempt < MAX_RETRIES) {
117
- attempt++;
118
- const secs = Math.round(backoffMs / 1000);
119
- this.onThrottle?.(`Rate limited - retry ${attempt}/${MAX_RETRIES} in ${secs}s`);
120
- await sleep(backoffMs);
121
- backoffMs = Math.min(backoffMs * 2, MAX_BACKOFF_MS);
122
- }
123
- else {
124
- throw error;
125
- }
126
- }
127
- }
128
- }
129
- /**
130
- * Check if an error is a rate limit error (429 or quota exceeded).
131
- */
132
- isRateLimitError(error) {
133
- if (error instanceof Error) {
134
- const msg = error.message.toLowerCase();
135
- return (msg.includes('429') || msg.includes('rate') || msg.includes('quota'));
136
- }
137
- return false;
72
+ const batches = chunk(texts, BATCH_SIZE);
73
+ const callbacks = {
74
+ onThrottle: this.onThrottle,
75
+ onBatchProgress: this.onBatchProgress,
76
+ };
77
+ return processBatchesWithLimit(batches, batch => withRetry(() => this.embedBatch(batch), callbacks), callbacks);
138
78
  }
139
79
  async embedBatch(texts) {
140
80
  const url = `${GEMINI_API_BASE}/${MODEL}:batchEmbedContents`;
@@ -4,8 +4,10 @@
4
4
  */
5
5
  export { GeminiEmbeddingProvider } from './gemini.js';
6
6
  export { Local4BEmbeddingProvider } from './local-4b.js';
7
- export { LocalEmbeddingProvider } from './local.js';
7
+ export { LocalEmbeddingProvider, clearCachedPipeline } from './local.js';
8
8
  export { MistralEmbeddingProvider } from './mistral.js';
9
+ export { MockEmbeddingProvider } from './mock.js';
9
10
  export { OpenAIEmbeddingProvider } from './openai.js';
10
11
  export { validateApiKey, type ValidationResult } from './validate.js';
11
12
  export type { EmbeddingProvider, ModelProgressCallback } from './types.js';
13
+ export { CONCURRENCY, BATCH_DELAY_MS, MAX_RETRIES, INITIAL_BACKOFF_MS, MAX_BACKOFF_MS, sleep, isRateLimitError, isTransientApiError, isRetriableError, withRetry, processBatchesWithLimit, chunk, type ApiProviderCallbacks, } from './api-utils.js';
@@ -4,7 +4,10 @@
4
4
  */
5
5
  export { GeminiEmbeddingProvider } from './gemini.js';
6
6
  export { Local4BEmbeddingProvider } from './local-4b.js';
7
- export { LocalEmbeddingProvider } from './local.js';
7
+ export { LocalEmbeddingProvider, clearCachedPipeline } from './local.js';
8
8
  export { MistralEmbeddingProvider } from './mistral.js';
9
+ export { MockEmbeddingProvider } from './mock.js';
9
10
  export { OpenAIEmbeddingProvider } from './openai.js';
10
11
  export { validateApiKey } from './validate.js';
12
+ // Shared utilities for API-based providers
13
+ export { CONCURRENCY, BATCH_DELAY_MS, MAX_RETRIES, INITIAL_BACKOFF_MS, MAX_BACKOFF_MS, sleep, isRateLimitError, isTransientApiError, isRetriableError, withRetry, processBatchesWithLimit, chunk, } from './api-utils.js';
@@ -14,14 +14,19 @@
14
14
  * - Data never leaves your machine
15
15
  */
16
16
  import type { EmbeddingProvider, ModelProgressCallback } from './types.js';
17
+ /**
18
+ * Clear the cached pipeline.
19
+ * Useful for tests that need to reset state between runs.
20
+ */
21
+ export declare function clearCachedPipeline(): void;
17
22
  /**
18
23
  * Local embedding provider using Qwen3-Embedding-0.6B Q8.
19
24
  */
20
25
  export declare class LocalEmbeddingProvider implements EmbeddingProvider {
21
26
  readonly dimensions = 1024;
22
- private extractor;
23
27
  private initialized;
24
28
  initialize(onProgress?: ModelProgressCallback): Promise<void>;
29
+ private loadModel;
25
30
  embed(texts: string[]): Promise<number[][]>;
26
31
  private embedBatch;
27
32
  embedSingle(text: string): Promise<number[]>;
@@ -17,6 +17,19 @@ import { pipeline } from '@huggingface/transformers';
17
17
  const MODEL_NAME = 'onnx-community/Qwen3-Embedding-0.6B-ONNX';
18
18
  const DIMENSIONS = 1024;
19
19
  const BATCH_SIZE = 8;
20
+ // Module-level cache for the ONNX pipeline
21
+ // Shared across all LocalEmbeddingProvider instances to avoid reloading the model
22
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any -- HuggingFace pipeline type is too complex
23
+ let cachedExtractor = null;
24
+ let initPromise = null;
25
+ /**
26
+ * Clear the cached pipeline.
27
+ * Useful for tests that need to reset state between runs.
28
+ */
29
+ export function clearCachedPipeline() {
30
+ cachedExtractor = null;
31
+ initPromise = null;
32
+ }
20
33
  /**
21
34
  * Local embedding provider using Qwen3-Embedding-0.6B Q8.
22
35
  */
@@ -28,13 +41,6 @@ export class LocalEmbeddingProvider {
28
41
  writable: true,
29
42
  value: DIMENSIONS
30
43
  });
31
- // eslint-disable-next-line @typescript-eslint/no-explicit-any -- HuggingFace pipeline type is too complex
32
- Object.defineProperty(this, "extractor", {
33
- enumerable: true,
34
- configurable: true,
35
- writable: true,
36
- value: null
37
- });
38
44
  Object.defineProperty(this, "initialized", {
39
45
  enumerable: true,
40
46
  configurable: true,
@@ -45,6 +51,33 @@ export class LocalEmbeddingProvider {
45
51
  async initialize(onProgress) {
46
52
  if (this.initialized)
47
53
  return;
54
+ // Reuse cached model if available
55
+ if (cachedExtractor) {
56
+ this.initialized = true;
57
+ onProgress?.('ready');
58
+ return;
59
+ }
60
+ // If another instance is already loading, wait for it
61
+ if (initPromise) {
62
+ await initPromise;
63
+ this.initialized = true;
64
+ onProgress?.('ready');
65
+ return;
66
+ }
67
+ // First load - this instance will load the model and cache it
68
+ initPromise = this.loadModel(onProgress);
69
+ try {
70
+ await initPromise;
71
+ this.initialized = true;
72
+ }
73
+ catch (error) {
74
+ // Clear the cached promise so future calls can retry
75
+ // (e.g., after network recovery or freeing memory)
76
+ initPromise = null;
77
+ throw error;
78
+ }
79
+ }
80
+ async loadModel(onProgress) {
48
81
  // Track download progress for the model files
49
82
  let lastProgress = 0;
50
83
  const progressCallback = onProgress
@@ -67,12 +100,11 @@ export class LocalEmbeddingProvider {
67
100
  onProgress?.('loading');
68
101
  // Load the model with q8 (int8) quantization for smaller size and faster inference
69
102
  // First load will download the model (~700MB)
70
- this.extractor = await pipeline('feature-extraction', MODEL_NAME, {
103
+ cachedExtractor = await pipeline('feature-extraction', MODEL_NAME, {
71
104
  dtype: 'q8', // int8 quantization
72
105
  progress_callback: progressCallback,
73
106
  });
74
107
  onProgress?.('ready');
75
- this.initialized = true;
76
108
  }
77
109
  async embed(texts) {
78
110
  if (!this.initialized) {
@@ -93,7 +125,7 @@ export class LocalEmbeddingProvider {
93
125
  async embedBatch(texts) {
94
126
  const results = [];
95
127
  for (const text of texts) {
96
- const output = await this.extractor(text, {
128
+ const output = await cachedExtractor(text, {
97
129
  pooling: 'mean',
98
130
  normalize: true,
99
131
  });
@@ -107,14 +139,15 @@ export class LocalEmbeddingProvider {
107
139
  if (!this.initialized) {
108
140
  await this.initialize();
109
141
  }
110
- const output = await this.extractor(text, {
142
+ const output = await cachedExtractor(text, {
111
143
  pooling: 'mean',
112
144
  normalize: true,
113
145
  });
114
146
  return Array.from(output.data);
115
147
  }
116
148
  close() {
117
- this.extractor = null;
149
+ // Mark this instance as uninitialized, but don't clear the cached model
150
+ // Other instances may still be using it
118
151
  this.initialized = false;
119
152
  }
120
153
  }
@@ -18,14 +18,6 @@ export declare class MistralEmbeddingProvider implements EmbeddingProvider {
18
18
  constructor(apiKey?: string);
19
19
  initialize(_onProgress?: ModelProgressCallback): Promise<void>;
20
20
  embed(texts: string[]): Promise<number[][]>;
21
- /**
22
- * Embed a batch with exponential backoff retry on rate limit errors.
23
- */
24
- private embedBatchWithRetry;
25
- /**
26
- * Check if an error is a rate limit error (429 or quota exceeded).
27
- */
28
- private isRateLimitError;
29
21
  private embedBatch;
30
22
  embedSingle(text: string): Promise<number[]>;
31
23
  close(): void;
@@ -4,19 +4,13 @@
4
4
  * Uses codestral-embed model (1536 dimensions).
5
5
  * Optimized for code and technical content.
6
6
  */
7
+ import { chunk, processBatchesWithLimit, withRetry, } from './api-utils.js';
7
8
  const MISTRAL_API_BASE = 'https://api.mistral.ai/v1';
8
9
  const MODEL = 'codestral-embed';
9
10
  // Mistral limits: 8,192 tokens/text, 16,000 tokens/batch TOTAL
10
- // With avg ~500 tokens/chunk, can fit ~32. Use 24 for safety margin.
11
- const BATCH_SIZE = 24;
12
- // Concurrency and rate limiting
13
- const CONCURRENCY = 5; // Max concurrent API requests
14
- const MAX_RETRIES = 12; // Max retry attempts on rate limit
15
- const INITIAL_BACKOFF_MS = 1000; // Start at 1s
16
- const MAX_BACKOFF_MS = 60000; // Cap at 60s (1 min)
17
- function sleep(ms) {
18
- return new Promise(resolve => setTimeout(resolve, ms));
19
- }
11
+ // Chunks are ~2000 chars + context header 800-1000 tokens each
12
+ // 12 chunks × 1000 tokens = 12,000 tokens (safe margin under 16k limit)
13
+ const BATCH_SIZE = 12;
20
14
  /**
21
15
  * Mistral embedding provider.
22
16
  * Uses codestral-embed model via Mistral AI API.
@@ -71,66 +65,12 @@ export class MistralEmbeddingProvider {
71
65
  if (texts.length === 0) {
72
66
  return [];
73
67
  }
74
- // Split into batches
75
- const batches = [];
76
- for (let i = 0; i < texts.length; i += BATCH_SIZE) {
77
- batches.push(texts.slice(i, i + BATCH_SIZE));
78
- }
79
- // Process batches with limited concurrency
80
- const results = [];
81
- let completed = 0;
82
- for (let i = 0; i < batches.length; i += CONCURRENCY) {
83
- const concurrentBatches = batches.slice(i, i + CONCURRENCY);
84
- // Fire concurrent requests
85
- const batchResults = await Promise.all(concurrentBatches.map(batch => this.embedBatchWithRetry(batch)));
86
- // Flatten and collect results (Promise.all preserves order)
87
- for (const result of batchResults) {
88
- results.push(...result);
89
- }
90
- // Report progress after concurrent group completes
91
- completed += concurrentBatches.length;
92
- const processed = Math.min(completed * BATCH_SIZE, texts.length);
93
- this.onBatchProgress?.(processed, texts.length);
94
- }
95
- return results;
96
- }
97
- /**
98
- * Embed a batch with exponential backoff retry on rate limit errors.
99
- */
100
- async embedBatchWithRetry(batch) {
101
- let attempt = 0;
102
- let backoffMs = INITIAL_BACKOFF_MS;
103
- while (true) {
104
- try {
105
- const result = await this.embedBatch(batch);
106
- // Clear throttle message on success (if was throttling)
107
- if (attempt > 0)
108
- this.onThrottle?.(null);
109
- return result;
110
- }
111
- catch (error) {
112
- if (this.isRateLimitError(error) && attempt < MAX_RETRIES) {
113
- attempt++;
114
- const secs = Math.round(backoffMs / 1000);
115
- this.onThrottle?.(`Rate limited - retry ${attempt}/${MAX_RETRIES} in ${secs}s`);
116
- await sleep(backoffMs);
117
- backoffMs = Math.min(backoffMs * 2, MAX_BACKOFF_MS);
118
- }
119
- else {
120
- throw error;
121
- }
122
- }
123
- }
124
- }
125
- /**
126
- * Check if an error is a rate limit error (429 or quota exceeded).
127
- */
128
- isRateLimitError(error) {
129
- if (error instanceof Error) {
130
- const msg = error.message.toLowerCase();
131
- return (msg.includes('429') || msg.includes('rate') || msg.includes('quota'));
132
- }
133
- return false;
68
+ const batches = chunk(texts, BATCH_SIZE);
69
+ const callbacks = {
70
+ onThrottle: this.onThrottle,
71
+ onBatchProgress: this.onBatchProgress,
72
+ };
73
+ return processBatchesWithLimit(batches, batch => withRetry(() => this.embedBatch(batch), callbacks), callbacks);
134
74
  }
135
75
  async embedBatch(texts) {
136
76
  const response = await fetch(`${MISTRAL_API_BASE}/embeddings`, {
@@ -0,0 +1,35 @@
1
+ /**
2
+ * Mock embedding provider for testing.
3
+ *
4
+ * Generates deterministic hash-based embeddings that:
5
+ * - Run instantly (no model loading)
6
+ * - Are deterministic (same input = same output)
7
+ * - Normalized to unit length
8
+ * - Support any dimension count
9
+ *
10
+ * Usage:
11
+ * - Unit tests that need embeddings but don't need semantic quality
12
+ * - Testing search infrastructure without ONNX overhead
13
+ * - CI pipeline fast checks
14
+ */
15
+ import type { EmbeddingProvider, ModelProgressCallback } from './types.js';
16
+ /**
17
+ * Mock embedding provider using deterministic hash-based vectors.
18
+ */
19
+ export declare class MockEmbeddingProvider implements EmbeddingProvider {
20
+ readonly dimensions: number;
21
+ constructor(dimensions?: number);
22
+ initialize(_onProgress?: ModelProgressCallback): Promise<void>;
23
+ embed(texts: string[]): Promise<number[][]>;
24
+ embedSingle(text: string): Promise<number[]>;
25
+ /**
26
+ * Convert text to a deterministic unit vector.
27
+ * Uses a simple hash-based approach to generate pseudo-random but repeatable values.
28
+ */
29
+ private hashToVector;
30
+ /**
31
+ * Simple string hash function (djb2).
32
+ */
33
+ private hash;
34
+ close(): void;
35
+ }
@@ -0,0 +1,69 @@
1
+ /**
2
+ * Mock embedding provider for testing.
3
+ *
4
+ * Generates deterministic hash-based embeddings that:
5
+ * - Run instantly (no model loading)
6
+ * - Are deterministic (same input = same output)
7
+ * - Normalized to unit length
8
+ * - Support any dimension count
9
+ *
10
+ * Usage:
11
+ * - Unit tests that need embeddings but don't need semantic quality
12
+ * - Testing search infrastructure without ONNX overhead
13
+ * - CI pipeline fast checks
14
+ */
15
+ const DEFAULT_DIMENSIONS = 1024;
16
+ /**
17
+ * Mock embedding provider using deterministic hash-based vectors.
18
+ */
19
+ export class MockEmbeddingProvider {
20
+ constructor(dimensions = DEFAULT_DIMENSIONS) {
21
+ Object.defineProperty(this, "dimensions", {
22
+ enumerable: true,
23
+ configurable: true,
24
+ writable: true,
25
+ value: void 0
26
+ });
27
+ this.dimensions = dimensions;
28
+ }
29
+ async initialize(_onProgress) {
30
+ // No initialization needed - instant startup
31
+ }
32
+ async embed(texts) {
33
+ return texts.map(t => this.hashToVector(t));
34
+ }
35
+ async embedSingle(text) {
36
+ return this.hashToVector(text);
37
+ }
38
+ /**
39
+ * Convert text to a deterministic unit vector.
40
+ * Uses a simple hash-based approach to generate pseudo-random but repeatable values.
41
+ */
42
+ hashToVector(text) {
43
+ const seed = this.hash(text);
44
+ // Generate deterministic pseudo-random values
45
+ const vec = new Array(this.dimensions).fill(0).map((_, i) => {
46
+ // LCG-like pseudo-random based on seed and index
47
+ const state = (((seed * (i + 1) * 1103515245 + 12345) >>> 0) % 0x7fffffff) /
48
+ 0x7fffffff;
49
+ return state * 2 - 1; // Range [-1, 1]
50
+ });
51
+ // Normalize to unit length
52
+ const magnitude = Math.sqrt(vec.reduce((sum, v) => sum + v * v, 0));
53
+ return vec.map(v => (magnitude > 0 ? v / magnitude : 0));
54
+ }
55
+ /**
56
+ * Simple string hash function (djb2).
57
+ */
58
+ hash(str) {
59
+ let h = 5381;
60
+ for (let i = 0; i < str.length; i++) {
61
+ h = (h * 33) ^ str.charCodeAt(i);
62
+ h = h >>> 0; // Convert to unsigned 32-bit
63
+ }
64
+ return h;
65
+ }
66
+ close() {
67
+ // Nothing to close
68
+ }
69
+ }
@@ -8,24 +8,22 @@ import type { EmbeddingProvider, ModelProgressCallback } from './types.js';
8
8
  /**
9
9
  * OpenAI embedding provider.
10
10
  * Uses text-embedding-3-small model via OpenAI API.
11
+ *
12
+ * Supports regional endpoints for corporate accounts with data residency:
13
+ * - Default: https://api.openai.com/v1
14
+ * - US: https://us.api.openai.com/v1
15
+ * - EU: https://eu.api.openai.com/v1
11
16
  */
12
17
  export declare class OpenAIEmbeddingProvider implements EmbeddingProvider {
13
18
  readonly dimensions = 1536;
14
19
  private apiKey;
20
+ private apiBase;
15
21
  private initialized;
16
22
  onThrottle?: (message: string | null) => void;
17
23
  onBatchProgress?: (processed: number, total: number) => void;
18
- constructor(apiKey?: string);
24
+ constructor(apiKey?: string, baseUrl?: string);
19
25
  initialize(_onProgress?: ModelProgressCallback): Promise<void>;
20
26
  embed(texts: string[]): Promise<number[][]>;
21
- /**
22
- * Embed a batch with exponential backoff retry on rate limit errors.
23
- */
24
- private embedBatchWithRetry;
25
- /**
26
- * Check if an error is a rate limit error (429 or quota exceeded).
27
- */
28
- private isRateLimitError;
29
27
  private embedBatch;
30
28
  embedSingle(text: string): Promise<number[]>;
31
29
  close(): void;
@@ -4,25 +4,24 @@
4
4
  * Uses text-embedding-3-small model (1536 dimensions).
5
5
  * Good quality with fast API responses and low cost ($0.02/1M tokens).
6
6
  */
7
- const OPENAI_API_BASE = 'https://api.openai.com/v1';
7
+ import { chunk, processBatchesWithLimit, withRetry, } from './api-utils.js';
8
+ const DEFAULT_API_BASE = 'https://api.openai.com/v1';
8
9
  const MODEL = 'text-embedding-3-small';
9
10
  // OpenAI limits: 8,191 tokens/text, 300,000 tokens/batch, 2,048 texts/batch
10
- // With avg ~1000 tokens/chunk, safe limit is 300 texts. Use 256 for margin.
11
- const BATCH_SIZE = 256;
12
- // Concurrency and rate limiting
13
- const CONCURRENCY = 5; // Max concurrent API requests
14
- const MAX_RETRIES = 12; // Max retry attempts on rate limit
15
- const INITIAL_BACKOFF_MS = 1000; // Start at 1s
16
- const MAX_BACKOFF_MS = 60000; // Cap at 60s (1 min)
17
- function sleep(ms) {
18
- return new Promise(resolve => setTimeout(resolve, ms));
19
- }
11
+ // Chunks are ~2000 chars + context header 800-1000 tokens each
12
+ // 200 chunks × 1000 tokens = 200,000 tokens (safe margin under 300k limit)
13
+ const BATCH_SIZE = 200;
20
14
  /**
21
15
  * OpenAI embedding provider.
22
16
  * Uses text-embedding-3-small model via OpenAI API.
17
+ *
18
+ * Supports regional endpoints for corporate accounts with data residency:
19
+ * - Default: https://api.openai.com/v1
20
+ * - US: https://us.api.openai.com/v1
21
+ * - EU: https://eu.api.openai.com/v1
23
22
  */
24
23
  export class OpenAIEmbeddingProvider {
25
- constructor(apiKey) {
24
+ constructor(apiKey, baseUrl) {
26
25
  Object.defineProperty(this, "dimensions", {
27
26
  enumerable: true,
28
27
  configurable: true,
@@ -35,6 +34,12 @@ export class OpenAIEmbeddingProvider {
35
34
  writable: true,
36
35
  value: void 0
37
36
  });
37
+ Object.defineProperty(this, "apiBase", {
38
+ enumerable: true,
39
+ configurable: true,
40
+ writable: true,
41
+ value: void 0
42
+ });
38
43
  Object.defineProperty(this, "initialized", {
39
44
  enumerable: true,
40
45
  configurable: true,
@@ -57,6 +62,7 @@ export class OpenAIEmbeddingProvider {
57
62
  });
58
63
  // Trim the key to remove any accidental whitespace
59
64
  this.apiKey = (apiKey ?? '').trim();
65
+ this.apiBase = baseUrl ?? DEFAULT_API_BASE;
60
66
  }
61
67
  async initialize(_onProgress) {
62
68
  if (!this.apiKey) {
@@ -75,69 +81,15 @@ export class OpenAIEmbeddingProvider {
75
81
  if (texts.length === 0) {
76
82
  return [];
77
83
  }
78
- // Split into batches
79
- const batches = [];
80
- for (let i = 0; i < texts.length; i += BATCH_SIZE) {
81
- batches.push(texts.slice(i, i + BATCH_SIZE));
82
- }
83
- // Process batches with limited concurrency
84
- const results = [];
85
- let completed = 0;
86
- for (let i = 0; i < batches.length; i += CONCURRENCY) {
87
- const concurrentBatches = batches.slice(i, i + CONCURRENCY);
88
- // Fire concurrent requests
89
- const batchResults = await Promise.all(concurrentBatches.map(batch => this.embedBatchWithRetry(batch)));
90
- // Flatten and collect results (Promise.all preserves order)
91
- for (const result of batchResults) {
92
- results.push(...result);
93
- }
94
- // Report progress after concurrent group completes
95
- completed += concurrentBatches.length;
96
- const processed = Math.min(completed * BATCH_SIZE, texts.length);
97
- this.onBatchProgress?.(processed, texts.length);
98
- }
99
- return results;
100
- }
101
- /**
102
- * Embed a batch with exponential backoff retry on rate limit errors.
103
- */
104
- async embedBatchWithRetry(batch) {
105
- let attempt = 0;
106
- let backoffMs = INITIAL_BACKOFF_MS;
107
- while (true) {
108
- try {
109
- const result = await this.embedBatch(batch);
110
- // Clear throttle message on success (if was throttling)
111
- if (attempt > 0)
112
- this.onThrottle?.(null);
113
- return result;
114
- }
115
- catch (error) {
116
- if (this.isRateLimitError(error) && attempt < MAX_RETRIES) {
117
- attempt++;
118
- const secs = Math.round(backoffMs / 1000);
119
- this.onThrottle?.(`Rate limited - retry ${attempt}/${MAX_RETRIES} in ${secs}s`);
120
- await sleep(backoffMs);
121
- backoffMs = Math.min(backoffMs * 2, MAX_BACKOFF_MS);
122
- }
123
- else {
124
- throw error;
125
- }
126
- }
127
- }
128
- }
129
- /**
130
- * Check if an error is a rate limit error (429 or quota exceeded).
131
- */
132
- isRateLimitError(error) {
133
- if (error instanceof Error) {
134
- const msg = error.message.toLowerCase();
135
- return (msg.includes('429') || msg.includes('rate') || msg.includes('quota'));
136
- }
137
- return false;
84
+ const batches = chunk(texts, BATCH_SIZE);
85
+ const callbacks = {
86
+ onThrottle: this.onThrottle,
87
+ onBatchProgress: this.onBatchProgress,
88
+ };
89
+ return processBatchesWithLimit(batches, batch => withRetry(() => this.embedBatch(batch), callbacks), callbacks);
138
90
  }
139
91
  async embedBatch(texts) {
140
- const response = await fetch(`${OPENAI_API_BASE}/embeddings`, {
92
+ const response = await fetch(`${this.apiBase}/embeddings`, {
141
93
  method: 'POST',
142
94
  headers: {
143
95
  'Content-Type': 'application/json',
@@ -285,7 +285,8 @@ export class Indexer {
285
285
  // Track chunks processed for progress updates
286
286
  let lastReportedChunks = 0;
287
287
  // Wire batch progress callback to report incremental chunks
288
- if (progressContext?.onChunksProcessed && 'onBatchProgress' in embeddings) {
288
+ if (progressContext?.onChunksProcessed &&
289
+ 'onBatchProgress' in embeddings) {
289
290
  embeddings.onBatchProgress = (processed, _total) => {
290
291
  // Report only the delta since last update
291
292
  const delta = processed - lastReportedChunks;
@@ -414,7 +415,7 @@ export class Indexer {
414
415
  case 'mistral':
415
416
  return new MistralEmbeddingProvider(apiKey);
416
417
  case 'openai':
417
- return new OpenAIEmbeddingProvider(apiKey);
418
+ return new OpenAIEmbeddingProvider(apiKey, config.openaiBaseUrl);
418
419
  default:
419
420
  throw new Error(`Unknown embedding provider: ${config.embeddingProvider}`);
420
421
  }
@@ -362,7 +362,7 @@ export class SearchEngine {
362
362
  case 'mistral':
363
363
  return new MistralEmbeddingProvider(apiKey);
364
364
  case 'openai':
365
- return new OpenAIEmbeddingProvider(apiKey);
365
+ return new OpenAIEmbeddingProvider(apiKey, config.openaiBaseUrl);
366
366
  default:
367
367
  throw new Error(`Unknown embedding provider: ${config.embeddingProvider}`);
368
368
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "viberag",
3
- "version": "0.3.2",
3
+ "version": "0.3.3",
4
4
  "description": "Local code RAG for AI coding assistants - semantic search via MCP server",
5
5
  "license": "AGPL-3.0",
6
6
  "keywords": [
@@ -38,6 +38,8 @@
38
38
  "build": "tsc",
39
39
  "dev": "tsc --watch",
40
40
  "test": "prettier --check . && eslint . && vitest run",
41
+ "test:fast": "vitest run --project=fast",
42
+ "test:rag": "vitest run --project=rag",
41
43
  "test:smoke": "vitest run --testNamePattern='Grammar Smoke'",
42
44
  "lint": "eslint .",
43
45
  "lint:fix": "eslint . --fix",
@@ -62,6 +64,7 @@
62
64
  ],
63
65
  "dependencies": {
64
66
  "@huggingface/transformers": "^3.8.1",
67
+ "p-limit": "^6.2.0",
65
68
  "@lancedb/lancedb": "^0.23.0",
66
69
  "apache-arrow": "^18.1.0",
67
70
  "chalk": "^5.6.2",