viberag 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (209) hide show
  1. package/README.md +2 -2
  2. package/dist/cli/app.d.ts +3 -0
  3. package/dist/cli/app.js +100 -102
  4. package/dist/cli/commands/handlers.d.ts +8 -6
  5. package/dist/cli/commands/handlers.js +90 -32
  6. package/dist/cli/commands/useCommands.d.ts +20 -0
  7. package/dist/cli/commands/useCommands.js +189 -0
  8. package/dist/cli/commands/useRagCommands.d.ts +2 -5
  9. package/dist/cli/commands/useRagCommands.js +11 -18
  10. package/dist/cli/components/InitWizard.js +66 -27
  11. package/dist/cli/components/McpSetupWizard.js +23 -4
  12. package/dist/cli/components/SlotRow.d.ts +22 -0
  13. package/dist/cli/components/SlotRow.js +55 -0
  14. package/dist/cli/components/StatusBar.d.ts +14 -0
  15. package/dist/cli/components/StatusBar.js +156 -0
  16. package/dist/cli/contexts/DaemonStatusContext.d.ts +38 -0
  17. package/dist/cli/contexts/DaemonStatusContext.js +106 -0
  18. package/dist/cli/hooks/useStatusPolling.d.ts +34 -0
  19. package/dist/cli/hooks/useStatusPolling.js +121 -0
  20. package/dist/cli/store/app/selectors.d.ts +87 -0
  21. package/dist/cli/store/app/selectors.js +28 -0
  22. package/dist/cli/store/app/slice.d.ts +1013 -0
  23. package/dist/cli/store/app/slice.js +112 -0
  24. package/dist/cli/store/hooks.d.ts +22 -0
  25. package/dist/cli/store/hooks.js +17 -0
  26. package/dist/cli/store/store.d.ts +17 -0
  27. package/dist/cli/store/store.js +18 -0
  28. package/dist/cli/store/wizard/selectors.d.ts +115 -0
  29. package/dist/cli/store/wizard/selectors.js +36 -0
  30. package/dist/cli/store/wizard/slice.d.ts +523 -0
  31. package/dist/cli/store/wizard/slice.js +119 -0
  32. package/dist/cli/utils/error-handler.d.ts +55 -0
  33. package/dist/cli/utils/error-handler.js +92 -0
  34. package/dist/client/auto-start.d.ts +42 -0
  35. package/dist/client/auto-start.js +250 -0
  36. package/dist/client/connection.d.ts +48 -0
  37. package/dist/client/connection.js +200 -0
  38. package/dist/client/index.d.ts +93 -0
  39. package/dist/client/index.js +209 -0
  40. package/dist/client/types.d.ts +105 -0
  41. package/dist/client/types.js +7 -0
  42. package/dist/common/components/SlotRow.d.ts +22 -0
  43. package/dist/common/components/SlotRow.js +53 -0
  44. package/dist/common/components/StatusBar.js +82 -31
  45. package/dist/common/types.d.ts +12 -13
  46. package/dist/daemon/handlers.d.ts +15 -0
  47. package/dist/daemon/handlers.js +157 -0
  48. package/dist/daemon/index.d.ts +21 -0
  49. package/dist/daemon/index.js +123 -0
  50. package/dist/daemon/lib/chunker/bounded-channel.d.ts +51 -0
  51. package/dist/daemon/lib/chunker/bounded-channel.js +138 -0
  52. package/dist/daemon/lib/chunker/index.d.ts +135 -0
  53. package/dist/daemon/lib/chunker/index.js +1370 -0
  54. package/dist/daemon/lib/chunker/types.d.ts +77 -0
  55. package/dist/daemon/lib/chunker/types.js +50 -0
  56. package/dist/daemon/lib/config.d.ts +73 -0
  57. package/dist/daemon/lib/config.js +149 -0
  58. package/dist/daemon/lib/constants.d.ts +75 -0
  59. package/dist/daemon/lib/constants.js +114 -0
  60. package/dist/daemon/lib/gitignore.d.ts +57 -0
  61. package/dist/daemon/lib/gitignore.js +246 -0
  62. package/dist/daemon/lib/logger.d.ts +51 -0
  63. package/dist/daemon/lib/logger.js +167 -0
  64. package/dist/daemon/lib/manifest.d.ts +58 -0
  65. package/dist/daemon/lib/manifest.js +116 -0
  66. package/dist/daemon/lib/merkle/diff.d.ts +32 -0
  67. package/dist/daemon/lib/merkle/diff.js +107 -0
  68. package/dist/daemon/lib/merkle/hash.d.ts +40 -0
  69. package/dist/daemon/lib/merkle/hash.js +180 -0
  70. package/dist/daemon/lib/merkle/index.d.ts +71 -0
  71. package/dist/daemon/lib/merkle/index.js +309 -0
  72. package/dist/daemon/lib/merkle/node.d.ts +55 -0
  73. package/dist/daemon/lib/merkle/node.js +82 -0
  74. package/dist/daemon/lifecycle.d.ts +50 -0
  75. package/dist/daemon/lifecycle.js +142 -0
  76. package/dist/daemon/owner.d.ts +175 -0
  77. package/dist/daemon/owner.js +609 -0
  78. package/dist/daemon/protocol.d.ts +100 -0
  79. package/dist/daemon/protocol.js +163 -0
  80. package/dist/daemon/providers/api-utils.d.ts +130 -0
  81. package/dist/daemon/providers/api-utils.js +248 -0
  82. package/dist/daemon/providers/gemini.d.ts +39 -0
  83. package/dist/daemon/providers/gemini.js +205 -0
  84. package/dist/daemon/providers/index.d.ts +14 -0
  85. package/dist/daemon/providers/index.js +14 -0
  86. package/dist/daemon/providers/local-4b.d.ts +28 -0
  87. package/dist/daemon/providers/local-4b.js +51 -0
  88. package/dist/daemon/providers/local.d.ts +36 -0
  89. package/dist/daemon/providers/local.js +166 -0
  90. package/dist/daemon/providers/mistral.d.ts +35 -0
  91. package/dist/daemon/providers/mistral.js +160 -0
  92. package/dist/daemon/providers/mock.d.ts +35 -0
  93. package/dist/daemon/providers/mock.js +69 -0
  94. package/dist/daemon/providers/openai.d.ts +41 -0
  95. package/dist/daemon/providers/openai.js +190 -0
  96. package/dist/daemon/providers/types.d.ts +68 -0
  97. package/dist/daemon/providers/types.js +6 -0
  98. package/dist/daemon/providers/validate.d.ts +30 -0
  99. package/dist/daemon/providers/validate.js +162 -0
  100. package/dist/daemon/server.d.ts +79 -0
  101. package/dist/daemon/server.js +293 -0
  102. package/dist/daemon/services/index.d.ts +11 -0
  103. package/dist/daemon/services/index.js +16 -0
  104. package/dist/daemon/services/indexing.d.ts +117 -0
  105. package/dist/daemon/services/indexing.js +573 -0
  106. package/dist/daemon/services/search/filters.d.ts +21 -0
  107. package/dist/daemon/services/search/filters.js +106 -0
  108. package/dist/daemon/services/search/fts.d.ts +32 -0
  109. package/dist/daemon/services/search/fts.js +61 -0
  110. package/dist/daemon/services/search/hybrid.d.ts +17 -0
  111. package/dist/daemon/services/search/hybrid.js +58 -0
  112. package/dist/daemon/services/search/index.d.ts +108 -0
  113. package/dist/daemon/services/search/index.js +417 -0
  114. package/dist/daemon/services/search/types.d.ts +126 -0
  115. package/dist/daemon/services/search/types.js +4 -0
  116. package/dist/daemon/services/search/vector.d.ts +25 -0
  117. package/dist/daemon/services/search/vector.js +44 -0
  118. package/dist/daemon/services/storage/index.d.ts +110 -0
  119. package/dist/daemon/services/storage/index.js +378 -0
  120. package/dist/daemon/services/storage/schema.d.ts +24 -0
  121. package/dist/daemon/services/storage/schema.js +51 -0
  122. package/dist/daemon/services/storage/types.d.ts +105 -0
  123. package/dist/daemon/services/storage/types.js +71 -0
  124. package/dist/daemon/services/types.d.ts +192 -0
  125. package/dist/daemon/services/types.js +53 -0
  126. package/dist/daemon/services/watcher.d.ts +98 -0
  127. package/dist/daemon/services/watcher.js +386 -0
  128. package/dist/daemon/state.d.ts +119 -0
  129. package/dist/daemon/state.js +161 -0
  130. package/dist/mcp/index.d.ts +1 -1
  131. package/dist/mcp/index.js +44 -60
  132. package/dist/mcp/server.d.ts +10 -14
  133. package/dist/mcp/server.js +75 -74
  134. package/dist/mcp/services/lazy-loader.d.ts +23 -0
  135. package/dist/mcp/services/lazy-loader.js +34 -0
  136. package/dist/mcp/warmup.d.ts +3 -3
  137. package/dist/mcp/warmup.js +39 -40
  138. package/dist/mcp/watcher.d.ts +5 -7
  139. package/dist/mcp/watcher.js +73 -64
  140. package/dist/rag/config/index.d.ts +2 -0
  141. package/dist/rag/constants.d.ts +30 -0
  142. package/dist/rag/constants.js +38 -0
  143. package/dist/rag/embeddings/api-utils.d.ts +121 -0
  144. package/dist/rag/embeddings/api-utils.js +259 -0
  145. package/dist/rag/embeddings/gemini.d.ts +4 -12
  146. package/dist/rag/embeddings/gemini.js +22 -72
  147. package/dist/rag/embeddings/index.d.ts +5 -3
  148. package/dist/rag/embeddings/index.js +5 -2
  149. package/dist/rag/embeddings/local-4b.d.ts +2 -2
  150. package/dist/rag/embeddings/local-4b.js +1 -1
  151. package/dist/rag/embeddings/local.d.ts +10 -3
  152. package/dist/rag/embeddings/local.js +58 -12
  153. package/dist/rag/embeddings/mistral.d.ts +4 -12
  154. package/dist/rag/embeddings/mistral.js +22 -72
  155. package/dist/rag/embeddings/mock.d.ts +35 -0
  156. package/dist/rag/embeddings/mock.js +69 -0
  157. package/dist/rag/embeddings/openai.d.ts +11 -13
  158. package/dist/rag/embeddings/openai.js +47 -75
  159. package/dist/rag/embeddings/types.d.ts +27 -1
  160. package/dist/rag/embeddings/validate.d.ts +9 -1
  161. package/dist/rag/embeddings/validate.js +17 -4
  162. package/dist/rag/index.d.ts +2 -2
  163. package/dist/rag/index.js +1 -1
  164. package/dist/rag/indexer/bounded-channel.d.ts +51 -0
  165. package/dist/rag/indexer/bounded-channel.js +138 -0
  166. package/dist/rag/indexer/indexer.d.ts +4 -14
  167. package/dist/rag/indexer/indexer.js +246 -169
  168. package/dist/rag/indexer/types.d.ts +1 -0
  169. package/dist/rag/logger/index.d.ts +22 -0
  170. package/dist/rag/logger/index.js +78 -1
  171. package/dist/rag/manifest/index.js +1 -2
  172. package/dist/rag/search/index.js +1 -1
  173. package/dist/rag/storage/schema.d.ts +2 -4
  174. package/dist/rag/storage/schema.js +3 -5
  175. package/dist/store/app/selectors.d.ts +87 -0
  176. package/dist/store/app/selectors.js +28 -0
  177. package/dist/store/app/slice.d.ts +1013 -0
  178. package/dist/store/app/slice.js +112 -0
  179. package/dist/store/hooks.d.ts +22 -0
  180. package/dist/store/hooks.js +17 -0
  181. package/dist/store/index.d.ts +12 -0
  182. package/dist/store/index.js +18 -0
  183. package/dist/store/indexing/listeners.d.ts +25 -0
  184. package/dist/store/indexing/listeners.js +46 -0
  185. package/dist/store/indexing/selectors.d.ts +195 -0
  186. package/dist/store/indexing/selectors.js +69 -0
  187. package/dist/store/indexing/slice.d.ts +309 -0
  188. package/dist/store/indexing/slice.js +113 -0
  189. package/dist/store/slot-progress/listeners.d.ts +23 -0
  190. package/dist/store/slot-progress/listeners.js +33 -0
  191. package/dist/store/slot-progress/selectors.d.ts +67 -0
  192. package/dist/store/slot-progress/selectors.js +36 -0
  193. package/dist/store/slot-progress/slice.d.ts +246 -0
  194. package/dist/store/slot-progress/slice.js +70 -0
  195. package/dist/store/store.d.ts +17 -0
  196. package/dist/store/store.js +18 -0
  197. package/dist/store/warmup/selectors.d.ts +109 -0
  198. package/dist/store/warmup/selectors.js +44 -0
  199. package/dist/store/warmup/slice.d.ts +137 -0
  200. package/dist/store/warmup/slice.js +72 -0
  201. package/dist/store/watcher/selectors.d.ts +115 -0
  202. package/dist/store/watcher/selectors.js +52 -0
  203. package/dist/store/watcher/slice.d.ts +269 -0
  204. package/dist/store/watcher/slice.js +100 -0
  205. package/dist/store/wizard/selectors.d.ts +115 -0
  206. package/dist/store/wizard/selectors.js +36 -0
  207. package/dist/store/wizard/slice.d.ts +523 -0
  208. package/dist/store/wizard/slice.js +119 -0
  209. package/package.json +10 -2
@@ -0,0 +1,160 @@
1
+ /**
2
+ * Mistral embedding provider using Mistral AI API.
3
+ *
4
+ * Uses codestral-embed model (1536 dimensions).
5
+ * Optimized for code and technical content.
6
+ */
7
+ import { chunk, processBatchesWithLimit, withRetry, } from './api-utils.js';
8
const MISTRAL_API_BASE = 'https://api.mistral.ai/v1';
const MODEL = 'codestral-embed';
// Mistral limits: 8,192 tokens/text, 16,000 tokens/batch TOTAL
// Chunks are ~2000 chars but token count varies (code can be 1.5-2x tokens/char)
// 8 chunks × ~1500 tokens worst case = 12,000 tokens (75% of the 16k limit, i.e. 25% headroom)
const BATCH_SIZE = 8;
/**
 * Extract a human-readable message from the body of a failed Mistral response.
 *
 * Prefers the `message` field, then `detail`, and falls back to the raw body
 * when the body is not JSON or carries neither field. Structured values
 * (objects/arrays) are JSON-encoded so they do not end up as
 * "[object Object]" inside the thrown error.
 *
 * @param errorText - Raw response body text
 * @returns Message string suitable for interpolation into an Error
 */
function readMistralErrorMessage(errorText) {
    let parsed;
    try {
        parsed = JSON.parse(errorText);
    }
    catch {
        return errorText;
    }
    const detail = parsed?.message || parsed?.detail;
    if (!detail) {
        return errorText;
    }
    return typeof detail === 'string' ? detail : JSON.stringify(detail);
}
/**
 * Mistral embedding provider.
 * Uses codestral-embed model via Mistral AI API.
 */
export class MistralEmbeddingProvider {
    /**
     * @param apiKey - Mistral API key. Surrounding whitespace is trimmed; a
     *   missing key is stored as '' and rejected later by initialize().
     */
    constructor(apiKey) {
        // Every field is an own, enumerable, writable, configurable property,
        // created in this exact order.
        const define = (name, value) => Object.defineProperty(this, name, {
            enumerable: true,
            configurable: true,
            writable: true,
            value,
        });
        define('dimensions', 1536);
        define('apiKey', undefined);
        define('initialized', false);
        // Callback for rate limit throttling - message or null to clear
        define('onThrottle', undefined);
        // Callback for batch progress - (processed, total) chunks
        define('onBatchProgress', undefined);
        // Slot progress callbacks (wired by daemon owner)
        define('onSlotProcessing', undefined);
        define('onSlotRateLimited', undefined);
        define('onSlotIdle', undefined);
        define('onSlotFailure', undefined);
        define('onResetSlots', undefined);
        // Trim the key to remove any accidental whitespace
        this.apiKey = (apiKey ?? '').trim();
    }
    /**
     * Mark the provider ready for use. No network call is made here.
     *
     * @param _onProgress - Accepted for interface compatibility; never invoked
     * @throws Error when no API key was supplied
     */
    async initialize(_onProgress) {
        if (!this.apiKey) {
            throw new Error('Mistral API key required. Run /init to configure your API key.');
        }
        this.initialized = true;
    }
    /**
     * Embed many texts, splitting them into batches of BATCH_SIZE.
     *
     * Initializes lazily if initialize() has not been called yet and returns
     * [] for empty input without touching the network. Scheduling, retry and
     * progress reporting are delegated to processBatchesWithLimit / withRetry
     * from ./api-utils.js.
     *
     * @param texts - Texts to embed
     * @param options - Optional chunk metadata (grouped with the same batch
     *   size as texts), logger, and chunk offset (defaults to 0)
     * @returns Whatever processBatchesWithLimit resolves with
     */
    async embed(texts, options) {
        if (!this.initialized) {
            await this.initialize();
        }
        if (texts.length === 0) {
            return [];
        }
        const batches = chunk(texts, BATCH_SIZE);
        // Snapshot the callbacks as they are set at call time.
        const callbacks = {
            onThrottle: this.onThrottle,
            onBatchProgress: this.onBatchProgress,
            onSlotProcessing: this.onSlotProcessing,
            onSlotRateLimited: this.onSlotRateLimited,
            onSlotIdle: this.onSlotIdle,
            onSlotFailure: this.onSlotFailure,
            onResetSlots: this.onResetSlots,
        };
        // Convert chunk metadata to batch metadata if provided
        let batchMetadata;
        if (options?.chunkMetadata) {
            const metaBatches = chunk(options.chunkMetadata, BATCH_SIZE);
            batchMetadata = metaBatches.map(metaBatch => ({
                filepaths: metaBatch.map(m => m.filepath),
                lineRanges: metaBatch.map(m => ({ start: m.startLine, end: m.endLine })),
                sizes: metaBatch.map(m => m.size),
            }));
        }
        return processBatchesWithLimit(batches, (batch, onRetrying) => withRetry(() => this.embedBatch(batch), callbacks, onRetrying), callbacks, BATCH_SIZE, batchMetadata, options?.logger, options?.chunkOffset ?? 0);
    }
    /**
     * Send one batch to POST {MISTRAL_API_BASE}/embeddings.
     *
     * @param texts - Texts of a single batch
     * @returns Embedding vectors ordered by the `index` field of the response
     * @throws Error on any non-OK response; 401 gets an API-key hint
     */
    async embedBatch(texts) {
        const response = await fetch(`${MISTRAL_API_BASE}/embeddings`, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                Authorization: `Bearer ${this.apiKey}`,
            },
            body: JSON.stringify({
                model: MODEL,
                input: texts,
            }),
        });
        if (!response.ok) {
            const errorMessage = readMistralErrorMessage(await response.text());
            if (response.status === 401) {
                throw new Error(`Mistral API authentication failed (401). ` +
                    `Verify your API key at https://console.mistral.ai/api-keys. Error: ${errorMessage}`);
            }
            throw new Error(`Mistral API error (${response.status}): ${errorMessage}`);
        }
        const data = (await response.json());
        // Sort a copy by index to ensure correct order without mutating the payload
        return [...data.data].sort((a, b) => a.index - b.index).map(d => d.embedding);
    }
    /**
     * Embed a single text by delegating to embed().
     *
     * @param text - Text to embed
     * @returns The first (only) vector of the result
     */
    async embedSingle(text) {
        const results = await this.embed([text]);
        return results[0];
    }
    /** Reset the initialized flag; the next embed() will re-run initialize(). */
    close() {
        this.initialized = false;
    }
}
@@ -0,0 +1,35 @@
1
+ /**
2
+ * Mock embedding provider for testing.
3
+ *
4
+ * Generates deterministic hash-based embeddings that:
5
+ * - Run instantly (no model loading)
6
+ * - Are deterministic (same input = same output)
7
+ * - Normalized to unit length
8
+ * - Support any dimension count
9
+ *
10
+ * Usage:
11
+ * - Unit tests that need embeddings but don't need semantic quality
12
+ * - Testing search infrastructure without ONNX overhead
13
+ * - CI pipeline fast checks
14
+ */
15
+ import type { EmbeddingProvider, ModelProgressCallback, EmbedOptions } from './types.js';
16
+ /**
17
+ * Mock embedding provider using deterministic hash-based vectors.
+ * Every vector is derived purely from the input text and `dimensions`;
+ * no model is loaded and no network or file access happens.
18
+ */
19
+ export declare class MockEmbeddingProvider implements EmbeddingProvider {
20
+ readonly dimensions: number; // Length of every vector produced by embed()/embedSingle().
21
+ constructor(dimensions?: number); // The implementation defaults `dimensions` to 1024 when omitted.
22
+ initialize(_onProgress?: ModelProgressCallback): Promise<void>; // No-op; the progress callback is never invoked.
23
+ embed(texts: string[], _options?: EmbedOptions): Promise<number[][]>; // One vector per text, in input order; options are ignored.
24
+ embedSingle(text: string): Promise<number[]>; // Same vector embed([text]) would return for that text.
25
+ /**
26
+ * Convert text to a deterministic unit vector.
27
+ * Uses a simple hash-based approach to generate pseudo-random but repeatable values.
+ * Components are drawn in [-1, 1] and then divided by the vector's magnitude
+ * (a zero magnitude yields all zeros).
28
+ */
29
+ private hashToVector;
30
+ /**
31
+ * Simple string hash function (djb2).
+ * The implementation uses the XOR form (h * 33 ^ charCode), seeded with 5381
+ * and kept as an unsigned 32-bit value after every character.
32
+ */
33
+ private hash;
34
+ close(): void; // No-op; the mock holds no resources.
35
+ }
@@ -0,0 +1,69 @@
1
+ /**
2
+ * Mock embedding provider for testing.
3
+ *
4
+ * Generates deterministic hash-based embeddings that:
5
+ * - Run instantly (no model loading)
6
+ * - Are deterministic (same input = same output)
7
+ * - Normalized to unit length
8
+ * - Support any dimension count
9
+ *
10
+ * Usage:
11
+ * - Unit tests that need embeddings but don't need semantic quality
12
+ * - Testing search infrastructure without ONNX overhead
13
+ * - CI pipeline fast checks
14
+ */
15
const DEFAULT_DIMENSIONS = 1024;
/**
 * Test double for an embedding provider.
 *
 * Vectors are a pure function of the input text and the configured
 * dimension count, so repeated calls give identical results.
 */
export class MockEmbeddingProvider {
    /**
     * @param dimensions - Length of every generated vector (defaults to DEFAULT_DIMENSIONS)
     */
    constructor(dimensions = DEFAULT_DIMENSIONS) {
        Object.defineProperty(this, "dimensions", {
            value: dimensions,
            writable: true,
            enumerable: true,
            configurable: true,
        });
    }
    /** Nothing to load; resolves immediately without invoking the callback. */
    async initialize(_onProgress) {
        return undefined;
    }
    /**
     * Embed several texts.
     * @returns One unit vector per text, in input order
     */
    async embed(texts, _options) {
        return texts.map(entry => this.hashToVector(entry));
    }
    /** Embed one text; same result as the matching element of embed(). */
    async embedSingle(text) {
        const vector = this.hashToVector(text);
        return vector;
    }
    /**
     * Build the deterministic unit vector for a text.
     * Each component is an LCG-style value derived from the text hash and the
     * component index, mapped into [-1, 1] and then scaled to unit length.
     */
    hashToVector(text) {
        const MODULUS = 0x7fffffff;
        const seed = this.hash(text);
        const components = new Array(this.dimensions);
        for (let index = 0; index < components.length; index++) {
            const mixed = (seed * (index + 1) * 1103515245 + 12345) >>> 0;
            const fraction = (mixed % MODULUS) / MODULUS;
            components[index] = fraction * 2 - 1; // Range [-1, 1]
        }
        // Euclidean length, accumulated left to right
        let sumOfSquares = 0;
        for (const component of components) {
            sumOfSquares = sumOfSquares + component * component;
        }
        const magnitude = Math.sqrt(sumOfSquares);
        if (!(magnitude > 0)) {
            return components.map(() => 0);
        }
        return components.map(component => component / magnitude);
    }
    /**
     * XOR-style djb2 string hash, kept as an unsigned 32-bit integer.
     */
    hash(str) {
        let acc = 5381;
        let position = 0;
        while (position < str.length) {
            acc = ((acc * 33) ^ str.charCodeAt(position)) >>> 0;
            position += 1;
        }
        return acc;
    }
    /** The mock owns no resources, so there is nothing to release. */
    close() {
        return undefined;
    }
}
@@ -0,0 +1,41 @@
1
+ /**
2
+ * OpenAI embedding provider using OpenAI API.
3
+ *
4
+ * Uses text-embedding-3-large model with reduced dimensions (1536).
5
+ * High quality embeddings with fast API responses ($0.13/1M tokens).
6
+ */
7
+ import type { EmbeddingProvider, ModelProgressCallback, EmbedOptions } from './types.js';
8
+ /**
9
+ * OpenAI embedding provider.
10
+ * Uses text-embedding-3-large model via OpenAI API with reduced dimensions.
11
+ *
12
+ * Supports regional endpoints for corporate accounts with data residency:
13
+ * - Default: https://api.openai.com/v1
14
+ * - US: https://us.api.openai.com/v1
15
+ * - EU: https://eu.api.openai.com/v1
+ *
+ * The callback properties start out undefined and are read each time embed() runs.
16
+ */
17
+ export declare class OpenAIEmbeddingProvider implements EmbeddingProvider {
18
+ readonly dimensions = 1536; // Also sent as the `dimensions` field of every embeddings request.
19
+ private apiKey; // Constructor argument with surrounding whitespace trimmed ('' when omitted).
20
+ private apiBase; // Endpoint base URL; https://api.openai.com/v1 when no baseUrl is given.
21
+ private initialized; // Set by initialize(), cleared by close().
22
+ onThrottle: ((message: string | null) => void) | undefined; // Rate-limit throttling notice; null clears it.
23
+ onBatchProgress: ((processed: number, total: number) => void) | undefined; // Batch progress as (processed, total) chunks.
24
+ onSlotProcessing: ((index: number, batchInfo: string) => void) | undefined; // Slot progress callback (wired by the daemon owner).
25
+ onSlotRateLimited: ((index: number, batchInfo: string, retryInfo: string) => void) | undefined; // Slot progress callback (wired by the daemon owner).
26
+ onSlotIdle: ((index: number) => void) | undefined; // Slot progress callback (wired by the daemon owner).
27
+ onSlotFailure: ((data: {
28
+ batchInfo: string;
29
+ files: string[];
30
+ chunkCount: number;
31
+ error: string;
32
+ timestamp: string;
33
+ }) => void) | undefined; // Slot progress callback (wired by the daemon owner).
34
+ onResetSlots: (() => void) | undefined; // Slot progress callback (wired by the daemon owner).
35
+ constructor(apiKey?: string, baseUrl?: string); // baseUrl selects a regional endpoint; see the class comment.
36
+ initialize(_onProgress?: ModelProgressCallback): Promise<void>; // Rejects when the key is empty or does not start with "sk-"; the callback is never invoked.
37
+ embed(texts: string[], options?: EmbedOptions): Promise<number[][]>; // Initializes lazily, resolves [] for empty input, otherwise sends batches of 32 texts.
38
+ private embedBatch; // POSTs one batch to `${apiBase}/embeddings`; throws on any non-OK response.
39
+ embedSingle(text: string): Promise<number[]>; // First element of embed([text]).
40
+ close(): void; // Only clears the initialized flag.
41
+ }
@@ -0,0 +1,190 @@
1
+ /**
2
+ * OpenAI embedding provider using OpenAI API.
3
+ *
4
+ * Uses text-embedding-3-large model with reduced dimensions (1536).
5
+ * High quality embeddings with fast API responses ($0.13/1M tokens).
6
+ */
7
+ import { chunk, processBatchesWithLimit, withRetry, } from './api-utils.js';
8
const DEFAULT_API_BASE = 'https://api.openai.com/v1';
const MODEL = 'text-embedding-3-large';
const DIMENSIONS = 1536; // Reduced from 3072 for storage efficiency
// OpenAI limits: 8,191 tokens/text, 300,000 tokens/batch, 2,048 texts/batch
// Chunks are ~2000 chars + context header ≈ 800-1000 tokens each
// 32 chunks × 1000 tokens = 32,000 tokens (well under 300k limit)
// Smaller batches = more progress visibility with 5 concurrent slots
const BATCH_SIZE = 32;
/**
 * Resolve the endpoint base URL.
 *
 * Trailing slashes are removed so `${base}/embeddings` never contains "//",
 * and a missing or blank value falls back to DEFAULT_API_BASE instead of
 * producing a relative URL.
 *
 * @param baseUrl - Optional base URL supplied by the caller
 * @returns Base URL without trailing slash
 */
function resolveOpenAIBaseUrl(baseUrl) {
    const cleaned = (baseUrl ?? '').trim().replace(/\/+$/, '');
    return cleaned || DEFAULT_API_BASE;
}
/**
 * Extract a human-readable message from the body of a failed OpenAI response.
 *
 * Uses `error.message` when present and falls back to the raw body when the
 * body is not JSON or has no such field. Always returns a string, so callers
 * can safely use string methods on the result.
 *
 * @param errorText - Raw response body text
 * @returns Message string
 */
function readOpenAIErrorMessage(errorText) {
    let parsed;
    try {
        parsed = JSON.parse(errorText);
    }
    catch {
        return errorText;
    }
    const message = parsed?.error?.message;
    if (!message) {
        return errorText;
    }
    return typeof message === 'string' ? message : JSON.stringify(message);
}
/**
 * Build a redacted preview of an API key for error messages.
 *
 * Long keys show the first 7 and last 4 characters. Keys of 11 characters or
 * fewer would be fully revealed by that scheme, so only the first 3
 * characters are shown for them.
 *
 * @param apiKey - Full API key
 * @returns Redacted preview
 */
function previewOpenAIKey(apiKey) {
    if (apiKey.length > 11) {
        return `${apiKey.slice(0, 7)}...${apiKey.slice(-4)}`;
    }
    return `${apiKey.slice(0, 3)}...`;
}
/**
 * OpenAI embedding provider.
 * Uses text-embedding-3-large model via OpenAI API with reduced dimensions.
 *
 * Supports regional endpoints for corporate accounts with data residency:
 * - Default: https://api.openai.com/v1
 * - US: https://us.api.openai.com/v1
 * - EU: https://eu.api.openai.com/v1
 */
export class OpenAIEmbeddingProvider {
    /**
     * @param apiKey - OpenAI API key. Surrounding whitespace is trimmed; a
     *   missing key is stored as '' and rejected later by initialize().
     * @param baseUrl - Optional endpoint base URL (regional endpoints);
     *   defaults to DEFAULT_API_BASE
     */
    constructor(apiKey, baseUrl) {
        // Every field is an own, enumerable, writable, configurable property,
        // created in this exact order.
        const define = (name, value) => Object.defineProperty(this, name, {
            enumerable: true,
            configurable: true,
            writable: true,
            value,
        });
        define('dimensions', 1536);
        define('apiKey', undefined);
        define('apiBase', undefined);
        define('initialized', false);
        // Callback for rate limit throttling - message or null to clear
        define('onThrottle', undefined);
        // Callback for batch progress - (processed, total) chunks
        define('onBatchProgress', undefined);
        // Slot progress callbacks (wired by daemon owner)
        define('onSlotProcessing', undefined);
        define('onSlotRateLimited', undefined);
        define('onSlotIdle', undefined);
        define('onSlotFailure', undefined);
        define('onResetSlots', undefined);
        // Trim the key to remove any accidental whitespace
        this.apiKey = (apiKey ?? '').trim();
        this.apiBase = resolveOpenAIBaseUrl(baseUrl);
    }
    /**
     * Check the API key locally and mark the provider ready. No network call
     * is made here.
     *
     * @param _onProgress - Accepted for interface compatibility; never invoked
     * @throws Error when the key is missing or does not start with "sk-"
     */
    async initialize(_onProgress) {
        if (!this.apiKey) {
            throw new Error('OpenAI API key required. Run /init to configure your API key.');
        }
        // Validate key format (should start with sk-)
        if (!this.apiKey.startsWith('sk-')) {
            throw new Error(`Invalid OpenAI API key format. Key should start with "sk-" but got "${this.apiKey.slice(0, 3)}..."`);
        }
        this.initialized = true;
    }
    /**
     * Embed many texts, splitting them into batches of BATCH_SIZE.
     *
     * Initializes lazily if initialize() has not been called yet and returns
     * [] for empty input without touching the network. Scheduling, retry and
     * progress reporting are delegated to processBatchesWithLimit / withRetry
     * from ./api-utils.js.
     *
     * @param texts - Texts to embed
     * @param options - Optional chunk metadata (grouped with the same batch
     *   size as texts), logger, and chunk offset (defaults to 0)
     * @returns Whatever processBatchesWithLimit resolves with
     */
    async embed(texts, options) {
        if (!this.initialized) {
            await this.initialize();
        }
        if (texts.length === 0) {
            return [];
        }
        const batches = chunk(texts, BATCH_SIZE);
        // Snapshot the callbacks as they are set at call time.
        const callbacks = {
            onThrottle: this.onThrottle,
            onBatchProgress: this.onBatchProgress,
            onSlotProcessing: this.onSlotProcessing,
            onSlotRateLimited: this.onSlotRateLimited,
            onSlotIdle: this.onSlotIdle,
            onSlotFailure: this.onSlotFailure,
            onResetSlots: this.onResetSlots,
        };
        // Convert chunk metadata to batch metadata if provided
        let batchMetadata;
        if (options?.chunkMetadata) {
            const metaBatches = chunk(options.chunkMetadata, BATCH_SIZE);
            batchMetadata = metaBatches.map(metaBatch => ({
                filepaths: metaBatch.map(m => m.filepath),
                lineRanges: metaBatch.map(m => ({ start: m.startLine, end: m.endLine })),
                sizes: metaBatch.map(m => m.size),
            }));
        }
        return processBatchesWithLimit(batches, (batch, onRetrying) => withRetry(() => this.embedBatch(batch), callbacks, onRetrying), callbacks, BATCH_SIZE, batchMetadata, options?.logger, options?.chunkOffset ?? 0);
    }
    /**
     * Send one batch to POST {apiBase}/embeddings.
     *
     * @param texts - Texts of a single batch
     * @returns Embedding vectors ordered by the `index` field of the response
     * @throws Error on any non-OK response; 401 responses get an API-key or
     *   regional-endpoint hint with a redacted key preview
     */
    async embedBatch(texts) {
        const response = await fetch(`${this.apiBase}/embeddings`, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                Authorization: `Bearer ${this.apiKey}`,
            },
            body: JSON.stringify({
                model: MODEL,
                input: texts,
                dimensions: DIMENSIONS,
            }),
        });
        if (!response.ok) {
            const errorMessage = readOpenAIErrorMessage(await response.text());
            // Provide helpful context for common errors
            if (response.status === 401) {
                const keyPreview = previewOpenAIKey(this.apiKey);
                // Check for regional endpoint mismatch
                if (errorMessage.includes('incorrect regional hostname')) {
                    // Extract the required region from the error message if present
                    const regionMatch = errorMessage.match(/make your request to (\w+\.api\.openai\.com)/);
                    const requiredEndpoint = regionMatch?.[1] ?? 'the correct regional endpoint';
                    throw new Error(`OpenAI API regional endpoint mismatch. Your account requires ${requiredEndpoint}. ` +
                        `Run /init again and select the matching region (US or EU) instead of Default. ` +
                        `Key: ${keyPreview}`);
                }
                throw new Error(`OpenAI API authentication failed (401). Key format: ${keyPreview}. ` +
                    `Verify your API key at https://platform.openai.com/api-keys. Error: ${errorMessage}`);
            }
            throw new Error(`OpenAI API error (${response.status}): ${errorMessage}`);
        }
        const data = (await response.json());
        // Sort a copy by index to ensure correct order without mutating the payload
        return [...data.data].sort((a, b) => a.index - b.index).map(d => d.embedding);
    }
    /**
     * Embed a single text by delegating to embed().
     *
     * @param text - Text to embed
     * @returns The first (only) vector of the result
     */
    async embedSingle(text) {
        const results = await this.embed([text]);
        return results[0];
    }
    /** Reset the initialized flag; the next embed() will re-run initialize(). */
    close() {
        this.initialized = false;
    }
}
@@ -0,0 +1,68 @@
1
+ /**
2
+ * Embedding Provider Types.
3
+ *
4
+ * Types for embedding providers that generate vector embeddings from text.
5
+ */
6
+ import type { Logger } from '../lib/logger.js';
7
+ /**
8
+ * Progress callback for model loading/downloading.
+ * Providers receive it through initialize(); the Mistral, OpenAI and mock
+ * providers accept it but never invoke it.
9
+ * @param status - Current status: 'downloading', 'loading', 'ready'
10
+ * @param progress - Download progress 0-100 (only for 'downloading')
11
+ * @param message - Optional message (e.g., file being downloaded)
12
+ */
13
+ export type ModelProgressCallback = (status: 'downloading' | 'loading' | 'ready', progress?: number, message?: string) => void;
14
+ /**
15
+ * Metadata for a single chunk, used for detailed failure logging.
+ * The Mistral and OpenAI providers regroup these per batch into parallel
+ * `filepaths`, `lineRanges` ({ start, end }) and `sizes` arrays.
16
+ */
17
+ export interface ChunkMetadata {
18
+ /** File path for this chunk */
19
+ filepath: string;
20
+ /** Start line number (1-indexed); becomes `lineRanges[i].start` in batch metadata */
21
+ startLine: number;
22
+ /** End line number (1-indexed); becomes `lineRanges[i].end` in batch metadata */
23
+ endLine: number;
24
+ /** Text size in characters */
25
+ size: number;
26
+ }
27
+ /**
28
+ * Options for embedding operations.
+ * All fields are optional; the mock provider ignores them entirely.
29
+ */
30
+ export interface EmbedOptions {
31
+ /**
+ * Metadata for each chunk being embedded (parallel array to texts).
+ * The Mistral and OpenAI providers split it with the same batch size as
+ * texts, so it must have the same order (and presumably the same length)
+ * for batches to line up.
+ */
32
+ chunkMetadata?: ChunkMetadata[];
33
+ /** Logger for debug output on failures */
34
+ logger?: Logger;
35
+ /** Offset for cumulative chunk numbering in progress display; the Mistral and OpenAI providers use 0 when omitted */
36
+ chunkOffset?: number;
37
+ }
38
+ /**
39
+ * Embedding provider interface for generating vector embeddings.
+ * Implemented by the Mistral, OpenAI and mock provider classes.
40
+ */
41
+ export interface EmbeddingProvider {
42
+ /** Number of dimensions in the embedding vectors */
43
+ readonly dimensions: number;
44
+ /**
45
+ * Initialize the provider (load model, etc.)
46
+ * Must be called before using embed() or embedSingle().
+ * NOTE(review): the Mistral and OpenAI providers also call initialize()
+ * lazily from embed(), and the mock needs no initialization, so this
+ * requirement is not enforced by those implementations.
47
+ * @param onProgress - Optional callback for download/loading progress
48
+ */
49
+ initialize(onProgress?: ModelProgressCallback): Promise<void>;
50
+ /**
51
+ * Generate embeddings for multiple texts.
+ * The Mistral and OpenAI providers resolve to [] for an empty input array
+ * without making a request.
52
+ * @param texts - Array of text strings to embed
53
+ * @param options - Optional settings for logging and metadata
54
+ * @returns Array of embedding vectors (one per text)
55
+ */
56
+ embed(texts: string[], options?: EmbedOptions): Promise<number[][]>;
57
+ /**
58
+ * Generate embedding for a single text.
59
+ * Optimized for query embedding.
+ * The Mistral and OpenAI providers implement this as embed([text])[0].
60
+ * @param text - Text string to embed
61
+ * @returns Embedding vector
62
+ */
63
+ embedSingle(text: string): Promise<number[]>;
64
+ /**
65
+ * Close the provider and free resources.
+ * The Mistral and OpenAI providers only clear their initialized flag;
+ * the mock does nothing.
66
+ */
67
+ close(): void;
68
+ }
@@ -0,0 +1,6 @@
1
+ /**
2
+ * Embedding Provider Types.
3
+ *
4
+ * Types for embedding providers that generate vector embeddings from text.
5
+ */
6
+ export {}; // Empty export: this file holds only type declarations, and the marker keeps it an ES module.
@@ -0,0 +1,30 @@
1
+ /**
2
+ * API key validation for cloud embedding providers.
3
+ *
4
+ * Makes a minimal test embedding call to verify the API key is valid
5
+ * before proceeding with indexing.
6
+ */
7
+ import type { EmbeddingProviderType } from '../../common/types.js';
8
+ /**
9
+ * Result of API key validation.
+ * Resolved by validateApiKey().
10
+ */
11
+ export interface ValidationResult {
12
+ valid: boolean; // Whether the key passed validation.
13
+ error?: string; // Error message when the key is invalid (per validateApiKey's @returns note).
14
+ }
15
+ /**
16
+ * Options for API key validation.
+ * Passed as the optional third argument of validateApiKey().
17
+ */
18
+ export interface ValidateApiKeyOptions {
19
+ /** OpenAI base URL for regional endpoints (e.g., https://us.api.openai.com/v1) */
20
+ openaiBaseUrl?: string; // NOTE(review): presumably only consulted for the OpenAI provider — confirm in validate.js.
21
+ }
22
+ /**
23
+ * Validate an API key by making a minimal test embedding call.
+ * NOTE(review): the implementation is not visible from this declaration;
+ * whether the promise can also reject (e.g. on network failure) instead of
+ * resolving with `valid: false` should be confirmed in validate.js.
24
+ *
25
+ * @param provider - The embedding provider type
26
+ * @param apiKey - The API key to validate
27
+ * @param options - Optional configuration (e.g., openaiBaseUrl for regional endpoints)
28
+ * @returns Validation result with error message if invalid
29
+ */
30
+ export declare function validateApiKey(provider: EmbeddingProviderType, apiKey: string, options?: ValidateApiKeyOptions): Promise<ValidationResult>;