viberag 0.3.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/dist/cli/app.d.ts +3 -0
- package/dist/cli/app.js +100 -102
- package/dist/cli/commands/handlers.d.ts +8 -6
- package/dist/cli/commands/handlers.js +90 -32
- package/dist/cli/commands/useCommands.d.ts +20 -0
- package/dist/cli/commands/useCommands.js +189 -0
- package/dist/cli/commands/useRagCommands.d.ts +2 -5
- package/dist/cli/commands/useRagCommands.js +11 -18
- package/dist/cli/components/InitWizard.js +66 -27
- package/dist/cli/components/McpSetupWizard.js +23 -4
- package/dist/cli/components/SlotRow.d.ts +22 -0
- package/dist/cli/components/SlotRow.js +55 -0
- package/dist/cli/components/StatusBar.d.ts +14 -0
- package/dist/cli/components/StatusBar.js +156 -0
- package/dist/cli/contexts/DaemonStatusContext.d.ts +38 -0
- package/dist/cli/contexts/DaemonStatusContext.js +106 -0
- package/dist/cli/hooks/useStatusPolling.d.ts +34 -0
- package/dist/cli/hooks/useStatusPolling.js +121 -0
- package/dist/cli/store/app/selectors.d.ts +87 -0
- package/dist/cli/store/app/selectors.js +28 -0
- package/dist/cli/store/app/slice.d.ts +1013 -0
- package/dist/cli/store/app/slice.js +112 -0
- package/dist/cli/store/hooks.d.ts +22 -0
- package/dist/cli/store/hooks.js +17 -0
- package/dist/cli/store/store.d.ts +17 -0
- package/dist/cli/store/store.js +18 -0
- package/dist/cli/store/wizard/selectors.d.ts +115 -0
- package/dist/cli/store/wizard/selectors.js +36 -0
- package/dist/cli/store/wizard/slice.d.ts +523 -0
- package/dist/cli/store/wizard/slice.js +119 -0
- package/dist/cli/utils/error-handler.d.ts +55 -0
- package/dist/cli/utils/error-handler.js +92 -0
- package/dist/client/auto-start.d.ts +42 -0
- package/dist/client/auto-start.js +250 -0
- package/dist/client/connection.d.ts +48 -0
- package/dist/client/connection.js +200 -0
- package/dist/client/index.d.ts +93 -0
- package/dist/client/index.js +209 -0
- package/dist/client/types.d.ts +105 -0
- package/dist/client/types.js +7 -0
- package/dist/common/components/SlotRow.d.ts +22 -0
- package/dist/common/components/SlotRow.js +53 -0
- package/dist/common/components/StatusBar.js +82 -31
- package/dist/common/types.d.ts +12 -13
- package/dist/daemon/handlers.d.ts +15 -0
- package/dist/daemon/handlers.js +157 -0
- package/dist/daemon/index.d.ts +21 -0
- package/dist/daemon/index.js +123 -0
- package/dist/daemon/lib/chunker/bounded-channel.d.ts +51 -0
- package/dist/daemon/lib/chunker/bounded-channel.js +138 -0
- package/dist/daemon/lib/chunker/index.d.ts +135 -0
- package/dist/daemon/lib/chunker/index.js +1370 -0
- package/dist/daemon/lib/chunker/types.d.ts +77 -0
- package/dist/daemon/lib/chunker/types.js +50 -0
- package/dist/daemon/lib/config.d.ts +73 -0
- package/dist/daemon/lib/config.js +149 -0
- package/dist/daemon/lib/constants.d.ts +75 -0
- package/dist/daemon/lib/constants.js +114 -0
- package/dist/daemon/lib/gitignore.d.ts +57 -0
- package/dist/daemon/lib/gitignore.js +246 -0
- package/dist/daemon/lib/logger.d.ts +51 -0
- package/dist/daemon/lib/logger.js +167 -0
- package/dist/daemon/lib/manifest.d.ts +58 -0
- package/dist/daemon/lib/manifest.js +116 -0
- package/dist/daemon/lib/merkle/diff.d.ts +32 -0
- package/dist/daemon/lib/merkle/diff.js +107 -0
- package/dist/daemon/lib/merkle/hash.d.ts +40 -0
- package/dist/daemon/lib/merkle/hash.js +180 -0
- package/dist/daemon/lib/merkle/index.d.ts +71 -0
- package/dist/daemon/lib/merkle/index.js +309 -0
- package/dist/daemon/lib/merkle/node.d.ts +55 -0
- package/dist/daemon/lib/merkle/node.js +82 -0
- package/dist/daemon/lifecycle.d.ts +50 -0
- package/dist/daemon/lifecycle.js +142 -0
- package/dist/daemon/owner.d.ts +175 -0
- package/dist/daemon/owner.js +609 -0
- package/dist/daemon/protocol.d.ts +100 -0
- package/dist/daemon/protocol.js +163 -0
- package/dist/daemon/providers/api-utils.d.ts +130 -0
- package/dist/daemon/providers/api-utils.js +248 -0
- package/dist/daemon/providers/gemini.d.ts +39 -0
- package/dist/daemon/providers/gemini.js +205 -0
- package/dist/daemon/providers/index.d.ts +14 -0
- package/dist/daemon/providers/index.js +14 -0
- package/dist/daemon/providers/local-4b.d.ts +28 -0
- package/dist/daemon/providers/local-4b.js +51 -0
- package/dist/daemon/providers/local.d.ts +36 -0
- package/dist/daemon/providers/local.js +166 -0
- package/dist/daemon/providers/mistral.d.ts +35 -0
- package/dist/daemon/providers/mistral.js +160 -0
- package/dist/daemon/providers/mock.d.ts +35 -0
- package/dist/daemon/providers/mock.js +69 -0
- package/dist/daemon/providers/openai.d.ts +41 -0
- package/dist/daemon/providers/openai.js +190 -0
- package/dist/daemon/providers/types.d.ts +68 -0
- package/dist/daemon/providers/types.js +6 -0
- package/dist/daemon/providers/validate.d.ts +30 -0
- package/dist/daemon/providers/validate.js +162 -0
- package/dist/daemon/server.d.ts +79 -0
- package/dist/daemon/server.js +293 -0
- package/dist/daemon/services/index.d.ts +11 -0
- package/dist/daemon/services/index.js +16 -0
- package/dist/daemon/services/indexing.d.ts +117 -0
- package/dist/daemon/services/indexing.js +573 -0
- package/dist/daemon/services/search/filters.d.ts +21 -0
- package/dist/daemon/services/search/filters.js +106 -0
- package/dist/daemon/services/search/fts.d.ts +32 -0
- package/dist/daemon/services/search/fts.js +61 -0
- package/dist/daemon/services/search/hybrid.d.ts +17 -0
- package/dist/daemon/services/search/hybrid.js +58 -0
- package/dist/daemon/services/search/index.d.ts +108 -0
- package/dist/daemon/services/search/index.js +417 -0
- package/dist/daemon/services/search/types.d.ts +126 -0
- package/dist/daemon/services/search/types.js +4 -0
- package/dist/daemon/services/search/vector.d.ts +25 -0
- package/dist/daemon/services/search/vector.js +44 -0
- package/dist/daemon/services/storage/index.d.ts +110 -0
- package/dist/daemon/services/storage/index.js +378 -0
- package/dist/daemon/services/storage/schema.d.ts +24 -0
- package/dist/daemon/services/storage/schema.js +51 -0
- package/dist/daemon/services/storage/types.d.ts +105 -0
- package/dist/daemon/services/storage/types.js +71 -0
- package/dist/daemon/services/types.d.ts +192 -0
- package/dist/daemon/services/types.js +53 -0
- package/dist/daemon/services/watcher.d.ts +98 -0
- package/dist/daemon/services/watcher.js +386 -0
- package/dist/daemon/state.d.ts +119 -0
- package/dist/daemon/state.js +161 -0
- package/dist/mcp/index.d.ts +1 -1
- package/dist/mcp/index.js +44 -60
- package/dist/mcp/server.d.ts +10 -14
- package/dist/mcp/server.js +75 -74
- package/dist/mcp/services/lazy-loader.d.ts +23 -0
- package/dist/mcp/services/lazy-loader.js +34 -0
- package/dist/mcp/warmup.d.ts +3 -3
- package/dist/mcp/warmup.js +39 -40
- package/dist/mcp/watcher.d.ts +5 -7
- package/dist/mcp/watcher.js +73 -64
- package/dist/rag/config/index.d.ts +2 -0
- package/dist/rag/constants.d.ts +30 -0
- package/dist/rag/constants.js +38 -0
- package/dist/rag/embeddings/api-utils.d.ts +121 -0
- package/dist/rag/embeddings/api-utils.js +259 -0
- package/dist/rag/embeddings/gemini.d.ts +4 -12
- package/dist/rag/embeddings/gemini.js +22 -72
- package/dist/rag/embeddings/index.d.ts +5 -3
- package/dist/rag/embeddings/index.js +5 -2
- package/dist/rag/embeddings/local-4b.d.ts +2 -2
- package/dist/rag/embeddings/local-4b.js +1 -1
- package/dist/rag/embeddings/local.d.ts +10 -3
- package/dist/rag/embeddings/local.js +58 -12
- package/dist/rag/embeddings/mistral.d.ts +4 -12
- package/dist/rag/embeddings/mistral.js +22 -72
- package/dist/rag/embeddings/mock.d.ts +35 -0
- package/dist/rag/embeddings/mock.js +69 -0
- package/dist/rag/embeddings/openai.d.ts +11 -13
- package/dist/rag/embeddings/openai.js +47 -75
- package/dist/rag/embeddings/types.d.ts +27 -1
- package/dist/rag/embeddings/validate.d.ts +9 -1
- package/dist/rag/embeddings/validate.js +17 -4
- package/dist/rag/index.d.ts +2 -2
- package/dist/rag/index.js +1 -1
- package/dist/rag/indexer/bounded-channel.d.ts +51 -0
- package/dist/rag/indexer/bounded-channel.js +138 -0
- package/dist/rag/indexer/indexer.d.ts +4 -14
- package/dist/rag/indexer/indexer.js +246 -169
- package/dist/rag/indexer/types.d.ts +1 -0
- package/dist/rag/logger/index.d.ts +22 -0
- package/dist/rag/logger/index.js +78 -1
- package/dist/rag/manifest/index.js +1 -2
- package/dist/rag/search/index.js +1 -1
- package/dist/rag/storage/schema.d.ts +2 -4
- package/dist/rag/storage/schema.js +3 -5
- package/dist/store/app/selectors.d.ts +87 -0
- package/dist/store/app/selectors.js +28 -0
- package/dist/store/app/slice.d.ts +1013 -0
- package/dist/store/app/slice.js +112 -0
- package/dist/store/hooks.d.ts +22 -0
- package/dist/store/hooks.js +17 -0
- package/dist/store/index.d.ts +12 -0
- package/dist/store/index.js +18 -0
- package/dist/store/indexing/listeners.d.ts +25 -0
- package/dist/store/indexing/listeners.js +46 -0
- package/dist/store/indexing/selectors.d.ts +195 -0
- package/dist/store/indexing/selectors.js +69 -0
- package/dist/store/indexing/slice.d.ts +309 -0
- package/dist/store/indexing/slice.js +113 -0
- package/dist/store/slot-progress/listeners.d.ts +23 -0
- package/dist/store/slot-progress/listeners.js +33 -0
- package/dist/store/slot-progress/selectors.d.ts +67 -0
- package/dist/store/slot-progress/selectors.js +36 -0
- package/dist/store/slot-progress/slice.d.ts +246 -0
- package/dist/store/slot-progress/slice.js +70 -0
- package/dist/store/store.d.ts +17 -0
- package/dist/store/store.js +18 -0
- package/dist/store/warmup/selectors.d.ts +109 -0
- package/dist/store/warmup/selectors.js +44 -0
- package/dist/store/warmup/slice.d.ts +137 -0
- package/dist/store/warmup/slice.js +72 -0
- package/dist/store/watcher/selectors.d.ts +115 -0
- package/dist/store/watcher/selectors.js +52 -0
- package/dist/store/watcher/slice.d.ts +269 -0
- package/dist/store/watcher/slice.js +100 -0
- package/dist/store/wizard/selectors.d.ts +115 -0
- package/dist/store/wizard/selectors.js +36 -0
- package/dist/store/wizard/slice.d.ts +523 -0
- package/dist/store/wizard/slice.js +119 -0
- package/package.json +10 -2
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Mistral embedding provider using Mistral AI API.
|
|
3
|
+
*
|
|
4
|
+
* Uses codestral-embed model (1536 dimensions).
|
|
5
|
+
* Optimized for code and technical content.
|
|
6
|
+
*/
|
|
7
|
+
import { chunk, processBatchesWithLimit, withRetry, } from './api-utils.js';
|
|
8
|
+
const MISTRAL_API_BASE = 'https://api.mistral.ai/v1';
const MODEL = 'codestral-embed';
// Mistral limits: 8,192 tokens/text, 16,000 tokens/batch TOTAL
// Chunks are ~2000 chars but token count varies (code can be 1.5-2x tokens/char)
// 8 chunks × ~1500 tokens worst case = 12,000 tokens
// (75% of the 16k batch limit, i.e. 25% headroom)
const BATCH_SIZE = 8;
/**
 * Turn the body of a failed response into a printable message.
 *
 * Prefers the `message` field, then `detail`, then the raw body text.
 * A structured (non-string) value is JSON-encoded so it does not end up
 * as "[object Object]" in the thrown error.
 *
 * @param errorText - Raw response body text
 * @returns Human-readable error description
 */
function describeMistralError(errorText) {
    try {
        const parsed = JSON.parse(errorText);
        const detail = parsed.message || parsed.detail;
        if (!detail) {
            return errorText;
        }
        return typeof detail === 'string' ? detail : JSON.stringify(detail);
    }
    catch {
        // Body was not JSON (or parsed to null): report it verbatim.
        return errorText;
    }
}
/**
 * Mistral embedding provider.
 * Uses codestral-embed model via Mistral AI API.
 */
export class MistralEmbeddingProvider {
    /** Vector length reported by this provider. */
    dimensions = 1536;
    /** API key, trimmed by the constructor; empty string when none was given. */
    apiKey = '';
    /** Set by initialize(), cleared by close(). */
    initialized = false;
    // Callback for rate limit throttling - message or null to clear
    onThrottle = undefined;
    // Callback for batch progress - (processed, total) chunks
    onBatchProgress = undefined;
    // Slot progress callbacks (wired by daemon owner)
    onSlotProcessing = undefined;
    onSlotRateLimited = undefined;
    onSlotIdle = undefined;
    onSlotFailure = undefined;
    onResetSlots = undefined;
    /**
     * @param apiKey - Mistral API key; may be omitted, in which case
     *   initialize() rejects.
     */
    constructor(apiKey) {
        // Trim the key to remove any accidental whitespace
        this.apiKey = (apiKey ?? '').trim();
    }
    /**
     * Check that an API key is present and mark the provider ready.
     * Makes no network request.
     *
     * @param _onProgress - Unused
     * @throws Error when the API key is empty
     */
    async initialize(_onProgress) {
        if (!this.apiKey) {
            throw new Error('Mistral API key required. Run /init to configure your API key.');
        }
        this.initialized = true;
    }
    /**
     * Embed a list of texts in batches of BATCH_SIZE.
     *
     * Initializes the provider on first use. Scheduling, retry and progress
     * reporting are delegated to processBatchesWithLimit / withRetry from
     * api-utils, which receive the callbacks currently set on this instance.
     *
     * @param texts - Texts to embed
     * @param options - Optional chunkMetadata, logger and chunkOffset
     * @returns Whatever processBatchesWithLimit resolves to; an empty array
     *   for empty input
     */
    async embed(texts, options) {
        if (!this.initialized) {
            await this.initialize();
        }
        if (texts.length === 0) {
            return [];
        }
        const batches = chunk(texts, BATCH_SIZE);
        const callbacks = {
            onThrottle: this.onThrottle,
            onBatchProgress: this.onBatchProgress,
            onSlotProcessing: this.onSlotProcessing,
            onSlotRateLimited: this.onSlotRateLimited,
            onSlotIdle: this.onSlotIdle,
            onSlotFailure: this.onSlotFailure,
            onResetSlots: this.onResetSlots,
        };
        // Convert chunk metadata to batch metadata if provided
        let batchMetadata;
        if (options?.chunkMetadata) {
            const metaBatches = chunk(options.chunkMetadata, BATCH_SIZE);
            batchMetadata = metaBatches.map(metaBatch => ({
                filepaths: metaBatch.map(m => m.filepath),
                lineRanges: metaBatch.map(m => ({ start: m.startLine, end: m.endLine })),
                sizes: metaBatch.map(m => m.size),
            }));
        }
        return processBatchesWithLimit(batches, (batch, onRetrying) => withRetry(() => this.embedBatch(batch), callbacks, onRetrying), callbacks, BATCH_SIZE, batchMetadata, options?.logger, options?.chunkOffset ?? 0);
    }
    /**
     * Send one batch to the embeddings endpoint.
     *
     * @param texts - Texts for a single request
     * @returns Embedding vectors ordered to match `texts`
     * @throws Error on a non-2xx status, on a body without a `data` array,
     *   or when the number of embeddings differs from the number of inputs
     */
    async embedBatch(texts) {
        const response = await fetch(`${MISTRAL_API_BASE}/embeddings`, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                Authorization: `Bearer ${this.apiKey}`,
            },
            body: JSON.stringify({
                model: MODEL,
                input: texts,
            }),
        });
        if (!response.ok) {
            const errorMessage = describeMistralError(await response.text());
            if (response.status === 401) {
                throw new Error(`Mistral API authentication failed (401). ` +
                    `Verify your API key at https://console.mistral.ai/api-keys. Error: ${errorMessage}`);
            }
            throw new Error(`Mistral API error (${response.status}): ${errorMessage}`);
        }
        const payload = await response.json();
        const items = payload?.data;
        if (!Array.isArray(items)) {
            throw new Error('Mistral API returned an unexpected response: missing "data" array.');
        }
        // A short or long reply would silently pair vectors with the wrong texts.
        if (items.length !== texts.length) {
            throw new Error(`Mistral API returned ${items.length} embeddings for ${texts.length} inputs.`);
        }
        // Sort a copy by index to ensure correct order without mutating the payload
        return [...items].sort((a, b) => a.index - b.index).map(d => d.embedding);
    }
    /**
     * Embed a single text via embed().
     *
     * @param text - Text to embed
     * @returns The first vector returned by embed()
     */
    async embedSingle(text) {
        const results = await this.embed([text]);
        return results[0];
    }
    /** Mark the provider as uninitialized; the next embed() re-checks the key. */
    close() {
        this.initialized = false;
    }
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Mock embedding provider for testing.
|
|
3
|
+
*
|
|
4
|
+
* Generates deterministic hash-based embeddings that:
|
|
5
|
+
* - Run instantly (no model loading)
|
|
6
|
+
* - Are deterministic (same input = same output)
|
|
7
|
+
* - Normalized to unit length
|
|
8
|
+
* - Support any dimension count
|
|
9
|
+
*
|
|
10
|
+
* Usage:
|
|
11
|
+
* - Unit tests that need embeddings but don't need semantic quality
|
|
12
|
+
* - Testing search infrastructure without ONNX overhead
|
|
13
|
+
* - CI pipeline fast checks
|
|
14
|
+
*/
|
|
15
|
+
import type { EmbeddingProvider, ModelProgressCallback, EmbedOptions } from './types.js';
|
|
16
|
+
/**
 * Embedding provider for tests: vectors are derived from a hash of the
 * input text rather than from a model.
 */
export declare class MockEmbeddingProvider implements EmbeddingProvider {
    /** Length of each vector produced by this provider. */
    readonly dimensions: number;
    /**
     * @param dimensions - Optional vector length.
     */
    constructor(dimensions?: number);
    /**
     * Provider-contract hook.
     * @param _onProgress - Optional progress callback.
     */
    initialize(_onProgress?: ModelProgressCallback): Promise<void>;
    /**
     * Produce one vector per input text.
     * @param texts - Texts to embed.
     * @param _options - Optional embed settings.
     */
    embed(texts: string[], _options?: EmbedOptions): Promise<number[][]>;
    /**
     * Produce the vector for a single text.
     * @param text - Text to embed.
     */
    embedSingle(text: string): Promise<number[]>;
    /**
     * Maps a text to a repeatable unit-length vector using hash-seeded
     * pseudo-random values.
     */
    private hashToVector;
    /**
     * djb2 string hash.
     */
    private hash;
    /** Provider-contract hook for releasing resources. */
    close(): void;
}
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Mock embedding provider for testing.
|
|
3
|
+
*
|
|
4
|
+
* Generates deterministic hash-based embeddings that:
|
|
5
|
+
* - Run instantly (no model loading)
|
|
6
|
+
* - Are deterministic (same input = same output)
|
|
7
|
+
* - Normalized to unit length
|
|
8
|
+
* - Support any dimension count
|
|
9
|
+
*
|
|
10
|
+
* Usage:
|
|
11
|
+
* - Unit tests that need embeddings but don't need semantic quality
|
|
12
|
+
* - Testing search infrastructure without ONNX overhead
|
|
13
|
+
* - CI pipeline fast checks
|
|
14
|
+
*/
|
|
15
|
+
const DEFAULT_DIMENSIONS = 1024;
/**
 * Mock embedding provider using deterministic hash-based vectors.
 */
export class MockEmbeddingProvider {
    /** Length of every vector returned by this provider. */
    dimensions;
    /**
     * @param dimensions - Vector length; defaults to DEFAULT_DIMENSIONS
     */
    constructor(dimensions = DEFAULT_DIMENSIONS) {
        this.dimensions = dimensions;
    }
    /**
     * No initialization needed - instant startup.
     *
     * @param _onProgress - Unused
     */
    async initialize(_onProgress) {
        // Intentionally empty
    }
    /**
     * Embed each text independently.
     *
     * @param texts - Texts to embed
     * @param _options - Unused
     * @returns One unit vector per text, in input order
     */
    async embed(texts, _options) {
        return texts.map(t => this.hashToVector(t));
    }
    /**
     * Embed a single text.
     *
     * @param text - Text to embed
     * @returns Unit vector for the text
     */
    async embedSingle(text) {
        return this.hashToVector(text);
    }
    /**
     * Convert text to a deterministic unit vector.
     * Uses a simple hash-based approach to generate pseudo-random but repeatable values.
     *
     * @param text - Text to convert
     * @returns Vector of `dimensions` numbers normalized to unit length
     *   (all zeros if the raw vector has zero magnitude)
     */
    hashToVector(text) {
        const seed = this.hash(text);
        const raw = new Array(this.dimensions);
        let sumOfSquares = 0;
        for (let i = 0; i < raw.length; i++) {
            // LCG-like step on seed and index. Math.imul keeps both
            // multiplications in exact 32-bit integer arithmetic; a plain
            // `seed * (i + 1) * 1103515245` exceeds 2^53 and loses its low
            // bits to float64 rounding before the modulo is taken.
            const mixed = (Math.imul(Math.imul(seed, i + 1), 1103515245) + 12345) >>> 0;
            const unit = (mixed % 0x7fffffff) / 0x7fffffff; // Range [0, 1)
            const value = unit * 2 - 1; // Range [-1, 1)
            raw[i] = value;
            sumOfSquares += value * value;
        }
        // Normalize to unit length
        const magnitude = Math.sqrt(sumOfSquares);
        return raw.map(v => (magnitude > 0 ? v / magnitude : 0));
    }
    /**
     * Simple string hash function (djb2).
     *
     * @param str - String to hash
     * @returns Unsigned 32-bit hash value
     */
    hash(str) {
        let h = 5381;
        for (let i = 0; i < str.length; i++) {
            h = (h * 33) ^ str.charCodeAt(i);
            h = h >>> 0; // Convert to unsigned 32-bit
        }
        return h;
    }
    /** Nothing to close. */
    close() {
        // Intentionally empty
    }
}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OpenAI embedding provider using OpenAI API.
|
|
3
|
+
*
|
|
4
|
+
* Uses text-embedding-3-large model with reduced dimensions (1536).
|
|
5
|
+
* High quality embeddings with fast API responses ($0.13/1M tokens).
|
|
6
|
+
*/
|
|
7
|
+
import type { EmbeddingProvider, ModelProgressCallback, EmbedOptions } from './types.js';
|
|
8
|
+
/**
 * Embedding provider backed by the OpenAI API
 * (text-embedding-3-large, requested with reduced dimensions).
 *
 * Accounts with data residency use a regional endpoint:
 * - Default: https://api.openai.com/v1
 * - US: https://us.api.openai.com/v1
 * - EU: https://eu.api.openai.com/v1
 */
export declare class OpenAIEmbeddingProvider implements EmbeddingProvider {
    /** Length of each returned vector. */
    readonly dimensions = 1536;
    private apiKey;
    private apiBase;
    private initialized;
    /** Throttle notification: receives a message, or null. */
    onThrottle: ((message: string | null) => void) | undefined;
    /** Batch progress notification: receives (processed, total). */
    onBatchProgress: ((processed: number, total: number) => void) | undefined;
    /** Slot notification: receives the slot index and a batch description. */
    onSlotProcessing: ((index: number, batchInfo: string) => void) | undefined;
    /** Slot notification: receives the slot index, a batch description and retry information. */
    onSlotRateLimited: ((index: number, batchInfo: string, retryInfo: string) => void) | undefined;
    /** Slot notification: receives the slot index. */
    onSlotIdle: ((index: number) => void) | undefined;
    /** Failure notification: receives a record describing the failed batch. */
    onSlotFailure: ((data: {
        batchInfo: string;
        files: string[];
        chunkCount: number;
        error: string;
        timestamp: string;
    }) => void) | undefined;
    /** Slot reset notification; takes no arguments. */
    onResetSlots: (() => void) | undefined;
    /**
     * @param apiKey - Optional OpenAI API key.
     * @param baseUrl - Optional API base URL (see the regional endpoints above).
     */
    constructor(apiKey?: string, baseUrl?: string);
    /**
     * Prepare the provider for use.
     * @param _onProgress - Optional progress callback.
     */
    initialize(_onProgress?: ModelProgressCallback): Promise<void>;
    /**
     * Produce one vector per input text.
     * @param texts - Texts to embed.
     * @param options - Optional embed settings.
     */
    embed(texts: string[], options?: EmbedOptions): Promise<number[][]>;
    private embedBatch;
    /**
     * Produce the vector for a single text.
     * @param text - Text to embed.
     */
    embedSingle(text: string): Promise<number[]>;
    /** Release the provider. */
    close(): void;
}
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OpenAI embedding provider using OpenAI API.
|
|
3
|
+
*
|
|
4
|
+
* Uses text-embedding-3-large model with reduced dimensions (1536).
|
|
5
|
+
* High quality embeddings with fast API responses ($0.13/1M tokens).
|
|
6
|
+
*/
|
|
7
|
+
import { chunk, processBatchesWithLimit, withRetry, } from './api-utils.js';
|
|
8
|
+
const DEFAULT_API_BASE = 'https://api.openai.com/v1';
const MODEL = 'text-embedding-3-large';
const DIMENSIONS = 1536; // Reduced from 3072 for storage efficiency
// OpenAI limits: 8,191 tokens/text, 300,000 tokens/batch, 2,048 texts/batch
// Chunks are ~2000 chars + context header ≈ 800-1000 tokens each
// 32 chunks × 1000 tokens = 32,000 tokens (well under 300k limit)
// Smaller batches = more progress visibility with 5 concurrent slots
const BATCH_SIZE = 32;
// Keys shorter than this get a prefix-only preview: a 7+4 character
// head/tail preview of a short key would reproduce most or all of it.
const MIN_KEY_LENGTH_FOR_PREVIEW = 20;
/**
 * Resolve the API base URL.
 *
 * Surrounding whitespace and trailing slashes are removed so that
 * `${base}/embeddings` never contains a double slash; a missing or blank
 * value falls back to the default endpoint.
 *
 * @param baseUrl - Caller-supplied base URL, possibly undefined
 * @returns Base URL without trailing slash
 */
function normalizeBaseUrl(baseUrl) {
    const cleaned = String(baseUrl ?? '').trim().replace(/\/+$/, '');
    return cleaned || DEFAULT_API_BASE;
}
/**
 * Build a partial view of the API key for error messages.
 *
 * @param apiKey - Full API key
 * @returns First 7 and last 4 characters for long keys, first 3 otherwise
 */
function previewKey(apiKey) {
    if (apiKey.length < MIN_KEY_LENGTH_FOR_PREVIEW) {
        return `${apiKey.slice(0, 3)}...`;
    }
    return `${apiKey.slice(0, 7)}...${apiKey.slice(-4)}`;
}
/**
 * Turn the body of a failed response into a printable string.
 *
 * Prefers `error.message`, falling back to the raw body. A structured
 * (non-string) message is JSON-encoded so callers can safely run string
 * operations on the result.
 *
 * @param errorText - Raw response body text
 * @returns Human-readable error description (always a string)
 */
function describeOpenAIError(errorText) {
    try {
        const detail = JSON.parse(errorText).error?.message;
        if (!detail) {
            return errorText;
        }
        return typeof detail === 'string' ? detail : JSON.stringify(detail);
    }
    catch {
        // Body was not JSON (or parsed to null): report it verbatim.
        return errorText;
    }
}
/**
 * OpenAI embedding provider.
 * Uses text-embedding-3-large model via OpenAI API with reduced dimensions.
 *
 * Supports regional endpoints for corporate accounts with data residency:
 * - Default: https://api.openai.com/v1
 * - US: https://us.api.openai.com/v1
 * - EU: https://eu.api.openai.com/v1
 */
export class OpenAIEmbeddingProvider {
    /** Vector length requested from the API and reported by this provider. */
    dimensions = DIMENSIONS;
    /** API key, trimmed by the constructor; empty string when none was given. */
    apiKey = '';
    /** Base URL that `/embeddings` is appended to. */
    apiBase = DEFAULT_API_BASE;
    /** Set by initialize(), cleared by close(). */
    initialized = false;
    // Callback for rate limit throttling - message or null to clear
    onThrottle = undefined;
    // Callback for batch progress - (processed, total) chunks
    onBatchProgress = undefined;
    // Slot progress callbacks (wired by daemon owner)
    onSlotProcessing = undefined;
    onSlotRateLimited = undefined;
    onSlotIdle = undefined;
    onSlotFailure = undefined;
    onResetSlots = undefined;
    /**
     * @param apiKey - OpenAI API key; may be omitted, in which case
     *   initialize() rejects
     * @param baseUrl - Optional API base URL (e.g. a regional endpoint)
     */
    constructor(apiKey, baseUrl) {
        // Trim the key to remove any accidental whitespace
        this.apiKey = (apiKey ?? '').trim();
        this.apiBase = normalizeBaseUrl(baseUrl);
    }
    /**
     * Check the API key and mark the provider ready.
     * Makes no network request.
     *
     * @param _onProgress - Unused
     * @throws Error when the key is empty or does not start with "sk-"
     */
    async initialize(_onProgress) {
        if (!this.apiKey) {
            throw new Error('OpenAI API key required. Run /init to configure your API key.');
        }
        // Validate key format (should start with sk-)
        if (!this.apiKey.startsWith('sk-')) {
            throw new Error(`Invalid OpenAI API key format. Key should start with "sk-" but got "${this.apiKey.slice(0, 3)}..."`);
        }
        this.initialized = true;
    }
    /**
     * Embed a list of texts in batches of BATCH_SIZE.
     *
     * Initializes the provider on first use. Scheduling, retry and progress
     * reporting are delegated to processBatchesWithLimit / withRetry from
     * api-utils, which receive the callbacks currently set on this instance.
     *
     * @param texts - Texts to embed
     * @param options - Optional chunkMetadata, logger and chunkOffset
     * @returns Whatever processBatchesWithLimit resolves to; an empty array
     *   for empty input
     */
    async embed(texts, options) {
        if (!this.initialized) {
            await this.initialize();
        }
        if (texts.length === 0) {
            return [];
        }
        const batches = chunk(texts, BATCH_SIZE);
        const callbacks = {
            onThrottle: this.onThrottle,
            onBatchProgress: this.onBatchProgress,
            onSlotProcessing: this.onSlotProcessing,
            onSlotRateLimited: this.onSlotRateLimited,
            onSlotIdle: this.onSlotIdle,
            onSlotFailure: this.onSlotFailure,
            onResetSlots: this.onResetSlots,
        };
        // Convert chunk metadata to batch metadata if provided
        let batchMetadata;
        if (options?.chunkMetadata) {
            const metaBatches = chunk(options.chunkMetadata, BATCH_SIZE);
            batchMetadata = metaBatches.map(metaBatch => ({
                filepaths: metaBatch.map(m => m.filepath),
                lineRanges: metaBatch.map(m => ({ start: m.startLine, end: m.endLine })),
                sizes: metaBatch.map(m => m.size),
            }));
        }
        return processBatchesWithLimit(batches, (batch, onRetrying) => withRetry(() => this.embedBatch(batch), callbacks, onRetrying), callbacks, BATCH_SIZE, batchMetadata, options?.logger, options?.chunkOffset ?? 0);
    }
    /**
     * Send one batch to the embeddings endpoint.
     *
     * @param texts - Texts for a single request
     * @returns Embedding vectors ordered to match `texts`
     * @throws Error on a non-2xx status (with extra guidance for 401 and
     *   regional-endpoint mismatches), on a body without a `data` array, or
     *   when the number of embeddings differs from the number of inputs
     */
    async embedBatch(texts) {
        const response = await fetch(`${this.apiBase}/embeddings`, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                Authorization: `Bearer ${this.apiKey}`,
            },
            body: JSON.stringify({
                model: MODEL,
                input: texts,
                dimensions: DIMENSIONS,
            }),
        });
        if (!response.ok) {
            const errorMessage = describeOpenAIError(await response.text());
            // Provide helpful context for common errors
            if (response.status === 401) {
                const keyPreview = previewKey(this.apiKey);
                // Check for regional endpoint mismatch
                if (errorMessage.includes('incorrect regional hostname')) {
                    // Extract the required region from the error message if present
                    const regionMatch = errorMessage.match(/make your request to (\w+\.api\.openai\.com)/);
                    const requiredEndpoint = regionMatch?.[1] ?? 'the correct regional endpoint';
                    throw new Error(`OpenAI API regional endpoint mismatch. Your account requires ${requiredEndpoint}. ` +
                        `Run /init again and select the matching region (US or EU) instead of Default. ` +
                        `Key: ${keyPreview}`);
                }
                throw new Error(`OpenAI API authentication failed (401). Key format: ${keyPreview}. ` +
                    `Verify your API key at https://platform.openai.com/api-keys. Error: ${errorMessage}`);
            }
            throw new Error(`OpenAI API error (${response.status}): ${errorMessage}`);
        }
        const payload = await response.json();
        const items = payload?.data;
        if (!Array.isArray(items)) {
            throw new Error('OpenAI API returned an unexpected response: missing "data" array.');
        }
        // A short or long reply would silently pair vectors with the wrong texts.
        if (items.length !== texts.length) {
            throw new Error(`OpenAI API returned ${items.length} embeddings for ${texts.length} inputs.`);
        }
        // Sort a copy by index to ensure correct order without mutating the payload
        return [...items].sort((a, b) => a.index - b.index).map(d => d.embedding);
    }
    /**
     * Embed a single text via embed().
     *
     * @param text - Text to embed
     * @returns The first vector returned by embed()
     */
    async embedSingle(text) {
        const results = await this.embed([text]);
        return results[0];
    }
    /** Mark the provider as uninitialized; the next embed() re-checks the key. */
    close() {
        this.initialized = false;
    }
}
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Embedding Provider Types.
|
|
3
|
+
*
|
|
4
|
+
* Types for embedding providers that generate vector embeddings from text.
|
|
5
|
+
*/
|
|
6
|
+
import type { Logger } from '../lib/logger.js';
|
|
7
|
+
/**
 * Callback reporting model download/load progress.
 *
 * @param status - One of 'downloading', 'loading' or 'ready'
 * @param progress - Download progress 0-100 (only for 'downloading')
 * @param message - Optional message (e.g., file being downloaded)
 */
export type ModelProgressCallback = (
    status: 'downloading' | 'loading' | 'ready',
    progress?: number,
    message?: string,
) => void;
|
|
14
|
+
/**
 * Describes one chunk; used for detailed failure logging.
 */
export interface ChunkMetadata {
    /** Path of the file the chunk belongs to */
    filepath: string;
    /** First line of the chunk (1-indexed) */
    startLine: number;
    /** Last line of the chunk (1-indexed) */
    endLine: number;
    /** Size of the chunk text, in characters */
    size: number;
}
|
|
27
|
+
/**
 * Optional settings accepted by embedding operations.
 */
export interface EmbedOptions {
    /** Per-chunk metadata; a parallel array to the texts being embedded */
    chunkMetadata?: ChunkMetadata[];
    /** Logger that receives debug output on failures */
    logger?: Logger;
    /** Starting offset for cumulative chunk numbering in progress display */
    chunkOffset?: number;
}
|
|
38
|
+
/**
 * Contract implemented by every embedding provider: turns text into
 * vector embeddings.
 */
export interface EmbeddingProvider {
    /** Number of dimensions in the embedding vectors */
    readonly dimensions: number;
    /**
     * Prepare the provider for use (load model, etc.).
     * Must be called before using embed() or embedSingle().
     * @param onProgress - Optional callback for download/loading progress
     */
    initialize(onProgress?: ModelProgressCallback): Promise<void>;
    /**
     * Embed several texts at once.
     * @param texts - Array of text strings to embed
     * @param options - Optional settings for logging and metadata
     * @returns Array of embedding vectors (one per text)
     */
    embed(texts: string[], options?: EmbedOptions): Promise<number[][]>;
    /**
     * Embed one text.
     * Optimized for query embedding.
     * @param text - Text string to embed
     * @returns Embedding vector
     */
    embedSingle(text: string): Promise<number[]>;
    /**
     * Shut the provider down and free its resources.
     */
    close(): void;
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* API key validation for cloud embedding providers.
|
|
3
|
+
*
|
|
4
|
+
* Makes a minimal test embedding call to verify the API key is valid
|
|
5
|
+
* before proceeding with indexing.
|
|
6
|
+
*/
|
|
7
|
+
import type { EmbeddingProviderType } from '../../common/types.js';
|
|
8
|
+
/**
 * Outcome of an API key validation.
 */
export interface ValidationResult {
    /** Whether validation succeeded */
    valid: boolean;
    /** Optional error description */
    error?: string;
}
|
|
15
|
+
/**
 * Optional settings for API key validation.
 */
export interface ValidateApiKeyOptions {
    /** Base URL of a regional OpenAI endpoint (e.g., https://us.api.openai.com/v1) */
    openaiBaseUrl?: string;
}
|
|
22
|
+
/**
 * Check an API key by making a minimal test embedding call.
 *
 * @param provider - Which embedding provider the key belongs to
 * @param apiKey - Key to check
 * @param options - Optional configuration (e.g., openaiBaseUrl for regional endpoints)
 * @returns Validation result, carrying an error message when the key is invalid
 */
export declare function validateApiKey(
    provider: EmbeddingProviderType,
    apiKey: string,
    options?: ValidateApiKeyOptions,
): Promise<ValidationResult>;
|