sweet-search 0.0.1 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +190 -0
- package/NOTICE +23 -0
- package/core/cli.js +51 -0
- package/core/config.js +27 -0
- package/core/embedding/embedding-cache.js +467 -0
- package/core/embedding/embedding-local-model.js +845 -0
- package/core/embedding/embedding-remote.js +492 -0
- package/core/embedding/embedding-service.js +712 -0
- package/core/embedding/embedding-telemetry.js +219 -0
- package/core/embedding/index.js +40 -0
- package/core/graph/community-detector.js +294 -0
- package/core/graph/graph-expansion.js +839 -0
- package/core/graph/graph-extractor.js +2304 -0
- package/core/graph/graph-search.js +2148 -0
- package/core/graph/hcgs-generator.js +666 -0
- package/core/graph/index.js +16 -0
- package/core/graph/leiden-algorithm.js +547 -0
- package/core/graph/relationship-resolver.js +366 -0
- package/core/graph/repo-map.js +408 -0
- package/core/graph/summary-manager.js +549 -0
- package/core/indexing/artifact-builder.js +1054 -0
- package/core/indexing/ast-chunker.js +709 -0
- package/core/indexing/chunking/chunk-builder.js +170 -0
- package/core/indexing/chunking/markdown-chunker.js +503 -0
- package/core/indexing/chunking/plaintext-chunker.js +104 -0
- package/core/indexing/dedup/dedup-phase.js +159 -0
- package/core/indexing/dedup/exemplar-selector.js +65 -0
- package/core/indexing/document-chunker.js +56 -0
- package/core/indexing/incremental-parser.js +390 -0
- package/core/indexing/incremental-tracker.js +761 -0
- package/core/indexing/index-codebase-v21.js +472 -0
- package/core/indexing/index-maintainer.mjs +1674 -0
- package/core/indexing/index.js +90 -0
- package/core/indexing/indexer-ann.js +1077 -0
- package/core/indexing/indexer-build.js +742 -0
- package/core/indexing/indexer-phases.js +800 -0
- package/core/indexing/indexer-pool.js +764 -0
- package/core/indexing/indexer-sparse-gram.js +98 -0
- package/core/indexing/indexer-utils.js +536 -0
- package/core/indexing/indexer-worker.js +148 -0
- package/core/indexing/li-skip-policy.js +225 -0
- package/core/indexing/merkle-tracker.js +244 -0
- package/core/indexing/model-pool.js +166 -0
- package/core/infrastructure/code-graph-repository.js +120 -0
- package/core/infrastructure/codebase-repository.js +131 -0
- package/core/infrastructure/config/dedup.js +54 -0
- package/core/infrastructure/config/embedding.js +298 -0
- package/core/infrastructure/config/graph.js +80 -0
- package/core/infrastructure/config/index.js +82 -0
- package/core/infrastructure/config/indexing.js +8 -0
- package/core/infrastructure/config/platform.js +254 -0
- package/core/infrastructure/config/ranking.js +221 -0
- package/core/infrastructure/config/search.js +396 -0
- package/core/infrastructure/config/translation.js +89 -0
- package/core/infrastructure/config/vector-store.js +114 -0
- package/core/infrastructure/constants.js +86 -0
- package/core/infrastructure/coreml-cascade.js +909 -0
- package/core/infrastructure/coreml-cascade.json +46 -0
- package/core/infrastructure/coreml-provider.js +81 -0
- package/core/infrastructure/db-utils.js +69 -0
- package/core/infrastructure/dedup-hashing.js +83 -0
- package/core/infrastructure/hardware-capability.js +332 -0
- package/core/infrastructure/index.js +104 -0
- package/core/infrastructure/language-patterns/maps.js +121 -0
- package/core/infrastructure/language-patterns/registry-core.js +323 -0
- package/core/infrastructure/language-patterns/registry-data-query.js +155 -0
- package/core/infrastructure/language-patterns/registry-object-oriented.js +285 -0
- package/core/infrastructure/language-patterns/registry-tooling.js +240 -0
- package/core/infrastructure/language-patterns/registry-web-style.js +143 -0
- package/core/infrastructure/language-patterns/registry.js +19 -0
- package/core/infrastructure/language-patterns.js +141 -0
- package/core/infrastructure/llm-provider.js +733 -0
- package/core/infrastructure/manifest.json +46 -0
- package/core/infrastructure/maxsim.wasm +0 -0
- package/core/infrastructure/model-fetcher.js +423 -0
- package/core/infrastructure/model-registry.js +214 -0
- package/core/infrastructure/native-inference.js +587 -0
- package/core/infrastructure/native-resolver.js +187 -0
- package/core/infrastructure/native-sparse-gram.js +257 -0
- package/core/infrastructure/native-tokenizer.js +160 -0
- package/core/infrastructure/onnx-mutex.js +45 -0
- package/core/infrastructure/onnx-session-utils.js +261 -0
- package/core/infrastructure/ort-pipeline.js +111 -0
- package/core/infrastructure/project-detector.js +102 -0
- package/core/infrastructure/quantization.js +410 -0
- package/core/infrastructure/simd-distance.js +502 -0
- package/core/infrastructure/simd-distance.wasm +0 -0
- package/core/infrastructure/tree-sitter-provider.js +665 -0
- package/core/infrastructure/webgpu-maxsim.js +222 -0
- package/core/query/index.js +35 -0
- package/core/query/intent-detector.js +201 -0
- package/core/query/intent-router.js +156 -0
- package/core/query/query-router-catboost.js +222 -0
- package/core/query/query-router-ml.js +266 -0
- package/core/query/query-router.js +213 -0
- package/core/ranking/cascaded-scorer.js +379 -0
- package/core/ranking/flashrank.js +810 -0
- package/core/ranking/index.js +49 -0
- package/core/ranking/late-interaction-index.js +2383 -0
- package/core/ranking/late-interaction-model.js +812 -0
- package/core/ranking/local-reranker.js +374 -0
- package/core/ranking/mmr.js +379 -0
- package/core/ranking/quality-scorer.js +363 -0
- package/core/search/context-expander.js +1167 -0
- package/core/search/dedup/sibling-expander.js +327 -0
- package/core/search/index.js +16 -0
- package/core/search/search-boost.js +259 -0
- package/core/search/search-cli.js +544 -0
- package/core/search/search-format.js +282 -0
- package/core/search/search-fusion.js +327 -0
- package/core/search/search-hybrid.js +204 -0
- package/core/search/search-pattern-chunks.js +337 -0
- package/core/search/search-pattern-planner.js +439 -0
- package/core/search/search-pattern-prefilter.js +412 -0
- package/core/search/search-pattern-ripgrep.js +663 -0
- package/core/search/search-pattern.js +463 -0
- package/core/search/search-postprocess.js +452 -0
- package/core/search/search-semantic.js +706 -0
- package/core/search/search-server.js +554 -0
- package/core/search/session-daemon-prewarm.mjs +164 -0
- package/core/search/session-warmup.js +595 -0
- package/core/search/sweet-search.js +632 -0
- package/core/search/warmup-metrics.js +532 -0
- package/core/start-server.js +6 -0
- package/core/training/query-router/features/extractor.js +762 -0
- package/core/training/query-router/features/multilingual-patterns.js +431 -0
- package/core/training/query-router/features/text-segmenter.js +303 -0
- package/core/training/query-router/features/unicode-utils.js +383 -0
- package/core/training/query-router/output/v45_router_d4.js +11521 -0
- package/core/training/query-router/output/v46_router_d4.js +11498 -0
- package/core/vector-store/binary-heap.js +227 -0
- package/core/vector-store/binary-hnsw-index.js +1004 -0
- package/core/vector-store/float-vector-store.js +234 -0
- package/core/vector-store/hnsw-index.js +580 -0
- package/core/vector-store/index.js +39 -0
- package/core/vector-store/seismic-index.js +498 -0
- package/core/vocabulary/index.js +84 -0
- package/core/vocabulary/vocab-constants.js +20 -0
- package/core/vocabulary/vocab-miner-extractors.js +375 -0
- package/core/vocabulary/vocab-miner-nl.js +404 -0
- package/core/vocabulary/vocab-miner-utils.js +146 -0
- package/core/vocabulary/vocab-miner.js +574 -0
- package/core/vocabulary/vocab-prewarm-cli.js +110 -0
- package/core/vocabulary/vocab-ranker.js +492 -0
- package/core/vocabulary/vocab-warmer.js +523 -0
- package/core/vocabulary/vocab-warmup-orchestrator.js +425 -0
- package/core/vocabulary/vocabulary-utils.js +704 -0
- package/crates/wasm-router/pkg/package.json +13 -0
- package/crates/wasm-router/pkg/query_router_wasm.d.ts +36 -0
- package/crates/wasm-router/pkg/query_router_wasm.js +271 -0
- package/crates/wasm-router/pkg/query_router_wasm_bg.wasm +0 -0
- package/crates/wasm-router/pkg/query_router_wasm_bg.wasm.d.ts +19 -0
- package/mcp/config-gen.js +121 -0
- package/mcp/server.js +335 -0
- package/mcp/tool-handlers.js +476 -0
- package/package.json +131 -9
- package/scripts/benchmark-harness.js +794 -0
- package/scripts/init.js +1058 -0
- package/scripts/smoke-test.js +435 -0
- package/scripts/uninstall.js +478 -0
- package/scripts/verify-runtime.js +176 -0
|
@@ -0,0 +1,492 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Embedding Remote - Circuit breaker, compression, HTTP pooling, and API clients.
|
|
3
|
+
* Extracted from embedding-service.js for file size compliance (<500 lines).
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { gzipSync, gunzipSync, brotliDecompressSync } from 'zlib';
|
|
7
|
+
import { EMBEDDING_PROVIDERS } from '../infrastructure/config/index.js';
|
|
8
|
+
|
|
9
|
+
// =============================================================================
|
|
10
|
+
// CIRCUIT BREAKER FOR API STABILITY
|
|
11
|
+
// =============================================================================
|
|
12
|
+
|
|
13
|
+
/**
 * Module-wide circuit breaker guarding calls to the remote embedding API.
 *
 * States:
 *   - CLOSED:    requests flow normally.
 *   - OPEN:      requests are refused until COOLDOWN_MS has elapsed since the
 *                most recent recorded failure.
 *   - HALF_OPEN: requests are let through again; SUCCESS_TO_CLOSE recorded
 *                successes close the circuit, a single failure re-opens it.
 *
 * All methods read and write state through `this`, so they must be invoked
 * as `circuitBreaker.method()`.
 */
export const circuitBreaker = {
  failures: 0,
  lastFailure: 0,
  state: 'CLOSED', // CLOSED (normal), OPEN (blocking), HALF_OPEN (testing)

  FAILURE_THRESHOLD: 5,
  COOLDOWN_MS: 60000,
  SUCCESS_TO_CLOSE: 2,
  successCount: 0,

  /**
   * Decide whether a request may go out right now.
   * An OPEN circuit whose cooldown has expired is moved to HALF_OPEN here.
   * @returns {{allowed: boolean, reason?: string}} `reason` is only set when refused.
   */
  canRequest() {
    const now = Date.now();

    // CLOSED and HALF_OPEN both let traffic through.
    if (this.state !== 'OPEN') {
      return { allowed: true };
    }

    const sinceLastFailure = now - this.lastFailure;
    if (sinceLastFailure > this.COOLDOWN_MS) {
      this.state = 'HALF_OPEN';
      this.successCount = 0;
      console.log('[embedding-service] Circuit breaker entering HALF_OPEN state');
      return { allowed: true };
    }

    const retrySeconds = Math.ceil((this.COOLDOWN_MS - sinceLastFailure) / 1000);
    return { allowed: false, reason: `Circuit OPEN - retry in ${retrySeconds}s` };
  },

  /**
   * Record a successful call. Outside HALF_OPEN this only clears the failure
   * counter; in HALF_OPEN it counts towards closing the circuit.
   */
  recordSuccess() {
    if (this.state !== 'HALF_OPEN') {
      this.failures = 0;
      return;
    }

    this.successCount += 1;
    if (this.successCount >= this.SUCCESS_TO_CLOSE) {
      this.state = 'CLOSED';
      this.failures = 0;
      console.log('[embedding-service] Circuit breaker CLOSED - API recovered');
    }
  },

  /**
   * Record a failed call. Always bumps the counter and the failure timestamp;
   * opens the circuit when probing (HALF_OPEN) or when the threshold is hit.
   */
  recordFailure() {
    this.failures += 1;
    this.lastFailure = Date.now();

    const wasProbing = this.state === 'HALF_OPEN';
    if (wasProbing) {
      this.state = 'OPEN';
      console.log('[embedding-service] Circuit breaker re-OPENED - recovery failed');
    } else if (this.failures >= this.FAILURE_THRESHOLD) {
      this.state = 'OPEN';
      console.error(`[embedding-service] Circuit breaker OPENED after ${this.failures} consecutive failures`);
    }
  },

  /**
   * Snapshot of the breaker for diagnostics.
   * @returns {{state: string, failures: number, lastFailure: number, cooldownRemaining: number}}
   *   `cooldownRemaining` is in milliseconds and is 0 unless the circuit is OPEN.
   */
  getState() {
    const { state, failures, lastFailure } = this;

    let cooldownRemaining = 0;
    if (state === 'OPEN') {
      cooldownRemaining = Math.max(0, this.COOLDOWN_MS - (Date.now() - lastFailure));
    }

    return { state, failures, lastFailure, cooldownRemaining };
  }
};
|
|
78
|
+
|
|
79
|
+
// =============================================================================
|
|
80
|
+
// V2b: REQUEST/RESPONSE COMPRESSION
|
|
81
|
+
// =============================================================================
|
|
82
|
+
|
|
83
|
+
// Per-provider record of request-compression support. A provider with no
// entry is treated as supporting compression; this module only ever stores
// `false` (via markProviderNoCompression).
export const _providerCompressionSupport = new Map();

/**
 * Tell whether gzip-compressed request bodies may be sent to a provider.
 * @param {string} provider - Provider key.
 * @returns {boolean} false only when the provider has been explicitly marked
 *   as not supporting compression; true otherwise (including unknown providers).
 */
export function providerSupportsRequestCompression(provider) {
  const recorded = _providerCompressionSupport.get(provider);
  return recorded !== false;
}

/**
 * Remember that a provider must not be sent compressed request bodies.
 * @param {string} provider - Provider key.
 */
export function markProviderNoCompression(provider) {
  _providerCompressionSupport.set(provider, false);
}
|
|
92
|
+
|
|
93
|
+
/**
 * Quick check if data looks like JSON (starts with '{' or '[' after whitespace).
 *
 * Only leading bytes are inspected: space, LF, CR and TAB are skipped and the
 * first other byte decides the result. Byte containers that are already
 * Uint8Array-based (including Node Buffers) are read in place rather than
 * copied, so the check stays cheap for large payloads.
 *
 * @param {ArrayBuffer|Uint8Array|Buffer|string} data - Payload to inspect.
 *   Strings are inspected by character code.
 * @returns {boolean} true when the first non-whitespace byte is '{' or '[';
 *   false for empty or whitespace-only input.
 */
export function looksLikeJson(data) {
  const isText = typeof data === 'string';

  // Buffer extends Uint8Array, so both are indexed directly without copying.
  // Anything else (ArrayBuffer, other array-likes) goes through the
  // Uint8Array constructor.
  let bytes = null;
  if (!isText) {
    bytes = data instanceof Uint8Array ? data : new Uint8Array(data);
  }

  const length = isText ? data.length : bytes.length;
  for (let i = 0; i < length; i++) {
    const c = isText ? data.charCodeAt(i) : bytes[i];
    if (c === 0x20 || c === 0x0A || c === 0x0D || c === 0x09) continue;
    return c === 0x7B || c === 0x5B; // '{' or '['
  }
  return false;
}
|
|
110
|
+
|
|
111
|
+
/**
 * Parse a potentially compressed API response.
 *
 * Non-200 responses are turned into an Error whose message starts with
 * "API error:". For 200 responses the body is read as bytes, decompressed
 * when a Content-Encoding header is present and the bytes do not already
 * look like JSON, then decoded as UTF-8 and parsed.
 *
 * The Content-Encoding value is compared case-insensitively and with
 * surrounding whitespace ignored; "x-gzip" is accepted as an alias of "gzip".
 *
 * @param {{body: {text: Function, arrayBuffer: Function}, statusCode: number, headers?: Object}} response
 *   Response object exposing `statusCode`, `headers` and a `body` with
 *   `text()` / `arrayBuffer()` readers.
 * @returns {Promise<any>} The parsed JSON payload.
 * @throws {Error} "API error: <status> - <body>" when statusCode is not 200.
 * @throws {SyntaxError} When the (decoded) body is not valid JSON.
 */
export async function parseCompressedResponse({ body, statusCode, headers: resHeaders }) {
  if (statusCode !== 200) {
    const error = await body.text();
    throw new Error(`API error: ${statusCode} - ${error}`);
  }

  // Content-coding names are case-insensitive, so normalise before comparing.
  const rawEncoding = resHeaders?.['content-encoding'];
  const encoding = typeof rawEncoding === 'string'
    ? rawEncoding.trim().toLowerCase()
    : rawEncoding;

  let responseData = await body.arrayBuffer();

  // Skip decompression when the bytes already look like JSON: the transport
  // may have decoded the body while leaving the header in place.
  if (encoding && !looksLikeJson(responseData)) {
    try {
      if (encoding === 'gzip' || encoding === 'x-gzip') {
        responseData = gunzipSync(Buffer.from(responseData));
      } else if (encoding === 'br') {
        responseData = brotliDecompressSync(Buffer.from(responseData));
      }
    } catch {
      // Decompression failed - data was likely already decompressed by undici.
      // Fall through and let JSON.parse report a genuinely broken body.
    }
  }

  const text = typeof responseData === 'string'
    ? responseData
    : Buffer.isBuffer(responseData)
      ? responseData.toString('utf8')
      : new TextDecoder().decode(responseData);
  return JSON.parse(text);
}
|
|
142
|
+
|
|
143
|
+
/**
 * V2b: Make an API request with optional gzip request compression.
 *
 * The JSON body is gzip-compressed when the provider has not been marked as
 * rejecting compression and compression saves more than 10% of the encoded
 * byte size. If a compressed request is answered with 415 or 400, the
 * request is repeated once uncompressed.
 *
 * A provider is recorded as not supporting compression only when there is
 * evidence that compression was the cause: a 415 response, or a 400 response
 * whose uncompressed retry succeeds. A 400 that persists without compression
 * is an ordinary bad request and leaves the provider's setting untouched.
 *
 * @param {{request: Function}} pool - Client exposing `request({path, method, headers, body})`.
 * @param {string} provider - Provider key used for compression bookkeeping.
 * @param {string} apiPath - Request path.
 * @param {any} requestBody - JSON-serialisable request payload.
 * @param {string} apiKey - Bearer token for the Authorization header.
 * @returns {Promise<any>} Parsed JSON response (see parseCompressedResponse).
 * @throws {Error} Whatever `pool.request` or `parseCompressedResponse` throw.
 */
export async function compressedApiRequest(pool, provider, apiPath, requestBody, apiKey) {
  const jsonBody = JSON.stringify(requestBody);

  const baseHeaders = {
    'Authorization': `Bearer ${apiKey}`,
    'Content-Type': 'application/json',
    'Accept-Encoding': 'gzip, br',
  };

  let headers = baseHeaders;
  let body = jsonBody;
  if (providerSupportsRequestCompression(provider)) {
    try {
      // Compare bytes with bytes: jsonBody.length counts UTF-16 code units,
      // which under-reports the wire size of non-ASCII payloads.
      const raw = Buffer.from(jsonBody);
      const compressed = gzipSync(raw);
      if (compressed.length < raw.length * 0.9) {
        headers = { ...baseHeaders, 'Content-Encoding': 'gzip' };
        body = compressed;
      }
    } catch {
      // gzip failed - send uncompressed
    }
  }

  const response = await pool.request({
    path: apiPath,
    method: 'POST',
    headers,
    body,
  });

  const sentCompressed = headers !== baseHeaders;
  const firstStatus = response.statusCode;
  const maybeRejectedCompression = sentCompressed && (firstStatus === 415 || firstStatus === 400);
  if (!maybeRejectedCompression) {
    return parseCompressedResponse(response);
  }

  // Consume the rejected response's body before issuing the retry.
  try { await response.body.text(); } catch { /* best-effort drain */ }

  if (firstStatus === 415) {
    markProviderNoCompression(provider);
  }

  const retryResponse = await pool.request({
    path: apiPath,
    method: 'POST',
    headers: baseHeaders,
    body: jsonBody,
  });

  // A 400 only implicates compression when the same payload is accepted
  // once it is sent uncompressed.
  if (firstStatus === 400 && retryResponse.statusCode === 200) {
    markProviderNoCompression(provider);
  }

  return parseCompressedResponse(retryResponse);
}
|
|
191
|
+
|
|
192
|
+
// =============================================================================
|
|
193
|
+
// RATE LIMITER
|
|
194
|
+
// =============================================================================
|
|
195
|
+
|
|
196
|
+
/**
 * Sliding one-minute rate limiter tracking both request count and token usage.
 *
 * `requestTimestamps` holds the millisecond timestamps of admitted requests;
 * `tokenTimestamps` holds `{ time, tokens }` entries for admitted requests
 * that declared a token count.
 */
export class RateLimiter {
  /**
   * @param {number} requestsPerMinute - Maximum requests per 60 s window.
   * @param {number} [tokensPerMinute=Infinity] - Maximum tokens per 60 s window.
   */
  constructor(requestsPerMinute, tokensPerMinute = Infinity) {
    this.requestsPerMinute = requestsPerMinute;
    this.tokensPerMinute = tokensPerMinute;
    this.requestTimestamps = [];
    this.tokenTimestamps = [];
  }

  /**
   * Wait until a request carrying `tokenCount` tokens fits in the current
   * window, then record it.
   *
   * The limits are re-evaluated with a fresh clock after every sleep, so a
   * caller is only admitted once it actually fits. A request that exceeds
   * the token budget on its own, with nothing left in the window to expire,
   * is admitted immediately because waiting cannot make it fit.
   *
   * @param {number} [tokenCount=0] - Tokens the request will consume.
   * @returns {Promise<void>}
   */
  async waitForSlot(tokenCount = 0) {
    const WINDOW_MS = 60000;

    for (;;) {
      const now = Date.now();
      const windowStart = now - WINDOW_MS;

      this.requestTimestamps = this.requestTimestamps.filter(t => t > windowStart);
      this.tokenTimestamps = this.tokenTimestamps.filter(t => t.time > windowStart);

      let waitMs = 0;

      // Request budget: wait for the oldest request to leave the window.
      if (this.requestTimestamps.length > 0
          && this.requestTimestamps.length >= this.requestsPerMinute) {
        waitMs = Math.max(waitMs, this.requestTimestamps[0] + WINDOW_MS - now);
      }

      // Token budget: only wait when an existing entry can expire.
      if (this.tokenTimestamps.length > 0) {
        const usedTokens = this.tokenTimestamps.reduce((sum, t) => sum + t.tokens, 0);
        if (usedTokens + tokenCount > this.tokensPerMinute) {
          waitMs = Math.max(waitMs, this.tokenTimestamps[0].time + WINDOW_MS - now);
        }
      }

      if (!(waitMs > 0)) break;
      await new Promise(resolve => setTimeout(resolve, waitMs));
    }

    const admittedAt = Date.now();
    this.requestTimestamps.push(admittedAt);
    if (tokenCount > 0) {
      this.tokenTimestamps.push({ time: admittedAt, tokens: tokenCount });
    }
  }
}
|
|
233
|
+
|
|
234
|
+
// =============================================================================
|
|
235
|
+
// TIME-WINDOW RATE LIMITER (V2: concurrent-safe for Promise.all)
|
|
236
|
+
// =============================================================================
|
|
237
|
+
|
|
238
|
+
/**
 * Request limiter enforcing two sliding windows at once: a per-minute cap and
 * a per-second burst cap. `acquire()` calls are chained through a promise so
 * that only one caller at a time evaluates and updates the windows.
 */
export class TimeWindowRateLimiter {
  /**
   * @param {number} maxRPM - Maximum acquisitions per 60 s window.
   * @param {{maxPerSecond?: number}} [options] - `maxPerSecond` overrides the
   *   per-second cap, which otherwise defaults to floor(maxRPM / 60) + 1.
   */
  constructor(maxRPM, options = {}) {
    this.windowMs = 60_000;
    this.maxInWindow = maxRPM;
    this.timestamps = [];

    this.secondWindowMs = 1_000;
    this.maxPerSecond = options.maxPerSecond ?? (Math.floor(maxRPM / 60) + 1);
    this.secondTimestamps = [];

    // Tail of the promise chain that serialises acquire() callers.
    this._mutex = Promise.resolve();
  }

  /**
   * Wait for a free slot in both windows and record the acquisition.
   * @returns {Promise<void>}
   */
  async acquire() {
    // Append ourselves to the chain, then wait for the previous holder.
    const prev = this._mutex;
    let releaseMutex;
    this._mutex = new Promise(resolve => { releaseMutex = resolve; });
    await prev;

    try {
      while (this._atMinuteCapacity() || this._atSecondCapacity()) {
        const waitMs = this._nextWaitMs();
        await new Promise(r => setTimeout(r, waitMs));
        this._pruneWindows();
      }

      const now = Date.now();
      this.timestamps.push(now);
      this.secondTimestamps.push(now);
    } finally {
      // Always hand over to the next waiter, even if something above threw.
      releaseMutex();
    }
  }

  /** @returns {boolean} true when the per-minute window is full (after pruning). */
  _atMinuteCapacity() {
    this._pruneWindows();
    return this.timestamps.length >= this.maxInWindow;
  }

  /** @returns {boolean} true when the per-second window is full (after pruning). */
  _atSecondCapacity() {
    this._pruneWindows();
    return this.secondTimestamps.length >= this.maxPerSecond;
  }

  /**
   * Milliseconds to sleep before re-checking capacity.
   *
   * Only windows that are currently full contribute, and the longest of
   * their waits is used: a slot opens only once every full window has shed
   * its oldest entry. Taking the shortest wait instead would wake the caller
   * every millisecond while the minute window is full and the second window
   * is empty.
   *
   * @returns {number} Wait in milliseconds, never less than 1.
   */
  _nextWaitMs() {
    const now = Date.now();
    let waitMs = 1;

    if (this.timestamps.length > 0 && this.timestamps.length >= this.maxInWindow) {
      waitMs = Math.max(waitMs, this.windowMs - (now - this.timestamps[0]));
    }
    if (this.secondTimestamps.length > 0 && this.secondTimestamps.length >= this.maxPerSecond) {
      waitMs = Math.max(waitMs, this.secondWindowMs - (now - this.secondTimestamps[0]));
    }

    return waitMs;
  }

  /** Drop timestamps that have fallen out of their respective windows. */
  _pruneWindows() {
    const now = Date.now();
    const minuteCutoff = now - this.windowMs;
    const secondCutoff = now - this.secondWindowMs;
    while (this.timestamps.length > 0 && this.timestamps[0] < minuteCutoff) {
      this.timestamps.shift();
    }
    while (this.secondTimestamps.length > 0 && this.secondTimestamps[0] < secondCutoff) {
      this.secondTimestamps.shift();
    }
  }
}
|
|
305
|
+
|
|
306
|
+
// =============================================================================
|
|
307
|
+
// REMOTE API CLIENTS (with HTTP/2 and connection pooling)
|
|
308
|
+
// =============================================================================
|
|
309
|
+
|
|
310
|
+
// Lazily created undici Pool shared by every caller in this module.
let undiciPool = null;

/**
 * Return the shared undici connection pool for https://api.voyageai.com,
 * creating it on first use.
 *
 * The cache is checked again after the dynamic import resolves: several
 * callers can pass the first check while the import is still pending, and
 * without the second check each of them would construct its own Pool,
 * leaving all but the last one orphaned.
 *
 * @returns {Promise<object|null>} The Pool, or null when `undici` cannot be
 *   imported or the Pool cannot be constructed.
 */
export async function getUndiciPool() {
  if (undiciPool) return undiciPool;

  try {
    const { Pool } = await import('undici');
    if (!undiciPool) {
      undiciPool = new Pool('https://api.voyageai.com', {
        connections: 10,
        pipelining: 1,
        keepAliveTimeout: 30000,
        keepAliveMaxTimeout: 60000,
      });
    }
    return undiciPool;
  } catch {
    // undici unavailable - callers fall back to another transport.
    return null;
  }
}
|
|
328
|
+
|
|
329
|
+
// Lazily created keep-alive https.Agent shared by every caller in this module.
let httpsAgent = null;

/**
 * Return the shared keep-alive HTTPS agent, creating it on first use.
 *
 * The cache is checked again after the dynamic import resolves so that
 * concurrent first callers all receive the same Agent instead of each
 * constructing (and then overwriting) their own.
 *
 * @returns {Promise<object|undefined>} The agent, or undefined when the
 *   `https` module cannot be imported or the Agent cannot be constructed.
 */
export async function getHttpsAgent() {
  if (httpsAgent) return httpsAgent;

  try {
    const https = await import('https');
    if (!httpsAgent) {
      httpsAgent = new https.Agent({
        keepAlive: true,
        maxSockets: 10,
        maxFreeSockets: 5,
        timeout: 30000,
        // NOTE(review): freeSocketTimeout is not among Node's documented
        // https.Agent options - confirm whether it has any effect here.
        freeSocketTimeout: 15000,
      });
    }
    return httpsAgent;
  } catch {
    return undefined;
  }
}
|
|
348
|
+
|
|
349
|
+
/**
 * Request embeddings from the Voyage API.
 *
 * The pooled client (getUndiciPool + compressedApiRequest) is tried first.
 * Errors whose message contains "API error" are rethrown as-is; any other
 * pooled-path error is logged and the request is repeated with `fetch`
 * against `config.endpoint`.
 *
 * @param {string[]} texts - Inputs to embed.
 * @param {{model: string, apiKey: string, endpoint: string, dimensions: {full: number}}} config
 * @param {{inputType?: string, outputDtype?: string, outputDimension?: number}} [options]
 *   Defaults: inputType 'document', outputDtype 'float',
 *   outputDimension `config.dimensions.full`.
 * @returns {Promise<Array>} One `embedding` value per entry of the response's `data` array.
 * @throws {Error} "API error: ..." from the pooled path, or
 *   "Voyage API error: <status> - <body>" from the fetch path.
 */
export async function callVoyageAPI(texts, config, options = {}) {
  const {
    inputType: kind = 'document',
    outputDtype: dtype = 'float',
    outputDimension: dims = config.dimensions.full,
  } = options;

  // output_dtype is only sent when it differs from the 'float' default.
  const requestBody = {
    model: config.model,
    input: texts,
    input_type: kind,
    output_dimension: dims,
    ...(dtype !== 'float' ? { output_dtype: dtype } : {}),
  };

  const toEmbeddings = (payload) => payload.data.map((item) => item.embedding);

  const pool = await getUndiciPool();
  if (pool) {
    try {
      const pooled = await compressedApiRequest(pool, 'voyage', '/v1/embeddings', requestBody, config.apiKey);
      return toEmbeddings(pooled);
    } catch (err) {
      // The API itself answered with an error: retrying elsewhere won't help.
      if (err.message.includes('API error')) {
        throw err;
      }
      console.warn('[HTTP/2] Falling back to fetch:', err.message);
    }
  }

  const response = await fetch(config.endpoint, {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${config.apiKey}`,
      'Content-Type': 'application/json',
      'Connection': 'keep-alive',
      'Accept-Encoding': 'gzip, br',
    },
    body: JSON.stringify(requestBody),
  });

  if (!response.ok) {
    const error = await response.text();
    throw new Error(`Voyage API error: ${response.status} - ${error}`);
  }

  return toEmbeddings(await response.json());
}
|
|
402
|
+
|
|
403
|
+
/**
 * Request embeddings from the Mistral API via `fetch`.
 *
 * @param {string[]} texts - Inputs to embed.
 * @param {{model: string, apiKey: string, endpoint: string}} config
 * @param {{outputDimension?: number}} [options] - When `outputDimension` is
 *   truthy it is sent as `dimensions`; otherwise the field is omitted.
 * @returns {Promise<Array>} One `embedding` value per entry of the response's `data` array.
 * @throws {Error} "Mistral API error: <status> - <body>" on a non-OK response.
 */
export async function callMistralAPI(texts, config, options = {}) {
  const { outputDimension } = options;

  const requestBody = {
    model: config.model,
    input: texts,
    ...(outputDimension ? { dimensions: outputDimension } : {}),
  };

  const headers = {
    'Authorization': `Bearer ${config.apiKey}`,
    'Content-Type': 'application/json',
    'Accept-Encoding': 'gzip, br',
  };

  const response = await fetch(config.endpoint, {
    method: 'POST',
    headers,
    body: JSON.stringify(requestBody),
  });

  if (!response.ok) {
    const error = await response.text();
    throw new Error(`Mistral API error: ${response.status} - ${error}`);
  }

  const { data: rows } = await response.json();
  return rows.map((row) => row.embedding);
}
|
|
433
|
+
|
|
434
|
+
/**
 * Request embeddings from the Jina API via `fetch`.
 *
 * @param {string[]} texts - Inputs to embed.
 * @param {{model: string, apiKey: string, endpoint: string}} config
 * @param {{task?: string, outputDimension?: number}} [options] - `task`
 *   defaults to 'retrieval.passage'. When `outputDimension` is truthy it is
 *   sent as `dimensions`; otherwise the field is omitted.
 * @returns {Promise<Array>} One `embedding` value per entry of the response's `data` array.
 * @throws {Error} "Jina API error: <status> - <body>" on a non-OK response.
 */
export async function callJinaAPI(texts, config, options = {}) {
  const { task = 'retrieval.passage', outputDimension } = options;

  const requestBody = {
    model: config.model,
    input: texts,
    task,
    ...(outputDimension ? { dimensions: outputDimension } : {}),
  };

  const headers = {
    'Authorization': `Bearer ${config.apiKey}`,
    'Content-Type': 'application/json',
    'Accept-Encoding': 'gzip, br',
  };

  const response = await fetch(config.endpoint, {
    method: 'POST',
    headers,
    body: JSON.stringify(requestBody),
  });

  if (!response.ok) {
    const error = await response.text();
    throw new Error(`Jina API error: ${response.status} - ${error}`);
  }

  const { data: rows } = await response.json();
  return rows.map((row) => row.embedding);
}
|
|
468
|
+
|
|
469
|
+
// =============================================================================
|
|
470
|
+
// RATE LIMITER SINGLETONS
|
|
471
|
+
// =============================================================================
|
|
472
|
+
|
|
473
|
+
// One RateLimiter per remote provider, built once at module load. Limits come
// from each provider's `rateLimit` config; the literals are the fallbacks
// used when a limit is missing or falsy. Only voyage is given a token budget.
export const rateLimiters = (() => {
  const { voyage, mistral, jina } = EMBEDDING_PROVIDERS;

  return {
    voyage: new RateLimiter(
      voyage.rateLimit?.requestsPerMinute || 300,
      voyage.rateLimit?.tokensPerMinute || 1000000
    ),
    mistral: new RateLimiter(mistral.rateLimit?.requestsPerMinute || 100),
    jina: new RateLimiter(jina.rateLimit?.requestsPerMinute || 500),
  };
})();
|
|
481
|
+
|
|
482
|
+
// One TimeWindowRateLimiter per remote provider, built once at module load.
// The per-minute cap comes from each provider's `rateLimit.requestsPerMinute`;
// the literals are the fallbacks used when that value is missing or falsy.
export const timeWindowLimiters = (() => {
  const { voyage, mistral, jina } = EMBEDDING_PROVIDERS;

  return {
    voyage: new TimeWindowRateLimiter(voyage.rateLimit?.requestsPerMinute || 300),
    mistral: new TimeWindowRateLimiter(mistral.rateLimit?.requestsPerMinute || 100),
    jina: new TimeWindowRateLimiter(jina.rateLimit?.requestsPerMinute || 500),
  };
})();
|