semantic-code-mcp 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +22 -0
- package/README.md +259 -0
- package/config.json +85 -0
- package/features/check-last-version.js +504 -0
- package/features/clear-cache.js +75 -0
- package/features/get-status.js +210 -0
- package/features/hybrid-search.js +189 -0
- package/features/index-codebase.js +999 -0
- package/features/set-workspace.js +183 -0
- package/index.js +297 -0
- package/lib/ast-chunker.js +273 -0
- package/lib/cache-factory.js +13 -0
- package/lib/cache.js +157 -0
- package/lib/config.js +1296 -0
- package/lib/embedding-worker.js +155 -0
- package/lib/gemini-embedder.js +351 -0
- package/lib/ignore-patterns.js +896 -0
- package/lib/milvus-cache.js +478 -0
- package/lib/mrl-embedder.js +235 -0
- package/lib/project-detector.js +75 -0
- package/lib/resource-throttle.js +85 -0
- package/lib/sqlite-cache.js +468 -0
- package/lib/tokenizer.js +149 -0
- package/lib/utils.js +214 -0
- package/package.json +70 -0
- package/reindex.js +109 -0
package/lib/embedding-worker.js
@@ -0,0 +1,155 @@
import { parentPort, workerData } from "worker_threads";
import { pipeline, layer_norm } from "@huggingface/transformers";
import { existsSync, rmSync } from 'fs';
import { join, dirname } from 'path';
import { fileURLToPath } from 'url';
import { createGeminiEmbedder } from './gemini-embedder.js';

let embedder = null;
const VALID_DIMENSIONS = [64, 128, 256, 512, 768];

/**
 * Clear the HuggingFace transformers cache for a specific model
 * Used for auto-recovery from corrupted model files
 */
function clearModelCache(modelName) {
  try {
    const transformersPath = dirname(fileURLToPath(import.meta.resolve('@huggingface/transformers')));
    const cacheDir = join(transformersPath, '.cache', ...modelName.split('/'));
    if (existsSync(cacheDir)) {
      console.error(`[Worker] Clearing corrupted cache: ${cacheDir}`);
      rmSync(cacheDir, { recursive: true, force: true });
      return true;
    }
  } catch (e) {
    console.error(`[Worker] Failed to clear cache: ${e.message}`);
  }
  return false;
}

// Initialize the embedding model once when worker starts
async function initializeEmbedder() {
  if (!embedder) {
    const provider = (workerData.embeddingProvider || 'local').toLowerCase();
    if (['gemini', 'openai', 'openai-compatible', 'vertex'].includes(provider)) {
      embedder = await createGeminiEmbedder({
        embeddingProvider: provider,
        embeddingModel: workerData.embeddingModel,
        embeddingDimension: workerData.embeddingDimension,
        geminiApiKey: workerData.geminiApiKey,
        geminiModel: workerData.geminiModel,
        geminiBaseURL: workerData.geminiBaseURL,
        embeddingApiKey: workerData.embeddingApiKey,
        embeddingBaseURL: workerData.embeddingBaseURL,
        openaiApiKey: workerData.openaiApiKey,
        vertexProject: workerData.vertexProject,
        vertexLocation: workerData.vertexLocation,
        googleApplicationCredentials: workerData.googleApplicationCredentials,
        geminiDimensions: workerData.geminiDimensions,
        geminiBatchSize: workerData.geminiBatchSize,
        geminiBatchFlushMs: workerData.geminiBatchFlushMs,
        geminiMaxRetries: workerData.geminiMaxRetries,
        verbose: workerData.verbose
      });
      return embedder;
    }

    const modelName = workerData.embeddingModel || 'nomic-ai/nomic-embed-text-v1.5';
    const dimension = workerData.embeddingDimension || 256;
    const targetDim = VALID_DIMENSIONS.includes(dimension) ? dimension : 256;
    const isNomic = modelName.includes('nomic');

    // Load model with auto-recovery for corrupted files
    let extractor;
    try {
      extractor = await pipeline("feature-extraction", modelName);
    } catch (err) {
      if (err.message && err.message.includes('Protobuf parsing failed')) {
        console.error(`[Worker] Corrupted model detected, attempting auto-recovery...`);
        if (clearModelCache(modelName)) {
          extractor = await pipeline("feature-extraction", modelName);
        } else {
          throw err;
        }
      } else {
        throw err;
      }
    }

    if (isNomic) {
      // MRL embedder with dimension slicing
      embedder = async function(text) {
        let embeddings = await extractor(text, { pooling: 'mean' });
        embeddings = layer_norm(embeddings, [embeddings.dims[1]])
          .slice(null, [0, targetDim])
          .normalize(2, -1);
        return { data: embeddings.data };
      };
      embedder.dimension = targetDim;
    } else {
      // Legacy embedder (MiniLM etc.)
      embedder = async function(text) {
        return await extractor(text, { pooling: 'mean', normalize: true });
      };
      embedder.dimension = 384;
    }

    embedder.modelName = modelName;
  }
  return embedder;
}

/**
 * Process chunks with optimized single-text embedding
 * Note: Batch processing with transformers.js WASM backend doesn't improve speed
 * because it loops internally. Single calls are actually faster.
 */
async function processChunks(chunks) {
  const embedder = await initializeEmbedder();
  const results = [];

  for (const chunk of chunks) {
    try {
      const output = await embedder(chunk.text, { pooling: "mean", normalize: true });
      results.push({
        file: chunk.file,
        startLine: chunk.startLine,
        endLine: chunk.endLine,
        content: chunk.text,
        vector: Array.from(output.data),
        success: true
      });
    } catch (error) {
      results.push({
        file: chunk.file,
        startLine: chunk.startLine,
        endLine: chunk.endLine,
        error: error.message,
        success: false
      });
    }
  }

  return results;
}

// Listen for messages from main thread
parentPort.on("message", async (message) => {
  if (message.type === "process") {
    try {
      const results = await processChunks(message.chunks);
      parentPort.postMessage({ type: "results", results, batchId: message.batchId });
    } catch (error) {
      parentPort.postMessage({ type: "error", error: error.message, batchId: message.batchId });
    }
  } else if (message.type === "shutdown") {
    process.exit(0);
  }
});

// Signal that worker is ready
initializeEmbedder().then(() => {
  parentPort.postMessage({ type: "ready" });
}).catch((error) => {
  parentPort.postMessage({ type: "error", error: error.message });
});
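The worker above speaks a small message protocol: it posts "ready" once the model is initialized, answers "process" messages with "results" or "error" (echoing batchId), and exits on "shutdown". A minimal main-thread sketch of that protocol follows; it is illustrative only, not code from the package, and it assumes the chunk shape used by processChunks above.

// Illustrative sketch — not part of the package. The worker path and
// the chunk fields are assumptions inferred from this diff.
import { Worker } from "worker_threads";

const worker = new Worker(new URL("./lib/embedding-worker.js", import.meta.url), {
  workerData: { embeddingProvider: "local", embeddingDimension: 256 }
});

worker.on("message", (msg) => {
  if (msg.type === "ready") {
    worker.postMessage({
      type: "process",
      batchId: 1,
      chunks: [{ file: "src/add.js", startLine: 1, endLine: 1, text: "const add = (a, b) => a + b;" }]
    });
  } else if (msg.type === "results") {
    // Each result carries { file, startLine, endLine, content, vector, success }.
    console.log(msg.results[0].vector.length); // 256 with the default nomic MRL slicing
    worker.postMessage({ type: "shutdown" });
  } else if (msg.type === "error") {
    console.error(msg.error, msg.batchId);
  }
});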
package/lib/gemini-embedder.js
@@ -0,0 +1,351 @@
/**
 * API embedder used by gemini/openai/openai-compatible/vertex providers.
 *
 * Contract:
 *   async embed(text) -> { data: Float32Array, dims: [1, n] }
 * Metadata:
 *   embed.modelName, embed.dimension, embed.device
 */

import { GoogleAuth } from "google-auth-library";

const DEFAULT_GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai";
const DEFAULT_GEMINI_MODEL = "gemini-embedding-001";
const DEFAULT_DIMENSIONS = 768;
const DEFAULT_BATCH_SIZE = 24;
const DEFAULT_BATCH_FLUSH_MS = 12;
const DEFAULT_MAX_RETRIES = 3;
const DEFAULT_OPENAI_BASE_URL = "https://api.openai.com/v1";
const DEFAULT_OPENAI_MODEL = "text-embedding-3-small";

function normalizeBaseUrl(baseUrl, fallback = DEFAULT_GEMINI_BASE_URL) {
  return (baseUrl || fallback).trim().replace(/\/+$/, "");
}

function clampInt(value, fallback, min, max) {
  const parsed = Number.parseInt(String(value), 10);
  if (Number.isNaN(parsed)) return fallback;
  if (parsed < min) return min;
  if (parsed > max) return max;
  return parsed;
}

function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

function cleanErrorText(text) {
  if (!text) return "";
  return text.replace(/\s+/g, " ").trim().slice(0, 400);
}

function getRetryDelayMs(attempt) {
  // attempt starts at 1
  return Math.min(2000, 150 * (2 ** (attempt - 1)));
}

async function createVertexTokenProvider(options = {}) {
  const credentialsPath =
    options.googleApplicationCredentials ||
    process.env.GOOGLE_APPLICATION_CREDENTIALS;
  const authOptions = {
    scopes: ["https://www.googleapis.com/auth/cloud-platform"]
  };
  if (credentialsPath && String(credentialsPath).trim()) {
    authOptions.keyFilename = String(credentialsPath).trim();
  }

  const auth = new GoogleAuth(authOptions);
  const client = await auth.getClient();
  return async function getAccessToken() {
    const tokenResponse = await client.getAccessToken();
    const token = tokenResponse?.token || tokenResponse;
    if (!token || !String(token).trim()) {
      throw new Error("[Vertex] Failed to obtain access token from Google credentials.");
    }
    return String(token).trim();
  };
}

/**
 * Create an API embedder with micro-batching and retry.
 */
export async function createGeminiEmbedder(options = {}) {
  const provider = (options.embeddingProvider || "gemini").toLowerCase();

  const batchSize = clampInt(options.geminiBatchSize ?? DEFAULT_BATCH_SIZE, DEFAULT_BATCH_SIZE, 1, 128);
  const batchFlushMs = clampInt(
    options.geminiBatchFlushMs ?? DEFAULT_BATCH_FLUSH_MS,
    DEFAULT_BATCH_FLUSH_MS,
    0,
    1000
  );
  const maxRetries = clampInt(
    options.geminiMaxRetries ?? DEFAULT_MAX_RETRIES,
    DEFAULT_MAX_RETRIES,
    0,
    10
  );
  const verbose = options.verbose === true;
  const dimension = clampInt(
    options.geminiDimensions ?? options.embeddingDimension ?? DEFAULT_DIMENSIONS,
    DEFAULT_DIMENSIONS,
    1,
    3072
  );

  let defaultModel = DEFAULT_GEMINI_MODEL;
  let endpoint = "";
  let logPrefix = "Gemini";
  let staticToken = "";
  let getAuthToken = async () => staticToken;

  if (provider === "openai") {
    logPrefix = "OpenAI";
    defaultModel = DEFAULT_OPENAI_MODEL;
    staticToken =
      options.openaiApiKey ||
      options.apiKey ||
      process.env.OPENAI_API_KEY ||
      "";
    endpoint = `${normalizeBaseUrl(DEFAULT_OPENAI_BASE_URL, DEFAULT_OPENAI_BASE_URL)}/embeddings`;
  } else if (provider === "openai-compatible") {
    logPrefix = "OpenAI-compatible";
    defaultModel = DEFAULT_OPENAI_MODEL;
    staticToken =
      options.embeddingApiKey ||
      options.apiKey ||
      process.env.SMART_CODING_EMBEDDING_API_KEY ||
      process.env.EMBEDDING_API_KEY ||
      "";
    const baseUrl =
      options.embeddingBaseURL ||
      process.env.SMART_CODING_EMBEDDING_BASE_URL ||
      process.env.EMBEDDING_BASE_URL;
    if (!baseUrl || !String(baseUrl).trim()) {
      throw new Error("[OpenAI-compatible] Missing base URL. Set SMART_CODING_EMBEDDING_BASE_URL.");
    }
    endpoint = `${normalizeBaseUrl(baseUrl, DEFAULT_OPENAI_BASE_URL)}/embeddings`;
  } else if (provider === "vertex") {
    logPrefix = "Vertex";
    defaultModel = DEFAULT_GEMINI_MODEL;
  } else {
    logPrefix = "Gemini";
    defaultModel = DEFAULT_GEMINI_MODEL;
    staticToken =
      options.geminiApiKey ||
      options.apiKey ||
      process.env.SMART_CODING_GEMINI_API_KEY ||
      process.env.GEMINI_API_KEY ||
      "";
    const baseUrl =
      options.geminiBaseURL ||
      process.env.SMART_CODING_GEMINI_BASE_URL ||
      process.env.GEMINI_BASE_URL ||
      DEFAULT_GEMINI_BASE_URL;
    endpoint = `${normalizeBaseUrl(baseUrl, DEFAULT_GEMINI_BASE_URL)}/embeddings`;
  }

  const isApiProvider = ["gemini", "openai", "openai-compatible", "vertex"].includes(provider);
  const configuredEmbeddingModel =
    typeof options.embeddingModel === "string" ? options.embeddingModel.trim() : "";
  const shouldIgnoreLocalDefaultModel =
    isApiProvider &&
    (!configuredEmbeddingModel || configuredEmbeddingModel === "nomic-ai/nomic-embed-text-v1.5");
  const modelName = shouldIgnoreLocalDefaultModel
    ? (options.geminiModel || defaultModel)
    : (configuredEmbeddingModel || options.geminiModel || defaultModel);

  if (provider === "vertex") {
    const project =
      options.vertexProject ||
      process.env.SMART_CODING_VERTEX_PROJECT ||
      process.env.VERTEX_PROJECT;
    const location =
      options.vertexLocation ||
      process.env.SMART_CODING_VERTEX_LOCATION ||
      process.env.VERTEX_LOCATION ||
      "us-central1";

    if (!project || !String(project).trim()) {
      throw new Error(
        "[Vertex] Missing project. Set SMART_CODING_VERTEX_PROJECT (or VERTEX_PROJECT)."
      );
    }

    getAuthToken = await createVertexTokenProvider(options);
    endpoint =
      `https://${location}-aiplatform.googleapis.com/v1/projects/${project}` +
      `/locations/${location}/publishers/google/models/${modelName}:predict`;
  } else {
    if (!staticToken || !String(staticToken).trim()) {
      throw new Error(
        `[${logPrefix}] Missing API key/token for embedding provider '${provider}'.`
      );
    }
    getAuthToken = async () => String(staticToken).trim();
  }

  const queue = [];
  let flushTimer = null;
  let inFlight = false;

  console.error(
    `[${logPrefix}] Provider init: provider=${provider} model=${modelName} endpoint=${endpoint} dim=${dimension}`
  );

  if (verbose) {
    console.error(
      `[${logPrefix}] Provider ready: model=${modelName}, dim=${dimension}, batch=${batchSize}, flush=${batchFlushMs}ms`
    );
  }

  async function requestEmbeddings(inputTexts) {
    const body = provider === "vertex"
      ? {
          instances: inputTexts.map((text) => ({ content: text })),
          ...(dimension > 0 ? { parameters: { outputDimensionality: dimension } } : {})
        }
      : {
          model: modelName,
          input: inputTexts,
          ...(dimension > 0 ? { dimensions: dimension } : {})
        };

    let attempt = 0;
    while (attempt <= maxRetries) {
      attempt += 1;
      try {
        const response = await fetch(endpoint, {
          method: "POST",
          headers: {
            Authorization: `Bearer ${await getAuthToken()}`,
            "Content-Type": "application/json"
          },
          body: JSON.stringify(body)
        });

        if (response.ok) {
          const payload = await response.json();
          if (provider === "vertex") {
            if (!payload || !Array.isArray(payload.predictions)) {
              throw new Error("[Vertex] Invalid embeddings response shape");
            }
            return payload.predictions.map((prediction, idx) => {
              const vector = prediction?.embeddings?.values;
              if (!Array.isArray(vector)) {
                throw new Error(
                  `[Vertex] Invalid embeddings response at predictions[${idx}].embeddings.values`
                );
              }
              return vector;
            });
          }
          if (!payload || !Array.isArray(payload.data)) {
            throw new Error(`[${logPrefix}] Invalid embeddings response shape`);
          }
          return payload.data.map((row) => row?.embedding);
        }

        const errorText = cleanErrorText(await response.text());
        const shouldRetry = response.status === 429 || response.status >= 500;

        if (shouldRetry && attempt <= maxRetries) {
          await sleep(getRetryDelayMs(attempt));
          continue;
        }

        const nonRetryableError = new Error(
          `[${logPrefix}] Embedding request failed (${response.status}): ${errorText || "no response body"}`
        );
        nonRetryableError.retryable = false;
        throw nonRetryableError;
      } catch (error) {
        if (attempt > maxRetries || error?.retryable === false) {
          throw error;
        }
        await sleep(getRetryDelayMs(attempt));
      }
    }

    throw new Error(`[${logPrefix}] Exhausted retries for embedding request`);
  }

  async function flushNow() {
    if (inFlight || queue.length === 0) {
      return;
    }

    inFlight = true;
    const batch = queue.splice(0, batchSize);
    const batchTexts = batch.map((item) => item.text);

    try {
      const embeddings = await requestEmbeddings(batchTexts);
      if (!Array.isArray(embeddings) || embeddings.length !== batch.length) {
        throw new Error(
          `[${logPrefix}] Embedding count mismatch. expected=${batch.length}, got=${embeddings?.length ?? 0}`
        );
      }

      for (let i = 0; i < batch.length; i += 1) {
        const vector = embeddings[i];
        if (!Array.isArray(vector)) {
          batch[i].reject(new Error(`[${logPrefix}] Missing embedding vector`));
          continue;
        }
        batch[i].resolve({
          data: Float32Array.from(vector),
          dims: [1, vector.length]
        });
      }
    } catch (error) {
      for (const item of batch) {
        item.reject(error);
      }
    } finally {
      inFlight = false;
      if (queue.length > 0) {
        queueMicrotask(flushNow);
      }
    }
  }

  function scheduleFlush() {
    if (queue.length >= batchSize) {
      if (flushTimer) {
        clearTimeout(flushTimer);
        flushTimer = null;
      }
      queueMicrotask(flushNow);
      return;
    }

    if (flushTimer) {
      return;
    }

    flushTimer = setTimeout(() => {
      flushTimer = null;
      queueMicrotask(flushNow);
    }, batchFlushMs);
  }

  async function embed(text) {
    if (typeof text !== "string" || text.trim().length === 0) {
      throw new Error(`[${logPrefix}] embed(text) requires a non-empty string`);
    }

    return new Promise((resolve, reject) => {
      queue.push({ text, resolve, reject });
      scheduleFlush();
    });
  }

  embed.modelName = modelName;
  embed.dimension = dimension;
  embed.device = "api";
  embed.provider = provider;

  return embed;
}
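For reference, a usage sketch for the factory above (illustrative only, not code from the package): concurrent embed() calls are queued and coalesced into a single HTTP request per batch, up to 24 texts by default, flushed after 12 ms of inactivity, with exponential backoff (150 ms, doubling to a 2 s cap) on 429 and 5xx responses.

// Illustrative sketch — not part of the package. Requires a real
// GEMINI_API_KEY in the environment.
import { createGeminiEmbedder } from "./lib/gemini-embedder.js";

const embed = await createGeminiEmbedder({
  embeddingProvider: "gemini",
  geminiApiKey: process.env.GEMINI_API_KEY,
  geminiDimensions: 768
});

// Both calls land in the same micro-batch and share one request.
const [a, b] = await Promise.all([embed("hello world"), embed("semantic search")]);
console.log(a.dims);                        // [1, 768]
console.log(embed.modelName, embed.device); // gemini-embedding-001 api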