@mrc2204/agent-smart-memo 4.0.8 → 4.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/hooks/auto-capture.d.ts.map +1 -1
- package/dist/hooks/auto-capture.js +37 -3
- package/dist/hooks/auto-capture.js.map +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +35 -0
- package/dist/index.js.map +1 -1
- package/dist/scripts/reembed-collection.d.ts +2 -0
- package/dist/scripts/reembed-collection.d.ts.map +1 -0
- package/dist/scripts/reembed-collection.js +165 -0
- package/dist/scripts/reembed-collection.js.map +1 -0
- package/dist/services/embedding-capability-registry.d.ts +23 -0
- package/dist/services/embedding-capability-registry.d.ts.map +1 -0
- package/dist/services/embedding-capability-registry.js +56 -0
- package/dist/services/embedding-capability-registry.js.map +1 -0
- package/dist/services/embedding.d.ts +50 -10
- package/dist/services/embedding.d.ts.map +1 -1
- package/dist/services/embedding.js +486 -74
- package/dist/services/embedding.js.map +1 -1
- package/dist/services/qdrant.d.ts +25 -22
- package/dist/services/qdrant.d.ts.map +1 -1
- package/dist/services/qdrant.js +119 -25
- package/dist/services/qdrant.js.map +1 -1
- package/dist/tools/memory_store.d.ts.map +1 -1
- package/dist/tools/memory_store.js +29 -4
- package/dist/tools/memory_store.js.map +1 -1
- package/package.json +2 -2
|
@@ -1,33 +1,82 @@
|
|
|
1
|
+
import { EmbeddingCapabilityRegistry } from "./embedding-capability-registry.js";
|
|
2
|
+
/**
 * Error raised when the embedding endpoint answers with a non-OK HTTP status.
 *
 * Carries the numeric status code and a preview of the response body so that
 * callers can classify the failure without re-reading the response.
 */
class EmbeddingHttpError extends Error {
    status;
    bodyPreview;
    /**
     * @param {number} status - HTTP status code returned by the endpoint.
     * @param {string} bodyPreview - Leading part of the response body.
     * @param {string} [message] - Optional override for the error message.
     */
    constructor(status, bodyPreview, message) {
        const text = message ? message : `Embedding API error: ${status}`;
        super(text);
        Object.assign(this, { name: "EmbeddingHttpError", status, bodyPreview });
    }
}
|
|
12
|
+
// Seed capability values per known embedding model. `seedMaxTokens` is the
// starting context-window assumption, `safeRatio` and `reserveTokens` shrink it
// into a chunk budget, and `vectorDim` is the expected embedding dimension.
const MODEL_DEFAULTS = {
    "text-embedding-3-small": {
        seedMaxTokens: 8192,
        safeRatio: 0.82,
        reserveTokens: 64,
        vectorDim: 1536,
    },
    "text-embedding-3-large": {
        seedMaxTokens: 8192,
        safeRatio: 0.82,
        reserveTokens: 64,
        vectorDim: 3072,
    },
    "qwen3-embedding:0.6b": {
        seedMaxTokens: 8192,
        safeRatio: 0.76,
        reserveTokens: 80,
        vectorDim: 1024,
    },
    "qwen3-embedding:4b": {
        seedMaxTokens: 8192,
        safeRatio: 0.72,
        reserveTokens: 128,
        vectorDim: 2560,
    },
};
|
|
1
18
|
/**
|
|
2
|
-
* Embedding service client
|
|
19
|
+
* Embedding service client with runtime capability calibration + persistence
|
|
3
20
|
*/
|
|
4
21
|
export class EmbeddingClient {
|
|
5
22
|
config;
|
|
6
23
|
logger;
|
|
7
|
-
|
|
24
|
+
registry;
|
|
25
|
+
capability;
|
|
26
|
+
activeEndpoint = "";
|
|
27
|
+
provider = "auto";
|
|
28
|
+
modelKey = "";
|
|
29
|
+
ready;
|
|
8
30
|
constructor(config, logger) {
|
|
31
|
+
const model = config.model || "qwen3-embedding:0.6b";
|
|
32
|
+
const defaults = MODEL_DEFAULTS[model] || { seedMaxTokens: 4096, safeRatio: 0.72, reserveTokens: 96, vectorDim: config.dimensions || 1024 };
|
|
9
33
|
this.config = {
|
|
10
34
|
embeddingApiUrl: config.embeddingApiUrl || "http://localhost:11434",
|
|
11
35
|
timeout: config.timeout || 30000,
|
|
12
|
-
model
|
|
36
|
+
model,
|
|
37
|
+
dimensions: config.dimensions || defaults.vectorDim,
|
|
38
|
+
stateDir: config.stateDir || process.env.OPENCLAW_STATE_DIR || `${process.env.HOME}/.openclaw`,
|
|
13
39
|
};
|
|
14
40
|
this.logger = logger || console;
|
|
15
|
-
this.
|
|
41
|
+
this.registry = new EmbeddingCapabilityRegistry(this.config.stateDir, this.logger);
|
|
42
|
+
this.ready = this.initializeCapabilities();
|
|
16
43
|
}
|
|
17
44
|
resolveEmbeddingEndpoints(rawBaseUrl) {
|
|
18
45
|
const base = (rawBaseUrl || "").trim();
|
|
19
46
|
const normalizedBase = (base || "http://localhost:11434").replace(/\/+$/, "");
|
|
20
|
-
// If already a full embeddings path, use directly.
|
|
21
47
|
if (/(\/v1\/embeddings|\/api\/embeddings)\/?$/i.test(normalizedBase)) {
|
|
22
48
|
return [normalizedBase];
|
|
23
49
|
}
|
|
24
|
-
// Smart handling for base URL only:
|
|
25
|
-
// 1) Prefer OpenAI-compatible /v1/embeddings (for proxypal/openai-like services)
|
|
26
|
-
// 2) Fallback to Ollama /api/embeddings (for backward compatibility)
|
|
27
50
|
return [`${normalizedBase}/v1/embeddings`, `${normalizedBase}/api/embeddings`];
|
|
28
51
|
}
|
|
29
|
-
|
|
30
|
-
|
|
52
|
+
detectProvider(endpoint) {
|
|
53
|
+
if (/\/v1\/embeddings\/?$/i.test(endpoint))
|
|
54
|
+
return "openai";
|
|
55
|
+
if (/\/api\/embeddings\/?$/i.test(endpoint))
|
|
56
|
+
return "ollama";
|
|
57
|
+
return "auto";
|
|
58
|
+
}
|
|
59
|
+
getDefaults() {
|
|
60
|
+
return MODEL_DEFAULTS[this.config.model] || {
|
|
61
|
+
seedMaxTokens: 4096,
|
|
62
|
+
safeRatio: 0.72,
|
|
63
|
+
reserveTokens: 96,
|
|
64
|
+
vectorDim: this.config.dimensions,
|
|
65
|
+
};
|
|
66
|
+
}
|
|
67
|
+
buildModelKey(provider, endpoint) {
|
|
68
|
+
return `${provider}::${endpoint}::${this.config.model}`;
|
|
69
|
+
}
|
|
70
|
+
tokenBudget() {
|
|
71
|
+
const discovered = Math.max(256, this.capability.discoveredMaxTokens || this.capability.seedMaxTokens);
|
|
72
|
+
const rawBudget = Math.floor(discovered * this.capability.safeRatio) - this.capability.reserveTokens;
|
|
73
|
+
return Math.max(128, rawBudget);
|
|
74
|
+
}
|
|
75
|
+
// conservative estimator: whitespace tokens + char heuristic safeguard
|
|
76
|
+
estimateTokens(text) {
|
|
77
|
+
const whitespaceTokens = text.trim() ? text.trim().split(/\s+/).length : 0;
|
|
78
|
+
const charTokens = Math.ceil(text.length / 4);
|
|
79
|
+
return Math.max(1, Math.max(whitespaceTokens, charTokens));
|
|
31
80
|
}
|
|
32
81
|
normalizeInput(input) {
|
|
33
82
|
if (Array.isArray(input)) {
|
|
@@ -41,89 +90,446 @@ export class EmbeddingClient {
|
|
|
41
90
|
}
|
|
42
91
|
return [];
|
|
43
92
|
}
|
|
93
|
+
splitIntoSentences(text) {
|
|
94
|
+
return text
|
|
95
|
+
.split(/(?<=[\n\.!?;])\s+/)
|
|
96
|
+
.map((s) => s.trim())
|
|
97
|
+
.filter(Boolean);
|
|
98
|
+
}
|
|
99
|
+
chunkTextByTokenBudget(text, tokenBudget) {
|
|
100
|
+
if (this.estimateTokens(text) <= tokenBudget)
|
|
101
|
+
return [text];
|
|
102
|
+
const sentences = this.splitIntoSentences(text);
|
|
103
|
+
if (sentences.length === 0)
|
|
104
|
+
return [text.slice(0, Math.max(64, tokenBudget * 4))];
|
|
105
|
+
const chunks = [];
|
|
106
|
+
let current = "";
|
|
107
|
+
const pushCurrent = () => {
|
|
108
|
+
const trimmed = current.trim();
|
|
109
|
+
if (trimmed.length > 0)
|
|
110
|
+
chunks.push(trimmed);
|
|
111
|
+
current = "";
|
|
112
|
+
};
|
|
113
|
+
for (const sentence of sentences) {
|
|
114
|
+
const next = current ? `${current} ${sentence}` : sentence;
|
|
115
|
+
if (this.estimateTokens(next) <= tokenBudget) {
|
|
116
|
+
current = next;
|
|
117
|
+
continue;
|
|
118
|
+
}
|
|
119
|
+
if (current)
|
|
120
|
+
pushCurrent();
|
|
121
|
+
if (this.estimateTokens(sentence) <= tokenBudget) {
|
|
122
|
+
current = sentence;
|
|
123
|
+
continue;
|
|
124
|
+
}
|
|
125
|
+
// ultra-long sentence fallback: split by words with hard guard
|
|
126
|
+
const words = sentence.split(/\s+/).filter(Boolean);
|
|
127
|
+
let wordChunk = "";
|
|
128
|
+
for (const word of words) {
|
|
129
|
+
const candidate = wordChunk ? `${wordChunk} ${word}` : word;
|
|
130
|
+
if (this.estimateTokens(candidate) <= tokenBudget) {
|
|
131
|
+
wordChunk = candidate;
|
|
132
|
+
}
|
|
133
|
+
else {
|
|
134
|
+
if (wordChunk)
|
|
135
|
+
chunks.push(wordChunk);
|
|
136
|
+
wordChunk = word;
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
if (wordChunk)
|
|
140
|
+
chunks.push(wordChunk);
|
|
141
|
+
}
|
|
142
|
+
if (current)
|
|
143
|
+
pushCurrent();
|
|
144
|
+
return chunks.filter((c) => this.estimateTokens(c) <= tokenBudget + 2);
|
|
145
|
+
}
|
|
146
|
+
l2Normalize(vector) {
|
|
147
|
+
const norm = Math.sqrt(vector.reduce((sum, v) => sum + v * v, 0));
|
|
148
|
+
if (!Number.isFinite(norm) || norm === 0)
|
|
149
|
+
return vector;
|
|
150
|
+
return vector.map((v) => v / norm);
|
|
151
|
+
}
|
|
152
|
+
weightedAverage(vectors, weights) {
|
|
153
|
+
if (vectors.length === 0)
|
|
154
|
+
return [];
|
|
155
|
+
const dim = vectors[0].length;
|
|
156
|
+
const out = new Array(dim).fill(0);
|
|
157
|
+
const weightSum = weights.reduce((a, b) => a + b, 0) || 1;
|
|
158
|
+
for (let i = 0; i < vectors.length; i++) {
|
|
159
|
+
const vec = vectors[i];
|
|
160
|
+
const w = weights[i] || 1;
|
|
161
|
+
for (let d = 0; d < dim; d++) {
|
|
162
|
+
out[d] += vec[d] * w;
|
|
163
|
+
}
|
|
164
|
+
}
|
|
165
|
+
for (let d = 0; d < dim; d++) {
|
|
166
|
+
out[d] /= weightSum;
|
|
167
|
+
}
|
|
168
|
+
return this.l2Normalize(out);
|
|
169
|
+
}
|
|
170
|
+
/**
 * Decide whether an error looks like a "input too long" rejection.
 *
 * Only EmbeddingHttpError instances with status 400, 413, 422 or 500 qualify,
 * and only when the response body preview matches one of the known phrases.
 *
 * @param {unknown} error - Error thrown by an embedding request.
 * @returns {boolean} True when the error is treated as a context-length error.
 */
isContextLengthError(error) {
    const candidateStatuses = [400, 413, 422, 500];
    const isCandidate = error instanceof EmbeddingHttpError && candidateStatuses.includes(error.status);
    if (!isCandidate) {
        return false;
    }
    const body = error.bodyPreview || "";
    return /context length|maximum context|too many tokens|exceed|token limit|8192|input length/i.test(body);
}
|
|
177
|
+
extractTokenLimitFromError(errorText) {
|
|
178
|
+
const normalized = errorText || "";
|
|
179
|
+
const patterns = [
|
|
180
|
+
/(?:context length|maximum context|token(?:s)? limit)[^\d]*(\d{3,6})/i,
|
|
181
|
+
/exceeds[^\d]*(\d{3,6})/i,
|
|
182
|
+
/max(?:imum)?[^\d]*(\d{3,6})\s*tokens?/i,
|
|
183
|
+
];
|
|
184
|
+
for (const p of patterns) {
|
|
185
|
+
const m = normalized.match(p);
|
|
186
|
+
if (m?.[1]) {
|
|
187
|
+
const parsed = Number(m[1]);
|
|
188
|
+
if (Number.isFinite(parsed) && parsed >= 128)
|
|
189
|
+
return parsed;
|
|
190
|
+
}
|
|
191
|
+
}
|
|
192
|
+
return null;
|
|
193
|
+
}
|
|
194
|
+
/**
 * Lower the recorded context window after a context-length rejection.
 *
 * Uses the limit parsed from the error body when available (capped at the
 * current value), otherwise 85% of the current value; never below 128. The
 * capability is only rewritten and persisted when the new value is smaller.
 *
 * @param {{bodyPreview?: string}} error - The context-length error.
 * @returns {Promise<void>}
 */
async updateCapabilityFromContextError(error) {
    const hinted = this.extractTokenLimitFromError(error.bodyPreview || "");
    const previous = this.capability.discoveredMaxTokens || this.capability.seedMaxTokens;
    let candidate;
    if (hinted) {
        candidate = Math.min(previous, hinted);
    }
    else {
        candidate = Math.floor(previous * 0.85);
    }
    const refined = Math.max(128, candidate);
    if (!(refined < previous)) {
        return;
    }
    this.capability = {
        ...this.capability,
        discoveredMaxTokens: refined,
        updatedAt: new Date().toISOString(),
        source: "error-feedback",
    };
    await this.registry.set(this.modelKey, this.capability);
    this.logger.warn(`[Embedding] capability refined from error-feedback: ${previous} -> ${refined} (modelKey=${this.modelKey})`);
}
|
|
210
|
+
async initializeCapabilities() {
|
|
211
|
+
const endpoints = this.resolveEmbeddingEndpoints(this.config.embeddingApiUrl);
|
|
212
|
+
const endpoint = endpoints[0];
|
|
213
|
+
const provider = this.detectProvider(endpoint);
|
|
214
|
+
this.activeEndpoint = endpoint;
|
|
215
|
+
this.provider = provider;
|
|
216
|
+
this.modelKey = this.buildModelKey(provider, endpoint);
|
|
217
|
+
const defaults = this.getDefaults();
|
|
218
|
+
const existing = await this.registry.get(this.modelKey);
|
|
219
|
+
this.capability = existing || {
|
|
220
|
+
seedMaxTokens: defaults.seedMaxTokens,
|
|
221
|
+
discoveredMaxTokens: defaults.seedMaxTokens,
|
|
222
|
+
safeRatio: defaults.safeRatio,
|
|
223
|
+
reserveTokens: defaults.reserveTokens,
|
|
224
|
+
vectorDim: defaults.vectorDim,
|
|
225
|
+
updatedAt: new Date().toISOString(),
|
|
226
|
+
source: "docs",
|
|
227
|
+
};
|
|
228
|
+
if (!existing) {
|
|
229
|
+
await this.registry.set(this.modelKey, this.capability);
|
|
230
|
+
}
|
|
231
|
+
// light startup calibration (max 1/day)
|
|
232
|
+
const ageMs = Date.now() - new Date(this.capability.updatedAt).getTime();
|
|
233
|
+
if (!Number.isFinite(ageMs) || ageMs > 24 * 60 * 60 * 1000) {
|
|
234
|
+
await this.calibrateRuntimeCapability();
|
|
235
|
+
}
|
|
236
|
+
}
|
|
237
|
+
async readEndpointMetadata() {
|
|
238
|
+
const endpoint = this.activeEndpoint;
|
|
239
|
+
const provider = this.detectProvider(endpoint);
|
|
240
|
+
try {
|
|
241
|
+
if (provider === "ollama") {
|
|
242
|
+
const base = endpoint.replace(/\/api\/embeddings\/?$/i, "");
|
|
243
|
+
const res = await fetch(`${base}/api/tags`, { signal: AbortSignal.timeout(4000) });
|
|
244
|
+
if (!res.ok)
|
|
245
|
+
return {};
|
|
246
|
+
const json = await res.json();
|
|
247
|
+
const models = Array.isArray(json?.models) ? json.models : [];
|
|
248
|
+
const modelInfo = models.find((m) => m?.model === this.config.model || m?.name === this.config.model);
|
|
249
|
+
const dimFromModel = Number(modelInfo?.details?.embedding_length || modelInfo?.details?.dimensions || 0);
|
|
250
|
+
return {
|
|
251
|
+
vectorDim: dimFromModel > 0 ? dimFromModel : undefined,
|
|
252
|
+
};
|
|
253
|
+
}
|
|
254
|
+
}
|
|
255
|
+
catch {
|
|
256
|
+
// best effort metadata
|
|
257
|
+
}
|
|
258
|
+
return {};
|
|
259
|
+
}
|
|
260
|
+
async probeWithinBudget(tokenTarget) {
|
|
261
|
+
const sample = Array(tokenTarget).fill("t").join(" ");
|
|
262
|
+
try {
|
|
263
|
+
await this.embedChunksFromApi([sample]);
|
|
264
|
+
return true;
|
|
265
|
+
}
|
|
266
|
+
catch (error) {
|
|
267
|
+
if (this.isContextLengthError(error))
|
|
268
|
+
return false;
|
|
269
|
+
throw error;
|
|
270
|
+
}
|
|
271
|
+
}
|
|
272
|
+
async probeContextWindow(seed) {
|
|
273
|
+
const clamp = (n) => Math.max(128, Math.floor(n));
|
|
274
|
+
let low = 256;
|
|
275
|
+
let high = clamp(seed);
|
|
276
|
+
// stepped exploration (safe / low spam)
|
|
277
|
+
const steps = [0.5, 0.75, 1, 1.1].map((x) => clamp(seed * x));
|
|
278
|
+
for (const s of steps) {
|
|
279
|
+
let ok = false;
|
|
280
|
+
try {
|
|
281
|
+
ok = await this.probeWithinBudget(s);
|
|
282
|
+
}
|
|
283
|
+
catch {
|
|
284
|
+
continue;
|
|
285
|
+
}
|
|
286
|
+
if (ok) {
|
|
287
|
+
low = Math.max(low, s);
|
|
288
|
+
high = Math.max(high, s);
|
|
289
|
+
}
|
|
290
|
+
else {
|
|
291
|
+
high = Math.min(high, s);
|
|
292
|
+
break;
|
|
293
|
+
}
|
|
294
|
+
}
|
|
295
|
+
// binary search refinement, max 5 probes
|
|
296
|
+
for (let i = 0; i < 5 && high - low > 96; i++) {
|
|
297
|
+
const mid = clamp((low + high) / 2);
|
|
298
|
+
const ok = await this.probeWithinBudget(mid);
|
|
299
|
+
if (ok)
|
|
300
|
+
low = mid;
|
|
301
|
+
else
|
|
302
|
+
high = mid;
|
|
303
|
+
}
|
|
304
|
+
return clamp(low);
|
|
305
|
+
}
|
|
306
|
+
async calibrateRuntimeCapability(force = false) {
|
|
307
|
+
await this.ready;
|
|
308
|
+
if (!force) {
|
|
309
|
+
const ageMs = Date.now() - new Date(this.capability.updatedAt).getTime();
|
|
310
|
+
if (Number.isFinite(ageMs) && ageMs < 30 * 60 * 1000)
|
|
311
|
+
return;
|
|
312
|
+
}
|
|
313
|
+
const metadata = await this.readEndpointMetadata();
|
|
314
|
+
const seed = Math.max(256, metadata.discoveredMaxTokens || metadata.seedMaxTokens || this.capability.seedMaxTokens);
|
|
315
|
+
let discovered = this.capability.discoveredMaxTokens;
|
|
316
|
+
try {
|
|
317
|
+
discovered = await this.probeContextWindow(seed);
|
|
318
|
+
}
|
|
319
|
+
catch (error) {
|
|
320
|
+
this.logger.warn(`[Embedding] calibration probe skipped: ${error.message}`);
|
|
321
|
+
}
|
|
322
|
+
this.capability = {
|
|
323
|
+
...this.capability,
|
|
324
|
+
discoveredMaxTokens: Math.max(128, discovered || seed),
|
|
325
|
+
vectorDim: metadata.vectorDim || this.capability.vectorDim,
|
|
326
|
+
updatedAt: new Date().toISOString(),
|
|
327
|
+
source: "probe",
|
|
328
|
+
};
|
|
329
|
+
await this.registry.set(this.modelKey, this.capability);
|
|
330
|
+
this.logger.info(`[Embedding] calibrated capability modelKey=${this.modelKey} maxTokens=${this.capability.discoveredMaxTokens} vectorDim=${this.capability.vectorDim}`);
|
|
331
|
+
}
|
|
332
|
+
async getVectorDimensionHint() {
|
|
333
|
+
await this.ready;
|
|
334
|
+
return this.capability.vectorDim || this.config.dimensions;
|
|
335
|
+
}
|
|
336
|
+
async getModelKey() {
|
|
337
|
+
await this.ready;
|
|
338
|
+
return this.modelKey;
|
|
339
|
+
}
|
|
44
340
|
/**
|
|
45
|
-
*
|
|
46
|
-
* Fallback to hash-based embedding if API unavailable
|
|
341
|
+
* Backward-compatible method
|
|
47
342
|
*/
|
|
48
343
|
async embed(text) {
|
|
344
|
+
const result = await this.embedDetailed(text);
|
|
345
|
+
return result.vector;
|
|
346
|
+
}
|
|
347
|
+
/**
|
|
348
|
+
* New method with calibration-aware adaptive chunking + metadata
|
|
349
|
+
*/
|
|
350
|
+
async embedDetailed(text) {
|
|
351
|
+
await this.ready;
|
|
49
352
|
const normalizedInput = this.normalizeInput(text);
|
|
50
|
-
// Validate/filter empty input BEFORE calling embedding API
|
|
51
353
|
if (normalizedInput.length === 0) {
|
|
52
354
|
this.logger.warn("[Embedding] Skip API call: empty input after trim/filter");
|
|
53
|
-
return
|
|
355
|
+
return {
|
|
356
|
+
vector: this.embedFromHash(""),
|
|
357
|
+
metadata: {
|
|
358
|
+
embedding_chunked: false,
|
|
359
|
+
embedding_chunks_count: 0,
|
|
360
|
+
embedding_chunking_strategy: "array_batch_weighted_avg",
|
|
361
|
+
embedding_model: this.config.model,
|
|
362
|
+
embedding_model_key: this.modelKey,
|
|
363
|
+
embedding_provider: this.provider,
|
|
364
|
+
embedding_max_tokens: this.capability.discoveredMaxTokens,
|
|
365
|
+
embedding_safe_chunk_tokens: this.tokenBudget(),
|
|
366
|
+
embedding_source: this.capability.source,
|
|
367
|
+
embedding_fallback_hash: true,
|
|
368
|
+
},
|
|
369
|
+
};
|
|
54
370
|
}
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
371
|
+
const mergedText = normalizedInput.join("\n\n");
|
|
372
|
+
const baseBudget = this.tokenBudget();
|
|
373
|
+
// retry policy with progressive budget reduction
|
|
374
|
+
const safetyMultipliers = [1, 0.8, 0.65, 0.5, 0.4, 0.3];
|
|
375
|
+
for (const mul of safetyMultipliers) {
|
|
376
|
+
const safeChunkTokens = Math.max(128, Math.floor(baseBudget * mul));
|
|
377
|
+
const chunks = this.chunkTextByTokenBudget(mergedText, safeChunkTokens);
|
|
378
|
+
const chunkWeights = chunks.map((c) => this.estimateTokens(c));
|
|
379
|
+
// hard guard: never send chunk above discovered budget
|
|
380
|
+
if (chunks.some((chunk) => this.estimateTokens(chunk) > safeChunkTokens + 2)) {
|
|
381
|
+
continue;
|
|
382
|
+
}
|
|
383
|
+
try {
|
|
384
|
+
const vectors = await this.embedChunksFromApi(chunks);
|
|
385
|
+
const vector = vectors.length === 1
|
|
386
|
+
? this.l2Normalize(vectors[0])
|
|
387
|
+
: this.weightedAverage(vectors, chunkWeights);
|
|
388
|
+
return {
|
|
389
|
+
vector,
|
|
390
|
+
metadata: {
|
|
391
|
+
embedding_chunked: chunks.length > 1,
|
|
392
|
+
embedding_chunks_count: chunks.length,
|
|
393
|
+
embedding_chunking_strategy: "array_batch_weighted_avg",
|
|
394
|
+
embedding_model: this.config.model,
|
|
395
|
+
embedding_model_key: this.modelKey,
|
|
396
|
+
embedding_provider: this.provider,
|
|
397
|
+
embedding_max_tokens: this.capability.discoveredMaxTokens,
|
|
398
|
+
embedding_safe_chunk_tokens: safeChunkTokens,
|
|
399
|
+
embedding_source: this.capability.source,
|
|
400
|
+
embedding_fallback_hash: false,
|
|
401
|
+
},
|
|
402
|
+
};
|
|
403
|
+
}
|
|
404
|
+
catch (error) {
|
|
405
|
+
if (this.isContextLengthError(error)) {
|
|
406
|
+
await this.updateCapabilityFromContextError(error);
|
|
407
|
+
this.logger.warn(`[Embedding] context-length detected. retry with smaller chunk budget=${safeChunkTokens} modelKey=${this.modelKey}`);
|
|
408
|
+
continue;
|
|
409
|
+
}
|
|
410
|
+
// non context-length error -> fallback hash immediately
|
|
411
|
+
this.logger.error(`[Embedding][HIGH] API failed; fallback to hash embedding. reason=${error.message} modelKey=${this.modelKey}`);
|
|
412
|
+
return {
|
|
413
|
+
vector: this.embedFromHash(mergedText),
|
|
414
|
+
metadata: {
|
|
415
|
+
embedding_chunked: chunks.length > 1,
|
|
416
|
+
embedding_chunks_count: chunks.length,
|
|
417
|
+
embedding_chunking_strategy: "array_batch_weighted_avg",
|
|
418
|
+
embedding_model: this.config.model,
|
|
419
|
+
embedding_model_key: this.modelKey,
|
|
420
|
+
embedding_provider: this.provider,
|
|
421
|
+
embedding_max_tokens: this.capability.discoveredMaxTokens,
|
|
422
|
+
embedding_safe_chunk_tokens: safeChunkTokens,
|
|
423
|
+
embedding_source: this.capability.source,
|
|
424
|
+
embedding_fallback_hash: true,
|
|
425
|
+
},
|
|
426
|
+
};
|
|
427
|
+
}
|
|
58
428
|
}
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
429
|
+
// exhausted retries
|
|
430
|
+
this.logger.error(`[Embedding][CRITICAL] exhausted context retries; fallback hash modelKey=${this.modelKey}`);
|
|
431
|
+
return {
|
|
432
|
+
vector: this.embedFromHash(mergedText),
|
|
433
|
+
metadata: {
|
|
434
|
+
embedding_chunked: true,
|
|
435
|
+
embedding_chunks_count: Math.max(1, this.chunkTextByTokenBudget(mergedText, Math.max(128, Math.floor(baseBudget * 0.3))).length),
|
|
436
|
+
embedding_chunking_strategy: "array_batch_weighted_avg",
|
|
437
|
+
embedding_model: this.config.model,
|
|
438
|
+
embedding_model_key: this.modelKey,
|
|
439
|
+
embedding_provider: this.provider,
|
|
440
|
+
embedding_max_tokens: this.capability.discoveredMaxTokens,
|
|
441
|
+
embedding_safe_chunk_tokens: Math.max(128, Math.floor(baseBudget * 0.3)),
|
|
442
|
+
embedding_source: this.capability.source,
|
|
443
|
+
embedding_fallback_hash: true,
|
|
444
|
+
},
|
|
445
|
+
};
|
|
446
|
+
}
|
|
447
|
+
async embedChunksFromApi(chunks) {
|
|
448
|
+
if (chunks.length === 0) {
|
|
449
|
+
throw new Error("No chunks to embed");
|
|
62
450
|
}
|
|
451
|
+
const endpoints = this.resolveEmbeddingEndpoints(this.config.embeddingApiUrl);
|
|
452
|
+
let lastError = null;
|
|
453
|
+
for (const url of endpoints) {
|
|
454
|
+
const useOpenAiFormat = /\/v1\/embeddings\/?$/i.test(url);
|
|
455
|
+
try {
|
|
456
|
+
this.activeEndpoint = url;
|
|
457
|
+
this.provider = this.detectProvider(url);
|
|
458
|
+
this.modelKey = this.buildModelKey(this.provider, this.activeEndpoint);
|
|
459
|
+
if (!useOpenAiFormat && chunks.length > 1) {
|
|
460
|
+
// Ollama /api/embeddings: sequential requests
|
|
461
|
+
const vectors = [];
|
|
462
|
+
for (const c of chunks) {
|
|
463
|
+
vectors.push(await this.embedSingle(url, false, c));
|
|
464
|
+
}
|
|
465
|
+
return vectors;
|
|
466
|
+
}
|
|
467
|
+
const vectors = await this.embedBatch(url, useOpenAiFormat, chunks);
|
|
468
|
+
if (vectors.length !== chunks.length) {
|
|
469
|
+
throw new Error(`Embedding vector count mismatch: expected=${chunks.length}, got=${vectors.length}`);
|
|
470
|
+
}
|
|
471
|
+
return vectors;
|
|
472
|
+
}
|
|
473
|
+
catch (error) {
|
|
474
|
+
lastError = error;
|
|
475
|
+
if (this.isContextLengthError(error)) {
|
|
476
|
+
throw error;
|
|
477
|
+
}
|
|
478
|
+
if (error instanceof EmbeddingHttpError &&
|
|
479
|
+
[404, 429].includes(error.status) &&
|
|
480
|
+
endpoints.length > 1 &&
|
|
481
|
+
url !== endpoints[endpoints.length - 1]) {
|
|
482
|
+
continue;
|
|
483
|
+
}
|
|
484
|
+
if (url !== endpoints[endpoints.length - 1]) {
|
|
485
|
+
continue;
|
|
486
|
+
}
|
|
487
|
+
}
|
|
488
|
+
}
|
|
489
|
+
throw lastError || new Error("Embedding API error: no endpoint succeeded");
|
|
63
490
|
}
|
|
64
|
-
|
|
65
|
-
* Get embedding from API
|
|
66
|
-
*/
|
|
67
|
-
async embedFromApi(input) {
|
|
68
|
-
this.logger.debug?.(`[Embedding] Calling API with inputCount=${input.length} firstItemLength=${input[0]?.length || 0} preview=${JSON.stringify((input[0] || "").slice(0, 80))}`);
|
|
491
|
+
async embedBatch(url, useOpenAiFormat, chunks) {
|
|
69
492
|
const controller = new AbortController();
|
|
70
493
|
const timeoutId = setTimeout(() => controller.abort(), this.config.timeout);
|
|
71
494
|
try {
|
|
72
|
-
const
|
|
73
|
-
let
|
|
74
|
-
for (const url of endpoints) {
|
|
75
|
-
const useOpenAiFormat = this.isOpenAIEmbeddingEndpoint(url);
|
|
495
|
+
const max429Retries = 3;
|
|
496
|
+
for (let attempt = 0; attempt <= max429Retries; attempt++) {
|
|
76
497
|
const response = await fetch(url, {
|
|
77
498
|
method: "POST",
|
|
78
|
-
headers: {
|
|
79
|
-
"Content-Type": "application/json",
|
|
80
|
-
},
|
|
499
|
+
headers: { "Content-Type": "application/json" },
|
|
81
500
|
body: JSON.stringify(useOpenAiFormat
|
|
82
|
-
? {
|
|
83
|
-
|
|
84
|
-
input,
|
|
85
|
-
}
|
|
86
|
-
: {
|
|
87
|
-
model: this.config.model,
|
|
88
|
-
prompt: input[0],
|
|
89
|
-
}),
|
|
501
|
+
? { model: this.config.model, input: chunks }
|
|
502
|
+
: { model: this.config.model, prompt: chunks[0] }),
|
|
90
503
|
signal: controller.signal,
|
|
91
504
|
});
|
|
505
|
+
if (response.status === 429 && attempt < max429Retries) {
|
|
506
|
+
const backoffMs = Math.min(4000, 300 * Math.pow(2, attempt));
|
|
507
|
+
this.logger.warn(`[Embedding] 429 rate limit. retry in ${backoffMs}ms (attempt ${attempt + 1}/${max429Retries})`);
|
|
508
|
+
await new Promise((r) => setTimeout(r, backoffMs));
|
|
509
|
+
continue;
|
|
510
|
+
}
|
|
92
511
|
if (!response.ok) {
|
|
93
512
|
const errorText = await response.text().catch(() => "Unknown error");
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
this.logger.error(`[Embedding] 400 schema debug @ ${url}: ${JSON.stringify({
|
|
97
|
-
model: this.config.model,
|
|
98
|
-
inputType: Array.isArray(input) ? "array" : typeof input,
|
|
99
|
-
inputLength: Array.isArray(input) ? input.length : 0,
|
|
100
|
-
firstItemLength: input[0]?.length || 0,
|
|
101
|
-
})}`);
|
|
102
|
-
}
|
|
103
|
-
// If this endpoint not found and we still have fallback endpoint, continue.
|
|
104
|
-
if (response.status === 404 && endpoints.length > 1 && url !== endpoints[endpoints.length - 1]) {
|
|
105
|
-
continue;
|
|
106
|
-
}
|
|
107
|
-
lastError = new Error(`Embedding API error: ${response.status}`);
|
|
108
|
-
break;
|
|
513
|
+
const preview = errorText.substring(0, 500);
|
|
514
|
+
throw new EmbeddingHttpError(response.status, preview);
|
|
109
515
|
}
|
|
110
516
|
const data = await response.json();
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
517
|
+
if (!useOpenAiFormat) {
|
|
518
|
+
if (data.embedding && Array.isArray(data.embedding)) {
|
|
519
|
+
return [data.embedding];
|
|
520
|
+
}
|
|
521
|
+
throw new Error("Invalid Ollama embedding response format");
|
|
115
522
|
}
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
523
|
+
if (Array.isArray(data.data)) {
|
|
524
|
+
const vectors = data.data
|
|
525
|
+
.map((d) => d?.embedding)
|
|
526
|
+
.filter((v) => Array.isArray(v));
|
|
527
|
+
if (vectors.length > 0)
|
|
528
|
+
return vectors;
|
|
120
529
|
}
|
|
121
|
-
|
|
122
|
-
lastError = new Error("Invalid embedding response format");
|
|
123
|
-
break;
|
|
530
|
+
throw new Error("Invalid OpenAI embedding response format");
|
|
124
531
|
}
|
|
125
|
-
|
|
126
|
-
throw lastError || new Error("Embedding API error: no endpoint succeeded");
|
|
532
|
+
throw new Error("Embedding API 429 retries exhausted");
|
|
127
533
|
}
|
|
128
534
|
catch (error) {
|
|
129
535
|
if (error.name === "AbortError") {
|
|
@@ -131,24 +537,30 @@ export class EmbeddingClient {
|
|
|
131
537
|
}
|
|
132
538
|
throw error;
|
|
133
539
|
}
|
|
540
|
+
finally {
|
|
541
|
+
clearTimeout(timeoutId);
|
|
542
|
+
}
|
|
543
|
+
}
|
|
544
|
+
async embedSingle(url, useOpenAiFormat, chunk) {
|
|
545
|
+
const vectors = await this.embedBatch(url, useOpenAiFormat, [chunk]);
|
|
546
|
+
if (!vectors[0])
|
|
547
|
+
throw new Error("No embedding vector returned");
|
|
548
|
+
return vectors[0];
|
|
134
549
|
}
|
|
135
550
|
/**
|
|
136
551
|
* Fallback: Generate embedding from text hash (deterministic)
|
|
137
552
|
*/
|
|
138
553
|
embedFromHash(text) {
|
|
139
|
-
const hash = text.split(
|
|
554
|
+
const hash = text.split("").reduce((a, b) => {
|
|
140
555
|
a = ((a << 5) - a) + b.charCodeAt(0);
|
|
141
556
|
return a & a;
|
|
142
557
|
}, 0);
|
|
143
558
|
const embedding = [];
|
|
144
|
-
for (let i = 0; i < this.dimensions; i++) {
|
|
559
|
+
for (let i = 0; i < this.config.dimensions; i++) {
|
|
145
560
|
embedding.push(Math.sin(hash + i) * 0.1);
|
|
146
561
|
}
|
|
147
|
-
return embedding;
|
|
562
|
+
return this.l2Normalize(embedding);
|
|
148
563
|
}
|
|
149
|
-
/**
|
|
150
|
-
* Calculate cosine similarity
|
|
151
|
-
*/
|
|
152
564
|
cosineSimilarity(a, b) {
|
|
153
565
|
if (a.length !== b.length) {
|
|
154
566
|
throw new Error("Vector dimensions mismatch");
|