@mrc2204/agent-smart-memo 4.0.8 → 4.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,33 +1,82 @@
1
+ import { EmbeddingCapabilityRegistry } from "./embedding-capability-registry.js";
2
/**
 * Error that carries the HTTP status and a preview of the response body of
 * a failed embedding request.
 */
class EmbeddingHttpError extends Error {
  /** HTTP status code passed to the constructor. */
  status;
  /** Response body excerpt passed to the constructor. */
  bodyPreview;

  /**
   * @param {number} status - HTTP status code of the failed response.
   * @param {string} bodyPreview - Excerpt of the response body.
   * @param {string} [message] - Optional message; a status-based default is
   *   used when it is missing or empty.
   */
  constructor(status, bodyPreview, message) {
    const text = message ? message : `Embedding API error: ${status}`;
    super(text);
    Object.assign(this, { name: "EmbeddingHttpError", status, bodyPreview });
  }
}
12
/**
 * Seed capability profiles keyed by embedding model name.
 *
 * Each profile holds the starting context window (`seedMaxTokens`), the
 * fraction of that window used for chunk sizing (`safeRatio`), a fixed token
 * reserve (`reserveTokens`) and the expected vector size (`vectorDim`).
 *
 * The table has a null prototype because it is indexed with a user-configured
 * model name: on a plain object, names such as "constructor" or "toString"
 * would resolve to inherited Object.prototype members instead of missing, and
 * the caller's `|| fallback` would never apply. The table itself is frozen so
 * entries cannot be added or replaced at runtime.
 */
const MODEL_DEFAULTS = Object.freeze(Object.assign(Object.create(null), {
  "text-embedding-3-small": { seedMaxTokens: 8192, safeRatio: 0.82, reserveTokens: 64, vectorDim: 1536 },
  "text-embedding-3-large": { seedMaxTokens: 8192, safeRatio: 0.82, reserveTokens: 64, vectorDim: 3072 },
  "qwen3-embedding:0.6b": { seedMaxTokens: 8192, safeRatio: 0.76, reserveTokens: 80, vectorDim: 1024 },
  "qwen3-embedding:4b": { seedMaxTokens: 8192, safeRatio: 0.72, reserveTokens: 128, vectorDim: 2560 },
}));
1
18
  /**
2
- * Embedding service client - Ollama compatible
19
+ * Embedding service client with runtime capability calibration + persistence
3
20
  */
4
21
  export class EmbeddingClient {
5
22
  config;
6
23
  logger;
7
- dimensions;
24
+ registry;
25
+ capability;
26
+ activeEndpoint = "";
27
+ provider = "auto";
28
+ modelKey = "";
29
+ ready;
8
30
  constructor(config, logger) {
31
+ const model = config.model || "qwen3-embedding:0.6b";
32
+ const defaults = MODEL_DEFAULTS[model] || { seedMaxTokens: 4096, safeRatio: 0.72, reserveTokens: 96, vectorDim: config.dimensions || 1024 };
9
33
  this.config = {
10
34
  embeddingApiUrl: config.embeddingApiUrl || "http://localhost:11434",
11
35
  timeout: config.timeout || 30000,
12
- model: config.model || "qwen3-embedding:0.6b",
36
+ model,
37
+ dimensions: config.dimensions || defaults.vectorDim,
38
+ stateDir: config.stateDir || process.env.OPENCLAW_STATE_DIR || `${process.env.HOME}/.openclaw`,
13
39
  };
14
40
  this.logger = logger || console;
15
- this.dimensions = config.dimensions || 1024;
41
+ this.registry = new EmbeddingCapabilityRegistry(this.config.stateDir, this.logger);
42
+ this.ready = this.initializeCapabilities();
16
43
  }
17
44
  resolveEmbeddingEndpoints(rawBaseUrl) {
18
45
  const base = (rawBaseUrl || "").trim();
19
46
  const normalizedBase = (base || "http://localhost:11434").replace(/\/+$/, "");
20
- // If already a full embeddings path, use directly.
21
47
  if (/(\/v1\/embeddings|\/api\/embeddings)\/?$/i.test(normalizedBase)) {
22
48
  return [normalizedBase];
23
49
  }
24
- // Smart handling for base URL only:
25
- // 1) Prefer OpenAI-compatible /v1/embeddings (for proxypal/openai-like services)
26
- // 2) Fallback to Ollama /api/embeddings (for backward compatibility)
27
50
  return [`${normalizedBase}/v1/embeddings`, `${normalizedBase}/api/embeddings`];
28
51
  }
29
- isOpenAIEmbeddingEndpoint(url) {
30
- return /\/v1\/embeddings\/?$/i.test(url);
52
+ detectProvider(endpoint) {
53
+ if (/\/v1\/embeddings\/?$/i.test(endpoint))
54
+ return "openai";
55
+ if (/\/api\/embeddings\/?$/i.test(endpoint))
56
+ return "ollama";
57
+ return "auto";
58
+ }
59
+ getDefaults() {
60
+ return MODEL_DEFAULTS[this.config.model] || {
61
+ seedMaxTokens: 4096,
62
+ safeRatio: 0.72,
63
+ reserveTokens: 96,
64
+ vectorDim: this.config.dimensions,
65
+ };
66
+ }
67
+ buildModelKey(provider, endpoint) {
68
+ return `${provider}::${endpoint}::${this.config.model}`;
69
+ }
70
+ tokenBudget() {
71
+ const discovered = Math.max(256, this.capability.discoveredMaxTokens || this.capability.seedMaxTokens);
72
+ const rawBudget = Math.floor(discovered * this.capability.safeRatio) - this.capability.reserveTokens;
73
+ return Math.max(128, rawBudget);
74
+ }
75
+ // conservative estimator: whitespace tokens + char heuristic safeguard
76
+ estimateTokens(text) {
77
+ const whitespaceTokens = text.trim() ? text.trim().split(/\s+/).length : 0;
78
+ const charTokens = Math.ceil(text.length / 4);
79
+ return Math.max(1, Math.max(whitespaceTokens, charTokens));
31
80
  }
32
81
  normalizeInput(input) {
33
82
  if (Array.isArray(input)) {
@@ -41,89 +90,446 @@ export class EmbeddingClient {
41
90
  }
42
91
  return [];
43
92
  }
93
+ splitIntoSentences(text) {
94
+ return text
95
+ .split(/(?<=[\n\.!?;])\s+/)
96
+ .map((s) => s.trim())
97
+ .filter(Boolean);
98
+ }
99
+ chunkTextByTokenBudget(text, tokenBudget) {
100
+ if (this.estimateTokens(text) <= tokenBudget)
101
+ return [text];
102
+ const sentences = this.splitIntoSentences(text);
103
+ if (sentences.length === 0)
104
+ return [text.slice(0, Math.max(64, tokenBudget * 4))];
105
+ const chunks = [];
106
+ let current = "";
107
+ const pushCurrent = () => {
108
+ const trimmed = current.trim();
109
+ if (trimmed.length > 0)
110
+ chunks.push(trimmed);
111
+ current = "";
112
+ };
113
+ for (const sentence of sentences) {
114
+ const next = current ? `${current} ${sentence}` : sentence;
115
+ if (this.estimateTokens(next) <= tokenBudget) {
116
+ current = next;
117
+ continue;
118
+ }
119
+ if (current)
120
+ pushCurrent();
121
+ if (this.estimateTokens(sentence) <= tokenBudget) {
122
+ current = sentence;
123
+ continue;
124
+ }
125
+ // ultra-long sentence fallback: split by words with hard guard
126
+ const words = sentence.split(/\s+/).filter(Boolean);
127
+ let wordChunk = "";
128
+ for (const word of words) {
129
+ const candidate = wordChunk ? `${wordChunk} ${word}` : word;
130
+ if (this.estimateTokens(candidate) <= tokenBudget) {
131
+ wordChunk = candidate;
132
+ }
133
+ else {
134
+ if (wordChunk)
135
+ chunks.push(wordChunk);
136
+ wordChunk = word;
137
+ }
138
+ }
139
+ if (wordChunk)
140
+ chunks.push(wordChunk);
141
+ }
142
+ if (current)
143
+ pushCurrent();
144
+ return chunks.filter((c) => this.estimateTokens(c) <= tokenBudget + 2);
145
+ }
146
+ l2Normalize(vector) {
147
+ const norm = Math.sqrt(vector.reduce((sum, v) => sum + v * v, 0));
148
+ if (!Number.isFinite(norm) || norm === 0)
149
+ return vector;
150
+ return vector.map((v) => v / norm);
151
+ }
152
+ weightedAverage(vectors, weights) {
153
+ if (vectors.length === 0)
154
+ return [];
155
+ const dim = vectors[0].length;
156
+ const out = new Array(dim).fill(0);
157
+ const weightSum = weights.reduce((a, b) => a + b, 0) || 1;
158
+ for (let i = 0; i < vectors.length; i++) {
159
+ const vec = vectors[i];
160
+ const w = weights[i] || 1;
161
+ for (let d = 0; d < dim; d++) {
162
+ out[d] += vec[d] * w;
163
+ }
164
+ }
165
+ for (let d = 0; d < dim; d++) {
166
+ out[d] /= weightSum;
167
+ }
168
+ return this.l2Normalize(out);
169
+ }
170
+ isContextLengthError(error) {
171
+ if (!(error instanceof EmbeddingHttpError))
172
+ return false;
173
+ if (![400, 413, 422, 500].includes(error.status))
174
+ return false;
175
+ return /context length|maximum context|too many tokens|exceed|token limit|8192|input length/i.test(error.bodyPreview || "");
176
+ }
177
+ extractTokenLimitFromError(errorText) {
178
+ const normalized = errorText || "";
179
+ const patterns = [
180
+ /(?:context length|maximum context|token(?:s)? limit)[^\d]*(\d{3,6})/i,
181
+ /exceeds[^\d]*(\d{3,6})/i,
182
+ /max(?:imum)?[^\d]*(\d{3,6})\s*tokens?/i,
183
+ ];
184
+ for (const p of patterns) {
185
+ const m = normalized.match(p);
186
+ if (m?.[1]) {
187
+ const parsed = Number(m[1]);
188
+ if (Number.isFinite(parsed) && parsed >= 128)
189
+ return parsed;
190
+ }
191
+ }
192
+ return null;
193
+ }
194
+ async updateCapabilityFromContextError(error) {
195
+ const parsed = this.extractTokenLimitFromError(error.bodyPreview || "");
196
+ const current = this.capability.discoveredMaxTokens || this.capability.seedMaxTokens;
197
+ const fallback = Math.floor(current * 0.85);
198
+ const discovered = Math.max(128, parsed ? Math.min(current, parsed) : fallback);
199
+ if (discovered < current) {
200
+ this.capability = {
201
+ ...this.capability,
202
+ discoveredMaxTokens: discovered,
203
+ updatedAt: new Date().toISOString(),
204
+ source: "error-feedback",
205
+ };
206
+ await this.registry.set(this.modelKey, this.capability);
207
+ this.logger.warn(`[Embedding] capability refined from error-feedback: ${current} -> ${discovered} (modelKey=${this.modelKey})`);
208
+ }
209
+ }
210
+ async initializeCapabilities() {
211
+ const endpoints = this.resolveEmbeddingEndpoints(this.config.embeddingApiUrl);
212
+ const endpoint = endpoints[0];
213
+ const provider = this.detectProvider(endpoint);
214
+ this.activeEndpoint = endpoint;
215
+ this.provider = provider;
216
+ this.modelKey = this.buildModelKey(provider, endpoint);
217
+ const defaults = this.getDefaults();
218
+ const existing = await this.registry.get(this.modelKey);
219
+ this.capability = existing || {
220
+ seedMaxTokens: defaults.seedMaxTokens,
221
+ discoveredMaxTokens: defaults.seedMaxTokens,
222
+ safeRatio: defaults.safeRatio,
223
+ reserveTokens: defaults.reserveTokens,
224
+ vectorDim: defaults.vectorDim,
225
+ updatedAt: new Date().toISOString(),
226
+ source: "docs",
227
+ };
228
+ if (!existing) {
229
+ await this.registry.set(this.modelKey, this.capability);
230
+ }
231
+ // light startup calibration (max 1/day)
232
+ const ageMs = Date.now() - new Date(this.capability.updatedAt).getTime();
233
+ if (!Number.isFinite(ageMs) || ageMs > 24 * 60 * 60 * 1000) {
234
+ await this.calibrateRuntimeCapability();
235
+ }
236
+ }
237
+ async readEndpointMetadata() {
238
+ const endpoint = this.activeEndpoint;
239
+ const provider = this.detectProvider(endpoint);
240
+ try {
241
+ if (provider === "ollama") {
242
+ const base = endpoint.replace(/\/api\/embeddings\/?$/i, "");
243
+ const res = await fetch(`${base}/api/tags`, { signal: AbortSignal.timeout(4000) });
244
+ if (!res.ok)
245
+ return {};
246
+ const json = await res.json();
247
+ const models = Array.isArray(json?.models) ? json.models : [];
248
+ const modelInfo = models.find((m) => m?.model === this.config.model || m?.name === this.config.model);
249
+ const dimFromModel = Number(modelInfo?.details?.embedding_length || modelInfo?.details?.dimensions || 0);
250
+ return {
251
+ vectorDim: dimFromModel > 0 ? dimFromModel : undefined,
252
+ };
253
+ }
254
+ }
255
+ catch {
256
+ // best effort metadata
257
+ }
258
+ return {};
259
+ }
260
+ async probeWithinBudget(tokenTarget) {
261
+ const sample = Array(tokenTarget).fill("t").join(" ");
262
+ try {
263
+ await this.embedChunksFromApi([sample]);
264
+ return true;
265
+ }
266
+ catch (error) {
267
+ if (this.isContextLengthError(error))
268
+ return false;
269
+ throw error;
270
+ }
271
+ }
272
+ async probeContextWindow(seed) {
273
+ const clamp = (n) => Math.max(128, Math.floor(n));
274
+ let low = 256;
275
+ let high = clamp(seed);
276
+ // stepped exploration (safe / low spam)
277
+ const steps = [0.5, 0.75, 1, 1.1].map((x) => clamp(seed * x));
278
+ for (const s of steps) {
279
+ let ok = false;
280
+ try {
281
+ ok = await this.probeWithinBudget(s);
282
+ }
283
+ catch {
284
+ continue;
285
+ }
286
+ if (ok) {
287
+ low = Math.max(low, s);
288
+ high = Math.max(high, s);
289
+ }
290
+ else {
291
+ high = Math.min(high, s);
292
+ break;
293
+ }
294
+ }
295
+ // binary search refinement, max 5 probes
296
+ for (let i = 0; i < 5 && high - low > 96; i++) {
297
+ const mid = clamp((low + high) / 2);
298
+ const ok = await this.probeWithinBudget(mid);
299
+ if (ok)
300
+ low = mid;
301
+ else
302
+ high = mid;
303
+ }
304
+ return clamp(low);
305
+ }
306
+ async calibrateRuntimeCapability(force = false) {
307
+ await this.ready;
308
+ if (!force) {
309
+ const ageMs = Date.now() - new Date(this.capability.updatedAt).getTime();
310
+ if (Number.isFinite(ageMs) && ageMs < 30 * 60 * 1000)
311
+ return;
312
+ }
313
+ const metadata = await this.readEndpointMetadata();
314
+ const seed = Math.max(256, metadata.discoveredMaxTokens || metadata.seedMaxTokens || this.capability.seedMaxTokens);
315
+ let discovered = this.capability.discoveredMaxTokens;
316
+ try {
317
+ discovered = await this.probeContextWindow(seed);
318
+ }
319
+ catch (error) {
320
+ this.logger.warn(`[Embedding] calibration probe skipped: ${error.message}`);
321
+ }
322
+ this.capability = {
323
+ ...this.capability,
324
+ discoveredMaxTokens: Math.max(128, discovered || seed),
325
+ vectorDim: metadata.vectorDim || this.capability.vectorDim,
326
+ updatedAt: new Date().toISOString(),
327
+ source: "probe",
328
+ };
329
+ await this.registry.set(this.modelKey, this.capability);
330
+ this.logger.info(`[Embedding] calibrated capability modelKey=${this.modelKey} maxTokens=${this.capability.discoveredMaxTokens} vectorDim=${this.capability.vectorDim}`);
331
+ }
332
+ async getVectorDimensionHint() {
333
+ await this.ready;
334
+ return this.capability.vectorDim || this.config.dimensions;
335
+ }
336
+ async getModelKey() {
337
+ await this.ready;
338
+ return this.modelKey;
339
+ }
44
340
  /**
45
- * Get embedding vector for text
46
- * Fallback to hash-based embedding if API unavailable
341
+ * Backward-compatible method
47
342
  */
48
343
  async embed(text) {
344
+ const result = await this.embedDetailed(text);
345
+ return result.vector;
346
+ }
347
+ /**
348
+ * New method with calibration-aware adaptive chunking + metadata
349
+ */
350
+ async embedDetailed(text) {
351
+ await this.ready;
49
352
  const normalizedInput = this.normalizeInput(text);
50
- // Validate/filter empty input BEFORE calling embedding API
51
353
  if (normalizedInput.length === 0) {
52
354
  this.logger.warn("[Embedding] Skip API call: empty input after trim/filter");
53
- return this.embedFromHash("");
355
+ return {
356
+ vector: this.embedFromHash(""),
357
+ metadata: {
358
+ embedding_chunked: false,
359
+ embedding_chunks_count: 0,
360
+ embedding_chunking_strategy: "array_batch_weighted_avg",
361
+ embedding_model: this.config.model,
362
+ embedding_model_key: this.modelKey,
363
+ embedding_provider: this.provider,
364
+ embedding_max_tokens: this.capability.discoveredMaxTokens,
365
+ embedding_safe_chunk_tokens: this.tokenBudget(),
366
+ embedding_source: this.capability.source,
367
+ embedding_fallback_hash: true,
368
+ },
369
+ };
54
370
  }
55
- // Try API first
56
- try {
57
- return await this.embedFromApi(normalizedInput);
371
+ const mergedText = normalizedInput.join("\n\n");
372
+ const baseBudget = this.tokenBudget();
373
+ // retry policy with progressive budget reduction
374
+ const safetyMultipliers = [1, 0.8, 0.65, 0.5, 0.4, 0.3];
375
+ for (const mul of safetyMultipliers) {
376
+ const safeChunkTokens = Math.max(128, Math.floor(baseBudget * mul));
377
+ const chunks = this.chunkTextByTokenBudget(mergedText, safeChunkTokens);
378
+ const chunkWeights = chunks.map((c) => this.estimateTokens(c));
379
+ // hard guard: never send chunk above discovered budget
380
+ if (chunks.some((chunk) => this.estimateTokens(chunk) > safeChunkTokens + 2)) {
381
+ continue;
382
+ }
383
+ try {
384
+ const vectors = await this.embedChunksFromApi(chunks);
385
+ const vector = vectors.length === 1
386
+ ? this.l2Normalize(vectors[0])
387
+ : this.weightedAverage(vectors, chunkWeights);
388
+ return {
389
+ vector,
390
+ metadata: {
391
+ embedding_chunked: chunks.length > 1,
392
+ embedding_chunks_count: chunks.length,
393
+ embedding_chunking_strategy: "array_batch_weighted_avg",
394
+ embedding_model: this.config.model,
395
+ embedding_model_key: this.modelKey,
396
+ embedding_provider: this.provider,
397
+ embedding_max_tokens: this.capability.discoveredMaxTokens,
398
+ embedding_safe_chunk_tokens: safeChunkTokens,
399
+ embedding_source: this.capability.source,
400
+ embedding_fallback_hash: false,
401
+ },
402
+ };
403
+ }
404
+ catch (error) {
405
+ if (this.isContextLengthError(error)) {
406
+ await this.updateCapabilityFromContextError(error);
407
+ this.logger.warn(`[Embedding] context-length detected. retry with smaller chunk budget=${safeChunkTokens} modelKey=${this.modelKey}`);
408
+ continue;
409
+ }
410
+ // non context-length error -> fallback hash immediately
411
+ this.logger.error(`[Embedding][HIGH] API failed; fallback to hash embedding. reason=${error.message} modelKey=${this.modelKey}`);
412
+ return {
413
+ vector: this.embedFromHash(mergedText),
414
+ metadata: {
415
+ embedding_chunked: chunks.length > 1,
416
+ embedding_chunks_count: chunks.length,
417
+ embedding_chunking_strategy: "array_batch_weighted_avg",
418
+ embedding_model: this.config.model,
419
+ embedding_model_key: this.modelKey,
420
+ embedding_provider: this.provider,
421
+ embedding_max_tokens: this.capability.discoveredMaxTokens,
422
+ embedding_safe_chunk_tokens: safeChunkTokens,
423
+ embedding_source: this.capability.source,
424
+ embedding_fallback_hash: true,
425
+ },
426
+ };
427
+ }
58
428
  }
59
- catch (error) {
60
- // Fallback to deterministic hash-based embedding
61
- return this.embedFromHash(normalizedInput[0]);
429
+ // exhausted retries
430
+ this.logger.error(`[Embedding][CRITICAL] exhausted context retries; fallback hash modelKey=${this.modelKey}`);
431
+ return {
432
+ vector: this.embedFromHash(mergedText),
433
+ metadata: {
434
+ embedding_chunked: true,
435
+ embedding_chunks_count: Math.max(1, this.chunkTextByTokenBudget(mergedText, Math.max(128, Math.floor(baseBudget * 0.3))).length),
436
+ embedding_chunking_strategy: "array_batch_weighted_avg",
437
+ embedding_model: this.config.model,
438
+ embedding_model_key: this.modelKey,
439
+ embedding_provider: this.provider,
440
+ embedding_max_tokens: this.capability.discoveredMaxTokens,
441
+ embedding_safe_chunk_tokens: Math.max(128, Math.floor(baseBudget * 0.3)),
442
+ embedding_source: this.capability.source,
443
+ embedding_fallback_hash: true,
444
+ },
445
+ };
446
+ }
447
+ async embedChunksFromApi(chunks) {
448
+ if (chunks.length === 0) {
449
+ throw new Error("No chunks to embed");
62
450
  }
451
+ const endpoints = this.resolveEmbeddingEndpoints(this.config.embeddingApiUrl);
452
+ let lastError = null;
453
+ for (const url of endpoints) {
454
+ const useOpenAiFormat = /\/v1\/embeddings\/?$/i.test(url);
455
+ try {
456
+ this.activeEndpoint = url;
457
+ this.provider = this.detectProvider(url);
458
+ this.modelKey = this.buildModelKey(this.provider, this.activeEndpoint);
459
+ if (!useOpenAiFormat && chunks.length > 1) {
460
+ // Ollama /api/embeddings: sequential requests
461
+ const vectors = [];
462
+ for (const c of chunks) {
463
+ vectors.push(await this.embedSingle(url, false, c));
464
+ }
465
+ return vectors;
466
+ }
467
+ const vectors = await this.embedBatch(url, useOpenAiFormat, chunks);
468
+ if (vectors.length !== chunks.length) {
469
+ throw new Error(`Embedding vector count mismatch: expected=${chunks.length}, got=${vectors.length}`);
470
+ }
471
+ return vectors;
472
+ }
473
+ catch (error) {
474
+ lastError = error;
475
+ if (this.isContextLengthError(error)) {
476
+ throw error;
477
+ }
478
+ if (error instanceof EmbeddingHttpError &&
479
+ [404, 429].includes(error.status) &&
480
+ endpoints.length > 1 &&
481
+ url !== endpoints[endpoints.length - 1]) {
482
+ continue;
483
+ }
484
+ if (url !== endpoints[endpoints.length - 1]) {
485
+ continue;
486
+ }
487
+ }
488
+ }
489
+ throw lastError || new Error("Embedding API error: no endpoint succeeded");
63
490
  }
64
- /**
65
- * Get embedding from API
66
- */
67
- async embedFromApi(input) {
68
- this.logger.debug?.(`[Embedding] Calling API with inputCount=${input.length} firstItemLength=${input[0]?.length || 0} preview=${JSON.stringify((input[0] || "").slice(0, 80))}`);
491
+ async embedBatch(url, useOpenAiFormat, chunks) {
69
492
  const controller = new AbortController();
70
493
  const timeoutId = setTimeout(() => controller.abort(), this.config.timeout);
71
494
  try {
72
- const endpoints = this.resolveEmbeddingEndpoints(this.config.embeddingApiUrl);
73
- let lastError = null;
74
- for (const url of endpoints) {
75
- const useOpenAiFormat = this.isOpenAIEmbeddingEndpoint(url);
495
+ const max429Retries = 3;
496
+ for (let attempt = 0; attempt <= max429Retries; attempt++) {
76
497
  const response = await fetch(url, {
77
498
  method: "POST",
78
- headers: {
79
- "Content-Type": "application/json",
80
- },
499
+ headers: { "Content-Type": "application/json" },
81
500
  body: JSON.stringify(useOpenAiFormat
82
- ? {
83
- model: this.config.model,
84
- input,
85
- }
86
- : {
87
- model: this.config.model,
88
- prompt: input[0],
89
- }),
501
+ ? { model: this.config.model, input: chunks }
502
+ : { model: this.config.model, prompt: chunks[0] }),
90
503
  signal: controller.signal,
91
504
  });
505
+ if (response.status === 429 && attempt < max429Retries) {
506
+ const backoffMs = Math.min(4000, 300 * Math.pow(2, attempt));
507
+ this.logger.warn(`[Embedding] 429 rate limit. retry in ${backoffMs}ms (attempt ${attempt + 1}/${max429Retries})`);
508
+ await new Promise((r) => setTimeout(r, backoffMs));
509
+ continue;
510
+ }
92
511
  if (!response.ok) {
93
512
  const errorText = await response.text().catch(() => "Unknown error");
94
- this.logger.error(`[Embedding] HTTP ${response.status} @ ${url}: ${errorText.substring(0, 200)}`);
95
- if (response.status === 400) {
96
- this.logger.error(`[Embedding] 400 schema debug @ ${url}: ${JSON.stringify({
97
- model: this.config.model,
98
- inputType: Array.isArray(input) ? "array" : typeof input,
99
- inputLength: Array.isArray(input) ? input.length : 0,
100
- firstItemLength: input[0]?.length || 0,
101
- })}`);
102
- }
103
- // If this endpoint not found and we still have fallback endpoint, continue.
104
- if (response.status === 404 && endpoints.length > 1 && url !== endpoints[endpoints.length - 1]) {
105
- continue;
106
- }
107
- lastError = new Error(`Embedding API error: ${response.status}`);
108
- break;
513
+ const preview = errorText.substring(0, 500);
514
+ throw new EmbeddingHttpError(response.status, preview);
109
515
  }
110
516
  const data = await response.json();
111
- // Ollama API format: { embedding: [...] }
112
- if (data.embedding && Array.isArray(data.embedding)) {
113
- clearTimeout(timeoutId);
114
- return data.embedding;
517
+ if (!useOpenAiFormat) {
518
+ if (data.embedding && Array.isArray(data.embedding)) {
519
+ return [data.embedding];
520
+ }
521
+ throw new Error("Invalid Ollama embedding response format");
115
522
  }
116
- // OpenAI-compatible format: { data: [{ embedding: [...] }] }
117
- if (Array.isArray(data.data) && data.data[0]?.embedding && Array.isArray(data.data[0].embedding)) {
118
- clearTimeout(timeoutId);
119
- return data.data[0].embedding;
523
+ if (Array.isArray(data.data)) {
524
+ const vectors = data.data
525
+ .map((d) => d?.embedding)
526
+ .filter((v) => Array.isArray(v));
527
+ if (vectors.length > 0)
528
+ return vectors;
120
529
  }
121
- this.logger.error(`[Embedding] Unexpected response format: ${JSON.stringify(data).substring(0, 200)}`);
122
- lastError = new Error("Invalid embedding response format");
123
- break;
530
+ throw new Error("Invalid OpenAI embedding response format");
124
531
  }
125
- clearTimeout(timeoutId);
126
- throw lastError || new Error("Embedding API error: no endpoint succeeded");
532
+ throw new Error("Embedding API 429 retries exhausted");
127
533
  }
128
534
  catch (error) {
129
535
  if (error.name === "AbortError") {
@@ -131,24 +537,30 @@ export class EmbeddingClient {
131
537
  }
132
538
  throw error;
133
539
  }
540
+ finally {
541
+ clearTimeout(timeoutId);
542
+ }
543
+ }
544
+ async embedSingle(url, useOpenAiFormat, chunk) {
545
+ const vectors = await this.embedBatch(url, useOpenAiFormat, [chunk]);
546
+ if (!vectors[0])
547
+ throw new Error("No embedding vector returned");
548
+ return vectors[0];
134
549
  }
135
550
  /**
136
551
  * Fallback: Generate embedding from text hash (deterministic)
137
552
  */
138
553
  embedFromHash(text) {
139
- const hash = text.split('').reduce((a, b) => {
554
+ const hash = text.split("").reduce((a, b) => {
140
555
  a = ((a << 5) - a) + b.charCodeAt(0);
141
556
  return a & a;
142
557
  }, 0);
143
558
  const embedding = [];
144
- for (let i = 0; i < this.dimensions; i++) {
559
+ for (let i = 0; i < this.config.dimensions; i++) {
145
560
  embedding.push(Math.sin(hash + i) * 0.1);
146
561
  }
147
- return embedding;
562
+ return this.l2Normalize(embedding);
148
563
  }
149
- /**
150
- * Calculate cosine similarity
151
- */
152
564
  cosineSimilarity(a, b) {
153
565
  if (a.length !== b.length) {
154
566
  throw new Error("Vector dimensions mismatch");