@mnemoai/core 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.ts +3395 -0
- package/openclaw.plugin.json +815 -0
- package/package.json +59 -0
- package/src/access-tracker.ts +341 -0
- package/src/adapters/README.md +78 -0
- package/src/adapters/chroma.ts +206 -0
- package/src/adapters/lancedb.ts +237 -0
- package/src/adapters/pgvector.ts +218 -0
- package/src/adapters/qdrant.ts +191 -0
- package/src/adaptive-retrieval.ts +90 -0
- package/src/audit-log.ts +238 -0
- package/src/chunker.ts +254 -0
- package/src/config.ts +271 -0
- package/src/decay-engine.ts +238 -0
- package/src/embedder.ts +735 -0
- package/src/extraction-prompts.ts +339 -0
- package/src/license.ts +258 -0
- package/src/llm-client.ts +125 -0
- package/src/mcp-server.ts +415 -0
- package/src/memory-categories.ts +71 -0
- package/src/memory-upgrader.ts +388 -0
- package/src/migrate.ts +364 -0
- package/src/mnemo.ts +142 -0
- package/src/noise-filter.ts +97 -0
- package/src/noise-prototypes.ts +164 -0
- package/src/observability.ts +81 -0
- package/src/query-tracker.ts +57 -0
- package/src/reflection-event-store.ts +98 -0
- package/src/reflection-item-store.ts +112 -0
- package/src/reflection-mapped-metadata.ts +84 -0
- package/src/reflection-metadata.ts +23 -0
- package/src/reflection-ranking.ts +33 -0
- package/src/reflection-retry.ts +181 -0
- package/src/reflection-slices.ts +265 -0
- package/src/reflection-store.ts +602 -0
- package/src/resonance-state.ts +85 -0
- package/src/retriever.ts +1510 -0
- package/src/scopes.ts +375 -0
- package/src/self-improvement-files.ts +143 -0
- package/src/semantic-gate.ts +121 -0
- package/src/session-recovery.ts +138 -0
- package/src/smart-extractor.ts +923 -0
- package/src/smart-metadata.ts +561 -0
- package/src/storage-adapter.ts +153 -0
- package/src/store.ts +1330 -0
- package/src/tier-manager.ts +189 -0
- package/src/tools.ts +1292 -0
- package/src/wal-recovery.ts +172 -0
- package/test/core.test.mjs +301 -0
package/src/embedder.ts
ADDED
|
@@ -0,0 +1,735 @@
|
|
|
1
|
+
// SPDX-License-Identifier: MIT
|
|
2
|
+
/**
|
|
3
|
+
* Embedding Abstraction Layer
|
|
4
|
+
* OpenAI-compatible API for various embedding providers.
|
|
5
|
+
* Supports automatic chunking for documents exceeding embedding context limits.
|
|
6
|
+
*
|
|
7
|
+
* Note: Some providers (e.g. Jina) support extra parameters like `task` and
|
|
8
|
+
* `normalized` on the embeddings endpoint. The OpenAI SDK types do not include
|
|
9
|
+
* these fields, so we pass them via a narrow `any` cast.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import OpenAI from "openai";
|
|
13
|
+
import { createHash } from "node:crypto";
|
|
14
|
+
import { smartChunk } from "./chunker.js";
|
|
15
|
+
|
|
16
|
+
// ============================================================================
|
|
17
|
+
// Embedding Cache (LRU with TTL)
|
|
18
|
+
// ============================================================================
|
|
19
|
+
|
|
20
|
+
interface CacheEntry {
|
|
21
|
+
vector: number[];
|
|
22
|
+
createdAt: number;
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
class EmbeddingCache {
|
|
26
|
+
private cache = new Map<string, CacheEntry>();
|
|
27
|
+
private readonly maxSize: number;
|
|
28
|
+
private readonly ttlMs: number;
|
|
29
|
+
public hits = 0;
|
|
30
|
+
public misses = 0;
|
|
31
|
+
|
|
32
|
+
constructor(maxSize = 256, ttlMinutes = 30) {
|
|
33
|
+
this.maxSize = maxSize;
|
|
34
|
+
this.ttlMs = ttlMinutes * 60_000;
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
private key(text: string, task?: string): string {
|
|
38
|
+
const hash = createHash("sha256").update(`${task || ""}:${text}`).digest("hex").slice(0, 24);
|
|
39
|
+
return hash;
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
get(text: string, task?: string): number[] | undefined {
|
|
43
|
+
const k = this.key(text, task);
|
|
44
|
+
const entry = this.cache.get(k);
|
|
45
|
+
if (!entry) {
|
|
46
|
+
this.misses++;
|
|
47
|
+
return undefined;
|
|
48
|
+
}
|
|
49
|
+
if (Date.now() - entry.createdAt > this.ttlMs) {
|
|
50
|
+
this.cache.delete(k);
|
|
51
|
+
this.misses++;
|
|
52
|
+
return undefined;
|
|
53
|
+
}
|
|
54
|
+
// Move to end (most recently used)
|
|
55
|
+
this.cache.delete(k);
|
|
56
|
+
this.cache.set(k, entry);
|
|
57
|
+
this.hits++;
|
|
58
|
+
return entry.vector;
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
set(text: string, task: string | undefined, vector: number[]): void {
|
|
62
|
+
const k = this.key(text, task);
|
|
63
|
+
// Evict oldest if full
|
|
64
|
+
if (this.cache.size >= this.maxSize) {
|
|
65
|
+
const firstKey = this.cache.keys().next().value;
|
|
66
|
+
if (firstKey !== undefined) this.cache.delete(firstKey);
|
|
67
|
+
}
|
|
68
|
+
this.cache.set(k, { vector, createdAt: Date.now() });
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
get size(): number { return this.cache.size; }
|
|
72
|
+
get stats(): { size: number; hits: number; misses: number; hitRate: string } {
|
|
73
|
+
const total = this.hits + this.misses;
|
|
74
|
+
return {
|
|
75
|
+
size: this.cache.size,
|
|
76
|
+
hits: this.hits,
|
|
77
|
+
misses: this.misses,
|
|
78
|
+
hitRate: total > 0 ? `${((this.hits / total) * 100).toFixed(1)}%` : "N/A",
|
|
79
|
+
};
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
// ============================================================================
|
|
84
|
+
// Types & Configuration
|
|
85
|
+
// ============================================================================
|
|
86
|
+
|
|
87
|
+
/** Configuration for an OpenAI-compatible embedding provider. */
export interface EmbeddingConfig {
  /** Only the OpenAI-compatible wire protocol is supported. */
  provider: "openai-compatible";
  /** Single API key or array of keys for round-robin rotation with failover. */
  apiKey: string | string[];
  /** Model name sent to the provider (also used to look up known vector sizes). */
  model: string;
  /** Provider endpoint; omit for the SDK's default (api.openai.com). */
  baseURL?: string;
  /** Output vector size; overrides the built-in model-dimension table. */
  dimensions?: number;

  /** Optional task type for query embeddings (e.g. "retrieval.query") */
  taskQuery?: string;
  /** Optional task type for passage/document embeddings (e.g. "retrieval.passage") */
  taskPassage?: string;
  /** Optional flag to request normalized embeddings (provider-dependent, e.g. Jina v5) */
  normalized?: boolean;
  /** Enable automatic chunking for documents exceeding context limits (default: true) */
  chunking?: boolean;
}

// Known embedding model dimensions. Used by getVectorDimensions() when no
// explicit `dimensions` override is configured; unknown models throw there.
const EMBEDDING_DIMENSIONS: Record<string, number> = {
  "text-embedding-3-small": 1536,
  "text-embedding-3-large": 3072,
  "text-embedding-004": 768,
  "gemini-embedding-001": 3072,
  "nomic-embed-text": 768,
  "mxbai-embed-large": 1024,
  "BAAI/bge-m3": 1024,
  "all-MiniLM-L6-v2": 384,
  "all-mpnet-base-v2": 512,

  // Jina v5
  "jina-embeddings-v5-text-small": 1024,
  "jina-embeddings-v5-text-nano": 768,
};
|
|
121
|
+
|
|
122
|
+
// ============================================================================
|
|
123
|
+
// Utility Functions
|
|
124
|
+
// ============================================================================
|
|
125
|
+
|
|
126
|
+
function resolveEnvVars(value: string): string {
|
|
127
|
+
return value.replace(/\$\{([^}]+)\}/g, (_, envVar) => {
|
|
128
|
+
const envValue = process.env[envVar];
|
|
129
|
+
if (!envValue) {
|
|
130
|
+
throw new Error(`Environment variable ${envVar} is not set`);
|
|
131
|
+
}
|
|
132
|
+
return envValue;
|
|
133
|
+
});
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
function getErrorMessage(error: unknown): string {
|
|
137
|
+
return error instanceof Error ? error.message : String(error);
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
function getErrorStatus(error: unknown): number | undefined {
|
|
141
|
+
if (!error || typeof error !== "object") return undefined;
|
|
142
|
+
const err = error as Record<string, any>;
|
|
143
|
+
if (typeof err.status === "number") return err.status;
|
|
144
|
+
if (typeof err.statusCode === "number") return err.statusCode;
|
|
145
|
+
if (err.error && typeof err.error === "object") {
|
|
146
|
+
if (typeof err.error.status === "number") return err.error.status;
|
|
147
|
+
if (typeof err.error.statusCode === "number") return err.error.statusCode;
|
|
148
|
+
}
|
|
149
|
+
return undefined;
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
function getErrorCode(error: unknown): string | undefined {
|
|
153
|
+
if (!error || typeof error !== "object") return undefined;
|
|
154
|
+
const err = error as Record<string, any>;
|
|
155
|
+
if (typeof err.code === "string") return err.code;
|
|
156
|
+
if (err.error && typeof err.error === "object" && typeof err.error.code === "string") {
|
|
157
|
+
return err.error.code;
|
|
158
|
+
}
|
|
159
|
+
return undefined;
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
function getProviderLabel(baseURL: string | undefined, model: string): string {
|
|
163
|
+
const base = baseURL || "";
|
|
164
|
+
|
|
165
|
+
if (base) {
|
|
166
|
+
if (/api\.jina\.ai/i.test(base)) return "Jina";
|
|
167
|
+
if (/localhost:11434|127\.0\.0\.1:11434|\/ollama\b/i.test(base)) return "Ollama";
|
|
168
|
+
if (/api\.openai\.com/i.test(base)) return "OpenAI";
|
|
169
|
+
|
|
170
|
+
try {
|
|
171
|
+
return new URL(base).host;
|
|
172
|
+
} catch {
|
|
173
|
+
return base;
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
if (/^jina-/i.test(model)) return "Jina";
|
|
178
|
+
|
|
179
|
+
return "embedding provider";
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
function isAuthError(error: unknown): boolean {
|
|
183
|
+
const status = getErrorStatus(error);
|
|
184
|
+
if (status === 401 || status === 403) return true;
|
|
185
|
+
|
|
186
|
+
const code = getErrorCode(error);
|
|
187
|
+
if (code && /invalid.*key|auth|forbidden|unauthorized/i.test(code)) return true;
|
|
188
|
+
|
|
189
|
+
const msg = getErrorMessage(error);
|
|
190
|
+
return /\b401\b|\b403\b|invalid api key|api key expired|expired api key|forbidden|unauthorized|authentication failed|access denied/i.test(msg);
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
function isNetworkError(error: unknown): boolean {
|
|
194
|
+
const code = getErrorCode(error);
|
|
195
|
+
if (code && /ECONNREFUSED|ECONNRESET|ENOTFOUND|EHOSTUNREACH|ETIMEDOUT/i.test(code)) {
|
|
196
|
+
return true;
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
const msg = getErrorMessage(error);
|
|
200
|
+
return /ECONNREFUSED|ECONNRESET|ENOTFOUND|EHOSTUNREACH|ETIMEDOUT|fetch failed|network error|socket hang up|connection refused|getaddrinfo/i.test(msg);
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
export function formatEmbeddingProviderError(
|
|
204
|
+
error: unknown,
|
|
205
|
+
opts: { baseURL?: string; model: string; mode?: "single" | "batch" },
|
|
206
|
+
): string {
|
|
207
|
+
const raw = getErrorMessage(error).trim();
|
|
208
|
+
if (
|
|
209
|
+
raw.startsWith("Embedding provider authentication failed") ||
|
|
210
|
+
raw.startsWith("Embedding provider unreachable") ||
|
|
211
|
+
raw.startsWith("Failed to generate embedding from ") ||
|
|
212
|
+
raw.startsWith("Failed to generate batch embeddings from ")
|
|
213
|
+
) {
|
|
214
|
+
return raw;
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
const status = getErrorStatus(error);
|
|
218
|
+
const code = getErrorCode(error);
|
|
219
|
+
const provider = getProviderLabel(opts.baseURL, opts.model);
|
|
220
|
+
const detail = raw.length > 0 ? raw : "unknown error";
|
|
221
|
+
const suffix = [status, code].filter(Boolean).join(" ");
|
|
222
|
+
const detailText = suffix ? `${suffix}: ${detail}` : detail;
|
|
223
|
+
const genericPrefix =
|
|
224
|
+
opts.mode === "batch"
|
|
225
|
+
? `Failed to generate batch embeddings from ${provider}: `
|
|
226
|
+
: `Failed to generate embedding from ${provider}: `;
|
|
227
|
+
|
|
228
|
+
if (isAuthError(error)) {
|
|
229
|
+
let hint = `Check embedding.apiKey and endpoint for ${provider}.`;
|
|
230
|
+
if (provider === "Jina") {
|
|
231
|
+
hint +=
|
|
232
|
+
" If your Jina key expired or lost access, replace the key or switch to a local OpenAI-compatible endpoint such as Ollama (for example baseURL http://127.0.0.1:11434/v1, with a matching model and embedding.dimensions).";
|
|
233
|
+
} else if (provider === "Ollama") {
|
|
234
|
+
hint +=
|
|
235
|
+
" Ollama usually works with a dummy apiKey; verify the local server is running, the model is pulled, and embedding.dimensions matches the model output.";
|
|
236
|
+
}
|
|
237
|
+
return `Embedding provider authentication failed (${detailText}). ${hint}`;
|
|
238
|
+
}
|
|
239
|
+
|
|
240
|
+
if (isNetworkError(error)) {
|
|
241
|
+
let hint = `Verify the endpoint is reachable`;
|
|
242
|
+
if (opts.baseURL) {
|
|
243
|
+
hint += ` at ${opts.baseURL}`;
|
|
244
|
+
}
|
|
245
|
+
hint += ` and that model \"${opts.model}\" is available.`;
|
|
246
|
+
return `Embedding provider unreachable (${detailText}). ${hint}`;
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
return `${genericPrefix}${detailText}`;
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
export function getVectorDimensions(model: string, overrideDims?: number): number {
|
|
253
|
+
if (overrideDims && overrideDims > 0) {
|
|
254
|
+
return overrideDims;
|
|
255
|
+
}
|
|
256
|
+
|
|
257
|
+
const dims = EMBEDDING_DIMENSIONS[model];
|
|
258
|
+
if (!dims) {
|
|
259
|
+
throw new Error(
|
|
260
|
+
`Unsupported embedding model: ${model}. Either add it to EMBEDDING_DIMENSIONS or set embedding.dimensions in config.`
|
|
261
|
+
);
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
return dims;
|
|
265
|
+
}
|
|
266
|
+
|
|
267
|
+
// ============================================================================
|
|
268
|
+
// Embedder Class
|
|
269
|
+
// ============================================================================
|
|
270
|
+
|
|
271
|
+
/**
 * OpenAI-compatible embedding client.
 *
 * Responsibilities (all visible in this module):
 *  - maintains a pool of OpenAI SDK clients (one per API key) and rotates
 *    between them round-robin, retrying across keys on rate-limit errors;
 *  - caches vectors in an in-process LRU cache with a 30-minute TTL;
 *  - supports task-aware embeddings (separate query/passage task strings);
 *  - on context-length errors, falls back to chunking the document via
 *    smartChunk() and averaging the per-chunk vectors.
 */
export class Embedder {
  /** Pool of OpenAI clients — one per API key for round-robin rotation. */
  private clients: OpenAI[];
  /** Round-robin index for client rotation. */
  private _clientIndex: number = 0;

  /** Expected vector length; every returned embedding is validated against it. */
  public readonly dimensions: number;
  // Shared cache for both direct and chunk-averaged embeddings.
  private readonly _cache: EmbeddingCache;

  private readonly _model: string;
  private readonly _baseURL?: string;
  private readonly _taskQuery?: string;
  private readonly _taskPassage?: string;
  private readonly _normalized?: boolean;

  /** Optional requested dimensions to pass through to the embedding provider (OpenAI-compatible). */
  private readonly _requestDimensions?: number;
  /** Enable automatic chunking for long documents (default: true) */
  private readonly _autoChunk: boolean;

  constructor(config: EmbeddingConfig & { chunking?: boolean }) {
    // Normalize apiKey to array and resolve environment variables
    const apiKeys = Array.isArray(config.apiKey) ? config.apiKey : [config.apiKey];
    const resolvedKeys = apiKeys.map(k => resolveEnvVars(k));

    this._model = config.model;
    this._baseURL = config.baseURL;
    this._taskQuery = config.taskQuery;
    this._taskPassage = config.taskPassage;
    this._normalized = config.normalized;
    this._requestDimensions = config.dimensions;
    // Enable auto-chunking by default for better handling of long documents
    this._autoChunk = config.chunking !== false;

    // Create a client pool — one OpenAI client per key
    this.clients = resolvedKeys.map(key => new OpenAI({
      apiKey: key,
      ...(config.baseURL ? { baseURL: config.baseURL } : {}),
    }));

    if (this.clients.length > 1) {
      console.log(`[mnemo] Initialized ${this.clients.length} API keys for round-robin rotation`);
    }

    // Throws for unknown models unless config.dimensions is provided.
    this.dimensions = getVectorDimensions(config.model, config.dimensions);
    this._cache = new EmbeddingCache(256, 30); // 256 entries, 30 min TTL
  }

  // --------------------------------------------------------------------------
  // Multi-key rotation helpers
  // --------------------------------------------------------------------------

  /** Return the next client in round-robin order. */
  private nextClient(): OpenAI {
    const client = this.clients[this._clientIndex % this.clients.length];
    this._clientIndex = (this._clientIndex + 1) % this.clients.length;
    return client;
  }

  /** Check whether an error is a rate-limit / quota-exceeded / overload error. */
  private isRateLimitError(error: unknown): boolean {
    if (!error || typeof error !== "object") return false;

    const err = error as Record<string, any>;

    // HTTP status: 429 (rate limit) or 503 (service overload)
    if (err.status === 429 || err.status === 503) return true;

    // OpenAI SDK structured error code
    if (err.code === "rate_limit_exceeded" || err.code === "insufficient_quota") return true;

    // Nested error object (some providers)
    const nested = err.error;
    if (nested && typeof nested === "object") {
      if (nested.type === "rate_limit_exceeded" || nested.type === "insufficient_quota") return true;
      if (nested.code === "rate_limit_exceeded" || nested.code === "insufficient_quota") return true;
    }

    // Fallback: message text matching
    const msg = error instanceof Error ? error.message : String(error);
    return /rate.limit|quota|too many requests|insufficient.*credit|429|503.*overload/i.test(msg);
  }

  /**
   * Call embeddings.create with automatic key rotation on rate-limit errors.
   * Tries each key in the pool at most once before giving up.
   * Non-rate-limit errors are rethrown immediately so the caller can apply
   * its own fallback (e.g. the chunking path in embedSingle/embedMany).
   */
  private async embedWithRetry(payload: any): Promise<any> {
    const maxAttempts = this.clients.length;
    let lastError: Error | undefined;

    for (let attempt = 0; attempt < maxAttempts; attempt++) {
      const client = this.nextClient();
      try {
        return await client.embeddings.create(payload);
      } catch (error) {
        lastError = error instanceof Error ? error : new Error(String(error));

        if (this.isRateLimitError(error) && attempt < maxAttempts - 1) {
          console.log(
            `[mnemo] Attempt ${attempt + 1}/${maxAttempts} hit rate limit, rotating to next key...`
          );
          continue;
        }

        // Non-rate-limit error → don't retry, let caller handle (e.g. chunking)
        if (!this.isRateLimitError(error)) {
          throw error;
        }
        // Rate-limit error on the final attempt: fall through to the
        // "all keys exhausted" error below.
      }
    }

    // All keys exhausted with rate-limit errors
    throw new Error(
      `All ${maxAttempts} API keys exhausted (rate limited). Last error: ${lastError?.message || "unknown"}`,
      { cause: lastError }
    );
  }

  /** Number of API keys in the rotation pool. */
  get keyCount(): number {
    return this.clients.length;
  }

  // --------------------------------------------------------------------------
  // Backward-compatible API
  // --------------------------------------------------------------------------

  /**
   * Backward-compatible embedding API.
   *
   * Historically the plugin used a single `embed()` method for both query and
   * passage embeddings. With task-aware providers we treat this as passage.
   */
  async embed(text: string): Promise<number[]> {
    return this.embedPassage(text);
  }

  /** Backward-compatible batch embedding API (treated as passage). */
  async embedBatch(texts: string[]): Promise<number[][]> {
    return this.embedBatchPassage(texts);
  }

  // --------------------------------------------------------------------------
  // Task-aware API
  // --------------------------------------------------------------------------

  /** Embed a search query using the configured query task type (if any). */
  async embedQuery(text: string): Promise<number[]> {
    return this.embedSingle(text, this._taskQuery);
  }

  /** Embed a passage/document using the configured passage task type (if any). */
  async embedPassage(text: string): Promise<number[]> {
    return this.embedSingle(text, this._taskPassage);
  }

  /** Batch variant of embedQuery; preserves input order in the result. */
  async embedBatchQuery(texts: string[]): Promise<number[][]> {
    return this.embedMany(texts, this._taskQuery);
  }

  /** Batch variant of embedPassage; preserves input order in the result. */
  async embedBatchPassage(texts: string[]): Promise<number[][]> {
    return this.embedMany(texts, this._taskPassage);
  }

  // --------------------------------------------------------------------------
  // Internals
  // --------------------------------------------------------------------------

  /**
   * Sanity-check a provider result: must be an array of exactly
   * `this.dimensions` elements.
   * @throws Error on a non-array or a dimension mismatch.
   */
  private validateEmbedding(embedding: number[]): void {
    if (!Array.isArray(embedding)) {
      throw new Error(`Embedding is not an array (got ${typeof embedding})`);
    }
    if (embedding.length !== this.dimensions) {
      throw new Error(
        `Embedding dimension mismatch: expected ${this.dimensions}, got ${embedding.length}`
      );
    }
  }

  /**
   * Build the request body for embeddings.create, including provider-specific
   * extras (Voyage `input_type`, Jina-style `task`/`normalized`) that the
   * OpenAI SDK types do not model — hence the `any` payload.
   */
  private buildPayload(input: string | string[], task?: string): any {
    const payload: any = {
      model: this.model,
      input,
    };

    // Force float output to avoid SDK default base64 decoding path.
    // Skip for providers that reject this field (e.g. Voyage).
    const isVoyage = this._baseURL?.includes("voyageai.com");
    if (!isVoyage) {
      payload.encoding_format = "float";
    }

    // Voyage uses "input_type" instead of "task"
    if (task && isVoyage) {
      // Map taskQuery/taskPassage to Voyage input_type
      if (task.includes("query")) payload.input_type = "query";
      else if (task.includes("passage") || task.includes("document")) payload.input_type = "document";
      else payload.input_type = task;
    } else if (task) {
      payload.task = task;
    }
    if (this._normalized !== undefined) payload.normalized = this._normalized;

    // Some OpenAI-compatible providers support requesting a specific vector size.
    // We only pass it through when explicitly configured to avoid breaking providers
    // that reject unknown fields.
    if (this._requestDimensions && this._requestDimensions > 0 && !isVoyage) {
      payload.dimensions = this._requestDimensions;
    }

    return payload;
  }

  /**
   * Embed one text, with caching and a chunk-and-average fallback.
   *
   * Flow: cache lookup → provider call (with key rotation) → validate + cache.
   * If the provider rejects the input with a context-length-style error and
   * auto-chunking is on, the text is split via smartChunk(), each chunk is
   * embedded recursively, and the element-wise mean of the chunk vectors is
   * returned and cached for the original text.
   * NOTE(review): the averaged vector is not re-normalized — confirm callers
   * tolerate a non-unit-length vector here.
   * @throws Error (with a provider-friendly message, original error as cause)
   *         on empty input or unrecoverable provider failure.
   */
  private async embedSingle(text: string, task?: string): Promise<number[]> {
    if (!text || text.trim().length === 0) {
      throw new Error("Cannot embed empty text");
    }

    // Check cache first
    const cached = this._cache.get(text, task);
    if (cached) return cached;

    try {
      const response = await this.embedWithRetry(this.buildPayload(text, task));
      const embedding = response.data[0]?.embedding as number[] | undefined;
      if (!embedding) {
        throw new Error("No embedding returned from provider");
      }

      this.validateEmbedding(embedding);
      this._cache.set(text, task, embedding);
      return embedding;
    } catch (error) {
      // Check if this is a context length exceeded error and try chunking
      const errorMsg = error instanceof Error ? error.message : String(error);
      const isContextError = /context|too long|exceed|length/i.test(errorMsg);

      if (isContextError && this._autoChunk) {
        try {
          console.log(`Document exceeded context limit (${errorMsg}), attempting chunking...`);
          const chunkResult = smartChunk(text, this._model);

          if (chunkResult.chunks.length === 0) {
            throw new Error(`Failed to chunk document: ${errorMsg}`);
          }

          // Embed all chunks in parallel
          console.log(`Split document into ${chunkResult.chunkCount} chunks for embedding`);
          const chunkEmbeddings = await Promise.all(
            chunkResult.chunks.map(async (chunk, idx) => {
              try {
                // Recursive call: each chunk goes through cache + retry again.
                const embedding = await this.embedSingle(chunk, task);
                return { embedding };
              } catch (chunkError) {
                console.warn(`Failed to embed chunk ${idx}:`, chunkError);
                throw chunkError;
              }
            })
          );

          // Compute average embedding across chunks
          const avgEmbedding = chunkEmbeddings.reduce(
            (sum, { embedding }) => {
              for (let i = 0; i < embedding.length; i++) {
                sum[i] += embedding[i];
              }
              return sum;
            },
            new Array(this.dimensions).fill(0)
          );

          const finalEmbedding = avgEmbedding.map(v => v / chunkEmbeddings.length);

          // Cache the result for the original text (using its hash)
          this._cache.set(text, task, finalEmbedding);
          console.log(`Successfully embedded long document as ${chunkEmbeddings.length} averaged chunks`);

          return finalEmbedding;
        } catch (chunkError) {
          // If chunking fails, throw the original error
          console.warn(`Chunking failed, using original error:`, chunkError);
          const friendly = formatEmbeddingProviderError(error, {
            baseURL: this._baseURL,
            model: this._model,
            mode: "single",
          });
          throw new Error(friendly, { cause: error });
        }
      }

      const friendly = formatEmbeddingProviderError(error, {
        baseURL: this._baseURL,
        model: this._model,
        mode: "single",
      });
      throw new Error(friendly, { cause: error instanceof Error ? error : undefined });
    }
  }

  /**
   * Batch-embed texts, preserving positional correspondence with the input:
   * result[i] is the vector for texts[i], and empty/whitespace-only inputs
   * yield empty arrays ([]).
   *
   * On a context-length-style provider error (with auto-chunking enabled),
   * each valid text is chunked, chunk vectors are embedded individually via
   * embedSingle, and averaged — mirroring the single-text fallback.
   * @throws Error (provider-friendly message, original error as cause) when
   *         both the batch call and the chunking fallback fail.
   */
  private async embedMany(texts: string[], task?: string): Promise<number[][]> {
    if (!texts || texts.length === 0) {
      return [];
    }

    // Filter out empty texts and track indices
    const validTexts: string[] = [];
    const validIndices: number[] = [];

    texts.forEach((text, index) => {
      if (text && text.trim().length > 0) {
        validTexts.push(text);
        validIndices.push(index);
      }
    });

    if (validTexts.length === 0) {
      return texts.map(() => []);
    }

    try {
      const response = await this.embedWithRetry(
        this.buildPayload(validTexts, task)
      );

      // Create result array with proper length
      const results: number[][] = new Array(texts.length);

      // Fill in embeddings for valid texts
      response.data.forEach((item, idx) => {
        const originalIndex = validIndices[idx];
        const embedding = item.embedding as number[];

        this.validateEmbedding(embedding);
        results[originalIndex] = embedding;
      });

      // Fill empty arrays for invalid texts
      for (let i = 0; i < texts.length; i++) {
        if (!results[i]) {
          results[i] = [];
        }
      }

      return results;
    } catch (error) {
      // Check if this is a context length exceeded error and try chunking each text
      const errorMsg = error instanceof Error ? error.message : String(error);
      const isContextError = /context|too long|exceed|length/i.test(errorMsg);

      if (isContextError && this._autoChunk) {
        try {
          console.log(`Batch embedding failed with context error, attempting chunking...`);

          const chunkResults = await Promise.all(
            validTexts.map(async (text, idx) => {
              const chunkResult = smartChunk(text, this._model);
              if (chunkResult.chunks.length === 0) {
                throw new Error("Chunker produced no chunks");
              }

              // Embed all chunks in parallel, then average.
              const embeddings = await Promise.all(
                chunkResult.chunks.map((chunk) => this.embedSingle(chunk, task))
              );

              const avgEmbedding = embeddings.reduce(
                (sum, emb) => {
                  for (let i = 0; i < emb.length; i++) {
                    sum[i] += emb[i];
                  }
                  return sum;
                },
                new Array(this.dimensions).fill(0)
              );

              const finalEmbedding = avgEmbedding.map((v) => v / embeddings.length);

              // Cache the averaged embedding for the original (long) text.
              this._cache.set(text, task, finalEmbedding);

              return { embedding: finalEmbedding, index: validIndices[idx] };
            })
          );

          console.log(`Successfully chunked and embedded ${chunkResults.length} long documents`);

          // Build results array
          const results: number[][] = new Array(texts.length);
          chunkResults.forEach(({ embedding, index }) => {
            if (embedding.length > 0) {
              this.validateEmbedding(embedding);
              results[index] = embedding;
            } else {
              results[index] = [];
            }
          });

          // Fill empty arrays for invalid texts
          for (let i = 0; i < texts.length; i++) {
            if (!results[i]) {
              results[i] = [];
            }
          }

          return results;
        } catch (chunkError) {
          const friendly = formatEmbeddingProviderError(error, {
            baseURL: this._baseURL,
            model: this._model,
            mode: "batch",
          });
          throw new Error(`Failed to embed documents after chunking attempt: ${friendly}`, {
            cause: error instanceof Error ? error : undefined,
          });
        }
      }

      const friendly = formatEmbeddingProviderError(error, {
        baseURL: this._baseURL,
        model: this._model,
        mode: "batch",
      });
      throw new Error(friendly, {
        cause: error instanceof Error ? error : undefined,
      });
    }
  }

  /** Configured model name. */
  get model(): string {
    return this._model;
  }

  /**
   * Test connection and validate configuration by embedding a probe string.
   * Never throws; failures are reported in the returned object.
   */
  async test(): Promise<{ success: boolean; error?: string; dimensions?: number }> {
    try {
      const testEmbedding = await this.embedPassage("test");
      return {
        success: true,
        dimensions: testEmbedding.length,
      };
    } catch (error) {

      return {
        success: false,
        error: error instanceof Error ? error.message : String(error),
      };
    }
  }

  /** Cache hit/miss statistics plus the number of API keys in the pool. */
  get cacheStats() {
    return {
      ...this._cache.stats,
      keyCount: this.clients.length,
    };
  }
}
|
|
728
|
+
|
|
729
|
+
// ============================================================================
|
|
730
|
+
// Factory Function
|
|
731
|
+
// ============================================================================
|
|
732
|
+
|
|
733
|
+
export function createEmbedder(config: EmbeddingConfig): Embedder {
|
|
734
|
+
return new Embedder(config);
|
|
735
|
+
}
|