npm - @mrc2204/agent-smart-memo - Versions diffs - 4.0.8 → 4.0.10 - Mend

@mrc2204/agent-smart-memo 4.0.8 → 4.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

package/dist/hooks/auto-capture.d.ts.map +1 -1
package/dist/hooks/auto-capture.js +37 -3
package/dist/hooks/auto-capture.js.map +1 -1
package/dist/index.d.ts.map +1 -1
package/dist/index.js +35 -0
package/dist/index.js.map +1 -1
package/dist/scripts/reembed-collection.d.ts +2 -0
package/dist/scripts/reembed-collection.d.ts.map +1 -0
package/dist/scripts/reembed-collection.js +165 -0
package/dist/scripts/reembed-collection.js.map +1 -0
package/dist/services/embedding-capability-registry.d.ts +23 -0
package/dist/services/embedding-capability-registry.d.ts.map +1 -0
package/dist/services/embedding-capability-registry.js +56 -0
package/dist/services/embedding-capability-registry.js.map +1 -0
package/dist/services/embedding.d.ts +50 -10
package/dist/services/embedding.d.ts.map +1 -1
package/dist/services/embedding.js +486 -74
package/dist/services/embedding.js.map +1 -1
package/dist/services/qdrant.d.ts +25 -22
package/dist/services/qdrant.d.ts.map +1 -1
package/dist/services/qdrant.js +119 -25
package/dist/services/qdrant.js.map +1 -1
package/dist/tools/memory_store.d.ts.map +1 -1
package/dist/tools/memory_store.js +29 -4
package/dist/tools/memory_store.js.map +1 -1
package/package.json +2 -2

package/dist/services/embedding.js CHANGED Viewed

@@ -1,33 +1,82 @@
+import { EmbeddingCapabilityRegistry } from "./embedding-capability-registry.js";
+/**
+ * Error describing a non-OK HTTP response from an embedding endpoint.
+ *
+ * `status` is the HTTP status code and `bodyPreview` is the response text
+ * handed in by the caller. When no message is supplied, a generic
+ * "Embedding API error: <status>" message is used.
+ */
+class EmbeddingHttpError extends Error {
+    status;
+    bodyPreview;
+    constructor(status, bodyPreview, message) {
+        const text = message || `Embedding API error: ${status}`;
+        super(text);
+        Object.assign(this, { name: "EmbeddingHttpError", status, bodyPreview });
+    }
+}
+// Per-model starting values, keyed by model name:
+//   seedMaxTokens - initial assumption for the model's maximum input tokens
+//   safeRatio     - fraction of the maximum that tokenBudget() lets a chunk use
+//   reserveTokens - tokens tokenBudget() subtracts after applying safeRatio
+//   vectorDim     - embedding dimension used when config.dimensions is not given
+// Models missing from this table get a generic profile in the constructor and getDefaults().
+const MODEL_DEFAULTS = {
+    "text-embedding-3-small": { seedMaxTokens: 8192, safeRatio: 0.82, reserveTokens: 64, vectorDim: 1536 },
+    "text-embedding-3-large": { seedMaxTokens: 8192, safeRatio: 0.82, reserveTokens: 64, vectorDim: 3072 },
+    "qwen3-embedding:0.6b": { seedMaxTokens: 8192, safeRatio: 0.76, reserveTokens: 80, vectorDim: 1024 },
+    "qwen3-embedding:4b": { seedMaxTokens: 8192, safeRatio: 0.72, reserveTokens: 128, vectorDim: 2560 },
+};
 /**
- * Embedding service client - Ollama compatible
+ * Embedding service client with runtime capability calibration + persistence
  */
 export class EmbeddingClient {
+    // Resolved settings object built in the constructor
+    // (embeddingApiUrl, timeout, model, dimensions, stateDir).
     config;
+    // Logger used for debug/info/warn/error output; the constructor defaults it to console.
     logger;
-    dimensions;
+    // EmbeddingCapabilityRegistry used to load and persist capability records.
+    registry;
+    // Capability record for the current model key (token limits, ratios, vector dimension).
+    capability;
+    // Endpoint URL most recently selected or attempted.
+    activeEndpoint = "";
+    // Provider label derived from the endpoint path: "openai", "ollama" or "auto".
+    provider = "auto";
+    // Registry key in the form `${provider}::${endpoint}::${model}`.
+    modelKey = "";
+    // Promise returned by initializeCapabilities(); awaited before capability data is read.
+    ready;
     constructor(config, logger) {
+        const model = config.model || "qwen3-embedding:0.6b";
+        const defaults = MODEL_DEFAULTS[model] || { seedMaxTokens: 4096, safeRatio: 0.72, reserveTokens: 96, vectorDim: config.dimensions || 1024 };
         this.config = {
             embeddingApiUrl: config.embeddingApiUrl || "http://localhost:11434",
             timeout: config.timeout || 30000,
-            model: config.model || "qwen3-embedding:0.6b",
+            model,
+            dimensions: config.dimensions || defaults.vectorDim,
+            stateDir: config.stateDir || process.env.OPENCLAW_STATE_DIR || `${process.env.HOME}/.openclaw`,
         };
         this.logger = logger || console;
-        this.dimensions = config.dimensions || 1024;
+        this.registry = new EmbeddingCapabilityRegistry(this.config.stateDir, this.logger);
+        this.ready = this.initializeCapabilities();
     }
+    /**
+     * Turn a configured base URL into the ordered list of embedding endpoints to try.
+     *
+     * @param rawBaseUrl Configured URL; empty or missing falls back to http://localhost:11434.
+     * @returns A single-element list when the URL already ends in /v1/embeddings or
+     *   /api/embeddings; otherwise [`<base>/v1/embeddings`, `<base>/api/embeddings`].
+     */
     resolveEmbeddingEndpoints(rawBaseUrl) {
         const base = (rawBaseUrl || "").trim();
+        // Strip trailing slashes so the suffix test and the concatenation below are uniform.
         const normalizedBase = (base || "http://localhost:11434").replace(/\/+$/, "");
-        // If already a full embeddings path, use directly.
+        // A URL that already names an embeddings path is used as-is.
         if (/(\/v1\/embeddings|\/api\/embeddings)\/?$/i.test(normalizedBase)) {
             return [normalizedBase];
         }
-        // Smart handling for base URL only:
-        // 1) Prefer OpenAI-compatible /v1/embeddings (for proxypal/openai-like services)
-        // 2) Fallback to Ollama /api/embeddings (for backward compatibility)
+        // Base URL only: the /v1/embeddings path is listed first, /api/embeddings second.
         return [`${normalizedBase}/v1/embeddings`, `${normalizedBase}/api/embeddings`];
     }
-    isOpenAIEmbeddingEndpoint(url) {
-        return /\/v1\/embeddings\/?$/i.test(url);
+    /**
+     * Classify an endpoint URL by its path suffix.
+     *
+     * @param endpoint Embeddings endpoint URL.
+     * @returns "openai" for a /v1/embeddings suffix, "ollama" for /api/embeddings,
+     *   and "auto" when neither matches.
+     */
+    detectProvider(endpoint) {
+        const suffixTable = [
+            [/\/v1\/embeddings\/?$/i, "openai"],
+            [/\/api\/embeddings\/?$/i, "ollama"],
+        ];
+        const match = suffixTable.find(([pattern]) => pattern.test(endpoint));
+        return match ? match[1] : "auto";
+    }
+    /**
+     * Look up the default profile for the configured model.
+     *
+     * @returns The MODEL_DEFAULTS entry for `config.model`, or a generic profile whose
+     *   vectorDim is `config.dimensions` when the model is not listed.
+     */
+    getDefaults() {
+        const known = MODEL_DEFAULTS[this.config.model];
+        if (known)
+            return known;
+        return { seedMaxTokens: 4096, safeRatio: 0.72, reserveTokens: 96, vectorDim: this.config.dimensions };
+    }
+    /**
+     * Compose the registry key for a provider/endpoint pair and the configured model.
+     *
+     * @returns `<provider>::<endpoint>::<model>`.
+     */
+    buildModelKey(provider, endpoint) {
+        const { model } = this.config;
+        return `${provider}::${endpoint}::${model}`;
+    }
+    /**
+     * Compute the per-chunk token budget from the current capability record.
+     *
+     * The window is the discovered maximum (or the seed when none is stored), never less
+     * than 256; the budget is floor(window * safeRatio) - reserveTokens, never less than 128.
+     */
+    tokenBudget() {
+        const { discoveredMaxTokens, seedMaxTokens, safeRatio, reserveTokens } = this.capability;
+        const windowTokens = Math.max(256, discoveredMaxTokens || seedMaxTokens);
+        const usable = Math.floor(windowTokens * safeRatio) - reserveTokens;
+        return Math.max(128, usable);
+    }
+    /**
+     * Conservative token estimate for a string: the larger of the whitespace-separated
+     * word count and one token per four characters, and never below 1.
+     */
+    estimateTokens(text) {
+        const stripped = text.trim();
+        const byWhitespace = stripped ? stripped.split(/\s+/).length : 0;
+        const byChars = Math.ceil(text.length / 4);
+        return Math.max(1, byWhitespace, byChars);
     }
     normalizeInput(input) {
         if (Array.isArray(input)) {
@@ -41,89 +90,446 @@ export class EmbeddingClient {
         }
         return [];
     }
+    /**
+     * Break text at whitespace that follows a newline, '.', '!', '?' or ';'.
+     *
+     * @returns The trimmed, non-empty pieces in original order.
+     */
+    splitIntoSentences(text) {
+        const pieces = [];
+        for (const raw of text.split(/(?<=[\n\.!?;])\s+/)) {
+            const piece = raw.trim();
+            if (piece)
+                pieces.push(piece);
+        }
+        return pieces;
+    }
+    /**
+     * Split text into chunks whose estimated token count fits within tokenBudget.
+     *
+     * Packing is greedy and order-preserving: whole sentences first, then
+     * whitespace-separated words for a sentence that is too long, then fixed-size
+     * character slices for a single word that is too long on its own, so that such a
+     * word is split instead of being discarded by the final filter.
+     *
+     * @param text Text to split.
+     * @param tokenBudget Maximum estimated tokens per chunk (as measured by estimateTokens).
+     * @returns Chunks in original order; `[text]` when the text already fits.
+     */
+    chunkTextByTokenBudget(text, tokenBudget) {
+        const fits = (candidate) => this.estimateTokens(candidate) <= tokenBudget;
+        if (fits(text))
+            return [text];
+        const sentences = this.splitIntoSentences(text);
+        if (sentences.length === 0)
+            return [text.slice(0, Math.max(64, tokenBudget * 4))];
+        // estimateTokens charges one token per four characters, so a slice of this many
+        // characters never exceeds the budget.
+        const maxCharsPerSlice = Math.max(1, Math.floor(tokenBudget) * 4);
+        const chunks = [];
+        let current = "";
+        const pushCurrent = () => {
+            const trimmed = current.trim();
+            if (trimmed.length > 0)
+                chunks.push(trimmed);
+            current = "";
+        };
+        for (const sentence of sentences) {
+            const next = current ? `${current} ${sentence}` : sentence;
+            if (fits(next)) {
+                current = next;
+                continue;
+            }
+            if (current)
+                pushCurrent();
+            if (fits(sentence)) {
+                current = sentence;
+                continue;
+            }
+            // The sentence alone is over budget: pack it word by word.
+            let wordChunk = "";
+            for (const word of sentence.split(/\s+/).filter(Boolean)) {
+                const candidate = wordChunk ? `${wordChunk} ${word}` : word;
+                if (fits(candidate)) {
+                    wordChunk = candidate;
+                    continue;
+                }
+                if (wordChunk)
+                    chunks.push(wordChunk);
+                if (fits(word)) {
+                    wordChunk = word;
+                    continue;
+                }
+                // A single word over budget (e.g. a long unbroken token): emit full-size
+                // slices and keep the remainder open so following words can join it.
+                // Slicing is by UTF-16 code units.
+                let start = 0;
+                for (; word.length - start > maxCharsPerSlice; start += maxCharsPerSlice) {
+                    chunks.push(word.slice(start, start + maxCharsPerSlice));
+                }
+                wordChunk = word.slice(start);
+            }
+            if (wordChunk)
+                chunks.push(wordChunk);
+        }
+        if (current)
+            pushCurrent();
+        // Safety net: drop anything that still exceeds the budget (tolerance of 2 tokens).
+        return chunks.filter((c) => this.estimateTokens(c) <= tokenBudget + 2);
+    }
+    /**
+     * Scale a vector to unit Euclidean length.
+     *
+     * @returns A new scaled array, or the input itself when its norm is zero or not finite.
+     */
+    l2Normalize(vector) {
+        let sumOfSquares = 0;
+        vector.forEach((component) => {
+            sumOfSquares += component * component;
+        });
+        const magnitude = Math.sqrt(sumOfSquares);
+        const usable = Number.isFinite(magnitude) && magnitude !== 0;
+        return usable ? vector.map((component) => component / magnitude) : vector;
+    }
+    /**
+     * Combine vectors into one by weighted mean, then L2-normalize the result.
+     *
+     * @param vectors Vectors to combine; the first vector's length sets the output dimension.
+     * @param weights One weight per vector; a missing or zero weight counts as 1 in the sum
+     *   of products.
+     * @returns The normalized combined vector, or [] when no vectors are given.
+     */
+    weightedAverage(vectors, weights) {
+        if (vectors.length === 0)
+            return [];
+        const [first] = vectors;
+        const dim = first.length;
+        const totalWeight = weights.reduce((a, b) => a + b, 0) || 1;
+        const sums = new Array(dim).fill(0);
+        for (const [index, vec] of vectors.entries()) {
+            const weight = weights[index] || 1;
+            for (let d = 0; d < dim; d++) {
+                sums[d] += vec[d] * weight;
+            }
+        }
+        const mean = sums.map((sum) => sum / totalWeight);
+        return this.l2Normalize(mean);
+    }
+    /**
+     * Decide whether an error looks like "input too long for the model".
+     *
+     * True only for an EmbeddingHttpError with status 400, 413, 422 or 500 whose body
+     * preview matches one of the context-length phrases below.
+     * NOTE(review): the pattern includes the bare words "exceed" and "8192", so unrelated
+     * messages containing them would also match - confirm this breadth is intended.
+     */
+    isContextLengthError(error) {
+        const isCandidate = error instanceof EmbeddingHttpError
+            && [400, 413, 422, 500].includes(error.status);
+        if (!isCandidate)
+            return false;
+        const body = error.bodyPreview || "";
+        return /context length|maximum context|too many tokens|exceed|token limit|8192|input length/i.test(body);
+    }
+    /**
+     * Pull a numeric token limit out of an error message, if one is stated.
+     *
+     * Patterns are tried in order; the first captured number (3-6 digits) that is
+     * finite and at least 128 wins.
+     *
+     * @param errorText Error body text; empty or missing is treated as "".
+     * @returns The limit as a number, or null when nothing usable is found.
+     */
+    extractTokenLimitFromError(errorText) {
+        const haystack = errorText || "";
+        const limitPatterns = [
+            /(?:context length|maximum context|token(?:s)? limit)[^\d]*(\d{3,6})/i,
+            /exceeds[^\d]*(\d{3,6})/i,
+            /max(?:imum)?[^\d]*(\d{3,6})\s*tokens?/i,
+        ];
+        for (const pattern of limitPatterns) {
+            const captured = haystack.match(pattern)?.[1];
+            if (!captured)
+                continue;
+            const limit = Number(captured);
+            if (Number.isFinite(limit) && limit >= 128)
+                return limit;
+        }
+        return null;
+    }
+    /**
+     * Lower the stored maximum-token figure after a context-length failure.
+     *
+     * When the error body states a limit, the new figure is the smaller of that limit and
+     * the current one; otherwise it is 85% of the current one. The figure never drops
+     * below 128. Nothing is written unless the new figure is strictly smaller.
+     *
+     * @param error Error whose `bodyPreview` is inspected for a stated limit.
+     */
+    async updateCapabilityFromContextError(error) {
+        const reported = this.extractTokenLimitFromError(error.bodyPreview || "");
+        const previous = this.capability.discoveredMaxTokens || this.capability.seedMaxTokens;
+        const candidate = reported ? Math.min(previous, reported) : Math.floor(previous * 0.85);
+        const refined = Math.max(128, candidate);
+        if (!(refined < previous))
+            return;
+        this.capability = {
+            ...this.capability,
+            discoveredMaxTokens: refined,
+            updatedAt: new Date().toISOString(),
+            source: "error-feedback",
+        };
+        await this.registry.set(this.modelKey, this.capability);
+        this.logger.warn(`[Embedding] capability refined from error-feedback: ${previous} -> ${refined} (modelKey=${this.modelKey})`);
+    }
+    /**
+     * Select the first candidate endpoint, derive the model key, and load (or create and
+     * persist) the capability record for it.
+     *
+     * When the record is older than 24 hours, or its timestamp cannot be parsed, a runtime
+     * calibration is started in the background. It must not be awaited here:
+     * calibrateRuntimeCapability() starts with `await this.ready`, and `this.ready` is the
+     * promise of this very function, so awaiting it would leave both promises pending
+     * forever and block every caller that waits on `this.ready`.
+     */
+    async initializeCapabilities() {
+        const endpoints = this.resolveEmbeddingEndpoints(this.config.embeddingApiUrl);
+        const endpoint = endpoints[0];
+        const provider = this.detectProvider(endpoint);
+        this.activeEndpoint = endpoint;
+        this.provider = provider;
+        this.modelKey = this.buildModelKey(provider, endpoint);
+        const defaults = this.getDefaults();
+        const existing = await this.registry.get(this.modelKey);
+        this.capability = existing || {
+            seedMaxTokens: defaults.seedMaxTokens,
+            discoveredMaxTokens: defaults.seedMaxTokens,
+            safeRatio: defaults.safeRatio,
+            reserveTokens: defaults.reserveTokens,
+            vectorDim: defaults.vectorDim,
+            updatedAt: new Date().toISOString(),
+            source: "docs",
+        };
+        if (!existing) {
+            await this.registry.set(this.modelKey, this.capability);
+        }
+        // light startup calibration (max 1/day)
+        const ageMs = Date.now() - new Date(this.capability.updatedAt).getTime();
+        if (!Number.isFinite(ageMs) || ageMs > 24 * 60 * 60 * 1000) {
+            // Fire-and-forget: the calibration resumes once this function has resolved
+            // `this.ready`. Failures are logged so they never surface as unhandled rejections.
+            void this.calibrateRuntimeCapability().catch((error) => {
+                this.logger.warn(`[Embedding] startup calibration failed: ${error?.message ?? error}`);
+            });
+        }
+    }
+    /**
+     * Best-effort lookup of model metadata from the active endpoint.
+     *
+     * Only endpoints classified as "ollama" are queried: `<base>/api/tags` is fetched with
+     * a 4 second timeout and the entry whose `model` or `name` equals the configured model
+     * is inspected for an embedding dimension.
+     *
+     * @returns `{ vectorDim }` (possibly undefined) on a successful lookup, otherwise `{}`.
+     *   Errors are swallowed on purpose.
+     */
+    async readEndpointMetadata() {
+        const endpoint = this.activeEndpoint;
+        if (this.detectProvider(endpoint) !== "ollama")
+            return {};
+        try {
+            const base = endpoint.replace(/\/api\/embeddings\/?$/i, "");
+            const res = await fetch(`${base}/api/tags`, { signal: AbortSignal.timeout(4000) });
+            if (!res.ok)
+                return {};
+            const json = await res.json();
+            const models = Array.isArray(json?.models) ? json.models : [];
+            const wanted = this.config.model;
+            const modelInfo = models.find((m) => m?.model === wanted || m?.name === wanted);
+            const details = modelInfo?.details;
+            const dim = Number(details?.embedding_length || details?.dimensions || 0);
+            return { vectorDim: dim > 0 ? dim : undefined };
+        }
+        catch {
+            // best effort metadata
+            return {};
+        }
+    }
+    /**
+     * Send one synthetic input of `tokenTarget` single-letter words to the API.
+     *
+     * @returns true when the request succeeds, false when it fails with a
+     *   context-length error.
+     * @throws Any other error raised by the request.
+     */
+    async probeWithinBudget(tokenTarget) {
+        const filler = new Array(tokenTarget).fill("t").join(" ");
+        try {
+            await this.embedChunksFromApi([filler]);
+        }
+        catch (error) {
+            if (!this.isContextLengthError(error))
+                throw error;
+            return false;
+        }
+        return true;
+    }
+    /**
+     * Estimate the largest input size the endpoint accepts, starting from `seed`.
+     *
+     * Phase 1 probes 0.5x, 0.75x, 1x and 1.1x of the seed in that order and stops at the
+     * first context-length failure. Phase 2 bisects between the largest accepted size and
+     * the smallest rejected one, for at most 5 probes or until the gap is 96 or less.
+     *
+     * @param seed Starting guess for the maximum token count.
+     * @returns The lower bound reached (at least 128). The initial lower bound of 256 is
+     *   an assumption, not a probed value.
+     * @throws Non-context-length errors raised during phase 2; in phase 1 they are skipped.
+     */
+    async probeContextWindow(seed) {
+        const clamp = (n) => Math.max(128, Math.floor(n));
+        let low = 256;
+        let high = clamp(seed);
+        // stepped exploration (safe / low spam)
+        const steps = [0.5, 0.75, 1, 1.1].map((x) => clamp(seed * x));
+        for (const s of steps) {
+            let ok = false;
+            try {
+                ok = await this.probeWithinBudget(s);
+            }
+            catch {
+                // probeWithinBudget only throws for non-context-length errors; skip this step.
+                continue;
+            }
+            if (ok) {
+                low = Math.max(low, s);
+                high = Math.max(high, s);
+            }
+            else {
+                // First rejected size becomes the upper bound; larger steps are not tried.
+                high = Math.min(high, s);
+                break;
+            }
+        }
+        // binary search refinement, max 5 probes
+        for (let i = 0; i < 5 && high - low > 96; i++) {
+            const mid = clamp((low + high) / 2);
+            const ok = await this.probeWithinBudget(mid);
+            if (ok)
+                low = mid;
+            else
+                high = mid;
+        }
+        return clamp(low);
+    }
+    /**
+     * Re-measure the model's usable context window and persist the result.
+     *
+     * Waits for `this.ready` first, so it must not be awaited by code that `this.ready`
+     * itself depends on. Unless `force` is true, it returns without doing anything when
+     * the capability record was updated less than 30 minutes ago.
+     *
+     * @param force Skip the 30-minute freshness check.
+     */
+    async calibrateRuntimeCapability(force = false) {
+        await this.ready;
+        if (!force) {
+            const ageMs = Date.now() - new Date(this.capability.updatedAt).getTime();
+            if (Number.isFinite(ageMs) && ageMs < 30 * 60 * 1000)
+                return;
+        }
+        const metadata = await this.readEndpointMetadata();
+        // readEndpointMetadata() in this file only ever returns vectorDim, so the seed
+        // effectively comes from the stored capability.
+        const seed = Math.max(256, metadata.discoveredMaxTokens || metadata.seedMaxTokens || this.capability.seedMaxTokens);
+        let discovered = this.capability.discoveredMaxTokens;
+        try {
+            discovered = await this.probeContextWindow(seed);
+        }
+        catch (error) {
+            // Probe failure keeps the previously stored value.
+            this.logger.warn(`[Embedding] calibration probe skipped: ${error.message}`);
+        }
+        // The record is rewritten with source "probe" and a fresh timestamp even when the
+        // probe above was skipped.
+        this.capability = {
+            ...this.capability,
+            discoveredMaxTokens: Math.max(128, discovered || seed),
+            vectorDim: metadata.vectorDim || this.capability.vectorDim,
+            updatedAt: new Date().toISOString(),
+            source: "probe",
+        };
+        await this.registry.set(this.modelKey, this.capability);
+        this.logger.info(`[Embedding] calibrated capability modelKey=${this.modelKey} maxTokens=${this.capability.discoveredMaxTokens} vectorDim=${this.capability.vectorDim}`);
+    }
+    /**
+     * Report the expected embedding dimension once initialization has finished.
+     *
+     * @returns The capability record's vectorDim, or `config.dimensions` when that is unset.
+     */
+    async getVectorDimensionHint() {
+        await this.ready;
+        const { vectorDim } = this.capability;
+        return vectorDim ? vectorDim : this.config.dimensions;
+    }
+    /**
+     * Report the current registry key once initialization has finished.
+     *
+     * @returns The `<provider>::<endpoint>::<model>` key currently held by the client.
+     */
+    async getModelKey() {
+        await this.ready;
+        const { modelKey } = this;
+        return modelKey;
+    }
     /**
-     * Get embedding vector for text
-     * Fallback to hash-based embedding if API unavailable
+     * Backward-compatible method: returns only the vector part of embedDetailed().
+     *
+     * @param text Input passed through unchanged to embedDetailed().
+     * @returns The embedding vector; the accompanying metadata is discarded.
      */
     async embed(text) {
+        const result = await this.embedDetailed(text);
+        return result.vector;
+    }
+    /**
+     * New method with calibration-aware adaptive chunking + metadata
+     *
+     * Flow: wait for initialization, normalize the input, join the parts with a blank
+     * line, split the result into chunks that fit the token budget, embed the chunks and
+     * combine them (a single vector is L2-normalized; several are weighted by estimated
+     * token count).
+     *
+     * Failure handling:
+     *  - empty input: hash embedding of "" is returned without calling the API;
+     *  - context-length error: the capability record is refined and the next, smaller
+     *    budget multiplier is tried;
+     *  - any other error: hash embedding of the merged text is returned immediately;
+     *  - all multipliers exhausted: hash embedding of the merged text is returned.
+     *
+     * @param text Input accepted by normalizeInput().
+     * @returns `{ vector, metadata }`; `metadata.embedding_fallback_hash` tells whether the
+     *   vector came from the hash fallback.
+     */
+    async embedDetailed(text) {
+        await this.ready;
         const normalizedInput = this.normalizeInput(text);
-        // Validate/filter empty input BEFORE calling embedding API
         if (normalizedInput.length === 0) {
             this.logger.warn("[Embedding] Skip API call: empty input after trim/filter");
-            return this.embedFromHash("");
+            return {
+                vector: this.embedFromHash(""),
+                metadata: {
+                    embedding_chunked: false,
+                    embedding_chunks_count: 0,
+                    embedding_chunking_strategy: "array_batch_weighted_avg",
+                    embedding_model: this.config.model,
+                    embedding_model_key: this.modelKey,
+                    embedding_provider: this.provider,
+                    embedding_max_tokens: this.capability.discoveredMaxTokens,
+                    embedding_safe_chunk_tokens: this.tokenBudget(),
+                    embedding_source: this.capability.source,
+                    embedding_fallback_hash: true,
+                },
+            };
         }
-        // Try API first
-        try {
-            return await this.embedFromApi(normalizedInput);
+        const mergedText = normalizedInput.join("\n\n");
+        // Computed once: later multipliers scale this value even if the capability record
+        // is refined by a context-length error in between.
+        const baseBudget = this.tokenBudget();
+        // retry policy with progressive budget reduction
+        const safetyMultipliers = [1, 0.8, 0.65, 0.5, 0.4, 0.3];
+        for (const mul of safetyMultipliers) {
+            const safeChunkTokens = Math.max(128, Math.floor(baseBudget * mul));
+            const chunks = this.chunkTextByTokenBudget(mergedText, safeChunkTokens);
+            const chunkWeights = chunks.map((c) => this.estimateTokens(c));
+            // hard guard: never send chunk above discovered budget
+            if (chunks.some((chunk) => this.estimateTokens(chunk) > safeChunkTokens + 2)) {
+                continue;
+            }
+            try {
+                const vectors = await this.embedChunksFromApi(chunks);
+                const vector = vectors.length === 1
+                    ? this.l2Normalize(vectors[0])
+                    : this.weightedAverage(vectors, chunkWeights);
+                return {
+                    vector,
+                    metadata: {
+                        embedding_chunked: chunks.length > 1,
+                        embedding_chunks_count: chunks.length,
+                        embedding_chunking_strategy: "array_batch_weighted_avg",
+                        embedding_model: this.config.model,
+                        embedding_model_key: this.modelKey,
+                        embedding_provider: this.provider,
+                        embedding_max_tokens: this.capability.discoveredMaxTokens,
+                        embedding_safe_chunk_tokens: safeChunkTokens,
+                        embedding_source: this.capability.source,
+                        embedding_fallback_hash: false,
+                    },
+                };
+            }
+            catch (error) {
+                if (this.isContextLengthError(error)) {
+                    await this.updateCapabilityFromContextError(error);
+                    // The budget printed here is the one that just failed; the next loop
+                    // iteration applies the next multiplier.
+                    this.logger.warn(`[Embedding] context-length detected. retry with smaller chunk budget=${safeChunkTokens} modelKey=${this.modelKey}`);
+                    continue;
+                }
+                // non context-length error -> fallback hash immediately
+                this.logger.error(`[Embedding][HIGH] API failed; fallback to hash embedding. reason=${error.message} modelKey=${this.modelKey}`);
+                return {
+                    vector: this.embedFromHash(mergedText),
+                    metadata: {
+                        embedding_chunked: chunks.length > 1,
+                        embedding_chunks_count: chunks.length,
+                        embedding_chunking_strategy: "array_batch_weighted_avg",
+                        embedding_model: this.config.model,
+                        embedding_model_key: this.modelKey,
+                        embedding_provider: this.provider,
+                        embedding_max_tokens: this.capability.discoveredMaxTokens,
+                        embedding_safe_chunk_tokens: safeChunkTokens,
+                        embedding_source: this.capability.source,
+                        embedding_fallback_hash: true,
+                    },
+                };
+            }
         }
-        catch (error) {
-            // Fallback to deterministic hash-based embedding
-            return this.embedFromHash(normalizedInput[0]);
+        // exhausted retries
+        this.logger.error(`[Embedding][CRITICAL] exhausted context retries; fallback hash modelKey=${this.modelKey}`);
+        return {
+            vector: this.embedFromHash(mergedText),
+            metadata: {
+                embedding_chunked: true,
+                embedding_chunks_count: Math.max(1, this.chunkTextByTokenBudget(mergedText, Math.max(128, Math.floor(baseBudget * 0.3))).length),
+                embedding_chunking_strategy: "array_batch_weighted_avg",
+                embedding_model: this.config.model,
+                embedding_model_key: this.modelKey,
+                embedding_provider: this.provider,
+                embedding_max_tokens: this.capability.discoveredMaxTokens,
+                embedding_safe_chunk_tokens: Math.max(128, Math.floor(baseBudget * 0.3)),
+                embedding_source: this.capability.source,
+                embedding_fallback_hash: true,
+            },
+        };
+    }
+    /**
+     * Embed a list of text chunks, trying each candidate endpoint in order.
+     *
+     * For each endpoint, `activeEndpoint`, `provider` and `modelKey` are updated before the
+     * request is made, so they reflect the last endpoint attempted even when it fails.
+     * OpenAI-style endpoints receive all chunks in one request; other endpoints receive one
+     * request per chunk when there are several.
+     *
+     * @param chunks Non-empty array of strings.
+     * @returns One vector per chunk, in order.
+     * @throws Error when `chunks` is empty; a context-length error as soon as it occurs;
+     *   otherwise the last error seen after every endpoint has failed.
+     */
+    async embedChunksFromApi(chunks) {
+        if (chunks.length === 0) {
+            throw new Error("No chunks to embed");
         }
+        const endpoints = this.resolveEmbeddingEndpoints(this.config.embeddingApiUrl);
+        let lastError = null;
+        for (const url of endpoints) {
+            const useOpenAiFormat = /\/v1\/embeddings\/?$/i.test(url);
+            try {
+                this.activeEndpoint = url;
+                this.provider = this.detectProvider(url);
+                this.modelKey = this.buildModelKey(this.provider, this.activeEndpoint);
+                if (!useOpenAiFormat && chunks.length > 1) {
+                    // Ollama /api/embeddings: sequential requests
+                    const vectors = [];
+                    for (const c of chunks) {
+                        vectors.push(await this.embedSingle(url, false, c));
+                    }
+                    return vectors;
+                }
+                const vectors = await this.embedBatch(url, useOpenAiFormat, chunks);
+                if (vectors.length !== chunks.length) {
+                    throw new Error(`Embedding vector count mismatch: expected=${chunks.length}, got=${vectors.length}`);
+                }
+                return vectors;
+            }
+            catch (error) {
+                lastError = error;
+                // Trying another endpoint cannot help with an over-long input; let the
+                // caller shrink the chunks instead.
+                if (this.isContextLengthError(error)) {
+                    throw error;
+                }
+                // This 404/429 case is already covered by the more general check that follows.
+                if (error instanceof EmbeddingHttpError &&
+                    [404, 429].includes(error.status) &&
+                    endpoints.length > 1 &&
+                    url !== endpoints[endpoints.length - 1]) {
+                    continue;
+                }
+                // Any failure moves on to the next endpoint while one remains.
+                if (url !== endpoints[endpoints.length - 1]) {
+                    continue;
+                }
+            }
+        }
+        throw lastError || new Error("Embedding API error: no endpoint succeeded");
     }
-    /**
-     * Get embedding from API
-     */
-    async embedFromApi(input) {
-        this.logger.debug?.(`[Embedding] Calling API with inputCount=${input.length} firstItemLength=${input[0]?.length || 0} preview=${JSON.stringify((input[0] || "").slice(0, 80))}`);
+    async embedBatch(url, useOpenAiFormat, chunks) {
         const controller = new AbortController();
         const timeoutId = setTimeout(() => controller.abort(), this.config.timeout);
         try {
-            const endpoints = this.resolveEmbeddingEndpoints(this.config.embeddingApiUrl);
-            let lastError = null;
-            for (const url of endpoints) {
-                const useOpenAiFormat = this.isOpenAIEmbeddingEndpoint(url);
+            const max429Retries = 3;
+            for (let attempt = 0; attempt <= max429Retries; attempt++) {
                 const response = await fetch(url, {
                     method: "POST",
-                    headers: {
-                        "Content-Type": "application/json",
-                    },
+                    headers: { "Content-Type": "application/json" },
                     body: JSON.stringify(useOpenAiFormat
-                        ? {
-                            model: this.config.model,
-                            input,
-                        }
-                        : {
-                            model: this.config.model,
-                            prompt: input[0],
-                        }),
+                        ? { model: this.config.model, input: chunks }
+                        : { model: this.config.model, prompt: chunks[0] }),
                     signal: controller.signal,
                 });
+                if (response.status === 429 && attempt < max429Retries) {
+                    const backoffMs = Math.min(4000, 300 * Math.pow(2, attempt));
+                    this.logger.warn(`[Embedding] 429 rate limit. retry in ${backoffMs}ms (attempt ${attempt + 1}/${max429Retries})`);
+                    await new Promise((r) => setTimeout(r, backoffMs));
+                    continue;
+                }
                 if (!response.ok) {
                     const errorText = await response.text().catch(() => "Unknown error");
-                    this.logger.error(`[Embedding] HTTP ${response.status} @ ${url}: ${errorText.substring(0, 200)}`);
-                    if (response.status === 400) {
-                        this.logger.error(`[Embedding] 400 schema debug @ ${url}: ${JSON.stringify({
-                            model: this.config.model,
-                            inputType: Array.isArray(input) ? "array" : typeof input,
-                            inputLength: Array.isArray(input) ? input.length : 0,
-                            firstItemLength: input[0]?.length || 0,
-                        })}`);
-                    }
-                    // If this endpoint not found and we still have fallback endpoint, continue.
-                    if (response.status === 404 && endpoints.length > 1 && url !== endpoints[endpoints.length - 1]) {
-                        continue;
-                    }
-                    lastError = new Error(`Embedding API error: ${response.status}`);
-                    break;
+                    const preview = errorText.substring(0, 500);
+                    throw new EmbeddingHttpError(response.status, preview);
                 }
                 const data = await response.json();
-                // Ollama API format: { embedding: [...] }
-                if (data.embedding && Array.isArray(data.embedding)) {
-                    clearTimeout(timeoutId);
-                    return data.embedding;
+                if (!useOpenAiFormat) {
+                    if (data.embedding && Array.isArray(data.embedding)) {
+                        return [data.embedding];
+                    }
+                    throw new Error("Invalid Ollama embedding response format");
                 }
-                // OpenAI-compatible format: { data: [{ embedding: [...] }] }
-                if (Array.isArray(data.data) && data.data[0]?.embedding && Array.isArray(data.data[0].embedding)) {
-                    clearTimeout(timeoutId);
-                    return data.data[0].embedding;
+                if (Array.isArray(data.data)) {
+                    const vectors = data.data
+                        .map((d) => d?.embedding)
+                        .filter((v) => Array.isArray(v));
+                    if (vectors.length > 0)
+                        return vectors;
                 }
-                this.logger.error(`[Embedding] Unexpected response format: ${JSON.stringify(data).substring(0, 200)}`);
-                lastError = new Error("Invalid embedding response format");
-                break;
+                throw new Error("Invalid OpenAI embedding response format");
             }
-            clearTimeout(timeoutId);
-            throw lastError || new Error("Embedding API error: no endpoint succeeded");
+            throw new Error("Embedding API 429 retries exhausted");
         }
         catch (error) {
             if (error.name === "AbortError") {
@@ -131,24 +537,30 @@ export class EmbeddingClient {
             }
             throw error;
         }
+        finally {
+            clearTimeout(timeoutId);
+        }
+    }
+    /**
+     * Embed one chunk by delegating to embedBatch() with a one-element list.
+     *
+     * @returns The first vector of the batch result.
+     * @throws Error("No embedding vector returned") when the batch result has no first vector.
+     */
+    async embedSingle(url, useOpenAiFormat, chunk) {
+        const [vector] = await this.embedBatch(url, useOpenAiFormat, [chunk]);
+        if (!vector)
+            throw new Error("No embedding vector returned");
+        return vector;
     }
     /**
      * Fallback: Generate embedding from text hash (deterministic)
+     *
+     * A 32-bit rolling hash of the text's UTF-16 code units seeds a sine sequence of
+     * `config.dimensions` values, which is then L2-normalized.
+     *
+     * @param text Text to hash; "" yields hash 0.
+     * @returns A vector of length `config.dimensions`.
      */
     embedFromHash(text) {
-        const hash = text.split('').reduce((a, b) => {
+        const hash = text.split("").reduce((a, b) => {
             a = ((a << 5) - a) + b.charCodeAt(0);
+            // `a & a` truncates the running value to a 32-bit integer.
             return a & a;
         }, 0);
         const embedding = [];
-        for (let i = 0; i < this.dimensions; i++) {
+        for (let i = 0; i < this.config.dimensions; i++) {
             embedding.push(Math.sin(hash + i) * 0.1);
         }
-        return embedding;
+        return this.l2Normalize(embedding);
     }
-    /**
-     * Calculate cosine similarity
-     */
     cosineSimilarity(a, b) {
         if (a.length !== b.length) {
             throw new Error("Vector dimensions mismatch");