kongbrain 0.4.3 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/ci.yml +25 -18
- package/.github/workflows/pr-check.yml +4 -4
- package/CHANGELOG.md +47 -0
- package/README.github.md +53 -3
- package/README.md +29 -3
- package/README.npm.md +29 -3
- package/SKILL.md +1 -1
- package/bin/kongbrain-reembed.ts +143 -0
- package/openclaw.plugin.json +37 -7
- package/package.json +4 -1
- package/src/causal.ts +4 -1
- package/src/cognitive-bootstrap.ts +1 -0
- package/src/concept-extract.ts +4 -2
- package/src/config.ts +56 -10
- package/src/embeddings-openai.ts +232 -0
- package/src/embeddings.ts +48 -6
- package/src/identity.ts +2 -0
- package/src/index.ts +54 -5
- package/src/memory-daemon.ts +1 -1
- package/src/migrate-reembed.ts +305 -0
- package/src/reflection.ts +10 -4
- package/src/schema.surql +29 -0
- package/src/skills.ts +14 -5
- package/src/supersedes.ts +2 -1
- package/src/surreal.ts +77 -19
- package/src/workspace-migrate.ts +3 -0
package/src/config.ts
CHANGED
|
@@ -10,9 +10,26 @@ export interface SurrealConfig {
|
|
|
10
10
|
db: string;
|
|
11
11
|
}
|
|
12
12
|
|
|
13
|
+
export type EmbeddingProvider = "local" | "openai-compat";
|
|
14
|
+
|
|
15
|
+
export interface OpenAICompatEmbeddingConfig {
|
|
16
|
+
/** Model name passed in the embeddings request body (e.g. "text-embedding-3-small"). */
|
|
17
|
+
model: string;
|
|
18
|
+
/** Endpoint base URL. Default: "https://api.openai.com/v1". */
|
|
19
|
+
baseURL: string;
|
|
20
|
+
/** Name of the env var holding the API key. Default: "OPENAI_API_KEY". */
|
|
21
|
+
apiKeyEnv: string;
|
|
22
|
+
}
|
|
23
|
+
|
|
13
24
|
export interface EmbeddingConfig {
|
|
14
|
-
|
|
25
|
+
/** Which provider to use. Default "local" (BGE-M3 via node-llama-cpp). */
|
|
26
|
+
provider: EmbeddingProvider;
|
|
27
|
+
/** Vector dimensionality the active provider should produce. */
|
|
15
28
|
dimensions: number;
|
|
29
|
+
/** Path to the local GGUF model — only consulted when provider === "local". */
|
|
30
|
+
modelPath: string;
|
|
31
|
+
/** OpenAI-compatible provider settings — only consulted when provider === "openai-compat". */
|
|
32
|
+
openaiCompat: OpenAICompatEmbeddingConfig;
|
|
16
33
|
}
|
|
17
34
|
|
|
18
35
|
export interface ThresholdConfig {
|
|
@@ -34,6 +51,43 @@ export interface KongBrainConfig {
|
|
|
34
51
|
thresholds: ThresholdConfig;
|
|
35
52
|
}
|
|
36
53
|
|
|
54
|
+
function parseEmbeddingConfig(raw: Record<string, unknown>): EmbeddingConfig {
|
|
55
|
+
const openaiCompatRaw = (raw.openaiCompat ?? {}) as Record<string, unknown>;
|
|
56
|
+
|
|
57
|
+
// Provider precedence: env var > plugin config > default "local"
|
|
58
|
+
const rawProvider =
|
|
59
|
+
process.env.KONGBRAIN_EMBED_PROVIDER ??
|
|
60
|
+
(typeof raw.provider === "string" ? raw.provider : null);
|
|
61
|
+
const provider: EmbeddingProvider =
|
|
62
|
+
rawProvider === "openai-compat" ? "openai-compat" : "local";
|
|
63
|
+
|
|
64
|
+
return {
|
|
65
|
+
provider,
|
|
66
|
+
dimensions: typeof raw.dimensions === "number" ? raw.dimensions : 1024,
|
|
67
|
+
modelPath:
|
|
68
|
+
process.env.EMBED_MODEL_PATH ??
|
|
69
|
+
(typeof raw.modelPath === "string"
|
|
70
|
+
? raw.modelPath
|
|
71
|
+
: join(homedir(), ".node-llama-cpp", "models", "bge-m3-q4_k_m.gguf")),
|
|
72
|
+
openaiCompat: {
|
|
73
|
+
model:
|
|
74
|
+
typeof openaiCompatRaw.model === "string"
|
|
75
|
+
? openaiCompatRaw.model
|
|
76
|
+
: "text-embedding-3-small",
|
|
77
|
+
// baseURL: env wins (matches the official openai SDK convention)
|
|
78
|
+
baseURL:
|
|
79
|
+
process.env.OPENAI_BASE_URL ??
|
|
80
|
+
(typeof openaiCompatRaw.baseURL === "string"
|
|
81
|
+
? openaiCompatRaw.baseURL
|
|
82
|
+
: "https://api.openai.com/v1"),
|
|
83
|
+
apiKeyEnv:
|
|
84
|
+
typeof openaiCompatRaw.apiKeyEnv === "string"
|
|
85
|
+
? openaiCompatRaw.apiKeyEnv
|
|
86
|
+
: "OPENAI_API_KEY",
|
|
87
|
+
},
|
|
88
|
+
};
|
|
89
|
+
}
|
|
90
|
+
|
|
37
91
|
/**
|
|
38
92
|
* Parse plugin config from openclaw.plugin.json configSchema values,
|
|
39
93
|
* with env var overrides and sensible defaults.
|
|
@@ -66,15 +120,7 @@ export function parsePluginConfig(raw?: Record<string, unknown>): KongBrainConfi
|
|
|
66
120
|
ns: (typeof surreal.ns === "string" ? surreal.ns : null) ?? process.env.SURREAL_NS ?? "kong",
|
|
67
121
|
db: (typeof surreal.db === "string" ? surreal.db : null) ?? process.env.SURREAL_DB ?? "memory",
|
|
68
122
|
},
|
|
69
|
-
embedding:
|
|
70
|
-
modelPath:
|
|
71
|
-
process.env.EMBED_MODEL_PATH ??
|
|
72
|
-
(typeof embedding.modelPath === "string"
|
|
73
|
-
? embedding.modelPath
|
|
74
|
-
: join(homedir(), ".node-llama-cpp", "models", "bge-m3-q4_k_m.gguf")),
|
|
75
|
-
dimensions:
|
|
76
|
-
typeof embedding.dimensions === "number" ? embedding.dimensions : 1024,
|
|
77
|
-
},
|
|
123
|
+
embedding: parseEmbeddingConfig(embedding),
|
|
78
124
|
thresholds: {
|
|
79
125
|
daemonTokenThreshold:
|
|
80
126
|
typeof thresholds.daemonTokenThreshold === "number" ? thresholds.daemonTokenThreshold : 4000,
|
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
import type { EmbeddingService } from "./embeddings.js";
|
|
2
|
+
import type { EmbeddingConfig } from "./config.js";
|
|
3
|
+
import { swallow } from "./errors.js";
|
|
4
|
+
import { log } from "./log.js";
|
|
5
|
+
|
|
6
|
+
/**
|
|
7
|
+
* OpenAI-compatible embedding service. Speaks the /v1/embeddings shape that
|
|
8
|
+
* OpenAI, Azure OpenAI, Together, Anyscale, vLLM, LM Studio, Ollama (compat
|
|
9
|
+
* endpoint), DeepInfra, and others all conform to. Switching between any of
|
|
10
|
+
* them is a baseURL change.
|
|
11
|
+
*
|
|
12
|
+
* The vectors this service produces are NOT in the same space as a
|
|
13
|
+
* different provider's vectors, even at the same dimensionality. The
|
|
14
|
+
* providerId field is what the rest of the system uses to keep them apart.
|
|
15
|
+
*/
|
|
16
|
+
export class OpenAICompatEmbeddingService implements EmbeddingService {
  readonly providerId: string;
  readonly dimensions: number;

  private readonly model: string;
  private readonly baseURL: string;
  private readonly apiKey: string | null;
  private ready = false;

  /** Per-batch limit. OpenAI accepts up to 2048 inputs; most compat servers are stricter. */
  private readonly maxBatchSize = 96;

  constructor(config: EmbeddingConfig) {
    this.model = config.openaiCompat.model;
    // Strip trailing slashes so `${baseURL}/embeddings` never doubles a "/".
    this.baseURL = config.openaiCompat.baseURL.replace(/\/+$/, "");
    this.dimensions = config.dimensions;
    // Resolve the API key from the named env var. Empty string is treated as
    // missing — handled at initialize() time so the error is clear and early.
    const keyName = config.openaiCompat.apiKeyEnv;
    const keyVal = process.env[keyName];
    this.apiKey = keyVal && keyVal.length > 0 ? keyVal : null;

    // providerId encodes (provider, model, dim) so vectors written today can
    // be distinguished from the same model at a different output dim later.
    this.providerId = `openai-compat-${this.model}-${this.dimensions}d`;
  }

  /**
   * Validate config (API key present, sane dimensions) and mark the service
   * ready. Returns true on first successful init, false if already ready.
   * Throws (rather than degrading) on config problems so misconfiguration
   * surfaces at startup instead of on the first embed call.
   */
  async initialize(): Promise<boolean> {
    if (this.ready) return false;
    if (!this.apiKey) {
      throw new Error(
        `OpenAI-compatible embeddings: API key not set. Configure embedding.openaiCompat.apiKeyEnv (default OPENAI_API_KEY) and put the key in that env var.`,
      );
    }
    // Sanity: require dimensions to be set. The OpenAI text-embedding-3-*
    // models support a `dimensions` parameter; non-OpenAI compat servers
    // generally ignore it and return their native dim. We verify on the
    // first embed() call rather than here so we don't burn a request just
    // to validate config.
    if (!Number.isFinite(this.dimensions) || this.dimensions <= 0) {
      throw new Error(
        `OpenAI-compat embeddings: invalid dimensions ${this.dimensions}`,
      );
    }
    this.ready = true;
    return true;
  }

  /** Embed a single text by delegating to a one-element batch request. */
  async embed(text: string): Promise<number[]> {
    const result = await this.request([text]);
    return result[0];
  }

  /** Embed many texts, issuing one request per chunk of maxBatchSize inputs. */
  async embedBatch(texts: string[]): Promise<number[][]> {
    if (texts.length === 0) return [];
    if (texts.length <= this.maxBatchSize) return this.request(texts);
    // Split into chunks so we never exceed the per-request limit.
    const out: number[][] = [];
    for (let i = 0; i < texts.length; i += this.maxBatchSize) {
      const chunk = texts.slice(i, i + this.maxBatchSize);
      const vecs = await this.request(chunk);
      out.push(...vecs);
    }
    return out;
  }

  /** True once initialize() has succeeded (cleared again by dispose()). */
  isAvailable(): boolean {
    return this.ready;
  }

  /** No persistent connection or model handle to release; just drop readiness. */
  async dispose(): Promise<void> {
    this.ready = false;
  }

  /**
   * POST one batch to /embeddings with retry-and-backoff on 429.
   * 401/403 fail hard (config problem, retry will not help).
   */
  private async request(input: string[]): Promise<number[][]> {
    if (!this.ready) throw new Error("OpenAI-compat embeddings not initialized");
    const url = `${this.baseURL}/embeddings`;
    const body = {
      model: this.model,
      input,
      // text-embedding-3-* honors `dimensions`. Compat servers that ignore
      // it will return their native dim — we verify after the fact.
      dimensions: this.dimensions,
      encoding_format: "float",
    };

    const maxAttempts = 4;
    let attempt = 0;
    let lastErr: unknown = null;
    while (attempt < maxAttempts) {
      attempt++;
      let res: Response;
      try {
        res = await fetch(url, {
          method: "POST",
          headers: {
            "Content-Type": "application/json",
            Authorization: `Bearer ${this.apiKey}`,
          },
          body: JSON.stringify(body),
        });
      } catch (e) {
        // Network-level failure — retry with backoff.
        lastErr = e;
        await this.sleep(backoffMs(attempt));
        continue;
      }

      if (res.ok) {
        const json = await res.json() as {
          data?: Array<{ embedding: number[]; index: number }>;
        };
        const data = json.data ?? [];
        // Sort by index — most servers return in order but the spec only
        // guarantees the index field, so we honor it.
        data.sort((a, b) => a.index - b.index);
        const vecs = data.map(d => d.embedding);
        if (vecs.length !== input.length) {
          throw new Error(
            `OpenAI-compat embeddings: returned ${vecs.length} vectors for ${input.length} inputs`,
          );
        }
        // Verify dim once per response so a misconfigured server fails
        // loudly instead of writing wrong-sized vectors into the DB.
        if (vecs[0].length !== this.dimensions) {
          throw new Error(
            `OpenAI-compat embeddings: server returned ${vecs[0].length}-dim vectors but config requested ${this.dimensions}. ` +
            `For non-OpenAI providers that ignore the 'dimensions' parameter, set embedding.dimensions in plugin config to match the server's native output.`,
          );
        }
        return vecs;
      }

      // Hard fail on auth / not found — retrying will not help.
      if (res.status === 401 || res.status === 403) {
        const text = await readBodyText(res);
        throw new Error(
          `OpenAI-compat embeddings: auth failed (${res.status}). Check the API key in env var. Response: ${text.slice(0, 200)}`,
        );
      }
      if (res.status === 404) {
        const text = await readBodyText(res);
        throw new Error(
          `OpenAI-compat embeddings: endpoint not found at ${url}. Check baseURL. Response: ${text.slice(0, 200)}`,
        );
      }

      // 429 (rate limit) and 5xx — retry with backoff. Honor Retry-After
      // when present. Note: OpenAI returns HTTP 429 for both transient
      // rate limits and "out of credits" (insufficient_quota) — the
      // latter is not retryable, so peek at the body and fail fast.
      if (res.status === 429 || res.status >= 500) {
        const text = await readBodyText(res);
        if (res.status === 429 && /insufficient_quota/i.test(text)) {
          throw new Error(
            `OpenAI-compat embeddings: insufficient quota on this API key. ` +
            `Add credits / a payment method at the provider's billing page, or switch keys. ` +
            `Response: ${text.slice(0, 200)}`,
          );
        }
        const retryAfter = parseRetryAfter(res.headers.get("retry-after"));
        const wait = retryAfter ?? backoffMs(attempt);
        log.warn(`[embeddings:openai] ${res.status} from ${url}, retrying in ${wait}ms (attempt ${attempt}/${maxAttempts})`);
        lastErr = new Error(`HTTP ${res.status}: ${text.slice(0, 200)}`);
        await this.sleep(wait);
        continue;
      }

      // Other 4xx — body usually has the reason. Don't retry.
      const text = await readBodyText(res);
      throw new Error(
        `OpenAI-compat embeddings: HTTP ${res.status}. Response: ${text.slice(0, 300)}`,
      );
    }

    throw new Error(
      `OpenAI-compat embeddings: exhausted ${maxAttempts} attempts. Last error: ${String(lastErr)}`,
    );
  }

  /** Promise-based delay used between retry attempts. */
  private sleep(ms: number): Promise<void> {
    return new Promise(r => setTimeout(r, ms));
  }
}
|
|
204
|
+
|
|
205
|
+
/** Exponential backoff with jitter. 1s, 2s, 4s, 8s base, +/- 25%. */
|
|
206
|
+
function backoffMs(attempt: number): number {
|
|
207
|
+
const base = 1000 * Math.pow(2, attempt - 1);
|
|
208
|
+
const jitter = 1 + (Math.random() * 0.5 - 0.25);
|
|
209
|
+
return Math.round(base * jitter);
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
/** Parse Retry-After header (seconds or HTTP-date) into ms; null if absent or unparseable. */
|
|
213
|
+
function parseRetryAfter(value: string | null): number | null {
|
|
214
|
+
if (!value) return null;
|
|
215
|
+
const asInt = parseInt(value, 10);
|
|
216
|
+
if (Number.isFinite(asInt)) return asInt * 1000;
|
|
217
|
+
const asDate = Date.parse(value);
|
|
218
|
+
if (Number.isFinite(asDate)) {
|
|
219
|
+
const ms = asDate - Date.now();
|
|
220
|
+
return ms > 0 ? ms : 0;
|
|
221
|
+
}
|
|
222
|
+
return null;
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
async function readBodyText(res: Response): Promise<string> {
|
|
226
|
+
try {
|
|
227
|
+
return await res.text();
|
|
228
|
+
} catch (e) {
|
|
229
|
+
swallow("embeddings:openai:readBody", e);
|
|
230
|
+
return "";
|
|
231
|
+
}
|
|
232
|
+
}
|
package/src/embeddings.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { existsSync } from "node:fs";
|
|
2
2
|
import type { EmbeddingConfig } from "./config.js";
|
|
3
|
+
import { OpenAICompatEmbeddingService } from "./embeddings-openai.js";
|
|
3
4
|
import { swallow } from "./errors.js";
|
|
4
5
|
import { log } from "./log.js";
|
|
5
6
|
|
|
@@ -8,8 +9,38 @@ import { log } from "./log.js";
|
|
|
8
9
|
type LlamaEmbeddingContext = import("node-llama-cpp").LlamaEmbeddingContext;
|
|
9
10
|
type LlamaModel = import("node-llama-cpp").LlamaModel;
|
|
10
11
|
|
|
12
|
+
/**
|
|
13
|
+
* Provider-agnostic embedding service.
|
|
14
|
+
*
|
|
15
|
+
* Implementations must guarantee that vectors they produce are in the same
|
|
16
|
+
* vector space across calls within a single instance. Different implementations
|
|
17
|
+
* (or different models within the same implementation) produce vectors in
|
|
18
|
+
* different spaces and must not be compared with cosine similarity. The
|
|
19
|
+
* `providerId` field is the stable tag used to detect cross-space mixing.
|
|
20
|
+
*/
|
|
21
|
+
export interface EmbeddingService {
|
|
22
|
+
/** Stable identifier for the (provider, model, dimension) tuple. */
|
|
23
|
+
readonly providerId: string;
|
|
24
|
+
/** Dimensionality of the vectors this service produces. */
|
|
25
|
+
readonly dimensions: number;
|
|
26
|
+
|
|
27
|
+
/** Initialize the underlying model. Returns true on first init, false if already ready. */
|
|
28
|
+
initialize(): Promise<boolean>;
|
|
29
|
+
/** Return the embedding vector for a single text. */
|
|
30
|
+
embed(text: string): Promise<number[]>;
|
|
31
|
+
/** Return embedding vectors for an array of texts. */
|
|
32
|
+
embedBatch(texts: string[]): Promise<number[][]>;
|
|
33
|
+
/** True once initialize() has succeeded. */
|
|
34
|
+
isAvailable(): boolean;
|
|
35
|
+
/** Release any underlying resources (model handles, sockets, etc.). */
|
|
36
|
+
dispose(): Promise<void>;
|
|
37
|
+
}
|
|
38
|
+
|
|
11
39
|
/** BGE-M3 embedding service (1024-dim via GGUF) with an LRU cache of up to 512 entries. */
|
|
12
|
-
export class EmbeddingService {
|
|
40
|
+
export class LocalEmbeddingService implements EmbeddingService {
|
|
41
|
+
readonly providerId: string;
|
|
42
|
+
readonly dimensions: number;
|
|
43
|
+
|
|
13
44
|
private model: LlamaModel | null = null;
|
|
14
45
|
private ctx: LlamaEmbeddingContext | null = null;
|
|
15
46
|
private ready = false;
|
|
@@ -17,9 +48,11 @@ export class EmbeddingService {
|
|
|
17
48
|
private cache = new Map<string, number[]>();
|
|
18
49
|
private readonly maxCacheSize = 512;
|
|
19
50
|
|
|
20
|
-
constructor(private readonly config: EmbeddingConfig) {
|
|
51
|
+
constructor(private readonly config: EmbeddingConfig) {
|
|
52
|
+
this.providerId = "local-bge-m3";
|
|
53
|
+
this.dimensions = config.dimensions;
|
|
54
|
+
}
|
|
21
55
|
|
|
22
|
-
/** Initialize the embedding model. Returns true if freshly loaded, false if already ready. */
|
|
23
56
|
async initialize(): Promise<boolean> {
|
|
24
57
|
if (this.ready) return false;
|
|
25
58
|
if (!existsSync(this.config.modelPath)) {
|
|
@@ -42,19 +75,16 @@ export class EmbeddingService {
|
|
|
42
75
|
return true;
|
|
43
76
|
}
|
|
44
77
|
|
|
45
|
-
/** Return the embedding vector for text, serving from LRU cache on repeat calls. */
|
|
46
78
|
async embed(text: string): Promise<number[]> {
|
|
47
79
|
if (!this.ready || !this.ctx) throw new Error("Embeddings not initialized");
|
|
48
80
|
const cached = this.cache.get(text);
|
|
49
81
|
if (cached) {
|
|
50
|
-
// Move to end for LRU freshness
|
|
51
82
|
this.cache.delete(text);
|
|
52
83
|
this.cache.set(text, cached);
|
|
53
84
|
return cached;
|
|
54
85
|
}
|
|
55
86
|
const result = await this.ctx.getEmbeddingFor(text);
|
|
56
87
|
const vec = Array.from(result.vector);
|
|
57
|
-
// Evict oldest if at capacity
|
|
58
88
|
if (this.cache.size >= this.maxCacheSize) {
|
|
59
89
|
this.cache.delete(this.cache.keys().next().value!);
|
|
60
90
|
}
|
|
@@ -82,3 +112,15 @@ export class EmbeddingService {
|
|
|
82
112
|
}
|
|
83
113
|
}
|
|
84
114
|
}
|
|
115
|
+
|
|
116
|
+
/** Construct the configured embedding service. Adding a new provider plugs in here. */
|
|
117
|
+
export function createEmbeddingService(config: EmbeddingConfig): EmbeddingService {
|
|
118
|
+
if (config.provider === "openai-compat") {
|
|
119
|
+
// Lazy import keeps the local-only deployment path from paying the cost
|
|
120
|
+
// of parsing the OpenAI module on startup.
|
|
121
|
+
const { OpenAICompatEmbeddingService } = require("./embeddings-openai.js") as
|
|
122
|
+
typeof import("./embeddings-openai.js");
|
|
123
|
+
return new OpenAICompatEmbeddingService(config);
|
|
124
|
+
}
|
|
125
|
+
return new LocalEmbeddingService(config);
|
|
126
|
+
}
|
package/src/identity.ts
CHANGED
|
@@ -103,6 +103,7 @@ export async function seedIdentity(
|
|
|
103
103
|
chunk_index: i,
|
|
104
104
|
text: chunk.text,
|
|
105
105
|
embedding: vec,
|
|
106
|
+
embedding_provider: embeddings.providerId,
|
|
106
107
|
importance: chunk.importance,
|
|
107
108
|
},
|
|
108
109
|
},
|
|
@@ -183,6 +184,7 @@ export async function saveUserIdentity(
|
|
|
183
184
|
chunk_index: i,
|
|
184
185
|
text,
|
|
185
186
|
embedding: vec,
|
|
187
|
+
embedding_provider: embeddings.providerId,
|
|
186
188
|
importance: 0.95,
|
|
187
189
|
},
|
|
188
190
|
},
|
package/src/index.ts
CHANGED
|
@@ -10,7 +10,7 @@ import { join, dirname } from "node:path";
|
|
|
10
10
|
import { definePluginEntry } from "openclaw/plugin-sdk/plugin-entry";
|
|
11
11
|
import { parsePluginConfig } from "./config.js";
|
|
12
12
|
import { SurrealStore } from "./surreal.js";
|
|
13
|
-
import {
|
|
13
|
+
import { createEmbeddingService } from "./embeddings.js";
|
|
14
14
|
import { GlobalPluginState, type CompleteFn } from "./state.js";
|
|
15
15
|
import { KongBrainContextEngine } from "./context-engine.js";
|
|
16
16
|
import { createRecallToolDef } from "./tools/recall.js";
|
|
@@ -299,10 +299,44 @@ async function detectGraduationEvent(
|
|
|
299
299
|
}
|
|
300
300
|
}
|
|
301
301
|
|
|
302
|
+
/**
|
|
303
|
+
* Detect rows tagged with a provider other than the one currently active.
|
|
304
|
+
* Pre-existing data stays in the database; PR-B's search-time filter keeps
|
|
305
|
+
* it from corrupting recall, but it becomes invisible until re-embedded.
|
|
306
|
+
* Logging gives the user a clear cue that a migration is needed without
|
|
307
|
+
* refusing to start (the data is intact and reads remain safe).
|
|
308
|
+
*/
|
|
309
|
+
async function checkEmbeddingProviderMismatch(
  store: SurrealStore,
  activeProvider: string,
  logger: { warn: (msg: string) => void },
): Promise<void> {
  // Without a live store there is nothing to count.
  if (!store.isAvailable()) return;
  // Tables whose rows may carry an embedding vector (per the query below).
  const tables = ["turn", "concept", "memory", "artifact", "identity_chunk", "skill", "reflection", "monologue"];
  let mismatched = 0;
  for (const t of tables) {
    try {
      // GROUP ALL collapses the matches into a single aggregate row;
      // only rows that actually have an embedding are counted.
      const rows = await store.queryFirst<{ count: number }>(
        `SELECT count() AS count FROM ${t} WHERE embedding != NONE AND embedding_provider != $provider GROUP ALL`,
        { provider: activeProvider },
      );
      mismatched += Number(rows[0]?.count ?? 0);
    } catch (e) {
      // Best-effort: a failed count on one table must not block startup.
      swallow.warn(`factory:providerMismatchCount:${t}`, e);
    }
  }
  if (mismatched > 0) {
    // Warn (don't throw): the data is intact and reads remain safe; the
    // mismatched rows are simply invisible to recall until re-embedded.
    logger.warn(
      `Embedding provider mismatch: ${mismatched} rows in the database were embedded by a different provider than the active one (${activeProvider}). ` +
      `These rows are filtered out of similarity search until re-embedded. To migrate, run the re-embed tool (PR-D, coming soon) or revert the embedding.provider config.`,
    );
  }
}
|
|
335
|
+
|
|
302
336
|
export default definePluginEntry({
|
|
303
337
|
id: "kongbrain",
|
|
304
338
|
name: "KongBrain",
|
|
305
|
-
description: "Graph-backed cognitive context engine with SurrealDB persistence and BGE-M3 embeddings.",
|
|
339
|
+
description: "Graph-backed cognitive context engine with SurrealDB persistence and pluggable embeddings (local BGE-M3 or OpenAI-compatible).",
|
|
306
340
|
kind: "context-engine",
|
|
307
341
|
|
|
308
342
|
register(api) {
|
|
@@ -316,7 +350,11 @@ export default definePluginEntry({
|
|
|
316
350
|
let globalState = getGlobalState();
|
|
317
351
|
if (!globalState) {
|
|
318
352
|
const store = new SurrealStore(config.surreal);
|
|
319
|
-
const embeddings =
|
|
353
|
+
const embeddings = createEmbeddingService(config.embedding);
|
|
354
|
+
// Tag every embedding write and filter every embedding search by this
|
|
355
|
+
// provider id, so vectors from different models (different vector
|
|
356
|
+
// spaces) never mix in the same HNSW result set.
|
|
357
|
+
store.setActiveProvider(embeddings.providerId);
|
|
320
358
|
// Build a CompleteFn using pi-ai directly since api.runtime.complete
|
|
321
359
|
// is not available in OpenClaw 2026.3.24 (unreleased feature).
|
|
322
360
|
const apiRef = api;
|
|
@@ -419,10 +457,21 @@ export default definePluginEntry({
|
|
|
419
457
|
throw e;
|
|
420
458
|
}
|
|
421
459
|
|
|
422
|
-
// Initialize
|
|
460
|
+
// Initialize the embedding provider (no-op if already loaded)
|
|
423
461
|
try {
|
|
424
462
|
const freshEmbed = await embeddings.initialize();
|
|
425
|
-
if (freshEmbed)
|
|
463
|
+
if (freshEmbed) {
|
|
464
|
+
const detail = config.embedding.provider === "openai-compat"
|
|
465
|
+
? `${config.embedding.openaiCompat.baseURL} (${config.embedding.openaiCompat.model})`
|
|
466
|
+
: config.embedding.modelPath;
|
|
467
|
+
logger.info(`Embeddings initialized [${embeddings.providerId}]: ${detail}`);
|
|
468
|
+
// One-time check: warn if the DB has rows tagged with a different
|
|
469
|
+
// provider. PR-B's search-time filter prevents silent corruption,
|
|
470
|
+
// but those rows are now invisible to recall until they're
|
|
471
|
+
// re-embedded with the active provider.
|
|
472
|
+
checkEmbeddingProviderMismatch(store, embeddings.providerId, logger)
|
|
473
|
+
.catch(e => swallow.warn("factory:providerMismatchCheck", e));
|
|
474
|
+
}
|
|
426
475
|
} catch (e) {
|
|
427
476
|
logger.warn(`Embeddings init failed — running in degraded mode: ${e}`);
|
|
428
477
|
}
|
package/src/memory-daemon.ts
CHANGED
|
@@ -350,7 +350,7 @@ export async function writeExtractionResults(
|
|
|
350
350
|
trigger_context: String(s.trigger_context ?? "").slice(0, 200),
|
|
351
351
|
tags: ["auto-extracted"],
|
|
352
352
|
session_id: sessionId,
|
|
353
|
-
...(emb ? { embedding: emb } : {}),
|
|
353
|
+
...(emb ? { embedding: emb, embedding_provider: embeddings.providerId } : {}),
|
|
354
354
|
},
|
|
355
355
|
},
|
|
356
356
|
);
|