kongbrain 0.4.4 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/config.ts CHANGED
@@ -10,9 +10,26 @@ export interface SurrealConfig {
10
10
  db: string;
11
11
  }
12
12
 
13
+ export type EmbeddingProvider = "local" | "openai-compat";
14
+
15
+ export interface OpenAICompatEmbeddingConfig {
16
+ /** Model name passed in the embeddings request body (e.g. "text-embedding-3-small"). */
17
+ model: string;
18
+ /** Endpoint base URL. Default: "https://api.openai.com/v1". */
19
+ baseURL: string;
20
+ /** Name of the env var holding the API key. Default: "OPENAI_API_KEY". */
21
+ apiKeyEnv: string;
22
+ }
23
+
13
24
  export interface EmbeddingConfig {
14
- modelPath: string;
25
+ /** Which provider to use. Default "local" (BGE-M3 via node-llama-cpp). */
26
+ provider: EmbeddingProvider;
27
+ /** Vector dimensionality the active provider should produce. */
15
28
  dimensions: number;
29
+ /** Path to the local GGUF model — only consulted when provider === "local". */
30
+ modelPath: string;
31
+ /** OpenAI-compatible provider settings — only consulted when provider === "openai-compat". */
32
+ openaiCompat: OpenAICompatEmbeddingConfig;
16
33
  }
17
34
 
18
35
  export interface ThresholdConfig {
@@ -34,6 +51,51 @@ export interface KongBrainConfig {
34
51
  thresholds: ThresholdConfig;
35
52
  }
36
53
 
54
+ const DEFAULT_EMBEDDING_DIMENSIONS = 1024;
55
+
56
+ function parsePositiveInteger(value: unknown, fallback: number): number {
57
+ return typeof value === "number" && Number.isInteger(value) && value > 0
58
+ ? value
59
+ : fallback;
60
+ }
61
+
62
+ function parseEmbeddingConfig(raw: Record<string, unknown>): EmbeddingConfig {
63
+ const openaiCompatRaw = (raw.openaiCompat ?? {}) as Record<string, unknown>;
64
+
65
+ // Provider precedence: env var > plugin config > default "local"
66
+ const rawProvider =
67
+ process.env.KONGBRAIN_EMBED_PROVIDER ??
68
+ (typeof raw.provider === "string" ? raw.provider : null);
69
+ const provider: EmbeddingProvider =
70
+ rawProvider === "openai-compat" ? "openai-compat" : "local";
71
+
72
+ return {
73
+ provider,
74
+ dimensions: parsePositiveInteger(raw.dimensions, DEFAULT_EMBEDDING_DIMENSIONS),
75
+ modelPath:
76
+ process.env.EMBED_MODEL_PATH ??
77
+ (typeof raw.modelPath === "string"
78
+ ? raw.modelPath
79
+ : join(homedir(), ".node-llama-cpp", "models", "bge-m3-q4_k_m.gguf")),
80
+ openaiCompat: {
81
+ model:
82
+ typeof openaiCompatRaw.model === "string"
83
+ ? openaiCompatRaw.model
84
+ : "text-embedding-3-small",
85
+ // baseURL: env wins (matches the official openai SDK convention)
86
+ baseURL:
87
+ process.env.OPENAI_BASE_URL ??
88
+ (typeof openaiCompatRaw.baseURL === "string"
89
+ ? openaiCompatRaw.baseURL
90
+ : "https://api.openai.com/v1"),
91
+ apiKeyEnv:
92
+ typeof openaiCompatRaw.apiKeyEnv === "string"
93
+ ? openaiCompatRaw.apiKeyEnv
94
+ : "OPENAI_API_KEY",
95
+ },
96
+ };
97
+ }
98
+
37
99
  /**
38
100
  * Parse plugin config from openclaw.plugin.json configSchema values,
39
101
  * with env var overrides and sensible defaults.
@@ -66,15 +128,7 @@ export function parsePluginConfig(raw?: Record<string, unknown>): KongBrainConfi
66
128
  ns: (typeof surreal.ns === "string" ? surreal.ns : null) ?? process.env.SURREAL_NS ?? "kong",
67
129
  db: (typeof surreal.db === "string" ? surreal.db : null) ?? process.env.SURREAL_DB ?? "memory",
68
130
  },
69
- embedding: {
70
- modelPath:
71
- process.env.EMBED_MODEL_PATH ??
72
- (typeof embedding.modelPath === "string"
73
- ? embedding.modelPath
74
- : join(homedir(), ".node-llama-cpp", "models", "bge-m3-q4_k_m.gguf")),
75
- dimensions:
76
- typeof embedding.dimensions === "number" ? embedding.dimensions : 1024,
77
- },
131
+ embedding: parseEmbeddingConfig(embedding),
78
132
  thresholds: {
79
133
  daemonTokenThreshold:
80
134
  typeof thresholds.daemonTokenThreshold === "number" ? thresholds.daemonTokenThreshold : 4000,
@@ -57,7 +57,7 @@ export class KongBrainContextEngine implements ContextEngine {
57
57
  readonly info: ContextEngineInfo = {
58
58
  id: "kongbrain",
59
59
  name: "KongBrain",
60
- version: "0.4.2",
60
+ version: "0.5.1",
61
61
  ownsCompaction: true,
62
62
  };
63
63
 
@@ -76,7 +76,9 @@ export class KongBrainContextEngine implements ContextEngine {
76
76
  // Run schema once per process (idempotent but expensive on every bootstrap)
77
77
  if (!this.state.schemaApplied) {
78
78
  try {
79
- const schemaSql = loadSchema();
79
+ const schemaSql = loadSchema({
80
+ embeddingDimensions: this.state.config.embedding.dimensions,
81
+ });
80
82
  await store.queryExec(schemaSql);
81
83
  this.state.schemaApplied = true;
82
84
  } catch (e) {
@@ -0,0 +1,232 @@
1
+ import type { EmbeddingService } from "./embeddings.js";
2
+ import type { EmbeddingConfig } from "./config.js";
3
+ import { swallow } from "./errors.js";
4
+ import { log } from "./log.js";
5
+
6
+ /**
7
+ * OpenAI-compatible embedding service. Speaks the /v1/embeddings shape that
8
+ * OpenAI, Azure OpenAI, Together, Anyscale, vLLM, LM Studio, Ollama (compat
9
+ * endpoint), DeepInfra, and others all conform to. Switching between any of
10
+ * them is a baseURL change.
11
+ *
12
+ * The vectors this service produces are NOT in the same space as a
13
+ * different provider's vectors, even at the same dimensionality. The
14
+ * providerId field is what the rest of the system uses to keep them apart.
15
+ */
16
+ export class OpenAICompatEmbeddingService implements EmbeddingService {
17
+ readonly providerId: string;
18
+ readonly dimensions: number;
19
+
20
+ private readonly model: string;
21
+ private readonly baseURL: string;
22
+ private readonly apiKey: string | null;
23
+ private ready = false;
24
+
25
+ /** Per-batch limit. OpenAI accepts up to 2048 inputs; most compat servers are stricter. */
26
+ private readonly maxBatchSize = 96;
27
+
28
+ constructor(config: EmbeddingConfig) {
29
+ this.model = config.openaiCompat.model;
30
+ this.baseURL = config.openaiCompat.baseURL.replace(/\/+$/, "");
31
+ this.dimensions = config.dimensions;
32
+ // Resolve the API key from the named env var. Empty string is treated as
33
+ // missing — handled at initialize() time so the error is clear and early.
34
+ const keyName = config.openaiCompat.apiKeyEnv;
35
+ const keyVal = process.env[keyName];
36
+ this.apiKey = keyVal && keyVal.length > 0 ? keyVal : null;
37
+
38
+ // providerId encodes (provider, model, dim) so vectors written today can
39
+ // be distinguished from the same model at a different output dim later.
40
+ this.providerId = `openai-compat-${this.model}-${this.dimensions}d`;
41
+ }
42
+
43
+ async initialize(): Promise<boolean> {
44
+ if (this.ready) return false;
45
+ if (!this.apiKey) {
46
+ throw new Error(
47
+ `OpenAI-compatible embeddings: API key not set. Configure embedding.openaiCompat.apiKeyEnv (default OPENAI_API_KEY) and put the key in that env var.`,
48
+ );
49
+ }
50
+ // Sanity: require dimensions to be set. The OpenAI text-embedding-3-*
51
+ // models support a `dimensions` parameter; non-OpenAI compat servers
52
+ // generally ignore it and return their native dim. We verify on the
53
+ // first embed() call rather than here so we don't burn a request just
54
+ // to validate config.
55
+ if (!Number.isFinite(this.dimensions) || this.dimensions <= 0) {
56
+ throw new Error(
57
+ `OpenAI-compatible embeddings: invalid dimensions ${this.dimensions}`,
58
+ );
59
+ }
60
+ this.ready = true;
61
+ return true;
62
+ }
63
+
64
+ async embed(text: string): Promise<number[]> {
65
+ const result = await this.request([text]);
66
+ return result[0];
67
+ }
68
+
69
+ async embedBatch(texts: string[]): Promise<number[][]> {
70
+ if (texts.length === 0) return [];
71
+ if (texts.length <= this.maxBatchSize) return this.request(texts);
72
+ // Split into chunks so we never exceed the per-request limit.
73
+ const out: number[][] = [];
74
+ for (let i = 0; i < texts.length; i += this.maxBatchSize) {
75
+ const chunk = texts.slice(i, i + this.maxBatchSize);
76
+ const vecs = await this.request(chunk);
77
+ out.push(...vecs);
78
+ }
79
+ return out;
80
+ }
81
+
82
+ isAvailable(): boolean {
83
+ return this.ready;
84
+ }
85
+
86
+ async dispose(): Promise<void> {
87
+ this.ready = false;
88
+ }
89
+
90
+ /**
91
+ * POST one batch to /embeddings with retry-and-backoff on 429.
92
+ * 401/403 fail hard (config problem, retry will not help).
93
+ */
94
+ private async request(input: string[]): Promise<number[][]> {
95
+ if (!this.ready) throw new Error("OpenAI-compat embeddings not initialized");
96
+ const url = `${this.baseURL}/embeddings`;
97
+ const body = {
98
+ model: this.model,
99
+ input,
100
+ // text-embedding-3-* honors `dimensions`. Compat servers that ignore
101
+ // it will return their native dim — we verify after the fact.
102
+ dimensions: this.dimensions,
103
+ encoding_format: "float",
104
+ };
105
+
106
+ const maxAttempts = 4;
107
+ let attempt = 0;
108
+ let lastErr: unknown = null;
109
+ while (attempt < maxAttempts) {
110
+ attempt++;
111
+ let res: Response;
112
+ try {
113
+ res = await fetch(url, {
114
+ method: "POST",
115
+ headers: {
116
+ "Content-Type": "application/json",
117
+ Authorization: `Bearer ${this.apiKey}`,
118
+ },
119
+ body: JSON.stringify(body),
120
+ });
121
+ } catch (e) {
122
+ // Network-level failure — retry with backoff.
123
+ lastErr = e;
124
+ await this.sleep(backoffMs(attempt));
125
+ continue;
126
+ }
127
+
128
+ if (res.ok) {
129
+ const json = await res.json() as {
130
+ data?: Array<{ embedding: number[]; index: number }>;
131
+ };
132
+ const data = json.data ?? [];
133
+ // Sort by index — most servers return in order but the spec only
134
+ // guarantees the index field, so we honor it.
135
+ data.sort((a, b) => a.index - b.index);
136
+ const vecs = data.map(d => d.embedding);
137
+ if (vecs.length !== input.length) {
138
+ throw new Error(
139
+ `OpenAI-compat embeddings: returned ${vecs.length} vectors for ${input.length} inputs`,
140
+ );
141
+ }
142
+ // Verify dim once per response so a misconfigured server fails
143
+ // loudly instead of writing wrong-sized vectors into the DB.
144
+ if (vecs[0].length !== this.dimensions) {
145
+ throw new Error(
146
+ `OpenAI-compat embeddings: server returned ${vecs[0].length}-dim vectors but config requested ${this.dimensions}. ` +
147
+ `For non-OpenAI providers that ignore the 'dimensions' parameter, set embedding.dimensions in plugin config to match the server's native output.`,
148
+ );
149
+ }
150
+ return vecs;
151
+ }
152
+
153
+ // Hard fail on auth / not found — retrying will not help.
154
+ if (res.status === 401 || res.status === 403) {
155
+ const text = await readBodyText(res);
156
+ throw new Error(
157
+ `OpenAI-compat embeddings: auth failed (${res.status}). Check the API key in env var. Response: ${text.slice(0, 200)}`,
158
+ );
159
+ }
160
+ if (res.status === 404) {
161
+ const text = await readBodyText(res);
162
+ throw new Error(
163
+ `OpenAI-compat embeddings: endpoint not found at ${url}. Check baseURL. Response: ${text.slice(0, 200)}`,
164
+ );
165
+ }
166
+
167
+ // 429 (rate limit) and 5xx — retry with backoff. Honor Retry-After
168
+ // when present. Note: OpenAI returns HTTP 429 for both transient
169
+ // rate limits and "out of credits" (insufficient_quota) — the
170
+ // latter is not retryable, so peek at the body and fail fast.
171
+ if (res.status === 429 || res.status >= 500) {
172
+ const text = await readBodyText(res);
173
+ if (res.status === 429 && /insufficient_quota/i.test(text)) {
174
+ throw new Error(
175
+ `OpenAI-compat embeddings: insufficient quota on this API key. ` +
176
+ `Add credits / a payment method at the provider's billing page, or switch keys. ` +
177
+ `Response: ${text.slice(0, 200)}`,
178
+ );
179
+ }
180
+ const retryAfter = parseRetryAfter(res.headers.get("retry-after"));
181
+ const wait = retryAfter ?? backoffMs(attempt);
182
+ log.warn(`[embeddings:openai] ${res.status} from ${url}, retrying in ${wait}ms (attempt ${attempt}/${maxAttempts})`);
183
+ lastErr = new Error(`HTTP ${res.status}: ${text.slice(0, 200)}`);
184
+ await this.sleep(wait);
185
+ continue;
186
+ }
187
+
188
+ // Other 4xx — body usually has the reason. Don't retry.
189
+ const text = await readBodyText(res);
190
+ throw new Error(
191
+ `OpenAI-compat embeddings: HTTP ${res.status}. Response: ${text.slice(0, 300)}`,
192
+ );
193
+ }
194
+
195
+ throw new Error(
196
+ `OpenAI-compat embeddings: exhausted ${maxAttempts} attempts. Last error: ${String(lastErr)}`,
197
+ );
198
+ }
199
+
200
+ private sleep(ms: number): Promise<void> {
201
+ return new Promise(r => setTimeout(r, ms));
202
+ }
203
+ }
204
+
205
+ /** Exponential backoff with jitter. 1s, 2s, 4s, 8s base, +/- 25%. */
206
+ function backoffMs(attempt: number): number {
207
+ const base = 1000 * Math.pow(2, attempt - 1);
208
+ const jitter = 1 + (Math.random() * 0.5 - 0.25);
209
+ return Math.round(base * jitter);
210
+ }
211
+
212
+ /** Parse Retry-After header (seconds or HTTP-date) into ms; null if absent or unparseable. */
213
+ function parseRetryAfter(value: string | null): number | null {
214
+ if (!value) return null;
215
+ const asInt = parseInt(value, 10);
216
+ if (Number.isFinite(asInt)) return asInt * 1000;
217
+ const asDate = Date.parse(value);
218
+ if (Number.isFinite(asDate)) {
219
+ const ms = asDate - Date.now();
220
+ return ms > 0 ? ms : 0;
221
+ }
222
+ return null;
223
+ }
224
+
225
/**
 * Best-effort read of an HTTP response body as text, used to enrich error
 * messages. A body that cannot be read (stream error, already-consumed
 * body) is logged via swallow() and reported as the empty string rather
 * than propagating a secondary failure out of the error path.
 */
async function readBodyText(res: Response): Promise<string> {
  try {
    return await res.text();
  } catch (e) {
    // Never let body-read failures mask the original HTTP error.
    swallow("embeddings:openai:readBody", e);
    return "";
  }
}
package/src/embeddings.ts CHANGED
@@ -1,5 +1,6 @@
1
1
  import { existsSync } from "node:fs";
2
2
  import type { EmbeddingConfig } from "./config.js";
3
+ import { OpenAICompatEmbeddingService } from "./embeddings-openai.js";
3
4
  import { swallow } from "./errors.js";
4
5
  import { log } from "./log.js";
5
6
 
@@ -8,8 +9,38 @@ import { log } from "./log.js";
8
9
  type LlamaEmbeddingContext = import("node-llama-cpp").LlamaEmbeddingContext;
9
10
  type LlamaModel = import("node-llama-cpp").LlamaModel;
10
11
 
12
/**
 * Provider-agnostic embedding service.
 *
 * Implementations must guarantee that vectors they produce are in the same
 * vector space across calls within a single instance. Different implementations
 * (or different models within the same implementation) produce vectors in
 * different spaces and must not be compared with cosine similarity. The
 * `providerId` field is the stable tag used to detect cross-space mixing.
 */
export interface EmbeddingService {
  /** Stable identifier for the (provider, model, dimension) tuple. */
  readonly providerId: string;
  /** Dimensionality of the vectors this service produces. */
  readonly dimensions: number;

  /** Initialize the underlying model. Returns true on first init, false if already ready. */
  initialize(): Promise<boolean>;
  /** Return the embedding vector for a single text. Implementations throw if called before initialize() succeeds. */
  embed(text: string): Promise<number[]>;
  /** Return embedding vectors for an array of texts, in input order. Same pre-init throw contract as embed(). */
  embedBatch(texts: string[]): Promise<number[][]>;
  /** True once initialize() has succeeded. */
  isAvailable(): boolean;
  /** Release any underlying resources (model handles, sockets, etc.). */
  dispose(): Promise<void>;
}
+
11
39
  /** BGE-M3 embedding service (1024-dim via GGUF) with an LRU cache of up to 512 entries. */
12
- export class EmbeddingService {
40
+ export class LocalEmbeddingService implements EmbeddingService {
41
+ readonly providerId: string;
42
+ readonly dimensions: number;
43
+
13
44
  private model: LlamaModel | null = null;
14
45
  private ctx: LlamaEmbeddingContext | null = null;
15
46
  private ready = false;
@@ -17,9 +48,11 @@ export class EmbeddingService {
17
48
  private cache = new Map<string, number[]>();
18
49
  private readonly maxCacheSize = 512;
19
50
 
20
- constructor(private readonly config: EmbeddingConfig) {}
51
+ constructor(private readonly config: EmbeddingConfig) {
52
+ this.providerId = "local-bge-m3";
53
+ this.dimensions = config.dimensions;
54
+ }
21
55
 
22
- /** Initialize the embedding model. Returns true if freshly loaded, false if already ready. */
23
56
  async initialize(): Promise<boolean> {
24
57
  if (this.ready) return false;
25
58
  if (!existsSync(this.config.modelPath)) {
@@ -42,19 +75,16 @@ export class EmbeddingService {
42
75
  return true;
43
76
  }
44
77
 
45
- /** Return the embedding vector for text, serving from LRU cache on repeat calls. */
46
78
  async embed(text: string): Promise<number[]> {
47
79
  if (!this.ready || !this.ctx) throw new Error("Embeddings not initialized");
48
80
  const cached = this.cache.get(text);
49
81
  if (cached) {
50
- // Move to end for LRU freshness
51
82
  this.cache.delete(text);
52
83
  this.cache.set(text, cached);
53
84
  return cached;
54
85
  }
55
86
  const result = await this.ctx.getEmbeddingFor(text);
56
87
  const vec = Array.from(result.vector);
57
- // Evict oldest if at capacity
58
88
  if (this.cache.size >= this.maxCacheSize) {
59
89
  this.cache.delete(this.cache.keys().next().value!);
60
90
  }
@@ -82,3 +112,15 @@ export class EmbeddingService {
82
112
  }
83
113
  }
84
114
  }
115
+
116
+ /** Construct the configured embedding service. Adding a new provider plugs in here. */
117
+ export function createEmbeddingService(config: EmbeddingConfig): EmbeddingService {
118
+ if (config.provider === "openai-compat") {
119
+ // Lazy import keeps the local-only deployment path from paying the cost
120
+ // of parsing the OpenAI module on startup.
121
+ const { OpenAICompatEmbeddingService } = require("./embeddings-openai.js") as
122
+ typeof import("./embeddings-openai.js");
123
+ return new OpenAICompatEmbeddingService(config);
124
+ }
125
+ return new LocalEmbeddingService(config);
126
+ }
package/src/identity.ts CHANGED
@@ -103,6 +103,7 @@ export async function seedIdentity(
103
103
  chunk_index: i,
104
104
  text: chunk.text,
105
105
  embedding: vec,
106
+ embedding_provider: embeddings.providerId,
106
107
  importance: chunk.importance,
107
108
  },
108
109
  },
@@ -183,6 +184,7 @@ export async function saveUserIdentity(
183
184
  chunk_index: i,
184
185
  text,
185
186
  embedding: vec,
187
+ embedding_provider: embeddings.providerId,
186
188
  importance: 0.95,
187
189
  },
188
190
  },
package/src/index.ts CHANGED
@@ -10,8 +10,9 @@ import { join, dirname } from "node:path";
10
10
  import { definePluginEntry } from "openclaw/plugin-sdk/plugin-entry";
11
11
  import { parsePluginConfig } from "./config.js";
12
12
  import { SurrealStore } from "./surreal.js";
13
- import { EmbeddingService } from "./embeddings.js";
13
+ import { createEmbeddingService } from "./embeddings.js";
14
14
  import { GlobalPluginState, type CompleteFn } from "./state.js";
15
+ import { resolveModelRef } from "./model-resolution.js";
15
16
  import { KongBrainContextEngine } from "./context-engine.js";
16
17
  import { createRecallToolDef } from "./tools/recall.js";
17
18
  import { createCoreMemoryToolDef } from "./tools/core-memory.js";
@@ -299,10 +300,76 @@ async function detectGraduationEvent(
299
300
  }
300
301
  }
301
302
 
303
+ /**
304
+ * Detect rows tagged with a provider other than the one currently active.
305
+ * Pre-existing data stays in the database; PR-B's search-time filter keeps
306
+ * it from corrupting recall, but it becomes invisible until re-embedded.
307
+ * Logging gives the user a clear cue that a migration is needed without
308
+ * refusing to start (the data is intact and reads remain safe).
309
+ */
310
+ async function checkEmbeddingProviderMismatch(
311
+ store: SurrealStore,
312
+ activeProvider: string,
313
+ logger: { warn: (msg: string) => void },
314
+ ): Promise<void> {
315
+ if (!store.isAvailable()) return;
316
+ const tables = ["turn", "concept", "memory", "artifact", "identity_chunk", "skill", "reflection", "monologue"];
317
+ let mismatched = 0;
318
+ for (const t of tables) {
319
+ try {
320
+ const rows = await store.queryFirst<{ count: number }>(
321
+ `SELECT count() AS count FROM ${t} WHERE embedding != NONE AND embedding_provider != $provider GROUP ALL`,
322
+ { provider: activeProvider },
323
+ );
324
+ mismatched += Number(rows[0]?.count ?? 0);
325
+ } catch (e) {
326
+ swallow.warn(`factory:providerMismatchCount:${t}`, e);
327
+ }
328
+ }
329
+ if (mismatched > 0) {
330
+ logger.warn(
331
+ `Embedding provider mismatch: ${mismatched} rows in the database were embedded by a different provider than the active one (${activeProvider}). ` +
332
+ `These rows are filtered out of similarity search until re-embedded. To migrate, run the re-embed tool (PR-D, coming soon) or revert the embedding.provider config.`,
333
+ );
334
+ }
335
+ }
336
+
337
+ /**
338
+ * Detect vectors whose dimensionality doesn't match the configured dimension.
339
+ * DEFINE INDEX IF NOT EXISTS is non-destructive — changing embedding.dimensions
340
+ * after initial setup leaves the HNSW index at the old dimension while new
341
+ * vectors are written at the new size. Warn so the user knows to rebuild.
342
+ */
343
+ async function checkEmbeddingDimensionMismatch(
344
+ store: SurrealStore,
345
+ configuredDimensions: number,
346
+ logger: { warn: (msg: string) => void },
347
+ ): Promise<void> {
348
+ if (!store.isAvailable()) return;
349
+ const tables = ["concept", "memory", "turn", "artifact", "identity_chunk", "skill", "reflection", "monologue"];
350
+ for (const t of tables) {
351
+ try {
352
+ const rows = await store.queryFirst<{ len: number }>(
353
+ `SELECT array::len(embedding) AS len FROM ${t} WHERE embedding != NONE AND array::len(embedding) > 0 LIMIT 1`,
354
+ );
355
+ if (rows.length > 0 && rows[0].len !== configuredDimensions) {
356
+ logger.warn(
357
+ `Embedding dimension mismatch: existing vectors are ${rows[0].len}-dimensional but embedding.dimensions is configured as ${configuredDimensions}. ` +
358
+ `HNSW indexes created at the old dimension are not updated by DEFINE INDEX IF NOT EXISTS. ` +
359
+ `To fix: drop and recreate the vector indexes, then re-embed affected rows.`,
360
+ );
361
+ return;
362
+ }
363
+ } catch (e) {
364
+ swallow.warn(`factory:dimensionMismatchCheck:${t}`, e);
365
+ }
366
+ }
367
+ }
368
+
302
369
  export default definePluginEntry({
303
370
  id: "kongbrain",
304
371
  name: "KongBrain",
305
- description: "Graph-backed cognitive context engine with SurrealDB persistence and BGE-M3 embeddings.",
372
+ description: "Graph-backed cognitive context engine with SurrealDB persistence and pluggable embeddings (local BGE-M3 or OpenAI-compatible).",
306
373
  kind: "context-engine",
307
374
 
308
375
  register(api) {
@@ -315,8 +382,14 @@ export default definePluginEntry({
315
382
  // ensure a single instance survives across module reloads.
316
383
  let globalState = getGlobalState();
317
384
  if (!globalState) {
318
- const store = new SurrealStore(config.surreal);
319
- const embeddings = new EmbeddingService(config.embedding);
385
+ const store = new SurrealStore(config.surreal, {
386
+ embeddingDimensions: config.embedding.dimensions,
387
+ });
388
+ const embeddings = createEmbeddingService(config.embedding);
389
+ // Tag every embedding write and filter every embedding search by this
390
+ // provider id, so vectors from different models (different vector
391
+ // spaces) never mix in the same HNSW result set.
392
+ store.setActiveProvider(embeddings.providerId);
320
393
  // Build a CompleteFn using pi-ai directly since api.runtime.complete
321
394
  // is not available in OpenClaw 2026.3.24 (unreleased feature).
322
395
  const apiRef = api;
@@ -345,27 +418,22 @@ export default definePluginEntry({
345
418
  }
346
419
  piAi = await import(piAiPath);
347
420
  }
348
- // Fall back to calling pi-ai directly (runtime.complete not in OpenClaw 2026.3.24)
349
- const provider = params.provider ?? apiRef.runtime.agent.defaults.provider;
350
- const rawModel = params.model ?? apiRef.runtime.agent.defaults.model;
351
- // defaults.model may be an object {primary: '...', fallbacks: []} — unwrap it
352
- const modelIdRaw = typeof rawModel === 'object' && rawModel !== null
353
- ? (rawModel as any).primary ?? (rawModel as any).id ?? String(rawModel)
354
- : rawModel;
355
- // modelId may be "provider/model" format — split if provider not set
356
- let resolvedProvider = provider;
357
- let modelId = modelIdRaw;
358
- if (typeof modelId === 'string' && modelId.includes('/') && !resolvedProvider) {
359
- const idx = modelId.indexOf('/');
360
- resolvedProvider = modelId.slice(0, idx);
361
- modelId = modelId.slice(idx + 1);
362
- }
363
- const model = piAi!.getModel(resolvedProvider, modelId);
421
+ // Fall back to calling pi-ai directly (runtime.complete not in OpenClaw 2026.3.24).
422
+ // Fully-qualified OpenClaw refs (provider/model) are authoritative:
423
+ // stale runtime default providers must not override e.g.
424
+ // "openrouter/google/gemini-3-flash-preview".
425
+ const cfg = apiRef.runtime.config.loadConfig();
426
+ const resolved = resolveModelRef({
427
+ explicitProvider: params.provider,
428
+ explicitModel: params.model,
429
+ config: cfg,
430
+ runtimeDefaults: apiRef.runtime.agent.defaults,
431
+ });
432
+ const model = piAi!.getModel(resolved.provider, resolved.modelId);
364
433
  if (!model) {
365
- throw new Error(`Model "${modelId}" not found for provider "${provider}"`);
434
+ throw new Error(`Model "${resolved.modelId}" not found for provider "${resolved.provider}"`);
366
435
  }
367
436
  // Resolve auth via OpenClaw's runtime (handles profiles, env vars, etc.)
368
- const cfg = apiRef.runtime.config.loadConfig();
369
437
  const auth = await apiRef.runtime.modelAuth.getApiKeyForModel({ model, cfg });
370
438
  // Build context
371
439
  const now = Date.now();
@@ -379,7 +447,7 @@ export default definePluginEntry({
379
447
  );
380
448
  const context = { systemPrompt: params.system, messages };
381
449
  // Pass apiKey directly in options so the provider can use it
382
- log.info(`complete(): provider=${resolvedProvider} model=${modelId} msgs=${params.messages.length}`);
450
+ log.info(`complete(): provider=${resolved.provider} model=${resolved.modelId} msgs=${params.messages.length}`);
383
451
  // NOTE: outputFormat (structured output) is intentionally NOT passed to pi-ai.
384
452
  // pi-ai's SimpleStreamOptions doesn't support it, and injecting it via onPayload
385
453
  // causes the Anthropic API to return empty responses. The daemon's JSON parsing
@@ -419,10 +487,23 @@ export default definePluginEntry({
419
487
  throw e;
420
488
  }
421
489
 
422
- // Initialize BGE-M3 embeddings (no-op if already loaded)
490
+ // Initialize the embedding provider (no-op if already loaded)
423
491
  try {
424
492
  const freshEmbed = await embeddings.initialize();
425
- if (freshEmbed) logger.info(`BGE-M3 embeddings initialized: ${config.embedding.modelPath}`);
493
+ if (freshEmbed) {
494
+ const detail = config.embedding.provider === "openai-compat"
495
+ ? `${config.embedding.openaiCompat.baseURL} (${config.embedding.openaiCompat.model})`
496
+ : config.embedding.modelPath;
497
+ logger.info(`Embeddings initialized [${embeddings.providerId}]: ${detail}`);
498
+ // One-time check: warn if the DB has rows tagged with a different
499
+ // provider. PR-B's search-time filter prevents silent corruption,
500
+ // but those rows are now invisible to recall until they're
501
+ // re-embedded with the active provider.
502
+ checkEmbeddingProviderMismatch(store, embeddings.providerId, logger)
503
+ .catch(e => swallow.warn("factory:providerMismatchCheck", e));
504
+ checkEmbeddingDimensionMismatch(store, config.embedding.dimensions, logger)
505
+ .catch(e => swallow.warn("factory:dimensionMismatchCheck", e));
506
+ }
426
507
  } catch (e) {
427
508
  logger.warn(`Embeddings init failed — running in degraded mode: ${e}`);
428
509
  }
@@ -350,7 +350,7 @@ export async function writeExtractionResults(
350
350
  trigger_context: String(s.trigger_context ?? "").slice(0, 200),
351
351
  tags: ["auto-extracted"],
352
352
  session_id: sessionId,
353
- ...(emb ? { embedding: emb } : {}),
353
+ ...(emb ? { embedding: emb, embedding_provider: embeddings.providerId } : {}),
354
354
  },
355
355
  },
356
356
  );