kongbrain 0.4.4 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.kongcode-handoff.json +8 -0
- package/CHANGELOG.md +47 -0
- package/README.github.md +56 -4
- package/README.md +29 -3
- package/README.npm.md +29 -3
- package/SKILL.md +1 -1
- package/bin/kongbrain-reembed.ts +143 -0
- package/openclaw.plugin.json +37 -7
- package/package.json +4 -1
- package/src/causal.ts +4 -1
- package/src/cognitive-bootstrap.ts +1 -0
- package/src/concept-extract.ts +4 -2
- package/src/config.ts +64 -10
- package/src/context-engine.ts +4 -2
- package/src/embeddings-openai.ts +232 -0
- package/src/embeddings.ts +48 -6
- package/src/identity.ts +2 -0
- package/src/index.ts +106 -25
- package/src/memory-daemon.ts +1 -1
- package/src/migrate-reembed.ts +305 -0
- package/src/model-resolution.ts +98 -0
- package/src/reflection.ts +10 -4
- package/src/schema-loader.ts +21 -3
- package/src/schema.surql +37 -8
- package/src/skills.ts +14 -5
- package/src/supersedes.ts +2 -1
- package/src/surreal.ts +87 -21
- package/src/workspace-migrate.ts +3 -0
package/src/config.ts
CHANGED
|
@@ -10,9 +10,26 @@ export interface SurrealConfig {
|
|
|
10
10
|
db: string;
|
|
11
11
|
}
|
|
12
12
|
|
|
13
|
+
export type EmbeddingProvider = "local" | "openai-compat";
|
|
14
|
+
|
|
15
|
+
export interface OpenAICompatEmbeddingConfig {
|
|
16
|
+
/** Model name passed in the embeddings request body (e.g. "text-embedding-3-small"). */
|
|
17
|
+
model: string;
|
|
18
|
+
/** Endpoint base URL. Default: "https://api.openai.com/v1". */
|
|
19
|
+
baseURL: string;
|
|
20
|
+
/** Name of the env var holding the API key. Default: "OPENAI_API_KEY". */
|
|
21
|
+
apiKeyEnv: string;
|
|
22
|
+
}
|
|
23
|
+
|
|
13
24
|
export interface EmbeddingConfig {
|
|
14
|
-
|
|
25
|
+
/** Which provider to use. Default "local" (BGE-M3 via node-llama-cpp). */
|
|
26
|
+
provider: EmbeddingProvider;
|
|
27
|
+
/** Vector dimensionality the active provider should produce. */
|
|
15
28
|
dimensions: number;
|
|
29
|
+
/** Path to the local GGUF model — only consulted when provider === "local". */
|
|
30
|
+
modelPath: string;
|
|
31
|
+
/** OpenAI-compatible provider settings — only consulted when provider === "openai-compat". */
|
|
32
|
+
openaiCompat: OpenAICompatEmbeddingConfig;
|
|
16
33
|
}
|
|
17
34
|
|
|
18
35
|
export interface ThresholdConfig {
|
|
@@ -34,6 +51,51 @@ export interface KongBrainConfig {
|
|
|
34
51
|
thresholds: ThresholdConfig;
|
|
35
52
|
}
|
|
36
53
|
|
|
54
|
+
const DEFAULT_EMBEDDING_DIMENSIONS = 1024;
|
|
55
|
+
|
|
56
|
+
/**
 * Accept `value` only when it is a number, an integer, and strictly greater
 * than zero; in every other case return `fallback` unchanged.
 *
 * @param value - Untrusted config value of any type.
 * @param fallback - Value returned when `value` is rejected.
 * @returns `value` itself when it passes all three checks, else `fallback`.
 */
function parsePositiveInteger(value: unknown, fallback: number): number {
  if (typeof value !== "number") return fallback;
  if (!Number.isInteger(value)) return fallback;
  if (value <= 0) return fallback;
  return value;
}
|
|
61
|
+
|
|
62
|
+
/**
 * Build the `embedding` section of the plugin config from the raw
 * plugin-config object, applying env var overrides and defaults.
 *
 * Precedence per field:
 * - `provider`:  KONGBRAIN_EMBED_PROVIDER env > `raw.provider` > "local".
 *   Any value other than "openai-compat" resolves to "local".
 * - `dimensions`: `raw.dimensions` when it is a positive integer, else the default.
 * - `modelPath`: EMBED_MODEL_PATH env > `raw.modelPath` > the BGE-M3 GGUF
 *   under `~/.node-llama-cpp/models`.
 * - `openaiCompat.baseURL`: OPENAI_BASE_URL env > `raw.openaiCompat.baseURL`
 *   > "https://api.openai.com/v1".
 * - `openaiCompat.model` / `openaiCompat.apiKeyEnv`: config value > default.
 *
 * An env var that is set but blank (e.g. `OPENAI_BASE_URL=` in a .env file)
 * is treated as unset, so it cannot replace a valid config value with "".
 *
 * @param raw - The `embedding` object from the plugin config (may be empty).
 * @returns A fully populated EmbeddingConfig.
 */
function parseEmbeddingConfig(raw: Record<string, unknown>): EmbeddingConfig {
  // Read an env var, mapping "unset" and "blank/whitespace-only" to null so
  // that `??` falls through to the next source in the precedence chain.
  const envOrNull = (name: string): string | null => {
    const value = process.env[name]?.trim();
    return value ? value : null;
  };
  const stringOr = (value: unknown, fallback: string): string =>
    typeof value === "string" ? value : fallback;

  const openaiCompatRaw = (raw.openaiCompat ?? {}) as Record<string, unknown>;

  // Provider precedence: env var > plugin config > default "local"
  const rawProvider =
    envOrNull("KONGBRAIN_EMBED_PROVIDER") ??
    (typeof raw.provider === "string" ? raw.provider : null);
  const provider: EmbeddingProvider =
    rawProvider === "openai-compat" ? "openai-compat" : "local";

  return {
    provider,
    dimensions: parsePositiveInteger(raw.dimensions, DEFAULT_EMBEDDING_DIMENSIONS),
    modelPath:
      envOrNull("EMBED_MODEL_PATH") ??
      (typeof raw.modelPath === "string"
        ? raw.modelPath
        : join(homedir(), ".node-llama-cpp", "models", "bge-m3-q4_k_m.gguf")),
    openaiCompat: {
      model: stringOr(openaiCompatRaw.model, "text-embedding-3-small"),
      // baseURL: env wins (matches the official openai SDK convention)
      baseURL:
        envOrNull("OPENAI_BASE_URL") ??
        stringOr(openaiCompatRaw.baseURL, "https://api.openai.com/v1"),
      apiKeyEnv: stringOr(openaiCompatRaw.apiKeyEnv, "OPENAI_API_KEY"),
    },
  };
}
|
|
98
|
+
|
|
37
99
|
/**
|
|
38
100
|
* Parse plugin config from openclaw.plugin.json configSchema values,
|
|
39
101
|
* with env var overrides and sensible defaults.
|
|
@@ -66,15 +128,7 @@ export function parsePluginConfig(raw?: Record<string, unknown>): KongBrainConfi
|
|
|
66
128
|
ns: (typeof surreal.ns === "string" ? surreal.ns : null) ?? process.env.SURREAL_NS ?? "kong",
|
|
67
129
|
db: (typeof surreal.db === "string" ? surreal.db : null) ?? process.env.SURREAL_DB ?? "memory",
|
|
68
130
|
},
|
|
69
|
-
embedding:
|
|
70
|
-
modelPath:
|
|
71
|
-
process.env.EMBED_MODEL_PATH ??
|
|
72
|
-
(typeof embedding.modelPath === "string"
|
|
73
|
-
? embedding.modelPath
|
|
74
|
-
: join(homedir(), ".node-llama-cpp", "models", "bge-m3-q4_k_m.gguf")),
|
|
75
|
-
dimensions:
|
|
76
|
-
typeof embedding.dimensions === "number" ? embedding.dimensions : 1024,
|
|
77
|
-
},
|
|
131
|
+
embedding: parseEmbeddingConfig(embedding),
|
|
78
132
|
thresholds: {
|
|
79
133
|
daemonTokenThreshold:
|
|
80
134
|
typeof thresholds.daemonTokenThreshold === "number" ? thresholds.daemonTokenThreshold : 4000,
|
package/src/context-engine.ts
CHANGED
|
@@ -57,7 +57,7 @@ export class KongBrainContextEngine implements ContextEngine {
|
|
|
57
57
|
readonly info: ContextEngineInfo = {
|
|
58
58
|
id: "kongbrain",
|
|
59
59
|
name: "KongBrain",
|
|
60
|
-
version: "0.
|
|
60
|
+
version: "0.5.1",
|
|
61
61
|
ownsCompaction: true,
|
|
62
62
|
};
|
|
63
63
|
|
|
@@ -76,7 +76,9 @@ export class KongBrainContextEngine implements ContextEngine {
|
|
|
76
76
|
// Run schema once per process (idempotent but expensive on every bootstrap)
|
|
77
77
|
if (!this.state.schemaApplied) {
|
|
78
78
|
try {
|
|
79
|
-
const schemaSql = loadSchema(
|
|
79
|
+
const schemaSql = loadSchema({
|
|
80
|
+
embeddingDimensions: this.state.config.embedding.dimensions,
|
|
81
|
+
});
|
|
80
82
|
await store.queryExec(schemaSql);
|
|
81
83
|
this.state.schemaApplied = true;
|
|
82
84
|
} catch (e) {
|
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
import type { EmbeddingService } from "./embeddings.js";
|
|
2
|
+
import type { EmbeddingConfig } from "./config.js";
|
|
3
|
+
import { swallow } from "./errors.js";
|
|
4
|
+
import { log } from "./log.js";
|
|
5
|
+
|
|
6
|
+
/**
 * OpenAI-compatible embedding service. Speaks the /v1/embeddings shape that
 * OpenAI, Azure OpenAI, Together, Anyscale, vLLM, LM Studio, Ollama (compat
 * endpoint), DeepInfra, and others all conform to. Switching between any of
 * them is a baseURL change.
 *
 * The vectors this service produces are NOT in the same space as a
 * different provider's vectors, even at the same dimensionality. The
 * providerId field is what the rest of the system uses to keep them apart.
 */
export class OpenAICompatEmbeddingService implements EmbeddingService {
  /** Tag of the form `openai-compat-<model>-<dimensions>d`. */
  readonly providerId: string;
  /** Vector length requested from, and required of, the server. */
  readonly dimensions: number;

  private readonly model: string;
  /** Endpoint base URL with trailing slashes stripped. */
  private readonly baseURL: string;
  /** API key read from the configured env var; null when unset or empty. */
  private readonly apiKey: string | null;
  private ready = false;

  /** Per-batch limit. OpenAI accepts up to 2048 inputs; most compat servers are stricter. */
  private readonly maxBatchSize = 96;

  /**
   * Capture model, endpoint, dimensions and API key from config. Performs no
   * I/O and never throws; validation is deferred to initialize().
   */
  constructor(config: EmbeddingConfig) {
    this.model = config.openaiCompat.model;
    this.baseURL = config.openaiCompat.baseURL.replace(/\/+$/, "");
    this.dimensions = config.dimensions;
    // Resolve the API key from the named env var. Empty string is treated as
    // missing — handled at initialize() time so the error is clear and early.
    const keyName = config.openaiCompat.apiKeyEnv;
    const keyVal = process.env[keyName];
    this.apiKey = keyVal && keyVal.length > 0 ? keyVal : null;

    // providerId encodes (provider, model, dim) so vectors written today can
    // be distinguished from the same model at a different output dim later.
    this.providerId = `openai-compat-${this.model}-${this.dimensions}d`;
  }

  /**
   * Validate configuration and mark the service ready. Sends no request.
   *
   * @returns true on first successful init, false if already ready.
   * @throws Error when the API key is missing or dimensions is not a
   *   positive finite number.
   */
  async initialize(): Promise<boolean> {
    if (this.ready) return false;
    if (!this.apiKey) {
      throw new Error(
        `OpenAI-compatible embeddings: API key not set. Configure embedding.openaiCompat.apiKeyEnv (default OPENAI_API_KEY) and put the key in that env var.`,
      );
    }
    // Sanity: require dimensions to be set. The OpenAI text-embedding-3-*
    // models support a `dimensions` parameter; non-OpenAI compat servers
    // generally ignore it and return their native dim. We verify on the
    // first embed() call rather than here so we don't burn a request just
    // to validate config.
    if (!Number.isFinite(this.dimensions) || this.dimensions <= 0) {
      throw new Error(
        `OpenAI-compatible embeddings: invalid dimensions ${this.dimensions}`,
      );
    }
    this.ready = true;
    return true;
  }

  /** Embed a single text with one request and return its vector. */
  async embed(text: string): Promise<number[]> {
    const result = await this.request([text]);
    return result[0];
  }

  /**
   * Embed many texts, returning vectors in input order. Inputs beyond
   * maxBatchSize are split into sequential requests; an empty input
   * returns an empty array without any request.
   */
  async embedBatch(texts: string[]): Promise<number[][]> {
    if (texts.length === 0) return [];
    if (texts.length <= this.maxBatchSize) return this.request(texts);
    // Split into chunks so we never exceed the per-request limit.
    const out: number[][] = [];
    for (let i = 0; i < texts.length; i += this.maxBatchSize) {
      const chunk = texts.slice(i, i + this.maxBatchSize);
      const vecs = await this.request(chunk);
      out.push(...vecs);
    }
    return out;
  }

  /** True once initialize() has succeeded and dispose() has not been called since. */
  isAvailable(): boolean {
    return this.ready;
  }

  /** Mark the service as not ready; there are no handles to release. */
  async dispose(): Promise<void> {
    this.ready = false;
  }

  /**
   * POST one batch to /embeddings.
   *
   * Retries with backoff (up to 4 attempts in total) on network-level
   * failures, HTTP 429 and HTTP 5xx, honoring Retry-After when present.
   * Fails immediately on 401/403, 404, any other 4xx, and on a 429 whose
   * body mentions `insufficient_quota`. No delay is spent after the final
   * attempt: once the attempts are used up the error is thrown right away.
   *
   * @param input - Texts to embed in a single request.
   * @returns One vector per input, ordered by the response `index` field.
   * @throws Error when not initialized, on a non-retryable status, when the
   *   vector count or any vector's dimension does not match, or when all
   *   attempts are exhausted.
   */
  private async request(input: string[]): Promise<number[][]> {
    if (!this.ready) throw new Error("OpenAI-compat embeddings not initialized");
    const url = `${this.baseURL}/embeddings`;
    const body = {
      model: this.model,
      input,
      // text-embedding-3-* honors `dimensions`. Compat servers that ignore
      // it will return their native dim — we verify after the fact.
      dimensions: this.dimensions,
      encoding_format: "float",
    };

    const maxAttempts = 4;
    let attempt = 0;
    let lastErr: unknown = null;
    while (attempt < maxAttempts) {
      attempt++;
      let res: Response;
      try {
        res = await fetch(url, {
          method: "POST",
          headers: {
            "Content-Type": "application/json",
            Authorization: `Bearer ${this.apiKey}`,
          },
          body: JSON.stringify(body),
        });
      } catch (e) {
        // Network-level failure — retry with backoff, but only if another
        // attempt will actually follow.
        lastErr = e;
        if (attempt < maxAttempts) await this.sleep(backoffMs(attempt));
        continue;
      }

      if (res.ok) {
        const json = await res.json() as {
          data?: Array<{ embedding: number[]; index: number }>;
        };
        const data = json.data ?? [];
        // Sort by index — most servers return in order but the spec only
        // guarantees the index field, so we honor it.
        data.sort((a, b) => a.index - b.index);
        const vecs = data.map(d => d.embedding);
        if (vecs.length !== input.length) {
          throw new Error(
            `OpenAI-compat embeddings: returned ${vecs.length} vectors for ${input.length} inputs`,
          );
        }
        // Verify every vector so a misconfigured server fails loudly instead
        // of writing wrong-sized (or missing) vectors into the DB.
        for (let i = 0; i < vecs.length; i++) {
          const vec = vecs[i];
          if (!Array.isArray(vec)) {
            throw new Error(
              `OpenAI-compat embeddings: response item ${i} has no embedding array`,
            );
          }
          if (vec.length !== this.dimensions) {
            throw new Error(
              `OpenAI-compat embeddings: server returned ${vec.length}-dim vectors but config requested ${this.dimensions}. ` +
              `For non-OpenAI providers that ignore the 'dimensions' parameter, set embedding.dimensions in plugin config to match the server's native output.`,
            );
          }
        }
        return vecs;
      }

      // Hard fail on auth / not found — retrying will not help.
      if (res.status === 401 || res.status === 403) {
        const text = await readBodyText(res);
        throw new Error(
          `OpenAI-compat embeddings: auth failed (${res.status}). Check the API key in env var. Response: ${text.slice(0, 200)}`,
        );
      }
      if (res.status === 404) {
        const text = await readBodyText(res);
        throw new Error(
          `OpenAI-compat embeddings: endpoint not found at ${url}. Check baseURL. Response: ${text.slice(0, 200)}`,
        );
      }

      // 429 (rate limit) and 5xx — retry with backoff. Honor Retry-After
      // when present. Note: OpenAI returns HTTP 429 for both transient
      // rate limits and "out of credits" (insufficient_quota) — the
      // latter is not retryable, so peek at the body and fail fast.
      if (res.status === 429 || res.status >= 500) {
        const text = await readBodyText(res);
        if (res.status === 429 && /insufficient_quota/i.test(text)) {
          throw new Error(
            `OpenAI-compat embeddings: insufficient quota on this API key. ` +
            `Add credits / a payment method at the provider's billing page, or switch keys. ` +
            `Response: ${text.slice(0, 200)}`,
          );
        }
        lastErr = new Error(`HTTP ${res.status}: ${text.slice(0, 200)}`);
        // Final attempt: skip the "retrying" log and the wait, fall through
        // to the exhausted-attempts error below.
        if (attempt >= maxAttempts) break;
        const retryAfter = parseRetryAfter(res.headers.get("retry-after"));
        const wait = retryAfter ?? backoffMs(attempt);
        log.warn(`[embeddings:openai] ${res.status} from ${url}, retrying in ${wait}ms (attempt ${attempt}/${maxAttempts})`);
        await this.sleep(wait);
        continue;
      }

      // Other 4xx — body usually has the reason. Don't retry.
      const text = await readBodyText(res);
      throw new Error(
        `OpenAI-compat embeddings: HTTP ${res.status}. Response: ${text.slice(0, 300)}`,
      );
    }

    throw new Error(
      `OpenAI-compat embeddings: exhausted ${maxAttempts} attempts. Last error: ${String(lastErr)}`,
    );
  }

  /** Resolve after `ms` milliseconds. */
  private sleep(ms: number): Promise<void> {
    return new Promise(r => setTimeout(r, ms));
  }
}
|
|
204
|
+
|
|
205
|
+
/**
 * Delay before the next retry, in milliseconds.
 *
 * The base doubles with each attempt (1000ms for attempt 1, 2000ms for
 * attempt 2, ...) and is scaled by a random factor in [0.75, 1.25) so
 * concurrent callers do not retry in lockstep.
 *
 * @param attempt - 1-based number of the attempt that just failed.
 * @returns The jittered delay, rounded to a whole millisecond.
 */
function backoffMs(attempt: number): number {
  const baseDelay = 1000 * 2 ** (attempt - 1);
  const jitterOffset = Math.random() * 0.5 - 0.25;
  return Math.round(baseDelay * (1 + jitterOffset));
}
|
|
211
|
+
|
|
212
|
+
/**
 * Parse a Retry-After header value into a delay in milliseconds.
 *
 * The header takes one of two forms: a non-negative whole number of
 * seconds, or a date. The seconds form is matched strictly (digits only)
 * so that values such as "5 apples", "-3", "1.5", or a date-like string
 * beginning with digits are never misread as a second count.
 *
 * @param value - Raw header value, or null when the header is absent.
 * @returns Milliseconds to wait (0 for a date in the past), or null when
 *   the header is absent, blank, or unparseable.
 */
function parseRetryAfter(value: string | null): number | null {
  if (!value) return null;
  const trimmed = value.trim();
  if (trimmed === "") return null;

  // delta-seconds form: digits only.
  if (/^\d+$/.test(trimmed)) return Number(trimmed) * 1000;

  // Bare numeric junk (signed or fractional) is neither a valid second
  // count nor a date. Reject it here instead of relying on how lenient the
  // runtime's date parser happens to be with such input.
  if (/^[+-]?[\d.]+$/.test(trimmed)) return null;

  // Date form: wait until that instant, never a negative duration.
  const asDate = Date.parse(trimmed);
  if (Number.isFinite(asDate)) {
    return Math.max(0, asDate - Date.now());
  }
  return null;
}
|
|
224
|
+
|
|
225
|
+
/**
 * Read a response body as text for use in error messages.
 *
 * Never rejects: if reading the body fails, the error is passed to
 * `swallow` under the "embeddings:openai:readBody" tag and an empty
 * string is returned.
 *
 * @param res - The HTTP response whose body should be read.
 * @returns The body text, or "" when it could not be read.
 */
async function readBodyText(res: Response): Promise<string> {
  let bodyText = "";
  try {
    bodyText = await res.text();
  } catch (err) {
    swallow("embeddings:openai:readBody", err);
  }
  return bodyText;
}
|
package/src/embeddings.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { existsSync } from "node:fs";
|
|
2
2
|
import type { EmbeddingConfig } from "./config.js";
|
|
3
|
+
import { OpenAICompatEmbeddingService } from "./embeddings-openai.js";
|
|
3
4
|
import { swallow } from "./errors.js";
|
|
4
5
|
import { log } from "./log.js";
|
|
5
6
|
|
|
@@ -8,8 +9,38 @@ import { log } from "./log.js";
|
|
|
8
9
|
type LlamaEmbeddingContext = import("node-llama-cpp").LlamaEmbeddingContext;
|
|
9
10
|
type LlamaModel = import("node-llama-cpp").LlamaModel;
|
|
10
11
|
|
|
12
|
+
/**
|
|
13
|
+
* Provider-agnostic embedding service.
|
|
14
|
+
*
|
|
15
|
+
* Implementations must guarantee that vectors they produce are in the same
|
|
16
|
+
* vector space across calls within a single instance. Different implementations
|
|
17
|
+
* (or different models within the same implementation) produce vectors in
|
|
18
|
+
* different spaces and must not be compared with cosine similarity. The
|
|
19
|
+
* `providerId` field is the stable tag used to detect cross-space mixing.
|
|
20
|
+
*/
|
|
21
|
+
export interface EmbeddingService {
|
|
22
|
+
/** Stable identifier for the (provider, model, dimension) tuple. */
|
|
23
|
+
readonly providerId: string;
|
|
24
|
+
/** Dimensionality of the vectors this service produces. */
|
|
25
|
+
readonly dimensions: number;
|
|
26
|
+
|
|
27
|
+
/** Initialize the underlying model. Returns true on first init, false if already ready. */
|
|
28
|
+
initialize(): Promise<boolean>;
|
|
29
|
+
/** Return the embedding vector for a single text. */
|
|
30
|
+
embed(text: string): Promise<number[]>;
|
|
31
|
+
/** Return embedding vectors for an array of texts. */
|
|
32
|
+
embedBatch(texts: string[]): Promise<number[][]>;
|
|
33
|
+
/** True once initialize() has succeeded. */
|
|
34
|
+
isAvailable(): boolean;
|
|
35
|
+
/** Release any underlying resources (model handles, sockets, etc.). */
|
|
36
|
+
dispose(): Promise<void>;
|
|
37
|
+
}
|
|
38
|
+
|
|
11
39
|
/** BGE-M3 embedding service (1024-dim via GGUF) with an LRU cache of up to 512 entries. */
|
|
12
|
-
export class EmbeddingService {
|
|
40
|
+
export class LocalEmbeddingService implements EmbeddingService {
|
|
41
|
+
readonly providerId: string;
|
|
42
|
+
readonly dimensions: number;
|
|
43
|
+
|
|
13
44
|
private model: LlamaModel | null = null;
|
|
14
45
|
private ctx: LlamaEmbeddingContext | null = null;
|
|
15
46
|
private ready = false;
|
|
@@ -17,9 +48,11 @@ export class EmbeddingService {
|
|
|
17
48
|
private cache = new Map<string, number[]>();
|
|
18
49
|
private readonly maxCacheSize = 512;
|
|
19
50
|
|
|
20
|
-
constructor(private readonly config: EmbeddingConfig) {
|
|
51
|
+
constructor(private readonly config: EmbeddingConfig) {
|
|
52
|
+
this.providerId = "local-bge-m3";
|
|
53
|
+
this.dimensions = config.dimensions;
|
|
54
|
+
}
|
|
21
55
|
|
|
22
|
-
/** Initialize the embedding model. Returns true if freshly loaded, false if already ready. */
|
|
23
56
|
async initialize(): Promise<boolean> {
|
|
24
57
|
if (this.ready) return false;
|
|
25
58
|
if (!existsSync(this.config.modelPath)) {
|
|
@@ -42,19 +75,16 @@ export class EmbeddingService {
|
|
|
42
75
|
return true;
|
|
43
76
|
}
|
|
44
77
|
|
|
45
|
-
/** Return the embedding vector for text, serving from LRU cache on repeat calls. */
|
|
46
78
|
async embed(text: string): Promise<number[]> {
|
|
47
79
|
if (!this.ready || !this.ctx) throw new Error("Embeddings not initialized");
|
|
48
80
|
const cached = this.cache.get(text);
|
|
49
81
|
if (cached) {
|
|
50
|
-
// Move to end for LRU freshness
|
|
51
82
|
this.cache.delete(text);
|
|
52
83
|
this.cache.set(text, cached);
|
|
53
84
|
return cached;
|
|
54
85
|
}
|
|
55
86
|
const result = await this.ctx.getEmbeddingFor(text);
|
|
56
87
|
const vec = Array.from(result.vector);
|
|
57
|
-
// Evict oldest if at capacity
|
|
58
88
|
if (this.cache.size >= this.maxCacheSize) {
|
|
59
89
|
this.cache.delete(this.cache.keys().next().value!);
|
|
60
90
|
}
|
|
@@ -82,3 +112,15 @@ export class EmbeddingService {
|
|
|
82
112
|
}
|
|
83
113
|
}
|
|
84
114
|
}
|
|
115
|
+
|
|
116
|
+
/**
 * Construct the configured embedding service. Adding a new provider plugs in here.
 *
 * @param config - Parsed embedding config; `config.provider` selects the implementation.
 * @returns An OpenAICompatEmbeddingService when provider is "openai-compat",
 *   otherwise a LocalEmbeddingService. The instance is not yet initialized.
 */
export function createEmbeddingService(config: EmbeddingConfig): EmbeddingService {
  if (config.provider === "openai-compat") {
    // Uses the class from this module's static import. A CommonJS `require()`
    // here would not be lazy (the module is already loaded by that import)
    // and `require` is not defined when this file runs as an ES module.
    return new OpenAICompatEmbeddingService(config);
  }
  return new LocalEmbeddingService(config);
}
|
package/src/identity.ts
CHANGED
|
@@ -103,6 +103,7 @@ export async function seedIdentity(
|
|
|
103
103
|
chunk_index: i,
|
|
104
104
|
text: chunk.text,
|
|
105
105
|
embedding: vec,
|
|
106
|
+
embedding_provider: embeddings.providerId,
|
|
106
107
|
importance: chunk.importance,
|
|
107
108
|
},
|
|
108
109
|
},
|
|
@@ -183,6 +184,7 @@ export async function saveUserIdentity(
|
|
|
183
184
|
chunk_index: i,
|
|
184
185
|
text,
|
|
185
186
|
embedding: vec,
|
|
187
|
+
embedding_provider: embeddings.providerId,
|
|
186
188
|
importance: 0.95,
|
|
187
189
|
},
|
|
188
190
|
},
|
package/src/index.ts
CHANGED
|
@@ -10,8 +10,9 @@ import { join, dirname } from "node:path";
|
|
|
10
10
|
import { definePluginEntry } from "openclaw/plugin-sdk/plugin-entry";
|
|
11
11
|
import { parsePluginConfig } from "./config.js";
|
|
12
12
|
import { SurrealStore } from "./surreal.js";
|
|
13
|
-
import {
|
|
13
|
+
import { createEmbeddingService } from "./embeddings.js";
|
|
14
14
|
import { GlobalPluginState, type CompleteFn } from "./state.js";
|
|
15
|
+
import { resolveModelRef } from "./model-resolution.js";
|
|
15
16
|
import { KongBrainContextEngine } from "./context-engine.js";
|
|
16
17
|
import { createRecallToolDef } from "./tools/recall.js";
|
|
17
18
|
import { createCoreMemoryToolDef } from "./tools/core-memory.js";
|
|
@@ -299,10 +300,76 @@ async function detectGraduationEvent(
|
|
|
299
300
|
}
|
|
300
301
|
}
|
|
301
302
|
|
|
303
|
+
/**
 * Count rows, across the embedding-bearing tables, that have an embedding
 * but whose `embedding_provider` differs from the active provider, and log
 * a single warning when that total is positive.
 *
 * Nothing is modified and startup is not blocked: the check only reads and
 * warns. A query failure on one table is reported through `swallow.warn`
 * and the remaining tables are still checked.
 *
 * @param store - Database handle; the check is skipped when it is unavailable.
 * @param activeProvider - providerId of the embedding service currently in use.
 * @param logger - Sink for the warning message.
 */
async function checkEmbeddingProviderMismatch(
  store: SurrealStore,
  activeProvider: string,
  logger: { warn: (msg: string) => void },
): Promise<void> {
  if (!store.isAvailable()) return;

  const embeddedTables = ["turn", "concept", "memory", "artifact", "identity_chunk", "skill", "reflection", "monologue"];
  let staleRowCount = 0;

  // Tables are queried one at a time, in list order.
  for (const table of embeddedTables) {
    try {
      const result = await store.queryFirst<{ count: number }>(
        `SELECT count() AS count FROM ${table} WHERE embedding != NONE AND embedding_provider != $provider GROUP ALL`,
        { provider: activeProvider },
      );
      staleRowCount += Number(result[0]?.count ?? 0);
    } catch (err) {
      swallow.warn(`factory:providerMismatchCount:${table}`, err);
    }
  }

  // Warn only for a strictly positive total (a NaN total stays silent).
  if (!(staleRowCount > 0)) return;

  logger.warn(
    `Embedding provider mismatch: ${staleRowCount} rows in the database were embedded by a different provider than the active one (${activeProvider}). ` +
    `These rows are filtered out of similarity search until re-embedded. To migrate, run the re-embed tool (PR-D, coming soon) or revert the embedding.provider config.`,
  );
}
|
|
336
|
+
|
|
337
|
+
/**
 * Sample one stored vector per table and warn when its length differs from
 * the configured embedding dimension.
 *
 * The warning is emitted at most once: the first table with a mismatching
 * sample triggers it and ends the check. Tables with no non-empty
 * embeddings, or whose sample matches, are skipped. A query failure on one
 * table is reported through `swallow.warn` and the next table is tried.
 *
 * @param store - Database handle; the check is skipped when it is unavailable.
 * @param configuredDimensions - Value of embedding.dimensions in the active config.
 * @param logger - Sink for the warning message.
 */
async function checkEmbeddingDimensionMismatch(
  store: SurrealStore,
  configuredDimensions: number,
  logger: { warn: (msg: string) => void },
): Promise<void> {
  if (!store.isAvailable()) return;

  const embeddedTables = ["concept", "memory", "turn", "artifact", "identity_chunk", "skill", "reflection", "monologue"];

  for (const table of embeddedTables) {
    try {
      const sample = await store.queryFirst<{ len: number }>(
        `SELECT array::len(embedding) AS len FROM ${table} WHERE embedding != NONE AND array::len(embedding) > 0 LIMIT 1`,
      );
      if (sample.length === 0) continue;
      if (sample[0].len === configuredDimensions) continue;

      logger.warn(
        `Embedding dimension mismatch: existing vectors are ${sample[0].len}-dimensional but embedding.dimensions is configured as ${configuredDimensions}. ` +
        `HNSW indexes created at the old dimension are not updated by DEFINE INDEX IF NOT EXISTS. ` +
        `To fix: drop and recreate the vector indexes, then re-embed affected rows.`,
      );
      return;
    } catch (err) {
      swallow.warn(`factory:dimensionMismatchCheck:${table}`, err);
    }
  }
}
|
|
368
|
+
|
|
302
369
|
export default definePluginEntry({
|
|
303
370
|
id: "kongbrain",
|
|
304
371
|
name: "KongBrain",
|
|
305
|
-
description: "Graph-backed cognitive context engine with SurrealDB persistence and BGE-M3
|
|
372
|
+
description: "Graph-backed cognitive context engine with SurrealDB persistence and pluggable embeddings (local BGE-M3 or OpenAI-compatible).",
|
|
306
373
|
kind: "context-engine",
|
|
307
374
|
|
|
308
375
|
register(api) {
|
|
@@ -315,8 +382,14 @@ export default definePluginEntry({
|
|
|
315
382
|
// ensure a single instance survives across module reloads.
|
|
316
383
|
let globalState = getGlobalState();
|
|
317
384
|
if (!globalState) {
|
|
318
|
-
const store = new SurrealStore(config.surreal
|
|
319
|
-
|
|
385
|
+
const store = new SurrealStore(config.surreal, {
|
|
386
|
+
embeddingDimensions: config.embedding.dimensions,
|
|
387
|
+
});
|
|
388
|
+
const embeddings = createEmbeddingService(config.embedding);
|
|
389
|
+
// Tag every embedding write and filter every embedding search by this
|
|
390
|
+
// provider id, so vectors from different models (different vector
|
|
391
|
+
// spaces) never mix in the same HNSW result set.
|
|
392
|
+
store.setActiveProvider(embeddings.providerId);
|
|
320
393
|
// Build a CompleteFn using pi-ai directly since api.runtime.complete
|
|
321
394
|
// is not available in OpenClaw 2026.3.24 (unreleased feature).
|
|
322
395
|
const apiRef = api;
|
|
@@ -345,27 +418,22 @@ export default definePluginEntry({
|
|
|
345
418
|
}
|
|
346
419
|
piAi = await import(piAiPath);
|
|
347
420
|
}
|
|
348
|
-
// Fall back to calling pi-ai directly (runtime.complete not in OpenClaw 2026.3.24)
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
//
|
|
352
|
-
const
|
|
353
|
-
|
|
354
|
-
:
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
resolvedProvider = modelId.slice(0, idx);
|
|
361
|
-
modelId = modelId.slice(idx + 1);
|
|
362
|
-
}
|
|
363
|
-
const model = piAi!.getModel(resolvedProvider, modelId);
|
|
421
|
+
// Fall back to calling pi-ai directly (runtime.complete not in OpenClaw 2026.3.24).
|
|
422
|
+
// Fully-qualified OpenClaw refs (provider/model) are authoritative:
|
|
423
|
+
// stale runtime default providers must not override e.g.
|
|
424
|
+
// "openrouter/google/gemini-3-flash-preview".
|
|
425
|
+
const cfg = apiRef.runtime.config.loadConfig();
|
|
426
|
+
const resolved = resolveModelRef({
|
|
427
|
+
explicitProvider: params.provider,
|
|
428
|
+
explicitModel: params.model,
|
|
429
|
+
config: cfg,
|
|
430
|
+
runtimeDefaults: apiRef.runtime.agent.defaults,
|
|
431
|
+
});
|
|
432
|
+
const model = piAi!.getModel(resolved.provider, resolved.modelId);
|
|
364
433
|
if (!model) {
|
|
365
|
-
throw new Error(`Model "${modelId}" not found for provider "${provider}"`);
|
|
434
|
+
throw new Error(`Model "${resolved.modelId}" not found for provider "${resolved.provider}"`);
|
|
366
435
|
}
|
|
367
436
|
// Resolve auth via OpenClaw's runtime (handles profiles, env vars, etc.)
|
|
368
|
-
const cfg = apiRef.runtime.config.loadConfig();
|
|
369
437
|
const auth = await apiRef.runtime.modelAuth.getApiKeyForModel({ model, cfg });
|
|
370
438
|
// Build context
|
|
371
439
|
const now = Date.now();
|
|
@@ -379,7 +447,7 @@ export default definePluginEntry({
|
|
|
379
447
|
);
|
|
380
448
|
const context = { systemPrompt: params.system, messages };
|
|
381
449
|
// Pass apiKey directly in options so the provider can use it
|
|
382
|
-
log.info(`complete(): provider=${
|
|
450
|
+
log.info(`complete(): provider=${resolved.provider} model=${resolved.modelId} msgs=${params.messages.length}`);
|
|
383
451
|
// NOTE: outputFormat (structured output) is intentionally NOT passed to pi-ai.
|
|
384
452
|
// pi-ai's SimpleStreamOptions doesn't support it, and injecting it via onPayload
|
|
385
453
|
// causes the Anthropic API to return empty responses. The daemon's JSON parsing
|
|
@@ -419,10 +487,23 @@ export default definePluginEntry({
|
|
|
419
487
|
throw e;
|
|
420
488
|
}
|
|
421
489
|
|
|
422
|
-
// Initialize
|
|
490
|
+
// Initialize the embedding provider (no-op if already loaded)
|
|
423
491
|
try {
|
|
424
492
|
const freshEmbed = await embeddings.initialize();
|
|
425
|
-
if (freshEmbed)
|
|
493
|
+
if (freshEmbed) {
|
|
494
|
+
const detail = config.embedding.provider === "openai-compat"
|
|
495
|
+
? `${config.embedding.openaiCompat.baseURL} (${config.embedding.openaiCompat.model})`
|
|
496
|
+
: config.embedding.modelPath;
|
|
497
|
+
logger.info(`Embeddings initialized [${embeddings.providerId}]: ${detail}`);
|
|
498
|
+
// One-time check: warn if the DB has rows tagged with a different
|
|
499
|
+
// provider. PR-B's search-time filter prevents silent corruption,
|
|
500
|
+
// but those rows are now invisible to recall until they're
|
|
501
|
+
// re-embedded with the active provider.
|
|
502
|
+
checkEmbeddingProviderMismatch(store, embeddings.providerId, logger)
|
|
503
|
+
.catch(e => swallow.warn("factory:providerMismatchCheck", e));
|
|
504
|
+
checkEmbeddingDimensionMismatch(store, config.embedding.dimensions, logger)
|
|
505
|
+
.catch(e => swallow.warn("factory:dimensionMismatchCheck", e));
|
|
506
|
+
}
|
|
426
507
|
} catch (e) {
|
|
427
508
|
logger.warn(`Embeddings init failed — running in degraded mode: ${e}`);
|
|
428
509
|
}
|
package/src/memory-daemon.ts
CHANGED
|
@@ -350,7 +350,7 @@ export async function writeExtractionResults(
|
|
|
350
350
|
trigger_context: String(s.trigger_context ?? "").slice(0, 200),
|
|
351
351
|
tags: ["auto-extracted"],
|
|
352
352
|
session_id: sessionId,
|
|
353
|
-
...(emb ? { embedding: emb } : {}),
|
|
353
|
+
...(emb ? { embedding: emb, embedding_provider: embeddings.providerId } : {}),
|
|
354
354
|
},
|
|
355
355
|
},
|
|
356
356
|
);
|