kongbrain 0.4.4 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +47 -0
- package/README.github.md +53 -3
- package/README.md +29 -3
- package/README.npm.md +29 -3
- package/SKILL.md +1 -1
- package/bin/kongbrain-reembed.ts +143 -0
- package/openclaw.plugin.json +37 -7
- package/package.json +4 -1
- package/src/causal.ts +4 -1
- package/src/cognitive-bootstrap.ts +1 -0
- package/src/concept-extract.ts +4 -2
- package/src/config.ts +56 -10
- package/src/embeddings-openai.ts +232 -0
- package/src/embeddings.ts +48 -6
- package/src/identity.ts +2 -0
- package/src/index.ts +54 -5
- package/src/memory-daemon.ts +1 -1
- package/src/migrate-reembed.ts +305 -0
- package/src/reflection.ts +10 -4
- package/src/schema.surql +29 -0
- package/src/skills.ts +14 -5
- package/src/supersedes.ts +2 -1
- package/src/surreal.ts +77 -19
- package/src/workspace-migrate.ts +3 -0
|
@@ -0,0 +1,305 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Re-embed migration: take rows tagged with one provider and rewrite their
|
|
3
|
+
* embeddings using another provider, updating the embedding_provider tag in
|
|
4
|
+
* the same UPDATE.
|
|
5
|
+
*
|
|
6
|
+
* Resumability: each table is processed in batches of `batchSize` rows
|
|
7
|
+
* matching `embedding_provider = $fromProvider`. After a batch is written,
|
|
8
|
+
* those rows no longer match the filter, so a subsequent run picks up from
|
|
9
|
+
* where the previous one stopped.
|
|
10
|
+
*
|
|
11
|
+
* The text re-embedded for each row is the canonical text field for that
|
|
12
|
+
* table (e.g. concept.content, turn.text). For tables where the original
|
|
13
|
+
* write site embedded a composed string (skill: "name: description"), we
|
|
14
|
+
* reproduce that composition here so the new vectors live in roughly the
|
|
15
|
+
* same conceptual neighborhood as the originals.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
import type { EmbeddingService } from "./embeddings.js";
|
|
19
|
+
import type { SurrealStore } from "./surreal.js";
|
|
20
|
+
import { swallow } from "./errors.js";
|
|
21
|
+
|
|
22
|
+
/**
 * Names of every table that carries an `embedding` column and therefore
 * takes part in a re-embed migration. The order here is the order in which
 * tables are processed by default and listed in formatted output.
 */
export const VECTOR_TABLES = [
  "turn", "concept", "memory", "artifact",
  "identity_chunk", "skill", "reflection", "monologue",
] as const;

/** Union of the table names listed in {@link VECTOR_TABLES}. */
export type VectorTable = (typeof VECTOR_TABLES)[number];
|
|
35
|
+
|
|
36
|
+
/**
 * Maps a fetched row to the text that should be embedded for it.
 * Tables name their text field differently, and some compose the embedding
 * text from several fields, so each table gets its own extractor.
 */
type RowTextExtractor = (row: Record<string, unknown>) => string;

/** Coerce a possibly-missing row field to a string (`null`/`undefined` → `""`). */
const fieldText = (value: unknown): string => String(value ?? "");

/**
 * Per-table text extractors. Every extractor returns a string and never
 * throws on missing fields; an empty (or whitespace-only) result signals
 * "nothing to embed" to the caller.
 */
const TEXT_EXTRACTORS: Record<VectorTable, RowTextExtractor> = {
  turn: r => fieldText(r.text),
  concept: r => fieldText(r.content),
  memory: r => fieldText(r.text),
  artifact: r => {
    // Per the original author's note this is meant to mirror what
    // workspace-migrate.ts embeds for content-rich artifacts: short content
    // is embedded as-is, long content becomes the description plus a
    // 1500-character excerpt, and a missing content falls back to the
    // description alone.
    const description = fieldText(r.description);
    const content = fieldText(r.content);
    if (!content) return description;
    if (content.length < 2000) return content;
    return `${description}\n${content.slice(0, 1500)}`;
  },
  identity_chunk: r => fieldText(r.text),
  // Skills are embedded as `${name}: ${description}` (the original comment
  // attributes this format to skills.ts), so the same composition is used here.
  skill: r => {
    const name = fieldText(r.name);
    const description = fieldText(r.description);
    // With both parts blank the composed string would be a bare ":" — not
    // meaningful text. Report it as blank so the caller skips embedding it.
    if (name.trim().length === 0 && description.trim().length === 0) return "";
    return `${name}: ${description}`.trim();
  },
  reflection: r => fieldText(r.text),
  monologue: r => fieldText(r.content),
};
|
|
63
|
+
|
|
64
|
+
// Projections shared by several tables.
const ID_AND_TEXT = "id, text";
const ID_AND_CONTENT = "id, content";

/**
 * Column list to SELECT for each table during migration: the record id plus
 * whichever fields that table's text extractor reads.
 */
const SELECT_FIELDS: Record<VectorTable, string> = {
  turn: ID_AND_TEXT,
  concept: ID_AND_CONTENT,
  memory: ID_AND_TEXT,
  artifact: "id, description, content",
  identity_chunk: ID_AND_TEXT,
  skill: "id, name, description",
  reflection: ID_AND_TEXT,
  monologue: ID_AND_CONTENT,
};
|
|
75
|
+
|
|
76
|
+
/** Options accepted by `reembedAll`. Only `fromProvider` is required. */
export interface ReembedOptions {
  /**
   * Provider id rows should be migrated FROM. Required. Only rows whose
   * `embedding_provider` equals this value are selected.
   */
  fromProvider: string;
  /** Provider id to migrate TO. Defaults to `embeddings.providerId`. */
  toProvider?: string;
  /** Tables to migrate. Defaults to all 8 vector tables (`VECTOR_TABLES`). */
  tables?: VectorTable[];
  /** Rows fetched + embedded per batch. Default 256. */
  batchSize?: number;
  /** When true, count rows + estimate cost without writing anything. Default false. */
  dryRun?: boolean;
  /** Optional progress callback, invoked once per batch. */
  onProgress?: (event: ProgressEvent) => void;
}
|
|
90
|
+
|
|
91
|
+
/** Payload handed to `ReembedOptions.onProgress` after a batch has been handled. */
export interface ProgressEvent {
  /** Table the batch belongs to. */
  table: VectorTable;
  /** Rows processed in this batch. */
  batchSize: number;
  /** Cumulative rows processed for this table. */
  tableProcessed: number;
  /** Total rows (counted at start) for this table. */
  tableTotal: number;
}
|
|
100
|
+
|
|
101
|
+
/** Summary returned by `reembedAll` and consumed by `formatResult`. */
export interface ReembedResult {
  /** Total rows updated (or counted, when dryRun); the sum of `perTable`. */
  total: number;
  /** Per-table breakdown. Has an entry for every vector table; untouched tables stay at 0. */
  perTable: Record<VectorTable, number>;
  /** Approximate input character count (sum of the lengths of non-blank texts). */
  approxChars: number;
  /** Approximate input token count using a chars/4 heuristic. */
  approxTokens: number;
  /** True if no writes were performed. */
  dryRun: boolean;
  /** Wall clock duration in ms. */
  durationMs: number;
}
|
|
115
|
+
|
|
116
|
+
/**
 * Rough token estimate for a given number of input characters, assuming
 * four characters per token and rounding up. Actual tokenizers differ per
 * model, so treat the result as a ballpark for cost estimation only.
 *
 * @param chars Number of input characters.
 * @returns The estimated token count.
 */
function approxTokenCount(chars: number): number {
  const CHARS_PER_TOKEN = 4;
  const estimate = chars / CHARS_PER_TOKEN;
  return Math.ceil(estimate);
}
|
|
123
|
+
|
|
124
|
+
/**
 * Migrate rows from one embedding provider to another, batching reads + writes.
 *
 * For each requested table the function counts the rows tagged with
 * `fromProvider`, then repeatedly fetches up to `batchSize` of them, embeds
 * their extracted text via `embeddings.embedBatch`, and writes each vector
 * back together with the `toProvider` tag. Rows whose extracted text is blank
 * get `embedding` and `embedding_provider` cleared instead, so they leave the
 * FROM filter without being embedded.
 *
 * The embedding service passed in MUST already be initialized and produce
 * vectors in the target provider's space. The function does NOT switch
 * providers itself — that is a config-time decision.
 *
 * In dry-run mode nothing is written; every matching row is paged through
 * (read-only) so the character/token totals cover the whole table rather
 * than just the first batch.
 *
 * @param store      Initialized SurrealStore used for all reads and writes.
 * @param embeddings Embedding service for the target provider.
 * @param opts       Migration options; see {@link ReembedOptions}.
 * @returns Counts of rows actually written (or scanned, in dry-run), the
 *          approximate input size, and the wall-clock duration.
 * @throws Error if source and target provider are identical, if the store is
 *         unavailable, if the embedding service is unavailable on a real
 *         run, or if `embedBatch` returns a vector count that does not match
 *         its input count.
 * @throws RangeError if `batchSize` is not a positive integer.
 */
export async function reembedAll(
  store: SurrealStore,
  embeddings: EmbeddingService,
  opts: ReembedOptions,
): Promise<ReembedResult> {
  const startedAt = Date.now();
  const tables: VectorTable[] = opts.tables ?? [...VECTOR_TABLES];
  const batchSize = opts.batchSize ?? 256;
  const fromProvider = opts.fromProvider;
  const toProvider = opts.toProvider ?? embeddings.providerId;
  const dryRun = opts.dryRun ?? false;

  if (fromProvider === toProvider) {
    throw new Error(
      `reembedAll: fromProvider (${fromProvider}) and toProvider (${toProvider}) are identical — nothing to do.`,
    );
  }
  if (!dryRun && !embeddings.isAvailable()) {
    throw new Error("reembedAll: embedding service is not initialized.");
  }
  if (!store.isAvailable()) {
    throw new Error("reembedAll: SurrealStore is not initialized.");
  }
  // A zero, negative, fractional or NaN batch size would either fetch
  // nothing (silently reporting "0 rows") or produce an invalid LIMIT.
  if (!Number.isInteger(batchSize) || batchSize <= 0) {
    throw new RangeError(
      `reembedAll: batchSize must be a positive integer (got ${batchSize}).`,
    );
  }

  const perTable: Record<VectorTable, number> = Object.fromEntries(
    VECTOR_TABLES.map(t => [t, 0]),
  ) as Record<VectorTable, number>;
  let approxChars = 0;

  for (const table of tables) {
    // Count the rows we'll touch up front so onProgress can report
    // progress against a total.
    const countRows = await store.queryFirst<{ count: number }>(
      `SELECT count() AS count FROM ${table}
       WHERE embedding != NONE AND embedding_provider = $provider
       GROUP ALL`,
      { provider: fromProvider },
    );
    const tableTotal = Number(countRows[0]?.count ?? 0);
    if (tableTotal === 0) continue;

    const extract = TEXT_EXTRACTORS[table];
    let tableProcessed = 0;
    // Dry-run only: number of rows already scanned. A real run does not
    // need an offset because written rows drop out of the filter.
    let dryRunOffset = 0;

    while (true) {
      const rows = dryRun
        ? await store.queryFirst<Record<string, unknown>>(
            `SELECT ${SELECT_FIELDS[table]} FROM ${table}
             WHERE embedding != NONE AND embedding_provider = $provider
             LIMIT $lim START $start`,
            { provider: fromProvider, lim: batchSize, start: dryRunOffset },
          )
        : await store.queryFirst<Record<string, unknown>>(
            `SELECT ${SELECT_FIELDS[table]} FROM ${table}
             WHERE embedding != NONE AND embedding_provider = $provider
             LIMIT $lim`,
            { provider: fromProvider, lim: batchSize },
          );
      if (rows.length === 0) break;

      // Split the batch into rows with real text and rows with nothing
      // useful to embed. Characters are summed here, before any embed
      // call, so dry-run and real runs accumulate identically.
      const blankRows: Record<string, unknown>[] = [];
      const realRows: Record<string, unknown>[] = [];
      const realTexts: string[] = [];
      for (const row of rows) {
        const text = extract(row);
        if (text.trim().length === 0) {
          blankRows.push(row);
        } else {
          realRows.push(row);
          realTexts.push(text);
          approxChars += text.length;
        }
      }

      if (dryRun) {
        // Nothing is written, so the provider tags do not change and the
        // same rows would match again; advance by offset instead.
        tableProcessed += rows.length;
        perTable[table] = tableProcessed;
        opts.onProgress?.({
          table,
          batchSize: rows.length,
          tableProcessed,
          tableTotal,
        });
        if (rows.length < batchSize) break;
        dryRunOffset += rows.length;
        continue;
      }

      // Real run: embed in one batched call (provider implementations
      // chunk internally if needed).
      let vecs: number[][] = [];
      if (realTexts.length > 0) {
        vecs = await embeddings.embedBatch(realTexts);
        if (vecs.length !== realTexts.length) {
          throw new Error(
            `reembedAll[${table}]: embedBatch returned ${vecs.length} vectors for ${realTexts.length} inputs.`,
          );
        }
      }

      // Write back: one UPDATE per row. A failed write is logged and
      // skipped (best effort); only successful writes are counted.
      // NOTE(review): this assumes store.queryExec rejects when the UPDATE
      // fails, as the try/catch implies — confirm against SurrealStore.
      let written = 0;
      for (const [j, row] of realRows.entries()) {
        const id = String(row.id);
        try {
          await store.queryExec(
            `UPDATE ${id} SET embedding = $emb, embedding_provider = $provider`,
            { emb: vecs[j], provider: toProvider },
          );
          written++;
        } catch (e) {
          swallow.warn(`reembed:update:${table}`, e);
        }
      }
      // Blank-text rows: drop the embedding and clear the tag so they
      // exit the FROM filter.
      for (const row of blankRows) {
        const id = String(row.id);
        try {
          await store.queryExec(
            `UPDATE ${id} SET embedding = NONE, embedding_provider = NONE`,
          );
          written++;
        } catch (e) {
          swallow.warn(`reembed:blank:${table}`, e);
        }
      }

      tableProcessed += written;
      perTable[table] = tableProcessed;
      opts.onProgress?.({
        table,
        batchSize: rows.length,
        tableProcessed,
        tableTotal,
      });

      // If not a single row could be written, every row of this batch
      // still matches the filter: the next fetch would return the same
      // rows and we would re-embed them forever. Give up on this table.
      if (written === 0) {
        swallow.warn(
          `reembed:stalled:${table}`,
          new Error(
            `reembedAll[${table}]: no row of the last batch (${rows.length} rows) could be written; stopping this table.`,
          ),
        );
        break;
      }

      // Loop again unless the batch was undersized (no more to do).
      if (rows.length < batchSize) break;
    }
  }

  const total = Object.values(perTable).reduce((a, b) => a + b, 0);
  return {
    total,
    perTable,
    approxChars,
    approxTokens: approxTokenCount(approxChars),
    dryRun,
    durationMs: Date.now() - startedAt,
  };
}
|
|
283
|
+
|
|
284
|
+
/**
 * Render a migration result as a multi-line, human-readable report.
 *
 * Used by the CLI and exported so other front ends can show the same text.
 * The report lists the target provider, the total and per-table row counts
 * (tables with a zero count are omitted), the approximate input size, a
 * cost estimate for two OpenAI embedding models, and the duration.
 *
 * @param result     Result returned by the migration.
 * @param toProvider Provider id shown as the migration target.
 * @returns The report, lines joined with "\n".
 */
export function formatResult(result: ReembedResult, toProvider: string): string {
  const { dryRun, total, perTable, approxChars, approxTokens, durationMs } = result;

  // text-embedding-3-small is $0.02/1M tokens; -3-large is $0.13/1M.
  // The caller's model is unknown here, so both estimates are reported.
  const millionsOfTokens = approxTokens / 1_000_000;
  const smallCost = millionsOfTokens * 0.02;
  const largeCost = millionsOfTokens * 0.13;

  const tableLines = VECTOR_TABLES
    .filter(t => perTable[t] > 0)
    .map(t => ` ${t}: ${perTable[t]}`);

  return [
    dryRun ? "DRY RUN — no writes performed." : "Migration complete.",
    `Target provider: ${toProvider}`,
    `Rows ${dryRun ? "to be migrated" : "migrated"}: ${total}`,
    ...tableLines,
    `Approx input: ${approxChars.toLocaleString()} chars (~${approxTokens.toLocaleString()} tokens)`,
    `Estimated cost: $${smallCost.toFixed(4)} (text-embedding-3-small) | $${largeCost.toFixed(4)} (text-embedding-3-large)`,
    `Duration: ${(durationMs / 1000).toFixed(2)}s`,
  ].join("\n");
}
|
package/src/reflection.ts
CHANGED
|
@@ -179,15 +179,17 @@ export async function generateReflection(
|
|
|
179
179
|
try { reflEmb = await embeddings.embed(reflectionText); } catch (e) { swallow("reflection:ok", e); }
|
|
180
180
|
}
|
|
181
181
|
|
|
182
|
-
// Dedup: skip if a very similar reflection already exists
|
|
182
|
+
// Dedup: skip if a very similar reflection already exists. Filter by
|
|
183
|
+
// provider so we don't dedup against vectors in a different space.
|
|
183
184
|
if (reflEmb?.length) {
|
|
184
185
|
const existing = await store.queryFirst<{ id: string; importance: number; score: number }>(
|
|
185
186
|
`SELECT id, importance,
|
|
186
187
|
vector::similarity::cosine(embedding, $vec) AS score
|
|
187
188
|
FROM reflection
|
|
188
189
|
WHERE embedding != NONE AND array::len(embedding) > 0
|
|
190
|
+
AND embedding_provider = $provider
|
|
189
191
|
ORDER BY score DESC LIMIT 1`,
|
|
190
|
-
{ vec: reflEmb },
|
|
192
|
+
{ vec: reflEmb, provider: embeddings.providerId },
|
|
191
193
|
);
|
|
192
194
|
const top = existing[0];
|
|
193
195
|
if (top && typeof top.score === "number" && top.score > 0.85) {
|
|
@@ -207,7 +209,10 @@ export async function generateReflection(
|
|
|
207
209
|
severity,
|
|
208
210
|
importance: 7.0,
|
|
209
211
|
};
|
|
210
|
-
if (reflEmb?.length)
|
|
212
|
+
if (reflEmb?.length) {
|
|
213
|
+
record.embedding = reflEmb;
|
|
214
|
+
record.embedding_provider = embeddings.providerId;
|
|
215
|
+
}
|
|
211
216
|
|
|
212
217
|
const rows = await store.queryFirst<{ id: string }>(
|
|
213
218
|
`CREATE reflection CONTENT $record RETURN id`,
|
|
@@ -242,8 +247,9 @@ export async function retrieveReflections(
|
|
|
242
247
|
vector::similarity::cosine(embedding, $vec) AS score
|
|
243
248
|
FROM reflection
|
|
244
249
|
WHERE embedding != NONE AND array::len(embedding) > 0
|
|
250
|
+
AND embedding_provider = $provider
|
|
245
251
|
ORDER BY score DESC LIMIT $lim`,
|
|
246
|
-
{ vec: queryVec, lim: limit },
|
|
252
|
+
{ vec: queryVec, lim: limit, provider: store.getActiveProvider() },
|
|
247
253
|
);
|
|
248
254
|
|
|
249
255
|
return rows
|
package/src/schema.surql
CHANGED
|
@@ -39,10 +39,12 @@ DEFINE FIELD IF NOT EXISTS type ON artifact TYPE string;
|
|
|
39
39
|
DEFINE FIELD IF NOT EXISTS description ON artifact TYPE option<string>;
|
|
40
40
|
DEFINE FIELD IF NOT EXISTS content_hash ON artifact TYPE option<string>;
|
|
41
41
|
DEFINE FIELD IF NOT EXISTS embedding ON artifact TYPE option<array<float>>;
|
|
42
|
+
DEFINE FIELD IF NOT EXISTS embedding_provider ON artifact TYPE option<string>;
|
|
42
43
|
DEFINE FIELD IF NOT EXISTS tags ON artifact TYPE option<array>;
|
|
43
44
|
DEFINE FIELD IF NOT EXISTS created_at ON artifact TYPE datetime DEFAULT time::now();
|
|
44
45
|
DEFINE INDEX IF NOT EXISTS artifact_vec_idx ON artifact FIELDS embedding HNSW DIMENSION 1024 DIST COSINE;
|
|
45
46
|
DEFINE INDEX IF NOT EXISTS artifact_type_idx ON artifact FIELDS type;
|
|
47
|
+
DEFINE INDEX IF NOT EXISTS artifact_emb_provider_idx ON artifact FIELDS embedding_provider;
|
|
46
48
|
|
|
47
49
|
-- ============================================================
|
|
48
50
|
-- PILLAR 5: Concept (semantic knowledge nodes)
|
|
@@ -52,6 +54,7 @@ DEFINE TABLE IF NOT EXISTS concept SCHEMALESS;
|
|
|
52
54
|
UPDATE concept SET content = name WHERE content = NONE AND name != NONE;
|
|
53
55
|
DEFINE FIELD IF NOT EXISTS content ON concept TYPE string;
|
|
54
56
|
DEFINE FIELD IF NOT EXISTS embedding ON concept TYPE option<array<float>>;
|
|
57
|
+
DEFINE FIELD IF NOT EXISTS embedding_provider ON concept TYPE option<string>;
|
|
55
58
|
DEFINE FIELD IF NOT EXISTS stability ON concept TYPE float DEFAULT 1.0;
|
|
56
59
|
DEFINE FIELD IF NOT EXISTS confidence ON concept TYPE float DEFAULT 1.0;
|
|
57
60
|
DEFINE FIELD IF NOT EXISTS access_count ON concept TYPE int DEFAULT 0;
|
|
@@ -60,6 +63,7 @@ DEFINE FIELD IF NOT EXISTS source ON concept TYPE option<string>;
|
|
|
60
63
|
DEFINE FIELD IF NOT EXISTS created_at ON concept TYPE datetime DEFAULT time::now();
|
|
61
64
|
DEFINE FIELD IF NOT EXISTS last_accessed ON concept TYPE option<datetime>;
|
|
62
65
|
DEFINE INDEX IF NOT EXISTS concept_vec_idx ON concept FIELDS embedding HNSW DIMENSION 1024 DIST COSINE;
|
|
66
|
+
DEFINE INDEX IF NOT EXISTS concept_emb_provider_idx ON concept FIELDS embedding_provider;
|
|
63
67
|
|
|
64
68
|
-- ============================================================
|
|
65
69
|
-- Conversation turns (the workhorse table)
|
|
@@ -71,6 +75,7 @@ DEFINE FIELD IF NOT EXISTS text ON turn TYPE string;
|
|
|
71
75
|
-- Migration: ensure embedding is optional (SurrealDB 3.0 HNSW requires this for nullable embeddings)
|
|
72
76
|
REMOVE FIELD IF EXISTS embedding ON turn;
|
|
73
77
|
DEFINE FIELD embedding ON turn TYPE option<array<float>>;
|
|
78
|
+
DEFINE FIELD IF NOT EXISTS embedding_provider ON turn TYPE option<string>;
|
|
74
79
|
DEFINE FIELD IF NOT EXISTS timestamp ON turn TYPE datetime DEFAULT time::now();
|
|
75
80
|
DEFINE FIELD IF NOT EXISTS created_at ON turn TYPE datetime DEFAULT time::now();
|
|
76
81
|
DEFINE FIELD IF NOT EXISTS token_count ON turn TYPE option<int>;
|
|
@@ -80,6 +85,7 @@ DEFINE FIELD IF NOT EXISTS usage ON turn TYPE option<object>;
|
|
|
80
85
|
DEFINE INDEX IF NOT EXISTS turn_vec_idx ON turn FIELDS embedding HNSW DIMENSION 1024 DIST COSINE;
|
|
81
86
|
DEFINE INDEX IF NOT EXISTS turn_session_idx ON turn FIELDS session_id;
|
|
82
87
|
DEFINE INDEX IF NOT EXISTS turn_tool_name_idx ON turn FIELDS tool_name;
|
|
88
|
+
DEFINE INDEX IF NOT EXISTS turn_emb_provider_idx ON turn FIELDS embedding_provider;
|
|
83
89
|
|
|
84
90
|
-- Identity chunks (agent persona / identity)
|
|
85
91
|
DEFINE TABLE IF NOT EXISTS identity_chunk SCHEMALESS;
|
|
@@ -88,8 +94,10 @@ DEFINE FIELD IF NOT EXISTS source ON identity_chunk TYPE string;
|
|
|
88
94
|
DEFINE FIELD IF NOT EXISTS chunk_index ON identity_chunk TYPE int;
|
|
89
95
|
DEFINE FIELD IF NOT EXISTS text ON identity_chunk TYPE string;
|
|
90
96
|
DEFINE FIELD IF NOT EXISTS embedding ON identity_chunk TYPE option<array<float>>;
|
|
97
|
+
DEFINE FIELD IF NOT EXISTS embedding_provider ON identity_chunk TYPE option<string>;
|
|
91
98
|
DEFINE FIELD IF NOT EXISTS importance ON identity_chunk TYPE float DEFAULT 0.5;
|
|
92
99
|
DEFINE INDEX IF NOT EXISTS identity_vec_idx ON identity_chunk FIELDS embedding HNSW DIMENSION 1024 DIST COSINE;
|
|
100
|
+
DEFINE INDEX IF NOT EXISTS identity_emb_provider_idx ON identity_chunk FIELDS embedding_provider;
|
|
93
101
|
|
|
94
102
|
-- Sessions (lightweight, links to task for 5-pillar integration)
|
|
95
103
|
DEFINE TABLE IF NOT EXISTS session SCHEMALESS;
|
|
@@ -106,6 +114,7 @@ DEFINE FIELD IF NOT EXISTS cleanup_completed ON session TYPE bool DEFAULT false;
|
|
|
106
114
|
DEFINE TABLE IF NOT EXISTS memory SCHEMALESS;
|
|
107
115
|
DEFINE FIELD IF NOT EXISTS text ON memory TYPE string;
|
|
108
116
|
DEFINE FIELD IF NOT EXISTS embedding ON memory TYPE option<array<float>>;
|
|
117
|
+
DEFINE FIELD IF NOT EXISTS embedding_provider ON memory TYPE option<string>;
|
|
109
118
|
DEFINE FIELD IF NOT EXISTS importance ON memory TYPE float DEFAULT 0.5;
|
|
110
119
|
DEFINE FIELD IF NOT EXISTS confidence ON memory TYPE float DEFAULT 1.0;
|
|
111
120
|
DEFINE FIELD IF NOT EXISTS access_count ON memory TYPE int DEFAULT 0;
|
|
@@ -118,6 +127,7 @@ DEFINE FIELD IF NOT EXISTS resolved_at ON memory TYPE option<datetime>;
|
|
|
118
127
|
DEFINE FIELD IF NOT EXISTS resolved_by ON memory TYPE option<string>;
|
|
119
128
|
DEFINE INDEX IF NOT EXISTS memory_vec_idx ON memory FIELDS embedding HNSW DIMENSION 1024 DIST COSINE;
|
|
120
129
|
DEFINE INDEX IF NOT EXISTS memory_category_idx ON memory FIELDS category;
|
|
130
|
+
DEFINE INDEX IF NOT EXISTS memory_emb_provider_idx ON memory FIELDS embedding_provider;
|
|
121
131
|
|
|
122
132
|
-- ============================================================
|
|
123
133
|
-- GRAPH EDGES: Turn-level
|
|
@@ -257,6 +267,7 @@ DEFINE TABLE IF NOT EXISTS skill SCHEMALESS;
|
|
|
257
267
|
DEFINE FIELD IF NOT EXISTS name ON skill TYPE string;
|
|
258
268
|
DEFINE FIELD IF NOT EXISTS description ON skill TYPE string;
|
|
259
269
|
DEFINE FIELD IF NOT EXISTS embedding ON skill TYPE option<array<float>>;
|
|
270
|
+
DEFINE FIELD IF NOT EXISTS embedding_provider ON skill TYPE option<string>;
|
|
260
271
|
DEFINE FIELD IF NOT EXISTS preconditions ON skill TYPE option<string>;
|
|
261
272
|
DEFINE FIELD IF NOT EXISTS steps ON skill TYPE option<array>;
|
|
262
273
|
DEFINE FIELD IF NOT EXISTS postconditions ON skill TYPE option<string>;
|
|
@@ -267,6 +278,7 @@ DEFINE FIELD IF NOT EXISTS last_used ON skill TYPE option<datetime>;
|
|
|
267
278
|
DEFINE FIELD IF NOT EXISTS created_at ON skill TYPE datetime DEFAULT time::now();
|
|
268
279
|
DEFINE INDEX IF NOT EXISTS skill_vec_idx ON skill FIELDS embedding HNSW DIMENSION 1024 DIST COSINE;
|
|
269
280
|
DEFINE INDEX IF NOT EXISTS skill_active_idx ON skill FIELDS active;
|
|
281
|
+
DEFINE INDEX IF NOT EXISTS skill_emb_provider_idx ON skill FIELDS embedding_provider;
|
|
270
282
|
|
|
271
283
|
DEFINE TABLE IF NOT EXISTS skill_from_task TYPE RELATION IN skill OUT task;
|
|
272
284
|
DEFINE TABLE IF NOT EXISTS skill_uses_concept TYPE RELATION IN skill OUT concept;
|
|
@@ -278,12 +290,14 @@ DEFINE TABLE IF NOT EXISTS reflection SCHEMALESS;
|
|
|
278
290
|
DEFINE FIELD IF NOT EXISTS session_id ON reflection TYPE string;
|
|
279
291
|
DEFINE FIELD IF NOT EXISTS text ON reflection TYPE string;
|
|
280
292
|
DEFINE FIELD IF NOT EXISTS embedding ON reflection TYPE option<array<float>>;
|
|
293
|
+
DEFINE FIELD IF NOT EXISTS embedding_provider ON reflection TYPE option<string>;
|
|
281
294
|
DEFINE FIELD IF NOT EXISTS category ON reflection TYPE string DEFAULT "efficiency"; -- failure_pattern | efficiency | approach_strategy
|
|
282
295
|
DEFINE FIELD IF NOT EXISTS severity ON reflection TYPE string DEFAULT "minor"; -- minor | moderate | critical
|
|
283
296
|
DEFINE FIELD IF NOT EXISTS importance ON reflection TYPE float DEFAULT 7.0;
|
|
284
297
|
DEFINE FIELD IF NOT EXISTS access_count ON reflection TYPE int DEFAULT 0;
|
|
285
298
|
DEFINE FIELD IF NOT EXISTS created_at ON reflection TYPE datetime DEFAULT time::now();
|
|
286
299
|
DEFINE INDEX IF NOT EXISTS reflection_vec_idx ON reflection FIELDS embedding HNSW DIMENSION 1024 DIST COSINE;
|
|
300
|
+
DEFINE INDEX IF NOT EXISTS reflection_emb_provider_idx ON reflection FIELDS embedding_provider;
|
|
287
301
|
|
|
288
302
|
DEFINE TABLE IF NOT EXISTS reflects_on TYPE RELATION IN reflection OUT session;
|
|
289
303
|
|
|
@@ -329,10 +343,12 @@ DEFINE FIELD IF NOT EXISTS session_id ON monologue TYPE string;
|
|
|
329
343
|
DEFINE FIELD IF NOT EXISTS category ON monologue TYPE string;
|
|
330
344
|
DEFINE FIELD IF NOT EXISTS content ON monologue TYPE string;
|
|
331
345
|
DEFINE FIELD IF NOT EXISTS embedding ON monologue TYPE option<array<float>>;
|
|
346
|
+
DEFINE FIELD IF NOT EXISTS embedding_provider ON monologue TYPE option<string>;
|
|
332
347
|
DEFINE FIELD IF NOT EXISTS timestamp ON monologue TYPE datetime DEFAULT time::now();
|
|
333
348
|
DEFINE INDEX IF NOT EXISTS monologue_session ON monologue FIELDS session_id;
|
|
334
349
|
DEFINE INDEX IF NOT EXISTS monologue_category ON monologue FIELDS category;
|
|
335
350
|
DEFINE INDEX IF NOT EXISTS monologue_vec_idx ON monologue FIELDS embedding HNSW DIMENSION 1024 DIST COSINE;
|
|
351
|
+
DEFINE INDEX IF NOT EXISTS monologue_emb_provider_idx ON monologue FIELDS embedding_provider;
|
|
336
352
|
|
|
337
353
|
-- Fibonacci resurfacing: proactive memory that fades over time
|
|
338
354
|
-- Memories flagged as surfaceable get a next_surface_at timestamp.
|
|
@@ -384,3 +400,16 @@ DEFINE FIELD IF NOT EXISTS created_at ON graduation_event TYPE datetime DEFAULT
|
|
|
384
400
|
-- 768d → 1024d migration completed; REMOVE INDEX / UPDATE stale
|
|
385
401
|
-- embeddings removed to avoid destroying live HNSW indexes on
|
|
386
402
|
-- every startup.
|
|
403
|
+
|
|
404
|
+
-- embedding_provider backfill: any existing row with an embedding but no
|
|
405
|
+
-- provider tag was written by the original local BGE-M3 model. This is
|
|
406
|
+
-- idempotent (the WHERE clause skips already-tagged rows) so it is safe
|
|
407
|
+
-- to leave running on every startup.
|
|
408
|
+
UPDATE turn SET embedding_provider = "local-bge-m3" WHERE embedding != NONE AND embedding_provider = NONE;
|
|
409
|
+
UPDATE concept SET embedding_provider = "local-bge-m3" WHERE embedding != NONE AND embedding_provider = NONE;
|
|
410
|
+
UPDATE memory SET embedding_provider = "local-bge-m3" WHERE embedding != NONE AND embedding_provider = NONE;
|
|
411
|
+
UPDATE artifact SET embedding_provider = "local-bge-m3" WHERE embedding != NONE AND embedding_provider = NONE;
|
|
412
|
+
UPDATE identity_chunk SET embedding_provider = "local-bge-m3" WHERE embedding != NONE AND embedding_provider = NONE;
|
|
413
|
+
UPDATE skill SET embedding_provider = "local-bge-m3" WHERE embedding != NONE AND embedding_provider = NONE;
|
|
414
|
+
UPDATE reflection SET embedding_provider = "local-bge-m3" WHERE embedding != NONE AND embedding_provider = NONE;
|
|
415
|
+
UPDATE monologue SET embedding_provider = "local-bge-m3" WHERE embedding != NONE AND embedding_provider = NONE;
|
package/src/skills.ts
CHANGED
|
@@ -127,7 +127,10 @@ export async function extractSkill(
|
|
|
127
127
|
confidence: 1.0,
|
|
128
128
|
active: true,
|
|
129
129
|
};
|
|
130
|
-
if (skillEmb?.length)
|
|
130
|
+
if (skillEmb?.length) {
|
|
131
|
+
record.embedding = skillEmb;
|
|
132
|
+
record.embedding_provider = embeddings.providerId;
|
|
133
|
+
}
|
|
131
134
|
|
|
132
135
|
const rows = await store.queryFirst<{ id: string }>(
|
|
133
136
|
`CREATE skill CONTENT $record RETURN id`,
|
|
@@ -170,8 +173,9 @@ export async function supersedeOldSkills(
|
|
|
170
173
|
WHERE id != $sid
|
|
171
174
|
AND (active = NONE OR active = true)
|
|
172
175
|
AND embedding != NONE AND array::len(embedding) > 0
|
|
176
|
+
AND embedding_provider = $provider
|
|
173
177
|
ORDER BY score DESC LIMIT 5`,
|
|
174
|
-
{ vec: newEmb, sid: newSkillId },
|
|
178
|
+
{ vec: newEmb, sid: newSkillId, provider: store.getActiveProvider() },
|
|
175
179
|
);
|
|
176
180
|
for (const row of rows) {
|
|
177
181
|
if ((row.score ?? 0) >= 0.82) {
|
|
@@ -204,9 +208,11 @@ export async function findRelevantSkills(
|
|
|
204
208
|
avg_duration_ms AS avgDurationMs,
|
|
205
209
|
vector::similarity::cosine(embedding, $vec) AS score
|
|
206
210
|
FROM skill
|
|
207
|
-
WHERE embedding != NONE AND array::len(embedding) > 0
|
|
211
|
+
WHERE embedding != NONE AND array::len(embedding) > 0
|
|
212
|
+
AND embedding_provider = $provider
|
|
213
|
+
AND (active = NONE OR active = true)
|
|
208
214
|
ORDER BY score DESC LIMIT $lim`,
|
|
209
|
-
{ vec: queryVec, lim: limit },
|
|
215
|
+
{ vec: queryVec, lim: limit, provider: store.getActiveProvider() },
|
|
210
216
|
);
|
|
211
217
|
|
|
212
218
|
return rows
|
|
@@ -345,7 +351,10 @@ export async function graduateCausalToSkills(
|
|
|
345
351
|
confidence: 1.0,
|
|
346
352
|
active: true,
|
|
347
353
|
};
|
|
348
|
-
if (skillEmb?.length)
|
|
354
|
+
if (skillEmb?.length) {
|
|
355
|
+
record.embedding = skillEmb;
|
|
356
|
+
record.embedding_provider = embeddings.providerId;
|
|
357
|
+
}
|
|
349
358
|
|
|
350
359
|
const rows = await store.queryFirst<{ id: string }>(
|
|
351
360
|
`CREATE skill CONTENT $record RETURN id`,
|
package/src/supersedes.ts
CHANGED
|
@@ -61,11 +61,12 @@ export async function linkSupersedesEdges(
|
|
|
61
61
|
`SELECT id, vector::similarity::cosine(embedding, $vec) AS score, stability
|
|
62
62
|
FROM concept
|
|
63
63
|
WHERE embedding != NONE AND array::len(embedding) > 0
|
|
64
|
+
AND embedding_provider = $provider
|
|
64
65
|
AND superseded_at IS NONE
|
|
65
66
|
AND stability > $floor
|
|
66
67
|
ORDER BY score DESC
|
|
67
68
|
LIMIT 5`,
|
|
68
|
-
{ vec: originalVec, floor: STABILITY_FLOOR },
|
|
69
|
+
{ vec: originalVec, floor: STABILITY_FLOOR, provider: embeddings.providerId },
|
|
69
70
|
);
|
|
70
71
|
|
|
71
72
|
for (const candidate of candidates) {
|