kongbrain 0.4.0 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -51,11 +51,12 @@ import { graduateCausalToSkills } from "./skills.js";
51
51
  import { attemptGraduation, evolveSoul, checkStageTransition } from "./soul.js";
52
52
  import { swallow } from "./errors.js";
53
53
 
54
+ /** OpenClaw ContextEngine backed by SurrealDB graph retrieval and BGE-M3 embeddings. */
54
55
  export class KongBrainContextEngine implements ContextEngine {
55
56
  readonly info: ContextEngineInfo = {
56
57
  id: "kongbrain",
57
58
  name: "KongBrain",
58
- version: "0.1.2",
59
+ version: "0.4.2",
59
60
  ownsCompaction: true,
60
61
  };
61
62
 
@@ -63,6 +64,7 @@ export class KongBrainContextEngine implements ContextEngine {
63
64
 
64
65
  // ── Bootstrap ──────────────────────────────────────────────────────────
65
66
 
67
+ /** Initialize schema, create 5-pillar graph nodes, and start the memory daemon. */
66
68
  async bootstrap(params: {
67
69
  sessionId: string;
68
70
  sessionKey?: string;
@@ -139,6 +141,7 @@ export class KongBrainContextEngine implements ContextEngine {
139
141
 
140
142
  // ── Assemble ───────────────────────────────────────────────────────────
141
143
 
144
+ /** Build the context window: graph retrieval + system prompt additions + budget trimming. */
142
145
  async assemble(params: {
143
146
  sessionId: string;
144
147
  sessionKey?: string;
@@ -173,26 +176,22 @@ export class KongBrainContextEngine implements ContextEngine {
173
176
  if (systemPromptSection) additions.push(systemPromptSection);
174
177
 
175
178
  // Compaction summary (claw-code: compact.rs structured signals — inject once after compaction)
176
- const compactionSummary = (session as any)._compactionSummary as string | undefined;
179
+ const compactionSummary = session._compactionSummary;
177
180
  if (compactionSummary) {
178
181
  additions.push("[POST-COMPACTION CONTEXT]\n" + compactionSummary);
179
- delete (session as any)._compactionSummary;
182
+ session._compactionSummary = undefined;
180
183
  }
181
184
 
182
185
  // Wakeup briefing (synthesized at session start, may still be in-flight)
183
- const wakeupPromise = (session as any)._wakeupPromise as Promise<string | null> | undefined;
186
+ const wakeupPromise = session._wakeupPromise;
184
187
  if (wakeupPromise) {
185
188
  const wakeupBriefing = await wakeupPromise;
186
- delete (session as any)._wakeupPromise; // Only inject once
189
+ session._wakeupPromise = undefined; // Only inject once
187
190
  if (wakeupBriefing) additions.push(wakeupBriefing);
188
191
  }
189
192
 
190
193
  // Graduation celebration — tell the agent it just graduated so it can share with the user
191
- const graduation = (session as any)._graduationCelebration as {
192
- qualityScore: number;
193
- volumeScore: number;
194
- soulSummary: string;
195
- } | undefined;
194
+ const graduation = session._graduationCelebration;
196
195
  if (graduation) {
197
196
  let graduationBlock =
198
197
  "[SOUL GRADUATION — CELEBRATE WITH THE USER]\n" +
@@ -211,11 +210,11 @@ export class KongBrainContextEngine implements ContextEngine {
211
210
  "identity emerging from YOUR experience. Don't be robotic about it. This only happens once.";
212
211
 
213
212
  additions.push(graduationBlock);
214
- delete (session as any)._graduationCelebration; // Only inject once
213
+ session._graduationCelebration = undefined; // Only inject once
215
214
  }
216
215
 
217
216
  // Migration nudge — tell the agent there are workspace files to offer migrating
218
- if ((session as any)._hasMigratableFiles) {
217
+ if (session._hasMigratableFiles) {
219
218
  additions.push(
220
219
  "[MIGRATION AVAILABLE] This workspace has files from the default context engine " +
221
220
  "(IDENTITY.md, MEMORY.md, skills/, etc.). You can offer to migrate them into the graph " +
@@ -226,15 +225,31 @@ export class KongBrainContextEngine implements ContextEngine {
226
225
  );
227
226
  }
228
227
 
228
+ // Apply SPA priority budget — drop lowest-priority sections if over budget
229
+ // (dropped sections aren't lost — they're in the graph, retrievable on demand)
230
+ const BYTES_PER_TOKEN = 4; // claw-code: roughTokenCountEstimation default
231
+ const SPA_BUDGET_CHARS = Math.round(contextWindow * 0.08 * BYTES_PER_TOKEN);
232
+ let spaTotalChars = 0;
233
+ const keptAdditions: string[] = [];
234
+ for (const section of additions) { // additions are already in priority order
235
+ if (spaTotalChars + section.length > SPA_BUDGET_CHARS && keptAdditions.length > 0) break;
236
+ keptAdditions.push(section);
237
+ spaTotalChars += section.length;
238
+ }
239
+
240
+ const spaText = keptAdditions.length > 0 ? keptAdditions.join("\n\n") : undefined;
241
+ const spaTokens = spaText ? Math.ceil(spaText.length / BYTES_PER_TOKEN) : 0;
242
+
229
243
  return {
230
244
  messages,
231
- estimatedTokens: stats.sentTokens,
232
- systemPromptAddition: additions.length > 0 ? additions.join("\n\n") : undefined,
245
+ estimatedTokens: stats.sentTokens + spaTokens,
246
+ systemPromptAddition: spaText,
233
247
  };
234
248
  }
235
249
 
236
250
  // ── Ingest ─────────────────────────────────────────────────────────────
237
251
 
252
+ /** Embed and store a single user or assistant message as a turn node. */
238
253
  async ingest(params: {
239
254
  sessionId: string;
240
255
  sessionKey?: string;
@@ -247,7 +262,7 @@ export class KongBrainContextEngine implements ContextEngine {
247
262
  const msg = params.message;
248
263
 
249
264
  try {
250
- const role = (msg as any).role as string;
265
+ const role = "role" in msg ? (msg as { role: string }).role : "";
251
266
  if (role === "user" || role === "assistant") {
252
267
  const text = extractMessageText(msg);
253
268
  if (!text) return { ingested: false };
@@ -256,11 +271,16 @@ export class KongBrainContextEngine implements ContextEngine {
256
271
  let embedding: number[] | null = null;
257
272
  if (worthEmbedding && embeddings.isAvailable()) {
258
273
  try {
259
- const embedLimit = Math.round(8192 * 3.4 * 0.8);
260
- embedding = await embeddings.embed(text.slice(0, embedLimit));
274
+ const INGEST_EMBED_CHAR_LIMIT = 22_282; // ~6,554 tokens at 3.4 chars/token (BGE-M3 8192-token window * 0.8 safety margin)
275
+ embedding = await embeddings.embed(text.slice(0, INGEST_EMBED_CHAR_LIMIT));
261
276
  } catch (e) { swallow("ingest:embed", e); }
262
277
  }
263
278
 
279
+ // Stash user embedding for reuse in buildContextualQueryVec (avoids re-embedding)
280
+ if (role === "user" && embedding) {
281
+ session.lastUserEmbedding = embedding;
282
+ }
283
+
264
284
  const turnId = await store.upsertTurn({
265
285
  session_id: session.sessionId,
266
286
  role,
@@ -327,6 +347,7 @@ export class KongBrainContextEngine implements ContextEngine {
327
347
 
328
348
  // ── Compact ────────────────────────────────────────────────────────────
329
349
 
350
+ /** Extract structured signals (pending work, key files, errors) for post-compaction injection. */
330
351
  async compact(params: {
331
352
  sessionId: string;
332
353
  sessionKey?: string;
@@ -346,8 +367,9 @@ export class KongBrainContextEngine implements ContextEngine {
346
367
 
347
368
  // Extract structured compaction signals from stored turns
348
369
  let summary: string | undefined;
370
+ const { store } = this.state;
371
+ const contextWindow = params.tokenBudget ?? 200_000;
349
372
  try {
350
- const { store } = this.state;
351
373
  if (store.isAvailable()) {
352
374
  const turns = await store.getSessionTurnsRich(params.sessionId, 30);
353
375
  if (turns.length > 0) {
@@ -370,6 +392,12 @@ export class KongBrainContextEngine implements ContextEngine {
370
392
  turns.filter(t => t.tool_name).map(t => t.tool_name!)
371
393
  )];
372
394
 
395
+ // Recent errors — preserve tool failure context across compaction
396
+ const errorRe = /\b(error|failed|exception|crash|panic|TypeError|ReferenceError)\b[^.\n]{0,120}/gi;
397
+ const recentErrors = [...fullText.matchAll(errorRe)]
398
+ .map(m => m[0].trim().slice(0, 160))
399
+ .slice(-3); // last 3 errors only
400
+
373
401
  // Current work inference (claw-code: compact.rs:272-279)
374
402
  const lastText = turns.filter(t => t.text.length > 10).at(-1)?.text.slice(0, 200) ?? "";
375
403
 
@@ -377,6 +405,7 @@ export class KongBrainContextEngine implements ContextEngine {
377
405
  if (pendingMatches.length > 0) parts.push(`PENDING: ${pendingMatches.join("; ")}`);
378
406
  if (filePaths.length > 0) parts.push(`FILES: ${filePaths.join(", ")}`);
379
407
  if (toolNames.length > 0) parts.push(`TOOLS USED: ${toolNames.join(", ")}`);
408
+ if (recentErrors.length > 0) parts.push(`RECENT ERRORS: ${recentErrors.join("; ")}`);
380
409
  if (lastText) parts.push(`LAST: ${lastText}`);
381
410
  parts.push("Resume directly — do not recap what was happening.");
382
411
 
@@ -384,25 +413,34 @@ export class KongBrainContextEngine implements ContextEngine {
384
413
  summary = parts.join("\n");
385
414
  // Stash for next assemble() to inject
386
415
  if (session) {
387
- (session as any)._compactionSummary = summary;
416
+ session._compactionSummary = summary;
388
417
  }
389
418
  }
390
419
  }
391
420
  }
392
421
  } catch { /* non-critical */ }
393
422
 
423
+ // Compaction checkpoint — diagnostic trail for debugging
424
+ if (store.isAvailable() && session) {
425
+ store.createCompactionCheckpoint(params.sessionId, 0, session.userTurnCount)
426
+ .catch(e => swallow.warn("compact:checkpoint", e));
427
+ }
428
+
394
429
  return {
395
430
  ok: true,
396
- compacted: !!summary,
397
- reason: summary
398
- ? "Extracted structured signals for continuation."
399
- : "Graph retrieval handles context selection; no LLM-based compaction needed.",
400
- result: summary ? { summary, tokensBefore: 0 } : undefined,
431
+ compacted: true,
432
+ reason: "Graph-curated context window: assemble() selects relevant context each turn.",
433
+ result: summary ? {
434
+ summary,
435
+ tokensBefore: Math.round(summary.length / 4), // 4 bytes/token (claw-code ratio)
436
+ tokensAfter: Math.round(contextWindow * 0.325),
437
+ } : undefined,
401
438
  };
402
439
  }
403
440
 
404
441
  // ── After turn ─────────────────────────────────────────────────────────
405
442
 
443
+ /** Post-turn: ingest messages, evaluate retrieval quality, flush daemon, and run periodic maintenance. */
406
444
  async afterTurn?(params: {
407
445
  sessionId: string;
408
446
  sessionKey?: string;
@@ -442,11 +480,12 @@ export class KongBrainContextEngine implements ContextEngine {
442
480
  .catch(e => swallow.warn("afterTurn:evaluateRetrieval", e));
443
481
  }
444
482
 
483
+ // Single fetch for all downstream consumers (cognitive check, daemon flush, handoff)
484
+ const allSessionTurns = await store.getSessionTurns(session.sessionId, 50)
485
+ .catch(() => [] as { role: string; text: string }[]);
486
+
445
487
  // Cognitive check: periodic reasoning over retrieved context
446
488
  if (shouldRunCheck(session.userTurnCount, session) && stagedSnapshot.length > 0) {
447
- const recentTurns = await store.getSessionTurns(session.sessionId, 6)
448
- .catch(() => [] as { role: string; text: string }[]);
449
-
450
489
  runCognitiveCheck({
451
490
  sessionId: session.sessionId,
452
491
  userQuery: session.lastUserText,
@@ -457,7 +496,7 @@ export class KongBrainContextEngine implements ContextEngine {
457
496
  score: n.finalScore ?? 0,
458
497
  table: n.table,
459
498
  })),
460
- recentTurns,
499
+ recentTurns: allSessionTurns.slice(-6),
461
500
  }, session, store, this.state.complete).catch(e => swallow.warn("afterTurn:cognitiveCheck", e));
462
501
  }
463
502
 
@@ -466,11 +505,11 @@ export class KongBrainContextEngine implements ContextEngine {
466
505
  const turnReady = session.userTurnCount >= session.lastDaemonFlushTurnCount + 3;
467
506
  if (session.daemon && (tokenReady || turnReady)) {
468
507
  try {
469
- const recentTurns = await store.getSessionTurns(session.sessionId, 20);
508
+ const recentTurns = allSessionTurns.slice(-20);
470
509
  const turnData = recentTurns.map(t => ({
471
510
  role: t.role as "user" | "assistant",
472
511
  text: t.text,
473
- turnId: String((t as any).id ?? ""),
512
+ turnId: String((t as { id?: string }).id ?? ""),
474
513
  }));
475
514
 
476
515
  // Gather retrieved memory IDs for dedup
@@ -503,20 +542,14 @@ export class KongBrainContextEngine implements ContextEngine {
503
542
  // Fire-and-forget: these are non-critical background operations
504
543
  const cleanupOps: Promise<unknown>[] = [];
505
544
 
506
- // Final daemon flush with full transcript before cleanup
545
+ // Final daemon flush with full transcript before cleanup (reuse allSessionTurns)
507
546
  if (session.daemon) {
508
- cleanupOps.push(
509
- store.getSessionTurns(session.sessionId, 50)
510
- .then(recentTurns => {
511
- const turnData = recentTurns.map(t => ({
512
- role: t.role as "user" | "assistant",
513
- text: t.text,
514
- turnId: String((t as any).id ?? ""),
515
- }));
516
- session.daemon!.sendTurnBatch(turnData, [...session.pendingThinking], []);
517
- })
518
- .catch(e => swallow.warn("midCleanup:daemonFlush", e)),
519
- );
547
+ const turnData = allSessionTurns.map(t => ({
548
+ role: t.role as "user" | "assistant",
549
+ text: t.text,
550
+ turnId: String((t as { id?: string }).id ?? ""),
551
+ }));
552
+ session.daemon.sendTurnBatch(turnData, [...session.pendingThinking], []);
520
553
  }
521
554
 
522
555
  if (session.taskId) {
@@ -542,10 +575,10 @@ export class KongBrainContextEngine implements ContextEngine {
542
575
  .catch(e => swallow("midCleanup:acan", e)),
543
576
  );
544
577
 
545
- // Handoff note — snapshot for wakeup even if session continues
578
+ // Handoff note — snapshot for wakeup even if session continues (reuse allSessionTurns)
546
579
  cleanupOps.push(
547
580
  (async () => {
548
- const recentTurns = await store.getSessionTurns(session.sessionId, 15);
581
+ const recentTurns = allSessionTurns.slice(-15);
549
582
  if (recentTurns.length < 2) return;
550
583
  const turnSummary = recentTurns
551
584
  .map(t => `[${t.role}] ${t.text.slice(0, 200)}`)
@@ -635,12 +668,12 @@ export class KongBrainContextEngine implements ContextEngine {
635
668
  // ── Helpers ────────────────────────────────────────────────────────────────────
636
669
 
637
670
  function extractMessageText(msg: AgentMessage): string {
638
- const m = msg as any;
671
+ const m = msg as { content?: string | { type: string; text?: string }[] };
639
672
  if (typeof m.content === "string") return m.content;
640
673
  if (Array.isArray(m.content)) {
641
674
  return m.content
642
- .filter((c: any) => c.type === "text")
643
- .map((c: any) => c.text ?? "")
675
+ .filter((c) => c.type === "text")
676
+ .map((c) => c.text ?? "")
644
677
  .join("\n");
645
678
  }
646
679
  return "";
@@ -79,10 +79,10 @@ export function startMemoryDaemon(
79
79
  const { buildSystemPrompt, buildTranscript, writeExtractionResults } = await import("./memory-daemon.js");
80
80
 
81
81
  const transcript = buildTranscript(turns);
82
- const sections: string[] = [`[TRANSCRIPT]\n${transcript.slice(0, 60000)}`];
82
+ const sections: string[] = [`[TRANSCRIPT]\n${transcript.slice(0, 30000)}`];
83
83
 
84
84
  if (thinking.length > 0) {
85
- sections.push(`[THINKING]\n${thinking.slice(-8).join("\n---\n").slice(0, 4000)}`);
85
+ sections.push(`[THINKING]\n${thinking.slice(-3).join("\n---\n").slice(0, 2000)}`);
86
86
  }
87
87
 
88
88
  if (retrievedMemories.length > 0) {
@@ -92,37 +92,71 @@ export function startMemoryDaemon(
92
92
 
93
93
  const systemPrompt = buildSystemPrompt(thinking.length > 0, retrievedMemories.length > 0, priorState);
94
94
 
95
+ // Structured output schema — forces API to return valid JSON (no markdown, no preamble)
96
+ const extractionSchema = {
97
+ type: "object" as const,
98
+ properties: {
99
+ causal: { type: "array", items: { type: "object" } },
100
+ monologue: { type: "array", items: { type: "object" } },
101
+ resolved: { type: "array", items: { type: "string" } },
102
+ concepts: { type: "array", items: { type: "object" } },
103
+ corrections: { type: "array", items: { type: "object" } },
104
+ preferences: { type: "array", items: { type: "object" } },
105
+ artifacts: { type: "array", items: { type: "object" } },
106
+ decisions: { type: "array", items: { type: "object" } },
107
+ skills: { type: "array", items: { type: "object" } },
108
+ },
109
+ required: ["causal", "monologue", "resolved", "concepts", "corrections", "preferences", "artifacts", "decisions", "skills"],
110
+ };
111
+
95
112
  const response = await complete({
96
113
  system: systemPrompt,
97
114
  messages: [{ role: "user", content: sections.join("\n\n") }],
115
+ outputFormat: { type: "json_schema", schema: extractionSchema },
98
116
  });
99
117
 
100
118
  const responseText = response.text;
101
119
 
102
- const jsonMatch = responseText.match(/\{[\s\S]*?\}/);
103
- if (!jsonMatch) return;
104
-
120
+ // With structured output the response should be valid JSON directly.
121
+ // Fall back to regex extraction if the provider doesn't support outputFormat.
105
122
  let result: Record<string, any>;
106
123
  try {
107
- result = JSON.parse(jsonMatch[0]);
124
+ result = JSON.parse(responseText);
108
125
  } catch {
126
+ const jsonMatch = responseText.match(/\{[\s\S]*?\}/);
127
+ if (!jsonMatch) {
128
+ swallow.warn("daemon:noJson", new Error(`LLM response contained no JSON (${responseText.length} chars)`));
129
+ return;
130
+ }
109
131
  try {
110
- result = JSON.parse(jsonMatch[0].replace(/,\s*([}\]])/g, "$1"));
132
+ result = JSON.parse(jsonMatch[0]);
111
133
  } catch {
112
- result = {};
113
- const fields = ["causal", "monologue", "resolved", "concepts", "corrections", "preferences", "artifacts", "decisions", "skills"];
114
- for (const field of fields) {
115
- const fieldMatch = jsonMatch[0].match(new RegExp(`"${field}"\\s*:\\s*(\\[[\\s\\S]*?\\])(?=\\s*[,}]\\s*"[a-z]|\\s*\\}$)`, "m"));
116
- if (fieldMatch) {
117
- try { result[field] = JSON.parse(fieldMatch[1]); } catch { /* skip */ }
134
+ try {
135
+ result = JSON.parse(jsonMatch[0].replace(/,\s*([}\]])/g, "$1"));
136
+ } catch {
137
+ result = {};
138
+ const fields = ["causal", "monologue", "resolved", "concepts", "corrections", "preferences", "artifacts", "decisions", "skills"];
139
+ for (const field of fields) {
140
+ const fieldMatch = jsonMatch[0].match(new RegExp(`"${field}"\\s*:\\s*(\\[[\\s\\S]*?\\])(?=\\s*[,}]\\s*"[a-z]|\\s*\\}$)`, "m"));
141
+ if (fieldMatch) {
142
+ try { result[field] = JSON.parse(fieldMatch[1]); } catch { /* skip */ }
143
+ }
144
+ }
145
+ const PRIMARY_FIELDS = ["causal", "monologue", "artifacts"];
146
+ if (!PRIMARY_FIELDS.some(f => f in result)) {
147
+ swallow.warn("daemon:fallbackFailed", new Error(`Regex fallback extracted no primary fields from: ${jsonMatch[0].slice(0, 100)}`));
148
+ return;
118
149
  }
119
150
  }
120
- if (Object.keys(result).length === 0) return;
121
151
  }
122
152
  }
123
153
 
124
- const counts = await writeExtractionResults(result, sessionId, store, embeddings, priorState, taskId, projectId, turns);
125
- extractedTurnCount = turns.length;
154
+ try {
155
+ const counts = await writeExtractionResults(result, sessionId, store, embeddings, priorState, taskId, projectId, turns);
156
+ extractedTurnCount = turns.length;
157
+ } catch (e) {
158
+ swallow.warn("daemon:writeExtractionResults", e);
159
+ }
126
160
  }
127
161
 
128
162
  // Pending batch (only keep latest — newer batch supersedes older)
@@ -158,7 +192,15 @@ export function startMemoryDaemon(
158
192
  return {
159
193
  sendTurnBatch(turns, thinking, retrievedMemories, priorExtractions) {
160
194
  if (shuttingDown) return;
161
- pendingBatch = { turns, thinking, retrievedMemories, priorExtractions };
195
+ if (pendingBatch) {
196
+ // Merge into pending batch instead of discarding — prevents turn data loss
197
+ pendingBatch.turns = [...pendingBatch.turns, ...turns];
198
+ pendingBatch.thinking = [...pendingBatch.thinking, ...thinking];
199
+ pendingBatch.retrievedMemories = [...pendingBatch.retrievedMemories, ...retrievedMemories];
200
+ pendingBatch.priorExtractions = priorExtractions ?? pendingBatch.priorExtractions;
201
+ } else {
202
+ pendingBatch = { turns, thinking, retrievedMemories, priorExtractions };
203
+ }
162
204
  // Fire-and-forget
163
205
  processPending().catch(e => swallow.warn("daemon:sendBatch", e));
164
206
  },
@@ -176,14 +218,12 @@ export function startMemoryDaemon(
176
218
  shuttingDown = true;
177
219
  // Wait for current extraction to finish
178
220
  if (processing) {
179
- await Promise.race([
180
- new Promise<void>(resolve => {
181
- const check = setInterval(() => {
182
- if (!processing) { clearInterval(check); resolve(); }
183
- }, 100);
184
- }),
185
- new Promise<void>(resolve => setTimeout(resolve, timeoutMs)),
186
- ]);
221
+ await new Promise<void>(resolve => {
222
+ const check = setInterval(() => {
223
+ if (!processing) { clearInterval(check); clearTimeout(timeout); resolve(); }
224
+ }, 100);
225
+ const timeout = setTimeout(() => { clearInterval(check); resolve(); }, timeoutMs);
226
+ });
187
227
  }
188
228
  // Shared store/embeddings — don't dispose (owned by global state)
189
229
  },
@@ -14,10 +14,12 @@ import type { CompleteFn } from "./state.js";
14
14
  import { buildSystemPrompt, buildTranscript, writeExtractionResults } from "./memory-daemon.js";
15
15
  import type { PriorExtractions } from "./daemon-types.js";
16
16
  import { swallow } from "./errors.js";
17
+ import { log } from "./log.js";
17
18
 
18
19
  // Process-global flag — deferred cleanup runs AT MOST ONCE per process.
19
20
  // Using Symbol.for so it survives Jiti re-importing this module.
20
21
  const RAN_KEY = Symbol.for("kongbrain.deferredCleanup.ran");
22
+ const _g = globalThis as Record<symbol, unknown>;
21
23
 
22
24
  /**
23
25
  * Find and process orphaned sessions. Runs with a 30s total timeout.
@@ -30,8 +32,8 @@ export async function runDeferredCleanup(
30
32
  complete: CompleteFn,
31
33
  ): Promise<number> {
32
34
  // Once per process — never re-run even if first run times out
33
- if ((globalThis as any)[RAN_KEY]) return 0;
34
- (globalThis as any)[RAN_KEY] = true;
35
+ if (_g[RAN_KEY]) return 0;
36
+ _g[RAN_KEY] = true;
35
37
 
36
38
  try {
37
39
  return await runDeferredCleanupInner(store, embeddings, complete);
@@ -51,18 +53,14 @@ async function runDeferredCleanupInner(
51
53
  const orphaned = await store.getOrphanedSessions(10).catch(() => []);
52
54
  if (orphaned.length === 0) return 0;
53
55
 
54
- // Immediately claim all orphaned sessions so no concurrent run can pick them up
55
- await Promise.all(
56
- orphaned.map(s =>
57
- store.markSessionEnded(s.id).catch(e => swallow("deferred:claim", e))
58
- )
59
- );
60
-
61
56
  let processed = 0;
62
57
 
63
58
  const cleanup = async () => {
64
59
  for (const session of orphaned) {
65
60
  try {
61
+ // Claim each session just before processing so unclaimed ones remain
62
+ // available to the next run if we time out partway through
63
+ await store.markSessionEnded(session.id).catch(e => swallow("deferred:claim", e));
66
64
  await processOrphanedSession(session.id, store, embeddings, complete);
67
65
  processed++;
68
66
  } catch (e) {
@@ -105,7 +103,7 @@ async function processOrphanedSession(
105
103
  const systemPrompt = buildSystemPrompt(false, false, priorState);
106
104
 
107
105
  try {
108
- console.warn(`[deferred] extracting session ${surrealSessionId} (${turns.length} turns, transcript ${transcript.length} chars)`);
106
+ log.info(`[deferred] extracting session ${surrealSessionId} (${turns.length} turns, transcript ${transcript.length} chars)`);
109
107
  const LLM_CALL_TIMEOUT_MS = 30_000;
110
108
  const response = await Promise.race([
111
109
  complete({
@@ -118,7 +116,7 @@ async function processOrphanedSession(
118
116
  ]);
119
117
 
120
118
  const responseText = response.text;
121
- console.warn(`[deferred] extraction response: ${responseText.length} chars`);
119
+ log.info(`[deferred] extraction response: ${responseText.length} chars`);
122
120
  const jsonMatch = responseText.match(/\{[\s\S]*\}/);
123
121
  if (jsonMatch) {
124
122
  let result: Record<string, any>;
@@ -132,17 +130,17 @@ async function processOrphanedSession(
132
130
  // Strip prototype pollution keys from LLM-generated JSON
133
131
  const BANNED_KEYS = new Set(["__proto__", "constructor", "prototype"]);
134
132
  for (const key of Object.keys(result)) {
135
- if (BANNED_KEYS.has(key)) delete (result as any)[key];
133
+ if (BANNED_KEYS.has(key)) delete result[key];
136
134
  }
137
135
 
138
136
  const keys = Object.keys(result);
139
- console.warn(`[deferred] parsed ${keys.length} keys: ${keys.join(", ")}`);
137
+ log.info(`[deferred] parsed ${keys.length} keys: ${keys.join(", ")}`);
140
138
  if (keys.length > 0) {
141
139
  await writeExtractionResults(result, surrealSessionId, store, embeddings, priorState, undefined, undefined, turnData);
142
- console.warn(`[deferred] wrote extraction results for ${surrealSessionId}`);
140
+ log.info(`[deferred] wrote extraction results for ${surrealSessionId}`);
143
141
  }
144
142
  } else {
145
- console.warn(`[deferred] no JSON found in response`);
143
+ log.warn(`[deferred] no JSON found in response`);
146
144
  }
147
145
  } catch (e) {
148
146
  swallow.warn("deferredCleanup:extraction", e);
@@ -166,7 +164,7 @@ async function processOrphanedSession(
166
164
  ]);
167
165
 
168
166
  const handoffText = handoffResponse.text.trim();
169
- console.warn(`[deferred] handoff response: ${handoffText.length} chars`);
167
+ log.info(`[deferred] handoff response: ${handoffText.length} chars`);
170
168
  if (handoffText.length > 20) {
171
169
  let emb: number[] | null = null;
172
170
  if (embeddings.isAvailable()) {
package/src/embeddings.ts CHANGED
@@ -1,12 +1,14 @@
1
1
  import { existsSync } from "node:fs";
2
2
  import type { EmbeddingConfig } from "./config.js";
3
3
  import { swallow } from "./errors.js";
4
+ import { log } from "./log.js";
4
5
 
5
6
  // Lazy-import node-llama-cpp to avoid top-level await issues with jiti.
6
7
  // The actual import happens inside initialize() at runtime.
7
8
  type LlamaEmbeddingContext = import("node-llama-cpp").LlamaEmbeddingContext;
8
9
  type LlamaModel = import("node-llama-cpp").LlamaModel;
9
10
 
11
+ /** BGE-M3 embedding service (1024-dim via GGUF) with an LRU cache of up to 512 entries. */
10
12
  export class EmbeddingService {
11
13
  private model: LlamaModel | null = null;
12
14
  private ctx: LlamaEmbeddingContext | null = null;
@@ -30,8 +32,8 @@ export class EmbeddingService {
30
32
  logLevel: LlamaLogLevel.error,
31
33
  logger: (level, message) => {
32
34
  if (message.includes("missing newline token")) return;
33
- if (level === LlamaLogLevel.error) console.error(`[llama] ${message}`);
34
- else if (level === LlamaLogLevel.warn) console.warn(`[llama] ${message}`);
35
+ if (level === LlamaLogLevel.error) log.error(`[llama] ${message}`);
36
+ else if (level === LlamaLogLevel.warn) log.warn(`[llama] ${message}`);
35
37
  },
36
38
  });
37
39
  this.model = await llama.loadModel({ modelPath: this.config.modelPath });
@@ -40,6 +42,7 @@ export class EmbeddingService {
40
42
  return true;
41
43
  }
42
44
 
45
+ /** Return the embedding vector for text, serving from LRU cache on repeat calls. */
43
46
  async embed(text: string): Promise<number[]> {
44
47
  if (!this.ready || !this.ctx) throw new Error("Embeddings not initialized");
45
48
  const cached = this.cache.get(text);
@@ -61,11 +64,7 @@ export class EmbeddingService {
61
64
 
62
65
  async embedBatch(texts: string[]): Promise<number[][]> {
63
66
  if (texts.length === 0) return [];
64
- const results: number[][] = [];
65
- for (const text of texts) {
66
- results.push(await this.embed(text));
67
- }
68
- return results;
67
+ return Promise.all(texts.map(text => this.embed(text)));
69
68
  }
70
69
 
71
70
  isAvailable(): boolean {
package/src/errors.ts CHANGED
@@ -9,6 +9,8 @@
9
9
  * Always logged to stderr with stack trace.
10
10
  */
11
11
 
12
+ import { log } from "./log.js";
13
+
12
14
  const DEBUG = process.env.KONGBRAIN_DEBUG === "1";
13
15
 
14
16
  /**
@@ -18,7 +20,7 @@ const DEBUG = process.env.KONGBRAIN_DEBUG === "1";
18
20
  function swallow(context: string, err?: unknown): void {
19
21
  if (!DEBUG) return;
20
22
  const msg = err instanceof Error ? err.message : String(err ?? "unknown");
21
- console.debug(`[swallow] ${context}: ${msg}`);
23
+ log.debug(`[swallow] ${context}: ${msg}`);
22
24
  }
23
25
 
24
26
  /**
@@ -27,7 +29,7 @@ function swallow(context: string, err?: unknown): void {
27
29
  */
28
30
  swallow.warn = function swallowWarn(context: string, err?: unknown): void {
29
31
  const msg = err instanceof Error ? err.message : String(err ?? "unknown");
30
- console.warn(`[warn] ${context}: ${msg}`);
32
+ log.warn(`${context}: ${msg}`);
31
33
  };
32
34
 
33
35
  /**
@@ -37,7 +39,7 @@ swallow.warn = function swallowWarn(context: string, err?: unknown): void {
37
39
  swallow.error = function swallowError(context: string, err?: unknown): void {
38
40
  const msg = err instanceof Error ? err.message : String(err ?? "unknown");
39
41
  const stack = err instanceof Error ? `\n${err.stack}` : "";
40
- console.error(`[ERROR] ${context}: ${msg}${stack}`);
42
+ log.error(`${context}: ${msg}${stack}`);
41
43
  };
42
44
 
43
45
  export { swallow };