@kinqs/brainrouter-mcp-server 0.3.4 → 0.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +121 -71
- package/README.md +88 -15
- package/dist/__tests__/cognitive-extractor.test.js +112 -0
- package/dist/__tests__/crypto.test.js +8 -1
- package/dist/__tests__/working-memory.test.js +67 -0
- package/dist/env-loader.js +47 -0
- package/dist/index.d.ts +2 -1
- package/dist/index.js +12 -1
- package/dist/init.d.ts +1 -0
- package/dist/init.js +64 -0
- package/dist/memory/engine.js +21 -1
- package/dist/memory/pipeline/cognitive-extractor.js +19 -1
- package/dist/memory/recall.d.ts +3 -1
- package/dist/memory/recall.js +48 -3
- package/dist/memory/store/relevance-judge.d.ts +51 -0
- package/dist/memory/store/relevance-judge.js +196 -0
- package/dist/memory/working/canvas.js +11 -0
- package/package.json +2 -2
- package/dist/memory/config.d.ts +0 -2
- package/dist/memory/config.js +0 -3
- package/dist/memory/pipeline/l1-contradiction.d.ts +0 -7
- package/dist/memory/pipeline/l1-contradiction.js +0 -66
- package/dist/memory/pipeline/l1-dedup.d.ts +0 -23
- package/dist/memory/pipeline/l1-dedup.js +0 -39
- package/dist/memory/pipeline/l1-extractor.d.ts +0 -21
- package/dist/memory/pipeline/l1-extractor.js +0 -180
- package/dist/memory/pipeline/l2-direction-shift.d.ts +0 -10
- package/dist/memory/pipeline/l2-direction-shift.js +0 -27
- package/dist/memory/pipeline/l2-scene.d.ts +0 -15
- package/dist/memory/pipeline/l2-scene.js +0 -140
- package/dist/memory/pipeline/l3-distiller.d.ts +0 -15
- package/dist/memory/pipeline/l3-distiller.js +0 -40
- package/dist/memory/pipeline/task-queue.d.ts +0 -54
- package/dist/memory/pipeline/task-queue.js +0 -117
- package/dist/memory/prompts/graph-extraction-batch.d.ts +0 -14
- package/dist/memory/prompts/graph-extraction-batch.js +0 -54
- package/dist/memory/prompts/l1-contradiction-batch.d.ts +0 -16
- package/dist/memory/prompts/l1-contradiction-batch.js +0 -47
- package/dist/memory/prompts/l1-contradiction.d.ts +0 -1
- package/dist/memory/prompts/l1-contradiction.js +0 -25
- package/dist/memory/prompts/l1-extraction.d.ts +0 -10
- package/dist/memory/prompts/l1-extraction.js +0 -114
- package/dist/memory/prompts/l2-direction-shift.d.ts +0 -5
- package/dist/memory/prompts/l2-direction-shift.js +0 -32
- package/dist/memory/prompts/l2-scene-cluster.d.ts +0 -2
- package/dist/memory/prompts/l2-scene-cluster.js +0 -33
- package/dist/memory/prompts/l2-scene.d.ts +0 -7
- package/dist/memory/prompts/l2-scene.js +0 -40
- package/dist/memory/prompts/l3-persona.d.ts +0 -6
- package/dist/memory/prompts/l3-persona.js +0 -60
- package/dist/memory/store/types.d.ts +0 -101
- package/dist/memory/types.d.ts +0 -207
- package/dist/memory/types.js +0 -7
- package/dist/memory/validation.d.ts +0 -441
- package/dist/memory/validation.js +0 -129
- package/dist/tools/agent_memory_tools.d.ts +0 -485
- package/dist/tools/agent_memory_tools.js +0 -793
- package/dist/tools/get_doc.d.ts +0 -21
- package/dist/tools/get_doc.js +0 -24
- package/dist/tools/list_docs.d.ts +0 -15
- package/dist/tools/list_docs.js +0 -16
- package/dist/tools/update_doc.d.ts +0 -24
- package/dist/tools/update_doc.js +0 -35
- /package/dist/__tests__/{agent_mode.test.d.ts → cognitive-extractor.test.d.ts} +0 -0
- /package/dist/{memory/store/types.js → env-loader.d.ts} +0 -0
package/dist/index.js
CHANGED
|
@@ -14,6 +14,17 @@
|
|
|
14
14
|
// Runs an Express HTTP server. Connect via serverUrl in tool config.
|
|
15
15
|
// Usage: node dist/index.js --root /path/to/project --http --port 3747
|
|
16
16
|
//
|
|
17
|
+
// init subcommand
|
|
18
|
+
// Scaffold ~/.config/brainrouter/server.env from the bundled
|
|
19
|
+
// .env.example and exit. Run this once after a global install.
|
|
20
|
+
// Usage: brainrouter-mcp init
|
|
21
|
+
//
|
|
22
|
+
// CRITICAL: import order matters. `init` may exit the process before
|
|
23
|
+
// anything else loads (for `brainrouter-mcp init`). `env-loader` runs next
|
|
24
|
+
// and sets process.env from the right .env file before any module body
|
|
25
|
+
// reads env vars (sqlite/embedding/extractor all do at load time).
|
|
26
|
+
import './init.js';
|
|
27
|
+
import './env-loader.js';
|
|
17
28
|
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
|
|
18
29
|
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
|
|
19
30
|
import { StreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/streamableHttp.js';
|
|
@@ -91,7 +102,7 @@ const PORT = parseInt(parseFlag('--port') ?? '3747', 10);
|
|
|
91
102
|
function buildMcpServer(registry, options) {
|
|
92
103
|
const defaultUserId = options?.defaultUserId ?? STDIO_DEFAULT_USER_ID;
|
|
93
104
|
const isAdmin = options?.isAdmin ?? false;
|
|
94
|
-
const server = new Server({ name: 'brainrouter-mcp-server', version: '0.3.
|
|
105
|
+
const server = new Server({ name: 'brainrouter-mcp-server', version: '0.3.5' }, { capabilities: { tools: {} } });
|
|
95
106
|
// ── Tool list ──────────────────────────────────────────────────────────────
|
|
96
107
|
server.setRequestHandler(ListToolsRequestSchema, async () => ({
|
|
97
108
|
tools: [
|
package/dist/init.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
package/dist/init.js
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
// Side-effect module: imported FIRST in src/index.ts (before env-loader).
|
|
2
|
+
//
|
|
3
|
+
// Handles the `brainrouter-mcp init` subcommand by scaffolding a user-editable
|
|
4
|
+
// .env file at ~/.config/brainrouter/server.env from the package's bundled
|
|
5
|
+
// .env.example, then exiting. Never returns control when invoked.
|
|
6
|
+
//
|
|
7
|
+
// This solves the global-install UX gap: a user who runs
|
|
8
|
+
// `npm install -g @kinqs/brainrouter-mcp-server` has no obvious place to put
|
|
9
|
+
// their LLM credentials. `brainrouter-mcp init` creates the file in a known
|
|
10
|
+
// user-writable location that env-loader.ts then auto-finds.
|
|
11
|
+
//
|
|
12
|
+
// If the file already exists, init prints the path so the user knows where
|
|
13
|
+
// to edit it — but does NOT overwrite (don't clobber a user's real config).
|
|
14
|
+
import fs from 'node:fs';
|
|
15
|
+
import path from 'node:path';
|
|
16
|
+
import os from 'node:os';
|
|
17
|
+
import url from 'node:url';
|
|
18
|
+
/**
 * Handle the `init` subcommand: scaffold the per-user env file, then exit.
 *
 * Copies the bundled `.env.example` to `~/.config/brainrouter/server.env`
 * unless that file already exists. Every path ends in `process.exit`, so this
 * function never returns control to the caller:
 *   - exit 1 when no bundled `.env.example` can be located;
 *   - exit 0 when the file already exists (it is left untouched);
 *   - exit 0 after the file has been created.
 *
 * @throws {Error} Filesystem errors other than "destination already exists"
 *   (e.g. an unwritable home directory) propagate from mkdirSync/copyFileSync.
 */
function runInit() {
    const userConfigDir = path.join(os.homedir(), '.config', 'brainrouter');
    const userEnvFile = path.join(userConfigDir, 'server.env');
    // .env.example sits at the package root (one level above src/ in source,
    // one level above dist/ after build, both layouts work in the installed
    // tarball because the `files` allowlist in package.json includes it).
    const here = path.dirname(url.fileURLToPath(import.meta.url));
    const exampleCandidates = [
        path.resolve(here, '..', '.env.example'), // dist/init.js → ../.env.example
        path.resolve(here, '..', '..', '.env.example'), // src/init.ts (dev) → ../../.env.example
    ];
    const examplePath = exampleCandidates.find((p) => fs.existsSync(p));
    if (!examplePath) {
        process.stderr.write(`init: couldn't find .env.example bundled with the package.\n` +
            `Checked:\n${exampleCandidates.map((p) => ` ${p}`).join('\n')}\n` +
            `This is a packaging bug — please file an issue at ` +
            `https://github.com/kinqsradiollc/BrainRouter/issues\n`);
        process.exit(1);
    }
    // Shared by the up-front existence check and the EEXIST race path below.
    const reportExistingAndExit = () => {
        process.stdout.write(`init: ${userEnvFile} already exists — not overwriting.\n` +
            `Edit it with: $EDITOR ${userEnvFile}\n` +
            `(Or compare against the latest template at ${examplePath})\n`);
        process.exit(0);
    };
    if (fs.existsSync(userEnvFile)) {
        reportExistingAndExit();
    }
    fs.mkdirSync(userConfigDir, { recursive: true });
    try {
        // COPYFILE_EXCL makes the copy fail instead of overwriting if the file
        // appeared between the existsSync check above and this call, so a
        // user's real config can never be clobbered.
        fs.copyFileSync(examplePath, userEnvFile, fs.constants.COPYFILE_EXCL);
    }
    catch (err) {
        if (err?.code !== 'EEXIST') {
            throw err;
        }
        reportExistingAndExit();
    }
    // Tighten perms — this file will hold API keys + a JWT secret.
    try {
        fs.chmodSync(userEnvFile, 0o600);
    }
    catch (err) {
        // Still best effort (the file was created), but tell the user so they
        // can restrict access themselves before adding secrets.
        process.stderr.write(`init: warning: could not restrict permissions on ${userEnvFile}: ${err?.message ?? err}\n`);
    }
    process.stdout.write(`init: created ${userEnvFile}\n` +
        `\n` +
        `Next steps:\n` +
        ` 1. Edit it: $EDITOR ${userEnvFile}\n` +
        ` 2. Set BRAINROUTER_LLM_API_KEY (required for cognitive extraction)\n` +
        ` 3. Change BRAINROUTER_ADMIN_PASSWORD and BRAINROUTER_JWT_SECRET\n` +
        ` 4. Start the server: brainrouter-mcp --http --port 3747\n` +
        `\n` +
        `The server auto-finds this file via ~/.config/brainrouter/server.env\n` +
        `(or set BRAINROUTER_ENV_FILE=/some/other/path to override).\n`);
    process.exit(0);
}
|
|
62
|
+
// Run the scaffolder only when the literal argument `init` is present on the
// command line; otherwise importing this module has no effect and startup
// continues with the next import.
if (process.argv.indexOf('init') !== -1) {
    runInit();
}
|
package/dist/memory/engine.js
CHANGED
|
@@ -3,6 +3,7 @@ import { MemoryCapturePipeline } from "./capture.js";
|
|
|
3
3
|
import { MemoryRecallPipeline } from "./recall.js";
|
|
4
4
|
import { EmbeddingService } from "./store/embedding.js";
|
|
5
5
|
import { RerankerService } from "./store/reranker.js";
|
|
6
|
+
import { RelevanceJudgeService } from "./store/relevance-judge.js";
|
|
6
7
|
import { scanSkillsForHints } from "./skill-hints-loader.js";
|
|
7
8
|
import { distillFocusScenes } from "./pipeline/contextual-focus-builder.js";
|
|
8
9
|
import { distillCoreIdentity } from "./pipeline/identity-distiller.js";
|
|
@@ -172,6 +173,25 @@ export class MemoryEngine {
|
|
|
172
173
|
? parseInt(process.env.BRAINROUTER_RERANKER_TOP_N, 10)
|
|
173
174
|
: undefined,
|
|
174
175
|
});
|
|
176
|
+
// Relevance judge sits behind a flag (off by default) — opt in with
|
|
177
|
+
// BRAINROUTER_RELEVANCE_JUDGE_ENABLED=true. Falls back to the shared
|
|
178
|
+
// BRAINROUTER_LLM_* settings unless explicitly overridden so a single
|
|
179
|
+
// LLM credential covers extraction, synthesis, and judging.
|
|
180
|
+
const relevanceJudge = new RelevanceJudgeService({
|
|
181
|
+
enabled: process.env.BRAINROUTER_RELEVANCE_JUDGE_ENABLED === "true",
|
|
182
|
+
endpoint: process.env.BRAINROUTER_RELEVANCE_JUDGE_ENDPOINT
|
|
183
|
+
?? process.env.BRAINROUTER_LLM_ENDPOINT,
|
|
184
|
+
apiKey: process.env.BRAINROUTER_RELEVANCE_JUDGE_API_KEY
|
|
185
|
+
?? process.env.BRAINROUTER_LLM_API_KEY,
|
|
186
|
+
model: process.env.BRAINROUTER_RELEVANCE_JUDGE_MODEL
|
|
187
|
+
?? process.env.BRAINROUTER_LLM_MODEL,
|
|
188
|
+
maxCandidates: process.env.BRAINROUTER_RELEVANCE_JUDGE_MAX_CANDIDATES
|
|
189
|
+
? parseInt(process.env.BRAINROUTER_RELEVANCE_JUDGE_MAX_CANDIDATES, 10)
|
|
190
|
+
: undefined,
|
|
191
|
+
timeoutMs: process.env.BRAINROUTER_RELEVANCE_JUDGE_TIMEOUT_MS
|
|
192
|
+
? parseInt(process.env.BRAINROUTER_RELEVANCE_JUDGE_TIMEOUT_MS, 10)
|
|
193
|
+
: undefined,
|
|
194
|
+
});
|
|
175
195
|
this.store.initVec(embeddingService.getDimensions());
|
|
176
196
|
if (embeddingService.isReady()) {
|
|
177
197
|
void this.store.reembedStaleRecords((text) => embeddingService.embed(text)).then((count) => {
|
|
@@ -183,7 +203,7 @@ export class MemoryEngine {
|
|
|
183
203
|
});
|
|
184
204
|
}
|
|
185
205
|
this.capturePipeline = new MemoryCapturePipeline(this.store, this.extractionRunner, embeddingService, 1);
|
|
186
|
-
this.recallPipeline = new MemoryRecallPipeline(this.store, embeddingService, rerankerService);
|
|
206
|
+
this.recallPipeline = new MemoryRecallPipeline(this.store, embeddingService, rerankerService, relevanceJudge);
|
|
187
207
|
this.startExtractionSweeper();
|
|
188
208
|
}
|
|
189
209
|
async ensureSeedAdminUser() {
|
|
@@ -126,7 +126,7 @@ function parseExtractionResult(raw) {
|
|
|
126
126
|
const match = cleaned.match(/\[[\s\S]*\]/);
|
|
127
127
|
if (!match)
|
|
128
128
|
return [];
|
|
129
|
-
const parsed =
|
|
129
|
+
const parsed = parseJsonWithEscapeRepair(match[0]);
|
|
130
130
|
if (!Array.isArray(parsed))
|
|
131
131
|
return [];
|
|
132
132
|
const scenes = [];
|
|
@@ -159,6 +159,24 @@ function parseExtractionResult(raw) {
|
|
|
159
159
|
return [];
|
|
160
160
|
}
|
|
161
161
|
}
|
|
162
|
+
/**
 * Parse JSON produced by an LLM, repairing invalid backslash escapes if the
 * strict parse fails.
 *
 * LLMs frequently emit JSON where string values contain backslashes that
 * aren't valid JSON escapes — Windows paths (\users), regex literals,
 * LaTeX (\section), or shell snippets. JSON.parse rejects the entire payload
 * on the first bad escape, so one stray backslash would otherwise drop a
 * whole batch.
 *
 * Repair policy (applied only after the first parse has failed):
 *   - `\"`, `\\` and `\/` are kept as-is. They are consumed as two-character
 *     units, so the second backslash of an already-escaped `\\` is never
 *     mistaken for the start of a new escape.
 *   - every other backslash is doubled and therefore preserved literally.
 *     This deliberately includes \b, \f, \n, \r, \t and \uXXXX: once the
 *     payload is known to contain unescaped backslashes they are ambiguous,
 *     and decoding them could silently corrupt paths.
 *
 * @param {string} raw - JSON text to parse.
 * @returns {*} The parsed value.
 * @throws {SyntaxError} If the text is still not valid JSON after the repair.
 */
function parseJsonWithEscapeRepair(raw) {
    try {
        return JSON.parse(raw);
    }
    catch (err) {
        if (!(err instanceof SyntaxError))
            throw err;
        // First alternative swallows a valid pair whole (group 1 is set);
        // second alternative is a lone backslash that needs escaping.
        const repaired = raw.replace(/\\(["\\\/])|\\/g, (match, kept) => (kept === undefined ? "\\\\" : match));
        return JSON.parse(repaired);
    }
}
|
|
162
180
|
function parseMemoryType(value) {
|
|
163
181
|
const candidate = String(value || "");
|
|
164
182
|
return ALLOWED_MEMORY_TYPES.has(candidate) ? candidate : "episodic";
|
package/dist/memory/recall.d.ts
CHANGED
|
@@ -2,6 +2,7 @@ import type { IMemoryStore } from "@kinqs/brainrouter-types";
|
|
|
2
2
|
import type { RecallResult } from "@kinqs/brainrouter-types";
|
|
3
3
|
import type { EmbeddingService } from "./store/embedding.js";
|
|
4
4
|
import type { RerankerService } from "./store/reranker.js";
|
|
5
|
+
import type { RelevanceJudgeService } from "./store/relevance-judge.js";
|
|
5
6
|
/**
|
|
6
7
|
* Optional filters applied to the candidate pool after RRF but before
|
|
7
8
|
* neural-spark propagation and reranking. Filters never *add* records — they
|
|
@@ -27,7 +28,8 @@ export declare class MemoryRecallPipeline {
|
|
|
27
28
|
private store;
|
|
28
29
|
private embeddingService;
|
|
29
30
|
private rerankerService;
|
|
30
|
-
|
|
31
|
+
private relevanceJudge?;
|
|
32
|
+
constructor(store: IMemoryStore, embeddingService: EmbeddingService, rerankerService: RerankerService, relevanceJudge?: RelevanceJudgeService | undefined);
|
|
31
33
|
recall(params: {
|
|
32
34
|
userId: string;
|
|
33
35
|
sessionKey: string;
|
package/dist/memory/recall.js
CHANGED
|
@@ -51,10 +51,12 @@ export class MemoryRecallPipeline {
|
|
|
51
51
|
store;
|
|
52
52
|
embeddingService;
|
|
53
53
|
rerankerService;
|
|
54
|
-
|
|
54
|
+
relevanceJudge;
|
|
55
|
+
constructor(store, embeddingService, rerankerService, relevanceJudge) {
|
|
55
56
|
this.store = store;
|
|
56
57
|
this.embeddingService = embeddingService;
|
|
57
58
|
this.rerankerService = rerankerService;
|
|
59
|
+
this.relevanceJudge = relevanceJudge;
|
|
58
60
|
}
|
|
59
61
|
async recall(params) {
|
|
60
62
|
const startTime = Date.now();
|
|
@@ -270,6 +272,35 @@ export class MemoryRecallPipeline {
|
|
|
270
272
|
console.error("[BrainRouter] Reranker failed during recall, falling back to RRF:", e.message);
|
|
271
273
|
}
|
|
272
274
|
}
|
|
275
|
+
// Stage 4 — LLM Relevance Judge (semantic approve/reject gate)
|
|
276
|
+
//
|
|
277
|
+
// The reranker orders candidates by a learned relevance score but never
|
|
278
|
+
// *filters* — so a memory that shares vocabulary with the query but is
|
|
279
|
+
// about a different subject still makes the cut. The judge fixes that by
|
|
280
|
+
// asking a fast LLM "is each of these actually relevant?" and dropping
|
|
281
|
+
// the rejects. On any failure we keep the reranker output unchanged so a
|
|
282
|
+
// flaky judge call never breaks recall.
|
|
283
|
+
let judgeUsed = false;
|
|
284
|
+
let judgeApproved = 0;
|
|
285
|
+
let judgeRejected = 0;
|
|
286
|
+
let judgeVerdicts;
|
|
287
|
+
if (this.relevanceJudge?.isReady() && topResults.length > 0) {
|
|
288
|
+
try {
|
|
289
|
+
const judgeCandidates = topResults.map(r => ({
|
|
290
|
+
id: r.record.record_id,
|
|
291
|
+
content: r.record.content,
|
|
292
|
+
}));
|
|
293
|
+
const judgeResult = await this.relevanceJudge.judge({ query, candidates: judgeCandidates });
|
|
294
|
+
judgeUsed = true;
|
|
295
|
+
judgeVerdicts = judgeResult.verdicts;
|
|
296
|
+
judgeApproved = judgeResult.approvedIndices.length;
|
|
297
|
+
judgeRejected = topResults.length - judgeApproved;
|
|
298
|
+
topResults = judgeResult.approvedIndices.map((i) => topResults[i]);
|
|
299
|
+
}
|
|
300
|
+
catch (e) {
|
|
301
|
+
console.error("[BrainRouter] Relevance judge failed during recall, keeping reranker output:", e.message);
|
|
302
|
+
}
|
|
303
|
+
}
|
|
273
304
|
// 5. Format for context
|
|
274
305
|
const memoryLines = topResults.map(({ record }) => {
|
|
275
306
|
const tag = record.scene_name ? `${record.type}|${record.scene_name}` : record.type;
|
|
@@ -279,7 +310,13 @@ export class MemoryRecallPipeline {
|
|
|
279
310
|
}
|
|
280
311
|
return line;
|
|
281
312
|
});
|
|
282
|
-
|
|
313
|
+
// If the judge rejected everything, skip the prepend block entirely —
|
|
314
|
+
// an empty <relevant-memories> tag is worse than no tag because it
|
|
315
|
+
// implies "we looked and nothing helped," which the agent should infer
|
|
316
|
+
// from the absence of the block.
|
|
317
|
+
const prependContext = memoryLines.length > 0
|
|
318
|
+
? `<relevant-memories>\n The following memories are relevant to this query. Reference only if helpful:\n\n ${memoryLines.join("\n ")}\n</relevant-memories>`
|
|
319
|
+
: undefined;
|
|
283
320
|
// Build appendSystemContext with Contextual Focus Navigation + tools guide
|
|
284
321
|
const topScenes = this.store.getTopContextualFocus(userId, 3);
|
|
285
322
|
let appendSystemContext = "";
|
|
@@ -329,9 +366,10 @@ export class MemoryRecallPipeline {
|
|
|
329
366
|
recordId: r.record.record_id,
|
|
330
367
|
skillTag: r.record.skill_tag
|
|
331
368
|
}));
|
|
332
|
-
const
|
|
369
|
+
const baseStrategy = vecResults.length > 0
|
|
333
370
|
? (usedReranker ? "hybrid+rerank" : "hybrid")
|
|
334
371
|
: (usedReranker ? "keyword+rerank" : (filePathResults.length > 0 ? "keyword+file" : "keyword"));
|
|
372
|
+
const recallStrategy = judgeUsed ? `${baseStrategy}+judge` : baseStrategy;
|
|
335
373
|
const durationMs = Date.now() - startTime;
|
|
336
374
|
const recallExplanation = {
|
|
337
375
|
ftsHits: ftsResults.length,
|
|
@@ -342,6 +380,10 @@ export class MemoryRecallPipeline {
|
|
|
342
380
|
typeBoosts,
|
|
343
381
|
skillBoostApplied,
|
|
344
382
|
rerankerUsed: usedReranker,
|
|
383
|
+
judgeUsed,
|
|
384
|
+
judgeApproved,
|
|
385
|
+
judgeRejected,
|
|
386
|
+
judgeVerdicts,
|
|
345
387
|
graphExpansion: hasGraphExpansion,
|
|
346
388
|
citationBoosts,
|
|
347
389
|
durationMs,
|
|
@@ -388,6 +430,9 @@ export class MemoryRecallPipeline {
|
|
|
388
430
|
vecHits: explanation?.vecHits ?? 0,
|
|
389
431
|
intentDetected: explanation?.intentDetected ?? "none",
|
|
390
432
|
rerankerUsed: explanation?.rerankerUsed ?? false,
|
|
433
|
+
judgeUsed: explanation?.judgeUsed ?? false,
|
|
434
|
+
judgeApproved: explanation?.judgeApproved ?? 0,
|
|
435
|
+
judgeRejected: explanation?.judgeRejected ?? 0,
|
|
391
436
|
},
|
|
392
437
|
});
|
|
393
438
|
}
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import type { RelevanceJudgeServiceConfig, RelevanceVerdict } from "@kinqs/brainrouter-types";
|
|
2
|
+
/** One retrieved memory handed to the relevance judge for grading. */
export interface JudgeCandidate {
    /**
     * Stable identifier for the candidate — typically the memory's record_id.
     * Intended for logging/correlation; the judge addresses candidates by
     * their position in the array, not by this id.
     */
    id: string;
    /** Text of the memory that the judge reads when deciding relevance. */
    content: string;
}
|
|
8
|
+
/** Outcome of grading one batch of candidates. */
export interface JudgeResult {
    /**
     * One verdict per judged candidate, ordered by candidate index (not by the
     * order in which the model listed them). Candidates the model did not
     * mention are filled in as rejected.
     */
    verdicts: RelevanceVerdict[];
    /**
     * Indices of the candidates approved as relevant. Each value is a position
     * in the `candidates` array passed to `judge`.
     */
    approvedIndices: number[];
}
|
|
14
|
+
/**
 * LLM-backed approve/reject gate for retrieved memories.
 *
 * Runs after the reranker and before context formatting: each candidate is
 * graded for genuine semantic relevance to the query, and candidates that only
 * share vocabulary with it are dropped.
 *
 * The service fails open from the caller's point of view — `judge` throws on
 * any error and callers are expected to keep the reranker output, so a flaky
 * judge call never breaks a recall.
 */
export declare class RelevanceJudgeService {
    private readonly enabled;
    private readonly endpoint;
    private readonly apiKey;
    private readonly model;
    private readonly maxCandidates;
    private readonly timeoutMs;
    private readonly ready;
    constructor(config: RelevanceJudgeServiceConfig);
    /** True only when the judge is enabled and an API key is configured. */
    isReady(): boolean;
    /** Upper bound on how many candidates a single `judge` call will grade. */
    getMaxCandidates(): number;
    /**
     * Grade a batch of candidates against the query.
     *
     * Resolves with the verdicts plus the indices approved as relevant.
     * Rejects on transport or parsing failure, and when the service is not
     * ready — callers should fall back to their pre-judge results.
     */
    judge(params: {
        query: string;
        candidates: JudgeCandidate[];
    }): Promise<JudgeResult>;
    /**
     * Defensive parser for the model's reply: strips code fences, recovers a
     * JSON object/array embedded in surrounding text, and accepts either
     * {"verdicts":[…]} or a bare array. Always yields one verdict per
     * candidate; entries the model omitted default to "rejected" so a
     * malformed reply can't silently approve everything.
     */
    private parseVerdicts;
}
|
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
import { fetchWithExternalRetry } from "../retry.js";
|
|
2
|
+
import { acquireLLMSlot } from "../llm-semaphore.js";
|
|
3
|
+
/**
 * LLM-as-judge stage that approves or rejects retrieved memories based on
 * actual semantic relevance to the user query — sits between the reranker and
 * context formatting, dropping candidates that share keywords but aren't
 * genuinely about the query subject.
 *
 * Failure mode is "skip the gate": if the call errors out, callers fall back
 * to the unfiltered reranker output. We never want a flaky judge call to
 * crash a recall.
 */
export class RelevanceJudgeService {
    enabled;
    endpoint;
    apiKey;
    model;
    maxCandidates;
    timeoutMs;
    ready;
    /**
     * @param {object} config
     * @param {boolean} [config.enabled=false] - Master switch for the stage.
     * @param {string} [config.endpoint] - Chat-completions URL (defaults to OpenAI's).
     * @param {string} [config.apiKey] - Bearer token; the judge is not ready without it.
     * @param {string} [config.model="gpt-4o-mini"] - Model name sent in the request.
     * @param {number} [config.maxCandidates=10] - Candidates graded per call (min 1).
     * @param {number} [config.timeoutMs=15000] - Per-request timeout (min 1000).
     */
    constructor(config) {
        // Callers derive these from env vars via parseInt, which yields NaN for
        // garbage input. NaN would pass `??`, survive Math.max, and then make
        // slice(0, NaN) judge zero candidates — so fall back to the default.
        const wholeNumberOr = (value, fallback) => {
            const n = Number(value ?? fallback);
            return Number.isFinite(n) ? Math.floor(n) : fallback;
        };
        this.enabled = config.enabled ?? false;
        this.endpoint = config.endpoint ?? "https://api.openai.com/v1/chat/completions";
        this.apiKey = config.apiKey ?? "";
        this.model = config.model ?? "gpt-4o-mini";
        this.maxCandidates = Math.max(1, wholeNumberOr(config.maxCandidates, 10));
        this.timeoutMs = Math.max(1000, wholeNumberOr(config.timeoutMs, 15_000));
        this.ready = this.enabled && !!this.apiKey;
        if (this.enabled && !this.apiKey) {
            console.error("[BrainRouter] Relevance judge enabled but no API key set. Stage 4 judging will be skipped.");
        }
    }
    /** @returns {boolean} True only when enabled and an API key is configured. */
    isReady() {
        return this.ready;
    }
    /** @returns {number} Maximum number of candidates graded by one `judge` call. */
    getMaxCandidates() {
        return this.maxCandidates;
    }
    /**
     * Grade a batch of candidates against the query. Returns verdicts and the
     * subset of indices approved as relevant. Throws on transport/parsing
     * failure — callers are expected to fall back to pre-judge results.
     *
     * Only the first `maxCandidates` entries are sent to the model; the
     * returned indices refer to positions in `params.candidates`.
     *
     * @param {{ query: string, candidates: Array<{ id: string, content: string }> }} params
     * @returns {Promise<{ verdicts: Array<{ index: number, relevant: boolean, reason: string }>, approvedIndices: number[] }>}
     * @throws {Error} When the service is not ready, the HTTP call fails, the
     *   endpoint returns an error envelope, or the reply is not parseable.
     */
    async judge(params) {
        if (!this.ready) {
            throw new Error("RelevanceJudgeService is not ready (disabled or missing API key)");
        }
        if (params.candidates.length === 0) {
            return { verdicts: [], approvedIndices: [] };
        }
        const candidates = params.candidates.slice(0, this.maxCandidates);
        // Cap prompt size: long queries/memories are truncated with an ellipsis.
        const safeQuery = params.query.length > 800 ? params.query.slice(0, 800) + "…" : params.query;
        const candidateBlock = candidates
            .map((c, i) => {
            const text = c.content.length > 600 ? c.content.slice(0, 600) + "…" : c.content;
            return `[${i}] ${text.replace(/\s+/g, " ").trim()}`;
        })
            .join("\n");
        const systemPrompt = [
            "You are a strict relevance judge for a memory retrieval system.",
            "For each candidate memory, decide whether it is actually relevant to the user's query.",
            "A memory is RELEVANT only if it provides information that directly helps answer, contextualize, or inform the query.",
            "It is NOT relevant if it merely shares keywords, is about a different subject, or is generic background.",
            "When in doubt, reject — false positives pollute the agent's context window.",
            "Respond with strict JSON only, no prose.",
        ].join(" ");
        const userPrompt = [
            `Query: ${safeQuery}`,
            "",
            "Candidates:",
            candidateBlock,
            "",
            "Respond with exactly this JSON shape:",
            `{"verdicts":[{"index":0,"relevant":true,"reason":"…"}, …]}`,
            "Include one verdict per candidate. Keep each reason under 120 chars.",
        ].join("\n");
        const doFetch = () => fetchWithExternalRetry(this.endpoint, {
            method: "POST",
            headers: {
                "Content-Type": "application/json",
                "Authorization": `Bearer ${this.apiKey}`,
            },
            // Deliberately omitting `response_format` — OpenAI accepts
            // `{type:"json_object"}`, but LM Studio / llama.cpp-style backends
            // reject anything except `json_schema` or `text` with a 400, and
            // Ollama / vLLM each have their own quirks. The system prompt is
            // explicit about strict-JSON output and the parser below strips
            // code fences + tolerates surrounding prose, so dropping the hint
            // is cheaper than per-provider branching.
            body: JSON.stringify({
                model: this.model,
                messages: [
                    { role: "system", content: systemPrompt },
                    { role: "user", content: userPrompt },
                ],
                temperature: 0,
            }),
            // A fresh signal per attempt so the retry gets its own full timeout.
            signal: AbortSignal.timeout(this.timeoutMs),
        }, {
            label: "Relevance Judge API",
        });
        const release = await acquireLLMSlot();
        let raw;
        try {
            let res = await doFetch();
            // LM Studio quirk: idle models auto-unload and the first call after
            // unload returns 400 with "Model is unloaded" / "No models loaded".
            // The backend then loads the model in the background, so a retry
            // ~1.5s later usually succeeds. Mirrors ModelLLMRunner in engine.ts.
            if (res.status === 400) {
                const errorBody = await res.text().catch(() => "(no body)");
                if (/model\s+(is\s+)?unloaded|model\s+not\s+loaded|no\s+models?\s+loaded/i.test(errorBody)) {
                    await new Promise((resolve) => setTimeout(resolve, 1500));
                    res = await doFetch();
                    if (!res.ok) {
                        const retryBody = await res.text().catch(() => "(no body)");
                        throw new Error(`Relevance Judge API failed after LM Studio reload retry: HTTP ${res.status} ${res.statusText} - ${retryBody}`);
                    }
                }
                else {
                    throw new Error(`Relevance Judge API failed: HTTP ${res.status} ${res.statusText} - ${errorBody}`);
                }
            }
            else if (!res.ok) {
                const err = await res.text().catch(() => "(no body)");
                throw new Error(`Relevance Judge API failed: HTTP ${res.status} ${res.statusText} - ${err}`);
            }
            const data = await res.json();
            if (data?.error) {
                const errMsg = typeof data.error === "string" ? data.error : (data.error.message ?? JSON.stringify(data.error).slice(0, 400));
                throw new Error(`Relevance Judge endpoint returned an error envelope: ${errMsg}`);
            }
            const choice = data?.choices?.[0];
            const content = choice?.message?.content ?? choice?.delta?.content;
            if (typeof content !== "string") {
                throw new Error(`Relevance Judge returned no usable content. Response: ${JSON.stringify(data).slice(0, 400)}`);
            }
            raw = content;
        }
        finally {
            // Always hand the concurrency slot back, even when the call failed.
            release();
        }
        const parsed = this.parseVerdicts(raw, candidates.length);
        const approvedIndices = [];
        for (const v of parsed) {
            if (v.relevant && v.index >= 0 && v.index < candidates.length) {
                approvedIndices.push(v.index);
            }
        }
        return { verdicts: parsed, approvedIndices };
    }
    /**
     * Defensive JSON parse — strips code fences, recovers the first parseable
     * JSON object/array embedded in surrounding prose, and tolerates either
     * {"verdicts":[…]} or a bare array.
     * Returns one verdict per candidate; missing entries default to "rejected"
     * so a malformed response can't silently approve everything.
     *
     * @param {string} raw - Model reply text.
     * @param {number} candidateCount - Number of candidates that were judged.
     * @returns {Array<{ index: number, relevant: boolean, reason: string }>}
     *   Exactly `candidateCount` verdicts, ordered by index.
     * @throws {Error} If no JSON value can be recovered from the reply.
     */
    parseVerdicts(raw, candidateCount) {
        const text = raw
            .trim()
            .replace(/^```(?:json)?\s*/i, "")
            .replace(/```\s*$/i, "")
            .trim();
        let parsed;
        try {
            parsed = JSON.parse(text);
        }
        catch {
            // Try the embedded object and array spans in order of appearance.
            // Preferring the object unconditionally breaks bare arrays wrapped
            // in prose: `{…},{…}` is matched as one "object" and fails to parse.
            const embedded = [text.match(/\{[\s\S]*\}/), text.match(/\[[\s\S]*\]/)]
                .filter((m) => m !== null)
                .sort((a, b) => a.index - b.index);
            let recovered = false;
            let lastError;
            for (const m of embedded) {
                try {
                    parsed = JSON.parse(m[0]);
                    recovered = true;
                    break;
                }
                catch (err) {
                    lastError = err;
                }
            }
            if (!recovered) {
                throw new Error(`Relevance Judge produced non-JSON output: ${text.slice(0, 200)}`, { cause: lastError });
            }
        }
        const list = Array.isArray(parsed)
            ? parsed
            : Array.isArray(parsed?.verdicts) ? parsed.verdicts : [];
        // Models sometimes quote booleans; Boolean("false") would approve a
        // candidate the judge rejected, so strings must spell out "true".
        const toRelevant = (value) => typeof value === "string"
            ? value.trim().toLowerCase() === "true"
            : Boolean(value);
        const byIndex = new Map();
        for (const item of list) {
            if (!item || typeof item !== "object")
                continue;
            // Accept numbers and numeric strings only: Number(null) and
            // Number("") are 0 and would pin a stray verdict on candidate 0.
            const rawIndex = item.index;
            const index = typeof rawIndex === "number"
                ? rawIndex
                : (typeof rawIndex === "string" && rawIndex.trim() !== "" ? Number(rawIndex) : Number.NaN);
            if (!Number.isInteger(index))
                continue;
            byIndex.set(index, {
                index,
                relevant: toRelevant(item.relevant),
                reason: typeof item.reason === "string" ? item.reason.slice(0, 200) : "",
            });
        }
        const out = [];
        for (let i = 0; i < candidateCount; i++) {
            out.push(byIndex.get(i) ?? { index: i, relevant: false, reason: "no verdict returned" });
        }
        return out;
    }
}
|
|
@@ -24,6 +24,17 @@ export function buildAnnotatedCanvas(steps, activeNodeId) {
|
|
|
24
24
|
for (let index = 1; index < steps.length; index += 1) {
|
|
25
25
|
lines.push(` ${steps[index - 1].nodeId} --> ${steps[index].nodeId}`);
|
|
26
26
|
}
|
|
27
|
+
// Reasoning steps ("Why: …" decisions emitted via memory_working_offload
|
|
28
|
+
// with kind:"reasoning") get a dashed border so the audit trail is
|
|
29
|
+
// visually separable from tool_output and compressed_summary nodes when
|
|
30
|
+
// a human (or the dashboard) inspects canvas.mmd. Emitted before the
|
|
31
|
+
// active-node fill so the active highlight overrides the dashed style
|
|
32
|
+
// when the same node happens to be both.
|
|
33
|
+
for (const step of steps) {
|
|
34
|
+
if (step.kind === "reasoning") {
|
|
35
|
+
lines.push(` style ${step.nodeId} stroke-dasharray:4 4,stroke:#9f7aea,stroke-width:2px`);
|
|
36
|
+
}
|
|
37
|
+
}
|
|
27
38
|
if (activeNodeId && steps.some((step) => step.nodeId === activeNodeId)) {
|
|
28
39
|
lines.push(` style ${activeNodeId} fill:#2b6cb0,stroke:#3182ce,stroke-width:2px,color:#fff`);
|
|
29
40
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@kinqs/brainrouter-mcp-server",
|
|
3
|
-
"version": "0.3.4",
|
|
3
|
+
"version": "0.3.6",
|
|
4
4
|
"description": "BrainRouter MCP server — the cognitive memory engine. Exposes recall, capture, focus scenes, persona, contradictions, skills, and graph queries as MCP tools for any MCP-speaking agent.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -45,7 +45,7 @@
|
|
|
45
45
|
"gray-matter": "^4.0.3",
|
|
46
46
|
"sqlite-vec": "^0.1.9",
|
|
47
47
|
"zod": "^3.22.4",
|
|
48
|
-
"@kinqs/brainrouter-types": "^0.3.
|
|
48
|
+
"@kinqs/brainrouter-types": "^0.3.6"
|
|
49
49
|
},
|
|
50
50
|
"engines": {
|
|
51
51
|
"node": ">=22.0.0"
|
package/dist/memory/config.d.ts
DELETED
package/dist/memory/config.js
DELETED
|
@@ -1,7 +0,0 @@
|
|
|
1
|
-
import type { IMemoryStore } from "@brainrouter/types";
|
|
2
|
-
import type { LLMRunner, L1Record } from "@brainrouter/types";
|
|
3
|
-
export declare function detectContradictions(params: {
|
|
4
|
-
newRecord: L1Record;
|
|
5
|
-
store: IMemoryStore;
|
|
6
|
-
llmRunner: LLMRunner;
|
|
7
|
-
}): Promise<void>;
|
|
@@ -1,66 +0,0 @@
|
|
|
1
|
-
import { L1_CONTRADICTION_PROMPT } from "../prompts/l1-contradiction.js";
|
|
2
|
-
import crypto from "node:crypto";
|
|
3
|
-
export async function detectContradictions(params) {
|
|
4
|
-
const { newRecord, store, llmRunner } = params;
|
|
5
|
-
// 1. Search for potentially related memories
|
|
6
|
-
// We use keyword search on the content of the new record to find similar existing ones
|
|
7
|
-
const candidates = store.searchL1Fts(newRecord.userId, newRecord.content, 5);
|
|
8
|
-
const evaluations = [];
|
|
9
|
-
const _parsedContradictionTimeout = parseInt(process.env.BRAINROUTER_CONTRADICTION_TIMEOUT_MS || "", 10);
|
|
10
|
-
const contradictionTimeoutMs = isNaN(_parsedContradictionTimeout) ? 60000 : _parsedContradictionTimeout;
|
|
11
|
-
for (const candidate of candidates) {
|
|
12
|
-
// Don't compare with self
|
|
13
|
-
if (candidate.record_id === newRecord.id)
|
|
14
|
-
continue;
|
|
15
|
-
// Only compare if they are of the same type or both are episodic/persona
|
|
16
|
-
// (instructions don't usually contradict episodic facts)
|
|
17
|
-
const prompt = L1_CONTRADICTION_PROMPT
|
|
18
|
-
.replace("{{newContent}}", newRecord.content)
|
|
19
|
-
.replace("{{existingContent}}", candidate.content);
|
|
20
|
-
try {
|
|
21
|
-
const response = await llmRunner.run({
|
|
22
|
-
prompt,
|
|
23
|
-
taskId: `contradiction-check-${newRecord.id}-${candidate.record_id}`,
|
|
24
|
-
timeoutMs: contradictionTimeoutMs
|
|
25
|
-
});
|
|
26
|
-
// Simple JSON extraction (flexible for local models)
|
|
27
|
-
const jsonMatch = response.match(/\{[\s\S]*\}/);
|
|
28
|
-
if (!jsonMatch)
|
|
29
|
-
continue;
|
|
30
|
-
const data = JSON.parse(jsonMatch[0]);
|
|
31
|
-
if (data.isContradiction && data.confidence > 0.7) {
|
|
32
|
-
evaluations.push({
|
|
33
|
-
candidate,
|
|
34
|
-
isContradiction: true,
|
|
35
|
-
confidence: data.confidence,
|
|
36
|
-
kind: data.kind || "genuine_conflict",
|
|
37
|
-
reason: data.reason
|
|
38
|
-
});
|
|
39
|
-
}
|
|
40
|
-
}
|
|
41
|
-
catch (e) {
|
|
42
|
-
console.error(`[BrainRouter] Contradiction check failed for ${newRecord.id} vs ${candidate.record_id}:`, e.message);
|
|
43
|
-
}
|
|
44
|
-
}
|
|
45
|
-
// If ANY evaluation is a temporal_update, then the entire batch of contradictions represents a temporal transition!
|
|
46
|
-
const hasTemporalUpdate = evaluations.some(ev => ev.kind === "temporal_update");
|
|
47
|
-
for (const ev of evaluations) {
|
|
48
|
-
if (hasTemporalUpdate) {
|
|
49
|
-
// Treat all conflicting old records as superseded by the new record
|
|
50
|
-
console.error(`[BrainRouter] TEMPORAL UPDATE DETECTED (transition): Superseding memory ${ev.candidate.record_id} with new memory ${newRecord.id}`);
|
|
51
|
-
store.invalidateL1Record(newRecord.userId, ev.candidate.record_id, newRecord.id);
|
|
52
|
-
}
|
|
53
|
-
else {
|
|
54
|
-
// Genuine conflict
|
|
55
|
-
console.error(`[BrainRouter] CONTRADICTION DETECTED: ${newRecord.id} vs ${ev.candidate.record_id}`);
|
|
56
|
-
store.upsertContradiction({
|
|
57
|
-
id: `conflict_${crypto.randomBytes(4).toString("hex")}`,
|
|
58
|
-
userId: newRecord.userId,
|
|
59
|
-
recordIdA: ev.candidate.record_id,
|
|
60
|
-
recordIdB: newRecord.id,
|
|
61
|
-
reason: ev.reason,
|
|
62
|
-
confidence: ev.confidence
|
|
63
|
-
});
|
|
64
|
-
}
|
|
65
|
-
}
|
|
66
|
-
}
|