@kinqs/brainrouter-mcp-server 0.3.5 → 0.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. package/.env.example +121 -71
  2. package/dist/__tests__/cognitive-extractor.test.js +112 -0
  3. package/dist/__tests__/crypto.test.js +8 -1
  4. package/dist/__tests__/working-memory.test.js +67 -0
  5. package/dist/index.js +0 -0
  6. package/dist/memory/engine.js +21 -1
  7. package/dist/memory/pipeline/cognitive-extractor.js +19 -1
  8. package/dist/memory/recall.d.ts +3 -1
  9. package/dist/memory/recall.js +48 -3
  10. package/dist/memory/store/relevance-judge.d.ts +51 -0
  11. package/dist/memory/store/relevance-judge.js +196 -0
  12. package/dist/memory/working/canvas.js +11 -0
  13. package/package.json +2 -2
  14. package/dist/memory/config.d.ts +0 -2
  15. package/dist/memory/config.js +0 -3
  16. package/dist/memory/pipeline/l1-contradiction.d.ts +0 -7
  17. package/dist/memory/pipeline/l1-contradiction.js +0 -66
  18. package/dist/memory/pipeline/l1-dedup.d.ts +0 -23
  19. package/dist/memory/pipeline/l1-dedup.js +0 -39
  20. package/dist/memory/pipeline/l1-extractor.d.ts +0 -21
  21. package/dist/memory/pipeline/l1-extractor.js +0 -180
  22. package/dist/memory/pipeline/l2-direction-shift.d.ts +0 -10
  23. package/dist/memory/pipeline/l2-direction-shift.js +0 -27
  24. package/dist/memory/pipeline/l2-scene.d.ts +0 -15
  25. package/dist/memory/pipeline/l2-scene.js +0 -140
  26. package/dist/memory/pipeline/l3-distiller.d.ts +0 -15
  27. package/dist/memory/pipeline/l3-distiller.js +0 -40
  28. package/dist/memory/pipeline/task-queue.d.ts +0 -54
  29. package/dist/memory/pipeline/task-queue.js +0 -117
  30. package/dist/memory/prompts/graph-extraction-batch.d.ts +0 -14
  31. package/dist/memory/prompts/graph-extraction-batch.js +0 -54
  32. package/dist/memory/prompts/l1-contradiction-batch.d.ts +0 -16
  33. package/dist/memory/prompts/l1-contradiction-batch.js +0 -47
  34. package/dist/memory/prompts/l1-contradiction.d.ts +0 -1
  35. package/dist/memory/prompts/l1-contradiction.js +0 -25
  36. package/dist/memory/prompts/l1-extraction.d.ts +0 -10
  37. package/dist/memory/prompts/l1-extraction.js +0 -114
  38. package/dist/memory/prompts/l2-direction-shift.d.ts +0 -5
  39. package/dist/memory/prompts/l2-direction-shift.js +0 -32
  40. package/dist/memory/prompts/l2-scene-cluster.d.ts +0 -2
  41. package/dist/memory/prompts/l2-scene-cluster.js +0 -33
  42. package/dist/memory/prompts/l2-scene.d.ts +0 -7
  43. package/dist/memory/prompts/l2-scene.js +0 -40
  44. package/dist/memory/prompts/l3-persona.d.ts +0 -6
  45. package/dist/memory/prompts/l3-persona.js +0 -60
  46. package/dist/memory/store/types.d.ts +0 -101
  47. package/dist/memory/store/types.js +0 -1
  48. package/dist/memory/types.d.ts +0 -207
  49. package/dist/memory/types.js +0 -7
  50. package/dist/memory/validation.d.ts +0 -441
  51. package/dist/memory/validation.js +0 -129
  52. package/dist/tools/agent_memory_tools.d.ts +0 -485
  53. package/dist/tools/agent_memory_tools.js +0 -793
  54. package/dist/tools/get_doc.d.ts +0 -21
  55. package/dist/tools/get_doc.js +0 -24
  56. package/dist/tools/list_docs.d.ts +0 -15
  57. package/dist/tools/list_docs.js +0 -16
  58. package/dist/tools/update_doc.d.ts +0 -24
  59. package/dist/tools/update_doc.js +0 -35
  60. /package/dist/__tests__/{agent_mode.test.d.ts → cognitive-extractor.test.d.ts} +0 -0
package/.env.example CHANGED
@@ -1,26 +1,37 @@
1
- # BrainRouter MCP server — environment
1
+ # BrainRouter MCP server — environment template
2
2
  #
3
- # Copy to brainrouter/.env. Loaded automatically by `dotenv/config` when the
4
- # MCP server starts (the CLI sets the spawned child's cwd to this folder so
5
- # stdio-launched MCPs also pick it up).
3
+ # Copy to `brainrouter/.env`. Loaded automatically by `dotenv/config` when
4
+ # the MCP server starts. The CLI sets the spawned child's cwd to this
5
+ # folder so stdio-launched MCPs also pick it up.
6
6
  #
7
7
  # This file is for MCP-SERVER concerns only:
8
- # - cognitive extraction / synthesis LLM
9
- # - embedding provider
10
- # - reranker provider
11
- # - memory engine knobs (decay, sweeper, focus, identity)
12
- # - server auth (JWT, admin seed, CORS)
8
+ # 1. LLM credentials & endpoint (shared by every LLM-driven step)
9
+ # 2. Retrieval pipeline stages (embeddings, reranker, judge)
10
+ # 3. Memory engine knobs (storage, decay, distillation, sweeper)
11
+ # 4. Skill pre-warming
12
+ # 5. Server auth (JWT, admin seed, CORS, HTTP MCP key)
13
13
  #
14
14
  # CLI agent knobs (sandbox, tool loop limits, web search, etc.) live in
15
- # brainrouter-cli/.env.example. Keep them separate so the two processes
15
+ # `brainrouter-cli/.env.example`. Keep them separate so the two processes
16
16
  # can be configured independently.
17
-
18
- # ==========================================
19
- # LLM (cognitive extraction + synthesis)
20
- # ==========================================
21
- # Used by L1 extraction, contradiction checks, graph extraction, L2 scenes,
22
- # L3 persona synthesis. Falls back to OPENAI_API_KEY.
23
- BRAINROUTER_LLM_API_KEY=your_api_key_here
17
+ #
18
+ # All values in this template are blank placeholders. Fill in only what
19
+ # you actually need; most settings have sensible defaults.
20
+
21
+
22
+ # =============================================================================
23
+ # 1. LLM (cognitive extraction + synthesis + judging)
24
+ # =============================================================================
25
+ # Shared credential and endpoint for every LLM-driven step on the MCP side:
26
+ # - cognitive extraction (turn raw conversation into structured memories)
27
+ # - contradiction checks (detect when a new memory conflicts with an old one)
28
+ # - graph extraction (pull entities + relations into the knowledge graph)
29
+ # - focus-scene summaries (group related memories under a scene heading)
30
+ # - persona synthesis (cross-session identity / "who is this user")
31
+ # - relevance judging (Stage 3 of retrieval — see section 2 below)
32
+ #
33
+ # Falls back to OPENAI_API_KEY when BRAINROUTER_LLM_API_KEY is unset.
34
+ BRAINROUTER_LLM_API_KEY=
24
35
 
25
36
  # OpenAI-compatible chat-completions endpoint.
26
37
  # Examples:
@@ -29,116 +40,155 @@ BRAINROUTER_LLM_API_KEY=your_api_key_here
29
40
  # LM Studio: http://localhost:1234/v1/chat/completions
30
41
  # Ollama: http://localhost:11434/v1/chat/completions
31
42
  BRAINROUTER_LLM_ENDPOINT=https://api.openai.com/v1/chat/completions
32
-
33
43
  BRAINROUTER_LLM_MODEL=gpt-4o-mini
34
44
 
35
- # Optional model split.
45
+ # Optional per-task model split. Both inherit BRAINROUTER_LLM_MODEL.
46
+ # - EXTRACTION_MODEL: high-volume, can be cheap/local
47
+ # - SYNTHESIS_MODEL: lower-volume but benefits from smarter models
36
48
  # BRAINROUTER_EXTRACTION_MODEL=gpt-4o-mini
37
49
  # BRAINROUTER_SYNTHESIS_MODEL=gpt-4o
38
50
 
39
- # Per-call timeout for MCP-side LLM calls. Default: 120000.
51
+ # Per-call timeout for MCP-side LLM calls. Default 120000 (2 min).
40
52
  # BRAINROUTER_LLM_TIMEOUT_MS=120000
41
53
 
42
54
  # Cap on concurrent in-flight LLM calls FROM THE MCP PROCESS.
43
- # Default: 2 (set to 1 on consumer hardware running LM Studio with a single model).
55
+ # Default 2. Set to 1 on consumer hardware running LM Studio with a single
56
+ # model; raise to 10+ for cloud APIs.
44
57
  # BRAINROUTER_LLM_MAX_CONCURRENT=2
45
58
 
46
- # ==========================================
47
- # Embeddings (vector search)
48
- # ==========================================
49
- # Falls back to BRAINROUTER_LLM_API_KEY when omitted.
50
- # Vector search is disabled if no key is available.
59
+
60
+ # =============================================================================
61
+ # 2. Retrieval pipeline (three optional stages)
62
+ # =============================================================================
63
+ # Each stage layers on top of the always-on FTS5 keyword search. Add them in
64
+ # order — every stage raises relevance but also adds latency.
65
+ #
66
+ # Stage 1: Embeddings — semantic vector recall (fused with keyword via RRF)
67
+ # Stage 2: Reranker — cross-encoder reorders the candidate pool
68
+ # Stage 3: Judge — LLM approves/rejects each finalist for relevance
69
+ #
70
+ # Skip any stage by leaving its credentials unset.
71
+
72
+ # --- Stage 1: Embeddings ----------------------------------------------------
73
+ # Vector search runs when an embedding key is available; otherwise the
74
+ # pipeline falls back to keyword-only. Falls back to BRAINROUTER_LLM_API_KEY
75
+ # when BRAINROUTER_EMBEDDING_API_KEY is unset.
51
76
  # BRAINROUTER_EMBEDDING_API_KEY=
52
77
  BRAINROUTER_EMBEDDING_ENDPOINT=https://api.openai.com/v1/embeddings
53
78
  BRAINROUTER_EMBEDDING_MODEL=text-embedding-3-small
54
79
  BRAINROUTER_EMBEDDING_DIMENSIONS=1536
55
80
 
56
- # ==========================================
57
- # Reranker (optional)
58
- # ==========================================
59
- # Disabled unless a key is present.
81
+ # --- Stage 2: Reranker (optional) -------------------------------------------
82
+ # Cross-encoder rescores the candidate pool. Disabled unless a key is set.
83
+ # Compatible with Cohere /v1/rerank or any vLLM-style /v1/rerank endpoint.
60
84
  # BRAINROUTER_RERANKER_API_KEY=
61
85
  # BRAINROUTER_RERANKER_ENDPOINT=https://api.cohere.com/v1/rerank
62
86
  # BRAINROUTER_RERANKER_MODEL=rerank-english-v3.0
63
87
  # BRAINROUTER_RERANKER_TOP_N=10
64
88
 
65
- # ==========================================
66
- # Storage
67
- # ==========================================
89
+ # --- Stage 3: Relevance judge (optional, off by default) --------------------
90
+ # LLM-as-judge gate that runs AFTER the reranker and drops candidates that
91
+ # aren't actually relevant to the query. The reranker only re-orders; it
92
+ # never filters — so a memory sharing vocabulary with the query but about
93
+ # a different subject still makes it through. The judge fixes that.
94
+ #
95
+ # Adds one extra LLM call per recall: ~500ms-1s on a small/fast model.
96
+ # Falls back to BRAINROUTER_LLM_* unless explicitly overridden, so a single
97
+ # credential covers extraction, synthesis, and judging by default.
98
+ # BRAINROUTER_RELEVANCE_JUDGE_ENABLED=true
99
+ # BRAINROUTER_RELEVANCE_JUDGE_API_KEY=
100
+ # BRAINROUTER_RELEVANCE_JUDGE_ENDPOINT=https://api.openai.com/v1/chat/completions
101
+ # BRAINROUTER_RELEVANCE_JUDGE_MODEL=gpt-4o-mini
102
+ # Max candidates sent to the judge in a single batched call. Default 10.
103
+ # BRAINROUTER_RELEVANCE_JUDGE_MAX_CANDIDATES=10
104
+ # Per-call timeout in ms. Default 15000.
105
+ # BRAINROUTER_RELEVANCE_JUDGE_TIMEOUT_MS=15000
106
+
107
+
108
+ # =============================================================================
109
+ # 3. Memory engine
110
+ # =============================================================================
111
+
112
+ # --- Storage paths ----------------------------------------------------------
68
113
  # SQLite memory store path. Default: ~/.brainrouter/memory.db.
69
- # BRAINROUTER_MEMORY_DB=/Users/you/.brainrouter/memory.db
70
-
114
+ # BRAINROUTER_MEMORY_DB=/path/to/memory.db
71
115
  # Override per-user state root. Default: ~/.brainrouter.
72
116
  # BRAINROUTER_HOME=/path/to/state
73
-
74
117
  # Workspace root when MCP --root is omitted.
75
118
  # BRAINROUTER_LOCAL_ROOT=/path/to/your/project
76
119
 
77
- # ==========================================
78
- # Memory engine
79
- # ==========================================
80
- # Set false to disable GraphRAG (2-hop entity expansion). Default: true.
120
+ # --- Knowledge graph + contradictions ---------------------------------------
121
+ # Disable GraphRAG (2-hop entity expansion) by setting to false. Default true.
81
122
  # BRAINROUTER_GRAPH_ENABLED=true
82
123
  # BRAINROUTER_GRAPH_TIMEOUT_MS=120000
83
124
  # BRAINROUTER_CONTRADICTION_TIMEOUT_MS=60000
84
125
 
85
- # Memories recalled this many times without citation are auto-archived.
86
- # 0 disables. Default: 10.
126
+ # --- ACE feedback (auto-archive uncited memories) ---------------------------
127
+ # Memories surfaced in recall this many times without being cited by the
128
+ # agent get auto-archived. 0 disables. Default 10.
87
129
  # BRAINROUTER_ACE_ARCHIVE_THRESHOLD=10
88
130
 
89
- # Focus-scene distillation trigger (new records before scenes rebuild).
131
+ # --- Distillation triggers --------------------------------------------------
132
+ # Focus-scene summary — groups related cognitives under a scene heading.
133
+ # Fires once every N new cognitive records.
90
134
  # BRAINROUTER_FOCUS_TRIGGER_N=10
91
135
  # BRAINROUTER_MAX_FOCUS_SCENES=20
92
136
 
93
- # Identity (persona) distillation trigger.
137
+ # Persona synthesis — cross-session identity summary ("who is this user").
138
+ # Fires once every N new cognitive records.
94
139
  # BRAINROUTER_IDENTITY_TRIGGER_N=50
140
+ # In-memory persona cache lifetime. Default 3600000 (1h).
95
141
  # BRAINROUTER_PERSONA_CACHE_TTL_MS=3600000
96
142
 
97
- # ==========================================
98
- # Skill pre-warming
99
- # ==========================================
100
- # BRAINROUTER_PREWARM_ENABLED=false
143
+ # --- Extraction backlog sweeper ---------------------------------------------
144
+ # Background job that runs cognitive extraction over sensory rows the
145
+ # per-turn extractor missed (errored, skipped, or interrupted).
146
+ # BRAINROUTER_DISABLE_EXTRACTION_SWEEPER=true
147
+ # BRAINROUTER_EXTRACTION_SWEEP_INTERVAL_MS=300000 # floor: 30000
148
+ # BRAINROUTER_EXTRACTION_SWEEP_MIN_AGE_MS=120000
149
+ # BRAINROUTER_EXTRACTION_MAX_FAILURES=5
150
+
151
+
152
+ # =============================================================================
153
+ # 4. Skill pre-warming (optional, off by default)
154
+ # =============================================================================
155
+ # Memetic skill activation — repeatedly invoking a skill heats it up so its
156
+ # memory hints get pre-injected into context. Half-life decay keeps cold
157
+ # skills from polluting the prompt.
158
+ # BRAINROUTER_PREWARM_ENABLED=true
101
159
  # BRAINROUTER_SKILL_HALF_LIFE_MINUTES=10
102
160
  # BRAINROUTER_SKILL_MIN_TURN_DECAY=0.05
103
161
  # BRAINROUTER_SKILL_PREWARM_THRESHOLD=0.3
104
162
  # BRAINROUTER_SKILL_SPIKE_AMOUNT=1.0
105
163
  # BRAINROUTER_SKILL_MAX_POTENTIAL=4.0
106
164
 
107
- # ==========================================
108
- # Extraction backlog sweeper
109
- # ==========================================
110
- # BRAINROUTER_DISABLE_EXTRACTION_SWEEPER=false
111
- # BRAINROUTER_EXTRACTION_SWEEP_INTERVAL_MS=300000 # floored at 30000
112
- # BRAINROUTER_EXTRACTION_SWEEP_MIN_AGE_MS=120000
113
- # BRAINROUTER_EXTRACTION_MAX_FAILURES=5
114
165
 
115
- # ==========================================
116
- # Server auth
117
- # ==========================================
166
+ # =============================================================================
167
+ # 5. Server auth (HTTP MCP + dashboard)
168
+ # =============================================================================
169
+ # Only needed if you run the HTTP MCP transport or the web dashboard.
170
+ # Stdio MCP (the default transport) doesn't use any of these.
171
+
118
172
  # Seeded admin (used when the users table is empty and by scripts/setup-admin.js).
119
173
  BRAINROUTER_DEFAULT_ADMIN_USER_ID=admin
120
174
  BRAINROUTER_ADMIN_EMAIL=admin@example.com
121
- BRAINROUTER_ADMIN_PASSWORD=change_me_before_use
175
+ # Set on first boot to seed the admin password — leave blank afterward.
176
+ BRAINROUTER_ADMIN_PASSWORD=
122
177
 
123
- # JWT signing key for dashboard sessions.
124
- # Generate one with:
178
+ # JWT signing key for dashboard sessions. Generate with:
125
179
  # node -e "console.log(require('crypto').randomBytes(32).toString('hex'))"
126
- # If unset, the server generates a random secret per boot sessions do not survive restarts.
127
- BRAINROUTER_JWT_SECRET=replace_with_a_long_random_secret
180
+ # If unset, the server generates a random secret per boot and sessions do
181
+ # not survive restarts.
182
+ BRAINROUTER_JWT_SECRET=
128
183
  # BRAINROUTER_JWT_EXPIRES_SECS=86400
129
184
 
130
185
  # Dashboard CORS allowlist.
131
186
  BRAINROUTER_CORS_ORIGIN=http://localhost:3000
132
187
 
133
- # API key for HTTP MCP transport clients. Usually set in the client config,
134
- # not here. Reset with: npm run setup:admin -- --reset --userId admin.
135
- # BRAINROUTER_API_KEY=br_your_api_key
188
+ # API key for HTTP MCP transport clients. Usually configured in the client,
189
+ # not here. Reset with: npm run setup:admin -- --reset --userId admin
190
+ # BRAINROUTER_API_KEY=
136
191
 
137
192
  # Stdio fallback user id when no authenticated user mapping is available.
138
193
  # Prefer BRAINROUTER_API_KEY instead.
139
194
  # BRAINROUTER_USER_ID=default
140
-
141
- # ==========================================
142
- # Dashboard (read by web/, not by this server)
143
- # ==========================================
144
- # NEXT_PUBLIC_API_URL=http://localhost:3747
@@ -0,0 +1,112 @@
1
+ import { describe, expect, it } from "vitest";
2
+ import { extractCognitiveMemories } from "../memory/pipeline/cognitive-extractor.js";
3
+ function makeMessage(messageText) {
4
+ const recordedAt = new Date().toISOString();
5
+ return {
6
+ id: "sensory_test",
7
+ userId: "user_test",
8
+ sessionKey: "session_test",
9
+ sessionId: "session_test",
10
+ role: "user",
11
+ messageText,
12
+ recordedAt,
13
+ timestamp: Date.parse(recordedAt),
14
+ skillTag: "",
15
+ };
16
+ }
17
+ function makeRunner(raw) {
18
+ return {
19
+ run: async () => raw,
20
+ };
21
+ }
22
+ function memory(content) {
23
+ return `{
24
+ "type": "episodic",
25
+ "content": "${content}",
26
+ "priority": 50,
27
+ "sourceKind": "model_inference",
28
+ "verificationStatus": "unverified"
29
+ }`;
30
+ }
31
+ async function extractContents(raw) {
32
+ const result = await extractCognitiveMemories({
33
+ messages: [makeMessage("capture these paths")],
34
+ userId: "user_test",
35
+ sessionKey: "session_test",
36
+ sessionId: "session_test",
37
+ llmRunner: makeRunner(raw),
38
+ });
39
+ expect(result.success).toBe(true);
40
+ return result.records.map((record) => record.content);
41
+ }
42
+ describe("cognitive extractor JSON escape repair", () => {
43
+ it("round-trips ambiguous path backslashes without interpreting them as escapes", async () => {
44
+ const raw = String.raw `[
45
+ {
46
+ "scene_name": "Path repair",
47
+ "memories": [
48
+ ${memory(String.raw `C:\users\file`)},
49
+ ${memory(String.raw `C:\bin\node.exe`)},
50
+ ${memory(String.raw `/repos/\target/release`)},
51
+ ${memory(String.raw `\release\foo.txt`)},
52
+ ${memory(String.raw `line1\nline2`)}
53
+ ]
54
+ }
55
+ ]`;
56
+ await expect(extractContents(raw)).resolves.toEqual([
57
+ String.raw `C:\users\file`,
58
+ String.raw `C:\bin\node.exe`,
59
+ String.raw `/repos/\target/release`,
60
+ String.raw `\release\foo.txt`,
61
+ String.raw `line1\nline2`,
62
+ ]);
63
+ });
64
+ it("keeps legitimate JSON escapes on the happy path", async () => {
65
+ const raw = String.raw `[
66
+ {
67
+ "scene_name": "Happy path",
68
+ "memories": [
69
+ ${memory(String.raw `line1\nline2`)}
70
+ ]
71
+ }
72
+ ]`;
73
+ await expect(extractContents(raw)).resolves.toEqual(["line1\nline2"]);
74
+ });
75
+ it("decodes \\uXXXX unicode escapes on the happy path", async () => {
76
+ // The input JSON contains the literal 6-char sequence \u00e9 (an
77
+ // escape sequence as text). When the JSON is well-formed, the first
78
+ // JSON.parse handles the escape and we get the actual é code point.
79
+ // Locks down the contract for content like "café" / "résumé" /
80
+ // non-ASCII names emitted by LLMs that escape non-ASCII output.
81
+ const raw = String.raw `[
82
+ {
83
+ "scene_name": "Unicode happy",
84
+ "memories": [
85
+ ${memory(String.raw `caf\u00e9 done`)}
86
+ ]
87
+ }
88
+ ]`;
89
+ await expect(extractContents(raw)).resolves.toEqual(["café done"]);
90
+ });
91
+ it("preserves \\uXXXX literally when repair fires (paths win the tie-break)", async () => {
92
+ // If anything in the batch forces the repair branch (here: a Windows
93
+ // path with \u + non-hex), then ALL ambiguous backslashes — including
94
+ // otherwise-valid \uXXXX unicode escapes elsewhere in the payload —
95
+ // become literal. Deliberate tradeoff: silent path corruption is
96
+ // worse than a one-off escaped unicode that doesn't decode. The
97
+ // resulting content has a literal `\u00e9` (6 chars) instead of "é".
98
+ const raw = String.raw `[
99
+ {
100
+ "scene_name": "Unicode + path collision",
101
+ "memories": [
102
+ ${memory(String.raw `C:\users\file`)},
103
+ ${memory(String.raw `caf\u00e9 collateral`)}
104
+ ]
105
+ }
106
+ ]`;
107
+ await expect(extractContents(raw)).resolves.toEqual([
108
+ String.raw `C:\users\file`,
109
+ String.raw `caf\u00e9 collateral`,
110
+ ]);
111
+ });
112
+ });
@@ -22,7 +22,14 @@ describe("crypto auth helpers", () => {
22
22
  });
23
23
  it("verifyJwt returns null for tampered signature", () => {
24
24
  const token = signJwt({ userId: "u1" }, "secret", 60);
25
- const tampered = `${token.slice(0, -1)}x`;
25
+ // Pick a replacement char that is GUARANTEED to differ from the
26
+ // original last char. The previous version hard-coded "x"; whenever
27
+ // the JWT's base64url signature happened to end in "x" (~1/64 odds),
28
+ // the "tampered" token equalled the original and verification
29
+ // succeeded — flaky failure. See PR #22 CI run 26323691062.
30
+ const lastChar = token.slice(-1);
31
+ const replacement = lastChar === "A" ? "B" : "A";
32
+ const tampered = token.slice(0, -1) + replacement;
26
33
  expect(verifyJwt(tampered, "secret")).toBeNull();
27
34
  });
28
35
  });
@@ -197,4 +197,71 @@ describe("short-term working memory tools", () => {
197
197
  expect(existsSync(join(resolve("abc123abc123"), ".brainrouter"))).toBe(false);
198
198
  rmSync(result.state.workDir, { recursive: true, force: true });
199
199
  });
200
+ it("round-trips kind:\"reasoning\" through offload → context", async () => {
201
+ // 0.3.6 item 2c: agents now offload a structured "Why: …" step after
202
+ // every non-trivial tool batch. The kind field is free-form on the
203
+ // schema, so a regression that silently dropped or overwrote the value
204
+ // (e.g. always-default to "tool_output") would erase the entire
205
+ // audit-trail surface. Pin the round-trip explicitly.
206
+ const workspacePath = mkdtempSync(join(tmpdir(), "brainrouter-working-reasoning-"));
207
+ const userId = "user-1";
208
+ const sessionKey = "reasoning-session";
209
+ const offload = parseToolJson(await handleMemoryWorkingTool("memory_working_offload", {
210
+ workspacePath,
211
+ userId,
212
+ sessionKey,
213
+ payload: "Decided to refactor canvas.ts because rendering by kind was missing.",
214
+ title: "Why: refactor canvas for kind-aware rendering",
215
+ summary: "Picked the dashed-border style for reasoning nodes.",
216
+ kind: "reasoning",
217
+ }));
218
+ expect(offload.state.injectedState.currentNode.kind).toBe("reasoning");
219
+ const context = parseToolJson(await handleMemoryWorkingTool("memory_working_context", {
220
+ workspacePath,
221
+ userId,
222
+ sessionKey,
223
+ }));
224
+ expect(context.steps).toHaveLength(1);
225
+ expect(context.steps[0].kind).toBe("reasoning");
226
+ expect(context.state.injectedState.recentSteps[0].kind).toBe("reasoning");
227
+ });
228
+ it("renders reasoning-kind nodes with a distinct Mermaid style in the canvas", async () => {
229
+ // The canvas needs to visually separate reasoning ("why") nodes from
230
+ // tool_output ("what came back") and compressed_summary ("the older
231
+ // history got rolled up") nodes, so a human inspecting `canvas.mmd`
232
+ // can see the decision trail at a glance. Pin the style emission so a
233
+ // future refactor of canvas.ts can't silently flatten all kinds back
234
+ // to a single shape.
235
+ const workspacePath = mkdtempSync(join(tmpdir(), "brainrouter-working-canvas-kind-"));
236
+ const userId = "user-1";
237
+ const sessionKey = "canvas-kind-session";
238
+ const tool = parseToolJson(await handleMemoryWorkingTool("memory_working_offload", {
239
+ workspacePath,
240
+ userId,
241
+ sessionKey,
242
+ payload: "tool output payload",
243
+ title: "Tool result",
244
+ summary: "Read repo files",
245
+ kind: "tool_output",
246
+ }));
247
+ const reason = parseToolJson(await handleMemoryWorkingTool("memory_working_offload", {
248
+ workspacePath,
249
+ userId,
250
+ sessionKey,
251
+ payload: "Chose dashed-border style because reasoning is conceptually different from tool output.",
252
+ title: "Why: dashed style for reasoning",
253
+ summary: "Visual separation of why vs. what.",
254
+ kind: "reasoning",
255
+ }));
256
+ const context = parseToolJson(await handleMemoryWorkingTool("memory_working_context", {
257
+ workspacePath,
258
+ userId,
259
+ sessionKey,
260
+ }));
261
+ // Reasoning node must carry a distinct stroke-dasharray style line.
262
+ // Tool-output node must NOT carry that same dashed style — otherwise
263
+ // the "distinct" claim is meaningless.
264
+ expect(context.canvas).toMatch(new RegExp(`style ${reason.nodeId} [^\\n]*stroke-dasharray`));
265
+ expect(context.canvas).not.toMatch(new RegExp(`style ${tool.nodeId} [^\\n]*stroke-dasharray`));
266
+ });
200
267
  });
package/dist/index.js CHANGED
File without changes
@@ -3,6 +3,7 @@ import { MemoryCapturePipeline } from "./capture.js";
3
3
  import { MemoryRecallPipeline } from "./recall.js";
4
4
  import { EmbeddingService } from "./store/embedding.js";
5
5
  import { RerankerService } from "./store/reranker.js";
6
+ import { RelevanceJudgeService } from "./store/relevance-judge.js";
6
7
  import { scanSkillsForHints } from "./skill-hints-loader.js";
7
8
  import { distillFocusScenes } from "./pipeline/contextual-focus-builder.js";
8
9
  import { distillCoreIdentity } from "./pipeline/identity-distiller.js";
@@ -172,6 +173,25 @@ export class MemoryEngine {
172
173
  ? parseInt(process.env.BRAINROUTER_RERANKER_TOP_N, 10)
173
174
  : undefined,
174
175
  });
176
+ // Relevance judge sits behind a flag (off by default) — opt in with
177
+ // BRAINROUTER_RELEVANCE_JUDGE_ENABLED=true. Falls back to the shared
178
+ // BRAINROUTER_LLM_* settings unless explicitly overridden so a single
179
+ // LLM credential covers extraction, synthesis, and judging.
180
+ const relevanceJudge = new RelevanceJudgeService({
181
+ enabled: process.env.BRAINROUTER_RELEVANCE_JUDGE_ENABLED === "true",
182
+ endpoint: process.env.BRAINROUTER_RELEVANCE_JUDGE_ENDPOINT
183
+ ?? process.env.BRAINROUTER_LLM_ENDPOINT,
184
+ apiKey: process.env.BRAINROUTER_RELEVANCE_JUDGE_API_KEY
185
+ ?? process.env.BRAINROUTER_LLM_API_KEY,
186
+ model: process.env.BRAINROUTER_RELEVANCE_JUDGE_MODEL
187
+ ?? process.env.BRAINROUTER_LLM_MODEL,
188
+ maxCandidates: process.env.BRAINROUTER_RELEVANCE_JUDGE_MAX_CANDIDATES
189
+ ? parseInt(process.env.BRAINROUTER_RELEVANCE_JUDGE_MAX_CANDIDATES, 10)
190
+ : undefined,
191
+ timeoutMs: process.env.BRAINROUTER_RELEVANCE_JUDGE_TIMEOUT_MS
192
+ ? parseInt(process.env.BRAINROUTER_RELEVANCE_JUDGE_TIMEOUT_MS, 10)
193
+ : undefined,
194
+ });
175
195
  this.store.initVec(embeddingService.getDimensions());
176
196
  if (embeddingService.isReady()) {
177
197
  void this.store.reembedStaleRecords((text) => embeddingService.embed(text)).then((count) => {
@@ -183,7 +203,7 @@ export class MemoryEngine {
183
203
  });
184
204
  }
185
205
  this.capturePipeline = new MemoryCapturePipeline(this.store, this.extractionRunner, embeddingService, 1);
186
- this.recallPipeline = new MemoryRecallPipeline(this.store, embeddingService, rerankerService);
206
+ this.recallPipeline = new MemoryRecallPipeline(this.store, embeddingService, rerankerService, relevanceJudge);
187
207
  this.startExtractionSweeper();
188
208
  }
189
209
  async ensureSeedAdminUser() {
@@ -126,7 +126,7 @@ function parseExtractionResult(raw) {
126
126
  const match = cleaned.match(/\[[\s\S]*\]/);
127
127
  if (!match)
128
128
  return [];
129
- const parsed = JSON.parse(match[0]);
129
+ const parsed = parseJsonWithEscapeRepair(match[0]);
130
130
  if (!Array.isArray(parsed))
131
131
  return [];
132
132
  const scenes = [];
@@ -159,6 +159,24 @@ function parseExtractionResult(raw) {
159
159
  return [];
160
160
  }
161
161
  }
162
// LLMs frequently emit JSON where string values contain backslashes that
// aren't valid JSON escapes — Windows paths (\users), regex literals,
// LaTeX (\section), or shell snippets. JSON.parse rejects the entire
// payload on the first bad escape, so we'd drop an otherwise-good batch
// of memories over one stray backslash. Once the first parse has failed,
// preserve ambiguous backslashes literally; otherwise valid JSON escapes
// like \b, \f, \n, \r, \t, or \uXXXX can silently corrupt paths.
/**
 * Parse a JSON document, retrying once with stray backslashes escaped.
 *
 * The first attempt is a plain `JSON.parse`, so well-formed payloads keep
 * their normal escape semantics. Only when that throws a `SyntaxError` is
 * the text repaired and parsed a second time.
 *
 * @param {string} raw - JSON text, typically produced by an LLM.
 * @returns {*} The parsed value.
 * @throws {SyntaxError} When the payload is still invalid after the repair.
 */
function parseJsonWithEscapeRepair(raw) {
    try {
        return JSON.parse(raw);
    }
    catch (err) {
        if (!(err instanceof SyntaxError))
            throw err;
        // Walk the text one backslash *pair* at a time: each match consumes the
        // backslash and the character after it. That keeps an already-escaped
        // backslash (`\\`) intact as a unit. Inspecting backslashes one by one
        // would treat the second half of `\\` as a stray escape whenever it is
        // followed by an ordinary character, turning `C:\\users` into
        // `C:\\\users` and making the retry fail (or `\\n` decode to a newline).
        // `\"`, `\\` and `\/` are kept; every other backslash becomes literal.
        const repaired = raw.replace(/\\([\s\S]?)/g, (pair, next) => (next === '"' || next === "\\" || next === "/" ? pair : `\\\\${next}`));
        return JSON.parse(repaired);
    }
}
162
180
  function parseMemoryType(value) {
163
181
  const candidate = String(value || "");
164
182
  return ALLOWED_MEMORY_TYPES.has(candidate) ? candidate : "episodic";
@@ -2,6 +2,7 @@ import type { IMemoryStore } from "@kinqs/brainrouter-types";
2
2
  import type { RecallResult } from "@kinqs/brainrouter-types";
3
3
  import type { EmbeddingService } from "./store/embedding.js";
4
4
  import type { RerankerService } from "./store/reranker.js";
5
+ import type { RelevanceJudgeService } from "./store/relevance-judge.js";
5
6
  /**
6
7
  * Optional filters applied to the candidate pool after RRF but before
7
8
  * neural-spark propagation and reranking. Filters never *add* records — they
@@ -27,7 +28,8 @@ export declare class MemoryRecallPipeline {
27
28
  private store;
28
29
  private embeddingService;
29
30
  private rerankerService;
30
- constructor(store: IMemoryStore, embeddingService: EmbeddingService, rerankerService: RerankerService);
31
+ private relevanceJudge?;
32
+ constructor(store: IMemoryStore, embeddingService: EmbeddingService, rerankerService: RerankerService, relevanceJudge?: RelevanceJudgeService | undefined);
31
33
  recall(params: {
32
34
  userId: string;
33
35
  sessionKey: string;
@@ -51,10 +51,12 @@ export class MemoryRecallPipeline {
51
51
  store;
52
52
  embeddingService;
53
53
  rerankerService;
54
- constructor(store, embeddingService, rerankerService) {
54
+ relevanceJudge;
55
+ constructor(store, embeddingService, rerankerService, relevanceJudge) {
55
56
  this.store = store;
56
57
  this.embeddingService = embeddingService;
57
58
  this.rerankerService = rerankerService;
59
+ this.relevanceJudge = relevanceJudge;
58
60
  }
59
61
  async recall(params) {
60
62
  const startTime = Date.now();
@@ -270,6 +272,35 @@ export class MemoryRecallPipeline {
270
272
  console.error("[BrainRouter] Reranker failed during recall, falling back to RRF:", e.message);
271
273
  }
272
274
  }
275
+ // Stage 4 — LLM Relevance Judge (semantic approve/reject gate)
276
+ //
277
+ // The reranker orders candidates by a learned relevance score but never
278
+ // *filters* — so a memory that shares vocabulary with the query but is
279
+ // about a different subject still makes the cut. The judge fixes that by
280
+ // asking a fast LLM "is each of these actually relevant?" and dropping
281
+ // the rejects. On any failure we keep the reranker output unchanged so a
282
+ // flaky judge call never breaks recall.
283
+ let judgeUsed = false;
284
+ let judgeApproved = 0;
285
+ let judgeRejected = 0;
286
+ let judgeVerdicts;
287
+ if (this.relevanceJudge?.isReady() && topResults.length > 0) {
288
+ try {
289
+ const judgeCandidates = topResults.map(r => ({
290
+ id: r.record.record_id,
291
+ content: r.record.content,
292
+ }));
293
+ const judgeResult = await this.relevanceJudge.judge({ query, candidates: judgeCandidates });
294
+ judgeUsed = true;
295
+ judgeVerdicts = judgeResult.verdicts;
296
+ judgeApproved = judgeResult.approvedIndices.length;
297
+ judgeRejected = topResults.length - judgeApproved;
298
+ topResults = judgeResult.approvedIndices.map((i) => topResults[i]);
299
+ }
300
+ catch (e) {
301
+ console.error("[BrainRouter] Relevance judge failed during recall, keeping reranker output:", e.message);
302
+ }
303
+ }
273
304
  // 5. Format for context
274
305
  const memoryLines = topResults.map(({ record }) => {
275
306
  const tag = record.scene_name ? `${record.type}|${record.scene_name}` : record.type;
@@ -279,7 +310,13 @@ export class MemoryRecallPipeline {
279
310
  }
280
311
  return line;
281
312
  });
282
- const prependContext = `<relevant-memories>\n The following memories are relevant to this query. Reference only if helpful:\n\n ${memoryLines.join("\n ")}\n</relevant-memories>`;
313
+ // If the judge rejected everything, skip the prepend block entirely —
314
+ // an empty <relevant-memories> tag is worse than no tag because it
315
+ // implies "we looked and nothing helped," which the agent should infer
316
+ // from the absence of the block.
317
+ const prependContext = memoryLines.length > 0
318
+ ? `<relevant-memories>\n The following memories are relevant to this query. Reference only if helpful:\n\n ${memoryLines.join("\n ")}\n</relevant-memories>`
319
+ : undefined;
283
320
  // Build appendSystemContext with Contextual Focus Navigation + tools guide
284
321
  const topScenes = this.store.getTopContextualFocus(userId, 3);
285
322
  let appendSystemContext = "";
@@ -329,9 +366,10 @@ export class MemoryRecallPipeline {
329
366
  recordId: r.record.record_id,
330
367
  skillTag: r.record.skill_tag
331
368
  }));
332
- const recallStrategy = vecResults.length > 0
369
+ const baseStrategy = vecResults.length > 0
333
370
  ? (usedReranker ? "hybrid+rerank" : "hybrid")
334
371
  : (usedReranker ? "keyword+rerank" : (filePathResults.length > 0 ? "keyword+file" : "keyword"));
372
+ const recallStrategy = judgeUsed ? `${baseStrategy}+judge` : baseStrategy;
335
373
  const durationMs = Date.now() - startTime;
336
374
  const recallExplanation = {
337
375
  ftsHits: ftsResults.length,
@@ -342,6 +380,10 @@ export class MemoryRecallPipeline {
342
380
  typeBoosts,
343
381
  skillBoostApplied,
344
382
  rerankerUsed: usedReranker,
383
+ judgeUsed,
384
+ judgeApproved,
385
+ judgeRejected,
386
+ judgeVerdicts,
345
387
  graphExpansion: hasGraphExpansion,
346
388
  citationBoosts,
347
389
  durationMs,
@@ -388,6 +430,9 @@ export class MemoryRecallPipeline {
388
430
  vecHits: explanation?.vecHits ?? 0,
389
431
  intentDetected: explanation?.intentDetected ?? "none",
390
432
  rerankerUsed: explanation?.rerankerUsed ?? false,
433
+ judgeUsed: explanation?.judgeUsed ?? false,
434
+ judgeApproved: explanation?.judgeApproved ?? 0,
435
+ judgeRejected: explanation?.judgeRejected ?? 0,
391
436
  },
392
437
  });
393
438
  }