@kinqs/brainrouter-mcp-server 0.3.5 → 0.3.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +121 -71
- package/README.md +1 -1
- package/dist/__tests__/cognitive-extractor.test.js +112 -0
- package/dist/__tests__/crypto.test.js +8 -1
- package/dist/__tests__/working-memory.test.js +67 -0
- package/dist/api/auth/crypto.js +8 -3
- package/dist/index.js +1 -1
- package/dist/memory/engine.js +21 -1
- package/dist/memory/pipeline/cognitive-extractor.js +19 -1
- package/dist/memory/recall.d.ts +3 -1
- package/dist/memory/recall.js +48 -3
- package/dist/memory/store/relevance-judge.d.ts +51 -0
- package/dist/memory/store/relevance-judge.js +196 -0
- package/dist/memory/working/canvas.js +11 -0
- package/docs/specs/0.3.7-terminal-ui-redesign.md +259 -0
- package/package.json +2 -2
- package/dist/memory/config.d.ts +0 -2
- package/dist/memory/config.js +0 -3
- package/dist/memory/pipeline/l1-contradiction.d.ts +0 -7
- package/dist/memory/pipeline/l1-contradiction.js +0 -66
- package/dist/memory/pipeline/l1-dedup.d.ts +0 -23
- package/dist/memory/pipeline/l1-dedup.js +0 -39
- package/dist/memory/pipeline/l1-extractor.d.ts +0 -21
- package/dist/memory/pipeline/l1-extractor.js +0 -180
- package/dist/memory/pipeline/l2-direction-shift.d.ts +0 -10
- package/dist/memory/pipeline/l2-direction-shift.js +0 -27
- package/dist/memory/pipeline/l2-scene.d.ts +0 -15
- package/dist/memory/pipeline/l2-scene.js +0 -140
- package/dist/memory/pipeline/l3-distiller.d.ts +0 -15
- package/dist/memory/pipeline/l3-distiller.js +0 -40
- package/dist/memory/pipeline/task-queue.d.ts +0 -54
- package/dist/memory/pipeline/task-queue.js +0 -117
- package/dist/memory/prompts/graph-extraction-batch.d.ts +0 -14
- package/dist/memory/prompts/graph-extraction-batch.js +0 -54
- package/dist/memory/prompts/l1-contradiction-batch.d.ts +0 -16
- package/dist/memory/prompts/l1-contradiction-batch.js +0 -47
- package/dist/memory/prompts/l1-contradiction.d.ts +0 -1
- package/dist/memory/prompts/l1-contradiction.js +0 -25
- package/dist/memory/prompts/l1-extraction.d.ts +0 -10
- package/dist/memory/prompts/l1-extraction.js +0 -114
- package/dist/memory/prompts/l2-direction-shift.d.ts +0 -5
- package/dist/memory/prompts/l2-direction-shift.js +0 -32
- package/dist/memory/prompts/l2-scene-cluster.d.ts +0 -2
- package/dist/memory/prompts/l2-scene-cluster.js +0 -33
- package/dist/memory/prompts/l2-scene.d.ts +0 -7
- package/dist/memory/prompts/l2-scene.js +0 -40
- package/dist/memory/prompts/l3-persona.d.ts +0 -6
- package/dist/memory/prompts/l3-persona.js +0 -60
- package/dist/memory/store/types.d.ts +0 -101
- package/dist/memory/store/types.js +0 -1
- package/dist/memory/types.d.ts +0 -207
- package/dist/memory/types.js +0 -7
- package/dist/memory/validation.d.ts +0 -441
- package/dist/memory/validation.js +0 -129
- package/dist/tools/agent_memory_tools.d.ts +0 -485
- package/dist/tools/agent_memory_tools.js +0 -793
- package/dist/tools/get_doc.d.ts +0 -21
- package/dist/tools/get_doc.js +0 -24
- package/dist/tools/list_docs.d.ts +0 -15
- package/dist/tools/list_docs.js +0 -16
- package/dist/tools/update_doc.d.ts +0 -24
- package/dist/tools/update_doc.js +0 -35
- /package/dist/__tests__/{agent_mode.test.d.ts → cognitive-extractor.test.d.ts} +0 -0
package/dist/memory/recall.js
CHANGED
|
@@ -51,10 +51,12 @@ export class MemoryRecallPipeline {
|
|
|
51
51
|
store;
|
|
52
52
|
embeddingService;
|
|
53
53
|
rerankerService;
|
|
54
|
-
|
|
54
|
+
relevanceJudge;
|
|
55
|
+
constructor(store, embeddingService, rerankerService, relevanceJudge) {
|
|
55
56
|
this.store = store;
|
|
56
57
|
this.embeddingService = embeddingService;
|
|
57
58
|
this.rerankerService = rerankerService;
|
|
59
|
+
this.relevanceJudge = relevanceJudge;
|
|
58
60
|
}
|
|
59
61
|
async recall(params) {
|
|
60
62
|
const startTime = Date.now();
|
|
@@ -270,6 +272,35 @@ export class MemoryRecallPipeline {
|
|
|
270
272
|
console.error("[BrainRouter] Reranker failed during recall, falling back to RRF:", e.message);
|
|
271
273
|
}
|
|
272
274
|
}
|
|
275
|
+
// Stage 4 — LLM Relevance Judge (semantic approve/reject gate)
|
|
276
|
+
//
|
|
277
|
+
// The reranker orders candidates by a learned relevance score but never
|
|
278
|
+
// *filters* — so a memory that shares vocabulary with the query but is
|
|
279
|
+
// about a different subject still makes the cut. The judge fixes that by
|
|
280
|
+
// asking a fast LLM "is each of these actually relevant?" and dropping
|
|
281
|
+
// the rejects. On any failure we keep the reranker output unchanged so a
|
|
282
|
+
// flaky judge call never breaks recall.
|
|
283
|
+
let judgeUsed = false;
|
|
284
|
+
let judgeApproved = 0;
|
|
285
|
+
let judgeRejected = 0;
|
|
286
|
+
let judgeVerdicts;
|
|
287
|
+
if (this.relevanceJudge?.isReady() && topResults.length > 0) {
|
|
288
|
+
try {
|
|
289
|
+
const judgeCandidates = topResults.map(r => ({
|
|
290
|
+
id: r.record.record_id,
|
|
291
|
+
content: r.record.content,
|
|
292
|
+
}));
|
|
293
|
+
const judgeResult = await this.relevanceJudge.judge({ query, candidates: judgeCandidates });
|
|
294
|
+
judgeUsed = true;
|
|
295
|
+
judgeVerdicts = judgeResult.verdicts;
|
|
296
|
+
judgeApproved = judgeResult.approvedIndices.length;
|
|
297
|
+
judgeRejected = topResults.length - judgeApproved;
|
|
298
|
+
topResults = judgeResult.approvedIndices.map((i) => topResults[i]);
|
|
299
|
+
}
|
|
300
|
+
catch (e) {
|
|
301
|
+
console.error("[BrainRouter] Relevance judge failed during recall, keeping reranker output:", e.message);
|
|
302
|
+
}
|
|
303
|
+
}
|
|
273
304
|
// 5. Format for context
|
|
274
305
|
const memoryLines = topResults.map(({ record }) => {
|
|
275
306
|
const tag = record.scene_name ? `${record.type}|${record.scene_name}` : record.type;
|
|
@@ -279,7 +310,13 @@ export class MemoryRecallPipeline {
|
|
|
279
310
|
}
|
|
280
311
|
return line;
|
|
281
312
|
});
|
|
282
|
-
|
|
313
|
+
// If the judge rejected everything, skip the prepend block entirely —
|
|
314
|
+
// an empty <relevant-memories> tag is worse than no tag because it
|
|
315
|
+
// implies "we looked and nothing helped," which the agent should infer
|
|
316
|
+
// from the absence of the block.
|
|
317
|
+
const prependContext = memoryLines.length > 0
|
|
318
|
+
? `<relevant-memories>\n The following memories are relevant to this query. Reference only if helpful:\n\n ${memoryLines.join("\n ")}\n</relevant-memories>`
|
|
319
|
+
: undefined;
|
|
283
320
|
// Build appendSystemContext with Contextual Focus Navigation + tools guide
|
|
284
321
|
const topScenes = this.store.getTopContextualFocus(userId, 3);
|
|
285
322
|
let appendSystemContext = "";
|
|
@@ -329,9 +366,10 @@ export class MemoryRecallPipeline {
|
|
|
329
366
|
recordId: r.record.record_id,
|
|
330
367
|
skillTag: r.record.skill_tag
|
|
331
368
|
}));
|
|
332
|
-
const
|
|
369
|
+
const baseStrategy = vecResults.length > 0
|
|
333
370
|
? (usedReranker ? "hybrid+rerank" : "hybrid")
|
|
334
371
|
: (usedReranker ? "keyword+rerank" : (filePathResults.length > 0 ? "keyword+file" : "keyword"));
|
|
372
|
+
const recallStrategy = judgeUsed ? `${baseStrategy}+judge` : baseStrategy;
|
|
335
373
|
const durationMs = Date.now() - startTime;
|
|
336
374
|
const recallExplanation = {
|
|
337
375
|
ftsHits: ftsResults.length,
|
|
@@ -342,6 +380,10 @@ export class MemoryRecallPipeline {
|
|
|
342
380
|
typeBoosts,
|
|
343
381
|
skillBoostApplied,
|
|
344
382
|
rerankerUsed: usedReranker,
|
|
383
|
+
judgeUsed,
|
|
384
|
+
judgeApproved,
|
|
385
|
+
judgeRejected,
|
|
386
|
+
judgeVerdicts,
|
|
345
387
|
graphExpansion: hasGraphExpansion,
|
|
346
388
|
citationBoosts,
|
|
347
389
|
durationMs,
|
|
@@ -388,6 +430,9 @@ export class MemoryRecallPipeline {
|
|
|
388
430
|
vecHits: explanation?.vecHits ?? 0,
|
|
389
431
|
intentDetected: explanation?.intentDetected ?? "none",
|
|
390
432
|
rerankerUsed: explanation?.rerankerUsed ?? false,
|
|
433
|
+
judgeUsed: explanation?.judgeUsed ?? false,
|
|
434
|
+
judgeApproved: explanation?.judgeApproved ?? 0,
|
|
435
|
+
judgeRejected: explanation?.judgeRejected ?? 0,
|
|
391
436
|
},
|
|
392
437
|
});
|
|
393
438
|
}
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import type { RelevanceJudgeServiceConfig, RelevanceVerdict } from "@kinqs/brainrouter-types";
|
|
2
|
+
/**
 * One retrieved memory handed to the relevance judge.
 */
export interface JudgeCandidate {
    /** Stable identifier used for logging; typically the memory's record_id. */
    id: string;

    /** The memory text that the judge reads and grades. */
    content: string;
}
|
|
8
|
+
/**
 * Outcome of a single judge call.
 */
export interface JudgeResult {
    /** Verdicts, in the order returned by the judge. */
    verdicts: RelevanceVerdict[];

    /** Indices of the candidates that the judge approved as relevant. */
    approvedIndices: number[];
}
|
|
14
|
+
/**
 * LLM-as-judge stage that sits between the reranker and context formatting.
 * It approves or rejects retrieved memories according to their actual
 * semantic relevance to the user query, so candidates that merely share
 * keywords with the query — without being about its subject — are dropped.
 *
 * Failure mode is "skip the gate": when the call errors out, callers fall
 * back to the unfiltered reranker output, so a flaky judge call never
 * crashes a recall.
 */
export declare class RelevanceJudgeService {
    private readonly enabled;
    private readonly endpoint;
    private readonly apiKey;
    private readonly model;
    private readonly maxCandidates;
    private readonly timeoutMs;
    private readonly ready;

    constructor(config: RelevanceJudgeServiceConfig);

    isReady(): boolean;

    getMaxCandidates(): number;

    /**
     * Grades a batch of candidates against the query.
     *
     * @returns the verdicts plus the subset of indices approved as relevant.
     * @throws on transport or parsing failure; callers are expected to fall
     *         back to their pre-judge results.
     */
    judge(params: {
        query: string;
        candidates: JudgeCandidate[];
    }): Promise<JudgeResult>;

    /**
     * Defensive JSON parse: strips code fences, picks the first valid JSON
     * object/array, and tolerates either {"verdicts":[…]} or a bare array.
     * Yields one verdict per candidate; entries that are missing default to
     * "rejected" so that a malformed response cannot silently approve
     * everything.
     */
    private parseVerdicts;
}
|
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
import { fetchWithExternalRetry } from "../retry.js";
|
|
2
|
+
import { acquireLLMSlot } from "../llm-semaphore.js";
|
|
3
|
+
/**
 * LLM-as-judge stage that approves or rejects retrieved memories based on
 * actual semantic relevance to the user query — sits between the reranker and
 * context formatting, dropping candidates that share keywords but aren't
 * genuinely about the query subject.
 *
 * Failure mode is "skip the gate": if the call errors out, callers fall back
 * to the unfiltered reranker output. We never want a flaky judge call to
 * crash a recall.
 */
export class RelevanceJudgeService {
    enabled;
    endpoint;
    apiKey;
    model;
    maxCandidates;
    timeoutMs;
    ready;
    /**
     * @param config Service settings. Every field is optional; the defaults
     *   are: disabled, the OpenAI chat-completions endpoint, an empty API key,
     *   model "gpt-4o-mini", 10 candidates and a 15 s timeout. `maxCandidates`
     *   is clamped to at least 1 and `timeoutMs` to at least 1000.
     */
    constructor(config) {
        this.enabled = config.enabled ?? false;
        this.endpoint = config.endpoint ?? "https://api.openai.com/v1/chat/completions";
        this.apiKey = config.apiKey ?? "";
        this.model = config.model ?? "gpt-4o-mini";
        this.maxCandidates = Math.max(1, config.maxCandidates ?? 10);
        this.timeoutMs = Math.max(1000, config.timeoutMs ?? 15_000);
        // The service is only usable when it is enabled AND has a key.
        this.ready = this.enabled && !!this.apiKey;
        if (this.enabled && !this.apiKey) {
            console.error("[BrainRouter] Relevance judge enabled but no API key set. Stage 4 judging will be skipped.");
        }
    }
    /** @returns true when the service is enabled and an API key is configured. */
    isReady() {
        return this.ready;
    }
    /** @returns the maximum number of candidates graded per `judge` call. */
    getMaxCandidates() {
        return this.maxCandidates;
    }
    /**
     * Grade a batch of candidates against the query. Returns verdicts and the
     * subset of indices approved as relevant. Throws on transport/parsing
     * failure — callers are expected to fall back to pre-judge results.
     *
     * Only the first `maxCandidates` entries of `params.candidates` are sent
     * to the model; indices in the result refer to positions in that list, so
     * candidates beyond the cap never appear in `approvedIndices`.
     *
     * @param params `query` is the user query (truncated to 800 chars in the
     *   prompt); `candidates` are the memories to grade (each truncated to
     *   600 chars in the prompt).
     * @returns `{ verdicts, approvedIndices }`; both empty when no candidates
     *   were supplied.
     * @throws Error when the service is not ready, the HTTP call fails, the
     *   endpoint returns an error envelope, or the reply cannot be parsed.
     */
    async judge(params) {
        if (!this.ready) {
            throw new Error("RelevanceJudgeService is not ready (disabled or missing API key)");
        }
        if (params.candidates.length === 0) {
            return { verdicts: [], approvedIndices: [] };
        }
        const candidates = params.candidates.slice(0, this.maxCandidates);
        const safeQuery = params.query.length > 800 ? params.query.slice(0, 800) + "…" : params.query;
        const candidateBlock = candidates
            .map((c, i) => {
            // Tolerate a missing/non-string content instead of throwing
            // before the request is even built.
            const content = typeof c.content === "string" ? c.content : String(c.content ?? "");
            const text = content.length > 600 ? content.slice(0, 600) + "…" : content;
            // Collapse whitespace so every candidate stays on one prompt line.
            return `[${i}] ${text.replace(/\s+/g, " ").trim()}`;
        })
            .join("\n");
        const systemPrompt = [
            "You are a strict relevance judge for a memory retrieval system.",
            "For each candidate memory, decide whether it is actually relevant to the user's query.",
            "A memory is RELEVANT only if it provides information that directly helps answer, contextualize, or inform the query.",
            "It is NOT relevant if it merely shares keywords, is about a different subject, or is generic background.",
            "When in doubt, reject — false positives pollute the agent's context window.",
            "Respond with strict JSON only, no prose.",
        ].join(" ");
        const userPrompt = [
            `Query: ${safeQuery}`,
            "",
            "Candidates:",
            candidateBlock,
            "",
            "Respond with exactly this JSON shape:",
            `{"verdicts":[{"index":0,"relevant":true,"reason":"…"}, …]}`,
            "Include one verdict per candidate. Keep each reason under 120 chars.",
        ].join("\n");
        // Built as a thunk so the reload retry below gets a fresh request and
        // a fresh timeout signal.
        const doFetch = () => fetchWithExternalRetry(this.endpoint, {
            method: "POST",
            headers: {
                "Content-Type": "application/json",
                "Authorization": `Bearer ${this.apiKey}`,
            },
            // Deliberately omitting `response_format` — OpenAI accepts
            // `{type:"json_object"}`, but LM Studio / llama.cpp-style backends
            // reject anything except `json_schema` or `text` with a 400, and
            // Ollama / vLLM each have their own quirks. The system prompt is
            // explicit about strict-JSON output and the parser below strips
            // code fences + tolerates surrounding prose, so dropping the hint
            // is cheaper than per-provider branching.
            body: JSON.stringify({
                model: this.model,
                messages: [
                    { role: "system", content: systemPrompt },
                    { role: "user", content: userPrompt },
                ],
                temperature: 0,
            }),
            signal: AbortSignal.timeout(this.timeoutMs),
        }, {
            label: "Relevance Judge API",
        });
        const release = await acquireLLMSlot();
        let raw;
        try {
            let res = await doFetch();
            // LM Studio quirk: idle models auto-unload and the first call after
            // unload returns 400 with "Model is unloaded" / "No models loaded".
            // The backend then loads the model in the background, so a retry
            // ~1.5s later usually succeeds. Mirrors ModelLLMRunner in engine.ts.
            if (res.status === 400) {
                const errorBody = await res.text();
                if (/model\s+(is\s+)?unloaded|model\s+not\s+loaded|no\s+models?\s+loaded/i.test(errorBody)) {
                    await new Promise((resolve) => setTimeout(resolve, 1500));
                    res = await doFetch();
                    if (!res.ok) {
                        const retryBody = await res.text().catch(() => "(no body)");
                        throw new Error(`Relevance Judge API failed after LM Studio reload retry: HTTP ${res.status} ${res.statusText} - ${retryBody}`);
                    }
                }
                else {
                    throw new Error(`Relevance Judge API failed: HTTP ${res.status} ${res.statusText} - ${errorBody}`);
                }
            }
            else if (!res.ok) {
                const err = await res.text().catch(() => "(no body)");
                throw new Error(`Relevance Judge API failed: HTTP ${res.status} ${res.statusText} - ${err}`);
            }
            const data = await res.json();
            // Some backends answer 200 with an error envelope in the body.
            if (data?.error) {
                const errMsg = typeof data.error === "string" ? data.error : (data.error.message ?? JSON.stringify(data.error).slice(0, 400));
                throw new Error(`Relevance Judge endpoint returned an error envelope: ${errMsg}`);
            }
            const choice = data?.choices?.[0];
            const content = choice?.message?.content ?? choice?.delta?.content;
            if (typeof content !== "string") {
                throw new Error(`Relevance Judge returned no usable content. Response: ${JSON.stringify(data).slice(0, 400)}`);
            }
            raw = content;
        }
        finally {
            // Always hand the slot back, including on every throw above.
            release();
        }
        const parsed = this.parseVerdicts(raw, candidates.length);
        const approvedIndices = parsed
            .filter((v) => v.relevant && v.index >= 0 && v.index < candidates.length)
            .map((v) => v.index);
        return { verdicts: parsed, approvedIndices };
    }
    /**
     * Defensive JSON parse — strips code fences, picks the first valid JSON
     * object/array, and tolerates either {"verdicts":[…]} or a bare array.
     * Returns one verdict per candidate; missing entries default to "rejected"
     * so a malformed response can't silently approve everything.
     *
     * Fail-closed rules applied to each entry:
     * - `index` must be an integer number or an all-digit string; anything
     *   else (null, "", booleans, arrays) is ignored rather than coerced to 0/1.
     * - `relevant` approves only for `true`, `1`, or the strings
     *   "true" / "yes" / "1" (case-insensitive); e.g. the string "false"
     *   is a rejection.
     *
     * @param raw Raw text returned by the model.
     * @param candidateCount Number of candidates that were sent.
     * @returns Exactly `candidateCount` verdicts, ordered by index.
     * @throws Error when no JSON value can be extracted from `raw`.
     */
    parseVerdicts(raw, candidateCount) {
        let text = raw.trim();
        text = text.replace(/^```(?:json)?\s*/i, "").replace(/```\s*$/i, "").trim();
        // Candidate payloads, most to least specific: the whole text, the
        // widest {...} span, then the widest [...] span. The array span matters
        // when the model wraps a bare array in prose: the {...} span then
        // covers "{..}, {..}", which is not valid JSON on its own.
        const attempts = [text];
        const objMatch = text.match(/\{[\s\S]*\}/);
        const arrMatch = text.match(/\[[\s\S]*\]/);
        if (objMatch)
            attempts.push(objMatch[0]);
        if (arrMatch)
            attempts.push(arrMatch[0]);
        const asVerdictList = (value) => {
            if (Array.isArray(value))
                return value;
            if (Array.isArray(value?.verdicts))
                return value.verdicts;
            return undefined;
        };
        let list;
        let parsedSomething = false;
        for (const attempt of attempts) {
            let value;
            try {
                value = JSON.parse(attempt);
            }
            catch {
                continue;
            }
            parsedSomething = true;
            list = asVerdictList(value);
            if (list)
                break;
        }
        if (!parsedSomething) {
            throw new Error(`Relevance Judge produced non-JSON output: ${text.slice(0, 200)}`);
        }
        // Valid JSON without a verdict list: every candidate is rejected below.
        list = list ?? [];
        const toIndex = (value) => {
            if (typeof value === "number")
                return Number.isInteger(value) ? value : NaN;
            if (typeof value === "string" && /^\d+$/.test(value.trim()))
                return Number(value.trim());
            return NaN;
        };
        const toRelevant = (value) => {
            if (typeof value === "boolean")
                return value;
            if (typeof value === "number")
                return value === 1;
            if (typeof value === "string")
                return /^(?:true|yes|1)$/i.test(value.trim());
            return false;
        };
        const byIndex = new Map();
        for (const item of list) {
            if (!item || typeof item !== "object")
                continue;
            const index = toIndex(item.index);
            if (!Number.isFinite(index))
                continue;
            // A later entry for the same index overwrites an earlier one.
            byIndex.set(index, {
                index,
                relevant: toRelevant(item.relevant),
                reason: typeof item.reason === "string" ? item.reason.slice(0, 200) : "",
            });
        }
        const out = [];
        for (let i = 0; i < candidateCount; i++) {
            out.push(byIndex.get(i) ?? { index: i, relevant: false, reason: "no verdict returned" });
        }
        return out;
    }
}
|
|
@@ -24,6 +24,17 @@ export function buildAnnotatedCanvas(steps, activeNodeId) {
|
|
|
24
24
|
for (let index = 1; index < steps.length; index += 1) {
|
|
25
25
|
lines.push(` ${steps[index - 1].nodeId} --> ${steps[index].nodeId}`);
|
|
26
26
|
}
|
|
27
|
+
// Reasoning steps ("Why: …" decisions emitted via memory_working_offload
|
|
28
|
+
// with kind:"reasoning") get a dashed border so the audit trail is
|
|
29
|
+
// visually separable from tool_output and compressed_summary nodes when
|
|
30
|
+
// a human (or the dashboard) inspects canvas.mmd. Emitted before the
|
|
31
|
+
// active-node fill so the active highlight overrides the dashed style
|
|
32
|
+
// when the same node happens to be both.
|
|
33
|
+
for (const step of steps) {
|
|
34
|
+
if (step.kind === "reasoning") {
|
|
35
|
+
lines.push(` style ${step.nodeId} stroke-dasharray:4 4,stroke:#9f7aea,stroke-width:2px`);
|
|
36
|
+
}
|
|
37
|
+
}
|
|
27
38
|
if (activeNodeId && steps.some((step) => step.nodeId === activeNodeId)) {
|
|
28
39
|
lines.push(` style ${activeNodeId} fill:#2b6cb0,stroke:#3182ce,stroke-width:2px,color:#fff`);
|
|
29
40
|
}
|
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
# Spec: 0.3.7 Item 6 — Terminal UI redesign + in-terminal config wizard
|
|
2
|
+
|
|
3
|
+
> Status: drafted, awaiting implementation. See [Tasks.md](../../Tasks.md)
|
|
4
|
+
> for live progress. Pairs with [`brainrouter-roadmap/0.3.7.md`](../../brainrouter-roadmap/0.3.7.md)
|
|
5
|
+
> as the headline feature of the 0.3.7 cycle.
|
|
6
|
+
|
|
7
|
+
## Objective
|
|
8
|
+
|
|
9
|
+
Make BrainRouter's CLI feel like Claude Code / OpenCode / Codex / Grok-CLI
|
|
10
|
+
/ DeepSeek-TUI — **especially around in-terminal, picker-driven
|
|
11
|
+
configuration**. Today a first-run user has to:
|
|
12
|
+
|
|
13
|
+
1. Run `brainrouter login` outside the REPL → answers a sequence of
|
|
14
|
+
`inquirer` text prompts → exits.
|
|
15
|
+
2. Run `brainrouter config` outside the REPL → answers another sequence
|
|
16
|
+
of text prompts → exits.
|
|
17
|
+
3. Then start `brainrouter` (the chat REPL) and discover everything else
|
|
18
|
+
(`/theme`, `/personality`, `/effort`, `/mode`, statusline) via
|
|
19
|
+
`/help` or by hand-editing JSON.
|
|
20
|
+
|
|
21
|
+
Peer CLIs do this differently:
|
|
22
|
+
|
|
23
|
+
- **Claude Code** — `/config` opens a tabbed settings panel; `/theme`,
|
|
24
|
+
`/model`, `/login` are arrow-key pickers; first-run shows a welcome
|
|
25
|
+
card → onboarding wizard with a step counter.
|
|
26
|
+
- **Codex** — `Step::{Welcome, Auth, TrustDirectory}` state machine
|
|
27
|
+
drives the onboarding; one modal owns multiple sub-states (`PickMode`
|
|
28
|
+
→ `ApiKeyEntry`) via an enum.
|
|
29
|
+
- **DeepSeek-TUI** — `/config` is verb-overloaded: bare opens a picker,
|
|
30
|
+
`<key>` shows, `<key> <val>` sets session, `<key> <val> --save`
|
|
31
|
+
persists. Provider picker uses a single modal with `Stage::{List,
|
|
32
|
+
KeyEntry}`; selecting an un-configured provider transitions the same
|
|
33
|
+
modal into key entry.
|
|
34
|
+
- **Grok-CLI** — first-run is a render-time `ApiKeyModal` overlay
|
|
35
|
+
inside the REPL, not a separate `login` subcommand. Each settings
|
|
36
|
+
surface (`/models`, `/mcp`, `/sandbox`, `/wallet`) is its own modal
|
|
37
|
+
that merges through a single `saveUserSettings(partial)`.
|
|
38
|
+
|
|
39
|
+
We adopt the **render-time modal** + **state-enum** + **verb-overloaded
|
|
40
|
+
slash command** patterns, on top of our existing
|
|
41
|
+
`brainrouter-cli/src/cli/cliPrompt.ts` picker primitive.
|
|
42
|
+
|
|
43
|
+
## Non-goals
|
|
44
|
+
|
|
45
|
+
- A full Ratatui/Ink-style fullscreen renderer. That's the 0.5.0 cycle.
|
|
46
|
+
- Replacing `readline` as the REPL composer. We extend the existing
|
|
47
|
+
raw-mode picker; we do not rewrite the input loop.
|
|
48
|
+
- Multi-provider OAuth flows. v0.3.7 keeps API-key auth; OAuth /
|
|
49
|
+
device-code can be a follow-up once the wizard skeleton lands.
|
|
50
|
+
- Touching the MCP server's config (`~/.config/brainrouter/server.env`).
|
|
51
|
+
The wizard reads/writes only the CLI-owned files:
|
|
52
|
+
`~/.config/brainrouter/config.json` and
|
|
53
|
+
`<workspace>/.brainrouter/cli/preferences.json`.
|
|
54
|
+
|
|
55
|
+
## Tech stack
|
|
56
|
+
|
|
57
|
+
- Same as the rest of `brainrouter-cli/`: TypeScript on Node 22+, ESM,
|
|
58
|
+
Vitest, `chalk` for color, our own `cliPrompt.askChoice` for the
|
|
59
|
+
arrow-key picker.
|
|
60
|
+
- `inquirer` is deliberately **removed from the new flows** — its
|
|
61
|
+
competing readline interface is the bug that motivated `cliPrompt.ts`
|
|
62
|
+
in the first place. Legacy `brainrouter login` / `brainrouter config`
|
|
63
|
+
subcommands stay (for back-compat) but the docs steer users toward the
|
|
64
|
+
new in-REPL flows.
|
|
65
|
+
|
|
66
|
+
## Slash command surface
|
|
67
|
+
|
|
68
|
+
Following the verb-overload pattern from DeepSeek-TUI:
|
|
69
|
+
|
|
70
|
+
| Command | Behaviour |
|
|
71
|
+
| --- | --- |
|
|
72
|
+
| `/init` | Re-runs the first-run onboarding wizard (theme → provider → model → MCP → done). Idempotent; safe to re-run. **Replaces the old `/init`, which only wrote `AGENT.md`** — that one-shot is folded into the wizard's `AgentMd` step as an opt-in toggle so we don't regress for users who relied on it. |
|
|
73
|
+
| `/config` | Bare — opens the **settings home panel** (picker over: LLM, MCP, Theme, Statusline, Effort, Mode, Review policy, Quiet, Personality, Editor mode, View raw config). |
|
|
74
|
+
| `/config <key>` | Show current value for `<key>` (e.g. `/config theme` → `theme: dark (preference)`). |
|
|
75
|
+
| `/config <key> <value>` | Set `<key>` to `<value>` and persist (writes to `~/.config/brainrouter/config.json` for LLM/MCP knobs, `<workspace>/.brainrouter/cli/preferences.json` for prefs). |
|
|
76
|
+
| `/login` | NEW slash alias for the old `brainrouter login` flow — opens the **MCP profile editor** in-REPL (transport picker → fields → reachability test → save). The standalone `brainrouter login` CLI subcommand stays for first-run-before-REPL use. |
|
|
77
|
+
| `/logout` | Already exists; unchanged. Clears API keys from the active profile. |
|
|
78
|
+
| `/theme` | Already exists. Extended with `/theme` (bare) opening the picker (live-preview the banner + prompt accent on cursor-change), confirming via ENTER, restoring on Esc. |
|
|
79
|
+
| `/model` | Already exists. Extended with bare-`/model` opening a picker over a curated short-list (gpt-4o-mini, gpt-4o, gpt-5, claude-sonnet-4, deepseek-v4, qwen3-coder, …) + "Other" for free-text. |
|
|
80
|
+
|
|
81
|
+
The bare `/config` panel is the **settings home** — it's the screen
|
|
82
|
+
that lets users discover every knob without needing to memorise the
|
|
83
|
+
slash vocabulary.
|
|
84
|
+
|
|
85
|
+
## Onboarding wizard (`/init`)
|
|
86
|
+
|
|
87
|
+
State machine — same shape as Codex `Step` and DeepSeek
|
|
88
|
+
`OnboardingState`:
|
|
89
|
+
|
|
90
|
+
```
|
|
91
|
+
Welcome → Theme → Provider → ApiKey → Model → MCP → AgentMd → Done
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
- **Welcome** — a single boxed card (~5 lines) introducing
|
|
95
|
+
BrainRouter, what gets configured, and where (path of
|
|
96
|
+
`~/.config/brainrouter/config.json`). ENTER continues, q quits
|
|
97
|
+
without writing.
|
|
98
|
+
- **Theme** — arrow-key picker over `dark / light / mono / auto`,
|
|
99
|
+
with **live preview**: on cursor-change, redraw the banner accent so
|
|
100
|
+
the user sees the change before confirming. Persists to
|
|
101
|
+
`preferences.theme` on confirm.
|
|
102
|
+
- **Provider** — picker over a curated provider list:
|
|
103
|
+
`OpenAI · DeepSeek · OpenRouter · Anthropic (via gateway) · Gemini
|
|
104
|
+
(OpenAI-compat) · LM Studio (local) · Ollama (local) · Custom…`.
|
|
105
|
+
Each row shows a one-line hint (endpoint, "(local)" / "(cloud)",
|
|
106
|
+
needs-key indicator). Pre-detects from env vars
|
|
107
|
+
(`OPENAI_API_KEY`, `DEEPSEEK_API_KEY`, `OPENROUTER_API_KEY`, etc.)
|
|
108
|
+
— pre-selects the row whose key is already present in the shell.
|
|
109
|
+
- **ApiKey** — free-text entry (re-uses the picker's `awaitingOther`
|
|
110
|
+
free-text mode). Pre-filled from env when available. Validation
|
|
111
|
+
tier `Accept{warning?}` / `Reject(reason)`: empty for local
|
|
112
|
+
endpoints is OK; non-empty with `sk-`-shape prefix or vendor-known
|
|
113
|
+
shape is OK; everything else is **accepted with a warning** (not
|
|
114
|
+
rejected — vendors invent new prefixes all the time). Mask to last
|
|
115
|
+
4 chars after entry.
|
|
116
|
+
- **Model** — picker over the provider's curated short-list +
|
|
117
|
+
"Other" for free-text. Defaults: OpenAI → `gpt-4o-mini`, DeepSeek
|
|
118
|
+
→ `deepseek-chat`, OpenRouter → `anthropic/claude-sonnet-4`,
|
|
119
|
+
LM Studio → blank (user picks from loaded models), etc.
|
|
120
|
+
- **MCP** — picker over `Local stdio (brainrouter-mcp on PATH) ·
|
|
121
|
+
Local HTTP (http://localhost:3747/mcp) · Remote HTTP… (free-text
|
|
122
|
+
URL) · Skip (offline-only)`. On confirm, run a single
|
|
123
|
+
`mcpClient.listTools()` reachability test (5s timeout); on success
|
|
124
|
+
save the profile and mark it active; on failure offer "save
|
|
125
|
+
anyway" / "try a different transport" / "skip".
|
|
126
|
+
- **AgentMd** — yes/no: "Write AGENT.md to this workspace? (helps
|
|
127
|
+
agents understand your repo conventions)". Defaults to **yes** if
|
|
128
|
+
no `AGENT.md` / `CLAUDE.md` exists yet, **no** otherwise. This is
|
|
129
|
+
the toggle that absorbs the old `/init` behaviour.
|
|
130
|
+
- **Done** — summary card showing the saved config (with API keys
|
|
131
|
+
masked) + next steps (`/help`, `/config`, `/where`). Marker file
|
|
132
|
+
written to `~/.config/brainrouter/.onboarded` so subsequent CLI
|
|
133
|
+
starts skip the wizard. Re-running `/init` is always allowed.
|
|
134
|
+
|
|
135
|
+
**Skip semantics.** At any step `Esc` backs out one state; `q`
|
|
136
|
+
aborts the whole wizard with **no changes saved** (everything is
|
|
137
|
+
journalled in-memory until the Done step commits the write).
|
|
138
|
+
|
|
139
|
+
**Auto-trigger.** When the user runs `brainrouter` (the chat command)
|
|
140
|
+
and no `~/.config/brainrouter/config.json` exists, instead of the
|
|
141
|
+
current "Run \`brainrouter login\` …" error-and-exit, we drop them
|
|
142
|
+
**straight into the wizard inside the REPL** (no separate subcommand).
|
|
143
|
+
This is the grok-cli `ApiKeyModal` pattern — the modal IS the
|
|
144
|
+
onboarding.
|
|
145
|
+
|
|
146
|
+
## Settings home panel (`/config`)
|
|
147
|
+
|
|
148
|
+
Picker over the settings categories. Each row shows the current value
|
|
149
|
+
on the right so the user can scan the state at a glance:
|
|
150
|
+
|
|
151
|
+
```
|
|
152
|
+
┌─ ⚙️ BrainRouter Config ────────────────────────────────────────┐
|
|
153
|
+
│ │
|
|
154
|
+
│ ▶ LLM provider openai · gpt-4o-mini │
|
|
155
|
+
│ MCP profile default · http · 🟢 online │
|
|
156
|
+
│ Theme dark │
|
|
157
|
+
│ Statusline mode,branch,workflow,goal │
|
|
158
|
+
│ Reasoning effort medium │
|
|
159
|
+
│ Execution mode planning │
|
|
160
|
+
│ Review policy request │
|
|
161
|
+
│ Quiet mode off │
|
|
162
|
+
│ Personality standard │
|
|
163
|
+
│ Editor mode emacs │
|
|
164
|
+
│ │
|
|
165
|
+
│ View raw config │
|
|
166
|
+
│ Quit (Esc) │
|
|
167
|
+
│ │
|
|
168
|
+
│ ↑/↓ navigate · ENTER edit · Esc to quit │
|
|
169
|
+
└─────────────────────────────────────────────────────────────────┘
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
Selecting a row opens the appropriate sub-picker (provider picker,
|
|
173
|
+
theme picker, etc.). On confirm, returns to the home panel with the
|
|
174
|
+
new value visible. Esc backs out one level (sub-picker → home → exit).
|
|
175
|
+
|
|
176
|
+
The "View raw config" row prints the scrubbed JSON (today's bare
|
|
177
|
+
`/config` behaviour) so users who prefer the JSON view can still get
|
|
178
|
+
it without leaving the panel.
|
|
179
|
+
|
|
180
|
+
## Persistence contract
|
|
181
|
+
|
|
182
|
+
One central writer per file. No scattered `fs.writeFileSync` calls.
|
|
183
|
+
|
|
184
|
+
| File | Wrapper | Owns |
|
|
185
|
+
| --- | --- | --- |
|
|
186
|
+
| `~/.config/brainrouter/config.json` | `saveConfig(partial)` in [`config/config.ts`](../../brainrouter-cli/src/config/config.ts) — existing function, called with the merged result. | LLM provider, MCP profiles, active profile. |
|
|
187
|
+
| `<workspace>/.brainrouter/cli/preferences.json` | `writePreferences(workspaceRoot, partial)` in [`state/preferencesStore.ts`](../../brainrouter-cli/src/state/preferencesStore.ts) — existing function; already merges. | Theme, statusline, effort, exec mode, review policy, quiet, personality, editor, sandbox grants. |
|
|
188
|
+
| `~/.config/brainrouter/.onboarded` | New `markOnboarded()` helper alongside `config/config.ts`. | Empty file; presence = wizard has run at least once. |
|
|
189
|
+
|
|
190
|
+
The wizard accumulates intent in an in-memory `Draft` object across
|
|
191
|
+
all steps; the commit happens **once** at the Done step. Aborting via
|
|
192
|
+
`q` discards the draft.
|
|
193
|
+
|
|
194
|
+
## Picker primitive — additions to `cliPrompt.ts`
|
|
195
|
+
|
|
196
|
+
Today's `askChoice` is sufficient for confirm-only pickers; the
|
|
197
|
+
redesign needs two new behaviours we can fold in as optional fields:
|
|
198
|
+
|
|
199
|
+
1. **`onCursorChange(index)` callback** — fired by `reducePicker` after
|
|
200
|
+
an `up`/`down` keystroke if the cursor position changed. Theme picker
|
|
201
|
+
uses it to repaint the banner accent live.
|
|
202
|
+
2. **`prefilledOther?: string`** — lets the wizard drop the
|
|
203
|
+
user straight into the `awaitingOther` free-text input pre-filled
|
|
204
|
+
with the env-var value (so ENTER accepts the env-derived default,
|
|
205
|
+
editing overrides it).
|
|
206
|
+
|
|
207
|
+
Both are additive; existing callers keep working with `undefined`.
|
|
208
|
+
|
|
209
|
+
## Boundary rules (per `spec-driven-skill`)
|
|
210
|
+
|
|
211
|
+
- **Always:** route all settings persistence through
|
|
212
|
+
`saveConfig` / `writePreferences`; mask API keys to last 4 chars on
|
|
213
|
+
every render; pause the parent `rl` while a picker is active; close
|
|
214
|
+
with the cursor visible.
|
|
215
|
+
- **Ask first:** changing `~/.config/brainrouter/config.json` shape
|
|
216
|
+
(any new top-level key) — needs explicit user sign-off because it
|
|
217
|
+
surfaces in `/debug-config`.
|
|
218
|
+
- **Never:** write API keys to disk in plain text under the workspace
|
|
219
|
+
(only under `~/.config/brainrouter/`); silently overwrite an
|
|
220
|
+
existing config without confirmation; default to option 1 in
|
|
221
|
+
non-TTY mode (mirrors the existing `NoTTYError` contract).
|
|
222
|
+
|
|
223
|
+
## Definition of Done
|
|
224
|
+
|
|
225
|
+
1. `brainrouter` started against a clean `$HOME` (no `~/.config/brainrouter/`)
|
|
226
|
+
drops into the wizard automatically — no `brainrouter login`
|
|
227
|
+
needed.
|
|
228
|
+
2. `/init` re-runs the wizard at any time inside the REPL.
|
|
229
|
+
3. `/config` (bare) opens the settings home panel; arrow-keys
|
|
230
|
+
navigate; ENTER opens a sub-picker; Esc backs out.
|
|
231
|
+
4. `/config theme dark` sets theme to dark and prints the new value
|
|
232
|
+
without opening any picker.
|
|
233
|
+
5. Theme picker live-previews on cursor-change and restores the
|
|
234
|
+
original theme if the user presses Esc.
|
|
235
|
+
6. Tests: pure-function tests for the wizard reducer (`stepReducer`,
|
|
236
|
+
`Draft` shape, validation tier), `Vitest` tests for the
|
|
237
|
+
`/config` argument parser (bare / get / set), and one
|
|
238
|
+
`cliPrompt.ts` test for the new `onCursorChange` callback.
|
|
239
|
+
7. Docs: `brainrouter-docs/cli.md` and `brainrouter-docs/configuration.md`
|
|
240
|
+
are updated end-to-end. README's "First-time setup" section
|
|
241
|
+
collapses to "run `brainrouter`; the wizard takes over."
|
|
242
|
+
8. No regressions: `npm run test --workspace brainrouter-cli` stays
|
|
243
|
+
green; the legacy `brainrouter login` / `brainrouter config`
|
|
244
|
+
subcommands still work for users who scripted around them.
|
|
245
|
+
|
|
246
|
+
## Open questions
|
|
247
|
+
|
|
248
|
+
- **Should `/init` skip the wizard if the marker file exists**, or
|
|
249
|
+
always re-run? Current answer: always re-run when invoked
|
|
250
|
+
explicitly; only **auto-trigger on REPL start** when the marker is
|
|
251
|
+
missing. This matches Codex's "explicit `--login` always honoured"
|
|
252
|
+
contract.
|
|
253
|
+
- **Should the MCP step probe both stdio and HTTP**, or just the
|
|
254
|
+
user's pick? Current answer: just the pick. Probing both adds 5s
|
|
255
|
+
to the wizard for unclear payoff. Users can re-run `/init` if they
|
|
256
|
+
want to try the other transport.
|
|
257
|
+
- **Should the wizard offer to install missing system deps** (gh,
|
|
258
|
+
jq, ripgrep)? Current answer: no. Out of scope for v0.3.7. A
|
|
259
|
+
future `/doctor --fix` could handle it.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@kinqs/brainrouter-mcp-server",
|
|
3
|
-
"version": "0.3.5",
|
|
3
|
+
"version": "0.3.7",
|
|
4
4
|
"description": "BrainRouter MCP server — the cognitive memory engine. Exposes recall, capture, focus scenes, persona, contradictions, skills, and graph queries as MCP tools for any MCP-speaking agent.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -45,7 +45,7 @@
|
|
|
45
45
|
"gray-matter": "^4.0.3",
|
|
46
46
|
"sqlite-vec": "^0.1.9",
|
|
47
47
|
"zod": "^3.22.4",
|
|
48
|
-
"@kinqs/brainrouter-types": "^0.3.
|
|
48
|
+
"@kinqs/brainrouter-types": "^0.3.6"
|
|
49
49
|
},
|
|
50
50
|
"engines": {
|
|
51
51
|
"node": ">=22.0.0"
|
package/dist/memory/config.d.ts
DELETED
package/dist/memory/config.js
DELETED
|
@@ -1,7 +0,0 @@
|
|
|
1
|
-
import type { IMemoryStore } from "@brainrouter/types";
|
|
2
|
-
import type { LLMRunner, L1Record } from "@brainrouter/types";
|
|
3
|
-
export declare function detectContradictions(params: {
|
|
4
|
-
newRecord: L1Record;
|
|
5
|
-
store: IMemoryStore;
|
|
6
|
-
llmRunner: LLMRunner;
|
|
7
|
-
}): Promise<void>;
|