@j0hanz/cortex-mcp 1.5.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,6 +13,6 @@ interface ReasonOptions {
13
13
  isConclusion?: boolean;
14
14
  rollbackToStep?: number;
15
15
  abortSignal?: AbortSignal;
16
- onProgress?: (progress: number, total: number) => void | Promise<void>;
16
+ onProgress?: (progress: number, total: number, stepSummary?: string) => void | Promise<void>;
17
17
  }
18
18
  export declare function reason(query: string, level: ReasoningLevel | undefined, options?: ReasonOptions): Promise<Readonly<Session>>;
@@ -63,7 +63,7 @@ export async function reason(query, level, options) {
63
63
  sessionStore.markCompleted(session.id);
64
64
  }
65
65
  if (onProgress) {
66
- await onProgress(addedThought.index + 1, totalThoughts);
66
+ await onProgress(addedThought.index + 1, totalThoughts, stepSummary);
67
67
  throwIfReasoningAborted(abortSignal);
68
68
  }
69
69
  return getSessionOrThrow(session.id);
@@ -8,12 +8,14 @@ export declare class SessionStore {
8
8
  private oldestSessionId;
9
9
  private newestSessionId;
10
10
  private sortedSessionIdsCache;
11
- private readonly cleanupInterval;
11
+ private cleanupInterval;
12
12
  private readonly ttlMs;
13
13
  private readonly maxSessions;
14
14
  private readonly maxTotalTokens;
15
15
  private totalTokens;
16
16
  constructor(ttlMs?: number, maxSessions?: number, maxTotalTokens?: number);
17
+ ensureCleanupTimer(): void;
18
+ dispose(): void;
17
19
  create(level: ReasoningLevel, totalThoughts?: number): Readonly<Session>;
18
20
  get(id: string): Readonly<Session> | undefined;
19
21
  getSummary(id: string): Readonly<SessionSummary> | undefined;
@@ -33,12 +33,25 @@ export class SessionStore {
33
33
  this.ttlMs = ttlMs;
34
34
  this.maxSessions = maxSessions;
35
35
  this.maxTotalTokens = maxTotalTokens;
36
- const sweepInterval = resolveSweepInterval(ttlMs);
36
+ this.ensureCleanupTimer();
37
+ }
38
+ ensureCleanupTimer() {
39
+ if (this.cleanupInterval) {
40
+ return;
41
+ }
42
+ const sweepInterval = resolveSweepInterval(this.ttlMs);
37
43
  this.cleanupInterval = setInterval(() => {
38
44
  this.sweep();
39
45
  }, sweepInterval);
40
46
  this.cleanupInterval.unref();
41
47
  }
48
+ dispose() {
49
+ if (!this.cleanupInterval) {
50
+ return;
51
+ }
52
+ clearInterval(this.cleanupInterval);
53
+ this.cleanupInterval = undefined;
54
+ }
42
55
  create(level, totalThoughts) {
43
56
  this.evictIfAtCapacity();
44
57
  const config = getLevelConfig(level);
@@ -30,12 +30,6 @@ const TOOL_CONTRACTS = [
30
30
  required: false,
31
31
  constraints: '1-100,000 chars',
32
32
  },
33
- {
34
- name: 'thoughts',
35
- type: 'string[]',
36
- required: false,
37
- constraints: 'optional',
38
- },
39
33
  {
40
34
  name: 'targetThoughts',
41
35
  type: 'number',
@@ -9,4 +9,9 @@ export declare function getTargetThoughtsError(level: ReasoningLevel, targetThou
9
9
  * if the variable is absent or invalid. Values below `minimum` also fall back.
10
10
  */
11
11
  export declare function parsePositiveIntEnv(name: string, fallback: number, minimum?: number): number;
12
+ /**
13
+ * Parse a boolean from an environment variable, returning `fallback` when absent
14
+ * or when the value is not a recognized boolean literal.
15
+ */
16
+ export declare function parseBooleanEnv(name: string, fallback: boolean): boolean;
12
17
  export declare function collectPrefixMatches(candidates: readonly string[], value: string, limit: number): string[];
@@ -30,6 +30,24 @@ export function parsePositiveIntEnv(name, fallback, minimum = 1) {
30
30
  }
31
31
  return parsed;
32
32
  }
33
+ /**
34
+ * Parse a boolean from an environment variable, returning `fallback` when absent
35
+ * or when the value is not a recognized boolean literal.
36
+ */
37
+ export function parseBooleanEnv(name, fallback) {
38
+ const raw = process.env[name];
39
+ if (raw === undefined) {
40
+ return fallback;
41
+ }
42
+ const normalized = raw.trim().toLowerCase();
43
+ if (['1', 'true', 'yes', 'on'].includes(normalized)) {
44
+ return true;
45
+ }
46
+ if (['0', 'false', 'no', 'off'].includes(normalized)) {
47
+ return false;
48
+ }
49
+ return fallback;
50
+ }
33
51
  export function collectPrefixMatches(candidates, value, limit) {
34
52
  const results = [];
35
53
  for (const candidate of candidates) {
@@ -40,17 +40,21 @@ function createTextPrompt(text) {
40
40
  function buildPromptText(args) {
41
41
  const { context, task, constraints, output } = args;
42
42
  return [
43
- '# Context',
43
+ '<context>',
44
44
  ...context,
45
+ '</context>',
45
46
  '',
46
- '# Task',
47
+ '<task>',
47
48
  ...task,
49
+ '</task>',
48
50
  '',
49
- '# Constraints',
51
+ '<constraints>',
50
52
  ...constraints.map((line) => `- ${line}`),
53
+ '</constraints>',
51
54
  '',
52
- '# Output',
55
+ '<output_format>',
53
56
  ...output,
57
+ '</output_format>',
54
58
  ].join('\n');
55
59
  }
56
60
  function buildStartReasoningPrompt(args) {
@@ -64,20 +68,20 @@ function buildStartReasoningPrompt(args) {
64
68
  : String(targetThoughts)}`,
65
69
  ],
66
70
  task: [
67
- `Start a new reasoning session using "${REASONING_TOOL_NAME}".`,
68
- 'Create the first step with a complete, concrete reasoning thought.',
71
+ `Start new reasoning session via "${REASONING_TOOL_NAME}".`,
72
+ 'Generate the first concrete reasoning step.',
69
73
  ],
70
74
  constraints: [
71
75
  THOUGHT_PARAMETER_GUIDANCE,
72
- 'Preserve sessionId from the response for continuation calls.',
73
- 'Continue until status is completed or remainingThoughts is 0.',
76
+ 'Preserve sessionId for continuation.',
77
+ 'Continue until status="completed" or remainingThoughts=0.',
74
78
  ],
75
79
  output: [
76
- 'Return the first tool call payload only.',
77
- 'Fields: query, level, thought, and optional targetThoughts.',
80
+ 'Return exactly one tool call payload.',
81
+ 'Required fields: query, level, thought.',
78
82
  ],
79
83
  });
80
- return `${base}\n\n---\n\n${getTemplate(level)}`;
84
+ return `${base}\n\n${getTemplate(level)}`;
81
85
  }
82
86
  function buildRetryReasoningPrompt(args) {
83
87
  const { query, level, targetThoughts } = args;
@@ -90,18 +94,18 @@ function buildRetryReasoningPrompt(args) {
90
94
  : String(targetThoughts)}`,
91
95
  ],
92
96
  task: [
93
- `Retry by calling "${REASONING_TOOL_NAME}" with an improved first thought.`,
97
+ `Retry calling "${REASONING_TOOL_NAME}" with an improved first thought.`,
94
98
  ],
95
99
  constraints: [
96
100
  THOUGHT_PARAMETER_GUIDANCE,
97
- 'Use a direct and specific thought with no filler language.',
101
+ 'Write a direct, specific thought. No filler.',
98
102
  ],
99
103
  output: [
100
- 'Return one tool call payload only.',
101
- 'Fields: query, level, thought, and optional targetThoughts.',
104
+ 'Return exactly one tool call payload.',
105
+ 'Required fields: query, level, thought.',
102
106
  ],
103
107
  });
104
- return `${base}\n\n---\n\n${getTemplate(level)}`;
108
+ return `${base}\n\n${getTemplate(level)}`;
105
109
  }
106
110
  function buildContinueReasoningPrompt(args) {
107
111
  const { sessionId, query, level, targetThoughts } = args;
@@ -119,17 +123,17 @@ function buildContinueReasoningPrompt(args) {
119
123
  : String(targetThoughts)}`,
120
124
  ],
121
125
  task: [
122
- `Continue the existing session using "${REASONING_TOOL_NAME}".`,
123
- 'Generate the next reasoning step only.',
126
+ `Continue session via "${REASONING_TOOL_NAME}".`,
127
+ 'Generate the next reasoning step.',
124
128
  ],
125
129
  constraints: [
126
130
  THOUGHT_PARAMETER_GUIDANCE,
127
- 'Keep the same sessionId in the call payload.',
128
- 'Prefer concise, concrete reasoning over meta commentary.',
131
+ 'Keep the same sessionId.',
132
+ 'Write concrete reasoning. No meta commentary.',
129
133
  ],
130
134
  output: [
131
- 'Return one continuation tool call payload only.',
132
- 'Fields: sessionId, thought, and optional query/level/targetThoughts.',
135
+ 'Return exactly one continuation tool call payload.',
136
+ 'Required fields: sessionId, thought.',
133
137
  ],
134
138
  });
135
139
  }
@@ -3,99 +3,220 @@
3
3
  // Each template shows correct `thought` depth and step count so the LLM
4
4
  // calibrates its output to the selected level requirements.
5
5
  // ---------------------------------------------------------------------------
6
- const BASIC_TEMPLATE = `## Guided Example (basic — 3 thoughts)
7
-
8
- **Query:** "Is Set or Array better for deduplicating a list of strings in JavaScript?"
9
-
10
- **Thought 1 of 3:**
11
- > A \`Set\` enforces uniqueness automatically; \`[...new Set(arr)]\` is a single-step O(n) operation. An Array-based approach (\`filter\` + \`indexOf\`) is O(n²) and adds unnecessary complexity.
12
-
13
- **Thought 2 of 3:**
14
- > Edge cases: both preserve insertion order in modern JS engines, so ordering is not a differentiator. \`Set\` converts all values to a common reference internally, which handles string equality correctly.
15
-
16
- **Thought 3 of 3 — conclusion:**
17
- > Use \`Set\`. It is faster (O(n) vs O(n²)), shorter to write, and immediately communicates the intent of deduplication. \`[...new Set(list)]\` is the idiomatic solution.
18
-
19
- ---
20
- **System Directive:** Follow the pattern above. Each \`thought\` must contain self-contained, concrete analysis — no filler language or meta-commentary. Use 3 to 5 thoughts total.`;
21
- const NORMAL_TEMPLATE = `## Guided Example (normal — 7 thoughts)
22
-
23
- **Query:** "How do I safely handle concurrent writes to a shared in-memory counter in a Node.js service?"
24
-
25
- **Thought 1 of 7:**
26
- > Node.js is single-threaded: synchronous code is never interrupted mid-execution, so a plain \`counter++\` within one event-loop tick is effectively atomic from JavaScript's perspective.
27
-
28
- **Thought 2 of 7:**
29
- > The risk arises in *async* code. A read-modify-write spanning an \`await\` boundary is NOT atomic. Two concurrent callers can both read the same value before either writes, causing a lost update.
30
-
31
- **Thought 3 of 7:**
32
- > Example race: \`const v = await db.get('c'); await db.set('c', v + 1);\` — if two requests interleave at the \`await\` points, both read \`v=5\` and both write \`6\`, losing one increment.
33
-
34
- **Thought 4 of 7:**
35
- > Solution A — atomic DB operation: \`UPDATE counter SET n = n + 1 RETURNING n\` (SQL) or Redis \`INCR\`. The DB engine serialises the read-modify-write internally with no async gap.
36
-
37
- **Thought 5 of 7:**
38
- > Solution B — async mutex: use a library-level lock (e.g. \`async-mutex\`) to serialise access. Works for in-process state but does not scale across multiple processes or restarts.
39
-
40
- **Thought 6 of 7:**
41
- > Solution C synchronous in-memory only: keep the counter as a plain variable, increment with \`counter++\` (no \`await\` in the read-modify-write path). Valid only for single-process, ephemeral state.
42
-
43
- **Thought 7 of 7 — conclusion:**
44
- > Prefer Solution A (atomic DB op) for correctness across restarts and multi-process deployments. Use Solution C only for in-process, non-persisted counters where an \`await\` never touches the variable. Avoid async read-modify-write without a mutex.
45
-
46
- ---
47
- **System Directive:** Follow the pattern above. Each \`thought\` must be concrete and progress the analysis. Use 6 to 10 thoughts total; avoid restating earlier thoughts.`;
48
- const HIGH_TEMPLATE = `## Guided Example (high — 15 thoughts)
49
-
50
- **Query:** "Our Node.js API latency jumped from p50=20ms to p50=800ms after a dependency upgrade. How do I diagnose and fix this?"
51
-
52
- **Thought 1 of 15:**
53
- > Establish the change boundary: run \`git log --oneline\` to find the upgrade commit. Use \`git bisect\` between the last known-good tag and HEAD to confirm the exact commit that caused the regression.
54
-
55
- **Thought 2 of 15:**
56
- > Collect baseline metrics before touching anything: event-loop lag (\`perf_hooks.monitorEventLoopDelay\`), GC pause times (\`--expose-gc\` + \`PerformanceObserver\`), and per-route timings. This separates compute regressions from I/O regressions.
57
-
58
- **Thought 3 of 15:**
59
- > If event-loop lag is high (>50ms per tick), the cause is synchronous blocking inserted into the hot path — JSON serialisation of large objects, synchronous file I/O, regex backtracking, or CPU-heavy validation.
60
-
61
- **Thought 4 of 15:**
62
- > If event-loop lag is low but p50 is high, the bottleneck is I/O wait: slow DB queries, connection-pool exhaustion, DNS resolution delays, or increased network RTT to the upgraded service.
63
-
64
- **Thought 5 of 15:**
65
- > Read the dependency's changelog between the old and new version. Look for: new middleware injected at startup, serialisation format changes, default timeout changes, or connection-pool default reductions.
66
-
67
- **Thought 6 of 15:**
68
- > Profile with \`clinic.js flame\` (or \`node --prof\` + \`node --prof-process\`) under representative load. The flame graph will pinpoint whether wall-clock time is in JS compute vs. idle I/O await.
69
-
70
- **Thought 7 of 15:**
71
- > Write a minimal reproduction that calls *only* the upgraded package's API with representative input. Benchmark it against the pinned old version in isolation to confirm the package itself is the source.
72
-
73
- **Thought 8 of 15:**
74
- > Common 40× regression patterns: (a) added synchronous schema validation on every request, (b) HTTP/1.1 → HTTP/2 frame parsing overhead, (c) new middleware that buffers the full request body before routing.
75
-
76
- **Thought 9 of 15:**
77
- > Check connection-pool configuration: if the upgrade changed default pool size or idle timeout, requests may queue waiting for connections. Inspect \`pool.min\`, \`pool.max\`, and \`acquireTimeoutMillis\` in the new version's defaults.
78
-
79
- **Thought 10 of 15:**
80
- > Check middleware registration order: some packages inject global middleware at \`require\`-time. A slow middleware (e.g., large-payload body parser) before fast routes affects all endpoints even if the route itself is unchanged.
81
-
82
- **Thought 11 of 15:**
83
- > Immediate mitigation: pin the dependency to the last known-good version (\`npm install dep@x.y.z\`) and deploy to restore SLA while the full investigation continues. Add a TODO linking to the issue tracker.
84
-
85
- **Thought 12 of 15:**
86
- > If the regression is a bug in the dependency, open an issue with the minimal reproduction from Thought 7. Check if a patch release or a configuration flag exists to disable the slow behaviour.
87
-
88
- **Thought 13 of 15:**
89
- > If the slow path is unavoidable, mitigation options: (a) cache the expensive result at the request or process level, (b) offload CPU work to a \`worker_threads\` worker, (c) evaluate an alternative package.
90
-
91
- **Thought 14 of 15:**
92
- > After applying the fix, run the same load test that revealed the regression. Confirm p50 and p99 return to baseline and do not diverge under sustained load. Check that GC pressure did not increase.
93
-
94
- **Thought 15 of 15 — conclusion:**
95
- > Diagnosis path: git bisect → event-loop lag check → clinic.js flame graph → isolated package benchmark → changelog review → pool/middleware audit. Mitigation: pin version immediately. Fix: configure, cache, or replace. Prevention: add a latency benchmark target to CI.
96
-
97
- ---
98
- **System Directive:** Follow the pattern above. Each \`thought\` must be specific, advancing the investigation — no summaries of prior steps, no filler. Use 15 to 25 thoughts total; scale depth to complexity.`;
6
+ const BASIC_TEMPLATE = `<example>
7
+ <query>Is Set or Array better for deduplicating a list of strings in JavaScript?</query>
8
+
9
+ <thought_process>
10
+ <step index="1" total="3">
11
+ <thought>
12
+ [Observation] A \`Set\` enforces uniqueness automatically; \`[...new Set(arr)]\` is a single-step O(n) operation. An Array-based approach (\`filter\` + \`indexOf\`) is O(n²) and adds unnecessary complexity.
13
+ </thought>
14
+ <step_summary>Set is O(n) while Array filter is O(n²).</step_summary>
15
+ </step>
16
+
17
+ <step index="2" total="3">
18
+ <thought>
19
+ [Evaluation] Edge cases: both preserve insertion order in modern JS engines, so ordering is not a differentiator. \`Set\` converts all values to a common reference internally, which handles string equality correctly.
20
+ </thought>
21
+ <step_summary>Both preserve order, but Set handles string equality natively.</step_summary>
22
+ </step>
23
+
24
+ <step index="3" total="3">
25
+ <thought>
26
+ [Conclusion] Use \`Set\`. It is faster (O(n) vs O(n²)), shorter to write, and immediately communicates the intent of deduplication. \`[...new Set(list)]\` is the idiomatic solution.
27
+ </thought>
28
+ <step_summary>Set is the optimal and idiomatic choice.</step_summary>
29
+ </step>
30
+ </thought_process>
31
+ </example>
32
+
33
+ <constraints>
34
+ - Match the depth and quality of the example above.
35
+ - Structure reasoning using: [Observation], [Hypothesis], [Evaluation], [Conclusion].
36
+ - Write concrete, self-contained thoughts. No filler.
37
+ - Provide a 1-sentence \`step_summary\` per step.
38
+ - Total thoughts: 3 to 5.
39
+ </constraints>`;
40
+ const NORMAL_TEMPLATE = `<example>
41
+ <query>How do I safely handle concurrent writes to a shared in-memory counter in a Node.js service?</query>
42
+
43
+ <thought_process>
44
+ <step index="1" total="7">
45
+ <thought>
46
+ [Observation] Node.js is single-threaded: synchronous code is never interrupted mid-execution, so a plain \`counter++\` within one event-loop tick is effectively atomic from JavaScript's perspective.
47
+ </thought>
48
+ <step_summary>Synchronous increments in Node.js are atomic.</step_summary>
49
+ </step>
50
+
51
+ <step index="2" total="7">
52
+ <thought>
53
+ [Hypothesis] The risk arises in *async* code. A read-modify-write spanning an \`await\` boundary is NOT atomic. Two concurrent callers can both read the same value before either writes, causing a lost update.
54
+ </thought>
55
+ <step_summary>Async read-modify-write operations are not atomic and risk lost updates.</step_summary>
56
+ </step>
57
+
58
+ <step index="3" total="7">
59
+ <thought>
60
+ [Evaluation] Example race: \`const v = await db.get('c'); await db.set('c', v + 1);\` — if two requests interleave at the \`await\` points, both read \`v=5\` and both write \`6\`, losing one increment.
61
+ </thought>
62
+ <step_summary>Interleaved async operations lead to data races.</step_summary>
63
+ </step>
64
+
65
+ <step index="4" total="7">
66
+ <thought>
67
+ [Strategy] Solution A — atomic DB operation: \`UPDATE counter SET n = n + 1 RETURNING n\` (SQL) or Redis \`INCR\`. The DB engine serialises the read-modify-write internally with no async gap.
68
+ </thought>
69
+ <step_summary>Database-level atomic operations prevent races.</step_summary>
70
+ </step>
71
+
72
+ <step index="5" total="7">
73
+ <thought>
74
+ [Strategy] Solution B — async mutex: use a library-level lock (e.g. \`async-mutex\`) to serialise access. Works for in-process state but does not scale across multiple processes or restarts.
75
+ </thought>
76
+ <step_summary>In-process mutexes work but don't scale horizontally.</step_summary>
77
+ </step>
78
+
79
+ <step index="6" total="7">
80
+ <thought>
81
+ [Strategy] Solution C — synchronous in-memory only: keep the counter as a plain variable, increment with \`counter++\` (no \`await\` in the read-modify-write path). Valid only for single-process, ephemeral state.
82
+ </thought>
83
+ <step_summary>Synchronous in-memory counters are safe for ephemeral, single-process state.</step_summary>
84
+ </step>
85
+
86
+ <step index="7" total="7">
87
+ <thought>
88
+ [Conclusion] Prefer Solution A (atomic DB op) for correctness across restarts and multi-process deployments. Use Solution C only for in-process, non-persisted counters where an \`await\` never touches the variable. Avoid async read-modify-write without a mutex.
89
+ </thought>
90
+ <step_summary>Use DB atomic ops for persistence, or sync variables for ephemeral state.</step_summary>
91
+ </step>
92
+ </thought_process>
93
+ </example>
94
+
95
+ <constraints>
96
+ - Match the depth and quality of the example above.
97
+ - Structure reasoning using: [Observation], [Hypothesis], [Evaluation], [Strategy], [Conclusion].
98
+ - Write concrete thoughts that progress the analysis. Do not restate earlier thoughts.
99
+ - Provide a 1-sentence \`step_summary\` per step.
100
+ - Total thoughts: 6 to 10.
101
+ </constraints>`;
102
+ const HIGH_TEMPLATE = `<example>
103
+ <query>Our Node.js API latency jumped from p50=20ms to p50=800ms after a dependency upgrade. How do I diagnose and fix this?</query>
104
+
105
+ <thought_process>
106
+ <step index="1" total="15">
107
+ <thought>
108
+ [Strategy] Establish the change boundary: run \`git log --oneline\` to find the upgrade commit. Use \`git bisect\` between the last known-good tag and HEAD to confirm the exact commit that caused the regression.
109
+ </thought>
110
+ <step_summary>Isolate the exact commit causing the regression using git bisect.</step_summary>
111
+ </step>
112
+
113
+ <step index="2" total="15">
114
+ <thought>
115
+ [Observation] Collect baseline metrics before touching anything: event-loop lag (\`perf_hooks.monitorEventLoopDelay\`), GC pause times (\`--expose-gc\` + \`PerformanceObserver\`), and per-route timings. This separates compute regressions from I/O regressions.
116
+ </thought>
117
+ <step_summary>Collect baseline metrics to distinguish compute vs I/O regressions.</step_summary>
118
+ </step>
119
+
120
+ <step index="3" total="15">
121
+ <thought>
122
+ [Hypothesis] If event-loop lag is high (>50ms per tick), the cause is synchronous blocking inserted into the hot path — JSON serialisation of large objects, synchronous file I/O, regex backtracking, or CPU-heavy validation.
123
+ </thought>
124
+ <step_summary>High event-loop lag indicates synchronous blocking.</step_summary>
125
+ </step>
126
+
127
+ <step index="4" total="15">
128
+ <thought>
129
+ [Hypothesis] If event-loop lag is low but p50 is high, the bottleneck is I/O wait: slow DB queries, connection-pool exhaustion, DNS resolution delays, or increased network RTT to the upgraded service.
130
+ </thought>
131
+ <step_summary>Low event-loop lag with high p50 indicates I/O bottlenecks.</step_summary>
132
+ </step>
133
+
134
+ <step index="5" total="15">
135
+ <thought>
136
+ [Action] Read the dependency's changelog between the old and new version. Look for: new middleware injected at startup, serialisation format changes, default timeout changes, or connection-pool default reductions.
137
+ </thought>
138
+ <step_summary>Review the dependency changelog for breaking changes or new defaults.</step_summary>
139
+ </step>
140
+
141
+ <step index="6" total="15">
142
+ <thought>
143
+ [Action] Profile with \`clinic.js flame\` (or \`node --prof\` + \`node --prof-process\`) under representative load. The flame graph will pinpoint whether wall-clock time is in JS compute vs. idle I/O await.
144
+ </thought>
145
+ <step_summary>Use flame graphs to pinpoint the exact bottleneck.</step_summary>
146
+ </step>
147
+
148
+ <step index="7" total="15">
149
+ <thought>
150
+ [Action] Write a minimal reproduction that calls *only* the upgraded package's API with representative input. Benchmark it against the pinned old version in isolation to confirm the package itself is the source.
151
+ </thought>
152
+ <step_summary>Create a minimal reproduction to isolate the package's performance.</step_summary>
153
+ </step>
154
+
155
+ <step index="8" total="15">
156
+ <thought>
157
+ [Evaluation] Common 40× regression patterns: (a) added synchronous schema validation on every request, (b) HTTP/1.1 → HTTP/2 frame parsing overhead, (c) new middleware that buffers the full request body before routing.
158
+ </thought>
159
+ <step_summary>Evaluate common regression patterns like added validation or middleware.</step_summary>
160
+ </step>
161
+
162
+ <step index="9" total="15">
163
+ <thought>
164
+ [Evaluation] Check connection-pool configuration: if the upgrade changed default pool size or idle timeout, requests may queue waiting for connections. Inspect \`pool.min\`, \`pool.max\`, and \`acquireTimeoutMillis\` in the new version's defaults.
165
+ </thought>
166
+ <step_summary>Verify connection-pool configurations for reduced defaults.</step_summary>
167
+ </step>
168
+
169
+ <step index="10" total="15">
170
+ <thought>
171
+ [Evaluation] Check middleware registration order: some packages inject global middleware at \`require\`-time. A slow middleware (e.g., large-payload body parser) before fast routes affects all endpoints even if the route itself is unchanged.
172
+ </thought>
173
+ <step_summary>Check for slow global middleware affecting all routes.</step_summary>
174
+ </step>
175
+
176
+ <step index="11" total="15">
177
+ <thought>
178
+ [Mitigation] Immediate mitigation: pin the dependency to the last known-good version (\`npm install dep@x.y.z\`) and deploy to restore SLA while the full investigation continues. Add a TODO linking to the issue tracker.
179
+ </thought>
180
+ <step_summary>Pin the dependency to the last known-good version to restore SLA.</step_summary>
181
+ </step>
182
+
183
+ <step index="12" total="15">
184
+ <thought>
185
+ [Action] If the regression is a bug in the dependency, open an issue with the minimal reproduction from Thought 7. Check if a patch release or a configuration flag exists to disable the slow behaviour.
186
+ </thought>
187
+ <step_summary>Report the bug upstream with the minimal reproduction.</step_summary>
188
+ </step>
189
+
190
+ <step index="13" total="15">
191
+ <thought>
192
+ [Strategy] If the slow path is unavoidable, mitigation options: (a) cache the expensive result at the request or process level, (b) offload CPU work to a \`worker_threads\` worker, (c) evaluate an alternative package.
193
+ </thought>
194
+ <step_summary>Consider caching, worker threads, or alternative packages if unavoidable.</step_summary>
195
+ </step>
196
+
197
+ <step index="14" total="15">
198
+ <thought>
199
+ [Validation] After applying the fix, run the same load test that revealed the regression. Confirm p50 and p99 return to baseline and do not diverge under sustained load. Check that GC pressure did not increase.
200
+ </thought>
201
+ <step_summary>Validate the fix under load to ensure metrics return to baseline.</step_summary>
202
+ </step>
203
+
204
+ <step index="15" total="15">
205
+ <thought>
206
+ [Conclusion] Diagnosis path: git bisect → event-loop lag check → clinic.js flame graph → isolated package benchmark → changelog review → pool/middleware audit. Mitigation: pin version immediately. Fix: configure, cache, or replace. Prevention: add a latency benchmark target to CI.
207
+ </thought>
208
+ <step_summary>Summarize the diagnosis, mitigation, fix, and prevention strategy.</step_summary>
209
+ </step>
210
+ </thought_process>
211
+ </example>
212
+
213
+ <constraints>
214
+ - Match the depth and quality of the example above.
215
+ - Structure reasoning using: [Observation], [Hypothesis], [Strategy], [Action], [Evaluation], [Mitigation], [Validation], [Conclusion].
216
+ - Write specific thoughts that advance the investigation. No summaries of prior steps, no filler.
217
+ - Provide a 1-sentence \`step_summary\` per step.
218
+ - Total thoughts: 15 to 25. Scale depth to complexity.
219
+ </constraints>`;
99
220
  const TEMPLATES = {
100
221
  basic: BASIC_TEMPLATE,
101
222
  normal: NORMAL_TEMPLATE,
@@ -3,13 +3,14 @@ import { McpError } from '@modelcontextprotocol/sdk/types.js';
3
3
  import { sessionStore } from '../engine/reasoner.js';
4
4
  import { formatThoughtsToMarkdown } from '../lib/formatting.js';
5
5
  import { withIconMeta } from '../lib/tool-response.js';
6
- import { collectPrefixMatches } from '../lib/validators.js';
6
+ import { collectPrefixMatches, parseBooleanEnv } from '../lib/validators.js';
7
7
  import { buildServerInstructions } from './instructions.js';
8
8
  import { buildToolCatalog } from './tool-catalog.js';
9
9
  import { buildWorkflowGuide } from './workflows.js';
10
10
  const SESSIONS_RESOURCE_URI = 'reasoning://sessions';
11
11
  const SESSION_RESOURCE_PREFIX = `${SESSIONS_RESOURCE_URI}/`;
12
12
  const TRACE_RESOURCE_PREFIX = 'file:///cortex/sessions/';
13
+ const REDACTED_THOUGHT_CONTENT = '[REDACTED]';
13
14
  // --- Helpers ---
14
15
  function extractStringVariable(variables, name, uri) {
15
16
  const raw = variables[name];
@@ -72,6 +73,25 @@ function completeSessionIds(value) {
72
73
  function toIsoTimestamp(unixMs) {
73
74
  return new Date(unixMs).toISOString();
74
75
  }
76
+ function shouldRedactTraceContent() {
77
+ return parseBooleanEnv('CORTEX_REDACT_TRACE_CONTENT', false);
78
+ }
79
+ function getSessionView(session) {
80
+ if (!shouldRedactTraceContent()) {
81
+ return session;
82
+ }
83
+ return {
84
+ ...session,
85
+ thoughts: session.thoughts.map((thought) => ({
86
+ index: thought.index,
87
+ content: REDACTED_THOUGHT_CONTENT,
88
+ revision: thought.revision,
89
+ ...(thought.stepSummary !== undefined
90
+ ? { stepSummary: REDACTED_THOUGHT_CONTENT }
91
+ : {}),
92
+ })),
93
+ };
94
+ }
75
95
  function completeThoughtNames(value, sessionId) {
76
96
  const session = sessionStore.get(sessionId);
77
97
  if (!session) {
@@ -180,7 +200,7 @@ export function registerAllResources(server, iconMeta) {
180
200
  ...(withIconMeta(iconMeta) ?? {}),
181
201
  }, (uri, variables) => {
182
202
  const sessionId = extractStringVariable(variables, 'sessionId', uri);
183
- const session = resolveSession(sessionId, uri);
203
+ const session = getSessionView(resolveSession(sessionId, uri));
184
204
  return {
185
205
  contents: [
186
206
  {
@@ -214,7 +234,7 @@ export function registerAllResources(server, iconMeta) {
214
234
  ...(withIconMeta(iconMeta) ?? {}),
215
235
  }, (uri, variables) => {
216
236
  const sessionId = extractStringVariable(variables, 'sessionId', uri);
217
- const session = resolveSession(sessionId, uri);
237
+ const session = getSessionView(resolveSession(sessionId, uri));
218
238
  const thoughtName = extractStringVariable(variables, 'thoughtName', uri);
219
239
  const { index, requestedRevised } = parseThoughtName(thoughtName, session);
220
240
  const thought = session.thoughts[index - 1];
@@ -264,7 +284,7 @@ export function registerAllResources(server, iconMeta) {
264
284
  ...(withIconMeta(iconMeta) ?? {}),
265
285
  }, (uri, variables) => {
266
286
  const sessionId = extractStringVariable(variables, 'sessionId', uri);
267
- const session = resolveSession(sessionId, uri);
287
+ const session = getSessionView(resolveSession(sessionId, uri));
268
288
  const generatedThoughts = session.thoughts.length;
269
289
  const summary = buildSessionSummary({ ...session, generatedThoughts });
270
290
  return {
@@ -28,76 +28,59 @@ export function buildServerInstructions() {
28
28
  const sharedConstraints = getSharedConstraints()
29
29
  .map((c) => `- ${c}`)
30
30
  .join('\n');
31
- return `# CORTEX-MCP INSTRUCTIONS
32
-
33
- These instructions are available as a resource (internal://instructions) or prompt (get-help). Load them when unsure about tool usage.
34
-
35
- ---
36
-
37
- ## CORE CAPABILITY
38
-
39
- - Domain: Multi-level reasoning engine that decomposes queries into structured thought chains at configurable depth levels (basic, normal, high).
40
- - Primary Resources: Reasoning sessions (in-memory, 30-minute TTL), thought chains, progress notifications.
41
- - Tools: \`reasoning_think\` (WRITE — creates/extends sessions with LLM-authored thoughts).
42
-
43
- ---
44
-
45
- ## PROMPTS
46
-
47
- - \`get-help\`: Returns these instructions for quick recall.
31
+ return `<role>
32
+ You are an expert reasoning engine assistant. You decompose queries into structured thought chains at configurable depth levels (basic, normal, high).
33
+ </role>
34
+
35
+ <capabilities>
36
+ - Domain: Multi-level reasoning engine.
37
+ - Resources: Sessions (in-memory, 30m TTL), thought chains, progress notifications.
38
+ - Tools: \`reasoning_think\` (WRITE: creates/extends sessions).
39
+ </capabilities>
40
+
41
+ <prompts>
42
+ - \`get-help\`: Returns these instructions.
48
43
  ${promptList}
49
44
 
50
- > **Guided templates:** Each \`reasoning.<level>\` prompt embeds a level-specific few-shot example showing the expected \`thought\` depth and step count. Only the template for the requested level is injected — the other two are omitted to keep prompts lean.
51
-
52
- ---
53
-
54
- ## RESOURCES & RESOURCE LINKS
45
+ > **Guided templates:** Each \`reasoning.<level>\` prompt embeds a level-specific few-shot example showing expected \`thought\` depth and step count.
46
+ </prompts>
55
47
 
48
+ <resources>
56
49
  - \`internal://instructions\`: This document.
57
- - \`reasoning://sessions\`: List all active reasoning sessions with metadata (JSON).
58
- - \`reasoning://sessions/{sessionId}\`: Inspect a specific session's thoughts and metadata (JSON). Supports auto-completion on \`sessionId\`.
59
- - \`file:///cortex/sessions/{sessionId}/trace.md\`: Full Markdown trace of a session. Supports auto-completion on \`sessionId\`.
60
- - \`file:///cortex/sessions/{sessionId}/{thoughtName}.md\`: Markdown content of a single thought (e.g., \`Thought-1.md\`). Supports auto-completion on \`sessionId\` and \`thoughtName\`.
61
- - The server supports \`resources/subscribe\` for real-time change notifications on individual resources.
62
- - Subscribe to \`reasoning://sessions/{sessionId}\` to receive \`notifications/resources/updated\` when thoughts are added, revised, or status changes.
63
- - Subscribe to \`reasoning://sessions\` to receive aggregate updates as session content and statuses evolve.
64
- - Use subscriptions to monitor session progress without polling.
65
-
66
- ---
67
-
68
- ## PROGRESS & TASKS
69
-
70
- - Include \`_meta.progressToken\` in requests to receive \`notifications/progress\` updates during reasoning.
71
- - Task-augmented tool calls are supported for \`reasoning_think\`:
72
- - \`execution.taskSupport: "optional"\` invoke normally or as a task.
73
- - Send \`tools/call\` with \`task\` to get a task id.
74
- - Poll \`tasks/get\` and fetch results via \`tasks/result\`.
75
- - Use \`tasks/cancel\` to abort a running task.
76
- - For \`high\` level, progress is emitted every 2 steps to reduce noise; \`basic\` and \`normal\` emit after every step.
77
- - Use \`runMode: "run_to_completion"\` with \`thought\` + \`thoughts[]\` to execute multiple reasoning steps in one request.
78
-
79
- ---
80
-
81
- ## TOOL CONTRACTS
82
-
50
+ - \`reasoning://sessions\`: List active sessions (JSON).
51
+ - \`reasoning://sessions/{sessionId}\`: Inspect session thoughts/metadata (JSON).
52
+ - \`file:///cortex/sessions/{sessionId}/trace.md\`: Full Markdown trace.
53
+ - \`file:///cortex/sessions/{sessionId}/{thoughtName}.md\`: Single thought Markdown.
54
+ - Subscriptions (\`resources/subscribe\`):
55
+ - \`reasoning://sessions/{sessionId}\`: Updates on thought additions/revisions.
56
+ - \`reasoning://sessions\`: Aggregate session updates.
57
+ </resources>
58
+
59
+ <tasks_and_progress>
60
+ - Pass \`_meta.progressToken\` for \`notifications/progress\`.
61
+ - \`reasoning_think\` supports tasks (\`execution.taskSupport: "optional"\`):
62
+ - Send \`task\` in \`tools/call\` to get \`taskId\`.
63
+ - Poll \`tasks/get\`, fetch via \`tasks/result\`, abort via \`tasks/cancel\`.
64
+ - Progress emission: \`high\` level every 2 steps; \`basic\`/\`normal\` every step.
65
+ - \`runMode: "run_to_completion"\`: Pass \`thought\` as string array for batch execution.
66
+ </tasks_and_progress>
67
+
68
+ <tool_contracts>
83
69
  ${toolSections.join('\n\n')}
70
+ </tool_contracts>
84
71
 
85
- ---
86
-
87
- ## SHARED CONSTRAINTS
88
-
72
+ <constraints>
89
73
  ${sharedConstraints}
90
-
91
- ---
92
-
93
- ## ERROR HANDLING STRATEGY
94
-
95
- - \`E_SESSION_NOT_FOUND\`: Session expired or never existed. Call \`reasoning://sessions\` to list active sessions, or start a new session without \`sessionId\`.
96
- - \`E_INVALID_THOUGHT_COUNT\`: \`targetThoughts\` is outside the level range. Check ranges: basic (3–5), normal (6–10), high (15–25).
97
- - \`E_INSUFFICIENT_THOUGHTS\`: In \`run_to_completion\`, the request did not provide enough thought inputs for planned remaining steps.
98
- - \`E_INVALID_RUN_MODE_ARGS\`: Invalid \`runMode\` argument combination (for example, missing \`targetThoughts\` when starting a new run-to-completion session).
99
- - \`E_ABORTED\`: Reasoning was cancelled via abort signal or task cancellation. Retry with a new request if needed.
100
- - \`E_SERVER_BUSY\`: Too many concurrent task-mode reasoning calls (default cap: 32). Retry after a short delay, or use normal (non-task) invocation.
101
- - \`E_REASONING\`: Unexpected engine error. Check the error \`message\` field for details and retry.
74
+ </constraints>
75
+
76
+ <error_handling>
77
+ - \`E_SESSION_NOT_FOUND\`: Expired/missing. List sessions or start new.
78
+ - \`E_INVALID_THOUGHT_COUNT\`: \`targetThoughts\` out of range (basic: 3-5, normal: 6-10, high: 15-25).
79
+ - \`E_INSUFFICIENT_THOUGHTS\`: Not enough inputs for \`run_to_completion\`.
80
+ - \`E_INVALID_RUN_MODE_ARGS\`: Invalid \`runMode\` args (e.g., missing \`targetThoughts\`).
81
+ - \`E_ABORTED\`: Cancelled. Retry if needed.
82
+ - \`E_SERVER_BUSY\`: Too many concurrent tasks. Retry later or use sync mode.
83
+ - \`E_REASONING\`: Engine error. Check message and retry.
84
+ </error_handling>
102
85
  `;
103
86
  }
@@ -1,19 +1,20 @@
1
1
  import { buildCoreContextPack } from './tool-info.js';
2
- const CATALOG_GUIDE = `# Tool Catalog Details
3
- ## Optional Parameters
4
- - \`observation\`: What facts are known at this step? Use with \`hypothesis\` and \`evaluation\` as an alternative to \`thought\`.
5
- - \`hypothesis\`: What is the proposed idea or next logical leap?
6
- - \`evaluation\`: Critique the hypothesis. Are there flaws?
7
- - \`step_summary\`: A 1-sentence summary of the conclusion reached in this step. Accumulates in the \`summary\` field for contextual guidance.
8
- - \`is_conclusion\`: Set to true to end the session early with a final answer.
9
- - \`rollback_to_step\`: Roll back to a thought index (0-based). All thoughts after this index are discarded.
2
+ const CATALOG_GUIDE = `<optional_parameters>
3
+ - \`observation\`: Facts known at this step. Use with \`hypothesis\` and \`evaluation\` instead of \`thought\`.
4
+ - \`hypothesis\`: Proposed idea or next logical leap.
5
+ - \`evaluation\`: Critique of the hypothesis.
6
+ - \`step_summary\`: 1-sentence conclusion summary. Accumulates in \`summary\` field.
7
+ - \`is_conclusion\`: Set true to end session early.
8
+ - \`rollback_to_step\`: 0-based thought index to rollback to. Discards subsequent thoughts.
9
+ </optional_parameters>
10
10
 
11
- ## Cross-Tool Data Flow
11
+ <cross_tool_data_flow>
12
12
  \`\`\`
13
13
  reasoning_think -> result.sessionId -> reasoning_think.sessionId
14
14
  reasoning_think -> result.sessionId -> reasoning://sessions/{sessionId}
15
15
  reasoning_think -> result.sessionId -> file:///cortex/sessions/{sessionId}/trace.md
16
16
  \`\`\`
17
+ </cross_tool_data_flow>
17
18
  `;
18
19
  export function buildToolCatalog() {
19
20
  return `${buildCoreContextPack()}\n\n${CATALOG_GUIDE}`;
@@ -19,7 +19,7 @@ export function buildCoreContextPack() {
19
19
  ? `| \`${e.name}\` | ${e.model} | ${e.timeout} | ${e.maxOutputTokens} | ${e.purpose} |`
20
20
  : '';
21
21
  });
22
- return `# Core Context Pack\n\n| Tool | Model | Timeout | Max Output Tokens | Purpose |\n|------|-------|---------|-------------------|---------|\n${rows.join('\n')}`;
22
+ return `<core_context_pack>\n| Tool | Model | Timeout | Max Output Tokens | Purpose |\n|------|-------|---------|-------------------|---------|\n${rows.join('\n')}\n</core_context_pack>`;
23
23
  }
24
24
  export function getSharedConstraints() {
25
25
  return [
@@ -7,55 +7,55 @@ function buildToolReference() {
7
7
  .join('\n\n');
8
8
  }
9
9
  export function buildWorkflowGuide() {
10
- return `# THE "GOLDEN PATH" WORKFLOWS (CRITICAL)
10
+ return `<role>
11
+ You are an expert reasoning engine assistant. You decompose queries into structured thought chains at configurable depth levels (basic, normal, high).
12
+ </role>
11
13
 
14
+ <workflows>
12
15
  ### WORKFLOW A: Sequential Reasoning (Most Common)
13
-
14
- 1. Call \`reasoning_think\` with \`{ query: "...", level: "basic", thought: "Your detailed reasoning for step 1..." }\`.
15
- 2. Read the response note the \`sessionId\` and \`remainingThoughts\` fields.
16
- 3. **You MUST continue**: Call again with \`{ sessionId: "<from response>", thought: "Your next reasoning step..." }\`.
17
- 4. Repeat step 3 until the response shows \`status: "completed"\` or \`remainingThoughts: 0\`.
18
- NOTE: The \`summary\` field contains the exact continuation call you should make next.
19
-
20
- ### WORKFLOW B: Multi-Turn Reasoning (Session Continuation)
21
-
22
- 1. Call \`reasoning_think\` with \`{ query: "initial question", level: "normal", thought: "Your first reasoning step..." }\` note the returned \`sessionId\`.
23
- 2. Call \`reasoning_think\` with \`{ sessionId: "<id>", thought: "Your next reasoning step..." }\` (optional: add \`query\` for follow-up context).
24
- 3. Repeat until \`status: "completed"\` or \`remainingThoughts: 0\`, then read \`reasoning://sessions/{sessionId}\` for the full chain.
25
- NOTE: The \`level\` parameter is optional when continuing; if provided and mismatched, the session level is used.
26
-
27
- ### WORKFLOW C: Controlled Depth Reasoning
28
-
29
- 1. Call \`reasoning_think\` with \`{ query: "...", level: "normal", targetThoughts: 8, thought: "Your reasoning..." }\` to set the session's planned step count.
30
- 2. Repeat calls with the returned \`sessionId\` and your next \`thought\` until \`result.totalThoughts\` is reached.
31
- NOTE: \`targetThoughts\` must fall within the level range (basic: 3–5, normal: 6–10, high: 15–25). Out-of-range values return \`E_INVALID_THOUGHT_COUNT\`.
32
-
33
- ### WORKFLOW D: Async Task Execution
34
-
35
- 1. Call \`reasoning_think\` as a task (send \`tools/call\` with \`task\` field) for long-running \`high\`-level reasoning.
36
- 2. Poll \`tasks/get\` until status is \`completed\` or \`failed\`.
37
- 3. Retrieve the result via \`tasks/result\`.
38
- 4. Use \`tasks/cancel\` to abort if needed.
16
+ 1. Call \`reasoning_think\` with \`{ query: "...", level: "basic", thought: "..." }\`.
17
+ 2. Read response: note \`sessionId\` and \`remainingThoughts\`.
18
+ 3. **MUST continue**: Call again with \`{ sessionId: "<id>", thought: "..." }\`.
19
+ 4. Repeat until \`status: "completed"\` or \`remainingThoughts: 0\`.
20
+ NOTE: \`summary\` field contains the exact next call.
21
+
22
+ ### WORKFLOW B: Multi-Turn Reasoning
23
+ 1. Call \`reasoning_think\` with \`{ query: "...", level: "normal", thought: "..." }\`.
24
+ 2. Call \`reasoning_think\` with \`{ sessionId: "<id>", thought: "..." }\` (optional: add \`query\` for follow-up).
25
+ 3. Repeat until completed. Read \`reasoning://sessions/{sessionId}\` for full chain.
26
+ NOTE: \`level\` is optional when continuing; session level is used if omitted.
27
+
28
+ ### WORKFLOW C: Controlled Depth
29
+ 1. Call \`reasoning_think\` with \`{ query: "...", level: "normal", targetThoughts: 8, thought: "..." }\`.
30
+ 2. Repeat with \`sessionId\` and \`thought\` until \`totalThoughts\` reached.
31
+ NOTE: \`targetThoughts\` must fit level range (basic: 3-5, normal: 6-10, high: 15-25).
32
+
33
+ ### WORKFLOW D: Async Task
34
+ 1. Call \`reasoning_think\` as task (send \`task\` field) for long \`high\`-level reasoning.
35
+ 2. Poll \`tasks/get\` until \`completed\`/\`failed\`.
36
+ 3. Retrieve via \`tasks/result\`.
37
+ 4. Abort via \`tasks/cancel\`.
39
38
 
40
39
  ### WORKFLOW E: Batched Run-To-Completion
41
-
42
- 1. Start a new session with explicit \`targetThoughts\` and \`runMode: "run_to_completion"\`.
43
- 2. Provide one \`thought\` plus additional \`thoughts[]\` entries to cover the planned step count.
44
- 3. The server consumes thought inputs in order until completion, token budget exhaustion, or cancellation.
45
-
46
- ### WORKFLOW F: Structured Reasoning (Observation/Hypothesis/Evaluation)
47
-
48
- 1. Call \`reasoning_think\` with \`{ query: "...", level: "normal", observation: "facts...", hypothesis: "idea...", evaluation: "critique..." }\`.
49
- 2. The server formats these into a structured thought and stores it in the session trace.
50
- 3. Continue with \`sessionId\` using either \`thought\` or structured fields for subsequent steps.
51
- 4. Use \`is_conclusion: true\` to end early, or \`rollback_to_step\` to discard and redo from a specific step.
52
-
53
- ## Shared Constraints
40
+ 1. Start session with \`targetThoughts\` and \`runMode: "run_to_completion"\`.
41
+ 2. Provide \`thought\` as string array (e.g., \`["step1", "step2"]\`).
42
+ 3. Server consumes inputs until completion, token exhaustion, or cancellation.
43
+
44
+ ### WORKFLOW F: Structured Reasoning
45
+ 1. Call \`reasoning_think\` with \`{ query: "...", level: "normal", observation: "...", hypothesis: "...", evaluation: "..." }\`.
46
+ 2. Server formats into structured thought in trace.
47
+ 3. Continue with \`sessionId\` using \`thought\` or structured fields.
48
+ 4. Use \`is_conclusion: true\` to end early, or \`rollback_to_step\` to discard/redo.
49
+ </workflows>
50
+
51
+ <constraints>
54
52
  ${getSharedConstraints()
55
53
  .map((c) => `- ${c}`)
56
54
  .join('\n')}
55
+ </constraints>
57
56
 
58
- ## Tool Reference
57
+ <tool_reference>
59
58
  ${buildToolReference()}
59
+ </tool_reference>
60
60
  `;
61
61
  }
@@ -13,7 +13,6 @@ export declare const ReasoningThinkInputSchema: z.ZodObject<{
13
13
  run_to_completion: "run_to_completion";
14
14
  }>>;
15
15
  thought: z.ZodOptional<z.ZodUnion<readonly [z.ZodString, z.ZodArray<z.ZodString>]>>;
16
- thoughts: z.ZodOptional<z.ZodArray<z.ZodString>>;
17
16
  is_conclusion: z.ZodOptional<z.ZodBoolean>;
18
17
  rollback_to_step: z.ZodOptional<z.ZodNumber>;
19
18
  step_summary: z.ZodOptional<z.ZodString>;
@@ -17,63 +17,49 @@ function addCustomIssue(ctx, path, message) {
17
17
  }
18
18
  export const ReasoningThinkInputSchema = z
19
19
  .strictObject({
20
- query: QUERY_TEXT_SCHEMA.optional().describe('The question or problem to reason about'),
21
- level: LEVEL_SCHEMA.optional().describe(`Reasoning depth level (required for new sessions, optional for continuing). ${getLevelDescriptionString()}.`),
20
+ query: QUERY_TEXT_SCHEMA.optional().describe('Question or problem to reason about.'),
21
+ level: LEVEL_SCHEMA.optional().describe(`Reasoning depth level (required for new sessions). ${getLevelDescriptionString()}.`),
22
22
  targetThoughts: z
23
23
  .number()
24
24
  .int()
25
25
  .min(1)
26
26
  .max(25)
27
27
  .optional()
28
- .describe('Optional explicit thought count. Must fit the level range: basic 3–5, normal 6–10, high 15–25.'),
28
+ .describe('Explicit thought count. Must fit level range.'),
29
29
  sessionId: z
30
30
  .string()
31
31
  .min(1)
32
32
  .max(128)
33
33
  .optional()
34
- .describe('Session ID to continue. The session level is used when continuing; provided level is optional.'),
34
+ .describe('Session ID to continue.'),
35
35
  runMode: z
36
36
  .enum(RUN_MODE_VALUES)
37
37
  .optional()
38
- .describe('Execution mode (default: "step"). "step" appends a single thought per call. "run_to_completion" consumes all supplied thought inputs in one request.'),
38
+ .describe('Execution mode. "step" (default) or "run_to_completion".'),
39
39
  thought: z
40
40
  .union([THOUGHT_TEXT_SCHEMA, THOUGHT_BATCH_SCHEMA])
41
41
  .optional()
42
- .describe('Your full reasoning content for this step. ' +
43
- 'The server stores this text verbatim as the thought in the session trace. ' +
44
- 'Write your complete analysis, observations, and conclusions here — this is what appears in trace.md. ' +
45
- 'Can be a single string or an array of strings (for batch execution).'),
46
- thoughts: z
47
- .array(THOUGHT_TEXT_SCHEMA)
48
- .max(25)
49
- .optional()
50
- .describe('(Deprecated) Optional additional thought inputs. Use "thought" as an array instead.'),
42
+ .describe('Full reasoning content for this step. Stored verbatim. String or string array.'),
51
43
  is_conclusion: z
52
44
  .boolean()
53
45
  .optional()
54
- .describe('Set to true if you have arrived at the final answer and wish to end the reasoning session early.'),
46
+ .describe('End session early if final answer reached.'),
55
47
  rollback_to_step: z
56
48
  .number()
57
49
  .int()
58
50
  .min(0)
59
51
  .optional()
60
- .describe('Set to a thought index (0-based) to rollback to. All thoughts after this index will be discarded.'),
52
+ .describe('0-based thought index to rollback to. Discards subsequent thoughts.'),
61
53
  step_summary: z
62
54
  .string()
63
55
  .optional()
64
- .describe('A 1-sentence summary of the conclusion reached in this specific step.'),
65
- observation: z
66
- .string()
67
- .optional()
68
- .describe('What facts are known at this step?'),
56
+ .describe('1-sentence summary of the conclusion reached.'),
57
+ observation: z.string().optional().describe('Facts known at this step.'),
69
58
  hypothesis: z
70
59
  .string()
71
60
  .optional()
72
- .describe('What is the proposed idea or next logical leap?'),
73
- evaluation: z
74
- .string()
75
- .optional()
76
- .describe('Critique the hypothesis. Are there flaws?'),
61
+ .describe('Proposed idea or next logical leap.'),
62
+ evaluation: z.string().optional().describe('Critique of the hypothesis.'),
77
63
  })
78
64
  .superRefine((data, ctx) => {
79
65
  const runMode = data.runMode ?? DEFAULT_RUN_MODE;
@@ -91,9 +77,6 @@ export const ReasoningThinkInputSchema = z
91
77
  if (runMode === 'step' && Array.isArray(data.thought)) {
92
78
  addCustomIssue(ctx, ['thought'], 'thought must be a string when runMode is "step"');
93
79
  }
94
- if (runMode === 'step' && data.thoughts !== undefined) {
95
- addCustomIssue(ctx, ['thoughts'], 'thoughts is only allowed when runMode is "run_to_completion"');
96
- }
97
80
  const hasThought = data.thought !== undefined;
98
81
  const hasStructured = data.observation !== undefined &&
99
82
  data.hypothesis !== undefined &&
@@ -13,7 +13,7 @@ const ThoughtSchema = z.strictObject({
13
13
  stepSummary: z
14
14
  .string()
15
15
  .optional()
16
- .describe('A 1-sentence summary of the conclusion reached in this step, if provided.'),
16
+ .describe('1-sentence summary of the conclusion reached.'),
17
17
  });
18
18
  const ReasoningThinkSuccessSchema = z.strictObject({
19
19
  ok: z.literal(true),
@@ -25,22 +25,18 @@ const ReasoningThinkSuccessSchema = z.strictObject({
25
25
  generatedThoughts: z.number(),
26
26
  requestedThoughts: z.number(),
27
27
  totalThoughts: z.number(),
28
- tokenBudget: z
29
- .number()
30
- .describe('Approximate token budget (UTF-8 bytes ÷ 4, not true tokenization)'),
31
- tokensUsed: z
32
- .number()
33
- .describe('Approximate tokens used (UTF-8 bytes ÷ 4, not true tokenization)'),
28
+ tokenBudget: z.number().describe('Approximate token budget.'),
29
+ tokensUsed: z.number().describe('Approximate tokens used.'),
34
30
  ttlMs: z.number(),
35
31
  expiresAt: z.number(),
36
32
  createdAt: z.number(),
37
33
  updatedAt: z.number(),
38
34
  remainingThoughts: z
39
35
  .number()
40
- .describe('Number of thoughts remaining before the session reaches totalThoughts'),
36
+ .describe('Thoughts remaining before reaching totalThoughts.'),
41
37
  summary: z
42
38
  .string()
43
- .describe('Actionable next-step instruction when active, or completion status when done'),
39
+ .describe('Actionable next-step instruction or completion status.'),
44
40
  }),
45
41
  });
46
42
  const ReasoningThinkErrorSchema = z.strictObject({
package/dist/server.js CHANGED
@@ -3,6 +3,7 @@ import { findPackageJSON } from 'node:module';
3
3
  import { InMemoryTaskStore } from '@modelcontextprotocol/sdk/experimental/tasks';
4
4
  import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
5
5
  import { engineEvents } from './engine/events.js';
6
+ import { sessionStore } from './engine/reasoner.js';
6
7
  import { getErrorMessage } from './lib/errors.js';
7
8
  import { registerAllTools } from './tools/index.js';
8
9
  import { registerAllPrompts } from './prompts/index.js';
@@ -20,6 +21,7 @@ const ICON_URL_CANDIDATES = [
20
21
  ];
21
22
  let cachedLocalIconData;
22
23
  let cachedVersion;
24
+ let activeServerCount = 0;
23
25
  function getLocalIconData() {
24
26
  if (cachedLocalIconData !== undefined) {
25
27
  return cachedLocalIconData ?? undefined;
@@ -142,10 +144,18 @@ function installCloseCleanup(server, cleanup) {
142
144
  }
143
145
  closed = true;
144
146
  cleanup();
147
+ activeServerCount = Math.max(0, activeServerCount - 1);
148
+ if (activeServerCount === 0) {
149
+ sessionStore.dispose();
150
+ }
145
151
  await originalClose();
146
152
  };
147
153
  }
148
154
  export function createServer() {
155
+ if (activeServerCount === 0) {
156
+ sessionStore.ensureCleanupTimer();
157
+ }
158
+ activeServerCount += 1;
149
159
  const version = loadVersion();
150
160
  const taskStore = new InMemoryTaskStore();
151
161
  const localIcon = getLocalIconData();
@@ -6,13 +6,30 @@ import { createTaskLimiter } from '../lib/concurrency.js';
6
6
  import { createErrorResponse, getErrorMessage, InsufficientThoughtsError, InvalidRunModeArgsError, isObjectRecord, ReasoningAbortedError, ReasoningError, ServerBusyError, SessionNotFoundError, } from '../lib/errors.js';
7
7
  import { formatProgressMessage, formatThoughtsToMarkdown, } from '../lib/formatting.js';
8
8
  import { createToolResponse, withIconMeta } from '../lib/tool-response.js';
9
- import { parsePositiveIntEnv } from '../lib/validators.js';
9
+ import { parseBooleanEnv, parsePositiveIntEnv } from '../lib/validators.js';
10
10
  const DEFAULT_MAX_ACTIVE_REASONING_TASKS = 32;
11
+ const REDACTED_THOUGHT_CONTENT = '[REDACTED]';
12
+ function shouldRedactTraceContent() {
13
+ return parseBooleanEnv('CORTEX_REDACT_TRACE_CONTENT', false);
14
+ }
11
15
  function buildTraceResource(session) {
16
+ const sessionView = shouldRedactTraceContent()
17
+ ? {
18
+ ...session,
19
+ thoughts: session.thoughts.map((thought) => ({
20
+ index: thought.index,
21
+ content: REDACTED_THOUGHT_CONTENT,
22
+ revision: thought.revision,
23
+ ...(thought.stepSummary !== undefined
24
+ ? { stepSummary: REDACTED_THOUGHT_CONTENT }
25
+ : {}),
26
+ })),
27
+ }
28
+ : session;
12
29
  return {
13
30
  uri: `file:///cortex/sessions/${session.id}/trace.md`,
14
31
  mimeType: 'text/markdown',
15
- text: formatThoughtsToMarkdown(session),
32
+ text: formatThoughtsToMarkdown(sessionView),
16
33
  };
17
34
  }
18
35
  const reasoningTaskLimiter = createTaskLimiter(parsePositiveIntEnv('CORTEX_MAX_ACTIVE_REASONING_TASKS', DEFAULT_MAX_ACTIVE_REASONING_TASKS));
@@ -144,7 +161,7 @@ function buildThoughtInputs(params) {
144
161
  : params.thought
145
162
  ? [params.thought]
146
163
  : [];
147
- return [...primary, ...(params.thoughts ?? [])];
164
+ return primary;
148
165
  }
149
166
  function getStartingThoughtCount(sessionId) {
150
167
  if (sessionId === undefined) {
@@ -283,16 +300,22 @@ function createCancellationController(signal) {
283
300
  const controller = new AbortController();
284
301
  if (signal.aborted) {
285
302
  controller.abort();
286
- return controller;
303
+ return {
304
+ controller,
305
+ cleanup: () => {
306
+ // No listener to clean up when already aborted.
307
+ },
308
+ };
287
309
  }
288
310
  const onAbort = () => {
289
311
  controller.abort();
290
312
  };
291
- signal.addEventListener('abort', onAbort, { once: true });
292
- controller.signal.addEventListener('abort', () => {
313
+ const cleanup = () => {
293
314
  signal.removeEventListener('abort', onAbort);
294
- }, { once: true });
295
- return controller;
315
+ };
316
+ signal.addEventListener('abort', onAbort, { once: true });
317
+ controller.signal.addEventListener('abort', cleanup, { once: true });
318
+ return { controller, cleanup };
296
319
  }
297
320
  async function isTaskCancelled(taskStore, taskId) {
298
321
  try {
@@ -311,7 +334,7 @@ async function ensureTaskIsActive(taskStore, taskId, controller) {
311
334
  }
312
335
  function createProgressHandler(args) {
313
336
  const { server, taskStore, taskId, level, progressToken, controller, startingCount, batchTotal, } = args;
314
- return async (progress) => {
337
+ return async (progress, _total, summary) => {
315
338
  await ensureTaskIsActive(taskStore, taskId, controller);
316
339
  if (progressToken === undefined) {
317
340
  return;
@@ -322,14 +345,16 @@ function createProgressHandler(args) {
322
345
  const isTerminal = displayProgress >= batchTotal;
323
346
  // We must emit if it's the terminal update for this batch,
324
347
  // otherwise we respect the session-level skipping rules.
348
+ // If a summary is provided, we force an emit to show the meaningful update.
325
349
  if (!isTerminal &&
350
+ !summary &&
326
351
  !shouldEmitProgress(displayProgress, batchTotal, level)) {
327
352
  return;
328
353
  }
329
354
  const message = formatProgressMessage({
330
- toolName: TOOL_NAME,
355
+ toolName: `꩜ ${TOOL_NAME}`,
331
356
  context: 'Thought',
332
- metadata: `[${String(displayProgress)}/${String(batchTotal)}]`,
357
+ metadata: `[${String(displayProgress)}/${String(batchTotal)}]${summary ? ` ${summary}` : ''}`,
333
358
  ...(isTerminal ? { outcome: 'complete' } : {}),
334
359
  });
335
360
  await notifyProgress({
@@ -457,9 +482,9 @@ async function runReasoningTask(args) {
457
482
  const normalizedBatchTotal = Math.max(1, batchTotal);
458
483
  if (progressToken !== undefined) {
459
484
  const message = formatProgressMessage({
460
- toolName: TOOL_NAME,
461
- context: 'reasoning',
462
- metadata: level ? `starting [${level}]` : 'continuing session',
485
+ toolName: `꩜ ${TOOL_NAME}`,
486
+ context: level ? 'starting' : 'continuing',
487
+ metadata: level ? `[${level}]` : 'session',
463
488
  });
464
489
  await notifyProgress({
465
490
  server,
@@ -565,7 +590,8 @@ Use step_summary for a 1-sentence conclusion per step — these accumulate in th
565
590
 
566
591
  Levels: ${getLevelDescriptionString()}.
567
592
  Alternatives: runMode="run_to_completion" (batch), or observation/hypothesis/evaluation fields (structured).
568
- Errors: E_SESSION_NOT_FOUND (expired — start new), E_INVALID_THOUGHT_COUNT (check level ranges).`,
593
+ Errors: E_SESSION_NOT_FOUND (expired — start new), E_INVALID_THOUGHT_COUNT (check level ranges).
594
+ Protocol validation: malformed task metadata/arguments fail at request level before task start; runtime reasoning failures return tool isError=true payloads.`,
569
595
  inputSchema: ReasoningThinkInputSchema,
570
596
  outputSchema: ReasoningThinkToolOutputSchema,
571
597
  annotations: {
@@ -602,13 +628,13 @@ Errors: E_SESSION_NOT_FOUND (expired — start new), E_INVALID_THOUGHT_COUNT (ch
602
628
  reasoningTaskLimiter.release();
603
629
  throw error;
604
630
  }
605
- const controller = createCancellationController(extra.signal);
631
+ const cancellation = createCancellationController(extra.signal);
606
632
  const runReasoningArgs = {
607
633
  server,
608
634
  taskStore: extra.taskStore,
609
635
  taskId: task.taskId,
610
636
  params,
611
- controller,
637
+ controller: cancellation.controller,
612
638
  };
613
639
  if (progressToken !== undefined) {
614
640
  runReasoningArgs.progressToken = progressToken;
@@ -617,6 +643,7 @@ Errors: E_SESSION_NOT_FOUND (expired — start new), E_INVALID_THOUGHT_COUNT (ch
617
643
  runReasoningArgs.sessionId = extra.sessionId;
618
644
  }
619
645
  void runReasoningTask(runReasoningArgs).finally(() => {
646
+ cancellation.cleanup();
620
647
  reasoningTaskLimiter.release();
621
648
  });
622
649
  return { task };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@j0hanz/cortex-mcp",
3
- "version": "1.5.0",
3
+ "version": "1.6.0",
4
4
  "mcpName": "io.github.j0hanz/cortex-mcp",
5
5
  "author": "Johanz",
6
6
  "license": "MIT",