@j0hanz/cortex-mcp 1.5.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,6 +13,6 @@ interface ReasonOptions {
13
13
  isConclusion?: boolean;
14
14
  rollbackToStep?: number;
15
15
  abortSignal?: AbortSignal;
16
- onProgress?: (progress: number, total: number) => void | Promise<void>;
16
+ onProgress?: (progress: number, total: number, stepSummary?: string) => void | Promise<void>;
17
17
  }
18
18
  export declare function reason(query: string, level: ReasoningLevel | undefined, options?: ReasonOptions): Promise<Readonly<Session>>;
@@ -63,7 +63,7 @@ export async function reason(query, level, options) {
63
63
  sessionStore.markCompleted(session.id);
64
64
  }
65
65
  if (onProgress) {
66
- await onProgress(addedThought.index + 1, totalThoughts);
66
+ await onProgress(addedThought.index + 1, totalThoughts, stepSummary);
67
67
  throwIfReasoningAborted(abortSignal);
68
68
  }
69
69
  return getSessionOrThrow(session.id);
@@ -8,12 +8,14 @@ export declare class SessionStore {
8
8
  private oldestSessionId;
9
9
  private newestSessionId;
10
10
  private sortedSessionIdsCache;
11
- private readonly cleanupInterval;
11
+ private cleanupInterval;
12
12
  private readonly ttlMs;
13
13
  private readonly maxSessions;
14
14
  private readonly maxTotalTokens;
15
15
  private totalTokens;
16
16
  constructor(ttlMs?: number, maxSessions?: number, maxTotalTokens?: number);
17
+ ensureCleanupTimer(): void;
18
+ dispose(): void;
17
19
  create(level: ReasoningLevel, totalThoughts?: number): Readonly<Session>;
18
20
  get(id: string): Readonly<Session> | undefined;
19
21
  getSummary(id: string): Readonly<SessionSummary> | undefined;
@@ -33,12 +33,25 @@ export class SessionStore {
33
33
  this.ttlMs = ttlMs;
34
34
  this.maxSessions = maxSessions;
35
35
  this.maxTotalTokens = maxTotalTokens;
36
- const sweepInterval = resolveSweepInterval(ttlMs);
36
+ this.ensureCleanupTimer();
37
+ }
38
+ ensureCleanupTimer() {
39
+ if (this.cleanupInterval) {
40
+ return;
41
+ }
42
+ const sweepInterval = resolveSweepInterval(this.ttlMs);
37
43
  this.cleanupInterval = setInterval(() => {
38
44
  this.sweep();
39
45
  }, sweepInterval);
40
46
  this.cleanupInterval.unref();
41
47
  }
48
+ dispose() {
49
+ if (!this.cleanupInterval) {
50
+ return;
51
+ }
52
+ clearInterval(this.cleanupInterval);
53
+ this.cleanupInterval = undefined;
54
+ }
42
55
  create(level, totalThoughts) {
43
56
  this.evictIfAtCapacity();
44
57
  const config = getLevelConfig(level);
@@ -30,12 +30,6 @@ const TOOL_CONTRACTS = [
30
30
  required: false,
31
31
  constraints: '1-100,000 chars',
32
32
  },
33
- {
34
- name: 'thoughts',
35
- type: 'string[]',
36
- required: false,
37
- constraints: 'optional',
38
- },
39
33
  {
40
34
  name: 'targetThoughts',
41
35
  type: 'number',
@@ -9,4 +9,9 @@ export declare function getTargetThoughtsError(level: ReasoningLevel, targetThou
9
9
  * if the variable is absent or invalid. Values below `minimum` also fall back.
10
10
  */
11
11
  export declare function parsePositiveIntEnv(name: string, fallback: number, minimum?: number): number;
12
+ /**
13
+ * Parse a boolean from an environment variable, returning `fallback` when absent
14
+ * or when the value is not a recognized boolean literal.
15
+ */
16
+ export declare function parseBooleanEnv(name: string, fallback: boolean): boolean;
12
17
  export declare function collectPrefixMatches(candidates: readonly string[], value: string, limit: number): string[];
@@ -30,6 +30,24 @@ export function parsePositiveIntEnv(name, fallback, minimum = 1) {
30
30
  }
31
31
  return parsed;
32
32
  }
33
+ /**
34
+ * Parse a boolean from an environment variable, returning `fallback` when absent
35
+ * or when the value is not a recognized boolean literal.
36
+ */
37
+ export function parseBooleanEnv(name, fallback) {
38
+ const raw = process.env[name];
39
+ if (raw === undefined) {
40
+ return fallback;
41
+ }
42
+ const normalized = raw.trim().toLowerCase();
43
+ if (['1', 'true', 'yes', 'on'].includes(normalized)) {
44
+ return true;
45
+ }
46
+ if (['0', 'false', 'no', 'off'].includes(normalized)) {
47
+ return false;
48
+ }
49
+ return fallback;
50
+ }
33
51
  export function collectPrefixMatches(candidates, value, limit) {
34
52
  const results = [];
35
53
  for (const candidate of candidates) {
@@ -40,17 +40,21 @@ function createTextPrompt(text) {
40
40
  function buildPromptText(args) {
41
41
  const { context, task, constraints, output } = args;
42
42
  return [
43
- '# Context',
43
+ '<context>',
44
44
  ...context,
45
+ '</context>',
45
46
  '',
46
- '# Task',
47
+ '<task>',
47
48
  ...task,
49
+ '</task>',
48
50
  '',
49
- '# Constraints',
51
+ '<constraints>',
50
52
  ...constraints.map((line) => `- ${line}`),
53
+ '</constraints>',
51
54
  '',
52
- '# Output',
55
+ '<output_format>',
53
56
  ...output,
57
+ '</output_format>',
54
58
  ].join('\n');
55
59
  }
56
60
  function buildStartReasoningPrompt(args) {
@@ -64,20 +68,20 @@ function buildStartReasoningPrompt(args) {
64
68
  : String(targetThoughts)}`,
65
69
  ],
66
70
  task: [
67
- `Start a new reasoning session using "${REASONING_TOOL_NAME}".`,
68
- 'Create the first step with a complete, concrete reasoning thought.',
71
+ `Start new reasoning session via "${REASONING_TOOL_NAME}".`,
72
+ 'Generate the first concrete reasoning step.',
69
73
  ],
70
74
  constraints: [
71
75
  THOUGHT_PARAMETER_GUIDANCE,
72
- 'Preserve sessionId from the response for continuation calls.',
73
- 'Continue until status is completed or remainingThoughts is 0.',
76
+ 'Preserve sessionId for continuation.',
77
+ 'Continue until status="completed" or remainingThoughts=0.',
74
78
  ],
75
79
  output: [
76
- 'Return the first tool call payload only.',
77
- 'Fields: query, level, thought, and optional targetThoughts.',
80
+ 'Return exactly one tool call payload.',
81
+ 'Required fields: query, level, thought.',
78
82
  ],
79
83
  });
80
- return `${base}\n\n---\n\n${getTemplate(level)}`;
84
+ return `${base}\n\n${getTemplate(level)}`;
81
85
  }
82
86
  function buildRetryReasoningPrompt(args) {
83
87
  const { query, level, targetThoughts } = args;
@@ -90,18 +94,18 @@ function buildRetryReasoningPrompt(args) {
90
94
  : String(targetThoughts)}`,
91
95
  ],
92
96
  task: [
93
- `Retry by calling "${REASONING_TOOL_NAME}" with an improved first thought.`,
97
+ `Retry calling "${REASONING_TOOL_NAME}" with an improved first thought.`,
94
98
  ],
95
99
  constraints: [
96
100
  THOUGHT_PARAMETER_GUIDANCE,
97
- 'Use a direct and specific thought with no filler language.',
101
+ 'Write a direct, specific thought. No filler.',
98
102
  ],
99
103
  output: [
100
- 'Return one tool call payload only.',
101
- 'Fields: query, level, thought, and optional targetThoughts.',
104
+ 'Return exactly one tool call payload.',
105
+ 'Required fields: query, level, thought.',
102
106
  ],
103
107
  });
104
- return `${base}\n\n---\n\n${getTemplate(level)}`;
108
+ return `${base}\n\n${getTemplate(level)}`;
105
109
  }
106
110
  function buildContinueReasoningPrompt(args) {
107
111
  const { sessionId, query, level, targetThoughts } = args;
@@ -119,17 +123,17 @@ function buildContinueReasoningPrompt(args) {
119
123
  : String(targetThoughts)}`,
120
124
  ],
121
125
  task: [
122
- `Continue the existing session using "${REASONING_TOOL_NAME}".`,
123
- 'Generate the next reasoning step only.',
126
+ `Continue session via "${REASONING_TOOL_NAME}".`,
127
+ 'Generate the next reasoning step.',
124
128
  ],
125
129
  constraints: [
126
130
  THOUGHT_PARAMETER_GUIDANCE,
127
- 'Keep the same sessionId in the call payload.',
128
- 'Prefer concise, concrete reasoning over meta commentary.',
131
+ 'Keep the same sessionId.',
132
+ 'Write concrete reasoning. No meta commentary.',
129
133
  ],
130
134
  output: [
131
- 'Return one continuation tool call payload only.',
132
- 'Fields: sessionId, thought, and optional query/level/targetThoughts.',
135
+ 'Return exactly one continuation tool call payload.',
136
+ 'Required fields: sessionId, thought.',
133
137
  ],
134
138
  });
135
139
  }
@@ -3,99 +3,220 @@
3
3
  // Each template shows correct `thought` depth and step count so the LLM
4
4
  // calibrates its output to the selected level requirements.
5
5
  // ---------------------------------------------------------------------------
6
- const BASIC_TEMPLATE = `## Guided Example (basic — 3 thoughts)
7
-
8
- **Query:** "Is Set or Array better for deduplicating a list of strings in JavaScript?"
9
-
10
- **Thought 1 of 3:**
11
- > A \`Set\` enforces uniqueness automatically; \`[...new Set(arr)]\` is a single-step O(n) operation. An Array-based approach (\`filter\` + \`indexOf\`) is O(n²) and adds unnecessary complexity.
12
-
13
- **Thought 2 of 3:**
14
- > Edge cases: both preserve insertion order in modern JS engines, so ordering is not a differentiator. \`Set\` converts all values to a common reference internally, which handles string equality correctly.
15
-
16
- **Thought 3 of 3 — conclusion:**
17
- > Use \`Set\`. It is faster (O(n) vs O(n²)), shorter to write, and immediately communicates the intent of deduplication. \`[...new Set(list)]\` is the idiomatic solution.
18
-
19
- ---
20
- **System Directive:** Follow the pattern above. Each \`thought\` must contain self-contained, concrete analysis — no filler language or meta-commentary. Use 3 to 5 thoughts total.`;
21
- const NORMAL_TEMPLATE = `## Guided Example (normal — 7 thoughts)
22
-
23
- **Query:** "How do I safely handle concurrent writes to a shared in-memory counter in a Node.js service?"
24
-
25
- **Thought 1 of 7:**
26
- > Node.js is single-threaded: synchronous code is never interrupted mid-execution, so a plain \`counter++\` within one event-loop tick is effectively atomic from JavaScript's perspective.
27
-
28
- **Thought 2 of 7:**
29
- > The risk arises in *async* code. A read-modify-write spanning an \`await\` boundary is NOT atomic. Two concurrent callers can both read the same value before either writes, causing a lost update.
30
-
31
- **Thought 3 of 7:**
32
- > Example race: \`const v = await db.get('c'); await db.set('c', v + 1);\` — if two requests interleave at the \`await\` points, both read \`v=5\` and both write \`6\`, losing one increment.
33
-
34
- **Thought 4 of 7:**
35
- > Solution A — atomic DB operation: \`UPDATE counter SET n = n + 1 RETURNING n\` (SQL) or Redis \`INCR\`. The DB engine serialises the read-modify-write internally with no async gap.
36
-
37
- **Thought 5 of 7:**
38
- > Solution B — async mutex: use a library-level lock (e.g. \`async-mutex\`) to serialise access. Works for in-process state but does not scale across multiple processes or restarts.
39
-
40
- **Thought 6 of 7:**
41
- > Solution C synchronous in-memory only: keep the counter as a plain variable, increment with \`counter++\` (no \`await\` in the read-modify-write path). Valid only for single-process, ephemeral state.
42
-
43
- **Thought 7 of 7 — conclusion:**
44
- > Prefer Solution A (atomic DB op) for correctness across restarts and multi-process deployments. Use Solution C only for in-process, non-persisted counters where an \`await\` never touches the variable. Avoid async read-modify-write without a mutex.
45
-
46
- ---
47
- **System Directive:** Follow the pattern above. Each \`thought\` must be concrete and progress the analysis. Use 6 to 10 thoughts total; avoid restating earlier thoughts.`;
48
- const HIGH_TEMPLATE = `## Guided Example (high — 15 thoughts)
49
-
50
- **Query:** "Our Node.js API latency jumped from p50=20ms to p50=800ms after a dependency upgrade. How do I diagnose and fix this?"
51
-
52
- **Thought 1 of 15:**
53
- > Establish the change boundary: run \`git log --oneline\` to find the upgrade commit. Use \`git bisect\` between the last known-good tag and HEAD to confirm the exact commit that caused the regression.
54
-
55
- **Thought 2 of 15:**
56
- > Collect baseline metrics before touching anything: event-loop lag (\`perf_hooks.monitorEventLoopDelay\`), GC pause times (\`--expose-gc\` + \`PerformanceObserver\`), and per-route timings. This separates compute regressions from I/O regressions.
57
-
58
- **Thought 3 of 15:**
59
- > If event-loop lag is high (>50ms per tick), the cause is synchronous blocking inserted into the hot path — JSON serialisation of large objects, synchronous file I/O, regex backtracking, or CPU-heavy validation.
60
-
61
- **Thought 4 of 15:**
62
- > If event-loop lag is low but p50 is high, the bottleneck is I/O wait: slow DB queries, connection-pool exhaustion, DNS resolution delays, or increased network RTT to the upgraded service.
63
-
64
- **Thought 5 of 15:**
65
- > Read the dependency's changelog between the old and new version. Look for: new middleware injected at startup, serialisation format changes, default timeout changes, or connection-pool default reductions.
66
-
67
- **Thought 6 of 15:**
68
- > Profile with \`clinic.js flame\` (or \`node --prof\` + \`node --prof-process\`) under representative load. The flame graph will pinpoint whether wall-clock time is in JS compute vs. idle I/O await.
69
-
70
- **Thought 7 of 15:**
71
- > Write a minimal reproduction that calls *only* the upgraded package's API with representative input. Benchmark it against the pinned old version in isolation to confirm the package itself is the source.
72
-
73
- **Thought 8 of 15:**
74
- > Common 40× regression patterns: (a) added synchronous schema validation on every request, (b) HTTP/1.1 → HTTP/2 frame parsing overhead, (c) new middleware that buffers the full request body before routing.
75
-
76
- **Thought 9 of 15:**
77
- > Check connection-pool configuration: if the upgrade changed default pool size or idle timeout, requests may queue waiting for connections. Inspect \`pool.min\`, \`pool.max\`, and \`acquireTimeoutMillis\` in the new version's defaults.
78
-
79
- **Thought 10 of 15:**
80
- > Check middleware registration order: some packages inject global middleware at \`require\`-time. A slow middleware (e.g., large-payload body parser) before fast routes affects all endpoints even if the route itself is unchanged.
81
-
82
- **Thought 11 of 15:**
83
- > Immediate mitigation: pin the dependency to the last known-good version (\`npm install dep@x.y.z\`) and deploy to restore SLA while the full investigation continues. Add a TODO linking to the issue tracker.
84
-
85
- **Thought 12 of 15:**
86
- > If the regression is a bug in the dependency, open an issue with the minimal reproduction from Thought 7. Check if a patch release or a configuration flag exists to disable the slow behaviour.
87
-
88
- **Thought 13 of 15:**
89
- > If the slow path is unavoidable, mitigation options: (a) cache the expensive result at the request or process level, (b) offload CPU work to a \`worker_threads\` worker, (c) evaluate an alternative package.
90
-
91
- **Thought 14 of 15:**
92
- > After applying the fix, run the same load test that revealed the regression. Confirm p50 and p99 return to baseline and do not diverge under sustained load. Check that GC pressure did not increase.
93
-
94
- **Thought 15 of 15 — conclusion:**
95
- > Diagnosis path: git bisect → event-loop lag check → clinic.js flame graph → isolated package benchmark → changelog review → pool/middleware audit. Mitigation: pin version immediately. Fix: configure, cache, or replace. Prevention: add a latency benchmark target to CI.
96
-
97
- ---
98
- **System Directive:** Follow the pattern above. Each \`thought\` must be specific, advancing the investigation — no summaries of prior steps, no filler. Use 15 to 25 thoughts total; scale depth to complexity.`;
6
+ const BASIC_TEMPLATE = `<example>
7
+ <query>Is Set or Array better for deduplicating a list of strings in JavaScript?</query>
8
+
9
+ <thought_process>
10
+ <step index="1" total="3">
11
+ <thought>
12
+ [Observation] A \`Set\` enforces uniqueness automatically; \`[...new Set(arr)]\` is a single-step O(n) operation. An Array-based approach (\`filter\` + \`indexOf\`) is O(n²) and adds unnecessary complexity.
13
+ </thought>
14
+ <step_summary>Set is O(n) while Array filter is O(n²).</step_summary>
15
+ </step>
16
+
17
+ <step index="2" total="3">
18
+ <thought>
19
+ [Evaluation] Edge cases: both preserve insertion order in modern JS engines, so ordering is not a differentiator. \`Set\` converts all values to a common reference internally, which handles string equality correctly.
20
+ </thought>
21
+ <step_summary>Both preserve order, but Set handles string equality natively.</step_summary>
22
+ </step>
23
+
24
+ <step index="3" total="3">
25
+ <thought>
26
+ [Conclusion] Use \`Set\`. It is faster (O(n) vs O(n²)), shorter to write, and immediately communicates the intent of deduplication. \`[...new Set(list)]\` is the idiomatic solution.
27
+ </thought>
28
+ <step_summary>Set is the optimal and idiomatic choice.</step_summary>
29
+ </step>
30
+ </thought_process>
31
+ </example>
32
+
33
+ <constraints>
34
+ - Match the depth and quality of the example above.
35
+ - Structure reasoning using: [Observation], [Hypothesis], [Evaluation], [Conclusion].
36
+ - Write concrete, self-contained thoughts. No filler.
37
+ - Provide a 1-sentence \`step_summary\` per step.
38
+ - Total thoughts: 3 to 5.
39
+ </constraints>`;
40
+ const NORMAL_TEMPLATE = `<example>
41
+ <query>How do I safely handle concurrent writes to a shared in-memory counter in a Node.js service?</query>
42
+
43
+ <thought_process>
44
+ <step index="1" total="7">
45
+ <thought>
46
+ [Observation] Node.js is single-threaded: synchronous code is never interrupted mid-execution, so a plain \`counter++\` within one event-loop tick is effectively atomic from JavaScript's perspective.
47
+ </thought>
48
+ <step_summary>Synchronous increments in Node.js are atomic.</step_summary>
49
+ </step>
50
+
51
+ <step index="2" total="7">
52
+ <thought>
53
+ [Hypothesis] The risk arises in *async* code. A read-modify-write spanning an \`await\` boundary is NOT atomic. Two concurrent callers can both read the same value before either writes, causing a lost update.
54
+ </thought>
55
+ <step_summary>Async read-modify-write operations are not atomic and risk lost updates.</step_summary>
56
+ </step>
57
+
58
+ <step index="3" total="7">
59
+ <thought>
60
+ [Evaluation] Example race: \`const v = await db.get('c'); await db.set('c', v + 1);\` — if two requests interleave at the \`await\` points, both read \`v=5\` and both write \`6\`, losing one increment.
61
+ </thought>
62
+ <step_summary>Interleaved async operations lead to data races.</step_summary>
63
+ </step>
64
+
65
+ <step index="4" total="7">
66
+ <thought>
67
+ [Strategy] Solution A — atomic DB operation: \`UPDATE counter SET n = n + 1 RETURNING n\` (SQL) or Redis \`INCR\`. The DB engine serialises the read-modify-write internally with no async gap.
68
+ </thought>
69
+ <step_summary>Database-level atomic operations prevent races.</step_summary>
70
+ </step>
71
+
72
+ <step index="5" total="7">
73
+ <thought>
74
+ [Strategy] Solution B — async mutex: use a library-level lock (e.g. \`async-mutex\`) to serialise access. Works for in-process state but does not scale across multiple processes or restarts.
75
+ </thought>
76
+ <step_summary>In-process mutexes work but don't scale horizontally.</step_summary>
77
+ </step>
78
+
79
+ <step index="6" total="7">
80
+ <thought>
81
+ [Strategy] Solution C — synchronous in-memory only: keep the counter as a plain variable, increment with \`counter++\` (no \`await\` in the read-modify-write path). Valid only for single-process, ephemeral state.
82
+ </thought>
83
+ <step_summary>Synchronous in-memory counters are safe for ephemeral, single-process state.</step_summary>
84
+ </step>
85
+
86
+ <step index="7" total="7">
87
+ <thought>
88
+ [Conclusion] Prefer Solution A (atomic DB op) for correctness across restarts and multi-process deployments. Use Solution C only for in-process, non-persisted counters where an \`await\` never touches the variable. Avoid async read-modify-write without a mutex.
89
+ </thought>
90
+ <step_summary>Use DB atomic ops for persistence, or sync variables for ephemeral state.</step_summary>
91
+ </step>
92
+ </thought_process>
93
+ </example>
94
+
95
+ <constraints>
96
+ - Match the depth and quality of the example above.
97
+ - Structure reasoning using: [Observation], [Hypothesis], [Evaluation], [Strategy], [Conclusion].
98
+ - Write concrete thoughts that progress the analysis. Do not restate earlier thoughts.
99
+ - Provide a 1-sentence \`step_summary\` per step.
100
+ - Total thoughts: 6 to 10.
101
+ </constraints>`;
102
+ const HIGH_TEMPLATE = `<example>
103
+ <query>Our Node.js API latency jumped from p50=20ms to p50=800ms after a dependency upgrade. How do I diagnose and fix this?</query>
104
+
105
+ <thought_process>
106
+ <step index="1" total="15">
107
+ <thought>
108
+ [Strategy] Establish the change boundary: run \`git log --oneline\` to find the upgrade commit. Use \`git bisect\` between the last known-good tag and HEAD to confirm the exact commit that caused the regression.
109
+ </thought>
110
+ <step_summary>Isolate the exact commit causing the regression using git bisect.</step_summary>
111
+ </step>
112
+
113
+ <step index="2" total="15">
114
+ <thought>
115
+ [Observation] Collect baseline metrics before touching anything: event-loop lag (\`perf_hooks.monitorEventLoopDelay\`), GC pause times (\`--expose-gc\` + \`PerformanceObserver\`), and per-route timings. This separates compute regressions from I/O regressions.
116
+ </thought>
117
+ <step_summary>Collect baseline metrics to distinguish compute vs I/O regressions.</step_summary>
118
+ </step>
119
+
120
+ <step index="3" total="15">
121
+ <thought>
122
+ [Hypothesis] If event-loop lag is high (>50ms per tick), the cause is synchronous blocking inserted into the hot path — JSON serialisation of large objects, synchronous file I/O, regex backtracking, or CPU-heavy validation.
123
+ </thought>
124
+ <step_summary>High event-loop lag indicates synchronous blocking.</step_summary>
125
+ </step>
126
+
127
+ <step index="4" total="15">
128
+ <thought>
129
+ [Hypothesis] If event-loop lag is low but p50 is high, the bottleneck is I/O wait: slow DB queries, connection-pool exhaustion, DNS resolution delays, or increased network RTT to the upgraded service.
130
+ </thought>
131
+ <step_summary>Low event-loop lag with high p50 indicates I/O bottlenecks.</step_summary>
132
+ </step>
133
+
134
+ <step index="5" total="15">
135
+ <thought>
136
+ [Action] Read the dependency's changelog between the old and new version. Look for: new middleware injected at startup, serialisation format changes, default timeout changes, or connection-pool default reductions.
137
+ </thought>
138
+ <step_summary>Review the dependency changelog for breaking changes or new defaults.</step_summary>
139
+ </step>
140
+
141
+ <step index="6" total="15">
142
+ <thought>
143
+ [Action] Profile with \`clinic.js flame\` (or \`node --prof\` + \`node --prof-process\`) under representative load. The flame graph will pinpoint whether wall-clock time is in JS compute vs. idle I/O await.
144
+ </thought>
145
+ <step_summary>Use flame graphs to pinpoint the exact bottleneck.</step_summary>
146
+ </step>
147
+
148
+ <step index="7" total="15">
149
+ <thought>
150
+ [Action] Write a minimal reproduction that calls *only* the upgraded package's API with representative input. Benchmark it against the pinned old version in isolation to confirm the package itself is the source.
151
+ </thought>
152
+ <step_summary>Create a minimal reproduction to isolate the package's performance.</step_summary>
153
+ </step>
154
+
155
+ <step index="8" total="15">
156
+ <thought>
157
+ [Evaluation] Common 40× regression patterns: (a) added synchronous schema validation on every request, (b) HTTP/1.1 → HTTP/2 frame parsing overhead, (c) new middleware that buffers the full request body before routing.
158
+ </thought>
159
+ <step_summary>Evaluate common regression patterns like added validation or middleware.</step_summary>
160
+ </step>
161
+
162
+ <step index="9" total="15">
163
+ <thought>
164
+ [Evaluation] Check connection-pool configuration: if the upgrade changed default pool size or idle timeout, requests may queue waiting for connections. Inspect \`pool.min\`, \`pool.max\`, and \`acquireTimeoutMillis\` in the new version's defaults.
165
+ </thought>
166
+ <step_summary>Verify connection-pool configurations for reduced defaults.</step_summary>
167
+ </step>
168
+
169
+ <step index="10" total="15">
170
+ <thought>
171
+ [Evaluation] Check middleware registration order: some packages inject global middleware at \`require\`-time. A slow middleware (e.g., large-payload body parser) before fast routes affects all endpoints even if the route itself is unchanged.
172
+ </thought>
173
+ <step_summary>Check for slow global middleware affecting all routes.</step_summary>
174
+ </step>
175
+
176
+ <step index="11" total="15">
177
+ <thought>
178
+ [Mitigation] Immediate mitigation: pin the dependency to the last known-good version (\`npm install dep@x.y.z\`) and deploy to restore SLA while the full investigation continues. Add a TODO linking to the issue tracker.
179
+ </thought>
180
+ <step_summary>Pin the dependency to the last known-good version to restore SLA.</step_summary>
181
+ </step>
182
+
183
+ <step index="12" total="15">
184
+ <thought>
185
+ [Action] If the regression is a bug in the dependency, open an issue with the minimal reproduction from Thought 7. Check if a patch release or a configuration flag exists to disable the slow behaviour.
186
+ </thought>
187
+ <step_summary>Report the bug upstream with the minimal reproduction.</step_summary>
188
+ </step>
189
+
190
+ <step index="13" total="15">
191
+ <thought>
192
+ [Strategy] If the slow path is unavoidable, mitigation options: (a) cache the expensive result at the request or process level, (b) offload CPU work to a \`worker_threads\` worker, (c) evaluate an alternative package.
193
+ </thought>
194
+ <step_summary>Consider caching, worker threads, or alternative packages if unavoidable.</step_summary>
195
+ </step>
196
+
197
+ <step index="14" total="15">
198
+ <thought>
199
+ [Validation] After applying the fix, run the same load test that revealed the regression. Confirm p50 and p99 return to baseline and do not diverge under sustained load. Check that GC pressure did not increase.
200
+ </thought>
201
+ <step_summary>Validate the fix under load to ensure metrics return to baseline.</step_summary>
202
+ </step>
203
+
204
+ <step index="15" total="15">
205
+ <thought>
206
+ [Conclusion] Diagnosis path: git bisect → event-loop lag check → clinic.js flame graph → isolated package benchmark → changelog review → pool/middleware audit. Mitigation: pin version immediately. Fix: configure, cache, or replace. Prevention: add a latency benchmark target to CI.
207
+ </thought>
208
+ <step_summary>Summarize the diagnosis, mitigation, fix, and prevention strategy.</step_summary>
209
+ </step>
210
+ </thought_process>
211
+ </example>
212
+
213
+ <constraints>
214
+ - Match the depth and quality of the example above.
215
+ - Structure reasoning using: [Observation], [Hypothesis], [Strategy], [Action], [Evaluation], [Mitigation], [Validation], [Conclusion].
216
+ - Write specific thoughts that advance the investigation. No summaries of prior steps, no filler.
217
+ - Provide a 1-sentence \`step_summary\` per step.
218
+ - Total thoughts: 15 to 25. Scale depth to complexity.
219
+ </constraints>`;
99
220
  const TEMPLATES = {
100
221
  basic: BASIC_TEMPLATE,
101
222
  normal: NORMAL_TEMPLATE,
@@ -3,13 +3,14 @@ import { McpError } from '@modelcontextprotocol/sdk/types.js';
3
3
  import { sessionStore } from '../engine/reasoner.js';
4
4
  import { formatThoughtsToMarkdown } from '../lib/formatting.js';
5
5
  import { withIconMeta } from '../lib/tool-response.js';
6
- import { collectPrefixMatches } from '../lib/validators.js';
6
+ import { collectPrefixMatches, parseBooleanEnv } from '../lib/validators.js';
7
7
  import { buildServerInstructions } from './instructions.js';
8
8
  import { buildToolCatalog } from './tool-catalog.js';
9
9
  import { buildWorkflowGuide } from './workflows.js';
10
10
  const SESSIONS_RESOURCE_URI = 'reasoning://sessions';
11
11
  const SESSION_RESOURCE_PREFIX = `${SESSIONS_RESOURCE_URI}/`;
12
12
  const TRACE_RESOURCE_PREFIX = 'file:///cortex/sessions/';
13
+ const REDACTED_THOUGHT_CONTENT = '[REDACTED]';
13
14
  // --- Helpers ---
14
15
  function extractStringVariable(variables, name, uri) {
15
16
  const raw = variables[name];
@@ -72,6 +73,25 @@ function completeSessionIds(value) {
72
73
  function toIsoTimestamp(unixMs) {
73
74
  return new Date(unixMs).toISOString();
74
75
  }
76
+ function shouldRedactTraceContent() {
77
+ return parseBooleanEnv('CORTEX_REDACT_TRACE_CONTENT', false);
78
+ }
79
+ function getSessionView(session) {
80
+ if (!shouldRedactTraceContent()) {
81
+ return session;
82
+ }
83
+ return {
84
+ ...session,
85
+ thoughts: session.thoughts.map((thought) => ({
86
+ index: thought.index,
87
+ content: REDACTED_THOUGHT_CONTENT,
88
+ revision: thought.revision,
89
+ ...(thought.stepSummary !== undefined
90
+ ? { stepSummary: REDACTED_THOUGHT_CONTENT }
91
+ : {}),
92
+ })),
93
+ };
94
+ }
75
95
  function completeThoughtNames(value, sessionId) {
76
96
  const session = sessionStore.get(sessionId);
77
97
  if (!session) {
@@ -180,7 +200,7 @@ export function registerAllResources(server, iconMeta) {
180
200
  ...(withIconMeta(iconMeta) ?? {}),
181
201
  }, (uri, variables) => {
182
202
  const sessionId = extractStringVariable(variables, 'sessionId', uri);
183
- const session = resolveSession(sessionId, uri);
203
+ const session = getSessionView(resolveSession(sessionId, uri));
184
204
  return {
185
205
  contents: [
186
206
  {
@@ -214,7 +234,7 @@ export function registerAllResources(server, iconMeta) {
214
234
  ...(withIconMeta(iconMeta) ?? {}),
215
235
  }, (uri, variables) => {
216
236
  const sessionId = extractStringVariable(variables, 'sessionId', uri);
217
- const session = resolveSession(sessionId, uri);
237
+ const session = getSessionView(resolveSession(sessionId, uri));
218
238
  const thoughtName = extractStringVariable(variables, 'thoughtName', uri);
219
239
  const { index, requestedRevised } = parseThoughtName(thoughtName, session);
220
240
  const thought = session.thoughts[index - 1];
@@ -264,7 +284,7 @@ export function registerAllResources(server, iconMeta) {
264
284
  ...(withIconMeta(iconMeta) ?? {}),
265
285
  }, (uri, variables) => {
266
286
  const sessionId = extractStringVariable(variables, 'sessionId', uri);
267
- const session = resolveSession(sessionId, uri);
287
+ const session = getSessionView(resolveSession(sessionId, uri));
268
288
  const generatedThoughts = session.thoughts.length;
269
289
  const summary = buildSessionSummary({ ...session, generatedThoughts });
270
290
  return {
@@ -28,76 +28,59 @@ export function buildServerInstructions() {
28
28
  const sharedConstraints = getSharedConstraints()
29
29
  .map((c) => `- ${c}`)
30
30
  .join('\n');
31
- return `# CORTEX-MCP INSTRUCTIONS
32
-
33
- These instructions are available as a resource (internal://instructions) or prompt (get-help). Load them when unsure about tool usage.
34
-
35
- ---
36
-
37
- ## CORE CAPABILITY
38
-
39
- - Domain: Multi-level reasoning engine that decomposes queries into structured thought chains at configurable depth levels (basic, normal, high).
40
- - Primary Resources: Reasoning sessions (in-memory, 30-minute TTL), thought chains, progress notifications.
41
- - Tools: \`reasoning_think\` (WRITE — creates/extends sessions with LLM-authored thoughts).
42
-
43
- ---
44
-
45
- ## PROMPTS
46
-
47
- - \`get-help\`: Returns these instructions for quick recall.
31
+ return `<role>
32
+ You are an expert reasoning engine assistant. You decompose queries into structured thought chains at configurable depth levels (basic, normal, high).
33
+ </role>
34
+
35
+ <capabilities>
36
+ - Domain: Multi-level reasoning engine.
37
+ - Resources: Sessions (in-memory, 30m TTL), thought chains, progress notifications.
38
+ - Tools: \`reasoning_think\` (WRITE: creates/extends sessions).
39
+ </capabilities>
40
+
41
+ <prompts>
42
+ - \`get-help\`: Returns these instructions.
48
43
  ${promptList}
49
44
 
50
- > **Guided templates:** Each \`reasoning.<level>\` prompt embeds a level-specific few-shot example showing the expected \`thought\` depth and step count. Only the template for the requested level is injected — the other two are omitted to keep prompts lean.
51
-
52
- ---
53
-
54
- ## RESOURCES & RESOURCE LINKS
45
+ > **Guided templates:** Each \`reasoning.<level>\` prompt embeds a level-specific few-shot example showing expected \`thought\` depth and step count.
46
+ </prompts>
55
47
 
48
+ <resources>
56
49
  - \`internal://instructions\`: This document.
57
- - \`reasoning://sessions\`: List all active reasoning sessions with metadata (JSON).
58
- - \`reasoning://sessions/{sessionId}\`: Inspect a specific session's thoughts and metadata (JSON). Supports auto-completion on \`sessionId\`.
59
- - \`file:///cortex/sessions/{sessionId}/trace.md\`: Full Markdown trace of a session. Supports auto-completion on \`sessionId\`.
60
- - \`file:///cortex/sessions/{sessionId}/{thoughtName}.md\`: Markdown content of a single thought (e.g., \`Thought-1.md\`). Supports auto-completion on \`sessionId\` and \`thoughtName\`.
61
- - The server supports \`resources/subscribe\` for real-time change notifications on individual resources.
62
- - Subscribe to \`reasoning://sessions/{sessionId}\` to receive \`notifications/resources/updated\` when thoughts are added, revised, or status changes.
63
- - Subscribe to \`reasoning://sessions\` to receive aggregate updates as session content and statuses evolve.
64
- - Use subscriptions to monitor session progress without polling.
65
-
66
- ---
67
-
68
- ## PROGRESS & TASKS
69
-
70
- - Include \`_meta.progressToken\` in requests to receive \`notifications/progress\` updates during reasoning.
71
- - Task-augmented tool calls are supported for \`reasoning_think\`:
72
- - \`execution.taskSupport: "optional"\` invoke normally or as a task.
73
- - Send \`tools/call\` with \`task\` to get a task id.
74
- - Poll \`tasks/get\` and fetch results via \`tasks/result\`.
75
- - Use \`tasks/cancel\` to abort a running task.
76
- - For \`high\` level, progress is emitted every 2 steps to reduce noise; \`basic\` and \`normal\` emit after every step.
77
- - Use \`runMode: "run_to_completion"\` with \`thought\` + \`thoughts[]\` to execute multiple reasoning steps in one request.
78
-
79
- ---
80
-
81
- ## TOOL CONTRACTS
82
-
50
+ - \`reasoning://sessions\`: List active sessions (JSON).
51
+ - \`reasoning://sessions/{sessionId}\`: Inspect session thoughts/metadata (JSON).
52
+ - \`file:///cortex/sessions/{sessionId}/trace.md\`: Full Markdown trace.
53
+ - \`file:///cortex/sessions/{sessionId}/{thoughtName}.md\`: Single thought Markdown.
54
+ - Subscriptions (\`resources/subscribe\`):
55
+ - \`reasoning://sessions/{sessionId}\`: Updates on thought additions/revisions.
56
+ - \`reasoning://sessions\`: Aggregate session updates.
57
+ </resources>
58
+
59
+ <tasks_and_progress>
60
+ - Pass \`_meta.progressToken\` for \`notifications/progress\`.
61
+ - \`reasoning_think\` supports tasks (\`execution.taskSupport: "optional"\`):
62
+ - Send \`task\` in \`tools/call\` to get \`taskId\`.
63
+ - Poll \`tasks/get\`, fetch via \`tasks/result\`, abort via \`tasks/cancel\`.
64
+ - Progress emission: \`high\` level every 2 steps; \`basic\`/\`normal\` every step.
65
+ - \`runMode: "run_to_completion"\`: Pass \`thought\` as string array for batch execution.
66
+ </tasks_and_progress>
67
+
68
+ <tool_contracts>
83
69
  ${toolSections.join('\n\n')}
70
+ </tool_contracts>
84
71
 
85
- ---
86
-
87
- ## SHARED CONSTRAINTS
88
-
72
+ <constraints>
89
73
  ${sharedConstraints}
90
-
91
- ---
92
-
93
- ## ERROR HANDLING STRATEGY
94
-
95
- - \`E_SESSION_NOT_FOUND\`: Session expired or never existed. Call \`reasoning://sessions\` to list active sessions, or start a new session without \`sessionId\`.
96
- - \`E_INVALID_THOUGHT_COUNT\`: \`targetThoughts\` is outside the level range. Check ranges: basic (3–5), normal (6–10), high (15–25).
97
- - \`E_INSUFFICIENT_THOUGHTS\`: In \`run_to_completion\`, the request did not provide enough thought inputs for planned remaining steps.
98
- - \`E_INVALID_RUN_MODE_ARGS\`: Invalid \`runMode\` argument combination (for example, missing \`targetThoughts\` when starting a new run-to-completion session).
99
- - \`E_ABORTED\`: Reasoning was cancelled via abort signal or task cancellation. Retry with a new request if needed.
100
- - \`E_SERVER_BUSY\`: Too many concurrent task-mode reasoning calls (default cap: 32). Retry after a short delay, or use normal (non-task) invocation.
101
- - \`E_REASONING\`: Unexpected engine error. Check the error \`message\` field for details and retry.
74
+ </constraints>
75
+
76
+ <error_handling>
77
+ - \`E_SESSION_NOT_FOUND\`: Expired/missing. List sessions or start new.
78
+ - \`E_INVALID_THOUGHT_COUNT\`: \`targetThoughts\` out of range (basic: 3-5, normal: 6-10, high: 15-25).
79
+ - \`E_INSUFFICIENT_THOUGHTS\`: Not enough inputs for \`run_to_completion\`.
80
+ - \`E_INVALID_RUN_MODE_ARGS\`: Invalid \`runMode\` args (e.g., missing \`targetThoughts\`).
81
+ - \`E_ABORTED\`: Cancelled. Retry if needed.
82
+ - \`E_SERVER_BUSY\`: Too many concurrent tasks. Retry later or use sync mode.
83
+ - \`E_REASONING\`: Engine error. Check message and retry.
84
+ </error_handling>
102
85
  `;
103
86
  }
@@ -1,19 +1,20 @@
1
1
  import { buildCoreContextPack } from './tool-info.js';
2
- const CATALOG_GUIDE = `# Tool Catalog Details
3
- ## Optional Parameters
4
- - \`observation\`: What facts are known at this step? Use with \`hypothesis\` and \`evaluation\` as an alternative to \`thought\`.
5
- - \`hypothesis\`: What is the proposed idea or next logical leap?
6
- - \`evaluation\`: Critique the hypothesis. Are there flaws?
7
- - \`step_summary\`: A 1-sentence summary of the conclusion reached in this step. Accumulates in the \`summary\` field for contextual guidance.
8
- - \`is_conclusion\`: Set to true to end the session early with a final answer.
9
- - \`rollback_to_step\`: Roll back to a thought index (0-based). All thoughts after this index are discarded.
2
+ const CATALOG_GUIDE = `<optional_parameters>
3
+ - \`observation\`: Facts known at this step. Use with \`hypothesis\` and \`evaluation\` instead of \`thought\`.
4
+ - \`hypothesis\`: Proposed idea or next logical leap.
5
+ - \`evaluation\`: Critique of the hypothesis.
6
+ - \`step_summary\`: 1-sentence conclusion summary. Accumulates in \`summary\` field.
7
+ - \`is_conclusion\`: Set true to end session early.
8
+ - \`rollback_to_step\`: 0-based thought index to rollback to. Discards subsequent thoughts.
9
+ </optional_parameters>
10
10
 
11
- ## Cross-Tool Data Flow
11
+ <cross_tool_data_flow>
12
12
  \`\`\`
13
13
  reasoning_think -> result.sessionId -> reasoning_think.sessionId
14
14
  reasoning_think -> result.sessionId -> reasoning://sessions/{sessionId}
15
15
  reasoning_think -> result.sessionId -> file:///cortex/sessions/{sessionId}/trace.md
16
16
  \`\`\`
17
+ </cross_tool_data_flow>
17
18
  `;
18
19
  export function buildToolCatalog() {
19
20
  return `${buildCoreContextPack()}\n\n${CATALOG_GUIDE}`;
@@ -19,7 +19,7 @@ export function buildCoreContextPack() {
19
19
  ? `| \`${e.name}\` | ${e.model} | ${e.timeout} | ${e.maxOutputTokens} | ${e.purpose} |`
20
20
  : '';
21
21
  });
22
- return `# Core Context Pack\n\n| Tool | Model | Timeout | Max Output Tokens | Purpose |\n|------|-------|---------|-------------------|---------|\n${rows.join('\n')}`;
22
+ return `<core_context_pack>\n| Tool | Model | Timeout | Max Output Tokens | Purpose |\n|------|-------|---------|-------------------|---------|\n${rows.join('\n')}\n</core_context_pack>`;
23
23
  }
24
24
  export function getSharedConstraints() {
25
25
  return [
@@ -7,55 +7,55 @@ function buildToolReference() {
7
7
  .join('\n\n');
8
8
  }
9
9
  export function buildWorkflowGuide() {
10
- return `# THE "GOLDEN PATH" WORKFLOWS (CRITICAL)
10
+ return `<role>
11
+ You are an expert reasoning engine assistant. You decompose queries into structured thought chains at configurable depth levels (basic, normal, high).
12
+ </role>
11
13
 
14
+ <workflows>
12
15
  ### WORKFLOW A: Sequential Reasoning (Most Common)
13
-
14
- 1. Call \`reasoning_think\` with \`{ query: "...", level: "basic", thought: "Your detailed reasoning for step 1..." }\`.
15
- 2. Read the response note the \`sessionId\` and \`remainingThoughts\` fields.
16
- 3. **You MUST continue**: Call again with \`{ sessionId: "<from response>", thought: "Your next reasoning step..." }\`.
17
- 4. Repeat step 3 until the response shows \`status: "completed"\` or \`remainingThoughts: 0\`.
18
- NOTE: The \`summary\` field contains the exact continuation call you should make next.
19
-
20
- ### WORKFLOW B: Multi-Turn Reasoning (Session Continuation)
21
-
22
- 1. Call \`reasoning_think\` with \`{ query: "initial question", level: "normal", thought: "Your first reasoning step..." }\` note the returned \`sessionId\`.
23
- 2. Call \`reasoning_think\` with \`{ sessionId: "<id>", thought: "Your next reasoning step..." }\` (optional: add \`query\` for follow-up context).
24
- 3. Repeat until \`status: "completed"\` or \`remainingThoughts: 0\`, then read \`reasoning://sessions/{sessionId}\` for the full chain.
25
- NOTE: The \`level\` parameter is optional when continuing; if provided and mismatched, the session level is used.
26
-
27
- ### WORKFLOW C: Controlled Depth Reasoning
28
-
29
- 1. Call \`reasoning_think\` with \`{ query: "...", level: "normal", targetThoughts: 8, thought: "Your reasoning..." }\` to set the session's planned step count.
30
- 2. Repeat calls with the returned \`sessionId\` and your next \`thought\` until \`result.totalThoughts\` is reached.
31
- NOTE: \`targetThoughts\` must fall within the level range (basic: 3–5, normal: 6–10, high: 15–25). Out-of-range values return \`E_INVALID_THOUGHT_COUNT\`.
32
-
33
- ### WORKFLOW D: Async Task Execution
34
-
35
- 1. Call \`reasoning_think\` as a task (send \`tools/call\` with \`task\` field) for long-running \`high\`-level reasoning.
36
- 2. Poll \`tasks/get\` until status is \`completed\` or \`failed\`.
37
- 3. Retrieve the result via \`tasks/result\`.
38
- 4. Use \`tasks/cancel\` to abort if needed.
16
+ 1. Call \`reasoning_think\` with \`{ query: "...", level: "basic", thought: "..." }\`.
17
+ 2. Read response: note \`sessionId\` and \`remainingThoughts\`.
18
+ 3. **MUST continue**: Call again with \`{ sessionId: "<id>", thought: "..." }\`.
19
+ 4. Repeat until \`status: "completed"\` or \`remainingThoughts: 0\`.
20
+ NOTE: \`summary\` field contains the exact next call.
21
+
22
+ ### WORKFLOW B: Multi-Turn Reasoning
23
+ 1. Call \`reasoning_think\` with \`{ query: "...", level: "normal", thought: "..." }\`.
24
+ 2. Call \`reasoning_think\` with \`{ sessionId: "<id>", thought: "..." }\` (optional: add \`query\` for follow-up).
25
+ 3. Repeat until completed. Read \`reasoning://sessions/{sessionId}\` for full chain.
26
+ NOTE: \`level\` is optional when continuing; session level is used if omitted.
27
+
28
+ ### WORKFLOW C: Controlled Depth
29
+ 1. Call \`reasoning_think\` with \`{ query: "...", level: "normal", targetThoughts: 8, thought: "..." }\`.
30
+ 2. Repeat with \`sessionId\` and \`thought\` until \`totalThoughts\` reached.
31
+ NOTE: \`targetThoughts\` must fit level range (basic: 3-5, normal: 6-10, high: 15-25).
32
+
33
+ ### WORKFLOW D: Async Task
34
+ 1. Call \`reasoning_think\` as task (send \`task\` field) for long \`high\`-level reasoning.
35
+ 2. Poll \`tasks/get\` until \`completed\`/\`failed\`.
36
+ 3. Retrieve via \`tasks/result\`.
37
+ 4. Abort via \`tasks/cancel\`.
39
38
 
40
39
  ### WORKFLOW E: Batched Run-To-Completion
41
-
42
- 1. Start a new session with explicit \`targetThoughts\` and \`runMode: "run_to_completion"\`.
43
- 2. Provide one \`thought\` plus additional \`thoughts[]\` entries to cover the planned step count.
44
- 3. The server consumes thought inputs in order until completion, token budget exhaustion, or cancellation.
45
-
46
- ### WORKFLOW F: Structured Reasoning (Observation/Hypothesis/Evaluation)
47
-
48
- 1. Call \`reasoning_think\` with \`{ query: "...", level: "normal", observation: "facts...", hypothesis: "idea...", evaluation: "critique..." }\`.
49
- 2. The server formats these into a structured thought and stores it in the session trace.
50
- 3. Continue with \`sessionId\` using either \`thought\` or structured fields for subsequent steps.
51
- 4. Use \`is_conclusion: true\` to end early, or \`rollback_to_step\` to discard and redo from a specific step.
52
-
53
- ## Shared Constraints
40
+ 1. Start session with \`targetThoughts\` and \`runMode: "run_to_completion"\`.
41
+ 2. Provide \`thought\` as string array (e.g., \`["step1", "step2"]\`).
42
+ 3. Server consumes inputs until completion, token exhaustion, or cancellation.
43
+
44
+ ### WORKFLOW F: Structured Reasoning
45
+ 1. Call \`reasoning_think\` with \`{ query: "...", level: "normal", observation: "...", hypothesis: "...", evaluation: "..." }\`.
46
+ 2. Server formats into structured thought in trace.
47
+ 3. Continue with \`sessionId\` using \`thought\` or structured fields.
48
+ 4. Use \`is_conclusion: true\` to end early, or \`rollback_to_step\` to discard/redo.
49
+ </workflows>
50
+
51
+ <constraints>
54
52
  ${getSharedConstraints()
55
53
  .map((c) => `- ${c}`)
56
54
  .join('\n')}
55
+ </constraints>
57
56
 
58
- ## Tool Reference
57
+ <tool_reference>
59
58
  ${buildToolReference()}
59
+ </tool_reference>
60
60
  `;
61
61
  }
@@ -13,7 +13,6 @@ export declare const ReasoningThinkInputSchema: z.ZodObject<{
13
13
  run_to_completion: "run_to_completion";
14
14
  }>>;
15
15
  thought: z.ZodOptional<z.ZodUnion<readonly [z.ZodString, z.ZodArray<z.ZodString>]>>;
16
- thoughts: z.ZodOptional<z.ZodArray<z.ZodString>>;
17
16
  is_conclusion: z.ZodOptional<z.ZodBoolean>;
18
17
  rollback_to_step: z.ZodOptional<z.ZodNumber>;
19
18
  step_summary: z.ZodOptional<z.ZodString>;
@@ -17,63 +17,49 @@ function addCustomIssue(ctx, path, message) {
17
17
  }
18
18
  export const ReasoningThinkInputSchema = z
19
19
  .strictObject({
20
- query: QUERY_TEXT_SCHEMA.optional().describe('The question or problem to reason about'),
21
- level: LEVEL_SCHEMA.optional().describe(`Reasoning depth level (required for new sessions, optional for continuing). ${getLevelDescriptionString()}.`),
20
+ query: QUERY_TEXT_SCHEMA.optional().describe('Question or problem to reason about.'),
21
+ level: LEVEL_SCHEMA.optional().describe(`Reasoning depth level (required for new sessions). ${getLevelDescriptionString()}.`),
22
22
  targetThoughts: z
23
23
  .number()
24
24
  .int()
25
25
  .min(1)
26
26
  .max(25)
27
27
  .optional()
28
- .describe('Optional explicit thought count. Must fit the level range: basic 3–5, normal 6–10, high 15–25.'),
28
+ .describe('Explicit thought count. Must fit level range.'),
29
29
  sessionId: z
30
30
  .string()
31
31
  .min(1)
32
32
  .max(128)
33
33
  .optional()
34
- .describe('Session ID to continue. The session level is used when continuing; provided level is optional.'),
34
+ .describe('Session ID to continue.'),
35
35
  runMode: z
36
36
  .enum(RUN_MODE_VALUES)
37
37
  .optional()
38
- .describe('Execution mode (default: "step"). "step" appends a single thought per call. "run_to_completion" consumes all supplied thought inputs in one request.'),
38
+ .describe('Execution mode. "step" (default) or "run_to_completion".'),
39
39
  thought: z
40
40
  .union([THOUGHT_TEXT_SCHEMA, THOUGHT_BATCH_SCHEMA])
41
41
  .optional()
42
- .describe('Your full reasoning content for this step. ' +
43
- 'The server stores this text verbatim as the thought in the session trace. ' +
44
- 'Write your complete analysis, observations, and conclusions here — this is what appears in trace.md. ' +
45
- 'Can be a single string or an array of strings (for batch execution).'),
46
- thoughts: z
47
- .array(THOUGHT_TEXT_SCHEMA)
48
- .max(25)
49
- .optional()
50
- .describe('(Deprecated) Optional additional thought inputs. Use "thought" as an array instead.'),
42
+ .describe('Full reasoning content for this step. Stored verbatim. String or string array.'),
51
43
  is_conclusion: z
52
44
  .boolean()
53
45
  .optional()
54
- .describe('Set to true if you have arrived at the final answer and wish to end the reasoning session early.'),
46
+ .describe('End session early if final answer reached.'),
55
47
  rollback_to_step: z
56
48
  .number()
57
49
  .int()
58
50
  .min(0)
59
51
  .optional()
60
- .describe('Set to a thought index (0-based) to rollback to. All thoughts after this index will be discarded.'),
52
+ .describe('0-based thought index to rollback to. Discards subsequent thoughts.'),
61
53
  step_summary: z
62
54
  .string()
63
55
  .optional()
64
- .describe('A 1-sentence summary of the conclusion reached in this specific step.'),
65
- observation: z
66
- .string()
67
- .optional()
68
- .describe('What facts are known at this step?'),
56
+ .describe('1-sentence summary of the conclusion reached.'),
57
+ observation: z.string().optional().describe('Facts known at this step.'),
69
58
  hypothesis: z
70
59
  .string()
71
60
  .optional()
72
- .describe('What is the proposed idea or next logical leap?'),
73
- evaluation: z
74
- .string()
75
- .optional()
76
- .describe('Critique the hypothesis. Are there flaws?'),
61
+ .describe('Proposed idea or next logical leap.'),
62
+ evaluation: z.string().optional().describe('Critique of the hypothesis.'),
77
63
  })
78
64
  .superRefine((data, ctx) => {
79
65
  const runMode = data.runMode ?? DEFAULT_RUN_MODE;
@@ -91,9 +77,6 @@ export const ReasoningThinkInputSchema = z
91
77
  if (runMode === 'step' && Array.isArray(data.thought)) {
92
78
  addCustomIssue(ctx, ['thought'], 'thought must be a string when runMode is "step"');
93
79
  }
94
- if (runMode === 'step' && data.thoughts !== undefined) {
95
- addCustomIssue(ctx, ['thoughts'], 'thoughts is only allowed when runMode is "run_to_completion"');
96
- }
97
80
  const hasThought = data.thought !== undefined;
98
81
  const hasStructured = data.observation !== undefined &&
99
82
  data.hypothesis !== undefined &&
@@ -13,7 +13,7 @@ const ThoughtSchema = z.strictObject({
13
13
  stepSummary: z
14
14
  .string()
15
15
  .optional()
16
- .describe('A 1-sentence summary of the conclusion reached in this step, if provided.'),
16
+ .describe('1-sentence summary of the conclusion reached.'),
17
17
  });
18
18
  const ReasoningThinkSuccessSchema = z.strictObject({
19
19
  ok: z.literal(true),
@@ -25,22 +25,18 @@ const ReasoningThinkSuccessSchema = z.strictObject({
25
25
  generatedThoughts: z.number(),
26
26
  requestedThoughts: z.number(),
27
27
  totalThoughts: z.number(),
28
- tokenBudget: z
29
- .number()
30
- .describe('Approximate token budget (UTF-8 bytes ÷ 4, not true tokenization)'),
31
- tokensUsed: z
32
- .number()
33
- .describe('Approximate tokens used (UTF-8 bytes ÷ 4, not true tokenization)'),
28
+ tokenBudget: z.number().describe('Approximate token budget.'),
29
+ tokensUsed: z.number().describe('Approximate tokens used.'),
34
30
  ttlMs: z.number(),
35
31
  expiresAt: z.number(),
36
32
  createdAt: z.number(),
37
33
  updatedAt: z.number(),
38
34
  remainingThoughts: z
39
35
  .number()
40
- .describe('Number of thoughts remaining before the session reaches totalThoughts'),
36
+ .describe('Thoughts remaining before reaching totalThoughts.'),
41
37
  summary: z
42
38
  .string()
43
- .describe('Actionable next-step instruction when active, or completion status when done'),
39
+ .describe('Actionable next-step instruction or completion status.'),
44
40
  }),
45
41
  });
46
42
  const ReasoningThinkErrorSchema = z.strictObject({
package/dist/server.js CHANGED
@@ -3,6 +3,7 @@ import { findPackageJSON } from 'node:module';
3
3
  import { InMemoryTaskStore } from '@modelcontextprotocol/sdk/experimental/tasks';
4
4
  import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
5
5
  import { engineEvents } from './engine/events.js';
6
+ import { sessionStore } from './engine/reasoner.js';
6
7
  import { getErrorMessage } from './lib/errors.js';
7
8
  import { registerAllTools } from './tools/index.js';
8
9
  import { registerAllPrompts } from './prompts/index.js';
@@ -20,6 +21,7 @@ const ICON_URL_CANDIDATES = [
20
21
  ];
21
22
  let cachedLocalIconData;
22
23
  let cachedVersion;
24
+ let activeServerCount = 0;
23
25
  function getLocalIconData() {
24
26
  if (cachedLocalIconData !== undefined) {
25
27
  return cachedLocalIconData ?? undefined;
@@ -142,10 +144,18 @@ function installCloseCleanup(server, cleanup) {
142
144
  }
143
145
  closed = true;
144
146
  cleanup();
147
+ activeServerCount = Math.max(0, activeServerCount - 1);
148
+ if (activeServerCount === 0) {
149
+ sessionStore.dispose();
150
+ }
145
151
  await originalClose();
146
152
  };
147
153
  }
148
154
  export function createServer() {
155
+ if (activeServerCount === 0) {
156
+ sessionStore.ensureCleanupTimer();
157
+ }
158
+ activeServerCount += 1;
149
159
  const version = loadVersion();
150
160
  const taskStore = new InMemoryTaskStore();
151
161
  const localIcon = getLocalIconData();
@@ -6,13 +6,30 @@ import { createTaskLimiter } from '../lib/concurrency.js';
6
6
  import { createErrorResponse, getErrorMessage, InsufficientThoughtsError, InvalidRunModeArgsError, isObjectRecord, ReasoningAbortedError, ReasoningError, ServerBusyError, SessionNotFoundError, } from '../lib/errors.js';
7
7
  import { formatProgressMessage, formatThoughtsToMarkdown, } from '../lib/formatting.js';
8
8
  import { createToolResponse, withIconMeta } from '../lib/tool-response.js';
9
- import { parsePositiveIntEnv } from '../lib/validators.js';
9
+ import { parseBooleanEnv, parsePositiveIntEnv } from '../lib/validators.js';
10
10
  const DEFAULT_MAX_ACTIVE_REASONING_TASKS = 32;
11
+ const REDACTED_THOUGHT_CONTENT = '[REDACTED]';
12
+ function shouldRedactTraceContent() {
13
+ return parseBooleanEnv('CORTEX_REDACT_TRACE_CONTENT', false);
14
+ }
11
15
  function buildTraceResource(session) {
16
+ const sessionView = shouldRedactTraceContent()
17
+ ? {
18
+ ...session,
19
+ thoughts: session.thoughts.map((thought) => ({
20
+ index: thought.index,
21
+ content: REDACTED_THOUGHT_CONTENT,
22
+ revision: thought.revision,
23
+ ...(thought.stepSummary !== undefined
24
+ ? { stepSummary: REDACTED_THOUGHT_CONTENT }
25
+ : {}),
26
+ })),
27
+ }
28
+ : session;
12
29
  return {
13
30
  uri: `file:///cortex/sessions/${session.id}/trace.md`,
14
31
  mimeType: 'text/markdown',
15
- text: formatThoughtsToMarkdown(session),
32
+ text: formatThoughtsToMarkdown(sessionView),
16
33
  };
17
34
  }
18
35
  const reasoningTaskLimiter = createTaskLimiter(parsePositiveIntEnv('CORTEX_MAX_ACTIVE_REASONING_TASKS', DEFAULT_MAX_ACTIVE_REASONING_TASKS));
@@ -144,7 +161,7 @@ function buildThoughtInputs(params) {
144
161
  : params.thought
145
162
  ? [params.thought]
146
163
  : [];
147
- return [...primary, ...(params.thoughts ?? [])];
164
+ return primary;
148
165
  }
149
166
  function getStartingThoughtCount(sessionId) {
150
167
  if (sessionId === undefined) {
@@ -283,16 +300,22 @@ function createCancellationController(signal) {
283
300
  const controller = new AbortController();
284
301
  if (signal.aborted) {
285
302
  controller.abort();
286
- return controller;
303
+ return {
304
+ controller,
305
+ cleanup: () => {
306
+ // No listener to clean up when already aborted.
307
+ },
308
+ };
287
309
  }
288
310
  const onAbort = () => {
289
311
  controller.abort();
290
312
  };
291
- signal.addEventListener('abort', onAbort, { once: true });
292
- controller.signal.addEventListener('abort', () => {
313
+ const cleanup = () => {
293
314
  signal.removeEventListener('abort', onAbort);
294
- }, { once: true });
295
- return controller;
315
+ };
316
+ signal.addEventListener('abort', onAbort, { once: true });
317
+ controller.signal.addEventListener('abort', cleanup, { once: true });
318
+ return { controller, cleanup };
296
319
  }
297
320
  async function isTaskCancelled(taskStore, taskId) {
298
321
  try {
@@ -311,7 +334,7 @@ async function ensureTaskIsActive(taskStore, taskId, controller) {
311
334
  }
312
335
  function createProgressHandler(args) {
313
336
  const { server, taskStore, taskId, level, progressToken, controller, startingCount, batchTotal, } = args;
314
- return async (progress) => {
337
+ return async (progress, _total, summary) => {
315
338
  await ensureTaskIsActive(taskStore, taskId, controller);
316
339
  if (progressToken === undefined) {
317
340
  return;
@@ -322,14 +345,16 @@ function createProgressHandler(args) {
322
345
  const isTerminal = displayProgress >= batchTotal;
323
346
  // We must emit if it's the terminal update for this batch,
324
347
  // otherwise we respect the session-level skipping rules.
348
+ // If a summary is provided, we force an emit to show the meaningful update.
325
349
  if (!isTerminal &&
350
+ !summary &&
326
351
  !shouldEmitProgress(displayProgress, batchTotal, level)) {
327
352
  return;
328
353
  }
329
354
  const message = formatProgressMessage({
330
- toolName: TOOL_NAME,
355
+ toolName: `꩜ ${TOOL_NAME}`,
331
356
  context: 'Thought',
332
- metadata: `[${String(displayProgress)}/${String(batchTotal)}]`,
357
+ metadata: `[${String(displayProgress)}/${String(batchTotal)}]${summary ? ` ${summary}` : ''}`,
333
358
  ...(isTerminal ? { outcome: 'complete' } : {}),
334
359
  });
335
360
  await notifyProgress({
@@ -457,9 +482,9 @@ async function runReasoningTask(args) {
457
482
  const normalizedBatchTotal = Math.max(1, batchTotal);
458
483
  if (progressToken !== undefined) {
459
484
  const message = formatProgressMessage({
460
- toolName: TOOL_NAME,
461
- context: 'reasoning',
462
- metadata: level ? `starting [${level}]` : 'continuing session',
485
+ toolName: `꩜ ${TOOL_NAME}`,
486
+ context: level ? 'starting' : 'continuing',
487
+ metadata: level ? `[${level}]` : 'session',
463
488
  });
464
489
  await notifyProgress({
465
490
  server,
@@ -565,7 +590,8 @@ Use step_summary for a 1-sentence conclusion per step — these accumulate in th
565
590
 
566
591
  Levels: ${getLevelDescriptionString()}.
567
592
  Alternatives: runMode="run_to_completion" (batch), or observation/hypothesis/evaluation fields (structured).
568
- Errors: E_SESSION_NOT_FOUND (expired — start new), E_INVALID_THOUGHT_COUNT (check level ranges).`,
593
+ Errors: E_SESSION_NOT_FOUND (expired — start new), E_INVALID_THOUGHT_COUNT (check level ranges).
594
+ Protocol validation: malformed task metadata/arguments fail at request level before task start; runtime reasoning failures return tool isError=true payloads.`,
569
595
  inputSchema: ReasoningThinkInputSchema,
570
596
  outputSchema: ReasoningThinkToolOutputSchema,
571
597
  annotations: {
@@ -602,13 +628,13 @@ Errors: E_SESSION_NOT_FOUND (expired — start new), E_INVALID_THOUGHT_COUNT (ch
602
628
  reasoningTaskLimiter.release();
603
629
  throw error;
604
630
  }
605
- const controller = createCancellationController(extra.signal);
631
+ const cancellation = createCancellationController(extra.signal);
606
632
  const runReasoningArgs = {
607
633
  server,
608
634
  taskStore: extra.taskStore,
609
635
  taskId: task.taskId,
610
636
  params,
611
- controller,
637
+ controller: cancellation.controller,
612
638
  };
613
639
  if (progressToken !== undefined) {
614
640
  runReasoningArgs.progressToken = progressToken;
@@ -617,6 +643,7 @@ Errors: E_SESSION_NOT_FOUND (expired — start new), E_INVALID_THOUGHT_COUNT (ch
617
643
  runReasoningArgs.sessionId = extra.sessionId;
618
644
  }
619
645
  void runReasoningTask(runReasoningArgs).finally(() => {
646
+ cancellation.cleanup();
620
647
  reasoningTaskLimiter.release();
621
648
  });
622
649
  return { task };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@j0hanz/cortex-mcp",
3
- "version": "1.5.0",
3
+ "version": "1.6.0",
4
4
  "mcpName": "io.github.j0hanz/cortex-mcp",
5
5
  "author": "Johanz",
6
6
  "license": "MIT",