verifiable-thinking-mcp 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +339 -0
- package/package.json +75 -0
- package/src/index.ts +38 -0
- package/src/lib/cache.ts +246 -0
- package/src/lib/compression.ts +804 -0
- package/src/lib/compute/cache.ts +86 -0
- package/src/lib/compute/classifier.ts +555 -0
- package/src/lib/compute/confidence.ts +79 -0
- package/src/lib/compute/context.ts +154 -0
- package/src/lib/compute/extract.ts +200 -0
- package/src/lib/compute/filter.ts +224 -0
- package/src/lib/compute/index.ts +171 -0
- package/src/lib/compute/math.ts +247 -0
- package/src/lib/compute/patterns.ts +564 -0
- package/src/lib/compute/registry.ts +145 -0
- package/src/lib/compute/solvers/arithmetic.ts +65 -0
- package/src/lib/compute/solvers/calculus.ts +249 -0
- package/src/lib/compute/solvers/derivation-core.ts +371 -0
- package/src/lib/compute/solvers/derivation-latex.ts +160 -0
- package/src/lib/compute/solvers/derivation-mistakes.ts +1046 -0
- package/src/lib/compute/solvers/derivation-simplify.ts +451 -0
- package/src/lib/compute/solvers/derivation-transform.ts +620 -0
- package/src/lib/compute/solvers/derivation.ts +67 -0
- package/src/lib/compute/solvers/facts.ts +120 -0
- package/src/lib/compute/solvers/formula.ts +728 -0
- package/src/lib/compute/solvers/index.ts +36 -0
- package/src/lib/compute/solvers/logic.ts +422 -0
- package/src/lib/compute/solvers/probability.ts +307 -0
- package/src/lib/compute/solvers/statistics.ts +262 -0
- package/src/lib/compute/solvers/word-problems.ts +408 -0
- package/src/lib/compute/types.ts +107 -0
- package/src/lib/concepts.ts +111 -0
- package/src/lib/domain.ts +731 -0
- package/src/lib/extraction.ts +912 -0
- package/src/lib/index.ts +122 -0
- package/src/lib/judge.ts +260 -0
- package/src/lib/math/ast.ts +842 -0
- package/src/lib/math/index.ts +8 -0
- package/src/lib/math/operators.ts +171 -0
- package/src/lib/math/tokenizer.ts +477 -0
- package/src/lib/patterns.ts +200 -0
- package/src/lib/session.ts +825 -0
- package/src/lib/think/challenge.ts +323 -0
- package/src/lib/think/complexity.ts +504 -0
- package/src/lib/think/confidence-drift.ts +507 -0
- package/src/lib/think/consistency.ts +347 -0
- package/src/lib/think/guidance.ts +188 -0
- package/src/lib/think/helpers.ts +568 -0
- package/src/lib/think/hypothesis.ts +216 -0
- package/src/lib/think/index.ts +127 -0
- package/src/lib/think/prompts.ts +262 -0
- package/src/lib/think/route.ts +358 -0
- package/src/lib/think/schema.ts +98 -0
- package/src/lib/think/scratchpad-schema.ts +662 -0
- package/src/lib/think/spot-check.ts +961 -0
- package/src/lib/think/types.ts +93 -0
- package/src/lib/think/verification.ts +260 -0
- package/src/lib/tokens.ts +177 -0
- package/src/lib/verification.ts +620 -0
- package/src/prompts/index.ts +10 -0
- package/src/prompts/templates.ts +336 -0
- package/src/resources/index.ts +8 -0
- package/src/resources/sessions.ts +196 -0
- package/src/tools/compress.ts +138 -0
- package/src/tools/index.ts +5 -0
- package/src/tools/scratchpad.ts +2659 -0
- package/src/tools/sessions.ts +144 -0
|
@@ -0,0 +1,2659 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Scratchpad Tool - Unified CRASH-style reasoning with operation-based dispatch
|
|
3
|
+
*
|
|
4
|
+
* Features:
|
|
5
|
+
* - Auto step increment (no manual step_number needed)
|
|
6
|
+
* - Confidence tracking (average across chain)
|
|
7
|
+
* - Threshold detection with 5-second warning
|
|
8
|
+
* - Navigate operation for viewing history/branches/paths
|
|
9
|
+
* - Branch and revise operations
|
|
10
|
+
* - Auto-suggest next simplification step for math derivations
|
|
11
|
+
* - Proactive stepping guidance based on question complexity
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import type { Context } from "fastmcp";
|
|
15
|
+
import { compress, needsCompression } from "../lib/compression.ts";
|
|
16
|
+
import { contextAwareCompute } from "../lib/compute/context.ts";
|
|
17
|
+
import {
|
|
18
|
+
type DetectedMistake,
|
|
19
|
+
detectCommonMistakesFromText,
|
|
20
|
+
isLikelyComputable,
|
|
21
|
+
type SimplificationStep,
|
|
22
|
+
suggestNextStepFromText,
|
|
23
|
+
suggestSimplificationPath,
|
|
24
|
+
tryLocalCompute,
|
|
25
|
+
} from "../lib/compute/index.ts";
|
|
26
|
+
import { stripMarkdown } from "../lib/extraction.ts";
|
|
27
|
+
import { SessionManager, type ThoughtRecord } from "../lib/session.ts";
|
|
28
|
+
import { challenge, shouldChallenge } from "../lib/think/challenge.ts";
|
|
29
|
+
import { assessPromptComplexity } from "../lib/think/complexity.ts";
|
|
30
|
+
import { analyzeConfidenceDrift } from "../lib/think/confidence-drift.ts";
|
|
31
|
+
import { checkStepConsistency } from "../lib/think/consistency.ts";
|
|
32
|
+
import { detectDomain } from "../lib/think/guidance.ts";
|
|
33
|
+
import { analyzeStepForResolution } from "../lib/think/hypothesis.ts";
|
|
34
|
+
import {
|
|
35
|
+
type ScratchpadArgs,
|
|
36
|
+
type ScratchpadResponse,
|
|
37
|
+
ScratchpadSchema,
|
|
38
|
+
} from "../lib/think/scratchpad-schema.ts";
|
|
39
|
+
import { primeQuestion, spotCheck } from "../lib/think/spot-check.ts";
|
|
40
|
+
import { calculateTokenUsage, getSessionTokens, trackSessionTokens } from "../lib/tokens.ts";
|
|
41
|
+
import { verify } from "../lib/verification.ts";
|
|
42
|
+
|
|
43
|
+
/**
 * Local alias for the fastmcp `Context` type as instantiated in this file.
 * Helpers in this file use `MCPContext["streamContent"]` to type the streaming callback.
 */
type MCPContext = Context<Record<string, unknown> | undefined>;

// ============================================================================
// CONSTANTS
// ============================================================================

/**
 * Question-length cutoff (in characters) that selects `maxCombined` for trap priming.
 * Questions strictly shorter than the cutoff are primed with maxCombined=2;
 * questions at or above it are primed with maxCombined=1.
 *
 * Tuned empirically: every multi-trap question in the benchmark is ≥195 chars,
 * so a cutoff of 190 keeps all of them on the conservative setting (maxCombined=1).
 */
const ADAPTIVE_PRIMING_THRESHOLD = 190;

/**
 * Upper bound on question length accepted for trap priming (security + performance).
 * Guards against memory exhaustion and ReDoS attacks on the regex patterns.
 * 10k chars ≈ 2.5k tokens, which is sufficient for any reasonable question.
 */
const MAX_QUESTION_LENGTH = 10_000;
|
|
64
|
+
|
|
65
|
+
// ============================================================================
|
|
66
|
+
// STEPPING GUIDANCE
|
|
67
|
+
// ============================================================================
|
|
68
|
+
|
|
69
|
+
/**
 * Translate a complexity tier into the minimum number of reasoning steps to recommend.
 *
 * @param tier - Complexity tier label.
 * @returns Recommended minimum step count:
 *          Low → 1, Moderate → 2, High → 4, Very Hard → 6, Almost Impossible → 8.
 */
function getRecommendedSteps(
  tier: "Low" | "Moderate" | "High" | "Very Hard" | "Almost Impossible",
): number {
  // One entry per tier; the Record type forces the table to stay exhaustive.
  const minimumStepsByTier: Record<typeof tier, number> = {
    Low: 1,
    Moderate: 2,
    High: 4,
    "Very Hard": 6,
    "Almost Impossible": 8,
  };
  return minimumStepsByTier[tier];
}
|
|
86
|
+
|
|
87
|
+
// ============================================================================
|
|
88
|
+
// CONFIDENCE TRACKING
|
|
89
|
+
// ============================================================================
|
|
90
|
+
|
|
91
|
+
/** Confidence summary for a reasoning chain, as returned by calculateConfidence. */
interface ConfidenceState {
  /** Confidence passed in for the current step, or undefined when none was given. */
  stepConfidence: number | undefined;
  /** Arithmetic mean of all contributing confidence values; 0 when there are none. */
  chainConfidence: number;
  /** How many confidence values contributed to `chainConfidence`. */
  stepsWithConfidence: number;
}
|
|
96
|
+
|
|
97
|
+
/**
 * Compute the running confidence for one branch of a session.
 *
 * Gathers `verification.confidence` from every thought SessionManager returns for the
 * branch (thoughts without one are skipped), appends `newConfidence` when supplied,
 * and averages the collected values.
 *
 * @param sessionId - Session whose thoughts are read.
 * @param branchId - Branch within that session.
 * @param newConfidence - Optional confidence of the step currently being processed.
 * @returns The step confidence exactly as passed in, the mean of all collected values
 *          (0 when none exist), and the number of values that were averaged.
 */
function calculateConfidence(
  sessionId: string,
  branchId: string,
  newConfidence?: number,
): ConfidenceState {
  // Confidences recorded by verification on earlier thoughts, in stored order.
  const collected: number[] = SessionManager.getThoughts(sessionId, branchId)
    .map((record) => record.verification?.confidence)
    .filter((value): value is number => value !== undefined);

  // The current step counts towards the chain average too, when provided.
  if (newConfidence !== undefined) {
    collected.push(newConfidence);
  }

  let total = 0;
  for (const value of collected) {
    total += value;
  }

  return {
    stepConfidence: newConfidence,
    // Avoid 0/0 when nothing has a confidence yet.
    chainConfidence: collected.length > 0 ? total / collected.length : 0,
    stepsWithConfidence: collected.length,
  };
}
|
|
127
|
+
|
|
128
|
+
/**
 * Classify the chain's progress from its confidence relative to a threshold.
 *
 * @param chainConfidence - Current average confidence of the chain.
 * @param threshold - Confidence level at which the chain is considered sufficient.
 * @param isComplete - Whether the chain has been marked complete; takes precedence.
 * @returns "complete" when `isComplete`; otherwise "threshold_reached" at or above the
 *          threshold, "review" at or above 80% of the threshold, else "continue".
 */
function determineStatus(
  chainConfidence: number,
  threshold: number,
  isComplete: boolean,
): ScratchpadResponse["status"] {
  // Completion wins regardless of confidence.
  if (isComplete) {
    return "complete";
  }

  const reachedThreshold = chainConfidence >= threshold;
  // "Near" means within 20% of the threshold.
  const nearThreshold = chainConfidence >= threshold * 0.8;

  return reachedThreshold ? "threshold_reached" : nearThreshold ? "review" : "continue";
}
|
|
139
|
+
|
|
140
|
+
/**
 * Produce the human-readable next-action hint for a chain status.
 *
 * @param status - Status of the reasoning chain.
 * @param chainConfidence - Chain confidence in [0, 1] terms; rendered as a whole-number
 *                          percentage in the confidence-related messages.
 * @returns The message associated with `status`.
 */
function getSuggestedAction(status: ScratchpadResponse["status"], chainConfidence: number): string {
  const percent = (chainConfidence * 100).toFixed(0);

  const actionByStatus: Record<ScratchpadResponse["status"], string> = {
    complete: "Reasoning chain complete.",
    threshold_reached: `Confidence ${percent}% reached threshold. Consider completing or add one more verification step.`,
    review: `Confidence ${percent}% approaching threshold. Review recent steps for completeness.`,
    continue: `Continue reasoning. Chain confidence: ${percent}%`,
    verification_failed: "Verification failed. Use revise, branch, or override to continue.",
    budget_exhausted: "Token budget exhausted. Complete your reasoning or start a new session.",
  };

  return actionByStatus[status];
}
|
|
157
|
+
|
|
158
|
+
/**
 * Step-level confidence-drift (CDD) analysis for one branch.
 *
 * Reads the branch's thoughts, runs `analyzeConfidenceDrift` on them and returns the
 * drift data for every pattern except "insufficient", so clients can display the
 * trajectory. A warning is streamed only when the analysis is flagged `unresolved`.
 *
 * @param sessionId - Session to analyse.
 * @param branchId - Branch within the session.
 * @param streamContent - Callback used to stream the warning text.
 * @returns Structured drift info, or undefined when the branch has fewer than three
 *          thoughts or the analysis reports the "insufficient" pattern.
 */
async function runStepLevelCDD(
  sessionId: string,
  branchId: string,
  streamContent: MCPContext["streamContent"],
): Promise<ScratchpadResponse["confidence_drift"] | undefined> {
  // Fewer than this many steps is not enough for a meaningful drift analysis.
  const minStepsForAnalysis = 3;

  const history = SessionManager.getThoughts(sessionId, branchId);
  if (history.length < minStepsForAnalysis) return undefined;

  const drift = analyzeConfidenceDrift(history);
  if (drift.pattern === "insufficient") return undefined;

  // Only concerning (unresolved) drift is surfaced as a streamed warning.
  if (drift.unresolved) {
    const suggestionLine = drift.suggestion ? ` 💡 ${drift.suggestion}\n` : "";
    await streamContent({
      type: "text",
      text: `\n⚠️ **Early Drift Warning:** ${drift.explanation}\n` + suggestionLine,
    });
  }

  // Structured data is returned for every remaining pattern, concerning or not.
  return {
    drift_score: drift.drift_score,
    unresolved: drift.unresolved,
    min_confidence: drift.min_confidence,
    min_step: drift.min_step,
    max_drop: drift.max_drop,
    recovery: drift.recovery,
    has_revision_after_drop: drift.has_revision_after_drop,
    pattern: drift.pattern,
    explanation: drift.explanation,
    suggestion: drift.suggestion,
  };
}
|
|
206
|
+
|
|
207
|
+
/**
 * Adaptive spot-check: automatically spot-check the current thought when CDD has
 * flagged unresolved drift, so trap patterns are caught before the chain completes.
 *
 * Runs only when all of the following hold:
 *  1. `cddResult.unresolved` is truthy,
 *  2. the session has a stored question,
 *  3. the thought contains something that looks like an answer.
 *
 * Answer extraction is heuristic. Patterns are tried in order and the first capture wins:
 *  - "answer|result|solution|total|sum|value|equal(s)" followed by "is", ":" or "="
 *  - a connective ("therefore", "thus", "so", "hence") as a whole word
 *  - "= X" at the end of a line
 *  - a bold "**X**" at the end of a line
 *
 * @param sessionId - Session whose stored question is checked against.
 * @param thought - Text of the current step.
 * @param cddResult - Drift analysis for the step, if any.
 * @param streamContent - Callback used to stream the warning text.
 * @returns The failed spot-check details, or undefined when the check was not triggered
 *          or the spot-check passed.
 */
async function runAdaptiveSpotCheck(
  sessionId: string,
  thought: string,
  cddResult: ScratchpadResponse["confidence_drift"] | undefined,
  streamContent: MCPContext["streamContent"],
): Promise<ScratchpadResponse["spot_check_result"] | undefined> {
  // Only trigger if CDD detected unresolved drift.
  if (!cddResult?.unresolved) {
    return undefined;
  }

  // Need a stored question to spot-check against.
  const question = SessionManager.getQuestion(sessionId);
  if (!question) {
    return undefined;
  }

  // The answer token stops at whitespace or sentence punctuation, but keeps a "." or ","
  // that is directly followed by a digit, so "0.05" and "1,000" are captured whole
  // instead of being cut to "0" / "1". The two alternatives inside the group are
  // mutually exclusive, so matching stays linear.
  const answerPatterns = [
    /(?:answer|result|solution|total|sum|value|equals?)\s*(?:is|:|=)\s*((?:[^\s,.]|[.,](?=\d))+)/i,
    // \b on both sides: without it "so" matches inside words such as "reason" or "also"
    // and the capture is a meaningless word fragment.
    /\b(?:therefore|thus|so|hence)\b\s*[,:]?\s*((?:[^\s,.]|[.,](?=\d))+)/i,
    /=\s*((?:[^\s,.=]|[.,](?=\d))+)\s*$/m,
    /\*\*([^*]+)\*\*\s*$/m, // Bold answer at end
  ];

  let potentialAnswer: string | undefined;
  for (const pattern of answerPatterns) {
    const match = thought.match(pattern);
    if (match?.[1]) {
      potentialAnswer = match[1].trim();
      break;
    }
  }

  // No answer found in thought.
  if (!potentialAnswer) {
    return undefined;
  }

  const result = spotCheck(question, potentialAnswer);

  // Only report when the spot-check failed (i.e. it found a trap).
  if (result.passed) {
    return undefined;
  }

  await streamContent({
    type: "text",
    text:
      `\n🔍 **Adaptive Spot-Check** (triggered by ${cddResult.pattern} drift)\n` +
      ` ⚠️ ${result.trapType}: ${result.warning}\n` +
      (result.hint ? ` 💡 ${result.hint}\n` : ""),
  });

  return {
    passed: result.passed,
    trap_type: result.trapType,
    warning: result.warning,
    hint: result.hint,
    confidence: result.confidence,
  };
}
|
|
283
|
+
|
|
284
|
+
/**
 * Attach the optional sections of a step response in place.
 *
 * Sections are written onto `response` in a fixed order: verification, local compute,
 * compression, token usage (always), compression suggestion, augmentation, trap
 * analysis, next-step suggestion. A section is skipped when its input is absent.
 *
 * @param response - Response object to mutate.
 * @param params - Results gathered while handling the step.
 */
function enrichStepResponse(
  response: ScratchpadResponse,
  params: {
    verificationResult: { passed: boolean; confidence: number } | null;
    domain: string;
    computeResult: { solved: boolean; result?: string | number; method?: string } | null;
    compressionResult: ScratchpadResponse["compression"] | null;
    tokenUsage: { total: number };
    tokenBudget: number;
    budgetExceeded: boolean;
    autoCompressed: boolean;
    augmentationResult: ScratchpadResponse["augmentation"] | null;
    trapAnalysis: ScratchpadResponse["trap_analysis"] | undefined;
    nextStepSuggestion: ScratchpadResponse["next_step_suggestion"] | undefined;
  },
): void {
  const verification = params.verificationResult;
  const compute = params.computeResult;
  const compression = params.compressionResult;
  const budget = params.tokenBudget;
  const usedTokens = params.tokenUsage.total;

  // Verification outcome, tagged with the detected domain.
  if (verification) {
    response.verification = {
      passed: verification.passed,
      confidence: verification.confidence,
      domain: params.domain,
    };
  }

  // Local compute is reported only when it actually produced a result.
  if (compute?.solved && compute.result !== undefined) {
    response.local_compute = {
      solved: true,
      result: compute.result,
      method: compute.method ?? "unknown",
    };
  }

  if (compression) {
    response.compression = compression;
  }

  // Token usage is always reported; a non-positive budget yields 0%.
  const exactPercent = budget > 0 ? (usedTokens / budget) * 100 : 0;
  const roundedPercent = Math.round(exactPercent);
  response.token_usage = {
    total: usedTokens,
    budget,
    exceeded: params.budgetExceeded,
    auto_compressed: params.autoCompressed,
    budget_percent: roundedPercent,
  };

  // Nudge towards compression at or above 60% of the budget, unless this step was
  // already compressed; the warning prefix is added at or above 80%.
  if (exactPercent >= 60 && !(params.autoCompressed || compression)) {
    const prefix = exactPercent >= 80 ? "⚠️ " : "";
    response.compression_suggestion = {
      should_compress: true,
      current_tokens: usedTokens,
      budget,
      percent_used: roundedPercent,
      nudge: `${prefix}Session at ${roundedPercent}% of token budget (${usedTokens}/${budget}). Use compress=true on next step to reduce context size.`,
    };
  }

  if (params.augmentationResult) {
    response.augmentation = params.augmentationResult;
  }

  // Trap analysis comes from priming on the first step.
  if (params.trapAnalysis) {
    response.trap_analysis = params.trapAnalysis;
  }

  // Next-step suggestion is produced for math derivations.
  if (params.nextStepSuggestion) {
    response.next_step_suggestion = params.nextStepSuggestion;
  }
}
|
|
378
|
+
|
|
379
|
+
// ============================================================================
|
|
380
|
+
// HELPER FUNCTIONS
|
|
381
|
+
// ============================================================================
|
|
382
|
+
|
|
383
|
+
/**
 * Assemble the response returned when a step fails verification.
 *
 * The response carries status "verification_failed", the chain confidence computed
 * from already-stored thoughts (the failed step's own confidence is not added to the
 * average), the failure details, and three recovery options: revise, branch, override.
 * `current_step` is reported as `stepNumber - 1`.
 *
 * @param params - Identifiers of the failed step, the confidence threshold, the
 *                 verification outcome, any detected mistakes, and the domain.
 * @returns The fully populated failure response.
 */
function buildVerificationFailureResponse(params: {
  sessionId: string;
  branchId: string;
  stepNumber: number;
  threshold: number;
  verificationResult: {
    passed: boolean;
    confidence: number;
    suggestions: string[];
    evidence: string;
  };
  detectedMistakes: DetectedMistake[];
  domain: string;
}): ScratchpadResponse {
  const { sessionId, branchId, stepNumber, verificationResult, detectedMistakes, domain } = params;
  const { chainConfidence, stepsWithConfidence } = calculateConfidence(sessionId, branchId);

  const primarySuggestion = verificationResult.suggestions[0];
  const firstMistake = detectedMistakes[0];

  // Prefer a concrete detected mistake as the revise reason; otherwise fall back to the
  // verifier's first suggestion, then to a generic message.
  let reviseReason: string;
  if (firstMistake) {
    reviseReason = `Fix ${firstMistake.type}: ${firstMistake.suggestion || firstMistake.explanation}`;
  } else {
    reviseReason = primarySuggestion || "Fix verification issue";
  }

  // The key is always present; its value is undefined when nothing was detected.
  const mistakeSummaries =
    detectedMistakes.length > 0
      ? detectedMistakes.map((mistake) => ({
          type: mistake.type,
          description: mistake.explanation,
          fix: mistake.suggestion,
          corrected_step: mistake.suggestedFix,
        }))
      : undefined;

  return {
    session_id: sessionId,
    current_step: stepNumber - 1,
    branch: branchId,
    operation: "step",
    chain_confidence: chainConfidence,
    confidence_threshold: params.threshold,
    steps_with_confidence: stepsWithConfidence,
    status: "verification_failed",
    suggested_action: "Verification failed. Use revise, branch, or override to continue.",
    verification_failure: {
      issue: primarySuggestion || "Verification failed",
      evidence: verificationResult.evidence,
      suggestions: verificationResult.suggestions,
      confidence: verificationResult.confidence,
      domain,
      detected_mistakes: mistakeSummaries,
      recovery_options: {
        revise: {
          target_step: stepNumber,
          suggested_reason: reviseReason,
        },
        branch: {
          // Never suggest branching from before the first step.
          from_step: Math.max(1, stepNumber - 1),
          suggested_name: `Alternative after failed step ${stepNumber}`,
        },
        override: {
          flag: "force_continue",
          warning:
            "Only use if you're certain the heuristic is wrong. The step will be stored as-is.",
        },
      },
    },
  };
}
|
|
461
|
+
|
|
462
|
+
/**
 * Stream a human-readable verification-failure notice.
 *
 * The notice contains the verification confidence as a percentage, the issue (first
 * suggestion, or "Verification failed"), the evidence, an optional list of detected
 * algebraic mistakes, and the three recovery options (revise, branch, override).
 *
 * The branch option's `from_step` is clamped to a minimum of 1, matching the
 * `Math.max(1, stepNumber - 1)` used for the structured recovery options in
 * buildVerificationFailureResponse, so a failure at step 1 does not advertise a
 * non-existent step 0.
 *
 * @param streamContent - Callback used to stream the notice.
 * @param verificationResult - Confidence, suggestions and evidence from verification.
 * @param detectedMistakes - Mistakes detected in the step; may be empty.
 * @param stepNumber - Number of the step that failed verification.
 */
async function streamVerificationFailure(
  streamContent: MCPContext["streamContent"],
  verificationResult: { confidence: number; suggestions: string[]; evidence: string },
  detectedMistakes: DetectedMistake[],
  stepNumber: number,
): Promise<void> {
  // One bullet per mistake; the corrected step wins over the textual suggestion.
  let mistakeText = "";
  if (detectedMistakes.length > 0) {
    const bullets = detectedMistakes.map((mistake) => {
      const headline = `• **${mistake.type}**: ${mistake.explanation}\n`;
      if (mistake.suggestedFix) {
        return headline + ` **Corrected:** \`${mistake.suggestedFix}\`\n`;
      }
      if (mistake.suggestion) {
        return headline + ` Fix: ${mistake.suggestion}\n`;
      }
      return headline;
    });
    mistakeText = "\n**Detected algebraic mistakes:**\n" + bullets.join("");
  }

  const primarySuggestion = verificationResult.suggestions[0];
  const issue = primarySuggestion || "Verification failed";
  const confidencePercent = Math.round(verificationResult.confidence * 100);
  // Step numbering starts at 1, so branching can never start earlier than step 1.
  const branchFromStep = Math.max(1, stepNumber - 1);

  await streamContent({
    type: "text",
    text:
      `\n⚠️ **VERIFICATION FAILED** (${confidencePercent}% confidence)\n` +
      `**Issue:** ${issue}\n` +
      `**Evidence:** ${verificationResult.evidence}\n` +
      mistakeText +
      `\n**Recovery options:**\n` +
      `1. \`revise\` - Correct this step (target_step: ${stepNumber}, reason: "${primarySuggestion || "fix issue"}")\n` +
      `2. \`branch\` - Try alternative approach (from_step: ${branchFromStep})\n` +
      `3. \`override\` - Proceed anyway (acknowledge: true, failed_step: ${stepNumber})\n\n` +
      `**Suggested:** revise\n`,
  });
}
|
|
497
|
+
|
|
498
|
+
/**
 * Create the thought record for a step that failed verification.
 *
 * The record is stamped with the current time, marked `passed: false`, and identified
 * as `<sessionId>:<branchId>:<stepNumber>`. When compression was applied, its token
 * counts are recorded and `input_bytes_saved` is set to the token difference times 4;
 * the other byte counters are 0.
 *
 * @param params - Step identifiers, thought text, domain, verification confidence and
 *                 the compression outcome (or null when no compression happened).
 * @returns The pending thought record.
 */
function buildPendingRecord(params: {
  sessionId: string;
  branchId: string;
  stepNumber: number;
  thought: string;
  domain: string;
  verificationConfidence: number;
  compressionResult: { original_tokens: number; compressed_tokens: number } | null;
}): ThoughtRecord {
  const { sessionId, branchId, stepNumber, compressionResult: tokens } = params;

  return {
    id: [sessionId, branchId, stepNumber].join(":"),
    step_number: stepNumber,
    thought: params.thought,
    timestamp: Date.now(),
    branch_id: branchId,
    verification: {
      passed: false,
      confidence: params.verificationConfidence,
      domain: params.domain,
    },
    // The key is always present; its value is undefined without compression.
    compression: tokens
      ? {
          input_bytes_saved: (tokens.original_tokens - tokens.compressed_tokens) * 4,
          output_bytes_saved: 0,
          context_bytes_saved: 0,
          original_tokens: tokens.original_tokens,
          compressed_tokens: tokens.compressed_tokens,
        }
      : undefined,
  };
}
|
|
536
|
+
|
|
537
|
+
/**
 * Optionally augment a thought with locally computed values.
 *
 * When `shouldAugment` is set, the thought (with `context` passed as the system prompt)
 * is run through `contextAwareCompute`. If that reports computations, a one-line notice
 * is streamed and the augmented text is returned together with summary counts.
 *
 * @param thought - Thought text of the current step.
 * @param context - Optional context, forwarded as `systemPrompt`.
 * @param shouldAugment - Whether augmentation is enabled for this step.
 * @param streamContent - Callback used to stream the notice.
 * @returns The (possibly augmented) thought plus an augmentation summary; `result` is
 *          null when augmentation is disabled or nothing was computed.
 */
async function applyAugmentation(
  thought: string,
  context: string | undefined,
  shouldAugment: boolean,
  streamContent: MCPContext["streamContent"],
): Promise<{
  thought: string;
  result: { applied: boolean; computations: number; filtered: number; domain: string } | null;
}> {
  if (shouldAugment) {
    const outcome = contextAwareCompute({ thought, systemPrompt: context });

    if (outcome.hasComputations) {
      const computationCount = outcome.computations.length;
      await streamContent({
        type: "text",
        text: `⚡ **Augmented** ${computationCount} computations (${outcome.domain})\n`,
      });

      return {
        thought: outcome.augmented,
        result: {
          applied: true,
          computations: computationCount,
          filtered: outcome.filteredCount,
          domain: outcome.domain,
        },
      };
    }
  }

  // Disabled, or nothing to compute: hand the thought back untouched.
  return { thought, result: null };
}
|
|
571
|
+
|
|
572
|
+
/**
 * Compress a thought when requested, when the budget is exceeded, or when it is long.
 *
 * Compression runs if `args.compress` is set, if `budgetExceeded` is true, or if the
 * thought is longer than 500 characters and `needsCompression` recommends it. The
 * target ratio is 0.4 under budget pressure and 0.6 otherwise. A one-line summary is
 * streamed, tagged "[budget guard]" when the budget (not the caller) forced it.
 *
 * @param thought - Thought text of the current step.
 * @param args - Caller options: explicit `compress` flag, `compression_query`, `context`.
 * @param budgetExceeded - Whether the session's token budget has been exceeded.
 * @param streamContent - Callback used to stream the summary.
 * @returns The (possibly compressed) thought, compression stats or null, and whether
 *          compression was forced by the budget without an explicit request.
 */
async function applyCompression(
  thought: string,
  args: { compress?: boolean; compression_query?: string; context?: string },
  budgetExceeded: boolean,
  streamContent: MCPContext["streamContent"],
): Promise<{
  thought: string;
  result: {
    applied: boolean;
    original_tokens: number;
    compressed_tokens: number;
    ratio: number;
  } | null;
  autoCompressed: boolean;
}> {
  // Short-circuit order matters: the length heuristic is consulted only as a last resort.
  const compressionWanted =
    args.compress ||
    budgetExceeded ||
    (thought.length > 500 && needsCompression(thought).shouldCompress);

  if (!compressionWanted) {
    return { thought, result: null, autoCompressed: false };
  }

  // Relevance query: explicit query first, then the context, then nothing.
  const relevanceQuery = args.compression_query || args.context || "";
  const output = compress(thought, relevanceQuery, { target_ratio: budgetExceeded ? 0.4 : 0.6 });

  // "Auto" means the budget forced compression without the caller asking for it.
  const autoCompressed = budgetExceeded && !args.compress;
  const ratioPercent = (output.ratio * 100).toFixed(0);

  await streamContent({
    type: "text",
    text: `📦 **Compressed** ${output.original_tokens}→${output.compressed_tokens} tokens (${ratioPercent}%)${autoCompressed ? " [budget guard]" : ""}\n`,
  });

  return {
    thought: output.compressed,
    result: {
      applied: true,
      original_tokens: output.original_tokens,
      compressed_tokens: output.compressed_tokens,
      ratio: output.ratio,
    },
    autoCompressed,
  };
}
|
|
619
|
+
|
|
620
|
+
// ============================================================================
|
|
621
|
+
// STEP ANALYSIS HELPERS (trap priming, consistency, hypothesis resolution)
|
|
622
|
+
// ============================================================================
|
|
623
|
+
|
|
624
|
+
/**
 * Trap priming for the step operation.
 *
 * Flow:
 *  1. Questions longer than MAX_QUESTION_LENGTH are rejected with a streamed warning
 *     and are not stored.
 *  2. Otherwise the question is stored on the session via SessionManager.setQuestion.
 *  3. Priming itself runs only on step 1; on any other step a warning is streamed and
 *     nothing else happens.
 *  4. On step 1 the question is primed with an adaptive `maxCombined`: 2 for questions
 *     shorter than ADAPTIVE_PRIMING_THRESHOLD characters, 1 otherwise.
 *
 * @param question - The question being reasoned about.
 * @param sessionId - Session in which to store the question.
 * @param stepNumber - Number of the current step.
 * @param streamContent - Callback used to stream warnings and the trap analysis.
 * @returns Trap analysis when priming applies and produced a prompt, else undefined.
 */
async function handleTrapPriming(
  question: string,
  sessionId: string,
  stepNumber: number,
  streamContent: MCPContext["streamContent"],
): Promise<ScratchpadResponse["trap_analysis"]> {
  const emit = (text: string) => streamContent({ type: "text", text });
  const questionLength = question.length;

  // Length guard first (security: prevents memory exhaustion + ReDoS).
  if (questionLength > MAX_QUESTION_LENGTH) {
    await emit(
      `⚠️ Question too long (${questionLength} chars, max ${MAX_QUESTION_LENGTH}). Skipping trap detection.\n\n`,
    );
    return undefined;
  }

  // Keep the question for the later spot-check at complete (first-write-wins).
  SessionManager.setQuestion(sessionId, question);

  // A late question is stored but not primed.
  if (stepNumber !== 1) {
    await emit(
      `⚠️ Question provided at step ${stepNumber}. Trap priming only runs on step 1. Stored for spot-check at complete.\n\n`,
    );
    return undefined;
  }

  // Short questions can absorb more priming context.
  const maxCombined = questionLength < ADAPTIVE_PRIMING_THRESHOLD ? 2 : 1;
  const priming = primeQuestion(question, { maxCombined });
  const prompt = priming.primingPrompt;
  if (!(priming.shouldPrime && prompt)) {
    return undefined;
  }

  await emit(`💡 **Trap Analysis:** ${prompt}\n\n`);

  return {
    detected: true,
    types: priming.trapTypes,
    primed_count: priming.primedTypes.length,
    note: prompt,
    confidence: priming.confidence,
  };
}
|
|
678
|
+
|
|
679
|
+
/**
 * Periodic contradiction check between the current step and the earlier ones.
 *
 * Runs only on steps 3, 6, 9, …. The branch's stored thoughts, minus the last entry
 * (the current step is expected to be stored already), are compared against the
 * current thought with `checkStepConsistency`. Any contradictions are streamed as a
 * warning and returned in structured form.
 *
 * @param sessionId - Session to read thoughts from.
 * @param branchId - Branch within the session.
 * @param stepNumber - Number of the current step.
 * @param currentThought - Text of the current step.
 * @param streamContent - Callback used to stream the warning.
 * @returns A consistency warning when contradictions were found, else undefined.
 */
async function runConsistencyCheck(
  sessionId: string,
  branchId: string,
  stepNumber: number,
  currentThought: string,
  streamContent: MCPContext["streamContent"],
): Promise<ScratchpadResponse["consistency_warning"]> {
  const checkInterval = 3;
  const isCheckpoint = stepNumber >= checkInterval && stepNumber % checkInterval === 0;
  if (!isCheckpoint) return undefined;

  // Drop the last stored thought: it is the current step itself.
  const earlierSteps = SessionManager.getThoughts(sessionId, branchId)
    .slice(0, -1)
    .map((record) => ({ step: record.step_number, thought: record.thought }));

  const contradictions = checkStepConsistency(
    { step: stepNumber, thought: currentThought },
    earlierSteps,
  );

  const found = contradictions.length;
  if (found === 0) return undefined;

  const bulletList = contradictions.map((c) => ` - ${c.description}`).join("\n");
  await streamContent({
    type: "text",
    text: `\n⚠️ **Consistency Warning:** ${found} contradiction(s) detected\n` + bulletList + "\n",
  });

  const stepsToReview = contradictions.map((c) => c.original_step).join(", ");
  return {
    has_contradictions: true,
    count: found,
    contradictions: contradictions.map((c) => ({
      type: c.type,
      description: c.description,
      subject: c.subject,
      original_step: c.original_step,
      conflicting_step: c.conflicting_step,
      confidence: c.confidence,
    })),
    nudge: `⚠️ Found ${found} potential contradiction(s). Review steps ${stepsToReview} for consistency.`,
  };
}
|
|
728
|
+
|
|
729
|
+
/**
 * Check whether the current step resolves the hypothesis of the branch it is on.
 *
 * Only a non-"main" branch that carries a hypothesis and whose id equals
 * `branchId` is analysed. An unresolved result with confidence of 0.5 or less
 * is ignored.
 *
 * @returns `resolution` (and `mergeSuggestion` when the outcome is
 *          "confirmed"), or an empty object when the session is missing or
 *          nothing qualifies.
 */
async function runHypothesisResolution(
  sessionId: string,
  branchId: string,
  stepNumber: number,
  currentThought: string,
  streamContent: MCPContext["streamContent"],
): Promise<{
  resolution?: ScratchpadResponse["hypothesis_resolution"];
  mergeSuggestion?: ScratchpadResponse["merge_suggestion"];
}> {
  const session = SessionManager.get(sessionId);
  if (!session) return {};

  for (const candidate of session.branches.values()) {
    const hypothesis = candidate.hypothesis;

    // Skip branches without a hypothesis, the main branch, and any branch
    // other than the one the current step belongs to.
    if (!hypothesis || candidate.id === "main" || candidate.id !== branchId) {
      continue;
    }

    const resolution = analyzeStepForResolution(
      currentThought,
      hypothesis,
      candidate.success_criteria ?? null,
      stepNumber,
    );

    // Neither resolved nor confident enough to report.
    if (!resolution.resolved && resolution.confidence <= 0.5) {
      continue;
    }

    // Announce a resolved hypothesis (truncated to 60 chars for display).
    if (resolution.resolved) {
      let emoji = "❓";
      if (resolution.outcome === "confirmed") {
        emoji = "✅";
      } else if (resolution.outcome === "refuted") {
        emoji = "❌";
      }
      const ellipsis = hypothesis.length > 60 ? "..." : "";

      await streamContent({
        type: "text",
        text:
          `\n${emoji} **Hypothesis ${resolution.outcome?.toUpperCase()}:** "${hypothesis.slice(0, 60)}${ellipsis}"\n` +
          ` Evidence: ${resolution.evidence}\n` +
          ` ${resolution.suggestion}\n`,
      });
    }

    // A confirmed hypothesis additionally yields a merge suggestion.
    let mergeSuggestion: ScratchpadResponse["merge_suggestion"];
    if (resolution.outcome === "confirmed") {
      const nudge = `💡 Hypothesis confirmed! Consider incorporating findings from branch "${candidate.name || candidate.id}" into your main reasoning.`;
      mergeSuggestion = {
        should_merge: true,
        from_branch: candidate.id,
        confirmed_hypothesis: hypothesis,
        key_findings: resolution.evidence || currentThought.slice(0, 100),
        nudge,
      };

      await streamContent({ type: "text", text: `\n${nudge}\n` });
    }

    return { resolution, mergeSuggestion };
  }

  return {};
}
|
|
805
|
+
|
|
806
|
+
/**
 * Decide whether the chain should be adversarially challenged and, if so,
 * stream a nudge and return the matching suggestion.
 *
 * @param chainConfidence - Current chain confidence (shown as a percentage).
 * @param stepCount - Number of steps taken so far.
 * @param hasVerification - Whether the step carries a passed verification.
 * @param streamContent - Sink used to stream the nudge.
 * @returns A `challenge_suggestion` when `shouldChallenge` says so,
 *          `undefined` otherwise.
 */
async function runAutoChallenge(
  chainConfidence: number,
  stepCount: number,
  hasVerification: boolean,
  streamContent: MCPContext["streamContent"],
): Promise<ScratchpadResponse["challenge_suggestion"]> {
  type SuggestedType = NonNullable<ScratchpadResponse["challenge_suggestion"]>["suggested_type"];

  if (!shouldChallenge(chainConfidence, stepCount, hasVerification)) {
    return undefined;
  }

  const percent = (chainConfidence * 100).toFixed(0);

  // Pick the reason and the challenge flavour that goes with it.
  const pickChallenge = (): { reason: string; suggestedType: SuggestedType } => {
    if (chainConfidence > 0.95) {
      return {
        reason: `Very high confidence (${percent}%) warrants adversarial review`,
        suggestedType: "all",
      };
    }
    if (stepCount < 3 && !hasVerification) {
      return {
        reason: `High confidence (${percent}%) with only ${stepCount} step(s) and no verification`,
        suggestedType: "premise_check",
      };
    }
    return {
      reason: `Confidence pattern suggests potential overconfidence`,
      suggestedType: "assumption_inversion",
    };
  };

  const { reason, suggestedType } = pickChallenge();
  const nudge = `🎯 Consider using \`challenge\` operation: ${reason}`;

  await streamContent({ type: "text", text: `\n${nudge}\n` });

  return {
    should_challenge: true,
    reason,
    suggested_type: suggestedType,
    nudge,
  };
}
|
|
853
|
+
|
|
854
|
+
/**
 * Derive stepping guidance from the complexity of the question.
 *
 * Produces a result only for step 1 with a non-empty question; a nudge is
 * streamed and included only when more than 2 steps are recommended.
 *
 * @returns The guidance payload, or `undefined` when not applicable.
 */
async function calculateSteppingGuidance(
  question: string | undefined,
  stepNumber: number,
  streamContent: MCPContext["streamContent"],
): Promise<ScratchpadResponse["stepping_guidance"]> {
  if (stepNumber !== 1 || !question) {
    return undefined;
  }

  const { tier } = assessPromptComplexity(question);
  const recommended = getRecommendedSteps(tier);

  const nudge =
    recommended > 2
      ? `⚠️ This is a ${tier} complexity question. Take ${recommended}+ reasoning steps before concluding.`
      : null;

  if (nudge) {
    await streamContent({ type: "text", text: `${nudge}\n\n` });
  }

  return {
    complexity_tier: tier,
    recommended_steps: recommended,
    current_steps: 1, // guidance is only computed on the first step
    needs_more_steps: recommended > 1,
    nudge,
  };
}
|
|
889
|
+
|
|
890
|
+
/**
 * Verify a thought and, on failure, park it as pending and build the failure
 * response.
 *
 * Verification runs when `args.verify === true`, or automatically when the
 * branch already holds 3 or more thoughts and `args.verify` is not `false`.
 *
 * @returns `{ passed: true, result }` when verification passes (`result` is
 *          `null` when verification was skipped), or
 *          `{ passed: false, response }` carrying the failure response.
 */
async function runVerificationCheck(
  args: ScratchpadArgs,
  sessionId: string,
  branchId: string,
  stepNumber: number,
  thought: string,
  domain: "math" | "logic" | "code" | "general",
  threshold: number,
  compressionResult: {
    applied: boolean;
    original_tokens: number;
    compressed_tokens: number;
    ratio: number;
  } | null,
  streamContent: MCPContext["streamContent"],
): Promise<
  | {
      passed: true;
      result: ReturnType<typeof verify> | null;
    }
  | {
      passed: false;
      response: ScratchpadResponse;
    }
> {
  const priorThoughts = SessionManager.getThoughts(sessionId, branchId);

  // Explicit request, or auto-verification for longer chains unless opted out.
  const explicitlyRequested = args.verify === true;
  const autoEligible = priorThoughts.length >= 3 && args.verify !== false;

  if (!explicitlyRequested && !autoEligible) {
    return { passed: true, result: null };
  }

  const verificationResult = verify(
    thought,
    domain,
    priorThoughts.map((t) => t.thought),
    true,
  );

  // Mention auto-verification only when the caller did not ask for it.
  if (autoEligible && !explicitlyRequested) {
    await streamContent({
      type: "text",
      text: `🔍 **Auto-verification enabled** (chain length: ${priorThoughts.length + 1} steps)\n`,
    });
  }

  if (verificationResult.passed) {
    return { passed: true, result: verificationResult };
  }

  // HALT ON VERIFICATION FAILURE
  // Common-mistake detection is only attempted for the math domain.
  const detectedMistakes =
    domain === "math" ? (detectCommonMistakesFromText(thought)?.mistakes ?? []) : [];

  // Park the failed thought on the session together with the error details.
  const pendingRecord = buildPendingRecord({
    sessionId,
    branchId,
    stepNumber,
    thought,
    domain,
    verificationConfidence: verificationResult.confidence,
    compressionResult,
  });
  SessionManager.setPendingThought(sessionId, pendingRecord, {
    issue: verificationResult.suggestions[0] || "Verification failed",
    evidence: verificationResult.evidence,
    suggestions: verificationResult.suggestions,
    confidence: verificationResult.confidence,
    domain,
  });

  await streamVerificationFailure(streamContent, verificationResult, detectedMistakes, stepNumber);

  const response = buildVerificationFailureResponse({
    sessionId,
    branchId,
    stepNumber,
    threshold,
    verificationResult,
    detectedMistakes,
    domain,
  });
  return { passed: false, response };
}
|
|
988
|
+
|
|
989
|
+
// ============================================================================
|
|
990
|
+
// OPERATION HANDLERS
|
|
991
|
+
// ============================================================================
|
|
992
|
+
|
|
993
|
+
/**
 * Handle step operation - add a new thought.
 *
 * Pipeline, in order: validate input and the optional per-step token limit,
 * allocate the step number, run trap priming and stepping guidance, strip
 * markdown and pick the domain, optionally run local compute, augment,
 * compress, verify (a failure returns early with the failure response), then
 * stream and store the thought, compute confidence, and enrich the response
 * with the drift, spot-check, consistency, hypothesis and challenge checks.
 *
 * @param args - Scratchpad arguments; `thought` is required.
 * @param ctx - MCP context providing `streamContent`.
 * @returns The response for the stored step, or the verification-failure response.
 * @throws Error when `thought` is missing, the step exceeds `max_step_tokens`
 *         (unless `force_large` is set), or the thought cannot be stored.
 */
async function handleStep(args: ScratchpadArgs, ctx: MCPContext): Promise<ScratchpadResponse> {
  const { streamContent } = ctx;

  // Runtime validation: thought is required for step operation
  if (!args.thought) {
    throw new Error("thought is required for step operation");
  }
  const thought = args.thought;

  const sessionId = args.session_id || `s_${crypto.randomUUID()}`;
  const branchId = "main"; // Default branch for step operation
  const threshold = args.confidence_threshold ?? 0.8;
  const tokenBudget = args.token_budget ?? 3000;

  // S3: Check max_step_tokens limit before any processing
  const maxStepTokens = args.max_step_tokens;
  if (maxStepTokens !== undefined && !args.force_large) {
    // Estimate tokens: ~4 chars per token
    const estimatedTokens = Math.ceil(thought.length / 4);
    if (estimatedTokens > maxStepTokens) {
      throw new Error(
        `Step exceeds max_step_tokens limit: ${estimatedTokens} > ${maxStepTokens}. ` +
          `Split into smaller steps or use force_large=true to override.`,
      );
    }
  }

  // Auto-increment step number
  const stepNumber = SessionManager.getNextStep(sessionId, branchId);

  // Handle trap priming if question provided
  const trapAnalysis = args.question
    ? await handleTrapPriming(args.question, sessionId, stepNumber, streamContent)
    : undefined;

  // Proactive stepping guidance: assess complexity on first step when question provided
  const steppingGuidance = await calculateSteppingGuidance(
    args.question,
    stepNumber,
    streamContent,
  );

  // Strip markdown and detect domain
  let strippedThought = stripMarkdown(thought);
  const domain = args.domain || detectDomain(strippedThought);

  // Pre-compute next step suggestion for math domain (before augmentation modifies text)
  let nextStepSuggestion: ScratchpadResponse["next_step_suggestion"];
  if (domain === "math") {
    const suggestion = suggestNextStepFromText(strippedThought);
    if (suggestion) {
      nextStepSuggestion = suggestion;
    }
  }

  // Try local compute FIRST if requested (before augmentation modifies the text)
  let computeResult = null;
  if (args.local_compute && isLikelyComputable(strippedThought)) {
    computeResult = tryLocalCompute(strippedThought);
    if (computeResult?.solved) {
      await streamContent({
        type: "text",
        text: `⚡ **Local Compute** (${computeResult.method})\n**Result:** ${computeResult.result}\n\n`,
      });
    }
  }

  // S2: Run augment_compute (default: true) - inject computed values into thought
  const shouldAugment = args.augment_compute !== false;
  const augmentation = await applyAugmentation(
    strippedThought,
    args.context,
    shouldAugment,
    streamContent,
  );
  strippedThought = augmentation.thought;
  const augmentationResult = augmentation.result;

  // S1: Token budget guard - check if session exceeds budget
  const tokenUsage = SessionManager.getTokenUsage(sessionId);
  const budgetExceeded = tokenUsage.total >= tokenBudget;

  // Compression - check if requested, auto-detect, OR budget exceeded
  const compression = await applyCompression(strippedThought, args, budgetExceeded, streamContent);
  strippedThought = compression.thought;
  const compressionResult = compression.result;
  const autoCompressed = compression.autoCompressed;

  // Run verification (extracted to helper to reduce complexity).
  // This happens before the thought is stored, so a failed step never enters the chain.
  const verificationCheck = await runVerificationCheck(
    args,
    sessionId,
    branchId,
    stepNumber,
    strippedThought,
    domain,
    threshold,
    compressionResult,
    streamContent,
  );
  if (!verificationCheck.passed) {
    return verificationCheck.response;
  }
  const verificationResult = verificationCheck.result;

  // Stream the thought (only if verification passed or wasn't requested)
  await streamContent({
    type: "text",
    text: `**Step ${stepNumber}** [${args.purpose}]\n${strippedThought}\n`,
  });
  if (args.preconditions?.length) {
    await streamContent({
      type: "text",
      text: `📋 **Preconditions:** ${args.preconditions.join(", ")}\n`,
    });
  }
  if (args.outcome) {
    await streamContent({ type: "text", text: `**Outcome:** ${args.outcome}\n` });
  }

  // Build thought record
  const record: ThoughtRecord = {
    id: `${sessionId}:${branchId}:${stepNumber}`,
    step_number: stepNumber,
    thought: strippedThought,
    timestamp: Date.now(),
    branch_id: branchId,
    // Store preconditions if provided
    preconditions: args.preconditions,
    // A caller-supplied confidence takes precedence over the verifier's.
    verification: verificationResult
      ? {
          passed: verificationResult.passed,
          confidence: args.confidence ?? verificationResult.confidence,
          domain,
        }
      : args.confidence !== undefined
        ? {
            passed: true, // Assume passed if confidence provided manually
            confidence: args.confidence,
            domain,
          }
        : undefined,
    // Track compression stats if compression was applied
    compression: compressionResult
      ? {
          // Uses the same ~4 chars per token estimate as the step-size guard above.
          input_bytes_saved:
            (compressionResult.original_tokens - compressionResult.compressed_tokens) * 4,
          output_bytes_saved: 0,
          context_bytes_saved: 0,
          original_tokens: compressionResult.original_tokens,
          compressed_tokens: compressionResult.compressed_tokens,
        }
      : undefined,
  };

  // Store thought
  const storeResult = SessionManager.addThought(sessionId, record);
  if (!storeResult.success) {
    throw new Error(storeResult.error || "Failed to store thought");
  }

  // Calculate confidence
  const confState = calculateConfidence(sessionId, branchId, args.confidence);
  const status = determineStatus(confState.chainConfidence, threshold, false);
  const suggestedAction = getSuggestedAction(status, confState.chainConfidence);

  // Build response
  const response: ScratchpadResponse = {
    session_id: sessionId,
    current_step: stepNumber,
    branch: branchId,
    operation: "step",
    step_confidence: confState.stepConfidence,
    chain_confidence: confState.chainConfidence,
    confidence_threshold: threshold,
    steps_with_confidence: confState.stepsWithConfidence,
    status,
    suggested_action: suggestedAction,
  };

  // Add 5-second warning if threshold reached
  if (status === "threshold_reached") {
    response.auto_complete_warning =
      "⏱️ Confidence threshold reached. You have 5 seconds to continue or call complete. " +
      "After 5s, the chain will auto-complete if no further action is taken.";

    // Round before printing: a raw `threshold * 100` leaks floating-point
    // noise (0.57 * 100 === 56.99999999999999). Clean values such as 80 are
    // unchanged and fractional thresholds keep up to two decimals.
    const thresholdPercent = Number((threshold * 100).toFixed(2));
    await streamContent({
      type: "text",
      text:
        `\n⚠️ **THRESHOLD REACHED** (${(confState.chainConfidence * 100).toFixed(0)}% ≥ ${thresholdPercent}%)\n` +
        "Call `complete` operation or continue reasoning within 5 seconds.\n",
    });
  }

  // Enrich response with optional fields (extracted to reduce complexity)
  const updatedTokenUsage = SessionManager.getTokenUsage(sessionId);
  enrichStepResponse(response, {
    verificationResult,
    domain,
    computeResult,
    compressionResult,
    tokenUsage: updatedTokenUsage,
    tokenBudget,
    budgetExceeded,
    autoCompressed,
    augmentationResult,
    trapAnalysis,
    nextStepSuggestion,
  });

  // Stream next step suggestion if available
  if (nextStepSuggestion?.hasSuggestion) {
    await streamContent({
      type: "text",
      text: `💡 **Next step:** ${nextStepSuggestion.description}\n`,
    });
  }

  // Add stepping guidance if available (from first step complexity assessment)
  if (steppingGuidance) {
    response.stepping_guidance = steppingGuidance;
  }

  // Stream compression suggestion if present
  if (response.compression_suggestion) {
    await streamContent({
      type: "text",
      text: `📦 ${response.compression_suggestion.nudge}\n`,
    });
  }

  // S3: Step-level Confidence Drift Detection (CDD)
  // Extracted to helper function to reduce cyclomatic complexity
  const cddResult = await runStepLevelCDD(sessionId, branchId, streamContent);
  if (cddResult) {
    response.confidence_drift = cddResult;
  }

  // Adaptive spot-check: Auto-trigger when CDD detects unresolved drift
  // This catches trap patterns early, before complete() is called
  const adaptiveSpotCheck = await runAdaptiveSpotCheck(
    sessionId,
    strippedThought,
    cddResult,
    streamContent,
  );
  if (adaptiveSpotCheck) {
    response.spot_check_result = adaptiveSpotCheck;
    // Upgrade status to "review" if spot-check found a trap
    if (!adaptiveSpotCheck.passed) {
      response.status = "review";
      response.suggested_action = `Potential ${adaptiveSpotCheck.trap_type} trap detected. ${adaptiveSpotCheck.hint || "Reconsider your approach."}`;
    }
  }

  // Consistency check: Run every 3 steps to catch contradictions early
  const consistencyWarning = await runConsistencyCheck(
    sessionId,
    branchId,
    stepNumber,
    strippedThought,
    streamContent,
  );
  if (consistencyWarning) {
    response.consistency_warning = consistencyWarning;
  }

  // Hypothesis resolution: Check if branch hypothesis has been resolved
  // NOTE(review): branchId is always "main" here, and runHypothesisResolution
  // skips the "main" branch as well as any branch whose id differs from the
  // one passed in, so this call appears unable to produce a result — confirm
  // whether steps were meant to target a caller-supplied branch.
  const { resolution, mergeSuggestion } = await runHypothesisResolution(
    sessionId,
    branchId,
    stepNumber,
    strippedThought,
    streamContent,
  );
  if (resolution) {
    response.hypothesis_resolution = resolution;
  }
  if (mergeSuggestion) {
    response.merge_suggestion = mergeSuggestion;
  }

  // Auto-challenge: Suggest adversarial review on overconfidence
  const hasVerification = !!verificationResult?.passed;
  const challengeSuggestion = await runAutoChallenge(
    confState.chainConfidence,
    stepNumber,
    hasVerification,
    streamContent,
  );
  if (challengeSuggestion) {
    response.challenge_suggestion = challengeSuggestion;
  }

  return response;
}
|
|
1289
|
+
|
|
1290
|
+
/**
 * Handle navigate operation - read-only views over a session.
 *
 * Supported `args.view` values: "history" (last `limit` thoughts, default 10),
 * "branches", "step" (details of one step) and "path" (thoughts leading to a
 * step). Any other view returns only the base confidence/status fields.
 *
 * @throws Error when `session_id` is missing, the session does not exist,
 *         `step_id` is missing for the "step"/"path" views, or the requested
 *         step cannot be found.
 */
async function handleNavigate(args: ScratchpadArgs, _ctx: MCPContext): Promise<ScratchpadResponse> {
  const sessionId = args.session_id;
  if (!sessionId) {
    throw new Error("session_id required for navigate operation");
  }
  if (!SessionManager.get(sessionId)) {
    throw new Error(`Session not found: ${sessionId}`);
  }

  // Truncate text to `max` characters, marking the cut with an ellipsis.
  const preview = (text: string, max: number): string =>
    text.length > max ? `${text.slice(0, max)}...` : text;

  const threshold = args.confidence_threshold ?? 0.8;
  const branchId = args.branch_id || "main";
  const confState = calculateConfidence(sessionId, branchId);
  const status = determineStatus(confState.chainConfidence, threshold, false);
  const currentStep = SessionManager.getCurrentStep(sessionId, branchId);
  const suggestedAction = getSuggestedAction(status, confState.chainConfidence);

  const response: ScratchpadResponse = {
    session_id: sessionId,
    current_step: currentStep,
    branch: branchId,
    operation: "navigate",
    chain_confidence: confState.chainConfidence,
    confidence_threshold: threshold,
    steps_with_confidence: confState.stepsWithConfidence,
    status,
    suggested_action: suggestedAction,
  };

  if (args.view === "history") {
    // The raw (possibly undefined) branch_id is passed here, not the "main" default.
    const recent = SessionManager.getThoughts(sessionId, args.branch_id).slice(
      -(args.limit || 10),
    );
    response.history = recent.map((t) => ({
      step: t.step_number,
      branch: t.branch_id,
      purpose: "analysis", // Default since we don't store purpose currently
      thought_preview: preview(t.thought, 80),
      confidence: t.verification?.confidence,
      revised_by: t.revised_by,
    }));
  } else if (args.view === "branches") {
    response.branches = SessionManager.getBranches(sessionId).map((b) => ({
      id: b.id,
      name: b.name,
      from_step: b.from_step,
      depth: b.depth,
      hypothesis: b.hypothesis,
      success_criteria: b.success_criteria,
    }));
  } else if (args.view === "step") {
    if (!args.step_id) {
      throw new Error("step_id required for step view");
    }
    const found = SessionManager.getStep(sessionId, args.step_id);
    if (!found) {
      throw new Error(`Step not found: ${args.step_id}`);
    }
    response.step_detail = {
      step: found.step_number,
      branch: found.branch_id,
      purpose: "analysis",
      thought: found.thought,
      outcome: undefined, // Not stored currently
      confidence: found.verification?.confidence,
      revises_step: found.revises_step,
      revised_by: found.revised_by,
      preconditions: found.preconditions,
      hypothesis: found.hypothesis,
      success_criteria: found.success_criteria,
    };
  } else if (args.view === "path") {
    if (!args.step_id) {
      throw new Error("step_id required for path view");
    }
    response.path = SessionManager.getPath(sessionId, args.step_id).map((t) => ({
      step: t.step_number,
      branch: t.branch_id,
      thought_preview: preview(t.thought, 60),
    }));
  }

  return response;
}
|
|
1387
|
+
|
|
1388
|
+
/** Handle branch operation - start alternative reasoning path */
|
|
1389
|
+
async function handleBranch(args: ScratchpadArgs, ctx: MCPContext): Promise<ScratchpadResponse> {
|
|
1390
|
+
const { streamContent } = ctx;
|
|
1391
|
+
|
|
1392
|
+
// Runtime validation: session_id and thought are required for branch operation
|
|
1393
|
+
if (!args.session_id) {
|
|
1394
|
+
throw new Error("session_id required for branch operation");
|
|
1395
|
+
}
|
|
1396
|
+
if (!args.thought) {
|
|
1397
|
+
throw new Error("thought is required for branch operation");
|
|
1398
|
+
}
|
|
1399
|
+
const sessionId = args.session_id;
|
|
1400
|
+
const thought = args.thought;
|
|
1401
|
+
|
|
1402
|
+
const session = SessionManager.get(sessionId);
|
|
1403
|
+
if (!session) {
|
|
1404
|
+
throw new Error(`Session not found: ${sessionId}`);
|
|
1405
|
+
}
|
|
1406
|
+
|
|
1407
|
+
const threshold = args.confidence_threshold ?? 0.8;
|
|
1408
|
+
|
|
1409
|
+
// Clear any pending verification failure (branching abandons the failed step)
|
|
1410
|
+
const hadPending = SessionManager.clearPendingThought(sessionId);
|
|
1411
|
+
|
|
1412
|
+
// Determine branch point
|
|
1413
|
+
const fromStep = args.from_step ?? SessionManager.getCurrentStep(sessionId, "main");
|
|
1414
|
+
const branchId = `branch-${crypto.randomUUID()}`;
|
|
1415
|
+
const branchName = args.branch_name || `Alternative from step ${fromStep}`;
|
|
1416
|
+
|
|
1417
|
+
// Auto-increment step number for new branch
|
|
1418
|
+
const stepNumber = fromStep + 1;
|
|
1419
|
+
|
|
1420
|
+
// Strip markdown and detect domain BEFORE augmentation
|
|
1421
|
+
let strippedThought = stripMarkdown(thought);
|
|
1422
|
+
const domain = detectDomain(strippedThought);
|
|
1423
|
+
|
|
1424
|
+
// Pre-compute next step suggestion for math domain (before augmentation modifies text)
|
|
1425
|
+
let nextStepSuggestion: ScratchpadResponse["next_step_suggestion"];
|
|
1426
|
+
if (domain === "math") {
|
|
1427
|
+
const suggestion = suggestNextStepFromText(strippedThought);
|
|
1428
|
+
if (suggestion) {
|
|
1429
|
+
nextStepSuggestion = suggestion;
|
|
1430
|
+
}
|
|
1431
|
+
}
|
|
1432
|
+
|
|
1433
|
+
// Auto-augment (default: true)
|
|
1434
|
+
let augmentationResult: {
|
|
1435
|
+
applied: boolean;
|
|
1436
|
+
computations: number;
|
|
1437
|
+
filtered: number;
|
|
1438
|
+
domain: string;
|
|
1439
|
+
} | null = null;
|
|
1440
|
+
|
|
1441
|
+
const shouldAugment = args.augment_compute !== false;
|
|
1442
|
+
|
|
1443
|
+
if (shouldAugment) {
|
|
1444
|
+
const augResult = contextAwareCompute({
|
|
1445
|
+
thought: strippedThought,
|
|
1446
|
+
systemPrompt: args.context,
|
|
1447
|
+
});
|
|
1448
|
+
|
|
1449
|
+
if (augResult.hasComputations) {
|
|
1450
|
+
strippedThought = augResult.augmented;
|
|
1451
|
+
augmentationResult = {
|
|
1452
|
+
applied: true,
|
|
1453
|
+
computations: augResult.computations.length,
|
|
1454
|
+
filtered: augResult.filteredCount,
|
|
1455
|
+
domain: augResult.domain,
|
|
1456
|
+
};
|
|
1457
|
+
await streamContent({
|
|
1458
|
+
type: "text",
|
|
1459
|
+
text: `⚡ **Augmented** ${augResult.computations.length} computations (${augResult.domain})\n`,
|
|
1460
|
+
});
|
|
1461
|
+
}
|
|
1462
|
+
}
|
|
1463
|
+
|
|
1464
|
+
// Stream branch creation
|
|
1465
|
+
const pendingNote = hadPending ? " (abandoning failed verification step)" : "";
|
|
1466
|
+
const hypothesisNote = args.hypothesis ? `\n 📊 Hypothesis: ${args.hypothesis}` : "";
|
|
1467
|
+
const criteriaNote = args.success_criteria
|
|
1468
|
+
? `\n ✅ Success criteria: ${args.success_criteria}`
|
|
1469
|
+
: "";
|
|
1470
|
+
await streamContent({
|
|
1471
|
+
type: "text",
|
|
1472
|
+
text:
|
|
1473
|
+
`🌿 **New Branch:** ${branchName}${pendingNote}\n` +
|
|
1474
|
+
` From step ${fromStep} → Step ${stepNumber}${hypothesisNote}${criteriaNote}\n\n`,
|
|
1475
|
+
});
|
|
1476
|
+
|
|
1477
|
+
// Stream the thought
|
|
1478
|
+
await streamContent({
|
|
1479
|
+
type: "text",
|
|
1480
|
+
text: `**Step ${stepNumber}** [${args.purpose}]\n${strippedThought}\n`,
|
|
1481
|
+
});
|
|
1482
|
+
|
|
1483
|
+
// Build thought record with branch info
|
|
1484
|
+
const record: ThoughtRecord = {
|
|
1485
|
+
id: `${sessionId}:${branchId}:${stepNumber}`,
|
|
1486
|
+
step_number: stepNumber,
|
|
1487
|
+
thought: strippedThought,
|
|
1488
|
+
timestamp: Date.now(),
|
|
1489
|
+
branch_id: branchId,
|
|
1490
|
+
branch_from: fromStep,
|
|
1491
|
+
branch_name: branchName,
|
|
1492
|
+
// Hypothesis-driven branching
|
|
1493
|
+
hypothesis: args.hypothesis,
|
|
1494
|
+
success_criteria: args.success_criteria,
|
|
1495
|
+
};
|
|
1496
|
+
|
|
1497
|
+
// Store thought
|
|
1498
|
+
const storeResult = SessionManager.addThought(sessionId, record);
|
|
1499
|
+
if (!storeResult.success) {
|
|
1500
|
+
throw new Error(storeResult.error || "Failed to store branch thought");
|
|
1501
|
+
}
|
|
1502
|
+
|
|
1503
|
+
// Calculate confidence for new branch
|
|
1504
|
+
const confState = calculateConfidence(sessionId, branchId);
|
|
1505
|
+
const status = determineStatus(confState.chainConfidence, threshold, false);
|
|
1506
|
+
|
|
1507
|
+
const response: ScratchpadResponse = {
|
|
1508
|
+
session_id: sessionId,
|
|
1509
|
+
current_step: stepNumber,
|
|
1510
|
+
branch: branchId,
|
|
1511
|
+
operation: "branch",
|
|
1512
|
+
chain_confidence: confState.chainConfidence,
|
|
1513
|
+
confidence_threshold: threshold,
|
|
1514
|
+
steps_with_confidence: confState.stepsWithConfidence,
|
|
1515
|
+
status,
|
|
1516
|
+
suggested_action: args.hypothesis
|
|
1517
|
+
? `Branch "${branchName}" created to test: "${args.hypothesis}". Continue reasoning to prove/disprove.`
|
|
1518
|
+
: `Branch "${branchName}" created. Continue reasoning on this alternative path.`,
|
|
1519
|
+
};
|
|
1520
|
+
|
|
1521
|
+
// Add augmentation info
|
|
1522
|
+
if (augmentationResult) {
|
|
1523
|
+
response.augmentation = augmentationResult;
|
|
1524
|
+
}
|
|
1525
|
+
|
|
1526
|
+
// Add next step suggestion for math domain (computed before augmentation)
|
|
1527
|
+
if (nextStepSuggestion) {
|
|
1528
|
+
response.next_step_suggestion = nextStepSuggestion;
|
|
1529
|
+
if (nextStepSuggestion.hasSuggestion) {
|
|
1530
|
+
await streamContent({
|
|
1531
|
+
type: "text",
|
|
1532
|
+
text: `💡 **Next step:** ${nextStepSuggestion.description}\n`,
|
|
1533
|
+
});
|
|
1534
|
+
}
|
|
1535
|
+
}
|
|
1536
|
+
|
|
1537
|
+
return response;
|
|
1538
|
+
}
|
|
1539
|
+
|
|
1540
|
+
/**
 * Handle the `revise` operation: record a corrected version of an earlier step.
 *
 * Two cases are distinguished:
 * - The target is the session's pending thought (a step that failed verification):
 *   the pending thought is cleared and the revision reuses its step number.
 * - Otherwise the target must already exist among the stored steps, and the
 *   revision is appended under the next step number of the main branch.
 *
 * The thought is markdown-stripped, optionally augmented with computed results
 * (unless `augment_compute` is `false`), streamed to the client, and stored.
 *
 * @param args - Operation arguments; `session_id`, `thought` and `target_step` are required.
 * @param ctx - MCP context providing `streamContent` for incremental output.
 * @returns The scratchpad response describing the stored revision.
 * @throws Error when a required argument is missing, the session or target step
 *   cannot be found, or the revision cannot be stored.
 */
async function handleRevise(args: ScratchpadArgs, ctx: MCPContext): Promise<ScratchpadResponse> {
  const { streamContent } = ctx;
  const { session_id: sessionId, thought, target_step: targetStep } = args;

  // Runtime validation of the fields this operation cannot work without.
  if (!sessionId) {
    throw new Error("session_id required for revise operation");
  }
  if (!thought) {
    throw new Error("thought is required for revise operation");
  }
  if (targetStep === undefined) {
    throw new Error("target_step is required for revise operation");
  }

  if (!SessionManager.get(sessionId)) {
    throw new Error(`Session not found: ${sessionId}`);
  }

  const branchId = "main"; // Revisions always land on the main branch
  const threshold = args.confidence_threshold ?? 0.8;

  // Decide whether this revision replaces the pending (failed verification) step
  // or corrects a step that is already stored.
  const pendingEntry = SessionManager.getPendingThought(sessionId);
  let replacesPending = false;
  let stepNumber: number;
  if (pendingEntry && targetStep === pendingEntry.thought.step_number) {
    // The revision takes the pending step's place, so drop the pending entry
    // and keep its step number.
    replacesPending = true;
    SessionManager.clearPendingThought(sessionId);
    stepNumber = pendingEntry.thought.step_number;
  } else {
    if (!SessionManager.getStep(sessionId, targetStep)) {
      throw new Error(`Target step not found: ${targetStep}`);
    }
    stepNumber = SessionManager.getNextStep(sessionId, branchId);
  }

  // Domain detection and the next-step suggestion both run on the stripped
  // text, before augmentation rewrites it.
  let revisedText = stripMarkdown(thought);
  const domain = detectDomain(revisedText);
  const nextStepSuggestion: ScratchpadResponse["next_step_suggestion"] =
    domain === "math" ? suggestNextStepFromText(revisedText) || undefined : undefined;

  // Auto-augment unless the caller explicitly disabled it.
  let augmentation: {
    applied: boolean;
    computations: number;
    filtered: number;
    domain: string;
  } | null = null;

  if (args.augment_compute !== false) {
    const computed = contextAwareCompute({
      thought: revisedText,
      systemPrompt: args.context,
    });

    if (computed.hasComputations) {
      revisedText = computed.augmented;
      augmentation = {
        applied: true,
        computations: computed.computations.length,
        filtered: computed.filteredCount,
        domain: computed.domain,
      };
      await streamContent({
        type: "text",
        text: `⚡ **Augmented** ${computed.computations.length} computations (${computed.domain})\n`,
      });
    }
  }

  // Stream the revision: header, reason, then the corrected step itself.
  const pendingLabel = replacesPending ? " (replacing failed verification)" : "";
  const headerLine = `📝 **Revising Step ${targetStep}**${pendingLabel}\n`;
  const reasonLine = ` Reason: ${args.reason ?? "correction"}\n\n`;
  const stepLines = `**Step ${stepNumber}** [correction]\n${revisedText}\n`;
  await streamContent({
    type: "text",
    text: headerLine + reasonLine + stepLines,
  });

  const record: ThoughtRecord = {
    id: `${sessionId}:${branchId}:${stepNumber}`,
    step_number: stepNumber,
    thought: revisedText,
    timestamp: Date.now(),
    branch_id: branchId,
    // A replacement of the pending step is not marked as a revision.
    revises_step: replacesPending ? undefined : targetStep,
    revision_reason: args.reason,
    verification:
      args.confidence === undefined
        ? undefined
        : {
            passed: true,
            confidence: args.confidence,
            domain,
          },
  };

  const stored = SessionManager.addThought(sessionId, record);
  if (!stored.success) {
    throw new Error(stored.error || "Failed to store revision");
  }

  const confState = calculateConfidence(sessionId, branchId, args.confidence);

  const response: ScratchpadResponse = {
    session_id: sessionId,
    current_step: stepNumber,
    branch: branchId,
    operation: "revise",
    step_confidence: confState.stepConfidence,
    chain_confidence: confState.chainConfidence,
    confidence_threshold: threshold,
    steps_with_confidence: confState.stepsWithConfidence,
    status: determineStatus(confState.chainConfidence, threshold, false),
    suggested_action: `Revised step ${targetStep}. Continue reasoning with corrected understanding.`,
  };

  if (augmentation) {
    response.augmentation = augmentation;
  }

  // Attach (and announce) the suggestion computed before augmentation.
  if (nextStepSuggestion) {
    response.next_step_suggestion = nextStepSuggestion;
    if (nextStepSuggestion.hasSuggestion) {
      await streamContent({
        type: "text",
        text: `💡 **Next step:** ${nextStepSuggestion.description}\n`,
      });
    }
  }

  return response;
}
|
|
1702
|
+
|
|
1703
|
+
/**
 * Handle the `complete` operation: finalize the main-branch reasoning chain.
 *
 * Streams a completion summary (step count, chain confidence, optional
 * compression stats, summary and final answer), then runs two checks:
 * - a spot-check of `final_answer` against the question (taken from `args.question`
 *   or, failing that, the question stored on the session), and
 * - a confidence drift analysis over the main-branch thoughts.
 *
 * If the spot-check fails or the drift analysis reports unresolved drift, the
 * response status is `"review"` instead of `"complete"`.
 *
 * @param args - Operation arguments; `session_id` is required.
 * @param ctx - MCP context providing `streamContent` for incremental output.
 * @returns The scratchpad response with final statistics and check results.
 * @throws Error when `session_id` is missing or the session does not exist.
 */
async function handleComplete(args: ScratchpadArgs, ctx: MCPContext): Promise<ScratchpadResponse> {
  const { streamContent } = ctx;

  const sessionId = args.session_id;
  if (!sessionId) {
    throw new Error("session_id required for complete operation");
  }
  if (!SessionManager.get(sessionId)) {
    throw new Error(`Session not found: ${sessionId}`);
  }

  const branchId = "main";
  const threshold = args.confidence_threshold ?? 0.8;

  // Final stats are computed over main-branch thoughts only; records that
  // carry no branch_id are kept as well.
  const thoughts = SessionManager.getThoughts(sessionId).filter(
    (t) => !t.branch_id || t.branch_id === branchId,
  );
  const confState = calculateConfidence(sessionId, branchId);
  const compressionStats = SessionManager.getCompressionStats(sessionId);
  // Defined only when compression actually saved something.
  const effectiveCompression =
    compressionStats && compressionStats.totalBytesSaved > 0 ? compressionStats : undefined;

  await streamContent({
    type: "text",
    text: [
      `✅ **Reasoning Complete**\n`,
      ` Total steps: ${thoughts.length}\n`,
      ` Chain confidence: ${(confState.chainConfidence * 100).toFixed(0)}%\n`,
    ].join(""),
  });

  if (effectiveCompression) {
    await streamContent({
      type: "text",
      text: ` Compression: ${effectiveCompression.stepCount} steps, ${effectiveCompression.totalBytesSaved} bytes saved\n`,
    });
  }

  if (args.summary) {
    await streamContent({ type: "text", text: `\n**Summary:** ${args.summary}\n` });
  }
  if (args.final_answer) {
    await streamContent({ type: "text", text: `**Answer:** ${args.final_answer}\n` });
  }

  let needsReconsideration = false;

  // Spot-check runs only when both a question and a final answer are available.
  // The question falls back to the one stored on the session.
  const question = args.question || SessionManager.getQuestion(sessionId);
  const spotCheckResult:
    | {
        passed: boolean;
        trapType: string | null;
        warning: string | null;
        hint: string | null;
        confidence: number;
      }
    | undefined = question && args.final_answer ? spotCheck(question, args.final_answer) : undefined;

  if (spotCheckResult && !spotCheckResult.passed) {
    needsReconsideration = true;
    await streamContent({
      type: "text",
      text: [
        `\n⚠️ **Spot-check warning:** ${spotCheckResult.trapType}\n`,
        spotCheckResult.warning ? ` ${spotCheckResult.warning}\n` : "",
        spotCheckResult.hint ? ` 💡 ${spotCheckResult.hint}\n` : "",
        `\n🔄 **Reconsideration recommended:** Your answer may have fallen for a cognitive trap.\n`,
        ` Call \`revise\` with target_step=${thoughts.length} to reconsider your final reasoning.\n`,
      ].join(""),
    });
  }

  // Confidence Drift Detection (CDD): look for unresolved uncertainty in the trajectory.
  const driftAnalysis = analyzeConfidenceDrift(thoughts);
  const hasDriftData = driftAnalysis.pattern !== "insufficient";
  if (hasDriftData && driftAnalysis.unresolved) {
    needsReconsideration = true;
    await streamContent({
      type: "text",
      text: [
        `\n⚠️ **Confidence Drift Warning:** ${driftAnalysis.explanation}\n`,
        driftAnalysis.suggestion ? ` 💡 ${driftAnalysis.suggestion}\n` : "",
        ` Pattern: ${driftAnalysis.pattern}, Drift score: ${(driftAnalysis.drift_score * 100).toFixed(0)}%\n`,
      ].join(""),
    });
  } else if (hasDriftData && driftAnalysis.pattern !== "stable") {
    // Purely informational for patterns that are neither stable nor unresolved.
    await streamContent({
      type: "text",
      text: ` Confidence pattern: ${driftAnalysis.pattern}\n`,
    });
  }

  // "review" when a check asked for reconsideration, otherwise "complete".
  // Unresolved drift takes precedence over a spot-check trap in the message.
  const finalStatus = needsReconsideration ? "review" : "complete";
  const suggestedAction: string = !needsReconsideration
    ? "Reasoning chain finalized."
    : driftAnalysis.unresolved
      ? `Unresolved confidence drift detected (${driftAnalysis.pattern} pattern). ${driftAnalysis.suggestion || `Review step ${driftAnalysis.min_step} where confidence dropped.`}`
      : spotCheckResult?.trapType
        ? `Potential ${spotCheckResult.trapType} trap detected. Call revise(target_step=${thoughts.length}, reason="${spotCheckResult.hint || "Reconsider approach"}") to fix.`
        : "Review recommended before finalizing.";

  const response: ScratchpadResponse = {
    session_id: sessionId,
    current_step: SessionManager.getCurrentStep(sessionId, branchId),
    branch: branchId,
    operation: "complete",
    chain_confidence: confState.chainConfidence,
    confidence_threshold: threshold,
    steps_with_confidence: confState.stepsWithConfidence,
    status: finalStatus,
    suggested_action: suggestedAction,
    final_summary: args.summary,
    total_steps: thoughts.length,
  };

  // Report the spot-check whenever it ran, plus a ready-made revise suggestion
  // when reconsideration is needed and the check named a trap with a hint.
  if (spotCheckResult) {
    const { passed, trapType, warning, hint, confidence } = spotCheckResult;
    response.spot_check_result = {
      passed,
      trap_type: trapType,
      warning,
      hint,
      confidence,
    };

    if (needsReconsideration && trapType && hint) {
      response.reconsideration = {
        trap_type: trapType,
        hint,
        suggested_revise: {
          target_step: thoughts.length,
          reason: `Potential ${trapType} trap: ${hint}`,
        },
      };
    }
  }

  // Compression stats are included only if compression saved any bytes.
  if (effectiveCompression) {
    const { tokens } = effectiveCompression;
    response.compression_stats = {
      total_bytes_saved: effectiveCompression.totalBytesSaved,
      steps_compressed: effectiveCompression.stepCount,
      tokens:
        tokens.original > 0
          ? {
              original: tokens.original,
              compressed: tokens.compressed,
              saved: tokens.saved,
            }
          : undefined,
    };
  }

  // Drift analysis is included whenever there was enough data to compute it.
  if (hasDriftData) {
    response.confidence_drift = {
      drift_score: driftAnalysis.drift_score,
      unresolved: driftAnalysis.unresolved,
      min_confidence: driftAnalysis.min_confidence,
      min_step: driftAnalysis.min_step,
      max_drop: driftAnalysis.max_drop,
      recovery: driftAnalysis.recovery,
      has_revision_after_drop: driftAnalysis.has_revision_after_drop,
      pattern: driftAnalysis.pattern,
      explanation: driftAnalysis.explanation,
      suggestion: driftAnalysis.suggestion,
    };
  }

  return response;
}
|
|
1886
|
+
|
|
1887
|
+
/**
 * Handle the `augment` operation: extract, compute, and inject math results.
 *
 * Runs context-aware computation over `args.text` (with `args.system_context`
 * as the system prompt), streams the outcome, and optionally stores the
 * augmented text as a new step on the main branch when `store_as_step` is set.
 * When no `session_id` is supplied, a fresh `s_<uuid>` identifier is generated.
 *
 * @param args - Operation arguments; `text` is required.
 * @param ctx - MCP context providing `streamContent` for incremental output.
 * @returns The scratchpad response including the augmented text, the list of
 *   computations, the filtered count and the detected domain. `current_step`
 *   is 0 unless the result was stored as a step.
 * @throws Error when `text` is missing, or when `store_as_step` is set and the
 *   step cannot be stored.
 */
async function handleAugment(args: ScratchpadArgs, ctx: MCPContext): Promise<ScratchpadResponse> {
  const { streamContent } = ctx;

  // Runtime validation: text is required for augment operation
  if (!args.text) {
    throw new Error("text is required for augment operation");
  }
  const text = args.text;

  const sessionId = args.session_id || `s_${crypto.randomUUID()}`;
  const threshold = args.confidence_threshold ?? 0.8;
  const branchId = "main";

  // Run context-aware computation
  const computeResult = contextAwareCompute({
    thought: text,
    systemPrompt: args.system_context,
  });

  // Stream result
  if (computeResult.hasComputations) {
    await streamContent({
      type: "text",
      text:
        `⚡ **Augmented** (${computeResult.computations.length} computations, ` +
        `${computeResult.filteredCount} filtered by domain)\n` +
        `Domain: ${computeResult.domain}\n\n`,
    });
    await streamContent({
      type: "text",
      text: `**Result:**\n${computeResult.augmented}\n`,
    });
  } else {
    await streamContent({
      type: "text",
      text: "No computable expressions found.\n",
    });
  }

  // Optionally store as a step
  let stepNumber = 0;
  if (args.store_as_step) {
    stepNumber = SessionManager.getNextStep(sessionId, branchId);
    const record: ThoughtRecord = {
      id: `${sessionId}:${branchId}:${stepNumber}`,
      step_number: stepNumber,
      thought: computeResult.augmented,
      timestamp: Date.now(),
      branch_id: branchId,
    };
    // Surface storage failures instead of reporting a step number that was
    // never persisted (same handling as the branch and revise operations).
    const storeResult = SessionManager.addThought(sessionId, record);
    if (!storeResult.success) {
      throw new Error(storeResult.error || "Failed to store augmented step");
    }
  }

  // Calculate confidence for session
  const confState = calculateConfidence(sessionId, branchId);
  const status = determineStatus(confState.chainConfidence, threshold, false);

  return {
    session_id: sessionId,
    current_step: stepNumber,
    branch: branchId,
    operation: "augment",
    chain_confidence: confState.chainConfidence,
    confidence_threshold: threshold,
    steps_with_confidence: confState.stepsWithConfidence,
    status,
    suggested_action: computeResult.hasComputations
      ? `Augmented ${computeResult.computations.length} expressions. Use store_as_step=true to add to reasoning chain.`
      : "No computations found. Text returned unchanged.",
    augmented_text: computeResult.augmented,
    computations: computeResult.computations.map((c) => ({
      expression: c.original,
      result: c.result,
      method: c.method,
    })),
    filtered_count: computeResult.filteredCount,
    detected_domain: computeResult.domain,
  };
}
|
|
1967
|
+
|
|
1968
|
+
/**
 * Handle the `override` operation: commit a step that failed verification anyway.
 *
 * The session must have a pending thought and `failed_step` must equal that
 * thought's step number. On success the pending thought is committed, a notice
 * is streamed, and the response echoes the original verification failure
 * (`passed: false` with the recorded confidence and domain).
 *
 * @param args - Operation arguments; `session_id` is required, `failed_step`
 *   must match the pending step, and `reason` is echoed in the streamed notice.
 * @param ctx - MCP context providing `streamContent` for incremental output.
 * @returns The scratchpad response for the committed step.
 * @throws Error when `session_id` is missing, nothing is pending, `failed_step`
 *   does not match the pending step, or the commit fails.
 */
async function handleOverride(args: ScratchpadArgs, ctx: MCPContext): Promise<ScratchpadResponse> {
  const { streamContent } = ctx;

  const sessionId = args.session_id;
  if (!sessionId) {
    throw new Error("session_id required for override operation");
  }

  const threshold = args.confidence_threshold ?? 0.8;

  // An override only makes sense right after a verification failure.
  const pending = SessionManager.getPendingThought(sessionId);
  if (!pending) {
    throw new Error(
      "No pending verification failure to override. Use override only after a step fails verification.",
    );
  }

  // The caller must name exactly the step that is pending.
  const pendingStep = pending.thought.step_number;
  if (args.failed_step !== pendingStep) {
    throw new Error(`failed_step (${args.failed_step}) doesn't match pending step (${pendingStep})`);
  }

  const committed = SessionManager.commitPendingThought(sessionId);
  if (!committed.success) {
    throw new Error(committed.error || "Failed to commit overridden step");
  }

  const branchId = pending.thought.branch_id;

  const noticeLines = [
    `✓ **OVERRIDE ACCEPTED**\n`,
    `Step ${args.failed_step} committed despite verification failure.\n`,
    `Reason: ${args.reason}\n\n`,
    `**Note:** This step is marked as verification-failed in the chain.\n`,
  ];
  await streamContent({
    type: "text",
    text: noticeLines.join(""),
  });

  const confState = calculateConfidence(sessionId, branchId);

  return {
    session_id: sessionId,
    current_step: pending.thought.step_number,
    branch: branchId,
    operation: "override",
    chain_confidence: confState.chainConfidence,
    confidence_threshold: threshold,
    steps_with_confidence: confState.stepsWithConfidence,
    status: determineStatus(confState.chainConfidence, threshold, false),
    suggested_action: `Step ${args.failed_step} committed. Continue reasoning.`,
    // The step is committed, but its failed verification is still reported.
    verification: {
      passed: false,
      confidence: pending.verificationError.confidence,
      domain: pending.verificationError.domain,
    },
  };
}
|
|
2033
|
+
|
|
2034
|
+
/**
 * Handle the `hint` operation: progressive simplification hints with session state.
 *
 * The expression comes from `args.expression` or, if omitted, from the hint
 * state previously stored for the session. The number of revealed steps comes
 * from `args.reveal_count`; when it is omitted, repeated calls for the same
 * expression reveal one more step each time and a new expression starts at 1.
 * `reset` ignores any stored state. With `cumulative` (default `true`) all
 * steps up to the reveal count are returned; otherwise only the latest one.
 *
 * The reveal count is clamped to the range `[0, totalSteps]`, so a negative
 * `reveal_count` shows no steps instead of being interpreted as an offset from
 * the end of the step list.
 *
 * Failure cases (no expression available, or the expression cannot be parsed)
 * do not throw: they stream an error line and return `hint_result.success = false`.
 *
 * @param args - Operation arguments (`expression`, `reveal_count`, `cumulative`,
 *   `reset`, `session_id`, `confidence_threshold`).
 * @param ctx - MCP context providing `streamContent` for incremental output.
 * @returns The scratchpad response carrying the `hint_result`.
 */
async function handleHint(args: ScratchpadArgs, ctx: MCPContext): Promise<ScratchpadResponse> {
  const { streamContent } = ctx;
  const sessionId = args.session_id || `hint-${Date.now()}`;
  const threshold = args.confidence_threshold ?? 0.8;
  const { cumulative = true, reset = false } = args;

  // Check for existing hint state
  const existingState = reset ? null : SessionManager.getHintState(sessionId);

  // Determine expression and reveal count
  let expression: string;
  let revealCount: number;

  if (args.expression) {
    // New expression provided - start fresh or continue if same expression
    expression = args.expression;
    if (existingState && existingState.expression === expression && !reset) {
      // Same expression - auto-increment if no reveal_count specified
      revealCount = args.reveal_count ?? existingState.revealCount + 1;
    } else {
      // Different expression or reset - start fresh
      revealCount = args.reveal_count ?? 1;
    }
  } else if (existingState) {
    // No expression but have state - continue from previous
    expression = existingState.expression;
    revealCount = args.reveal_count ?? existingState.revealCount + 1;
  } else {
    // No expression and no state - error
    await streamContent({
      type: "text",
      text: `❌ No expression provided and no previous hint state in session.\n`,
    });

    return {
      session_id: sessionId,
      current_step: 0,
      branch: "main",
      operation: "hint",
      chain_confidence: 0,
      confidence_threshold: threshold,
      steps_with_confidence: 0,
      status: "continue",
      suggested_action: "Provide an expression to get hints.",
      hint_result: {
        success: false,
        original: "",
        simplified: "",
        steps_shown: 0,
        total_steps: 0,
        steps: [],
        has_more: false,
      },
    };
  }

  // Get full simplification path
  const pathResult = suggestSimplificationPath(expression);

  if (!pathResult.success) {
    // Clear any existing state for this failed expression
    SessionManager.clearHintState(sessionId);

    await streamContent({
      type: "text",
      text: `❌ Could not parse expression: "${expression}"\n`,
    });

    return {
      session_id: sessionId,
      current_step: 0,
      branch: "main",
      operation: "hint",
      chain_confidence: 0,
      confidence_threshold: threshold,
      steps_with_confidence: 0,
      status: "continue",
      suggested_action: "Expression could not be parsed. Check syntax.",
      hint_result: {
        success: false,
        original: expression,
        simplified: expression,
        steps_shown: 0,
        total_steps: 0,
        steps: [],
        has_more: false,
      },
    };
  }

  const totalSteps = pathResult.steps.length;
  // Clamp to [0, totalSteps]. Without the lower bound a negative reveal count
  // would reach slice(0, negative) below, which drops steps from the END of the
  // list, and a negative count would be stored and reported as steps_shown.
  const stepsToShow = Math.max(0, Math.min(revealCount, totalSteps));

  // Store hint state for future calls
  SessionManager.setHintState(sessionId, {
    expression,
    revealCount: stepsToShow,
    totalSteps,
    simplified: pathResult.simplified,
  });

  // Build steps array: everything up to the reveal count, or just the latest step
  const visibleSteps: SimplificationStep[] = (
    cumulative
      ? pathResult.steps.slice(0, stepsToShow)
      : stepsToShow > 0
        ? [pathResult.steps[stepsToShow - 1]]
        : []
  ).filter((s: SimplificationStep | undefined): s is SimplificationStep => s !== undefined);

  // Get the result at the revealed step (falls back to the input expression
  // when no step has been revealed)
  const lastStep =
    stepsToShow > 0 && stepsToShow <= totalSteps ? pathResult.steps[stepsToShow - 1] : undefined;
  const currentSimplified = lastStep?.after ?? expression;

  // Stream the hint
  const isContinuing = existingState?.expression === expression;
  if (totalSteps === 0) {
    await streamContent({
      type: "text",
      text: `✓ Expression "${expression}" is already simplified.\n`,
    });
  } else {
    const continueLabel = isContinuing ? " (continued)" : "";
    await streamContent({
      type: "text",
      text: `💡 **Simplification Hint${continueLabel}** (step ${stepsToShow}/${totalSteps})\n\n`,
    });

    for (const step of visibleSteps) {
      await streamContent({
        type: "text",
        text:
          `**Step ${step.step}:** ${step.transformation}\n` +
          ` ${step.before} → ${step.after}\n` +
          ` _${step.description}_\n\n`,
      });
    }

    if (stepsToShow < totalSteps) {
      await streamContent({
        type: "text",
        text: `_${totalSteps - stepsToShow} more step(s) available. Call hint again to reveal next step._\n`,
      });
    } else {
      await streamContent({
        type: "text",
        text: `✓ **Final simplified form:** ${pathResult.simplified}\n`,
      });
    }
  }

  return {
    session_id: sessionId,
    current_step: 0,
    branch: "main",
    operation: "hint",
    chain_confidence: 0,
    confidence_threshold: threshold,
    steps_with_confidence: 0,
    status: "continue",
    suggested_action:
      stepsToShow < totalSteps
        ? `${totalSteps - stepsToShow} more steps available. Call hint again to continue.`
        : "Expression fully simplified",
    hint_result: {
      success: true,
      original: expression,
      simplified: currentSimplified,
      steps_shown: stepsToShow,
      total_steps: totalSteps,
      steps: visibleSteps.map((s: SimplificationStep) => ({
        step_number: s.step,
        transformation: s.transformation,
        description: s.description,
        from: s.before,
        to: s.after,
      })),
      has_more: stepsToShow < totalSteps,
    },
  };
}
|
|
2217
|
+
|
|
2218
|
+
/**
 * Handle the `mistakes` operation: proactive error checking for math derivations.
 *
 * Runs `detectCommonMistakesFromText` over `args.text`, streams a readable
 * report through `ctx.streamContent` (one chunk per header / finding / fix),
 * and returns a response whose `mistakes_result` lists every finding.
 *
 * @param args - Tool arguments. `text` is required; `session_id` falls back to
 *   `mistakes-<timestamp>` and `confidence_threshold` falls back to 0.8.
 * @param ctx - MCP context; only `streamContent` is used.
 * @returns A response with `operation: "mistakes"` and `status: "continue"`,
 *   regardless of how many mistakes were found.
 * @throws Error when `args.text` is missing or empty.
 */
async function handleMistakes(args: ScratchpadArgs, ctx: MCPContext): Promise<ScratchpadResponse> {
  const emit = ctx.streamContent;
  const say = (text: string) => emit({ type: "text", text });

  // Runtime validation: the mistakes operation cannot run without text.
  const text = args.text;
  if (!text) {
    throw new Error("text is required for mistakes operation");
  }

  const sessionId = args.session_id || `mistakes-${Date.now()}`;
  const threshold = args.confidence_threshold ?? 0.8;

  // A missing detector result is treated the same as "nothing found".
  const detection = detectCommonMistakesFromText(text);
  const mistakes = detection?.mistakes ?? [];
  const mistakesFound = mistakes.length;

  if (mistakesFound > 0) {
    const plural = mistakesFound > 1 ? "s" : "";
    await say(`⚠️ **Found ${mistakesFound} potential algebraic mistake${plural}:**\n\n`);

    for (const finding of mistakes) {
      await say(`• **${finding.type}**: ${finding.explanation}\n`);

      // A concrete corrected step takes precedence over a free-form suggestion.
      if (finding.suggestedFix) {
        await say(` **Corrected:** \`${finding.suggestedFix}\`\n`);
      } else if (finding.suggestion) {
        await say(` _Fix: ${finding.suggestion}_\n`);
      }
    }
  } else {
    await say(
      `✓ **No common algebraic mistakes detected**\n\n_Note: This checks for sign errors, distribution errors, exponent mistakes, etc. It doesn't guarantee correctness._\n`,
    );
  }

  // Only the first 200 characters of the input are echoed back.
  const textChecked = text.length > 200 ? `${text.slice(0, 200)}...` : text;
  const suggestedAction =
    mistakesFound > 0
      ? `Found ${mistakesFound} potential mistake(s). Review and revise if needed.`
      : "No common mistakes detected.";

  return {
    session_id: sessionId,
    current_step: 0,
    branch: "main",
    operation: "mistakes",
    chain_confidence: 0,
    confidence_threshold: threshold,
    steps_with_confidence: 0,
    status: "continue",
    suggested_action: suggestedAction,
    mistakes_result: {
      text_checked: textChecked,
      mistakes_found: mistakesFound,
      mistakes: mistakes.map((finding: DetectedMistake) => ({
        type: finding.type,
        description: finding.explanation,
        fix: finding.suggestion,
        corrected_step: finding.suggestedFix,
      })),
    },
  };
}
|
|
2292
|
+
|
|
2293
|
+
/**
 * Handle the `spot_check` operation: detect trap patterns in an answer.
 *
 * Passes `args.question` and `args.answer` to `spotCheck`, streams the verdict
 * through `ctx.streamContent`, and returns the raw verdict fields in
 * `spot_check_result`.
 *
 * @param args - Tool arguments. `question` and `answer` are required;
 *   `session_id` falls back to `spot-check-<timestamp>` and
 *   `confidence_threshold` falls back to 0.8.
 * @param ctx - MCP context; only `streamContent` is used.
 * @returns A response with `operation: "spot_check"`; `status` is `"continue"`
 *   when the check passed and `"review"` otherwise.
 * @throws Error when `args.question` or `args.answer` is missing or empty
 *   (the question is checked first).
 */
async function handleSpotCheck(args: ScratchpadArgs, ctx: MCPContext): Promise<ScratchpadResponse> {
  const emit = ctx.streamContent;
  const say = (text: string) => emit({ type: "text", text });

  // Runtime validation: both inputs are mandatory for this operation.
  const { question, answer } = args;
  if (!question) {
    throw new Error("question is required for spot_check operation");
  }
  if (!answer) {
    throw new Error("answer is required for spot_check operation");
  }

  const sessionId = args.session_id || `spot-check-${Date.now()}`;
  const threshold = args.confidence_threshold ?? 0.8;

  const verdict = spotCheck(question, answer);

  if (!verdict.passed) {
    await say(`⚠️ **Potential trap detected: ${verdict.trapType}**\n\n`);

    // Warning and hint are each optional and streamed as separate chunks.
    if (verdict.warning) {
      await say(`**Warning:** ${verdict.warning}\n`);
    }
    if (verdict.hint) {
      await say(`**Hint:** ${verdict.hint}\n`);
    }

    await say(`\n_Consider rechecking your reasoning before finalizing this answer._\n`);
  } else {
    await say(
      `✓ **No trap patterns detected**\n\n_Answer "${answer}" does not match known cognitive trap patterns for this question type._\n`,
    );
  }

  const status = verdict.passed ? "continue" : "review";
  const suggestedAction = verdict.passed
    ? "No trap patterns detected. Answer appears safe."
    : `Potential ${verdict.trapType} trap detected. Review reasoning before finalizing.`;

  return {
    session_id: sessionId,
    current_step: 0,
    branch: "main",
    operation: "spot_check",
    chain_confidence: 0,
    confidence_threshold: threshold,
    steps_with_confidence: 0,
    status,
    suggested_action: suggestedAction,
    spot_check_result: {
      passed: verdict.passed,
      trap_type: verdict.trapType,
      warning: verdict.warning,
      hint: verdict.hint,
      confidence: verdict.confidence,
    },
  };
}
|
|
2363
|
+
|
|
2364
|
+
/**
 * Handle the `challenge` operation: adversarial self-check of a reasoning chain.
 *
 * Loads the thoughts recorded for the session/branch, feeds them (with the
 * optional `args.target_claim`) to `challenge`, streams the findings grouped by
 * severity, and returns them together with the session's confidence state.
 *
 * @param args - Tool arguments. `session_id` is required; `branch_id` falls
 *   back to `"main"` and `confidence_threshold` falls back to 0.8.
 * @param ctx - MCP context; only `streamContent` is used.
 * @returns A response with `operation: "challenge"`. `status` is `"review"`
 *   when at least one high-severity challenge exists, otherwise `"continue"`.
 *   When the branch has no thoughts, a placeholder result with zero challenges
 *   and robustness 1.0 is returned.
 * @throws Error when `session_id` is missing or the session cannot be found.
 */
async function handleChallenge(args: ScratchpadArgs, ctx: MCPContext): Promise<ScratchpadResponse> {
  const emit = ctx.streamContent;
  const say = (text: string) => emit({ type: "text", text });

  const sessionId = args.session_id;
  if (!sessionId) {
    throw new Error("session_id required for challenge operation");
  }
  if (!SessionManager.get(sessionId)) {
    throw new Error(`Session not found: ${sessionId}`);
  }

  const threshold = args.confidence_threshold ?? 0.8;
  const branchId = args.branch_id || "main";

  // Nothing recorded on this branch yet: report and bail out early.
  const thoughts = SessionManager.getThoughts(sessionId, branchId);
  if (thoughts.length === 0) {
    await say("⚠️ No reasoning steps to challenge. Add steps first.\n");

    return {
      session_id: sessionId,
      current_step: 0,
      branch: branchId,
      operation: "challenge",
      chain_confidence: 0,
      confidence_threshold: threshold,
      steps_with_confidence: 0,
      status: "continue",
      suggested_action: "Add reasoning steps before running challenge.",
      challenge_result: {
        challenges_generated: 0,
        challenges: [],
        overall_robustness: 1.0,
        summary: "No steps to challenge.",
      },
    };
  }

  // Reduce stored thoughts to the { step, thought } shape passed to challenge().
  const stepData = thoughts.map((t) => ({ step: t.step_number, thought: t.thought }));
  const result = challenge(stepData, args.target_claim);

  const highCount = result.challenges.filter((c) => c.severity === "high").length;
  const robustnessPct = (result.overall_robustness * 100).toFixed(0);

  if (result.challenges_generated !== 0) {
    const medCount = result.challenges.filter((c) => c.severity === "medium").length;

    await say(
      `⚡ **Adversarial Challenge Results**\n` +
        ` Challenges: ${result.challenges_generated} (${highCount} high, ${medCount} medium)\n` +
        ` Robustness: ${robustnessPct}%\n\n`,
    );

    // Stream one section per severity, most severe first; empty sections are skipped.
    const severityEmoji = { high: "🔴", medium: "🟡", low: "🟢" } as const;
    for (const severity of ["high", "medium", "low"] as const) {
      const group = result.challenges.filter((c) => c.severity === severity);
      if (group.length === 0) continue;

      await say(`**${severityEmoji[severity]} ${severity.toUpperCase()} Severity:**\n`);

      for (const item of group) {
        // Claims longer than 60 characters are truncated with an ellipsis.
        const claim = item.original_claim;
        const preview = claim.length > 60 ? `${claim.slice(0, 60)}...` : claim;

        await say(
          `• **${item.type}**: ${item.challenge}\n` +
            ` _Claim: "${preview}"_\n` +
            ` 💡 ${item.suggested_response}\n\n`,
        );
      }
    }
  } else {
    await say(
      `✓ **No significant challenges found**\n` +
        `Robustness: ${robustnessPct}%\n\n` +
        `_Reasoning appears robust against common counterarguments._\n`,
    );
  }

  // Calculate confidence for the session after the report has been streamed.
  const confState = calculateConfidence(sessionId, branchId);
  const status = highCount > 0 ? "review" : "continue";

  let suggestedAction: string;
  if (result.challenges_generated === 0) {
    suggestedAction = "Reasoning appears robust. Proceed to complete.";
  } else if (highCount > 0) {
    suggestedAction = `Found ${highCount} high-severity challenge(s). Address before finalizing.`;
  } else {
    suggestedAction = `Found ${result.challenges_generated} challenge(s). Consider addressing before completion.`;
  }

  return {
    session_id: sessionId,
    current_step: SessionManager.getCurrentStep(sessionId, branchId),
    branch: branchId,
    operation: "challenge",
    chain_confidence: confState.chainConfidence,
    confidence_threshold: threshold,
    steps_with_confidence: confState.stepsWithConfidence,
    status,
    suggested_action: suggestedAction,
    challenge_result: {
      challenges_generated: result.challenges_generated,
      challenges: result.challenges.map((item) => ({
        type: item.type,
        original_claim: item.original_claim,
        challenge: item.challenge,
        severity: item.severity,
        suggested_response: item.suggested_response,
      })),
      overall_robustness: result.overall_robustness,
      summary: result.summary,
    },
  };
}
|
|
2492
|
+
|
|
2493
|
+
// ============================================================================
|
|
2494
|
+
// SCRATCHPAD TOOL
|
|
2495
|
+
// ============================================================================
|
|
2496
|
+
|
|
2497
|
+
/**
 * The `scratchpad` MCP tool definition.
 *
 * `execute` is the single entry point for every scratchpad operation:
 *  1. If `hard_limit_tokens` and `session_id` are both supplied and the session
 *     has already used that many tokens, it short-circuits with a
 *     `budget_exhausted` response and runs no handler.
 *  2. Otherwise it dispatches on `args.operation` to the matching handler.
 *  3. It attaches per-call and cumulative token usage to the response and adds
 *     a `token_warning` when `warn_at_tokens` is exceeded.
 *  4. Any thrown error is caught and returned as a JSON `{ error, tokens, ... }`
 *     text payload instead of propagating.
 */
export const scratchpadTool = {
  name: "scratchpad",
  description: `Structured reasoning with verification, trap detection, and self-challenge.

OPS:
step thought= [question= on 1st] → Add reasoning step. Auto-verifies at step 4+.
complete [final_answer=] [summary=] → Finalize chain. Auto spot-checks answer.
revise target_step= thought= [reason=] → Fix a step (after verification fail or trap warning).
branch thought= [from_step=] [hypothesis=] → Fork reasoning path to test alternative.
navigate view=history|branches|step|path [step_id=] → Inspect session state.
augment text= → Compute math expressions, inject results.
hint [expression=] → Progressive simplification hints (auto-continues).
mistakes text= → Check for algebraic errors.
spot_check question= answer= → Manual trap pattern detection.
challenge [target_claim=] → Adversarial self-check. Generates counterarguments.
override failed_step= [reason=] → Force-commit after verification fail.

DEFAULTS:
confidence_threshold=0.8 token_budget=3000

STATUS → ACTION:
continue → Add more steps
threshold_reached → Consider complete or add verification step
review → Trap/drift detected. Use reconsideration.suggested_revise
verification_failed → revise target_step | branch from prior | override
budget_exhausted → complete or new session

FLOW:
1. step(question=, thought=) → primes trap detection
2. step(thought=) × N → auto-verify at 4+
3. [optional] challenge() → adversarial self-check
4. complete(final_answer=) → spot-check, returns status
5. If review: revise per reconsideration.suggested_revise`,

  parameters: ScratchpadSchema,

  annotations: {
    streamingHint: true,
  },

  /**
   * Run one scratchpad operation.
   *
   * @param args - Validated tool arguments; `operation` selects the handler.
   * @param ctx - MCP context forwarded unchanged to the handler.
   * @returns A single text content item. On success (and on budget exhaustion)
   *   it is the response pretty-printed inside a fenced `json` block; on error
   *   it is a compact JSON object with an `error` message.
   */
  execute: async (args: ScratchpadArgs, ctx: MCPContext) => {
    // Wrap a payload as the fenced-JSON text item used for non-error results.
    const asFencedJson = (payload: unknown) => ({
      content: [
        {
          type: "text" as const,
          text: `\n\`\`\`json\n${JSON.stringify(payload, null, 2)}\n\`\`\``,
        },
      ],
    });

    try {
      // Check hard budget limit BEFORE processing operation.
      // NOTE(review): this gate applies to every operation, including the
      // `complete` operation that the message below recommends — confirm that
      // blocking `complete` here is intended.
      if (args.hard_limit_tokens && args.session_id) {
        const existingTokens = getSessionTokens(args.session_id);
        if (existingTokens && existingTokens.total >= args.hard_limit_tokens) {
          const budgetExhaustedResponse: ScratchpadResponse = {
            session_id: args.session_id,
            current_step: 0,
            branch: "main",
            operation: args.operation,
            chain_confidence: 0,
            // Fall back to the same 0.8 default the handlers use so the field
            // is never undefined (and therefore never dropped by JSON.stringify).
            confidence_threshold: args.confidence_threshold ?? 0.8,
            steps_with_confidence: 0,
            status: "budget_exhausted",
            suggested_action:
              "Token budget exhausted. Complete the reasoning chain with your current answer or start a new session.",
            session_tokens: existingTokens,
            budget_exhausted: {
              limit: args.hard_limit_tokens,
              current: existingTokens.total,
              exceeded_by: existingTokens.total - args.hard_limit_tokens,
              message: `Session has used ${existingTokens.total} tokens, exceeding hard limit of ${args.hard_limit_tokens}.`,
              recommendation:
                "Use complete operation to finalize your answer, or start a fresh session for new reasoning.",
            },
          };
          return asFencedJson(budgetExhaustedResponse);
        }
      }

      let response: ScratchpadResponse;

      switch (args.operation) {
        case "step":
          response = await handleStep(args, ctx);
          break;
        case "navigate":
          response = await handleNavigate(args, ctx);
          break;
        case "branch":
          response = await handleBranch(args, ctx);
          break;
        case "revise":
          response = await handleRevise(args, ctx);
          break;
        case "complete":
          response = await handleComplete(args, ctx);
          break;
        case "augment":
          response = await handleAugment(args, ctx);
          break;
        case "override":
          response = await handleOverride(args, ctx);
          break;
        case "hint":
          response = await handleHint(args, ctx);
          break;
        case "mistakes":
          response = await handleMistakes(args, ctx);
          break;
        case "spot_check":
          response = await handleSpotCheck(args, ctx);
          break;
        case "challenge":
          response = await handleChallenge(args, ctx);
          break;
        default:
          // Reached only if an operation outside the cases above arrives at runtime.
          throw new Error(`Unknown operation: ${(args as { operation: string }).operation}`);
      }

      // Add token usage to response
      const tokens = calculateTokenUsage(args, response);
      response.tokens = tokens;

      // Track cumulative session tokens (keyed by the session id the handler
      // returned, which may have been generated by the handler itself).
      const sessionTokens = trackSessionTokens(response.session_id, tokens);
      response.session_tokens = sessionTokens;

      // Check token budget warning threshold (soft limit: warn, don't block).
      if (args.warn_at_tokens && sessionTokens.total > args.warn_at_tokens) {
        response.token_warning = {
          threshold: args.warn_at_tokens,
          current: sessionTokens.total,
          exceeded_by: sessionTokens.total - args.warn_at_tokens,
          message: `Session token usage (${sessionTokens.total}) exceeds threshold (${args.warn_at_tokens}). Consider completing or compressing.`,
        };
      }

      return asFencedJson(response);
    } catch (error) {
      const message = error instanceof Error ? error.message : "Unknown error";
      const errorResponse: {
        error: string;
        tokens?: ReturnType<typeof calculateTokenUsage>;
        session_tokens?: ReturnType<typeof trackSessionTokens>;
      } = { error: message };
      const tokens = calculateTokenUsage(args, errorResponse);
      errorResponse.tokens = tokens;
      // Track session tokens even on error for accurate budget monitoring
      if (args.session_id) {
        errorResponse.session_tokens = trackSessionTokens(args.session_id, tokens);
      }
      return {
        content: [{ type: "text" as const, text: JSON.stringify(errorResponse) }],
      };
    }
  },
};
|