verifiable-thinking-mcp 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68):
  1. package/LICENSE +21 -0
  2. package/README.md +339 -0
  3. package/package.json +75 -0
  4. package/src/index.ts +38 -0
  5. package/src/lib/cache.ts +246 -0
  6. package/src/lib/compression.ts +804 -0
  7. package/src/lib/compute/cache.ts +86 -0
  8. package/src/lib/compute/classifier.ts +555 -0
  9. package/src/lib/compute/confidence.ts +79 -0
  10. package/src/lib/compute/context.ts +154 -0
  11. package/src/lib/compute/extract.ts +200 -0
  12. package/src/lib/compute/filter.ts +224 -0
  13. package/src/lib/compute/index.ts +171 -0
  14. package/src/lib/compute/math.ts +247 -0
  15. package/src/lib/compute/patterns.ts +564 -0
  16. package/src/lib/compute/registry.ts +145 -0
  17. package/src/lib/compute/solvers/arithmetic.ts +65 -0
  18. package/src/lib/compute/solvers/calculus.ts +249 -0
  19. package/src/lib/compute/solvers/derivation-core.ts +371 -0
  20. package/src/lib/compute/solvers/derivation-latex.ts +160 -0
  21. package/src/lib/compute/solvers/derivation-mistakes.ts +1046 -0
  22. package/src/lib/compute/solvers/derivation-simplify.ts +451 -0
  23. package/src/lib/compute/solvers/derivation-transform.ts +620 -0
  24. package/src/lib/compute/solvers/derivation.ts +67 -0
  25. package/src/lib/compute/solvers/facts.ts +120 -0
  26. package/src/lib/compute/solvers/formula.ts +728 -0
  27. package/src/lib/compute/solvers/index.ts +36 -0
  28. package/src/lib/compute/solvers/logic.ts +422 -0
  29. package/src/lib/compute/solvers/probability.ts +307 -0
  30. package/src/lib/compute/solvers/statistics.ts +262 -0
  31. package/src/lib/compute/solvers/word-problems.ts +408 -0
  32. package/src/lib/compute/types.ts +107 -0
  33. package/src/lib/concepts.ts +111 -0
  34. package/src/lib/domain.ts +731 -0
  35. package/src/lib/extraction.ts +912 -0
  36. package/src/lib/index.ts +122 -0
  37. package/src/lib/judge.ts +260 -0
  38. package/src/lib/math/ast.ts +842 -0
  39. package/src/lib/math/index.ts +8 -0
  40. package/src/lib/math/operators.ts +171 -0
  41. package/src/lib/math/tokenizer.ts +477 -0
  42. package/src/lib/patterns.ts +200 -0
  43. package/src/lib/session.ts +825 -0
  44. package/src/lib/think/challenge.ts +323 -0
  45. package/src/lib/think/complexity.ts +504 -0
  46. package/src/lib/think/confidence-drift.ts +507 -0
  47. package/src/lib/think/consistency.ts +347 -0
  48. package/src/lib/think/guidance.ts +188 -0
  49. package/src/lib/think/helpers.ts +568 -0
  50. package/src/lib/think/hypothesis.ts +216 -0
  51. package/src/lib/think/index.ts +127 -0
  52. package/src/lib/think/prompts.ts +262 -0
  53. package/src/lib/think/route.ts +358 -0
  54. package/src/lib/think/schema.ts +98 -0
  55. package/src/lib/think/scratchpad-schema.ts +662 -0
  56. package/src/lib/think/spot-check.ts +961 -0
  57. package/src/lib/think/types.ts +93 -0
  58. package/src/lib/think/verification.ts +260 -0
  59. package/src/lib/tokens.ts +177 -0
  60. package/src/lib/verification.ts +620 -0
  61. package/src/prompts/index.ts +10 -0
  62. package/src/prompts/templates.ts +336 -0
  63. package/src/resources/index.ts +8 -0
  64. package/src/resources/sessions.ts +196 -0
  65. package/src/tools/compress.ts +138 -0
  66. package/src/tools/index.ts +5 -0
  67. package/src/tools/scratchpad.ts +2659 -0
  68. package/src/tools/sessions.ts +144 -0
@@ -0,0 +1,568 @@
1
+ /**
2
+ * Think Tool Helpers - Extracted functions to reduce cognitive complexity
3
+ * These helpers handle specific phases of the think tool's execute function.
4
+ */
5
+
6
+ import { compress, needsCompression } from "../compression.ts";
7
+ import {
8
+ type ContextAwareResult,
9
+ contextAwareCompute,
10
+ isLikelyComputable,
11
+ tryLocalCompute,
12
+ } from "../compute/index.ts";
13
+ import type { ComputeResult } from "../compute/types.ts";
14
+ import { SessionManager, type ThoughtRecord } from "../session.ts";
15
+ import { type VerificationDomain, type VerificationResult, verify } from "../verification.ts";
16
+ import type { ThoughtAnalysis } from "./guidance.ts";
17
+ import {
18
+ analyzeThought,
19
+ assessPromptComplexity,
20
+ detectDomain,
21
+ isTrivialQuestion,
22
+ } from "./index.ts";
23
+ import type { ThinkArgs } from "./schema.ts";
24
+
25
+ // ============================================================================
26
+ // TYPES
27
+ // ============================================================================
28
+
29
/**
 * Compression mode selected for a think-tool call.
 *
 * As used by the helpers in this file:
 * - `"none"` makes the compression helpers return their input unchanged;
 * - `"auto"` defers the decision to `needsCompression`;
 * - `"aggressive"` switches to fixed length thresholds and lower target ratios.
 */
export type CompressionLevel =
  | "none"
  | "auto"
  | "aggressive";

/**
 * Asynchronous sink for one chunk of text content.
 * Helpers in this file await it once per emitted chunk.
 */
export type StreamFn = (
  content: { type: "text"; text: string },
) => Promise<void>;
31
+
32
/**
 * Per-call compression bookkeeping consumed by `buildRecord` and `buildResponse`.
 *
 * Each stage (input, output, context) has a flag and a savings counter.
 * NOTE(review): the compression helpers in this file compute savings as a
 * difference of string lengths, so "bytes" is presumably UTF-16 code units
 * rather than encoded bytes — confirm against the caller that fills this in.
 */
export interface CompressionStats {
  /** Whether the incoming thought was compressed. */
  inputCompressed: boolean;
  /** Savings attributed to input compression. */
  inputBytesSaved: number;

  /** Whether the stored thought was compressed. */
  outputCompressed: boolean;
  /** Savings attributed to output compression. */
  outputBytesSaved: number;

  /** Whether the prior-thought context was compressed. */
  contextCompressed: boolean;
  /** Savings attributed to context compression. */
  contextBytesSaved: number;
}
40
+
41
/**
 * Values resolved once per think-tool call (see `initContext`) and then shared
 * by the guidance, verification, record and response helpers.
 */
export interface ExecuteContext {
  /** Session identifier; `initContext` generates one when the caller gave none. */
  sessionId: string;
  /** Branch identifier; `initContext` falls back to `"main"`. */
  branch: string;
  /** Step number of the current thought, copied from the call arguments. */
  step: number;
  /** Composite identifier that `initContext` formats as `sessionId:branch:step`. */
  stepId: string;

  /** Verification domain, either supplied by the caller or detected from the thought. */
  domain: VerificationDomain;
  /** Compression mode in effect for this call. */
  compressionLevel: CompressionLevel;

  /** Thoughts already stored for this session and branch. */
  priorThoughts: ThoughtRecord[];
}
50
+
51
/**
 * Complexity summary attached to the response for the first step.
 * `assessComplexity` fills it from `assessPromptComplexity` and
 * `isTrivialQuestion`.
 */
export interface ComplexityInfo {
  /** Complexity tier label reported by `assessPromptComplexity`. */
  tier: string;
  /** Numeric complexity score reported by `assessPromptComplexity`. */
  score: number;
  /** Result of `isTrivialQuestion` for the thought. */
  trivial: boolean;
  /** Domain named in the complexity explanation, or `null` when there is none. */
  domain: string | null;
  /** Intensity signals listed in the complexity explanation. */
  intensity_signals: string[];
}
58
+
59
+ // ============================================================================
60
+ // BASELINE MODE
61
+ // ============================================================================
62
+
63
/**
 * Assemble the reply for baseline mode, where the call is passed through
 * without any analysis.
 *
 * @param args - Think-tool arguments; reads `step_number`, `estimated_total`,
 *   `is_final_step`, `purpose` and `next_action`.
 * @param stepId - Identifier echoed back as `step_id`.
 * @param sessionId - Identifier echoed back as `session_id`.
 * @returns A response flagged with `baseline: true`. `next_step` is included
 *   only while the status is `"continue"`.
 */
export function buildBaselineResponse(
  args: ThinkArgs,
  stepId: string,
  sessionId: string,
): Record<string, unknown> {
  const currentStep = args.step_number;
  const finished = Boolean(args.is_final_step);

  return {
    step_id: stepId,
    session_id: sessionId,
    status: finished ? "complete" : "continue",
    step: `${currentStep}/${args.estimated_total}`,
    purpose: args.purpose,
    next_action: args.next_action,
    baseline: true,
    // Only an unfinished chain advertises the following step number.
    ...(finished ? {} : { next_step: currentStep + 1 }),
  };
}
85
+
86
+ // ============================================================================
87
+ // COMPRESSION HELPERS
88
+ // ============================================================================
89
+
90
/**
 * Compress the incoming thought when the selected level calls for it.
 *
 * @param text - Thought text as received.
 * @param context - Optional context passed to the compressor; an empty string
 *   is used when it is undefined.
 * @param level - `"none"` never compresses; `"aggressive"` compresses any text
 *   longer than 200 characters with a 0.5 target ratio; otherwise
 *   `needsCompression` decides and the target ratio is 0.6.
 * @returns The text to use, whether it was compressed, and the length saved.
 *   The compressed form is kept only when the reported ratio is below 0.8.
 */
export function compressInput(
  text: string,
  context: string | undefined,
  level: CompressionLevel,
): { thought: string; compressed: boolean; bytesSaved: number } {
  const untouched = { thought: text, compressed: false, bytesSaved: 0 };
  if (level === "none") return untouched;

  const aggressive = level === "aggressive";
  const query = context ?? "";

  let worthTrying: boolean;
  if (aggressive) {
    worthTrying = text.length > 200;
  } else {
    worthTrying = needsCompression(text, query).shouldCompress;
  }
  if (!worthTrying) return untouched;

  const outcome = compress(text, query, { target_ratio: aggressive ? 0.5 : 0.6 });

  // Discard results that did not shrink the text enough to be worth it.
  if (!(outcome.ratio < 0.8)) return untouched;

  return {
    thought: outcome.compressed,
    compressed: true,
    bytesSaved: text.length - outcome.compressed.length,
  };
}
122
+
123
/**
 * Compress the accumulated context of a long reasoning chain.
 *
 * The prior thoughts are joined with single spaces and compressed against the
 * current thought. Nothing is done for level `"none"` or for chains with fewer
 * than five prior thoughts.
 *
 * @param priorThoughts - Thoughts already recorded on this branch.
 * @param currentThought - Thought being processed; passed to the compressor as
 *   the query.
 * @param level - `"aggressive"` compresses any joined context longer than 500
 *   characters with a 0.3 target ratio; otherwise `needsCompression` decides
 *   and the target ratio is 0.4.
 * @returns The compressed context and the length saved, or
 *   `{ compressed: undefined, bytesSaved: 0 }` when compression was skipped or
 *   did not make the context shorter.
 */
export function compressChainContext(
  priorThoughts: ThoughtRecord[],
  currentThought: string,
  level: CompressionLevel,
): { compressed: string | undefined; bytesSaved: number } {
  if (level === "none" || priorThoughts.length < 5) {
    return { compressed: undefined, bytesSaved: 0 };
  }

  const fullContext = priorThoughts.map((t) => t.thought).join(" ");
  const shouldCompress =
    level === "aggressive"
      ? fullContext.length > 500
      : needsCompression(fullContext, currentThought).shouldCompress;

  if (!shouldCompress) {
    return { compressed: undefined, bytesSaved: 0 };
  }

  const targetRatio = level === "aggressive" ? 0.3 : 0.4;
  const result = compress(fullContext, currentThought, { target_ratio: targetRatio });

  // Like compressInput/compressOutput, keep the result only when it helped:
  // a "compressed" context that is not shorter would be reported with zero or
  // negative savings.
  const bytesSaved = fullContext.length - result.compressed.length;
  if (!(bytesSaved > 0)) {
    return { compressed: undefined, bytesSaved: 0 };
  }

  return { compressed: result.compressed, bytesSaved };
}
151
+
152
/**
 * Decide what form of a thought is kept for storage.
 *
 * @param thought - Thought text to store.
 * @param context - Optional context passed to the compressor; an empty string
 *   is used when it is undefined.
 * @param level - `"none"` stores the thought as-is. `"aggressive"` always
 *   attempts compression (target ratio 0.6); otherwise `needsCompression`
 *   decides and the target ratio is 0.7.
 * @returns The text to store, whether it was compressed, and the length saved.
 *   Thoughts of 500 characters or fewer are never compressed, and the
 *   compressed form is kept only when the reported ratio is below 0.85.
 */
export function compressOutput(
  thought: string,
  context: string | undefined,
  level: CompressionLevel,
): { stored: string; compressed: boolean; bytesSaved: number } {
  const asIs = { stored: thought, compressed: false, bytesSaved: 0 };

  if (level === "none") return asIs;
  if (thought.length <= 500) return asIs;

  const aggressive = level === "aggressive";
  const query = context ?? "";

  // Aggressive mode skips the needsCompression check entirely.
  if (!aggressive && !needsCompression(thought, query).shouldCompress) {
    return asIs;
  }

  const outcome = compress(thought, query, { target_ratio: aggressive ? 0.6 : 0.7 });
  if (!(outcome.ratio < 0.85)) return asIs;

  return {
    stored: outcome.compressed,
    compressed: true,
    bytesSaved: thought.length - outcome.compressed.length,
  };
}
181
+
182
+ // ============================================================================
183
+ // COMPLEXITY & LOCAL COMPUTE
184
+ // ============================================================================
185
+
186
/**
 * Produce complexity metadata for the opening step of a chain.
 *
 * @param thought - Text passed to `assessPromptComplexity` and `isTrivialQuestion`.
 * @param step - Current step number; anything other than 1 yields `null`.
 * @returns Tier, score, trivial flag, detected domain and intensity signals
 *   for step 1, otherwise `null`.
 */
export function assessComplexity(thought: string, step: number): ComplexityInfo | null {
  if (step === 1) {
    const { tier, score, explanation } = assessPromptComplexity(thought);
    return {
      tier,
      score,
      trivial: isTrivialQuestion(thought),
      domain: explanation.domain_detected,
      intensity_signals: explanation.intensity_signals,
    };
  }
  return null;
}
200
+
201
/**
 * Attempt to answer the thought with the local compute engine and announce
 * the result on the stream.
 *
 * The attempt is made only when `args.local_compute` is truthy, the call is
 * for step 1, and `isLikelyComputable(thought)` holds.
 *
 * @param args - Think-tool arguments; reads `local_compute` and `step_number`.
 * @param thought - Text handed to the local solver.
 * @param streamFn - Sink that receives the one-chunk result announcement.
 * @returns The solver result when it reports `solved`, otherwise `null`.
 */
export async function tryCompute(
  args: ThinkArgs,
  thought: string,
  streamFn: StreamFn,
): Promise<ComputeResult | null> {
  if (!args.local_compute || args.step_number !== 1 || !isLikelyComputable(thought)) {
    return null;
  }

  const computed = tryLocalCompute(thought);
  if (!computed.solved) return null;

  // time_ms may be absent; include the timing only when it is a number so the
  // announcement never contains the literal text "undefinedms".
  const timing =
    typeof computed.time_ms === "number" ? `, ${computed.time_ms.toFixed(2)}ms` : "";

  await streamFn({
    type: "text",
    text:
      `⚡ **Local Compute** (${computed.method}${timing})\n` +
      `**Result:** ${computed.result}\n\n`,
  });

  return computed;
}
223
+
224
/**
 * Outcome of `tryAugment`: the rewritten thought plus bookkeeping about the
 * computations behind it.
 */
export interface AugmentResult {
  /** Thought text with computed values injected. */
  augmented: string;
  /** How many computations were injected. */
  count: number;
  /** How many computations were filtered out by domain. */
  filtered: number;
  /** Domain that was detected for the thought. */
  domain: string;
  /** Elapsed time in milliseconds. */
  time_ms: number;
}
237
+
238
/**
 * Inject locally computed values into a thought.
 *
 * Delegates to `contextAwareCompute`, passing the thought together with
 * `args.system_prompt`, which that helper uses for domain-aware filtering.
 *
 * @param args - Think-tool arguments; reads `augment_compute` and `system_prompt`.
 * @param thought - Text to scan for computable expressions.
 * @returns Augmented text and metadata, or `null` when augmentation is
 *   disabled or nothing was computed and nothing was filtered.
 */
export function tryAugment(args: ThinkArgs, thought: string): AugmentResult | null {
  if (!args.augment_compute) return null;

  const outcome: ContextAwareResult = contextAwareCompute({
    thought,
    systemPrompt: args.system_prompt,
  });

  // Nothing injected and nothing filtered means there is nothing to report.
  const nothingFound = !outcome.hasComputations && outcome.filteredCount === 0;
  if (nothingFound) return null;

  const { augmented, computations, filteredCount, domain, time_ms } = outcome;
  return {
    augmented,
    count: computations.length,
    filtered: filteredCount,
    domain,
    time_ms,
  };
}
268
+
269
+ // ============================================================================
270
+ // VALIDATION HELPERS
271
+ // ============================================================================
272
+
273
/**
 * Check that a revision targets an earlier step.
 *
 * @param revisesStep - Step being revised, or `undefined` when the thought is
 *   not a revision.
 * @param currentStep - Step number of the thought being processed.
 * @returns An error message when `revisesStep` is the current step or a later
 *   one; otherwise `null`.
 */
export function validateRevision(
  revisesStep: number | undefined,
  currentStep: number,
): string | null {
  const targetsLaterOrSame = revisesStep !== undefined && revisesStep >= currentStep;
  return targetsLaterOrSame
    ? `Cannot revise step ${revisesStep} from step ${currentStep}`
    : null;
}
284
+
285
/**
 * Check that a branch starts from an earlier step.
 *
 * @param branchFrom - Step the branch forks from, or `undefined` when the
 *   thought does not open a branch.
 * @param currentStep - Step number of the thought being processed.
 * @returns An error message when `branchFrom` is the current step or a later
 *   one; otherwise `null`.
 */
export function validateBranch(branchFrom: number | undefined, currentStep: number): string | null {
  const notYetReached = branchFrom !== undefined && branchFrom >= currentStep;
  return notYetReached ? `Cannot branch from future step ${branchFrom}` : null;
}
293
+
294
/**
 * List the declared dependencies that have no recorded thought.
 *
 * @param dependencies - Step numbers the current thought depends on.
 * @param priorThoughts - Thoughts already recorded; matched by `step_number`.
 * @returns The dependency step numbers absent from `priorThoughts`, in their
 *   original order; an empty array when no dependencies were declared.
 */
export function findMissingDeps(
  dependencies: number[] | undefined,
  priorThoughts: ThoughtRecord[],
): number[] {
  if (!dependencies || !dependencies.length) return [];

  const recorded = new Set<number>();
  for (const prior of priorThoughts) {
    recorded.add(prior.step_number);
  }

  const missing: number[] = [];
  for (const dep of dependencies) {
    if (!recorded.has(dep)) missing.push(dep);
  }
  return missing;
}
303
+
304
+ // ============================================================================
305
+ // GUIDANCE & VERIFICATION
306
+ // ============================================================================
307
+
308
/**
 * Analyze the thought for guidance and stream any findings.
 *
 * @param args - Think-tool arguments; guidance is skipped only when
 *   `args.guidance` is exactly `false`.
 * @param thought - Text to analyze.
 * @param ctx - Supplies the step number, prior thoughts and domain passed to
 *   `analyzeThought`.
 * @param streamFn - Sink for the formatted guidance; each line is awaited in
 *   order.
 * @returns The analysis, or `null` when guidance is disabled. Nothing is
 *   streamed when the analysis has no guidance and recommends no checkpoint.
 */
export async function runGuidance(
  args: ThinkArgs,
  thought: string,
  ctx: ExecuteContext,
  streamFn: StreamFn,
): Promise<ThoughtAnalysis | null> {
  if (args.guidance === false) return null;

  const analysis = analyzeThought(thought, ctx.step, ctx.priorThoughts, ctx.domain);

  const hasSomethingToShow = analysis.guidance.length > 0 || analysis.checkpoint_recommended;
  if (hasSomethingToShow) {
    // Collect the chunks first, then emit them one at a time in order.
    const chunks: string[] = ["\n---\n"];

    if (analysis.risk_level !== "low") {
      chunks.push(`**Risk: ${analysis.risk_level.toUpperCase()}**\n`);
    }
    if (analysis.checkpoint_recommended) {
      chunks.push("**⚠️ CHECKPOINT RECOMMENDED**\n");
    }
    for (const hint of analysis.guidance) {
      chunks.push(`> ${hint}\n`);
    }
    if (analysis.suggested_next) {
      chunks.push(`\n**Suggested:** ${analysis.suggested_next}\n`);
    }

    for (const text of chunks) {
      await streamFn({ type: "text", text });
    }
  }

  return analysis;
}
344
+
345
/**
 * Verify the thought against its domain and stream a one-line verdict.
 *
 * @param args - Think-tool arguments; verification runs only when
 *   `args.verify` is truthy.
 * @param thought - Text to verify.
 * @param ctx - Supplies the domain and the prior thoughts, whose text is
 *   passed to `verify` as context.
 * @param streamFn - Sink for the PASS/FAIL line with the confidence as a
 *   rounded percentage.
 * @returns The verification result, or `null` when verification is disabled.
 */
export async function runVerify(
  args: ThinkArgs,
  thought: string,
  ctx: ExecuteContext,
  streamFn: StreamFn,
): Promise<VerificationResult | null> {
  if (!args.verify) return null;

  const priorTexts: string[] = [];
  for (const prior of ctx.priorThoughts) {
    priorTexts.push(prior.thought);
  }

  // NOTE(review): the meaning of the trailing `true` flag is defined by
  // `verify` in ../verification.ts — confirm there.
  const outcome = verify(thought, ctx.domain, priorTexts, true);

  const verdict = outcome.passed ? "✓ PASS" : "✗ FAIL";
  const percent = Math.round(outcome.confidence * 100);
  await streamFn({
    type: "text",
    text: `\n**Verification: ${verdict}** (${percent}%)\n`,
  });

  return outcome;
}
365
+
366
+ // ============================================================================
367
+ // RECORD & RESPONSE BUILDERS
368
+ // ============================================================================
369
+
370
/**
 * Assemble the record that is stored in the session for this thought.
 *
 * @param args - Think-tool arguments; revision, branch, dependency, tool and
 *   external-context fields are copied through unchanged.
 * @param ctx - Supplies the step id, step number, branch and domain.
 * @param storedThought - Thought text to persist.
 * @param verificationResult - Verification outcome, or `null` when
 *   verification did not run; only `passed` and `confidence` are kept.
 * @param compressedContext - Compressed prior-thought context, if any.
 * @param stats - Compression bookkeeping; the `compression` field is set only
 *   when at least one stage reported compression.
 * @returns The record, timestamped with `Date.now()`.
 */
export function buildRecord(
  args: ThinkArgs,
  ctx: ExecuteContext,
  storedThought: string,
  verificationResult: VerificationResult | null,
  compressedContext: string | undefined,
  stats: CompressionStats,
): ThoughtRecord {
  const verification = verificationResult
    ? {
        passed: verificationResult.passed,
        confidence: verificationResult.confidence,
        domain: ctx.domain,
      }
    : undefined;

  const anyStageCompressed =
    stats.inputCompressed || stats.outputCompressed || stats.contextCompressed;
  const compression = anyStageCompressed
    ? {
        input_bytes_saved: stats.inputBytesSaved,
        output_bytes_saved: stats.outputBytesSaved,
        context_bytes_saved: stats.contextBytesSaved,
      }
    : undefined;

  const {
    revises_step,
    revision_reason,
    branch_from,
    branch_name,
    dependencies,
    tools_used,
    external_context,
  } = args;

  return {
    id: ctx.stepId,
    step_number: ctx.step,
    thought: storedThought,
    timestamp: Date.now(),
    branch_id: ctx.branch,
    verification,
    compressed_context: compressedContext,
    compression,
    revises_step,
    revision_reason,
    branch_from,
    branch_name,
    dependencies,
    tools_used,
    external_context,
  };
}
411
+
412
/**
 * Build the final JSON payload returned for a think-tool step.
 *
 * The core fields (`step_id`, `session_id`, `status`, `step`, `purpose`,
 * `next_action`) are always present. Every other section is added only when
 * its input is available.
 *
 * @param args - Think-tool arguments for this call.
 * @param ctx - Execution context (ids, step, branch, compression level).
 * @param analysis - Guidance analysis, or `null` when guidance was disabled.
 * @param verificationResult - Verification outcome, or `null` when not run.
 * @param localComputeResult - Local compute result, or `null` when none.
 * @param complexityInfo - Complexity metadata (step 1 only), or `null`.
 * @param stats - Compression bookkeeping; reported only when a stage compressed.
 * @param augmentResult - Augmentation metadata, or `null` (the default).
 * @returns The response object to serialize.
 */
export function buildResponse(
  args: ThinkArgs,
  ctx: ExecuteContext,
  analysis: ThoughtAnalysis | null,
  verificationResult: VerificationResult | null,
  localComputeResult: ComputeResult | null,
  complexityInfo: ComplexityInfo | null,
  stats: CompressionStats,
  augmentResult: AugmentResult | null = null,
): Record<string, unknown> {
  const status = args.is_final_step ? "complete" : "continue";

  const response: Record<string, unknown> = {
    step_id: ctx.stepId,
    session_id: ctx.sessionId,
    status,
    step: `${ctx.step}/${args.estimated_total}`,
    purpose: args.purpose,
    next_action: args.next_action,
  };

  if (status === "continue") {
    response.next_step = ctx.step + 1;
  }

  if (args.confidence !== undefined) {
    response.confidence = args.confidence;
  }

  // Analysis metadata
  if (analysis) {
    response.risk_level = analysis.risk_level;
    if (analysis.patterns_detected.length > 0) {
      response.patterns = analysis.patterns_detected;
    }
    if (analysis.checkpoint_recommended) {
      response.checkpoint = true;
    }
  }

  // Verification metadata
  if (verificationResult) {
    response.verified = verificationResult.passed;
    response.verification_confidence = verificationResult.confidence;
  }

  // Local compute metadata
  if (localComputeResult) {
    response.local_compute = {
      solved: true,
      result: localComputeResult.result,
      method: localComputeResult.method,
      time_ms: localComputeResult.time_ms,
    };
  }

  // Complexity metadata (step 1 only)
  if (complexityInfo) {
    response.complexity = complexityInfo;
  }

  // Revision metadata. Presence is tested by type rather than truthiness so
  // that a revision of step 0, which validateRevision accepts, is not dropped.
  if (typeof args.revises_step === "number") {
    response.revised_step = args.revises_step;
  }

  // Branch metadata. Same reasoning: branching from step 0 must be reported.
  if (typeof args.branch_from === "number") {
    response.branch = {
      id: ctx.branch,
      name: args.branch_name,
      from: args.branch_from,
    };
  }

  // Tools used
  if (args.tools_used?.length) {
    response.tools_used = args.tools_used;
  }

  // Compression stats
  const hasCompression = stats.inputCompressed || stats.outputCompressed || stats.contextCompressed;
  if (hasCompression) {
    response.compression = {
      level: ctx.compressionLevel,
      input: stats.inputCompressed,
      output: stats.outputCompressed,
      context: stats.contextCompressed,
      bytes_saved: stats.inputBytesSaved + stats.outputBytesSaved + stats.contextBytesSaved,
    };
  }

  // Augmentation metadata
  if (augmentResult) {
    response.augmented = {
      count: augmentResult.count,
      filtered: augmentResult.filtered,
      domain: augmentResult.domain,
      time_ms: augmentResult.time_ms,
    };
  }

  return response;
}
517
+
518
+ // ============================================================================
519
+ // UTILITY
520
+ // ============================================================================
521
+
522
/**
 * Wrap an error message in the tool's content envelope.
 *
 * @param message - Human-readable error description.
 * @returns A single text content item whose text is the JSON string
 *   `{"error": message}`.
 */
export function errorResponse(message: string) {
  const text = JSON.stringify({ error: message });
  return {
    content: [{ type: "text" as const, text }],
  };
}
528
+
529
/**
 * Wrap a payload in the tool's content envelope as a fenced JSON block.
 *
 * @param data - Payload to serialize with two-space indentation.
 * @returns A single text content item holding the payload inside a
 *   Markdown `json` code fence, preceded by a newline.
 */
export function jsonResponse(data: Record<string, unknown>) {
  const pretty = JSON.stringify(data, null, 2);
  const text = `\n\`\`\`json\n${pretty}\n\`\`\``;
  return {
    content: [{ type: "text" as const, text }],
  };
}
537
+
538
/**
 * Resolve the per-call execution context from the tool arguments.
 *
 * Falsy arguments fall back to defaults: a generated `s_<uuid>` session id,
 * branch `"main"`, compression level `"auto"`, and a domain detected from the
 * thought text.
 *
 * @param args - Think-tool arguments; reads `session_id`, `branch_id`,
 *   `step_number`, `compression_level` and `domain`.
 * @param thought - Text passed to `detectDomain` when no domain was supplied.
 * @returns The context, including the thoughts already stored for the
 *   resolved session and branch.
 */
export function initContext(args: ThinkArgs, thought: string): ExecuteContext {
  let sessionId = args.session_id;
  if (!sessionId) {
    sessionId = `s_${crypto.randomUUID()}`;
  }

  const branch = args.branch_id || "main";
  const step = args.step_number;
  const compressionLevel = (args.compression_level || "auto") as CompressionLevel;

  // Prior thoughts are looked up before the domain is resolved.
  const priorThoughts = SessionManager.getThoughts(sessionId, branch);
  const domain = (args.domain || detectDomain(thought)) as VerificationDomain;

  const stepId = `${sessionId}:${branch}:${step}`;
  return { sessionId, branch, step, stepId, domain, compressionLevel, priorThoughts };
}
557
+
558
/**
 * Persist a thought record through the session manager.
 *
 * @param sessionId - Session the record belongs to.
 * @param record - Record to add.
 * @returns `{ success: true }` when the session manager accepted the record;
 *   otherwise `{ success: false, error }`, using the manager's error text or
 *   `"Failed to store thought"` when it gave none.
 */
export function storeThought(
  sessionId: string,
  record: ThoughtRecord,
): { success: true } | { success: false; error: string } {
  const outcome = SessionManager.addThought(sessionId, record);
  if (outcome.success) {
    return { success: true };
  }
  return { success: false, error: outcome.error || "Failed to store thought" };
}