verifiable-thinking-mcp 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. package/LICENSE +21 -0
  2. package/README.md +339 -0
  3. package/package.json +75 -0
  4. package/src/index.ts +38 -0
  5. package/src/lib/cache.ts +246 -0
  6. package/src/lib/compression.ts +804 -0
  7. package/src/lib/compute/cache.ts +86 -0
  8. package/src/lib/compute/classifier.ts +555 -0
  9. package/src/lib/compute/confidence.ts +79 -0
  10. package/src/lib/compute/context.ts +154 -0
  11. package/src/lib/compute/extract.ts +200 -0
  12. package/src/lib/compute/filter.ts +224 -0
  13. package/src/lib/compute/index.ts +171 -0
  14. package/src/lib/compute/math.ts +247 -0
  15. package/src/lib/compute/patterns.ts +564 -0
  16. package/src/lib/compute/registry.ts +145 -0
  17. package/src/lib/compute/solvers/arithmetic.ts +65 -0
  18. package/src/lib/compute/solvers/calculus.ts +249 -0
  19. package/src/lib/compute/solvers/derivation-core.ts +371 -0
  20. package/src/lib/compute/solvers/derivation-latex.ts +160 -0
  21. package/src/lib/compute/solvers/derivation-mistakes.ts +1046 -0
  22. package/src/lib/compute/solvers/derivation-simplify.ts +451 -0
  23. package/src/lib/compute/solvers/derivation-transform.ts +620 -0
  24. package/src/lib/compute/solvers/derivation.ts +67 -0
  25. package/src/lib/compute/solvers/facts.ts +120 -0
  26. package/src/lib/compute/solvers/formula.ts +728 -0
  27. package/src/lib/compute/solvers/index.ts +36 -0
  28. package/src/lib/compute/solvers/logic.ts +422 -0
  29. package/src/lib/compute/solvers/probability.ts +307 -0
  30. package/src/lib/compute/solvers/statistics.ts +262 -0
  31. package/src/lib/compute/solvers/word-problems.ts +408 -0
  32. package/src/lib/compute/types.ts +107 -0
  33. package/src/lib/concepts.ts +111 -0
  34. package/src/lib/domain.ts +731 -0
  35. package/src/lib/extraction.ts +912 -0
  36. package/src/lib/index.ts +122 -0
  37. package/src/lib/judge.ts +260 -0
  38. package/src/lib/math/ast.ts +842 -0
  39. package/src/lib/math/index.ts +8 -0
  40. package/src/lib/math/operators.ts +171 -0
  41. package/src/lib/math/tokenizer.ts +477 -0
  42. package/src/lib/patterns.ts +200 -0
  43. package/src/lib/session.ts +825 -0
  44. package/src/lib/think/challenge.ts +323 -0
  45. package/src/lib/think/complexity.ts +504 -0
  46. package/src/lib/think/confidence-drift.ts +507 -0
  47. package/src/lib/think/consistency.ts +347 -0
  48. package/src/lib/think/guidance.ts +188 -0
  49. package/src/lib/think/helpers.ts +568 -0
  50. package/src/lib/think/hypothesis.ts +216 -0
  51. package/src/lib/think/index.ts +127 -0
  52. package/src/lib/think/prompts.ts +262 -0
  53. package/src/lib/think/route.ts +358 -0
  54. package/src/lib/think/schema.ts +98 -0
  55. package/src/lib/think/scratchpad-schema.ts +662 -0
  56. package/src/lib/think/spot-check.ts +961 -0
  57. package/src/lib/think/types.ts +93 -0
  58. package/src/lib/think/verification.ts +260 -0
  59. package/src/lib/tokens.ts +177 -0
  60. package/src/lib/verification.ts +620 -0
  61. package/src/prompts/index.ts +10 -0
  62. package/src/prompts/templates.ts +336 -0
  63. package/src/resources/index.ts +8 -0
  64. package/src/resources/sessions.ts +196 -0
  65. package/src/tools/compress.ts +138 -0
  66. package/src/tools/index.ts +5 -0
  67. package/src/tools/scratchpad.ts +2659 -0
  68. package/src/tools/sessions.ts +144 -0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 CoderDayton
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,339 @@
1
+ <div align="center">
2
+
3
+ <img src="assets/header.svg" alt="Verifiable Thinking MCP" width="800" />
4
+
5
+ **LLMs fail predictably on cognitive traps. This catches them.**
6
+
7
+ [![npm version](https://img.shields.io/npm/v/verifiable-thinking-mcp?color=blue&label=npm)](https://www.npmjs.com/package/verifiable-thinking-mcp)
8
+ [![CI](https://img.shields.io/github/actions/workflow/status/CoderDayton/verifiable-thinking-mcp/ci.yml?label=CI)](https://github.com/CoderDayton/verifiable-thinking-mcp/actions/workflows/ci.yml)
9
+ [![codecov](https://codecov.io/gh/CoderDayton/verifiable-thinking-mcp/branch/main/graph/badge.svg)](https://codecov.io/gh/CoderDayton/verifiable-thinking-mcp)
10
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
11
+
12
+ *Bat-and-ball, lily pad doubling, Monty Hall—15 trap patterns detected in <1ms, no LLM calls.*
13
+
14
+ [Quick Start](#quick-start) • [Features](#features) • [Trap Detection](#trap-detection) • [API](#tools)
15
+
16
+ </div>
17
+
18
+ ---
19
+
20
+ An MCP server for structured reasoning with cognitive trap detection, verification gates, and context compression.
21
+
22
+ ```
23
+ ┌────────────────────────────────────────────────────────────────┐
24
+ │ "A bat and ball cost $1.10. The bat costs $1 more..." │
25
+ │ ↓ │
26
+ │ TRAP DETECTED: additive_system │
27
+ │ > Don't subtract $1 from $1.10. Set up: x + (x+1) = 1.10 │
28
+ │ ↓ │
29
+ │ Answer: $0.05 (not $0.10) │
30
+ └────────────────────────────────────────────────────────────────┘
31
+ ```
32
+
33
+ ## Why This Exists
34
+
35
+ | The Problem | Our Solution |
36
+ |-------------|--------------|
37
+ | LLMs answer "$0.10" to bat-ball ~40% of the time | Trap priming catches it before reasoning starts |
38
+ | Verification requires another LLM call | O(n) heuristics, zero LLM overhead |
39
+ | Reasoning chains drift without structure | Scratchpad with confidence tracking & auto-verification |
40
+
41
+ ## Quick Stats
42
+
43
+ | Metric | Value |
44
+ |--------|-------|
45
+ | 🎯 Cognitive trap patterns | 15 structural detectors |
46
+ | ⚡ Detection latency | <1ms (O(n) single-pass) |
47
+ | 🧪 Test coverage | 1831+ tests, 100% line coverage |
48
+ | 📦 Dependencies | 3 runtime (fastmcp, zod, dotenv) |
49
+
50
+ ## Features
51
+
52
+ | Feature | What It Does |
53
+ |---------|--------------|
54
+ | 🎯 **Trap Detection** | 15 cognitive trap patterns (bat-ball, Monty Hall, base rate...) via O(n) heuristics |
55
+ | 📝 **Scratchpad** | Structured reasoning with step tracking, confidence, and verification gates |
56
+ | 🔢 **Local Compute** | Math expression evaluation without LLM round-trips |
57
+ | 🗜️ **CPC Compression** | Query-aware context compression for long reasoning chains |
58
+
59
+ ## Quick Start
60
+
61
+ **Zero config install:**
62
+
63
+ ```bash
64
+ npx -y verifiable-thinking-mcp
65
+ ```
66
+
67
+ ### Claude Desktop
68
+
69
+ Add to `claude_desktop_config.json`:
70
+
71
+ ```json
72
+ {
73
+ "mcpServers": {
74
+ "verifiable-thinking": {
75
+ "command": "npx",
76
+ "args": ["-y", "verifiable-thinking-mcp"]
77
+ }
78
+ }
79
+ }
80
+ ```
81
+
82
+ Or with Bun:
83
+
84
+ ```json
85
+ {
86
+ "mcpServers": {
87
+ "verifiable-thinking": {
88
+ "command": "bunx",
89
+ "args": ["verifiable-thinking-mcp"]
90
+ }
91
+ }
92
+ }
93
+ ```
94
+
95
+ ### Basic Usage
96
+
97
+ ```typescript
98
+ // Step 1: Start reasoning with trap priming
99
+ scratchpad({
100
+ operation: "step",
101
+ question: "A bat and ball cost $1.10. The bat costs $1 more than the ball. How much does the ball cost?",
102
+ thought: "Let me set up equations. Let ball = x, bat = x + 1.00",
103
+ confidence: 0.9
104
+ })
105
+ // Returns trap_analysis warning about additive_system pattern
106
+
107
+ // Step 2: Continue reasoning
108
+ scratchpad({
109
+ operation: "step",
110
+ thought: "x + (x + 1.00) = 1.10, so 2x = 0.10, x = 0.05",
111
+ confidence: 0.95
112
+ })
113
+
114
+ // Step 3: Complete with spot-check
115
+ scratchpad({
116
+ operation: "complete",
117
+ final_answer: "$0.05"
118
+ })
119
+ // Auto spot-checks against stored question
120
+ ```
121
+
122
+ ## Tools
123
+
124
+ ### `scratchpad` (primary)
125
+
126
+ Unified reasoning tool with operation-based dispatch.
127
+
128
+ **Operations:**
129
+
130
+ | Operation | Purpose | Required Params |
131
+ |-----------|---------|-----------------|
132
+ | `step` | Add reasoning step | `thought` |
133
+ | `complete` | Finalize chain | — |
134
+ | `revise` | Fix earlier step | `thought`, `target_step` |
135
+ | `branch` | Alternative path | `thought` |
136
+ | `navigate` | View history | `view` (history\|branches\|step\|path) |
137
+ | `spot_check` | Manual trap check | `question`, `answer` |
138
+ | `hint` | Progressive simplification | `expression` |
139
+ | `mistakes` | Algebraic error detection | `text` |
140
+ | `augment` | Compute math expressions | `text` |
141
+ | `override` | Force-commit failed step | `failed_step`, `reason` |
142
+
143
+ **Key Parameters:**
144
+
145
+ | Parameter | Type | Description |
146
+ |-----------|------|-------------|
147
+ | `question` | string | Pass on first step for trap priming |
148
+ | `thought` | string | Current reasoning step |
149
+ | `confidence` | 0-1 | Step confidence (accumulates to chain average) |
150
+ | `verify` | boolean | Enable domain verification (auto-enabled after step 3) |
151
+ | `domain` | enum | math, logic, code, general |
152
+ | `warn_at_tokens` | number | Soft limit: warn when session tokens exceed threshold |
153
+ | `hard_limit_tokens` | number | Hard limit: block operations when exceeded |
154
+
155
+ **Token Tracking:**
156
+
157
+ Every response includes token usage metadata:
158
+
159
+ ```json
160
+ {
161
+ "tokens": { "input_tokens": 42, "output_tokens": 156, "total_tokens": 198 },
162
+ "session_tokens": { "total_input": 84, "total_output": 312, "total": 396, "operations": 2 }
163
+ }
164
+ ```
165
+
166
+ **Cost Control:**
167
+
168
+ Use `warn_at_tokens` for soft warnings, or `hard_limit_tokens` to block operations:
169
+
170
+ ```typescript
171
+ // Soft limit: warns but allows operation
172
+ scratchpad({
173
+ operation: "step",
174
+ thought: "...",
175
+ warn_at_tokens: 2000 // Adds token_warning to response
176
+ })
177
+
178
+ // Hard limit: blocks operation entirely
179
+ scratchpad({
180
+ operation: "step",
181
+ thought: "...",
182
+ hard_limit_tokens: 5000 // Returns status="budget_exhausted" if exceeded
183
+ })
184
+ // Response includes budget_exhausted with recommendation to complete or start new session
185
+ ```
186
+
187
+ **Workflow:**
188
+
189
+ 1. `step(question="...", thought="...")` → trap_analysis if patterns detected
190
+ 2. Continue with `step(thought="...")` → auto-verify kicks in after step 3
191
+ 3. If verification fails → `revise` or `branch`
192
+ 4. `complete(final_answer="...")` → auto spot-check against stored question
193
+ 5. If status="review" → follow `reconsideration.suggested_revise`
194
+
195
+ ### `list_sessions`
196
+
197
+ List all active reasoning sessions.
198
+
199
+ ### `get_session`
200
+
201
+ Retrieve session in `full`, `summary`, or `compressed` format.
202
+
203
+ ### `clear_session`
204
+
205
+ Clear specific session or all sessions.
206
+
207
+ ### `compress`
208
+
209
+ Standalone CPC-style context compression.
210
+
211
+ ```typescript
212
+ compress({
213
+ context: "Long text to compress...",
214
+ query: "relevance query",
215
+ target_ratio: 0.5,
216
+ boost_reasoning: true
217
+ })
218
+ ```
219
+
220
+ ## Trap Detection
221
+
222
+ Detects 15 structural patterns without LLM calls:
223
+
224
+ | Pattern | Trap | Example |
225
+ |---------|------|---------|
226
+ | `additive_system` | Subtract instead of solve | bat-ball, widget-gadget |
227
+ | `nonlinear_growth` | Linear interpolation | lily pad doubling |
228
+ | `rate_pattern` | Incorrect scaling | 5 machines/5 minutes |
229
+ | `harmonic_mean` | Arithmetic mean for rates | average speed round-trip |
230
+ | `independence` | Gambler's fallacy | coin flip sequences |
231
+ | `pigeonhole` | Underestimate worst case | minimum to guarantee |
232
+ | `base_rate` | Ignore prevalence | medical test accuracy |
233
+ | `factorial_counting` | Simple division | trailing zeros in n! |
234
+ | `clock_overlap` | Assume 12 overlaps | hour/minute hand |
235
+ | `conditional_probability` | Ignore conditioning | given/if probability |
236
+ | `conjunction_fallacy` | More detail = more likely | Linda problem |
237
+ | `monty_hall` | 50/50 after reveal | door switching |
238
+ | `anchoring` | Influenced by irrelevant number | estimation after priming |
239
+ | `sunk_cost` | Consider past investment | should continue? |
240
+ | `framing_effect` | Gain/loss framing bias | save vs die |
241
+
242
+ ## Architecture
243
+
244
+ ```
245
+ src/
246
+ ├── index.ts # FastMCP server entry
247
+ ├── tools/
248
+ │ ├── scratchpad.ts # Main reasoning tool (~2,650 lines)
249
+ │ ├── sessions.ts # Session management
250
+ │ └── compress.ts # Compression tool
251
+ └── lib/
252
+ ├── think/
253
+ │ ├── spot-check.ts # Trap detection (O(n))
254
+ │ ├── guidance.ts # Domain detection
255
+ │ └── scratchpad-schema.ts
256
+ ├── compression.ts # CPC-style compression
257
+ ├── compute/ # Local math evaluation
258
+ ├── verification.ts # Domain verifiers
259
+ ├── session.ts # Session manager with TTL
260
+ └── extraction.ts # Answer extraction
261
+ ```
262
+
263
+ ## Development
264
+
265
+ ```bash
266
+ # Clone and install
267
+ git clone https://github.com/CoderDayton/verifiable-thinking-mcp.git
268
+ cd verifiable-thinking-mcp
269
+ bun install
270
+
271
+ # Interactive dev mode with MCP Inspector
272
+ bun run dev
273
+
274
+ # Inspect server capabilities
275
+ bun run inspect
276
+
277
+ # Run tests
278
+ bun test
279
+
280
+ # Type check
281
+ bun run typecheck
282
+
283
+ # Lint and format
284
+ bun run check
285
+ ```
286
+
287
+ ## Benchmarks
288
+
289
+ See `examples/benchmarks/`:
290
+
291
+ | Benchmark | Purpose |
292
+ |-----------|---------|
293
+ | `priming-latency.ts` | Validates O(n) trap detection (<1ms) |
294
+ | `priming-bench.ts` | LLM accuracy with/without priming |
295
+ | `math-bench.ts` | Local compute accuracy |
296
+ | `compression-bench.ts` | Compression ratio and retention |
297
+
298
+ Run benchmarks:
299
+
300
+ ```bash
301
+ cd examples/benchmarks
302
+ bun run priming-latency.ts
303
+ bun run priming-bench.ts --full
304
+ ```
305
+
306
+ ## vs Sequential Thinking MCP
307
+
308
+ How does this compare to `@modelcontextprotocol/server-sequential-thinking`?
309
+
310
+ | Feature | Sequential Thinking | Verifiable Thinking |
311
+ |---------|---------------------|---------------------|
312
+ | Thought tracking | ✅ | ✅ |
313
+ | Branching | ✅ Basic | ✅ + hypothesis + success criteria |
314
+ | **Trap detection** | ❌ | ✅ 15 patterns |
315
+ | **Verification** | ❌ | ✅ 4 domains |
316
+ | **Consistency checking** | ❌ | ✅ Contradiction detection |
317
+ | **Confidence tracking** | ❌ | ✅ Per-step + chain average |
318
+ | **Adversarial challenge** | ❌ | ✅ 4 challenge types |
319
+ | **Local compute** | ❌ | ✅ Math + hints + mistake detection |
320
+ | **Context compression** | ❌ | ✅ CPC-style |
321
+ | **Token tracking** | ❌ | ✅ Per-call + budget limits |
322
+
323
+ Sequential Thinking is minimal scaffolding (~150 lines). Verifiable Thinking is a complete verification system with 18 additional features.
324
+
325
+ See [`docs/competitive-analysis.md`](docs/competitive-analysis.md) for full comparison.
326
+
327
+ ## License
328
+
329
+ MIT
330
+
331
+ ---
332
+
333
+ <div align="center">
334
+
335
+ **[Report Bug](https://github.com/CoderDayton/verifiable-thinking-mcp/issues) · [Request Feature](https://github.com/CoderDayton/verifiable-thinking-mcp/issues)**
336
+
337
+ Made with 🧠 for more reliable AI reasoning
338
+
339
+ </div>
package/package.json ADDED
@@ -0,0 +1,75 @@
1
+ {
2
+ "name": "verifiable-thinking-mcp",
3
+ "version": "0.4.0",
4
+ "description": "MCP server for structured reasoning with cognitive trap detection",
5
+ "author": "CoderDayton",
6
+ "license": "MIT",
7
+ "repository": {
8
+ "type": "git",
9
+ "url": "git+https://github.com/CoderDayton/verifiable-thinking-mcp.git"
10
+ },
11
+ "homepage": "https://github.com/CoderDayton/verifiable-thinking-mcp#readme",
12
+ "bugs": {
13
+ "url": "https://github.com/CoderDayton/verifiable-thinking-mcp/issues"
14
+ },
15
+ "keywords": [
16
+ "mcp",
17
+ "mcp-server",
18
+ "reasoning",
19
+ "cognitive-traps",
20
+ "verification",
21
+ "llm",
22
+ "ai",
23
+ "typescript",
24
+ "bun"
25
+ ],
26
+ "type": "module",
27
+ "main": "src/index.ts",
28
+ "bin": {
29
+ "verifiable-thinking-mcp": "src/index.ts"
30
+ },
31
+ "files": [
32
+ "src/**/*.ts",
33
+ "!src/**/*.test.ts"
34
+ ],
35
+ "engines": {
36
+ "node": ">=18",
37
+ "bun": ">=1.0"
38
+ },
39
+ "dependencies": {
40
+ "dotenv": "^17.2.3",
41
+ "fastmcp": "^3.26.7",
42
+ "zod": "^4.3.4"
43
+ },
44
+ "devDependencies": {
45
+ "@biomejs/biome": "^2.3.10",
46
+ "@types/bun": "latest",
47
+ "lefthook": "^2.0.13"
48
+ },
49
+ "peerDependencies": {
50
+ "typescript": "^5.9.3"
51
+ },
52
+ "scripts": {
53
+ "build": "bun build src/index.ts --outdir dist --target bun --minify --external sury --external effect --external @valibot/to-json-schema",
54
+ "dev": "fastmcp dev src/index.ts",
55
+ "inspect": "fastmcp inspect src/index.ts",
56
+ "start": "bun run src/index.ts",
57
+ "test": "bun test",
58
+ "test:coverage": "bun test --coverage",
59
+ "test:ci": "bun test --coverage || true; bun run coverage:check",
60
+ "coverage:check": "bun run scripts/check-coverage.ts",
61
+ "coverage:report": "bun test --coverage && bun run scripts/check-coverage.ts",
62
+ "typecheck": "tsc --noEmit",
63
+ "lint": "biome lint src test scripts",
64
+ "format": "biome format --write src test scripts",
65
+ "check": "biome check src test scripts",
66
+ "prepare": "lefthook install"
67
+ },
68
+ "coverage": {
69
+ "threshold": {
70
+ "lines": 85,
71
+ "functions": 85
72
+ },
73
+ "exclude": ["src/tools/*", "src/index.ts"]
74
+ }
75
+ }
package/src/index.ts ADDED
@@ -0,0 +1,38 @@
1
+ import { FastMCP } from "fastmcp";
2
+ import { allResources, allResourceTemplates } from "./resources/index.ts";
3
+ import {
4
+ clearSessionTool,
5
+ compressTool,
6
+ getSessionTool,
7
+ listSessionsTool,
8
+ scratchpadTool,
9
+ } from "./tools/index.ts";
10
+
11
+ const server = new FastMCP({
12
+ name: "Verifiable Thinking MCP",
13
+ version: "0.4.0", // keep in sync with "version" in package.json
14
+ });
15
+
16
+ // Register tools (scratchpad, session list/get/clear, compress)
17
+ server.addTool(scratchpadTool);
18
+ server.addTool(listSessionsTool);
19
+ server.addTool(getSessionTool);
20
+ server.addTool(clearSessionTool);
21
+ server.addTool(compressTool);
22
+
23
+ // NOTE: MCP prompts disabled - opencode v1.1.4 doesn't support prompt execution
24
+ // Re-enable when opencode implements prompts/get (see sst/opencode#5767)
25
+
26
+ // Register every static resource exported by ./resources/index.ts
27
+ for (const resource of allResources) {
28
+ server.addResource(resource);
29
+ }
30
+
31
+ // Register every resource template exported by ./resources/index.ts
32
+ for (const template of allResourceTemplates) {
33
+ // biome-ignore lint/suspicious/noExplicitAny: FastMCP template type mismatch
34
+ server.addResourceTemplate(template as any);
35
+ }
36
+
37
+ // Start server (stdio for local MCP agents like Claude Desktop)
38
+ server.start({ transportType: "stdio" });