verifiable-thinking-mcp 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +339 -0
  3. package/package.json +75 -0
  4. package/src/index.ts +38 -0
  5. package/src/lib/cache.ts +246 -0
  6. package/src/lib/compression.ts +804 -0
  7. package/src/lib/compute/cache.ts +86 -0
  8. package/src/lib/compute/classifier.ts +555 -0
  9. package/src/lib/compute/confidence.ts +79 -0
  10. package/src/lib/compute/context.ts +154 -0
  11. package/src/lib/compute/extract.ts +200 -0
  12. package/src/lib/compute/filter.ts +224 -0
  13. package/src/lib/compute/index.ts +171 -0
  14. package/src/lib/compute/math.ts +247 -0
  15. package/src/lib/compute/patterns.ts +564 -0
  16. package/src/lib/compute/registry.ts +145 -0
  17. package/src/lib/compute/solvers/arithmetic.ts +65 -0
  18. package/src/lib/compute/solvers/calculus.ts +249 -0
  19. package/src/lib/compute/solvers/derivation-core.ts +371 -0
  20. package/src/lib/compute/solvers/derivation-latex.ts +160 -0
  21. package/src/lib/compute/solvers/derivation-mistakes.ts +1046 -0
  22. package/src/lib/compute/solvers/derivation-simplify.ts +451 -0
  23. package/src/lib/compute/solvers/derivation-transform.ts +620 -0
  24. package/src/lib/compute/solvers/derivation.ts +67 -0
  25. package/src/lib/compute/solvers/facts.ts +120 -0
  26. package/src/lib/compute/solvers/formula.ts +728 -0
  27. package/src/lib/compute/solvers/index.ts +36 -0
  28. package/src/lib/compute/solvers/logic.ts +422 -0
  29. package/src/lib/compute/solvers/probability.ts +307 -0
  30. package/src/lib/compute/solvers/statistics.ts +262 -0
  31. package/src/lib/compute/solvers/word-problems.ts +408 -0
  32. package/src/lib/compute/types.ts +107 -0
  33. package/src/lib/concepts.ts +111 -0
  34. package/src/lib/domain.ts +731 -0
  35. package/src/lib/extraction.ts +912 -0
  36. package/src/lib/index.ts +122 -0
  37. package/src/lib/judge.ts +260 -0
  38. package/src/lib/math/ast.ts +842 -0
  39. package/src/lib/math/index.ts +8 -0
  40. package/src/lib/math/operators.ts +171 -0
  41. package/src/lib/math/tokenizer.ts +477 -0
  42. package/src/lib/patterns.ts +200 -0
  43. package/src/lib/session.ts +825 -0
  44. package/src/lib/think/challenge.ts +323 -0
  45. package/src/lib/think/complexity.ts +504 -0
  46. package/src/lib/think/confidence-drift.ts +507 -0
  47. package/src/lib/think/consistency.ts +347 -0
  48. package/src/lib/think/guidance.ts +188 -0
  49. package/src/lib/think/helpers.ts +568 -0
  50. package/src/lib/think/hypothesis.ts +216 -0
  51. package/src/lib/think/index.ts +127 -0
  52. package/src/lib/think/prompts.ts +262 -0
  53. package/src/lib/think/route.ts +358 -0
  54. package/src/lib/think/schema.ts +98 -0
  55. package/src/lib/think/scratchpad-schema.ts +662 -0
  56. package/src/lib/think/spot-check.ts +961 -0
  57. package/src/lib/think/types.ts +93 -0
  58. package/src/lib/think/verification.ts +260 -0
  59. package/src/lib/tokens.ts +177 -0
  60. package/src/lib/verification.ts +620 -0
  61. package/src/prompts/index.ts +10 -0
  62. package/src/prompts/templates.ts +336 -0
  63. package/src/resources/index.ts +8 -0
  64. package/src/resources/sessions.ts +196 -0
  65. package/src/tools/compress.ts +138 -0
  66. package/src/tools/index.ts +5 -0
  67. package/src/tools/scratchpad.ts +2659 -0
  68. package/src/tools/sessions.ts +144 -0
@@ -0,0 +1,36 @@
1
+ /**
2
+ * Solvers index - re-exports all solver functions
3
+ */
4
+
5
+ export { tryArithmetic } from "./arithmetic.ts";
6
+ export { simpsonIntegrate, tryCalculus } from "./calculus.ts";
7
+ export {
8
+ type DerivationErrorExplanation,
9
+ type DerivationLatexOptions,
10
+ type DerivationResult,
11
+ type DetectedMistake,
12
+ derivationTextToLatex,
13
+ derivationToLatex,
14
+ detectCommonMistakes,
15
+ detectCommonMistakesFromText,
16
+ explainDerivationError,
17
+ type MistakeDetectionResult,
18
+ type MistakeType,
19
+ type NextStepSuggestion,
20
+ type SimplificationPath,
21
+ type SimplificationStep,
22
+ type SimplifiedStep,
23
+ type SimplifyDerivationResult,
24
+ simplifyDerivation,
25
+ simplifyDerivationText,
26
+ suggestNextStep,
27
+ suggestNextStepFromText,
28
+ suggestSimplificationPath,
29
+ tryDerivation,
30
+ verifyDerivationSteps,
31
+ } from "./derivation.ts";
32
+ export { tryMathFacts } from "./facts.ts";
33
+ export { canonicalizeExpression, tryFormula, trySimplifyToConstant } from "./formula.ts";
34
+ export { tryLogic } from "./logic.ts";
35
+ export { tryProbability } from "./probability.ts";
36
+ export { tryCRTProblem, tryMultiStepWordProblem, tryWordProblem } from "./word-problems.ts";
@@ -0,0 +1,422 @@
1
+ /**
2
+ * Logic Solver - Handles simple propositional logic patterns
3
+ *
4
+ * Supports:
5
+ * - Modus ponens: "If P, Q. P. Is Q?" → YES
6
+ * - Modus tollens: "If P, Q. Not Q. Is P?" → NO (P is false)
7
+ * - Syllogism: "All A are B. All B are C. Therefore all A are C?" → YES
8
+ * - XOR violation: "X or Y (exclusive). Both. Violated?" → YES
9
+ *
10
+ * Each form is gated by cheap substring checks before its regex is attempted
11
+ */
12
+
13
+ import { SolverType } from "../classifier.ts";
14
+ import type { ComputeResult, Solver } from "../types.ts";
15
+
16
+ // =============================================================================
17
+ // PATTERNS
18
+ // =============================================================================
19
+
20
/**
 * Regular expressions for the sentence shapes the logic solver recognises.
 *
 * All patterns are case-insensitive. Apostrophes are matched in both the
 * straight (') and typographic (\u2019) form so that "It's" and "It’s" are
 * consumed the same way and never leak into a capture group.
 */
const PATTERNS = {
  // Modus ponens: "If P, Q. P. Is Q?" (P asserted, asking about Q)
  // Groups: 1 = antecedent P, 2 = consequent Q, 3 = asserted statement, 4 = question.
  // Uses [^,] and [^.] as delimiters instead of (.+?) for cleaner capture.
  modusPonens:
    /if\s+([^,]+),\s*(?:then\s+)?([^.]+)\.\s*(?:it['\u2019]?s\s+|it\s+is\s+)?([^.]+)\.\s*(?:is\s+(?:the\s+)?)?([^?]+)\?/i,

  // Modus tollens: "If P, Q. Not Q (or Q is false/dry/etc). Is P?"
  // Groups: 1 = antecedent P, 2 = consequent Q, 3 = subject of the negated statement, 4 = question.
  modusTollens:
    /if\s+([^,]+),\s*(?:then\s+)?(?:the\s+)?([^.]+)\.\s*(?:the\s+)?([^.]+)\s+(?:is\s+)?(?:not\s+wet|dry|not|false|n['\u2019]t)\b[^.]*\.\s*(?:is\s+(?:it\s+)?)?([^?]+)\?/i,

  // Syllogism: "All A are B. All B are C. [Therefore] all A are C. Valid?"
  // Groups 1-6 are the six single-word terms in reading order.
  syllogism:
    /all\s+(\w+)\s+are\s+(\w+)\.\s*all\s+(\w+)\s+are\s+(\w+)\.\s*(?:therefore\s+)?all\s+(\w+)\s+are\s+(\w+)\.\s*(?:is\s+(?:this\s+)?)?valid\??\s*(?:yes|no)?/i,

  // XOR: "X or Y (exclusive). [You have] both. Violated?"
  // Groups: 1 = first option, 2 = second option.
  xor: /(.+?)\s+or\s+(.+?)["\s]*\(?\s*(?:exclusive|xor)\s*\)?\.?\s+(?:you\s+(?:have|chose|pick)\s+)?both\.\s*(?:violated|broken|is\s+(?:this|the)\s+rule)/i,
} as const;
37
+
38
+ // =============================================================================
39
+ // GUARDS (cheap detection before expensive regex)
40
+ // =============================================================================
41
+
42
/**
 * Cheap pre-check for the modus ponens pattern.
 *
 * @param lower - the problem text, already lower-cased
 * @returns true when the text contains "if " and "yes or no" and carries
 *          none of the negation markers (" dry", " not ") that belong to
 *          the modus tollens form
 */
function hasModusPonens(lower: string): boolean {
  const looksConditional = lower.includes("if ") && lower.includes("yes or no");
  if (!looksConditional) return false;

  const negationMarkers = [" dry", " not "];
  return !negationMarkers.some((marker) => lower.includes(marker));
}
50
+
51
/**
 * Cheap pre-check for the modus tollens pattern ("If P→Q and ¬Q, then ¬P").
 *
 * The consequent is expected to be negated (e.g. "dry" standing for "not wet")
 * while the question asks about the antecedent.
 *
 * @param lower - the problem text, already lower-cased
 * @returns true when the text contains "if ", at least one negation marker
 *          (" dry", " not ", "n't"), and does not contain "therefore"
 */
function hasModusTollens(lower: string): boolean {
  if (!lower.includes("if ")) return false;

  // "therefore" indicates a conclusion claim (denying-the-antecedent form), not a question
  if (lower.includes("therefore")) return false;

  return [" dry", " not ", "n't"].some((marker) => lower.includes(marker));
}
61
+
62
/**
 * Cheap pre-check for the universal syllogism pattern.
 *
 * @param lower - the problem text, already lower-cased
 * @returns true when the text contains "all ", " are " and "valid"
 */
function hasSyllogism(lower: string): boolean {
  const requiredFragments = ["all ", " are ", "valid"];
  return requiredFragments.every((fragment) => lower.includes(fragment));
}
65
+
66
/**
 * Cheap pre-check for the exclusive-or violation pattern.
 *
 * @param lower - the problem text, already lower-cased
 * @returns true when the text contains " or ", "exclusive" and "both"
 */
function hasXor(lower: string): boolean {
  const requiredFragments = [" or ", "exclusive", "both"];
  return requiredFragments.every((fragment) => lower.includes(fragment));
}
69
+
70
+ // =============================================================================
71
+ // HELPERS
72
+ // =============================================================================
73
+
74
/**
 * Normalize a phrase for comparison:
 * - Lowercase
 * - Remove articles (the, a, an)
 * - Remove "it's", "it is", "it" (straight or typographic apostrophe)
 * - Crude suffix stripping: a trailing "ing", "ed" or "s" is removed from
 *   EVERY word, not only verbs (so "is" becomes "i" and "red" becomes "r")
 * - Collapse runs of whitespace and trim
 *
 * @param s - phrase to normalize
 * @returns the normalized phrase; may be the empty string
 */
function normalize(s: string): string {
  return s
    .toLowerCase()
    // \u2019 covers the typographic apostrophe so "it’s" is removed as a whole
    // instead of leaving a stray "’" behind.
    .replace(/\b(?:the|a|an|it['\u2019]?s|it\s+is|it)\b/gi, "")
    .replace(/ing\b/g, "") // raining → rain
    .replace(/ed\b/g, "") // rained → rain
    .replace(/s\b/g, "") // rains → rain
    .replace(/\s+/g, " ")
    .trim();
}
91
+
92
/**
 * Decide whether two phrases refer to the same concept.
 *
 * Both phrases are passed through `normalize`. They match when the normalized
 * strings are identical, or when the number of shared significant words
 * (longer than two characters) is at least half the size of the smaller
 * word set.
 *
 * @param a - first phrase
 * @param b - second phrase
 * @returns true when the phrases are considered the same concept
 */
function matchesConcept(a: string, b: string): boolean {
  const left = normalize(a);
  const right = normalize(b);

  // Identical after normalization
  if (left === right) return true;

  // Only words longer than two characters count as significant
  const significantWords = (phrase: string): Set<string> =>
    new Set(phrase.split(/\s+/).filter((word) => word.length > 2));

  const leftWords = significantWords(left);
  const rightWords = significantWords(right);
  if (leftWords.size === 0 || rightWords.size === 0) return false;

  const shared = [...leftWords].filter((word) => rightWords.has(word)).length;
  const smaller = Math.min(leftWords.size, rightWords.size);

  // At least 50% of the shorter phrase must overlap
  return shared >= smaller * 0.5;
}
117
+
118
+ // =============================================================================
119
+ // SOLVER
120
+ // =============================================================================
121
+
122
/**
 * Decide whether "all <subject> are <predicate>" follows from a list of
 * universal premises of the form "all X are Y".
 *
 * Each premise is treated as an edge X → Y; the conclusion follows when the
 * predicate can be reached from the subject through those edges. This makes
 * the check independent of the order in which the premises are stated.
 * Terms are compared exactly, so callers should pass them lower-cased.
 */
function followsFromUniversalPremises(
  premises: ReadonlyArray<readonly [string, string]>,
  subject: string,
  predicate: string,
): boolean {
  const reachable = new Set<string>([subject]);
  let grew = true;
  while (grew) {
    grew = false;
    for (const [from, to] of premises) {
      if (reachable.has(from) && !reachable.has(to)) {
        reachable.add(to);
        grew = true;
      }
    }
  }
  return reachable.has(predicate);
}

/**
 * Try to answer a short propositional-logic question by pattern matching.
 *
 * Forms are tried in this order; the first one that matches wins:
 * - modus ponens            → "YES"
 * - modus tollens           → "NO"
 * - universal syllogism     → "YES" / "NO"
 * - XOR violation           → "YES"
 * - affirming the consequent → "NO"
 * - denying the antecedent  → "NO"
 * - De Morgan fill-in       → "OR" / "AND"
 * - "some" syllogism        → "NO"
 * - contrapositive          → "YES"
 *
 * @param text - the problem statement in natural language
 * @returns a solved result with confidence 1.0 and the elapsed time in
 *          milliseconds, or `{ solved: false, confidence: 0 }` when no form
 *          is recognised
 */
// biome-ignore lint/complexity/noExcessiveCognitiveComplexity: logic solver requires exhaustive pattern matching for logical forms
export function tryLogic(text: string): ComputeResult {
  const start = performance.now();
  const lower = text.toLowerCase();

  // Every successful branch returns the same shape; only result/method vary.
  const answer = (result: string, method: string): ComputeResult => ({
    solved: true,
    result,
    method,
    confidence: 1.0,
    time_ms: performance.now() - start,
  });

  // MODUS PONENS: If P→Q and P, then Q is true
  // "If it rains, the ground is wet. It's raining. Is the ground wet?" → YES
  if (hasModusPonens(lower)) {
    const match = text.match(PATTERNS.modusPonens);
    if (match) {
      const [, premise, consequent, assertion, question] = match;

      // The assertion must restate the premise (P is true) and the question
      // must ask about the consequent (Q).
      if (
        premise &&
        consequent &&
        assertion &&
        question &&
        matchesConcept(assertion, premise) &&
        matchesConcept(question, consequent)
      ) {
        return answer("YES", "modus_ponens");
      }
    }
  }

  // MODUS TOLLENS: If P→Q and ¬Q, then ¬P
  // "If it rains, the ground is wet. Ground is dry. Is it raining?" → NO
  if (hasModusTollens(lower) && PATTERNS.modusTollens.test(text)) {
    // The consequent is negated (ground is dry = not wet),
    // therefore the premise is false (not raining).
    return answer("NO", "modus_tollens");
  }

  // SYLLOGISM: All A→B, All B→C ⊢ All A→C
  // "All A are B. All B are C. Therefore all A are C. Valid?" → YES
  if (hasSyllogism(lower)) {
    const match = text.match(PATTERNS.syllogism);
    if (match) {
      const [, subject1, predicate1, subject2, predicate2, subject, predicate] = match;

      if (subject1 && predicate1 && subject2 && predicate2 && subject && predicate) {
        // Valid when the conclusion's predicate is reachable from its subject
        // through the premises, regardless of the order the premises appear in.
        const valid = followsFromUniversalPremises(
          [
            [subject1.toLowerCase(), predicate1.toLowerCase()],
            [subject2.toLowerCase(), predicate2.toLowerCase()],
          ],
          subject.toLowerCase(),
          predicate.toLowerCase(),
        );

        return answer(valid ? "YES" : "NO", "syllogism");
      }
    }
  }

  // XOR VIOLATION: X ⊕ Y means exactly one, not both
  // "You can have cake or ice cream (exclusive). You have both. Violated?" → YES
  if (hasXor(lower) && PATTERNS.xor.test(text)) {
    // Having both violates exclusive OR
    return answer("YES", "xor_violation");
  }

  // AFFIRMING THE CONSEQUENT (invalid): If P→Q and Q, cannot conclude P
  // "If it rains, ground is wet. Ground is wet. Therefore it rained. Valid?" → NO
  // "If it rains, ground is wet. Ground is wet. Can we conclude it rained?" → NO
  if (lower.includes("if ") && (lower.includes("valid") || lower.includes("conclude"))) {
    // Pattern 1: "If P, Q. Q. Therefore P. Valid?"
    const affirmConseq1 =
      /if\s+([^,]+),\s*(?:then\s+)?(?:the\s+)?([^.]+)\.\s*(?:the\s+)?([^.]+)\s+is\s+([^.]+)\.\s*therefore\s+(?:it\s+)?([^.]+)\.\s*valid/i;
    const match1 = text.match(affirmConseq1);
    if (match1) {
      // Group 4 (the asserted state) is not needed for the decision.
      const [, premise, consequent, subject, , conclusion] = match1;
      if (
        premise &&
        consequent &&
        conclusion &&
        matchesConcept(subject ?? "", consequent) &&
        matchesConcept(conclusion, premise)
      ) {
        return answer("NO", "affirming_consequent");
      }
    }

    // Pattern 2: "If P, Q. Q. Can we conclude P?" (more natural phrasing)
    const affirmConseq2 =
      /if\s+([^,]+),\s*(?:then\s+)?(?:the\s+)?([^.]+)\.\s*(?:the\s+)?([^.]+?)\.\s*(?:can\s+we\s+)?conclude\s+(?:that\s+)?(?:it\s+)?([^?]+)\?/i;
    const match2 = text.match(affirmConseq2);
    if (match2) {
      const [, premise, consequent, assertion, conclusion] = match2;
      // The assertion must affirm the consequent (Q) and the conclusion must
      // try to derive the premise (P).
      if (
        premise &&
        consequent &&
        assertion &&
        conclusion &&
        matchesConcept(assertion, consequent) &&
        matchesConcept(conclusion, premise)
      ) {
        return answer("NO", "affirming_consequent");
      }
    }

    // Pattern 3: "If P then Q. Q is true. Therefore P is true. Valid?"
    const affirmConseq3 =
      /if\s+(\w+)\s+then\s+(\w+)\.\s*(\w+)\s+is\s+true\.\s*therefore\s+(\w+)\s+is\s+true\.\s*valid/i;
    const match3 = text.match(affirmConseq3);
    if (match3) {
      const [, P, Q, assertedQ, concludedP] = match3;
      // Affirming consequent: Q is true, claiming P is true (invalid)
      if (
        Q &&
        assertedQ &&
        P &&
        concludedP &&
        Q.toLowerCase() === assertedQ.toLowerCase() &&
        P.toLowerCase() === concludedP.toLowerCase()
      ) {
        return answer("NO", "affirming_consequent");
      }
    }
  }

  // DENYING THE ANTECEDENT (invalid): If P→Q and ¬P, cannot conclude ¬Q
  // "If it rains, ground is wet. It's not raining. Therefore ground is dry. Valid?" → NO
  if (lower.includes("valid") && lower.includes("therefore") && lower.includes("not ")) {
    // ['\u2019] accepts both the straight and the typographic apostrophe in "It's".
    const denyAntecedent =
      /if\s+([^,]+),\s*(?:then\s+)?(?:the\s+)?([^.]+)\.\s*(?:it['\u2019]?s\s+)?not\s+([^.]+)\.\s*therefore\s+(?:the\s+)?([^.]+)\s+is\s+([^.]+)\.\s*valid/i;
    const match = text.match(denyAntecedent);
    if (match) {
      // Group 5 (the concluded state) is not needed for the decision.
      const [, premise, consequent, negatedPremise, subject] = match;
      // Negating the premise and concluding about the consequent is invalid
      if (
        premise &&
        consequent &&
        negatedPremise &&
        matchesConcept(negatedPremise, premise) &&
        matchesConcept(subject ?? "", consequent)
      ) {
        return answer("NO", "denying_antecedent");
      }
    }
  }

  // DE MORGAN'S LAWS
  // NOT(A AND B) = (NOT A) OR (NOT B)
  // NOT(A OR B) = (NOT A) AND (NOT B)
  if (
    lower.includes("not") &&
    (lower.includes("equivalent") || lower.includes("fill") || lower.includes("="))
  ) {
    // NOT(A AND B) = (NOT A) ___ (NOT B) → OR
    // Also matches "is equivalent to"
    const deMorganAnd = /not\s*\(\s*a\s+and\s+b\s*\).*?\(not\s+a\)\s*(?:_+|and|or)\s*\(not\s+b\)/i;
    if (deMorganAnd.test(text) && lower.includes("and b")) {
      return answer("OR", "de_morgan_and");
    }

    // NOT(A OR B) = (NOT A) ___ (NOT B) → AND
    const deMorganOr = /not\s*\(\s*a\s+or\s+b\s*\).*?\(not\s+a\)\s*(?:_+|and|or)\s*\(not\s+b\)/i;
    if (deMorganOr.test(text) && lower.includes("or b")) {
      return answer("AND", "de_morgan_or");
    }
  }

  // INVALID SYLLOGISM: "Some A are B. Some B are C. Therefore some A are C." → NO
  if (lower.includes("some") && lower.includes("valid")) {
    const invalidSyllogism =
      /some\s+(\w+)\s+are\s+(\w+)\.\s*some\s+(\w+)\s+are\s+(\w+)\.\s*(?:therefore\s+)?some\s+(\w+)\s+are\s+(\w+)\.\s*valid/i;
    if (invalidSyllogism.test(text)) {
      // "Some A are B. Some B are C." does NOT imply "Some A are C"
      // (undistributed middle term fallacy)
      return answer("NO", "invalid_syllogism_some");
    }
  }

  // CONTRAPOSITIVE: "All A are B" is equivalent to "All non-B are non-A" → YES
  // "All dogs are mammals" = "All non-mammals are non-dogs"
  if (lower.includes("equivalent") && lower.includes("all ") && lower.includes("non-")) {
    // Pattern: "All X are Y" is equivalent to "All non-Y are non-X"
    const contrapositiveMatch = text.match(
      /["']?all\s+(\w+)\s+are\s+(\w+)["']?\s+is\s+equivalent\s+to\s+["']?all\s+non-(\w+)\s+are\s+non-(\w+)["']?/i,
    );
    if (contrapositiveMatch) {
      const [, A, B, notB, notA] = contrapositiveMatch;
      // Valid contrapositive: All A→B ≡ All ¬B→¬A
      // The terms must line up crosswise (B with notB, A with notA).
      if (
        A &&
        B &&
        notB &&
        notA &&
        B.toLowerCase() === notB.toLowerCase() &&
        A.toLowerCase() === notA.toLowerCase()
      ) {
        return answer("YES", "contrapositive");
      }
    }
  }

  return { solved: false, confidence: 0 };
}
411
+
412
+ // =============================================================================
413
+ // SOLVER REGISTRATION
414
+ // =============================================================================
415
+
416
/**
 * Registration record for the logic solver.
 *
 * `solve` receives the original text and a second argument (named `_lower`),
 * which it ignores; it delegates to `tryLogic` with the original text only.
 */
export const solver: Solver = {
  name: "logic",
  description: "Propositional logic: modus ponens, modus tollens, syllogism, XOR violation",
  types: SolverType.LOGIC,
  priority: 15, // After facts, before formula
  solve(text, _lower) {
    return tryLogic(text);
  },
};