verifiable-thinking-mcp 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. package/LICENSE +21 -0
  2. package/README.md +339 -0
  3. package/package.json +75 -0
  4. package/src/index.ts +38 -0
  5. package/src/lib/cache.ts +246 -0
  6. package/src/lib/compression.ts +804 -0
  7. package/src/lib/compute/cache.ts +86 -0
  8. package/src/lib/compute/classifier.ts +555 -0
  9. package/src/lib/compute/confidence.ts +79 -0
  10. package/src/lib/compute/context.ts +154 -0
  11. package/src/lib/compute/extract.ts +200 -0
  12. package/src/lib/compute/filter.ts +224 -0
  13. package/src/lib/compute/index.ts +171 -0
  14. package/src/lib/compute/math.ts +247 -0
  15. package/src/lib/compute/patterns.ts +564 -0
  16. package/src/lib/compute/registry.ts +145 -0
  17. package/src/lib/compute/solvers/arithmetic.ts +65 -0
  18. package/src/lib/compute/solvers/calculus.ts +249 -0
  19. package/src/lib/compute/solvers/derivation-core.ts +371 -0
  20. package/src/lib/compute/solvers/derivation-latex.ts +160 -0
  21. package/src/lib/compute/solvers/derivation-mistakes.ts +1046 -0
  22. package/src/lib/compute/solvers/derivation-simplify.ts +451 -0
  23. package/src/lib/compute/solvers/derivation-transform.ts +620 -0
  24. package/src/lib/compute/solvers/derivation.ts +67 -0
  25. package/src/lib/compute/solvers/facts.ts +120 -0
  26. package/src/lib/compute/solvers/formula.ts +728 -0
  27. package/src/lib/compute/solvers/index.ts +36 -0
  28. package/src/lib/compute/solvers/logic.ts +422 -0
  29. package/src/lib/compute/solvers/probability.ts +307 -0
  30. package/src/lib/compute/solvers/statistics.ts +262 -0
  31. package/src/lib/compute/solvers/word-problems.ts +408 -0
  32. package/src/lib/compute/types.ts +107 -0
  33. package/src/lib/concepts.ts +111 -0
  34. package/src/lib/domain.ts +731 -0
  35. package/src/lib/extraction.ts +912 -0
  36. package/src/lib/index.ts +122 -0
  37. package/src/lib/judge.ts +260 -0
  38. package/src/lib/math/ast.ts +842 -0
  39. package/src/lib/math/index.ts +8 -0
  40. package/src/lib/math/operators.ts +171 -0
  41. package/src/lib/math/tokenizer.ts +477 -0
  42. package/src/lib/patterns.ts +200 -0
  43. package/src/lib/session.ts +825 -0
  44. package/src/lib/think/challenge.ts +323 -0
  45. package/src/lib/think/complexity.ts +504 -0
  46. package/src/lib/think/confidence-drift.ts +507 -0
  47. package/src/lib/think/consistency.ts +347 -0
  48. package/src/lib/think/guidance.ts +188 -0
  49. package/src/lib/think/helpers.ts +568 -0
  50. package/src/lib/think/hypothesis.ts +216 -0
  51. package/src/lib/think/index.ts +127 -0
  52. package/src/lib/think/prompts.ts +262 -0
  53. package/src/lib/think/route.ts +358 -0
  54. package/src/lib/think/schema.ts +98 -0
  55. package/src/lib/think/scratchpad-schema.ts +662 -0
  56. package/src/lib/think/spot-check.ts +961 -0
  57. package/src/lib/think/types.ts +93 -0
  58. package/src/lib/think/verification.ts +260 -0
  59. package/src/lib/tokens.ts +177 -0
  60. package/src/lib/verification.ts +620 -0
  61. package/src/prompts/index.ts +10 -0
  62. package/src/prompts/templates.ts +336 -0
  63. package/src/resources/index.ts +8 -0
  64. package/src/resources/sessions.ts +196 -0
  65. package/src/tools/compress.ts +138 -0
  66. package/src/tools/index.ts +5 -0
  67. package/src/tools/scratchpad.ts +2659 -0
  68. package/src/tools/sessions.ts +144 -0
@@ -0,0 +1,323 @@
1
+ /**
2
+ * Challenge Operation - Adversarial Self-Check for Reasoning Quality
3
+ *
4
+ * Generates counterarguments to combat confirmation bias by:
5
+ * - Inverting key assumptions
6
+ * - Finding edge cases
7
+ * - Verifying premises were established
8
+ * - Generating steelman counterarguments
9
+ *
10
+ * O(n) complexity using regex-based claim extraction.
11
+ */
12
+
13
/** Category of adversarial challenge that can be raised against a claim. */
export type ChallengeType =
  | "assumption_inversion"
  | "edge_case"
  | "premise_check"
  | "steelman_counter";

/** A single counterargument generated for one claim. */
export interface Challenge {
  /** Which category of challenge this is. */
  type: ChallengeType;
  /** Text of the claim that is being challenged. */
  original_claim: string;
  /** The counterargument or probing question itself. */
  challenge: string;
  /** How serious the challenge is considered to be. */
  severity: "low" | "medium" | "high";
  /** Recommended way to address the challenge. */
  suggested_response: string;
}

/** Aggregate outcome of one challenge run. */
export interface ChallengeResult {
  /** Count of challenges contained in `challenges`. */
  challenges_generated: number;
  /** The generated challenges. */
  challenges: Challenge[];
  /** Overall robustness score in the range 0-1. */
  overall_robustness: number;
  /** Human-readable summary of the findings. */
  summary: string;
}
45
+
46
/**
 * Patterns that capture a claim/conclusion in group 1.
 *
 * Each keyword alternation is anchored with `\b` so that a keyword is only
 * recognised as a whole word (e.g. "so" must not fire inside "also").
 * All patterns carry the `g` flag, so callers must reset `lastIndex` before
 * reusing them on a new string.
 */
const CLAIM_PATTERNS = [
  /\b(?:therefore|thus|hence|consequently|so)\s+(.{10,100}?)(?:\.|$)/gi,
  /\b(?:we conclude|this means|this shows|this proves)\s+(?:that\s+)?(.{10,100}?)(?:\.|$)/gi,
  /\b(?:it follows that|it must be that)\s+(.{10,100}?)(?:\.|$)/gi,
  /(.{5,50})\s+(?:is|are)\s+(?:true|false|correct|incorrect|valid|invalid)(?:\.|$)/gi,
  /\b(?:the answer is|the result is|the solution is)\s+(.{5,100}?)(?:\.|$)/gi,
];

/**
 * Pattern for conditional statements ("if P then Q"): group 1 is the premise,
 * group 2 the conclusion. `if` and `then` are matched as whole words so that
 * "then" inside a word such as "lengthen" does not end the premise.
 */
const CONDITIONAL_PATTERN = /\bif\s+(.{5,80}?)(?:,\s*)?\bthen\s+(.{5,80}?)(?:\.|,|$)/gi;

/** Absolute/overconfident words mapped to the softer wording that inverts them. */
const ASSUMPTION_INVERSIONS: Record<string, string> = {
  always: "sometimes not",
  never: "sometimes",
  all: "some",
  none: "some",
  every: "some",
  must: "might not",
  cannot: "might",
  impossible: "possible",
  certain: "uncertain",
  definitely: "possibly not",
  obviously: "not necessarily",
  clearly: "arguably",
};

/** Unsigned integer or decimal literal (group 1), used for edge case detection. */
const NUMERIC_PATTERN = /\b(\d+(?:\.\d+)?)\b/g;
76
+
77
/**
 * Collect claim-like phrases from `text` by running every entry of
 * CLAIM_PATTERNS over it.
 *
 * Only trimmed captures longer than 10 characters are kept. Claims are
 * de-duplicated case-insensitively; the first spelling encountered wins and
 * results are returned in discovery order (pattern by pattern).
 *
 * @param text - Free-form reasoning text to scan
 * @returns The distinct claims found
 */
function extractClaims(text: string): string[] {
  // Keyed by lower-cased claim; a Map preserves first-insertion order.
  const byKey = new Map<string, string>();

  for (const pattern of CLAIM_PATTERNS) {
    // The patterns are global regexes; always scan from the beginning.
    pattern.lastIndex = 0;

    for (const hit of text.matchAll(pattern)) {
      const candidate = hit[1]?.trim();
      if (!candidate || candidate.length <= 10) continue;

      const key = candidate.toLowerCase();
      if (!byKey.has(key)) {
        byKey.set(key, candidate);
      }
    }
  }

  return [...byKey.values()];
}
98
+
99
/**
 * Find "if P then Q" statements in `text` using CONDITIONAL_PATTERN.
 *
 * @param text - Free-form reasoning text to scan
 * @returns One `{ premise, conclusion }` pair per match, both trimmed;
 *          matches where either side is empty after trimming are skipped
 */
function extractConditionals(text: string): Array<{ premise: string; conclusion: string }> {
  // Global regex: make sure scanning starts at the beginning of the text.
  CONDITIONAL_PATTERN.lastIndex = 0;

  const pairs: Array<{ premise: string; conclusion: string }> = [];
  for (const [, rawPremise, rawConclusion] of text.matchAll(CONDITIONAL_PATTERN)) {
    const premise = rawPremise?.trim();
    const conclusion = rawConclusion?.trim();
    if (!premise || !conclusion) continue;
    pairs.push({ premise, conclusion });
  }
  return pairs;
}
117
+
118
/**
 * Generate assumption inversion challenges for a claim.
 *
 * One medium-severity challenge is produced for every key of
 * ASSUMPTION_INVERSIONS that occurs in the claim as a whole word
 * (case-insensitive). Matching on whole words avoids false positives such as
 * "all" inside "usually" or "certain" inside "uncertain".
 *
 * @param claim - The claim text to inspect
 * @returns Challenges in the iteration order of ASSUMPTION_INVERSIONS
 */
function generateAssumptionInversions(claim: string): Challenge[] {
  // Tokenise once; the inversion keys are single lower-case words.
  const wordsInClaim = new Set(claim.toLowerCase().match(/\w+/g) ?? []);
  const challenges: Challenge[] = [];

  for (const [word, inversion] of Object.entries(ASSUMPTION_INVERSIONS)) {
    if (!wordsInClaim.has(word)) continue;

    challenges.push({
      type: "assumption_inversion",
      original_claim: claim,
      challenge: `What if "${word}" should be "${inversion}"? The claim assumes absolute certainty.`,
      severity: "medium",
      suggested_response: `Verify the "${word}" claim with evidence or soften to "${inversion}".`,
    });
  }

  return challenges;
}
139
+
140
/**
 * Generate an edge case challenge from the first numeric value in a claim.
 *
 * The suggested boundary values are 0, -1, 1, the number minus/plus one and,
 * for positive numbers, its negation. Duplicates are removed (e.g. for the
 * number 1 the list is "0, -1, 1, 2" rather than "0, -1, 1, 0, 2, -1") and
 * the +/-1 neighbours are rounded to the precision of the literal so that
 * floating-point noise such as 1.2999999999999998 is not shown.
 *
 * @param claim - The claim text to inspect
 * @returns At most one low-severity challenge; empty when the claim has no number
 */
function generateEdgeCases(claim: string): Challenge[] {
  // NUMERIC_PATTERN is a global regex; matchAll works on a copy but starts
  // from the current lastIndex, so make sure that is 0.
  NUMERIC_PATTERN.lastIndex = 0;

  for (const match of claim.matchAll(NUMERIC_PATTERN)) {
    const literal = match[1];
    if (!literal) continue;

    const num = parseFloat(literal);
    if (Number.isNaN(num)) continue;

    // Round neighbours to the literal's own number of decimals (capped to stay
    // well inside what toFixed accepts).
    const decimals = Math.min(literal.split(".")[1]?.length ?? 0, 12);
    const tidy = (value: number): number => Number(value.toFixed(decimals));

    const candidates = [0, -1, 1, tidy(num - 1), tidy(num + 1)];
    if (num > 0) candidates.push(-num);

    // Set iteration keeps first-seen order, so 0, -1, 1 always lead the list.
    const edgeCases = [...new Set(candidates)];

    // One edge case challenge per claim: stop at the first usable number.
    return [
      {
        type: "edge_case",
        original_claim: claim,
        challenge: `Does the claim hold for edge cases: ${edgeCases.slice(0, 3).join(", ")}?`,
        severity: "low",
        suggested_response: `Test the claim with boundary values: ${edgeCases.join(", ")}.`,
      },
    ];
  }

  return [];
}
170
+
171
/**
 * Generate premise check challenges from conditionals.
 *
 * For each "if P then Q" pair, checks whether P is supported somewhere in
 * `allText` other than in the conditional's own "if P" clause. The clause is
 * removed before checking; otherwise a premise extracted from `allText` would
 * always find its own words there and the check could never fire.
 *
 * Support is measured as the share of the premise's words longer than three
 * characters that occur in the remaining text; below 50% a high-severity
 * challenge is emitted. A premise with no such words (e.g. "x > 0") counts as
 * supported only if the whole premise appears verbatim elsewhere.
 *
 * @param conditionals - Premise/conclusion pairs to verify
 * @param allText - The full reasoning text the conditionals were taken from
 * @returns One challenge per conditional whose premise lacks support
 */
function generatePremiseChecks(
  conditionals: Array<{ premise: string; conclusion: string }>,
  allText: string,
): Challenge[] {
  const challenges: Challenge[] = [];
  const lowerText = allText.toLowerCase();

  for (const { premise, conclusion } of conditionals) {
    const lowerPremise = premise.toLowerCase();

    // Strip every "if <premise>" occurrence so the conditional cannot vouch for itself.
    const escapedPremise = lowerPremise.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    const ownClause = new RegExp("if\\s+" + escapedPremise, "g");
    const supportingText = lowerText.replace(ownClause, " ");

    const premiseWords = lowerPremise.split(/\s+/).filter((w) => w.length > 3);

    let established: boolean;
    if (premiseWords.length === 0) {
      // Nothing distinctive to look for word-by-word; require the exact phrase.
      established = supportingText.includes(lowerPremise);
    } else {
      const wordsFound = premiseWords.filter((w) => supportingText.includes(w)).length;
      established = wordsFound / premiseWords.length >= 0.5;
    }

    if (!established) {
      challenges.push({
        type: "premise_check",
        original_claim: `If ${premise} then ${conclusion}`,
        challenge: `The premise "${premise}" was not clearly established before concluding "${conclusion}".`,
        severity: "high",
        suggested_response: `Add a step that explicitly establishes: "${premise}".`,
      });
    }
  }

  return challenges;
}
203
+
204
/**
 * Build the steelman challenge for a claim: a prompt to state the strongest
 * argument against it. The claim is quoted in the challenge text, cut to its
 * first 50 characters followed by "..." when it is longer than that.
 *
 * @param claim - The claim to argue against
 * @returns A medium-severity `steelman_counter` challenge
 */
function generateSteelmanCounter(claim: string): Challenge {
  const previewLimit = 50;
  const quoted = claim.length > previewLimit ? `${claim.slice(0, previewLimit)}...` : claim;

  const counter: Challenge = {
    type: "steelman_counter",
    original_claim: claim,
    challenge: `Steel-man opposing view: What's the strongest argument AGAINST "${quoted}"?`,
    severity: "medium",
    suggested_response: "Address the strongest possible counterargument before finalizing.",
  };
  return counter;
}
216
+
217
/**
 * Turn a list of challenges into a robustness score.
 *
 * Each challenge contributes a penalty by severity (low 0.1, medium 0.25,
 * high 0.5). The summed penalty is capped at 1 and subtracted from 1, so the
 * score falls as challenges become more numerous or more severe.
 *
 * @param challenges - Challenges raised against the reasoning
 * @returns 1.0 when there are no challenges, otherwise 1 minus the capped penalty
 */
function calculateRobustness(challenges: Challenge[]): number {
  if (challenges.length === 0) {
    return 1.0;
  }

  const penaltyBySeverity = { low: 0.1, medium: 0.25, high: 0.5 };

  let accumulated = 0;
  for (const item of challenges) {
    accumulated = accumulated + penaltyBySeverity[item.severity];
  }

  const cappedPenalty = Math.min(accumulated, 1);
  return Math.max(0, 1 - cappedPenalty);
}
229
+
230
/**
 * Run adversarial challenge on reasoning steps.
 *
 * The thoughts of all steps are joined into one text. Claims are taken from
 * `targetClaim` when given, otherwise extracted from that text. For up to five
 * claims, assumption-inversion and edge-case challenges are generated; the
 * last claim in the list additionally receives one steelman challenge, even
 * when more than five claims were found (previously the steelman was silently
 * dropped in that case). Premise checks for every conditional in the text are
 * appended, challenges with identical text (case-insensitive) are removed,
 * and a robustness score plus summary are computed from the remainder.
 *
 * @param steps - Array of reasoning step texts
 * @param targetClaim - Optional specific claim to challenge
 * @returns Challenge result with generated counterarguments
 */
export function challenge(
  steps: Array<{ step: number; thought: string }>,
  targetClaim?: string,
): ChallengeResult {
  if (steps.length === 0) {
    return {
      challenges_generated: 0,
      challenges: [],
      overall_robustness: 1.0,
      summary: "No steps to challenge.",
    };
  }

  // Combine all text for analysis
  const allText = steps.map((s) => s.thought).join(" ");

  // Extract claims and conditionals
  const claims = targetClaim ? [targetClaim] : extractClaims(allText);
  const conditionals = extractConditionals(allText);

  const challenges: Challenge[] = [];

  // Per-claim challenges, limited to the first 5 claims to bound the output.
  const maxClaims = 5;
  for (const claim of claims.slice(0, maxClaims)) {
    challenges.push(...generateAssumptionInversions(claim));
    challenges.push(...generateEdgeCases(claim));
  }

  // Exactly one steelman, for the last claim in the list. This is done outside
  // the capped loop so it is not lost when there are more than `maxClaims` claims.
  const latestClaim = claims[claims.length - 1];
  if (latestClaim !== undefined) {
    challenges.push(generateSteelmanCounter(latestClaim));
  }

  // Generate premise checks from conditionals
  challenges.push(...generatePremiseChecks(conditionals, allText));

  // Dedupe by challenge text (case-insensitive), keeping the first occurrence
  const seen = new Set<string>();
  const uniqueChallenges = challenges.filter((c) => {
    const key = c.challenge.toLowerCase();
    if (seen.has(key)) return false;
    seen.add(key);
    return true;
  });

  // Calculate robustness
  const robustness = calculateRobustness(uniqueChallenges);

  // Generate summary: high-severity findings take precedence over the plain count
  const highCount = uniqueChallenges.filter((c) => c.severity === "high").length;
  const summary =
    uniqueChallenges.length === 0
      ? "No significant challenges found. Reasoning appears robust."
      : highCount > 0
        ? `⚠️ Found ${highCount} high-severity challenge(s). Address before finalizing.`
        : `Found ${uniqueChallenges.length} challenge(s). Robustness: ${(robustness * 100).toFixed(0)}%`;

  return {
    challenges_generated: uniqueChallenges.length,
    challenges: uniqueChallenges,
    overall_robustness: robustness,
    summary,
  };
}
302
+
303
/**
 * Decide whether a reasoning chain should be challenged automatically.
 *
 * Returns true in either of two situations:
 * - confidence above 0.9 reached in fewer than 3 steps without verification
 * - confidence above 0.95, regardless of step count or verification
 *
 * @param chainConfidence - Confidence of the reasoning chain
 * @param stepCount - Number of reasoning steps taken
 * @param hasVerification - Whether the chain has been verified
 * @returns true when a challenge should be triggered
 */
export function shouldChallenge(
  chainConfidence: number,
  stepCount: number,
  hasVerification: boolean,
): boolean {
  const quickUnverifiedConfidence = chainConfidence > 0.9 && stepCount < 3 && !hasVerification;
  const extremeConfidence = chainConfidence > 0.95;

  return quickUnverifiedConfidence || extremeConfidence;
}