verifiable-thinking-mcp 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +339 -0
- package/package.json +75 -0
- package/src/index.ts +38 -0
- package/src/lib/cache.ts +246 -0
- package/src/lib/compression.ts +804 -0
- package/src/lib/compute/cache.ts +86 -0
- package/src/lib/compute/classifier.ts +555 -0
- package/src/lib/compute/confidence.ts +79 -0
- package/src/lib/compute/context.ts +154 -0
- package/src/lib/compute/extract.ts +200 -0
- package/src/lib/compute/filter.ts +224 -0
- package/src/lib/compute/index.ts +171 -0
- package/src/lib/compute/math.ts +247 -0
- package/src/lib/compute/patterns.ts +564 -0
- package/src/lib/compute/registry.ts +145 -0
- package/src/lib/compute/solvers/arithmetic.ts +65 -0
- package/src/lib/compute/solvers/calculus.ts +249 -0
- package/src/lib/compute/solvers/derivation-core.ts +371 -0
- package/src/lib/compute/solvers/derivation-latex.ts +160 -0
- package/src/lib/compute/solvers/derivation-mistakes.ts +1046 -0
- package/src/lib/compute/solvers/derivation-simplify.ts +451 -0
- package/src/lib/compute/solvers/derivation-transform.ts +620 -0
- package/src/lib/compute/solvers/derivation.ts +67 -0
- package/src/lib/compute/solvers/facts.ts +120 -0
- package/src/lib/compute/solvers/formula.ts +728 -0
- package/src/lib/compute/solvers/index.ts +36 -0
- package/src/lib/compute/solvers/logic.ts +422 -0
- package/src/lib/compute/solvers/probability.ts +307 -0
- package/src/lib/compute/solvers/statistics.ts +262 -0
- package/src/lib/compute/solvers/word-problems.ts +408 -0
- package/src/lib/compute/types.ts +107 -0
- package/src/lib/concepts.ts +111 -0
- package/src/lib/domain.ts +731 -0
- package/src/lib/extraction.ts +912 -0
- package/src/lib/index.ts +122 -0
- package/src/lib/judge.ts +260 -0
- package/src/lib/math/ast.ts +842 -0
- package/src/lib/math/index.ts +8 -0
- package/src/lib/math/operators.ts +171 -0
- package/src/lib/math/tokenizer.ts +477 -0
- package/src/lib/patterns.ts +200 -0
- package/src/lib/session.ts +825 -0
- package/src/lib/think/challenge.ts +323 -0
- package/src/lib/think/complexity.ts +504 -0
- package/src/lib/think/confidence-drift.ts +507 -0
- package/src/lib/think/consistency.ts +347 -0
- package/src/lib/think/guidance.ts +188 -0
- package/src/lib/think/helpers.ts +568 -0
- package/src/lib/think/hypothesis.ts +216 -0
- package/src/lib/think/index.ts +127 -0
- package/src/lib/think/prompts.ts +262 -0
- package/src/lib/think/route.ts +358 -0
- package/src/lib/think/schema.ts +98 -0
- package/src/lib/think/scratchpad-schema.ts +662 -0
- package/src/lib/think/spot-check.ts +961 -0
- package/src/lib/think/types.ts +93 -0
- package/src/lib/think/verification.ts +260 -0
- package/src/lib/tokens.ts +177 -0
- package/src/lib/verification.ts +620 -0
- package/src/prompts/index.ts +10 -0
- package/src/prompts/templates.ts +336 -0
- package/src/resources/index.ts +8 -0
- package/src/resources/sessions.ts +196 -0
- package/src/tools/compress.ts +138 -0
- package/src/tools/index.ts +5 -0
- package/src/tools/scratchpad.ts +2659 -0
- package/src/tools/sessions.ts +144 -0
|
@@ -0,0 +1,620 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Verification Engine - Domain-specific verifiers for reasoning steps
|
|
3
|
+
* Heuristic-based (no LLM calls) for <10ms overhead
|
|
4
|
+
* Includes content-hash caching for repeated verifications
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { verificationCache } from "./cache.ts";
|
|
8
|
+
|
|
9
|
+
// Re-export math module for backwards compatibility
|
|
10
|
+
export {
|
|
11
|
+
type ASTNode,
|
|
12
|
+
// AST
|
|
13
|
+
type ASTNodeType,
|
|
14
|
+
type ASTResult,
|
|
15
|
+
type BinaryNode,
|
|
16
|
+
buildAST,
|
|
17
|
+
canBeUnary,
|
|
18
|
+
compareExpressions,
|
|
19
|
+
compareOperatorPrecedence,
|
|
20
|
+
type EvalResult,
|
|
21
|
+
// Tokenizer
|
|
22
|
+
type ExpressionValidation,
|
|
23
|
+
evaluateExpression,
|
|
24
|
+
type FormatASTOptions,
|
|
25
|
+
type FormatOptions,
|
|
26
|
+
formatAST,
|
|
27
|
+
formatExpression,
|
|
28
|
+
getOperatorArity,
|
|
29
|
+
getOperatorArityInContext,
|
|
30
|
+
getOperatorPrecedence,
|
|
31
|
+
// Operator utilities
|
|
32
|
+
isMathOperator,
|
|
33
|
+
isRightAssociative,
|
|
34
|
+
MATH_OPERATOR_PATTERN,
|
|
35
|
+
// Constants
|
|
36
|
+
MATH_OPERATORS,
|
|
37
|
+
type MathToken,
|
|
38
|
+
type MathTokenType,
|
|
39
|
+
type NumberNode,
|
|
40
|
+
simplifyAST,
|
|
41
|
+
type TokenizeResult,
|
|
42
|
+
tokenizeMathExpression,
|
|
43
|
+
type UnaryNode,
|
|
44
|
+
type VariableNode,
|
|
45
|
+
validateExpression,
|
|
46
|
+
} from "./math/index.ts";
|
|
47
|
+
|
|
48
|
+
// Import for internal use
|
|
49
|
+
import {
|
|
50
|
+
evaluateExpression,
|
|
51
|
+
isMathOperator,
|
|
52
|
+
MATH_OPERATOR_PATTERN,
|
|
53
|
+
validateExpression,
|
|
54
|
+
} from "./math/index.ts";
|
|
55
|
+
|
|
56
|
+
/** Reasoning domains for which this module registers a heuristic verifier. */
export type VerificationDomain = "math" | "logic" | "code" | "general";
|
|
57
|
+
|
|
58
|
+
/** Outcome of verifying a single reasoning step. */
export interface VerificationResult {
  /** True when every pass/fail check of the domain verifier succeeded. */
  passed: boolean;
  /** Fraction of individual heuristic checks that succeeded, in the range 0-1. */
  confidence: number; // 0-1
  /** Domain the verification was requested for. */
  domain: VerificationDomain;
  /** Short explanation: a fixed success message, or the first suggestion on failure. */
  evidence: string;
  /** RLVR-style binary reward; `verify` sets it to 1 when `passed` is true, otherwise 0. */
  reward: 0 | 1; // RLVR-style binary reward
  /** Messages describing each failed check (or a "continue" hint on success). */
  suggestions: string[];
  /** Whether result was from cache; `verify` sets it to false on freshly computed results. */
  cached?: boolean; // Whether result was from cache
}
|
|
67
|
+
|
|
68
|
+
/**
 * Signature shared by all domain verifiers.
 *
 * A verifier receives the reasoning step and the context strings and returns
 * everything except `domain` and `reward`, which `verify` fills in afterwards.
 */
type Verifier = (
  thought: string,
  context: string[],
) => Omit<VerificationResult, "domain" | "reward">;
|
|
72
|
+
|
|
73
|
+
/**
 * Dispatch table from domain name to its verifier.
 * The referenced functions are `function` declarations further down in this
 * file, so they are hoisted and available when this table is initialised.
 */
const verifiers: Record<VerificationDomain, Verifier> = {
  math: verifyMath,
  logic: verifyLogic,
  code: verifyCode,
  general: verifyGeneral,
};
|
|
79
|
+
|
|
80
|
+
/**
 * Verify a reasoning step with the heuristic verifier registered for `domain`.
 *
 * @param thought - The reasoning step to check.
 * @param domain - Which verifier to use; an unregistered key falls back to the
 *   general verifier, but the returned `domain` is still the requested one.
 * @param context - Context strings passed to the verifier and used as part of
 *   the cache lookup.
 * @param useCache - When true, a cached result is returned if present (marked
 *   `cached: true`) and freshly computed results are stored.
 * @returns The verifier's result completed with `domain`, a binary `reward`
 *   and the `cached` flag.
 */
export function verify(
  thought: string,
  domain: VerificationDomain,
  context: string[] = [],
  useCache: boolean = true,
): VerificationResult {
  if (useCache) {
    const hit = verificationCache.get(thought, domain, context);
    if (hit) {
      return { ...hit, cached: true };
    }
  }

  const runVerifier = verifiers[domain] || verifiers.general;
  const outcome = runVerifier(thought, context);
  const reward = outcome.passed ? 1 : 0;

  const fullResult: VerificationResult = { ...outcome, domain, reward, cached: false };

  if (useCache) {
    verificationCache.set(thought, domain, context, fullResult);
  }
  return fullResult;
}
|
|
111
|
+
|
|
112
|
+
/** Return whatever statistics the shared verification cache reports via `getStats()`. */
export function getVerificationCacheStats() {
  const stats = verificationCache.getStats();
  return stats;
}
|
|
116
|
+
|
|
117
|
+
/**
 * Clear the shared verification cache.
 *
 * @returns The number returned by the cache's `clear()` (presumably the count
 *   of removed entries — confirm against the cache implementation).
 */
export function clearVerificationCache(): number {
  const cleared = verificationCache.clear();
  return cleared;
}
|
|
121
|
+
|
|
122
|
+
// ============================================================================
|
|
123
|
+
// DOMAIN VERIFIERS
|
|
124
|
+
// ============================================================================
|
|
125
|
+
|
|
126
|
+
/**
 * Heuristic verifier for mathematical reasoning steps.
 *
 * Six independent checks are evaluated: mathematical content is present,
 * brackets are balanced, no contradiction phrase occurs, the equals signs are
 * well formed, every extracted expression is structurally valid, and every
 * purely numeric equation computes correctly. The step passes only when all
 * six hold; `confidence` is the fraction that hold.
 *
 * @param thought - The reasoning step to check.
 * @param _context - Unused by this verifier.
 * @returns Pass flag, confidence, evidence string and suggestions.
 */
function verifyMath(
  thought: string,
  _context: string[],
): Omit<VerificationResult, "domain" | "reward"> {
  const lower = thought.toLowerCase();

  // Any digit/operator character or a math keyword counts as math content.
  const mentionsMathSymbol = /[\d.+\-*/()=]/.test(thought);
  const hasMath =
    mentionsMathSymbol ||
    /solve|calculate|equation|derivative|integral|sum|product/i.test(thought);

  const balanced = checkBalanced(thought);
  const hasContradiction = /but also|both true and false|contradiction/i.test(lower);

  // Chained equalities (a = b = c) are fine; "= =" or "5 = 3" style
  // assignments are treated as malformed.
  const hasInvalidEquals = /=\s*=/.test(thought) || hasContradictoryAssignment(thought);

  const structuralErrors = extractAndValidateExpressions(thought);
  const computationError = verifyNumericEquations(thought);

  const checks: boolean[] = [
    hasMath,
    balanced,
    !hasContradiction,
    !hasInvalidEquals,
    !structuralErrors,
    !computationError,
  ];
  const passed = checks.every(Boolean);
  const confidence = calculateConfidence(checks);

  // Order matters: on failure the first entry becomes the evidence string.
  const advice: Array<[boolean, string]> = [
    [!hasMath, "Include mathematical expressions or operations"],
    [!balanced, "Check parentheses/brackets are balanced"],
    [hasContradiction, "Resolve the logical contradiction"],
    [hasInvalidEquals, "Check equation structure for errors"],
    [Boolean(structuralErrors), structuralErrors ?? ""],
    [Boolean(computationError), computationError ?? ""],
    [passed, "Continue with next step"],
  ];
  const suggestions: string[] = advice
    .filter(([applies]) => applies)
    .map(([, message]) => message);

  let evidence = "Valid mathematical reasoning";
  if (!passed) {
    evidence = suggestions[0] || "Verification failed";
  }

  return { passed, confidence, evidence, suggestions };
}
|
|
189
|
+
|
|
190
|
+
/**
 * Extract potential math expressions from text and validate their structure.
 *
 * The text is scanned for operator characters; around each one the enclosing
 * expression is extracted and, if it is at least three characters long,
 * passed to `validateExpression`.
 *
 * @param text - Free text that may contain math expressions.
 * @returns An error message for the first malformed expression, or null when
 *   none is found.
 */
function extractAndValidateExpressions(text: string): string | null {
  const operators = /[+\-*/×÷−·^√²³]/;
  let i = 0;

  while (i < text.length) {
    const char = text[i] as string;
    if (!operators.test(char)) {
      i++;
      continue;
    }

    // Found an operator - try to extract the surrounding expression.
    const expr = extractBalancedExpression(text, i);
    if (!expr) {
      i++;
      continue;
    }

    if (expr.length >= 3) {
      const validation = validateExpression(expr);
      if (!validation.valid) {
        return `Expression error: ${validation.error}`;
      }
    }

    // Resume right after the expression. The expression normally begins
    // BEFORE the operator, so adding its length to the operator index (as was
    // done previously) jumps past its real end and skips later expressions.
    i = findExpressionEnd(text, expr, i);
  }

  return null;
}

/**
 * Locate the end (exclusive) of `expr` in `text`, given that it covers `operatorIdx`.
 *
 * The earliest matching start is used, so the result can never lie beyond the
 * expression's true end; at worst part of an expression is scanned again.
 * The result is always greater than `operatorIdx`, which guarantees progress.
 */
function findExpressionEnd(text: string, expr: string, operatorIdx: number): number {
  const earliestStart = Math.max(0, operatorIdx - expr.length + 1);
  for (let start = earliestStart; start <= operatorIdx; start++) {
    if (text.startsWith(expr, start)) {
      return start + expr.length;
    }
  }
  return operatorIdx + 1;
}
|
|
221
|
+
|
|
222
|
+
/**
 * Extract the expression surrounding the operator at `operatorIdx`.
 *
 * The span grows to the left and then to the right over digits, dots,
 * letters, underscores, whitespace, math operators and balanced parenthesis
 * groups. Growth stops at an unmatched parenthesis or, outside of any
 * parenthesis group, at the first other character.
 *
 * @param text - Text containing the operator.
 * @param operatorIdx - Index of the operator character in `text`.
 * @returns The trimmed span, or null when it contains no operand character.
 */
function extractBalancedExpression(text: string, operatorIdx: number): string | null {
  const operandOrSpace = /[\d.a-zA-Z_\s]/;
  const belongsToExpression = (ch: string): boolean =>
    operandOrSpace.test(ch) || isMathOperator(ch);

  // Grow leftwards; `left` is the inclusive start of the span.
  let left = operatorIdx;
  let depth = 0;
  for (; left > 0; left--) {
    const ch = text[left - 1] as string;
    if (ch === ")") {
      depth++;
      continue;
    }
    if (ch === "(") {
      if (depth === 0) break; // unmatched "(" - stop before it
      depth--;
      continue;
    }
    if (belongsToExpression(ch)) continue;
    if (depth === 0) break; // foreign character outside parentheses
    // Inside a parenthesis group every character is accepted.
  }

  // Grow rightwards; `right` is the exclusive end of the span.
  let right = operatorIdx + 1;
  depth = 0;
  for (; right < text.length; right++) {
    const ch = text[right] as string;
    if (ch === "(") {
      depth++;
      continue;
    }
    if (ch === ")") {
      if (depth === 0) break; // unmatched ")" - stop before it
      depth--;
      continue;
    }
    if (belongsToExpression(ch)) continue;
    if (depth === 0) break; // foreign character outside parentheses
  }

  const candidate = text.slice(left, right).trim();
  // A span made of operators only is not an expression.
  return /[\d.a-zA-Z_]/.test(candidate) ? candidate : null;
}
|
|
278
|
+
|
|
279
|
+
/**
 * Detect contradictory numeric assignments like "5 = 3".
 *
 * Only flags "number = number" where both sides are standalone numbers:
 *   "5 = 3"       → true  (standalone number = different number)
 *   "x = 5 = 3"   → true  (contains 5 = 3)
 *   "1 = 1"       → false (same value)
 *   "2 + 2 = 4"   → false (left number is the tail of an expression)
 *   "2^3 = 8"     → false (left number is the tail of an expression)
 *   "10 = 2 * 5"  → false (right number is the head of an expression)
 *   "9 = 3²"      → false (right number is the head of an expression)
 *
 * @param thought - Text to scan.
 * @returns True when at least one contradictory pair is found.
 */
function hasContradictoryAssignment(thought: string): boolean {
  const numericEquals = /(\d+(?:\.\d+)?)\s*=\s*(\d+(?:\.\d+)?)/g;
  // The right-hand number is only the beginning of a larger term when it is
  // followed by an operator, "%", "(" or directly by an identifier character
  // (implicit multiplication such as "3x").
  const continuesExpression = /^(?:\s*[+\-*/^×÷−·%(√²³]|[a-zA-Z_])/;

  for (const match of thought.matchAll(numericEquals)) {
    const a = match[1];
    const b = match[2];
    const matchStart = match.index ?? 0;
    const precedingContext = thought.slice(0, matchStart);

    // Left number is the tail of an expression (e.g. "2 + 2 = 4").
    if (hasMathOperatorBefore(precedingContext)) continue;

    // Left number is the tail of a longer number.
    if (/\d$/.test(precedingContext)) continue;

    // Right number is the head of an expression (e.g. "10 = 2 * 5");
    // comparing only its first number would report a bogus contradiction.
    const following = thought.slice(matchStart + (match[0] ?? "").length);
    if (continuesExpression.test(following)) continue;

    if (a && b && parseFloat(a) !== parseFloat(b)) {
      return true;
    }
  }
  return false;
}
|
|
318
|
+
|
|
319
|
+
/**
 * Verify numeric equations by evaluating the LHS and comparing it to the RHS.
 *
 * For every "= number" occurrence the text before the equals sign is reduced
 * to an expression; if that expression is purely numeric and can be
 * evaluated, its value must match the stated number. E.g. "2 + 2 = 5" yields
 * an error because 4 ≠ 5.
 *
 * Equations are skipped (not judged) when:
 *   - no LHS expression can be extracted, or it is a bare number;
 *   - the LHS contains letters (variables cannot be evaluated);
 *   - the RHS number is only the start of a larger term ("2 + 2 = 2 * 2",
 *     "1/2 = 50%", "= 3x"), since comparing against its first number alone
 *     would report a false error.
 *
 * A RHS written with decimals is accepted when it equals the LHS value
 * rounded to the stated number of decimals ("10 / 3 = 3.33"); integer RHS
 * values must match exactly up to floating-point noise.
 *
 * @param thought - Text to scan.
 * @returns An error message for the first wrong computation, or null.
 */
function verifyNumericEquations(thought: string): string | null {
  const equalsPattern = /=\s*(-?[\d.]+)/g;
  const continuesExpression = /^(?:\s*[+\-*/^×÷−·%(√²³]|[a-zA-Z_])/;

  for (const match of thought.matchAll(equalsPattern)) {
    const rhs = match[1]?.trim();
    const equalsIdx = match.index ?? 0;
    if (!rhs) continue;

    // RHS is an expression or has a unit-like suffix, not a standalone number.
    const afterRhs = thought.slice(equalsIdx + (match[0] ?? "").length);
    if (continuesExpression.test(afterRhs)) continue;

    // Extract LHS: go backwards from the "=" to find the expression.
    const beforeEquals = thought.slice(0, equalsIdx).trimEnd();
    const lhs = extractLHSExpression(beforeEquals);
    if (!lhs) continue;

    // A bare number on the left is handled by hasContradictoryAssignment.
    if (/^-?[\d.]+$/.test(lhs)) continue;

    // Variables cannot be evaluated.
    if (/[a-zA-Z]/.test(lhs)) continue;

    const result = evaluateExpression(lhs);
    if (result.value === null) continue; // Can't evaluate, skip

    const rhsValue = parseFloat(rhs);
    if (Number.isNaN(rhsValue)) continue;

    // Half a unit in the last stated decimal place covers rounding; the
    // 1e-9 term absorbs floating-point noise.
    const dotIdx = rhs.indexOf(".");
    const decimals = dotIdx === -1 ? 0 : rhs.length - dotIdx - 1;
    const tolerance = decimals > 0 ? 0.5 * 10 ** -decimals + 1e-9 : 1e-9;

    if (Math.abs(result.value - rhsValue) > tolerance) {
      return `Computation error: ${lhs} = ${result.value}, not ${rhs}`;
    }
  }

  return null;
}
|
|
363
|
+
|
|
364
|
+
/**
 * Extract the expression that ends at the end of `text` (the text just before "=").
 *
 * Scans backwards over digits, dots, operators, letters, underscores,
 * whitespace and balanced parentheses, stopping at an unmatched "(" or at any
 * other character. Letters are included on purpose so that "x + 1" is
 * captured whole; callers filter out expressions containing variables.
 *
 * @param text - Text ending immediately before an equals sign.
 * @returns The trimmed expression, or null when `text` is empty or the span
 *   contains no operator (i.e. is not an expression).
 */
function extractLHSExpression(text: string): string | null {
  if (!text) return null;

  const exprChars = /[\d.+\-*/^×÷−·√²³()\sa-zA-Z_]/;

  let cursor = text.length;
  let unmatchedClosers = 0;

  scan: while (cursor > 0) {
    const ch = text[cursor - 1] as string;
    switch (ch) {
      case ")":
        unmatchedClosers++;
        break;
      case "(":
        if (unmatchedClosers === 0) break scan; // unmatched open paren
        unmatchedClosers--;
        break;
      default:
        if (!exprChars.test(ch)) break scan; // non-expression character
    }
    cursor--;
  }

  const candidate = text.slice(cursor).trim();

  // Without an operator this is just a number or a word, not an expression.
  const hasOperator = /[+\-*/^×÷−·√²³]/.test(candidate);
  return hasOperator ? candidate : null;
}
|
|
409
|
+
|
|
410
|
+
/**
 * Report whether `text` matches `MATH_OPERATOR_PATTERN`.
 *
 * Intended to answer "does the text end with a math operator (including
 * Unicode ones)?" — NOTE(review): that holds only if the imported pattern is
 * anchored to the end of the input; confirm in the math module.
 */
function hasMathOperatorBefore(text: string): boolean {
  const matchesOperatorPattern = MATH_OPERATOR_PATTERN.test(text);
  return matchesOperatorPattern;
}
|
|
414
|
+
|
|
415
|
+
/**
 * Heuristic verifier for logical reasoning steps.
 *
 * Passes when the step contains a logical connective, contains none of the
 * known contradiction phrases or circular-reasoning phrases, and does not
 * negate a prior context entry. `confidence` is the fraction of these four
 * checks that hold.
 *
 * @param thought - The reasoning step to check.
 * @param context - Context strings used for the consistency check.
 * @returns Pass flag, confidence, evidence string and suggestions.
 */
function verifyLogic(
  thought: string,
  context: string[],
): Omit<VerificationResult, "domain" | "reward"> {
  const lower = thought.toLowerCase();

  const hasLogicalKeywords =
    /if|then|therefore|because|implies|hence|thus|conclude|assume|given/i.test(thought);

  const contradictions = [
    "both true and false",
    "and not both",
    "yes and no simultaneously",
    "contradiction",
  ];
  let hasContradiction = false;
  for (const phrase of contradictions) {
    if (lower.includes(phrase)) {
      hasContradiction = true;
      break;
    }
  }

  const hasCircular = /because it is|proves itself|self-evident without/i.test(lower);
  const consistent = checkContextConsistency(thought, context);

  const checks: boolean[] = [hasLogicalKeywords, !hasContradiction, !hasCircular, consistent];
  const passed = checks.every(Boolean);
  const confidence = calculateConfidence(checks);

  // Order matters: on failure the first entry becomes the evidence string.
  const advice: Array<[boolean, string]> = [
    [!hasLogicalKeywords, "Add logical connectives (if/then, therefore, because)"],
    [hasContradiction, "Resolve the contradiction"],
    [hasCircular, "Avoid circular reasoning"],
    [!consistent, "Check consistency with previous steps"],
    [passed, "Reasoning is logically sound"],
  ];
  const suggestions: string[] = advice
    .filter(([applies]) => applies)
    .map(([, message]) => message);

  let evidence = "Logically consistent";
  if (!passed) {
    evidence = suggestions[0] || "Logic check failed";
  }

  return { passed, confidence, evidence, suggestions };
}
|
|
463
|
+
|
|
464
|
+
/**
 * Heuristic verifier for code-related reasoning steps.
 *
 * Passes when the step mentions a code or algorithm keyword, has balanced
 * brackets and shows no infinite-loop pattern. A possible null dereference
 * (`.unwrap()` or `null.`) lowers `confidence` and adds a suggestion but does
 * not fail the step.
 *
 * @param thought - The reasoning step to check.
 * @param _context - Unused by this verifier.
 * @returns Pass flag, confidence, evidence string and suggestions.
 */
function verifyCode(
  thought: string,
  _context: string[],
): Omit<VerificationResult, "domain" | "reward"> {
  const hasCodeKeywords =
    /function|class|return|const|let|var|if|for|while|async|await|def|import|export|->|=>|struct|impl|fn|pub/i.test(
      thought,
    );
  const hasAlgorithm =
    /algorithm|complexity|O\(|time|space|iterate|recurse|sort|search|hash|tree|graph/i.test(
      thought,
    );
  const onTopic = hasCodeKeywords || hasAlgorithm;

  const balanced = checkBalanced(thought);

  // Common code smells in reasoning.
  const hasInfiniteLoop = /while\s*\(\s*true\s*\)|for\s*\(\s*;\s*;\s*\)|loop\s*{/i.test(thought);
  const hasNullDeref = /\.\s*unwrap\s*\(\s*\)|\.unwrap\(\)|null\s*\./i.test(thought);

  const passed = onTopic && balanced && !hasInfiniteLoop;
  const confidence = calculateConfidence([onTopic, balanced, !hasInfiniteLoop, !hasNullDeref]);

  // Order matters: on failure the first entry becomes the evidence string.
  const advice: Array<[boolean, string]> = [
    [!onTopic, "Include code concepts or algorithm discussion"],
    [!balanced, "Check bracket/brace balance"],
    [hasInfiniteLoop, "Potential infinite loop detected"],
    [hasNullDeref, "Consider handling null/None cases"],
    [passed, "Code reasoning is valid"],
  ];
  const suggestions: string[] = advice
    .filter(([applies]) => applies)
    .map(([, message]) => message);

  let evidence = "Valid code reasoning";
  if (!passed) {
    evidence = suggestions[0] || "Code verification failed";
  }

  return { passed, confidence, evidence, suggestions };
}
|
|
510
|
+
|
|
511
|
+
/**
 * Fallback verifier performing basic coherence checks.
 *
 * Passes when the step is longer than 15 characters, is not merely a short
 * question, is not short and hedged ("maybe", "perhaps", ...), and shares
 * vocabulary with the context (when context is given). The presence of
 * punctuation only contributes to `confidence`.
 *
 * @param thought - The reasoning step to check.
 * @param context - Context strings used for the relevance check.
 * @returns Pass flag, confidence, evidence string and suggestions.
 */
function verifyGeneral(
  thought: string,
  context: string[],
): Omit<VerificationResult, "domain" | "reward"> {
  const length = thought.length;

  const hasSubstance = length > 15;
  const endsAsQuestion = thought.trim().endsWith("?");
  const notJustQuestion = !endsAsQuestion || length > 50;
  const hasStructure = /\.|,|;|:/.test(thought); // Has punctuation

  // Hedging words only count against short thoughts.
  const tooVague = /maybe|perhaps|possibly|might|could be|not sure/i.test(thought) && length < 100;

  // Simple keyword overlap; trivially satisfied without context.
  const relevant = context.length === 0 || checkContextRelevance(thought, context);

  const passed = hasSubstance && notJustQuestion && !tooVague && relevant;
  const confidence = calculateConfidence([
    hasSubstance,
    notJustQuestion,
    !tooVague,
    relevant,
    hasStructure,
  ]);

  // Order matters: on failure the first entry becomes the evidence string.
  const advice: Array<[boolean, string]> = [
    [!hasSubstance, "Provide more detailed reasoning"],
    [!notJustQuestion, "Answer the question rather than asking another"],
    [tooVague, "Be more specific in your reasoning"],
    [!relevant, "Ensure relevance to previous context"],
    [passed, "Proceed to next step"],
  ];
  const suggestions: string[] = advice
    .filter(([applies]) => applies)
    .map(([, message]) => message);

  let evidence = "Coherent reasoning";
  if (!passed) {
    evidence = suggestions[0] || "General check failed";
  }

  return { passed, confidence, evidence, suggestions };
}
|
|
550
|
+
|
|
551
|
+
// ============================================================================
|
|
552
|
+
// HELPERS
|
|
553
|
+
// ============================================================================
|
|
554
|
+
|
|
555
|
+
/**
 * Check that round, curly and square brackets in `text` are properly nested.
 *
 * @param text - Text to inspect; all non-bracket characters are ignored.
 * @returns True when every closing bracket matches the most recent unclosed
 *   opening bracket and nothing is left open at the end.
 */
function checkBalanced(text: string): boolean {
  const openerFor: Record<string, string> = { ")": "(", "}": "{", "]": "[" };
  const pending: string[] = [];

  for (const ch of text) {
    switch (ch) {
      case "(":
      case "{":
      case "[":
        pending.push(ch);
        break;
      case ")":
      case "}":
      case "]":
        // An empty stack pops `undefined`, which never equals an opener.
        if (pending.pop() !== openerFor[ch]) {
          return false;
        }
        break;
      default:
        break;
    }
  }

  return pending.length === 0;
}
|
|
572
|
+
|
|
573
|
+
/**
 * Check that a thought does not explicitly negate prior context (or vice versa).
 *
 * This is a simple textual negation check: the thought is inconsistent when it
 * contains "not " followed by the first 20 characters of a context entry, or
 * when a context entry contains "not " followed by the first 20 characters of
 * the thought. Comparison is case-insensitive.
 *
 * Blank context entries and a blank thought are ignored; otherwise the probe
 * would degenerate to "not " and match any text containing a negation.
 *
 * @param thought - The current reasoning step.
 * @param context - Previous steps.
 * @returns False when an explicit negation is found, true otherwise.
 */
function checkContextConsistency(thought: string, context: string[]): boolean {
  if (context.length === 0) return true;

  const lower = thought.toLowerCase();
  const thoughtSnippet = lower.slice(0, 20);
  const thoughtUsable = thoughtSnippet.trim().length > 0;

  for (const prev of context) {
    const prevLower = prev.toLowerCase();
    const prevSnippet = prevLower.slice(0, 20);
    const prevUsable = prevSnippet.trim().length > 0;

    if (prevUsable && lower.includes(`not ${prevSnippet}`)) {
      return false;
    }
    if (thoughtUsable && prevLower.includes(`not ${thoughtSnippet}`)) {
      return false;
    }
  }

  return true;
}
|
|
593
|
+
|
|
594
|
+
/**
 * Check that a thought shares vocabulary with the prior context.
 *
 * @param thought - The current reasoning step.
 * @param context - Previous steps.
 * @returns True when there is no context, when the thought has fewer than
 *   five distinct tokens (too short to judge), or when at least one of its
 *   tokens also occurs in the context.
 */
function checkContextRelevance(thought: string, context: string[]): boolean {
  if (context.length === 0) return true;

  const thoughtVocabulary = new Set(tokenize(thought));

  const contextVocabulary = new Set<string>();
  for (const entry of context) {
    for (const word of tokenize(entry)) {
      contextVocabulary.add(word);
    }
  }

  if (thoughtVocabulary.size < 5) return true;

  for (const word of thoughtVocabulary) {
    if (contextVocabulary.has(word)) return true;
  }
  return false;
}
|
|
608
|
+
|
|
609
|
+
/**
 * Split text into lower-cased word tokens for overlap comparison.
 *
 * Every character that is not a letter, combining mark, digit, underscore or
 * whitespace is treated as a separator. Unicode property escapes are used
 * instead of the ASCII-only `\w`, so words such as "café" stay intact instead
 * of being cut at the accented letter. Tokens of one or two characters are
 * dropped.
 *
 * @param text - Text to tokenize.
 * @returns Tokens longer than two characters, in order of appearance.
 */
function tokenize(text: string): string[] {
  return text
    .toLowerCase()
    .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, " ")
    .split(/\s+/)
    .filter((w) => w.length > 2);
}
|
|
616
|
+
|
|
617
|
+
/**
 * Convert a list of check outcomes into a confidence score.
 *
 * @param checks - One boolean per heuristic check (true = check succeeded).
 * @returns The fraction of successful checks rounded to two decimals, or 0
 *   for an empty list (instead of the NaN that 0/0 would produce).
 */
function calculateConfidence(checks: boolean[]): number {
  if (checks.length === 0) return 0;

  const passed = checks.filter(Boolean).length;
  return Math.round((passed / checks.length) * 100) / 100;
}
|