verifiable-thinking-mcp 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +339 -0
- package/package.json +75 -0
- package/src/index.ts +38 -0
- package/src/lib/cache.ts +246 -0
- package/src/lib/compression.ts +804 -0
- package/src/lib/compute/cache.ts +86 -0
- package/src/lib/compute/classifier.ts +555 -0
- package/src/lib/compute/confidence.ts +79 -0
- package/src/lib/compute/context.ts +154 -0
- package/src/lib/compute/extract.ts +200 -0
- package/src/lib/compute/filter.ts +224 -0
- package/src/lib/compute/index.ts +171 -0
- package/src/lib/compute/math.ts +247 -0
- package/src/lib/compute/patterns.ts +564 -0
- package/src/lib/compute/registry.ts +145 -0
- package/src/lib/compute/solvers/arithmetic.ts +65 -0
- package/src/lib/compute/solvers/calculus.ts +249 -0
- package/src/lib/compute/solvers/derivation-core.ts +371 -0
- package/src/lib/compute/solvers/derivation-latex.ts +160 -0
- package/src/lib/compute/solvers/derivation-mistakes.ts +1046 -0
- package/src/lib/compute/solvers/derivation-simplify.ts +451 -0
- package/src/lib/compute/solvers/derivation-transform.ts +620 -0
- package/src/lib/compute/solvers/derivation.ts +67 -0
- package/src/lib/compute/solvers/facts.ts +120 -0
- package/src/lib/compute/solvers/formula.ts +728 -0
- package/src/lib/compute/solvers/index.ts +36 -0
- package/src/lib/compute/solvers/logic.ts +422 -0
- package/src/lib/compute/solvers/probability.ts +307 -0
- package/src/lib/compute/solvers/statistics.ts +262 -0
- package/src/lib/compute/solvers/word-problems.ts +408 -0
- package/src/lib/compute/types.ts +107 -0
- package/src/lib/concepts.ts +111 -0
- package/src/lib/domain.ts +731 -0
- package/src/lib/extraction.ts +912 -0
- package/src/lib/index.ts +122 -0
- package/src/lib/judge.ts +260 -0
- package/src/lib/math/ast.ts +842 -0
- package/src/lib/math/index.ts +8 -0
- package/src/lib/math/operators.ts +171 -0
- package/src/lib/math/tokenizer.ts +477 -0
- package/src/lib/patterns.ts +200 -0
- package/src/lib/session.ts +825 -0
- package/src/lib/think/challenge.ts +323 -0
- package/src/lib/think/complexity.ts +504 -0
- package/src/lib/think/confidence-drift.ts +507 -0
- package/src/lib/think/consistency.ts +347 -0
- package/src/lib/think/guidance.ts +188 -0
- package/src/lib/think/helpers.ts +568 -0
- package/src/lib/think/hypothesis.ts +216 -0
- package/src/lib/think/index.ts +127 -0
- package/src/lib/think/prompts.ts +262 -0
- package/src/lib/think/route.ts +358 -0
- package/src/lib/think/schema.ts +98 -0
- package/src/lib/think/scratchpad-schema.ts +662 -0
- package/src/lib/think/spot-check.ts +961 -0
- package/src/lib/think/types.ts +93 -0
- package/src/lib/think/verification.ts +260 -0
- package/src/lib/tokens.ts +177 -0
- package/src/lib/verification.ts +620 -0
- package/src/prompts/index.ts +10 -0
- package/src/prompts/templates.ts +336 -0
- package/src/resources/index.ts +8 -0
- package/src/resources/sessions.ts +196 -0
- package/src/tools/compress.ts +138 -0
- package/src/tools/index.ts +5 -0
- package/src/tools/scratchpad.ts +2659 -0
- package/src/tools/sessions.ts +144 -0
|
@@ -0,0 +1,347 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Consistency Checker - O(n) lightweight contradiction detection
|
|
3
|
+
*
|
|
4
|
+
* Detects obvious contradictions across reasoning steps:
|
|
5
|
+
* - Variable reassignment (x=5 then x=10)
|
|
6
|
+
* - Logical conflicts (always vs never, all vs none)
|
|
7
|
+
* - Sign/direction flips (positive→negative, increasing→decreasing)
|
|
8
|
+
*
|
|
9
|
+
* Keeps complexity O(n) by single-pass extraction + map-based comparison.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
/**
 * One contradiction found between two reasoning steps.
 *
 * The "original" fields describe the earlier claim; the "conflicting"
 * fields describe the later claim that disagrees with it.
 */
export interface Contradiction {
  /** Category of the contradiction. */
  type: "value_reassignment" | "logical_conflict" | "sign_flip" | "direction_reversal";

  /** Description intended for human readers. */
  description: string;

  /** Variable or concept the contradiction is about. */
  subject: string;

  /** Step number in which the earlier claim appeared. */
  original_step: number;

  /** Value or state asserted by the earlier claim. */
  original_value: string;

  /** Step number in which the disagreeing claim appeared. */
  conflicting_step: number;

  /** Value or state asserted by the disagreeing claim. */
  conflicting_value: string;

  /** Detection confidence, from 0 to 1. */
  confidence: number;
}
|
|
31
|
+
|
|
32
|
+
/**
 * Outcome of a consistency check over a list of reasoning steps.
 */
export interface ConsistencyResult {
  /** True when at least one contradiction was detected. */
  has_contradictions: boolean;

  /** Every contradiction that was detected. */
  contradictions: Contradiction[];

  /** How many steps were examined. */
  steps_analyzed: number;
}
|
|
41
|
+
|
|
42
|
+
// Pattern for extracting numeric variable assignments.
// Matches: x = 5, x: 5, let x = 3, x := 7 (capture 1 = name, capture 2 = number).
// Deliberately does NOT match:
//   - comparisons:            x == 5
//   - coefficient/member use: 2x = 10, obj.x = 5   (the name must not follow a word char or '.')
//   - expression right sides: x = 3 + 2, x = 3.5 * r   (the number must not be followed by an
//     arithmetic operator and another operand, nor be a prefix of a longer number)
// Treating any of those as "x is <number>" makes later steps look like reassignments.
const ASSIGNMENT_PATTERN =
  /(?<![a-zA-Z0-9_.])(?:let\s+)?([a-zA-Z_][a-zA-Z0-9_]*)\s*(?::=|=(?!=)|:)\s*(-?\d+(?:\.\d+)?)(?!\.?\d|\s*[-+*\/^]\s*[\w(])/g;
|
|
45
|
+
|
|
46
|
+
// Opposition table for absolute / quantifier words.
// Each key maps to the words that are treated as conflicting with it.
// Key order is significant: entries are iterated in this order.
const LOGICAL_ABSOLUTES: Record<string, string[]> = {
  "always": ["never", "sometimes", "rarely"],
  "never": ["always", "sometimes", "often"],
  "all": ["none", "some", "few"],
  "none": ["all", "some", "many"],
  "every": ["no", "some", "few"],
  "must": ["cannot", "might", "may not"],
  "cannot": ["must", "can", "might"],
  "true": ["false"],
  "false": ["true"],
  "impossible": ["possible", "certain", "likely"],
  "certain": ["impossible", "uncertain", "unlikely"],
};
|
|
60
|
+
|
|
61
|
+
// Opposition table for sign / direction words.
// Each key maps to the words that are treated as conflicting with it.
// Key order is significant: entries are iterated in this order.
const SIGN_WORDS: Record<string, string[]> = {
  "positive": ["negative", "zero"],
  "negative": ["positive", "zero"],
  "increasing": ["decreasing", "constant"],
  "decreasing": ["increasing", "constant"],
  "greater": ["less", "equal"],
  "less": ["greater", "equal"],
  "above": ["below", "at"],
  "below": ["above", "at"],
};
|
|
72
|
+
|
|
73
|
+
/**
 * Bookkeeping entry for a single claim extracted from a step.
 */
interface ClaimRecord {
  /** Step number the claim was extracted from. */
  step: number;
  /** The claimed value (a number as text, or the matched keyword). */
  value: string;
  /** Text surrounding the claim, kept for confidence/subject heuristics. */
  context: string;
}
|
|
79
|
+
|
|
80
|
+
/**
 * Collect the numeric variable assignments that appear in one step's text.
 *
 * Variable names are lower-cased before being used as map keys. When the same
 * name is assigned more than once in the text, the last occurrence is kept.
 *
 * @param text - Text of the step to scan
 * @param stepNum - Step number stored on every record produced
 * @returns Map from lower-cased variable name to its claim record
 */
function extractAssignments(text: string, stepNum: number): Map<string, ClaimRecord> {
  const CONTEXT_RADIUS = 20;
  const found = new Map<string, ClaimRecord>();

  // The pattern is a shared global regex; make sure scanning starts at offset 0.
  ASSIGNMENT_PATTERN.lastIndex = 0;

  for (const hit of text.matchAll(ASSIGNMENT_PATTERN)) {
    const name = hit[1]?.toLowerCase();
    const value = hit[2];
    if (!name || !value) continue;

    // Keep up to CONTEXT_RADIUS characters on each side of the match.
    const at = hit.index ?? 0;
    const from = Math.max(0, at - CONTEXT_RADIUS);
    const to = Math.min(text.length, at + hit[0].length + CONTEXT_RADIUS);

    found.set(name, { step: stepNum, value, context: text.slice(from, to) });
  }

  return found;
}
|
|
105
|
+
|
|
106
|
+
/**
 * Extract logical-absolute keywords (the keys of LOGICAL_ABSOLUTES) from text.
 *
 * Matching is case-insensitive and whole-word. At most one record is produced
 * per keyword, describing its first whole-word occurrence. Conflicts between
 * keywords are not evaluated here; callers compare the recorded values.
 *
 * @param text - Text of the step to scan
 * @param stepNum - Step number stored on every record produced
 * @returns Map keyed by `logical:<word>` to the claim record for that word
 */
function extractLogicalClaims(text: string, stepNum: number): Map<string, ClaimRecord> {
  const result = new Map<string, ClaimRecord>();
  const lowerText = text.toLowerCase();

  for (const word of Object.keys(LOGICAL_ABSOLUTES)) {
    // Use the regex match position itself. A plain indexOf(word) can land on a
    // substring hit (e.g. "all" inside "finally") that precedes the real
    // whole-word occurrence, producing context for the wrong place.
    const match = new RegExp(`\\b${word}\\b`, "i").exec(lowerText);
    if (!match) continue;

    // Context window: up to 30 characters on each side of the keyword.
    const start = Math.max(0, match.index - 30);
    const end = Math.min(text.length, match.index + word.length + 30);
    const context = text.slice(start, end);

    // The key only encodes the keyword; conflicts are decided from `value`.
    result.set(`logical:${word}`, { step: stepNum, value: word, context });
  }

  return result;
}
|
|
135
|
+
|
|
136
|
+
/**
 * Extract sign/direction keywords (the keys of SIGN_WORDS) from text.
 *
 * Matching is case-insensitive and whole-word. At most one record is produced
 * per keyword, describing its first whole-word occurrence. The stored context
 * has the form `<subject>: <surrounding text>`, where the subject is the first
 * identifier-like word in the surrounding text (skipping a leading "the"), or
 * "value" when none is found.
 *
 * @param text - Text of the step to scan
 * @param stepNum - Step number stored on every record produced
 * @returns Map keyed by `sign:<word>` to the claim record for that word
 */
function extractSignClaims(text: string, stepNum: number): Map<string, ClaimRecord> {
  const result = new Map<string, ClaimRecord>();
  const lowerText = text.toLowerCase();

  for (const word of Object.keys(SIGN_WORDS)) {
    // Use the regex match position itself. A plain indexOf(word) can land on a
    // substring hit (e.g. "less" inside "unless") that precedes the real
    // whole-word occurrence, producing context and subject for the wrong place.
    const match = new RegExp(`\\b${word}\\b`, "i").exec(lowerText);
    if (!match) continue;

    // Context window: up to 30 characters on each side of the keyword.
    const start = Math.max(0, match.index - 30);
    const end = Math.min(text.length, match.index + word.length + 30);
    const context = text.slice(start, end);

    // Subject heuristic: first word of the context window, ignoring a leading "the".
    const varMatch = context.match(/\b(the\s+)?([a-zA-Z_][a-zA-Z0-9_]*)\b/i);
    const subject = varMatch ? varMatch[2] || "value" : "value";

    // The key only encodes the keyword; the subject travels inside `context`.
    result.set(`sign:${word}`, { step: stepNum, value: word, context: `${subject}: ${context}` });
  }

  return result;
}
|
|
163
|
+
|
|
164
|
+
/**
 * Decide whether two claims recorded under the same key contradict each other.
 *
 * The key determines which rule applies:
 * - a bare identifier (e.g. `x`) is a variable: a contradiction is reported when
 *   the two values differ. Values that both parse as numbers are compared
 *   numerically, so `5` and `5.0` are NOT a reassignment;
 * - `logical:<word>`: reported when the new value is listed in LOGICAL_ABSOLUTES
 *   as conflicting with the old value;
 * - `sign:<word>`: reported when the new value is listed in SIGN_WORDS as
 *   conflicting with the old value.
 *
 * @param key - Tracking key the two claims were stored under
 * @param oldClaim - Earlier claim
 * @param newClaim - Later claim
 * @returns The contradiction, or null when the claims are compatible
 */
function findContradiction(
  key: string,
  oldClaim: ClaimRecord,
  newClaim: ClaimRecord,
): Contradiction | null {
  // Value reassignment check (plain variable name)
  if (/^[a-z_][a-z0-9_]*$/i.test(key)) {
    // Parse a claim value as a finite number; blank or non-numeric text yields null.
    const toNumber = (raw: string): number | null => {
      if (raw.trim() === "") return null;
      const parsed = Number(raw);
      return Number.isFinite(parsed) ? parsed : null;
    };
    const oldNum = toNumber(oldClaim.value);
    const newNum = toNumber(newClaim.value);
    // Compare numerically when possible so formatting differences ("5" vs "5.0")
    // are not mistaken for a changed value; otherwise fall back to text equality.
    const differs =
      oldNum !== null && newNum !== null ? oldNum !== newNum : oldClaim.value !== newClaim.value;

    if (differs) {
      return {
        type: "value_reassignment",
        description: `Variable '${key}' was ${oldClaim.value} in step ${oldClaim.step}, now ${newClaim.value}`,
        subject: key,
        original_step: oldClaim.step,
        original_value: oldClaim.value,
        conflicting_step: newClaim.step,
        conflicting_value: newClaim.value,
        confidence: 0.9,
      };
    }
  }

  // Logical conflict check
  if (key.startsWith("logical:")) {
    const conflicts = LOGICAL_ABSOLUTES[oldClaim.value];
    if (conflicts?.includes(newClaim.value)) {
      return {
        type: "logical_conflict",
        description: `Logical conflict: '${oldClaim.value}' in step ${oldClaim.step} vs '${newClaim.value}' in step ${newClaim.step}`,
        // Logical keys are `logical:<word>` and carry no subject segment.
        subject: "claim",
        original_step: oldClaim.step,
        original_value: oldClaim.value,
        conflicting_step: newClaim.step,
        conflicting_value: newClaim.value,
        confidence: 0.85,
      };
    }
  }

  // Sign flip check
  if (key.startsWith("sign:")) {
    const conflicts = SIGN_WORDS[oldClaim.value];
    if (conflicts?.includes(newClaim.value)) {
      // Sign keys are `sign:<word>`; the subject is stored as the
      // `<subject>:` prefix of the claim context, not in the key.
      const subject = newClaim.context.match(/^([^:]+):/)?.[1] ?? "value";
      return {
        type: "sign_flip",
        description: `Sign flip for '${subject}': ${oldClaim.value} → ${newClaim.value}`,
        subject,
        original_step: oldClaim.step,
        original_value: oldClaim.value,
        conflicting_step: newClaim.step,
        conflicting_value: newClaim.value,
        confidence: 0.8,
      };
    }
  }

  return null;
}
|
|
226
|
+
|
|
227
|
+
/**
 * Check a sequence of reasoning steps for contradictions.
 *
 * Steps are processed in the order given. For each step, assignments,
 * logical-absolute keywords and sign/direction keywords are extracted and
 * compared against what earlier steps recorded:
 * - a variable whose value differs from its most recent earlier value,
 * - a logical keyword whose listed opposite appeared in an earlier step,
 * - a sign keyword whose listed opposite appeared in an earlier step.
 *
 * Claims are compared only against records from a different step number, and
 * the tracking maps keep the most recent record per variable/keyword.
 * Fewer than two steps always yields no contradictions.
 *
 * @param steps - Array of thought texts with step numbers
 * @returns Consistency check result
 */
export function checkConsistency(
  steps: Array<{ step: number; thought: string }>,
): ConsistencyResult {
  if (steps.length < 2) {
    return {
      has_contradictions: false,
      contradictions: [],
      steps_analyzed: steps.length,
    };
  }

  const contradictions: Contradiction[] = [];

  // Most recent claim per variable name
  const allAssignments = new Map<string, ClaimRecord>();
  // Most recent claim per keyword (the word itself is the lookup key)
  const allLogicalByValue = new Map<string, ClaimRecord>();
  const allSignsByValue = new Map<string, ClaimRecord>();

  for (const { step, thought } of steps) {
    const assignments = extractAssignments(thought, step);
    const logical = extractLogicalClaims(thought, step);
    const signs = extractSignClaims(thought, step);

    // Variable reassignment: each variable appears once per step, so the
    // tracking map can be updated as we go (latest wins).
    for (const [key, claim] of assignments) {
      const existing = allAssignments.get(key);
      if (existing && existing.step !== step) {
        const contradiction = findContradiction(key, existing, claim);
        if (contradiction) {
          contradictions.push(contradiction);
        }
      }
      allAssignments.set(key, claim);
    }

    // Logical conflicts. Compare every keyword of this step against earlier
    // steps BEFORE recording any of them: recording inside the same loop would
    // let a keyword repeated in this step overwrite the earlier-step record
    // that its opposite (later in the same step) still needs to be checked against.
    for (const claim of logical.values()) {
      for (const conflictWord of LOGICAL_ABSOLUTES[claim.value] ?? []) {
        const existing = allLogicalByValue.get(conflictWord);
        if (existing && existing.step !== step) {
          contradictions.push({
            type: "logical_conflict",
            description: `Logical conflict: '${existing.value}' in step ${existing.step} vs '${claim.value}' in step ${step}`,
            subject: "claim",
            original_step: existing.step,
            original_value: existing.value,
            conflicting_step: step,
            conflicting_value: claim.value,
            confidence: 0.85,
          });
        }
      }
    }
    for (const claim of logical.values()) {
      allLogicalByValue.set(claim.value, claim);
    }

    // Sign conflicts: same compare-then-record ordering as above.
    for (const claim of signs.values()) {
      for (const conflictWord of SIGN_WORDS[claim.value] ?? []) {
        const existing = allSignsByValue.get(conflictWord);
        if (existing && existing.step !== step) {
          // Sign claim contexts are stored as `<subject>: <text>`.
          const subject = claim.context.match(/^([^:]+):/)?.[1] ?? "value";
          contradictions.push({
            type: "sign_flip",
            description: `Sign flip for '${subject}': ${existing.value} → ${claim.value}`,
            subject,
            original_step: existing.step,
            original_value: existing.value,
            conflicting_step: step,
            conflicting_value: claim.value,
            confidence: 0.8,
          });
        }
      }
    }
    for (const claim of signs.values()) {
      allSignsByValue.set(claim.value, claim);
    }
  }

  return {
    has_contradictions: contradictions.length > 0,
    contradictions,
    steps_analyzed: steps.length,
  };
}
|
|
331
|
+
|
|
332
|
+
/**
 * Report the contradictions that a newly added step introduces.
 *
 * Runs the full consistency check over the prior steps followed by the new
 * step, then keeps only the contradictions whose conflicting step is the new one.
 *
 * @param newStep - The step being added
 * @param priorSteps - Steps that came before it
 * @returns Contradictions attributed to the new step (empty when there are none)
 */
export function checkStepConsistency(
  newStep: { step: number; thought: string },
  priorSteps: Array<{ step: number; thought: string }>,
): Contradiction[] {
  const timeline = [...priorSteps, newStep];
  const { contradictions } = checkConsistency(timeline);

  const introduced: Contradiction[] = contradictions.filter(
    ({ conflicting_step }) => conflicting_step === newStep.step,
  );
  return introduced;
}
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Guidance Engine - Proactive reasoning assistance
|
|
3
|
+
* Research-backed failure pattern detection and guidance generation
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import type { ThoughtRecord } from "../session.ts";
|
|
7
|
+
import type { VerificationDomain } from "../verification.ts";
|
|
8
|
+
|
|
9
|
+
// ============================================================================
|
|
10
|
+
// FAILURE PATTERNS - Research-backed reasoning hazards
|
|
11
|
+
// ============================================================================
|
|
12
|
+
|
|
13
|
+
/**
 * Description of one reasoning hazard and the advice that goes with it.
 */
export interface FailurePattern {
  /** Expression tested against the thought text. */
  trigger: RegExp;
  /** What can go wrong when the pattern is present. */
  risk: string;
  /** Advice emitted when the pattern is detected. */
  guidance: string;
  /** Whether detecting this pattern should recommend a checkpoint. */
  checkpoint: boolean;
  /** Minimum thought length (in characters) for the pattern to apply. */
  minLength?: number;
}
|
|
20
|
+
|
|
21
|
+
/**
 * Known reasoning hazards, keyed by pattern name.
 *
 * A pattern applies when the thought is at least `minLength` characters long
 * and its `trigger` matches. None of the triggers use the global flag, so
 * `trigger.test()` is stateless across calls.
 */
export const FAILURE_PATTERNS: Record<string, FailurePattern> = {
  premature_conclusion: {
    // Only flag if "answer is/=:" starts within the first 100 chars of the
    // whole thought AND the total response is long (short responses are
    // expected for structured phase outputs).
    // `[\s\S]` without the `m` flag anchors to the start of the text; with
    // `^.{0,100}` plus `m`, a conclusion on the last line of a long,
    // fully-worked thought would match as well.
    trigger: /^[\s\S]{0,100}answer\s*(is|=|:)/i,
    risk: "Concluding without showing intermediate steps",
    guidance: "Show your work before concluding",
    checkpoint: true,
    minLength: 200, // Don't flag short structured outputs - they're intentional
  },
  arithmetic_chain: {
    // Three numbers joined by two arithmetic operators, e.g. "3 + 4 * 5"
    trigger: /\d+\s*[+\-*/]\s*\d+\s*[+\-*/]\s*\d+/,
    risk: "Arithmetic chain prone to carry-forward errors",
    guidance: "Verify each arithmetic step independently",
    checkpoint: true,
    minLength: 20,
  },
  short_answer: {
    // Matches only when the entire thought is a single line of at most 80 chars.
    // Intended for long-form reasoning contexts, not structured phase outputs.
    trigger: /^.{0,80}$/,
    risk: "Answer too brief - may lack reasoning",
    guidance: "Show intermediate steps",
    checkpoint: false,
    minLength: 0,
    // NOTE: This pattern is disabled when guidance=false (phase-based iteration)
    // It's designed for unstructured LLM outputs, not intentionally terse phases
  },
  contradiction: {
    trigger: /but\s+(also|then|wait)|however.*but|on\s+the\s+other\s+hand.*yet/i,
    risk: "Potential contradiction in reasoning",
    guidance: "Resolve conflicting statements before proceeding",
    checkpoint: true,
    minLength: 100,
  },
  overconfident_complex: {
    trigger: /obviously|clearly|trivially|of\s+course/i,
    risk: "Overconfidence may mask errors",
    guidance: "Verify 'obvious' steps explicitly",
    checkpoint: false,
    minLength: 100,
  },
  unchecked_assumption: {
    trigger: /let's\s+assume|assuming\s+that|suppose\s+that/i,
    risk: "Assumption may not hold",
    guidance: "Verify assumption is warranted by the problem",
    checkpoint: false,
    minLength: 100,
  },
};
|
|
70
|
+
|
|
71
|
+
// The set of purpose category names that are considered valid.
export const VALID_PURPOSES = new Set<string>([
  "analysis", "action", "reflection", "decision", "summary",
  "validation", "exploration", "hypothesis", "correction", "planning",
]);
|
|
84
|
+
|
|
85
|
+
// ============================================================================
|
|
86
|
+
// GUIDANCE ENGINE - Proactive reasoning assistance
|
|
87
|
+
// ============================================================================
|
|
88
|
+
|
|
89
|
+
/**
 * Result of analysing a single thought for reasoning hazards.
 */
export interface ThoughtAnalysis {
  /** Names of the failure patterns that matched. */
  patterns_detected: string[];
  /** Advice strings produced for the thought. */
  guidance: string[];
  /** True when pausing to verify is recommended. */
  checkpoint_recommended: boolean;
  /** Suggested follow-up action, or null when there is none. */
  suggested_next: string | null;
  /** Overall risk rating for the thought. */
  risk_level: "low" | "medium" | "high";
}
|
|
96
|
+
|
|
97
|
+
/**
 * Analyse one thought for known reasoning hazards and produce guidance.
 *
 * The analysis combines:
 * - every entry of FAILURE_PATTERNS whose `minLength` is met and whose trigger matches,
 * - domain-specific hints for steps after the first ("math" thoughts containing `=`,
 *   "code" thoughts mentioning loops/iteration/recursion),
 * - a confidence-trajectory check over the two most recent prior confidences
 *   (a drop of more than 0.2 adds guidance).
 *
 * Each triggered pattern, the code-domain hint and the confidence drop add one
 * point of risk: 0 points is "low", 1-2 is "medium", 3 or more is "high".
 * At most the first three guidance strings are returned.
 *
 * @param thought - Text of the current thought
 * @param step - Step number of the current thought
 * @param priorThoughts - Earlier thought records (their `verification` data is consulted)
 * @param domain - Domain used to select domain-specific hints
 * @returns The analysis for this thought
 */
export function analyzeThought(
  thought: string,
  step: number,
  priorThoughts: ThoughtRecord[],
  domain: VerificationDomain,
): ThoughtAnalysis {
  const patterns_detected: string[] = [];
  const guidance: string[] = [];
  let checkpoint_recommended = false;
  let risk_score = 0;
  const thoughtLength = thought.length;

  // Check against known failure patterns
  for (const [name, pattern] of Object.entries(FAILURE_PATTERNS)) {
    const minLen = pattern.minLength ?? 0;
    if (thoughtLength >= minLen && pattern.trigger.test(thought)) {
      patterns_detected.push(name);
      guidance.push(pattern.guidance);
      if (pattern.checkpoint) checkpoint_recommended = true;
      risk_score++;
    }
  }

  // Domain-specific guidance (only once there is a previous step to compare with)
  if (step > 1) {
    if (domain === "math" && /=/.test(thought) && priorThoughts.length > 0) {
      // Only add the generic equation hint when nothing more specific was raised
      if (guidance.length === 0) {
        guidance.push("Verify equation transformation preserves equality");
      }
    } else if (domain === "code" && /loop|iterate|recursive/i.test(thought)) {
      guidance.push("Verify termination condition exists");
      checkpoint_recommended = true;
      risk_score++;
    }
  }

  // Confidence trajectory analysis over prior thoughts that carry a confidence
  const confidences = priorThoughts
    .map((t) => t.verification?.confidence)
    .filter((c): c is number => c !== undefined);

  if (confidences.length >= 2) {
    const recent = confidences.slice(-2);
    const prev = recent[0] ?? 0;
    const curr = recent[1] ?? 0;
    if (curr < prev - 0.2) {
      guidance.push("Confidence dropping - consider revisiting assumptions");
      risk_score++;
    }
  }

  // Suggest next action. The premature-conclusion advice is checked first:
  // that pattern always sets checkpoint_recommended, so testing the checkpoint
  // flag first would make this branch unreachable. The checkpoint itself is
  // still reported through `checkpoint_recommended`.
  let suggested_next: string | null = null;
  if (patterns_detected.includes("premature_conclusion")) {
    suggested_next = "Review all constraints before finalizing";
  } else if (checkpoint_recommended) {
    suggested_next = "Pause and verify current step before proceeding";
  } else if (step >= 3 && !priorThoughts.some((t) => t.verification?.passed)) {
    suggested_next = "Consider verifying intermediate steps";
  }

  const risk_level: ThoughtAnalysis["risk_level"] =
    risk_score >= 3 ? "high" : risk_score >= 1 ? "medium" : "low";

  return {
    patterns_detected,
    guidance: guidance.slice(0, 3),
    checkpoint_recommended,
    suggested_next,
    risk_level,
  };
}
|
|
168
|
+
|
|
169
|
+
// ============================================================================
|
|
170
|
+
// DOMAIN DETECTION
|
|
171
|
+
// ============================================================================
|
|
172
|
+
|
|
173
|
+
/**
 * Guess the verification domain of a thought from keyword heuristics.
 *
 * Checks are applied in priority order and the first hit wins:
 * 1. "math"  - a numeric binary expression (e.g. `2 + 3`, `4 = 4`) or math vocabulary,
 * 2. "code"  - programming keywords or arrow tokens (`=>`, `->`),
 * 3. "logic" - inference vocabulary (therefore, implies, premise, ...),
 * 4. "general" otherwise.
 *
 * Keywords are matched as whole words where a substring hit would be
 * misleading, so ordinary words such as "variable", "constant", "classic"
 * or "resolve" do not select the code or math domain.
 *
 * @param thought - Text to classify
 * @returns The detected domain
 */
export function detectDomain(thought: string): VerificationDomain {
  if (/\d+\s*[+\-*/^=]\s*\d+|equation|\bsolve|derivative|integral|sum\s+of/i.test(thought)) {
    return "math";
  }
  if (
    /\b(?:function|class|return|const|let|var|async|await)\b|\b(?:def|import|fn|impl)\s|=>|->/i.test(
      thought,
    )
  ) {
    return "code";
  }
  if (/if\s+.+\s+then|therefore|implies|hence|thus|conclude|premise|valid|invalid/i.test(thought)) {
    return "logic";
  }
  return "general";
}
|