verifiable-thinking-mcp 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +339 -0
- package/package.json +75 -0
- package/src/index.ts +38 -0
- package/src/lib/cache.ts +246 -0
- package/src/lib/compression.ts +804 -0
- package/src/lib/compute/cache.ts +86 -0
- package/src/lib/compute/classifier.ts +555 -0
- package/src/lib/compute/confidence.ts +79 -0
- package/src/lib/compute/context.ts +154 -0
- package/src/lib/compute/extract.ts +200 -0
- package/src/lib/compute/filter.ts +224 -0
- package/src/lib/compute/index.ts +171 -0
- package/src/lib/compute/math.ts +247 -0
- package/src/lib/compute/patterns.ts +564 -0
- package/src/lib/compute/registry.ts +145 -0
- package/src/lib/compute/solvers/arithmetic.ts +65 -0
- package/src/lib/compute/solvers/calculus.ts +249 -0
- package/src/lib/compute/solvers/derivation-core.ts +371 -0
- package/src/lib/compute/solvers/derivation-latex.ts +160 -0
- package/src/lib/compute/solvers/derivation-mistakes.ts +1046 -0
- package/src/lib/compute/solvers/derivation-simplify.ts +451 -0
- package/src/lib/compute/solvers/derivation-transform.ts +620 -0
- package/src/lib/compute/solvers/derivation.ts +67 -0
- package/src/lib/compute/solvers/facts.ts +120 -0
- package/src/lib/compute/solvers/formula.ts +728 -0
- package/src/lib/compute/solvers/index.ts +36 -0
- package/src/lib/compute/solvers/logic.ts +422 -0
- package/src/lib/compute/solvers/probability.ts +307 -0
- package/src/lib/compute/solvers/statistics.ts +262 -0
- package/src/lib/compute/solvers/word-problems.ts +408 -0
- package/src/lib/compute/types.ts +107 -0
- package/src/lib/concepts.ts +111 -0
- package/src/lib/domain.ts +731 -0
- package/src/lib/extraction.ts +912 -0
- package/src/lib/index.ts +122 -0
- package/src/lib/judge.ts +260 -0
- package/src/lib/math/ast.ts +842 -0
- package/src/lib/math/index.ts +8 -0
- package/src/lib/math/operators.ts +171 -0
- package/src/lib/math/tokenizer.ts +477 -0
- package/src/lib/patterns.ts +200 -0
- package/src/lib/session.ts +825 -0
- package/src/lib/think/challenge.ts +323 -0
- package/src/lib/think/complexity.ts +504 -0
- package/src/lib/think/confidence-drift.ts +507 -0
- package/src/lib/think/consistency.ts +347 -0
- package/src/lib/think/guidance.ts +188 -0
- package/src/lib/think/helpers.ts +568 -0
- package/src/lib/think/hypothesis.ts +216 -0
- package/src/lib/think/index.ts +127 -0
- package/src/lib/think/prompts.ts +262 -0
- package/src/lib/think/route.ts +358 -0
- package/src/lib/think/schema.ts +98 -0
- package/src/lib/think/scratchpad-schema.ts +662 -0
- package/src/lib/think/spot-check.ts +961 -0
- package/src/lib/think/types.ts +93 -0
- package/src/lib/think/verification.ts +260 -0
- package/src/lib/tokens.ts +177 -0
- package/src/lib/verification.ts +620 -0
- package/src/prompts/index.ts +10 -0
- package/src/prompts/templates.ts +336 -0
- package/src/resources/index.ts +8 -0
- package/src/resources/sessions.ts +196 -0
- package/src/tools/compress.ts +138 -0
- package/src/tools/index.ts +5 -0
- package/src/tools/scratchpad.ts +2659 -0
- package/src/tools/sessions.ts +144 -0
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LRU Cache for computed results
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import type { CacheEntry, CacheStats, ComputeResult } from "./types.ts";
|
|
6
|
+
|
|
7
|
+
/**
 * Bounded in-memory cache of compute results with least-recently-used
 * eviction and a per-entry time-to-live.
 *
 * Lookups are keyed by the question text after normalization (lower-cased,
 * whitespace runs collapsed to one space, trimmed), so texts that differ only
 * in case or spacing share one entry.
 */
export class ComputeCache {
  /**
   * Entries in recency order. A Map iterates in insertion order, so the first
   * key is always the least recently used one.
   */
  private cache: Map<string, CacheEntry> = new Map();
  private readonly maxSize: number;
  private readonly ttlMs: number;
  private hits = 0;
  private misses = 0;

  /**
   * @param maxSize - Entry count at which the oldest entry is evicted before
   *   a new one is inserted (default 1000).
   * @param ttlMs - How long an entry stays valid, in milliseconds
   *   (default five minutes).
   */
  constructor(maxSize = 1000, ttlMs = 5 * 60 * 1000) {
    this.maxSize = maxSize;
    this.ttlMs = ttlMs;
  }

  /** Build the cache key: lower-case, collapse whitespace runs, trim. */
  private normalize(text: string): string {
    return text.toLowerCase().replace(/\s+/g, " ").trim();
  }

  /**
   * Look up the cached result for `text`.
   *
   * A missing or expired entry counts as a miss (an expired entry is also
   * removed); a live entry counts as a hit and becomes the most recently used.
   *
   * @param text - Question text; normalized before lookup.
   * @returns The cached result, or `null` on a miss.
   */
  get(text: string): ComputeResult | null {
    const key = this.normalize(text);
    const entry = this.cache.get(key);

    if (!entry) {
      this.misses++;
      return null;
    }

    // Check TTL
    if (Date.now() - entry.timestamp > this.ttlMs) {
      this.cache.delete(key);
      this.misses++;
      return null;
    }

    // Move to end (most recently used)
    this.cache.delete(key);
    this.cache.set(key, entry);
    this.hits++;
    return entry.result;
  }

  /**
   * Store `result` for `text`, stamping it with the current time.
   *
   * Overwriting an existing key replaces it in place (no other entry is
   * evicted) and marks it as most recently used. Inserting a new key at
   * capacity evicts the least recently used entry first.
   *
   * @param text - Question text; normalized before storing.
   * @param result - Result to cache.
   */
  set(text: string, result: ComputeResult): void {
    const key = this.normalize(text);

    // Map.set on an existing key keeps its old position, so remove it first:
    // this refreshes recency and frees its slot so an overwrite never evicts
    // an unrelated entry.
    this.cache.delete(key);

    // Evict oldest if at capacity
    if (this.cache.size >= this.maxSize) {
      const oldest = this.cache.keys().next().value;
      // Compare against undefined, not truthiness: the empty string is a
      // valid key (whitespace-only text normalizes to "") and must be evictable.
      if (oldest !== undefined) this.cache.delete(oldest);
    }

    this.cache.set(key, { result, timestamp: Date.now() });
  }

  /**
   * Snapshot of the hit/miss counters and current entry count.
   *
   * @returns Counters plus `hitRate` (hits / lookups), which is 0 when no
   *   lookups have been recorded.
   */
  stats(): CacheStats {
    const total = this.hits + this.misses;
    return {
      hits: this.hits,
      misses: this.misses,
      size: this.cache.size,
      hitRate: total > 0 ? this.hits / total : 0,
    };
  }

  /** Remove every entry and reset the hit/miss counters to zero. */
  clear(): void {
    this.cache.clear();
    this.hits = 0;
    this.misses = 0;
  }
}
|
|
74
|
+
|
|
75
|
+
// Global cache instance
|
|
76
|
+
/** Shared module-level cache, built with the constructor defaults (1000 entries, 5-minute TTL). */
export const computeCache = new ComputeCache();
|
|
77
|
+
|
|
78
|
+
/**
 * Report statistics for the shared module-level compute cache.
 *
 * @returns Whatever `computeCache.stats()` produces: hit and miss counters,
 *   current entry count, and hit rate.
 */
export function getCacheStats(): CacheStats {
  const snapshot = computeCache.stats();
  return snapshot;
}
|
|
82
|
+
|
|
83
|
+
/**
 * Reset the shared module-level compute cache by delegating to
 * `computeCache.clear()`.
 */
export function clearCache(): void {
  computeCache.clear();
}
|
|
@@ -0,0 +1,555 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Question Classifier - Fast-path routing via bitmask
|
|
3
|
+
*
|
|
4
|
+
* Runs ONCE per question and returns a bitmask indicating which solver
|
|
5
|
+
* types are likely to match. This allows skipping entire solver categories
|
|
6
|
+
* without running their expensive regex patterns.
|
|
7
|
+
*
|
|
8
|
+
* Performance: ~0.01ms to classify, saves ~0.05ms per skipped solver tier
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
// =============================================================================
|
|
12
|
+
// SOLVER TYPE BITMASK
|
|
13
|
+
// =============================================================================
|
|
14
|
+
|
|
15
|
+
/**
 * One bit per solver category. Each flag occupies its own bit position so
 * several categories can be OR-ed together into a single number.
 */
export const SolverType = {
  /** Empty mask (value 0). */
  NONE: 0,
  /** Bit 0 (1). */
  ARITHMETIC: 1 << 0,
  /** Bit 1 (2): percentage, factorial, modulo, prime, fibonacci. */
  FORMULA_TIER1: 1 << 1,
  /** Bit 2 (4): sqrt, power, gcd, lcm. */
  FORMULA_TIER2: 1 << 2,
  /** Bit 3 (8): log, quadratic, combinations, permutations, last digit. */
  FORMULA_TIER3: 1 << 3,
  /** Bit 4 (16): pythagorean, trailing zeros, series, matrix, interest. */
  FORMULA_TIER4: 1 << 4,
  /** Bit 5 (32). */
  WORD_PROBLEM: 1 << 5,
  /** Bit 6 (64). */
  MULTI_STEP: 1 << 6,
  /** Bit 7 (128). */
  CALCULUS: 1 << 7,
  /** Bit 8 (256): known mathematical facts (rationality, etc.). */
  FACTS: 1 << 8,
  /** Bit 9 (512): propositional logic (modus ponens/tollens, syllogism, XOR). */
  LOGIC: 1 << 9,
  /** Bit 10 (1024): independent events, gambler's fallacy. */
  PROBABILITY: 1 << 10,
  /** Bit 11 (2048): algebraic derivations and proofs. */
  DERIVATION: 1 << 11,
} as const;
|
|
31
|
+
|
|
32
|
+
/** Pre-combined masks covering common groups of `SolverType` flags. */
export const SolverGroup = {
  /** All four formula tiers OR-ed together. */
  FORMULA_ALL:
    SolverType.FORMULA_TIER1 | SolverType.FORMULA_TIER2 | SolverType.FORMULA_TIER3 | SolverType.FORMULA_TIER4,
  /** Single-step and multi-step word problems. */
  WORD_ALL: SolverType.WORD_PROBLEM | SolverType.MULTI_STEP,
  /** Every solver: the low 12 bits set. */
  ALL: 0xfff,
} as const;
|
|
42
|
+
|
|
43
|
+
/** Numeric bitmask formed by OR-ing `SolverType` flag values together. */
export type SolverMask = number;
|
|
44
|
+
|
|
45
|
+
// =============================================================================
|
|
46
|
+
// CLASSIFIER RESULT
|
|
47
|
+
// =============================================================================
|
|
48
|
+
|
|
49
|
+
/** Output of `classifyQuestion`: the solver mask plus values computed once for reuse. */
export interface ClassifierResult {
  /** OR of the solver types whose classifier rules matched the text. */
  mask: SolverMask;
  /** The input text lower-cased (computed once, reused). */
  lower: string;
  /** Cheap presence checks on the original text. */
  chars: {
    /** Text contains at least one decimal digit. */
    hasDigit: boolean;
    /** Text contains "%". */
    hasPercent: boolean;
    /** Text contains "^" or "**". */
    hasCaret: boolean;
    /** Text contains "[". */
    hasBracket: boolean;
    /** Text contains "$". */
    hasDollar: boolean;
    /** Text contains "!". */
    hasExclaim: boolean;
    /** Text contains the letter x in either case. */
    hasX: boolean;
  };
}
|
|
65
|
+
|
|
66
|
+
// =============================================================================
|
|
67
|
+
// FAST CHARACTER CHECKS (inlined for speed)
|
|
68
|
+
// =============================================================================
|
|
69
|
+
|
|
70
|
+
/** Matches any single decimal digit; used to compute the `hasDigit` flag. */
const DIGIT_RE = /\d/;
|
|
71
|
+
/** Matches the letter x in either case; used to compute the `hasX` flag. */
const X_RE = /x/i;
|
|
72
|
+
|
|
73
|
+
// =============================================================================
|
|
74
|
+
// CLASSIFIER RULES
|
|
75
|
+
// Each rule adds solver types to the mask based on cheap checks
|
|
76
|
+
// =============================================================================
|
|
77
|
+
|
|
78
|
+
/** A single classification rule: a cheap predicate plus the flags it contributes. */
interface ClassifierRule {
  /**
   * Predicate over the original text, its lower-cased form, and the
   * precomputed character flags. The rule contributes nothing when it
   * returns false.
   */
  guard: (text: string, lower: string, chars: ClassifierResult["chars"]) => boolean;
  /** Solver flags OR-ed into the mask when `guard` returns true. */
  types: SolverMask;
}
|
|
84
|
+
|
|
85
|
+
/** True when `haystack` contains at least one of `needles`. */
const hasAny = (haystack: string, ...needles: string[]): boolean =>
  needles.some((needle) => haystack.includes(needle));

/** True when `haystack` contains every one of `needles`. */
const hasAll = (haystack: string, ...needles: string[]): boolean =>
  needles.every((needle) => haystack.includes(needle));

/**
 * Rule table consulted by the classifier. Every rule whose guard passes has
 * its `types` OR-ed into the resulting mask, so rules are independent of one
 * another and several may contribute the same flag.
 */
const CLASSIFIER_RULES: ClassifierRule[] = [
  // ---- Arithmetic: a digit, an operator, and no letters other than e/x ----
  {
    types: SolverType.ARITHMETIC,
    guard: (text, _lower, chars) =>
      chars.hasDigit && /[+\-*/]/.test(text) && !/[a-df-wyzA-DF-WYZ]/.test(text),
  },

  // ---- Formula tier 1 ----
  // Percentage
  {
    types: SolverType.FORMULA_TIER1,
    guard: (_text, _lower, chars) => chars.hasPercent && chars.hasDigit,
  },
  // Factorial
  {
    types: SolverType.FORMULA_TIER1,
    guard: (_text, lower, chars) =>
      (chars.hasExclaim && chars.hasDigit) || lower.includes("factorial"),
  },
  // Modulo
  {
    types: SolverType.FORMULA_TIER1,
    guard: (_text, lower) => hasAny(lower, "mod", "remainder"),
  },
  // Prime
  {
    types: SolverType.FORMULA_TIER1,
    guard: (_text, lower) => lower.includes("prime"),
  },
  // Fibonacci
  {
    types: SolverType.FORMULA_TIER1,
    guard: (_text, lower) => lower.includes("fibonacci"),
  },

  // ---- Formula tier 2 ----
  // Square root (word, radical sign, or "root")
  {
    types: SolverType.FORMULA_TIER2,
    guard: (text, lower) =>
      lower.includes("sqrt") || text.includes("\u221A") || lower.includes("root"),
  },
  // Power
  {
    types: SolverType.FORMULA_TIER2,
    guard: (_text, lower, chars) => chars.hasCaret || lower.includes("power"),
  },
  // GCD / LCM
  {
    types: SolverType.FORMULA_TIER2,
    guard: (_text, lower) => hasAny(lower, "gcd", "lcm", "greatest common", "least common"),
  },

  // ---- Formula tier 3 ----
  // Logarithm
  {
    types: SolverType.FORMULA_TIER3,
    guard: (_text, lower) => hasAny(lower, "log", "ln"),
  },
  // Quadratic: an x followed by a superscript two, "2" or "^", plus a "0" somewhere
  {
    types: SolverType.FORMULA_TIER3,
    guard: (text, _lower, chars) => chars.hasX && text.includes("0") && /x[\u00B22^]/.test(text),
  },
  // Combinations / permutations
  {
    types: SolverType.FORMULA_TIER3,
    guard: (text, lower) =>
      lower.includes("choose") ||
      / c /i.test(text) ||
      / p /i.test(text) ||
      hasAny(lower, "combination", "permutation", "arrangement"),
  },
  // Last digit
  {
    types: SolverType.FORMULA_TIER3,
    guard: (text, lower) => lower.includes("last digit") || /mod\s*10/i.test(text),
  },

  // ---- Formula tier 4 ----
  // Pythagorean
  {
    types: SolverType.FORMULA_TIER4,
    guard: (_text, lower) => lower.includes("hypoten"),
  },
  // Trailing zeros
  {
    types: SolverType.FORMULA_TIER4,
    guard: (_text, lower) => lower.includes("trailing"),
  },
  // Geometric series
  {
    types: SolverType.FORMULA_TIER4,
    guard: (text, lower) =>
      hasAny(lower, "infinite", "series") ||
      text.includes("...") ||
      (lower.includes("sum") && /1\s*\+\s*1\/\d/.test(text)),
  },
  // Matrix determinant
  {
    types: SolverType.FORMULA_TIER4,
    guard: (_text, lower, chars) => chars.hasBracket && hasAny(lower, "det", "determinant"),
  },
  // Compound interest
  {
    types: SolverType.FORMULA_TIER4,
    guard: (_text, lower, chars) =>
      (chars.hasDollar || lower.includes("interest")) && lower.includes("year"),
  },

  // ---- Word problems ----
  // Multiplication words
  {
    types: SolverType.WORD_PROBLEM,
    guard: (_text, lower) => hasAny(lower, "twice", "double", "triple", "times"),
  },
  // Division words
  {
    types: SolverType.WORD_PROBLEM,
    guard: (_text, lower) => hasAny(lower, "half", "third", "quarter", "divided"),
  },
  // Addition / subtraction words
  {
    types: SolverType.WORD_PROBLEM,
    guard: (_text, lower) =>
      hasAny(lower, "sum of", "plus", "minus", "difference", "more than", "less than"),
  },
  // Other patterns
  {
    types: SolverType.WORD_PROBLEM,
    guard: (_text, lower) =>
      hasAny(lower, "product of", "quotient", "squared", "cubed", "average of"),
  },

  // ---- Multi-step ----
  // Entity patterns ("Name has ..." plus a comparison word)
  {
    types: SolverType.MULTI_STEP,
    guard: (text, lower) =>
      /[A-Z][a-z]+\s+has/.test(text) &&
      hasAny(lower, "twice", "half", "more than", "less than", "fewer"),
  },
  // Question about an entity
  {
    types: SolverType.MULTI_STEP,
    guard: (text) => /how\s+many\s+does\s+[A-Z]/i.test(text),
  },

  // ---- Calculus ----
  // Derivative
  {
    types: SolverType.CALCULUS,
    guard: (_text, lower) => hasAny(lower, "derivative", "d/dx", "differentiate"),
  },
  // Integral
  {
    types: SolverType.CALCULUS,
    guard: (text, lower) => hasAny(lower, "integral", "integrate") || text.includes("\u222B"),
  },

  // ---- Facts: rationality questions ----
  {
    types: SolverType.FACTS,
    guard: (_text, lower) => hasAny(lower, "rational", "irrational"),
  },

  // ---- Logic ----
  // Modus ponens / tollens ("If P then Q ...")
  {
    types: SolverType.LOGIC,
    guard: (_text, lower) =>
      lower.includes("if ") && hasAny(lower, "yes or no", "is it", "is the"),
  },
  // Syllogism ("All A are B ...")
  {
    types: SolverType.LOGIC,
    guard: (_text, lower) => hasAll(lower, "all ", " are ", "valid"),
  },
  // XOR violation (exclusive or + both)
  {
    types: SolverType.LOGIC,
    guard: (_text, lower) => hasAll(lower, " or ", "exclusive", "both"),
  },

  // ---- Probability ----
  // Fair coin with a probability question
  {
    types: SolverType.PROBABILITY,
    guard: (_text, lower) => hasAll(lower, "fair", "coin") && hasAny(lower, "probability", "chance"),
  },
  // Independent events with a probability question
  {
    types: SolverType.PROBABILITY,
    guard: (_text, lower) =>
      lower.includes("independent") && hasAny(lower, "probability", "chance"),
  },
  // Streak + probability question (hot hand, gambler's fallacy)
  {
    types: SolverType.PROBABILITY,
    guard: (_text, lower) =>
      lower.includes("in a row") && hasAny(lower, "probability", "chance", "what's"),
  },
  // Birthday paradox (people + shared birthday)
  {
    types: SolverType.PROBABILITY,
    guard: (_text, lower) =>
      lower.includes("birthday") &&
      hasAny(lower, "share", "same") &&
      hasAny(lower, "people", "person", "room", "group"),
  },

  // ---- CRT-style word problems ----
  // Bat and ball (X costs $Y more than Z)
  {
    types: SolverType.WORD_PROBLEM,
    guard: (_text, lower, chars) => chars.hasDollar && hasAll(lower, "more than", "cost"),
  },
  // Lily pad doubling (doubles + days + half/cover)
  {
    types: SolverType.WORD_PROBLEM,
    guard: (_text, lower) => hasAll(lower, "double", "day") && hasAny(lower, "half", "cover"),
  },
  // Widget / machine (machines + minutes + widgets)
  {
    types: SolverType.WORD_PROBLEM,
    guard: (_text, lower) => hasAll(lower, "machine", "minute", "widget"),
  },
  // Harmonic mean (speed + return/back + average)
  {
    types: SolverType.WORD_PROBLEM,
    guard: (_text, lower) =>
      hasAny(lower, "mph", "km/h") && hasAny(lower, "return", "back") && lower.includes("average"),
  },
  // Catch-up problem (m/s or mph + head start/ahead)
  {
    types: SolverType.WORD_PROBLEM,
    guard: (_text, lower) => hasAny(lower, "m/s", "mph") && hasAny(lower, "head start", "ahead"),
  },
  // Sock drawer / pigeonhole (socks/balls + minimum + guarantee + pair/matching)
  {
    types: SolverType.WORD_PROBLEM,
    guard: (_text, lower) =>
      hasAny(lower, "sock", "ball") &&
      hasAny(lower, "minimum", "least") &&
      lower.includes("guarantee") &&
      hasAny(lower, "pair", "matching"),
  },

  // ---- Derivation ----
  // Prove / show / verify keywords
  {
    types: SolverType.DERIVATION,
    guard: (_text, lower) => hasAny(lower, "prove", "show that", "verify", "derivation"),
  },
  // Two or more equals signs (a = b = c chains)
  {
    types: SolverType.DERIVATION,
    guard: (text) => (text.match(/=/g)?.length ?? 0) >= 2,
  },
  // Arrow symbols
  {
    types: SolverType.DERIVATION,
    guard: (text) => text.includes("⟹") || text.includes("→") || text.includes("=>"),
  },

  // ---- Statistics (routed to FORMULA_TIER3 and WORD_PROBLEM) ----
  // Mean / average
  {
    types: SolverType.FORMULA_TIER3 | SolverType.WORD_PROBLEM,
    guard: (_text, lower) => hasAny(lower, "mean", "average"),
  },
  // Standard error
  {
    types: SolverType.FORMULA_TIER3 | SolverType.WORD_PROBLEM,
    guard: (_text, lower) => lower.includes("standard error"),
  },
  // Expected value
  {
    types: SolverType.FORMULA_TIER3 | SolverType.WORD_PROBLEM,
    guard: (_text, lower) => lower.includes("expected value") || hasAll(lower, "chance", "$"),
  },
  // Handshake problem
  {
    types: SolverType.FORMULA_TIER3 | SolverType.WORD_PROBLEM,
    guard: (_text, lower) => hasAny(lower, "handshake", "shakes hands"),
  },
  // Permutations with repetition (arrange letters in WORD)
  {
    types: SolverType.FORMULA_TIER3 | SolverType.WORD_PROBLEM,
    guard: (_text, lower) => hasAll(lower, "arrange", "letter"),
  },

  // ---- Logic (continued) ----
  // Affirming the consequent / denying the antecedent
  {
    types: SolverType.LOGIC,
    guard: (_text, lower) =>
      lower.includes("if ") && hasAny(lower, "therefore", "conclude", "valid"),
  },
  // De Morgan's laws
  {
    types: SolverType.LOGIC,
    guard: (_text, lower) =>
      hasAny(lower, "not", "¬") && hasAny(lower, " and ", " or ") && hasAny(lower, "=", "equivalent"),
  },
  // "Some A are B" syllogism patterns
  {
    types: SolverType.LOGIC,
    guard: (_text, lower) => hasAll(lower, "some ", " are ", "valid"),
  },
  // Contrapositive patterns (All A are B vs. All non-B are non-A)
  {
    types: SolverType.LOGIC,
    guard: (_text, lower) => hasAll(lower, "equivalent", "all ", "non-"),
  },
];
|
|
487
|
+
|
|
488
|
+
// =============================================================================
|
|
489
|
+
// MAIN CLASSIFIER FUNCTION
|
|
490
|
+
// =============================================================================
|
|
491
|
+
|
|
492
|
+
/**
 * Compute the solver bitmask for a question.
 *
 * Lower-cases the text once, derives the character-presence flags, then ORs
 * together the `types` of every rule in `CLASSIFIER_RULES` whose guard passes.
 * When no rule matches but the text contains a digit, the mask falls back to
 * `SolverType.ARITHMETIC` (bare expressions).
 *
 * @param text - The question text
 * @returns The mask together with the lower-cased text and character flags
 */
export function classifyQuestion(text: string): ClassifierResult {
  const lower = text.toLowerCase();

  // Character flags are read from the original (not lower-cased) text.
  const chars: ClassifierResult["chars"] = {
    hasDigit: DIGIT_RE.test(text),
    hasPercent: text.includes("%"),
    hasCaret: text.includes("^") || text.includes("**"),
    hasBracket: text.includes("["),
    hasDollar: text.includes("$"),
    hasExclaim: text.includes("!"),
    hasX: X_RE.test(text),
  };

  // Fold every matching rule's flags into one mask.
  const matched = CLASSIFIER_RULES.reduce<SolverMask>(
    (acc, rule) => (rule.guard(text, lower, chars) ? acc | rule.types : acc),
    0,
  );

  // Nothing matched: a text with digits is still worth trying as arithmetic.
  const mask: SolverMask = matched === 0 && chars.hasDigit ? SolverType.ARITHMETIC : matched;

  return { mask, lower, chars };
}
|
|
529
|
+
|
|
530
|
+
/**
 * Test whether a mask shares at least one set bit with a solver type.
 *
 * @param mask - Bitmask to inspect
 * @param solverType - Flag (or OR of flags) to look for
 * @returns true when `mask` and `solverType` have any bit in common
 */
export function shouldTrySolver(mask: SolverMask, solverType: number): boolean {
  const overlap = mask & solverType;
  return overlap !== 0;
}
|
|
536
|
+
|
|
537
|
+
/**
 * List the lower-case names of the solver types set in a mask.
 *
 * @param mask - Bitmask to describe
 * @returns Names in ascending bit order (arithmetic first, derivation last);
 *   empty when no known flag is set
 */
export function describeMask(mask: SolverMask): string[] {
  // Flag/label pairs in ascending bit order; output order follows this table.
  const labelled: ReadonlyArray<readonly [number, string]> = [
    [SolverType.ARITHMETIC, "arithmetic"],
    [SolverType.FORMULA_TIER1, "formula_tier1"],
    [SolverType.FORMULA_TIER2, "formula_tier2"],
    [SolverType.FORMULA_TIER3, "formula_tier3"],
    [SolverType.FORMULA_TIER4, "formula_tier4"],
    [SolverType.WORD_PROBLEM, "word_problem"],
    [SolverType.MULTI_STEP, "multi_step"],
    [SolverType.CALCULUS, "calculus"],
    [SolverType.FACTS, "facts"],
    [SolverType.LOGIC, "logic"],
    [SolverType.PROBABILITY, "probability"],
    [SolverType.DERIVATION, "derivation"],
  ];

  return labelled.filter(([flag]) => (mask & flag) !== 0).map(([, label]) => label);
}
|