verifiable-thinking-mcp 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. package/LICENSE +21 -0
  2. package/README.md +339 -0
  3. package/package.json +75 -0
  4. package/src/index.ts +38 -0
  5. package/src/lib/cache.ts +246 -0
  6. package/src/lib/compression.ts +804 -0
  7. package/src/lib/compute/cache.ts +86 -0
  8. package/src/lib/compute/classifier.ts +555 -0
  9. package/src/lib/compute/confidence.ts +79 -0
  10. package/src/lib/compute/context.ts +154 -0
  11. package/src/lib/compute/extract.ts +200 -0
  12. package/src/lib/compute/filter.ts +224 -0
  13. package/src/lib/compute/index.ts +171 -0
  14. package/src/lib/compute/math.ts +247 -0
  15. package/src/lib/compute/patterns.ts +564 -0
  16. package/src/lib/compute/registry.ts +145 -0
  17. package/src/lib/compute/solvers/arithmetic.ts +65 -0
  18. package/src/lib/compute/solvers/calculus.ts +249 -0
  19. package/src/lib/compute/solvers/derivation-core.ts +371 -0
  20. package/src/lib/compute/solvers/derivation-latex.ts +160 -0
  21. package/src/lib/compute/solvers/derivation-mistakes.ts +1046 -0
  22. package/src/lib/compute/solvers/derivation-simplify.ts +451 -0
  23. package/src/lib/compute/solvers/derivation-transform.ts +620 -0
  24. package/src/lib/compute/solvers/derivation.ts +67 -0
  25. package/src/lib/compute/solvers/facts.ts +120 -0
  26. package/src/lib/compute/solvers/formula.ts +728 -0
  27. package/src/lib/compute/solvers/index.ts +36 -0
  28. package/src/lib/compute/solvers/logic.ts +422 -0
  29. package/src/lib/compute/solvers/probability.ts +307 -0
  30. package/src/lib/compute/solvers/statistics.ts +262 -0
  31. package/src/lib/compute/solvers/word-problems.ts +408 -0
  32. package/src/lib/compute/types.ts +107 -0
  33. package/src/lib/concepts.ts +111 -0
  34. package/src/lib/domain.ts +731 -0
  35. package/src/lib/extraction.ts +912 -0
  36. package/src/lib/index.ts +122 -0
  37. package/src/lib/judge.ts +260 -0
  38. package/src/lib/math/ast.ts +842 -0
  39. package/src/lib/math/index.ts +8 -0
  40. package/src/lib/math/operators.ts +171 -0
  41. package/src/lib/math/tokenizer.ts +477 -0
  42. package/src/lib/patterns.ts +200 -0
  43. package/src/lib/session.ts +825 -0
  44. package/src/lib/think/challenge.ts +323 -0
  45. package/src/lib/think/complexity.ts +504 -0
  46. package/src/lib/think/confidence-drift.ts +507 -0
  47. package/src/lib/think/consistency.ts +347 -0
  48. package/src/lib/think/guidance.ts +188 -0
  49. package/src/lib/think/helpers.ts +568 -0
  50. package/src/lib/think/hypothesis.ts +216 -0
  51. package/src/lib/think/index.ts +127 -0
  52. package/src/lib/think/prompts.ts +262 -0
  53. package/src/lib/think/route.ts +358 -0
  54. package/src/lib/think/schema.ts +98 -0
  55. package/src/lib/think/scratchpad-schema.ts +662 -0
  56. package/src/lib/think/spot-check.ts +961 -0
  57. package/src/lib/think/types.ts +93 -0
  58. package/src/lib/think/verification.ts +260 -0
  59. package/src/lib/tokens.ts +177 -0
  60. package/src/lib/verification.ts +620 -0
  61. package/src/prompts/index.ts +10 -0
  62. package/src/prompts/templates.ts +336 -0
  63. package/src/resources/index.ts +8 -0
  64. package/src/resources/sessions.ts +196 -0
  65. package/src/tools/compress.ts +138 -0
  66. package/src/tools/index.ts +5 -0
  67. package/src/tools/scratchpad.ts +2659 -0
  68. package/src/tools/sessions.ts +144 -0
@@ -0,0 +1,507 @@
1
+ /**
2
+ * Confidence Drift Detection (CDD)
3
+ *
4
+ * Novel technique: Analyzes confidence TRAJECTORY as a meta-signal for reasoning quality.
5
+ *
6
+ * Key insight: LLMs often start confident, confidence DROPS mid-chain when hitting
7
+ * difficulty, then "recovers" at the end without explicitly addressing the uncertainty.
8
+ * This V-shaped pattern without revision indicates "pushed through" uncertainty.
9
+ *
10
+ * Design principles:
11
+ * 1. O(n) single-pass analysis of confidence array
12
+ * 2. Detects structural patterns in confidence trajectory
13
+ * 3. Flags unresolved doubt (recovery without revision)
14
+ * 4. Provides actionable insights for reasoning improvement
15
+ *
16
+ * Formula:
17
+ * drift_score = max(max_drop, max_drop × recovery_magnitude / steps_to_recover) for V-shaped trajectories, max_drop × 0.5 otherwise (clamped to 0-1)
18
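+ * unresolved = V-shaped AND drift_score ≥ threshold AND no revision step after the minimum (stable-overconfident, sharp-cliff and low-ending declining trajectories are flagged as well)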
19
+ */
20
+
21
+ import type { ThoughtRecord } from "../session.ts";
22
+
23
+ // ============================================================================
24
+ // TYPES
25
+ // ============================================================================
26
+
27
/**
 * Result of analysing the confidence trajectory of a reasoning chain.
 */
export interface DriftAnalysis {
  /**
   * Overall drift score, clamped to the 0-1 range.
   * Higher values mean a more concerning trajectory.
   */
  drift_score: number;

  /** True when the trajectory shows uncertainty that was never remediated. */
  unresolved: boolean;

  /** Lowest confidence value seen in the trajectory. */
  min_confidence: number;

  /** `step_number` of the step at which the lowest confidence occurred. */
  min_step: number;

  /** Largest drop from any earlier peak to a later value. */
  max_drop: number;

  /** How far confidence climbed back: final confidence minus the minimum. */
  recovery: number;

  /** True when some step after the minimum carries a `revises_step` value. */
  has_revision_after_drop: boolean;

  /** Shape classification of the trajectory. */
  pattern: DriftPattern;

  /** Human-readable summary of the finding. */
  explanation: string;

  /** Recommended follow-up when `unresolved` is true, otherwise `null`. */
  suggestion: string | null;
}
49
+
50
/**
 * Shape classification assigned to a confidence trajectory.
 */
export type DriftPattern =
  /** Confidence stays relatively flat. */
  | "stable"
  /** Every value is at or above the overconfidence threshold with a small spread (trap risk). */
  | "stable_overconfident"
  /** Mostly decreasing: the chain is getting less confident. */
  | "declining"
  /** Mostly increasing: the chain is getting more confident. */
  | "improving"
  /** Drop followed by recovery (the concerning pattern). */
  | "v_shaped"
  /** Multiple ups and downs. */
  | "oscillating"
  /** Sudden drop at the end (likely an error was detected). */
  | "cliff"
  /** Not enough steps to analyse. */
  | "insufficient";
59
+
60
/**
 * Tunable thresholds for confidence drift analysis.
 */
export interface DriftConfig {
  /** Smallest drop that counts as significant (default: 0.15). */
  min_significant_drop: number;

  /** Smallest recovery needed to flag a trajectory as V-shaped (default: 0.15). */
  min_significant_recovery: number;

  /** Drift score at or above which a V-shaped trajectory is flagged as unresolved (default: 0.3). */
  unresolved_threshold: number;

  /** Fewest steps required before any analysis is attempted (default: 3). */
  min_steps: number;

  /** Confidence floor for "overconfident" detection (default: 0.85). */
  overconfident_threshold: number;

  /**
   * Largest spread allowed for the "stable overconfident" pattern (default: 0.05).
   * Despite the name, classification compares this against max − min, not a statistical variance.
   */
  overconfident_max_variance: number;

  /** Smallest final-step drop that flags a cliff as unresolved (default: 0.3). */
  cliff_drop_threshold: number;

  /** Final confidence below which a declining trajectory is flagged as unresolved (default: 0.5). */
  declining_final_threshold: number;
}
78
+
79
/** Thresholds applied when the caller supplies no override for a field. */
const DEFAULT_CONFIG: DriftConfig = {
  // Significance cut-offs for the drop and the subsequent recovery.
  min_significant_drop: 0.15,
  min_significant_recovery: 0.15,

  // Drift score from which a V-shaped trajectory counts as unresolved.
  unresolved_threshold: 0.3,

  // Shorter chains are reported as "insufficient".
  min_steps: 3,

  // "Stable overconfident" detection: confidence floor and allowed spread.
  overconfident_threshold: 0.85,
  overconfident_max_variance: 0.05,

  // Unresolved cut-offs for the cliff and declining patterns.
  cliff_drop_threshold: 0.3,
  declining_final_threshold: 0.5,
};
89
+
90
+ // ============================================================================
91
+ // CORE ALGORITHM
92
+ // ============================================================================
93
+
94
/**
 * Analyze the confidence trajectory of a reasoning chain for drift patterns.
 *
 * Each step contributes `verification.confidence`, or 0.5 when that value is absent.
 * One pass over the values finds the minimum, the maximum and the largest drop from
 * a running peak; the trajectory is then classified and scored.
 *
 * @param steps - Thought records in chain order.
 * @param config - Optional threshold overrides. Fields that are missing or explicitly
 *   `undefined` fall back to the defaults.
 * @returns The drift analysis. When `steps` is empty or shorter than `min_steps`, the
 *   result has pattern `"insufficient"`, a zero score and placeholder values taken
 *   from the first step (0.5 / step 1 when there is none).
 */
export function analyzeConfidenceDrift(
  steps: ThoughtRecord[],
  config: Partial<DriftConfig> = {},
): DriftAnalysis {
  // Merge overrides field by field. A plain object spread would let an explicitly
  // `undefined` override erase its default and silently disable the related checks.
  const cfg: DriftConfig = { ...DEFAULT_CONFIG };
  for (const key of Object.keys(DEFAULT_CONFIG) as (keyof DriftConfig)[]) {
    const override = config[key];
    if (override !== undefined) {
      cfg[key] = override;
    }
  }

  // Handle insufficient data. The explicit empty check keeps the indexing below safe
  // even when a caller configures min_steps <= 0.
  if (steps.length === 0 || steps.length < cfg.min_steps) {
    return {
      drift_score: 0,
      unresolved: false,
      min_confidence: steps[0]?.verification?.confidence ?? 0.5,
      min_step: steps[0]?.step_number ?? 1,
      max_drop: 0,
      recovery: 0,
      has_revision_after_drop: false,
      pattern: "insufficient",
      explanation: `Insufficient steps for drift analysis (${steps.length} < ${Math.max(1, cfg.min_steps)})`,
      suggestion: null,
    };
  }

  // Extract confidence values (default to 0.5 if not present)
  const confidences = steps.map((s) => s.verification?.confidence ?? 0.5);
  const stepNumbers = steps.map((s) => s.step_number);

  // Single-pass analysis: find min, max, and the largest drop from a running peak
  let minConf = confidences[0]!;
  let minIdx = 0;
  let maxConf = confidences[0]!;
  let maxIdx = 0;
  let maxDropFromPeak = 0;
  let runningPeak = confidences[0]!;

  for (let i = 1; i < confidences.length; i++) {
    const conf = confidences[i]!;

    // Strict comparisons keep the FIRST occurrence of the global min / max.
    if (conf < minConf) {
      minConf = conf;
      minIdx = i;
    }
    if (conf > maxConf) {
      maxConf = conf;
      maxIdx = i;
    }

    // Track maximum drop from any previous peak
    if (conf > runningPeak) {
      runningPeak = conf;
    } else {
      maxDropFromPeak = Math.max(maxDropFromPeak, runningPeak - conf);
    }
  }

  // Recovery is measured from the minimum to the final value
  const finalConf = confidences[confidences.length - 1]!;
  const recovery = finalConf - minConf;

  // A revision counts only if it happens after the minimum
  const hasRevisionAfterDrop = steps.slice(minIdx + 1).some((s) => s.revises_step !== undefined);

  const pattern = classifyPattern(confidences, minIdx, maxIdx, maxDropFromPeak, recovery, cfg);

  // Drift score.
  // V-shaped: max of (drop alone) and (drop × recovery / steps to recover). While
  // confidences stay within [0, 1] the second term cannot exceed the first, so the
  // score is effectively the drop itself.
  // Other patterns: half the drop.
  const stepsToRecover = Math.max(1, confidences.length - 1 - minIdx);
  const driftScore =
    pattern === "v_shaped"
      ? Math.max(maxDropFromPeak, (maxDropFromPeak * recovery) / stepsToRecover)
      : maxDropFromPeak * 0.5;

  // Clamp to 0-1
  const normalizedDriftScore = Math.min(1, Math.max(0, driftScore));

  // Determine if unresolved (concerning pattern without remediation)
  const isStableOverconfident = pattern === "stable_overconfident";
  const significantDrop = maxDropFromPeak >= cfg.min_significant_drop;
  const significantRecovery = recovery >= cfg.min_significant_recovery;

  // V-shaped is unresolved if: significant drop + recovery, no revision, above threshold
  const vShapedUnresolved =
    pattern === "v_shaped" &&
    significantDrop &&
    significantRecovery &&
    !hasRevisionAfterDrop &&
    normalizedDriftScore >= cfg.unresolved_threshold;

  // Cliff is unresolved if the drop in the very last step reaches the cliff threshold
  const finalStepDrop =
    confidences.length >= 2
      ? confidences[confidences.length - 2]! - confidences[confidences.length - 1]!
      : 0;
  const cliffUnresolved = pattern === "cliff" && finalStepDrop >= cfg.cliff_drop_threshold;

  // Declining is unresolved if the chain ENDS below the configured threshold
  const isDeclining = pattern === "declining";
  const decliningUnresolved = isDeclining && finalConf < cfg.declining_final_threshold;

  // Stable overconfident is always flagged as unresolved (warrants review)
  const unresolved =
    vShapedUnresolved || isStableOverconfident || cliffUnresolved || decliningUnresolved;

  // For stable_overconfident, cliff, and declining: ensure a visible concern level
  const finalDriftScore =
    isStableOverconfident || cliffUnresolved || decliningUnresolved
      ? Math.max(normalizedDriftScore, 0.4)
      : normalizedDriftScore;

  // The declining texts describe where the chain ended ("by the end", "Ending with
  // low confidence"), and the unresolved flag above is decided on the final value.
  // Report the final confidence for that pattern so the text and the flag agree;
  // every other pattern reports the minimum.
  const reportedConfidence = isDeclining ? finalConf : minConf;

  const explanation = generateExplanation(
    pattern,
    maxDropFromPeak,
    recovery,
    minIdx,
    stepNumbers,
    hasRevisionAfterDrop,
    reportedConfidence,
  );

  // A suggestion is produced only for unresolved trajectories
  const suggestion = unresolved
    ? generateSuggestion(stepNumbers[minIdx]!, maxDropFromPeak, pattern, reportedConfidence)
    : null;

  return {
    drift_score: finalDriftScore,
    unresolved,
    min_confidence: minConf,
    min_step: stepNumbers[minIdx]!,
    max_drop: maxDropFromPeak,
    recovery,
    has_revision_after_drop: hasRevisionAfterDrop,
    pattern,
    explanation,
    suggestion,
  };
}
257
+
258
/**
 * Assign a trajectory shape to a list of confidence values.
 *
 * Checks run in a fixed order and the first match wins:
 * v_shaped → cliff → stable_overconfident → stable → declining → improving →
 * oscillating, with "stable" as the fallback.
 *
 * @param confidences - Confidence value per step, in chain order.
 * @param minIdx - Index of the minimum confidence.
 * @param _maxIdx - Index of the maximum confidence (not used by the classification).
 * @param maxDrop - Largest drop from an earlier peak.
 * @param recovery - Final confidence minus the minimum.
 * @param cfg - Thresholds to classify against.
 */
function classifyPattern(
  confidences: number[],
  minIdx: number,
  _maxIdx: number,
  maxDrop: number,
  recovery: number,
  cfg: DriftConfig,
): DriftPattern {
  const count = confidences.length;
  const lastIdx = count - 1;
  const lowest = Math.min(...confidences);
  const spread = Math.max(...confidences) - lowest;
  const dropIsSignificant = maxDrop >= cfg.min_significant_drop;

  // V-shaped comes first: the trough sits strictly inside the chain and both the
  // drop and the climb back are significant.
  const troughIsInterior = minIdx > 0 && minIdx < lastIdx;
  if (troughIsInterior && dropIsSignificant && recovery >= cfg.min_significant_recovery) {
    return "v_shaped";
  }

  // Cliff: the trough is the final step and the last move down is both significant
  // and at least twice the average absolute move of the earlier steps.
  if (minIdx >= lastIdx && dropIsSignificant && count >= 2) {
    const lastStepDrop = confidences[count - 2]! - confidences[lastIdx]!;
    const earlierMovement = confidences
      .slice(0, lastIdx)
      .reduce(
        (acc, value, idx, earlier) => (idx === 0 ? acc : acc + Math.abs(value - earlier[idx - 1]!)),
        0,
      );
    const typicalMovement = count > 2 ? earlierMovement / (count - 2) : 0;
    if (lastStepDrop >= cfg.min_significant_drop && lastStepDrop >= typicalMovement * 2) {
      return "cliff";
    }
  }

  // Stable overconfident is tested before plain stable so the narrower case wins.
  if (lowest >= cfg.overconfident_threshold && spread <= cfg.overconfident_max_variance) {
    return "stable_overconfident";
  }

  if (spread < 0.1) {
    return "stable";
  }

  // One walk over consecutive pairs collects everything the remaining checks need.
  let falls = 0;
  let rises = 0;
  let turns = 0;
  let heading = 0;
  for (let k = 1; k < count; k++) {
    const previous = confidences[k - 1]!;
    const current = confidences[k]!;
    if (current < previous) falls++;
    if (current > previous) rises++;

    // The first non-flat move also counts as a turn, because heading starts at 0.
    const direction = Math.sign(current - previous);
    if (direction !== 0 && direction !== heading) {
      turns++;
      heading = direction;
    }
  }

  // "Mostly" means at least 70% of the transitions.
  const mostTransitions = (count - 1) * 0.7;
  if (falls >= mostTransitions) {
    return "declining";
  }
  if (rises >= mostTransitions) {
    return "improving";
  }
  if (turns >= 3) {
    return "oscillating";
  }

  // No clear pattern
  return "stable";
}
350
+
351
/**
 * Build the human-readable explanation for a classified trajectory.
 *
 * @param pattern - Classified trajectory shape.
 * @param maxDrop - Largest drop, as a fraction (rendered as a whole percentage).
 * @param recovery - Recovery magnitude, as a fraction (rendered as a whole percentage).
 * @param minIdx - Index into `stepNumbers` of the lowest-confidence step.
 * @param stepNumbers - Step number of every step, in chain order.
 * @param hasRevision - Whether a revision step follows the minimum.
 * @param minConfidence - Confidence quoted in the overconfident and declining texts.
 */
function generateExplanation(
  pattern: DriftPattern,
  maxDrop: number,
  recovery: number,
  minIdx: number,
  stepNumbers: number[],
  hasRevision: boolean,
  minConfidence?: number,
): string {
  // Render a fraction as a whole-number percentage without the % sign.
  const asPercent = (fraction: number): string => (fraction * 100).toFixed(0);

  const dropText = asPercent(maxDrop);
  const recoveryText = asPercent(recovery);
  const troughStep = stepNumbers[minIdx];

  if (pattern === "stable") {
    return "Confidence remained stable throughout reasoning chain.";
  }

  if (pattern === "stable_overconfident") {
    return `⚠️ Stable high confidence (≥${asPercent(minConfidence ?? 0.85)}%) throughout chain. On complex/trap questions, consistent high confidence without doubt often correlates with incorrect answers.`;
  }

  if (pattern === "declining") {
    // The warning variant is used only when the quoted confidence is below 50%.
    if (minConfidence !== undefined && minConfidence < 0.5) {
      return `⚠️ Confidence declined steadily to ${asPercent(minConfidence)}% (${dropText}% total drop). Ending with low confidence suggests unresolved uncertainty.`;
    }
    return `Confidence declined steadily (${dropText}% total drop). This may indicate increasing uncertainty or problem difficulty.`;
  }

  if (pattern === "improving") {
    return `Confidence improved throughout reasoning (${recoveryText}% increase). Good progressive understanding.`;
  }

  if (pattern === "v_shaped") {
    return hasRevision
      ? `V-shaped confidence pattern detected: ${dropText}% drop at step ${troughStep}, then ${recoveryText}% recovery. Revision step present - uncertainty was addressed.`
      : `⚠️ V-shaped confidence pattern: ${dropText}% drop at step ${troughStep}, then ${recoveryText}% recovery WITHOUT revision. The reasoning may have "pushed through" uncertainty without addressing it.`;
  }

  if (pattern === "oscillating") {
    return `Confidence oscillated throughout reasoning. Multiple uncertainty points encountered.`;
  }

  if (pattern === "cliff") {
    return `Confidence dropped sharply at the end (${dropText}% drop). Possible error or contradiction detected late in reasoning.`;
  }

  if (pattern === "insufficient") {
    return "Not enough steps for meaningful drift analysis.";
  }

  // Fallback for any value outside the known patterns.
  return "Confidence pattern analyzed.";
}
403
+
404
/**
 * Produce the follow-up advice attached to an unresolved trajectory.
 *
 * @param minStep - Step number of the lowest-confidence step.
 * @param dropMagnitude - Largest drop, as a fraction.
 * @param pattern - Classified trajectory shape; selects the advice template.
 * @param minConfidence - Confidence quoted by the overconfident and declining templates.
 */
function generateSuggestion(
  minStep: number,
  dropMagnitude: number,
  pattern?: DriftPattern,
  minConfidence?: number,
): string {
  // Render a fraction as a whole-number percentage without the % sign.
  const asPercent = (fraction: number): string => (fraction * 100).toFixed(0);

  switch (pattern) {
    // Uniformly high confidence: prompt for a self-check.
    case "stable_overconfident":
      return `High confidence (${asPercent(minConfidence ?? 0.85)}%+) throughout suggests possible overconfidence. Consider: Is this a trick question? Have you verified assumptions? Adding a self-check step could help catch errors.`;

    // Late sharp drop: an error was probably noticed at the end.
    case "cliff":
      return `Sharp confidence drop (${asPercent(dropMagnitude)}%) at the final step suggests an error or contradiction was detected late. Consider: What caused this doubt? Should you revise earlier steps before concluding?`;

    // Chain ended uncertain.
    case "declining":
      return `Confidence declined to ${asPercent(minConfidence ?? 0.5)}% by the end. The reasoning chain ended with significant doubt. Consider: What's causing the uncertainty? Is the approach valid? Should you try a different method?`;

    // Every other pattern: wording depends on how large the drop was.
    default:
      return dropMagnitude >= 0.3
        ? `Consider revising from step ${minStep} where confidence dropped significantly. The recovery without explicit revision suggests the uncertainty was not properly addressed.`
        : `Review step ${minStep} where confidence was lowest. Adding explicit reasoning about why confidence recovered could strengthen the chain.`;
  }
}
437
+
438
+ // ============================================================================
439
+ // CONVENIENCE FUNCTIONS
440
+ // ============================================================================
441
+
442
/**
 * Fast yes/no check for an unrevised V-shaped confidence trajectory.
 *
 * Returns true only when all of the following hold: there are at least three steps,
 * the first occurrence of the minimum is strictly inside the chain, some value up to
 * the minimum exceeds it by at least 0.15, the final value exceeds it by at least
 * 0.15, and no step after the minimum has `revises_step` set.
 *
 * @param steps - Thought records in chain order; a missing confidence counts as 0.5.
 * @param _threshold - Accepted for API compatibility; the check does not read it.
 */
export function hasConcerningDrift(steps: ThoughtRecord[], _threshold: number = 0.3): boolean {
  const total = steps.length;
  if (total < 3) {
    return false;
  }

  const trajectory = steps.map(({ verification }) => verification?.confidence ?? 0.5);
  const trough = Math.min(...trajectory);
  const troughAt = trajectory.indexOf(trough);

  // The trough has to sit strictly between the first and the last step.
  if (troughAt <= 0 || troughAt >= total - 1) {
    return false;
  }

  // Some value at or before the trough must sit at least 0.15 above it.
  const droppedIntoTrough = trajectory.some(
    (value, position) => position <= troughAt && value - trough >= 0.15,
  );
  if (!droppedIntoTrough) {
    return false;
  }

  // The chain must end at least 0.15 above the trough.
  const climbedBack = trajectory[total - 1]! - trough >= 0.15;
  if (!climbedBack) {
    return false;
  }

  // Any revision after the trough means the doubt was addressed.
  return steps.slice(troughAt + 1).every((step) => step.revises_step === undefined);
}
461
+
462
/**
 * Reduce a chain to (step number, confidence) pairs for visualization or logging.
 *
 * @param steps - Thought records in chain order.
 * @returns One entry per step, in the same order; a missing confidence is reported as 0.5.
 */
export function extractConfidenceTrajectory(
  steps: ThoughtRecord[],
): { step: number; confidence: number }[] {
  return steps.map(({ step_number, verification }) => {
    const confidence = verification?.confidence ?? 0.5;
    return { step: step_number, confidence };
  });
}
473
+
474
/**
 * Summarise a confidence trajectory with aggregate statistics.
 *
 * @param steps - Thought records in chain order; a missing confidence counts as 0.5.
 * @returns Mean, population standard deviation (divides by n), minimum, maximum, and
 *   a trend label taken from the last-minus-first difference: "up" above 0.1, "down"
 *   below -0.1, otherwise "flat". An empty chain yields mean/min/max 0.5, stddev 0
 *   and trend "flat".
 */
export function computeTrajectoryStats(steps: ThoughtRecord[]): {
  mean: number;
  stddev: number;
  min: number;
  max: number;
  trend: "up" | "down" | "flat";
} {
  const values = steps.map(({ verification }) => verification?.confidence ?? 0.5);
  const count = values.length;

  // Neutral defaults when there is nothing to summarise.
  if (count === 0) {
    return { mean: 0.5, stddev: 0, min: 0.5, max: 0.5, trend: "flat" };
  }

  let total = 0;
  for (const value of values) {
    total += value;
  }
  const mean = total / count;

  let squaredDeviationTotal = 0;
  for (const value of values) {
    squaredDeviationTotal += (value - mean) ** 2;
  }
  const stddev = Math.sqrt(squaredDeviationTotal / count);

  // Trend compares only the endpoints; it is not a fitted slope.
  const netChange = values[count - 1]! - values[0]!;
  let trend: "up" | "down" | "flat" = "flat";
  if (netChange > 0.1) {
    trend = "up";
  } else if (netChange < -0.1) {
    trend = "down";
  }

  return {
    mean,
    stddev,
    min: Math.min(...values),
    max: Math.max(...values),
    trend,
  };
}