npm - verifiable-thinking-mcp - Versions diffs - 0.4.0 - Mend

verifiable-thinking-mcp 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (68) hide show

package/LICENSE +21 -0
package/README.md +339 -0
package/package.json +75 -0
package/src/index.ts +38 -0
package/src/lib/cache.ts +246 -0
package/src/lib/compression.ts +804 -0
package/src/lib/compute/cache.ts +86 -0
package/src/lib/compute/classifier.ts +555 -0
package/src/lib/compute/confidence.ts +79 -0
package/src/lib/compute/context.ts +154 -0
package/src/lib/compute/extract.ts +200 -0
package/src/lib/compute/filter.ts +224 -0
package/src/lib/compute/index.ts +171 -0
package/src/lib/compute/math.ts +247 -0
package/src/lib/compute/patterns.ts +564 -0
package/src/lib/compute/registry.ts +145 -0
package/src/lib/compute/solvers/arithmetic.ts +65 -0
package/src/lib/compute/solvers/calculus.ts +249 -0
package/src/lib/compute/solvers/derivation-core.ts +371 -0
package/src/lib/compute/solvers/derivation-latex.ts +160 -0
package/src/lib/compute/solvers/derivation-mistakes.ts +1046 -0
package/src/lib/compute/solvers/derivation-simplify.ts +451 -0
package/src/lib/compute/solvers/derivation-transform.ts +620 -0
package/src/lib/compute/solvers/derivation.ts +67 -0
package/src/lib/compute/solvers/facts.ts +120 -0
package/src/lib/compute/solvers/formula.ts +728 -0
package/src/lib/compute/solvers/index.ts +36 -0
package/src/lib/compute/solvers/logic.ts +422 -0
package/src/lib/compute/solvers/probability.ts +307 -0
package/src/lib/compute/solvers/statistics.ts +262 -0
package/src/lib/compute/solvers/word-problems.ts +408 -0
package/src/lib/compute/types.ts +107 -0
package/src/lib/concepts.ts +111 -0
package/src/lib/domain.ts +731 -0
package/src/lib/extraction.ts +912 -0
package/src/lib/index.ts +122 -0
package/src/lib/judge.ts +260 -0
package/src/lib/math/ast.ts +842 -0
package/src/lib/math/index.ts +8 -0
package/src/lib/math/operators.ts +171 -0
package/src/lib/math/tokenizer.ts +477 -0
package/src/lib/patterns.ts +200 -0
package/src/lib/session.ts +825 -0
package/src/lib/think/challenge.ts +323 -0
package/src/lib/think/complexity.ts +504 -0
package/src/lib/think/confidence-drift.ts +507 -0
package/src/lib/think/consistency.ts +347 -0
package/src/lib/think/guidance.ts +188 -0
package/src/lib/think/helpers.ts +568 -0
package/src/lib/think/hypothesis.ts +216 -0
package/src/lib/think/index.ts +127 -0
package/src/lib/think/prompts.ts +262 -0
package/src/lib/think/route.ts +358 -0
package/src/lib/think/schema.ts +98 -0
package/src/lib/think/scratchpad-schema.ts +662 -0
package/src/lib/think/spot-check.ts +961 -0
package/src/lib/think/types.ts +93 -0
package/src/lib/think/verification.ts +260 -0
package/src/lib/tokens.ts +177 -0
package/src/lib/verification.ts +620 -0
package/src/prompts/index.ts +10 -0
package/src/prompts/templates.ts +336 -0
package/src/resources/index.ts +8 -0
package/src/resources/sessions.ts +196 -0
package/src/tools/compress.ts +138 -0
package/src/tools/index.ts +5 -0
package/src/tools/scratchpad.ts +2659 -0
package/src/tools/sessions.ts +144 -0

package/src/lib/think/hypothesis.ts ADDED Viewed

@@ -0,0 +1,216 @@
+/**
+ * Hypothesis Resolution - Detects when a branch's hypothesis is confirmed or refuted
+ *
+ * Uses O(n) pattern matching to detect resolution signals in step content:
+ * - Confirmation: "therefore", "confirmed", "proves", "QED", "thus we have shown"
+ * - Refutation: "contradiction", "impossible", "disproved", "fails", "cannot be"
+ *
+ * Returns resolution status for branches with hypotheses.
+ */
+/**
+ * Resolution status for a hypothesis.
+ *
+ * Returned by `analyzeStepForResolution` (single step) and
+ * `analyzeHypothesisResolution` (all steps of a branch).
+ */
+export interface HypothesisResolution {
+  /** Whether the hypothesis has been resolved (an explicit "inconclusive" verdict is also reported as resolved) */
+  resolved: boolean;
+  /** Resolution outcome if resolved; null while unresolved */
+  outcome: "confirmed" | "refuted" | "inconclusive" | null;
+  /** Confidence in the resolution (0-1); may be non-zero on an unresolved result when success-criteria keywords were detected */
+  confidence: number;
+  /** Step number where resolution was detected; null while unresolved */
+  resolved_at_step: number | null;
+  /** Evidence text that triggered resolution (matched text, or a short note for a criteria-only hint) */
+  evidence: string | null;
+  /** The original hypothesis being tested */
+  hypothesis: string;
+  /** The success criteria (if provided) */
+  success_criteria: string | null;
+  /** Suggested action based on resolution */
+  suggestion: string;
+}
+// Confirmation patterns - signals that hypothesis is proven true.
+// Patterns are tried in array order; the first match is reported as evidence.
+const CONFIRMATION_PATTERNS = [
+  // Conclusion word followed (anywhere later on the same line) by a positive verdict
+  /\b(?:therefore|thus|hence|consequently)\b.*\b(?:true|correct|valid|proven|confirmed)\b/i,
+  // "this confirms/proves/shows/demonstrates (that) (the) hypothesis"
+  /\b(?:this\s+)?(?:confirms?|proves?|shows?|demonstrates?)\s+(?:that\s+)?(?:the\s+)?hypothesis\b/i,
+  // QED markers. The dotted form must not end in `\.\b`: no word boundary exists
+  // between "." and whitespace/end of input, so "Q.E.D." would never match.
+  // The boundary is asserted after the "D" instead and the final dot is optional.
+  /\b(?:QED\b|Q\.E\.D\b\.?|quod\s+erat\s+demonstrandum\b)/i,
+  /\b(?:we\s+have\s+shown|we\s+conclude|this\s+establishes)\b/i,
+  /\bhypothesis\s+(?:is\s+)?(?:true|correct|valid|confirmed)\b/i,
+  // "as expected/hypothesized" but NOT "assume is true"
+  /\bas\s+(?:we\s+)?(?:hypothesized|expected|predicted)\b/i,
+  /\bsuccess(?:fully)?\s+(?:verified|confirmed|proven)\b/i,
+];
+// Refutation patterns - signals that hypothesis is proven false.
+// Patterns are tried in array order; the first match is reported as evidence.
+const REFUTATION_PATTERNS = [
+  /\b(?:contradiction|contradicts?|inconsistent)\b/i,
+  // "cannot be determined/concluded/decided" expresses uncertainty, not refutation,
+  // so those continuations are excluded from the "cannot be" signal.
+  /\b(?:impossible|(?:cannot|can't)\s+be(?!\s+(?:determined|concluded|decided)\b))\b/i,
+  /\b(?:disprove[ds]?|refute[ds]?|falsif(?:y|ied))\b/i,
+  /\bhypothesis\s+(?:is\s+)?(?:false|incorrect|invalid|wrong|fails?)\b/i,
+  /\b(?:this\s+)?(?:fails?|violates?|breaks?)\s+(?:the\s+)?(?:assumption|hypothesis)\b/i,
+  /\b(?:counterexample|counter-example)\b/i,
+  /\bnot\s+(?:true|valid|correct|possible)\b/i,
+  // Base verb plus optional -s / -ed ending. The previous `rejected?` only made
+  // the final "d" optional, so plain "reject the hypothesis" was never detected.
+  /\b(?:reject(?:s|ed)?|abandon(?:s|ed)?|discard(?:s|ed)?)\s+(?:the\s+)?hypothesis\b/i,
+];
+// Inconclusive patterns - explicitly states uncertainty remains
+// All patterns are case-insensitive and anchored on word boundaries.
+const INCONCLUSIVE_PATTERNS = [
+  // Direct statements of uncertainty
+  /\b(?:inconclusive|undetermined|unclear|uncertain)\b/i,
+  // "need(s)/require(s) more|further|additional evidence|proof|analysis"
+  /\b(?:need|require)s?\s+(?:more|further|additional)\s+(?:evidence|proof|analysis)\b/i,
+  // "cannot determine/conclude/decide", optionally with "yet"
+  /\b(?:cannot\s+(?:yet\s+)?(?:determine|conclude|decide))\b/i,
+  // "insufficient evidence/data/information"
+  /\b(?:insufficient\s+(?:evidence|data|information))\b/i,
+];
+/**
+ * Try each pattern against the text, in array order, and report the first hit.
+ *
+ * @param text - Text to search
+ * @param patterns - Regular expressions to try, earliest first
+ * @returns The substring matched by the first matching pattern, or null when none match
+ */
+function findMatch(text: string, patterns: RegExp[]): string | null {
+  for (const candidate of patterns) {
+    const hit = text.match(candidate);
+    if (hit === null) {
+      continue;
+    }
+    // Index 0 of a match result is the full matched substring.
+    return hit[0];
+  }
+  return null;
+}
+/**
+ * Check if success criteria is mentioned/satisfied in text.
+ *
+ * Comparison is case-insensitive. Criteria words longer than 3 characters are
+ * treated as keywords; the criteria counts as referenced when at least half of
+ * them occur (as substrings) in the text. When the criteria has no such
+ * keywords (e.g. "x > 0"), the whole trimmed criteria must appear verbatim in
+ * the text; an empty criteria is never considered referenced.
+ *
+ * @param text - Step text to search
+ * @param criteria - Success criteria to look for
+ * @returns true when the criteria appears to be referenced in the text
+ */
+function checkSuccessCriteria(text: string, criteria: string): boolean {
+  // Normalize both for comparison
+  const normalizedText = text.toLowerCase();
+  const normalizedCriteria = criteria.toLowerCase().trim();
+  // Check if criteria keywords appear in text
+  const criteriaWords = normalizedCriteria.split(/\s+/).filter((w) => w.length > 3); // Skip short words
+  if (criteriaWords.length === 0) {
+    // Without keywords the ratio test below would be `0 >= 0` and always pass,
+    // so fall back to a literal phrase match instead.
+    return normalizedCriteria.length > 0 && normalizedText.includes(normalizedCriteria);
+  }
+  const matchCount = criteriaWords.filter((word) => normalizedText.includes(word)).length;
+  // If at least 50% of meaningful words match, consider criteria referenced
+  return matchCount >= criteriaWords.length * 0.5;
+}
+/**
+ * Inspect a single step for signals that the hypothesis has been settled.
+ *
+ * Checks run in a fixed order and the first hit wins: refutation (confidence
+ * 0.9), explicit inconclusive (0.7), then confirmation (0.85, raised to 0.95
+ * when the success criteria is also referenced). If none match but the success
+ * criteria is referenced, an unresolved result with confidence 0.6 is returned
+ * as a hint.
+ *
+ * @param stepText - The thought content of the step
+ * @param hypothesis - The hypothesis being tested
+ * @param successCriteria - Optional success criteria
+ * @param stepNumber - The step number
+ * @returns Always a result object; `resolved` is false when no resolution was detected
+ */
+export function analyzeStepForResolution(
+  stepText: string,
+  hypothesis: string,
+  successCriteria: string | null,
+  stepNumber: number,
+): HypothesisResolution {
+  // Template for every return value; specific outcomes override fields below.
+  const unresolved: HypothesisResolution = {
+    resolved: false,
+    outcome: null,
+    confidence: 0,
+    resolved_at_step: null,
+    evidence: null,
+    hypothesis,
+    success_criteria: successCriteria,
+    suggestion: "Continue testing the hypothesis.",
+  };
+  // Builds a resolved result anchored at this step.
+  const settle = (
+    outcome: "confirmed" | "refuted" | "inconclusive",
+    confidence: number,
+    evidence: string,
+    suggestion: string,
+  ): HypothesisResolution => ({
+    ...unresolved,
+    resolved: true,
+    outcome,
+    confidence,
+    resolved_at_step: stepNumber,
+    evidence,
+    suggestion,
+  });
+  // Evaluated lazily: only the confirmation and fallback paths need it.
+  const criteriaReferenced = (): boolean =>
+    successCriteria ? checkSuccessCriteria(stepText, successCriteria) : false;
+  // Refutation is checked first (stronger signal - contradictions are definitive)
+  const refutedBy = findMatch(stepText, REFUTATION_PATTERNS);
+  if (refutedBy) {
+    return settle(
+      "refuted",
+      0.9,
+      refutedBy,
+      "Hypothesis refuted. Consider abandoning this branch or revising the hypothesis.",
+    );
+  }
+  // Explicit statement that the question remains open
+  const inconclusiveBy = findMatch(stepText, INCONCLUSIVE_PATTERNS);
+  if (inconclusiveBy) {
+    return settle(
+      "inconclusive",
+      0.7,
+      inconclusiveBy,
+      "Hypothesis inconclusive. Gather more evidence or reformulate.",
+    );
+  }
+  // Confirmation, boosted when the success criteria is referenced as well
+  const confirmedBy = findMatch(stepText, CONFIRMATION_PATTERNS);
+  if (confirmedBy) {
+    return settle(
+      "confirmed",
+      criteriaReferenced() ? 0.95 : 0.85,
+      confirmedBy,
+      "Hypothesis confirmed. Consider merging findings back to main branch.",
+    );
+  }
+  if (!criteriaReferenced()) {
+    return unresolved;
+  }
+  // Criteria keywords present without an explicit verdict: hint only, still unresolved
+  return {
+    ...unresolved,
+    confidence: 0.6,
+    evidence: "Success criteria keywords detected",
+    suggestion: "Success criteria may be satisfied. Verify and explicitly confirm or refute.",
+  };
+}
+/**
+ * Scan a branch's steps in the given order and report the first resolution.
+ *
+ * @param steps - Array of steps with their content
+ * @param hypothesis - The hypothesis being tested
+ * @param successCriteria - Optional success criteria
+ * @returns The first resolved per-step result, or an unresolved result whose
+ *   suggestion quotes the hypothesis (truncated to 50 characters) when no step resolves it
+ */
+export function analyzeHypothesisResolution(
+  steps: Array<{ step: number; thought: string }>,
+  hypothesis: string,
+  successCriteria: string | null,
+): HypothesisResolution {
+  // Stop at the earliest step that settles the hypothesis.
+  for (const entry of steps) {
+    const verdict = analyzeStepForResolution(
+      entry.thought,
+      hypothesis,
+      successCriteria,
+      entry.step,
+    );
+    if (verdict.resolved) {
+      return verdict;
+    }
+  }
+  // Nothing settled it: quote a shortened hypothesis in the suggestion.
+  const preview = hypothesis.length > 50 ? `${hypothesis.slice(0, 50)}...` : hypothesis;
+  return {
+    resolved: false,
+    outcome: null,
+    confidence: 0,
+    resolved_at_step: null,
+    evidence: null,
+    hypothesis,
+    success_criteria: successCriteria,
+    suggestion: `Continue testing hypothesis: "${preview}"`,
+  };
+}

package/src/lib/think/index.ts ADDED Viewed

@@ -0,0 +1,127 @@
+/**
+ * Think Library - Barrel export for think-related modules
+ */
+// Local complexity assessment
+export {
+  assessPromptComplexity,
+  type ComplexityResult,
+  getTrivialPrompt,
+  isTrivialQuestion,
+} from "./complexity.ts";
+// Guidance engine (failure pattern detection for free-form reasoning)
+export {
+  analyzeThought,
+  detectDomain,
+  FAILURE_PATTERNS,
+  type FailurePattern,
+  type ThoughtAnalysis,
+  VALID_PURPOSES,
+} from "./guidance.ts";
+// Helpers (extracted for cognitive complexity reduction)
+export {
+  type AugmentResult,
+  assessComplexity,
+  buildBaselineResponse,
+  buildRecord,
+  buildResponse,
+  type ComplexityInfo,
+  type CompressionLevel,
+  type CompressionStats,
+  compressChainContext,
+  compressInput,
+  compressOutput,
+  type ExecuteContext,
+  errorResponse,
+  findMissingDeps,
+  initContext,
+  jsonResponse,
+  runGuidance,
+  runVerify,
+  type StreamFn,
+  storeThought,
+  tryAugment,
+  tryCompute,
+  validateBranch,
+  validateRevision,
+} from "./helpers.ts";
+// Prompts (verbosity-aware templates)
+export {
+  // Domain-aware prompts
+  DOMAIN_PROMPTS,
+  // User prompts
+  formatBaselinePrompt,
+  formatBaselinePromptTerse,
+  formatCriticalCheckPrompt,
+  formatCriticalCheckPromptTerse,
+  formatDomainExplanatoryPrompt,
+  formatExplanatoryPrompt,
+  formatReasoningPrompt,
+  formatReasoningPromptTerse,
+  formatVerificationPrompt,
+  formatVerificationPromptTerse,
+  getDomainSystemPrompt,
+  // Unified getters
+  getSystemPrompt,
+  getUserPrompt,
+  getVerbosity,
+  SYSTEM_ANSWER_ONLY,
+  SYSTEM_ANSWER_ONLY_TERSE,
+  // System prompts
+  SYSTEM_BASELINE,
+  SYSTEM_BASELINE_TERSE,
+  SYSTEM_EXPLANATORY,
+  SYSTEM_REASONING,
+  SYSTEM_REASONING_TERSE,
+  SYSTEM_VERIFICATION,
+  SYSTEM_VERIFICATION_TERSE,
+  type Verbosity,
+} from "./prompts.ts";
+// Routing (complexity-based path selection)
+export {
+  getComplexityInfo,
+  isExplanatoryQuestion,
+  type RoutePrompts,
+  type RouteResult,
+  type RoutingPath,
+  routeQuestion,
+} from "./route.ts";
+// Schema
+export { NextActionSchema, type ThinkArgs, ThinkSchema } from "./schema.ts";
+// Scratchpad schema
+export {
+  type ScratchpadArgs,
+  type ScratchpadResponse,
+  ScratchpadSchema,
+} from "./scratchpad-schema.ts";
+// Spot-check (lightweight trap detection for High+ complexity)
+export {
+  hasTrapPatterns,
+  type NeedsSpotCheckResult,
+  needsSpotCheck,
+  PRIME_AGGRESSIVE,
+  PRIME_DEFAULTS,
+  type PrimeOptions,
+  type PrimeResult,
+  primeQuestion,
+  type SpotCheckResult,
+  spotCheck,
+  type TrapDetector,
+} from "./spot-check.ts";
+// Types
+export type {
+  BaselineResult,
+  BenchmarkResults,
+  BenchmarkSummary,
+  Question,
+  QuestionSet,
+  RunResult,
+  ToolResult,
+} from "./types.ts";
+// Verification
+export { estimateTokens, verifyAnswer } from "./verification.ts";

package/src/lib/think/prompts.ts ADDED Viewed

@@ -0,0 +1,262 @@
+/**
+ * Prompt Templates for LLM Interactions
+ * Centralized prompts used by the think tool and benchmarks
+ *
+ * TERSE MODE: ~50% fewer tokens using Chain-of-Draft style
+ * - Short fragments instead of full sentences
+ * - Minimalist system prompts
+ * - Direct answer extraction
+ */
+// =============================================================================
+// VERBOSITY CONFIGURATION
+// =============================================================================
+/** Prompt verbosity level. The unified prompt getters treat "verbose" the same as "normal". */
+export type Verbosity = "terse" | "normal" | "verbose";
+/**
+ * Determine verbosity based on question characteristics.
+ *
+ * - "verbose" when the question contains explain / why / "how does" / describe / compare
+ * - "terse" when it has fewer than 15 words and starts with what / calculate / compute / is
+ * - "normal" otherwise
+ *
+ * @param question - The question text
+ * @returns The verbosity level to use for prompts
+ */
+export function getVerbosity(question: string): Verbosity {
+  // Trim before splitting: leading/trailing whitespace would otherwise add
+  // empty tokens and inflate the count past the terse threshold.
+  const trimmed = question.trim();
+  const wordCount = trimmed === "" ? 0 : trimmed.split(/\s+/).length;
+  const needsExplanation = /explain|why|how\s+does|describe|compare/i.test(question);
+  const isSimple = /^\s*(?:what|calculate|compute|is)\s+/i.test(question);
+  if (needsExplanation) return "verbose";
+  if (wordCount < 15 && isSimple) return "terse";
+  return "normal";
+}
+// =============================================================================
+// SYSTEM PROMPTS - NORMAL (default)
+// =============================================================================
+/** System prompt for baseline (direct answer) requests. */
+export const SYSTEM_BASELINE = "Answer directly and concisely. Plain text only.";
+/** System prompt for step-by-step reasoning requests. */
+export const SYSTEM_REASONING =
+  "Show reasoning step-by-step, then give final answer. Plain text math only.";
+/** System prompt for verification (double-check) requests. */
+export const SYSTEM_VERIFICATION = "Double-check reasoning. Fix errors. Plain text only.";
+/** System prompt requesting only the answer. */
+export const SYSTEM_ANSWER_ONLY = "Answer only.";
+// System prompt for explanatory questions - emphasizes conciseness
+export const SYSTEM_EXPLANATORY = "Explain clearly and concisely. Plain text only.";
+// =============================================================================
+// DOMAIN-SPECIFIC PROMPTS (token-light steering)
+// =============================================================================
+/**
+ * Domain-specific system prompts - concise but effective steering.
+ * ~15-25 tokens each, optimized for explanation quality.
+ *
+ * Keyed by meta-domain name. `system` is the system prompt text returned by
+ * `getDomainSystemPrompt`; `style` is a short label for the prompt's tone
+ * (NOTE(review): no consumer of `style` is visible in this file - confirm usage).
+ */
+export const DOMAIN_PROMPTS: Record<string, { system: string; style: string }> = {
+  // Technical domains
+  coding: {
+    system: "Explain clearly. Use code examples when they clarify.",
+    style: "technical",
+  },
+  scientific: {
+    system: "Explain precisely. Use correct terminology and show derivations.",
+    style: "precise",
+  },
+  // Educational - clarity focus
+  educational: {
+    system: "Explain clearly. Start with intuition, then details.",
+    style: "pedagogical",
+  },
+  // Financial - accuracy focus
+  financial: {
+    system: "Explain clearly. State assumptions and show calculations.",
+    style: "careful",
+  },
+  // General - balanced
+  general: {
+    system: "Explain clearly and directly.",
+    style: "balanced",
+  },
+};
+// =============================================================================
+// SYSTEM PROMPTS - TERSE (~50% fewer tokens)
+// =============================================================================
+/** Terse system prompt for baseline requests. */
+export const SYSTEM_BASELINE_TERSE = "Answer directly.";
+/** Terse system prompt for reasoning requests; asks for a trailing "Answer: [X]" line. */
+export const SYSTEM_REASONING_TERSE = "Solve step-by-step. End: Answer: [X]";
+/** Terse system prompt for verification requests. */
+export const SYSTEM_VERIFICATION_TERSE = "Verify. Fix errors. Answer: [X]";
+/** Terse answer-only system prompt (same text as SYSTEM_ANSWER_ONLY). */
+export const SYSTEM_ANSWER_ONLY_TERSE = "Answer only.";
+// =============================================================================
+// USER PROMPT TEMPLATES - NORMAL
+// =============================================================================
+/**
+ * Baseline user prompt: the question followed by an answer-format instruction.
+ *
+ * @param question - The question text
+ * @returns Two-line prompt (question, then instruction)
+ */
+export function formatBaselinePrompt(question: string): string {
+  const instruction = "Answer clearly. Number for numeric, choice letter for multiple choice.";
+  return `${question}\n${instruction}`;
+}
+/**
+ * Reasoning user prompt: the question followed by an instruction to finish
+ * with an "Answer: " line.
+ *
+ * @param question - The question text
+ * @returns Two-line prompt (question, then instruction)
+ */
+export function formatReasoningPrompt(question: string): string {
+  const instruction = 'End with "Answer: " followed by just the answer.';
+  return `${question}\n${instruction}`;
+}
+/**
+ * Verification user prompt: restates the question, includes the prior
+ * reasoning and the comma-separated risk flags, then asks for a checked answer.
+ *
+ * @param question - The question text
+ * @param initialReasoning - Reasoning produced earlier, included verbatim
+ * @param patterns - Risk flag labels, joined with ", "
+ * @returns Multi-line prompt ending in "Answer:"
+ */
+export function formatVerificationPrompt(
+  question: string,
+  initialReasoning: string,
+  patterns: string[],
+): string {
+  const riskFlags = patterns.join(", ");
+  const lines = [
+    `Verify: ${question}`,
+    "Prior reasoning:",
+    `${initialReasoning}`,
+    `Risk flags: ${riskFlags}`,
+    "Check for errors, correct if needed.",
+    "Answer:",
+  ];
+  return lines.join("\n");
+}
+/**
+ * Critical-check user prompt: the question followed by a double-check,
+ * answer-only instruction.
+ *
+ * @param question - The question text
+ * @returns Two-line prompt (question, then instruction)
+ */
+export function formatCriticalCheckPrompt(question: string): string {
+  const instruction = "Double-check. Answer only:";
+  return `${question}\n${instruction}`;
+}
+/**
+ * Explanatory user prompt: the question followed by a short instruction to
+ * stay direct and focus on key concepts.
+ *
+ * @param question - The question text
+ * @returns Two-line prompt (question, then instruction)
+ */
+export function formatExplanatoryPrompt(question: string): string {
+  const instruction = "Be direct. Focus on key concepts.";
+  return `${question}\n${instruction}`;
+}
+/**
+ * Format a domain-aware explanatory prompt (token-light)
+ * Just the question - system prompt provides domain steering
+ *
+ * @param question - The question text, returned unchanged
+ * @param _metaDomain - Accepted but not used by this function
+ * @returns The question as-is
+ */
+export function formatDomainExplanatoryPrompt(question: string, _metaDomain: string): string {
+  return question;
+}
+/**
+ * Look up the system prompt for a meta-domain in DOMAIN_PROMPTS.
+ *
+ * @param metaDomain - Meta-domain key (e.g. "coding", "general")
+ * @returns The domain's system prompt, or "Direct answer." when the domain has no entry
+ */
+export function getDomainSystemPrompt(metaDomain: string): string {
+  const systemPrompt = DOMAIN_PROMPTS[metaDomain]?.system;
+  if (systemPrompt == null) {
+    return "Direct answer.";
+  }
+  return systemPrompt;
+}
+// =============================================================================
+// USER PROMPT TEMPLATES - TERSE (Chain-of-Draft style)
+// ~50% fewer tokens, uses fragments and minimal structure
+// =============================================================================
+/**
+ * Terse baseline user prompt in "Q: ... / A:" form.
+ *
+ * @param question - The question text
+ * @returns Two-line prompt ending in "A:"
+ */
+export function formatBaselinePromptTerse(question: string): string {
+  const questionLine = `Q: ${question}`;
+  return `${questionLine}\nA:`;
+}
+/**
+ * Terse reasoning user prompt: question, a steps header limiting each step to
+ * five words, and an "Answer:" line.
+ *
+ * @param question - The question text
+ * @returns Three-line prompt ending in "Answer:"
+ */
+export function formatReasoningPromptTerse(question: string): string {
+  const lines = [`Q: ${question}`, "Steps (max 5 words each):", "Answer:"];
+  return lines.join("\n");
+}
+/**
+ * Terse verification user prompt: question, the previously stated answer (if
+ * one can be extracted), at most two risk flags, and a check instruction.
+ *
+ * The previous answer is taken from the first "answer"/"result" marker in the
+ * prior reasoning, up to the end of the line or the sentence-ending period.
+ * "?" is used when nothing can be extracted.
+ *
+ * @param question - The question text
+ * @param initialReasoning - Reasoning produced earlier; only its answer is kept
+ * @param patterns - Risk flag labels; only the first two are included
+ * @returns Multi-line prompt ending in "Answer:"
+ */
+export function formatVerificationPromptTerse(
+  question: string,
+  initialReasoning: string,
+  patterns: string[],
+): string {
+  // Extract just the answer from initial reasoning if possible.
+  // A "." followed by a digit is a decimal point, not the end of the sentence,
+  // so it stays in the capture ("Answer: 3.14" must not be cut down to "3").
+  const answerMatch = initialReasoning.match(/(?:answer|result)[:\s]+((?:[^\n.]|\.(?=\d))+)/i);
+  const prevAnswer = answerMatch?.[1]?.trim() || "?";
+  const flags = patterns.slice(0, 2).join(", ");
+  return `Q: ${question}\nPrev: ${prevAnswer}\nFlags: ${flags}\nCheck. Correct if needed.\nAnswer:`;
+}
+/**
+ * Terse critical-check user prompt: the question followed by "Answer:".
+ *
+ * @param question - The question text
+ * @returns Two-line prompt ending in "Answer:"
+ */
+export function formatCriticalCheckPromptTerse(question: string): string {
+  const answerCue = "Answer:";
+  return `${question}\n${answerCue}`;
+}
+// =============================================================================
+// UNIFIED PROMPT GETTERS (respects verbosity setting)
+// =============================================================================
+/**
+ * Select the system prompt for a prompt type and verbosity.
+ *
+ * Terse verbosity uses the `*_TERSE` constants; "normal" and "verbose" both
+ * use the standard ones. The explanatory type has no terse variant and always
+ * uses SYSTEM_EXPLANATORY.
+ *
+ * @param type - Which kind of system prompt is needed
+ * @param verbosity - Verbosity level (defaults to "normal")
+ * @returns The matching system prompt constant
+ */
+export function getSystemPrompt(
+  type: "baseline" | "reasoning" | "verification" | "answer_only" | "explanatory",
+  verbosity: Verbosity = "normal",
+): string {
+  const terse = verbosity === "terse";
+  switch (type) {
+    case "baseline":
+      return terse ? SYSTEM_BASELINE_TERSE : SYSTEM_BASELINE;
+    case "reasoning":
+      return terse ? SYSTEM_REASONING_TERSE : SYSTEM_REASONING;
+    case "verification":
+      return terse ? SYSTEM_VERIFICATION_TERSE : SYSTEM_VERIFICATION;
+    case "answer_only":
+      return terse ? SYSTEM_ANSWER_ONLY_TERSE : SYSTEM_ANSWER_ONLY;
+    case "explanatory":
+      // No terse version, use standard
+      return SYSTEM_EXPLANATORY;
+  }
+}
+/**
+ * Build the user prompt for a prompt type and verbosity.
+ *
+ * Terse verbosity uses the `*Terse` formatters; "normal" and "verbose" both
+ * use the standard ones. The explanatory type has no terse variant. For
+ * verification prompts, missing `opts` fields default to an empty reasoning
+ * string and an empty flag list.
+ *
+ * @param type - Which kind of user prompt is needed
+ * @param question - The question text
+ * @param verbosity - Verbosity level (defaults to "normal")
+ * @param opts - Extra inputs used only by the verification prompt
+ * @returns The formatted user prompt
+ */
+export function getUserPrompt(
+  type: "baseline" | "reasoning" | "verification" | "critical" | "explanatory",
+  question: string,
+  verbosity: Verbosity = "normal",
+  opts?: { initialReasoning?: string; patterns?: string[] },
+): string {
+  const terse = verbosity === "terse";
+  switch (type) {
+    case "baseline":
+      return terse ? formatBaselinePromptTerse(question) : formatBaselinePrompt(question);
+    case "reasoning":
+      return terse ? formatReasoningPromptTerse(question) : formatReasoningPrompt(question);
+    case "verification": {
+      const priorReasoning = opts?.initialReasoning || "";
+      const riskFlags = opts?.patterns || [];
+      return terse
+        ? formatVerificationPromptTerse(question, priorReasoning, riskFlags)
+        : formatVerificationPrompt(question, priorReasoning, riskFlags);
+    }
+    case "critical":
+      return terse ? formatCriticalCheckPromptTerse(question) : formatCriticalCheckPrompt(question);
+    case "explanatory":
+      // No terse version
+      return formatExplanatoryPrompt(question);
+  }
+}