npm - thumbgate - Versions diffs - 1.8.0 → 1.9.0 - Mend

thumbgate 1.8.0 → 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

package/.claude-plugin/marketplace.json +2 -2
package/.claude-plugin/plugin.json +1 -1
package/.well-known/llms.txt +4 -0
package/.well-known/mcp/server-card.json +9 -226
package/adapters/README.md +1 -1
package/adapters/claude/.mcp.json +2 -2
package/adapters/mcp/server-stdio.js +46 -1
package/adapters/opencode/opencode.json +1 -1
package/config/mcp-allowlists.json +5 -0
package/package.json +3 -1
package/public/index.html +2 -2
package/scripts/agent-readiness.js +1 -0
package/scripts/autoresearch-runner.js +228 -0
package/scripts/multimodal-retrieval-plan.js +110 -0
package/scripts/tool-registry.js +37 -0
package/src/api/server.js +246 -0

package/.claude-plugin/marketplace.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "thumbgate-marketplace",
-  "version": "1.8.0",
+  "version": "1.9.0",
   "owner": {
     "name": "Igor Ganapolsky",
     "email": "ig5973700@gmail.com"
@@ -13,7 +13,7 @@
         "source": "npm",
         "package": "thumbgate"
       },
-      "version": "1.8.0",
+      "version": "1.9.0",
       "author": {
         "name": "Igor Ganapolsky"
       },

package/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "thumbgate",
   "description": "Type 👍 or 👎 on any agent action. ThumbGate captures it, distills a lesson, and blocks the pattern from repeating. One thumbs-down = the agent physically cannot make that mistake again. 33 pre-action gates, budget enforcement, self-protection, and NIST/SOC2 compliance tags.",
-  "version": "1.8.0",
+  "version": "1.9.0",
   "author": {
     "name": "Igor Ganapolsky"
   },

package/.well-known/llms.txt CHANGED Viewed

@@ -46,6 +46,10 @@ npx thumbgate init --agent claude-code
 ## Links
+- Agent discovery: https://thumbgate-production.up.railway.app/.well-known/mcp.json
+- Progressive tool index: https://thumbgate-production.up.railway.app/.well-known/mcp/tools.json
+- Agent skills: https://thumbgate-production.up.railway.app/.well-known/mcp/skills.json
+- MCP applications: https://thumbgate-production.up.railway.app/.well-known/mcp/applications.json
 - Documentation: https://thumbgate-production.up.railway.app/guide
 - Dashboard: https://thumbgate-production.up.railway.app/dashboard
 - GitHub: https://github.com/IgorGanapolsky/ThumbGate

package/.well-known/mcp/server-card.json CHANGED Viewed

@@ -1,231 +1,14 @@
 {
   "name": "thumbgate",
-  "version": "1.8.0",
+  "version": "1.9.0",
   "description": "ThumbGate — 👍👎 feedback that teaches your AI agent. Thumbs down a mistake, it never happens again.",
-  "homepage": "https://github.com/IgorGanapolsky/thumbgate",
+  "homepage": "https://thumbgate-production.up.railway.app",
   "transport": "stdio",
-  "configSchema": {
-    "type": "object",
-    "properties": {
-      "mcpProfile": {
-        "type": "string",
-        "description": "MCP profile to use (default, readonly, locked)",
-        "default": "default"
-      }
-    },
-    "required": []
-  },
-  "tools": [
-    {
-      "name": "capture_feedback",
-      "description": "Capture thumbs up/down feedback and promote actionable memory",
-      "inputSchema": {
-        "type": "object",
-        "required": [
-          "signal",
-          "context"
-        ],
-        "properties": {
-          "signal": {
-            "type": "string",
-            "enum": [
-              "up",
-              "down"
-            ]
-          },
-          "context": {
-            "type": "string"
-          },
-          "whatWentWrong": {
-            "type": "string"
-          },
-          "whatToChange": {
-            "type": "string"
-          },
-          "whatWorked": {
-            "type": "string"
-          },
-          "tags": {
-            "type": "array",
-            "items": {
-              "type": "string"
-            }
-          },
-          "skill": {
-            "type": "string"
-          }
-        }
-      }
-    },
-    {
-      "name": "feedback_summary",
-      "description": "Get summary of recent feedback",
-      "inputSchema": {
-        "type": "object",
-        "properties": {
-          "recent": {
-            "type": "number"
-          }
-        }
-      }
-    },
-    {
-      "name": "feedback_stats",
-      "description": "Get feedback stats and recommendations",
-      "inputSchema": {
-        "type": "object",
-        "properties": {}
-      }
-    },
-    {
-      "name": "list_intents",
-      "description": "List available intent plans and whether each requires human approval in the active profile",
-      "inputSchema": {
-        "type": "object",
-        "properties": {
-          "mcpProfile": {
-            "type": "string"
-          },
-          "bundleId": {
-            "type": "string"
-          }
-        }
-      }
-    },
-    {
-      "name": "plan_intent",
-      "description": "Generate an intent execution plan with policy checkpoints",
-      "inputSchema": {
-        "type": "object",
-        "required": [
-          "intentId"
-        ],
-        "properties": {
-          "intentId": {
-            "type": "string"
-          },
-          "context": {
-            "type": "string"
-          },
-          "mcpProfile": {
-            "type": "string"
-          },
-          "bundleId": {
-            "type": "string"
-          },
-          "approved": {
-            "type": "boolean"
-          }
-        }
-      }
-    },
-    {
-      "name": "prevention_rules",
-      "description": "Generate prevention rules from repeated mistake patterns",
-      "inputSchema": {
-        "type": "object",
-        "properties": {
-          "minOccurrences": {
-            "type": "number"
-          },
-          "outputPath": {
-            "type": "string"
-          }
-        }
-      }
-    },
-    {
-      "name": "export_dpo_pairs",
-      "description": "Export DPO preference pairs from local memory log",
-      "inputSchema": {
-        "type": "object",
-        "properties": {
-          "memoryLogPath": {
-            "type": "string"
-          }
-        }
-      }
-    },
-    {
-      "name": "construct_context_pack",
-      "description": "Construct a bounded context pack from contextfs",
-      "inputSchema": {
-        "type": "object",
-        "properties": {
-          "query": {
-            "type": "string"
-          },
-          "maxItems": {
-            "type": "number"
-          },
-          "maxChars": {
-            "type": "number"
-          },
-          "namespaces": {
-            "type": "array",
-            "items": {
-              "type": "string"
-            }
-          }
-        }
-      }
-    },
-    {
-      "name": "evaluate_context_pack",
-      "description": "Record evaluation outcome for a context pack",
-      "inputSchema": {
-        "type": "object",
-        "required": [
-          "packId",
-          "outcome"
-        ],
-        "properties": {
-          "packId": {
-            "type": "string"
-          },
-          "outcome": {
-            "type": "string"
-          },
-          "signal": {
-            "type": "string"
-          },
-          "notes": {
-            "type": "string"
-          }
-        }
-      }
-    },
-    {
-      "name": "context_provenance",
-      "description": "Get recent context/provenance events",
-      "inputSchema": {
-        "type": "object",
-        "properties": {
-          "limit": {
-            "type": "number"
-          }
-        }
-      }
-    },
-    {
-      "name": "recall",
-      "description": "Recall relevant past feedback, memories, and prevention rules for the current task",
-      "inputSchema": {
-        "type": "object",
-        "required": [
-          "query"
-        ],
-        "properties": {
-          "query": {
-            "type": "string",
-            "description": "Describe the current task or context to find relevant past feedback"
-          },
-          "limit": {
-            "type": "number",
-            "description": "Max memories to return (default 5)"
-          }
-        }
-      }
-    }
-  ]
+  "discovery": {
+    "manifestUrl": "https://thumbgate-production.up.railway.app/.well-known/mcp.json",
+    "toolIndexUrl": "https://thumbgate-production.up.railway.app/.well-known/mcp/tools.json",
+    "toolSchemaUrlTemplate": "https://thumbgate-production.up.railway.app/.well-known/mcp/tools/{name}.json",
+    "skillsUrl": "https://thumbgate-production.up.railway.app/.well-known/mcp/skills.json",
+    "applicationsUrl": "https://thumbgate-production.up.railway.app/.well-known/mcp/applications.json"
+  }
 }

package/adapters/README.md CHANGED Viewed

@@ -3,7 +3,7 @@
 - `chatgpt/openapi.yaml`: import into GPT Actions.
 - `gemini/function-declarations.json`: Gemini function-calling definitions.
 - `mcp/server-stdio.js`: underlying local MCP stdio server implementation.
-- `claude/.mcp.json`: example Claude Code MCP config using `npx --yes --package thumbgate@1.8.0 thumbgate serve`.
+- `claude/.mcp.json`: example Claude Code MCP config using `npx --yes --package thumbgate@1.9.0 thumbgate serve`.
 - `codex/config.toml`: example Codex MCP profile section using the same version-pinned portable launcher.
 - `amp/skills/thumbgate-feedback/SKILL.md`: Amp skill template.
 - `opencode/opencode.json`: portable OpenCode MCP profile using the same version-pinned portable launcher.

package/adapters/claude/.mcp.json CHANGED Viewed

@@ -2,13 +2,13 @@
   "mcpServers": {
     "thumbgate": {
       "command": "npx",
-      "args": ["--yes", "--package", "thumbgate@1.8.0", "thumbgate", "serve"]
+      "args": ["--yes", "--package", "thumbgate@1.9.0", "thumbgate", "serve"]
     }
   },
   "hooks": {
     "preToolUse": {
       "command": "npx",
-      "args": ["--yes", "--package", "thumbgate@1.8.0", "thumbgate", "gate-check"]
+      "args": ["--yes", "--package", "thumbgate@1.9.0", "thumbgate", "gate-check"]
     }
   }
 }

package/adapters/mcp/server-stdio.js CHANGED Viewed

@@ -106,6 +106,9 @@ const {
 const {
   searchThumbgate,
 } = require('../../scripts/thumbgate-search');
+const {
+  buildMultimodalRetrievalPlan,
+} = require('../../scripts/multimodal-retrieval-plan');
 const {
   importDocument,
   listImportedDocuments,
@@ -117,6 +120,7 @@ const {
   listHarnesses,
   runHarness,
 } = require('../../scripts/natural-language-harness');
+const { runLoop: runAutoresearchLoop } = require('../../scripts/autoresearch-runner');
 const { TOOLS } = require('../../scripts/tool-registry');
 const { reflect: reflectOnFeedback } = require('../../scripts/reflector-agent');
 const { submitProductIssue } = require('../../scripts/product-feedback');
@@ -148,7 +152,7 @@ const {
   finalizeSession: finalizeFeedbackSession,
 } = require('../../scripts/feedback-session');
-const SERVER_INFO = { name: 'thumbgate-mcp', version: '1.8.0' };
+const SERVER_INFO = { name: 'thumbgate-mcp', version: '1.9.0' };
 const COMMERCE_CATEGORIES = [
   'product_recommendation',
   'brand_compliance',
@@ -196,6 +200,17 @@ function resolveImportDocumentPath(targetPath) {
   return resolved;
 }
+function resolveWorkspaceCwd(targetPath) {
+  if (!targetPath) return undefined;
+  const workspaceRoot = path.resolve(process.cwd());
+  const resolved = path.resolve(workspaceRoot, String(targetPath));
+  const relative = path.relative(workspaceRoot, resolved);
+  if (relative.startsWith('..') || path.isAbsolute(relative)) {
+    throw new Error(`cwd must stay within ${workspaceRoot}`);
+  }
+  return resolved;
+}
 function toTextResult(payload) {
   const text = typeof payload === 'string' ? payload : JSON.stringify(payload, null, 2);
   return {
@@ -851,6 +866,36 @@ async function callToolInner(name, args) {
       return toTextResult({ harnesses: listHarnesses({ tag: args.tag }) });
     case 'run_harness':
       return toTextResult(runHarness(args.harness, args.inputs || {}, { jobId: args.jobId }));
+    case 'plan_multimodal_retrieval':
+      return toTextResult(buildMultimodalRetrievalPlan(args));
+    case 'run_autoresearch': {
+      const iterations = Math.max(1, Math.min(5, Number(args.iterations || 1)));
+      const timeoutMs = Math.max(1000, Math.min(600000, Number(args.timeoutMs || 120000)));
+      const holdoutCommands = Array.isArray(args.holdoutCommands)
+        ? args.holdoutCommands.filter((command) => typeof command === 'string' && command.trim())
+        : [];
+      const result = await runAutoresearchLoop({
+        iterations,
+        targetName: args.targetName || undefined,
+        nextValue: Number.isFinite(args.nextValue) ? args.nextValue : undefined,
+        testCommand: args.testCommand || 'npm test',
+        holdoutCommands,
+        timeoutMs,
+        cwd: resolveWorkspaceCwd(args.cwd),
+        researchQuery: args.researchQuery || null,
+        paperLimit: Math.max(1, Math.min(10, Number(args.paperLimit || 5))),
+      });
+      return toTextResult({
+        ...result,
+        controls: {
+          iterations,
+          timeoutMs,
+          holdoutCommands,
+          maxIterationsPerCall: 5,
+          maxTimeoutMs: 600000,
+        },
+      });
+    }
     case 'open_feedback_session':
       return toTextResult(openFeedbackSession(args.feedbackEventId, args.signal, args.initialContext));
     case 'append_feedback_context':

package/adapters/opencode/opencode.json CHANGED Viewed

@@ -7,7 +7,7 @@
         "npx",
         "--yes",
         "--package",
-        "thumbgate@1.8.0",
+        "thumbgate@1.9.0",
         "thumbgate",
         "serve"
       ],

package/config/mcp-allowlists.json CHANGED Viewed

@@ -12,6 +12,7 @@
       "search_lessons",
       "retrieve_lessons",
       "search_thumbgate",
+      "plan_multimodal_retrieval",
       "reflect_on_feedback",
       "feedback_stats",
       "diagnose_failure",
@@ -45,6 +46,7 @@
       "settings_status",
       "list_harnesses",
       "run_harness",
+      "run_autoresearch",
       "estimate_uncertainty",
       "get_business_metrics",
       "describe_semantic_entity",
@@ -70,6 +72,7 @@
       "search_lessons",
       "retrieve_lessons",
       "search_thumbgate",
+      "plan_multimodal_retrieval",
       "reflect_on_feedback",
       "prevention_rules",
       "set_task_scope",
@@ -114,6 +117,7 @@
       "search_lessons",
       "retrieve_lessons",
       "search_thumbgate",
+      "plan_multimodal_retrieval",
       "feedback_stats",
       "diagnose_failure",
       "list_harnesses",
@@ -146,6 +150,7 @@
       "search_lessons",
       "retrieve_lessons",
       "search_thumbgate",
+      "plan_multimodal_retrieval",
       "feedback_stats",
       "diagnose_failure",
       "list_harnesses",

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "thumbgate",
-  "version": "1.8.0",
+  "version": "1.9.0",
   "description": "Self-improving agent governance: type thumbs-up or thumbs-down on any AI agent action. ThumbGate turns every mistake into a prevention rule and blocks the pattern from repeating. One thumbs-down, never again. 33 pre-action gates, budget enforcement, and self-protection for Claude Code, Cursor, Codex, Gemini CLI, and Amp.",
   "homepage": "https://thumbgate-production.up.railway.app",
   "repository": {
@@ -49,6 +49,7 @@
     "scripts/analytics-report.js",
     "scripts/analytics-window.js",
     "scripts/autonomous-workflow.js",
+    "scripts/autoresearch-runner.js",
     "scripts/async-job-runner.js",
     "scripts/audit-trail.js",
     "scripts/auto-promote-gates.js",
@@ -135,6 +136,7 @@
     "scripts/mcp-policy.js",
     "scripts/memory-firewall.js",
     "scripts/meta-agent-loop.js",
+    "scripts/multimodal-retrieval-plan.js",
     "scripts/natural-language-harness.js",
     "scripts/obsidian-export.js",
     "scripts/operational-dashboard.js",

package/public/index.html CHANGED Viewed

@@ -974,7 +974,7 @@ __GA_BOOTSTRAP__
 <!-- HOW IT WORKS -->
 <section class="how-it-works" id="how-it-works">
   <div class="container">
-    <div class="section-label">New in v1.8.0</div>
+    <div class="section-label">New in v1.9.0</div>
     <h2 class="section-title">Three steps to stop repeated AI failures</h2>
     <div class="steps">
       <div class="step">
@@ -1330,7 +1330,7 @@ __GA_BOOTSTRAP__
       <a href="https://www.linkedin.com/in/igorganapolsky" target="_blank" rel="noopener">LinkedIn</a>
       <a href="/blog">Blog</a>
     </div>
-    <span class="footer-copy">© 2026 Max Smith KDP LLC · MIT License · v1.8.0</span>
+    <span class="footer-copy">© 2026 Max Smith KDP LLC · MIT License · v1.9.0</span>
   </div>
 </footer>

package/scripts/agent-readiness.js CHANGED Viewed

@@ -24,6 +24,7 @@ const WRITE_CAPABLE_TOOLS = new Set([
   'approve_protected_action',
   'track_action',
   'register_claim_gate',
+  'run_autoresearch',
 ]);
 const BOOTSTRAP_FILES = [

package/scripts/autoresearch-runner.js ADDED Viewed

@@ -0,0 +1,228 @@
+#!/usr/bin/env node
+'use strict';
+/**
+ * Autoresearch Runner (AUTORESEARCH-02)
+ *
+ * Karpathy-inspired self-optimizing loop for the ThumbGate feedback studio.
+ * Each iteration: mutate local evolution state → run primary + holdout checks
+ * → measure score → keep/discard with rollback snapshots.
+ *
+ * The runner never rewrites tracked source files. It mutates the local
+ * evolution-state overlay, evaluates in place, and only persists accepted
+ * settings plus rollback snapshots.
+ *
+ * Mutation targets (in priority order):
+ *   1. Thompson Sampling priors (HALF_LIFE_DAYS, DECAY_FLOOR)
+ *   2. Prevention rule thresholds (minOccurrences)
+ *   3. Verification loop retries (MAX_RETRIES)
+ *   4. DPO temperature (DPO_BETA)
+ *
+ * Score function: command pass rate × approval weighting, with holdout gating.
+ *
+ * Zero external dependencies.
+ *
+ * Exports: runIteration, runLoop, scoreSuite, MUTATION_TARGETS
+ */
+const {
+  getProgress,
+} = require('./experiment-tracker');
+const { buildResearchBrief } = require('./hf-papers');
+const {
+  EVOLUTION_TARGETS,
+  parseCommandScore,
+  runWorkspaceEvolution,
+} = require('./workspace-evolver');
+// ---------------------------------------------------------------------------
+// Mutation Targets
+// ---------------------------------------------------------------------------
+const MUTATION_TARGETS = EVOLUTION_TARGETS;
+// ---------------------------------------------------------------------------
+// Score Function
+// ---------------------------------------------------------------------------
+/**
+ * Score a test suite run. Returns a number in [0, 1].
+ *
+ * @param {object} params
+ * @param {string} params.testOutput - stdout from test run
+ * @param {number} [params.approvalRate] - Current approval rate from feedback
+ * @returns {{ score: number, testPassRate: number, details: object }}
+ */
+function scoreSuite(params) {
+  return parseCommandScore(params.testOutput || '', 0, typeof params.approvalRate === 'number' ? params.approvalRate : 0.5);
+}
+// ---------------------------------------------------------------------------
+// Single Iteration
+// ---------------------------------------------------------------------------
+/**
+ * Run one autoresearch iteration.
+ *
+ * 1. Pick a random mutation target
+ * 2. Read current value, compute a random neighbor
+ * 3. Run the test suite in a tmp env with the mutation
+ * 4. Score and keep/discard via experiment tracker
+ *
+ * @param {object} [opts]
+ * @param {string} [opts.targetName] - Force a specific mutation target
+ * @param {number} [opts.nextValue] - Force the candidate value instead of a random neighbor
+ * @param {string} [opts.testCommand] - Override test command (default: npm test)
+ * @param {string[]} [opts.holdoutCommands] - Optional holdout commands required for acceptance
+ * @param {number} [opts.timeoutMs] - Test timeout in ms (default: 120000)
+ * @param {string} [opts.cwd] - Working directory for evaluation commands
+ * @param {string} [opts.researchQuery] - Optional external research query
+ * @param {number} [opts.paperLimit] - Max papers to ingest for research context
+ * @param {Function} [opts.fetchImpl] - Optional fetch implementation override
+ * @param {Function} [opts.searchPapersImpl] - Optional paper search override
+ * @returns {Promise<object>} experiment result
+ */
+async function runIteration(opts = {}) {
+  const options = opts || {};
+  const timeoutMs = options.timeoutMs || 120000;
+  const testCommand = options.testCommand || 'npm test';
+  const research = options.researchQuery
+    ? await buildResearchBrief({
+      query: options.researchQuery,
+      limit: options.paperLimit,
+      fetchImpl: options.fetchImpl,
+      searchPapersImpl: options.searchPapersImpl,
+      template: 'autoresearch-brief',
+    })
+    : null;
+  const result = runWorkspaceEvolution({
+    targetName: options.targetName,
+    nextValue: options.nextValue,
+    primaryCommands: [testCommand],
+    holdoutCommands: options.holdoutCommands || [],
+    timeoutMs,
+    cwd: options.cwd,
+    hypothesisSuffix: research ? `Research query: ${research.query}` : null,
+    additionalMetrics: {
+      researchQuery: research ? research.query : null,
+      researchPackId: research ? research.packId : null,
+      researchPaperIds: research ? research.citations.map((citation) => citation.paperId).filter(Boolean) : [],
+    },
+  });
+  return result;
+}
+// ---------------------------------------------------------------------------
+// Multi-Iteration Loop
+// ---------------------------------------------------------------------------
+/**
+ * Run N autoresearch iterations.
+ *
+ * @param {object} params
+ * @param {number} params.iterations - Number of experiments to run
+ * @param {string} [params.targetName] - Force a specific mutation target
+ * @param {number} [params.nextValue] - Force the candidate value instead of a random neighbor
+ * @param {string} [params.testCommand] - Override test command
+ * @param {string[]} [params.holdoutCommands] - Optional holdout commands required for acceptance
+ * @param {number} [params.timeoutMs] - Per-iteration timeout
+ * @param {string} [params.cwd] - Working directory for evaluation commands
+ * @param {string} [params.researchQuery] - Optional external research query
+ * @param {number} [params.paperLimit] - Max papers to ingest for research context
+ * @param {Function} [params.fetchImpl] - Optional fetch implementation override
+ * @param {Function} [params.searchPapersImpl] - Optional paper search override
+ * @returns {Promise<object>} { results, progress }
+ */
+async function runLoop(params) {
+  const iterations = params.iterations || 1;
+  const results = [];
+  for (let i = 0; i < iterations; i++) {
+    console.log(`\n[autoresearch] Iteration ${i + 1}/${iterations}`);
+    try {
+      const result = await runIteration({
+        targetName: params.targetName,
+        nextValue: Number.isFinite(params.nextValue) ? params.nextValue : undefined,
+        testCommand: params.testCommand,
+        holdoutCommands: params.holdoutCommands,
+        timeoutMs: params.timeoutMs,
+        cwd: params.cwd,
+        researchQuery: params.researchQuery,
+        paperLimit: params.paperLimit,
+        fetchImpl: params.fetchImpl,
+        searchPapersImpl: params.searchPapersImpl,
+      });
+      results.push(result);
+      if (result.kept) {
+        console.log(`  ✓ KEPT: ${result.name} (delta: +${(result.delta || 0).toFixed(4)})`);
+      } else if (result.skipped) {
+        console.log(`  ⊘ SKIPPED: ${result.reason}`);
+      } else {
+        console.log(`  ✗ DISCARDED: ${result.reason}`);
+      }
+    } catch (err) {
+      console.error(`  ✗ ERROR: ${err.message}`);
+      results.push({ error: err.message });
+    }
+  }
+  const progress = getProgress();
+  console.log(`\n[autoresearch] Progress: ${progress.completed} experiments, ${progress.kept} kept (${progress.keepRate}%)`);
+  return { results, progress };
+}
+// ---------------------------------------------------------------------------
+// CLI
+// ---------------------------------------------------------------------------
+if (require.main === module) {
+  const args = {};
+  process.argv.slice(2).forEach((arg) => {
+    if (!arg.startsWith('--')) return;
+    const [key, ...rest] = arg.slice(2).split('=');
+    args[key] = rest.length > 0 ? rest.join('=') : true;
+  });
+  if (args.run) {
+    const iterations = Number(args.iterations || 1);
+    const testCommand = args['test-command'] || 'npm test';
+    const timeoutMs = Number(args.timeout || 120000);
+    const paperLimit = Number(args['paper-limit'] || 5);
+    const holdoutCommands = args.holdout ? [args.holdout] : [];
+    runLoop({
+      iterations,
+      targetName: args.target || null,
+      nextValue: args['next-value'] !== undefined ? Number(args['next-value']) : undefined,
+      testCommand,
+      holdoutCommands,
+      timeoutMs,
+      cwd: args.cwd || undefined,
+      researchQuery: args['research-query'] || null,
+      paperLimit,
+    }).catch((error) => {
+      console.error(error.message);
+      process.exit(1);
+    });
+  } else if (args.targets) {
+    console.log('Mutation targets:');
+    MUTATION_TARGETS.forEach((t) => {
+      console.log(`  ${t.name} (${t.type}): range [${t.range.join(', ')}], step ${t.step}`);
+    });
+  } else {
+    console.log(`Usage:
+  node scripts/autoresearch-runner.js --run [--iterations=5] [--target=half_life_days] [--next-value=8] [--test-command="npm test"] [--holdout="npm run self-heal:check"] [--timeout=120000] [--research-query="rank fusion"] [--paper-limit=5]
+  node scripts/autoresearch-runner.js --targets`);
+  }
+}
+// ---------------------------------------------------------------------------
+// Exports
+// ---------------------------------------------------------------------------
+module.exports = {
+  runIteration,
+  runLoop,
+  scoreSuite,
+  MUTATION_TARGETS,
+};

package/scripts/multimodal-retrieval-plan.js ADDED Viewed

@@ -0,0 +1,110 @@
+'use strict';
+const DEFAULT_EVIDENCE_TYPES = ['screenshots', 'pdf_pages', 'proof_artifacts'];
+const DEFAULT_DIMS = [1024, 512, 256, 128, 64];
+function clampInteger(value, { min, max, fallback }) {
+  const parsed = Number(value);
+  if (!Number.isFinite(parsed)) return fallback;
+  return Math.max(min, Math.min(max, Math.floor(parsed)));
+}
+function normalizeEvidenceTypes(value) {
+  if (!Array.isArray(value)) return DEFAULT_EVIDENCE_TYPES;
+  const normalized = value
+    .map((item) => String(item || '').trim().toLowerCase().replace(/[^a-z0-9]+/g, '_'))
+    .filter(Boolean);
+  return normalized.length > 0 ? [...new Set(normalized)] : DEFAULT_EVIDENCE_TYPES;
+}
+function dimensionPlan({ corpusItems, maxEmbeddingDim }) {
+  const dims = DEFAULT_DIMS.filter((dim) => dim <= maxEmbeddingDim);
+  const selected = dims.length > 0 ? dims : [maxEmbeddingDim];
+  return selected.map((dim) => ({
+    dim,
+    estimatedFloat32Mb: Number(((corpusItems * dim * 4) / (1024 * 1024)).toFixed(2)),
+    useWhen: dim >= 1024
+      ? 'default quality pass for launch-critical retrieval'
+      : 'cost-down pass when storage or latency dominates',
+  }));
+}
+function buildMultimodalRetrievalPlan(args = {}) {
+  const evidenceTypes = normalizeEvidenceTypes(args.evidenceTypes);
+  const corpusItems = clampInteger(args.corpusItems, {
+    min: 100,
+    max: 10000000,
+    fallback: 5000,
+  });
+  const maxEmbeddingDim = clampInteger(args.maxEmbeddingDim, {
+    min: 64,
+    max: 2048,
+    fallback: 1024,
+  });
+  const latencyBudgetMs = clampInteger(args.latencyBudgetMs, {
+    min: 50,
+    max: 30000,
+    fallback: 750,
+  });
+  const useReranker = args.useReranker !== false;
+  const goal = String(args.goal || 'retrieve visual proof for agent-governance decisions').trim();
+  const dims = dimensionPlan({ corpusItems, maxEmbeddingDim });
+  const defaultDim = dims.some((entry) => entry.dim === 1024) ? 1024 : dims[0].dim;
+  return {
+    planVersion: '2026-04-20',
+    sourcePattern: 'multimodal Sentence Transformers visual document retrieval',
+    goal,
+    evidenceTypes,
+    architecture: {
+      stage1: 'Index screenshots, PDF pages, dashboard captures, and proof artifacts with a multimodal embedding model.',
+      stage2: useReranker
+        ? 'Rerank the top candidates with a multimodal cross-encoder before using evidence in a gate, PR, or sales proof claim.'
+        : 'Skip reranking for low-latency agent recall; require stronger holdout evaluation before shipping.',
+      fallback: 'Keep text-only search as a fallback for code, logs, markdown, and plain policy docs.',
+    },
+    trainingData: {
+      pilotSchema: ['query', 'image', 'negative_0'],
+      hardNegativeStrategy: 'Pair each proof query with visually similar but wrong screenshots or PDF pages.',
+      minimumPilot: 'Start with 300 labeled evaluation queries and at least one hard negative per query before finetuning.',
+    },
+    evaluation: {
+      baseline: 'Measure current text-only retrieval before any model changes.',
+      primaryMetric: 'NDCG@10',
+      secondaryMetrics: ['Recall@5', 'MAP', 'false_positive_gate_rate'],
+      holdoutSets: [
+        'agent failure screenshots',
+        'dashboard proof captures',
+        'visual docs that contain tables or charts',
+      ],
+    },
+    deployment: {
+      latencyBudgetMs,
+      defaultEmbeddingDim: defaultDim,
+      matryoshkaDimensions: dims,
+      compressionPath: 'Use Matryoshka truncation first, then quantization only after holdout quality is stable.',
+    },
+    thumbgateUseCases: [
+      'Find the exact screenshot or proof artifact behind a completion claim.',
+      'Retrieve visual evidence before approving a workflow-hardening sprint.',
+      'Rank dashboard captures and PDF runbook pages for GEO/SEO evidence pages.',
+      'Attach visual hard negatives to Autoresearch loops so agents cannot reward-hack by deleting hard cases.',
+    ],
+    guardrails: [
+      'Never promote visual retrieval results into claims without a linked artifact URL or local path.',
+      'Keep the multimodal index read-only for agent recall; gate training and index rebuilds behind explicit workflow checks.',
+      'Evaluate retrieval on holdout screenshots/PDF pages before replacing text-only recall.',
+    ],
+    nextActions: [
+      'Create a small visual proof corpus from existing public dashboard screenshots and proof artifacts.',
+      'Log query -> correct artifact -> hard negative triples during workflow sprint reviews.',
+      'Use Autoresearch to optimize NDCG@10 and latency only after the baseline corpus exists.',
+    ],
+  };
+}
+module.exports = {
+  buildMultimodalRetrievalPlan,
+  dimensionPlan,
+  normalizeEvidenceTypes,
+};

package/scripts/tool-registry.js CHANGED Viewed

@@ -134,6 +134,25 @@ const TOOLS = [
       },
     },
   }),
+  readOnlyTool({
+    name: 'plan_multimodal_retrieval',
+    description: 'Plan a high-ROI multimodal retrieval rollout for screenshots, PDF pages, dashboard captures, and proof artifacts without starting GPU training.',
+    inputSchema: {
+      type: 'object',
+      properties: {
+        goal: { type: 'string', description: 'Business or workflow objective for visual/document retrieval.' },
+        evidenceTypes: {
+          type: 'array',
+          items: { type: 'string' },
+          description: 'Evidence surfaces to include, such as screenshots, pdf_pages, proof_artifacts, dashboards, or videos.',
+        },
+        corpusItems: { type: 'number', description: 'Estimated number of visual artifacts or document pages to index.' },
+        maxEmbeddingDim: { type: 'number', description: 'Maximum embedding dimension to budget for Matryoshka-style truncation planning.' },
+        latencyBudgetMs: { type: 'number', description: 'Target retrieval latency budget for agent recall.' },
+        useReranker: { type: 'boolean', description: 'Whether to include a multimodal reranker stage after initial embedding retrieval.' },
+      },
+    },
+  }),
   destructiveTool({
     name: 'import_document',
     description: 'Import a local policy or runbook document into ThumbGate, normalize it for search, and propose provenance-backed gate candidates.',
@@ -872,6 +891,24 @@ const TOOLS = [
       },
     },
   }),
+  destructiveTool({
+    name: 'run_autoresearch',
+    description: 'Run a bounded metric-improvement loop: measure a baseline, test a hypothesis, require primary and holdout checks, then keep or discard the candidate mutation with proof.',
+    inputSchema: {
+      type: 'object',
+      properties: {
+        iterations: { type: 'number', description: 'Number of iterations to run. Capped at 5 per call; default 1.' },
+        targetName: { type: 'string', enum: ['half_life_days', 'decay_floor', 'prevention_min_occurrences', 'verification_max_retries', 'dpo_beta'], description: 'Optional evolution target to mutate.' },
+        nextValue: { type: 'number', description: 'Optional explicit candidate value for the target.' },
+        testCommand: { type: 'string', description: 'Primary metric command. Defaults to npm test.' },
+        holdoutCommands: { type: 'array', items: { type: 'string' }, description: 'Additional checks required before a candidate can be kept.' },
+        timeoutMs: { type: 'number', description: 'Per-command timeout in milliseconds. Capped at 600000; default 120000.' },
+        cwd: { type: 'string', description: 'Optional workspace directory for the evaluation commands.' },
+        researchQuery: { type: 'string', description: 'Optional research query used to build an autoresearch context brief.' },
+        paperLimit: { type: 'number', description: 'Maximum research papers to ingest when researchQuery is set. Capped at 10; default 5.' },
+      },
+    },
+  }),
   destructiveTool({
     name: 'schedule',
     description: 'Create, list, or delete scheduled tasks. Supports natural language scheduling like "daily 9:00", "weekly monday 8:30", "hourly". Installs as macOS LaunchAgent or Linux crontab.',

package/src/api/server.js CHANGED Viewed

@@ -552,6 +552,169 @@ function getServerCardTools() {
   }));
 }
+/**
+ * Build an absolute public URL by appending a path to the hosted app origin.
+ *
+ * Trailing slashes on the configured origin are dropped first, so an origin
+ * such as `https://example.com/` does not produce `https://example.com//path`.
+ * Origins without a trailing slash give exactly the same result as plain
+ * concatenation.
+ *
+ * @param {{ appOrigin: string }} hostedConfig - Config object; only `appOrigin` is read.
+ * @param {string} pathname - Path to append; callers in this file pass a leading `/`.
+ * @returns {string} The normalized origin immediately followed by `pathname`.
+ */
+function buildPublicUrl(hostedConfig, pathname) {
+  // String() keeps the original template-literal coercion for non-string origins.
+  const origin = String(hostedConfig.appOrigin).replace(/\/+$/, '');
+  return `${origin}${pathname}`;
+}
+const VERIFICATION_EVIDENCE_URL = 'https://github.com/IgorGanapolsky/ThumbGate/blob/main/docs/VERIFICATION_EVIDENCE.md'; // Shared value for the proofUrl / verificationEvidenceUrl fields in the discovery payloads.
+/**
+ * List every registered MCP tool in summary form.
+ *
+ * Each entry carries the tool's name, description and annotations (an empty
+ * object when the tool has none), plus a URL pointing at the per-tool schema
+ * document. The input schema itself is not part of the entry.
+ *
+ * @param {object} hostedConfig - Passed through to buildPublicUrl.
+ * @returns {Array<object>} One summary per element of MCP_TOOLS, in the same order.
+ */
+function getToolDiscoveryIndex(hostedConfig) {
+  const summarize = ({ name, description, annotations }) => {
+    // The tool name is percent-encoded because it becomes a path segment.
+    const schemaPath = `/.well-known/mcp/tools/${encodeURIComponent(name)}.json`;
+    return {
+      name,
+      description,
+      annotations: annotations || {},
+      schemaUrl: buildPublicUrl(hostedConfig, schemaPath),
+    };
+  };
+  return MCP_TOOLS.map((tool) => summarize(tool));
+}
+/**
+ * Build the list of skill manifests advertised by the server.
+ *
+ * Three manifests are returned, in this order: `thumbgate`,
+ * `workflow-hardening-sprint` and `visual-proof-retrieval`. All of them link
+ * to the shared verification-evidence document through `proofUrl`.
+ *
+ * @param {object} hostedConfig - Passed through to buildPublicUrl for the public links.
+ * @returns {Array<object>} The skill manifest objects.
+ */
+function getMcpSkillManifests(hostedConfig) {
+  // Both URLs depend only on hostedConfig, so they are resolved once up front.
+  const llmContextUrl = buildPublicUrl(hostedConfig, '/public/llm-context.md');
+  const sprintIntakeUrl = buildPublicUrl(hostedConfig, '/#workflow-sprint-intake');
+  const preActionGatesSkill = {
+    name: 'thumbgate',
+    title: 'ThumbGate Pre-Action Gates',
+    description: 'Capture feedback, recall lessons, generate rules, and block repeated agent mistakes before tool execution.',
+    triggers: ['thumbgate', 'pre-action gates', 'prevent repeated AI mistakes', 'agent feedback', 'PreToolUse hooks'],
+    recommendedFlow: [
+      'Recall lessons before risky work.',
+      'Plan high-risk actions with checkpoints.',
+      'Capture concrete thumbs-down/up feedback.',
+      'Inspect prevention_rules after repeats.',
+    ],
+    installCommand: 'npx thumbgate init',
+    contextUrl: llmContextUrl,
+    proofUrl: VERIFICATION_EVIDENCE_URL,
+  };
+  const hardeningSprintSkill = {
+    name: 'workflow-hardening-sprint',
+    title: 'Workflow Hardening Sprint',
+    description: 'Turn one repeated agent failure into an enforced gate with proof and rollout evidence.',
+    triggers: ['workflow hardening', 'team rollout', 'agent governance', 'approval boundary', 'audit trail'],
+    recommendedFlow: [
+      'Pick one costly repeated failure.',
+      'Import the policy or runbook.',
+      'Ship the gate with dashboard proof.',
+    ],
+    intakeUrl: sprintIntakeUrl,
+    proofUrl: VERIFICATION_EVIDENCE_URL,
+  };
+  const visualProofSkill = {
+    name: 'visual-proof-retrieval',
+    title: 'Visual Proof Retrieval',
+    description: 'Use screenshots, PDF pages, dashboard captures, and proof artifacts as searchable evidence for agent-governance claims.',
+    triggers: ['visual document retrieval', 'multimodal embeddings', 'screenshots', 'PDF evidence', 'proof artifacts'],
+    recommendedFlow: [
+      'Plan the corpus and Matryoshka dimension budget.',
+      'Baseline text-only retrieval before finetuning.',
+      'Evaluate NDCG@10 on visual hard negatives.',
+      'Require artifact links before using retrieved evidence in claims.',
+    ],
+    contextUrl: llmContextUrl,
+    proofUrl: VERIFICATION_EVIDENCE_URL,
+  };
+  return [preActionGatesSkill, hardeningSprintSkill, visualProofSkill];
+}
+/**
+ * Build the list of hosted application entry points advertised by the server.
+ *
+ * Each entry has `name`, `title`, `description`, an absolute `url` and a
+ * `useWhen` hint. Entries are returned in the order: dashboard, lessons,
+ * guide, workflow-sprint-intake.
+ *
+ * @param {object} hostedConfig - Passed through to buildPublicUrl to resolve each path.
+ * @returns {Array<object>} The application descriptors.
+ */
+function getMcpApplications(hostedConfig) {
+  // Column order: name, title, description, path, useWhen.
+  const rows = [
+    [
+      'dashboard',
+      'ThumbGate Dashboard',
+      'Review feedback, gates, blocked actions, funnel metrics, and proof.',
+      '/dashboard',
+      'Need proof before approving more autonomy.',
+    ],
+    [
+      'lessons',
+      'Lessons',
+      'Browse promoted lessons and corrective actions.',
+      '/lessons',
+      'Need human-approved context before risk.',
+    ],
+    [
+      'guide',
+      'Setup Guide',
+      'Install ThumbGate for Claude Code, Cursor, Codex, Gemini CLI, Amp, OpenCode, and MCP agents.',
+      '/guide',
+      'Need setup without searching the repo.',
+    ],
+    [
+      'workflow-sprint-intake',
+      'Workflow Hardening Sprint Intake',
+      'Submit a repeated agent failure for a proof-backed sprint.',
+      '/#workflow-sprint-intake',
+      'Ready to convert mistakes into gates.',
+    ],
+  ];
+  return rows.map(([name, title, description, path, useWhen]) => ({
+    name,
+    title,
+    description,
+    url: buildPublicUrl(hostedConfig, path),
+    useWhen,
+  }));
+}
+/**
+ * Assemble the top-level MCP discovery manifest.
+ *
+ * The manifest combines package identity (name, version from `pkg`, install
+ * command), the HTTP transport endpoint, links to the other discovery
+ * documents, the named primary tool flows, the skill and application lists,
+ * and the proof links.
+ *
+ * @param {object} hostedConfig - Supplies `appOrigin` and is passed to the URL and list builders.
+ * @returns {object} The discovery manifest object.
+ */
+function getMcpDiscoveryManifest(hostedConfig) {
+  const publicUrl = (pathname) => buildPublicUrl(hostedConfig, pathname);
+  const flow = (name, description, tools) => ({ name, description, tools });
+  const packageInfo = {
+    registry: 'npm',
+    name: 'thumbgate',
+    installCommand: 'npx thumbgate init',
+  };
+  const transport = {
+    type: 'streamable-http',
+    endpoint: publicUrl('/mcp'),
+    unauthenticatedDiscovery: ['initialize', 'tools/list'],
+    authenticatedMethods: ['tools/call'],
+  };
+  const discovery = {
+    serverCardUrl: publicUrl('/.well-known/mcp/server-card.json'),
+    toolIndexUrl: publicUrl('/.well-known/mcp/tools.json'),
+    toolSchemaUrlTemplate: publicUrl('/.well-known/mcp/tools/{name}.json'),
+    skillsUrl: publicUrl('/.well-known/mcp/skills.json'),
+    applicationsUrl: publicUrl('/.well-known/mcp/applications.json'),
+    llmsTxtUrl: publicUrl('/.well-known/llms.txt'),
+    progressive: {
+      pattern: 'Load manifest, inspect tools.json, fetch one tool schema only when needed.',
+      tokenStrategy: 'Do not preload every inputSchema. Use per-tool schema URLs.',
+    },
+  };
+  const primaryFlows = [
+    flow(
+      'capture-to-gate',
+      'Capture feedback, retrieve lessons, generate rules, enforce a gate.',
+      ['capture_feedback', 'search_lessons', 'prevention_rules', 'gate_stats'],
+    ),
+    flow(
+      'safe-autonomous-work',
+      'Plan high-risk work, recall lessons, diagnose failures.',
+      ['plan_intent', 'recall', 'diagnose_failure', 'feedback_summary'],
+    ),
+    flow(
+      'team-rollout-proof',
+      'Show dashboard evidence, metrics, and sprint proof.',
+      ['dashboard', 'get_business_metrics', 'construct_context_pack'],
+    ),
+    flow(
+      'metric-autoresearch',
+      'Run bounded baseline -> hypothesis -> holdout loops with keep/discard proof.',
+      ['get_business_metrics', 'construct_context_pack', 'run_autoresearch', 'require_evidence_for_claim'],
+    ),
+    flow(
+      'visual-proof-retrieval',
+      'Plan screenshot/PDF/proof-artifact retrieval before investing in multimodal finetuning.',
+      ['plan_multimodal_retrieval', 'search_thumbgate', 'construct_context_pack', 'require_evidence_for_claim'],
+    ),
+  ];
+  const skills = getMcpSkillManifests(hostedConfig);
+  const applications = getMcpApplications(hostedConfig);
+  const proof = {
+    verificationEvidenceUrl: VERIFICATION_EVIDENCE_URL,
+    llmContextUrl: publicUrl('/public/llm-context.md'),
+  };
+  // Key order below is the order the fields appear in the serialized manifest.
+  return {
+    schemaVersion: '2026-04-20',
+    name: 'thumbgate',
+    title: 'ThumbGate',
+    version: pkg.version,
+    description: 'Pre-Action Gates for AI coding agents: feedback, recall, prevention rules, and tool-call blocking.',
+    homepage: hostedConfig.appOrigin,
+    repository: 'https://github.com/IgorGanapolsky/ThumbGate',
+    package: packageInfo,
+    transport,
+    discovery,
+    primaryFlows,
+    skills,
+    applications,
+    proof,
+  };
+}
 function createHttpError(statusCode, message) {
   const err = new Error(message);
   err.statusCode = statusCode;
@@ -3904,7 +4067,85 @@ async function addContext(){
       return;
     }
+    if (isGetLikeRequest && pathname === '/.well-known/mcp.json') {
+      sendJson(res, 200, getMcpDiscoveryManifest(hostedConfig), {}, {
+        headOnly: isHeadRequest,
+      });
+      return;
+    }
+    if (isGetLikeRequest && pathname === '/.well-known/mcp/tools.json') {
+      sendJson(res, 200, {
+        name: 'thumbgate',
+        version: pkg.version,
+        count: MCP_TOOLS.length,
+        tools: getToolDiscoveryIndex(hostedConfig),
+      }, {}, {
+        headOnly: isHeadRequest,
+      });
+      return;
+    }
+    if (isGetLikeRequest && pathname.startsWith('/.well-known/mcp/tools/') && pathname.endsWith('.json')) {
+      const encodedToolName = pathname.slice('/.well-known/mcp/tools/'.length, -'.json'.length);
+      let toolName = encodedToolName;
+      try {
+        toolName = decodeURIComponent(encodedToolName);
+      } catch (_err) {
+        sendJson(res, 400, {
+          error: 'invalid_tool_name',
+          toolIndexUrl: buildPublicUrl(hostedConfig, '/.well-known/mcp/tools.json'),
+        }, {}, {
+          headOnly: isHeadRequest,
+        });
+        return;
+      }
+      const tool = MCP_TOOLS.find((candidate) => candidate.name === toolName);
+      if (!tool) {
+        sendJson(res, 404, {
+          error: 'tool_not_found',
+          toolName,
+          toolIndexUrl: buildPublicUrl(hostedConfig, '/.well-known/mcp/tools.json'),
+        }, {}, {
+          headOnly: isHeadRequest,
+        });
+        return;
+      }
+      sendJson(res, 200, {
+        name: tool.name,
+        description: tool.description,
+        annotations: tool.annotations || {},
+        inputSchema: tool.inputSchema,
+      }, {}, {
+        headOnly: isHeadRequest,
+      });
+      return;
+    }
+    if (isGetLikeRequest && pathname === '/.well-known/mcp/skills.json') {
+      sendJson(res, 200, {
+        name: 'thumbgate',
+        version: pkg.version,
+        skills: getMcpSkillManifests(hostedConfig),
+      }, {}, {
+        headOnly: isHeadRequest,
+      });
+      return;
+    }
+    if (isGetLikeRequest && pathname === '/.well-known/mcp/applications.json') {
+      sendJson(res, 200, {
+        name: 'thumbgate',
+        version: pkg.version,
+        applications: getMcpApplications(hostedConfig),
+      }, {}, {
+        headOnly: isHeadRequest,
+      });
+      return;
+    }
     if (isGetLikeRequest && pathname === '/.well-known/mcp/server-card.json') {
+      const discoveryManifest = getMcpDiscoveryManifest(hostedConfig);
       sendJson(res, 200, {
         serverInfo: {
           name: 'thumbgate',
@@ -3913,7 +4154,12 @@ async function addContext(){
         name: 'thumbgate',
         description: 'Pre-action gates that physically block AI coding agents from repeating known mistakes. Captures feedback, auto-promotes failures into prevention rules, and enforces them via PreToolUse hooks. Works with Claude Code, Codex, Gemini, Amp, Cursor, OpenCode, and any MCP-compatible agent.',
         version: pkg.version,
+        transport: discoveryManifest.transport,
+        discovery: discoveryManifest.discovery,
         tools: getServerCardTools(),
+        skills: getMcpSkillManifests(hostedConfig),
+        applications: getMcpApplications(hostedConfig),
+        proof: discoveryManifest.proof,
         repository: 'https://github.com/IgorGanapolsky/ThumbGate',
         homepage: hostedConfig.appOrigin,
       }, {}, {