npm - thumbgate - Versions diffs - 1.16.12 → 1.16.19 - Mend

thumbgate 1.16.12 → 1.16.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (62) hide show

package/.claude-plugin/marketplace.json +2 -2
package/.claude-plugin/plugin.json +1 -1
package/.well-known/mcp/server-card.json +1 -1
package/README.md +3 -1
package/adapters/claude/.mcp.json +2 -2
package/adapters/mcp/server-stdio.js +26 -1
package/adapters/opencode/opencode.json +1 -1
package/bin/cli.js +420 -1
package/config/gate-templates.json +372 -0
package/config/mcp-allowlists.json +25 -0
package/config/model-candidates.json +59 -2
package/config/model-tiers.json +4 -1
package/package.json +79 -22
package/public/compare.html +6 -0
package/public/index.html +144 -11
package/public/numbers.html +11 -11
package/public/pro.html +22 -24
package/scripts/agent-design-governance.js +211 -0
package/scripts/agent-reasoning-traces.js +683 -0
package/scripts/agent-reward-model.js +438 -0
package/scripts/agent-stack-survival-audit.js +231 -0
package/scripts/ai-engineering-stack-guardrails.js +256 -0
package/scripts/billing.js +16 -4
package/scripts/chatgpt-ads-readiness-pack.js +195 -0
package/scripts/cli-schema.js +277 -0
package/scripts/code-graph-guardrails.js +176 -0
package/scripts/deepseek-v4-runtime-guardrails.js +253 -0
package/scripts/gemini-embedding-policy.js +198 -0
package/scripts/inference-cache-policy.js +39 -0
package/scripts/judge-reward-function.js +396 -0
package/scripts/llm-behavior-monitor.js +251 -0
package/scripts/long-running-agent-context-guardrails.js +176 -0
package/scripts/multimodal-retrieval-plan.js +31 -11
package/scripts/oss-pr-opportunity-scout.js +240 -0
package/scripts/proactive-agent-eval-guardrails.js +230 -0
package/scripts/profile-router.js +5 -4
package/scripts/prompting-operating-system.js +273 -0
package/scripts/proxy-pointer-rag-guardrails.js +189 -0
package/scripts/rag-precision-guardrails.js +202 -0
package/scripts/rate-limiter.js +1 -1
package/scripts/reasoning-efficiency-guardrails.js +176 -0
package/scripts/reward-hacking-guardrails.js +251 -0
package/scripts/seo-gsd.js +1201 -11
package/scripts/single-use-credential-gate.js +182 -0
package/scripts/structured-prompt-driven.js +226 -0
package/scripts/telemetry-analytics.js +31 -6
package/scripts/tool-registry.js +92 -0
package/scripts/upstream-contribution-engine.js +379 -0
package/scripts/vector-store.js +119 -4
package/src/api/server.js +333 -100
package/scripts/agents-sdk-sandbox-plan.js +0 -57
package/scripts/ai-org-governance.js +0 -98
package/scripts/artifact-agent-plan.js +0 -81
package/scripts/enterprise-agent-rollout.js +0 -34
package/scripts/experience-replay-governance.js +0 -69
package/scripts/inference-economics.js +0 -53
package/scripts/knowledge-layer-plan.js +0 -108
package/scripts/memory-store-governance.js +0 -60
package/scripts/post-training-governance.js +0 -34
package/scripts/production-agent-readiness.js +0 -40
package/scripts/scaling-law-claims.js +0 -60
package/scripts/student-consistent-training.js +0 -73

package/scripts/single-use-credential-gate.js ADDED Viewed

@@ -0,0 +1,182 @@
+#!/usr/bin/env node
+'use strict';
+/**
+ * Single-Use Credential Gate
+ *
+ * Converts the Link CLI pattern into local ThumbGate policy: risky agent
+ * actions should request narrow, one-time credentials with synchronous
+ * approval instead of reusing long-lived secrets.
+ */
+const crypto = require('node:crypto');
+const path = require('node:path');
+const DEFAULT_TTL_SECONDS = 300; // fallback lifetime when neither the action/request nor the options supply ttlSeconds
+const RISK_PATTERNS = [ // keyword tables; any match tags the action and makes approval required
+  { tag: 'purchase', pattern: /\b(buy|buys|buying|purchase|purchases|checkout|payment|gumroad|stripe|card)\b/i },
+  { tag: 'credential', pattern: /\b(token|secret|credential|api[_-]?key|oauth|login)\b/i },
+  { tag: 'deploy', pattern: /\b(deploy|production|railway|release)\b/i },
+  { tag: 'external-write', pattern: /\b(post|reply|send|email|upload|publish|create order)\b/i },
+];
+/**
+ * Build the approval plan for a proposed agent action.
+ *
+ * The action's command/intent/description/url/tags text is matched against
+ * RISK_PATTERNS; any hit (or `action.requiresCredential`) makes approval required.
+ *
+ * @param {object} [action] - Action descriptor; may carry `scope`, `ttlSeconds`, `requiresCredential`, `persistent`.
+ * @param {object} [options] - May carry a fallback `ttlSeconds`.
+ * @returns {object} Plan with required, riskTags, scope, ttlSeconds (clamped to 30-900),
+ *   singleUse, approvalMode, approvalPrompt and deniedReasons.
+ */
+function planSingleUseCredentialRequest(action = {}, options = {}) {
+  const actionText = buildActionText(action);
+  const riskTags = [];
+  for (const { tag, pattern } of RISK_PATTERNS) {
+    if (pattern.test(actionText)) riskTags.push(tag);
+  }
+  const needsApproval = riskTags.length > 0 || Boolean(action.requiresCredential);
+  const scope = normalizeScope(action.scope || inferScope(actionText));
+  const requestedTtl = Number(action.ttlSeconds || options.ttlSeconds || DEFAULT_TTL_SECONDS);
+  const ttlSeconds = clamp(requestedTtl, 30, 900);
+  let approvalMode = 'not-required';
+  let approvalPrompt = 'No credential approval required.';
+  if (needsApproval) {
+    approvalMode = 'synchronous';
+    approvalPrompt = `Approve one-time credential for ${scope.resource} (${scope.operation})? Expires in ${ttlSeconds}s and cannot be reused.`;
+  }
+  return {
+    required: needsApproval,
+    riskTags,
+    scope,
+    ttlSeconds,
+    singleUse: true,
+    approvalMode,
+    approvalPrompt,
+    deniedReasons: buildDeniedReasons(action, scope),
+  };
+}
+/**
+ * Create a grant record for a credential request and its approval decision.
+ *
+ * @param {object} [request] - Usually a plan from planSingleUseCredentialRequest; reads `scope`, `ttlSeconds`, `singleUse`.
+ * @param {object} [approval] - Approval decision; reads `approved`, `approvedBy`, `evidence`.
+ * @returns {object} Grant with grantId, approved, singleUse, scope, issuedAt, expiresAt,
+ *   approvedBy, approvalEvidence and `usedAt: null`.
+ */
+function mintCredentialGrant(request = {}, approval = {}) {
+  // One timestamp for the id, issuedAt and expiresAt so the three always agree.
+  const issuedAtMs = Date.now();
+  const requestedTtl = Number(request.ttlSeconds || DEFAULT_TTL_SECONDS);
+  // An unparseable TTL used to reach `new Date(NaN).toISOString()`, which throws a
+  // RangeError; fall back to the default instead. Finite values are capped at the
+  // planner's 900-second ceiling so a hand-built request cannot mint a long-lived grant.
+  const ttlSeconds = Number.isFinite(requestedTtl)
+    ? Math.min(Math.max(requestedTtl, 0), 900)
+    : DEFAULT_TTL_SECONDS;
+  return {
+    grantId: `cred_${issuedAtMs}_${crypto.randomBytes(4).toString('hex')}`,
+    approved: Boolean(approval.approved),
+    singleUse: request.singleUse !== false,
+    scope: normalizeScope(request.scope),
+    issuedAt: new Date(issuedAtMs).toISOString(),
+    expiresAt: new Date(issuedAtMs + (ttlSeconds * 1000)).toISOString(),
+    approvedBy: approval.approvedBy || null,
+    approvalEvidence: approval.evidence || null,
+    usedAt: null,
+  };
+}
+/**
+ * Decide whether a grant may be used for an action at a given moment.
+ *
+ * @param {object} [grant] - Grant record; reads approved, singleUse, usedAt, expiresAt, scope, grantId.
+ * @param {object} [action] - Action descriptor; its `scope` is used, or one is inferred from its text.
+ * @param {Date|number|string} [now] - Evaluation time; anything `new Date()` accepts.
+ * @returns {{allowed: boolean, reasons: string[], grantId: (string|null), requiredScope: object, grantedScope: object}}
+ */
+function evaluateCredentialUse(grant = {}, action = {}, now = new Date()) {
+  const reasons = [];
+  if (!grant.approved) reasons.push('credential_not_approved');
+  if (!grant.singleUse) reasons.push('credential_not_single_use');
+  if (grant.usedAt) reasons.push('credential_already_used');
+  if (grant.expiresAt) {
+    const expiresAtMs = new Date(grant.expiresAt).getTime();
+    const nowMs = new Date(now).getTime();
+    // Negated `>=` on purpose: an unparseable expiresAt (or now) yields NaN, and every
+    // NaN comparison is false, so a plain `<` check would treat the grant as unexpired.
+    if (!(expiresAtMs >= nowMs)) reasons.push('credential_expired');
+  }
+  const actionScope = normalizeScope(action.scope || inferScope(buildActionText(action)));
+  const grantScope = normalizeScope(grant.scope);
+  if (!scopeAllows(grantScope, actionScope)) reasons.push('credential_scope_mismatch');
+  return {
+    allowed: reasons.length === 0,
+    reasons,
+    grantId: grant.grantId || null,
+    requiredScope: actionScope,
+    grantedScope: grantScope,
+  };
+}
+/**
+ * Return a copy of the grant stamped as used; the input grant is not modified.
+ *
+ * @param {object} [grant] - Grant record to copy.
+ * @param {Date} [now] - Moment of use.
+ * @returns {object} Shallow copy of `grant` with `usedAt` set to the ISO timestamp of `now`.
+ */
+function markCredentialUsed(grant = {}, now = new Date()) {
+  const stamped = { ...grant };
+  stamped.usedAt = now.toISOString();
+  return stamped;
+}
+/**
+ * Flatten an action's descriptive fields into one space-separated string.
+ *
+ * @param {object} [action] - Reads command, intent, description, url and tags.
+ * @returns {string} The truthy values joined by single spaces, in that order.
+ */
+function buildActionText(action = {}) {
+  const candidates = [action.command, action.intent, action.description, action.url];
+  candidates.push(...(action.tags || []));
+  const fragments = [];
+  for (const candidate of candidates) {
+    if (candidate) fragments.push(candidate);
+  }
+  return fragments.join(' ');
+}
+/**
+ * Guess a credential scope from free text; the first matching rule wins.
+ *
+ * @param {string} [text] - Action text to scan.
+ * @returns {{resource: string, operation: string}} Matched scope, or local/read when nothing matches.
+ */
+function inferScope(text = '') {
+  // Ordered rules: payment keywords are checked before purchase keywords.
+  const rules = [
+    [/\b(stripe|checkout|payment|card)\b/i, 'payments', 'write'],
+    [/\b(gumroad|buy|buys|buying|purchase|purchases)\b/i, 'purchase', 'create'],
+    [/\b(deploy|railway|production)\b/i, 'deployment', 'write'],
+    [/\b(post|reply|email|send|publish)\b/i, 'external-message', 'send'],
+  ];
+  for (const [pattern, resource, operation] of rules) {
+    if (pattern.test(text)) return { resource, operation };
+  }
+  return { resource: 'local', operation: 'read' };
+}
+/**
+ * Coerce a scope given as a "resource:operation" string or as an object into
+ * a `{ resource, operation }` pair of strings.
+ *
+ * String form: a missing operation becomes 'use' and an empty resource becomes 'local'.
+ * Object form: missing fields become 'local' / 'read'.
+ *
+ * @param {string|object|null} [scope] - Scope to normalize.
+ * @returns {{resource: string, operation: string}}
+ */
+function normalizeScope(scope = {}) {
+  if (typeof scope === 'string') {
+    const [resource, operation = 'use'] = scope.split(':');
+    return { resource: resource || 'local', operation };
+  }
+  // The default parameter only covers `undefined`; an explicit `null` (for example
+  // from deserialized JSON) used to throw on the property reads below.
+  const fields = scope ?? {};
+  return {
+    resource: String(fields.resource || 'local'),
+    operation: String(fields.operation || 'read'),
+  };
+}
+/**
+ * Check whether a granted scope covers a required scope.
+ *
+ * A granted resource of '*' covers everything; otherwise the resources must be
+ * equal and the granted operation must be '*' or equal to the required one.
+ *
+ * @param {{resource: string, operation: string}} granted
+ * @param {{resource: string, operation: string}} required
+ * @returns {boolean}
+ */
+function scopeAllows(granted, required) {
+  const wildcardResource = granted.resource === '*';
+  if (wildcardResource) return true;
+  const sameResource = granted.resource === required.resource;
+  const operationCovered = granted.operation === '*' || granted.operation === required.operation;
+  return sameResource && operationCovered;
+}
+/**
+ * List policy violations in a credential request.
+ *
+ * @param {object} action - Flagged when `persistent === true`.
+ * @param {{resource: string, operation: string}} scope - Flagged when either field is '*'.
+ * @returns {string[]} Reason codes, empty when nothing is flagged.
+ */
+function buildDeniedReasons(action, scope) {
+  const wantsPersistence = action.persistent === true;
+  const tooBroad = scope.resource === '*' || scope.operation === '*';
+  return [
+    wantsPersistence && 'persistent_credentials_not_allowed',
+    tooBroad && 'credential_scope_too_broad',
+  ].filter(Boolean);
+}
+/**
+ * Constrain a number to the range [min, max]; non-finite input yields `min`.
+ *
+ * @param {number} value
+ * @param {number} min
+ * @param {number} max
+ * @returns {number}
+ */
+function clamp(value, min, max) {
+  const usable = Number.isFinite(value);
+  if (usable) {
+    const lowerBounded = Math.max(min, value);
+    return Math.min(max, lowerBounded);
+  }
+  return min;
+}
+/**
+ * Render a credential plan as a short Markdown report.
+ *
+ * @param {object} [plan] - Plan as produced by planSingleUseCredentialRequest.
+ * @returns {string} Newline-joined report ending with a trailing newline.
+ */
+function formatCredentialPlan(plan = {}) {
+  const requiredLabel = plan.required ? 'yes' : 'no';
+  const scopeLabel = `${plan.scope?.resource}:${plan.scope?.operation}`;
+  const deniedLabel = (plan.deniedReasons || []).join(', ') || 'none';
+  const lines = [];
+  lines.push('# Single-Use Credential Plan', '');
+  lines.push(`Required: ${requiredLabel}`);
+  lines.push(`Approval mode: ${plan.approvalMode}`);
+  lines.push(`Scope: ${scopeLabel}`);
+  lines.push(`TTL seconds: ${plan.ttlSeconds}`);
+  lines.push(`Denied reasons: ${deniedLabel}`);
+  lines.push('', plan.approvalPrompt || '', '');
+  return lines.join('\n');
+}
+/**
+ * Parse CLI arguments of the form `<command> --flag=value ...`.
+ *
+ * `--action=` is an alias that writes to `intent`, like `--intent=`.
+ *
+ * @param {string[]} [argv] - Arguments after the script path.
+ * @returns {object} `{ command, intent }` plus `description` / `scope` when supplied.
+ */
+function parseArgs(argv = process.argv.slice(2)) {
+  const flagTargets = [
+    ['--intent=', 'intent'],
+    ['--action=', 'intent'],
+    ['--description=', 'description'],
+    ['--scope=', 'scope'],
+  ];
+  const parsed = { command: argv[0] || 'plan', intent: '' };
+  argv.slice(1).forEach((token) => {
+    for (const [prefix, key] of flagTargets) {
+      if (token.startsWith(prefix)) parsed[key] = token.slice(prefix.length);
+    }
+  });
+  return parsed;
+}
+/**
+ * Report whether this file is the script Node was started with.
+ *
+ * @param {string[]} [argv] - Process argument vector; index 1 is the entry script path.
+ * @returns {boolean} True when argv[1] resolves to this file's path.
+ */
+function isCliInvocation(argv = process.argv) {
+  const entryScript = argv[1];
+  if (!entryScript) return false;
+  return path.resolve(entryScript) === __filename;
+}
+// CLI entry point: build a plan from the arguments and print it as Markdown
+// (`plan`) or JSON (`json`); any other command exits with status 1.
+if (isCliInvocation()) {
+  const cliArgs = parseArgs();
+  const credentialPlan = planSingleUseCredentialRequest(cliArgs);
+  switch (cliArgs.command) {
+    case 'json':
+      console.log(JSON.stringify(credentialPlan, null, 2));
+      break;
+    case 'plan':
+      console.log(formatCredentialPlan(credentialPlan));
+      break;
+    default:
+      console.error(`Unknown command: ${cliArgs.command}. Use: plan, json`);
+      process.exit(1);
+  }
+}
+module.exports = {
+  evaluateCredentialUse,
+  formatCredentialPlan,
+  markCredentialUsed,
+  mintCredentialGrant,
+  planSingleUseCredentialRequest,
+};

package/scripts/structured-prompt-driven.js ADDED Viewed

@@ -0,0 +1,226 @@
+#!/usr/bin/env node
+'use strict';
+/**
+ * Structured Prompt-Driven Development (SPDD) Gate
+ *
+ * Makes code-generation prompts governable artifacts by requiring a compact
+ * REASONS canvas before risky implementation work proceeds.
+ */
+const path = require('node:path');
+const FIELD_DEFINITIONS = [ // [key, description] pairs; the order here is the order sections are rendered in
+  ['requirements', 'Problem, business value, scope, and definition of done.'],
+  ['entities', 'Domain nouns, relationships, and data contracts.'],
+  ['approach', 'Strategy for satisfying the requirements.'],
+  ['structure', 'Files, modules, dependencies, and integration boundaries.'],
+  ['operations', 'Concrete, testable implementation steps.'],
+  ['norms', 'Reusable engineering standards and team conventions.'],
+  ['safeguards', 'Non-negotiable constraints, risks, and verification gates.'],
+];
+const FIELD_KEYS = FIELD_DEFINITIONS.map(([key]) => key); // just the canvas field names, same order
+/**
+ * Build a REASONS canvas document from a request string or a structured input.
+ *
+ * Each canvas field is normalized to a list of strings. The request seeds
+ * `requirements` when that field is empty; acceptance criteria, files and
+ * tests/verification are appended to requirements, structure and safeguards;
+ * `norms` and `safeguards` receive a default entry when still empty.
+ *
+ * @param {string|object|null} [input] - Request text, or an object with request/story/task,
+ *   the canvas fields, and optional acceptanceCriteria, files/changedFiles,
+ *   tests/verification, title, artifactPath.
+ * @returns {{title: string, canvas: object, source: {request: string, artifactPath: string}}}
+ */
+function buildReasonsCanvas(input = {}) {
+  // The default parameter only covers `undefined`; treat an explicit `null` the
+  // same way instead of throwing on the property reads below.
+  const source = typeof input === 'string' ? { request: input } : (input ?? {});
+  const request = String(source.request || source.story || source.task || '').trim();
+  const canvas = {};
+  for (const key of FIELD_KEYS) {
+    canvas[key] = normalizeList(source[key]);
+  }
+  if (request && canvas.requirements.length === 0) {
+    canvas.requirements.push(request);
+  }
+  if (source.acceptanceCriteria) {
+    canvas.requirements.push(...normalizeList(source.acceptanceCriteria));
+  }
+  if (source.files || source.changedFiles) {
+    canvas.structure.push(...normalizeList(source.files || source.changedFiles));
+  }
+  if (source.tests || source.verification) {
+    canvas.safeguards.push(...normalizeList(source.tests || source.verification).map((item) => `Verification: ${item}`));
+  }
+  if (canvas.norms.length === 0) {
+    canvas.norms.push('Keep prompt, code, and tests synchronized in version control.');
+  }
+  if (canvas.safeguards.length === 0) {
+    canvas.safeguards.push('Do not claim completion without passing verification evidence.');
+  }
+  return {
+    title: source.title || inferTitle(request),
+    canvas,
+    source: {
+      request,
+      artifactPath: source.artifactPath || 'docs/prompts/<feature>.reasons.md',
+    },
+  };
+}
+/**
+ * Score a REASONS canvas and list the gates it trips.
+ *
+ * Gates: one per empty field (blocking for requirements and safeguards, a warning
+ * otherwise); a block when operations exist but none is testable; a block when the
+ * safeguards text names no verification/risk keyword; a warning when changed files
+ * are given and none of them appears in the structure text.
+ *
+ * @param {object} [document] - Canvas document, or a bare canvas object.
+ * @param {object} [options] - May carry `changedFiles`.
+ * @returns {{allowed: boolean, score: number, gates: object[], missing: string[], recommendation: string}}
+ *   `score` is 100 minus 30 per block and 10 per warning, floored at 0.
+ */
+function evaluateReasonsCanvas(document = {}, options = {}) {
+  const canvas = document.canvas || document;
+  const missing = FIELD_KEYS.filter((key) => normalizeList(canvas[key]).length === 0);
+  const gates = missing.map((key) => ({
+    id: `missing-${key}`,
+    severity: ['requirements', 'safeguards'].includes(key) ? 'block' : 'warn',
+    reason: `${labelFor(key)} is empty; the agent lacks a governed ${key} boundary.`,
+  }));
+
+  const operations = normalizeList(canvas.operations);
+  const noneTestable = operations.length > 0 && !operations.some(isTestableOperation);
+  if (noneTestable) {
+    gates.push({
+      id: 'operations-not-testable',
+      severity: 'block',
+      reason: 'At least one operation must be concrete and testable before code generation.',
+    });
+  }
+
+  const safeguardText = normalizeList(canvas.safeguards).join('\n');
+  const verificationPattern = /\b(tests?|verify|verification|evidence|gate|security|privacy|rollback|performance)\b/i;
+  if (!verificationPattern.test(safeguardText)) {
+    gates.push({
+      id: 'safeguards-without-verification',
+      severity: 'block',
+      reason: 'Safeguards must name verification, evidence, or non-negotiable risk controls.',
+    });
+  }
+
+  const changedFiles = normalizeList(options.changedFiles);
+  const structureText = normalizeList(canvas.structure).join('\n');
+  const isRepresented = (file) => structureText.includes(file) || structureText.includes(path.basename(file));
+  if (changedFiles.length > 0 && !changedFiles.some(isRepresented)) {
+    gates.push({
+      id: 'code-prompt-drift',
+      severity: 'warn',
+      reason: 'Changed files are not represented in the prompt structure; sync the canvas before review.',
+    });
+  }
+
+  let blockCount = 0;
+  let warnCount = 0;
+  for (const gate of gates) {
+    if (gate.severity === 'block') blockCount += 1;
+    else if (gate.severity === 'warn') warnCount += 1;
+  }
+
+  let recommendation = 'Structured prompt is ready for code generation and review.';
+  if (blockCount) {
+    recommendation = 'Fix the structured prompt before generating or merging code.';
+  } else if (warnCount) {
+    recommendation = 'Proceed only after syncing prompt drift and documenting review evidence.';
+  }
+
+  return {
+    allowed: blockCount === 0,
+    score: Math.max(0, 100 - (blockCount * 30) - (warnCount * 10)),
+    gates,
+    missing,
+    recommendation,
+  };
+}
+/**
+ * Derive the follow-up work needed to keep a prompt canvas in sync with a change.
+ *
+ * @param {object} [document] - Canvas document; `source.artifactPath` is preferred for the artifact path.
+ * @param {object} [changes] - May carry changedFiles, verification/tests and artifactPath.
+ * @returns {{promptFirst: boolean, artifactPath: string, requiredUpdates: string[], reviewChecklist: string[]}}
+ */
+function buildPromptSyncPlan(document = {}, changes = {}) {
+  const evaluation = evaluateReasonsCanvas(document, changes);
+  const hasChangedFiles = normalizeList(changes.changedFiles).length > 0;
+  const hasVerification = normalizeList(changes.verification || changes.tests).length > 0;
+
+  // Gate ids first, then the file-sync step, then exactly one evidence step.
+  const requiredUpdates = (evaluation.gates || []).map((gate) => gate.id);
+  if (hasChangedFiles) requiredUpdates.push('sync-structure-with-changed-files');
+  requiredUpdates.push(hasVerification ? 'attach-verification-evidence' : 'add-verification-evidence');
+
+  const reviewChecklist = [
+    'Review intent and scope before reviewing code diff.',
+    'Confirm operations map to focused tests.',
+    'Update the canvas when implementation reality diverges.',
+    'Store prompt artifact beside the feature or PR evidence.',
+  ];
+  return {
+    promptFirst: evaluation.allowed,
+    artifactPath: document.source?.artifactPath || changes.artifactPath || 'docs/prompts/<feature>.reasons.md',
+    requiredUpdates,
+    reviewChecklist,
+  };
+}
+/**
+ * Render a canvas document and its evaluation as Markdown.
+ *
+ * @param {object} [document] - Canvas document, or a bare canvas object.
+ * @param {object} [evaluation] - Result of evaluateReasonsCanvas; computed from `document` when omitted.
+ * @returns {string} Header, one section per canvas field, the gate list and the recommendation.
+ */
+function formatReasonsCanvas(document = {}, evaluation = evaluateReasonsCanvas(document)) {
+  const canvas = document.canvas || document;
+  const lines = [
+    `# ${document.title || 'Structured Prompt Canvas'}`,
+    '',
+    `Artifact: ${document.source?.artifactPath || 'docs/prompts/<feature>.reasons.md'}`,
+    `Readiness: ${evaluation.allowed ? 'ready' : 'blocked'} (${evaluation.score}/100)`,
+    '',
+  ];
+  for (const [key, description] of FIELD_DEFINITIONS) {
+    lines.push(`## ${labelFor(key)}`, '', `_${description}_`, '');
+    lines.push(...renderList(normalizeList(canvas[key])));
+    lines.push('');
+  }
+  lines.push('## Gates', '');
+  if (evaluation.gates.length) {
+    for (const gate of evaluation.gates) {
+      lines.push(`- ${gate.severity}: ${gate.id} — ${gate.reason}`);
+    }
+  } else {
+    lines.push('- pass: canvas-ready — Structured prompt is complete enough to govern generation.');
+  }
+  lines.push('', `Recommendation: ${evaluation.recommendation}`, '');
+  return lines.join('\n');
+}
+/**
+ * Coerce a value into a list of non-empty, trimmed strings.
+ *
+ * Arrays are stringified item by item. Any other truthy value is stringified,
+ * split on newlines and semicolons, and stripped of a leading `-` or `*` bullet.
+ *
+ * @param {*} value
+ * @returns {string[]} Empty for falsy input.
+ */
+function normalizeList(value) {
+  if (!value) return [];
+  const cleaned = [];
+  if (Array.isArray(value)) {
+    value.forEach((entry) => {
+      const text = String(entry).trim();
+      if (text) cleaned.push(text);
+    });
+    return cleaned;
+  }
+  String(value).split(/\n|;/).forEach((piece) => {
+    const text = piece.replace(/^[-*]\s*/, '').trim();
+    if (text) cleaned.push(text);
+  });
+  return cleaned;
+}
+/**
+ * Turn items into Markdown bullet lines, or a single `<missing>` bullet when empty.
+ *
+ * @param {string[]} items
+ * @returns {string[]}
+ */
+function renderList(items) {
+  if (!items.length) return ['- <missing>'];
+  const bullets = [];
+  items.forEach((item) => {
+    bullets.push(`- ${item}`);
+  });
+  return bullets;
+}
+/**
+ * Capitalize the first character of a field key for use as a heading.
+ *
+ * @param {string} key
+ * @returns {string}
+ */
+function labelFor(key) {
+  const head = key.charAt(0).toUpperCase();
+  const tail = key.slice(1);
+  return `${head}${tail}`;
+}
+/**
+ * Check whether an operation mentions one of the recognized action verbs as a whole word.
+ *
+ * @param {string} operation
+ * @returns {boolean}
+ */
+function isTestableOperation(operation) {
+  const actionVerbPattern = /\b(add|update|remove|implement|verify|test|run|assert|block|allow|return|emit)\b/i;
+  return actionVerbPattern.test(operation);
+}
+/**
+ * Derive a document title from the request text.
+ *
+ * @param {string} request
+ * @returns {string} A default title for empty input; the request itself when it has at
+ *   most 70 characters; otherwise its first 67 characters followed by '...'.
+ */
+function inferTitle(request) {
+  if (!request) return 'Structured Prompt Canvas';
+  if (request.length <= 70) return request;
+  return `${request.slice(0, 67)}...`;
+}
+/**
+ * Parse CLI arguments of the form `<command> --flag=value ...`.
+ *
+ * `--request=` keeps the last value; `--file=`, `--test=`, `--operation=` and
+ * `--safeguard=` may repeat and accumulate into arrays.
+ *
+ * @param {string[]} [argv] - Arguments after the script path.
+ * @returns {object} `{ command, request }` plus files/tests/operations/safeguards arrays when supplied.
+ */
+function parseArgs(argv = process.argv.slice(2)) {
+  const repeatableFlags = [
+    ['--file=', 'files'],
+    ['--test=', 'tests'],
+    ['--operation=', 'operations'],
+    ['--safeguard=', 'safeguards'],
+  ];
+  const parsed = { command: argv[0] || 'canvas', request: '' };
+  argv.slice(1).forEach((token) => {
+    if (token.startsWith('--request=')) parsed.request = token.slice('--request='.length);
+    for (const [prefix, key] of repeatableFlags) {
+      if (token.startsWith(prefix)) parsed[key] = [...(parsed[key] || []), token.slice(prefix.length)];
+    }
+  });
+  return parsed;
+}
+/**
+ * Report whether this file is the script Node was started with.
+ *
+ * @param {string[]} [argv] - Process argument vector; index 1 is the entry script path.
+ * @returns {boolean} True when argv[1] resolves to this file's path.
+ */
+function isCliInvocation(argv = process.argv) {
+  const entryScript = argv[1];
+  if (!entryScript) return false;
+  return path.resolve(entryScript) === __filename;
+}
+// CLI entry point: build and evaluate a canvas from the arguments, then print it
+// as Markdown (`canvas`) or as JSON with a sync plan (`json`); any other command
+// exits with status 1.
+if (isCliInvocation()) {
+  const cliArgs = parseArgs();
+  const document = buildReasonsCanvas(cliArgs);
+  const evaluation = evaluateReasonsCanvas(document, { changedFiles: cliArgs.files });
+  switch (cliArgs.command) {
+    case 'json': {
+      const syncPlan = buildPromptSyncPlan(document, { changedFiles: cliArgs.files, tests: cliArgs.tests });
+      console.log(JSON.stringify({ document, evaluation, syncPlan }, null, 2));
+      break;
+    }
+    case 'canvas':
+      console.log(formatReasonsCanvas(document, evaluation));
+      break;
+    default:
+      console.error(`Unknown command: ${cliArgs.command}. Use: canvas, json`);
+      process.exit(1);
+  }
+}
+module.exports = {
+  buildPromptSyncPlan,
+  buildReasonsCanvas,
+  evaluateReasonsCanvas,
+  formatReasonsCanvas,
+};

package/scripts/telemetry-analytics.js CHANGED Viewed

@@ -344,9 +344,31 @@ function appendTelemetryEvent(feedbackDir, payload = {}, headers = {}) {
   return entry;
 }
-function loadTelemetryEventsFromPath(filePath) {
-  if (!fs.existsSync(filePath)) return [];
-  const raw = fs.readFileSync(filePath, 'utf-8').trim();
+const DEFAULT_BOUNDED_TELEMETRY_TAIL_BYTES = 8 * 1024 * 1024; // 8 MiB; default tail size read when the analytics window is bounded
+/**
+ * Read a telemetry log as UTF-8 text, optionally limited to its tail.
+ *
+ * When `options.maxBytes` is a positive number and the file is larger than that,
+ * only the last `maxBytes` bytes are read and everything up to and including the
+ * first newline is dropped, since the read most likely starts mid-line.
+ *
+ * @param {string} filePath - Path of the log file.
+ * @param {{maxBytes?: number}} [options] - Tail size in bytes; absent, zero or invalid means read the whole file.
+ * @returns {string} File text, or '' when the file does not exist.
+ */
+function readTelemetryText(filePath, options = {}) {
+  if (!fs.existsSync(filePath)) return '';
+  // Whole bytes only: a fractional size is not a valid read length.
+  const maxBytes = Math.floor(Number(options.maxBytes || 0));
+  if (maxBytes > 0) {
+    const stats = fs.statSync(filePath);
+    if (stats.size > maxBytes) {
+      const fd = fs.openSync(filePath, 'r');
+      try {
+        const buffer = Buffer.alloc(maxBytes);
+        // readSync can return fewer bytes than requested (for example when the file
+        // shrank after statSync); decode only what was read so the zero-filled
+        // remainder of the buffer never leaks into the text.
+        const bytesRead = fs.readSync(fd, buffer, 0, maxBytes, stats.size - maxBytes);
+        const text = buffer.toString('utf-8', 0, bytesRead);
+        const firstNewline = text.indexOf('\n');
+        return firstNewline >= 0 ? text.slice(firstNewline + 1) : text;
+      } finally {
+        fs.closeSync(fd);
+      }
+    }
+  }
+  return fs.readFileSync(filePath, 'utf-8');
+}
+function loadTelemetryEventsFromPath(filePath, options = {}) {
+  const raw = readTelemetryText(filePath, options).trim();
   if (!raw) return [];
   return raw
     .split('\n')
@@ -365,13 +387,13 @@ function loadTelemetryEventsFromPath(filePath) {
     .filter(Boolean);
 }
-function loadTelemetryEvents(feedbackDir) {
+function loadTelemetryEvents(feedbackDir, options = {}) {
   const diagnostics = getTelemetrySourceDiagnostics(feedbackDir);
   const merged = [];
   const seen = new Set();
   for (const filePath of diagnostics.activePaths) {
-    const rows = loadTelemetryEventsFromPath(filePath);
+    const rows = loadTelemetryEventsFromPath(filePath, options);
     for (const row of rows) {
       const key = JSON.stringify(row);
       if (seen.has(key)) continue;
@@ -406,8 +428,11 @@ function summarizeRecentEvents(events) {
 function getTelemetrySummary(feedbackDir, options = {}) {
   const analyticsWindow = resolveAnalyticsWindow(options);
+  const telemetryLoadOptions = analyticsWindow.bounded
+    ? { maxBytes: Number(options.telemetryTailBytes || DEFAULT_BOUNDED_TELEMETRY_TAIL_BYTES) }
+    : {};
   const events = filterEntriesForWindow(
-    loadTelemetryEvents(feedbackDir),
+    loadTelemetryEvents(feedbackDir, telemetryLoadOptions),
     analyticsWindow,
     (entry) => entry && (entry.receivedAt || entry.timestamp)
   );

package/scripts/tool-registry.js CHANGED Viewed

@@ -192,6 +192,98 @@ const TOOLS = [
       },
     },
   }),
+  readOnlyTool({
+    name: 'plan_agent_design_governance',
+    description: 'Evaluate an agent workflow before adding tools, autonomy, or subagents. Recommends single-agent vs manager/decentralized patterns, baseline evals, instruction fixes, and tool safeguards.',
+    inputSchema: {
+      type: 'object',
+      properties: {
+        workflow: { type: 'string', description: 'Workflow name or short description.' },
+        tools: { type: 'array', items: { type: 'string' }, description: 'Tool names available to the agent.' },
+        toolCount: { type: 'number', description: 'Total tools when names are not listed.' },
+        similarToolCount: { type: 'number', description: 'Number of similar or overlapping tools.' },
+        conditionalBranches: { type: 'number', description: 'Rough count of if/then instruction branches.' },
+        handoffCount: { type: 'number', description: 'Existing or proposed handoff count.' },
+        highRiskTools: { type: 'array', items: { type: 'string' }, description: 'Tools that affect production, money, data, secrets, or outbound actions.' },
+        writeTools: { type: 'array', items: { type: 'string' }, description: 'Write-capable tools.' },
+        hasBaselineEvals: { type: 'boolean', description: 'Whether baseline agent evals exist.' },
+        hasDocs: { type: 'boolean', description: 'Instructions draw on existing workflow docs.' },
+        hasExamples: { type: 'boolean', description: 'Instructions include concrete examples.' },
+        hasEdgeCases: { type: 'boolean', description: 'Instructions include edge cases and failure paths.' },
+        hasToolApprovals: { type: 'boolean', description: 'Risky tool calls require approval.' },
+        hasExitCondition: { type: 'boolean', description: 'Instructions define when the run is complete.' },
+        reversibleActions: { type: 'boolean', description: 'Risky actions are reversible or have rollback procedures.' },
+      },
+    },
+  }),
+  readOnlyTool({
+    name: 'plan_proactive_agent_eval_guardrails',
+    description: 'Map proactive-assistant eval gaps to PARE-style state-machine, active-user-simulation, goal-inference, intervention-timing, and multi-app orchestration gates.',
+    inputSchema: {
+      type: 'object',
+      properties: {
+        workflow: { type: 'string', description: 'Proactive assistant workflow name.' },
+        apps: { type: 'array', items: { type: 'string' }, description: 'Apps involved in the proactive workflow.' },
+        states: { type: 'array', items: { type: 'string' }, description: 'Modeled app states.' },
+        stateCount: { type: 'number', description: 'Number of modeled states.' },
+        actionCount: { type: 'number', description: 'Number of state-dependent actions.' },
+        taskCount: { type: 'number', description: 'Number of benchmark tasks or scenarios.' },
+        hasStateMachine: { type: 'boolean', description: 'Whether apps are modeled as finite state machines.' },
+        hasActiveUserSimulation: { type: 'boolean', description: 'Whether active user simulation exists.' },
+        hasGoalInferenceEvals: { type: 'boolean', description: 'Whether goal inference is graded.' },
+        hasInterventionTimingEvals: { type: 'boolean', description: 'Whether intervention timing is graded.' },
+        hasMultiAppEvals: { type: 'boolean', description: 'Whether multi-app orchestration is graded.' },
+        flatToolApiOnly: { type: 'boolean', description: 'Current eval only covers flat tool calls.' },
+        proactiveWrites: { type: 'boolean', description: 'Proactive agent can write or mutate state.' },
+        userVisibleActions: { type: 'boolean', description: 'Interventions can notify, schedule, send, or affect users.' },
+      },
+    },
+  }),
+  readOnlyTool({
+    name: 'plan_reward_hacking_guardrails',
+    description: 'Detect reward-hacking patterns such as unsupported completion claims, sycophancy, verbosity-as-proof, benchmark overfitting, evaluator manipulation, and proxy-only metrics.',
+    inputSchema: {
+      type: 'object',
+      properties: {
+        workflow: { type: 'string', description: 'Agent workflow or release lane being evaluated.' },
+        text: { type: 'string', description: 'Candidate response, claim, summary, or verifier output to inspect.' },
+        evidence: { type: 'array', items: { type: 'string' }, description: 'Evidence artifacts attached to the claim.' },
+        metrics: { type: 'array', items: { type: 'string' }, description: 'Proxy metrics or reward scores used by the workflow.' },
+        hasHoldout: { type: 'boolean', description: 'Whether holdout, regression, or real-workflow evidence exists.' },
+        hasHumanObjective: { type: 'boolean', description: 'Whether proxy metrics are mapped to a user objective.' },
+        hasVerifierTrace: { type: 'boolean', description: 'Whether verifier trace, run log, or proof artifact exists.' },
+        optimizedForScore: { type: 'boolean', description: 'Whether an eval, benchmark, or reward score is being optimized.' },
+        multimodal: { type: 'boolean', description: 'Whether claims depend on screenshots, PDFs, charts, images, or video.' },
+      },
+    },
+  }),
+  readOnlyTool({
+    name: 'plan_oss_pr_opportunity_scout',
+    description: 'Rank upstream GitHub repositories ThumbGate depends on for proof-backed issue, bounty, and PR opportunities without spammy drive-by contributions.',
+    inputSchema: {
+      type: 'object',
+      properties: {
+        packagePath: { type: 'string', description: 'Path to package.json used to discover dependencies.' },
+        dependencies: { type: 'array', items: { type: 'string' }, description: 'Dependency names to scout instead of package.json.' },
+        maxRepos: { type: 'number', description: 'Maximum mapped repositories to include.' },
+        includeBounties: { type: 'boolean', description: 'Include bug-bounty and security search queries.' },
+      },
+    },
+  }),
+  readOnlyTool({
+    name: 'plan_chatgpt_ads_readiness',
+    description: 'Prepare ThumbGate intent clusters, ad copy, proof links, UTM measurement, and launch gates for ChatGPT Ads Manager tests.',
+    inputSchema: {
+      type: 'object',
+      properties: {
+        offer: { type: 'string', description: 'Offer to advertise, such as Pro or Workflow Hardening Sprint.' },
+        audience: { type: 'string', description: 'Audience segment to target.' },
+        budget: { type: 'number', description: 'Initial test budget.' },
+        keywords: { type: 'array', items: { type: 'string' }, description: 'High-intent conversational queries.' },
+        proofLinks: { type: 'array', items: { type: 'string' }, description: 'Proof URLs required by ad claims.' },
+      },
+    },
+  }),
   destructiveTool({
     name: 'import_document',
     description: 'Import a local policy or runbook document into ThumbGate, normalize it for search, and propose provenance-backed gate candidates.',