thumbgate 1.26.8 → 1.27.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. package/.claude-plugin/plugin.json +1 -1
  2. package/.well-known/agentic-verify.txt +1 -0
  3. package/.well-known/llms.txt +2 -0
  4. package/.well-known/mcp/server-card.json +1 -1
  5. package/README.md +44 -31
  6. package/adapters/claude/.mcp.json +2 -2
  7. package/adapters/gcp/dfcx-webhook-gate.js +295 -0
  8. package/adapters/mcp/server-stdio.js +41 -1
  9. package/adapters/opencode/opencode.json +1 -1
  10. package/bench/thumbgate-bench.json +2 -2
  11. package/bin/cli.js +184 -8
  12. package/bin/dashboard-cli.js +7 -0
  13. package/config/gate-classifier-routing.json +98 -0
  14. package/config/gate-templates.json +60 -0
  15. package/config/mcp-allowlists.json +8 -7
  16. package/config/model-candidates.json +71 -6
  17. package/package.json +28 -12
  18. package/public/about.html +162 -0
  19. package/public/chatgpt-app.html +330 -0
  20. package/public/codex-plugin.html +66 -14
  21. package/public/compare.html +2 -2
  22. package/public/dashboard.html +224 -36
  23. package/public/guide.html +2 -2
  24. package/public/index.html +122 -40
  25. package/public/learn.html +70 -0
  26. package/public/lessons.html +129 -6
  27. package/public/numbers.html +2 -2
  28. package/public/pricing.html +28 -23
  29. package/public/pro.html +3 -3
  30. package/scripts/agent-operations-planner.js +621 -0
  31. package/scripts/agent-reward-model.js +53 -1
  32. package/scripts/ai-component-inventory.js +367 -0
  33. package/scripts/classifier-routing.js +130 -0
  34. package/scripts/cli-schema.js +26 -0
  35. package/scripts/commercial-offer.js +10 -2
  36. package/scripts/dashboard-chat.js +199 -51
  37. package/scripts/feedback-sanitizer.js +105 -0
  38. package/scripts/gates-engine.js +301 -67
  39. package/scripts/hybrid-feedback-context.js +141 -7
  40. package/scripts/memory-scope-readiness.js +159 -0
  41. package/scripts/oss-pr-opportunity-scout.js +35 -5
  42. package/scripts/parallel-workflow-orchestrator.js +293 -0
  43. package/scripts/plausible-domain-config.js +86 -0
  44. package/scripts/plausible-server-events.js +4 -2
  45. package/scripts/proxy-pointer-rag-guardrails.js +42 -1
  46. package/scripts/qa-scenario-planner.js +136 -0
  47. package/scripts/rate-limiter.js +2 -2
  48. package/scripts/repeat-metric.js +28 -12
  49. package/scripts/secret-fixture-tokens.js +61 -0
  50. package/scripts/secret-scanner.js +44 -5
  51. package/scripts/security-scanner.js +80 -0
  52. package/scripts/seo-gsd.js +113 -0
  53. package/scripts/thumbgate-bench.js +16 -1
  54. package/scripts/tool-registry.js +37 -0
  55. package/scripts/workflow-sentinel.js +282 -54
  56. package/src/api/server.js +466 -60
  57. package/.claude-plugin/marketplace.json +0 -85
@@ -7,10 +7,10 @@
7
7
  // does NOT write to disk; it is a pure function over gates-engine.loadStats().
8
8
  //
9
9
  // The headline number is stats.recurringBlocks — incremented by recordStat()
10
- // in gates-engine.js every time the SAME gateId fires twice within one session
11
- // bucket. That is exactly "a pre-action gate fire that stopped a tool call the
12
- // agent had already been blocked on", i.e. a repeat attempt prevented before it
13
- // could round-trip and execute.
10
+ // in gates-engine.js every time the same gate blocks/warns the same sanitized
11
+ // action fingerprint within one session bucket. That is "a pre-action gate fire
12
+ // that stopped a tool call the agent had already been blocked on", rather than
13
+ // merely "the same noisy gate fired again."
14
14
  // ---------------------------------------------------------------------------
15
15
 
16
16
  const gatesEngine = require('./gates-engine');
@@ -18,12 +18,12 @@ const gatesEngine = require('./gates-engine');
18
18
  /**
19
19
  * Derive a per-gate { firstBlocks, repeatBlocks } split from the raw stats.
20
20
  *
21
- * recordStat() records, per session bucket, which gates have fired
22
- * (stats.sessionFiredGates[sessionKey][gateId] === true). The FIRST fire of a
23
- * gate in a bucket marks the flag; every subsequent fire in that same bucket
24
- * increments stats.recurringBlocks. So for each gate:
25
- * firstBlocks = number of distinct session buckets the gate fired in
26
- * repeatBlocks = (total block+warn events for the gate) - firstBlocks
21
+ * Modern stats record, per session bucket, which sanitized action fingerprints
22
+ * each gate fired on:
23
+ * stats.sessionFiredActions[sessionKey][gateId][fingerprint] === true
24
+ *
25
+ * firstBlocks is the count of distinct first action fingerprints. Legacy stats
26
+ * without fingerprints fall back to the old per-session-gate split.
27
27
  *
28
28
  * total block+warn events come from stats.byGate[id] (blocked + warned), which
29
29
  * recordStat() also maintains. repeatBlocks is clamped to >= 0 to stay robust
@@ -34,15 +34,30 @@ const gatesEngine = require('./gates-engine');
34
34
  */
35
35
  function computeByGateSplit(stats) {
36
36
  const byGate = {};
37
+ const sessionFiredActions = (stats && stats.sessionFiredActions) || {};
37
38
  const sessionFiredGates = (stats && stats.sessionFiredGates) || {};
38
39
  const rawByGate = (stats && stats.byGate) || {};
39
40
 
40
- // Count distinct session buckets each gate fired in => firstBlocks.
41
+ // Count distinct action fingerprints each gate fired on => firstBlocks.
41
42
  const firstBlocksByGate = {};
43
+ const gatesWithActionStats = new Set();
44
+ for (const sessionKey of Object.keys(sessionFiredActions)) {
45
+ const fired = sessionFiredActions[sessionKey] || {};
46
+ for (const gateId of Object.keys(fired)) {
47
+ const fingerprints = fired[gateId] || {};
48
+ const count = Object.values(fingerprints).filter(Boolean).length;
49
+ if (count > 0) {
50
+ gatesWithActionStats.add(gateId);
51
+ firstBlocksByGate[gateId] = (firstBlocksByGate[gateId] || 0) + count;
52
+ }
53
+ }
54
+ }
55
+
56
+ // Legacy fallback: old stats only tracked gate fired per session bucket.
42
57
  for (const sessionKey of Object.keys(sessionFiredGates)) {
43
58
  const fired = sessionFiredGates[sessionKey] || {};
44
59
  for (const gateId of Object.keys(fired)) {
45
- if (fired[gateId]) {
60
+ if (fired[gateId] && !gatesWithActionStats.has(gateId)) {
46
61
  firstBlocksByGate[gateId] = (firstBlocksByGate[gateId] || 0) + 1;
47
62
  }
48
63
  }
@@ -52,6 +67,7 @@ function computeByGateSplit(stats) {
52
67
  const gateIds = new Set([
53
68
  ...Object.keys(rawByGate),
54
69
  ...Object.keys(firstBlocksByGate),
70
+ ...Object.keys(sessionFiredActions).flatMap((sessionKey) => Object.keys(sessionFiredActions[sessionKey] || {})),
55
71
  ]);
56
72
 
57
73
  for (const gateId of gateIds) {
@@ -0,0 +1,61 @@
1
+ 'use strict';
2
+
3
/**
 * Placeholder tokens that fixture text can carry instead of secret-shaped
 * strings. expandFixturePlaceholders() swaps every occurrence of a token for
 * the value produced by the matching builder below.
 */
const FIXTURE_TOKENS = {
  awsAccessKeyId: '__TG_FIXTURE_AWS_ACCESS_KEY_ID__',
  githubPat: '__TG_FIXTURE_GITHUB_PAT__',
  openAiLegacyKey: '__TG_FIXTURE_OPENAI_LEGACY_KEY__',
  openAiProjectKey: '__TG_FIXTURE_OPENAI_PROJECT_KEY__',
  rsaPrivateKeyHeader: '__TG_FIXTURE_RSA_PRIVATE_KEY_HEADER__',
  ecPrivateKeyHeader: '__TG_FIXTURE_EC_PRIVATE_KEY_HEADER__',
  privateKeyHeader: '__TG_FIXTURE_PRIVATE_KEY_HEADER__',
};

// NOTE(review): every builder assembles its value from fragments at runtime —
// presumably so the complete secret-shaped string never appears verbatim in
// the source tree. Confirm before collapsing any of them into one literal.

/**
 * @returns {string} 'AKIA' followed by the 'IOSFODNN7EXAMPLE' sample suffix.
 */
function buildAwsAccessKeyId() {
  return ['AKIA', 'IOSFODNN7EXAMPLE'].join('');
}

/**
 * @returns {string} A 'gh' + 'p_' prefix followed by 36 'x' characters.
 */
function buildGitHubPat() {
  return ['gh', 'p_', 'x'.repeat(36)].join('');
}

/**
 * @returns {string} 'sk' + '-' followed by a fixed alphanumeric filler.
 */
function buildOpenAiLegacyKey() {
  return ['sk', '-', 'abcdefghijklmnopqrstuvwxyz01234567890'].join('');
}

/**
 * @returns {string} 'sk' + '-proj-' followed by a fixed alphanumeric filler.
 */
function buildOpenAiProjectKey() {
  return ['sk', '-proj-', 'abcdefghijklmnopqrstuvwxyz01234567890'].join('');
}

/**
 * Build a PEM "BEGIN ... PRIVATE KEY" header line.
 *
 * @param {string} [prefix=''] Key-type prefix including its trailing space
 *   (for example 'RSA ' or 'EC '); empty for the generic header.
 * @returns {string} The assembled header line.
 */
function buildPemHeader(prefix = '') {
  return ['-----BEGIN ', prefix, 'PRIVATE KEY-----'].join('');
}

// The token -> value table is constant, so it is built once at module load
// instead of on every expandFixturePlaceholders() call. Frozen because the
// same array instance is now shared between calls.
const FIXTURE_REPLACEMENTS = Object.freeze([
  Object.freeze([FIXTURE_TOKENS.awsAccessKeyId, buildAwsAccessKeyId()]),
  Object.freeze([FIXTURE_TOKENS.githubPat, buildGitHubPat()]),
  Object.freeze([FIXTURE_TOKENS.openAiLegacyKey, buildOpenAiLegacyKey()]),
  Object.freeze([FIXTURE_TOKENS.openAiProjectKey, buildOpenAiProjectKey()]),
  Object.freeze([FIXTURE_TOKENS.rsaPrivateKeyHeader, buildPemHeader('RSA ')]),
  Object.freeze([FIXTURE_TOKENS.ecPrivateKeyHeader, buildPemHeader('EC ')]),
  Object.freeze([FIXTURE_TOKENS.privateKeyHeader, buildPemHeader('')]),
]);

/**
 * @returns {ReadonlyArray<ReadonlyArray<string>>} Ordered [token, replacement]
 *   pairs; the same frozen table is returned on every call.
 */
function fixtureReplacements() {
  return FIXTURE_REPLACEMENTS;
}

/**
 * Replace every fixture placeholder token in `value` with its generated value.
 *
 * @param {*} value Text to expand. Falsy inputs (null, undefined, '', 0,
 *   false) yield ''; any other value is coerced with String().
 * @returns {string} The text with all known tokens substituted.
 */
function expandFixturePlaceholders(value) {
  let expanded = String(value || '');
  for (const [token, replacement] of fixtureReplacements()) {
    // split/join replaces all occurrences literally: no regex escaping of the
    // token and no `$` pattern interpretation of the replacement.
    expanded = expanded.split(token).join(replacement);
  }
  return expanded;
}
52
+
53
+ module.exports = {
54
+ FIXTURE_TOKENS,
55
+ buildAwsAccessKeyId,
56
+ buildGitHubPat,
57
+ buildOpenAiLegacyKey,
58
+ buildOpenAiProjectKey,
59
+ buildPemHeader,
60
+ expandFixturePlaceholders,
61
+ };
@@ -55,6 +55,11 @@ const BASH_SECRET_READ_PREFIXES = [
55
55
  ];
56
56
 
57
57
  const EDIT_LIKE_TOOLS = new Set(['Edit', 'Write', 'MultiEdit']);
58
// Directories, relative to the user's home directory, that count as dedicated
// secret storage. isSafeSecretStoragePath() joins each entry onto os.homedir()
// and accepts the directory itself or anything beneath it.
const SAFE_SECRET_STORAGE_DIRS = [
  '.resume_secrets',
  '.thumbgate/secrets',
  '.config/thumbgate',
];
58
63
 
59
64
  function redactText(text) {
60
65
  if (!text) return '';
@@ -172,6 +177,13 @@ function heuristicScanText(text, source = 'text') {
172
177
  pattern.regex.lastIndex = 0;
173
178
  let match = pattern.regex.exec(input);
174
179
  while (match) {
180
+ // Safe test key bypass
181
+ const matchedString = match[0].toLowerCase();
182
+ if (pattern.id === 'generic_assignment' && (matchedString.includes('sk_test_') || matchedString.includes('test_token'))) {
183
+ match = pattern.regex.exec(input);
184
+ continue;
185
+ }
186
+
175
187
  findings.push({
176
188
  id: pattern.id,
177
189
  label: pattern.label,
@@ -295,6 +307,26 @@ function resolvePathToken(token, cwd) {
295
307
  return path.join(cwd || process.cwd(), normalized);
296
308
  }
297
309
 
310
/**
 * Normalize a path for policy comparison: expand a leading `~` (alone or
 * followed by `/`) to the current user's home directory, then make the result
 * absolute with path.resolve().
 *
 * @param {*} filePath Path to normalize; falsy values are treated as ''.
 * @returns {string} Absolute, normalized path.
 */
function normalizePathForPolicy(filePath) {
  const raw = String(filePath || '');
  // Use a replacer function rather than a replacement string: a string
  // argument is scanned for `$&`, `$1`, `$$`, ... patterns, which would
  // corrupt a home directory that happens to contain a `$` sequence.
  const expanded = raw.replace(/^~(?=\/|$)/, () => os.homedir());
  return path.resolve(expanded);
}
313
+
314
/**
 * Report whether a path is one of the SAFE_SECRET_STORAGE_DIRS under the
 * current user's home directory, or lies inside one of them.
 *
 * @param {string} filePath Path to check; falsy values are never safe.
 * @returns {boolean} True when the normalized path equals an allowed root or
 *   is nested below it.
 */
function isSafeSecretStoragePath(filePath) {
  if (!filePath) {
    return false;
  }

  const candidate = normalizePathForPolicy(filePath);
  const homeDir = normalizePathForPolicy(os.homedir());

  for (const relativeDir of SAFE_SECRET_STORAGE_DIRS) {
    const root = path.join(homeDir, relativeDir);
    // Appending the separator keeps sibling names that merely share the
    // prefix (e.g. "<root>-other") from matching.
    if (candidate === root || candidate.startsWith(`${root}${path.sep}`)) {
      return true;
    }
  }
  return false;
}
323
+
324
/**
 * Report whether a tool call is an edit-like write whose every target path is
 * inside an allowed secret-storage directory.
 *
 * @param {string} toolName Tool name; must be a member of EDIT_LIKE_TOOLS.
 * @param {Object} [toolInput={}] Tool input passed to getToolInputPaths().
 * @param {string} [cwd=process.cwd()] Working directory passed to getToolInputPaths().
 * @returns {boolean} True only when at least one path is found and all of
 *   them satisfy isSafeSecretStoragePath().
 */
function isSafeSecretStorageWrite(toolName, toolInput = {}, cwd = process.cwd()) {
  if (!EDIT_LIKE_TOOLS.has(toolName)) {
    return false;
  }

  const targets = getToolInputPaths(toolInput, cwd);
  // No resolvable target means nothing can be vouched for.
  if (!(targets.length > 0)) {
    return false;
  }
  return targets.every((target) => isSafeSecretStoragePath(target));
}
329
+
298
330
  function scanBashCommand(command, options = {}) {
299
331
  const cwd = options.cwd || process.cwd();
300
332
  const findings = [];
@@ -347,6 +379,7 @@ function scanHookInput(input = {}, options = {}) {
347
379
  let provider = resolveProvider(options.provider);
348
380
  let commandHash = null;
349
381
  let fileHashes = [];
382
+ const safeSecretStorageWrite = isSafeSecretStorageWrite(toolName, toolInput, cwd);
350
383
 
351
384
  const contentFields = [
352
385
  toolInput.content,
@@ -376,11 +409,13 @@ function scanHookInput(input = {}, options = {}) {
376
409
  }
377
410
  }
378
411
 
379
- for (const content of contentFields) {
380
- const result = scanText(content, { provider, source: 'tool_input' });
381
- if (result.detected) {
382
- provider = result.provider;
383
- findings.push(...result.findings);
412
+ if (!safeSecretStorageWrite) {
413
+ for (const content of contentFields) {
414
+ const result = scanText(content, { provider, source: 'tool_input' });
415
+ if (result.detected) {
416
+ provider = result.provider;
417
+ findings.push(...result.findings);
418
+ }
384
419
  }
385
420
  }
386
421
 
@@ -402,6 +437,8 @@ function buildSafeSummary(findings, prefix) {
402
437
  module.exports = {
403
438
  SECRET_PATTERNS,
404
439
  SECRET_FILE_PATTERNS,
440
+ SAFE_SECRET_STORAGE_DIRS,
441
+ EDIT_LIKE_TOOLS,
405
442
  redactText,
406
443
  resolveProvider,
407
444
  scanText,
@@ -409,6 +446,8 @@ module.exports = {
409
446
  scanBashCommand,
410
447
  scanHookInput,
411
448
  classifySecretPath,
449
+ isSafeSecretStoragePath,
450
+ isSafeSecretStorageWrite,
412
451
  buildSafeSummary,
413
452
  tokenizeCommand,
414
453
  };
@@ -146,6 +146,14 @@ const VULN_PATTERNS = [
146
146
  regex: /(?:unserialize|yaml\.load\s*\((?!.*Loader\s*=\s*yaml\.SafeLoader)|pickle\.loads?|Marshal\.load)/g,
147
147
  fileTypes: ['.js', '.ts', '.py', '.rb'],
148
148
  },
149
+ {
150
+ id: 'badhost-url-confusion',
151
+ category: 'host-header',
152
+ severity: 'high',
153
+ label: 'Potential BadHost-style host or URL confusion in AI service',
154
+ regex: /\b(?:request\.url(?:\.path)?|url_for\s*\([^)]*_external\s*=\s*True|headers\s*\[\s*['"](?:host|x-forwarded-host)['"]\s*\])/gi,
155
+ fileTypes: ['.py'],
156
+ },
149
157
  ];
150
158
 
151
159
  // ---------------------------------------------------------------------------
@@ -231,6 +239,22 @@ function scanCode(content, filePath = '') {
231
239
  };
232
240
  }
233
241
 
242
/**
 * Run scanCode() and keep only the 'badhost-url-confusion' findings.
 *
 * Intentionally narrow and evidence-oriented: a hit does not assert a CVE, it
 * marks code that should demonstrate canonical host handling before it ships.
 *
 * @param {string} content Source text to scan.
 * @param {string} [filePath=''] Path forwarded to scanCode().
 * @returns {{ detected: boolean, findings: Array<Object> }} `detected` is true
 *   when at least one matching finding exists.
 */
function scanBadHostExposure(content, filePath = '') {
  const { findings } = scanCode(content, filePath);
  const badHostFindings = findings.filter(({ id }) => id === 'badhost-url-confusion');
  return {
    detected: badHostFindings.length > 0,
    findings: badHostFindings,
  };
}
257
+
234
258
  /**
235
259
  * Scan dependency changes in package.json mutations.
236
260
  * @param {string} oldContent - Previous package.json content (empty string if new file)
@@ -503,6 +527,60 @@ function scanGitDiff(diffContent) {
503
527
  };
504
528
  }
505
529
 
530
/**
 * Pull the findings array out of a scan result, tolerating missing input.
 * Accepts either `{ findings }` or `{ securityScan: { findings } }`; entries
 * that are not objects are dropped so later property reads cannot throw.
 */
function collectPlaybookFindings(scanResult) {
  const scan = scanResult || {};
  let raw = [];
  if (Array.isArray(scan.findings)) {
    raw = scan.findings;
  } else if (scan.securityScan && Array.isArray(scan.securityScan.findings)) {
    raw = scan.securityScan.findings;
  }
  return raw.filter((finding) => finding !== null && typeof finding === 'object');
}

/**
 * Turn a security scan result into a four-phase playbook
 * (prepare, scan-prioritize, remediate, monitor) plus a gate decision.
 *
 * Severity drives the outcome: any 'critical' finding yields status 'block' /
 * gateDecision 'deny'; otherwise any 'high' finding yields 'remediate' /
 * 'warn'; otherwise 'monitor' / 'allow'.
 *
 * @param {Object|null} [scanResult={}] Scan output carrying `findings` or
 *   `securityScan.findings`. null/undefined is treated as an empty scan.
 * @param {Object|null} [options={}] Evidence flags: `patchEvidence`,
 *   `testEvidence`, `ciEvidence`. Any truthy flag marks patch evidence as present.
 * @returns {{ name: string, status: string, phases: Array<Object>,
 *   priority: Object, gateDecision: string, nextActions: Array<string> }}
 */
function buildThreatDefensePlaybook(scanResult = {}, options = {}) {
  // Default parameters only cover `undefined`; callers may still pass null.
  const opts = options || {};
  const findings = collectPlaybookFindings(scanResult);

  const critical = findings.filter((finding) => finding.severity === 'critical');
  const high = findings.filter((finding) => finding.severity === 'high');
  const categories = Array.from(new Set(findings.map((finding) => finding.category).filter(Boolean)));
  const hasFindings = findings.length > 0;
  const hasPatchEvidence = Boolean(opts.patchEvidence || opts.testEvidence || opts.ciEvidence);

  // Resolve the severity tier once; status, gateDecision and nextActions all
  // follow from it.
  let status = 'monitor';
  let gateDecision = 'allow';
  let nextActions = ['Keep continuous scan enabled', 'Review checkpoint metrics after the next session'];
  if (critical.length > 0) {
    status = 'block';
    gateDecision = 'deny';
    nextActions = ['Block the action', 'Patch the critical finding', 'Run focused tests', 'Re-scan the diff before retry'];
  } else if (high.length > 0) {
    status = 'remediate';
    gateDecision = 'warn';
    nextActions = ['Warn the operator', 'Create a remediation task', 'Run focused tests', 'Monitor for repeat findings'];
  }

  return {
    name: 'thumbgate-ai-threat-defense-playbook',
    status,
    phases: [
      {
        id: 'prepare',
        action: 'harden-foundation',
        evidence: ['gate templates enabled', 'protected files configured', 'rollback path documented'],
        required: true,
      },
      {
        id: 'scan-prioritize',
        action: hasFindings ? 'prioritize detected security findings by severity and exploit surface' : 'keep posture scan active',
        evidence: categories.length ? categories : ['clean scan'],
        required: true,
      },
      {
        id: 'remediate',
        action: hasFindings ? 'patch, run focused tests, and re-scan before allowing risky agent actions' : 'no remediation required from current scan',
        evidence: hasPatchEvidence ? ['patch evidence present'] : ['patch diff', 'focused test output', 'repeat scan'],
        required: hasFindings,
      },
      {
        id: 'monitor',
        action: 'record audit event and keep continuous detection enabled for future tool calls',
        evidence: ['audit trail event', 'gate stats', 'review checkpoint'],
        required: true,
      },
    ],
    priority: {
      critical: critical.length,
      high: high.length,
      total: findings.length,
      categories,
    },
    gateDecision,
    nextActions,
  };
}
583
+
506
584
  // ---------------------------------------------------------------------------
507
585
  // Exports
508
586
  // ---------------------------------------------------------------------------
@@ -512,7 +590,9 @@ module.exports = {
512
590
  VULN_PATTERNS,
513
591
  SUPPLY_CHAIN_PATTERNS,
514
592
  scanCode,
593
+ scanBadHostExposure,
515
594
  scanDependencyChange,
516
595
  evaluateSecurityScan,
517
596
  scanGitDiff,
597
+ buildThreatDefensePlaybook,
518
598
  };
@@ -394,6 +394,117 @@ function buildSemanticPseoGuide() {
394
394
  });
395
395
  }
396
396
 
397
+ const ZERO_TRUST_GUIDE_SPEC = Object.freeze({
398
+ slug: 'ai-coding-agent-zero-trust',
399
+ meta: {
400
+ query: 'zero trust for ai coding agents',
401
+ title: 'Zero Trust for AI Coding Agents | Enforce It at the Tool Call',
402
+ heroTitle: 'Zero Trust for AI Coding Agents, Enforced at the Tool Call',
403
+ heroSummary: 'Zero trust for agents means never trust, always verify; least privilege; assume breach. ThumbGate is the local-first way to enforce those principles for Claude Code, Cursor, and Codex — blocking dangerous tool calls before they run, and turning every thumbs-down into a prevention rule so the same mistake never repeats.',
404
+ },
405
+ takeaways: [
406
+ 'Zero trust for agents means verifying every action at the boundary where it executes — the tool call — instead of trusting the model’s stated intent.',
407
+ 'ThumbGate runs in the PreToolUse hook on your machine: rm -rf, secret writes, off-scope edits, and bad git push are blocked before execution (assume breach, least privilege).',
408
+ 'Unlike static DIY hooks, ThumbGate learns — a thumbs-down becomes an auto-promoted prevention rule that holds across every session, model, and agent.',
409
+ ],
410
+ sections: [
411
+ ['paragraphs', 'Why AI coding agents need zero trust at the tool call', [
412
+ 'A coding agent reads files, runs shell commands, calls APIs, and pushes code with minimal human approval at each step. If it is manipulated, misconfigured, or simply wrong, the blast radius is whatever it can execute — and unlike a human, it does not pause to question a suspicious request.',
413
+ 'Zero-trust security for agents adapts three principles to this reality: never trust, always verify; least privilege; and assume breach. The practical place to apply them is the action boundary — the moment before a tool call runs — not the model’s prompt or its good intentions.',
414
+ ]],
415
+ ['bullets', 'ThumbGate vs. rolling your own Claude Code hooks', [
416
+ 'Static hooks and community repos do pattern-matching you write and maintain by hand, per machine, per project. ThumbGate ships the same blocking and adds a learning layer on top.',
417
+ 'A thumbs-down on a bad action becomes an auto-promoted prevention rule — the repeat is blocked automatically next time, on every session and every agent, with zero extra config.',
418
+ 'Local-first: enforcement runs in the PreToolUse hook on the developer machine, not a server-side gateway, so it works the moment you npx thumbgate init.',
419
+ 'Works across Claude Code, Cursor, Codex, Gemini, Amp, Cline, and OpenCode — one rule set, every MCP-compatible agent.',
420
+ ]],
421
+ ['paragraphs', 'How ThumbGate maps to the zero-trust principles', [
422
+ 'Never trust, always verify: every high-risk tool call is checked against prevention rules and workflow shape before it executes. Least privilege: task scope and approval gates keep an agent inside its declared blast radius. Assume breach: dangerous commands are blocked before they touch the disk, so a compromised or confused agent cannot do damage on the way to being caught.',
423
+ 'This is enforcement, not observability. ThumbGate decides at the tool call whether the action runs — which is exactly where zero-trust controls have to live for autonomous agents.',
424
+ ]],
425
+ ],
426
+ faq: [
427
+ [
428
+ 'Isn’t this just Claude Code’s built-in hooks?',
429
+ 'Native hooks and community repos do static pattern-matching that you author and maintain per machine. ThumbGate adds the learning layer: a thumbs-down becomes a prevention rule that blocks the repeat automatically, across sessions and agents — the part static hooks cannot do.',
430
+ ],
431
+ [
432
+ 'How does ThumbGate enforce zero trust for AI agents?',
433
+ 'It applies the core principles at the tool-call boundary on your machine: never trust, always verify (every risky action is checked before it runs), least privilege (task scope and approval gates), and assume breach (dangerous calls are blocked before they touch disk).',
434
+ ],
435
+ ],
436
+ relatedPaths: ['/guides/pre-action-checks', '/guides/agent-harness-optimization'],
437
+ });
438
+
439
/**
 * Build the zero-trust guide page from ZERO_TRUST_GUIDE_SPEC.
 *
 * Section tuples are expanded with buildSectionFromSpec(), FAQ tuples with
 * answer(), and the result is handed to preActionGuide() under the spec's slug.
 *
 * @returns {*} Whatever preActionGuide() returns for this spec.
 */
function buildZeroTrustGuide() {
  const { slug, meta, takeaways, sections, faq, relatedPaths } = ZERO_TRUST_GUIDE_SPEC;

  const renderedSections = sections.map(
    ([kind, heading, entries]) => buildSectionFromSpec(kind, heading, entries),
  );
  const renderedFaq = faq.map(([question, text]) => answer(question, text));

  return preActionGuide(slug, {
    ...meta,
    takeaways,
    sections: renderedSections,
    faq: renderedFaq,
    relatedPaths,
  });
}
448
+
449
+ const GOVERN_CLAUDE_FOR_LEGAL_GUIDE_SPEC = Object.freeze({
450
+ slug: 'govern-claude-for-legal-agents',
451
+ meta: {
452
+ query: 'govern claude for legal agents',
453
+ title: 'Govern Claude for Legal Agents | A Gate Before They Act',
454
+ heroTitle: 'Govern Claude for Legal’s 90+ Agents at the Tool Call',
455
+ heroSummary: 'Claude for Legal ships 90+ named agents that review contracts, answer DSARs, and run continuously on document and email streams. Anthropic’s own guidance is that there must be a gate before anything is filed, sent, or relied on. ThumbGate is that gate — it checks each agent action at the tool-call boundary, in your tenant, and logs every decision for the record.',
456
+ },
457
+ takeaways: [
458
+ 'Claude for Legal’s agents take real side effects — sending a DSAR response, filing a document, writing to a system of record. ThumbGate gates the action before the side effect runs, not after, on a dashboard.',
459
+ 'Intent-agnostic: whether an agent is wrong, prompt-injected, or off-playbook, ThumbGate blocks the same way and records the rule that fired. The risk is not a “rogue” agent — it is an ordinary one acting at volume.',
460
+ 'Every gated decision is logged with its source rule — a SIEM-exportable audit trail your ethics, risk, and conflicts owners can query.',
461
+ ],
462
+ sections: [
463
+ ['paragraphs', 'Why 90+ legal agents need a gate before the side effect', [
464
+ 'A firm running Claude for Legal now has dozens of agents acting on ongoing document and email streams — vendor-agreement review, termination review, DSAR responses, claim charts. No one can review every action by hand. The risk is not malice; it is an ordinary agent that sends the wrong response, files against the wrong playbook, or surfaces a privileged document.',
465
+ 'Anthropic’s own framing names the control: an explicit gate before anything is filed, sent, or relied on. ThumbGate implements that gate at the tool-call boundary — the moment before the action executes — instead of trusting the agent’s stated intent.',
466
+ ]],
467
+ ['bullets', 'What ThumbGate gates for legal agents', [
468
+ 'The send/file/write action itself — e.g. a DSAR or client response before it leaves, a filing before it goes out, a write to a conflicted matter — held or blocked at the boundary.',
469
+ 'Playbook deviations — an action that departs from the firm’s approved workflow is stopped for review rather than executed.',
470
+ 'Privileged-document exposure — flagged before an agent surfaces or forwards it.',
471
+ 'Continuous runs — one rule set covers every agent and every scheduled run, so coverage scales with agent count, not headcount.',
472
+ ]],
473
+ ['paragraphs', 'Enforcement in your tenant, with an audit trail', [
474
+ 'ThumbGate runs as a pre-action gate in front of agent fulfillment, including a Dialogflow CX webhook gate deployed in your own GCP tenant, so matter content does not leave your boundary. Risk and planning scoring can run on Gemini via Vertex, in-tenant. This is a white-glove design-partner pilot, not a turnkey product purchase.',
475
+ 'Every gated detection is logged with the rule that fired and the feedback event that generated it. That decision trail is the evidence a firm needs for malpractice defense and bar-compliance review — queryable, exportable, and tied to a named owner.',
476
+ ]],
477
+ ['paragraphs', 'ThumbGate complements Claude for Legal — it does not replace it', [
478
+ 'Claude for Legal decides what the work is. ThumbGate decides what is allowed to execute. Use both: keep the 90+ agents doing the legal work, and put a gate between each agent and its next side effect. A thumbs-down on a bad action becomes a prevention rule, so the same mistake is blocked across every agent and matter next time.',
479
+ ]],
480
+ ],
481
+ faq: [
482
+ [
483
+ 'Does ThumbGate replace Claude for Legal?',
484
+ 'No. Claude for Legal’s agents do the legal work; ThumbGate governs what they are allowed to execute — a gate before anything is filed, sent, or relied on. You run both.',
485
+ ],
486
+ [
487
+ 'Where does the gate run?',
488
+ 'In your tenant. ThumbGate gates agent fulfillment locally or via a Dialogflow CX webhook gate in your own GCP project; matter content does not leave your boundary, and Vertex/Gemini scoring runs in-tenant. It is a white-glove design-partner pilot, not a turnkey purchase.',
489
+ ],
490
+ [
491
+ 'What proof does a firm get?',
492
+ 'Every gated decision is logged with the rule that fired and the feedback that generated it — a SIEM-exportable audit trail for ethics, risk, and conflicts owners.',
493
+ ],
494
+ ],
495
+ relatedPaths: ['/guides/ai-coding-agent-zero-trust', '/guides/pre-action-checks'],
496
+ });
497
+
498
/**
 * Build the "govern Claude for Legal agents" guide page from
 * GOVERN_CLAUDE_FOR_LEGAL_GUIDE_SPEC.
 *
 * Section tuples are expanded with buildSectionFromSpec(), FAQ tuples with
 * answer(), and the result is handed to preActionGuide() under the spec's slug.
 *
 * @returns {*} Whatever preActionGuide() returns for this spec.
 */
function buildGovernClaudeForLegalGuide() {
  const { slug, meta, takeaways, sections, faq, relatedPaths } = GOVERN_CLAUDE_FOR_LEGAL_GUIDE_SPEC;

  const renderedSections = sections.map(
    ([kind, heading, entries]) => buildSectionFromSpec(kind, heading, entries),
  );
  const renderedFaq = faq.map(([question, text]) => answer(question, text));

  return preActionGuide(slug, {
    ...meta,
    takeaways,
    sections: renderedSections,
    faq: renderedFaq,
    relatedPaths,
  });
}
507
+
397
508
  const PROXY_POINTER_RAG_GUARDRAILS_SPEC = Object.freeze({
398
509
  slug: 'proxy-pointer-rag-guardrails',
399
510
  meta: {
@@ -1536,6 +1647,8 @@ const PAGE_BLUEPRINTS = [
1536
1647
  relatedPaths: ['/compare/speclock', '/guides/claude-code-feedback'],
1537
1648
  },
1538
1649
  buildSemanticPseoGuide(),
1650
+ buildZeroTrustGuide(),
1651
+ buildGovernClaudeForLegalGuide(),
1539
1652
  buildProxyPointerRagGuide(),
1540
1653
  buildRagPrecisionTuningGuide(),
1541
1654
  buildAiEngineeringStackGuide(),
@@ -4,6 +4,7 @@
4
4
  const fs = require('node:fs');
5
5
  const os = require('node:os');
6
6
  const path = require('node:path');
7
+ const { expandFixturePlaceholders } = require('./secret-fixture-tokens');
7
8
 
8
9
  const ROOT = path.join(__dirname, '..');
9
10
  const DEFAULT_SUITE_PATH = path.join(ROOT, 'bench', 'thumbgate-bench.json');
@@ -180,6 +181,20 @@ function assertObject(value, label) {
180
181
  }
181
182
  }
182
183
 
184
/**
 * Recursively expand fixture placeholders throughout a scenario value.
 *
 * Strings go through expandFixturePlaceholders(); arrays and plain objects are
 * rebuilt with every element / property value expanded; anything else
 * (numbers, booleans, null, undefined, functions) is returned unchanged.
 *
 * @param {*} value Scenario value of any shape.
 * @returns {*} A new structure with all nested strings expanded.
 */
function expandScenarioFixturePlaceholders(value) {
  if (typeof value === 'string') {
    return expandFixturePlaceholders(value);
  }
  if (value === null || typeof value !== 'object') {
    return value;
  }
  if (Array.isArray(value)) {
    return value.map((item) => expandScenarioFixturePlaceholders(item));
  }

  const expandedPairs = [];
  for (const [key, nestedValue] of Object.entries(value)) {
    expandedPairs.push([key, expandScenarioFixturePlaceholders(nestedValue)]);
  }
  return Object.fromEntries(expandedPairs);
}
197
+
183
198
  function loadScenarioSuite(filePath = DEFAULT_SUITE_PATH) {
184
199
  const suite = readJson(filePath);
185
200
  assertObject(suite, 'Scenario suite');
@@ -202,7 +217,7 @@ function loadScenarioSuite(filePath = DEFAULT_SUITE_PATH) {
202
217
  throw new Error(`Scenario ${id} has invalid expectedDecision`);
203
218
  }
204
219
  return {
205
- ...scenario,
220
+ ...expandScenarioFixturePlaceholders(scenario),
206
221
  id,
207
222
  unsafe: Boolean(scenario.unsafe),
208
223
  positivePattern: Boolean(scenario.positivePattern),
@@ -161,6 +161,19 @@ const TOOLS = [
161
161
  required: ['toolName'],
162
162
  },
163
163
  }),
164
+ readOnlyTool({
165
+ name: 'ai_component_inventory',
166
+ description: 'Scan a project for AI/ML provider SDKs, agent frameworks, vector databases, Vertex/Gemini/Dialogflow CX usage, and model artifacts. Returns evidence suitable for enterprise AI inventory and ML-BOM review.',
167
+ inputSchema: {
168
+ type: 'object',
169
+ properties: {
170
+ rootDir: { type: 'string', description: 'Project root to scan. Defaults to the current process working directory.' },
171
+ format: { type: 'string', enum: ['summary', 'json', 'cyclonedx'], description: 'Response format. summary is compact text; json returns ThumbGate inventory; cyclonedx returns ML-BOM JSON.' },
172
+ maxFiles: { type: 'number', description: 'Maximum files to scan (default 2500).' },
173
+ includeSnippets: { type: 'boolean', description: 'Include matched source snippets in evidence. Defaults true.' },
174
+ },
175
+ },
176
+ }),
164
177
  readOnlyTool({
165
178
  name: 'search_thumbgate',
166
179
  description: 'Search raw ThumbGate state across feedback logs, ContextFS memory, prevention rules, and imported policy documents.',
@@ -818,6 +831,17 @@ const TOOLS = [
818
831
  items: { type: 'string' },
819
832
  description: 'Optional protected-file globs that require explicit approval before editing or publishing',
820
833
  },
834
+ workflowContract: {
835
+ type: 'object',
836
+ description: 'Optional deterministic workflow run contract. Supports workflowId, allowedBranches, blockedActions, requiredEvidence, and completionGate.',
837
+ properties: {
838
+ workflowId: { type: 'string' },
839
+ allowedBranches: { type: 'array', items: { type: 'string' } },
840
+ blockedActions: { type: 'array', items: { type: 'string' } },
841
+ requiredEvidence: { type: 'array', items: { type: 'string' } },
842
+ completionGate: { type: 'string' },
843
+ },
844
+ },
821
845
  repoPath: { type: 'string', description: 'Optional repo root used when evaluating git diff scope' },
822
846
  localOnly: { type: 'boolean', description: 'When true, also marks the task as local-only' },
823
847
  clear: { type: 'boolean', description: 'Clear the current task scope instead of setting one' },
@@ -1460,6 +1484,19 @@ const TOOLS = [
1460
1484
  },
1461
1485
  },
1462
1486
  }),
1487
+ destructiveTool({
1488
+ name: 'parallel_workflow',
1489
+ description: 'Execute a parallel, multi-step subtask workflow to resolve an objective like a security audit, performance benchmark, or repository inspection.',
1490
+ inputSchema: {
1491
+ type: 'object',
1492
+ required: ['objective'],
1493
+ properties: {
1494
+ objective: { type: 'string', description: 'The objective to plan and execute (e.g. security audit, performance benchmark)' },
1495
+ concurrency: { type: 'number', description: 'Maximum parallel subtasks (default 3)' },
1496
+ timeoutMs: { type: 'number', description: 'Timeout in milliseconds (default 60000)' },
1497
+ },
1498
+ },
1499
+ }),
1463
1500
  ];
1464
1501
 
1465
1502
  // Normalize at export: guarantee EVERY tool carries a human-readable title and a