thumbgate 1.16.3 → 1.16.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/public/guide.html CHANGED
@@ -31,7 +31,7 @@
31
31
  "url": "https://thumbgate-production.up.railway.app"
32
32
  },
33
33
  "datePublished": "2026-03-27",
34
- "dateModified": "2026-03-27",
34
+ "dateModified": "2026-04-25",
35
35
  "mainEntityOfPage": "https://thumbgate-production.up.railway.app/guide",
36
36
  "about": [
37
37
  {"@type": "Thing", "name": "AI coding agents"},
@@ -184,8 +184,23 @@
184
184
  .comparison-table td:first-child { font-weight: 600; }
185
185
  .cta { display: inline-block; background: var(--cyan); color: #000; padding: 0.75rem 1.5rem; border-radius: 8px; text-decoration: none; font-weight: 600; margin: 1rem 0; }
186
186
  .cta:hover { opacity: 0.9; }
187
+ .cta-secondary { background: transparent; color: var(--text); border: 1px solid var(--border); margin-left: 0.75rem; }
188
+ .cta-secondary:hover { border-color: var(--cyan); color: var(--cyan); }
187
189
  .breadcrumb { color: var(--muted); font-size: 0.85rem; margin-bottom: 0.5rem; }
188
190
  .breadcrumb a { color: var(--muted); }
191
+ .proof-links { display: grid; gap: 0.85rem; margin: 1.25rem 0 0; }
192
+ .proof-links a {
193
+ display: block;
194
+ padding: 0.9rem 1rem;
195
+ border: 1px solid var(--border);
196
+ border-radius: 10px;
197
+ text-decoration: none;
198
+ color: var(--text);
199
+ background: #111113;
200
+ }
201
+ .proof-links a strong { color: var(--cyan); display: block; margin-bottom: 0.2rem; }
202
+ .proof-links a span { color: var(--muted); font-size: 0.95rem; }
203
+ .buyer-paths { margin-top: 1rem; }
189
204
  @media (max-width: 600px) { h1 { font-size: 1.6rem; } .container { padding: 1rem; } }
190
205
  </style>
191
206
  </head>
@@ -306,10 +321,39 @@ npx thumbgate init --agent gemini</code></pre>
306
321
  <h3>Agent uses wrong API endpoint</h3>
307
322
  <p>Give a thumbs-down: "called staging API in production code." The check blocks tool calls that reference staging URLs in production contexts.</p>
308
323
 
324
+ <h2>When ThumbGate becomes a paid decision</h2>
325
+ <p>Stay on the free install path while one operator is proving the workflow locally. The paid motion starts when a workflow owner asks for proof, shared enforcement, or a safer rollout path.</p>
326
+ <div class="card buyer-paths">
327
+ <h3>Three honest next steps</h3>
328
+ <ul>
329
+ <li><strong>Free:</strong> use <code>npx thumbgate init</code> to prove one workflow on one machine.</li>
330
+ <li><strong>Pro:</strong> buy the self-serve lane only when you want a personal local dashboard, DPO export, and proof-ready workflow review for the next risky flow.</li>
331
+ <li><strong>Workflow Hardening Sprint:</strong> use the team intake path once one workflow, one owner, and one repeated failure are already clear.</li>
332
+ </ul>
333
+ </div>
334
+
335
+ <h2>Proof before a buyer says yes</h2>
336
+ <p>The repo sales plan is proof-led, not hype-led. Commercial claims stay anchored to the current truth file, and engineering claims stay anchored to verification evidence and machine-readable proof reports.</p>
337
+ <div class="proof-links">
338
+ <a href="https://github.com/IgorGanapolsky/ThumbGate/blob/main/docs/COMMERCIAL_TRUTH.md" target="_blank" rel="noopener">
339
+ <strong>Commercial Truth</strong>
340
+ <span>Current pricing, traction guardrails, and what the product can honestly claim today.</span>
341
+ </a>
342
+ <a href="https://github.com/IgorGanapolsky/ThumbGate/blob/main/docs/VERIFICATION_EVIDENCE.md" target="_blank" rel="noopener">
343
+ <strong>Verification Evidence</strong>
344
+ <span>Human-readable proof log for the engineering and workflow claims used across the site.</span>
345
+ </a>
346
+ <a href="https://github.com/IgorGanapolsky/ThumbGate/blob/main/proof/automation/report.json" target="_blank" rel="noopener">
347
+ <strong>Automation Proof</strong>
348
+ <span>Machine-readable report for the feedback, enforcement, and automation surfaces behind ThumbGate.</span>
349
+ </a>
350
+ </div>
351
+
309
352
  <h2>Get Started</h2>
310
353
  <pre><code>npx thumbgate init</code></pre>
311
354
  <p>One command. Works with Claude Code, Cursor, Codex, Gemini, Amp, and OpenCode. Claude Code can also call Codex for review, adversarial review, and second-pass handoffs through the repo-local bridge plugin.</p>
312
355
  <a href="https://thumbgate-production.up.railway.app/checkout/pro?utm_source=guide&utm_medium=cta_button&utm_campaign=pro_pack" class="cta">Get Pro — $19/mo or $149/yr</a>
356
+ <a href="https://thumbgate-production.up.railway.app/#workflow-sprint-intake" class="cta cta-secondary">Start a Workflow Hardening Sprint</a>
313
357
  <p style="color:var(--muted); font-size:0.85rem;">Free keeps local enforcement with 3 daily feedback captures, 5 lesson searches, unlimited recall, blocking, and history-aware lesson distillation. Pro is $19/mo or $149/yr for a personal local dashboard and DPO export. Team rollout starts intake-first at $49/seat/mo with a 3-seat minimum for the hosted shared lesson DB, org dashboard, and generated review views.</p>
314
358
 
315
359
  </div>
package/public/index.html CHANGED
@@ -24,7 +24,7 @@ __GOOGLE_SITE_VERIFICATION_META__
24
24
  <link rel="apple-touch-icon" href="/assets/brand/thumbgate-mark.svg">
25
25
  <meta property="og:image" content="/og.png">
26
26
  <title>ThumbGate — Stop paying for the same AI mistake twice</title>
27
- <meta name="description" content="Stop paying for the same AI mistake twice. ThumbGate is the enforcement layer for AI agent orchestration: 👍 thumbs up and 👎 thumbs down become history-aware lessons, shared lessons and org visibility, plus Pre-Action Checks that block repeat mistakes before the next tool call across Claude Code, Cursor, Codex, Gemini, Amp, Cline, and OpenCode.">
27
+ <meta name="description" content="Stop paying for the same AI mistake twice. ThumbGate is machine-speed pre-action defense for AI coding agents: 👍 thumbs up and 👎 thumbs down become history-aware lessons, shared lessons and org visibility, actionable remediations, agent surface inventory, and Pre-Action Checks that block repeat mistakes before the next tool call across Claude Code, Cursor, Codex, Gemini, Amp, Cline, and OpenCode.">
28
28
  <meta property="og:title" content="ThumbGate — Stop paying for the same AI mistake twice">
29
29
  <meta property="og:description" content="Frontier LLMs are expensive, opaque, and unreliable in production. ThumbGate gates risky agent actions before they run: workflow shape, inspection evidence, token budget, and repeated-failure memory in one pre-action check.">
30
30
  <meta property="og:type" content="website">
@@ -53,7 +53,7 @@ __GA_BOOTSTRAP__
53
53
  "@type": "SoftwareApplication",
54
54
  "name": "ThumbGate",
55
55
  "alternateName": "thumbgate",
56
- "description": "ThumbGate stops you from paying for the same AI mistake twice. Frontier LLMs are expensive, opaque, and unreliable in production — every repeated hallucination, retry loop, or known-bad tool call burns more tokens. ThumbGate's Pre-Action Checks inspect workflow shape, environment evidence, budget, and repeated-failure memory before the action runs. Works with Claude Code, Cursor, Codex, Gemini, Amp, Cline, OpenCode, and any MCP-compatible agent.",
56
+ "description": "ThumbGate stops you from paying for the same AI mistake twice. It is machine-speed pre-action defense for coding agents: thumbs-up/down feedback becomes history-aware lessons, shared lessons and org visibility, actionable remediations, agent surface inventory, and Pre-Action Checks that inspect workflow shape, environment evidence, budget, and repeated-failure memory before the next tool call across Claude Code, Cursor, Codex, Gemini, Amp, Cline, OpenCode, and any MCP-compatible agent.",
57
57
  "applicationCategory": "DeveloperApplication",
58
58
  "operatingSystem": "Cross-platform, Node.js >=18.18.0",
59
59
  "license": "https://opensource.org/licenses/MIT",
@@ -70,7 +70,9 @@ __GA_BOOTSTRAP__
70
70
  "Prevent expensive AI mistakes — catch bad commands, destructive database actions, unsafe publishes, and risky API calls before execution",
71
71
  "Make AI stop repeating mistakes — thumbs-down feedback becomes history-aware lessons and Pre-Action Checks",
72
72
  "Turn AI into a reliable operator — checkpoint risky actions, enforce safe patterns, and keep proof of what changed",
73
- "ThumbGate GPT for ChatGPT — check proposed agent actions, capture thumbs-up/down lessons, and route users into local enforcement",
73
+ "Agent surface inventory — see which tools, MCP surfaces, and policy sources are actually active before rollout",
74
+ "Actionable remediations — rank the next highest-ROI fixes from real feedback and risk pressure",
75
+ "ThumbGate GPT for ChatGPT — preflight risky commands, refunds, deploys, and PR actions, capture typed thumbs-up/down lessons, and route users into local enforcement",
74
76
  "Workflow Sentinel — score blast radius before PR, merge, release, and publish actions fire",
75
77
  "Workflow architecture checks — distinguish predefined workflows, parallel fan-out, and open-ended agents before execution",
76
78
  "Environment inspection evidence — require read-before-write, screenshots, API response checks, tests, or output validation for open-ended agent loops",
@@ -572,9 +574,9 @@ __GA_BOOTSTRAP__
572
574
  <section class="hero">
573
575
  <div class="container">
574
576
  <div class="hero-thumbs">👍👎</div>
575
- <div class="hero-badge">● Your AI coding bill has a leak</div>
577
+ <div class="hero-badge">● Machine-speed pre-action defense for coding agents</div>
576
578
  <h1>Stop paying $ for the same AI mistake.</h1>
577
- <p style="font-size:18px;color:var(--text-muted);max-width:720px;margin:0 auto 20px;line-height:1.6;">Every retry loop, every hallucinated import, every "let me try a different approach" — those are billable tokens on every LLM vendor's bill. Thumbs-down once; ThumbGate blocks that exact mistake on every future call. Across Claude Code, Cursor, Codex, Gemini, Amp, Cline, OpenCode — any MCP-compatible agent, forever, including fast-moving vibe coding workflows.</p>
579
+ <p style="font-size:18px;color:var(--text-muted);max-width:720px;margin:0 auto 20px;line-height:1.6;">Every retry loop, every hallucinated import, every "let me try a different approach" — those are billable tokens on every LLM vendor's bill. ThumbGate is machine-speed pre-action defense: thumbs-down once, block that exact mistake on every future call, surface the next highest-ROI remediation, and show which agent surfaces are actually active before rollout. Across Claude Code, Cursor, Codex, Gemini, Amp, Cline, OpenCode — any MCP-compatible agent, forever, including fast-moving vibe coding workflows.</p>
578
580
  <p style="font-size:15px;color:var(--text-dim);max-width:760px;margin:0 auto 24px;line-height:1.6;">As desktop agents move into parallel sessions, terminals, and production workflows, ThumbGate checks the thing benchmarks miss: is this next action a known workflow, an open-ended agent, a costly fan-out, or a blind tool call with no way to verify it worked?</p>
579
581
 
580
582
  <!-- HERO PRICING CARD — visible in first viewport so $19/mo and $149/yr never get buried -->
@@ -611,12 +613,12 @@ __GA_BOOTSTRAP__
611
613
  <span style="display:inline-flex;align-items:center;gap:6px;color:#4ade80;"><span style="width:6px;height:6px;border-radius:50%;background:#4ade80;box-shadow:0 0 8px #4ade80;animation:pulse 1.6s ease-in-out infinite;"></span>enforcing</span>
612
614
  </div>
613
615
  <div style="font-size:13px;color:var(--text-muted);margin-bottom:4px;">💸 Tokens saved — since install (Sonnet-blended, conservative)</div>
614
- <div id="hero-savings-counter" data-target="1247.82" style="font-size:44px;font-weight:700;color:#4ade80;letter-spacing:-0.02em;line-height:1;margin-bottom:18px;">$0.00</div>
616
+ <div id="hero-savings-counter" data-target="0" style="font-size:44px;font-weight:700;color:#4ade80;letter-spacing:-0.02em;line-height:1;margin-bottom:18px;">$0.00</div>
615
617
  <div style="font-size:12px;line-height:1.8;border-top:1px solid rgba(255,255,255,0.06);padding-top:12px;">
616
618
  <div style="color:#4ade80;">✅ check:no-force-push — blocked 12×</div>
617
619
  <div style="color:#4ade80;">✅ check:no-hallucinated-import — blocked 8×</div>
618
620
  <div style="color:#f87171;">❌ check:no-drop-prod — FIRED · saved ~$3.40</div>
619
- <div style="color:var(--text-muted);font-size:11px;margin-top:8px;">Sample shown. Your own dashboard tracks live feedback log + blocked calls from day one. <span style="color:var(--cyan);">Open dashboard →</span></div>
621
+ <div style="color:var(--text-muted);font-size:11px;margin-top:8px;">Sample shown. Your own dashboard tracks live feedback log, actionable remediations, and agent surface inventory from day one. <span style="color:var(--cyan);">Open dashboard →</span></div>
620
622
  </div>
621
623
  </a>
622
624
  <style>@keyframes pulse{0%,100%{opacity:1}50%{opacity:0.4}}</style>
@@ -632,7 +634,8 @@ __GA_BOOTSTRAP__
632
634
  </script>
633
635
  <div class="hero-signals">
634
636
  <a class="signal-pill signal-down" href="#how-it-works" title="See how check interception works">Block repeat hallucinations before the model sees them</a>
635
- <a class="signal-pill signal-up" href="#how-it-works" title="See the one-thumbs-down enforcement loop">Thumbs-down once, blocked forever, across every agent</a>
637
+ <a class="signal-pill signal-up" href="/dashboard" title="See the remediation and inventory dashboard">Thumbs-down once, blocked forever</a>
638
+ <a class="signal-pill" href="/dashboard" title="See the remediation and inventory dashboard">Actionable remediations + agent surface inventory</a>
636
639
  <a class="signal-pill" href="#install" title="Install the CLI">CLI-first workflow governance with a live tokens-saved counter</a>
637
640
  </div>
638
641
  <p class="hero-persona" style="display:none">For consultancies, platform teams, and AI product teams with one workflow owner, one repeated failure, and one buyer who needs proof before a wider rollout.</p>
@@ -784,15 +787,15 @@ __GA_BOOTSTRAP__
784
787
  <div class="container">
785
788
  <div class="gpt-panel">
786
789
  <div class="section-label" style="text-align:left;">ChatGPT Entry Point · Live ThumbGate GPT for ChatGPT</div>
787
- <h2>Open the GPT. Give typed thumbs feedback. Turn the lesson into a check.</h2>
788
- <p>ThumbGate should meet users where they already ask AI for help. The live GPT is the lowest-friction way to capture a useful thumbs-up/down lesson, check a risky action, and prove the enforcement loop before installing anything. As ChatGPT ads roll out, this matters more: ChatGPT can stay the discovery and checkpointing layer, while ThumbGate remains the hard execution boundary after <code>npx thumbgate init</code>.</p>
790
+ <h2>Use the GPT as a preflight desk for risky commands, refunds, deploys, and PR actions.</h2>
791
+ <p>ThumbGate should meet users where they already ask AI for help. The live GPT is the fastest way to preflight a risky action, capture a typed thumbs-up/down lesson, and prove the enforcement loop before installing anything. As ChatGPT ads roll out, this matters more: ChatGPT can stay the discovery and checkpointing layer, while ThumbGate remains the hard execution boundary after <code>npx thumbgate init</code>.</p>
789
792
  <div class="gpt-steps">
790
793
  <div class="gpt-step">
791
- <strong>1. Try the live GPT</strong>
792
- <p>Paste a proposed command, file edit, merge, deploy, or API call and ask whether to allow, block, or checkpoint it.</p>
794
+ <strong>1. Open the live GPT</strong>
795
+ <p>Paste a proposed command, file edit, merge, deploy, refund, invoice, or API call and ask whether to allow, block, or checkpoint it.</p>
793
796
  </div>
794
797
  <div class="gpt-step">
795
- <strong>2. Save the signal</strong>
798
+ <strong>2. Save the typed signal</strong>
796
799
  <p>Reply in chat with <code>thumbs up:</code> or <code>thumbs down:</code> plus one concrete sentence. Do not rely on ChatGPT's native rating buttons for ThumbGate memory.</p>
797
800
  </div>
798
801
  <div class="gpt-step">
@@ -805,7 +808,7 @@ __GA_BOOTSTRAP__
805
808
  <a href="https://github.com/IgorGanapolsky/ThumbGate/blob/main/adapters/chatgpt/INSTALL.md" class="btn-free" target="_blank" rel="noopener" style="display:inline-flex;align-items:center;padding:12px 20px;border-radius:8px;">ChatGPT Actions setup</a>
806
809
  <a href="/guides/chatgpt-ads-trust" class="btn-free" style="display:inline-flex;align-items:center;padding:12px 20px;border-radius:8px;">Why ChatGPT ads need checks</a>
807
810
  </div>
808
- <p class="gpt-note"><strong>Plain English rule:</strong> ChatGPT is the discovery and memory surface for advice, checkpointing, and typed feedback capture. One typed signal becomes one remembered rule. The hard Reliability Gateway still runs in the local agent or CI lane.</p>
811
+ <p class="gpt-note"><strong>Find it fast:</strong> if the direct link does not open, go to <strong>Explore GPTs</strong>, search <code>ThumbGate</code>, and choose the GPT by Igor Ganapolsky in <strong>Programming</strong>. <strong>Plain English rule:</strong> ChatGPT is the discovery and memory surface for advice, checkpointing, and typed feedback capture. One typed signal becomes one remembered rule. The hard Reliability Gateway still runs in the local agent or CI lane.</p>
809
812
  </div>
810
813
  </div>
811
814
  </section>
@@ -857,7 +860,7 @@ __GA_BOOTSTRAP__
857
860
  </a>
858
861
  <a class="compat-card" href="/go/gpt?utm_source=website&utm_medium=compatibility&utm_campaign=chatgpt_gpt&cta_id=compat_open_gpt&cta_placement=compatibility" target="_blank" rel="noopener">
859
862
  <h3>💬 ChatGPT GPT Actions</h3>
860
- <p>Open the ThumbGate GPT to check proposed AI actions, capture thumbs-up/down lessons, and get setup guidance. Real blocking for coding agents still runs locally after <code>npx thumbgate init</code>.</p>
863
+ <p>Open the ThumbGate GPT to preflight risky commands, deploys, refunds, PR actions, and setup steps, capture thumbs-up/down lessons, and save typed signals. Real blocking for coding agents still runs locally after <code>npx thumbgate init</code>.</p>
861
864
  <div class="card-arrow">Open ThumbGate GPT →</div>
862
865
  </a>
863
866
  </div>
@@ -1079,7 +1082,7 @@ __GA_BOOTSTRAP__
1079
1082
  <!-- HOW IT WORKS -->
1080
1083
  <section class="how-it-works" id="how-it-works">
1081
1084
  <div class="container">
1082
- <div class="section-label">New in v1.16.3</div>
1085
+ <div class="section-label">New in v1.16.5</div>
1083
1086
  <h2 class="section-title">Three steps to stop repeated AI failures</h2>
1084
1087
  <div class="steps">
1085
1088
  <div class="step">
@@ -1439,7 +1442,7 @@ __GA_BOOTSTRAP__
1439
1442
  <a href="https://www.linkedin.com/in/igorganapolsky" target="_blank" rel="noopener">LinkedIn</a>
1440
1443
  <a href="/blog">Blog</a>
1441
1444
  </div>
1442
- <span class="footer-copy">© 2026 Max Smith KDP LLC · MIT License · v1.16.3</span>
1445
+ <span class="footer-copy">© 2026 Max Smith KDP LLC · MIT License · v1.16.5</span>
1443
1446
  </div>
1444
1447
  </footer>
1445
1448
 
@@ -0,0 +1,229 @@
1
+ #!/usr/bin/env node
2
+ 'use strict';
3
+
4
+ /**
5
+ * Background Agent Governance — the missing layer for Ramp/Ona-style agent stacks.
6
+ *
7
+ * Background agents run unattended (writing 57% of PRs at Ramp). They need:
8
+ * 1. Run tracking — what did each agent run do?
9
+ * 2. Governance gate — should this PR/action be allowed based on past failures?
10
+ * 3. Post-run audit — auto-capture feedback from CI results
11
+ * 4. Governance report — "X runs, Y blocked, Z lessons learned"
12
+ *
13
+ * Integrates with: MCP server, gates engine, org dashboard, lesson inference.
14
+ */
15
+
16
+ const fs = require('fs');
17
+ const path = require('path');
18
+ const { resolveFeedbackDir } = require('./feedback-paths');
19
+ const { ensureParentDir, readJsonl } = require('./fs-utils');
20
+
21
+ const RUNS_FILE = 'agent-runs.jsonl';
22
+
23
/**
 * Resolve the feedback directory through the shared `resolveFeedbackDir` helper.
 *
 * @param {*} [feedbackDir] - Optional override, forwarded unchanged as `{ feedbackDir }`.
 * @returns {*} Whatever `resolveFeedbackDir` returns for that option object.
 */
function getFeedbackDir(feedbackDir) {
  const resolverOptions = { feedbackDir };
  return resolveFeedbackDir(resolverOptions);
}
24
/**
 * Build the path of the agent-runs log (`RUNS_FILE`) inside the resolved feedback directory.
 *
 * @param {*} [feedbackDir] - Optional override passed on to `getFeedbackDir`.
 * @returns {string} The joined path to the runs file.
 */
function getRunsPath(feedbackDir) {
  const baseDir = getFeedbackDir(feedbackDir);
  return path.join(baseDir, RUNS_FILE);
}
25
+
26
+ // ---------------------------------------------------------------------------
27
+ // 1. Run Tracking
28
+ // ---------------------------------------------------------------------------
29
+
30
/**
 * Record a background agent run.
 * Called when a background agent starts or completes a task.
 *
 * Builds a run record (filling defaults for any missing field), appends it as
 * one JSON line to the runs log returned by `getRunsPath`, and returns it.
 *
 * @param {object} [run] - Run details; every field is optional.
 * @param {string} [run.agentId] - Agent identifier; defaults to 'unknown'.
 * @param {string} [run.runType] - Defaults to 'unknown'.
 * @param {string} [run.source] - Defaults to 'background'.
 * @param {string} [run.branch] - Defaults to null.
 * @param {number} [run.prNumber] - Defaults to null.
 * @param {string} [run.status] - Defaults to 'started'.
 * @param {number} [run.gatesChecked] - Defaults to 0.
 * @param {number} [run.gatesBlocked] - Defaults to 0.
 * @param {number} [run.filesChanged] - Defaults to 0.
 * @param {boolean|null} [run.ciPassed] - Stored as null only when undefined.
 * @param {number} [run.duration] - Stored as `durationMs`; 0 is preserved.
 * @param {object} [run.metadata] - Defaults to an empty object.
 * @param {*} [feedbackDir] - Optional feedback directory override.
 * @returns {object} The record that was appended to the log.
 */
function recordAgentRun({ agentId, runType, source, branch, prNumber, status, gatesChecked, gatesBlocked, filesChanged, ciPassed, duration, metadata } = {}, feedbackDir) {
  const runsPath = getRunsPath(feedbackDir);
  ensureParentDir(runsPath);
  const run = {
    id: `run_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`,
    timestamp: new Date().toISOString(),
    agentId: agentId || 'unknown',
    runType: runType || 'unknown', // 'pr', 'fix', 'refactor', 'ci-repair', 'migration'
    source: source || 'background', // 'background', 'triggered', 'scheduled', 'manual'
    branch: branch || null,
    prNumber: prNumber || null,
    status: status || 'started', // 'started', 'completed', 'blocked', 'failed'
    gatesChecked: gatesChecked || 0,
    gatesBlocked: gatesBlocked || 0,
    filesChanged: filesChanged || 0,
    // Distinguish "not reported" (null) from an explicit false.
    ciPassed: ciPassed === undefined ? null : ciPassed,
    // A measured duration of 0 ms is real data; only other falsy values become null.
    durationMs: duration === 0 ? 0 : (duration || null),
    metadata: metadata || {},
  };
  fs.appendFileSync(runsPath, `${JSON.stringify(run)}\n`);
  return run;
}
56
+
57
+ // ---------------------------------------------------------------------------
58
+ // 2. Governance Gate — pre-run check
59
+ // ---------------------------------------------------------------------------
60
+
61
/**
 * Check if a background agent run should proceed based on governance rules.
 *
 * Reads the recorded runs and evaluates four rules against the agent's last
 * ten runs and the requested run. Only Rule 1 produces a blocker; the others
 * produce warnings.
 *
 * @param {object} [request]
 * @param {string} [request.agentId] - Agent to evaluate. A missing id is
 *   treated as 'unknown', the same label `recordAgentRun` stores for runs
 *   recorded without an id, so those runs are actually matched.
 * @param {string} [request.runType] - Accepted for interface compatibility; not used by any rule.
 * @param {string} [request.branch] - Target branch of the run.
 * @param {number} [request.filesChanged] - Number of files the run touches.
 * @param {*} [feedbackDir] - Optional feedback directory override.
 * @returns {{allowed: boolean, blockers: object[], warnings: object[], governanceScore: number, checkedAt: string}}
 */
function checkRunGovernance({ agentId, runType, branch, filesChanged } = {}, feedbackDir) {
  const runs = readJsonl(getRunsPath(feedbackDir));
  const blockers = [];
  const warnings = [];
  // Use the same fallback label that recordAgentRun writes to the log.
  const agentKey = agentId || 'unknown';

  // Rule 1: Block if this agent has > 50% failure rate in last 10 runs
  // (only once at least 5 runs exist, so a single early failure cannot block).
  const agentRuns = runs.filter((r) => r.agentId === agentKey).slice(-10);
  const failedRuns = agentRuns.filter((r) => r.status === 'failed' || r.status === 'blocked');
  if (agentRuns.length >= 5 && failedRuns.length / agentRuns.length > 0.5) {
    blockers.push({ rule: 'high_failure_rate', message: `Agent ${agentKey} has ${failedRuns.length}/${agentRuns.length} failed runs (>50%)`, severity: 'critical' });
  }

  // Rule 2: Warn if agent has been blocked by gates in recent runs
  const recentBlocked = agentRuns.filter((r) => r.gatesBlocked > 0);
  if (recentBlocked.length >= 3) {
    warnings.push({ rule: 'repeated_gate_blocks', message: `Agent ${agentKey} has been gate-blocked in ${recentBlocked.length} recent runs`, severity: 'warning' });
  }

  // Rule 3: Warn (does not block) when the run targets a protected branch
  if (branch && /^(main|master|develop)$/.test(branch)) {
    warnings.push({ rule: 'protected_branch', message: `Run targets protected branch "${branch}" — CI must pass before merge`, severity: 'warning' });
  }

  // Rule 4: Warn if too many files changed (large blast radius)
  if (filesChanged > 20) {
    warnings.push({ rule: 'large_blast_radius', message: `${filesChanged} files changed — consider splitting into smaller PRs`, severity: 'warning' });
  }

  // Each blocker costs 40 points and each warning 10, floored at 0.
  const governanceScore = Math.max(0, 100 - blockers.length * 40 - warnings.length * 10);

  return {
    allowed: blockers.length === 0,
    blockers,
    warnings,
    governanceScore,
    checkedAt: new Date().toISOString(),
  };
}
103
+
104
+ // ---------------------------------------------------------------------------
105
+ // 3. Post-Run Audit — auto-capture feedback from CI
106
+ // ---------------------------------------------------------------------------
107
+
108
/**
 * Turn the CI outcome of a finished background agent run into a run record
 * plus a feedback entry for the learning loop.
 *
 * The run is always recorded first via `recordAgentRun`. Feedback capture is
 * best-effort: any error while loading or calling `./feedback-loop` is
 * ignored and `feedbackResult` stays null.
 *
 * @param {object} [details]
 * @param {string} [details.runId] - Used only in the context sentence.
 * @param {string} [details.agentId]
 * @param {boolean} [details.ciPassed] - Truthy means CI passed.
 * @param {string} [details.ciOutput] - On failure, its first 500 characters become `whatWentWrong`.
 * @param {number} [details.prNumber]
 * @param {string} [details.branch]
 * @param {number} [details.filesChanged]
 * @param {*} [feedbackDir] - Optional feedback directory override for the run log.
 * @returns {{run: object, feedbackResult: *, signal: string, context: string}}
 */
function auditCompletedRun({ runId, agentId, ciPassed, ciOutput, prNumber, branch, filesChanged } = {}, feedbackDir) {
  let signal;
  let outcomeText;
  let ciText;
  if (ciPassed) {
    signal = 'positive';
    outcomeText = 'completed successfully';
    ciText = 'passed';
  } else {
    signal = 'negative';
    outcomeText = 'failed';
    ciText = 'failed';
  }

  const prLabel = prNumber || '?';
  const context = `Background agent run ${runId || 'unknown'} ${outcomeText}. PR #${prLabel} on ${branch || '?'}. ${filesChanged || 0} files changed. CI ${ciText}.`;

  // Only failed runs with CI output carry a failure excerpt.
  let whatWentWrong = null;
  if (!ciPassed && ciOutput) {
    whatWentWrong = ciOutput.slice(0, 500);
  }

  // Persist the run before attempting feedback capture.
  const runDetails = {
    agentId,
    runType: 'pr',
    source: 'background',
    branch,
    prNumber,
    status: ciPassed ? 'completed' : 'failed',
    filesChanged,
    ciPassed,
  };
  const run = recordAgentRun(runDetails, feedbackDir);

  // Feedback capture is optional; failures here must not break the audit.
  let feedbackResult = null;
  try {
    const feedbackLoop = require('./feedback-loop');
    const feedbackTags = [
      'background-agent',
      ciPassed ? 'ci-pass' : 'ci-fail',
      `agent:${agentId || 'unknown'}`,
    ];
    feedbackResult = feedbackLoop.captureFeedback({
      signal: ciPassed ? 'up' : 'down',
      context,
      whatWentWrong,
      whatWorked: ciPassed ? `Agent successfully completed PR #${prLabel}` : undefined,
      tags: feedbackTags,
    });
  } catch { /* feedback capture is non-critical */ }

  return { run, feedbackResult, signal, context };
}
147
+
148
+ // ---------------------------------------------------------------------------
149
+ // 4. Governance Report
150
+ // ---------------------------------------------------------------------------
151
+
152
/**
 * Generate a governance report for background agent runs.
 *
 * Reads the runs log, keeps runs whose timestamp falls within the last
 * `periodHours`, and aggregates status counts, pass rates, gate totals,
 * a per-agent breakdown, and a per-run-type count.
 *
 * Pass rate is completed / (completed + failed) as a percentage with one
 * decimal, and 0 when no run has completed or failed.
 *
 * @param {object} [options]
 * @param {number} [options.periodHours=24] - Size of the look-back window in hours.
 * @param {*} [options.feedbackDir] - Optional feedback directory override.
 * @returns {object} Report with `periodHours`, `total`, `completed`, `failed`,
 *   `blocked`, `started`, `passRate`, `gatesChecked`, `gatesBlocked`, `agents`
 *   (sorted by ascending pass rate), `topFailingAgent` (or null), `byType`,
 *   and `generatedAt`.
 */
function generateGovernanceReport({ periodHours = 24, feedbackDir } = {}) {
  // Percentage with one decimal; 0 when there is nothing finished to rate.
  const toPassRate = (completedCount, failedCount) => {
    const finished = completedCount + failedCount;
    return finished > 0 ? Math.round((completedCount / finished) * 1000) / 10 : 0;
  };

  const runs = readJsonl(getRunsPath(feedbackDir));
  const cutoff = Date.now() - periodHours * 60 * 60 * 1000;
  // Runs with an unparseable timestamp yield NaN and are excluded by the comparison.
  const recent = runs.filter((r) => new Date(r.timestamp).getTime() > cutoff);

  const total = recent.length;
  const completed = recent.filter((r) => r.status === 'completed').length;
  const failed = recent.filter((r) => r.status === 'failed').length;
  const blocked = recent.filter((r) => r.status === 'blocked').length;
  const started = recent.filter((r) => r.status === 'started').length;

  const passRate = toPassRate(completed, failed);
  const totalGatesChecked = recent.reduce((s, r) => s + (r.gatesChecked || 0), 0);
  const totalGatesBlocked = recent.reduce((s, r) => s + (r.gatesBlocked || 0), 0);

  // Per-agent breakdown
  const byAgent = {};
  for (const r of recent) {
    if (!byAgent[r.agentId]) byAgent[r.agentId] = { completed: 0, failed: 0, blocked: 0, total: 0 };
    byAgent[r.agentId].total++;
    if (r.status === 'completed') byAgent[r.agentId].completed++;
    if (r.status === 'failed') byAgent[r.agentId].failed++;
    if (r.status === 'blocked') byAgent[r.agentId].blocked++;
  }

  const agentSummaries = Object.entries(byAgent).map(([id, counts]) => ({
    agentId: id,
    ...counts,
    passRate: toPassRate(counts.completed, counts.failed),
  })).sort((a, b) => a.passRate - b.passRate);

  // An agent whose runs are all still in flight has the default pass rate 0
  // without having failed anything, so it must not be named "top failing".
  // The list is sorted ascending, so the first agent with a recorded outcome
  // is the worst-rated one.
  const worstRatedAgent = agentSummaries.find((a) => a.completed + a.failed + a.blocked > 0);

  // By run type
  const byType = {};
  for (const r of recent) {
    if (!byType[r.runType]) byType[r.runType] = 0;
    byType[r.runType]++;
  }

  return {
    periodHours,
    total, completed, failed, blocked, started,
    passRate,
    gatesChecked: totalGatesChecked,
    gatesBlocked: totalGatesBlocked,
    agents: agentSummaries,
    topFailingAgent: worstRatedAgent && worstRatedAgent.passRate < 80 ? worstRatedAgent : null,
    byType,
    generatedAt: new Date().toISOString(),
  };
}
206
+
207
/**
 * Render a governance report object as newline-separated plain text.
 *
 * Always emits the title, run totals, pass rate, and gate totals. The
 * "Top failing agent" line is added only when `report.topFailingAgent` is
 * truthy, and the "Run types" line only when `report.byType` has entries.
 *
 * @param {object} report - Report in the shape returned by `generateGovernanceReport`.
 * @returns {string} The formatted multi-line summary.
 */
function formatGovernanceReport(report) {
  const {
    periodHours,
    total,
    completed,
    failed,
    blocked,
    passRate,
    gatesChecked,
    gatesBlocked,
    topFailingAgent,
    byType,
  } = report;

  const output = [];
  output.push(`Background Agent Governance Report (${periodHours}h)`);
  output.push(`Total runs: ${total} | Completed: ${completed} | Failed: ${failed} | Blocked: ${blocked}`);
  output.push(`Pass rate: ${passRate}%`);
  output.push(`Gates checked: ${gatesChecked} | Gates blocked: ${gatesBlocked}`);

  if (topFailingAgent) {
    output.push(`Top failing agent: ${topFailingAgent.agentId} (${topFailingAgent.passRate}% pass rate)`);
  }

  const typeEntries = Object.entries(byType);
  if (typeEntries.length > 0) {
    const typeSummary = typeEntries.map(([runType, count]) => `${runType}:${count}`).join(', ');
    output.push(`Run types: ${typeSummary}`);
  }

  return output.join('\n');
}
225
+
226
+ module.exports = {
227
+ recordAgentRun, checkRunGovernance, auditCompletedRun,
228
+ generateGovernanceReport, formatGovernanceReport, getRunsPath,
229
+ };