thumbgate 1.14.1 → 1.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/.claude-plugin/marketplace.json +2 -2
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/.well-known/mcp/server-card.json +1 -1
  4. package/README.md +2 -1
  5. package/adapters/claude/.mcp.json +2 -2
  6. package/adapters/mcp/server-stdio.js +8 -1
  7. package/adapters/opencode/opencode.json +1 -1
  8. package/bin/cli.js +54 -0
  9. package/config/enforcement.json +59 -7
  10. package/config/gates/default.json +33 -0
  11. package/config/mcp-allowlists.json +4 -0
  12. package/config/merge-quality-checks.json +2 -1
  13. package/package.json +17 -5
  14. package/public/codex-plugin.html +7 -1
  15. package/public/dashboard.html +23 -2
  16. package/public/index.html +20 -2
  17. package/public/learn.html +39 -0
  18. package/public/lessons.html +25 -1
  19. package/public/numbers.html +271 -0
  20. package/public/pro.html +7 -1
  21. package/scripts/cli-feedback.js +2 -1
  22. package/scripts/cli-schema.js +43 -4
  23. package/scripts/commercial-offer.js +1 -1
  24. package/scripts/contextfs.js +214 -32
  25. package/scripts/feedback-loop.js +49 -5
  26. package/scripts/harness-selector.js +132 -0
  27. package/scripts/lesson-canonical.js +181 -0
  28. package/scripts/lesson-db.js +71 -10
  29. package/scripts/lesson-synthesis.js +23 -2
  30. package/scripts/native-messaging-audit.js +514 -0
  31. package/scripts/pr-manager.js +47 -7
  32. package/scripts/profile-router.js +16 -1
  33. package/scripts/rule-validator.js +285 -0
  34. package/scripts/seo-gsd.js +182 -2
  35. package/scripts/tool-registry.js +12 -0
  36. package/skills/thumbgate/SKILL.md +1 -1
  37. package/src/api/server.js +53 -0
  38. package/.claude-plugin/README.md +0 -170
  39. package/adapters/README.md +0 -12
  40. package/skills/agent-memory/SKILL.md +0 -97
  41. package/skills/solve-architecture-autonomy/SKILL.md +0 -17
  42. package/skills/solve-architecture-autonomy/tool.js +0 -33
  43. package/skills/thumbgate-feedback/SKILL.md +0 -49
@@ -0,0 +1,285 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * scripts/rule-validator.js
5
+ *
6
+ * Pre-promotion validation harness for synthesized prevention rules.
7
+ *
8
+ * Why this exists:
9
+ * Before this module, `synthesizePreventionRule` (lesson-synthesis.js) auto-
10
+ * promoted any lesson that hit the occurrence threshold straight into
11
+ * `synthesized-rules.jsonl` — no check that the proposed rule actually
12
+ * matches the mistake pattern it was synthesized from, and no check that
13
+ * it doesn't also fire on recent positive-signal events from overlapping
14
+ * tags. That's the exact failure mode Autogenesis
15
+ * (https://arxiv.org/abs/2604.15034) calls out: candidate improvements
16
+ * must be validated through testing before integration, otherwise static
17
+ * agents accumulate self-contradicting rules that degrade precision.
18
+ *
19
+ * We already had 3 of the 4 Autogenesis phases:
20
+ * - capability-gap identification (negative feedback events),
21
+ * - candidate generation (synthesizePreventionRule),
22
+ * - integration (append to synthesized-rules.jsonl).
23
+ * The missing phase was validation. This module fills it.
24
+ *
25
+ * Validation contract:
26
+ * A proposed rule is promotable iff:
27
+ * 1. It matches the seed lesson that triggered promotion (otherwise the
28
+ * rule is tautologically broken — it wouldn't catch the mistake it
29
+ * was built for).
30
+ * 2. Its precision on a recent-events sample clears a threshold
31
+ * (default 0.8) — of the events the rule fires on, most must carry
32
+ * the negative signal. A rule that blocks positive outcomes too is
33
+ * a regression, not a prevention.
34
+ *
35
+ * Recall is reported for operator visibility but does not gate
36
+ * promotion — an overly specific rule is less harmful than an overly
37
+ * broad one.
38
+ *
39
+ * Design notes:
40
+ * - Pure functions, no IO. Caller supplies the event samples so tests
41
+ * stay hermetic and the validator can run inside captureFeedback
42
+ * without reaching for the filesystem.
43
+ * - Token matching is deliberately simple (lowercase, punctuation strip,
44
+ * length-2+ tokens, light suffix stemming, all tokens present in order) so the behavior is debuggable
45
+ * from the console. We are not competing with NLP — we are gating a
46
+ * one-line trigger string against a handful of sibling events.
47
+ */
48
+
49
// Stop words removed by tokenize(). The list is kept deliberately short so
// that filtering only discards filler and never the tokens that make a
// trigger distinctive. A rule built purely from stop words that somehow
// matched a positive event would be a genuine false positive worth seeing.
const STOP = new Set(
  [
    'a an the to of in on at for and or',
    'is are was were be do does did',
    'this that these those',
    'it its i you we they',
  ].join(' ').split(' '),
);
58
+
59
// Modality / negation words that rule triggers tend to inherit from lesson
// titles such as "MISTAKE: never force-push". tokenize() leaves them alone
// (they are ordinary English and stay meaningful inside event text); they
// are removed only from a rule's trigger before matching, so the rule can
// still fire on events that describe the mistake without repeating the
// modality word.
const TRIGGER_MODALITY = new Set('never always ever must not no'.split(' '));
66
+
67
/**
 * Trim a handful of common English suffixes ("ing", "ed", "es", plural
 * "s") so that morphological variants such as "pushed" / "pushes" /
 * "pushing" collapse onto "push". This is intentionally far short of a real
 * stemmer; it only keeps everyday inflection from silently defeating the
 * matcher.
 *
 * Tokens of three characters or fewer are returned untouched, and every
 * rule has a length floor so at least a three-character stem remains
 * ("goes" becomes "goe", which is harmless for matching).
 *
 * @param {string} token - A lowercase token.
 * @returns {string} The token with at most one suffix removed.
 */
function stem(token) {
  const len = token.length;
  if (len <= 3) return token;

  if (len > 5 && token.endsWith('ing')) return token.slice(0, len - 3);

  // "ed" and "es" share the same length floor and the same cut.
  if (len > 4 && (token.endsWith('ed') || token.endsWith('es'))) {
    return token.slice(0, len - 2);
  }

  // Plain plural "s"; a double "ss" ending ("class") is left alone.
  const isPlural = token.endsWith('s') && !token.endsWith('ss');
  return isPlural ? token.slice(0, len - 1) : token;
}
84
+
85
/**
 * Split free text into normalized matching tokens.
 *
 * The text is lowercased, every character other than a-z, 0-9 and
 * whitespace is turned into a space, and the result is split on
 * whitespace. Single-character tokens and STOP words are dropped; the
 * survivors are passed through stem().
 *
 * @param {*} text - Any value; null and undefined produce an empty list,
 *   everything else is coerced with String().
 * @returns {string[]} Tokens in their original order.
 */
function tokenize(text) {
  if (text == null) return [];

  const words = String(text)
    .toLowerCase()
    .replace(/[^a-z0-9\s]/g, ' ')
    .split(/\s+/);

  const tokens = [];
  for (const word of words) {
    if (word.length <= 1 || STOP.has(word)) continue;
    tokens.push(stem(word));
  }
  return tokens;
}
94
+
95
/**
 * Collect the free-text fields of an event (or lesson) into one string.
 *
 * Reads title, content, whatToChange, whatWentWrong, whatWorked and
 * context, in that order, skips any that are falsy, and joins the rest
 * with single spaces.
 *
 * @param {object} event - Event record; non-objects yield ''.
 * @returns {string} The combined text, or '' when nothing is available.
 */
function eventText(event) {
  if (!event || typeof event !== 'object') return '';

  const fieldNames = [
    'title',
    'content',
    'whatToChange',
    'whatWentWrong',
    'whatWorked',
    'context',
  ];

  const parts = [];
  for (const name of fieldNames) {
    const value = event[name];
    if (value) parts.push(value);
  }
  return parts.join(' ');
}
106
+
107
/**
 * Normalize an event's `signal` field.
 *
 * 'up' / 'positive' map to 'positive' and 'down' / 'negative' map to
 * 'negative', case-insensitively. Any other non-empty value is returned
 * lowercased as-is.
 *
 * @param {object} event - Event record; reads `event.signal`.
 * @returns {string|null} The normalized signal, or null when the event is
 *   not an object or carries no signal.
 */
function eventSignal(event) {
  const raw = event && typeof event === 'object' ? event.signal : null;
  if (!raw) return null;

  const lower = String(raw).toLowerCase();
  switch (lower) {
    case 'up':
    case 'positive':
      return 'positive';
    case 'down':
    case 'negative':
      return 'negative';
    default:
      return lower;
  }
}
116
+
117
/**
 * Does `rule` fire on `event`?
 *
 * The rule's `rule.trigger.condition` is tokenized, modality words
 * (TRIGGER_MODALITY) are dropped, and every remaining token must appear in
 * the event's combined text **in the same relative order** (subsequence
 * match; unrelated tokens may sit in between).
 *
 * Order matters because it is the cheapest way to distinguish
 * "force-push to main caused incident" (the trigger narrates the action)
 * from "main branch healthy, no force push" (same tokens, wrong
 * narrative). Without order the second event would count as a false
 * positive against every rule built on the same vocabulary.
 *
 * @param {object} rule - Rule record; only `rule.rule.trigger.condition`
 *   is read. Missing pieces are tolerated.
 * @param {object} event - Event or lesson whose text comes from eventText().
 * @returns {boolean} true when all trigger tokens are found in order.
 *   A missing or empty trigger (including one made only of modality words)
 *   never fires, so a degenerate rule is rejected instead of silently
 *   matching everything.
 */
function ruleMatches(rule, event) {
  const condition = rule && rule.rule && rule.rule.trigger && rule.rule.trigger.condition;

  // tokenize() stems its output, so "always" reaches this point as "alway"
  // and would slip past a raw TRIGGER_MODALITY lookup. Run the modality
  // words through the same tokenizer so both sides are compared in their
  // stemmed form.
  const modalityStems = new Set(tokenize([...TRIGGER_MODALITY].join(' ')));
  const tokens = tokenize(condition)
    .filter((t) => !TRIGGER_MODALITY.has(t) && !modalityStems.has(t));
  if (tokens.length === 0) return false;

  // Walk the haystack once: each trigger token must be found at or after
  // the position just past the previous match.
  const haystack = tokenize(eventText(event));
  let from = 0;
  for (const t of tokens) {
    const at = haystack.indexOf(t, from);
    if (at === -1) return false;
    from = at + 1;
  }
  return true;
}
145
+
146
/**
 * Count true-positive / false-positive / false-negative / true-negative
 * firings of `rule` on a sample of events and derive precision / recall.
 *
 * Tags scope the sample: a tagged event is only considered when it shares
 * at least one tag (case-insensitive) with the rule's tags and, when given,
 * with `scopeTags`. The premise is that a rule about git force-push should
 * not be precision-scored against deploy-pipeline events it was never meant
 * to see. Events without tags are always in scope, and an empty tag list on
 * either side applies no restriction.
 *
 * Only events whose eventSignal() is 'positive' or 'negative' are counted;
 * entries that are not objects are skipped.
 *
 * @param {object} rule - Rule record; `rule.tags` (array) is optional.
 * @param {object[]} events - Sample to score; non-arrays count as empty.
 * @param {object} [options]
 * @param {string[]|null} [options.scopeTags=null] - Extra tag scope.
 * @returns {{tp: number, fp: number, fn: number, tn: number,
 *   precision: (number|null), recall: (number|null)}} precision is null
 *   when the rule never fired; recall is null when no negative event was
 *   in scope.
 */
function scoreOnSample(rule, events, { scopeTags = null } = {}) {
  // Anything that is not an array is treated as "no tags" rather than
  // crashing on .map/.filter; falsy entries are dropped.
  const normalizeTags = (tags) => (Array.isArray(tags) ? tags : [])
    .filter(Boolean)
    .map((t) => String(t).toLowerCase());

  const ruleTags = new Set(normalizeTags(rule && rule.tags));
  const scope = new Set(normalizeTags(scopeTags));

  let tp = 0;
  let fp = 0;
  let fn = 0;
  let tn = 0;

  for (const event of Array.isArray(events) ? events : []) {
    // Tolerate holes / malformed entries the same way eventText() and
    // eventSignal() do.
    if (!event || typeof event !== 'object') continue;

    const tags = normalizeTags(event.tags);

    // Out-of-scope events are ignored — they have nothing to say about
    // this rule's precision. An empty scope means "no restriction", the
    // same rule the ruleTags check follows.
    if (scope.size > 0 && tags.length > 0 && !tags.some((t) => scope.has(t))) continue;
    if (ruleTags.size > 0 && tags.length > 0 && !tags.some((t) => ruleTags.has(t))) continue;

    const fires = ruleMatches(rule, event);
    const signal = eventSignal(event);

    if (signal === 'negative') {
      if (fires) tp += 1;
      else fn += 1;
    } else if (signal === 'positive') {
      if (fires) fp += 1;
      else tn += 1;
    }
  }

  const firings = tp + fp;
  const negatives = tp + fn;
  return {
    tp,
    fp,
    fn,
    tn,
    precision: firings > 0 ? tp / firings : null,
    recall: negatives > 0 ? tp / negatives : null,
  };
}
192
+
193
// Default minimum precision a rule must reach on the scored sample.
const DEFAULT_PRECISION_FLOOR = 0.8;

// Default number of scored in-scope events needed before precision is
// allowed to gate promotion.
const DEFAULT_MIN_SAMPLE = 3;
195
+
196
/**
 * Top-level validator: decide whether a proposed rule may be promoted and
 * return a report explaining the decision, so a rejection can be audited
 * rather than happening silently.
 *
 * Checks, in order:
 *   1. Shape — `rule.rule` must exist ('invalid_rule_shape').
 *   2. Seed — the rule must fire on `seedLesson`
 *      ('rule_does_not_match_seed_lesson').
 *   3. Sample — the rule is scored on `recentEvents`, scoped by
 *      `rule.tags`. Fewer than `minSample` scored events promotes with
 *      'insufficient_sample'; no firings promotes with
 *      'no_firings_in_sample'; precision under `precisionFloor` rejects
 *      with 'precision_below_floor'; otherwise 'validated'.
 *
 * @param {object} rule - Proposed rule record.
 * @param {object} [options]
 * @param {object} [options.seedLesson] - Lesson the rule was built from.
 * @param {object[]} [options.recentEvents=[]] - Sample to score against.
 * @param {number} [options.precisionFloor=DEFAULT_PRECISION_FLOOR]
 * @param {number} [options.minSample=DEFAULT_MIN_SAMPLE]
 * @returns {object} Report with shouldPromote, reason, matchesSeed,
 *   precision, recall, sampleSize, tp, fp, fn and tn.
 */
function validateProposedRule(rule, {
  seedLesson,
  recentEvents = [],
  precisionFloor = DEFAULT_PRECISION_FLOOR,
  minSample = DEFAULT_MIN_SAMPLE,
} = {}) {
  const report = {
    shouldPromote: false,
    reason: null,
    matchesSeed: false,
    precision: null,
    recall: null,
    sampleSize: 0,
    tp: 0,
    fp: 0,
    fn: 0,
    tn: 0,
  };

  // Stamp the verdict onto the report and hand it back.
  const conclude = (shouldPromote, reason) => {
    report.shouldPromote = shouldPromote;
    report.reason = reason;
    return report;
  };

  if (!rule || !rule.rule) return conclude(false, 'invalid_rule_shape');

  // A rule that cannot match the lesson it was synthesized from is broken
  // regardless of what the sample says.
  report.matchesSeed = seedLesson ? ruleMatches(rule, seedLesson) : false;
  if (!report.matchesSeed) return conclude(false, 'rule_does_not_match_seed_lesson');

  // Score against recent events in the rule's own tag cluster.
  const counts = scoreOnSample(rule, recentEvents, { scopeTags: rule.tags });
  Object.assign(report, counts);
  report.sampleSize = counts.tp + counts.fp + counts.fn + counts.tn;

  // Too little data to prove harm: promote, but leave a reason to audit.
  if (report.sampleSize < minSample) return conclude(true, 'insufficient_sample');

  // The rule never fired on the sample; the seed check already held.
  if (report.precision === null) return conclude(true, 'no_firings_in_sample');

  if (report.precision < precisionFloor) return conclude(false, 'precision_below_floor');

  return conclude(true, 'validated');
}
275
+
276
+ module.exports = {
277
+ tokenize,
278
+ eventText,
279
+ eventSignal,
280
+ ruleMatches,
281
+ scoreOnSample,
282
+ validateProposedRule,
283
+ DEFAULT_PRECISION_FLOOR,
284
+ DEFAULT_MIN_SAMPLE,
285
+ };
@@ -43,6 +43,21 @@ const HIGH_ROI_QUERY_SEEDS = [
43
43
  source: 'seed',
44
44
  notes: 'Category-defining query that explains the core wedge.',
45
45
  },
46
+ querySeed(
47
+ 'ai agent harness optimization',
48
+ 94,
49
+ 'Fresh harness-engineering demand that maps directly to ThumbGate progressive disclosure, pre-action gates, and workflow audits.',
50
+ ),
51
+ querySeed(
52
+ 'browser automation safety',
53
+ 93,
54
+ 'High-intent browser-agent safety query tied to prompt injection, permissions, and cross-app automation risk.',
55
+ ),
56
+ querySeed(
57
+ 'native messaging host security',
58
+ 91,
59
+ 'Security-led query that maps directly to browser bridge auditing and explicit connector governance.',
60
+ ),
46
61
  {
47
62
  query: 'thumbs up thumbs down feedback for ai coding agents',
48
63
  businessValue: 95,
@@ -99,6 +114,10 @@ const HIGH_ROI_QUERY_SEEDS = [
99
114
  },
100
115
  ];
101
116
 
117
/**
 * Build a seed-query entry in the same shape as the object literals in
 * HIGH_ROI_QUERY_SEEDS, with `source` fixed to 'seed'.
 *
 * @param {string} query - Search query text.
 * @param {number} businessValue - Business-value score for the query.
 * @param {string} notes - Short rationale for the seed.
 * @returns {{query: string, businessValue: number, source: string, notes: string}}
 */
function querySeed(query, businessValue, notes) {
  const seed = { query, businessValue };
  seed.source = 'seed';
  seed.notes = notes;
  return seed;
}
120
+
102
121
  function guideBlueprint({
103
122
  query,
104
123
  path,
@@ -138,6 +157,163 @@ function answer(question, text) {
138
157
  return { question, answer: text };
139
158
  }
140
159
 
160
/**
 * Build a guide blueprint that lives under /guides/<slug> in the
 * 'pre-action-gates' pillar. `path` and `pillar` always override any
 * same-named keys present in `content`.
 *
 * @param {string} slug - Final path segment of the guide URL.
 * @param {object} content - Remaining fields forwarded to guideBlueprint().
 * @returns {*} Whatever guideBlueprint() returns.
 */
function preActionGuide(slug, content) {
  const blueprint = { ...content };
  blueprint.path = `/guides/${slug}`;
  blueprint.pillar = 'pre-action-gates';
  return guideBlueprint(blueprint);
}
167
+
168
// Declarative content spec for the "ai agent harness optimization" guide.
// buildHarnessOptimizationGuide() turns it into a page blueprint:
//   - `meta` is spread into the blueprint as-is,
//   - `sections` holds [kind, heading, entries] tuples ('bullets' is built
//     with bullets(), any other kind with paragraphs()),
//   - `faq` holds [question, answerText] pairs passed to answer().
// Object.freeze is shallow here: only the top-level object is frozen; the
// nested `meta` object and the arrays remain mutable.
+ const HARNESS_OPTIMIZATION_QUERY = 'ai agent harness optimization';
169
+ const HARNESS_OPTIMIZATION_GUIDE_SPEC = Object.freeze({
170
+ slug: 'agent-harness-optimization',
171
+ meta: {
172
+ query: HARNESS_OPTIMIZATION_QUERY,
173
+ title: 'AI Agent Harness Optimization | Progressive Disclosure + Pre-Action Gates',
174
+ heroTitle: 'AI Agent Harness Optimization That Blocks Repeat Failures',
175
+ heroSummary: 'A better harness keeps global instructions lean, loads MCP schemas only when needed, and turns feedback into pre-action gates. ThumbGate makes that workflow measurable and enforceable.',
176
+ },
177
+ takeaways: [
178
+ 'Harness optimization is the control layer around the model: context, tools, guardrails, and feedback.',
179
+ 'Progressive disclosure keeps agents out of prompt bloat while preserving proof and tool access.',
180
+ 'ThumbGate adds a concrete audit path and Pre-Action Gates so harness lessons become runtime enforcement.',
181
+ ],
182
+ sections: [
183
+ ['paragraphs', 'What changed', [
184
+ 'The model is no longer the whole system. The harness decides which instructions, tools, context packs, and approval rules the model sees before it acts.',
185
+ 'When a team stuffs every rule into a global prompt, the agent loses reasoning room. When it routes work through lean discovery surfaces, the agent can fetch the exact tool schema, lesson, or harness only when the task requires it.',
186
+ ]],
187
+ ['bullets', 'How ThumbGate improves the harness', [
188
+ 'Scores global agent docs so AGENTS.md, CLAUDE.md, and GEMINI.md stay lean instead of becoming unreviewable prompt bundles.',
189
+ 'Publishes progressive MCP discovery through lightweight indexes and per-tool schema URLs.',
190
+ 'Selects specialized gate harnesses for deploy, code-edit, and database-write actions instead of loading every gate for every workflow.',
191
+ 'Turns thumbs-down feedback into prevention rules, then into hard Pre-Action Gates that block repeated mistakes.',
192
+ ]],
193
+ ['paragraphs', 'Where this creates ROI', [
194
+ 'For acquisition, this page names the buyer category: AI agent harness optimization. For conversion, the CLI audit gives a concrete first action. For retention, the same audit keeps local instructions and MCP surfaces from drifting back into bloat.',
195
+ ]],
196
+ ],
197
+ faq: [
198
+ [
199
+ 'What is an AI agent harness?',
200
+ 'An AI agent harness is the runtime layer around the model: context loading, tool calls, guardrails, approval boundaries, memory, and verification. ThumbGate focuses on the enforcement part of that harness.',
201
+ ],
202
+ [
203
+ 'How does ThumbGate optimize a harness?',
204
+ 'ThumbGate keeps global instructions lean, supports progressive MCP discovery, selects workflow-specific gate harnesses, and converts feedback into Pre-Action Gates that block known-bad actions before execution.',
205
+ ],
206
+ ],
207
+ relatedPaths: ['/guides/pre-action-gates', '/guides/codex-cli-guardrails'],
208
+ });
209
+
210
/**
 * Turn one [kind, heading, entries] spec tuple into a page section.
 *
 * @param {string} kind - 'bullets' selects bullets(); every other value
 *   falls back to paragraphs().
 * @param {string} heading - Section heading.
 * @param {string[]} entries - Bullet items or paragraph texts.
 * @returns {*} The section produced by bullets() or paragraphs().
 */
function buildSectionFromSpec(kind, heading, entries) {
  if (kind === 'bullets') {
    return bullets(heading, entries);
  }
  return paragraphs(heading, entries);
}
213
+
214
/**
 * Expand HARNESS_OPTIMIZATION_GUIDE_SPEC into a guide blueprint via
 * preActionGuide(): section tuples go through buildSectionFromSpec() and
 * FAQ pairs through answer(); the remaining fields are passed along as-is.
 *
 * @returns {*} Whatever preActionGuide() returns for the harness guide.
 */
function buildHarnessOptimizationGuide() {
  const {
    slug,
    meta,
    takeaways,
    sections,
    faq,
    relatedPaths,
  } = HARNESS_OPTIMIZATION_GUIDE_SPEC;

  const builtSections = sections.map(
    (section) => buildSectionFromSpec(section[0], section[1], section[2]),
  );
  const builtFaq = faq.map((pair) => answer(pair[0], pair[1]));

  return preActionGuide(slug, {
    ...meta,
    takeaways,
    sections: builtSections,
    faq: builtFaq,
    relatedPaths,
  });
}
223
+
224
// Declarative content specs for the browser-bridge guides
// ('browser-automation-safety' and 'native-messaging-host-security').
// Each entry is expanded by buildBrowserBridgeGuide():
//   - `meta` is spread into the blueprint as-is,
//   - `sections` holds [kind, heading, entries] tuples ('bullets' is built
//     with bullets(), any other kind with paragraphs()),
//   - `faq` holds [question, answerText] pairs passed to answer().
// The two guides reference each other through `relatedPaths`.
// Object.freeze is shallow here: only the outer array is frozen; the spec
// objects inside it remain mutable.
+ const BROWSER_BRIDGE_GUIDE_SPECS = Object.freeze([
225
+ {
226
+ slug: 'browser-automation-safety',
227
+ meta: {
228
+ query: 'browser automation safety',
229
+ title: 'Browser Automation Safety | Prompt Injection, Permissions, and Pre-Action Gates',
230
+ heroTitle: 'Browser automation safety needs explicit approval boundaries',
231
+ heroSummary: 'Browser agents can click, type, and navigate for you, but they also widen prompt-injection and cross-app integration risk. ThumbGate adds approval boundaries, auditability, and a native messaging audit before those bridges turn into silent blast-radius expansion.',
232
+ },
233
+ takeaways: [
234
+ 'Browser automation is useful because it has real permissions, which is exactly why it needs governance.',
235
+ 'Prompt injection becomes more dangerous when an extension can reach a local executable through a browser bridge.',
236
+ 'ThumbGate gives teams a first action now: audit native messaging hosts, then require explicit approval before browser-use connectors expand.',
237
+ ],
238
+ sections: [
239
+ ['paragraphs', 'Why browser-use changes the threat model', [
240
+ 'Browser agents do not just read text. They can click buttons, fill forms, switch tabs, and sometimes bridge into local binaries. That means the blast radius is no longer only "bad output" but "real actions on live websites and local systems."',
241
+ 'Once browser automation enters the stack, prompt injection stops being an abstract model weakness and becomes a workflow-governance problem. The right control is not more prompt advice. It is a hard boundary around what the agent is allowed to connect, install, and execute.',
242
+ ]],
243
+ ['bullets', 'What to audit first', [
244
+ 'Which browser extensions hold automation permissions such as debugger, tabs, downloads, and nativeMessaging.',
245
+ 'Whether the desktop app or CLI has registered native messaging hosts for browsers you did not explicitly connect.',
246
+ 'Whether host manifests point to live local binaries and whether those binaries sit outside the browser sandbox.',
247
+ 'Whether browser-use runs default to ask-before-acting or silently expand capability before a human approves them.',
248
+ ]],
249
+ ['paragraphs', 'How ThumbGate fits', [
250
+ 'ThumbGate is the approval and enforcement layer around browser-use. Start by running npx thumbgate native-messaging-audit. Then gate future connector installs, record who approved them, and turn browser-bridge mistakes into Pre-Action Gates before the same pattern repeats.',
251
+ ]],
252
+ ],
253
+ faq: [
254
+ [
255
+ 'Why is browser automation riskier than ordinary chat?',
256
+ 'Because the agent can take real actions in a browser and may also reach local executables through native messaging bridges. That turns prompt injection and permission drift into operational risk, not just output-quality risk.',
257
+ ],
258
+ [
259
+ 'What should a team do before enabling browser-use broadly?',
260
+ 'Audit native messaging hosts, review extension permissions, keep ask-before-acting enabled by default, and require explicit approval for any cross-app connector that expands the agent runtime beyond the browser sandbox.',
261
+ ],
262
+ ],
263
+ relatedPaths: ['/guides/native-messaging-host-security', '/guides/pre-action-gates'],
264
+ },
265
+ {
266
+ slug: 'native-messaging-host-security',
267
+ meta: {
268
+ query: 'native messaging host security',
269
+ title: 'Native Messaging Host Security | Audit Browser Bridges Before They Expand',
270
+ heroTitle: 'Native messaging host security for AI browser bridges',
271
+ heroSummary: 'Native messaging hosts let browser extensions talk to local executables. That can be useful, but it also creates a persistent bridge outside the browser sandbox. ThumbGate audits those registrations and helps teams require explicit approval before they become part of the workflow.',
272
+ },
273
+ takeaways: [
274
+ 'Native messaging is a real local capability boundary, not a harmless implementation detail.',
275
+ 'A manifest can pre-authorize extension origins long before a human operator understands the blast radius.',
276
+ 'ThumbGate turns native messaging review into an auditable operator workflow instead of an invisible local side effect.',
277
+ ],
278
+ sections: [
279
+ ['paragraphs', 'What native messaging hosts actually do', [
280
+ 'A native messaging host is a local manifest that tells a browser extension which executable it may launch on the operator machine. That bridge sits outside the browser sandbox, so it deserves the same review discipline teams use for deploy credentials or production write access.',
281
+ 'The risk is not only the host binary itself. It is the combination of extension permissions, allowed origins, and whether the host remains registered for browsers the operator did not intentionally connect.',
282
+ ]],
283
+ ['bullets', 'Signals ThumbGate audits', [
284
+ 'Manifest files under browser-specific NativeMessagingHosts directories on macOS and Linux.',
285
+ 'Allowed extension origins and extension-id fan-out per host registration.',
286
+ 'Host binaries that are missing on disk, which leaves stale or broken registrations behind.',
287
+ 'AI/browser bridge manifests registered for browsers not detected in the usual local install paths.',
288
+ ]],
289
+ ['paragraphs', 'The fastest operator action', [
290
+ 'Run npx thumbgate native-messaging-audit --json in the repo or workstation you govern. Review every AI browser bridge, remove anything you did not intentionally integrate, and keep browser-use in ask-before-acting mode until connector scope is explicit and revocable.',
291
+ ]],
292
+ ],
293
+ faq: [
294
+ [
295
+ 'Why does native messaging deserve a separate security review?',
296
+ 'Because it lets a browser extension hand work to a local executable outside the browser sandbox. That is a different trust boundary than ordinary page automation or side-panel UI access.',
297
+ ],
298
+ [
299
+ 'How does ThumbGate help with native messaging host security?',
300
+ 'ThumbGate audits known host locations, highlights AI/browser bridges, flags stale or missing host binaries, and gives teams an enforcement layer so future connector expansion requires explicit approval.',
301
+ ],
302
+ ],
303
+ relatedPaths: ['/guides/browser-automation-safety', '/guides/pre-action-gates'],
304
+ },
305
+ ]);
306
+
307
/**
 * Expand one guide spec (as stored in BROWSER_BRIDGE_GUIDE_SPECS) into a
 * guide blueprint via preActionGuide(): section tuples go through
 * buildSectionFromSpec() and FAQ pairs through answer(); the remaining
 * fields are passed along as-is.
 *
 * @param {object} spec - Spec with slug, meta, takeaways, sections, faq
 *   and relatedPaths.
 * @returns {*} Whatever preActionGuide() returns for this spec.
 */
function buildBrowserBridgeGuide(spec) {
  const builtSections = spec.sections.map(
    (section) => buildSectionFromSpec(section[0], section[1], section[2]),
  );
  const builtFaq = spec.faq.map((pair) => answer(pair[0], pair[1]));

  const content = { ...spec.meta };
  content.takeaways = spec.takeaways;
  content.sections = builtSections;
  content.faq = builtFaq;
  content.relatedPaths = spec.relatedPaths;

  return preActionGuide(spec.slug, content);
}
316
+
141
317
  const PAGE_BLUEPRINTS = [
142
318
  {
143
319
  query: 'thumbgate vs speclock',
@@ -288,6 +464,7 @@ const PAGE_BLUEPRINTS = [
288
464
  ],
289
465
  relatedPaths: ['/compare/speclock', '/guides/claude-code-feedback'],
290
466
  },
467
+ buildHarnessOptimizationGuide(),
291
468
  {
292
469
  query: 'stop ai coding agents from repeating mistakes',
293
470
  path: '/guides/stop-repeated-ai-agent-mistakes',
@@ -529,6 +706,7 @@ const PAGE_BLUEPRINTS = [
529
706
  ],
530
707
  relatedPaths: ['/compare/mem0', '/guides/stop-repeated-ai-agent-mistakes'],
531
708
  },
709
+ ...BROWSER_BRIDGE_GUIDE_SPECS.map(buildBrowserBridgeGuide),
532
710
  guideBlueprint({
533
711
  query: 'autoresearch agent safety',
534
712
  path: '/guides/autoresearch-agent-safety',
@@ -749,7 +927,7 @@ function classifyIntent(query) {
749
927
  return 'commercial';
750
928
  }
751
929
  if (/\b(what is|how to|guide|best practices|why)\b/.test(normalized)) return 'informational';
752
- if (/\b(guardrails|pre-action gates|feedback|prevent repeated mistakes|repeating mistakes|memory)\b/.test(normalized)) {
930
+ if (/\b(guardrails|pre-action gates|feedback|prevent repeated mistakes|repeating mistakes|memory|harness optimization)\b/.test(normalized)) {
753
931
  return 'commercial';
754
932
  }
755
933
  return 'informational';
@@ -759,7 +937,7 @@ function inferPillar(query) {
759
937
  const normalized = normalizeText(query).toLowerCase();
760
938
  if (/\b(speclock|mem0|alternative|vs|compare|comparison)\b/.test(normalized)) return 'comparison';
761
939
  if (/\b(thumbs up|thumbs down|feedback|reinforce|mistake)\b/.test(normalized)) return 'feedback-loop';
762
- if (/\b(autoresearch|self-improving|benchmark|reward hacking)\b/.test(normalized)) return 'pre-action-gates';
940
+ if (/\b(autoresearch|self-improving|benchmark|reward hacking|harness optimization|browser automation|native messaging|browser bridge|prompt injection)\b/.test(normalized)) return 'pre-action-gates';
763
941
  if (/\b(pre-action gates|guardrails|block|prevent repeated mistakes|repeating mistakes)\b/.test(normalized)) return 'pre-action-gates';
764
942
  if (/\b(claude code|cursor|codex|gemini|amp|opencode|integration|plugin)\b/.test(normalized)) return 'agent-workflows';
765
943
  return 'ai-agent-reliability';
@@ -1139,6 +1317,7 @@ function renderSeoPageHtml(page, runtimeConfig = {}) {
1139
1317
  <meta property="og:type" content="article" />
1140
1318
  <meta property="og:url" content="${escapeHtml(canonicalUrl)}" />
1141
1319
  <link rel="canonical" href="${escapeHtml(canonicalUrl)}" />
1320
+ <link rel="llm-context" href="/public/llm-context.md" type="text/markdown" />
1142
1321
  <link rel="icon" type="image/svg+xml" href="/thumbgate-icon.png" />
1143
1322
  <link rel="apple-touch-icon" href="/assets/brand/thumbgate-mark.svg" />
1144
1323
  <meta property="og:image" content="/og.png" />
@@ -1379,6 +1558,7 @@ ${renderWebPageJsonLd(page, { appOrigin })}
1379
1558
  <p><strong>Opportunity score:</strong> ${page.opportunityScore}</p>
1380
1559
  <p><strong>Primary persona:</strong> ${escapeHtml(page.persona)}</p>
1381
1560
  <p><strong>Keyword cluster:</strong> ${escapeHtml(page.keywordCluster.join(', '))}</p>
1561
+ <p><strong>Pricing:</strong> Pro $19/mo or $149/yr. Team $49/seat/mo.</p>
1382
1562
  <div class="proof-links">${proofLinks}</div>
1383
1563
  <a class="cta-button" href="${escapeHtml(page.cta.href)}" target="_blank" rel="noopener">${escapeHtml(page.cta.label)}</a>
1384
1564
  </div>
@@ -841,6 +841,18 @@ const TOOLS = [
841
841
  properties: {},
842
842
  },
843
843
  }),
844
+ readOnlyTool({
845
+ name: 'native_messaging_audit',
846
+ description: 'Audit local browser native messaging hosts and AI browser bridges. Flags missing host binaries, pre-authorized extension bridges, and manifests for browsers not detected locally.',
847
+ inputSchema: {
848
+ type: 'object',
849
+ properties: {
850
+ platform: { type: 'string', enum: ['darwin', 'linux', 'win32'], description: 'Optional platform override for manifest discovery.' },
851
+ homeDir: { type: 'string', description: 'Optional home-directory override for manifest discovery.' },
852
+ aiOnly: { type: 'boolean', description: 'When true, only AI/browser bridge manifests are returned.' },
853
+ },
854
+ },
855
+ }),
844
856
  readOnlyTool({
845
857
  name: 'commerce_recall',
846
858
  description: 'Recall past feedback filtered by commerce categories (product_recommendation, brand_compliance, sizing, pricing, regulatory). Returns quality scores alongside memories for agentic commerce agents.',
@@ -94,7 +94,7 @@ Bounded retrieval of relevant feedback history for the current task. The agent g
94
94
  | Seats | 1 | 1 | Per-seat |
95
95
  | Price | $0 | $19/mo | $49/seat/mo |
96
96
 
97
- Start a 7-day free trial of Pro: <https://buy.stripe.com/fZu9AT3Ug6zcdWh0XN3sI08>
97
+ Start a 7-day free trial of Pro: <https://thumbgate-production.up.railway.app/go/pro?utm_source=skill>
98
98
 
99
99
  ## Compatibility
100
100
 
package/src/api/server.js CHANGED
@@ -97,6 +97,7 @@ const {
97
97
  samplePosteriors,
98
98
  } = require('../../scripts/thompson-sampling');
99
99
  const {
100
+ appendFunnelEvent,
100
101
  createCheckoutSession,
101
102
  getCheckoutSessionStatus,
102
103
  provisionApiKey,
@@ -225,6 +226,7 @@ const GUIDE_PAGE_PATH = path.resolve(__dirname, '../../public/guide.html');
225
226
  const CODEX_PLUGIN_PAGE_PATH = path.resolve(__dirname, '../../public/codex-plugin.html');
226
227
  const COMPARE_PAGE_PATH = path.resolve(__dirname, '../../public/compare.html');
227
228
  const LEARN_PAGE_PATH = path.resolve(__dirname, '../../public/learn.html');
229
+ const NUMBERS_PAGE_PATH = path.resolve(__dirname, '../../public/numbers.html');
228
230
  const LEARN_DIR = path.resolve(__dirname, '../../public/learn');
229
231
  const GUIDES_DIR = path.resolve(__dirname, '../../public/guides');
230
232
  const COMPARE_DIR = path.resolve(__dirname, '../../public/compare');
@@ -2161,6 +2163,37 @@ function servePublicMarketingPage({
2161
2163
  'landing_page_view'
2162
2164
  );
2163
2165
 
2166
+ // Funnel-ledger write (2026-04-21): populate funnel-events.jsonl with a
2167
+ // discovery-stage event on every landing-page view so UTM-tagged social
2168
+ // traffic becomes visible in `npm run feedback:summary` and
2169
+ // `bin/cli.js cfo --today`. Prior to this wire, landing views wrote only
2170
+ // to telemetry-pings.jsonl (invisible to the CEO-facing revenue surface),
2171
+ // leaving funnel-events.jsonl empty despite 404 published Zernio posts.
2172
+ // Best-effort: wrapped in try/catch so a billing-ledger hiccup never
2173
+ // breaks a page render.
2174
+ try {
2175
+ appendFunnelEvent({
2176
+ stage: 'discovery',
2177
+ event: 'landing_view',
2178
+ installId: journeyState.visitorId || null,
2179
+ traceId: journeyState.acquisitionId || null,
2180
+ evidence: landingAttribution.landingPath || 'landing_view',
2181
+ metadata: {
2182
+ page: extraTelemetry.pageType || landingAttribution.page || 'landing',
2183
+ utmSource: landingAttribution.utmSource || null,
2184
+ utmMedium: landingAttribution.utmMedium || null,
2185
+ utmCampaign: landingAttribution.utmCampaign || null,
2186
+ utmContent: landingAttribution.utmContent || null,
2187
+ utmTerm: landingAttribution.utmTerm || null,
2188
+ referrerHost: landingAttribution.referrerHost || null,
2189
+ sessionId: journeyState.sessionId || null,
2190
+ },
2191
+ });
2192
+ } catch {
2193
+ // Funnel ledger is best-effort on page render; telemetry-pings remains
2194
+ // the authoritative observability path if the ledger write fails.
2195
+ }
2196
+
2164
2197
  if (isSeoAttributionSource(landingAttribution.source)) {
2165
2198
  appendBestEffortTelemetry(FEEDBACK_DIR, {
2166
2199
  eventType: 'seo_landing_view',
@@ -3777,6 +3810,26 @@ async function addContext(){
3777
3810
  return;
3778
3811
  }
3779
3812
 
3813
+ if (isGetLikeRequest && (pathname === '/numbers' || pathname === '/numbers.html')) {
3814
+ // Route through servePublicMarketingPage so landing_page_view telemetry
3815
+ // + funnel-events.jsonl `discovery/landing_view` get captured with UTM
3816
+ // attribution — critical for Zernio social CTAs that target /numbers.
3817
+ try {
3818
+ servePublicMarketingPage({
3819
+ req,
3820
+ res,
3821
+ parsed,
3822
+ hostedConfig,
3823
+ isHeadRequest,
3824
+ renderHtml: () => fs.readFileSync(NUMBERS_PAGE_PATH, 'utf-8'),
3825
+ extraTelemetry: { pageType: 'numbers' },
3826
+ });
3827
+ } catch {
3828
+ sendJson(res, 404, { error: 'Numbers page not found' });
3829
+ }
3830
+ return;
3831
+ }
3832
+
3780
3833
  if (isGetLikeRequest && pathname === '/learn/learn.css') {
3781
3834
  try {
3782
3835
  const cssPath = path.join(LEARN_DIR, 'learn.css');