thumbgate 1.14.1 → 1.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +2 -2
- package/.claude-plugin/plugin.json +1 -1
- package/.well-known/mcp/server-card.json +1 -1
- package/README.md +2 -1
- package/adapters/claude/.mcp.json +2 -2
- package/adapters/mcp/server-stdio.js +8 -1
- package/adapters/opencode/opencode.json +1 -1
- package/bin/cli.js +54 -0
- package/config/enforcement.json +59 -7
- package/config/gates/default.json +33 -0
- package/config/mcp-allowlists.json +4 -0
- package/config/merge-quality-checks.json +2 -1
- package/package.json +17 -5
- package/public/codex-plugin.html +7 -1
- package/public/dashboard.html +23 -2
- package/public/index.html +20 -2
- package/public/learn.html +39 -0
- package/public/lessons.html +25 -1
- package/public/numbers.html +271 -0
- package/public/pro.html +7 -1
- package/scripts/cli-feedback.js +2 -1
- package/scripts/cli-schema.js +43 -4
- package/scripts/commercial-offer.js +1 -1
- package/scripts/contextfs.js +214 -32
- package/scripts/feedback-loop.js +49 -5
- package/scripts/harness-selector.js +132 -0
- package/scripts/lesson-canonical.js +181 -0
- package/scripts/lesson-db.js +71 -10
- package/scripts/lesson-synthesis.js +23 -2
- package/scripts/native-messaging-audit.js +514 -0
- package/scripts/pr-manager.js +47 -7
- package/scripts/profile-router.js +16 -1
- package/scripts/rule-validator.js +285 -0
- package/scripts/seo-gsd.js +182 -2
- package/scripts/tool-registry.js +12 -0
- package/skills/thumbgate/SKILL.md +1 -1
- package/src/api/server.js +53 -0
- package/.claude-plugin/README.md +0 -170
- package/adapters/README.md +0 -12
- package/skills/agent-memory/SKILL.md +0 -97
- package/skills/solve-architecture-autonomy/SKILL.md +0 -17
- package/skills/solve-architecture-autonomy/tool.js +0 -33
- package/skills/thumbgate-feedback/SKILL.md +0 -49
|
@@ -0,0 +1,285 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* scripts/rule-validator.js
|
|
5
|
+
*
|
|
6
|
+
* Pre-promotion validation harness for synthesized prevention rules.
|
|
7
|
+
*
|
|
8
|
+
* Why this exists:
|
|
9
|
+
* Before this module, `synthesizePreventionRule` (lesson-synthesis.js) auto-
|
|
10
|
+
* promoted any lesson that hit the occurrence threshold straight into
|
|
11
|
+
* `synthesized-rules.jsonl` — no check that the proposed rule actually
|
|
12
|
+
* matches the mistake pattern it was synthesized from, and no check that
|
|
13
|
+
* it doesn't also fire on recent positive-signal events from overlapping
|
|
14
|
+
* tags. That's the exact failure mode Autogenesis
|
|
15
|
+
* (https://arxiv.org/abs/2604.15034) calls out: candidate improvements
|
|
16
|
+
* must be validated through testing before integration, otherwise static
|
|
17
|
+
* agents accumulate self-contradicting rules that degrade precision.
|
|
18
|
+
*
|
|
19
|
+
* We already had 3 of the 4 Autogenesis phases:
|
|
20
|
+
* - capability-gap identification (negative feedback events),
|
|
21
|
+
* - candidate generation (synthesizePreventionRule),
|
|
22
|
+
* - integration (append to synthesized-rules.jsonl).
|
|
23
|
+
* The missing phase was validation. This module fills it.
|
|
24
|
+
*
|
|
25
|
+
* Validation contract:
|
|
26
|
+
* A proposed rule is promotable iff:
|
|
27
|
+
* 1. It matches the seed lesson that triggered promotion (otherwise the
|
|
28
|
+
* rule is tautologically broken — it wouldn't catch the mistake it
|
|
29
|
+
* was built for).
|
|
30
|
+
* 2. Its precision on a recent-events sample clears a threshold
|
|
31
|
+
* (default 0.8) — of the events the rule fires on, most must carry
|
|
32
|
+
* the negative signal. A rule that blocks positive outcomes too is
|
|
33
|
+
* a regression, not a prevention.
|
|
34
|
+
*
|
|
35
|
+
* Recall is reported for operator visibility but does not gate
|
|
36
|
+
* promotion — an overly specific rule is less harmful than an overly
|
|
37
|
+
* broad one.
|
|
38
|
+
*
|
|
39
|
+
* Design notes:
|
|
40
|
+
* - Pure functions, no IO. Caller supplies the event samples so tests
|
|
41
|
+
* stay hermetic and the validator can run inside captureFeedback
|
|
42
|
+
* without reaching for the filesystem.
|
|
43
|
+
* - Token matching is deliberately simple (lowercase, punctuation strip,
|
|
44
|
+
* length-2+ tokens, light suffix stemming, in-order token match) so the behavior is debuggable
|
|
45
|
+
* from the console. We are not competing with NLP — we are gating a
|
|
46
|
+
* one-line trigger string against a handful of sibling events.
|
|
47
|
+
*/
|
|
48
|
+
|
|
49
|
+
// Stop list. Kept deliberately short: it only removes filler words, so the
// content words that make a trigger discriminative are never dropped here.
const STOP = new Set([
  // articles
  'a', 'an', 'the',
  // prepositions and conjunctions
  'to', 'of', 'in', 'on', 'at', 'for', 'and', 'or',
  // auxiliary verbs
  'is', 'are', 'was', 'were', 'be', 'do', 'does', 'did',
  // demonstratives
  'this', 'that', 'these', 'those',
  // pronouns
  'it', 'its', 'i', 'you', 'we', 'they',
]);

// Modality / negation words that rule triggers tend to inherit from lesson
// titles such as "MISTAKE: never force-push". tokenize() keeps them (they are
// not stop words), but ruleMatches() removes them from the trigger side so a
// rule can still fire on an event that describes the mistake without
// repeating the modality. On the event (haystack) side they are left alone.
const TRIGGER_MODALITY = new Set([
  'never',
  'always',
  'ever',
  'must',
  'not',
  'no',
]);
|
|
66
|
+
|
|
67
|
+
/**
 * Minimal suffix stripper used by tokenize(). It exists so that simple
 * morphological variants ("pushed", "pushing", "pushes") collapse onto the
 * same token as "push"; it is not a linguistic stemmer.
 *
 * Tokens of three characters or fewer are returned untouched, so short words
 * such as "is" / "as" survive. Longer tokens lose at most one suffix
 * ("ing", "ed", "es", or a plain trailing "s" that is not part of "ss").
 * The result can be a non-word, e.g. "goes" becomes "goe".
 *
 * @param {string} token - a single lowercase token.
 * @returns {string} the token with at most one suffix removed.
 */
function stem(token) {
  if (token.length <= 3) return token;

  // Ordered [suffix, minimum token length] pairs. A suffix is only removed
  // when the token is long enough; otherwise the next rule gets a chance.
  const suffixRules = [
    ['ing', 6],
    ['ed', 5],
    ['es', 5],
  ];
  for (const [suffix, minLength] of suffixRules) {
    if (token.length >= minLength && token.endsWith(suffix)) {
      return token.slice(0, token.length - suffix.length);
    }
  }

  // Plain plural: drop one trailing "s" unless the word ends in "ss".
  const isPlainPlural = token.endsWith('s') && !token.endsWith('ss');
  return isPlainPlural ? token.slice(0, -1) : token;
}
|
|
84
|
+
|
|
85
|
+
/**
 * Turn arbitrary text into a list of comparable tokens.
 *
 * The input is stringified, lowercased, and every character outside
 * [a-z0-9] and whitespace is replaced by a space. Tokens of length one and
 * tokens in STOP are discarded; each survivor is passed through stem().
 *
 * @param {*} text - value to tokenize; null / undefined yield an empty list.
 * @returns {string[]} stemmed tokens in their original order.
 */
function tokenize(text) {
  if (text === null || text === undefined) return [];

  const normalized = String(text)
    .toLowerCase()
    .replace(/[^a-z0-9\s]/g, ' ');

  const result = [];
  for (const word of normalized.split(/\s+/)) {
    if (word.length <= 1 || STOP.has(word)) continue;
    result.push(stem(word));
  }
  return result;
}
|
|
94
|
+
|
|
95
|
+
/**
 * Collapse an event's free-text fields into one space-separated string.
 *
 * Fields are read in a fixed order (title, content, whatToChange,
 * whatWentWrong, whatWorked, context); falsy fields are skipped.
 *
 * @param {object} event - feedback / lesson record.
 * @returns {string} joined text, or '' when `event` is not an object.
 */
function eventText(event) {
  if (!event || typeof event !== 'object') return '';

  const textFields = [
    'title',
    'content',
    'whatToChange',
    'whatWentWrong',
    'whatWorked',
    'context',
  ];
  const parts = [];
  for (const field of textFields) {
    const value = event[field];
    if (value) parts.push(value);
  }
  return parts.join(' ');
}
|
|
106
|
+
|
|
107
|
+
/**
 * Normalize an event's `signal` field.
 *
 * "up" / "positive" map to 'positive' and "down" / "negative" map to
 * 'negative' (case-insensitive). Any other truthy value is returned
 * stringified and lowercased.
 *
 * @param {object} event - feedback record.
 * @returns {string|null} normalized signal, or null when the event is not an
 *   object or its signal is falsy.
 */
function eventSignal(event) {
  if (!event || typeof event !== 'object') return null;

  const { signal } = event;
  if (!signal) return null;

  const normalized = String(signal).toLowerCase();
  switch (normalized) {
    case 'up':
    case 'positive':
      return 'positive';
    case 'down':
    case 'negative':
      return 'negative';
    default:
      return normalized;
  }
}
|
|
116
|
+
|
|
117
|
+
/**
 * Does `rule` fire on `event`?
 *
 * A rule fires when every content token of `rule.rule.trigger.condition`
 * appears in the event's combined text **in the same relative order**
 * (subsequence match; other tokens may sit in between). Modality words
 * listed in TRIGGER_MODALITY are removed from the trigger before matching.
 * A trigger with no remaining tokens never fires — that is a degenerate rule
 * and the validator should reject it rather than match everything.
 *
 * Order matters because it is the cheapest way to tell
 * "force-push to main caused incident" (the action is narrated) apart from
 * "main branch healthy, no force push" (same vocabulary, different story).
 *
 * @param {object} rule - proposed rule; a missing trigger counts as empty.
 * @param {object} event - feedback / lesson record to test against.
 * @returns {boolean} true when the trigger tokens form a subsequence of the
 *   event tokens.
 */
function ruleMatches(rule, event) {
  const trigger = rule && rule.rule && rule.rule.trigger && rule.rule.trigger.condition;

  // tokenize() stems its output, so "always" reaches this point as "alway".
  // Looking stemmed tokens up in the raw TRIGGER_MODALITY set would therefore
  // miss it and leave the modality word in the trigger. Push the modality
  // words through the same tokenizer so the comparison is stem-to-stem.
  const modality = new Set(TRIGGER_MODALITY);
  for (const word of TRIGGER_MODALITY) {
    for (const stemmed of tokenize(word)) modality.add(stemmed);
  }

  const tokens = tokenize(trigger).filter((t) => !modality.has(t));
  if (tokens.length === 0) return false;

  // Greedy subsequence scan: `hi` only ever moves forward through the event.
  const haystack = tokenize(eventText(event));
  let hi = 0;
  for (const t of tokens) {
    while (hi < haystack.length && haystack[hi] !== t) hi += 1;
    if (hi >= haystack.length) return false;
    hi += 1;
  }
  return true;
}
|
|
145
|
+
|
|
146
|
+
/**
 * Count true-positive / false-positive / false-negative / true-negative
 * firings of `rule` on a sample of events.
 *
 * Scoping: a tagged event is scored only if it shares at least one tag
 * (case-insensitive) with `scopeTags` (when given) and with the rule's own
 * tags (when the rule has any). Events that carry no tags are always scored.
 * Events whose normalized signal is neither 'positive' nor 'negative' are
 * visited but do not increment any counter.
 *
 * Malformed input is tolerated: a non-array `events`, null / non-object
 * entries inside it, and non-array `rule.tags` / `scopeTags` are treated as
 * "nothing there" instead of throwing.
 *
 * @param {object} rule - proposed rule (uses `rule.tags` and the trigger).
 * @param {object[]} events - sample of feedback events.
 * @param {{scopeTags?: string[]|null}} [options] - optional extra tag scope.
 * @returns {{tp: number, fp: number, fn: number, tn: number,
 *   precision: number|null, recall: number|null}} counters plus ratios;
 *   precision is null when the rule never fired, recall is null when the
 *   sample has no negative events.
 */
function scoreOnSample(rule, events, { scopeTags = null } = {}) {
  const lowerAll = (list) => list.map((t) => String(t).toLowerCase());

  const rawRuleTags = rule && Array.isArray(rule.tags) ? rule.tags : [];
  const ruleTags = new Set(lowerAll(rawRuleTags.filter(Boolean)));
  const scope = Array.isArray(scopeTags) ? new Set(lowerAll(scopeTags)) : null;

  let tp = 0;
  let fp = 0;
  let fn = 0;
  let tn = 0;

  for (const event of Array.isArray(events) ? events : []) {
    // A null / primitive entry has no text, tags, or signal to score.
    if (!event || typeof event !== 'object') continue;

    const tags = Array.isArray(event.tags) ? lowerAll(event.tags) : [];

    // Out-of-scope events are ignored — they have nothing to say about
    // this rule's precision.
    if (scope && tags.length > 0 && !tags.some((t) => scope.has(t))) continue;
    if (ruleTags.size > 0 && tags.length > 0 && !tags.some((t) => ruleTags.has(t))) continue;

    const fires = ruleMatches(rule, event);
    const signal = eventSignal(event);

    if (signal === 'negative' && fires) tp += 1;
    else if (signal === 'positive' && fires) fp += 1;
    else if (signal === 'negative' && !fires) fn += 1;
    else if (signal === 'positive' && !fires) tn += 1;
  }

  const firings = tp + fp;
  const negatives = tp + fn;
  return {
    tp,
    fp,
    fn,
    tn,
    precision: firings > 0 ? tp / firings : null,
    recall: negatives > 0 ? tp / negatives : null,
  };
}
|
|
192
|
+
|
|
193
|
+
const DEFAULT_PRECISION_FLOOR = 0.8;
const DEFAULT_MIN_SAMPLE = 3;

/**
 * Top-level validator for a proposed rule. Always returns a report object
 * (never throws on a malformed rule) so the caller can keep an audit trail of
 * why a rule was or was not promoted.
 *
 * Decision order:
 *   1. `invalid_rule_shape`               — rule or rule.rule missing; reject.
 *   2. `rule_does_not_match_seed_lesson`  — no seed, or the rule does not
 *                                           fire on it; reject.
 *   3. `insufficient_sample`              — fewer than `minSample` scored
 *                                           events; promote, flagged.
 *   4. `no_firings_in_sample`             — rule never fired; promote.
 *   5. `precision_below_floor`            — precision < floor; reject.
 *   6. `validated`                        — promote.
 *
 * @param {object} rule - proposed rule record.
 * @param {object} [options]
 * @param {object} [options.seedLesson] - lesson the rule was built from.
 * @param {object[]} [options.recentEvents=[]] - events to score against.
 * @param {number} [options.precisionFloor=DEFAULT_PRECISION_FLOOR]
 * @param {number} [options.minSample=DEFAULT_MIN_SAMPLE]
 * @returns {{shouldPromote: boolean, reason: string, matchesSeed: boolean,
 *   precision: number|null, recall: number|null, sampleSize: number,
 *   tp: number, fp: number, fn: number, tn: number}}
 */
function validateProposedRule(rule, {
  seedLesson,
  recentEvents = [],
  precisionFloor = DEFAULT_PRECISION_FLOOR,
  minSample = DEFAULT_MIN_SAMPLE,
} = {}) {
  const report = {
    shouldPromote: false,
    reason: null,
    matchesSeed: false,
    precision: null,
    recall: null,
    sampleSize: 0,
    tp: 0,
    fp: 0,
    fn: 0,
    tn: 0,
  };

  // Every exit path stamps the verdict and its reason in one place.
  const conclude = (shouldPromote, reason) => {
    report.shouldPromote = shouldPromote;
    report.reason = reason;
    return report;
  };

  if (!rule || !rule.rule) return conclude(false, 'invalid_rule_shape');

  // Seed invariant: a rule that cannot catch the lesson it was built from is
  // broken no matter what the sample says.
  if (seedLesson) report.matchesSeed = ruleMatches(rule, seedLesson);
  if (!report.matchesSeed) return conclude(false, 'rule_does_not_match_seed_lesson');

  // Precision invariant, scored on events scoped by the rule's own tags.
  const counts = scoreOnSample(rule, recentEvents, { scopeTags: rule.tags });
  Object.assign(report, counts);
  report.sampleSize = [counts.tp, counts.fp, counts.fn, counts.tn]
    .reduce((total, n) => total + n, 0);

  // Too little data to prove harm: promote, but leave a flag for later audit.
  if (report.sampleSize < minSample) return conclude(true, 'insufficient_sample');

  // The rule never fired in scope; the seed invariant already held.
  if (report.precision === null) return conclude(true, 'no_firings_in_sample');

  const belowFloor = report.precision < precisionFloor;
  return belowFloor
    ? conclude(false, 'precision_below_floor')
    : conclude(true, 'validated');
}
|
|
275
|
+
|
|
276
|
+
module.exports = {
|
|
277
|
+
tokenize,
|
|
278
|
+
eventText,
|
|
279
|
+
eventSignal,
|
|
280
|
+
ruleMatches,
|
|
281
|
+
scoreOnSample,
|
|
282
|
+
validateProposedRule,
|
|
283
|
+
DEFAULT_PRECISION_FLOOR,
|
|
284
|
+
DEFAULT_MIN_SAMPLE,
|
|
285
|
+
};
|
package/scripts/seo-gsd.js
CHANGED
|
@@ -43,6 +43,21 @@ const HIGH_ROI_QUERY_SEEDS = [
|
|
|
43
43
|
source: 'seed',
|
|
44
44
|
notes: 'Category-defining query that explains the core wedge.',
|
|
45
45
|
},
|
|
46
|
+
querySeed(
|
|
47
|
+
'ai agent harness optimization',
|
|
48
|
+
94,
|
|
49
|
+
'Fresh harness-engineering demand that maps directly to ThumbGate progressive disclosure, pre-action gates, and workflow audits.',
|
|
50
|
+
),
|
|
51
|
+
querySeed(
|
|
52
|
+
'browser automation safety',
|
|
53
|
+
93,
|
|
54
|
+
'High-intent browser-agent safety query tied to prompt injection, permissions, and cross-app automation risk.',
|
|
55
|
+
),
|
|
56
|
+
querySeed(
|
|
57
|
+
'native messaging host security',
|
|
58
|
+
91,
|
|
59
|
+
'Security-led query that maps directly to browser bridge auditing and explicit connector governance.',
|
|
60
|
+
),
|
|
46
61
|
{
|
|
47
62
|
query: 'thumbs up thumbs down feedback for ai coding agents',
|
|
48
63
|
businessValue: 95,
|
|
@@ -99,6 +114,10 @@ const HIGH_ROI_QUERY_SEEDS = [
|
|
|
99
114
|
},
|
|
100
115
|
];
|
|
101
116
|
|
|
117
|
+
/**
 * Shorthand for a query-seed record whose `source` is always 'seed'.
 *
 * @param {string} query - search query text.
 * @param {number} businessValue - value score assigned to the query.
 * @param {string} notes - free-form rationale.
 * @returns {{query: string, businessValue: number, source: string, notes: string}}
 */
function querySeed(query, businessValue, notes) {
  const seed = {
    query,
    businessValue,
    source: 'seed',
    notes,
  };
  return seed;
}
|
|
120
|
+
|
|
102
121
|
function guideBlueprint({
|
|
103
122
|
query,
|
|
104
123
|
path,
|
|
@@ -138,6 +157,163 @@ function answer(question, text) {
|
|
|
138
157
|
return { question, answer: text };
|
|
139
158
|
}
|
|
140
159
|
|
|
160
|
+
/**
 * Build a guide blueprint that lives under /guides/<slug> in the
 * 'pre-action-gates' pillar. `path` and `pillar` always override whatever
 * `content` supplies for those keys.
 *
 * @param {string} slug - final path segment of the guide.
 * @param {object} content - remaining guideBlueprint() fields.
 * @returns {*} whatever guideBlueprint() returns.
 */
function preActionGuide(slug, content) {
  const path = `/guides/${slug}`;
  const blueprintInput = { ...content, path, pillar: 'pre-action-gates' };
  return guideBlueprint(blueprintInput);
}
|
|
167
|
+
|
|
168
|
+
const HARNESS_OPTIMIZATION_QUERY = 'ai agent harness optimization';
|
|
169
|
+
const HARNESS_OPTIMIZATION_GUIDE_SPEC = Object.freeze({
|
|
170
|
+
slug: 'agent-harness-optimization',
|
|
171
|
+
meta: {
|
|
172
|
+
query: HARNESS_OPTIMIZATION_QUERY,
|
|
173
|
+
title: 'AI Agent Harness Optimization | Progressive Disclosure + Pre-Action Gates',
|
|
174
|
+
heroTitle: 'AI Agent Harness Optimization That Blocks Repeat Failures',
|
|
175
|
+
heroSummary: 'A better harness keeps global instructions lean, loads MCP schemas only when needed, and turns feedback into pre-action gates. ThumbGate makes that workflow measurable and enforceable.',
|
|
176
|
+
},
|
|
177
|
+
takeaways: [
|
|
178
|
+
'Harness optimization is the control layer around the model: context, tools, guardrails, and feedback.',
|
|
179
|
+
'Progressive disclosure keeps agents out of prompt bloat while preserving proof and tool access.',
|
|
180
|
+
'ThumbGate adds a concrete audit path and Pre-Action Gates so harness lessons become runtime enforcement.',
|
|
181
|
+
],
|
|
182
|
+
sections: [
|
|
183
|
+
['paragraphs', 'What changed', [
|
|
184
|
+
'The model is no longer the whole system. The harness decides which instructions, tools, context packs, and approval rules the model sees before it acts.',
|
|
185
|
+
'When a team stuffs every rule into a global prompt, the agent loses reasoning room. When it routes work through lean discovery surfaces, the agent can fetch the exact tool schema, lesson, or harness only when the task requires it.',
|
|
186
|
+
]],
|
|
187
|
+
['bullets', 'How ThumbGate improves the harness', [
|
|
188
|
+
'Scores global agent docs so AGENTS.md, CLAUDE.md, and GEMINI.md stay lean instead of becoming unreviewable prompt bundles.',
|
|
189
|
+
'Publishes progressive MCP discovery through lightweight indexes and per-tool schema URLs.',
|
|
190
|
+
'Selects specialized gate harnesses for deploy, code-edit, and database-write actions instead of loading every gate for every workflow.',
|
|
191
|
+
'Turns thumbs-down feedback into prevention rules, then into hard Pre-Action Gates that block repeated mistakes.',
|
|
192
|
+
]],
|
|
193
|
+
['paragraphs', 'Where this creates ROI', [
|
|
194
|
+
'For acquisition, this page names the buyer category: AI agent harness optimization. For conversion, the CLI audit gives a concrete first action. For retention, the same audit keeps local instructions and MCP surfaces from drifting back into bloat.',
|
|
195
|
+
]],
|
|
196
|
+
],
|
|
197
|
+
faq: [
|
|
198
|
+
[
|
|
199
|
+
'What is an AI agent harness?',
|
|
200
|
+
'An AI agent harness is the runtime layer around the model: context loading, tool calls, guardrails, approval boundaries, memory, and verification. ThumbGate focuses on the enforcement part of that harness.',
|
|
201
|
+
],
|
|
202
|
+
[
|
|
203
|
+
'How does ThumbGate optimize a harness?',
|
|
204
|
+
'ThumbGate keeps global instructions lean, supports progressive MCP discovery, selects workflow-specific gate harnesses, and converts feedback into Pre-Action Gates that block known-bad actions before execution.',
|
|
205
|
+
],
|
|
206
|
+
],
|
|
207
|
+
relatedPaths: ['/guides/pre-action-gates', '/guides/codex-cli-guardrails'],
|
|
208
|
+
});
|
|
209
|
+
|
|
210
|
+
/**
 * Turn one `[kind, heading, entries]` spec tuple into a section.
 * 'bullets' goes to bullets(); every other kind goes to paragraphs().
 */
function buildSectionFromSpec(kind, heading, entries) {
  if (kind === 'bullets') {
    return bullets(heading, entries);
  }
  return paragraphs(heading, entries);
}
|
|
213
|
+
|
|
214
|
+
/**
 * Expand HARNESS_OPTIMIZATION_GUIDE_SPEC into a pre-action guide blueprint:
 * section tuples go through buildSectionFromSpec() and FAQ tuples through
 * answer(); meta, takeaways and relatedPaths are passed along unchanged.
 */
function buildHarnessOptimizationGuide() {
  const {
    slug,
    meta,
    takeaways,
    sections,
    faq,
    relatedPaths,
  } = HARNESS_OPTIMIZATION_GUIDE_SPEC;

  const builtSections = sections.map((tuple) => {
    const [kind, heading, entries] = tuple;
    return buildSectionFromSpec(kind, heading, entries);
  });
  const builtFaq = faq.map((pair) => {
    const [question, text] = pair;
    return answer(question, text);
  });

  return preActionGuide(slug, {
    ...meta,
    takeaways,
    sections: builtSections,
    faq: builtFaq,
    relatedPaths,
  });
}
|
|
223
|
+
|
|
224
|
+
const BROWSER_BRIDGE_GUIDE_SPECS = Object.freeze([
|
|
225
|
+
{
|
|
226
|
+
slug: 'browser-automation-safety',
|
|
227
|
+
meta: {
|
|
228
|
+
query: 'browser automation safety',
|
|
229
|
+
title: 'Browser Automation Safety | Prompt Injection, Permissions, and Pre-Action Gates',
|
|
230
|
+
heroTitle: 'Browser automation safety needs explicit approval boundaries',
|
|
231
|
+
heroSummary: 'Browser agents can click, type, and navigate for you, but they also widen prompt-injection and cross-app integration risk. ThumbGate adds approval boundaries, auditability, and a native messaging audit before those bridges turn into silent blast-radius expansion.',
|
|
232
|
+
},
|
|
233
|
+
takeaways: [
|
|
234
|
+
'Browser automation is useful because it has real permissions, which is exactly why it needs governance.',
|
|
235
|
+
'Prompt injection becomes more dangerous when an extension can reach a local executable through a browser bridge.',
|
|
236
|
+
'ThumbGate gives teams a first action now: audit native messaging hosts, then require explicit approval before browser-use connectors expand.',
|
|
237
|
+
],
|
|
238
|
+
sections: [
|
|
239
|
+
['paragraphs', 'Why browser-use changes the threat model', [
|
|
240
|
+
'Browser agents do not just read text. They can click buttons, fill forms, switch tabs, and sometimes bridge into local binaries. That means the blast radius is no longer only "bad output" but "real actions on live websites and local systems."',
|
|
241
|
+
'Once browser automation enters the stack, prompt injection stops being an abstract model weakness and becomes a workflow-governance problem. The right control is not more prompt advice. It is a hard boundary around what the agent is allowed to connect, install, and execute.',
|
|
242
|
+
]],
|
|
243
|
+
['bullets', 'What to audit first', [
|
|
244
|
+
'Which browser extensions hold automation permissions such as debugger, tabs, downloads, and nativeMessaging.',
|
|
245
|
+
'Whether the desktop app or CLI has registered native messaging hosts for browsers you did not explicitly connect.',
|
|
246
|
+
'Whether host manifests point to live local binaries and whether those binaries sit outside the browser sandbox.',
|
|
247
|
+
'Whether browser-use runs default to ask-before-acting or silently expand capability before a human approves them.',
|
|
248
|
+
]],
|
|
249
|
+
['paragraphs', 'How ThumbGate fits', [
|
|
250
|
+
'ThumbGate is the approval and enforcement layer around browser-use. Start by running npx thumbgate native-messaging-audit. Then gate future connector installs, record who approved them, and turn browser-bridge mistakes into Pre-Action Gates before the same pattern repeats.',
|
|
251
|
+
]],
|
|
252
|
+
],
|
|
253
|
+
faq: [
|
|
254
|
+
[
|
|
255
|
+
'Why is browser automation riskier than ordinary chat?',
|
|
256
|
+
'Because the agent can take real actions in a browser and may also reach local executables through native messaging bridges. That turns prompt injection and permission drift into operational risk, not just output-quality risk.',
|
|
257
|
+
],
|
|
258
|
+
[
|
|
259
|
+
'What should a team do before enabling browser-use broadly?',
|
|
260
|
+
'Audit native messaging hosts, review extension permissions, keep ask-before-acting enabled by default, and require explicit approval for any cross-app connector that expands the agent runtime beyond the browser sandbox.',
|
|
261
|
+
],
|
|
262
|
+
],
|
|
263
|
+
relatedPaths: ['/guides/native-messaging-host-security', '/guides/pre-action-gates'],
|
|
264
|
+
},
|
|
265
|
+
{
|
|
266
|
+
slug: 'native-messaging-host-security',
|
|
267
|
+
meta: {
|
|
268
|
+
query: 'native messaging host security',
|
|
269
|
+
title: 'Native Messaging Host Security | Audit Browser Bridges Before They Expand',
|
|
270
|
+
heroTitle: 'Native messaging host security for AI browser bridges',
|
|
271
|
+
heroSummary: 'Native messaging hosts let browser extensions talk to local executables. That can be useful, but it also creates a persistent bridge outside the browser sandbox. ThumbGate audits those registrations and helps teams require explicit approval before they become part of the workflow.',
|
|
272
|
+
},
|
|
273
|
+
takeaways: [
|
|
274
|
+
'Native messaging is a real local capability boundary, not a harmless implementation detail.',
|
|
275
|
+
'A manifest can pre-authorize extension origins long before a human operator understands the blast radius.',
|
|
276
|
+
'ThumbGate turns native messaging review into an auditable operator workflow instead of an invisible local side effect.',
|
|
277
|
+
],
|
|
278
|
+
sections: [
|
|
279
|
+
['paragraphs', 'What native messaging hosts actually do', [
|
|
280
|
+
'A native messaging host is a local manifest that tells a browser extension which executable it may launch on the operator machine. That bridge sits outside the browser sandbox, so it deserves the same review discipline teams use for deploy credentials or production write access.',
|
|
281
|
+
'The risk is not only the host binary itself. It is the combination of extension permissions, allowed origins, and whether the host remains registered for browsers the operator did not intentionally connect.',
|
|
282
|
+
]],
|
|
283
|
+
['bullets', 'Signals ThumbGate audits', [
|
|
284
|
+
'Manifest files under browser-specific NativeMessagingHosts directories on macOS and Linux.',
|
|
285
|
+
'Allowed extension origins and extension-id fan-out per host registration.',
|
|
286
|
+
'Host binaries that are missing on disk, which leaves stale or broken registrations behind.',
|
|
287
|
+
'AI/browser bridge manifests registered for browsers not detected in the usual local install paths.',
|
|
288
|
+
]],
|
|
289
|
+
['paragraphs', 'The fastest operator action', [
|
|
290
|
+
'Run npx thumbgate native-messaging-audit --json in the repo or workstation you govern. Review every AI browser bridge, remove anything you did not intentionally integrate, and keep browser-use in ask-before-acting mode until connector scope is explicit and revocable.',
|
|
291
|
+
]],
|
|
292
|
+
],
|
|
293
|
+
faq: [
|
|
294
|
+
[
|
|
295
|
+
'Why does native messaging deserve a separate security review?',
|
|
296
|
+
'Because it lets a browser extension hand work to a local executable outside the browser sandbox. That is a different trust boundary than ordinary page automation or side-panel UI access.',
|
|
297
|
+
],
|
|
298
|
+
[
|
|
299
|
+
'How does ThumbGate help with native messaging host security?',
|
|
300
|
+
'ThumbGate audits known host locations, highlights AI/browser bridges, flags stale or missing host binaries, and gives teams an enforcement layer so future connector expansion requires explicit approval.',
|
|
301
|
+
],
|
|
302
|
+
],
|
|
303
|
+
relatedPaths: ['/guides/browser-automation-safety', '/guides/pre-action-gates'],
|
|
304
|
+
},
|
|
305
|
+
]);
|
|
306
|
+
|
|
307
|
+
/**
 * Expand one browser-bridge guide spec into a pre-action guide blueprint.
 *
 * @param {object} spec - entry with slug, meta, takeaways, sections
 *   (`[kind, heading, entries]` tuples), faq (`[question, text]` tuples) and
 *   relatedPaths.
 * @returns {*} whatever preActionGuide() returns.
 */
function buildBrowserBridgeGuide(spec) {
  const content = { ...spec.meta };
  content.takeaways = spec.takeaways;
  content.sections = spec.sections.map(
    ([kind, heading, entries]) => buildSectionFromSpec(kind, heading, entries),
  );
  content.faq = spec.faq.map(([question, text]) => answer(question, text));
  content.relatedPaths = spec.relatedPaths;
  return preActionGuide(spec.slug, content);
}
|
|
316
|
+
|
|
141
317
|
const PAGE_BLUEPRINTS = [
|
|
142
318
|
{
|
|
143
319
|
query: 'thumbgate vs speclock',
|
|
@@ -288,6 +464,7 @@ const PAGE_BLUEPRINTS = [
|
|
|
288
464
|
],
|
|
289
465
|
relatedPaths: ['/compare/speclock', '/guides/claude-code-feedback'],
|
|
290
466
|
},
|
|
467
|
+
buildHarnessOptimizationGuide(),
|
|
291
468
|
{
|
|
292
469
|
query: 'stop ai coding agents from repeating mistakes',
|
|
293
470
|
path: '/guides/stop-repeated-ai-agent-mistakes',
|
|
@@ -529,6 +706,7 @@ const PAGE_BLUEPRINTS = [
|
|
|
529
706
|
],
|
|
530
707
|
relatedPaths: ['/compare/mem0', '/guides/stop-repeated-ai-agent-mistakes'],
|
|
531
708
|
},
|
|
709
|
+
...BROWSER_BRIDGE_GUIDE_SPECS.map(buildBrowserBridgeGuide),
|
|
532
710
|
guideBlueprint({
|
|
533
711
|
query: 'autoresearch agent safety',
|
|
534
712
|
path: '/guides/autoresearch-agent-safety',
|
|
@@ -749,7 +927,7 @@ function classifyIntent(query) {
|
|
|
749
927
|
return 'commercial';
|
|
750
928
|
}
|
|
751
929
|
if (/\b(what is|how to|guide|best practices|why)\b/.test(normalized)) return 'informational';
|
|
752
|
-
if (/\b(guardrails|pre-action gates|feedback|prevent repeated mistakes|repeating mistakes|memory)\b/.test(normalized)) {
|
|
930
|
+
if (/\b(guardrails|pre-action gates|feedback|prevent repeated mistakes|repeating mistakes|memory|harness optimization)\b/.test(normalized)) {
|
|
753
931
|
return 'commercial';
|
|
754
932
|
}
|
|
755
933
|
return 'informational';
|
|
@@ -759,7 +937,7 @@ function inferPillar(query) {
|
|
|
759
937
|
const normalized = normalizeText(query).toLowerCase();
|
|
760
938
|
if (/\b(speclock|mem0|alternative|vs|compare|comparison)\b/.test(normalized)) return 'comparison';
|
|
761
939
|
if (/\b(thumbs up|thumbs down|feedback|reinforce|mistake)\b/.test(normalized)) return 'feedback-loop';
|
|
762
|
-
if (/\b(autoresearch|self-improving|benchmark|reward hacking)\b/.test(normalized)) return 'pre-action-gates';
|
|
940
|
+
if (/\b(autoresearch|self-improving|benchmark|reward hacking|harness optimization|browser automation|native messaging|browser bridge|prompt injection)\b/.test(normalized)) return 'pre-action-gates';
|
|
763
941
|
if (/\b(pre-action gates|guardrails|block|prevent repeated mistakes|repeating mistakes)\b/.test(normalized)) return 'pre-action-gates';
|
|
764
942
|
if (/\b(claude code|cursor|codex|gemini|amp|opencode|integration|plugin)\b/.test(normalized)) return 'agent-workflows';
|
|
765
943
|
return 'ai-agent-reliability';
|
|
@@ -1139,6 +1317,7 @@ function renderSeoPageHtml(page, runtimeConfig = {}) {
|
|
|
1139
1317
|
<meta property="og:type" content="article" />
|
|
1140
1318
|
<meta property="og:url" content="${escapeHtml(canonicalUrl)}" />
|
|
1141
1319
|
<link rel="canonical" href="${escapeHtml(canonicalUrl)}" />
|
|
1320
|
+
<link rel="llm-context" href="/public/llm-context.md" type="text/markdown" />
|
|
1142
1321
|
<link rel="icon" type="image/svg+xml" href="/thumbgate-icon.png" />
|
|
1143
1322
|
<link rel="apple-touch-icon" href="/assets/brand/thumbgate-mark.svg" />
|
|
1144
1323
|
<meta property="og:image" content="/og.png" />
|
|
@@ -1379,6 +1558,7 @@ ${renderWebPageJsonLd(page, { appOrigin })}
|
|
|
1379
1558
|
<p><strong>Opportunity score:</strong> ${page.opportunityScore}</p>
|
|
1380
1559
|
<p><strong>Primary persona:</strong> ${escapeHtml(page.persona)}</p>
|
|
1381
1560
|
<p><strong>Keyword cluster:</strong> ${escapeHtml(page.keywordCluster.join(', '))}</p>
|
|
1561
|
+
<p><strong>Pricing:</strong> Pro $19/mo or $149/yr. Team $49/seat/mo.</p>
|
|
1382
1562
|
<div class="proof-links">${proofLinks}</div>
|
|
1383
1563
|
<a class="cta-button" href="${escapeHtml(page.cta.href)}" target="_blank" rel="noopener">${escapeHtml(page.cta.label)}</a>
|
|
1384
1564
|
</div>
|
package/scripts/tool-registry.js
CHANGED
|
@@ -841,6 +841,18 @@ const TOOLS = [
|
|
|
841
841
|
properties: {},
|
|
842
842
|
},
|
|
843
843
|
}),
|
|
844
|
+
readOnlyTool({
|
|
845
|
+
name: 'native_messaging_audit',
|
|
846
|
+
description: 'Audit local browser native messaging hosts and AI browser bridges. Flags missing host binaries, pre-authorized extension bridges, and manifests for browsers not detected locally.',
|
|
847
|
+
inputSchema: {
|
|
848
|
+
type: 'object',
|
|
849
|
+
properties: {
|
|
850
|
+
platform: { type: 'string', enum: ['darwin', 'linux', 'win32'], description: 'Optional platform override for manifest discovery.' },
|
|
851
|
+
homeDir: { type: 'string', description: 'Optional home-directory override for manifest discovery.' },
|
|
852
|
+
aiOnly: { type: 'boolean', description: 'When true, only AI/browser bridge manifests are returned.' },
|
|
853
|
+
},
|
|
854
|
+
},
|
|
855
|
+
}),
|
|
844
856
|
readOnlyTool({
|
|
845
857
|
name: 'commerce_recall',
|
|
846
858
|
description: 'Recall past feedback filtered by commerce categories (product_recommendation, brand_compliance, sizing, pricing, regulatory). Returns quality scores alongside memories for agentic commerce agents.',
|
|
@@ -94,7 +94,7 @@ Bounded retrieval of relevant feedback history for the current task. The agent g
|
|
|
94
94
|
| Seats | 1 | 1 | Per-seat |
|
|
95
95
|
| Price | $0 | $19/mo | $49/seat/mo |
|
|
96
96
|
|
|
97
|
-
Start a 7-day free trial of Pro: <https://
|
|
97
|
+
Start a 7-day free trial of Pro: <https://thumbgate-production.up.railway.app/go/pro?utm_source=skill>
|
|
98
98
|
|
|
99
99
|
## Compatibility
|
|
100
100
|
|
package/src/api/server.js
CHANGED
|
@@ -97,6 +97,7 @@ const {
|
|
|
97
97
|
samplePosteriors,
|
|
98
98
|
} = require('../../scripts/thompson-sampling');
|
|
99
99
|
const {
|
|
100
|
+
appendFunnelEvent,
|
|
100
101
|
createCheckoutSession,
|
|
101
102
|
getCheckoutSessionStatus,
|
|
102
103
|
provisionApiKey,
|
|
@@ -225,6 +226,7 @@ const GUIDE_PAGE_PATH = path.resolve(__dirname, '../../public/guide.html');
|
|
|
225
226
|
const CODEX_PLUGIN_PAGE_PATH = path.resolve(__dirname, '../../public/codex-plugin.html');
|
|
226
227
|
const COMPARE_PAGE_PATH = path.resolve(__dirname, '../../public/compare.html');
|
|
227
228
|
const LEARN_PAGE_PATH = path.resolve(__dirname, '../../public/learn.html');
|
|
229
|
+
const NUMBERS_PAGE_PATH = path.resolve(__dirname, '../../public/numbers.html');
|
|
228
230
|
const LEARN_DIR = path.resolve(__dirname, '../../public/learn');
|
|
229
231
|
const GUIDES_DIR = path.resolve(__dirname, '../../public/guides');
|
|
230
232
|
const COMPARE_DIR = path.resolve(__dirname, '../../public/compare');
|
|
@@ -2161,6 +2163,37 @@ function servePublicMarketingPage({
|
|
|
2161
2163
|
'landing_page_view'
|
|
2162
2164
|
);
|
|
2163
2165
|
|
|
2166
|
+
// Funnel-ledger write (2026-04-21): populate funnel-events.jsonl with a
|
|
2167
|
+
// discovery-stage event on every landing-page view so UTM-tagged social
|
|
2168
|
+
// traffic becomes visible in `npm run feedback:summary` and
|
|
2169
|
+
// `bin/cli.js cfo --today`. Prior to this wire, landing views wrote only
|
|
2170
|
+
// to telemetry-pings.jsonl (invisible to the CEO-facing revenue surface),
|
|
2171
|
+
// leaving funnel-events.jsonl empty despite 404 published Zernio posts.
|
|
2172
|
+
// Best-effort: wrapped in try/catch so a billing-ledger hiccup never
|
|
2173
|
+
// breaks a page render.
|
|
2174
|
+
try {
|
|
2175
|
+
appendFunnelEvent({
|
|
2176
|
+
stage: 'discovery',
|
|
2177
|
+
event: 'landing_view',
|
|
2178
|
+
installId: journeyState.visitorId || null,
|
|
2179
|
+
traceId: journeyState.acquisitionId || null,
|
|
2180
|
+
evidence: landingAttribution.landingPath || 'landing_view',
|
|
2181
|
+
metadata: {
|
|
2182
|
+
page: extraTelemetry.pageType || landingAttribution.page || 'landing',
|
|
2183
|
+
utmSource: landingAttribution.utmSource || null,
|
|
2184
|
+
utmMedium: landingAttribution.utmMedium || null,
|
|
2185
|
+
utmCampaign: landingAttribution.utmCampaign || null,
|
|
2186
|
+
utmContent: landingAttribution.utmContent || null,
|
|
2187
|
+
utmTerm: landingAttribution.utmTerm || null,
|
|
2188
|
+
referrerHost: landingAttribution.referrerHost || null,
|
|
2189
|
+
sessionId: journeyState.sessionId || null,
|
|
2190
|
+
},
|
|
2191
|
+
});
|
|
2192
|
+
} catch {
|
|
2193
|
+
// Funnel ledger is best-effort on page render; telemetry-pings remains
|
|
2194
|
+
// the authoritative observability path if the ledger write fails.
|
|
2195
|
+
}
|
|
2196
|
+
|
|
2164
2197
|
if (isSeoAttributionSource(landingAttribution.source)) {
|
|
2165
2198
|
appendBestEffortTelemetry(FEEDBACK_DIR, {
|
|
2166
2199
|
eventType: 'seo_landing_view',
|
|
@@ -3777,6 +3810,26 @@ async function addContext(){
|
|
|
3777
3810
|
return;
|
|
3778
3811
|
}
|
|
3779
3812
|
|
|
3813
|
+
if (isGetLikeRequest && (pathname === '/numbers' || pathname === '/numbers.html')) {
|
|
3814
|
+
// Route through servePublicMarketingPage so landing_page_view telemetry
|
|
3815
|
+
// + funnel-events.jsonl `discovery/landing_view` get captured with UTM
|
|
3816
|
+
// attribution — critical for Zernio social CTAs that target /numbers.
|
|
3817
|
+
try {
|
|
3818
|
+
servePublicMarketingPage({
|
|
3819
|
+
req,
|
|
3820
|
+
res,
|
|
3821
|
+
parsed,
|
|
3822
|
+
hostedConfig,
|
|
3823
|
+
isHeadRequest,
|
|
3824
|
+
renderHtml: () => fs.readFileSync(NUMBERS_PAGE_PATH, 'utf-8'),
|
|
3825
|
+
extraTelemetry: { pageType: 'numbers' },
|
|
3826
|
+
});
|
|
3827
|
+
} catch {
|
|
3828
|
+
sendJson(res, 404, { error: 'Numbers page not found' });
|
|
3829
|
+
}
|
|
3830
|
+
return;
|
|
3831
|
+
}
|
|
3832
|
+
|
|
3780
3833
|
if (isGetLikeRequest && pathname === '/learn/learn.css') {
|
|
3781
3834
|
try {
|
|
3782
3835
|
const cssPath = path.join(LEARN_DIR, 'learn.css');
|