thumbgate 1.17.0 → 1.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "thumbgate-marketplace",
3
- "version": "1.17.0",
3
+ "version": "1.18.0",
4
4
  "owner": {
5
5
  "name": "Igor Ganapolsky",
6
6
  "email": "ig5973700@gmail.com"
@@ -13,7 +13,7 @@
13
13
  "source": "npm",
14
14
  "package": "thumbgate"
15
15
  },
16
- "version": "1.17.0",
16
+ "version": "1.18.0",
17
17
  "author": {
18
18
  "name": "Igor Ganapolsky"
19
19
  },
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "thumbgate",
3
3
  "description": "Type 👍 or 👎 on any agent action. ThumbGate captures it, distills a lesson, and blocks the pattern from repeating. One thumbs-down = the agent physically cannot make that mistake again. 33 pre-action checks, budget enforcement, self-protection, and NIST/SOC2 compliance tags.",
4
- "version": "1.17.0",
4
+ "version": "1.18.0",
5
5
  "author": {
6
6
  "name": "Igor Ganapolsky"
7
7
  },
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "thumbgate",
3
- "version": "1.17.0",
3
+ "version": "1.18.0",
4
4
  "description": "ThumbGate — 👍👎 feedback that teaches your AI agent. Thumbs down a mistake, it never happens again.",
5
5
  "homepage": "https://thumbgate-production.up.railway.app",
6
6
  "transport": "stdio",
@@ -2,13 +2,13 @@
2
2
  "mcpServers": {
3
3
  "thumbgate": {
4
4
  "command": "npx",
5
- "args": ["--yes", "--package", "thumbgate@1.17.0", "thumbgate", "serve"]
5
+ "args": ["--yes", "--package", "thumbgate@1.18.0", "thumbgate", "serve"]
6
6
  }
7
7
  },
8
8
  "hooks": {
9
9
  "preToolUse": {
10
10
  "command": "npx",
11
- "args": ["--yes", "--package", "thumbgate@1.17.0", "thumbgate", "gate-check"]
11
+ "args": ["--yes", "--package", "thumbgate@1.18.0", "thumbgate", "gate-check"]
12
12
  }
13
13
  }
14
14
  }
@@ -216,7 +216,7 @@ const {
216
216
  finalizeSession: finalizeFeedbackSession,
217
217
  } = require('../../scripts/feedback-session');
218
218
 
219
- const SERVER_INFO = { name: 'thumbgate-mcp', version: '1.17.0' };
219
+ const SERVER_INFO = { name: 'thumbgate-mcp', version: '1.18.0' };
220
220
  const COMMERCE_CATEGORIES = [
221
221
  'product_recommendation',
222
222
  'brand_compliance',
@@ -7,7 +7,7 @@
7
7
  "npx",
8
8
  "--yes",
9
9
  "--package",
10
- "thumbgate@1.17.0",
10
+ "thumbgate@1.18.0",
11
11
  "thumbgate",
12
12
  "serve"
13
13
  ],
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "thumbgate",
3
- "version": "1.17.0",
3
+ "version": "1.18.0",
4
4
  "description": "ThumbGate self-improving agent governance: thumbs-up/down turns every mistake into a prevention rule and blocks repeat patterns. 33 pre-action checks, budget enforcement, and self-protection for Claude Code, Cursor, Codex, Gemini CLI, and Amp.",
5
5
  "homepage": "https://thumbgate-production.up.railway.app",
6
6
  "repository": {
@@ -110,6 +110,7 @@
110
110
  "scripts/feedback-loop.js",
111
111
  "scripts/feedback-paths.js",
112
112
  "scripts/feedback-quality.js",
113
+ "scripts/feedback_quality_eval.py",
113
114
  "scripts/feedback-schema.js",
114
115
  "scripts/feedback-session.js",
115
116
  "scripts/feedback-to-rules.js",
@@ -346,6 +347,7 @@
346
347
  "test:telemetry-tracked-link-slug": "node --test tests/telemetry-tracked-link-slug.test.js",
347
348
  "test:prompt-eval": "node --test tests/prompt-eval.test.js",
348
349
  "eval:feedback": "node scripts/prompt-eval.js --from-feedback",
350
+ "eval:feedback-quality": "python3 scripts/feedback_quality_eval.py",
349
351
  "test:decision-trace": "node --test tests/decision-trace.test.js",
350
352
  "test:feedback-fallback": "node --test tests/feedback-fallback.test.js",
351
353
  "test:metaclaw": "node --test tests/metaclaw-features.test.js",
@@ -406,7 +408,7 @@
406
408
  "test:e2e": "node --test tests/e2e-pipeline.test.js tests/e2e-product-flows.test.js tests/e2e-coverage-contract.test.js",
407
409
  "test:rlaif": "node --test tests/rlaif-self-audit.test.js tests/dpo-optimizer.test.js tests/meta-policy.test.js tests/agent-reward-model.test.js",
408
410
  "test:attribution": "node --test tests/feedback-attribution.test.js tests/hybrid-feedback-context.test.js",
409
- "test:quality": "node --test tests/validate-feedback.test.js",
411
+ "test:quality": "node --test tests/validate-feedback.test.js tests/feedback-quality-eval-python.test.js",
410
412
  "test:intelligence": "node --test tests/intelligence.test.js",
411
413
  "test:training-export": "node --test tests/training-export.test.js tests/databricks-export.test.js",
412
414
  "test:deployment": "node --test tests/deployment.test.js tests/deploy-policy.test.js tests/publish-decision.test.js tests/changeset-check.test.js tests/release-notes.test.js tests/sonarcloud-workflow.test.js tests/package-boundary.test.js tests/public-package-boundary.test.js tests/revenue-observability-workflow.test.js",
@@ -676,7 +678,7 @@
676
678
  "dependencies": {
677
679
  "@anthropic-ai/sdk": "0.92.0",
678
680
  "@google/genai": "1.49.0",
679
- "@huggingface/transformers": "^4.1.0",
681
+ "@huggingface/transformers": "^4.2.0",
680
682
  "@lancedb/lancedb": "^0.27.2",
681
683
  "apache-arrow": "^18.1.0",
682
684
  "better-sqlite3": "^12.9.0",
@@ -692,7 +694,7 @@
692
694
  },
693
695
  "mcpName": "io.github.IgorGanapolsky/thumbgate",
694
696
  "devDependencies": {
695
- "@changesets/changelog-github": "^0.6.0",
697
+ "@changesets/changelog-github": "^0.7.0",
696
698
  "@changesets/cli": "^2.31.0",
697
699
  "c8": "^11.0.0",
698
700
  "undici": "^8.2.0"
package/public/index.html CHANGED
@@ -19,7 +19,7 @@ __GOOGLE_SITE_VERIFICATION_META__
19
19
  <meta property="og:image" content="https://thumbgate-production.up.railway.app/og.png">
20
20
  <meta name="twitter:card" content="summary_large_image">
21
21
  <meta name="twitter:image" content="https://thumbgate-production.up.railway.app/og.png">
22
- <meta name="thumbgate-version" content="1.17.0">
22
+ <meta name="thumbgate-version" content="1.18.0">
23
23
  <meta name="keywords" content="ThumbGate, thumbgate, AI agent orchestration, AI experience orchestration, agent enforcement layer, save LLM tokens, reduce Claude API cost, reduce OpenAI cost, AI agent token savings, prevent LLM retries, prevent hallucination retries, stop AI token waste, pre-action checks, agent governance, Claude Code, Cursor, Codex, Gemini, Amp, Cline, OpenCode, workflow hardening, context engineering, AI authenticity, brand authenticity AI">
24
24
  <link rel="apple-touch-icon" href="/apple-touch-icon.png">
25
25
 
@@ -1231,26 +1231,26 @@ __GA_BOOTSTRAP__
1231
1231
  <a href="https://www.npmjs.com/package/thumbgate" target="_blank" rel="noopener" class="btn-free">Install Free</a>
1232
1232
  </div>
1233
1233
  <div class="price-card pro" data-price-dollars="19">
1234
- <div class="tier">Solo Pro</div>
1234
+ <div class="tier">Pro</div>
1235
1235
  <div class="price">$19<span style="font-size:16px;color:var(--text-dim)">/mo</span></div>
1236
- <div class="price-sub">For builders who want proof, exports, and unlimited local learning.</div>
1236
+ <div class="price-sub">Stop paying tokens to re-correct the same agent mistake across sessions.</div>
1237
1237
  <ul>
1238
- <li>Unlimited feedback captures and prevention rules</li>
1239
- <li>Lesson recall and search across sessions</li>
1240
- <li><a href="/dashboard#insights" style="color:var(--cyan);text-decoration:underline;">Visual check debugger</a> for every blocked action and the check that fired</li>
1241
- <li>Auto-connect so supported agents appear automatically after setup</li>
1242
- <li><a href="/dashboard#export" style="color:var(--cyan);text-decoration:underline;">DPO training data export</a> with ready-to-use preference pairs for fine-tuning</li>
1243
- <li>Personal local dashboard for the individual operator</li>
1244
- <li>Model Hardening Advisor plus HuggingFace dataset export</li>
1245
- <li>Review-ready workflow support and proof-ready lesson bundles</li>
1246
- <li>Team lesson export/import for handoff or migration</li>
1238
+ <li><strong>Block every repeat mistake</strong> — unlimited feedback captures and prevention rules (Free caps at 5 active rules)</li>
1239
+ <li><strong>Never re-explain a correction</strong> — lesson recall and search across sessions on every agent surface</li>
1240
+ <li><strong>See exactly which rule fired</strong> — <a href="/dashboard#insights" style="color:var(--cyan);text-decoration:underline;">Visual check debugger</a> for every blocked action and the check that fired</li>
1241
+ <li><strong>One install, every agent</strong> — Auto-connect so supported agents appear automatically after setup (Claude Code, Cursor, Codex, Gemini, Amp, Cline, OpenCode)</li>
1242
+ <li><strong>Fine-tune your local model</strong> on what your team actually wants — <a href="/dashboard#export" style="color:var(--cyan);text-decoration:underline;">DPO training data export</a> with ready-to-use preference pairs for fine-tuning</li>
1243
+ <li><strong>Audit-ready enforcement proof</strong> — Personal local dashboard for the individual operator with auditable block history</li>
1244
+ <li><strong>Ship hardened agents to production</strong> — Model Hardening Advisor plus HuggingFace dataset export</li>
1245
+ <li><strong>Hand a PR with proof</strong> — Review-ready workflow support and proof-ready lesson bundles a reviewer can verify in 30 seconds</li>
1246
+ <li><strong>Hand off without re-onboarding</strong> — Team lesson export/import for handoff or migration</li>
1247
1247
  </ul>
1248
1248
  <div style="margin:12px 0 16px;padding:12px;border:1px solid rgba(34,211,238,0.25);border-radius:8px;background:rgba(34,211,238,0.06);">
1249
1249
  <div style="font-size:12px;color:var(--text-muted);margin-bottom:4px;">What your Pro dashboard looks like</div>
1250
1250
  <div style="font-family:var(--mono);font-size:12px;color:var(--cyan);line-height:1.6;">checks: 36 active<br>feedback: unlimited<br>exports: DPO + lessons</div>
1251
1251
  </div>
1252
1252
  <div style="font-size:11px;letter-spacing:0.08em;text-transform:uppercase;color:var(--cyan);font-weight:800;margin-bottom:8px;">PAY-NOW PRO</div>
1253
- <a href="/checkout/pro?confirm=1&utm_source=pricing&utm_medium=cta&utm_campaign=v2_landing&utm_content=pricing_pro" id="pro-checkout-link" class="btn-pro" onclick="handleProCheckout();return false;" style="display:block;width:100%;text-align:center;padding:12px;font-size:15px;">Upgrade to Pro — $19/mo</a>
1253
+ <a href="/checkout/pro?utm_source=pricing&utm_medium=cta&utm_campaign=v2_landing&utm_content=pricing_pro&plan_id=pro" id="pro-checkout-link" class="btn-pro" onclick="handleProCheckout();return false;" style="display:block;width:100%;text-align:center;padding:12px;font-size:15px;">Upgrade to Pro — $19/mo</a>
1254
1254
  <p style="font-size:11px;color:var(--text-muted);margin-top:8px;text-align:center;">Billed today · cancel anytime.</p>
1255
1255
  <input type="email" id="pro-email" data-buyer-email placeholder="you@company.com" style="margin-top:10px;width:100%;padding:10px 12px;border:1px solid var(--border);border-radius:8px;background:var(--bg-raised);color:var(--text);font-size:14px;">
1256
1256
  </div>
@@ -1267,8 +1267,9 @@ __GA_BOOTSTRAP__
1267
1267
  <li>Workflow hardening sprint intake for approval boundaries and rollback safety</li>
1268
1268
  <li>Email support during pilot rollout</li>
1269
1269
  </ul>
1270
- <a href="#workflow-sprint-intake" class="btn-team" style="display:block;text-align:center;">Start Workflow Hardening Sprint</a>
1271
- <p style="font-size:11px;color:var(--text-muted);margin-top:8px;text-align:center;">No self-serve trial. Team is $49/seat/mo with a 3-seat minimum and starts with the Workflow Hardening Sprint intake, not a self-serve trial.</p>
1270
+ <a href="/checkout/pro?plan_id=team&amp;seat_count=3&amp;confirm=1&amp;utm_source=website&amp;utm_medium=pricing&amp;utm_campaign=team_self_serve&amp;cta_id=pricing_team_self_serve&amp;cta_placement=pricing&amp;landing_path=%2F" onclick="try{posthog.capture('team_self_serve_click',{cta:'team_self_serve',seats:3,price:147})}catch(_){};sendFirstPartyTelemetry('team_self_serve_checkout_started',{ctaId:'pricing_team_self_serve',ctaPlacement:'pricing',planId:'team',seatCount:3,price:147});sendGa4Event('begin_checkout',{currency:'USD',value:147,items:[{item_id:'team_monthly',item_name:'ThumbGate Team (3 seats)',quantity:3}]});" class="btn-team" style="display:block;text-align:center;">Start 3-seat Team — $147/mo</a>
1271
+ <a href="#workflow-sprint-intake" style="display:block;text-align:center;margin-top:8px;font-size:13px;color:var(--text-dim);text-decoration:none;">Or qualify first via Workflow Hardening Sprint intake →</a>
1272
+ <p style="font-size:11px;color:var(--text-muted);margin-top:8px;text-align:center;">Team is $49/seat/mo with a 3-seat minimum. Self-serve checkout starts you at $147/mo for 3 seats; add more seats from the dashboard. Prefer to qualify the workflow before paying? Start with the intake.</p>
1272
1273
  </div>
1273
1274
  </div>
1274
1275
  <p style="text-align:center;color:var(--text-muted);font-size:13px;margin:40px 0;">Need a custom diagnostic, sprint, or setup? <a href="#workflow-sprint-intake" style="color:var(--cyan);text-decoration:none;">Send workflow first →</a></p>
@@ -1464,7 +1465,7 @@ __GA_BOOTSTRAP__
1464
1465
  <a href="https://www.linkedin.com/in/igorganapolsky" target="_blank" rel="noopener">LinkedIn</a>
1465
1466
  <a href="/blog">Blog</a>
1466
1467
  </div>
1467
- <span class="footer-copy">© 2026 ThumbGate · MIT License · npm v1.17.0</span>
1468
+ <span class="footer-copy">© 2026 ThumbGate · MIT License · npm v1.18.0</span>
1468
1469
  </div>
1469
1470
  </footer>
1470
1471
 
@@ -25,7 +25,7 @@
25
25
  "alternateName": "thumbgate",
26
26
  "applicationCategory": "DeveloperApplication",
27
27
  "operatingSystem": "Cross-platform, Node.js >=18.18.0",
28
- "softwareVersion": "1.17.0",
28
+ "softwareVersion": "1.18.0",
29
29
  "url": "https://thumbgate-production.up.railway.app/numbers",
30
30
  "dateModified": "2026-05-07",
31
31
  "creator": {
@@ -202,7 +202,7 @@
202
202
  <main class="container">
203
203
  <h1>The Numbers</h1>
204
204
  <p class="subtitle">Generated first-party operational snapshot from the ThumbGate runtime. This is not customer traction, install volume, revenue, or proof that a configured gate has fired.</p>
205
- <div class="freshness">Updated: 2026-05-07 · Version 1.17.0</div>
205
+ <div class="freshness">Updated: 2026-05-07 · Version 1.18.0</div>
206
206
  <div class="truth-note"><strong>Read this first:</strong> configured checks are inventory. Recorded blocks and warnings are usage evidence. This snapshot currently reports 0 recorded hard-block event(s) and 0 recorded warning event(s).</div>
207
207
 
208
208
  <h2>Gate enforcement</h2>
@@ -6,7 +6,10 @@ const path = require('path');
6
6
  const { resolveFeedbackDir } = require('./feedback-paths');
7
7
 
8
8
  const MAX_AUTO_GATES = 10;
9
- const WARN_THRESHOLD = 2; // 2+ repeated failures surface a warning gate
9
+ // 1+ failure auto-promotes to a warning gate. Cold buyers expect "one 👎 → blocked next time"
10
+ // — a 2-capture threshold made first-capture invisible and broke the activation loop. Block
11
+ // escalation still requires 3 captures (BLOCK_THRESHOLD) so noise doesn't auto-hard-block.
12
+ const WARN_THRESHOLD = 1;
10
13
  const BLOCK_THRESHOLD = 3; // 3+ repeated failures hard-block the action
11
14
  const WINDOW_DAYS = 30;
12
15
 
@@ -32,7 +32,17 @@ function normalize(ctx) {
32
32
  return (ctx || '').replace(/\/Users\/[^\s/]+/g, '~').replace(/:[0-9]+/g, '').toLowerCase().trim();
33
33
  }
34
34
 
35
- const HIGH_RISK_TAGS = new Set(['git-workflow', 'scope-control', 'trust-breach', 'execution-gap', 'regression', 'security']);
35
+ // HIGH_RISK_TAGS triggers single-capture promotion (count >= 1 && hasHighRisk).
36
+ // Tags here MUST overlap with what inferSemanticTags() actually emits (see scripts/feedback-loop.js)
37
+ // — otherwise cold buyers' first 👎 stays a lesson and never becomes a gate.
38
+ const HIGH_RISK_TAGS = new Set([
39
+ // Original semantic-category labels
40
+ 'git-workflow', 'scope-control', 'trust-breach', 'execution-gap', 'regression', 'security',
41
+ // Tags inferSemanticTags() emits for destructive / irreversible operations
42
+ 'destructive', 'force-push', 'delete', 'drop', 'force-overwrite',
43
+ 'production', 'database', 'payment', 'credentials', 'secrets',
44
+ 'rm-rf', 'reset-hard', 'truncate', 'data-loss',
45
+ ]);
36
46
  function analyze(entries) {
37
47
  let positiveCount = 0, negativeCount = 0;
38
48
  const categories = {};
@@ -0,0 +1,725 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Offline feedback quality evaluation for ThumbGate.
4
+
5
+ This is intentionally stdlib-only. It turns feedback-log.jsonl into a small
6
+ quality report that answers: where are repeated failures clustering, how stable
7
+ is the signal, and do we have enough labeled gate decisions to compute true
8
+ precision/recall yet?
9
+ """
10
+
11
+ import argparse
12
+ import json
13
+ import math
14
+ import os
15
+ import re
16
+ import sqlite3
17
+ from collections import Counter, defaultdict
18
+ from datetime import datetime, timezone
19
+ from pathlib import Path
20
+ from typing import Any, Dict, Iterable, List, Optional, Tuple
21
+
22
+
23
# Root directory, taken as two levels above this script's own path.
PROJECT_ROOT = Path(__file__).parent.parent

# One row per category: (name, keywords searched in the feedback text,
# tool names searched in the entry's tool field). Row order is the order in
# which classify_entry() reports matching categories.
_CATEGORY_ROWS = (
    (
        "code_edit",
        ["edit", "write", "implement", "refactor", "fix", "update", "create file"],
        ["edit", "write", "multiedit"],
    ),
    (
        "git",
        ["commit", "push", "branch", "merge", "pr", "pull request", "rebase", "cherry-pick"],
        ["bash", "git"],
    ),
    (
        "testing",
        ["test", "jest", "coverage", "verify", "verification", "spec", "mock", "assert"],
        [],
    ),
    (
        "review",
        ["review", "pr comment", "resolve", "thread", "feedback"],
        [],
    ),
    (
        "search",
        ["search", "find", "grep", "glob", "explore", "where is", "look for", "rg"],
        ["grep", "glob", "read", "rg"],
    ),
    (
        "security",
        ["security", "secret", "credential", "token", "auth", "injection", "xss"],
        [],
    ),
    (
        "debugging",
        ["debug", "error", "crash", "stack trace", "log", "diagnose", "investigate"],
        [],
    ),
)

# Category name -> {"keywords": [...], "tools": [...]} lookup used for classification.
DEFAULT_CATEGORIES = {
    category: {"keywords": keywords, "tools": tools}
    for category, keywords, tools in _CATEGORY_ROWS
}
55
+
56
+
57
def resolve_feedback_dir() -> Path:
    """Pick the directory that holds the feedback data.

    Resolution order:
      1. ``THUMBGATE_FEEDBACK_DIR`` environment variable, when set and non-empty.
      2. ``<PROJECT_ROOT>/.thumbgate`` if it exists.
      3. ``<PROJECT_ROOT>/.claude/memory/feedback`` if it exists.
      4. ``~/.thumbgate/projects/<PROJECT_ROOT name>`` (returned without an
         existence check).
    """
    override = os.environ.get("THUMBGATE_FEEDBACK_DIR")
    if override:
        return Path(override)

    project_local_candidates = (
        PROJECT_ROOT / ".thumbgate",
        PROJECT_ROOT / ".claude" / "memory" / "feedback",
    )
    for candidate in project_local_candidates:
        if candidate.exists():
            return candidate

    return Path.home() / ".thumbgate" / "projects" / PROJECT_ROOT.name
71
+
72
+
73
def read_jsonl(path: Path) -> Tuple[List[Dict[str, Any]], int]:
    """Read a JSON-lines file, keeping only lines that decode to JSON objects.

    Args:
        path: File to read as UTF-8. A missing file yields an empty result.

    Returns:
        ``(rows, invalid)`` where ``rows`` holds the decoded objects in file
        order and ``invalid`` counts non-blank lines that either failed to
        parse as JSON or parsed to something other than an object.
    """
    records: List[Dict[str, Any]] = []
    rejected = 0

    if path.exists():
        with path.open("r", encoding="utf-8") as stream:
            for text in (raw.strip() for raw in stream):
                if not text:
                    # Blank lines are ignored, not counted as invalid.
                    continue
                try:
                    decoded = json.loads(text)
                except json.JSONDecodeError:
                    rejected += 1
                else:
                    if isinstance(decoded, dict):
                        records.append(decoded)
                    else:
                        rejected += 1

    return records, rejected
94
+
95
+
96
def load_sqlite_lessons(db_path: Optional[Path]) -> Dict[str, Any]:
    """Summarise the un-pruned rows of the ``lessons`` table in a SQLite DB.

    The database is opened read-only. Any failure is reported through the
    returned dict instead of being raised.

    Args:
        db_path: Path to the SQLite file, or ``None`` when no DB was given.

    Returns:
        A dict with keys ``available``, ``path``, ``totalLessons``,
        ``bySignal``, ``byDomain``, ``sourceFeedbackIds`` and ``error``.
        ``available`` is ``False`` (with zeroed counts) when no path was
        given, the file is missing, the ``lessons`` table is absent, or
        SQLite raised an error; ``error`` then carries the reason (``None``
        when no path was given).
    """

    def _unavailable(path: Optional[str], error: Optional[str]) -> Dict[str, Any]:
        # Shared shape for every "could not read lessons" outcome.
        return {
            "available": False,
            "path": path,
            "totalLessons": 0,
            "bySignal": {},
            "byDomain": {},
            "sourceFeedbackIds": [],
            "error": error,
        }

    if not db_path:
        return _unavailable(None, None)
    if not db_path.exists():
        return _unavailable(str(db_path), "SQLite lesson DB does not exist.")

    try:
        # as_uri() percent-encodes the path; interpolating the raw path would
        # let '#', '?' or '%' in a directory or file name be parsed as URI
        # syntax and make SQLite open a different (or non-existent) file.
        connection = sqlite3.connect(f"{db_path.resolve().as_uri()}?mode=ro", uri=True)
        connection.row_factory = sqlite3.Row
        try:
            table_exists = connection.execute(
                "SELECT name FROM sqlite_master WHERE type='table' AND name='lessons'"
            ).fetchone()
            if not table_exists:
                return _unavailable(str(db_path), "SQLite DB does not contain a lessons table.")

            rows = connection.execute(
                "SELECT id, signal, domain, sourceFeedbackId FROM lessons WHERE pruned = 0"
            ).fetchall()
        finally:
            connection.close()
    except sqlite3.Error as exc:
        return _unavailable(str(db_path), str(exc))

    # NULL/empty signal or domain values are grouped under "unknown".
    by_signal = Counter(str(row["signal"] or "unknown") for row in rows)
    by_domain = Counter(str(row["domain"] or "unknown") for row in rows)
    source_ids = sorted({
        str(row["sourceFeedbackId"])
        for row in rows
        if row["sourceFeedbackId"]
    })
    return {
        "available": True,
        "path": str(db_path),
        "totalLessons": len(rows),
        "bySignal": dict(sorted(by_signal.items())),
        "byDomain": dict(sorted(by_domain.items())),
        "sourceFeedbackIds": source_ids,
        "error": None,
    }
168
+
169
+
170
def normalize_signal(entry: Dict[str, Any]) -> Optional[str]:
    """Map a feedback entry to ``"positive"``, ``"negative"`` or ``None``.

    The ``signal`` field (falling back to ``feedback``) is compared
    case-insensitively, after trimming, against the known spellings below.
    If neither matches, a numeric ``reward`` decides by its sign; a reward
    of zero, a non-numeric reward, or no usable field gives ``None``.
    """
    # The thumbs emoji are written as escapes so a wrong-encoding round trip
    # of this file cannot silently turn them into strings that never match.
    thumbs_up = "\U0001F44D"
    thumbs_down = "\U0001F44E"

    raw = str(entry.get("signal") or entry.get("feedback") or "").strip().lower()
    if raw in {"positive", "up", "thumbsup", "thumbs_up", thumbs_up}:
        return "positive"
    if raw in {"negative", "down", "thumbsdown", "thumbs_down", thumbs_down}:
        return "negative"

    reward = entry.get("reward")
    if isinstance(reward, (int, float)):
        if reward > 0:
            return "positive"
        if reward < 0:
            return "negative"
    return None
184
+
185
+
186
def normalize_text(*values: Any) -> str:
    """Flatten arbitrary values into one lower-cased, space-joined string.

    ``None`` values are dropped, lists contribute each item via ``str()``,
    dicts contribute their JSON form with sorted keys, and anything else
    contributes ``str(value)``.
    """

    def _fragments(value: Any) -> List[str]:
        if isinstance(value, list):
            return [str(item) for item in value]
        if isinstance(value, dict):
            return [json.dumps(value, sort_keys=True)]
        return [str(value)]

    pieces = [
        fragment
        for value in values
        if value is not None
        for fragment in _fragments(value)
    ]
    return " ".join(pieces).lower()
198
+
199
+
200
def contains_keyword(text: str, keyword: str) -> bool:
    """Report whether ``keyword`` occurs in ``text``.

    The keyword is lower-cased and trimmed first; ``text`` is used as given.
    Keywords of at most three characters, or made only of ``[a-z0-9_+-]``,
    must appear as a whole token (not adjacent to another character from
    that class). Any other keyword, such as a multi-word phrase, is matched
    as a plain substring. A blank keyword never matches.
    """
    needle = keyword.lower().strip()
    if not needle:
        return False

    token_like = len(needle) <= 3 or re.fullmatch(r"[a-z0-9_+-]+", needle)
    if not token_like:
        return needle in text

    found = re.search(rf"(?<![a-z0-9_+-]){re.escape(needle)}(?![a-z0-9_+-])", text)
    return found is not None
207
+
208
+
209
def classify_entry(entry: Dict[str, Any]) -> List[str]:
    """Assign a feedback entry to one or more categories.

    A category from ``DEFAULT_CATEGORIES`` matches when one of its keywords
    is found in the entry's descriptive fields and tags, or one of its tools
    is found in the entry's tool name. When nothing matches, a non-empty
    string at ``richContext.domain`` is used instead; failing that, the
    result is ``["uncategorized"]``.
    """
    raw_tags = entry.get("tags")
    tags = raw_tags if isinstance(raw_tags, list) else []
    tool = entry.get("toolName") or entry.get("tool_name") or entry.get("last_tool")

    described_fields = (
        "context",
        "whatWentWrong",
        "whatToChange",
        "whatWorked",
        "actionReason",
        "failureType",
    )
    haystack = normalize_text(*(entry.get(field) for field in described_fields), tags)
    tool_haystack = normalize_text(tool)

    hits = [
        category
        for category, spec in DEFAULT_CATEGORIES.items()
        if any(contains_keyword(haystack, keyword) for keyword in spec["keywords"])
        or any(contains_keyword(tool_haystack, tool_name) for tool_name in spec["tools"])
    ]
    if hits:
        return hits

    # Fall back to the domain recorded in the entry's rich context, if any.
    rich_context = entry.get("richContext")
    domain = rich_context.get("domain") if isinstance(rich_context, dict) else None
    if isinstance(domain, str) and domain:
        return [domain]
    return ["uncategorized"]
236
+
237
+
238
def parse_timestamp(value: Any) -> Optional[datetime]:
    """Parse an ISO-8601 string into a timezone-aware ``datetime``.

    ``Z`` is rewritten to ``+00:00`` before parsing, and a result without a
    timezone is tagged as UTC. Returns ``None`` for non-strings, empty
    strings, and text ``datetime.fromisoformat`` rejects.
    """
    if not (isinstance(value, str) and value):
        return None

    try:
        moment = datetime.fromisoformat(value.replace("Z", "+00:00"))
    except ValueError:
        return None

    if moment.tzinfo is not None:
        return moment
    return moment.replace(tzinfo=timezone.utc)
249
+
250
+
251
def rate(numerator: int, denominator: int) -> float:
    """Return ``numerator / denominator`` rounded to 4 places, or 0.0 when the denominator is zero."""
    if not denominator:
        return 0.0
    return round(numerator / denominator, 4)
253
+
254
+
255
def wilson_lower_bound(positive: int, total: int, z: float = 1.96) -> float:
    """Lower bound of the Wilson score interval for ``positive`` out of ``total``.

    Args:
        positive: Number of positive observations.
        total: Number of observations; a value <= 0 returns 0.0.
        z: Normal quantile for the interval (default 1.96).

    Returns:
        The lower bound rounded to 4 decimal places.
    """
    if total <= 0:
        return 0.0

    proportion = positive / total
    z_squared = z * z
    denominator = 1 + z_squared / total
    midpoint = proportion + z_squared / (2 * total)
    margin = z * math.sqrt((proportion * (1 - proportion) + z_squared / (4 * total)) / total)
    return round((midpoint - margin) / denominator, 4)
263
+
264
+
265
def summarize_bucket(name: str, values: Iterable[str], signals: List[str], min_support: int) -> List[Dict[str, Any]]:
    """Aggregate positive/negative counts per bucket value.

    Args:
        name: Key under which each row stores its bucket value.
        values: Bucket value for each observation, paired positionally with
            ``signals``.
        signals: Signal label for each observation; only ``"positive"`` and
            ``"negative"`` contribute to the counts.
        min_support: Buckets with fewer positive+negative observations than
            this are left out.

    Returns:
        One row per retained bucket, ordered by negative rate (highest
        first), then support (highest first), then bucket value.
    """
    tallies: Dict[str, Counter] = defaultdict(Counter)
    for bucket, signal in zip(values, signals):
        tallies[bucket][signal] += 1

    def _row(bucket: str, tally: Counter) -> Dict[str, Any]:
        up = tally["positive"]
        down = tally["negative"]
        support = up + down
        return {
            name: bucket,
            "support": support,
            "positive": up,
            "negative": down,
            "positiveRate": rate(up, support),
            "negativeRate": rate(down, support),
            "wilsonPositiveLower": wilson_lower_bound(up, support),
        }

    summary = [
        _row(bucket, tally)
        for bucket, tally in tallies.items()
        if tally["positive"] + tally["negative"] >= min_support
    ]
    summary.sort(key=lambda row: (-row["negativeRate"], -row["support"], row[name]))
    return summary
288
+
289
+
290
def explicit_gate_label(entry: Dict[str, Any]) -> Tuple[Optional[str], Optional[str]]:
    """Derive ``(expected, actual)`` gate labels from a feedback entry.

    expected: ``"harmful"`` for negative feedback, ``"safe"`` for positive;
        ``None`` (with ``actual`` also ``None``) when the entry has no signal.
    actual: ``"blocked"`` or ``"allowed"`` read from, in order, the textual
        fields ``gateDecision``/``decision``/``outcome``/``status``, the
        boolean ``allowed`` field, the boolean ``blocked`` field, and finally
        ``actionType == "no-action"`` (counted as blocked). ``None`` when none
        of these carries a decision.
    """
    signal = normalize_signal(entry)
    if not signal:
        return None, None

    expected = "safe" if signal != "negative" else "harmful"

    blocked_words = {"block", "blocked", "deny", "denied", "rejected"}
    allowed_words = {"allow", "allowed", "pass", "passed", "accepted"}
    for field in ("gateDecision", "decision", "outcome", "status"):
        verdict = str(entry.get(field) or "").lower()
        if verdict in blocked_words:
            return expected, "blocked"
        if verdict in allowed_words:
            return expected, "allowed"

    allowed_flag = entry.get("allowed")
    if isinstance(allowed_flag, bool):
        return expected, ("allowed" if allowed_flag else "blocked")

    blocked_flag = entry.get("blocked")
    if isinstance(blocked_flag, bool):
        return expected, ("blocked" if blocked_flag else "allowed")

    actual = "blocked" if entry.get("actionType") == "no-action" else None
    return expected, actual
317
+
318
+
319
def compute_sqlite_metrics(entries: List[Dict[str, Any]], sqlite_lessons: Dict[str, Any]) -> Dict[str, Any]:
    """Measure how much of the feedback log is backed by SQLite lessons.

    Args:
        entries: Feedback entries; only those with a truthy ``id`` count.
        sqlite_lessons: Lesson summary dict carrying ``available``, ``path``,
            ``totalLessons``, ``bySignal``, ``byDomain``,
            ``sourceFeedbackIds`` and ``error``.

    Returns:
        A metrics dict. ``feedbackLessonCoverage`` is the share of feedback
        ids that appear among the lessons' source feedback ids;
        ``negativeLessonCoverage`` is the same share restricted to negative
        feedback. When the lesson summary is unavailable, both are 0.0 and
        its ``error`` is passed through.
    """
    lessons_path = sqlite_lessons.get("path")

    if not sqlite_lessons.get("available"):
        return {
            "available": False,
            "path": lessons_path,
            "totalLessons": 0,
            "feedbackLessonCoverage": 0.0,
            "negativeLessonCoverage": 0.0,
            "bySignal": {},
            "byDomain": {},
            "error": sqlite_lessons.get("error"),
        }

    identified = [entry for entry in entries if entry.get("id")]
    all_ids = {str(entry.get("id")) for entry in identified}
    negative_ids = {
        str(entry.get("id"))
        for entry in identified
        if normalize_signal(entry) == "negative"
    }
    covered_ids = set(sqlite_lessons.get("sourceFeedbackIds") or [])

    return {
        "available": True,
        "path": lessons_path,
        "totalLessons": sqlite_lessons.get("totalLessons", 0),
        "feedbackLessonCoverage": rate(len(all_ids & covered_ids), len(all_ids)),
        "negativeLessonCoverage": rate(len(negative_ids & covered_ids), len(negative_ids)),
        "bySignal": sqlite_lessons.get("bySignal") or {},
        "byDomain": sqlite_lessons.get("byDomain") or {},
        "error": None,
    }
350
+
351
+
352
def retrieval_score(row: Dict[str, Any]) -> Optional[float]:
    """Return the first finite numeric score found on a retrieval row.

    The keys ``score``, ``similarity``, ``distanceScore`` and
    ``topSimilarity`` are tried in that order. A value is accepted when
    ``float()`` can convert it and the result is finite; NaN, infinities,
    missing keys and unconvertible values are skipped so the next key is
    tried.

    Returns:
        The score as a ``float``, or ``None`` when no key holds a usable value.
    """
    for key in ("score", "similarity", "distanceScore", "topSimilarity"):
        try:
            score = float(row.get(key))
        except (TypeError, ValueError, OverflowError):
            # Missing/None, non-numeric text, or an int too large for a float.
            continue
        # Reject NaN/inf on every path (including strings like "nan") so they
        # cannot poison max()/average computations downstream.
        if math.isfinite(score):
            return score
    return None
362
+
363
+
364
def feedback_id_for_retrieval(row: Dict[str, Any]) -> Optional[str]:
    """Return the first truthy id on a retrieval row, as a string.

    Keys are tried in the order ``feedbackId``, ``sourceFeedbackId``,
    ``queryFeedbackId``, ``id``; ``None`` is returned when all are missing
    or falsy.
    """
    candidate_keys = ("feedbackId", "sourceFeedbackId", "queryFeedbackId", "id")
    return next(
        (str(row[key]) for key in candidate_keys if row.get(key)),
        None,
    )
370
+
371
+
372
def unavailable_retrieval_metrics() -> Dict[str, Any]:
    """Return the placeholder metrics dict used when there are no retrieval rows."""
    return dict(
        available=False,
        rows=0,
        queries=0,
        averageTopScore=None,
        negativeNeighborRate=None,
        error=None,
    )
381
+
382
+
383
def bucket_retrieval_rows(retrieval_rows: List[Dict[str, Any]]) -> Dict[str, List[Dict[str, Any]]]:
    """Group retrieval rows by feedback id, keeping input order within each group.

    Rows without any usable id are collected under the key ``"unknown"``.
    """
    grouped: Dict[str, List[Dict[str, Any]]] = defaultdict(list)
    for record in retrieval_rows:
        key = feedback_id_for_retrieval(record)
        grouped[key if key else "unknown"].append(record)
    return grouped
389
+
390
+
391
def top_retrieval_scores(by_feedback: Dict[str, List[Dict[str, Any]]]) -> List[float]:
    """Return the best score of each feedback group.

    Groups in which no row yields a score contribute nothing to the result.
    """
    best_per_group: List[float] = []
    for group in by_feedback.values():
        usable = []
        for record in group:
            score = retrieval_score(record)
            if score is not None:
                usable.append(score)
        if usable:
            best_per_group.append(max(usable))
    return best_per_group
398
+
399
+
400
def retrieval_neighbor_summary(retrieval_rows: List[Dict[str, Any]]) -> Dict[str, int]:
    """Count retrieval rows whose neighbor carries a signal label.

    The neighbor's signal is read from ``matchedSignal``, then
    ``neighborSignal``, then ``signal``. Returns ``{"labeled": n,
    "negative": m}``, where ``labeled`` counts rows with a recognised signal
    and ``negative`` counts those that are negative.
    """
    labeled = 0
    negative = 0
    for record in retrieval_rows:
        raw_signal = record.get("matchedSignal") or record.get("neighborSignal") or record.get("signal")
        verdict = normalize_signal({"signal": raw_signal})
        if not verdict:
            continue
        labeled += 1
        if verdict == "negative":
            negative += 1
    return {"labeled": labeled, "negative": negative}
412
+
413
+
414
def compute_retrieval_metrics(retrieval_rows: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Summarise retrieval rows into headline metrics.

    Returns the "unavailable" placeholder for an empty input. Otherwise
    reports the row count, the number of distinct feedback groups, the mean
    of each group's best score (``None`` if no row had a score), and the
    share of labelled neighbors that are negative (``None`` if no neighbor
    was labelled).
    """
    if not retrieval_rows:
        return unavailable_retrieval_metrics()

    groups = bucket_retrieval_rows(retrieval_rows)
    best_scores = top_retrieval_scores(groups)
    neighbors = retrieval_neighbor_summary(retrieval_rows)
    labeled = neighbors["labeled"]

    if best_scores:
        average_top = round(sum(best_scores) / len(best_scores), 4)
    else:
        average_top = None

    if labeled:
        negative_rate = rate(neighbors["negative"], labeled)
    else:
        negative_rate = None

    return {
        "available": True,
        "rows": len(retrieval_rows),
        "queries": len(groups),
        "averageTopScore": average_top,
        "negativeNeighborRate": negative_rate,
        "error": None,
    }
431
+
432
+
433
# Confusion-matrix cell for each (expected, actual) pair, where expected is
# harmful/safe (from the feedback signal) and actual is blocked/allowed
# (from the gate decision).
GATE_OUTCOME_KEYS = dict([
    (("harmful", "blocked"), "truePositiveBlocks"),
    (("safe", "allowed"), "trueNegativeAllows"),
    (("safe", "blocked"), "falsePositiveBlocks"),
    (("harmful", "allowed"), "falseNegativeAllows"),
])
439
+
440
+
441
def initial_gate_counts() -> Dict[str, int]:
    """Return a fresh counter dict with every gate-outcome bucket set to zero."""
    buckets = (
        "truePositiveBlocks",
        "trueNegativeAllows",
        "falsePositiveBlocks",
        "falseNegativeAllows",
        "unlabeledFeedback",
    )
    return dict.fromkeys(buckets, 0)
449
+
450
+
451
def count_gate_outcomes(entries: List[Dict[str, Any]]) -> Dict[str, int]:
    """Tally confusion-matrix cells over feedback entries.

    Entries without a feedback signal are ignored. Entries with a signal but
    no gate decision are counted under ``unlabeledFeedback``. All others
    increment the cell that ``GATE_OUTCOME_KEYS`` assigns to their
    (expected, actual) pair.
    """
    tallies = initial_gate_counts()

    for expected, actual in map(explicit_gate_label, entries):
        if expected is None:
            continue
        if actual is None:
            tallies["unlabeledFeedback"] += 1
            continue

        cell = GATE_OUTCOME_KEYS.get((expected, actual))
        if cell:
            tallies[cell] += 1

    return tallies
467
+
468
+
469
def compute_f1(precision: Optional[float], recall: Optional[float], labeled: int) -> Optional[float]:
    """Compute the F1 score from precision and recall.

    Returns:
        ``None`` when ``labeled`` is falsy (no labeled decisions);
        ``0.0`` when either ``precision`` or ``recall`` is falsy (None or 0);
        otherwise the harmonic mean of the two, rounded to 4 decimals.
    """
    if labeled:
        if precision and recall:
            harmonic_mean = (2 * precision * recall) / (precision + recall)
            return round(harmonic_mean, 4)
        return 0.0
    return None
475
+
476
+
477
def compute_gate_metrics(entries: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Build the gate classifier metrics section of the report.

    Uses the tallies from ``count_gate_outcomes``. ``labeledDecisions`` is the
    sum of the four confusion-matrix counters. When it is zero, precision and
    recall are None, ``available`` is False and ``note`` explains that
    blocked/allowed labels are missing; otherwise ``note`` is None.
    """
    tallies = count_gate_outcomes(entries)
    tp, tn, fp, fn = (
        tallies[name]
        for name in (
            "truePositiveBlocks",
            "trueNegativeAllows",
            "falsePositiveBlocks",
            "falseNegativeAllows",
        )
    )
    labeled = tp + tn + fp + fn

    if labeled:
        precision = rate(tp, tp + fp)
        recall = rate(tp, tp + fn)
    else:
        precision = None
        recall = None
    f1 = compute_f1(precision, recall, labeled)

    if labeled:
        note = None
    else:
        note = "No explicit gate decision labels found; feedback quality metrics are available, but classifier precision/recall needs blocked/allowed labels."

    return {
        "available": labeled > 0,
        "labeledDecisions": labeled,
        "unlabeledFeedback": tallies["unlabeledFeedback"],
        "truePositiveBlocks": tp,
        "trueNegativeAllows": tn,
        "falsePositiveBlocks": fp,
        "falseNegativeAllows": fn,
        "precision": precision,
        "recall": recall,
        "f1": f1,
        "note": note,
    }
502
+
503
+
504
def base_recommendations(report: Dict[str, Any]) -> List[str]:
    """Return data-volume recommendations for the report.

    Emits one message when fewer than 10 usable entries exist and another when
    ``report["gateMetrics"]["available"]`` is falsy, in that order.
    """
    too_few_entries = report["usableEntries"] < 10
    gate_unavailable = not report["gateMetrics"]["available"]
    candidates = (
        (too_few_entries, "Collect at least 10 usable feedback entries before making threshold changes."),
        (gate_unavailable, "Start logging gate decisions as blocked/allowed so precision, recall, and false-positive rate can be computed."),
    )
    return [message for applies, message in candidates if applies]
511
+
512
+
513
def storage_recommendations(report: Dict[str, Any]) -> List[str]:
    """Return recommendations derived from the SQLite and retrieval metrics.

    Args:
        report: Report dict; the optional ``sqliteLessonMetrics`` and
            ``retrievalMetrics`` sections are read (missing or falsy sections
            are treated as empty).

    Returns:
        Up to two messages, SQLite first:
          * a backfill hint when SQLite metrics are available and
            ``negativeLessonCoverage`` is below 0.8 (a missing key counts as 0);
          * a retrieval-neighborhood hint when retrieval metrics are available
            and ``negativeNeighborRate`` is at least 0.5.
        A metric whose value is ``None`` produces no message.
    """
    items = []

    sqlite_metrics = report.get("sqliteLessonMetrics") or {}
    if sqlite_metrics.get("available"):
        coverage = sqlite_metrics.get("negativeLessonCoverage", 0)
        # An explicit None cannot be compared with a float; skip it, the same
        # way the retrieval branch below skips a None rate.
        if coverage is not None and coverage < 0.8:
            items.append("Backfill SQLite lesson rows for negative feedback before treating SQL dashboards as complete eval evidence.")

    retrieval_metrics = report.get("retrievalMetrics") or {}
    if retrieval_metrics.get("available"):
        negative_rate = retrieval_metrics.get("negativeNeighborRate")
        if negative_rate is not None and negative_rate >= 0.5:
            items.append("Inspect LanceDB retrieval neighborhoods: most labeled neighbors are negative, which is a good candidate for repeated-failure clustering.")

    return items
523
+
524
+
525
def category_recommendations(report: Dict[str, Any]) -> List[str]:
    """Recommend tightening rules for the first sufficiently-supported weak category.

    A category row is "weak" when its ``support`` is at least
    ``report["minSupport"]`` and its ``negativeRate`` is at least 0.5. Only the
    first weak row, in ``report["categoryMetrics"]`` order, is reported.

    Returns:
        A one-element list with the recommendation, or an empty list.
    """
    flagged = []
    for bucket in report["categoryMetrics"]:
        if bucket["support"] >= report["minSupport"]:
            if bucket["negativeRate"] >= 0.5:
                flagged.append(bucket)

    if not flagged:
        return []

    worst = flagged[0]
    message = "Tighten prevention rules for {}: {} negative signals across {} entries.".format(
        worst["category"], worst["negative"], worst["support"]
    )
    return [message]
537
+
538
+
539
def tag_recommendations(report: Dict[str, Any]) -> List[str]:
    """Flag the first tag whose feedback is too mixed to generalize from.

    A tag row qualifies when its ``support`` is at least
    ``report["minSupport"]`` and its ``positiveRate`` lies in the closed range
    [0.35, 0.65]. Only the first qualifying row, in ``report["tagMetrics"]``
    order, is reported.

    Returns:
        A one-element list with the recommendation, or an empty list.
    """
    mixed = []
    for bucket in report["tagMetrics"]:
        if bucket["support"] >= report["minSupport"]:
            positive_rate = bucket["positiveRate"]
            if 0.35 <= positive_rate and positive_rate <= 0.65:
                mixed.append(bucket)

    if mixed:
        first_tag = mixed[0]["tag"]
        return [
            f"Review mixed-signal tag '{first_tag}' before promoting broad rules; signal is not separable yet."
        ]
    return []
549
+
550
+
551
def build_recommendations(report: Dict[str, Any]) -> List[str]:
    """Collect every recommendation for the report, in a fixed section order.

    Runs the base, storage, category and tag recommendation builders in that
    order and concatenates their results. When none of them produces anything,
    a single "no action required" message is returned instead.
    """
    producers = (
        base_recommendations,
        storage_recommendations,
        category_recommendations,
        tag_recommendations,
    )
    collected = []
    for produce in producers:
        collected.extend(produce(report))

    if len(collected) == 0:
        collected.append("No immediate eval action required; keep collecting feedback and rerun this report after the next batch.")
    return collected
560
+
561
+
562
def evaluate_feedback(
    entries: List[Dict[str, Any]],
    invalid_entries: int = 0,
    min_support: int = 2,
    sqlite_lessons: Optional[Dict[str, Any]] = None,
    retrieval_rows: Optional[List[Dict[str, Any]]] = None,
) -> Dict[str, Any]:
    """Build the full feedback-quality report from parsed feedback entries.

    Only entries whose ``normalize_signal`` result is ``"positive"`` or
    ``"negative"`` are treated as usable. For each usable entry the first
    category from ``classify_entry``, the first tag (lower-cased, or
    ``"untagged"``), the failure type (lower-cased, or ``"unspecified"``) and
    the parsed timestamp (when parseable) are collected.

    Args:
        entries: Parsed feedback entries.
        invalid_entries: Count reported verbatim as ``invalidEntries``.
        min_support: Passed to the bucket summaries and stored as ``minSupport``.
        sqlite_lessons: Lesson data for SQLite coverage; a falsy value is
            replaced by an "unavailable" placeholder.
        retrieval_rows: Retrieval export rows; a falsy value becomes ``[]``.

    Returns:
        The report dict, including a ``recommendations`` list.
    """
    accepted_signals = frozenset(("positive", "negative"))

    usable = []
    labels = []
    category_column = []
    tag_column = []
    failure_column = []
    seen_at = []

    for record in entries:
        label = normalize_signal(record)
        if label not in accepted_signals:
            continue

        usable.append(record)
        labels.append(label)

        # Only the first category / first tag feeds the bucket summaries.
        category_column.append(classify_entry(record)[0])

        raw_tags = record.get("tags")
        tag_list = raw_tags if isinstance(raw_tags, list) else []
        if tag_list:
            tag_column.append(str(tag_list[0]).strip().lower())
        else:
            tag_column.append("untagged")

        failure_type = record.get("failureType") or "unspecified"
        failure_column.append(str(failure_type).strip().lower())

        moment = parse_timestamp(record.get("timestamp"))
        if moment:
            seen_at.append(moment)

    positive = sum(1 for label in labels if label == "positive")
    negative = sum(1 for label in labels if label == "negative")
    usable_count = len(usable)

    if seen_at:
        first_seen = min(seen_at).isoformat()
        last_seen = max(seen_at).isoformat()
    else:
        first_seen = None
        last_seen = None

    report = {
        "generatedAt": datetime.now(timezone.utc).isoformat(),
        "minSupport": min_support,
        "totalEntries": len(entries),
        "usableEntries": usable_count,
        "invalidEntries": invalid_entries,
        "positive": positive,
        "negative": negative,
        "positiveRate": rate(positive, usable_count),
        "negativeRate": rate(negative, usable_count),
        "firstTimestamp": first_seen,
        "lastTimestamp": last_seen,
        "categoryMetrics": summarize_bucket("category", category_column, labels, min_support),
        "tagMetrics": summarize_bucket("tag", tag_column, labels, min_support),
        "failureTypeMetrics": summarize_bucket("failureType", failure_column, labels, min_support),
        "gateMetrics": compute_gate_metrics(usable),
        "sqliteLessonMetrics": compute_sqlite_metrics(usable, sqlite_lessons or {"available": False, "error": None}),
        "retrievalMetrics": compute_retrieval_metrics(retrieval_rows or []),
    }
    # Recommendations are derived from the finished metric sections above.
    report["recommendations"] = build_recommendations(report)
    return report
614
+
615
+
616
def render_markdown(report: Dict[str, Any]) -> str:
    """Render the report dict as a Markdown document.

    Sections, in order: header summary, "Gate Metrics", "Highest-Risk
    Categories" (at most the first 8 rows), "SQLite Lesson Coverage",
    "LanceDB Retrieval Export" and "Recommendations". Sections whose metrics
    are unavailable get a single explanatory bullet instead.

    Returns:
        The document as one string; lines are joined with newlines and the
        text ends with a newline.
    """

    def bullet(label: str, value: Any) -> str:
        # One "- Label: value" Markdown list item.
        return f"- {label}: {value}"

    doc = ["# Feedback Quality Eval", ""]
    doc.append(bullet("Generated", report["generatedAt"]))
    doc.append("- Usable feedback: {} / {}".format(report["usableEntries"], report["totalEntries"]))
    doc.append(bullet("Positive rate", report["positiveRate"]))
    doc.append(bullet("Negative rate", report["negativeRate"]))
    doc += ["", "## Gate Metrics", ""]

    gate = report["gateMetrics"]
    if not gate["available"]:
        doc.append("- {}".format(gate["note"]))
    else:
        doc += [
            bullet("Labeled decisions", gate["labeledDecisions"]),
            bullet("Precision", gate["precision"]),
            bullet("Recall", gate["recall"]),
            bullet("F1", gate["f1"]),
            bullet("False positive blocks", gate["falsePositiveBlocks"]),
            bullet("False negative allows", gate["falseNegativeAllows"]),
        ]

    doc += ["", "## Highest-Risk Categories", ""]
    if not report["categoryMetrics"]:
        doc.append("- Not enough category support yet.")
    else:
        doc.append("| Category | Support | Positive | Negative | Negative rate |")
        doc.append("| --- | ---: | ---: | ---: | ---: |")
        for bucket in report["categoryMetrics"][:8]:
            doc.append(
                "| {} | {} | {} | {} | {} |".format(
                    bucket["category"],
                    bucket["support"],
                    bucket["positive"],
                    bucket["negative"],
                    bucket["negativeRate"],
                )
            )

    sqlite_metrics = report["sqliteLessonMetrics"]
    doc += ["", "## SQLite Lesson Coverage", ""]
    if sqlite_metrics["available"]:
        doc += [
            bullet("Lessons", sqlite_metrics["totalLessons"]),
            bullet("Feedback coverage", sqlite_metrics["feedbackLessonCoverage"]),
            bullet("Negative feedback coverage", sqlite_metrics["negativeLessonCoverage"]),
        ]
    else:
        # Append the error text only when one was recorded.
        detail = ": " + sqlite_metrics["error"] if sqlite_metrics.get("error") else ""
        doc.append(f"- Not available{detail}.")

    retrieval_metrics = report["retrievalMetrics"]
    doc += ["", "## LanceDB Retrieval Export", ""]
    if retrieval_metrics["available"]:
        doc += [
            bullet("Rows", retrieval_metrics["rows"]),
            bullet("Queries", retrieval_metrics["queries"]),
            bullet("Average top score", retrieval_metrics["averageTopScore"]),
            bullet("Negative neighbor rate", retrieval_metrics["negativeNeighborRate"]),
        ]
    else:
        doc.append("- Not available. Export retrieval rows to JSONL to evaluate semantic recall quality.")

    doc += ["", "## Recommendations", ""]
    for recommendation in report["recommendations"]:
        doc.append(f"- {recommendation}")
    doc.append("")  # trailing empty item yields the final newline
    return "\n".join(doc)
677
+
678
+
679
def parse_args() -> argparse.Namespace:
    """Parse the command-line options for the feedback evaluation script.

    Options: ``--feedback-log``, ``--feedback-dir``, ``--lesson-db``,
    ``--retrieval-log``, ``--min-support`` (int, default 2), ``--json`` (flag)
    and ``--write-report``.

    Returns:
        The populated ``argparse.Namespace``.
    """
    cli = argparse.ArgumentParser(description="Evaluate ThumbGate feedback quality from feedback-log.jsonl.")

    # Plain string-valued path options, registered in help-output order.
    path_options = (
        ("--feedback-log", "Path to feedback-log.jsonl. Defaults to the resolved ThumbGate feedback dir."),
        ("--feedback-dir", "Directory containing feedback-log.jsonl."),
        ("--lesson-db", "Path to lessons.sqlite for SQL lesson coverage metrics."),
        ("--retrieval-log", "JSONL export of LanceDB retrieval rows for semantic recall metrics."),
    )
    for flag, description in path_options:
        cli.add_argument(flag, help=description)

    cli.add_argument("--min-support", type=int, default=2, help="Minimum bucket support for category/tag metrics.")
    cli.add_argument("--json", action="store_true", help="Print JSON instead of Markdown.")
    cli.add_argument("--write-report", help="Write the rendered report to a file.")
    return cli.parse_args()
689
+
690
+
691
def main() -> int:
    """Command-line entry point: load inputs, evaluate feedback, emit the report.

    The feedback log is ``--feedback-log`` when given; otherwise
    ``feedback-log.jsonl`` inside ``--feedback-dir`` or, failing that, inside
    the directory returned by ``resolve_feedback_dir()``. The report is printed
    as JSON (``--json``) or Markdown and, with ``--write-report``, also written
    to that path (parent directories are created).

    Returns:
        Always 0.
    """
    args = parse_args()

    if args.feedback_log:
        feedback_log = Path(args.feedback_log)
    else:
        if args.feedback_dir:
            feedback_dir = Path(args.feedback_dir)
        else:
            feedback_dir = resolve_feedback_dir()
        feedback_log = feedback_dir / "feedback-log.jsonl"

    entries, invalid = read_jsonl(feedback_log)

    lesson_db = Path(args.lesson_db) if args.lesson_db else None
    retrieval_log = Path(args.retrieval_log) if args.retrieval_log else None
    if retrieval_log:
        retrieval_rows, retrieval_invalid = read_jsonl(retrieval_log)
    else:
        retrieval_rows, retrieval_invalid = [], 0
    sqlite_lessons = load_sqlite_lessons(lesson_db)

    report = evaluate_feedback(
        entries,
        invalid_entries=invalid,
        # Support threshold is clamped to at least 1.
        min_support=max(args.min_support, 1),
        sqlite_lessons=sqlite_lessons,
        retrieval_rows=retrieval_rows,
    )
    # Record where the inputs came from alongside the metrics.
    report["feedbackLog"] = str(feedback_log)
    report["lessonDb"] = str(lesson_db) if lesson_db else None
    report["retrievalLog"] = str(retrieval_log) if retrieval_log else None
    report["invalidRetrievalRows"] = retrieval_invalid

    if args.json:
        output = json.dumps(report, indent=2, sort_keys=True)
    else:
        output = render_markdown(report)

    if args.write_report:
        destination = Path(args.write_report)
        destination.parent.mkdir(parents=True, exist_ok=True)
        # The file always ends with exactly one trailing newline added if missing.
        file_text = output if output.endswith("\n") else output + "\n"
        destination.write_text(file_text, encoding="utf-8")

    print(output)
    return 0
722
+
723
+
724
if __name__ == "__main__":
    # Run the CLI and use main()'s return value as the process exit status.
    exit_status = main()
    raise SystemExit(exit_status)
package/src/api/server.js CHANGED
@@ -403,6 +403,27 @@ const TRACKED_LINK_TARGETS = Object.freeze({
403
403
  },
404
404
  allowCustomerEmail: true,
405
405
  },
406
+ // 2026-05-12: Aiventyx marketplace listing routes its Teams clicks through
407
+ // /go/teams (best-performing listing at ~62% CTR). Without this slug the
408
+ // server returned 404 + "Tracked link not found". Every Aiventyx Teams
409
+ // click between the URL swap and this deploy landed on that error page.
410
+ // Destination: 3-seat Team self-serve Stripe checkout (the path I shipped
411
+ // in PR #1877 — plan_id=team + seat_count=3 = $147/mo entry).
412
+ teams: {
413
+ path: '/checkout/pro',
414
+ ctaId: 'go_teams',
415
+ ctaPlacement: 'link_router',
416
+ eventType: 'cta_click',
417
+ defaults: {
418
+ utm_source: 'website',
419
+ utm_medium: 'link_router',
420
+ utm_campaign: 'team_self_serve',
421
+ plan_id: 'team',
422
+ seat_count: '3',
423
+ billing_cycle: 'monthly',
424
+ },
425
+ allowCustomerEmail: true,
426
+ },
406
427
  install: {
407
428
  path: '/guide',
408
429
  ctaId: 'go_install',
@@ -1511,7 +1532,7 @@ function renderCheckoutIntentPage({
1511
1532
  const teardownAction = safeWorkflowTeardownCheckoutHref
1512
1533
  ? `<a data-i="workflow_teardown_checkout" href="${safeWorkflowTeardownCheckoutHref}">Pay $99 teardown</a>`
1513
1534
  : '';
1514
- return `<!doctype html><html lang="en"><meta charset="utf-8"><meta name="viewport" content="width=device-width,initial-scale=1"><style>body{background:#0a0a0a;color:#eee;font-family:system-ui,sans-serif}div{max-width:560px;margin:12vh auto}a{display:block;margin:10px 0;padding:12px;border:1px solid #374151;color:inherit;text-align:center}.primary{background:#22d3ee;color:#000}.high-ticket{border-color:#4ade80;color:#4ade80}</style><div><h1>Choose the right paid path.</h1><p>For one repeated workflow failure, start with the diagnostic or sprint. Use Pro only when you need the local self-serve dashboard.</p>${diagnosticAction.replace('<a ', '<a class="high-ticket" ')}${sprintAction.replace('<a ', '<a class="high-ticket" ')}<a class="primary" data-i="pro_checkout_confirmed" href="${safeConfirmHref}">Pay in Stripe</a>${teardownAction}${quickReadAction}${firstRuleAction}<a data-i="workflow_sprint_intake" href="${safeWorkflowIntakeHref}">Send workflow first</a><a data-i="team_paid_path" href="${safeTeamOptionsHref}">See options</a><p>Stripe checkout.</p><a href="/">Back</a></div><script>addEventListener('click',e=>{let a=e.target.closest('[data-i]');if(a&&navigator.sendBeacon)navigator.sendBeacon('/v1/telemetry/ping',new Blob([JSON.stringify({eventType:'checkout_interstitial_cta_clicked',clientType:'web',page:'/checkout/pro',ctaId:a.dataset.i,ctaPlacement:'checkout_interstitial'})],{type:'application/json'}))})</script>`;
1535
+ return `<!doctype html><html lang="en"><meta charset="utf-8"><meta name="viewport" content="width=device-width,initial-scale=1"><title>Confirm โ€” ThumbGate Pro</title><style>body{background:#0a0a0a;color:#eee;font-family:system-ui,-apple-system,sans-serif;line-height:1.5}main{max-width:520px;margin:8vh auto;padding:0 20px}.brand{display:flex;align-items:center;gap:10px;margin-bottom:24px;font-size:14px;color:#94a3b8}.brand-mark{width:24px;height:24px;background:#22d3ee;border-radius:6px;display:inline-block}h1{font-size:24px;margin:0 0 8px;color:#fff}.price{font-size:32px;font-weight:700;color:#22d3ee;margin:8px 0 4px}.price small{font-size:14px;color:#94a3b8;font-weight:400}p{color:#cbd5e1;margin:8px 0}a{display:block;text-decoration:none}a.primary{background:#22d3ee;color:#000;padding:16px;text-align:center;border-radius:8px;font-weight:700;font-size:16px;margin:20px 0 10px}a.secondary{border:1px solid #374151;color:#cbd5e1;padding:12px;text-align:center;border-radius:8px;margin:8px 0;font-size:14px}.trust{margin:24px 0;padding:16px;border:1px solid #1f2937;border-radius:8px;background:#0f172a}.trust-item{font-size:13px;color:#cbd5e1;padding:4px 0;display:flex;gap:8px}.trust-item::before{content:"โœ“";color:#22d3ee;font-weight:700}details{margin-top:32px;font-size:13px;color:#94a3b8}details summary{cursor:pointer;padding:8px 0}details a{border:1px solid #374151;color:#94a3b8;padding:10px;text-align:center;border-radius:6px;margin:6px 0;font-size:13px}.back{text-align:center;color:#64748b;font-size:12px;margin-top:24px}.back a{color:#64748b;display:inline}</style><main><div class="brand"><span class="brand-mark"></span><span>ThumbGate</span></div><h1>Start ThumbGate Pro</h1><div class="price">$19<small>/mo</small></div><p>Block every repeat AI-agent mistake. Local-first. MIT-licensed CLI included. 
Cancel anytime.</p><a class="primary" data-i="pro_checkout_confirmed" href="${safeConfirmHref}">Pay $19/mo with Stripe โ†’</a><div class="trust"><div class="trust-item">6 paying customers, 18,000+ installs verified on npm</div><div class="trust-item">Cancel anytime โ€” instant refund within 7 days</div><div class="trust-item">MIT open source ยท no vendor lock-in</div><div class="trust-item">Works with Claude Code, Cursor, Codex, Gemini, Amp, Cline, OpenCode</div></div><details><summary>Other paid paths (diagnostic, sprint, teardown, single-rule)</summary>${diagnosticAction.replace('<a ', '<a class="secondary" ')}${sprintAction.replace('<a ', '<a class="secondary" ')}${teardownAction.replace('<a ', '<a class="secondary" ')}${quickReadAction.replace('<a ', '<a class="secondary" ')}${firstRuleAction.replace('<a ', '<a class="secondary" ')}<a class="secondary" data-i="workflow_sprint_intake" href="${safeWorkflowIntakeHref}">Send workflow first (intake)</a><a class="secondary" data-i="team_paid_path" href="${safeTeamOptionsHref}">See all options</a></details><p class="back"><a href="/">โ† Back to thumbgate.ai</a></p></main><script>addEventListener('click',e=>{let a=e.target.closest('[data-i]');if(a&&navigator.sendBeacon)navigator.sendBeacon('/v1/telemetry/ping',new Blob([JSON.stringify({eventType:'checkout_interstitial_cta_clicked',clientType:'web',page:'/checkout/pro',ctaId:a.dataset.i,ctaPlacement:'checkout_interstitial'})],{type:'application/json'}))})</script></html>`;
1515
1536
  }
1516
1537
 
1517
1538
  function buildCheckoutBootstrapBody(parsed, req, journeyState = resolveJourneyState(req, parsed)) {