thumbgate 1.4.2 → 1.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (279) hide show
  1. package/.claude-plugin/README.md +45 -34
  2. package/.claude-plugin/marketplace.json +3 -3
  3. package/.claude-plugin/plugin.json +3 -3
  4. package/.well-known/llms.txt +1 -1
  5. package/.well-known/mcp/server-card.json +1 -1
  6. package/README.md +26 -2
  7. package/adapters/README.md +4 -1
  8. package/adapters/claude/.mcp.json +2 -2
  9. package/adapters/codex/config.toml +2 -2
  10. package/adapters/mcp/server-stdio.js +10 -4
  11. package/adapters/opencode/opencode.json +1 -1
  12. package/bin/cli.js +246 -90
  13. package/config/mcp-allowlists.json +11 -3
  14. package/package.json +184 -21
  15. package/scripts/audit-trail.js +25 -15
  16. package/scripts/auto-wire-hooks.js +127 -0
  17. package/scripts/cli-demo.js +102 -0
  18. package/scripts/cli-schema.js +285 -0
  19. package/scripts/cli-status.js +166 -0
  20. package/scripts/cross-encoder-reranker.js +235 -0
  21. package/scripts/explore-subcommands.js +277 -0
  22. package/scripts/explore.js +569 -0
  23. package/scripts/feedback-loop.js +20 -6
  24. package/scripts/lesson-inference.js +7 -1
  25. package/scripts/lesson-reranker.js +263 -0
  26. package/scripts/lesson-retrieval.js +34 -17
  27. package/scripts/lesson-search.js +69 -0
  28. package/scripts/perplexity-client.js +210 -0
  29. package/scripts/reflector-agent.js +2 -2
  30. package/scripts/statusline-local-stats.js +3 -1
  31. package/scripts/statusline.sh +12 -11
  32. package/src/api/server.js +178 -17
  33. package/src/index.js +3 -0
  34. package/.claude-plugin/bundle/icon.png +0 -0
  35. package/.claude-plugin/bundle/icon.svg +0 -18
  36. package/.claude-plugin/bundle/server/index.js +0 -24
  37. package/adapters/chatgpt/INSTALL.md +0 -138
  38. package/bin/memory.sh +0 -64
  39. package/bin/obsidian-sync.sh +0 -20
  40. package/plugins/amp-skill/INSTALL.md +0 -52
  41. package/plugins/amp-skill/SKILL.md +0 -64
  42. package/plugins/claude-codex-bridge/.claude-plugin/plugin.json +0 -22
  43. package/plugins/claude-codex-bridge/.mcp.json +0 -14
  44. package/plugins/claude-codex-bridge/INSTALL.md +0 -43
  45. package/plugins/claude-codex-bridge/README.md +0 -46
  46. package/plugins/claude-codex-bridge/scripts/codex-bridge.js +0 -286
  47. package/plugins/claude-codex-bridge/skills/adversarial-review/SKILL.md +0 -24
  48. package/plugins/claude-codex-bridge/skills/result/SKILL.md +0 -22
  49. package/plugins/claude-codex-bridge/skills/review/SKILL.md +0 -28
  50. package/plugins/claude-codex-bridge/skills/second-pass/SKILL.md +0 -27
  51. package/plugins/claude-codex-bridge/skills/setup/SKILL.md +0 -21
  52. package/plugins/claude-codex-bridge/skills/status/SKILL.md +0 -19
  53. package/plugins/claude-skill/INSTALL.md +0 -55
  54. package/plugins/claude-skill/SKILL.md +0 -46
  55. package/plugins/codex-profile/.codex-plugin/plugin.json +0 -43
  56. package/plugins/codex-profile/.mcp.json +0 -14
  57. package/plugins/codex-profile/AGENTS.md +0 -20
  58. package/plugins/codex-profile/INSTALL.md +0 -89
  59. package/plugins/codex-profile/README.md +0 -61
  60. package/plugins/cursor-marketplace/.cursor-plugin/plugin.json +0 -23
  61. package/plugins/cursor-marketplace/CHANGELOG.md +0 -30
  62. package/plugins/cursor-marketplace/LICENSE +0 -21
  63. package/plugins/cursor-marketplace/README.md +0 -124
  64. package/plugins/cursor-marketplace/agents/reliability-reviewer.md +0 -31
  65. package/plugins/cursor-marketplace/assets/logo-400x400.png +0 -0
  66. package/plugins/cursor-marketplace/commands/capture-feedback.md +0 -33
  67. package/plugins/cursor-marketplace/commands/check-gates.md +0 -25
  68. package/plugins/cursor-marketplace/commands/show-lessons.md +0 -27
  69. package/plugins/cursor-marketplace/hooks/hooks.json +0 -10
  70. package/plugins/cursor-marketplace/mcp.json +0 -14
  71. package/plugins/cursor-marketplace/rules/feedback-capture.mdc +0 -34
  72. package/plugins/cursor-marketplace/rules/pre-action-gates.mdc +0 -30
  73. package/plugins/cursor-marketplace/rules/session-continuity.mdc +0 -28
  74. package/plugins/cursor-marketplace/scripts/gate-check.sh +0 -21
  75. package/plugins/cursor-marketplace/skills/capture-feedback/SKILL.md +0 -48
  76. package/plugins/cursor-marketplace/skills/prevention-rules/SKILL.md +0 -31
  77. package/plugins/cursor-marketplace/skills/recall-context/SKILL.md +0 -30
  78. package/plugins/cursor-marketplace/skills/search-lessons/SKILL.md +0 -33
  79. package/plugins/gemini-extension/INSTALL.md +0 -92
  80. package/plugins/gemini-extension/gemini_prompt.txt +0 -14
  81. package/plugins/gemini-extension/tool_contract.json +0 -45
  82. package/plugins/opencode-profile/INSTALL.md +0 -57
  83. package/public/assets/instagram-card.png +0 -0
  84. package/public/assets/tiktok-agent-memory.mp4 +0 -0
  85. package/public/blog.html +0 -474
  86. package/public/compare/mem0.html +0 -189
  87. package/public/compare/speclock.html +0 -180
  88. package/public/compare.html +0 -310
  89. package/public/dashboard.html +0 -1100
  90. package/public/guide.html +0 -317
  91. package/public/guides/claude-code-prevent-repeated-mistakes.html +0 -161
  92. package/public/guides/codex-cli-guardrails.html +0 -158
  93. package/public/guides/cursor-prevent-repeated-mistakes.html +0 -161
  94. package/public/guides/pre-action-gates.html +0 -162
  95. package/public/guides/stop-repeated-ai-agent-mistakes.html +0 -159
  96. package/public/index.html +0 -1128
  97. package/public/js/buyer-intent.js +0 -252
  98. package/public/learn/agent-harness-pattern.html +0 -180
  99. package/public/learn/ai-agent-persistent-memory.html +0 -203
  100. package/public/learn/learn.css +0 -45
  101. package/public/learn/mcp-pre-action-gates-explained.html +0 -172
  102. package/public/learn/stop-ai-agent-force-push.html +0 -134
  103. package/public/learn/vibe-coding-safety-net.html +0 -142
  104. package/public/learn.html +0 -274
  105. package/public/lessons.html +0 -967
  106. package/public/llm-context.md +0 -140
  107. package/public/pro.html +0 -1087
  108. package/public/vercel.json +0 -8
  109. package/scripts/a2ui-engine.js +0 -73
  110. package/scripts/adk-consolidator.js +0 -274
  111. package/scripts/agent-security-hardening.js +0 -225
  112. package/scripts/ai-search-visibility.js +0 -142
  113. package/scripts/autonomous-sales-agent.js +0 -39
  114. package/scripts/autoresearch-runner.js +0 -216
  115. package/scripts/background-agent-governance.js +0 -229
  116. package/scripts/behavioral-extraction.js +0 -93
  117. package/scripts/budget-enforcer.js +0 -173
  118. package/scripts/budget-guard.js +0 -173
  119. package/scripts/build-claude-mcpb.js +0 -255
  120. package/scripts/build-codex-plugin.js +0 -152
  121. package/scripts/capture-railway-diagnostics.sh +0 -97
  122. package/scripts/changeset-check.js +0 -372
  123. package/scripts/check-congruence.js +0 -443
  124. package/scripts/computer-use-firewall.js +0 -280
  125. package/scripts/content-engine/linkedin-content-generator.js +0 -154
  126. package/scripts/content-engine/output/linkedin-memento-validation.md +0 -17
  127. package/scripts/content-engine/output/linkedin-posts-2026-04-09.md +0 -175
  128. package/scripts/content-engine/reddit-thread-finder.js +0 -154
  129. package/scripts/context-engine.js +0 -710
  130. package/scripts/daily-digest.js +0 -11
  131. package/scripts/data-governance.js +0 -173
  132. package/scripts/deploy-gcp.sh +0 -44
  133. package/scripts/deploy-policy.js +0 -249
  134. package/scripts/disagreement-mining.js +0 -315
  135. package/scripts/dpo-optimizer.js +0 -206
  136. package/scripts/ensure-repo-bootstrap.js +0 -130
  137. package/scripts/ephemeral-agent-store.js +0 -212
  138. package/scripts/eval-harness.js +0 -56
  139. package/scripts/export-kto-pairs.js +0 -309
  140. package/scripts/export-training.js +0 -446
  141. package/scripts/feedback-fallback.js +0 -111
  142. package/scripts/feedback-inbox-read.js +0 -162
  143. package/scripts/feedback-root-consolidator.js +0 -233
  144. package/scripts/feedback-to-memory.js +0 -185
  145. package/scripts/gate-satisfy.js +0 -42
  146. package/scripts/generate-paperbanana-diagrams.sh +0 -99
  147. package/scripts/generate-pretool-hook.sh +0 -40
  148. package/scripts/github-about.js +0 -430
  149. package/scripts/github-outreach.js +0 -65
  150. package/scripts/gtm-revenue-loop.js +0 -535
  151. package/scripts/hallucination-detector.js +0 -226
  152. package/scripts/hf-papers.js +0 -317
  153. package/scripts/hook-auto-capture.sh +0 -100
  154. package/scripts/hook-stop-pr-thread-check.sh +0 -68
  155. package/scripts/hook-stop-self-score.sh +0 -51
  156. package/scripts/hook-stop-verify-deploy.sh +0 -31
  157. package/scripts/hook-verify-before-done.sh +0 -20
  158. package/scripts/managed-dpo-export.js +0 -91
  159. package/scripts/markdown-escape.js +0 -12
  160. package/scripts/marketing-experiment.js +0 -657
  161. package/scripts/memalign-recall.js +0 -111
  162. package/scripts/memory-migration.js +0 -296
  163. package/scripts/meta-policy.js +0 -190
  164. package/scripts/metered-billing.js +0 -16
  165. package/scripts/model-tier-router.js +0 -310
  166. package/scripts/money-watcher.js +0 -218
  167. package/scripts/multi-hop-recall.js +0 -240
  168. package/scripts/per-step-scoring.js +0 -163
  169. package/scripts/perplexity-marketing.js +0 -466
  170. package/scripts/pii-scanner.js +0 -153
  171. package/scripts/plan-gate.js +0 -154
  172. package/scripts/post-everywhere.js +0 -341
  173. package/scripts/post-to-x-retry.sh +0 -22
  174. package/scripts/post-to-x.js +0 -369
  175. package/scripts/pr-manager.js +0 -421
  176. package/scripts/principle-extractor.js +0 -162
  177. package/scripts/pro-features.js +0 -41
  178. package/scripts/prompt-dlp.js +0 -222
  179. package/scripts/prove-adapters.js +0 -860
  180. package/scripts/prove-attribution.js +0 -361
  181. package/scripts/prove-automation.js +0 -651
  182. package/scripts/prove-autoresearch.js +0 -304
  183. package/scripts/prove-claim-verification.js +0 -277
  184. package/scripts/prove-cloudflare-sandbox.js +0 -161
  185. package/scripts/prove-data-pipeline.js +0 -408
  186. package/scripts/prove-data-quality.js +0 -227
  187. package/scripts/prove-evolution.js +0 -352
  188. package/scripts/prove-harnesses.js +0 -287
  189. package/scripts/prove-intelligence.js +0 -257
  190. package/scripts/prove-lancedb.js +0 -425
  191. package/scripts/prove-local-intelligence.js +0 -340
  192. package/scripts/prove-loop-closure.js +0 -263
  193. package/scripts/prove-packaged-runtime.js +0 -326
  194. package/scripts/prove-predictive-insights.js +0 -355
  195. package/scripts/prove-runtime.js +0 -363
  196. package/scripts/prove-seo-gsd.js +0 -234
  197. package/scripts/prove-settings.js +0 -279
  198. package/scripts/prove-subway-upgrades.js +0 -277
  199. package/scripts/prove-tessl.js +0 -229
  200. package/scripts/prove-training-export.js +0 -325
  201. package/scripts/prove-workflow-contract.js +0 -112
  202. package/scripts/prove-xmemory.js +0 -332
  203. package/scripts/publish-decision.js +0 -159
  204. package/scripts/ralph-loop.js +0 -376
  205. package/scripts/ralph-mode-ci.js +0 -331
  206. package/scripts/reddit-dm-outreach.js +0 -192
  207. package/scripts/reddit-monitor-cron.sh +0 -26
  208. package/scripts/reminder-engine.js +0 -132
  209. package/scripts/revenue-status.js +0 -472
  210. package/scripts/rotate-stripe-webhook-secret.js +0 -314
  211. package/scripts/schedule-manager.js +0 -249
  212. package/scripts/self-healing-check.js +0 -193
  213. package/scripts/shieldcortex-memory-firewall-runner.mjs +0 -53
  214. package/scripts/skill-exporter.js +0 -260
  215. package/scripts/skill-materializer.js +0 -134
  216. package/scripts/skill-packs.js +0 -136
  217. package/scripts/skill-proposer.js +0 -99
  218. package/scripts/skill-quality-tracker.js +0 -282
  219. package/scripts/slow-loop.js +0 -72
  220. package/scripts/social-analytics/db/analytics.sqlite +0 -0
  221. package/scripts/social-analytics/db/schema.sql +0 -32
  222. package/scripts/social-analytics/digest.js +0 -256
  223. package/scripts/social-analytics/engagement-audit.js +0 -185
  224. package/scripts/social-analytics/generate-instagram-card.js +0 -97
  225. package/scripts/social-analytics/instagram-thumbgate-post.js +0 -111
  226. package/scripts/social-analytics/install-growth-automation.js +0 -114
  227. package/scripts/social-analytics/load-env.js +0 -77
  228. package/scripts/social-analytics/mcp-server.js +0 -289
  229. package/scripts/social-analytics/normalizer.js +0 -580
  230. package/scripts/social-analytics/notify.js +0 -162
  231. package/scripts/social-analytics/poll-all.js +0 -107
  232. package/scripts/social-analytics/pollers/github.js +0 -195
  233. package/scripts/social-analytics/pollers/instagram.js +0 -253
  234. package/scripts/social-analytics/pollers/linkedin.js +0 -340
  235. package/scripts/social-analytics/pollers/plausible.js +0 -245
  236. package/scripts/social-analytics/pollers/reddit.js +0 -306
  237. package/scripts/social-analytics/pollers/threads.js +0 -233
  238. package/scripts/social-analytics/pollers/tiktok.js +0 -203
  239. package/scripts/social-analytics/pollers/x.js +0 -227
  240. package/scripts/social-analytics/pollers/youtube.js +0 -304
  241. package/scripts/social-analytics/pollers/zernio.js +0 -183
  242. package/scripts/social-analytics/publish-instagram-thumbgate.js +0 -104
  243. package/scripts/social-analytics/publish-thumbgate-launch.js +0 -322
  244. package/scripts/social-analytics/publishers/devto.js +0 -122
  245. package/scripts/social-analytics/publishers/instagram.js +0 -317
  246. package/scripts/social-analytics/publishers/linkedin.js +0 -294
  247. package/scripts/social-analytics/publishers/reddit.js +0 -385
  248. package/scripts/social-analytics/publishers/threads.js +0 -275
  249. package/scripts/social-analytics/publishers/tiktok.js +0 -217
  250. package/scripts/social-analytics/publishers/x.js +0 -259
  251. package/scripts/social-analytics/publishers/youtube.js +0 -223
  252. package/scripts/social-analytics/publishers/zernio.js +0 -539
  253. package/scripts/social-analytics/reconcile-thumbgate-campaign.js +0 -165
  254. package/scripts/social-analytics/run-digest.js +0 -34
  255. package/scripts/social-analytics/schedule-thumbgate-campaign.js +0 -275
  256. package/scripts/social-analytics/store.js +0 -455
  257. package/scripts/social-analytics/sync-launch-assets.js +0 -185
  258. package/scripts/social-analytics/utm.js +0 -143
  259. package/scripts/social-pipeline.js +0 -2626
  260. package/scripts/social-post-hourly.js +0 -228
  261. package/scripts/social-quality-gate.js +0 -134
  262. package/scripts/social-reply-monitor.js +0 -592
  263. package/scripts/status-dashboard.js +0 -155
  264. package/scripts/stripe-live-status.js +0 -115
  265. package/scripts/subagent-profiles.js +0 -79
  266. package/scripts/sync-branch-protection.js +0 -340
  267. package/scripts/sync-gh-secrets-from-env.sh +0 -70
  268. package/scripts/sync-github-about.js +0 -55
  269. package/scripts/sync-version.js +0 -479
  270. package/scripts/synthetic-dpo.js +0 -234
  271. package/scripts/tessl-export.js +0 -369
  272. package/scripts/test-coverage.js +0 -128
  273. package/scripts/thumbgate_session_start.sh +0 -32
  274. package/scripts/train_from_feedback.py +0 -929
  275. package/scripts/validate-feedback.js +0 -581
  276. package/scripts/verify-obsidian-setup.sh +0 -269
  277. package/scripts/verify-run.js +0 -269
  278. package/scripts/weekly-auto-post.js +0 -124
  279. package/scripts/x-autonomous-marketing.js +0 -139
@@ -1,929 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Thompson Sampling Feedback Model Trainer
4
-
5
- Beta-Bernoulli Thompson Sampling for per-category reliability estimation.
6
- Reads from feedback-log.jsonl and builds a Bayesian model of Claude's
7
- performance across different task categories.
8
-
9
- Usage:
10
- python train_from_feedback.py --train # Full rebuild from JSONL
11
- python train_from_feedback.py --incremental # Update with latest entry
12
- python train_from_feedback.py --reliability # Print reliability table
13
- python train_from_feedback.py --sample # Sample from posteriors
14
- python train_from_feedback.py --snapshot # Save model snapshot
15
- python train_from_feedback.py --dpo-train # DPO batch optimization (Feb 2026)
16
- python train_from_feedback.py --config config.json # Use custom categories
17
-
18
- This script only reads and writes local feedback artifacts under the active ThumbGate feedback directory.
19
- Those runtime outputs are git-ignored even though this utility is intentionally versioned.
20
- """
21
-
22
- import json
23
- import math
24
- import random
25
- import argparse
26
- import os
27
- from datetime import datetime
28
- from pathlib import Path
29
- from typing import Dict, List, Any, Optional, Tuple
30
-
31
- # Configuration
32
- PROJECT_ROOT = Path(__file__).parent.parent
33
-
34
def resolve_feedback_dir() -> Path:
    """Pick the directory that holds the ThumbGate feedback artifacts.

    Resolution order (first hit wins):
      1. the ``THUMBGATE_FEEDBACK_DIR`` environment variable, when non-empty;
      2. ``PROJECT_ROOT/.thumbgate`` when it exists;
      3. ``PROJECT_ROOT/.claude/memory/feedback`` when it exists;
      4. ``~/.thumbgate/projects/<PROJECT_ROOT.name>`` (returned without an
         existence check).
    """
    override = os.environ.get("THUMBGATE_FEEDBACK_DIR")
    if override:
        return Path(override)

    # In-project locations, newest layout first, legacy layout second.
    in_project_candidates = (
        PROJECT_ROOT / ".thumbgate",
        PROJECT_ROOT / ".claude" / "memory" / "feedback",
    )
    for candidate in in_project_candidates:
        if candidate.exists():
            return candidate

    return Path.home() / ".thumbgate" / "projects" / PROJECT_ROOT.name
48
-
49
- FEEDBACK_DIR = resolve_feedback_dir()
50
- FEEDBACK_LOG = FEEDBACK_DIR / "feedback-log.jsonl"
51
- MODEL_FILE = FEEDBACK_DIR / "feedback_model.json"
52
- SNAPSHOTS_DIR = FEEDBACK_DIR / "model_snapshots"
53
-
54
- # Default categories (overridden by --config)
55
- DEFAULT_CATEGORIES = {
56
- "code_edit": {
57
- "keywords": ["edit", "write", "implement", "refactor", "fix", "update", "create file"],
58
- "tools": ["Edit", "Write", "MultiEdit"],
59
- },
60
- "git": {
61
- "keywords": ["commit", "push", "branch", "merge", "pr", "pull request", "rebase", "cherry-pick"],
62
- "tools": ["Bash"],
63
- },
64
- "testing": {
65
- "keywords": ["test", "jest", "coverage", "reassure", "perf", "spec", "mock", "assert"],
66
- "tools": [],
67
- },
68
- "pr_review": {
69
- "keywords": ["review", "pr comment", "resolve", "minimize", "thread", "feedback"],
70
- "tools": [],
71
- },
72
- "search": {
73
- "keywords": ["search", "find", "grep", "glob", "explore", "where is", "look for"],
74
- "tools": ["Grep", "Glob", "Read"],
75
- },
76
- "architecture": {
77
- "keywords": ["architecture", "design", "pattern", "structure", "fsd", "module", "navigation"],
78
- "tools": [],
79
- },
80
- "security": {
81
- "keywords": ["security", "secret", "vulnerability", "injection", "xss", "owasp", "trufflehog"],
82
- "tools": [],
83
- },
84
- "debugging": {
85
- "keywords": ["debug", "error", "crash", "stack trace", "log", "diagnose", "investigate"],
86
- "tools": [],
87
- },
88
- }
89
-
90
- # Time decay configuration (2026 upgrade: exponential decay with half-life)
91
- # Step decay (legacy)
92
- DECAY_WEIGHTS = {
93
- 7: 1.0, # < 7 days: full weight
94
- 30: 0.5, # 7-30 days: half weight
95
- None: 0.25 # > 30 days: quarter weight
96
- }
97
-
98
- # Exponential decay (2026 best practice)
99
- # Half-life of 7 days: feedback loses half its weight every 7 days
100
- HALF_LIFE_DAYS = 7.0
101
- USE_EXPONENTIAL_DECAY = True # Toggle between step and exponential
102
-
103
-
104
def ensure_category(model: Dict[str, Any], category_name: str) -> None:
    """Register ``category_name`` in ``model`` unless it is already tracked.

    A new category starts from the uniform prior Beta(1, 1) with zero samples
    and no ``last_updated`` timestamp. An existing category is left exactly
    as it is. The ``"categories"`` mapping is created on ``model`` if absent.
    """
    known = model.setdefault("categories", {})
    if category_name not in known:
        known[category_name] = dict(
            alpha=1.0,
            beta=1.0,
            samples=0,
            last_updated=None,
        )
116
-
117
-
118
def load_config(config_path: Optional[str]) -> Dict:
    """Load the category configuration from a JSON file, or use the defaults.

    Args:
        config_path: Path to a JSON file describing categories, or ``None``.

    Returns:
        The parsed JSON content when ``config_path`` names an existing regular
        file; otherwise ``DEFAULT_CATEGORIES``.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    if config_path:
        path = Path(config_path)
        # is_file() rather than exists(): a directory path falls back to the
        # defaults instead of failing inside read_text().
        if path.is_file():
            # JSON is UTF-8 by specification; do not depend on the locale's
            # default encoding (which is not UTF-8 on every platform).
            return json.loads(path.read_text(encoding="utf-8"))
    return DEFAULT_CATEGORIES
125
-
126
-
127
def load_model(categories: Optional[Dict[str, Any]] = None) -> Dict:
    """Load the persisted model, or build a fresh one with uniform priors.

    Args:
        categories: Category configuration used when a fresh model has to be
            created; ``DEFAULT_CATEGORIES`` is used when omitted or empty.

    Returns:
        The JSON object stored in ``MODEL_FILE`` when that file exists and
        holds a JSON object. A missing, undecodable, or non-object file yields
        a newly created initial model instead.
    """
    if MODEL_FILE.exists():
        try:
            stored = json.loads(MODEL_FILE.read_text(encoding="utf-8"))
        except (json.JSONDecodeError, UnicodeDecodeError):
            # Corrupt file: fall through and start from the priors.
            stored = None
        # Valid JSON that is not an object (e.g. `[]` or `null`) cannot be a
        # model; callers index it by key, so treat it like a corrupt file.
        if isinstance(stored, dict):
            return stored
    return create_initial_model(categories or DEFAULT_CATEGORIES)
135
-
136
-
137
def create_initial_model(categories: Dict) -> Dict:
    """Create a model with uniform Beta(1, 1) priors for every category.

    Args:
        categories: Iterable of category names (typically the category
            configuration mapping; only its keys are used).

    Returns:
        A new model dict with ``version``, ``created``, ``updated``,
        ``total_entries`` and one ``categories`` entry per given name.
    """
    # Read the clock once so a brand-new model has created == updated.
    now_iso = datetime.now().isoformat()
    model = {
        "version": 1,
        "created": now_iso,
        "updated": now_iso,
        "total_entries": 0,
        "categories": {},
    }
    for cat_name in categories:
        ensure_category(model, cat_name)
    return model
149
-
150
-
151
def _path_is_within(path: Path, root: Path) -> bool:
    """Return True when ``path`` is ``root`` itself or located beneath it."""
    try:
        path.relative_to(root)
    except ValueError:
        return False
    return True


def save_model(model: Dict):
    """Write ``model`` to ``MODEL_FILE`` as indented JSON.

    The ``"updated"`` field of ``model`` is set to the current time before
    writing, and missing parent directories are created.

    Raises:
        ValueError: If the resolved model path lies outside both
            ``PROJECT_ROOT`` and ``FEEDBACK_DIR``.
    """
    # Resolve and verify path stays within trusted local ThumbGate roots (CodeQL S2083)
    resolved = MODEL_FILE.resolve()
    allowed_roots = [PROJECT_ROOT.resolve(), FEEDBACK_DIR.resolve()]
    # Compare path components, not string prefixes: a plain startswith() would
    # accept "/work/proj-other/x" as being inside "/work/proj".
    if not any(_path_is_within(resolved, root) for root in allowed_roots):
        raise ValueError(f"Model path escapes allowed ThumbGate roots: {resolved}")
    resolved.parent.mkdir(parents=True, exist_ok=True)
    model["updated"] = datetime.now().isoformat()
    resolved.write_text(json.dumps(model, indent=2), encoding="utf-8")
161
-
162
-
163
def time_decay_weight(timestamp_str: str) -> float:
    """Compute the time-decay weight of a feedback entry from its timestamp.

    With ``USE_EXPONENTIAL_DECAY`` enabled the weight is
    ``2 ** (-age_days / HALF_LIFE_DAYS)`` with a floor of 0.01; otherwise the
    legacy step table ``DECAY_WEIGHTS`` is used.

    Args:
        timestamp_str: ISO-8601 timestamp. A trailing ``Z`` is read as UTC and
            explicit offsets (``+hh:mm`` / ``-hh:mm``) are honoured;
            timestamps without an offset are compared against local time.

    Returns:
        The weight for the entry's age in whole days. Unparseable or
        non-string timestamps get ``DECAY_WEIGHTS[None]``. Timestamps in the
        future are treated as age 0, so the weight never exceeds 1.0.
    """
    try:
        normalized = timestamp_str.strip()
        # fromisoformat() only accepts a literal "Z" suffix on newer Python
        # versions; spell it as an explicit UTC offset instead.
        if normalized.endswith(("Z", "z")):
            normalized = normalized[:-1] + "+00:00"
        entry_time = datetime.fromisoformat(normalized)
    except (ValueError, AttributeError, TypeError):
        return DECAY_WEIGHTS[None]

    # Take "now" in the entry's own timezone (naive local time when the entry
    # carries no offset) so aware and naive datetimes are never mixed.
    now = datetime.now(entry_time.tzinfo)
    # Clamp: clock skew or a future timestamp must not produce a negative age,
    # which would yield an exponential weight greater than 1.
    age_days = max((now - entry_time).days, 0)

    if USE_EXPONENTIAL_DECAY:
        # Exponential decay: weight = 2^(-age/half_life)
        # At age=0: weight=1.0, at age=half_life: weight=0.5, etc.
        weight = 2 ** (-age_days / HALF_LIFE_DAYS)
        return max(weight, 0.01)  # Floor at 1% to prevent zero weights

    # Legacy step decay: walk the numeric thresholds in ascending order; the
    # None key (sorted last) is the catch-all for anything older.
    for threshold, weight in sorted(DECAY_WEIGHTS.items(), key=lambda x: (x[0] is None, x[0])):
        if threshold is not None and age_days < threshold:
            return weight
    return DECAY_WEIGHTS[None]
188
-
189
-
190
def _as_search_text(value: Any) -> str:
    """Lower-case ``value`` for matching; falsy values become an empty string."""
    if not value:
        return ""
    return str(value).lower()


def classify_entry(entry: Dict, categories: Dict) -> List[str]:
    """Classify a feedback entry into categories based on keywords and tools.

    Keywords are matched as case-insensitive substrings of the entry's
    ``context``, ``message``, ``last_action`` and (list-valued) ``tags``.
    Tool names are matched as case-insensitive substrings of ``last_tool``.

    Args:
        entry: One feedback record. Missing, ``None`` and non-string field
            values are tolerated (non-strings are stringified for matching).
        categories: Mapping of category name to a config dict with optional
            ``"keywords"`` and ``"tools"`` lists.

    Returns:
        Names of all matching categories in ``categories`` order, or
        ``["uncategorized"]`` when nothing matches.
    """
    context = _as_search_text(entry.get("context", ""))
    message = _as_search_text(entry.get("message", ""))
    last_action = _as_search_text(entry.get("last_action", ""))
    last_tool = _as_search_text(entry.get("last_tool", ""))

    tags = entry.get("tags", [])
    if isinstance(tags, list):
        # Individual tags may be null or non-string in hand-edited logs.
        tags_str = " ".join(_as_search_text(t) for t in tags)
    else:
        tags_str = ""

    searchable = f"{context} {message} {last_action} {tags_str}"

    matched = []
    for cat_name, cat_config in categories.items():
        keywords = cat_config.get("keywords", []) or []
        tools = cat_config.get("tools", []) or []

        # NOTE(review): substring matching means short keywords also hit
        # inside longer words (e.g. "pr" in "improve") - confirm intended.
        keyword_match = any(str(kw).lower() in searchable for kw in keywords)
        tool_match = any(str(t).lower() in last_tool for t in tools)

        if keyword_match or tool_match:
            matched.append(cat_name)

    return matched if matched else ["uncategorized"]
221
-
222
-
223
def load_feedback_entries() -> List[Dict]:
    """Load all feedback entries from the JSONL log at ``FEEDBACK_LOG``.

    Returns:
        One dict per usable line, in file order. Returns an empty list when
        the log does not exist. Blank lines, lines that are not valid JSON,
        and lines whose JSON value is not an object are skipped.
    """
    if not FEEDBACK_LOG.exists():
        return []

    entries = []
    # JSON Lines files are UTF-8; do not depend on the locale default.
    with open(FEEDBACK_LOG, encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                record = json.loads(line)
            except json.JSONDecodeError:
                # Best effort: one damaged line must not discard the log.
                continue
            # Consumers call .get() on every entry, so a bare number, string
            # or list on a line would crash them later.
            if isinstance(record, dict):
                entries.append(record)
    return entries
239
-
240
-
241
def is_positive(entry: Dict) -> bool:
    """Determine whether a feedback entry is positive.

    An entry counts as positive when its ``reward`` is a number greater than
    zero, or when its ``signal`` or ``feedback`` field is one of
    ``"positive"``, ``"up"`` or ``"thumbsup"`` (case-insensitive).

    Args:
        entry: One feedback record. Missing, ``None`` or wrongly typed fields
            are treated as "not positive" rather than raising.

    Returns:
        True for positive feedback, False otherwise.
    """
    reward = entry.get("reward", 0)
    # JSON null or a string reward must not blow up the comparison.
    if isinstance(reward, (int, float)) and reward > 0:
        return True

    positive_labels = ("positive", "up", "thumbsup")
    # ThumbGate uses the signal field; "feedback" is checked the same way.
    for field in ("signal", "feedback"):
        label = entry.get(field)
        if isinstance(label, str) and label.lower() in positive_labels:
            return True
    return False
251
-
252
-
253
def train_full(categories: Dict) -> Dict:
    """Rebuild the model from scratch using every entry in the feedback log.

    Starts from uniform priors, then for each logged entry adds its
    time-decay weight to ``alpha`` (positive feedback) or ``beta`` (anything
    else) of every category the entry is classified into. The finished model
    is saved to disk and returned.

    Args:
        categories: Category configuration used for classification.

    Returns:
        The freshly computed model.
    """
    history = load_feedback_entries()
    model = create_initial_model(categories)
    model["total_entries"] = len(history)

    # The fallback bucket always exists, even when no entry lands in it.
    ensure_category(model, "uncategorized")

    for record in history:
        weight = time_decay_weight(record.get("timestamp", ""))
        matched = classify_entry(record, categories)
        bucket = "alpha" if is_positive(record) else "beta"

        for name in matched:
            ensure_category(model, name)
            stats = model["categories"][name]
            stats[bucket] += weight
            stats["samples"] += 1
            stats["last_updated"] = record.get("timestamp")

    save_model(model)
    return model
280
-
281
-
282
def train_incremental(categories: Dict) -> Dict:
    """Fold the most recent log entry into the persisted model.

    Loads the stored model (or a fresh one), makes sure every configured
    category plus ``"uncategorized"`` exists, applies the last entry of the
    feedback log, updates ``total_entries`` and saves the result.

    Note that the last entry is applied on every call; the function keeps no
    record of whether that entry was already counted.

    Args:
        categories: Category configuration used for classification.

    Returns:
        The updated model, or the loaded model unchanged (and unsaved) when
        the feedback log has no entries.
    """
    history = load_feedback_entries()
    if not history:
        return load_model(categories)

    model = load_model(categories)

    # A model persisted under an older configuration may lack categories.
    for name in categories:
        ensure_category(model, name)
    ensure_category(model, "uncategorized")

    newest = history[-1]
    weight = time_decay_weight(newest.get("timestamp", ""))
    matched = classify_entry(newest, categories)
    bucket = "alpha" if is_positive(newest) else "beta"

    for name in matched:
        ensure_category(model, name)
        stats = model["categories"][name]
        stats[bucket] += weight
        stats["samples"] += 1
        stats["last_updated"] = newest.get("timestamp")

    model["total_entries"] = len(history)
    save_model(model)
    return model
314
-
315
-
316
def compute_reliability(model: Dict) -> List[Tuple[str, float, float, float, int, float]]:
    """Summarise each category's Beta posterior, most reliable first.

    Args:
        model: Model dict whose ``"categories"`` entries provide ``alpha``,
            ``beta`` and ``samples``.

    Returns:
        A list of ``(name, alpha, beta, reliability, samples, ci_width)``
        tuples sorted by descending reliability. ``reliability`` is the
        posterior mean ``alpha / (alpha + beta)``; ``ci_width`` is
        ``2 * 1.96 * sqrt(variance)`` using the Beta variance
        ``ab / ((a+b)^2 * (a+b+1))``. When ``alpha + beta`` is not positive
        the row falls back to reliability 0.5 and width 1.0.
    """
    rows = []
    for name, stats in model.get("categories", {}).items():
        alpha = stats["alpha"]
        beta_val = stats["beta"]
        samples = stats["samples"]
        total = alpha + beta_val

        if total > 0:
            reliability = alpha / total
            variance = (alpha * beta_val) / (total * total * (total + 1))
            ci_width = 2 * 1.96 * math.sqrt(variance)
        else:
            # No usable evidence: neutral estimate, maximal uncertainty.
            reliability = 0.5
            ci_width = 1.0

        rows.append((name, alpha, beta_val, reliability, samples, ci_width))

    # Stable sort, so ties keep their order from the model.
    rows.sort(key=lambda row: row[3], reverse=True)
    return rows
339
-
340
-
341
def sample_posteriors(model: Dict) -> Dict[str, float]:
    """Draw one Thompson sample from every category's Beta posterior.

    Both shape parameters are raised to at least 0.01 before sampling.

    Args:
        model: Model dict whose ``"categories"`` entries provide ``alpha``
            and ``beta``.

    Returns:
        Mapping of category name to a random draw from
        ``Beta(alpha, beta)``.
    """
    floor = 0.01
    return {
        name: random.betavariate(max(stats["alpha"], floor), max(stats["beta"], floor))
        for name, stats in model.get("categories", {}).items()
    }
349
-
350
-
351
def save_snapshot(model: Dict) -> Path:
    """Write a timestamped copy of ``model`` into ``SNAPSHOTS_DIR``.

    The snapshot directory is created when missing. The file is named
    ``model_<YYYYmmdd_HHMMSS>.json`` after the current local time and holds
    the model as indented JSON.

    Returns:
        Path of the snapshot file that was written.
    """
    SNAPSHOTS_DIR.mkdir(parents=True, exist_ok=True)
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    target = SNAPSHOTS_DIR / ("model_" + stamp + ".json")
    payload = json.dumps(model, indent=2)
    target.write_text(payload)
    return target
358
-
359
-
360
- # ============================================
361
- # META-POLICY RULES (2026 Best Practice)
362
- # Consolidate repeated mistakes into reusable rules
363
- # Based on: Meta-Policy Reflexion (arXiv:2509.03990)
364
- # ============================================
365
-
366
- META_POLICY_FILE = FEEDBACK_DIR / "meta_policy_rules.json"
367
-
368
-
369
- def extract_meta_policy_rules(min_occurrences: int = 3) -> List[Dict[str, Any]]:
370
- """Extract reusable rules from repeated negative feedback patterns.
371
-
372
- Feb 2026 Upgrade: Recency + intensity weighted confidence.
373
- - Recent mistakes weigh more than old ones (exponential decay)
374
- - High-intensity feedback (user frustration) boosts confidence faster
375
- - Rules include trend analysis (improving vs deteriorating)
376
-
377
- Args:
378
- min_occurrences: Minimum times a pattern must appear to become a rule
379
-
380
- Returns:
381
- List of meta-policy rules with condition, action, weighted confidence
382
- """
383
- entries = load_feedback_entries()
384
- negative_entries = [e for e in entries if not is_positive(e)]
385
-
386
- if len(negative_entries) < min_occurrences:
387
- return []
388
-
389
- # Group by category
390
- category_patterns: Dict[str, List[Dict]] = {}
391
- for entry in negative_entries:
392
- cats = classify_entry(entry, DEFAULT_CATEGORIES)
393
- for cat in cats:
394
- if cat not in category_patterns:
395
- category_patterns[cat] = []
396
- category_patterns[cat].append(entry)
397
-
398
- # Also count positive entries per category for trend analysis
399
- positive_entries = [e for e in entries if is_positive(e)]
400
- category_positives: Dict[str, int] = {}
401
- for entry in positive_entries:
402
- cats = classify_entry(entry, DEFAULT_CATEGORIES)
403
- for cat in cats:
404
- category_positives[cat] = category_positives.get(cat, 0) + 1
405
-
406
- rules = []
407
- for category, patterns in category_patterns.items():
408
- if len(patterns) >= min_occurrences:
409
- # Feb 2026: Recency + intensity weighted confidence
410
- weighted_sum = 0.0
411
- total_weight = 0.0
412
- recent_count = 0 # Last 7 days
413
- recent_positive = 0
414
-
415
- for e in patterns:
416
- recency = time_decay_weight(e.get("timestamp", ""))
417
- intensity = e.get("intensity", 3) / 5.0 # Normalize to 0-1
418
- weight = recency * (0.5 + 0.5 * intensity) # Blend recency + intensity
419
- weighted_sum += weight
420
- total_weight += 1.0
421
-
422
- # Track recent entries
423
- try:
424
- ts = e.get("timestamp", "").replace("Z", "").split("+")[0]
425
- entry_time = datetime.fromisoformat(ts)
426
- if (datetime.now() - entry_time).days <= 7:
427
- recent_count += 1
428
- except (ValueError, AttributeError):
429
- pass
430
-
431
- # Count recent positives for trend
432
- for e in positive_entries:
433
- cats = classify_entry(e, DEFAULT_CATEGORIES)
434
- if category in cats:
435
- try:
436
- ts = e.get("timestamp", "").replace("Z", "").split("+")[0]
437
- entry_time = datetime.fromisoformat(ts)
438
- if (datetime.now() - entry_time).days <= 7:
439
- recent_positive += 1
440
- except (ValueError, AttributeError):
441
- pass
442
-
443
- # Weighted confidence: base + recency-weighted adjustment
444
- avg_weighted = weighted_sum / total_weight if total_weight > 0 else 0
445
- confidence = min(0.95, 0.4 + (avg_weighted * 0.3) + (len(patterns) * 0.05))
446
-
447
- # Trend: improving or deteriorating
448
- total_positives = category_positives.get(category, 0)
449
- pos_ratio = total_positives / (total_positives + len(patterns)) if (total_positives + len(patterns)) > 0 else 0
450
- if recent_count == 0 and recent_positive > 0:
451
- trend = "improving"
452
- elif recent_count > 2 and recent_positive == 0:
453
- trend = "deteriorating"
454
- elif recent_count > recent_positive:
455
- trend = "needs_attention"
456
- else:
457
- trend = "stable"
458
-
459
- rule = {
460
- "id": f"rule_{category}_{len(patterns)}",
461
- "category": category,
462
- "occurrences": len(patterns),
463
- "confidence": round(confidence, 3),
464
- "weighted_confidence": round(avg_weighted, 4),
465
- "trend": trend,
466
- "recent_negatives_7d": recent_count,
467
- "recent_positives_7d": recent_positive,
468
- "positive_ratio": round(pos_ratio, 3),
469
- "created": datetime.now().isoformat(),
470
- "condition": f"When working on {category} tasks",
471
- "action": f"Pay extra attention - {len(patterns)} past mistakes in this area",
472
- "examples": [
473
- e.get("context", e.get("message", ""))[:100]
474
- for e in sorted(patterns, key=lambda x: x.get("timestamp", ""), reverse=True)[:3]
475
- ],
476
- }
477
-
478
- # Category-specific rules
479
- if category == "git":
480
- rule["action"] = "VERIFY git operations before executing - check branch, status, diff"
481
- elif category == "code_edit":
482
- rule["action"] = "READ the file first, understand context before editing"
483
- elif category == "testing":
484
- rule["action"] = "Run tests after changes, don't assume they pass"
485
- elif category == "pr_review":
486
- rule["action"] = "Address ALL review comments, don't just minimize"
487
- elif category == "debugging":
488
- rule["action"] = "Verify the fix actually works - don't claim success without evidence"
489
-
490
- rules.append(rule)
491
-
492
- # Sort by confidence descending (most urgent first)
493
- rules.sort(key=lambda r: r["confidence"], reverse=True)
494
- return rules
495
-
496
-
497
- def save_meta_policy_rules(rules: List[Dict[str, Any]]):
498
- """Save extracted rules to disk."""
499
- META_POLICY_FILE.parent.mkdir(parents=True, exist_ok=True)
500
- with open(META_POLICY_FILE, "w") as f:
501
- json.dump({
502
- "updated": datetime.now().isoformat(),
503
- "rule_count": len(rules),
504
- "rules": rules,
505
- }, f, indent=2)
506
-
507
-
508
- def load_meta_policy_rules() -> List[Dict[str, Any]]:
509
- """Load existing meta-policy rules."""
510
- if not META_POLICY_FILE.exists():
511
- return []
512
- try:
513
- with open(META_POLICY_FILE) as f:
514
- data = json.load(f)
515
- return data.get("rules", [])
516
- except (json.JSONDecodeError, KeyError):
517
- return []
518
-
519
-
520
- # ============================================
521
- # DPO-STYLE BATCH OPTIMIZATION (Feb 2026)
522
- # Direct Preference Optimization without explicit reward model.
523
- # Builds preference pairs from positive/negative feedback,
524
- # then adjusts category priors more aggressively than
525
- # simple counting — mimicking DPO's closed-form update.
526
- #
527
- # Reference: Rafailov et al. 2023 (arXiv:2305.18290)
528
- # ============================================
529
-
530
- DPO_MODEL_FILE = FEEDBACK_DIR / "dpo_model.json"
531
- DPO_BETA = 0.1 # Temperature parameter (lower = more aggressive preference following)
532
-
533
-
534
- def _override_dpo_beta(value: float):
535
- """Override DPO_BETA at module level."""
536
- global DPO_BETA
537
- DPO_BETA = value
538
-
539
-
540
- def build_preference_pairs(categories: Dict) -> Dict[str, List[Tuple[Dict, Dict]]]:
541
- """Build (chosen, rejected) preference pairs per category.
542
-
543
- For each category, pair the most recent positive entry with the most
544
- recent negative entry. This creates implicit preference data without
545
- needing explicit A/B comparisons.
546
- """
547
- entries = load_feedback_entries()
548
- if not entries:
549
- return {}
550
-
551
- # Classify entries by category and sentiment
552
- cat_positives: Dict[str, List[Dict]] = {}
553
- cat_negatives: Dict[str, List[Dict]] = {}
554
-
555
- for entry in entries:
556
- cats = classify_entry(entry, categories)
557
- for cat in cats:
558
- if is_positive(entry):
559
- cat_positives.setdefault(cat, []).append(entry)
560
- else:
561
- cat_negatives.setdefault(cat, []).append(entry)
562
-
563
- # Build pairs: each positive paired with closest-in-time negative
564
- pairs: Dict[str, List[Tuple[Dict, Dict]]] = {}
565
- all_cats = set(list(cat_positives.keys()) + list(cat_negatives.keys()))
566
-
567
- for cat in all_cats:
568
- pos = cat_positives.get(cat, [])
569
- neg = cat_negatives.get(cat, [])
570
- if not pos or not neg:
571
- continue
572
-
573
- cat_pairs = []
574
- # Sort by timestamp
575
- pos_sorted = sorted(pos, key=lambda e: e.get("timestamp", ""))
576
- neg_sorted = sorted(neg, key=lambda e: e.get("timestamp", ""))
577
-
578
- # Pair each positive with the nearest negative (greedy matching)
579
- used_neg = set()
580
- for p in pos_sorted:
581
- best_neg = None
582
- best_dist = float("inf")
583
- for i, n in enumerate(neg_sorted):
584
- if i in used_neg:
585
- continue
586
- try:
587
- p_ts = datetime.fromisoformat(p.get("timestamp", "").replace("Z", "").split("+")[0])
588
- n_ts = datetime.fromisoformat(n.get("timestamp", "").replace("Z", "").split("+")[0])
589
- dist = abs((p_ts - n_ts).total_seconds())
590
- except (ValueError, AttributeError):
591
- dist = float("inf")
592
- if dist < best_dist:
593
- best_dist = dist
594
- best_neg = i
595
- if best_neg is not None:
596
- used_neg.add(best_neg)
597
- cat_pairs.append((p, neg_sorted[best_neg]))
598
-
599
- if cat_pairs:
600
- pairs[cat] = cat_pairs
601
-
602
- return pairs
603
-
604
-
605
- def dpo_log_ratio(chosen_weight: float, rejected_weight: float, beta: float = DPO_BETA) -> float:
606
- """Compute DPO implicit reward difference.
607
-
608
- DPO loss: -log(sigmoid(beta * (log pi(chosen) - log pi(rejected))))
609
- We use time-decay weights as proxy for log-probabilities.
610
-
611
- Returns adjustment to apply to category alpha/beta parameters.
612
- """
613
- # Avoid log(0)
614
- chosen_weight = max(chosen_weight, 0.01)
615
- rejected_weight = max(rejected_weight, 0.01)
616
-
617
- log_ratio = math.log(chosen_weight) - math.log(rejected_weight)
618
- sigmoid = 1.0 / (1.0 + math.exp(-beta * log_ratio))
619
-
620
- # Scale adjustment: larger preference gap → larger update
621
- adjustment = (sigmoid - 0.5) * 2 # Range: -1 to 1
622
- return adjustment
623
-
624
-
625
- def train_dpo(categories: Dict) -> Dict:
626
- """DPO-style batch optimization (Feb 2026 upgrade).
627
-
628
- Instead of simple counting, uses preference pairs to compute
629
- direct policy updates. Works alongside Thompson Sampling:
630
- - Thompson Sampling: online exploration (per-feedback updates)
631
- - DPO: batch exploitation (accumulated preference pairs)
632
-
633
- The DPO adjustment is applied on top of the Thompson model.
634
- """
635
- pairs = build_preference_pairs(categories)
636
- if not pairs:
637
- print("No preference pairs found. Need both positive and negative feedback per category.")
638
- return load_model(categories)
639
-
640
- model = load_model(categories)
641
-
642
- dpo_adjustments = {}
643
-
644
- for cat, cat_pairs in pairs.items():
645
- if cat not in model["categories"]:
646
- continue
647
-
648
- total_adjustment = 0.0
649
- for chosen, rejected in cat_pairs:
650
- chosen_weight = time_decay_weight(chosen.get("timestamp", ""))
651
- rejected_weight = time_decay_weight(rejected.get("timestamp", ""))
652
-
653
- # Compute DPO-style adjustment
654
- adj = dpo_log_ratio(chosen_weight, rejected_weight)
655
- total_adjustment += adj
656
-
657
- # Average adjustment over all pairs
658
- avg_adjustment = total_adjustment / len(cat_pairs) if cat_pairs else 0
659
-
660
- # Apply DPO adjustment to model parameters
661
- # Positive adjustment → boost alpha (more reliable)
662
- # Negative adjustment → boost beta (less reliable)
663
- if avg_adjustment > 0:
664
- boost = avg_adjustment * len(cat_pairs) * 0.5 # Scale by pair count
665
- model["categories"][cat]["alpha"] += boost
666
- else:
667
- penalty = abs(avg_adjustment) * len(cat_pairs) * 0.5
668
- model["categories"][cat]["beta"] += penalty
669
-
670
- dpo_adjustments[cat] = {
671
- "pairs": len(cat_pairs),
672
- "avg_adjustment": round(avg_adjustment, 4),
673
- "direction": "boost" if avg_adjustment > 0 else "penalize",
674
- }
675
-
676
- # Save DPO metadata
677
- dpo_meta = {
678
- "updated": datetime.now().isoformat(),
679
- "beta": DPO_BETA,
680
- "total_pairs": sum(len(p) for p in pairs.values()),
681
- "categories": dpo_adjustments,
682
- }
683
- DPO_MODEL_FILE.parent.mkdir(parents=True, exist_ok=True)
684
- with open(DPO_MODEL_FILE, "w") as f:
685
- json.dump(dpo_meta, f, indent=2)
686
-
687
- save_model(model)
688
- return model
689
-
690
-
691
- def print_dpo_results(model: Dict):
692
- """Print DPO training results."""
693
- if not DPO_MODEL_FILE.exists():
694
- print("\nNo DPO model found. Run --dpo-train first.")
695
- return
696
-
697
- with open(DPO_MODEL_FILE) as f:
698
- dpo_meta = json.load(f)
699
-
700
- print()
701
- print("=" * 60)
702
- print("DPO BATCH OPTIMIZATION RESULTS (Feb 2026)")
703
- print("=" * 60)
704
- print(f" Beta (temperature): {dpo_meta.get('beta', DPO_BETA)}")
705
- print(f" Total preference pairs: {dpo_meta.get('total_pairs', 0)}")
706
- print(f" Updated: {dpo_meta.get('updated', 'never')}")
707
- print()
708
-
709
- for cat, adj in sorted(
710
- dpo_meta.get("categories", {}).items(),
711
- key=lambda x: abs(x[1].get("avg_adjustment", 0)),
712
- reverse=True,
713
- ):
714
- direction = adj.get("direction", "none")
715
- arrow = "+" if direction == "boost" else "-"
716
- bar_val = abs(adj.get("avg_adjustment", 0)) * 10
717
- bar = "#" * min(10, int(bar_val)) + "-" * max(0, 10 - int(bar_val))
718
- print(f" {cat:<20s} [{bar}] {arrow}{abs(adj.get('avg_adjustment', 0)):.4f} ({adj.get('pairs', 0)} pairs)")
719
-
720
- print()
721
- print(" DPO adjusts Thompson Sampling priors based on preference pairs.")
722
- print(" Run --reliability to see combined effect.")
723
- print("=" * 60)
724
-
725
-
726
- def print_meta_policy_rules():
727
- """Print meta-policy rules for session context."""
728
- rules = load_meta_policy_rules()
729
-
730
- print()
731
- print("=" * 60)
732
- print("META-POLICY RULES (Recency + Intensity Weighted)")
733
- print("=" * 60)
734
-
735
- if not rules:
736
- print("\n No rules extracted yet. Need more feedback data.")
737
- print(" Run --extract-rules after accumulating feedback.")
738
- else:
739
- for rule in rules:
740
- conf_bar = "#" * int(rule["confidence"] * 10)
741
- trend = rule.get("trend", "unknown")
742
- trend_icon = {"improving": "+", "deteriorating": "!", "needs_attention": "?", "stable": "="}
743
- trend_char = trend_icon.get(trend, "?")
744
- print(f"\n [{rule['category'].upper()}] Confidence: [{conf_bar}] {rule['confidence']:.0%} (trend: {trend_char} {trend})")
745
- print(f" Condition: {rule['condition']}")
746
- print(f" Action: {rule['action']}")
747
- print(f" Based on: {rule['occurrences']} negatives | Positive ratio: {rule.get('positive_ratio', 0):.0%}")
748
- recent_neg = rule.get("recent_negatives_7d", 0)
749
- recent_pos = rule.get("recent_positives_7d", 0)
750
- if recent_neg or recent_pos:
751
- print(f" Last 7d: {recent_neg} neg / {recent_pos} pos")
752
-
753
- print("\n" + "=" * 60)
754
-
755
-
756
- def print_reliability_table(model: Dict):
757
- """Print formatted reliability table."""
758
- results = compute_reliability(model)
759
-
760
- print()
761
- print("=" * 78)
762
- print("THOMPSON SAMPLING RELIABILITY TABLE")
763
- print("=" * 78)
764
- print()
765
- print(f" Model updated: {model.get('updated', 'never')}")
766
- print(f" Total entries: {model.get('total_entries', 0)}")
767
- print()
768
- print(f" {'Category':<20s} | {'Alpha':>7s} | {'Beta':>7s} | {'Reliability':>12s} | {'Samples':>7s} | {'CI Width':>8s}")
769
- print(" " + "-" * 74)
770
-
771
- for cat, alpha, beta_val, reliability, samples, ci_width in results:
772
- # Visual bar
773
- bar_len = int(reliability * 10)
774
- bar = "#" * bar_len + "-" * (10 - bar_len)
775
-
776
- print(f" {cat:<20s} | {alpha:>7.1f} | {beta_val:>7.1f} | [{bar}] {reliability:>4.0%} | {samples:>7d} | {ci_width:>7.3f}")
777
-
778
- print()
779
- print("=" * 78)
780
-
781
- # Summary
782
- if results:
783
- best = results[0]
784
- worst = results[-1]
785
- print(f" Best: {best[0]} ({best[3]:.0%})")
786
- print(f" Worst: {worst[0]} ({worst[3]:.0%})")
787
- print()
788
-
789
- # Categories needing attention (reliability < 50% with 3+ samples)
790
- weak = [r for r in results if r[3] < 0.5 and r[4] >= 3]
791
- if weak:
792
- print(" Categories needing improvement:")
793
- for cat, _, _, rel, samp, _ in weak:
794
- print(f" - {cat}: {rel:.0%} ({samp} samples)")
795
- print()
796
-
797
- print("=" * 78)
798
-
799
-
800
- def print_samples(model: Dict):
801
- """Print Thompson-sampled probabilities."""
802
- samples = sample_posteriors(model)
803
-
804
- print()
805
- print("=" * 50)
806
- print("THOMPSON SAMPLING (Single Draw)")
807
- print("=" * 50)
808
- print()
809
-
810
- for cat, prob in sorted(samples.items(), key=lambda x: -x[1]):
811
- bar = "#" * int(prob * 20) + "-" * (20 - int(prob * 20))
812
- print(f" {cat:<20s} [{bar}] {prob:.3f}")
813
-
814
- print()
815
- print(" (Each run produces different samples - this is expected)")
816
- print("=" * 50)
817
-
818
-
819
- def main():
820
- parser = argparse.ArgumentParser(description="Thompson Sampling Feedback Model Trainer (2026)")
821
- parser.add_argument("--train", action="store_true", help="Full rebuild from JSONL")
822
- parser.add_argument("--incremental", action="store_true", help="Update with latest entry")
823
- parser.add_argument("--reliability", action="store_true", help="Print reliability table")
824
- parser.add_argument("--sample", action="store_true", help="Sample from posteriors")
825
- parser.add_argument("--snapshot", action="store_true", help="Save model snapshot")
826
- parser.add_argument("--extract-rules", action="store_true", help="Extract meta-policy rules (2026)")
827
- parser.add_argument("--show-rules", action="store_true", help="Show meta-policy rules")
828
- parser.add_argument("--dpo-train", action="store_true", help="DPO batch optimization (Feb 2026)")
829
- parser.add_argument("--dpo-beta", type=float, default=DPO_BETA, help="DPO temperature parameter")
830
- parser.add_argument("--config", type=str, help="Path to custom categories JSON")
831
- parser.add_argument("--json", action="store_true", help="Output as JSON (for hook consumption)")
832
-
833
- args = parser.parse_args()
834
-
835
- categories = load_config(args.config)
836
-
837
- if args.train:
838
- model = train_full(categories)
839
- # Auto-run DPO batch optimization on full train (Feb 2026: autonomous)
840
- dpo_model = train_dpo(categories)
841
- # Auto-extract meta-policy rules with recency+intensity weighting
842
- rules = extract_meta_policy_rules()
843
- save_meta_policy_rules(rules)
844
- if args.json:
845
- print(json.dumps({"status": "trained", "entries": model["total_entries"], "dpo": True, "rules": len(rules)}))
846
- else:
847
- print(f"Trained model from {model['total_entries']} entries.")
848
- print(f"DPO batch optimization applied. Meta-policy rules: {len(rules)}.")
849
- print(f"Saved to: {MODEL_FILE}")
850
- print_reliability_table(dpo_model)
851
-
852
- elif args.incremental:
853
- model = train_incremental(categories)
854
- if args.json:
855
- print(json.dumps({"status": "updated", "entries": model["total_entries"]}))
856
- else:
857
- print(f"Incremental update complete. Total entries: {model['total_entries']}")
858
-
859
- elif args.reliability:
860
- model = load_model(categories)
861
- if args.json:
862
- results = compute_reliability(model)
863
- output = {
864
- "updated": model.get("updated"),
865
- "total_entries": model.get("total_entries", 0),
866
- "categories": {
867
- cat: {"alpha": a, "beta": b, "reliability": r, "samples": s, "ci_width": ci}
868
- for cat, a, b, r, s, ci in results
869
- },
870
- }
871
- print(json.dumps(output, indent=2))
872
- else:
873
- print_reliability_table(model)
874
-
875
- elif args.sample:
876
- model = load_model(categories)
877
- if args.json:
878
- samples = sample_posteriors(model)
879
- print(json.dumps(samples, indent=2))
880
- else:
881
- print_samples(model)
882
-
883
- elif args.snapshot:
884
- model = load_model(categories)
885
- snapshot_file = save_snapshot(model)
886
- if args.json:
887
- print(json.dumps({"snapshot": str(snapshot_file)}))
888
- else:
889
- print(f"Snapshot saved: {snapshot_file}")
890
-
891
- elif args.extract_rules:
892
- rules = extract_meta_policy_rules()
893
- save_meta_policy_rules(rules)
894
- if args.json:
895
- print(json.dumps({"status": "extracted", "rule_count": len(rules), "rules": rules}))
896
- else:
897
- print(f"Extracted {len(rules)} meta-policy rules.")
898
- print(f"Saved to: {META_POLICY_FILE}")
899
- print_meta_policy_rules()
900
-
901
- elif args.show_rules:
902
- if args.json:
903
- rules = load_meta_policy_rules()
904
- print(json.dumps({"rules": rules}, indent=2))
905
- else:
906
- print_meta_policy_rules()
907
-
908
- elif args.dpo_train:
909
- # Override DPO_BETA via module-level reassignment
910
- _override_dpo_beta(args.dpo_beta)
911
- model = train_dpo(categories)
912
- if args.json:
913
- dpo_meta = {}
914
- if DPO_MODEL_FILE.exists():
915
- with open(DPO_MODEL_FILE) as f:
916
- dpo_meta = json.load(f)
917
- print(json.dumps({"status": "dpo_trained", **dpo_meta}))
918
- else:
919
- print(f"DPO batch optimization complete.")
920
- print(f"Saved to: {DPO_MODEL_FILE}")
921
- print_dpo_results(model)
922
- print_reliability_table(model)
923
-
924
- else:
925
- parser.print_help()
926
-
927
-
928
- if __name__ == "__main__":
929
- main()