thumbgate 1.1.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. package/.claude-plugin/README.md +4 -4
  2. package/.claude-plugin/marketplace.json +1 -1
  3. package/.claude-plugin/plugin.json +1 -1
  4. package/.well-known/mcp/server-card.json +1 -1
  5. package/README.md +48 -16
  6. package/adapters/README.md +1 -1
  7. package/adapters/claude/.mcp.json +2 -2
  8. package/adapters/codex/config.toml +2 -2
  9. package/adapters/mcp/server-stdio.js +11 -8
  10. package/adapters/opencode/opencode.json +1 -1
  11. package/bin/cli.js +20 -11
  12. package/config/github-about.json +1 -1
  13. package/config/model-tiers.json +11 -0
  14. package/package.json +22 -11
  15. package/plugins/claude-codex-bridge/.claude-plugin/plugin.json +1 -1
  16. package/plugins/claude-codex-bridge/.mcp.json +1 -1
  17. package/plugins/codex-profile/.codex-plugin/plugin.json +1 -1
  18. package/plugins/codex-profile/.mcp.json +1 -1
  19. package/plugins/codex-profile/INSTALL.md +1 -1
  20. package/plugins/codex-profile/README.md +1 -1
  21. package/plugins/cursor-marketplace/.cursor-plugin/plugin.json +1 -1
  22. package/plugins/cursor-marketplace/README.md +2 -2
  23. package/plugins/cursor-marketplace/commands/capture-feedback.md +2 -2
  24. package/plugins/cursor-marketplace/rules/feedback-capture.mdc +3 -3
  25. package/plugins/cursor-marketplace/skills/capture-feedback/SKILL.md +3 -2
  26. package/plugins/opencode-profile/INSTALL.md +1 -1
  27. package/public/compare.html +302 -0
  28. package/public/guide.html +4 -4
  29. package/public/index.html +77 -38
  30. package/public/learn/ai-agent-persistent-memory.html +1 -0
  31. package/public/lessons.html +325 -17
  32. package/scripts/__pycache__/train_from_feedback.cpython-312.pyc +0 -0
  33. package/scripts/ai-search-visibility.js +142 -0
  34. package/scripts/audit-trail.js +6 -0
  35. package/scripts/capture-railway-diagnostics.sh +97 -0
  36. package/scripts/changeset-check.js +372 -0
  37. package/scripts/check-congruence.js +8 -5
  38. package/scripts/claude-feedback-sync.js +320 -0
  39. package/scripts/cli-telemetry.js +4 -1
  40. package/scripts/computer-use-firewall.js +45 -15
  41. package/scripts/contextfs.js +32 -23
  42. package/scripts/dashboard.js +84 -0
  43. package/scripts/docker-sandbox-planner.js +208 -0
  44. package/scripts/feedback-loop.js +16 -0
  45. package/scripts/github-about.js +56 -0
  46. package/scripts/intervention-policy.js +696 -0
  47. package/scripts/local-model-profile.js +18 -2
  48. package/scripts/model-tier-router.js +10 -1
  49. package/scripts/operational-integrity.js +361 -32
  50. package/scripts/prove-adapters.js +1 -0
  51. package/scripts/prove-automation.js +2 -2
  52. package/scripts/prove-packaged-runtime.js +260 -0
  53. package/scripts/prove-runtime.js +13 -0
  54. package/scripts/published-cli.js +10 -1
  55. package/scripts/rate-limiter.js +3 -3
  56. package/scripts/statusline-links.js +238 -0
  57. package/scripts/statusline-local-stats.js +2 -0
  58. package/scripts/statusline.sh +200 -10
  59. package/scripts/sync-github-about.js +7 -4
  60. package/scripts/tool-registry.js +2 -2
  61. package/scripts/workflow-sentinel.js +197 -39
  62. package/skills/thumbgate/SKILL.md +1 -1
  63. package/src/api/server.js +12 -1
@@ -6,13 +6,23 @@
6
6
  # Resolve script directory safely (CodeQL: no uncontrolled paths)
7
7
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)"
8
8
  case "$SCRIPT_DIR" in *[!a-zA-Z0-9/_.-]*) echo "ThumbGate: invalid script path"; exit 1;; esac
9
+ LOCAL_API_ORIGIN="${THUMBGATE_LOCAL_API_ORIGIN:-http://localhost:3456}"
9
10
 
10
11
  # ── Parse Claude Code session JSON from stdin ─────────────────────
11
12
  eval "$(cat | jq -r '
12
13
  def n(f): f // 0;
13
- @sh "CTX_PCT=\(n(.context_window.used_percentage) | floor)"
14
+ @sh "CTX_PCT=\(n(.context_window.used_percentage) | floor)",
15
+ @sh "PROJECT_CWD=\(.cwd // .working_directory // "")"
14
16
  ' 2>/dev/null)"
15
17
  CTX_PCT="${CTX_PCT:-0}"
18
+ PROJECT_CWD="${PROJECT_CWD:-}"
19
+
20
+ if [ -n "$PROJECT_CWD" ] && [ -d "$PROJECT_CWD" ]; then
21
+ export THUMBGATE_PROJECT_DIR="$PROJECT_CWD"
22
+ if [ -z "${THUMBGATE_FEEDBACK_DIR:-}" ]; then
23
+ export THUMBGATE_FEEDBACK_DIR="${PROJECT_CWD}/.claude/memory/feedback"
24
+ fi
25
+ fi
16
26
 
17
27
  # ── ThumbGate stats from cache ────────────────────────────────────────
18
28
  THUMBGATE_CACHE=""
@@ -63,7 +73,7 @@ fi
63
73
  # Background refresh from REST API when cache is stale (>120s)
64
74
  if [ $(( _NOW - ${CACHE_TS:-0} )) -gt 120 ]; then
65
75
  (
66
- _R=$(curl -s --max-time 3 "http://localhost:3456/v1/feedback/stats" -H "Authorization: Bearer ${THUMBGATE_API_KEY:-tg_creator_dev_enterprise}" 2>/dev/null)
76
+ _R=$(curl -s --max-time 3 "${LOCAL_API_ORIGIN}/v1/feedback/stats" -H "Authorization: Bearer ${THUMBGATE_API_KEY:-tg_creator_dev_enterprise}" 2>/dev/null)
67
77
  [ -z "$_R" ] && exit 0
68
78
  echo "$_R" | python3 -c "
69
79
  import json,sys,time,os
@@ -78,6 +88,23 @@ except:pass
78
88
  disown 2>/dev/null
79
89
  fi
80
90
 
91
+ # ── Clickable statusline affordances ─────────────────────────────
92
+ LINK_STATE="offline"
93
+ UP_URL=""; DOWN_URL=""; DASHBOARD_URL=""; LESSONS_URL=""
94
+ DASHBOARD_LABEL="Dashboard"; LESSONS_LABEL="Lessons"
95
+ _LINKS_JSON=$(node "${SCRIPT_DIR}/statusline-links.js" 2>/dev/null)
96
+ if [ -n "$_LINKS_JSON" ]; then
97
+ eval "$(echo "$_LINKS_JSON" | jq -r '
98
+ @sh "LINK_STATE=\(.state // "offline")",
99
+ @sh "UP_URL=\(.upUrl // "")",
100
+ @sh "DOWN_URL=\(.downUrl // "")",
101
+ @sh "DASHBOARD_URL=\(.dashboardUrl // "")",
102
+ @sh "LESSONS_URL=\(.lessonsUrl // "")",
103
+ @sh "DASHBOARD_LABEL=\(.dashboardLabel // "Dashboard")",
104
+ @sh "LESSONS_LABEL=\(.lessonsLabel // "Lessons")"
105
+ ' 2>/dev/null)"
106
+ fi
107
+
81
108
  # ── ThumbGate package metadata ────────────────────────────────────────
82
109
  TG_VERSION="unknown"; TG_TIER="Free"
83
110
  _META_JSON=$(node "${SCRIPT_DIR}/statusline-meta.js" 2>/dev/null)
@@ -99,6 +126,16 @@ if [ -n "$_TOWER_JSON" ]; then
99
126
  ' 2>/dev/null)"
100
127
  fi
101
128
 
129
+ # ── Latest lesson ──────────────────────────────────────────────────
130
+ LESSON_TEXT=""; LESSON_ID=""
131
+ _LESSON_JSON=$(node "${SCRIPT_DIR}/statusline-lesson.js" 2>/dev/null)
132
+ if [ -n "$_LESSON_JSON" ]; then
133
+ eval "$(echo "$_LESSON_JSON" | jq -r '
134
+ @sh "LESSON_TEXT=\(.text // "")",
135
+ @sh "LESSON_ID=\(.lessonId // "")"
136
+ ' 2>/dev/null)"
137
+ fi
138
+
102
139
  # ── Colors ────────────────────────────────────────────────────────
103
140
  G='\033[32m'; R='\033[31m'; M='\033[35m'; C='\033[36m'; D='\033[90m'; BD='\033[1m'; RST='\033[0m'
104
141
 
@@ -107,17 +144,170 @@ case "${TREND}" in
107
144
  improving) ARROW="↗" ;; degrading) ARROW="↘" ;; stable) ARROW="→" ;; *) ARROW="?" ;;
108
145
  esac
109
146
 
147
+ osc8_link() {
148
+ local url="$1"
149
+ local label="$2"
150
+ if [ -n "$url" ]; then
151
+ printf '\033]8;;%s\a%s\033]8;;\a' "$url" "$label"
152
+ else
153
+ printf '%s' "$label"
154
+ fi
155
+ }
156
+
157
+ UP_ICON="$(osc8_link "$UP_URL" "👍")"
158
+ DOWN_ICON="$(osc8_link "$DOWN_URL" "👎")"
159
+ DASHBOARD_LINK="$(osc8_link "$DASHBOARD_URL" "$DASHBOARD_LABEL")"
160
+ LESSONS_LINK="$(osc8_link "$LESSONS_URL" "$LESSONS_LABEL")"
161
+
162
+ is_numeric() {
163
+ case "$1" in
164
+ ''|*[!0-9]*) return 1 ;;
165
+ *) return 0 ;;
166
+ esac
167
+ }
168
+
169
+ # Keep ThumbGate within a conservative left-side budget so Claude's own
170
+ # right-side notices do not visually collide with our line.
171
+ STATUSLINE_DEFAULT_MAX_CHARS="${THUMBGATE_STATUSLINE_DEFAULT_MAX_CHARS:-96}"
172
+ STATUSLINE_RIGHT_RESERVE="${THUMBGATE_STATUSLINE_RIGHT_RESERVE:-28}"
173
+ if ! is_numeric "$STATUSLINE_DEFAULT_MAX_CHARS"; then STATUSLINE_DEFAULT_MAX_CHARS=96; fi
174
+ if ! is_numeric "$STATUSLINE_RIGHT_RESERVE"; then STATUSLINE_RIGHT_RESERVE=28; fi
175
+
176
+ if is_numeric "${THUMBGATE_STATUSLINE_MAX_CHARS:-}"; then
177
+ STATUSLINE_MAX_CHARS="$THUMBGATE_STATUSLINE_MAX_CHARS"
178
+ else
179
+ STATUSLINE_MAX_CHARS="$STATUSLINE_DEFAULT_MAX_CHARS"
180
+ if is_numeric "${COLUMNS:-}"; then
181
+ _AVAILABLE_CHARS=$(( COLUMNS - STATUSLINE_RIGHT_RESERVE ))
182
+ if [ "$_AVAILABLE_CHARS" -gt 0 ] && [ "$_AVAILABLE_CHARS" -lt "$STATUSLINE_MAX_CHARS" ]; then
183
+ STATUSLINE_MAX_CHARS="$_AVAILABLE_CHARS"
184
+ fi
185
+ fi
186
+ fi
187
+ if [ "$STATUSLINE_MAX_CHARS" -lt 48 ]; then STATUSLINE_MAX_CHARS=48; fi
188
+
189
+ PLAIN_SEGMENTS=()
190
+ RENDERED_SEGMENTS=()
191
+
192
+ current_plain_length() {
193
+ local total=0
194
+ local i
195
+ for ((i = 0; i < ${#PLAIN_SEGMENTS[@]}; i++)); do
196
+ if [ "$i" -gt 0 ]; then
197
+ total=$((total + 3))
198
+ fi
199
+ total=$((total + ${#PLAIN_SEGMENTS[$i]}))
200
+ done
201
+ printf '%s' "$total"
202
+ }
203
+
204
+ push_segment() {
205
+ PLAIN_SEGMENTS+=("$1")
206
+ RENDERED_SEGMENTS+=("$2")
207
+ }
208
+
209
+ add_segment_if_fit() {
210
+ local plain="$1"
211
+ local rendered="$2"
212
+ local current extra
213
+ current=$(current_plain_length)
214
+ extra=${#plain}
215
+ if [ "${#PLAIN_SEGMENTS[@]}" -gt 0 ]; then
216
+ extra=$((extra + 3))
217
+ fi
218
+ if [ $((current + extra)) -le "$STATUSLINE_MAX_CHARS" ]; then
219
+ push_segment "$plain" "$rendered"
220
+ return 0
221
+ fi
222
+ return 1
223
+ }
224
+
225
+ truncate_plain_text() {
226
+ local text="$1"
227
+ local max_chars="$2"
228
+ if [ "$max_chars" -le 0 ]; then
229
+ printf ''
230
+ elif [ "${#text}" -le "$max_chars" ]; then
231
+ printf '%s' "$text"
232
+ elif [ "$max_chars" -le 3 ]; then
233
+ printf '%.*s' "$max_chars" "$text"
234
+ else
235
+ printf '%s...' "${text:0:$((max_chars - 3))}"
236
+ fi
237
+ }
238
+
239
+ add_truncated_segment_if_fit() {
240
+ local plain="$1"
241
+ local color="$2"
242
+ local min_chars="${3:-14}"
243
+ local current sep remaining truncated
244
+ current=$(current_plain_length)
245
+ sep=0
246
+ if [ "${#PLAIN_SEGMENTS[@]}" -gt 0 ]; then
247
+ sep=3
248
+ fi
249
+ remaining=$((STATUSLINE_MAX_CHARS - current - sep))
250
+ if [ "$remaining" -lt "$min_chars" ]; then
251
+ return 1
252
+ fi
253
+ truncated=$(truncate_plain_text "$plain" "$remaining")
254
+ push_segment "$truncated" "${color}${truncated}${RST}"
255
+ return 0
256
+ }
257
+
258
+ render_segments() {
259
+ local line=''
260
+ local i
261
+ for ((i = 0; i < ${#RENDERED_SEGMENTS[@]}; i++)); do
262
+ if [ "$i" -gt 0 ]; then
263
+ line="${line} · "
264
+ fi
265
+ line="${line}${RENDERED_SEGMENTS[$i]}"
266
+ done
267
+ printf '%b\n' "$line"
268
+ }
269
+
110
270
  # ── Output (single line) ─────────────────────────────────────────
111
- LINE="ThumbGate v${TG_VERSION} · ${TG_TIER}"
112
271
  if [ "$UP" = "0" ] && [ "$DOWN" = "0" ]; then
113
- echo -e "${D}${LINE} · no feedback yet${RST}"
272
+ push_segment "ThumbGate v${TG_VERSION}" "${D}ThumbGate v${TG_VERSION}${RST}"
273
+ push_segment "${TG_TIER}" "${D}${TG_TIER}${RST}"
274
+ push_segment "no feedback yet" "${D}no feedback yet${RST}"
275
+ add_segment_if_fit "${DASHBOARD_LABEL}" "${C}${DASHBOARD_LINK}${RST}"
276
+ add_segment_if_fit "${LESSONS_LABEL}" "${M}${LESSONS_LINK}${RST}"
277
+ render_segments
114
278
  else
115
- LINE="${LINE} · ${G}${BD}${UP}${RST}👍 ${R}${BD}${DOWN}${RST}👎 ${ARROW}"
279
+ STATS_PLAIN="${UP}👍 ${DOWN}👎 ${ARROW}"
280
+ STATS_RENDERED="${G}${BD}${UP}${RST}${UP_ICON} ${R}${BD}${DOWN}${RST}${DOWN_ICON} ${ARROW}"
281
+ ALERTS_PLAIN=''
282
+ ALERTS_RENDERED=''
283
+
284
+ if [ "${SLO_V:-0}" -gt 0 ]; then
285
+ ALERTS_PLAIN="${ALERTS_PLAIN}${ALERTS_PLAIN:+ }${SLO_V} SLO"
286
+ ALERTS_RENDERED="${ALERTS_RENDERED}${ALERTS_RENDERED:+ }${R}${SLO_V} SLO${RST}"
287
+ fi
288
+ if [ "${AT_RISK:-0}" -gt 0 ]; then
289
+ ALERTS_PLAIN="${ALERTS_PLAIN}${ALERTS_PLAIN:+ }${AT_RISK}⚠"
290
+ ALERTS_RENDERED="${ALERTS_RENDERED}${ALERTS_RENDERED:+ }${R}${AT_RISK}⚠${RST}"
291
+ fi
292
+ if [ "${ANOMALIES:-0}" -gt 0 ]; then
293
+ ALERTS_PLAIN="${ALERTS_PLAIN}${ALERTS_PLAIN:+ }${ANOMALIES}☠"
294
+ ALERTS_RENDERED="${ALERTS_RENDERED}${ALERTS_RENDERED:+ }${R}${ANOMALIES}☠${RST}"
295
+ fi
116
296
 
117
- # Control Tower alerts (if any)
118
- [ "${SLO_V:-0}" -gt 0 ] && LINE="${LINE} ${R}${SLO_V} SLO${RST}"
119
- [ "${AT_RISK:-0}" -gt 0 ] && LINE="${LINE} ${R}${AT_RISK}⚠${RST}"
120
- [ "${ANOMALIES:-0}" -gt 0 ] && LINE="${LINE} ${R}${ANOMALIES}☠${RST}"
297
+ push_segment "ThumbGate v${TG_VERSION}" "ThumbGate v${TG_VERSION}"
298
+ push_segment "${TG_TIER}" "${TG_TIER}"
299
+ push_segment "${STATS_PLAIN}" "${STATS_RENDERED}"
300
+ add_segment_if_fit "${DASHBOARD_LABEL}" "${C}${DASHBOARD_LINK}${RST}"
301
+ add_segment_if_fit "${LESSONS_LABEL}" "${M}${LESSONS_LINK}${RST}"
302
+ if [ "${LESSONS:-0}" -gt 0 ]; then
303
+ add_segment_if_fit "${LESSONS} lessons" "${M}${BD}${LESSONS}${RST} lessons"
304
+ fi
305
+ if [ -n "${ALERTS_PLAIN}" ]; then
306
+ add_segment_if_fit "${ALERTS_PLAIN}" "${ALERTS_RENDERED}"
307
+ fi
308
+ if [ -n "${LESSON_TEXT}" ]; then
309
+ add_truncated_segment_if_fit "${LESSON_TEXT}" "${D}" 14
310
+ fi
121
311
 
122
- echo -e "$LINE"
312
+ render_segments
123
313
  fi
@@ -6,6 +6,7 @@ const {
6
6
  fetchLiveGitHubAbout,
7
7
  loadGitHubAboutConfig,
8
8
  updateLiveGitHubAbout,
9
+ verifyLiveGitHubAbout,
9
10
  } = require('./github-about');
10
11
 
11
12
  async function main() {
@@ -32,11 +33,13 @@ async function main() {
32
33
  console.log(`Syncing GitHub About for ${about.repo}...`);
33
34
  await updateLiveGitHubAbout({ repo: about.repo });
34
35
 
35
- const after = await fetchLiveGitHubAbout({ repo: about.repo });
36
- const remaining = compareGitHubAbout(about, after, `Live GitHub About (${about.repo})`);
37
- if (remaining.length > 0) {
36
+ const verification = await verifyLiveGitHubAbout({
37
+ expected: about,
38
+ repo: about.repo,
39
+ });
40
+ if (verification.errors.length > 0) {
38
41
  console.error(`\n❌ GitHub About sync incomplete for ${about.repo}:\n`);
39
- for (const error of remaining) {
42
+ for (const error of verification.errors) {
40
43
  console.error(` • ${error}`);
41
44
  }
42
45
  console.error('');
@@ -36,7 +36,7 @@ const TOOLS = [
36
36
  whatWorked: { type: 'string' },
37
37
  chatHistory: {
38
38
  type: 'array',
39
- description: 'Optional recent conversation window used for history-aware lesson distillation.',
39
+ description: 'Optional caller-supplied recent conversation window used for history-aware lesson distillation. The current Claude auto-capture path sends up to 8 prior recorded entries for vague negative inline signals.',
40
40
  items: {
41
41
  type: 'object',
42
42
  properties: {
@@ -59,7 +59,7 @@ const TOOLS = [
59
59
  timestamp: { type: 'string' },
60
60
  },
61
61
  },
62
- description: 'Last 5-10 conversation turns before the feedback signal. Raw messages, not summaries.',
62
+ description: 'Recent conversation turns before the feedback signal. Raw messages, not summaries.',
63
63
  },
64
64
  rubricScores: {
65
65
  type: 'array',
@@ -14,7 +14,9 @@ const {
14
14
  normalizePosix,
15
15
  resolveRepoRoot,
16
16
  } = require('./operational-integrity');
17
+ const { buildDockerSandboxPlan } = require('./docker-sandbox-planner');
17
18
  const { evaluatePretool } = require('./hybrid-feedback-context');
19
+ const { getInterventionRecommendation } = require('./intervention-policy');
18
20
 
19
21
  const GOVERNANCE_STATE_PATH = path.join(process.env.HOME || '/tmp', '.thumbgate', 'governance-state.json');
20
22
  const DEFAULT_PROTECTED_FILE_GLOBS = [
@@ -386,6 +388,7 @@ function scoreRisk({
386
388
  affectedFiles,
387
389
  integrity,
388
390
  memoryGuard,
391
+ learnedPolicy,
389
392
  blastRadius,
390
393
  taskScopeViolation,
391
394
  protectedSurface,
@@ -471,6 +474,43 @@ function scoreRisk({
471
474
  { mode: memoryGuard.mode }
472
475
  );
473
476
  }
477
+ if (learnedPolicy && learnedPolicy.enabled && learnedPolicy.prediction) {
478
+ const confidence = learnedPolicy.prediction.confidence || 0;
479
+ const label = learnedPolicy.prediction.label;
480
+ if (label === 'deny' && confidence >= 0.6) {
481
+ addDriver(
482
+ drivers,
483
+ 'learned_policy_deny',
484
+ Math.min(0.26, 0.16 + (confidence * 0.12)),
485
+ 'Learned intervention policy predicts a deny-worthy failure pattern.',
486
+ { confidence, label }
487
+ );
488
+ } else if (label === 'warn' && confidence >= 0.3) {
489
+ addDriver(
490
+ drivers,
491
+ 'learned_policy_warn',
492
+ Math.min(0.18, 0.1 + (confidence * 0.08)),
493
+ 'Learned intervention policy predicts elevated execution risk.',
494
+ { confidence, label }
495
+ );
496
+ } else if (label === 'verify' && confidence >= 0.3) {
497
+ addDriver(
498
+ drivers,
499
+ 'learned_policy_verify',
500
+ Math.min(0.16, 0.08 + (confidence * 0.06)),
501
+ 'Learned intervention policy predicts a verification gap before close-out.',
502
+ { confidence, label }
503
+ );
504
+ } else if (label === 'recall' && confidence >= 0.3) {
505
+ addDriver(
506
+ drivers,
507
+ 'learned_policy_recall',
508
+ Math.min(0.14, 0.06 + (confidence * 0.05)),
509
+ 'Learned intervention policy predicts prior lessons are needed before execution.',
510
+ { confidence, label }
511
+ );
512
+ }
513
+ }
474
514
 
475
515
  const score = Math.min(1, drivers.reduce((sum, driver) => sum + driver.weight, 0));
476
516
  return {
@@ -491,6 +531,7 @@ function scoreRisk({
491
531
  function buildEvidence({
492
532
  integrity,
493
533
  memoryGuard,
534
+ learnedPolicy,
494
535
  blastRadius,
495
536
  taskScopeViolation,
496
537
  protectedSurface,
@@ -499,6 +540,16 @@ function buildEvidence({
499
540
  if (memoryGuard && memoryGuard.mode && memoryGuard.mode !== 'allow') {
500
541
  evidence.push(`Memory guard predicted ${memoryGuard.mode}: ${memoryGuard.reason}`);
501
542
  }
543
+ if (learnedPolicy && learnedPolicy.enabled && learnedPolicy.prediction) {
544
+ const topTokens = Array.isArray(learnedPolicy.topTokens)
545
+ ? learnedPolicy.topTokens.map((entry) => entry.token).slice(0, 3)
546
+ : [];
547
+ evidence.push(
548
+ `Learned policy predicted ${learnedPolicy.prediction.label} (${Math.round((learnedPolicy.prediction.confidence || 0) * 100)}% confidence)`
549
+ + (topTokens.length ? ` from ${topTokens.join(', ')}` : '')
550
+ + '.'
551
+ );
552
+ }
502
553
  if (taskScopeViolation) {
503
554
  evidence.push(
504
555
  taskScopeViolation.reasonCode === 'missing_task_scope'
@@ -523,12 +574,59 @@ function buildEvidence({
523
574
  return evidence;
524
575
  }
525
576
 
577
+ function addIntegrityRemediations(push, integrity) {
578
+ if (!integrity || !Array.isArray(integrity.blockers)) {
579
+ return;
580
+ }
581
+
582
+ const blockerCodes = new Set(integrity.blockers.map((blocker) => blocker.code));
583
+ const remediationSpecs = [
584
+ {
585
+ codes: ['missing_branch_governance'],
586
+ id: 'set_branch_governance',
587
+ title: 'Declare branch governance',
588
+ action: 'Call set_branch_governance with branchName, baseBranch, and PR/release expectations.',
589
+ why: 'Release, merge, and PR workflows need explicit branch state.',
590
+ },
591
+ {
592
+ codes: ['merge_requires_pr_context'],
593
+ id: 'attach_pr_context',
594
+ title: 'Attach PR context',
595
+ action: 'Update branch governance with prNumber or prUrl before merging.',
596
+ why: 'Merge actions should be tied to one explicit review surface.',
597
+ },
598
+ {
599
+ codes: ['missing_release_version', 'release_version_mismatch'],
600
+ id: 'align_release_version',
601
+ title: 'Align release version',
602
+ action: 'Set branch governance releaseVersion and verify it matches package.json before publish.',
603
+ why: 'Release metadata should match the artifact being published.',
604
+ },
605
+ {
606
+ codes: ['publish_requires_base_branch', 'publish_requires_mainline_head'],
607
+ id: 'switch_to_mainline',
608
+ title: 'Run publish from mainline',
609
+ action: `Move the action onto ${integrity.baseBranch || DEFAULT_BASE_BRANCH} after the merge commit exists.`,
610
+ why: 'Publish and tag flows should execute from the protected mainline branch.',
611
+ },
612
+ ];
613
+
614
+ for (const remediation of remediationSpecs) {
615
+ if (!remediation.codes.some((code) => blockerCodes.has(code))) {
616
+ continue;
617
+ }
618
+ push(remediation.id, remediation.title, remediation.action, remediation.why);
619
+ }
620
+ }
621
+
526
622
  function buildRemediations({
527
623
  integrity,
528
624
  taskScopeViolation,
529
625
  protectedSurface,
530
626
  blastRadius,
531
627
  memoryGuard,
628
+ learnedPolicy,
629
+ executionSurface,
532
630
  }) {
533
631
  const remediations = [];
534
632
  const seen = new Set();
@@ -555,41 +653,7 @@ function buildRemediations({
555
653
  'Protected policy files need an explicit time-bounded approval.'
556
654
  );
557
655
  }
558
- if (integrity && Array.isArray(integrity.blockers)) {
559
- const blockerCodes = new Set(integrity.blockers.map((blocker) => blocker.code));
560
- if (blockerCodes.has('missing_branch_governance')) {
561
- push(
562
- 'set_branch_governance',
563
- 'Declare branch governance',
564
- 'Call set_branch_governance with branchName, baseBranch, and PR/release expectations.',
565
- 'Release, merge, and PR workflows need explicit branch state.'
566
- );
567
- }
568
- if (blockerCodes.has('merge_requires_pr_context')) {
569
- push(
570
- 'attach_pr_context',
571
- 'Attach PR context',
572
- 'Update branch governance with prNumber or prUrl before merging.',
573
- 'Merge actions should be tied to one explicit review surface.'
574
- );
575
- }
576
- if (blockerCodes.has('missing_release_version') || blockerCodes.has('release_version_mismatch')) {
577
- push(
578
- 'align_release_version',
579
- 'Align release version',
580
- 'Set branch governance releaseVersion and verify it matches package.json before publish.',
581
- 'Release metadata should match the artifact being published.'
582
- );
583
- }
584
- if (blockerCodes.has('publish_requires_base_branch') || blockerCodes.has('publish_requires_mainline_head')) {
585
- push(
586
- 'switch_to_mainline',
587
- 'Run publish from mainline',
588
- `Move the action onto ${integrity.baseBranch || DEFAULT_BASE_BRANCH} after the merge commit exists.`,
589
- 'Publish and tag flows should execute from the protected mainline branch.'
590
- );
591
- }
592
- }
656
+ addIntegrityRemediations(push, integrity);
593
657
  if (memoryGuard && memoryGuard.mode && memoryGuard.mode !== 'allow') {
594
658
  push(
595
659
  'retrieve_lessons',
@@ -598,6 +662,24 @@ function buildRemediations({
598
662
  'The system already has evidence that this action pattern failed before.'
599
663
  );
600
664
  }
665
+ if (learnedPolicy && learnedPolicy.enabled && learnedPolicy.prediction) {
666
+ if (learnedPolicy.prediction.label === 'verify' && learnedPolicy.prediction.confidence >= 0.3) {
667
+ push(
668
+ 'verify_before_closeout',
669
+ 'Raise verification before claiming success',
670
+ 'Run the relevant proof or test command and confirm the exact output before retrying or closing out.',
671
+ 'The learned policy predicts this path tends to fail at verification time.'
672
+ );
673
+ }
674
+ if (learnedPolicy.prediction.label === 'recall' && learnedPolicy.prediction.confidence >= 0.3) {
675
+ push(
676
+ 'retrieve_lessons',
677
+ 'Inspect prior lessons',
678
+ 'Call retrieve_lessons or search_lessons for this tool context before retrying.',
679
+ 'The learned policy predicts this action needs prior lessons and corrective context.'
680
+ );
681
+ }
682
+ }
601
683
  if (blastRadius.fileCount >= 4 || blastRadius.surfaceCount >= 3) {
602
684
  push(
603
685
  'split_blast_radius',
@@ -606,6 +688,14 @@ function buildRemediations({
606
688
  'Smaller blast radii are easier to verify and recover.'
607
689
  );
608
690
  }
691
+ if (executionSurface?.shouldSandbox) {
692
+ push(
693
+ 'route_to_docker_sandbox',
694
+ 'Route through Docker Sandboxes',
695
+ `Launch the repo in Docker Sandboxes before retrying. Standalone: ${executionSurface.launchers.standalone}. Docker Desktop: ${executionSurface.launchers.dockerDesktop}.`,
696
+ 'Isolated execution limits host damage when a high-risk local action goes wrong.'
697
+ );
698
+ }
609
699
 
610
700
  return remediations;
611
701
  }
@@ -615,6 +705,14 @@ function buildReasoning(report) {
615
705
  `Workflow sentinel risk ${report.band} (${report.riskScore}) for ${report.toolName}.`,
616
706
  `Blast radius: ${report.blastRadius.summary}.`,
617
707
  ];
708
+ if (report.learnedPolicy && report.learnedPolicy.enabled && report.learnedPolicy.prediction) {
709
+ lines.push(
710
+ `Learned policy predicted ${report.learnedPolicy.prediction.label} (${report.learnedPolicy.prediction.confidence}).`
711
+ );
712
+ }
713
+ if (report.executionSurface?.shouldSandbox) {
714
+ lines.push(`Execution surface: ${report.executionSurface.summary}`);
715
+ }
618
716
  for (const driver of report.drivers.slice(0, 4)) {
619
717
  lines.push(`Driver ${driver.key} (+${driver.weight}): ${driver.reason}`);
620
718
  }
@@ -624,15 +722,42 @@ function buildReasoning(report) {
624
722
  return lines;
625
723
  }
626
724
 
627
- function chooseDecision({ riskScore, integrity, memoryGuard, blastRadius, command }) {
725
+ function getSentinelActionType(toolName) {
726
+ if (toolName === 'Bash') {
727
+ return 'shell.exec';
728
+ }
729
+ if (EDIT_LIKE_TOOLS.has(toolName)) {
730
+ return 'file.write';
731
+ }
732
+ return '';
733
+ }
734
+
735
+ function chooseDecision({ riskScore, integrity, memoryGuard, learnedPolicy, blastRadius, command }) {
628
736
  const hasOperationalBlockers = Boolean(integrity && Array.isArray(integrity.blockers) && integrity.blockers.length > 0);
629
737
  const destructiveBypass = /\bgit\s+push\b.*(?:--force|-f)\b/i.test(command) || /\bgh\s+pr\s+merge\b.*--admin\b/i.test(command);
738
+ const learnedPrediction = learnedPolicy && learnedPolicy.enabled ? learnedPolicy.prediction : null;
739
+ const learnedHardStop = Boolean(
740
+ learnedPrediction
741
+ && learnedPrediction.label === 'deny'
742
+ && learnedPrediction.confidence >= 0.7
743
+ );
744
+ const learnedWarning = Boolean(
745
+ learnedPrediction
746
+ && ['warn', 'verify', 'deny'].includes(learnedPrediction.label)
747
+ && learnedPrediction.confidence >= 0.3
748
+ );
749
+ const learnedRecall = Boolean(
750
+ learnedPrediction
751
+ && learnedPrediction.label === 'recall'
752
+ && learnedPrediction.confidence >= 0.3
753
+ );
630
754
  const lowBlastRadius = blastRadius.fileCount <= 1
631
755
  && blastRadius.surfaceCount <= 1
632
756
  && blastRadius.releaseSensitiveFiles.length === 0
633
757
  && blastRadius.unapprovedProtectedFiles === 0;
634
758
  const lowRiskHandoff = /\bgit\s+push\b|\bgh\s+pr\s+(?:create|merge)\b/i.test(command)
635
759
  && !destructiveBypass
760
+ && !learnedHardStop
636
761
  && lowBlastRadius
637
762
  && !hasOperationalBlockers
638
763
  && memoryGuard
@@ -652,10 +777,10 @@ function chooseDecision({ riskScore, integrity, memoryGuard, blastRadius, comman
652
777
  if (lowRiskHandoff) {
653
778
  return 'allow';
654
779
  }
655
- if (destructiveBypass || repeatedHighBlast || (hasOperationalBlockers && riskScore >= 0.72) || riskScore >= 0.86) {
780
+ if (destructiveBypass || learnedHardStop || repeatedHighBlast || (hasOperationalBlockers && riskScore >= 0.72) || riskScore >= 0.86) {
656
781
  return 'deny';
657
782
  }
658
- if (riskScore >= 0.45) {
783
+ if (riskScore >= 0.45 || (learnedWarning && riskScore >= 0.3) || (learnedRecall && riskScore >= 0.34)) {
659
784
  return 'warn';
660
785
  }
661
786
  return 'allow';
@@ -698,6 +823,20 @@ function evaluateWorkflowSentinel(toolName, toolInput = {}, options = {}) {
698
823
  affectedFiles,
699
824
  }), options.feedbackOptions || {});
700
825
  const memoryGuard = normalizeMemoryGuardForSentinel(rawMemoryGuard, highRiskAction);
826
+ const learnedPolicy = getInterventionRecommendation({
827
+ toolName,
828
+ command: toolInput.command || '',
829
+ affectedFiles,
830
+ integrity,
831
+ memoryGuard,
832
+ riskBand: highRiskAction ? 'high' : 'low',
833
+ taskScopeViolation,
834
+ protectedSurface: protectedSurfaceForRisk,
835
+ }, {
836
+ feedbackDir: options.feedbackDir
837
+ || process.env.THUMBGATE_FEEDBACK_DIR
838
+ || (repoRoot ? path.join(repoRoot, '.thumbgate') : null),
839
+ });
701
840
  const blastRadius = buildBlastRadius({
702
841
  affectedFiles,
703
842
  integrity,
@@ -709,14 +848,28 @@ function evaluateWorkflowSentinel(toolName, toolInput = {}, options = {}) {
709
848
  affectedFiles,
710
849
  integrity,
711
850
  memoryGuard,
851
+ learnedPolicy,
712
852
  blastRadius,
713
853
  taskScopeViolation,
714
854
  protectedSurface: protectedSurfaceForRisk,
715
855
  });
856
+ const executionSurface = buildDockerSandboxPlan({
857
+ toolName,
858
+ actionType: getSentinelActionType(toolName),
859
+ command: toolInput.command,
860
+ repoPath,
861
+ affectedFiles,
862
+ riskBand: risk.band,
863
+ riskScore: risk.score,
864
+ requiresNetwork: Boolean(
865
+ /\b(?:curl|wget|gh\s+pr|git\s+push|npm\s+publish|yarn\s+publish|pnpm\s+publish)\b/i.test(toolInput.command || '')
866
+ ),
867
+ });
716
868
  const decision = chooseDecision({
717
869
  riskScore: risk.score,
718
870
  integrity,
719
871
  memoryGuard,
872
+ learnedPolicy,
720
873
  blastRadius: {
721
874
  ...blastRadius,
722
875
  unapprovedProtectedFiles: protectedSurfaceForRisk.unapprovedProtectedFiles.length,
@@ -726,6 +879,7 @@ function evaluateWorkflowSentinel(toolName, toolInput = {}, options = {}) {
726
879
  const evidence = buildEvidence({
727
880
  integrity,
728
881
  memoryGuard,
882
+ learnedPolicy,
729
883
  blastRadius,
730
884
  taskScopeViolation,
731
885
  protectedSurface: protectedSurfaceForRisk,
@@ -736,6 +890,8 @@ function evaluateWorkflowSentinel(toolName, toolInput = {}, options = {}) {
736
890
  protectedSurface: protectedSurfaceForRisk,
737
891
  blastRadius,
738
892
  memoryGuard,
893
+ learnedPolicy,
894
+ executionSurface,
739
895
  });
740
896
  const summary = decision === 'allow'
741
897
  ? 'No predictive workflow blockers detected.'
@@ -743,7 +899,7 @@ function evaluateWorkflowSentinel(toolName, toolInput = {}, options = {}) {
743
899
  ? 'Predicted workflow risk is elevated before execution.'
744
900
  : 'Predicted workflow failure before execution.';
745
901
  const report = {
746
- sentinelVersion: 'workflow-sentinel-v1',
902
+ sentinelVersion: 'workflow-sentinel-v2',
747
903
  toolName,
748
904
  decision,
749
905
  riskScore: risk.score,
@@ -753,7 +909,9 @@ function evaluateWorkflowSentinel(toolName, toolInput = {}, options = {}) {
753
909
  blastRadius,
754
910
  evidence,
755
911
  remediations,
912
+ executionSurface,
756
913
  memoryGuard,
914
+ learnedPolicy,
757
915
  taskScopeViolation,
758
916
  operationalIntegrity: {
759
917
  ok: integrity.ok,
@@ -86,7 +86,7 @@ Bounded retrieval of relevant feedback history for the current task. The agent g
86
86
 
87
87
  | | Free | Pro | Team |
88
88
  |---|---|---|---|
89
- | Feedback capture | Unlimited | Unlimited | Unlimited |
89
+ | Feedback capture | 3/day | Unlimited | Unlimited |
90
90
  | Lesson search | 5/day | Unlimited | Unlimited |
91
91
  | Active gates | 5 | Unlimited | Unlimited |
92
92
  | Dashboard | - | Yes | Yes |