thumbgate 1.1.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/README.md +4 -4
- package/.claude-plugin/marketplace.json +1 -1
- package/.claude-plugin/plugin.json +1 -1
- package/.well-known/mcp/server-card.json +1 -1
- package/README.md +48 -16
- package/adapters/README.md +1 -1
- package/adapters/claude/.mcp.json +2 -2
- package/adapters/codex/config.toml +2 -2
- package/adapters/mcp/server-stdio.js +11 -8
- package/adapters/opencode/opencode.json +1 -1
- package/bin/cli.js +20 -11
- package/config/github-about.json +1 -1
- package/config/model-tiers.json +11 -0
- package/package.json +22 -11
- package/plugins/claude-codex-bridge/.claude-plugin/plugin.json +1 -1
- package/plugins/claude-codex-bridge/.mcp.json +1 -1
- package/plugins/codex-profile/.codex-plugin/plugin.json +1 -1
- package/plugins/codex-profile/.mcp.json +1 -1
- package/plugins/codex-profile/INSTALL.md +1 -1
- package/plugins/codex-profile/README.md +1 -1
- package/plugins/cursor-marketplace/.cursor-plugin/plugin.json +1 -1
- package/plugins/cursor-marketplace/README.md +2 -2
- package/plugins/cursor-marketplace/commands/capture-feedback.md +2 -2
- package/plugins/cursor-marketplace/rules/feedback-capture.mdc +3 -3
- package/plugins/cursor-marketplace/skills/capture-feedback/SKILL.md +3 -2
- package/plugins/opencode-profile/INSTALL.md +1 -1
- package/public/compare.html +302 -0
- package/public/guide.html +4 -4
- package/public/index.html +77 -38
- package/public/learn/ai-agent-persistent-memory.html +1 -0
- package/public/lessons.html +325 -17
- package/scripts/__pycache__/train_from_feedback.cpython-312.pyc +0 -0
- package/scripts/ai-search-visibility.js +142 -0
- package/scripts/audit-trail.js +6 -0
- package/scripts/capture-railway-diagnostics.sh +97 -0
- package/scripts/changeset-check.js +372 -0
- package/scripts/check-congruence.js +8 -5
- package/scripts/claude-feedback-sync.js +320 -0
- package/scripts/cli-telemetry.js +4 -1
- package/scripts/computer-use-firewall.js +45 -15
- package/scripts/contextfs.js +32 -23
- package/scripts/dashboard.js +84 -0
- package/scripts/docker-sandbox-planner.js +208 -0
- package/scripts/feedback-loop.js +16 -0
- package/scripts/github-about.js +56 -0
- package/scripts/intervention-policy.js +696 -0
- package/scripts/local-model-profile.js +18 -2
- package/scripts/model-tier-router.js +10 -1
- package/scripts/operational-integrity.js +361 -32
- package/scripts/prove-adapters.js +1 -0
- package/scripts/prove-automation.js +2 -2
- package/scripts/prove-packaged-runtime.js +260 -0
- package/scripts/prove-runtime.js +13 -0
- package/scripts/published-cli.js +10 -1
- package/scripts/rate-limiter.js +3 -3
- package/scripts/statusline-links.js +238 -0
- package/scripts/statusline-local-stats.js +2 -0
- package/scripts/statusline.sh +200 -10
- package/scripts/sync-github-about.js +7 -4
- package/scripts/tool-registry.js +2 -2
- package/scripts/workflow-sentinel.js +197 -39
- package/skills/thumbgate/SKILL.md +1 -1
- package/src/api/server.js +12 -1
package/scripts/statusline.sh
CHANGED
|
@@ -6,13 +6,23 @@
|
|
|
6
6
|
# Resolve script directory safely (CodeQL: no uncontrolled paths)
|
|
7
7
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)"
|
|
8
8
|
case "$SCRIPT_DIR" in *[!a-zA-Z0-9/_.-]*) echo "ThumbGate: invalid script path"; exit 1;; esac
|
|
9
|
+
LOCAL_API_ORIGIN="${THUMBGATE_LOCAL_API_ORIGIN:-http://localhost:3456}"
|
|
9
10
|
|
|
10
11
|
# ── Parse Claude Code session JSON from stdin ─────────────────────
|
|
11
12
|
eval "$(cat | jq -r '
|
|
12
13
|
def n(f): f // 0;
|
|
13
|
-
@sh "CTX_PCT=\(n(.context_window.used_percentage) | floor)"
|
|
14
|
+
@sh "CTX_PCT=\(n(.context_window.used_percentage) | floor)",
|
|
15
|
+
@sh "PROJECT_CWD=\(.cwd // .working_directory // "")"
|
|
14
16
|
' 2>/dev/null)"
|
|
15
17
|
CTX_PCT="${CTX_PCT:-0}"
|
|
18
|
+
PROJECT_CWD="${PROJECT_CWD:-}"
|
|
19
|
+
|
|
20
|
+
if [ -n "$PROJECT_CWD" ] && [ -d "$PROJECT_CWD" ]; then
|
|
21
|
+
export THUMBGATE_PROJECT_DIR="$PROJECT_CWD"
|
|
22
|
+
if [ -z "${THUMBGATE_FEEDBACK_DIR:-}" ]; then
|
|
23
|
+
export THUMBGATE_FEEDBACK_DIR="${PROJECT_CWD}/.claude/memory/feedback"
|
|
24
|
+
fi
|
|
25
|
+
fi
|
|
16
26
|
|
|
17
27
|
# ── ThumbGate stats from cache ────────────────────────────────────────
|
|
18
28
|
THUMBGATE_CACHE=""
|
|
@@ -63,7 +73,7 @@ fi
|
|
|
63
73
|
# Background refresh from REST API when cache is stale (>120s)
|
|
64
74
|
if [ $(( _NOW - ${CACHE_TS:-0} )) -gt 120 ]; then
|
|
65
75
|
(
|
|
66
|
-
_R=$(curl -s --max-time 3 "
|
|
76
|
+
_R=$(curl -s --max-time 3 "${LOCAL_API_ORIGIN}/v1/feedback/stats" -H "Authorization: Bearer ${THUMBGATE_API_KEY:-tg_creator_dev_enterprise}" 2>/dev/null)
|
|
67
77
|
[ -z "$_R" ] && exit 0
|
|
68
78
|
echo "$_R" | python3 -c "
|
|
69
79
|
import json,sys,time,os
|
|
@@ -78,6 +88,23 @@ except:pass
|
|
|
78
88
|
disown 2>/dev/null
|
|
79
89
|
fi
|
|
80
90
|
|
|
91
|
+
# ── Clickable statusline affordances ─────────────────────────────
|
|
92
|
+
LINK_STATE="offline"
|
|
93
|
+
UP_URL=""; DOWN_URL=""; DASHBOARD_URL=""; LESSONS_URL=""
|
|
94
|
+
DASHBOARD_LABEL="Dashboard"; LESSONS_LABEL="Lessons"
|
|
95
|
+
_LINKS_JSON=$(node "${SCRIPT_DIR}/statusline-links.js" 2>/dev/null)
|
|
96
|
+
if [ -n "$_LINKS_JSON" ]; then
|
|
97
|
+
eval "$(echo "$_LINKS_JSON" | jq -r '
|
|
98
|
+
@sh "LINK_STATE=\(.state // "offline")",
|
|
99
|
+
@sh "UP_URL=\(.upUrl // "")",
|
|
100
|
+
@sh "DOWN_URL=\(.downUrl // "")",
|
|
101
|
+
@sh "DASHBOARD_URL=\(.dashboardUrl // "")",
|
|
102
|
+
@sh "LESSONS_URL=\(.lessonsUrl // "")",
|
|
103
|
+
@sh "DASHBOARD_LABEL=\(.dashboardLabel // "Dashboard")",
|
|
104
|
+
@sh "LESSONS_LABEL=\(.lessonsLabel // "Lessons")"
|
|
105
|
+
' 2>/dev/null)"
|
|
106
|
+
fi
|
|
107
|
+
|
|
81
108
|
# ── ThumbGate package metadata ────────────────────────────────────────
|
|
82
109
|
TG_VERSION="unknown"; TG_TIER="Free"
|
|
83
110
|
_META_JSON=$(node "${SCRIPT_DIR}/statusline-meta.js" 2>/dev/null)
|
|
@@ -99,6 +126,16 @@ if [ -n "$_TOWER_JSON" ]; then
|
|
|
99
126
|
' 2>/dev/null)"
|
|
100
127
|
fi
|
|
101
128
|
|
|
129
|
+
# ── Latest lesson ──────────────────────────────────────────────────
|
|
130
|
+
LESSON_TEXT=""; LESSON_ID=""
|
|
131
|
+
_LESSON_JSON=$(node "${SCRIPT_DIR}/statusline-lesson.js" 2>/dev/null)
|
|
132
|
+
if [ -n "$_LESSON_JSON" ]; then
|
|
133
|
+
eval "$(echo "$_LESSON_JSON" | jq -r '
|
|
134
|
+
@sh "LESSON_TEXT=\(.text // "")",
|
|
135
|
+
@sh "LESSON_ID=\(.lessonId // "")"
|
|
136
|
+
' 2>/dev/null)"
|
|
137
|
+
fi
|
|
138
|
+
|
|
102
139
|
# ── Colors ────────────────────────────────────────────────────────
|
|
103
140
|
G='\033[32m'; R='\033[31m'; M='\033[35m'; C='\033[36m'; D='\033[90m'; BD='\033[1m'; RST='\033[0m'
|
|
104
141
|
|
|
@@ -107,17 +144,170 @@ case "${TREND}" in
|
|
|
107
144
|
improving) ARROW="↗" ;; degrading) ARROW="↘" ;; stable) ARROW="→" ;; *) ARROW="?" ;;
|
|
108
145
|
esac
|
|
109
146
|
|
|
147
|
+
osc8_link() {
|
|
148
|
+
local url="$1"
|
|
149
|
+
local label="$2"
|
|
150
|
+
if [ -n "$url" ]; then
|
|
151
|
+
printf '\033]8;;%s\a%s\033]8;;\a' "$url" "$label"
|
|
152
|
+
else
|
|
153
|
+
printf '%s' "$label"
|
|
154
|
+
fi
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
UP_ICON="$(osc8_link "$UP_URL" "👍")"
|
|
158
|
+
DOWN_ICON="$(osc8_link "$DOWN_URL" "👎")"
|
|
159
|
+
DASHBOARD_LINK="$(osc8_link "$DASHBOARD_URL" "$DASHBOARD_LABEL")"
|
|
160
|
+
LESSONS_LINK="$(osc8_link "$LESSONS_URL" "$LESSONS_LABEL")"
|
|
161
|
+
|
|
162
|
+
is_numeric() {
|
|
163
|
+
case "$1" in
|
|
164
|
+
''|*[!0-9]*) return 1 ;;
|
|
165
|
+
*) return 0 ;;
|
|
166
|
+
esac
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
# Keep ThumbGate within a conservative left-side budget so Claude's own
|
|
170
|
+
# right-side notices do not visually collide with our line.
|
|
171
|
+
STATUSLINE_DEFAULT_MAX_CHARS="${THUMBGATE_STATUSLINE_DEFAULT_MAX_CHARS:-96}"
|
|
172
|
+
STATUSLINE_RIGHT_RESERVE="${THUMBGATE_STATUSLINE_RIGHT_RESERVE:-28}"
|
|
173
|
+
if ! is_numeric "$STATUSLINE_DEFAULT_MAX_CHARS"; then STATUSLINE_DEFAULT_MAX_CHARS=96; fi
|
|
174
|
+
if ! is_numeric "$STATUSLINE_RIGHT_RESERVE"; then STATUSLINE_RIGHT_RESERVE=28; fi
|
|
175
|
+
|
|
176
|
+
if is_numeric "${THUMBGATE_STATUSLINE_MAX_CHARS:-}"; then
|
|
177
|
+
STATUSLINE_MAX_CHARS="$THUMBGATE_STATUSLINE_MAX_CHARS"
|
|
178
|
+
else
|
|
179
|
+
STATUSLINE_MAX_CHARS="$STATUSLINE_DEFAULT_MAX_CHARS"
|
|
180
|
+
if is_numeric "${COLUMNS:-}"; then
|
|
181
|
+
_AVAILABLE_CHARS=$(( COLUMNS - STATUSLINE_RIGHT_RESERVE ))
|
|
182
|
+
if [ "$_AVAILABLE_CHARS" -gt 0 ] && [ "$_AVAILABLE_CHARS" -lt "$STATUSLINE_MAX_CHARS" ]; then
|
|
183
|
+
STATUSLINE_MAX_CHARS="$_AVAILABLE_CHARS"
|
|
184
|
+
fi
|
|
185
|
+
fi
|
|
186
|
+
fi
|
|
187
|
+
if [ "$STATUSLINE_MAX_CHARS" -lt 48 ]; then STATUSLINE_MAX_CHARS=48; fi
|
|
188
|
+
|
|
189
|
+
PLAIN_SEGMENTS=()
|
|
190
|
+
RENDERED_SEGMENTS=()
|
|
191
|
+
|
|
192
|
+
current_plain_length() {
|
|
193
|
+
local total=0
|
|
194
|
+
local i
|
|
195
|
+
for ((i = 0; i < ${#PLAIN_SEGMENTS[@]}; i++)); do
|
|
196
|
+
if [ "$i" -gt 0 ]; then
|
|
197
|
+
total=$((total + 3))
|
|
198
|
+
fi
|
|
199
|
+
total=$((total + ${#PLAIN_SEGMENTS[$i]}))
|
|
200
|
+
done
|
|
201
|
+
printf '%s' "$total"
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
push_segment() {
|
|
205
|
+
PLAIN_SEGMENTS+=("$1")
|
|
206
|
+
RENDERED_SEGMENTS+=("$2")
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
add_segment_if_fit() {
|
|
210
|
+
local plain="$1"
|
|
211
|
+
local rendered="$2"
|
|
212
|
+
local current extra
|
|
213
|
+
current=$(current_plain_length)
|
|
214
|
+
extra=${#plain}
|
|
215
|
+
if [ "${#PLAIN_SEGMENTS[@]}" -gt 0 ]; then
|
|
216
|
+
extra=$((extra + 3))
|
|
217
|
+
fi
|
|
218
|
+
if [ $((current + extra)) -le "$STATUSLINE_MAX_CHARS" ]; then
|
|
219
|
+
push_segment "$plain" "$rendered"
|
|
220
|
+
return 0
|
|
221
|
+
fi
|
|
222
|
+
return 1
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
truncate_plain_text() {
|
|
226
|
+
local text="$1"
|
|
227
|
+
local max_chars="$2"
|
|
228
|
+
if [ "$max_chars" -le 0 ]; then
|
|
229
|
+
printf ''
|
|
230
|
+
elif [ "${#text}" -le "$max_chars" ]; then
|
|
231
|
+
printf '%s' "$text"
|
|
232
|
+
elif [ "$max_chars" -le 3 ]; then
|
|
233
|
+
printf '%.*s' "$max_chars" "$text"
|
|
234
|
+
else
|
|
235
|
+
printf '%s...' "${text:0:$((max_chars - 3))}"
|
|
236
|
+
fi
|
|
237
|
+
}
|
|
238
|
+
|
|
239
|
+
add_truncated_segment_if_fit() {
|
|
240
|
+
local plain="$1"
|
|
241
|
+
local color="$2"
|
|
242
|
+
local min_chars="${3:-14}"
|
|
243
|
+
local current sep remaining truncated
|
|
244
|
+
current=$(current_plain_length)
|
|
245
|
+
sep=0
|
|
246
|
+
if [ "${#PLAIN_SEGMENTS[@]}" -gt 0 ]; then
|
|
247
|
+
sep=3
|
|
248
|
+
fi
|
|
249
|
+
remaining=$((STATUSLINE_MAX_CHARS - current - sep))
|
|
250
|
+
if [ "$remaining" -lt "$min_chars" ]; then
|
|
251
|
+
return 1
|
|
252
|
+
fi
|
|
253
|
+
truncated=$(truncate_plain_text "$plain" "$remaining")
|
|
254
|
+
push_segment "$truncated" "${color}${truncated}${RST}"
|
|
255
|
+
return 0
|
|
256
|
+
}
|
|
257
|
+
|
|
258
|
+
render_segments() {
|
|
259
|
+
local line=''
|
|
260
|
+
local i
|
|
261
|
+
for ((i = 0; i < ${#RENDERED_SEGMENTS[@]}; i++)); do
|
|
262
|
+
if [ "$i" -gt 0 ]; then
|
|
263
|
+
line="${line} · "
|
|
264
|
+
fi
|
|
265
|
+
line="${line}${RENDERED_SEGMENTS[$i]}"
|
|
266
|
+
done
|
|
267
|
+
printf '%b\n' "$line"
|
|
268
|
+
}
|
|
269
|
+
|
|
110
270
|
# ── Output (single line) ─────────────────────────────────────────
|
|
111
|
-
LINE="ThumbGate v${TG_VERSION} · ${TG_TIER}"
|
|
112
271
|
if [ "$UP" = "0" ] && [ "$DOWN" = "0" ]; then
|
|
113
|
-
|
|
272
|
+
push_segment "ThumbGate v${TG_VERSION}" "${D}ThumbGate v${TG_VERSION}${RST}"
|
|
273
|
+
push_segment "${TG_TIER}" "${D}${TG_TIER}${RST}"
|
|
274
|
+
push_segment "no feedback yet" "${D}no feedback yet${RST}"
|
|
275
|
+
add_segment_if_fit "${DASHBOARD_LABEL}" "${C}${DASHBOARD_LINK}${RST}"
|
|
276
|
+
add_segment_if_fit "${LESSONS_LABEL}" "${M}${LESSONS_LINK}${RST}"
|
|
277
|
+
render_segments
|
|
114
278
|
else
|
|
115
|
-
|
|
279
|
+
STATS_PLAIN="${UP}👍 ${DOWN}👎 ${ARROW}"
|
|
280
|
+
STATS_RENDERED="${G}${BD}${UP}${RST}${UP_ICON} ${R}${BD}${DOWN}${RST}${DOWN_ICON} ${ARROW}"
|
|
281
|
+
ALERTS_PLAIN=''
|
|
282
|
+
ALERTS_RENDERED=''
|
|
283
|
+
|
|
284
|
+
if [ "${SLO_V:-0}" -gt 0 ]; then
|
|
285
|
+
ALERTS_PLAIN="${ALERTS_PLAIN}${ALERTS_PLAIN:+ }${SLO_V} SLO"
|
|
286
|
+
ALERTS_RENDERED="${ALERTS_RENDERED}${ALERTS_RENDERED:+ }${R}${SLO_V} SLO${RST}"
|
|
287
|
+
fi
|
|
288
|
+
if [ "${AT_RISK:-0}" -gt 0 ]; then
|
|
289
|
+
ALERTS_PLAIN="${ALERTS_PLAIN}${ALERTS_PLAIN:+ }${AT_RISK}⚠"
|
|
290
|
+
ALERTS_RENDERED="${ALERTS_RENDERED}${ALERTS_RENDERED:+ }${R}${AT_RISK}⚠${RST}"
|
|
291
|
+
fi
|
|
292
|
+
if [ "${ANOMALIES:-0}" -gt 0 ]; then
|
|
293
|
+
ALERTS_PLAIN="${ALERTS_PLAIN}${ALERTS_PLAIN:+ }${ANOMALIES}☠"
|
|
294
|
+
ALERTS_RENDERED="${ALERTS_RENDERED}${ALERTS_RENDERED:+ }${R}${ANOMALIES}☠${RST}"
|
|
295
|
+
fi
|
|
116
296
|
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
297
|
+
push_segment "ThumbGate v${TG_VERSION}" "ThumbGate v${TG_VERSION}"
|
|
298
|
+
push_segment "${TG_TIER}" "${TG_TIER}"
|
|
299
|
+
push_segment "${STATS_PLAIN}" "${STATS_RENDERED}"
|
|
300
|
+
add_segment_if_fit "${DASHBOARD_LABEL}" "${C}${DASHBOARD_LINK}${RST}"
|
|
301
|
+
add_segment_if_fit "${LESSONS_LABEL}" "${M}${LESSONS_LINK}${RST}"
|
|
302
|
+
if [ "${LESSONS:-0}" -gt 0 ]; then
|
|
303
|
+
add_segment_if_fit "${LESSONS} lessons" "${M}${BD}${LESSONS}${RST} lessons"
|
|
304
|
+
fi
|
|
305
|
+
if [ -n "${ALERTS_PLAIN}" ]; then
|
|
306
|
+
add_segment_if_fit "${ALERTS_PLAIN}" "${ALERTS_RENDERED}"
|
|
307
|
+
fi
|
|
308
|
+
if [ -n "${LESSON_TEXT}" ]; then
|
|
309
|
+
add_truncated_segment_if_fit "${LESSON_TEXT}" "${D}" 14
|
|
310
|
+
fi
|
|
121
311
|
|
|
122
|
-
|
|
312
|
+
render_segments
|
|
123
313
|
fi
|
package/scripts/sync-github-about.js
CHANGED
|
@@ -6,6 +6,7 @@ const {
|
|
|
6
6
|
fetchLiveGitHubAbout,
|
|
7
7
|
loadGitHubAboutConfig,
|
|
8
8
|
updateLiveGitHubAbout,
|
|
9
|
+
verifyLiveGitHubAbout,
|
|
9
10
|
} = require('./github-about');
|
|
10
11
|
|
|
11
12
|
async function main() {
|
|
@@ -32,11 +33,13 @@ async function main() {
|
|
|
32
33
|
console.log(`Syncing GitHub About for ${about.repo}...`);
|
|
33
34
|
await updateLiveGitHubAbout({ repo: about.repo });
|
|
34
35
|
|
|
35
|
-
const
|
|
36
|
-
|
|
37
|
-
|
|
36
|
+
const verification = await verifyLiveGitHubAbout({
|
|
37
|
+
expected: about,
|
|
38
|
+
repo: about.repo,
|
|
39
|
+
});
|
|
40
|
+
if (verification.errors.length > 0) {
|
|
38
41
|
console.error(`\n❌ GitHub About sync incomplete for ${about.repo}:\n`);
|
|
39
|
-
for (const error of
|
|
42
|
+
for (const error of verification.errors) {
|
|
40
43
|
console.error(` • ${error}`);
|
|
41
44
|
}
|
|
42
45
|
console.error('');
|
package/scripts/tool-registry.js
CHANGED
|
@@ -36,7 +36,7 @@ const TOOLS = [
|
|
|
36
36
|
whatWorked: { type: 'string' },
|
|
37
37
|
chatHistory: {
|
|
38
38
|
type: 'array',
|
|
39
|
-
description: 'Optional recent conversation window used for history-aware lesson distillation.',
|
|
39
|
+
description: 'Optional caller-supplied recent conversation window used for history-aware lesson distillation. The current Claude auto-capture path sends up to 8 prior recorded entries for vague negative inline signals.',
|
|
40
40
|
items: {
|
|
41
41
|
type: 'object',
|
|
42
42
|
properties: {
|
|
@@ -59,7 +59,7 @@ const TOOLS = [
|
|
|
59
59
|
timestamp: { type: 'string' },
|
|
60
60
|
},
|
|
61
61
|
},
|
|
62
|
-
description: '
|
|
62
|
+
description: 'Recent conversation turns before the feedback signal. Raw messages, not summaries.',
|
|
63
63
|
},
|
|
64
64
|
rubricScores: {
|
|
65
65
|
type: 'array',
|
package/scripts/workflow-sentinel.js
CHANGED
|
@@ -14,7 +14,9 @@ const {
|
|
|
14
14
|
normalizePosix,
|
|
15
15
|
resolveRepoRoot,
|
|
16
16
|
} = require('./operational-integrity');
|
|
17
|
+
const { buildDockerSandboxPlan } = require('./docker-sandbox-planner');
|
|
17
18
|
const { evaluatePretool } = require('./hybrid-feedback-context');
|
|
19
|
+
const { getInterventionRecommendation } = require('./intervention-policy');
|
|
18
20
|
|
|
19
21
|
const GOVERNANCE_STATE_PATH = path.join(process.env.HOME || '/tmp', '.thumbgate', 'governance-state.json');
|
|
20
22
|
const DEFAULT_PROTECTED_FILE_GLOBS = [
|
|
@@ -386,6 +388,7 @@ function scoreRisk({
|
|
|
386
388
|
affectedFiles,
|
|
387
389
|
integrity,
|
|
388
390
|
memoryGuard,
|
|
391
|
+
learnedPolicy,
|
|
389
392
|
blastRadius,
|
|
390
393
|
taskScopeViolation,
|
|
391
394
|
protectedSurface,
|
|
@@ -471,6 +474,43 @@ function scoreRisk({
|
|
|
471
474
|
{ mode: memoryGuard.mode }
|
|
472
475
|
);
|
|
473
476
|
}
|
|
477
|
+
if (learnedPolicy && learnedPolicy.enabled && learnedPolicy.prediction) {
|
|
478
|
+
const confidence = learnedPolicy.prediction.confidence || 0;
|
|
479
|
+
const label = learnedPolicy.prediction.label;
|
|
480
|
+
if (label === 'deny' && confidence >= 0.6) {
|
|
481
|
+
addDriver(
|
|
482
|
+
drivers,
|
|
483
|
+
'learned_policy_deny',
|
|
484
|
+
Math.min(0.26, 0.16 + (confidence * 0.12)),
|
|
485
|
+
'Learned intervention policy predicts a deny-worthy failure pattern.',
|
|
486
|
+
{ confidence, label }
|
|
487
|
+
);
|
|
488
|
+
} else if (label === 'warn' && confidence >= 0.3) {
|
|
489
|
+
addDriver(
|
|
490
|
+
drivers,
|
|
491
|
+
'learned_policy_warn',
|
|
492
|
+
Math.min(0.18, 0.1 + (confidence * 0.08)),
|
|
493
|
+
'Learned intervention policy predicts elevated execution risk.',
|
|
494
|
+
{ confidence, label }
|
|
495
|
+
);
|
|
496
|
+
} else if (label === 'verify' && confidence >= 0.3) {
|
|
497
|
+
addDriver(
|
|
498
|
+
drivers,
|
|
499
|
+
'learned_policy_verify',
|
|
500
|
+
Math.min(0.16, 0.08 + (confidence * 0.06)),
|
|
501
|
+
'Learned intervention policy predicts a verification gap before close-out.',
|
|
502
|
+
{ confidence, label }
|
|
503
|
+
);
|
|
504
|
+
} else if (label === 'recall' && confidence >= 0.3) {
|
|
505
|
+
addDriver(
|
|
506
|
+
drivers,
|
|
507
|
+
'learned_policy_recall',
|
|
508
|
+
Math.min(0.14, 0.06 + (confidence * 0.05)),
|
|
509
|
+
'Learned intervention policy predicts prior lessons are needed before execution.',
|
|
510
|
+
{ confidence, label }
|
|
511
|
+
);
|
|
512
|
+
}
|
|
513
|
+
}
|
|
474
514
|
|
|
475
515
|
const score = Math.min(1, drivers.reduce((sum, driver) => sum + driver.weight, 0));
|
|
476
516
|
return {
|
|
@@ -491,6 +531,7 @@ function scoreRisk({
|
|
|
491
531
|
function buildEvidence({
|
|
492
532
|
integrity,
|
|
493
533
|
memoryGuard,
|
|
534
|
+
learnedPolicy,
|
|
494
535
|
blastRadius,
|
|
495
536
|
taskScopeViolation,
|
|
496
537
|
protectedSurface,
|
|
@@ -499,6 +540,16 @@ function buildEvidence({
|
|
|
499
540
|
if (memoryGuard && memoryGuard.mode && memoryGuard.mode !== 'allow') {
|
|
500
541
|
evidence.push(`Memory guard predicted ${memoryGuard.mode}: ${memoryGuard.reason}`);
|
|
501
542
|
}
|
|
543
|
+
if (learnedPolicy && learnedPolicy.enabled && learnedPolicy.prediction) {
|
|
544
|
+
const topTokens = Array.isArray(learnedPolicy.topTokens)
|
|
545
|
+
? learnedPolicy.topTokens.map((entry) => entry.token).slice(0, 3)
|
|
546
|
+
: [];
|
|
547
|
+
evidence.push(
|
|
548
|
+
`Learned policy predicted ${learnedPolicy.prediction.label} (${Math.round((learnedPolicy.prediction.confidence || 0) * 100)}% confidence)`
|
|
549
|
+
+ (topTokens.length ? ` from ${topTokens.join(', ')}` : '')
|
|
550
|
+
+ '.'
|
|
551
|
+
);
|
|
552
|
+
}
|
|
502
553
|
if (taskScopeViolation) {
|
|
503
554
|
evidence.push(
|
|
504
555
|
taskScopeViolation.reasonCode === 'missing_task_scope'
|
|
@@ -523,12 +574,59 @@ function buildEvidence({
|
|
|
523
574
|
return evidence;
|
|
524
575
|
}
|
|
525
576
|
|
|
577
|
+
function addIntegrityRemediations(push, integrity) {
|
|
578
|
+
if (!integrity || !Array.isArray(integrity.blockers)) {
|
|
579
|
+
return;
|
|
580
|
+
}
|
|
581
|
+
|
|
582
|
+
const blockerCodes = new Set(integrity.blockers.map((blocker) => blocker.code));
|
|
583
|
+
const remediationSpecs = [
|
|
584
|
+
{
|
|
585
|
+
codes: ['missing_branch_governance'],
|
|
586
|
+
id: 'set_branch_governance',
|
|
587
|
+
title: 'Declare branch governance',
|
|
588
|
+
action: 'Call set_branch_governance with branchName, baseBranch, and PR/release expectations.',
|
|
589
|
+
why: 'Release, merge, and PR workflows need explicit branch state.',
|
|
590
|
+
},
|
|
591
|
+
{
|
|
592
|
+
codes: ['merge_requires_pr_context'],
|
|
593
|
+
id: 'attach_pr_context',
|
|
594
|
+
title: 'Attach PR context',
|
|
595
|
+
action: 'Update branch governance with prNumber or prUrl before merging.',
|
|
596
|
+
why: 'Merge actions should be tied to one explicit review surface.',
|
|
597
|
+
},
|
|
598
|
+
{
|
|
599
|
+
codes: ['missing_release_version', 'release_version_mismatch'],
|
|
600
|
+
id: 'align_release_version',
|
|
601
|
+
title: 'Align release version',
|
|
602
|
+
action: 'Set branch governance releaseVersion and verify it matches package.json before publish.',
|
|
603
|
+
why: 'Release metadata should match the artifact being published.',
|
|
604
|
+
},
|
|
605
|
+
{
|
|
606
|
+
codes: ['publish_requires_base_branch', 'publish_requires_mainline_head'],
|
|
607
|
+
id: 'switch_to_mainline',
|
|
608
|
+
title: 'Run publish from mainline',
|
|
609
|
+
action: `Move the action onto ${integrity.baseBranch || DEFAULT_BASE_BRANCH} after the merge commit exists.`,
|
|
610
|
+
why: 'Publish and tag flows should execute from the protected mainline branch.',
|
|
611
|
+
},
|
|
612
|
+
];
|
|
613
|
+
|
|
614
|
+
for (const remediation of remediationSpecs) {
|
|
615
|
+
if (!remediation.codes.some((code) => blockerCodes.has(code))) {
|
|
616
|
+
continue;
|
|
617
|
+
}
|
|
618
|
+
push(remediation.id, remediation.title, remediation.action, remediation.why);
|
|
619
|
+
}
|
|
620
|
+
}
|
|
621
|
+
|
|
526
622
|
function buildRemediations({
|
|
527
623
|
integrity,
|
|
528
624
|
taskScopeViolation,
|
|
529
625
|
protectedSurface,
|
|
530
626
|
blastRadius,
|
|
531
627
|
memoryGuard,
|
|
628
|
+
learnedPolicy,
|
|
629
|
+
executionSurface,
|
|
532
630
|
}) {
|
|
533
631
|
const remediations = [];
|
|
534
632
|
const seen = new Set();
|
|
@@ -555,41 +653,7 @@ function buildRemediations({
|
|
|
555
653
|
'Protected policy files need an explicit time-bounded approval.'
|
|
556
654
|
);
|
|
557
655
|
}
|
|
558
|
-
|
|
559
|
-
const blockerCodes = new Set(integrity.blockers.map((blocker) => blocker.code));
|
|
560
|
-
if (blockerCodes.has('missing_branch_governance')) {
|
|
561
|
-
push(
|
|
562
|
-
'set_branch_governance',
|
|
563
|
-
'Declare branch governance',
|
|
564
|
-
'Call set_branch_governance with branchName, baseBranch, and PR/release expectations.',
|
|
565
|
-
'Release, merge, and PR workflows need explicit branch state.'
|
|
566
|
-
);
|
|
567
|
-
}
|
|
568
|
-
if (blockerCodes.has('merge_requires_pr_context')) {
|
|
569
|
-
push(
|
|
570
|
-
'attach_pr_context',
|
|
571
|
-
'Attach PR context',
|
|
572
|
-
'Update branch governance with prNumber or prUrl before merging.',
|
|
573
|
-
'Merge actions should be tied to one explicit review surface.'
|
|
574
|
-
);
|
|
575
|
-
}
|
|
576
|
-
if (blockerCodes.has('missing_release_version') || blockerCodes.has('release_version_mismatch')) {
|
|
577
|
-
push(
|
|
578
|
-
'align_release_version',
|
|
579
|
-
'Align release version',
|
|
580
|
-
'Set branch governance releaseVersion and verify it matches package.json before publish.',
|
|
581
|
-
'Release metadata should match the artifact being published.'
|
|
582
|
-
);
|
|
583
|
-
}
|
|
584
|
-
if (blockerCodes.has('publish_requires_base_branch') || blockerCodes.has('publish_requires_mainline_head')) {
|
|
585
|
-
push(
|
|
586
|
-
'switch_to_mainline',
|
|
587
|
-
'Run publish from mainline',
|
|
588
|
-
`Move the action onto ${integrity.baseBranch || DEFAULT_BASE_BRANCH} after the merge commit exists.`,
|
|
589
|
-
'Publish and tag flows should execute from the protected mainline branch.'
|
|
590
|
-
);
|
|
591
|
-
}
|
|
592
|
-
}
|
|
656
|
+
addIntegrityRemediations(push, integrity);
|
|
593
657
|
if (memoryGuard && memoryGuard.mode && memoryGuard.mode !== 'allow') {
|
|
594
658
|
push(
|
|
595
659
|
'retrieve_lessons',
|
|
@@ -598,6 +662,24 @@ function buildRemediations({
|
|
|
598
662
|
'The system already has evidence that this action pattern failed before.'
|
|
599
663
|
);
|
|
600
664
|
}
|
|
665
|
+
if (learnedPolicy && learnedPolicy.enabled && learnedPolicy.prediction) {
|
|
666
|
+
if (learnedPolicy.prediction.label === 'verify' && learnedPolicy.prediction.confidence >= 0.3) {
|
|
667
|
+
push(
|
|
668
|
+
'verify_before_closeout',
|
|
669
|
+
'Raise verification before claiming success',
|
|
670
|
+
'Run the relevant proof or test command and confirm the exact output before retrying or closing out.',
|
|
671
|
+
'The learned policy predicts this path tends to fail at verification time.'
|
|
672
|
+
);
|
|
673
|
+
}
|
|
674
|
+
if (learnedPolicy.prediction.label === 'recall' && learnedPolicy.prediction.confidence >= 0.3) {
|
|
675
|
+
push(
|
|
676
|
+
'retrieve_lessons',
|
|
677
|
+
'Inspect prior lessons',
|
|
678
|
+
'Call retrieve_lessons or search_lessons for this tool context before retrying.',
|
|
679
|
+
'The learned policy predicts this action needs prior lessons and corrective context.'
|
|
680
|
+
);
|
|
681
|
+
}
|
|
682
|
+
}
|
|
601
683
|
if (blastRadius.fileCount >= 4 || blastRadius.surfaceCount >= 3) {
|
|
602
684
|
push(
|
|
603
685
|
'split_blast_radius',
|
|
@@ -606,6 +688,14 @@ function buildRemediations({
|
|
|
606
688
|
'Smaller blast radii are easier to verify and recover.'
|
|
607
689
|
);
|
|
608
690
|
}
|
|
691
|
+
if (executionSurface?.shouldSandbox) {
|
|
692
|
+
push(
|
|
693
|
+
'route_to_docker_sandbox',
|
|
694
|
+
'Route through Docker Sandboxes',
|
|
695
|
+
`Launch the repo in Docker Sandboxes before retrying. Standalone: ${executionSurface.launchers.standalone}. Docker Desktop: ${executionSurface.launchers.dockerDesktop}.`,
|
|
696
|
+
'Isolated execution limits host damage when a high-risk local action goes wrong.'
|
|
697
|
+
);
|
|
698
|
+
}
|
|
609
699
|
|
|
610
700
|
return remediations;
|
|
611
701
|
}
|
|
@@ -615,6 +705,14 @@ function buildReasoning(report) {
|
|
|
615
705
|
`Workflow sentinel risk ${report.band} (${report.riskScore}) for ${report.toolName}.`,
|
|
616
706
|
`Blast radius: ${report.blastRadius.summary}.`,
|
|
617
707
|
];
|
|
708
|
+
if (report.learnedPolicy && report.learnedPolicy.enabled && report.learnedPolicy.prediction) {
|
|
709
|
+
lines.push(
|
|
710
|
+
`Learned policy predicted ${report.learnedPolicy.prediction.label} (${report.learnedPolicy.prediction.confidence}).`
|
|
711
|
+
);
|
|
712
|
+
}
|
|
713
|
+
if (report.executionSurface?.shouldSandbox) {
|
|
714
|
+
lines.push(`Execution surface: ${report.executionSurface.summary}`);
|
|
715
|
+
}
|
|
618
716
|
for (const driver of report.drivers.slice(0, 4)) {
|
|
619
717
|
lines.push(`Driver ${driver.key} (+${driver.weight}): ${driver.reason}`);
|
|
620
718
|
}
|
|
@@ -624,15 +722,42 @@ function buildReasoning(report) {
|
|
|
624
722
|
return lines;
|
|
625
723
|
}
|
|
626
724
|
|
|
627
|
-
function chooseDecision({ riskScore, integrity, memoryGuard, blastRadius, command }) {
|
|
725
|
+
function getSentinelActionType(toolName) {
|
|
726
|
+
if (toolName === 'Bash') {
|
|
727
|
+
return 'shell.exec';
|
|
728
|
+
}
|
|
729
|
+
if (EDIT_LIKE_TOOLS.has(toolName)) {
|
|
730
|
+
return 'file.write';
|
|
731
|
+
}
|
|
732
|
+
return '';
|
|
733
|
+
}
|
|
734
|
+
|
|
735
|
+
function chooseDecision({ riskScore, integrity, memoryGuard, learnedPolicy, blastRadius, command }) {
|
|
628
736
|
const hasOperationalBlockers = Boolean(integrity && Array.isArray(integrity.blockers) && integrity.blockers.length > 0);
|
|
629
737
|
const destructiveBypass = /\bgit\s+push\b.*(?:--force|-f)\b/i.test(command) || /\bgh\s+pr\s+merge\b.*--admin\b/i.test(command);
|
|
738
|
+
const learnedPrediction = learnedPolicy && learnedPolicy.enabled ? learnedPolicy.prediction : null;
|
|
739
|
+
const learnedHardStop = Boolean(
|
|
740
|
+
learnedPrediction
|
|
741
|
+
&& learnedPrediction.label === 'deny'
|
|
742
|
+
&& learnedPrediction.confidence >= 0.7
|
|
743
|
+
);
|
|
744
|
+
const learnedWarning = Boolean(
|
|
745
|
+
learnedPrediction
|
|
746
|
+
&& ['warn', 'verify', 'deny'].includes(learnedPrediction.label)
|
|
747
|
+
&& learnedPrediction.confidence >= 0.3
|
|
748
|
+
);
|
|
749
|
+
const learnedRecall = Boolean(
|
|
750
|
+
learnedPrediction
|
|
751
|
+
&& learnedPrediction.label === 'recall'
|
|
752
|
+
&& learnedPrediction.confidence >= 0.3
|
|
753
|
+
);
|
|
630
754
|
const lowBlastRadius = blastRadius.fileCount <= 1
|
|
631
755
|
&& blastRadius.surfaceCount <= 1
|
|
632
756
|
&& blastRadius.releaseSensitiveFiles.length === 0
|
|
633
757
|
&& blastRadius.unapprovedProtectedFiles === 0;
|
|
634
758
|
const lowRiskHandoff = /\bgit\s+push\b|\bgh\s+pr\s+(?:create|merge)\b/i.test(command)
|
|
635
759
|
&& !destructiveBypass
|
|
760
|
+
&& !learnedHardStop
|
|
636
761
|
&& lowBlastRadius
|
|
637
762
|
&& !hasOperationalBlockers
|
|
638
763
|
&& memoryGuard
|
|
@@ -652,10 +777,10 @@ function chooseDecision({ riskScore, integrity, memoryGuard, blastRadius, comman
|
|
|
652
777
|
if (lowRiskHandoff) {
|
|
653
778
|
return 'allow';
|
|
654
779
|
}
|
|
655
|
-
if (destructiveBypass || repeatedHighBlast || (hasOperationalBlockers && riskScore >= 0.72) || riskScore >= 0.86) {
|
|
780
|
+
if (destructiveBypass || learnedHardStop || repeatedHighBlast || (hasOperationalBlockers && riskScore >= 0.72) || riskScore >= 0.86) {
|
|
656
781
|
return 'deny';
|
|
657
782
|
}
|
|
658
|
-
if (riskScore >= 0.45) {
|
|
783
|
+
if (riskScore >= 0.45 || (learnedWarning && riskScore >= 0.3) || (learnedRecall && riskScore >= 0.34)) {
|
|
659
784
|
return 'warn';
|
|
660
785
|
}
|
|
661
786
|
return 'allow';
|
|
@@ -698,6 +823,20 @@ function evaluateWorkflowSentinel(toolName, toolInput = {}, options = {}) {
|
|
|
698
823
|
affectedFiles,
|
|
699
824
|
}), options.feedbackOptions || {});
|
|
700
825
|
const memoryGuard = normalizeMemoryGuardForSentinel(rawMemoryGuard, highRiskAction);
|
|
826
|
+
const learnedPolicy = getInterventionRecommendation({
|
|
827
|
+
toolName,
|
|
828
|
+
command: toolInput.command || '',
|
|
829
|
+
affectedFiles,
|
|
830
|
+
integrity,
|
|
831
|
+
memoryGuard,
|
|
832
|
+
riskBand: highRiskAction ? 'high' : 'low',
|
|
833
|
+
taskScopeViolation,
|
|
834
|
+
protectedSurface: protectedSurfaceForRisk,
|
|
835
|
+
}, {
|
|
836
|
+
feedbackDir: options.feedbackDir
|
|
837
|
+
|| process.env.THUMBGATE_FEEDBACK_DIR
|
|
838
|
+
|| (repoRoot ? path.join(repoRoot, '.thumbgate') : null),
|
|
839
|
+
});
|
|
701
840
|
const blastRadius = buildBlastRadius({
|
|
702
841
|
affectedFiles,
|
|
703
842
|
integrity,
|
|
@@ -709,14 +848,28 @@ function evaluateWorkflowSentinel(toolName, toolInput = {}, options = {}) {
|
|
|
709
848
|
affectedFiles,
|
|
710
849
|
integrity,
|
|
711
850
|
memoryGuard,
|
|
851
|
+
learnedPolicy,
|
|
712
852
|
blastRadius,
|
|
713
853
|
taskScopeViolation,
|
|
714
854
|
protectedSurface: protectedSurfaceForRisk,
|
|
715
855
|
});
|
|
856
|
+
const executionSurface = buildDockerSandboxPlan({
|
|
857
|
+
toolName,
|
|
858
|
+
actionType: getSentinelActionType(toolName),
|
|
859
|
+
command: toolInput.command,
|
|
860
|
+
repoPath,
|
|
861
|
+
affectedFiles,
|
|
862
|
+
riskBand: risk.band,
|
|
863
|
+
riskScore: risk.score,
|
|
864
|
+
requiresNetwork: Boolean(
|
|
865
|
+
/\b(?:curl|wget|gh\s+pr|git\s+push|npm\s+publish|yarn\s+publish|pnpm\s+publish)\b/i.test(toolInput.command || '')
|
|
866
|
+
),
|
|
867
|
+
});
|
|
716
868
|
const decision = chooseDecision({
|
|
717
869
|
riskScore: risk.score,
|
|
718
870
|
integrity,
|
|
719
871
|
memoryGuard,
|
|
872
|
+
learnedPolicy,
|
|
720
873
|
blastRadius: {
|
|
721
874
|
...blastRadius,
|
|
722
875
|
unapprovedProtectedFiles: protectedSurfaceForRisk.unapprovedProtectedFiles.length,
|
|
@@ -726,6 +879,7 @@ function evaluateWorkflowSentinel(toolName, toolInput = {}, options = {}) {
|
|
|
726
879
|
const evidence = buildEvidence({
|
|
727
880
|
integrity,
|
|
728
881
|
memoryGuard,
|
|
882
|
+
learnedPolicy,
|
|
729
883
|
blastRadius,
|
|
730
884
|
taskScopeViolation,
|
|
731
885
|
protectedSurface: protectedSurfaceForRisk,
|
|
@@ -736,6 +890,8 @@ function evaluateWorkflowSentinel(toolName, toolInput = {}, options = {}) {
|
|
|
736
890
|
protectedSurface: protectedSurfaceForRisk,
|
|
737
891
|
blastRadius,
|
|
738
892
|
memoryGuard,
|
|
893
|
+
learnedPolicy,
|
|
894
|
+
executionSurface,
|
|
739
895
|
});
|
|
740
896
|
const summary = decision === 'allow'
|
|
741
897
|
? 'No predictive workflow blockers detected.'
|
|
@@ -743,7 +899,7 @@ function evaluateWorkflowSentinel(toolName, toolInput = {}, options = {}) {
|
|
|
743
899
|
? 'Predicted workflow risk is elevated before execution.'
|
|
744
900
|
: 'Predicted workflow failure before execution.';
|
|
745
901
|
const report = {
|
|
746
|
-
sentinelVersion: 'workflow-sentinel-
|
|
902
|
+
sentinelVersion: 'workflow-sentinel-v2',
|
|
747
903
|
toolName,
|
|
748
904
|
decision,
|
|
749
905
|
riskScore: risk.score,
|
|
@@ -753,7 +909,9 @@ function evaluateWorkflowSentinel(toolName, toolInput = {}, options = {}) {
|
|
|
753
909
|
blastRadius,
|
|
754
910
|
evidence,
|
|
755
911
|
remediations,
|
|
912
|
+
executionSurface,
|
|
756
913
|
memoryGuard,
|
|
914
|
+
learnedPolicy,
|
|
757
915
|
taskScopeViolation,
|
|
758
916
|
operationalIntegrity: {
|
|
759
917
|
ok: integrity.ok,
|
package/skills/thumbgate/SKILL.md
CHANGED
|
@@ -86,7 +86,7 @@ Bounded retrieval of relevant feedback history for the current task. The agent g
|
|
|
86
86
|
|
|
87
87
|
| | Free | Pro | Team |
|
|
88
88
|
|---|---|---|---|
|
|
89
|
-
| Feedback capture |
|
|
89
|
+
| Feedback capture | 3/day | Unlimited | Unlimited |
|
|
90
90
|
| Lesson search | 5/day | Unlimited | Unlimited |
|
|
91
91
|
| Active gates | 5 | Unlimited | Unlimited |
|
|
92
92
|
| Dashboard | - | Yes | Yes |
|