thumbgate 1.5.0 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,329 @@
1
+ #!/usr/bin/env node
2
+ 'use strict';
3
+
4
+ /**
5
+ * Session Episode Store — episodic memory for agent sessions.
6
+ *
7
+ * Persists session health snapshots across conversations so the system
8
+ * learns cross-session degradation patterns:
9
+ * - Which times of day produce degraded sessions
10
+ * - Which task categories trigger repeat errors
11
+ * - How long sessions last before degradation onset
12
+ * - Whether feedback is actually reducing repeat mistakes over time
13
+ *
14
+ * This is the "episodic experience" layer described in the harnessed-agent
15
+ * framework (Memory = working context + semantic knowledge + episodic experience).
16
+ * The session-health-sensor provides the real-time signal; this module provides
17
+ * the longitudinal learning.
18
+ */
19
+
20
+ const crypto = require('node:crypto');
21
+ const path = require('node:path');
22
+ const { readJsonl, appendJsonl } = require('./fs-utils');
23
+ const { resolveFeedbackDir } = require('./feedback-paths');
24
+ const {
25
+ computeSessionHealth,
26
+ loadRecentFeedback,
27
+ } = require('./session-health-sensor');
28
+
29
/** File name of the JSONL episode log; getEpisodePath joins it onto the feedback directory. */
const EPISODE_FILE = 'session-episodes.jsonl';

/** Default number of most-recent episodes that loadRecentEpisodes reads. */
const PATTERN_WINDOW_EPISODES = 20;
31
+
32
+ // ---------------------------------------------------------------------------
33
+ // Episode Recording
34
+ // ---------------------------------------------------------------------------
35
+
36
/**
 * Build the path of the episode log file.
 *
 * @param {{ feedbackDir?: string }} [options] - `feedbackDir` overrides the
 *   directory; when it is falsy, resolveFeedbackDir() supplies the directory.
 * @returns {string} The directory joined with EPISODE_FILE.
 */
function getEpisodePath({ feedbackDir } = {}) {
  const baseDir = feedbackDir ? feedbackDir : resolveFeedbackDir();
  return path.join(baseDir, EPISODE_FILE);
}
40
+
41
/**
 * Assemble an episode record (plain object) describing one session.
 *
 * @param {object} [params]
 * @param {?string} [params.sessionId] - Identifier to store; when falsy an id of
 *   the form `session_<epoch ms>_<8 hex chars>` is generated.
 * @param {?object} [params.health] - Precomputed health snapshot; when falsy it
 *   is computed from `feedbackEntries` via computeSessionHealth.
 * @param {object[]} [params.feedbackEntries] - Feedback entries for the session.
 * @param {Array} [params.tags] - Stored on the episode unchanged.
 * @param {?number} [params.durationMs] - Stored on the episode unchanged.
 * @returns {object} The episode; `hourOfDay` and `dayOfWeek` come from the
 *   process-local clock (Date#getHours / Date#getDay).
 */
function buildEpisode({
  sessionId = null,
  health = null,
  feedbackEntries = [],
  tags = [],
  durationMs = null,
} = {}) {
  const capturedAt = new Date();
  const snapshot = health ? health : computeSessionHealth(feedbackEntries);

  const negatives = feedbackEntries.filter(({ signal }) => signal === 'negative');
  const categories = extractCategories(feedbackEntries);
  const errorFingerprints = extractErrorFingerprints(negatives);

  let episodeId = sessionId;
  if (!episodeId) {
    const stamp = Date.now();
    const suffix = crypto.randomBytes(4).toString('hex');
    episodeId = `session_${stamp}_${suffix}`;
  }

  // Persist only the signal name and severity, not each detector's full payload.
  const compactSignals = snapshot.signals.map(({ signal, severity }) => ({ signal, severity }));
  const positives = feedbackEntries.filter(({ signal }) => signal === 'positive');

  return {
    sessionId: episodeId,
    recordedAt: capturedAt.toISOString(),
    hourOfDay: capturedAt.getHours(),
    dayOfWeek: capturedAt.getDay(),
    score: snapshot.score,
    grade: snapshot.grade,
    signals: compactSignals,
    recommendation: snapshot.recommendation,
    feedbackCount: feedbackEntries.length,
    negativeCount: negatives.length,
    positiveCount: positives.length,
    categories,
    errorFingerprints,
    durationMs,
    tags,
  };
}
73
+
74
/**
 * Append one episode to the episode log and hand it back to the caller.
 *
 * @param {object} episode - Episode record to append via appendJsonl.
 * @param {{ feedbackDir?: string }} [options] - Forwarded to getEpisodePath.
 * @returns {object} The same `episode` object that was passed in.
 */
function recordEpisode(episode, options = {}) {
  appendJsonl(getEpisodePath(options), episode);
  return episode;
}
79
+
80
/**
 * Load the recent feedback, build an episode from it, and append that episode
 * to the episode log.
 *
 * @param {object} [options] - Passed whole to loadRecentFeedback and
 *   recordEpisode; `sessionId`, `tags` and `durationMs` are read for the episode.
 * @returns {object} The recorded episode.
 */
function captureAndRecordEpisode(options = {}) {
  const recentFeedback = loadRecentFeedback(options);
  const { sessionId, durationMs } = options;
  const freshEpisode = buildEpisode({
    sessionId,
    feedbackEntries: recentFeedback,
    tags: options.tags || [],
    durationMs,
  });
  return recordEpisode(freshEpisode, options);
}
90
+
91
+ // ---------------------------------------------------------------------------
92
+ // Episode Loading
93
+ // ---------------------------------------------------------------------------
94
+
95
/**
 * Read the episode log through readJsonl with no tail/limit options.
 *
 * @param {{ feedbackDir?: string }} [options] - Forwarded to getEpisodePath.
 * @returns {*} Whatever readJsonl returns for the episode log path.
 */
function loadEpisodes(options = {}) {
  const episodePath = getEpisodePath(options);
  return readJsonl(episodePath);
}
98
+
99
/**
 * Read the tail of the episode log.
 *
 * @param {number} [count=PATTERN_WINDOW_EPISODES] - Passed to readJsonl as `maxLines`.
 * @param {{ feedbackDir?: string }} [options] - Forwarded to getEpisodePath.
 * @returns {*} Whatever readJsonl returns when called with `{ tail: true, maxLines: count }`.
 */
function loadRecentEpisodes(count = PATTERN_WINDOW_EPISODES, options = {}) {
  const episodePath = getEpisodePath(options);
  const readOptions = { tail: true, maxLines: count };
  return readJsonl(episodePath, readOptions);
}
102
+
103
+ // ---------------------------------------------------------------------------
104
+ // Cross-Session Pattern Detection
105
+ // ---------------------------------------------------------------------------
106
+
107
/**
 * Find hours of the day at which sessions mostly end up degraded or critical.
 *
 * Episodes are grouped by `hourOfDay` (episodes without one are ignored). An
 * hour is reported when it has at least two episodes and more than half of
 * them are graded 'degraded' or 'critical'.
 *
 * @param {object[]} episodes - Episode records (`hourOfDay`, `score`, `grade`).
 * @returns {object[]} `time_of_day_risk` patterns, highest `failRate` first.
 */
function analyzeTimeOfDayPatterns(episodes) {
  const statsByHour = new Map();
  for (const episode of episodes) {
    const slot = episode.hourOfDay;
    if (slot == null) continue;
    let stats = statsByHour.get(slot);
    if (!stats) {
      stats = { total: 0, degraded: 0, critical: 0, totalScore: 0 };
      statsByHour.set(slot, stats);
    }
    stats.total += 1;
    stats.totalScore += episode.score || 0;
    if (episode.grade === 'degraded') {
      stats.degraded += 1;
    } else if (episode.grade === 'critical') {
      stats.critical += 1;
    }
  }

  const risky = [];
  statsByHour.forEach((stats, hour) => {
    // A single episode is not enough evidence for an hourly pattern.
    if (stats.total < 2) return;
    const failFraction = (stats.degraded + stats.critical) / stats.total;
    if (!(failFraction > 0.5)) return;
    const failRate = Math.round(failFraction * 100);
    risky.push({
      type: 'time_of_day_risk',
      hour,
      failRate,
      avgScore: Math.round(stats.totalScore / stats.total),
      sessions: stats.total,
      recommendation: `Sessions at ${formatHour(hour)} degrade ${failRate}% of the time. Consider scheduling complex work at other hours.`,
    });
  });

  risky.sort((left, right) => right.failRate - left.failRate);
  return risky;
}
139
+
140
/**
 * Find task categories whose sessions frequently end up degraded or critical.
 *
 * Each episode counts once toward every entry in its `categories` list. A
 * category is reported when it appears in at least two episodes and more than
 * 40% of those are graded 'degraded' or 'critical'.
 *
 * @param {object[]} episodes - Episode records (`categories`, `score`, `grade`).
 * @returns {object[]} `category_risk` patterns, highest `failRate` first.
 */
function analyzeCategoryPatterns(episodes) {
  const tallies = new Map();
  for (const episode of episodes) {
    const failed = episode.grade === 'degraded' || episode.grade === 'critical';
    for (const name of episode.categories || []) {
      if (!tallies.has(name)) {
        tallies.set(name, { total: 0, degraded: 0, totalScore: 0 });
      }
      const tally = tallies.get(name);
      tally.total += 1;
      tally.totalScore += episode.score || 0;
      if (failed) tally.degraded += 1;
    }
  }

  return Array.from(tallies)
    .filter(([, tally]) => tally.total >= 2)
    .map(([category, tally]) => ({ category, tally, failFraction: tally.degraded / tally.total }))
    .filter(({ failFraction }) => failFraction > 0.4)
    .map(({ category, tally, failFraction }) => {
      const failRate = Math.round(failFraction * 100);
      return {
        type: 'category_risk',
        category,
        failRate,
        avgScore: Math.round(tally.totalScore / tally.total),
        sessions: tally.total,
        recommendation: `"${category}" tasks degrade ${failRate}% of sessions. Break these into smaller chunks or add prevention rules.`,
      };
    })
    .sort((left, right) => right.failRate - left.failRate);
}
171
+
172
/**
 * Find error fingerprints that show up in more than one episode entry.
 *
 * @param {object[]} episodes - Episode records; `errorFingerprints` is optional.
 * @returns {object[]} `recurring_error` patterns for fingerprints counted at
 *   least twice, most frequent first.
 */
function analyzeRecurringErrors(episodes) {
  const tally = new Map();
  for (const episode of episodes) {
    for (const print of episode.errorFingerprints || []) {
      tally.set(print, (tally.get(print) || 0) + 1);
    }
  }

  return Array.from(tally)
    .filter(([, occurrences]) => occurrences >= 2)
    .map(([fingerprint, occurrences]) => ({
      type: 'recurring_error',
      fingerprint,
      occurrences,
      // Only the first 80 characters of the fingerprint go into the message.
      recommendation: `Error "${fingerprint.slice(0, 80)}" has recurred across ${occurrences} sessions. Promote to a prevention rule.`,
    }))
    .sort((left, right) => right.occurrences - left.occurrences);
}
194
+
195
/**
 * Compare the first half of the episode list against the second half to judge
 * whether session health is trending up or down.
 *
 * The list is split at floor(length / 2); the leading slice is treated as
 * "older" and the trailing slice as "recent" (presumably the log's append
 * order, oldest first; confirm against the caller). The "repeat rate" of a
 * half is the average number of error fingerprints per episode.
 *
 * @param {object[]} episodes - Episode records (`score`, `errorFingerprints`).
 * @returns {?object} A `feedback_effectiveness` summary, or null when
 *   `episodes` is not an array or has fewer than three entries.
 */
function analyzeFeedbackEffectiveness(episodes) {
  if (!Array.isArray(episodes) || episodes.length < 3) return null;

  const midpoint = Math.floor(episodes.length / 2);
  const olderHalf = episodes.slice(0, midpoint);
  const recentHalf = episodes.slice(midpoint);

  const scoreOf = (episode) => episode.score || 0;
  const fingerprintCountOf = (episode) => (episode.errorFingerprints || []).length;

  const avgRecent = average(recentHalf.map(scoreOf));
  const avgOlder = average(olderHalf.map(scoreOf));
  const recentRepeatRate = average(recentHalf.map(fingerprintCountOf));
  const olderRepeatRate = average(olderHalf.map(fingerprintCountOf));

  const scoreTrend = avgRecent - avgOlder;
  const repeatTrend = recentRepeatRate - olderRepeatRate;

  const olderAvgScore = Math.round(avgOlder);
  const recentAvgScore = Math.round(avgRecent);

  // A flat trend used to fall through to the "declining" message; report it
  // as steady instead so the text never contradicts the numbers it shows.
  let recommendation;
  if (scoreTrend > 0) {
    recommendation = `Session health is improving (${olderAvgScore} → ${recentAvgScore}). Feedback loop is working.`;
  } else if (scoreTrend < 0) {
    recommendation = `Session health is declining (${olderAvgScore} → ${recentAvgScore}). Review prevention rules and consider a fresh context reset.`;
  } else {
    recommendation = `Session health is holding steady (${olderAvgScore} → ${recentAvgScore}). Keep monitoring for repeat errors.`;
  }

  return {
    type: 'feedback_effectiveness',
    olderAvgScore,
    recentAvgScore,
    scoreTrend: Math.round(scoreTrend),
    olderRepeatRate: round2(olderRepeatRate),
    recentRepeatRate: round2(recentRepeatRate),
    repeatTrend: round2(repeatTrend),
    improving: scoreTrend > 0 && repeatTrend <= 0,
    recommendation,
  };
}
223
+
224
/**
 * Run every cross-session analysis over the episodes and bundle the results.
 *
 * @param {object[]} episodes - Episode records to analyze.
 * @returns {object} The four analysis results plus `episodesAnalyzed`
 *   (the input length) and `analyzedAt` (current time as an ISO string).
 */
function analyzePatterns(episodes) {
  return {
    timeOfDay: analyzeTimeOfDayPatterns(episodes),
    categories: analyzeCategoryPatterns(episodes),
    recurringErrors: analyzeRecurringErrors(episodes),
    effectiveness: analyzeFeedbackEffectiveness(episodes),
    episodesAnalyzed: episodes.length,
    analyzedAt: new Date().toISOString(),
  };
}
239
+
240
+ // ---------------------------------------------------------------------------
241
+ // Helpers
242
+ // ---------------------------------------------------------------------------
243
+
244
/**
 * Collect the distinct categories mentioned by a list of feedback entries.
 *
 * A category is any element of an entry's `tags` array, or a truthy
 * `richContext.domain`. First-seen order is kept and the result is capped at
 * 20 items.
 *
 * @param {object[]} entries - Feedback entries.
 * @returns {Array} Up to 20 unique categories.
 */
function extractCategories(entries) {
  const unique = new Set();
  for (const { tags, richContext } of entries) {
    if (Array.isArray(tags)) {
      for (const tag of tags) unique.add(tag);
    }
    const domain = richContext && richContext.domain;
    if (domain) unique.add(domain);
  }
  return [...unique].slice(0, 20);
}
256
+
257
/**
 * Reduce the `whatWentWrong` text of negative feedback entries to normalized
 * fingerprints so the same error can be recognized across sessions.
 *
 * Normalization: lowercase, drop "line/col/column <digits>" references,
 * replace remaining standalone numbers with "N", collapse whitespace, trim,
 * and keep at most 200 characters.
 *
 * @param {object[]} negativeEntries - Entries whose `whatWentWrong` is read.
 * @returns {string[]} Up to 20 unique non-empty fingerprints, first-seen order.
 */
function extractErrorFingerprints(negativeEntries) {
  const fingerprints = new Set();
  for (const entry of negativeEntries) {
    const description = entry.whatWentWrong;
    // Entries come from a JSONL log: skip missing text, and skip non-string
    // values too, since they would otherwise throw on .toLowerCase().
    if (typeof description !== 'string' || description === '') continue;
    const fingerprint = description
      .toLowerCase()
      .replace(/\b(line|col|column)\s*\d+/g, '')
      .replace(/\b\d+\b/g, 'N')
      .replace(/\s+/g, ' ')
      .trim()
      .slice(0, 200);
    if (fingerprint) fingerprints.add(fingerprint);
  }
  return Array.from(fingerprints).slice(0, 20);
}
272
+
273
/**
 * Format an hour number as a 12-hour clock label such as "9AM" or "12PM".
 *
 * @param {number} hour - Hour of the day.
 * @returns {string} `hour % 12` (shown as 12 when that is zero) followed by
 *   "AM" when `hour < 12`, otherwise "PM".
 */
function formatHour(hour) {
  let clockHour = hour % 12;
  if (!clockHour) clockHour = 12;
  const meridiem = hour < 12 ? 'AM' : 'PM';
  return `${clockHour}${meridiem}`;
}
278
+
279
/**
 * Arithmetic mean of a list of numbers.
 *
 * @param {number[]} nums - Values to average.
 * @returns {number} The mean, or 0 when the list is empty.
 */
function average(nums) {
  if (!(nums.length > 0)) return 0;
  let sum = 0;
  nums.forEach((value) => {
    sum += value;
  });
  return sum / nums.length;
}
282
+
283
/**
 * Round a number to two decimal places.
 *
 * @param {number} n - Value to round.
 * @returns {number} `n` scaled by 100, rounded with Math.round, and scaled back.
 */
function round2(n) {
  const hundredths = Math.round(n * 100);
  return hundredths / 100;
}
286
+
287
+ // ---------------------------------------------------------------------------
288
+ // CLI
289
+ // ---------------------------------------------------------------------------
290
+
291
/**
 * Tell whether this file is the script that was launched from the command line.
 *
 * @param {string[]} [argv=process.argv] - Argument vector; index 1 is read as
 *   the invoked script path.
 * @returns {boolean} True when `argv[1]` resolves to this module's `__filename`.
 */
function isCliInvocation(argv = process.argv) {
  const invokedPath = argv[1];
  if (!invokedPath) return false;
  return path.resolve(invokedPath) === __filename;
}
295
+
296
// Command-line entry point. The first argument selects the action and
// defaults to "capture"; every action prints pretty-printed JSON to stdout.
if (isCliInvocation()) {
  const command = process.argv[2] || 'capture';

  switch (command) {
    case 'capture': {
      // Record an episode from the recent feedback and print it.
      const episode = captureAndRecordEpisode();
      console.log(JSON.stringify(episode, null, 2));
      break;
    }
    case 'patterns': {
      // Analyze every stored episode.
      const patterns = analyzePatterns(loadEpisodes());
      console.log(JSON.stringify(patterns, null, 2));
      break;
    }
    case 'history': {
      // Print the 20 most recent episodes.
      const episodes = loadRecentEpisodes(20);
      console.log(JSON.stringify(episodes, null, 2));
      break;
    }
    default:
      console.error(`Unknown command: ${command}. Use: capture, patterns, history`);
      process.exit(1);
  }
}
314
+
315
+ module.exports = {
316
+ EPISODE_FILE,
317
+ PATTERN_WINDOW_EPISODES,
318
+ analyzePatterns,
319
+ analyzeCategoryPatterns,
320
+ analyzeFeedbackEffectiveness,
321
+ analyzeRecurringErrors,
322
+ analyzeTimeOfDayPatterns,
323
+ buildEpisode,
324
+ captureAndRecordEpisode,
325
+ getEpisodePath,
326
+ loadEpisodes,
327
+ loadRecentEpisodes,
328
+ recordEpisode,
329
+ };
@@ -0,0 +1,242 @@
1
+ #!/usr/bin/env node
2
+ 'use strict';
3
+
4
+ /**
5
+ * Session Health Sensor
6
+ *
7
+ * Detects real-time agent session degradation by analyzing feedback patterns,
8
+ * error recurrence, and context drift signals. Inspired by community research
9
+ * showing that "context rot" — not model quality — is the primary cause of
10
+ * perceived AI agent degradation on large projects.
11
+ *
12
+ * Signals tracked:
13
+ * 1. Repeat error rate — same error recurring within a session window
14
+ * 2. Negative feedback density — ratio of thumbs-down in recent window
15
+ * 3. Stagnation — consecutive negative signals without recovery
16
+ * 4. Context amnesia — feedback referencing "forgot", "again", "already told"
17
+ *
18
+ * Output: A session health score (0–100) and actionable degradation signals.
19
+ *
20
+ * Integration points:
21
+ * - Thompson Sampling: feeds per-category reliability with session context
22
+ * - Gates engine: health score can trigger "restart session" recommendation
23
+ * - Self-heal: low health triggers diagnostic capture
24
+ */
25
+
26
+ const path = require('node:path');
27
+ const { readJsonl } = require('./fs-utils');
28
+ const { resolveFeedbackDir } = require('./feedback-paths');
29
+
30
+ // ---------------------------------------------------------------------------
31
+ // Constants
32
+ // ---------------------------------------------------------------------------
33
+
34
// Default look-back window for loadRecentFeedback: 45 minutes, aligned with
// community best practice.
const SESSION_WINDOW_MS = 45 * 60 * 1000;

// Case-insensitive phrases that detectContextAmnesia looks for in feedback text.
const AMNESIA_PATTERNS = /\b(again|forgot|already told|repeated|same mistake|same error|keeps? (doing|making|breaking)|context (lost|drift|rot)|amnesia)\b/i;

// Consecutive negatives without a positive that count as stagnation
// (detectStagnation escalates to critical at twice this value).
const STAGNATION_THRESHOLD = 4;

// Bounds the health score is clamped to.
const HEALTH_FLOOR = 0;
const HEALTH_CEILING = 100;
39
+
40
+ // ---------------------------------------------------------------------------
41
+ // Data Loading
42
+ // ---------------------------------------------------------------------------
43
+
44
/**
 * Load the feedback entries that fall inside the current session window.
 *
 * Reads the tail (at most 200 lines) of `feedback-log.jsonl` in the feedback
 * directory and keeps entries whose `timestamp` parses to a time at or after
 * `now - windowMs`. An entry without a timestamp is treated as time 0.
 *
 * @param {object} [options]
 * @param {string} [options.feedbackDir] - Directory override; falls back to
 *   resolveFeedbackDir().
 * @param {number} [options.windowMs=SESSION_WINDOW_MS] - Window length in ms.
 * @param {number} [options.now=Date.now()] - Reference time in epoch ms.
 * @returns {object[]} The entries inside the window, in log order.
 */
function loadRecentFeedback({ feedbackDir, windowMs = SESSION_WINDOW_MS, now = Date.now() } = {}) {
  const baseDir = feedbackDir || resolveFeedbackDir();
  const tail = readJsonl(path.join(baseDir, 'feedback-log.jsonl'), { tail: true, maxLines: 200 });
  const oldestAllowed = now - windowMs;

  return tail.filter((entry) => {
    let stamp = 0;
    if (entry.timestamp) {
      stamp = new Date(entry.timestamp).getTime();
    }
    // Unparseable timestamps become NaN and are dropped here.
    return Number.isFinite(stamp) && stamp >= oldestAllowed;
  });
}
55
+
56
+ // ---------------------------------------------------------------------------
57
+ // Signal Detectors
58
+ // ---------------------------------------------------------------------------
59
+
60
/**
 * Count how often the same (normalized) error text recurs among negative
 * feedback entries.
 *
 * Only entries with `signal === 'negative'` and a truthy `whatWentWrong` are
 * considered. Every occurrence of a text after its first counts as one repeat.
 *
 * @param {object[]} entries - Feedback entries.
 * @returns {{signal: string, count: number, total: number, rate: number, severity: string}}
 *   `count` repeats out of `total` error texts; severity is 'critical' at 3+
 *   repeats, 'warning' at 1+, otherwise 'healthy'.
 */
function detectRepeatErrors(entries) {
  const occurrences = new Map();
  let total = 0;
  let repeats = 0;

  entries.forEach((entry) => {
    if (entry.signal !== 'negative' || !entry.whatWentWrong) return;
    total += 1;
    const key = normalizeErrorText(entry.whatWentWrong);
    const seenBefore = occurrences.get(key) || 0;
    if (seenBefore >= 1) repeats += 1;
    occurrences.set(key, seenBefore + 1);
  });

  let severity = 'healthy';
  if (repeats >= 3) {
    severity = 'critical';
  } else if (repeats >= 1) {
    severity = 'warning';
  }

  return {
    signal: 'repeat_errors',
    count: repeats,
    total,
    rate: total > 0 ? repeats / total : 0,
    severity,
  };
}
82
+
83
/**
 * Measure what share of the feedback entries are negative.
 *
 * @param {object[]} entries - Feedback entries.
 * @returns {{signal: string, count: number, total: number, rate: number, severity: string}}
 *   `rate` is negatives / total (0 for an empty list); severity is 'critical'
 *   above 0.7, 'warning' above 0.4, otherwise 'healthy'.
 */
function detectNegativeDensity(entries) {
  const total = entries.length;
  const negativeCount = entries.reduce(
    (sum, entry) => (entry.signal === 'negative' ? sum + 1 : sum),
    0,
  );
  const rate = total === 0 ? 0 : negativeCount / total;

  let severity = 'healthy';
  if (rate > 0.7) {
    severity = 'critical';
  } else if (rate > 0.4) {
    severity = 'warning';
  }

  return { signal: 'negative_density', count: negativeCount, total, rate, severity };
}
99
+
100
/**
 * Find the longest run of consecutive negative feedback entries.
 *
 * Any entry whose signal is not 'negative' ends the current run.
 *
 * @param {object[]} entries - Feedback entries in order.
 * @returns {{signal: string, consecutiveNegatives: number, threshold: number, severity: string}}
 *   Severity is 'warning' once the longest run reaches STAGNATION_THRESHOLD
 *   and 'critical' at twice that, otherwise 'healthy'.
 */
function detectStagnation(entries) {
  let longestRun = 0;
  let run = 0;
  for (const { signal } of entries) {
    run = signal === 'negative' ? run + 1 : 0;
    if (run > longestRun) longestRun = run;
  }

  let severity = 'healthy';
  if (longestRun >= STAGNATION_THRESHOLD * 2) {
    severity = 'critical';
  } else if (longestRun >= STAGNATION_THRESHOLD) {
    severity = 'warning';
  }

  return {
    signal: 'stagnation',
    consecutiveNegatives: longestRun,
    threshold: STAGNATION_THRESHOLD,
    severity,
  };
}
122
+
123
/**
 * Count feedback entries whose text matches AMNESIA_PATTERNS.
 *
 * The searched text is the entry's truthy `context`, `whatWentWrong` and
 * `whatToChange` fields joined with single spaces.
 *
 * @param {object[]} entries - Feedback entries.
 * @returns {{signal: string, count: number, total: number, severity: string}}
 *   Severity is 'critical' at 3+ matching entries, 'warning' at 1+, otherwise
 *   'healthy'.
 */
function detectContextAmnesia(entries) {
  const matchCount = entries.reduce((hits, entry) => {
    const haystack = [entry.context, entry.whatWentWrong, entry.whatToChange]
      .filter(Boolean)
      .join(' ');
    return AMNESIA_PATTERNS.test(haystack) ? hits + 1 : hits;
  }, 0);

  let severity = 'healthy';
  if (matchCount >= 3) {
    severity = 'critical';
  } else if (matchCount >= 1) {
    severity = 'warning';
  }

  return {
    signal: 'context_amnesia',
    count: matchCount,
    total: entries.length,
    severity,
  };
}
138
+
139
+ // ---------------------------------------------------------------------------
140
+ // Health Score
141
+ // ---------------------------------------------------------------------------
142
+
143
// Points subtracted from the health score for each detector, by its severity.
const SEVERITY_WEIGHTS = { healthy: 0, warning: 15, critical: 30 };

/**
 * Combine the four degradation detectors into one session health snapshot.
 *
 * The score starts at HEALTH_CEILING, loses the SEVERITY_WEIGHTS penalty of
 * every detector plus up to 20 more points in proportion to the negative
 * density rate, and is clamped to [HEALTH_FLOOR, HEALTH_CEILING].
 *
 * @param {object[]} entries - Feedback entries for the session window.
 * @returns {object} `score`, `grade` ('healthy' at 80+, 'degraded' at 50+,
 *   otherwise 'critical'), the raw detector `signals`, a `recommendation`,
 *   `windowMs` (always SESSION_WINDOW_MS), `entriesAnalyzed` and `computedAt`.
 */
function computeSessionHealth(entries) {
  const detectors = [
    detectRepeatErrors,
    detectNegativeDensity,
    detectStagnation,
    detectContextAmnesia,
  ];
  const signals = detectors.map((detect) => detect(entries));

  const severityPenalty = signals.reduce(
    (sum, { severity }) => sum + (SEVERITY_WEIGHTS[severity] || 0),
    0,
  );

  // Extra penalty for high negative density rate.
  const density = signals.find(({ signal }) => signal === 'negative_density');
  const densityPenalty = density && density.rate > 0 ? Math.round(density.rate * 20) : 0;

  const unclamped = HEALTH_CEILING - (severityPenalty + densityPenalty);
  const score = Math.max(HEALTH_FLOOR, Math.min(HEALTH_CEILING, unclamped));

  let grade = 'critical';
  if (score >= 80) {
    grade = 'healthy';
  } else if (score >= 50) {
    grade = 'degraded';
  }

  return {
    score,
    grade,
    signals,
    recommendation: buildRecommendation(score, signals),
    windowMs: SESSION_WINDOW_MS,
    entriesAnalyzed: entries.length,
    computedAt: new Date().toISOString(),
  };
}
176
+
177
/**
 * Turn a health score and its detector signals into advice text.
 *
 * @param {number} score - Session health score.
 * @param {object[]} signals - Detector results (`signal`, `severity`).
 * @returns {?string} null when the score is 80 or higher; otherwise one
 *   sentence per critical detector (context amnesia, repeat errors,
 *   stagnation, in that order) joined by spaces, or a generic message when no
 *   such detector is critical.
 */
function buildRecommendation(score, signals) {
  if (score >= 80) return null;

  const criticalNames = new Set(
    signals.filter(({ severity }) => severity === 'critical').map(({ signal }) => signal),
  );

  const advice = [
    ['context_amnesia', 'Context drift detected. Start a fresh session with CLAUDE.md re-read.'],
    ['repeat_errors', 'Same errors recurring. Capture feedback and promote to prevention rule.'],
    ['stagnation', 'No recovery from failures. Break the task into smaller chunks or restart.'],
  ]
    .filter(([name]) => criticalNames.has(name))
    .map(([, sentence]) => sentence);

  if (advice.length > 0) return advice.join(' ');

  // No detector-specific advice: pick the generic message by score band.
  return score < 50
    ? 'Session health is critically low. Consider starting a fresh conversation.'
    : 'Session showing mild degradation. Monitor closely.';
}
198
+
199
+ // ---------------------------------------------------------------------------
200
+ // Helpers
201
+ // ---------------------------------------------------------------------------
202
+
203
/**
 * Normalize an error description so that the same error compares equal even
 * when line numbers, counts, casing or spacing differ.
 *
 * Steps: lowercase, drop "line/col/column <digits>" references, replace
 * remaining standalone numbers with "N", collapse whitespace, trim, and keep
 * at most 200 characters.
 *
 * @param {*} text - Error description; expected to be a string.
 * @returns {string} The normalized text, or '' for empty or non-string input.
 */
function normalizeErrorText(text) {
  // Feedback is read from a log file, so a truthy non-string value is
  // possible; treat it like missing text instead of throwing on .toLowerCase().
  if (typeof text !== 'string' || text === '') return '';
  return text
    .toLowerCase()
    .replace(/\b(line|col|column)\s*\d+/g, '')
    .replace(/\b\d+\b/g, 'N')
    .replace(/\s+/g, ' ')
    .trim()
    .slice(0, 200);
}
213
+
214
+ // ---------------------------------------------------------------------------
215
+ // CLI
216
+ // ---------------------------------------------------------------------------
217
+
218
/**
 * Tell whether this file is the script that was launched from the command line.
 *
 * @param {string[]} [argv=process.argv] - Argument vector; index 1 is read as
 *   the invoked script path.
 * @returns {boolean} True when `argv[1]` resolves to this module's `__filename`.
 */
function isCliInvocation(argv = process.argv) {
  const invokedPath = argv[1];
  if (!invokedPath) return false;
  return path.resolve(invokedPath) === __filename;
}
222
+
223
// Command-line entry point: print the current session health as JSON and
// signal the grade through the exit code (1 = critical, 2 = degraded).
if (isCliInvocation()) {
  const health = computeSessionHealth(loadRecentFeedback());
  console.log(JSON.stringify(health, null, 2));
  switch (health.grade) {
    case 'critical':
      process.exit(1);
      break;
    case 'degraded':
      process.exit(2);
      break;
    default:
      break;
  }
}
230
+
231
+ module.exports = {
232
+ AMNESIA_PATTERNS,
233
+ SESSION_WINDOW_MS,
234
+ STAGNATION_THRESHOLD,
235
+ computeSessionHealth,
236
+ detectContextAmnesia,
237
+ detectNegativeDensity,
238
+ detectRepeatErrors,
239
+ detectStagnation,
240
+ loadRecentFeedback,
241
+ normalizeErrorText,
242
+ };