todo-enforcer 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,395 @@
1
+ /**
2
+ * session-state — Tracks per-session enforcer state
3
+ *
4
+ * Manages injection counts, cooldown timers, stagnation detection,
5
+ * recovery flags, and consecutive-wasted-injection tracking per session.
6
+ */
7
+ // @ts-nocheck
8
+
9
+ //
10
+
11
+
12
+ import type { BackoffConfig } from "./config";
13
+
14
+ // ─── Types ───────────────────────────────────────────────────────────────────
15
+
16
/**
 * Mutable bookkeeping the enforcer keeps for a single session:
 * injection counters, timing, progress baselines and status flags.
 */
export interface SessionState {
  /** Running total of injections sent during this session. */
  injectionCount: number;

  /** Injections in a row during which the agent made no progress; zeroed on progress. */
  consecutiveCount: number;

  /** Time of the most recent injection in milliseconds, or null when none is recorded. */
  lastInjectedAt: number | null;

  /** Branch length recorded at the last agent_end; the baseline for progress detection. */
  lastBranchLength: number;

  /** How many backoffs have been triggered back-to-back. */
  backoffCount: number;

  /** Incomplete-task count seen on the previous check (stagnation baseline), or null. */
  lastIncompleteCount: number | null;

  /** Idle events in a row that showed no progress. */
  stagnationCount: number;

  /** True while the session is recovering from an abort or error. */
  isRecovering: boolean;

  /** True while an injection is in flight. */
  inFlight: boolean;

  /** True while an agent_end evaluation is running. */
  isEvaluating: boolean;

  /** True once the user has cancelled the session. */
  wasCancelled: boolean;

  /** Most recent LLM error message, kept for fuzzy de-duplication / backoff; null if none. */
  lastErrorSignature: string | null;

  /** Number of consecutive errors judged similar to `lastErrorSignature`. */
  similarErrorCount: number;

  /** True while a spawn action (pi -p) is running in the background. */
  spawnInFlight: boolean;
}
59
+
60
+ // ─── State store ─────────────────────────────────────────────────────────────
61
+
62
/** Per-session enforcer state, keyed by session id. */
const sessions = new Map<string, SessionState>();

/**
 * Fetch the state record for `sessionId`, lazily creating and registering
 * a fresh one the first time the id is seen.
 *
 * @param sessionId - Key of the session whose state is wanted.
 * @returns The live (mutable) record stored in the map.
 */
function getOrCreate(sessionId: string): SessionState {
  const existing = sessions.get(sessionId);
  if (existing) return existing;

  const created = createFreshState();
  sessions.set(sessionId, created);
  return created;
}
72
+
73
/**
 * Build a brand-new state record: every counter at zero, every nullable
 * field null, every flag false.
 *
 * @returns A new object on each call (never shared between sessions).
 */
function createFreshState(): SessionState {
  const fresh: SessionState = {
    injectionCount: 0,
    consecutiveCount: 0,
    lastInjectedAt: null,
    lastBranchLength: 0,
    backoffCount: 0,
    lastIncompleteCount: null,
    stagnationCount: 0,
    isRecovering: false,
    inFlight: false,
    isEvaluating: false,
    wasCancelled: false,
    lastErrorSignature: null,
    similarErrorCount: 0,
    spawnInFlight: false,
  };
  return fresh;
}
91
+
92
+ // ─── Public API ──────────────────────────────────────────────────────────────
93
+
94
/**
 * Public accessor for a session's state record.
 * The record is created on first access, and the returned object is the
 * live instance held in the store (mutations are visible to later calls).
 */
export function getState(sessionId: string): SessionState {
  const state = getOrCreate(sessionId);
  return state;
}
97
+
98
/** Forget everything tracked for one session; a no-op if the id is unknown. */
export function resetState(sessionId: string): void {
  if (sessions.has(sessionId)) {
    sessions.delete(sessionId);
  }
}
101
+
102
/** Drop the state of every tracked session at once. */
export function resetAll(): void {
  // Map#clear removes all entries in place; the same Map instance stays in use.
  sessions.clear();
}
105
+
106
/**
 * Record that an injection was just sent: bumps both the total and the
 * consecutive counters and stamps the injection time with `Date.now()`.
 */
export function markInjection(sessionId: string): void {
  const now = Date.now();
  const state = getOrCreate(sessionId);
  state.injectionCount += 1;
  state.consecutiveCount += 1;
  state.lastInjectedAt = now;
}
112
+
113
/**
 * Clear the consecutive-wasted-injection counter. Intended for the moment
 * the agent is seen to have made progress (branch grew) since the last
 * injection. Sessions without a state record are left alone.
 */
export function resetConsecutive(sessionId: string): void {
  const state = sessions.get(sessionId);
  if (!state) return;
  state.consecutiveCount = 0;
}
123
+
124
/**
 * Store the current branch length as the baseline that a later
 * `hasProgress` call compares against. Creates the session record if needed.
 */
export function recordBranchLength(sessionId: string, length: number): void {
  getOrCreate(sessionId).lastBranchLength = length;
}
131
+
132
/**
 * Report whether the branch has grown past the recorded baseline.
 *
 * @returns `false` when the session is unknown or no baseline has been
 *   recorded yet (baseline still 0); otherwise whether
 *   `currentBranchLength` exceeds the stored `lastBranchLength`.
 */
export function hasProgress(
  sessionId: string,
  currentBranchLength: number,
): boolean {
  const state = sessions.get(sessionId);
  if (!state) return false;

  const baseline = state.lastBranchLength;
  // A baseline of 0 means nothing was recorded, so progress cannot be judged.
  if (baseline === 0) return false;

  return currentBranchLength > baseline;
}
145
+
146
/** Register one more consecutive backoff for the session (creating its record if needed). */
export function incrementBackoff(sessionId: string): void {
  getOrCreate(sessionId).backoffCount += 1;
}
150
+
151
/**
 * Zero the backoff counter for a session.
 *
 * Unknown sessions are left untouched rather than created: a freshly
 * created record already starts with `backoffCount` 0, so creating one here
 * would only add a map entry (or resurrect one removed by `resetState`).
 * This matches the other reset helpers in this module, which use
 * `sessions.get`.
 */
export function resetBackoff(sessionId: string): void {
  const state = sessions.get(sessionId);
  if (state) {
    state.backoffCount = 0;
  }
}
155
+
156
/** Set or clear the "injection in flight" flag for a session. */
export function markInFlight(sessionId: string, value: boolean): void {
  const state = getOrCreate(sessionId);
  state.inFlight = value;
}
159
+
160
/** Set or clear the "agent_end evaluation running" flag for a session. */
export function markEvaluating(sessionId: string, value: boolean): void {
  const state = getOrCreate(sessionId);
  state.isEvaluating = value;
}
163
+
164
/**
 * Flag the session as cancelled and wipe the transient enforcement state:
 * in-flight/evaluating flags, the last-injection timestamp, and the
 * stagnation, backoff and consecutive counters.
 *
 * Fields not listed here (e.g. `injectionCount`, error tracking) are left as-is.
 */
export function markCancelled(sessionId: string): void {
  Object.assign(getOrCreate(sessionId), {
    wasCancelled: true,
    inFlight: false,
    isEvaluating: false,
    lastInjectedAt: null,
    stagnationCount: 0,
    backoffCount: 0,
    consecutiveCount: 0,
  });
}
174
+
175
/** Raise the recovery flag for a session, creating its record if needed. */
export function markRecovering(sessionId: string): void {
  getOrCreate(sessionId).isRecovering = true;
}
179
+
180
/** Lower the recovery flag; sessions without a record are ignored. */
export function markRecoveryComplete(sessionId: string): void {
  const state = sessions.get(sessionId);
  if (!state) return;
  state.isRecovering = false;
}
184
+
185
/**
 * Decide whether the cooldown since the last injection has run out.
 *
 * The required delay is `baseCooldownMs`, unless backoff applies: when the
 * session has a positive `backoffCount` and `backoff.enabled` is not
 * explicitly `false`, the delay becomes
 * `baseCooldownMs * factor ** backoffCount`, capped at `maxDelayMs`.
 *
 * @param sessionId - Session to check.
 * @param baseCooldownMs - Cooldown in milliseconds before any backoff.
 * @param backoff - Optional backoff settings; `factor` defaults to 2 and
 *   `maxDelayMs` to 3 600 000 (one hour).
 * @returns `true` if the session is unknown, has no recorded injection, or
 *   at least the computed delay has passed since `lastInjectedAt`.
 */
export function isCooldownElapsed(
  sessionId: string,
  baseCooldownMs: number,
  backoff?: BackoffConfig,
): boolean {
  const state = sessions.get(sessionId);
  if (!state) return true;
  if (state.lastInjectedAt === null) return true;

  const backoffApplies = backoff?.enabled !== false && state.backoffCount > 0;
  const requiredDelay = backoffApplies
    ? Math.min(
        baseCooldownMs * (backoff?.factor ?? 2) ** state.backoffCount,
        backoff?.maxDelayMs ?? 3_600_000,
      )
    : baseCooldownMs;

  const elapsed = Date.now() - state.lastInjectedAt;
  return elapsed >= requiredDelay;
}
207
+
208
/**
 * Check the cap on consecutive wasted injections (not the session total).
 *
 * @returns `true` when the session is unknown or its `consecutiveCount` is
 *   strictly below `maxConsecutive`.
 */
export function isUnderConsecutiveLimit(
  sessionId: string,
  maxConsecutive: number,
): boolean {
  const state = sessions.get(sessionId);
  return state ? state.consecutiveCount < maxConsecutive : true;
}
220
+
221
/**
 * Legacy alias kept for existing callers; forwards `maxInjections` as the
 * consecutive limit.
 *
 * @deprecated Use isUnderConsecutiveLimit instead.
 */
export function isUnderLimit(
  sessionId: string,
  maxInjections: number,
): boolean {
  const underLimit = isUnderConsecutiveLimit(sessionId, maxInjections);
  return underLimit;
}
230
+
231
/**
 * Feed one observation of the incomplete-task count into stagnation tracking.
 *
 * If the count equals the one seen on the previous call, the stagnation
 * counter goes up by one; any different count (including the first
 * observation, when the baseline is null) resets it to zero. The observed
 * count then becomes the new baseline.
 *
 * @returns `true` once the stagnation counter has reached `threshold`.
 */
export function trackStagnation(
  sessionId: string,
  incompleteCount: number,
  threshold: number,
): boolean {
  const state = getOrCreate(sessionId);
  const unchanged = state.lastIncompleteCount === incompleteCount;

  state.stagnationCount = unchanged ? state.stagnationCount + 1 : 0;
  state.lastIncompleteCount = incompleteCount;

  return state.stagnationCount >= threshold;
}
251
+
252
/**
 * Clear stagnation tracking completely (e.g. after a successful injection).
 *
 * Both the counter and the baseline count are reset. If only the counter
 * were cleared, the next `trackStagnation()` call with the same count would
 * immediately register as "unchanged" and push the counter to 1, producing
 * premature stagnation right after the first injection.
 *
 * Sessions without a record are ignored.
 */
export function resetStagnation(sessionId: string): void {
  const state = sessions.get(sessionId);
  if (!state) return;

  state.lastIncompleteCount = null;
  state.stagnationCount = 0;
}
267
+
268
+ // ─── Fuzzy error detection ──────────────────────────────────────────────────
269
+
270
/**
 * Build the set of character trigrams of a string for fuzzy comparison.
 *
 * Normalization before slicing: lowercase, strip everything except
 * `a-z`, `0-9` and whitespace, collapse whitespace runs to one space, trim.
 *
 * Short inputs: a normalized string of 1–2 characters has no trigram, so
 * the whole string is used as its single gram. Without this, every short
 * message maps to the empty set, and two empty sets compare as identical,
 * which makes distinct short errors (e.g. "E1" vs "E2") look the same.
 *
 * @param text - Raw text, typically an error message.
 * @returns Distinct trigrams of the normalized text; empty only when the
 *   normalized text itself is empty.
 */
export function buildTrigramSet(text: string): Set<string> {
  const normalized = text
    .toLowerCase()
    .replace(/[^a-z0-9\s]/g, "")
    .replace(/\s+/g, " ")
    .trim();
  const trigrams = new Set<string>();

  if (normalized.length > 0 && normalized.length < 3) {
    // Too short to slice: keep the text itself so it stays distinguishable.
    trigrams.add(normalized);
    return trigrams;
  }

  for (let i = 0; i <= normalized.length - 3; i++) {
    trigrams.add(normalized.slice(i, i + 3));
  }
  return trigrams;
}
286
+
287
/**
 * Jaccard similarity of two trigram sets: |A ∩ B| / |A ∪ B|, in the range 0..1.
 *
 * @returns 1 when both sets are empty, 0 when exactly one is empty,
 *   otherwise the intersection size divided by the union size.
 */
export function trigramSimilarity(a: Set<string>, b: Set<string>): number {
  if (a.size === 0 && b.size === 0) return 1;
  if (a.size === 0 || b.size === 0) return 0;

  // Walk the smaller set and probe the larger one; the intersection is symmetric.
  const [smaller, larger] = a.size <= b.size ? [a, b] : [b, a];
  let shared = 0;
  for (const gram of smaller) {
    if (larger.has(gram)) shared++;
  }

  // Both sets are non-empty here, so the union size is at least 1.
  return shared / (a.size + b.size - shared);
}
300
+
301
/**
 * Compare an error message with the stored error signature using trigram
 * (Jaccard) similarity and update the session's similar-error counter.
 *
 * - A signature is stored and similarity >= `threshold`: the counter is
 *   incremented and the stored signature is kept unchanged.
 * - Otherwise (no signature yet, or not similar enough): the message becomes
 *   the new signature and the counter restarts at 1.
 *
 * @param sessionId - Session whose error tracking is updated.
 * @param errorMessage - The error text just observed.
 * @param threshold - Minimum similarity to count as "the same error" (default 0.95).
 * @returns `isSimilar` plus the counter value after this call.
 */
export function checkSimilarError(
  sessionId: string,
  errorMessage: string,
  threshold = 0.95,
): { isSimilar: boolean; similarCount: number } {
  const state = getOrCreate(sessionId);
  const incomingGrams = buildTrigramSet(errorMessage);
  const previousMessage = state.lastErrorSignature;

  if (previousMessage !== null) {
    const score = trigramSimilarity(
      incomingGrams,
      buildTrigramSet(previousMessage),
    );
    if (score >= threshold) {
      state.similarErrorCount += 1;
      return { isSimilar: true, similarCount: state.similarErrorCount };
    }
  }

  // Different (or first) error: start a new run with this message as the signature.
  state.lastErrorSignature = errorMessage;
  state.similarErrorCount = 1;
  return { isSimilar: false, similarCount: 1 };
}
329
+
330
/**
 * Forget the stored error signature and zero the similar-error counter
 * (e.g. after a successful injection). Unknown sessions are ignored.
 */
export function resetErrorTracking(sessionId: string): void {
  const state = sessions.get(sessionId);
  if (!state) return;

  state.similarErrorCount = 0;
  state.lastErrorSignature = null;
}
340
+
341
/** Set or clear the flag that marks a background spawn action as running. */
export function setSpawnInFlight(sessionId: string, value: boolean): void {
  const state = getOrCreate(sessionId);
  state.spawnInFlight = value;
}
344
+
345
+ // ─── Stale-ctx caching (GREEN — proper caching) ───────────────────────────
346
+ //
347
+ // ctx objects become stale after ctx.newSession/fork/switchSession/reload.
348
+ // We capture session identity once in session_start (ctx is fresh there)
349
+ // and cache it in module-level variables. All other hooks use the cached
350
+ // values — never re-accessing ctx.sessionManager.
351
+
352
// Cached values — populated by setSessionState / setCachedBranch, read by the getters.
let _cachedSessionId: string = "ephemeral";
let _cachedBranch: unknown[] = [];

/**
 * Coerce a branch argument to an array.
 *
 * This file is compiled with `@ts-nocheck`, so the `unknown[]` parameter
 * types are not enforced at call sites; a null/undefined (or otherwise
 * non-array) branch is replaced by an empty array so `getCachedBranch()`
 * always returns something iterable. Real arrays are kept by reference.
 */
function toBranchArray(branch: unknown[]): unknown[] {
  return Array.isArray(branch) ? branch : [];
}

/**
 * Capture session identity from ctx while it's fresh (call from session_start only).
 * Accepts pre-extracted primitives to avoid ast-grep no-stale-ctx-capture false positives.
 *
 * @param sessionId - Session id; null/undefined is stored as "ephemeral".
 * @param branch - Current branch entries; non-array values are stored as `[]`.
 */
export function setSessionState(sessionId: string | null | undefined, branch: unknown[]): void {
  _cachedSessionId = sessionId ?? "ephemeral";
  _cachedBranch = toBranchArray(branch);
}

/**
 * Update cached branch from ctx while it's fresh (call from agent_end only).
 * Accepts pre-extracted branch to avoid ast-grep no-stale-ctx-capture false positives.
 *
 * @param branch - Current branch entries; non-array values are stored as `[]`.
 */
export function setCachedBranch(branch: unknown[]): void {
  _cachedBranch = toBranchArray(branch);
}

/** Return the cached session id ("ephemeral" when none has been captured). */
export function getCachedSessionId(): string {
  return _cachedSessionId;
}

/** Return the cached branch array (the same reference that was stored). */
export function getCachedBranch(): unknown[] {
  return _cachedBranch;
}

/** Restore the cache to its defaults: id "ephemeral" and an empty branch. */
export function clearSessionIdentity(): void {
  _cachedSessionId = "ephemeral";
  _cachedBranch = [];
}
385
+
386
/**
 * Counter meant to track how many times ctx.sessionManager was accessed (for testing).
 * NOTE(review): nothing in the visible source increments it — confirm where it is updated.
 */
let _ctxAccessCount = 0;

/** Read the current value of the ctx access counter. */
export function getCtxAccessCount(): number {
  const current = _ctxAccessCount;
  return current;
}

/** Set the ctx access counter back to zero. */
export function resetCtxAccessCount(): void {
  _ctxAccessCount = 0;
}
@@ -0,0 +1,288 @@
1
+ /**
2
+ * todo-snapshot — Builds a snapshot of the current todo state
3
+ *
4
+ * Scans ctx.sessionManager.getBranch() for todo tool results — same approach
5
+ * rpiv-todo uses internally. No cross-package imports needed.
6
+ *
7
+ * Falls back gracefully if no todo entries found in session.
8
+ */
9
+ // @ts-nocheck
10
+
11
+ //
12
+
13
+
14
+ import { createPluginLogger } from "./lib/plugin-logger";
15
+ import type { ContextFeedConfig, SessionContext, TodoSnapshot } from "./config";
16
+
17
+ const logger = createPluginLogger("todo-enforcer");
18
+
19
+ // ─── Types ───────────────────────────────────────────────────────────────────
20
+
21
/**
 * Shape of the `details` payload this module looks for in todo tool
 * results: the task list plus the id counter.
 */
export interface TaskDetails {
  /** All tasks in the payload; only `id`, `subject` and `status` are required. */
  tasks: {
    id: number;
    subject: string;
    /** Status string; this module compares against "pending", "in_progress", "completed" and "deleted". */
    status: string;
    description?: string;
    activeForm?: string;
    /** Task ids listed as blockers. */
    blockedBy?: number[];
    owner?: string;
    metadata?: Record<string, unknown>;
  }[];
  /** Numeric id counter carried alongside the tasks. */
  nextId: number;
}
34
+
35
/** Outcome of building a todo snapshot. */
export interface TodoSnapshotResult {
  /** The snapshot values (counts, rendered lists, summary and context fields). */
  snapshot: TodoSnapshot;
  /** True when todo entries were found in the session; false for the placeholder snapshot. */
  available: boolean;
}
40
+
41
+ // ─── Session scanner ─────────────────────────────────────────────────────────
42
+
43
/**
 * Runtime type guard for a todo `details` payload.
 *
 * Accepts a non-null object whose `tasks` is an array of non-null objects
 * and whose `nextId` is a number. The per-task check keeps the predicate
 * honest: code that narrows through this guard reads `task.status`, which
 * would throw on a null or primitive element.
 *
 * @param value - Arbitrary value taken from a session entry.
 * @returns Whether `value` can safely be treated as `TaskDetails`.
 */
function isTaskDetails(value: unknown): value is TaskDetails {
  if (!value || typeof value !== "object") return false;
  const v = value as Record<string, unknown>;
  if (!Array.isArray(v.tasks) || typeof v.nextId !== "number") return false;
  return v.tasks.every(
    (task: unknown) => task !== null && typeof task === "object",
  );
}
48
+
49
/**
 * The subset of a session message this module reads. Every field is
 * optional because entries are inspected defensively.
 */
interface SessionMessage {
  /** Message role; this module checks for "user", "assistant" and "toolResult". */
  role?: string;
  /** Tool name on tool results; "todo" marks todo tool output. */
  toolName?: string;
  toolCallId?: string;
  /** Custom marker; "todo-enforcer" identifies the enforcer's own messages. */
  customType?: string;
  /** Message body: a string, or an array of strings / objects with a `text` field. */
  content?: unknown;
  /** Tool-specific payload; for todo results this carries the task details. */
  details?: unknown;
}

/** One entry of a session branch; only entries of type "message" are inspected here. */
export interface SessionEntry {
  type: string;
  message?: SessionMessage;
}
62
+
63
/**
 * Flatten message content into plain text.
 *
 * - A string is returned unchanged.
 * - An array contributes its string items and the string `text` field of
 *   its object items; everything else (and empty strings) is dropped, and
 *   the surviving pieces are joined with newlines.
 * - Any other value yields the empty string.
 */
function normalizeContent(content: unknown): string {
  if (typeof content === "string") return content;
  if (!Array.isArray(content)) return "";

  const pieces: string[] = [];
  for (const item of content) {
    let piece = "";
    if (typeof item === "string") {
      piece = item;
    } else if (item && typeof item === "object" && "text" in item) {
      const text = (item as { text?: unknown }).text;
      if (typeof text === "string") piece = text;
    }
    // Empty pieces are skipped so they do not produce blank lines.
    if (piece) pieces.push(piece);
  }
  return pieces.join("\n");
}
89
+
90
/** Phrases whose presence near the start of a message marks it as enforcer-generated. */
const ENFORCER_TEXT_MARKERS = [
  "You have incomplete tasks. Continue working on them.",
  "Pick up where you left off.",
];

/**
 * Tell whether a message was produced by the enforcer itself.
 *
 * A message counts as an echo when its `customType` is "todo-enforcer", or
 * when the first 400 characters of its flattened content contain one of the
 * marker phrases.
 */
function isEnforcerEcho(message: SessionMessage): boolean {
  if (message.customType === "todo-enforcer") return true;

  const head = normalizeContent(message.content).slice(0, 400);
  if (head.length === 0) return false;

  for (const marker of ENFORCER_TEXT_MARKERS) {
    if (head.includes(marker)) return true;
  }
  return false;
}
101
+
102
/**
 * Scan the session branch for todo tool results and return the payload of
 * the last one that validates as `TaskDetails`, or null if there is none.
 *
 * Only entries shaped like
 *   { type: "message", message: { role: "toolResult", toolName: "todo", details } }
 * are considered. `details` may hold the payload directly
 *   details: { tasks: [...], nextId: N }
 * or one level down
 *   details: { details: { tasks: [...], nextId: N } }
 *
 * When nothing is found, a debug line describing the branch composition is
 * logged to help diagnosis.
 */
function scanSessionForTodos(
  getBranch: () => SessionEntry[],
): TaskDetails | null {
  const branch = getBranch();
  let latest: TaskDetails | null = null;

  for (const entry of branch) {
    const isTodoResult =
      entry.type === "message" &&
      entry.message?.role === "toolResult" &&
      entry.message?.toolName === "todo";
    if (!isTodoResult) continue;

    let candidate: unknown = entry.message?.details;
    if (!candidate || typeof candidate !== "object") continue;

    // Unwrap the nested form when the outer object has no task array of its own.
    const outer = candidate as Record<string, unknown>;
    const inner = outer.details;
    if (!Array.isArray(outer.tasks) && inner && typeof inner === "object") {
      candidate = inner;
    }

    // Later valid results overwrite earlier ones, so the last one wins.
    if (isTaskDetails(candidate)) latest = candidate;
  }

  if (latest === null) {
    // Debug: log branch composition for diagnosis
    let messageCount = 0;
    let toolResultCount = 0;
    let todoResultCount = 0;
    for (const e of branch) {
      if (e.type !== "message") continue;
      if (e.message) messageCount++;
      if (e.message?.role !== "toolResult") continue;
      toolResultCount++;
      if (e.message?.toolName === "todo") todoResultCount++;
    }
    logger.debug("scanSessionForTodos: no todo results", {
      branchLength: branch.length,
      messageCount,
      toolResultCount,
      todoResultCount,
    });
  }

  return latest;
}
155
+
156
+ // ─── Session context builder ─────────────────────────────────────────────────
157
+
158
/**
 * Assemble the text context handed to the enforcer prompt from the session branch.
 *
 * Steps:
 * 1. Collect the messages of all "message" entries. Entries whose message
 *    is null or undefined are skipped, matching how `scanSessionForTodos`
 *    treats them; a null message would otherwise throw on `.role`.
 * 2. Find the latest user message, optionally ignoring enforcer echoes.
 * 3. Take everything from that message on (or all messages if there is no
 *    user message), optionally dropping enforcer echoes.
 * 4. Render the assistant text, the role-prefixed transcript and the metadata JSON.
 *
 * @param sessionId - Session id, included in the metadata JSON.
 * @param cwd - Working directory, included in the metadata JSON.
 * @param getBranch - Supplier of the current branch entries.
 * @param config - Context options: `excludePreviousEnforcerMessages`,
 *   `assistantMode` ("mostRecent" keeps only the last assistant text) and
 *   `includeSessionMetadata` (metadata is omitted only when exactly `false`).
 * @returns The four context strings; empty strings where nothing applies.
 */
export function buildSessionContext(
  sessionId: string,
  cwd: string,
  getBranch: () => SessionEntry[],
  config: ContextFeedConfig,
): SessionContext {
  const branch = getBranch();
  const messages = branch
    .filter(
      (entry): entry is { type: string; message: SessionMessage } =>
        // `!= null` rejects both undefined and null messages.
        entry.type === "message" && entry.message != null,
    )
    .map((entry) => entry.message);

  let latestUserIndex = -1;
  for (let i = messages.length - 1; i >= 0; i--) {
    const m = messages[i];
    if (m.role !== "user") continue;
    // Skip prior enforcer injections. sendUserMessage creates a real user
    // message; without this check the next injection nests the previous one
    // via {{latest_user_message}} and the prompt grows unboundedly.
    if (config.excludePreviousEnforcerMessages && isEnforcerEcho(m)) continue;
    latestUserIndex = i;
    break;
  }

  const latestUserMessage =
    latestUserIndex >= 0
      ? normalizeContent(messages[latestUserIndex].content)
      : "";

  const relevantMessages =
    latestUserIndex >= 0 ? messages.slice(latestUserIndex) : messages;
  const filteredMessages = relevantMessages.filter((message) => {
    if (!config.excludePreviousEnforcerMessages) return true;
    return !isEnforcerEcho(message);
  });

  const assistantMessages = filteredMessages
    .filter((message) => message.role === "assistant")
    .map((message) => normalizeContent(message.content))
    .filter(Boolean);

  const assistantMessageText =
    config.assistantMode === "mostRecent"
      ? (assistantMessages[assistantMessages.length - 1] ?? "")
      : assistantMessages.join("\n\n");

  // Role-prefixed transcript; messages without text content are left out.
  const allMessagesSinceLatestUser = filteredMessages
    .map((message) => {
      const content = normalizeContent(message.content);
      if (!content) return "";
      return `${message.role ?? "unknown"}: ${content}`;
    })
    .filter(Boolean)
    .join("\n");

  const sessionMetadata =
    config.includeSessionMetadata === false
      ? ""
      : JSON.stringify({
          sessionId,
          cwd,
          messageCount: messages.length,
          latestUserIndex,
        });

  return {
    latestUserMessage,
    assistantMessages: assistantMessageText,
    allMessagesSinceLatestUser,
    sessionMetadata,
  };
}
231
+
232
+ // ─── Snapshot builder ────────────────────────────────────────────────────────
233
+
234
/**
 * Build the todo snapshot from the latest todo tool result in the branch.
 *
 * When no todo result is found, a placeholder snapshot with zero counts and
 * the list text "(no todo tool available)" is returned with `available: false`.
 * Otherwise tasks with status "deleted" are ignored, and the rest are
 * counted and rendered as `- [status] #id subject` lines ("(none)" for an
 * empty list).
 *
 * @param sessionSummary - Summary text copied into the snapshot.
 * @param getBranch - Supplier of the current branch entries.
 * @param context - Session context fields spread into the snapshot;
 *   defaults to all-empty strings.
 */
export function buildTodoSnapshot(
  sessionSummary: string,
  getBranch: () => SessionEntry[],
  context: SessionContext = {
    latestUserMessage: "",
    assistantMessages: "",
    allMessagesSinceLatestUser: "",
    sessionMetadata: "",
  },
): TodoSnapshotResult {
  const details = scanSessionForTodos(getBranch);

  if (!details) {
    const placeholder = {
      incompleteCount: 0,
      inProgressCount: 0,
      completedCount: 0,
      totalCount: 0,
      incompleteList: "(no todo tool available)",
      completedList: "",
      sessionSummary,
      ...context,
    };
    return { snapshot: placeholder, available: false };
  }

  const live = details.tasks.filter((task) => task.status !== "deleted");
  const withStatus = (...wanted: string[]): TaskDetails["tasks"] =>
    live.filter((task) => wanted.includes(task.status));

  const incomplete = withStatus("pending", "in_progress");
  const inProgress = withStatus("in_progress");
  const completed = withStatus("completed");

  // Completed tasks have status "completed" by construction, so the shared
  // formatter prints "[completed]" for them.
  const render = (tasks: TaskDetails["tasks"]): string => {
    const lines = tasks.map(
      (task) => `- [${task.status}] #${task.id} ${task.subject}`,
    );
    return lines.join("\n") || "(none)";
  };

  const snapshot = {
    incompleteCount: incomplete.length,
    inProgressCount: inProgress.length,
    completedCount: completed.length,
    totalCount: live.length,
    incompleteList: render(incomplete),
    completedList: render(completed),
    sessionSummary,
    ...context,
  };
  return { snapshot, available: true };
}