spec-and-loop 3.0.3 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -28,6 +28,14 @@ const DEFAULTS = {
28
28
  maxIterations: 50,
29
29
  completionPromise: 'COMPLETE',
30
30
  taskPromise: 'READY_FOR_NEXT_TASK',
31
+ // Emitted by the agent when a task's `Stop and hand off if:` clause fires
32
+ // (i.e. external decision required: revert protected drift, file an
33
+ // out-of-scope refactor, escalate to a human reviewer, etc). The runner
34
+ // recognizes this as a *clean* exit distinct from `stalled` — it preserves
35
+ // the agent's diagnosis under `<ralphDir>/HANDOFF.md` and surfaces
36
+ // `exitReason='blocked_handoff'` so operators can tell "this task is
37
+ // genuinely blocked on me" apart from "the loop livelocked."
38
+ blockedHandoffPromise: 'BLOCKED_HANDOFF',
31
39
  tasksMode: false,
32
40
  noCommit: false,
33
41
  verbose: false,
@@ -48,11 +56,16 @@ const DEFAULTS = {
48
56
  * Determine whether an iteration made any forward progress.
49
57
  *
50
58
  * An iteration is considered productive if any of the following are true:
51
- * - OpenCode emitted the task or completion promise
59
+ * - OpenCode emitted the task, completion, or blocked-handoff promise
52
60
  * - One or more tasks transitioned to "completed" during the iteration
53
61
  * - At least one repo-tracked file was observed to have changed
54
62
  * - The iteration failed outright (its signal is handled separately)
55
63
  *
64
+ * Note: a blocked-handoff iteration is intentionally excluded from "stalled"
65
+ * because the agent followed protocol — it surfaced a structured exit, the
66
+ * runner caught it, and the loop will break this iteration. We never want
67
+ * to penalize the agent (or the operator) for the canonical hand-off path.
68
+ *
56
69
  * @param {object} iterationSignals
57
70
  * @returns {boolean}
58
71
  */
@@ -61,6 +74,7 @@ function _iterationIsStalled(iterationSignals) {
61
74
  if (iterationSignals.iterationFailed) return false;
62
75
  if (iterationSignals.hasCompletion) return false;
63
76
  if (iterationSignals.hasTask) return false;
77
+ if (iterationSignals.hasBlockedHandoff) return false;
64
78
  if (iterationSignals.completedTasksCount > 0) return false;
65
79
  if (iterationSignals.filesChangedCount > 0) return false;
66
80
  return true;
@@ -118,6 +132,243 @@ function _errorText(err) {
118
132
  return String(err);
119
133
  }
120
134
 
135
/**
 * Extract the agent's blocker note from an iteration's output.
 *
 * The note is the text that precedes the first line consisting solely of
 * `<promise>{promiseName}</promise>` (surrounding whitespace is ignored):
 *
 *   - If a sentinel header (`## Blocker`, `## Blocker Note` or `Blocker:`,
 *     matched case-insensitively at the start of a line) appears above the
 *     tag, the note runs from the sentinel *nearest to the tag* up to, but
 *     not including, the tag line. Any `## Why:` / `## Suggested Next Step:`
 *     style sections between the sentinel and the tag are kept verbatim.
 *   - Otherwise the note falls back to the last 40 non-blank lines above the
 *     tag, so the operator gets *something* useful even when the agent skips
 *     the structured format.
 *
 * @param {string|Buffer} outputText full iteration stdout; a Buffer is
 *   decoded as UTF-8, any other non-string value yields an empty note
 * @param {string} promiseName configured BLOCKED_HANDOFF promise name
 * @returns {string} the extracted note, trimmed (empty string if the tag is
 *   absent or the input is unusable)
 */
function _extractBlockerNote(outputText, promiseName) {
  if (!outputText || !promiseName) return '';

  // Process output may reach us as a Buffer; anything else that is not text
  // cannot contain a note, so bail out instead of throwing on `.split`.
  let text;
  if (typeof outputText === 'string') {
    text = outputText;
  } else if (Buffer.isBuffer(outputText)) {
    text = outputText.toString('utf8');
  } else {
    return '';
  }

  const tag = `<promise>${promiseName}</promise>`;
  const lines = text.split(/\r?\n/);
  const tagIdx = lines.findIndex((line) => line.trim() === tag);
  if (tagIdx === -1) return '';

  // Walk upwards from the tag; the closest sentinel header starts the note.
  const sentinel = /^\s*(##\s*Blocker(\s+Note)?|Blocker:)/i;
  for (let i = tagIdx - 1; i >= 0; i--) {
    if (sentinel.test(lines[i])) {
      return lines.slice(i, tagIdx).join('\n').trim();
    }
  }

  // No sentinel: keep the last 40 non-blank lines before the tag, in their
  // original order.
  const fallback = [];
  for (let i = tagIdx - 1; i >= 0 && fallback.length < 40; i--) {
    if (lines[i].trim()) fallback.unshift(lines[i]);
  }
  return fallback.join('\n').trim();
}
185
+
186
/**
 * Scan well-known locations for blocker / diagnostic artifacts the agent
 * may have written during the most recent iteration, and return their
 * content (truncated) so it can be teed into the next iteration's prompt.
 *
 * The motivation is a failure mode observed in the wild: the agent writes a
 * report with a clear `STATUS=BLOCKED REASON=...` diagnosis, then on the next
 * iteration starts from a blank slate, re-derives the same diagnosis, and
 * burns another full LLM cycle. Surfacing the artifact gives the agent its
 * own prior diagnosis as input on the next turn.
 *
 * Directories probed (one level deep, regular files only):
 *   - `ralphDir`
 *   - `<repoRoot>/.ralph/baselines/<change>`, where `<change>` is the
 *     basename of `ralphDir`'s parent directory
 *
 * A file is a candidate when its name is `handoff`, `blocked`, `blocker` or
 * `blocker-note` with a `.md`/`.txt` extension, or when its name ends in
 * `report.md` / `report.txt` (all case-insensitive). Candidates larger than
 * 1 MiB are skipped. Candidates not modified within the last 10 minutes are
 * skipped too, unless `includeStaleHandoff` is set and the file is one of the
 * handoff-style names above.
 *
 * The freshest `maxArtifacts` readable files are returned, each capped at
 * `maxCharsEach` characters. Truncation keeps the head of a file, except for
 * `HANDOFF.md`: that file is an append-only log, so its tail (the newest
 * entries) is kept instead.
 *
 * @param {string} ralphDir
 * @param {object} [options] { repoRoot = process.cwd(), maxArtifacts = 3,
 *   maxCharsEach = 1500, includeStaleHandoff = false }; keys whose value is
 *   `undefined` fall back to the default
 * @returns {Array<{ path: string, content: string, truncated: boolean }>}
 *   `path` is relative to `repoRoot`
 */
function _detectBlockerArtifacts(ralphDir, options) {
  const fs = require('fs');
  const fsPath = require('path');

  const opts = {
    repoRoot: process.cwd(),
    maxArtifacts: 3,
    maxCharsEach: 1500,
    includeStaleHandoff: false,
  };
  // Copy only defined overrides so `{ repoRoot: undefined }` cannot clobber a
  // default (which would make every path computation below throw).
  for (const [key, value] of Object.entries(options || {})) {
    if (value !== undefined) opts[key] = value;
  }

  // Coerce a cap to a non-negative integer, falling back when it is unusable.
  const toCap = (value, fallback) => {
    const n = Math.floor(Number(value));
    return Number.isNaN(n) ? fallback : Math.max(0, n);
  };
  const maxArtifacts = toCap(opts.maxArtifacts, 3);
  const maxCharsEach = toCap(opts.maxCharsEach, 1500);

  if (!ralphDir || !fs.existsSync(ralphDir)) return [];

  const MAX_BYTES = 1024 * 1024;
  const FRESH_WINDOW_MS = 10 * 60 * 1000;

  const matches = new Map(); // absolute path -> mtimeMs (dedup by path)
  const isHandoffArtifact = (name) =>
    /^(handoff|blocked|blocker(-note)?)\.(md|txt)$/i.test(name);
  const isInteresting = (name) =>
    isHandoffArtifact(name) || /report\.(md|txt)$/i.test(name);
  // HANDOFF.md is appended to over time, so its newest entry is at the end.
  const isAppendLog = (p) => /^handoff\.md$/i.test(fsPath.basename(p));

  const consider = (p) => {
    try {
      const st = fs.statSync(p);
      if (!st.isFile()) return;
      // Files this large are almost certainly logs or binaries rather than
      // human-curated blocker notes; keep them out of the prompt.
      if (st.size > MAX_BYTES) return;
      // Older files are usually leftovers from prior runs. The one exception
      // is a handoff-style file when the caller explicitly asks for it.
      const stale = Date.now() - st.mtimeMs > FRESH_WINDOW_MS;
      const staleAllowed =
        opts.includeStaleHandoff && isHandoffArtifact(fsPath.basename(p));
      if (stale && !staleAllowed) return;
      matches.set(fsPath.resolve(p), st.mtimeMs);
    } catch (_) {
      // ENOENT / permission errors: ignore — this is a best-effort probe.
    }
  };

  const scan = (dir) => {
    let entries;
    try {
      entries = fs.readdirSync(dir, { withFileTypes: true });
    } catch (_) {
      return; // missing or unreadable directory
    }
    for (const ent of entries) {
      if (ent.isFile() && isInteresting(ent.name)) {
        consider(fsPath.join(dir, ent.name));
      }
    }
  };

  // 1) Flat listing of ralphDir itself.
  scan(ralphDir);

  // 2) Convention-based baseline directory. The change name is the basename
  //    of ralphDir's parent (e.g. .../changes/<name>/.ralph -> <name>).
  try {
    const changeName = fsPath.basename(fsPath.dirname(ralphDir));
    scan(fsPath.join(opts.repoRoot, '.ralph', 'baselines', changeName));
  } catch (_) {
    // Non-string ralphDir / repoRoot: skip the baseline probe.
  }

  if (matches.size === 0) return [];

  // Freshest first, so the most recent artifacts win when the cap applies.
  const freshestFirst = Array.from(matches.entries())
    .sort((a, b) => b[1] - a[1])
    .map(([p]) => p);

  const out = [];
  for (const p of freshestFirst) {
    // Count only artifacts that were actually read, so one unreadable file
    // does not use up a slot that a readable one could fill.
    if (out.length >= maxArtifacts) break;
    try {
      const raw = fs.readFileSync(p, 'utf8');
      const truncated = raw.length > maxCharsEach;
      let content = raw;
      if (truncated) {
        content = isAppendLog(p)
          ? raw.slice(raw.length - maxCharsEach)
          : raw.slice(0, maxCharsEach);
      }
      out.push({
        path: fsPath.relative(opts.repoRoot, p) || p,
        content: content.trim(),
        truncated,
      });
    } catch (_) {
      // Ignore unreadable artifacts.
    }
  }

  return out;
}
314
+
315
/**
 * Record the agent's blocker note in `<ralphDir>/HANDOFF.md`, together with
 * the iteration number, a timestamp and the task, so an operator can
 * reproduce the context.
 *
 * The file is an append-only audit trail: a single change can hit several
 * BLOCKED_HANDOFFs over time (operator unblocks, loop resumes, hits a
 * different blocker). New sections are therefore appended with
 * `fs.appendFileSync` rather than by re-writing the whole file, so earlier
 * entries are never re-written. A short header is written first when the
 * file does not exist yet. `ralphDir` is created if missing.
 *
 * @param {string} ralphDir
 * @param {object} entry { iteration, task, note, completionPromise, taskPromise }
 *   Only `iteration`, `task` and `note` are used here. A `task` that is empty
 *   or `'N/A'`, and a `note` that is blank or not a string, are replaced by
 *   placeholder text.
 * @returns {string} the absolute path to HANDOFF.md
 * @throws {Error} any filesystem error from creating the directory or
 *   appending to the file
 */
function _writeHandoff(ralphDir, entry) {
  const fs = require('fs');
  const fsPath = require('path');

  if (!fs.existsSync(ralphDir)) {
    fs.mkdirSync(ralphDir, { recursive: true });
  }
  // resolve() rather than join(): the documented contract is an absolute
  // path even when ralphDir was given relative to the working directory.
  const handoffPath = fsPath.resolve(ralphDir, 'HANDOFF.md');
  const ts = new Date().toISOString();

  const taskLine = entry.task && entry.task !== 'N/A'
    ? entry.task
    : '(no task in progress)';
  const note = typeof entry.note === 'string' ? entry.note.trim() : '';
  const noteBlock = note ||
    '(agent emitted BLOCKED_HANDOFF without a structured blocker note;\n' +
    'check the iteration stdout log for the rationale)';

  const section = [
    '',
    `## Iteration ${entry.iteration} — ${ts}`,
    '',
    `**Task:** ${taskLine}`,
    '',
    '**Agent blocker note:**',
    '',
    noteBlock,
    '',
    '**Operator next step:** investigate the blocker, take one of the actions',
    'the task spec authorizes (revert / isolate / justify / escalate), then',
    'rerun `ralph-run` to resume.',
    '',
    '---',
    '',
  ].join('\n');

  // Only a brand-new log gets the explanatory header.
  const header = fs.existsSync(handoffPath)
    ? ''
    : '# Ralph Handoff Log\n\nThis file is appended whenever the loop\n' +
      'exits with `BLOCKED_HANDOFF`. Each section is one blocker the\n' +
      'agent surfaced — review newest first.\n';

  fs.appendFileSync(handoffPath, header + section, 'utf8');
  return handoffPath;
}
371
+
121
372
  function _appendFatalIterationFailure(ralphDir, entry) {
122
373
  errors.append(ralphDir, {
123
374
  iteration: entry.iteration,
@@ -155,6 +406,14 @@ function _appendFatalIterationFailure(ralphDir, entry) {
155
406
  });
156
407
  }
157
408
 
409
/**
 * Collapse a blocker note into a single line of at most `limit` UTF-16 code
 * units.
 *
 * Every run of whitespace (including newlines) becomes one space and the
 * result is trimmed. When the collapsed text exceeds `limit`, it is cut to
 * `limit - 1` code units, trailing whitespace is dropped, and a single `…`
 * is appended. The cut never leaves half of a surrogate pair behind.
 *
 * @param {string} note raw blocker note
 * @param {number} [limit=500] maximum length of the returned string; a
 *   non-numeric or NaN limit falls back to 500, and a limit below 1 yields
 *   an empty string
 * @returns {string} the one-line summary ('' for empty or non-string notes)
 */
function _summarizeBlockerNote(note, limit = 500) {
  if (!note || typeof note !== 'string') return '';

  const cap = typeof limit === 'number' && !Number.isNaN(limit)
    ? Math.floor(limit)
    : 500;
  // Not even the ellipsis fits under such a limit.
  if (cap < 1) return '';

  const oneLine = note.replace(/\s+/g, ' ').trim();
  if (!oneLine) return '';
  if (oneLine.length <= cap) return oneLine;

  const head = oneLine
    .slice(0, cap - 1)
    // A cut in the middle of an astral character (emoji etc.) leaves a lone
    // high surrogate; drop it so the summary stays well-formed.
    .replace(/[\uD800-\uDBFF]$/, '')
    .replace(/\s+$/, '');
  return `${head}…`;
}
416
+
158
417
  /**
159
418
  * Run the iteration loop.
160
419
  *
@@ -175,6 +434,7 @@ async function run(opts) {
175
434
  const minIterations = options.minIterations;
176
435
  const completionPromise = options.completionPromise;
177
436
  const taskPromise = options.taskPromise;
437
+ const blockedHandoffPromise = options.blockedHandoffPromise;
178
438
  const stallThreshold =
179
439
  typeof options.stallThreshold === 'number' && options.stallThreshold >= 0
180
440
  ? Math.floor(options.stallThreshold)
@@ -200,6 +460,8 @@ async function run(opts) {
200
460
  // otherwise start fresh at 1.
201
461
  const existingState = state.read(ralphDir);
202
462
  const resumeIteration = _resolveStartIteration(existingState, options);
463
+ const priorRunWasBlockedHandoff =
464
+ existingState && existingState.exitReason === 'blocked_handoff';
203
465
 
204
466
  if (options.verbose && resumeIteration > 1) {
205
467
  process.stderr.write(
@@ -234,6 +496,7 @@ async function run(opts) {
234
496
  maxIterations,
235
497
  completionPromise,
236
498
  taskPromise,
499
+ blockedHandoffPromise,
237
500
  tasksMode: options.tasksMode,
238
501
  tasksFile: options.tasksFile || null,
239
502
  promptFile: options.promptFile || null,
@@ -294,8 +557,19 @@ async function run(opts) {
294
557
  // dedup collapses identical entries into a single "same failure as
295
558
  // iteration N" line, so the 3-entry window is sufficient to surface
296
559
  // recurring patterns without bloating the prompt.
560
+ const recentHistory = history.recent(ralphDir, 3);
297
561
  const errorEntries = errors.readEntries(ralphDir, 3);
298
- const iterationFeedback = _buildIterationFeedback(history.recent(ralphDir, 3), errorEntries);
562
+ const blockerArtifacts = _detectBlockerArtifacts(ralphDir, {
563
+ repoRoot: process.cwd(),
564
+ includeStaleHandoff:
565
+ priorRunWasBlockedHandoff ||
566
+ recentHistory.some((entry) => entry && entry.blockedHandoffDetected),
567
+ });
568
+ const iterationFeedback = _buildIterationFeedback(
569
+ recentHistory,
570
+ errorEntries,
571
+ blockerArtifacts,
572
+ );
299
573
 
300
574
  // Inject any pending context
301
575
  const pendingContext = context.consume(ralphDir);
@@ -392,6 +666,14 @@ async function run(opts) {
392
666
  const iterationSucceeded = _wasSuccessfulIteration(result);
393
667
  const hasCompletion = iterationSucceeded && _containsPromise(outputText, completionPromise);
394
668
  const hasTask = iterationSucceeded && _containsPromise(outputText, taskPromise);
669
+ // Blocked-handoff is also a successful-iteration signal (the agent
670
+ // followed protocol and explicitly emitted a structured exit). We
671
+ // treat it as a third top-level outcome alongside completion/task.
672
+ const hasBlockedHandoff = iterationSucceeded
673
+ && _containsPromise(outputText, blockedHandoffPromise);
674
+ const blockerNote = hasBlockedHandoff
675
+ ? _extractBlockerNote(outputText, blockedHandoffPromise)
676
+ : '';
395
677
  const tasksAfter = options.tasksMode && options.tasksFile
396
678
  ? tasks.parseTasks(options.tasksFile)
397
679
  : [];
@@ -435,6 +717,10 @@ async function run(opts) {
435
717
  duration,
436
718
  completionDetected: hasCompletion,
437
719
  taskDetected: hasTask,
720
+ blockedHandoffDetected: hasBlockedHandoff,
721
+ ...(blockerNote ? { blockedHandoffNote: _summarizeBlockerNote(blockerNote) } : {}),
722
+ taskNumber: currentTaskMeta.number,
723
+ taskDescription: currentTaskMeta.description,
438
724
  toolUsage: result.toolUsage || [],
439
725
  filesChanged: result.filesChanged || [],
440
726
  exitCode: result.exitCode,
@@ -472,6 +758,7 @@ async function run(opts) {
472
758
  iterationFailed,
473
759
  hasCompletion,
474
760
  hasTask,
761
+ hasBlockedHandoff,
475
762
  completedTasksCount: completedTasks.length,
476
763
  filesChangedCount: Array.isArray(result.filesChanged) ? result.filesChanged.length : 0,
477
764
  });
@@ -487,12 +774,15 @@ async function run(opts) {
487
774
  durationMs: duration,
488
775
  outcome: iterationFailed
489
776
  ? 'failure'
490
- : stalledThisIteration
491
- ? 'stalled'
492
- : 'success',
777
+ : hasBlockedHandoff
778
+ ? 'blocked'
779
+ : stalledThisIteration
780
+ ? 'stalled'
781
+ : 'success',
493
782
  committed: commitResult.committed === true,
494
783
  hasCompletion,
495
784
  hasTask,
785
+ hasBlockedHandoff,
496
786
  completedTasksCount: completedTasks.length,
497
787
  filesChangedCount: Array.isArray(result.filesChanged) ? result.filesChanged.length : 0,
498
788
  stallStreak,
@@ -508,6 +798,44 @@ async function run(opts) {
508
798
  break;
509
799
  }
510
800
 
801
+ // Blocked-handoff exits the loop *immediately* (no minIterations
802
+ // floor). The agent has signaled an external decision is required;
803
+ // we want the operator unblocked as fast as possible. We persist the
804
+ // agent's note before breaking so it survives even a hard-kill on
805
+ // the parent process (e.g. the operator hits Ctrl-C right after).
806
+ if (hasBlockedHandoff) {
807
+ let handoffPath = '';
808
+ try {
809
+ handoffPath = _writeHandoff(ralphDir, {
810
+ iteration: iterationCount,
811
+ task: currentTask,
812
+ note: blockerNote,
813
+ completionPromise,
814
+ taskPromise,
815
+ });
816
+ } catch (writeErr) {
817
+ // Don't let a HANDOFF.md write failure mask the original signal —
818
+ // we still want to exit cleanly with `blocked_handoff`. Surface
819
+ // the write error to stderr so it's diagnosable.
820
+ process.stderr.write(
821
+ `[mini-ralph] warning: failed to write HANDOFF.md: ${writeErr.message}\n`
822
+ );
823
+ }
824
+ reporter.note(
825
+ handoffPath
826
+ ? `agent emitted ${blockedHandoffPromise}; blocker note saved to ${handoffPath}.`
827
+ : `agent emitted ${blockedHandoffPromise}; halting (HANDOFF.md write failed; see stderr).`,
828
+ 'warn'
829
+ );
830
+ if (options.verbose) {
831
+ process.stderr.write(
832
+ `[mini-ralph] ${blockedHandoffPromise} detected at iteration ${iterationCount}; halting.\n`
833
+ );
834
+ }
835
+ exitReason = 'blocked_handoff';
836
+ break;
837
+ }
838
+
511
839
  if (stallThreshold > 0 && stallStreak >= stallThreshold) {
512
840
  reporter.note(
513
841
  `stall detector: ${stallStreak} consecutive no-op iteration(s); halting.`,
@@ -976,16 +1304,19 @@ function _failureFingerprint(entry, errorEntries) {
976
1304
  stderrHead = _firstNonEmptyLine(match && match.stderr, 120);
977
1305
  }
978
1306
  // A "no promise emitted" iteration is also a distinguishable failure mode
979
- // even when exitCode===0 and there's no stderr (e.g. the agent explicitly
980
- // refuses to continue). Encoding it in the fingerprint lets the dedup
981
- // collapse repeated hand-off iterations into a single actionable line
982
- // instead of N identical bullets.
983
- const noPromise = !entry.completionDetected && !entry.taskDetected;
1307
+ // even when exitCode===0 and there's no stderr (e.g. the agent refuses to
1308
+ // continue without using the control protocol). Encoding it separately keeps
1309
+ // no-progress stalls distinct from explicit BLOCKED_HANDOFF stops.
1310
+ const noPromise =
1311
+ !entry.completionDetected &&
1312
+ !entry.taskDetected &&
1313
+ !entry.blockedHandoffDetected;
984
1314
  return JSON.stringify({
985
1315
  failureStage: entry.failureStage || '',
986
1316
  exitCode: entry.exitCode,
987
1317
  stderrHead,
988
1318
  noPromise,
1319
+ blockedHandoff: Boolean(entry.blockedHandoffDetected),
989
1320
  commitAnomalyType: entry.commitAnomalyType || '',
990
1321
  });
991
1322
  }
@@ -998,6 +1329,7 @@ function _isEmptyFingerprint(fingerprint) {
998
1329
  obj.exitCode === 0 &&
999
1330
  !obj.stderrHead &&
1000
1331
  !obj.noPromise &&
1332
+ !obj.blockedHandoff &&
1001
1333
  !obj.commitAnomalyType
1002
1334
  );
1003
1335
  } catch {
@@ -1005,14 +1337,23 @@ function _isEmptyFingerprint(fingerprint) {
1005
1337
  }
1006
1338
  }
1007
1339
 
1008
- function _buildIterationFeedback(recentHistory, errorEntries) {
1009
- if (!Array.isArray(recentHistory) || recentHistory.length === 0) {
1340
+ function _buildIterationFeedback(recentHistory, errorEntries, blockerArtifacts) {
1341
+ const hasArtifacts = Array.isArray(blockerArtifacts) && blockerArtifacts.length > 0;
1342
+ if ((!Array.isArray(recentHistory) || recentHistory.length === 0) && !hasArtifacts) {
1010
1343
  return '';
1011
1344
  }
1345
+ if (!Array.isArray(recentHistory)) recentHistory = [];
1012
1346
 
1013
1347
  const problemLines = [];
1014
1348
  // Track fingerprint -> first iteration number for dedup
1015
1349
  const fingerprintSeen = new Map();
1350
+ // Track which task each *problematic* iteration was working when it failed
1351
+ // / produced no progress. The same `taskNumber|taskDescription` repeating
1352
+ // across the recent window is the strongest livelock signal we have — the
1353
+ // agent is hitting the same wall with no new information. Persist the run
1354
+ // length so we can emit a HARD prefix above the per-iteration list when
1355
+ // the streak crosses the noise floor (3+ consecutive on the same task).
1356
+ const recentTasks = [];
1016
1357
 
1017
1358
  for (const entry of recentHistory) {
1018
1359
  const issues = [];
@@ -1029,11 +1370,28 @@ function _buildIterationFeedback(recentHistory, errorEntries) {
1029
1370
  issues.push(`commit anomaly: ${entry.commitAnomaly}`);
1030
1371
  }
1031
1372
 
1032
- if (!entry.completionDetected && !entry.taskDetected) {
1373
+ if (entry.blockedHandoffDetected) {
1374
+ issues.push('agent emitted BLOCKED_HANDOFF and requested operator handoff');
1375
+ } else if (!entry.completionDetected && !entry.taskDetected) {
1033
1376
  issues.push('no loop promise emitted');
1034
1377
  }
1035
1378
 
1036
1379
  if (issues.length > 0) {
1380
+ // Build the task-identity stamp (used both for the per-line prefix and
1381
+ // for streak detection). Empty when the runner had no task context for
1382
+ // the iteration (non-tasks-mode, or pre-resume entries written by an
1383
+ // older runner version).
1384
+ const rawTaskId = entry.taskNumber
1385
+ ? `${entry.taskNumber}|${entry.taskDescription || ''}`
1386
+ : (entry.taskDescription || '');
1387
+ const taskStamp = entry.taskNumber
1388
+ ? `Task ${entry.taskNumber}` +
1389
+ (entry.taskDescription ? ` (${entry.taskDescription})` : '')
1390
+ : (entry.taskDescription
1391
+ ? `Task ${entry.taskDescription}`
1392
+ : '');
1393
+ if (rawTaskId) recentTasks.push(rawTaskId);
1394
+
1037
1395
  // Compute fingerprint for dedup
1038
1396
  const fp = _failureFingerprint(entry, errorEntries);
1039
1397
  const isRealFailure = !_isEmptyFingerprint(fp);
@@ -1047,13 +1405,19 @@ function _buildIterationFeedback(recentHistory, errorEntries) {
1047
1405
 
1048
1406
  if (isRealFailure && fingerprintSeen.has(fp) && !isIgnoreFilterAnomaly) {
1049
1407
  const firstIteration = fingerprintSeen.get(fp);
1408
+ const stampSuffix = taskStamp ? ` [${taskStamp}]` : '';
1050
1409
  problemLines.push(
1051
- `- Iteration ${entry.iteration}: same failure as iteration ${firstIteration} (see above).`
1410
+ `- Iteration ${entry.iteration}${stampSuffix}: same failure as iteration ${firstIteration} (see above).`
1052
1411
  );
1053
1412
  } else {
1054
1413
  if (isRealFailure && !isIgnoreFilterAnomaly) fingerprintSeen.set(fp, entry.iteration);
1055
1414
 
1056
- let line = `- Iteration ${entry.iteration}: ${issues.join('; ')}.`;
1415
+ const stampPrefix = taskStamp ? ` [${taskStamp}]` : '';
1416
+ let line = `- Iteration ${entry.iteration}${stampPrefix}: ${issues.join('; ')}.`;
1417
+
1418
+ if (entry.blockedHandoffDetected && entry.blockedHandoffNote) {
1419
+ line += ` Blocker note: ${entry.blockedHandoffNote}`;
1420
+ }
1057
1421
 
1058
1422
  // For paths_ignored_filtered / all_paths_ignored, append the first two
1059
1423
  // ignored paths inline (with a (+N more) suffix) so the agent can see
@@ -1116,14 +1480,82 @@ function _buildIterationFeedback(recentHistory, errorEntries) {
1116
1480
  }
1117
1481
  }
1118
1482
 
1119
- if (problemLines.length === 0) {
1483
+ if (problemLines.length === 0 && !hasArtifacts) {
1120
1484
  return '';
1121
1485
  }
1122
1486
 
1123
- return [
1124
- 'Use these signals to avoid repeating the same failed approach:',
1125
- ...problemLines,
1126
- ].join('\n');
1487
+ // Detect the longest *trailing* run of the same task identity in the
1488
+ // problematic-iteration window. Trailing because the only thing that
1489
+ // matters is "is the most recent stretch still the same task?" — a stale
1490
+ // streak from earlier in the window is irrelevant once the task changed.
1491
+ let sameTaskStreak = 0;
1492
+ let stuckTaskId = '';
1493
+ if (recentTasks.length > 0) {
1494
+ const last = recentTasks[recentTasks.length - 1];
1495
+ if (last) {
1496
+ stuckTaskId = last;
1497
+ for (let i = recentTasks.length - 1; i >= 0; i--) {
1498
+ if (recentTasks[i] === last) {
1499
+ sameTaskStreak++;
1500
+ } else {
1501
+ break;
1502
+ }
1503
+ }
1504
+ }
1505
+ }
1506
+
1507
+ const sections = [];
1508
+ // The 3-iteration threshold matches the default `stallThreshold` so the
1509
+ // hard-prefix and the eventual stall halt are aligned: the agent sees the
1510
+ // warning one iteration before the stall detector fires, giving it a final
1511
+ // chance to hand off cleanly via BLOCKED_HANDOFF rather than livelock.
1512
+ if (sameTaskStreak >= 3 && stuckTaskId) {
1513
+ const display = stuckTaskId.includes('|')
1514
+ ? stuckTaskId.replace('|', ' — ')
1515
+ : stuckTaskId;
1516
+ sections.push(
1517
+ [
1518
+ '⚠ STUCK ON SAME TASK',
1519
+ `You have failed to make progress on the same task ${sameTaskStreak} iterations in a row: ${display}.`,
1520
+ 'Stop retrying the same approach. Re-read the task spec, then either:',
1521
+ ' 1. Pick a materially different approach (different files, different invariant).',
1522
+ ' 2. If the task spec authorizes it (e.g. a "Stop and hand off if:" clause fired), emit <promise>BLOCKED_HANDOFF</promise> with a structured Blocker Note and stop. The runner will save it to .ralph/HANDOFF.md.',
1523
+ '',
1524
+ ].join('\n')
1525
+ );
1526
+ }
1527
+
1528
+ if (problemLines.length > 0) {
1529
+ sections.push(
1530
+ [
1531
+ 'Use these signals to avoid repeating the same failed approach:',
1532
+ ...problemLines,
1533
+ ].join('\n')
1534
+ );
1535
+ }
1536
+
1537
+ if (hasArtifacts) {
1538
+ const artifactBlocks = blockerArtifacts.map((art) => {
1539
+ const header = `### ${art.path}${art.truncated ? ' (truncated)' : ''}`;
1540
+ // Code-fence the body so MDX-y artifacts (` ` `, `<promise>`) don't
1541
+ // collide with the surrounding prompt markdown.
1542
+ return [
1543
+ header,
1544
+ '```',
1545
+ art.content,
1546
+ '```',
1547
+ ].join('\n');
1548
+ });
1549
+
1550
+ sections.push(
1551
+ [
1552
+ 'Prior-iteration blocker artifacts (read these BEFORE re-deriving the same diagnosis):',
1553
+ ...artifactBlocks,
1554
+ ].join('\n\n')
1555
+ );
1556
+ }
1557
+
1558
+ return sections.join('\n');
1127
1559
  }
1128
1560
 
1129
1561
  function _extractErrorForIteration(errorEntries, iteration) {
@@ -1358,4 +1790,7 @@ module.exports = {
1358
1790
  _failureFingerprint,
1359
1791
  _firstNonEmptyLine,
1360
1792
  _iterationIsStalled,
1793
+ _extractBlockerNote,
1794
+ _writeHandoff,
1795
+ _detectBlockerArtifacts,
1361
1796
  };
@@ -141,39 +141,6 @@ function countTasks(tasksFile) {
141
141
  };
142
142
  }
143
143
 
144
- /**
145
- * Build a compact task-context block for the current tasks file.
146
- * Mirrors the shell-side task context format so prompts can render a fresh
147
- * snapshot on every iteration without regenerating the whole PRD.
148
- *
149
- * @param {string} tasksFile
150
- * @returns {string}
151
- */
152
- function taskContext(tasksFile) {
153
- const all = parseTasks(tasksFile);
154
- if (all.length === 0) return '';
155
-
156
- const current =
157
- all.find((task) => task.status === 'in_progress') ||
158
- all.find((task) => task.status === 'incomplete') ||
159
- null;
160
- const completedCount = all.filter((task) => task.status === 'completed').length;
161
- const total = all.length;
162
-
163
- const sections = [];
164
-
165
- if (current) {
166
- sections.push('## Current Task');
167
- sections.push(`- ${current.fullDescription || current.description}`);
168
- sections.push('');
169
- }
170
-
171
- sections.push('## Progress');
172
- sections.push(`- ${completedCount} of ${total} tasks complete`);
173
-
174
- return sections.join('\n');
175
- }
176
-
177
144
  // ---------------------------------------------------------------------------
178
145
  // Internal helpers
179
146
  // ---------------------------------------------------------------------------
@@ -199,6 +166,5 @@ module.exports = {
199
166
  currentTask,
200
167
  hashFile,
201
168
  countTasks,
202
- taskContext,
203
169
  tasksLinkPath,
204
170
  };