spec-and-loop 3.1.0 → 3.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/OPENSPEC-RALPH-BP.md +42 -6
- package/lib/mini-ralph/history.js +4 -0
- package/lib/mini-ralph/index.js +1 -0
- package/lib/mini-ralph/runner.js +1082 -19
- package/lib/mini-ralph/status.js +35 -0
- package/package.json +1 -1
- package/scripts/mini-ralph-cli.js +12 -0
- package/scripts/ralph-run.sh +122 -26
package/lib/mini-ralph/runner.js
CHANGED
|
@@ -28,6 +28,14 @@ const DEFAULTS = {
|
|
|
28
28
|
maxIterations: 50,
|
|
29
29
|
completionPromise: 'COMPLETE',
|
|
30
30
|
taskPromise: 'READY_FOR_NEXT_TASK',
|
|
31
|
+
// Emitted by the agent when a task's `Stop and hand off if:` clause fires
|
|
32
|
+
// (i.e. external decision required: revert protected drift, file an
|
|
33
|
+
// out-of-scope refactor, escalate to a human reviewer, etc). The runner
|
|
34
|
+
// recognizes this as a *clean* exit distinct from `stalled` — it preserves
|
|
35
|
+
// the agent's diagnosis under `<ralphDir>/HANDOFF.md` and surfaces
|
|
36
|
+
// `exitReason='blocked_handoff'` so operators can tell "this task is
|
|
37
|
+
// genuinely blocked on me" apart from "the loop livelocked."
|
|
38
|
+
blockedHandoffPromise: 'BLOCKED_HANDOFF',
|
|
31
39
|
tasksMode: false,
|
|
32
40
|
noCommit: false,
|
|
33
41
|
verbose: false,
|
|
@@ -48,11 +56,16 @@ const DEFAULTS = {
|
|
|
48
56
|
* Determine whether an iteration made any forward progress.
|
|
49
57
|
*
|
|
50
58
|
* An iteration is considered productive if any of the following are true:
|
|
51
|
-
* - OpenCode emitted the task or
|
|
59
|
+
* - OpenCode emitted the task, completion, or blocked-handoff promise
|
|
52
60
|
* - One or more tasks transitioned to "completed" during the iteration
|
|
53
61
|
* - At least one repo-tracked file was observed to have changed
|
|
54
62
|
* - The iteration failed outright (its signal is handled separately)
|
|
55
63
|
*
|
|
64
|
+
* Note: a blocked-handoff iteration is intentionally excluded from "stalled"
|
|
65
|
+
* because the agent followed protocol — it surfaced a structured exit, the
|
|
66
|
+
* runner caught it, and the loop will break this iteration. We never want
|
|
67
|
+
* to penalize the agent (or the operator) for the canonical hand-off path.
|
|
68
|
+
*
|
|
56
69
|
* @param {object} iterationSignals
|
|
57
70
|
* @returns {boolean}
|
|
58
71
|
*/
|
|
@@ -61,6 +74,7 @@ function _iterationIsStalled(iterationSignals) {
|
|
|
61
74
|
if (iterationSignals.iterationFailed) return false;
|
|
62
75
|
if (iterationSignals.hasCompletion) return false;
|
|
63
76
|
if (iterationSignals.hasTask) return false;
|
|
77
|
+
if (iterationSignals.hasBlockedHandoff) return false;
|
|
64
78
|
if (iterationSignals.completedTasksCount > 0) return false;
|
|
65
79
|
if (iterationSignals.filesChangedCount > 0) return false;
|
|
66
80
|
return true;
|
|
@@ -118,6 +132,243 @@ function _errorText(err) {
|
|
|
118
132
|
return String(err);
|
|
119
133
|
}
|
|
120
134
|
|
|
135
|
+
/**
 * Pull the agent's blocker rationale out of an iteration's stdout.
 *
 * The rationale is whatever the agent printed before the first line that
 * consists solely of `<promise>{promiseName}</promise>` (whitespace around
 * the tag on that line is ignored):
 *
 *   - When a header line (`## Blocker`, `## Blocker Note` or `Blocker:`,
 *     matched case-insensitively) precedes the tag, the note runs from the
 *     header closest to the tag up to, but not including, the tag line.
 *   - Otherwise the note is the last 40 non-blank lines before the tag, so
 *     the operator still gets something when the agent skipped the format.
 *
 * @param {string} outputText   full iteration stdout
 * @param {string} promiseName  configured BLOCKED_HANDOFF promise name
 * @returns {string} the trimmed note; '' when either argument is empty, the
 *                   tag line is absent, or nothing precedes the tag
 */
function _extractBlockerNote(outputText, promiseName) {
  if (!outputText || !promiseName) return '';

  const promiseTag = `<promise>${promiseName}</promise>`;
  const rows = outputText.split(/\r?\n/);
  const tagRow = rows.findIndex((row) => row.trim() === promiseTag);
  if (tagRow < 0) return '';

  // Everything the agent printed ahead of the promise tag.
  const preamble = rows.slice(0, tagRow);

  // Walk upwards from the tag looking for the nearest blocker header.
  const headerPattern = /^\s*(##\s*Blocker(\s+Note)?|Blocker:)/i;
  let headerRow = -1;
  for (let row = preamble.length - 1; row >= 0 && headerRow < 0; row -= 1) {
    if (headerPattern.test(preamble[row])) headerRow = row;
  }

  if (headerRow >= 0) {
    return preamble.slice(headerRow).join('\n').trim();
  }

  // No header: keep the tail of the non-blank output instead.
  return preamble
    .filter((row) => row.trim())
    .slice(-40)
    .join('\n')
    .trim();
}
|
|
185
|
+
|
|
186
|
+
/**
 * Collect blocker / diagnostic artifacts the agent may have written during
 * a recent iteration and return their (truncated) content so the caller can
 * feed it into the next iteration's prompt.
 *
 * Motivation: the agent writes a report with a clear
 * `STATUS=BLOCKED REASON=...` diagnosis, then starts the next iteration from
 * a blank slate and re-derives the same diagnosis. Surfacing the artifact
 * hands the agent its own prior diagnosis so it can act on it or emit
 * BLOCKED_HANDOFF with a richer note.
 *
 * Directories probed (one level deep, regular files only):
 *   - <ralphDir>
 *   - <repoRoot>/.ralph/baselines/<change>, where <change> is the name of
 *     the directory that contains ralphDir
 *
 * File names accepted (case-insensitive):
 *   - handoff / blocked / blocker / blocker-note with a .md or .txt extension
 *   - any name ending in `report.md` or `report.txt`
 *
 * Files over 1 MB are skipped. Files not modified within the last 10 minutes
 * are skipped too, except that handoff-style files are kept when
 * `includeStaleHandoff` is set. Results are ordered freshest first.
 *
 * Option values of `undefined` / `null` fall back to the defaults instead of
 * overriding them; unreadable files are skipped without consuming one of the
 * `maxArtifacts` slots; truncation never ends on half of a surrogate pair.
 *
 * @param {string} ralphDir
 * @param {object} [options] { repoRoot = process.cwd(), maxArtifacts = 3, maxCharsEach = 1500, includeStaleHandoff = false }
 * @returns {Array<{ path: string, content: string, truncated: boolean }>}
 *          `path` is relative to repoRoot; `content` is trimmed
 */
function _detectBlockerArtifacts(ralphDir, options) {
  const fs = require('fs');
  const fsPath = require('path');

  const MAX_ARTIFACT_BYTES = 1024 * 1024;
  const FRESHNESS_WINDOW_MS = 10 * 60 * 1000;

  const opts = {
    repoRoot: process.cwd(),
    maxArtifacts: 3,
    maxCharsEach: 1500,
    includeStaleHandoff: false,
  };
  // Only explicit values override a default: a caller forwarding
  // `{ repoRoot: undefined }` must not turn every path operation below into a
  // swallowed TypeError (and therefore an always-empty result).
  for (const [key, value] of Object.entries(options || {})) {
    if (value !== undefined && value !== null) opts[key] = value;
  }

  if (!ralphDir || !fs.existsSync(ralphDir)) return [];

  const isHandoffArtifact = (name) =>
    /^(handoff|blocked|blocker(-note)?)\.(md|txt)$/i.test(name);
  const isInteresting = (name) =>
    isHandoffArtifact(name) || /report\.(md|txt)$/i.test(name);

  const matches = new Map(); // absolute path -> mtimeMs (dedup by path)

  const consider = (p) => {
    try {
      const st = fs.statSync(p);
      if (!st.isFile()) return;
      // Anything this large is almost certainly a log or a binary, not a
      // human-curated blocker note; keep it out of the prompt.
      if (st.size > MAX_ARTIFACT_BYTES) return;
      // Older files are almost always leftovers from prior runs; the failure
      // mode we care about produces fresh writes every iteration.
      const stale = Date.now() - st.mtimeMs > FRESHNESS_WINDOW_MS;
      if (stale && !(opts.includeStaleHandoff && isHandoffArtifact(fsPath.basename(p)))) {
        return;
      }
      matches.set(fsPath.resolve(p), st.mtimeMs);
    } catch (_) {
      // ENOENT / permission errors: ignore, this is a best-effort probe.
    }
  };

  const scanDir = (dir) => {
    try {
      for (const ent of fs.readdirSync(dir, { withFileTypes: true })) {
        if (ent.isFile() && isInteresting(ent.name)) {
          consider(fsPath.join(dir, ent.name));
        }
      }
    } catch (_) {
      // Missing or unreadable directory: nothing to surface from it.
    }
  };

  // 1) ralphDir itself, flat listing.
  scanDir(ralphDir);

  // 2) Convention-based baseline location:
  //    <repoRoot>/.ralph/baselines/<change>/, with <change> taken from the
  //    directory that contains ralphDir (e.g. .../changes/<name>/.ralph).
  try {
    const changeName = fsPath.basename(fsPath.dirname(ralphDir));
    scanDir(fsPath.join(opts.repoRoot, '.ralph', 'baselines', changeName));
  } catch (_) {
    // A non-string repoRoot makes path.join throw; stay best-effort.
  }

  if (matches.size === 0) return [];

  // Freshest first, so the newest artifacts win when the cap applies.
  const freshestFirst = Array.from(matches.entries())
    .sort((a, b) => b[1] - a[1])
    .map(([p]) => p);

  const out = [];
  for (const p of freshestFirst) {
    if (out.length >= opts.maxArtifacts) break;
    try {
      const raw = fs.readFileSync(p, 'utf8');
      const truncated = raw.length > opts.maxCharsEach;
      let content = truncated ? raw.slice(0, opts.maxCharsEach) : raw;
      // slice() counts UTF-16 code units; drop a dangling high surrogate so
      // the prompt never carries half of an astral character.
      if (truncated && /[\uD800-\uDBFF]$/.test(content)) {
        content = content.slice(0, -1);
      }
      out.push({
        path: fsPath.relative(opts.repoRoot, p) || p,
        content: content.trim(),
        truncated,
      });
    } catch (_) {
      // Unreadable artifact: skip it and let the next candidate take the slot.
    }
  }

  return out;
}
|
|
314
|
+
|
|
315
|
+
/**
 * Append the agent's blocker note to <ralphDir>/HANDOFF.md together with
 * iteration metadata so an operator can reproduce the context.
 *
 * The file is an append-only log: a single change can hit several
 * BLOCKED_HANDOFFs over time (operator unblocks, loop resumes, hits a
 * different blocker) and the full audit trail stays in one file. A title
 * block is written only when the file does not exist yet. New sections are
 * appended with `appendFileSync` rather than by re-reading and rewriting the
 * whole log, so existing content is never rewritten.
 *
 * @param {string} ralphDir  directory that holds HANDOFF.md (created,
 *                           including parents, when missing)
 * @param {object} entry     { iteration, task, note }. `task` of 'N/A' or a
 *                           falsy value is rendered as "(no task in
 *                           progress)"; a blank `note` is replaced by a
 *                           pointer to the iteration stdout log. Callers may
 *                           also pass completionPromise / taskPromise; they
 *                           are currently not written to the file.
 * @returns {string} the path to HANDOFF.md (ralphDir joined with the name)
 * @throws {Error} any filesystem error from mkdir / append is propagated
 */
function _writeHandoff(ralphDir, entry) {
  const fs = require('fs');
  const fsPath = require('path');

  // `recursive: true` is a no-op for an existing directory.
  fs.mkdirSync(ralphDir, { recursive: true });

  const handoffPath = fsPath.join(ralphDir, 'HANDOFF.md');
  const ts = new Date().toISOString();
  const taskLine = entry.task && entry.task !== 'N/A'
    ? entry.task
    : '(no task in progress)';
  const noteBlock = entry.note && entry.note.trim()
    ? entry.note.trim()
    : '(agent emitted BLOCKED_HANDOFF without a structured blocker note;\n' +
      'check the iteration stdout log for the rationale)';

  const section = [
    '',
    `## Iteration ${entry.iteration} — ${ts}`,
    '',
    `**Task:** ${taskLine}`,
    '',
    '**Agent blocker note:**',
    '',
    noteBlock,
    '',
    '**Operator next step:** investigate the blocker, take one of the actions',
    'the task spec authorizes (revert / isolate / justify / escalate), then',
    'rerun `ralph-run` to resume.',
    '',
    '---',
    '',
  ].join('\n');

  // Title block for a brand-new log only.
  const header = fs.existsSync(handoffPath)
    ? ''
    : '# Ralph Handoff Log\n\nThis file is appended whenever the loop\n' +
      'exits with `BLOCKED_HANDOFF`. Each section is one blocker the\n' +
      'agent surfaced — review newest first.\n';

  fs.appendFileSync(handoffPath, header + section, 'utf8');
  return handoffPath;
}
|
|
371
|
+
|
|
121
372
|
function _appendFatalIterationFailure(ralphDir, entry) {
|
|
122
373
|
errors.append(ralphDir, {
|
|
123
374
|
iteration: entry.iteration,
|
|
@@ -155,6 +406,14 @@ function _appendFatalIterationFailure(ralphDir, entry) {
|
|
|
155
406
|
});
|
|
156
407
|
}
|
|
157
408
|
|
|
409
|
+
/**
 * Collapse a blocker note into a single line suitable for a history record.
 *
 * All whitespace runs (including newlines) become one space and the result
 * is trimmed. When the line is longer than `limit` UTF-16 code units it is
 * cut to `limit - 1` units, trailing whitespace is removed, and an ellipsis
 * (`…`) is appended. The cut never leaves a lone high surrogate behind, so
 * the summary stays well-formed when the boundary lands inside an emoji or
 * other astral character.
 *
 * @param {string} note       raw blocker note
 * @param {number} [limit=500] maximum length of the summary
 * @returns {string} the summary; '' for a non-string, empty or blank note
 */
function _summarizeBlockerNote(note, limit = 500) {
  if (!note || typeof note !== 'string') return '';
  const oneLine = note.replace(/\s+/g, ' ').trim();
  if (!oneLine) return '';
  if (oneLine.length <= limit) return oneLine;

  let head = oneLine.slice(0, Math.max(0, limit - 1));
  // slice() works on code units; drop half of a split surrogate pair.
  if (/[\uD800-\uDBFF]$/.test(head)) {
    head = head.slice(0, -1);
  }
  return `${head.replace(/\s+$/, '')}…`;
}
|
|
416
|
+
|
|
158
417
|
/**
|
|
159
418
|
* Run the iteration loop.
|
|
160
419
|
*
|
|
@@ -175,6 +434,7 @@ async function run(opts) {
|
|
|
175
434
|
const minIterations = options.minIterations;
|
|
176
435
|
const completionPromise = options.completionPromise;
|
|
177
436
|
const taskPromise = options.taskPromise;
|
|
437
|
+
const blockedHandoffPromise = options.blockedHandoffPromise;
|
|
178
438
|
const stallThreshold =
|
|
179
439
|
typeof options.stallThreshold === 'number' && options.stallThreshold >= 0
|
|
180
440
|
? Math.floor(options.stallThreshold)
|
|
@@ -200,6 +460,8 @@ async function run(opts) {
|
|
|
200
460
|
// otherwise start fresh at 1.
|
|
201
461
|
const existingState = state.read(ralphDir);
|
|
202
462
|
const resumeIteration = _resolveStartIteration(existingState, options);
|
|
463
|
+
const priorRunWasBlockedHandoff =
|
|
464
|
+
existingState && existingState.exitReason === 'blocked_handoff';
|
|
203
465
|
|
|
204
466
|
if (options.verbose && resumeIteration > 1) {
|
|
205
467
|
process.stderr.write(
|
|
@@ -226,6 +488,9 @@ async function run(opts) {
|
|
|
226
488
|
resumeIteration > 1 && existingState && existingState.startedAt
|
|
227
489
|
? existingState.startedAt
|
|
228
490
|
: nowIso;
|
|
491
|
+
let pendingDirtyPaths = _normalizePendingDirtyPaths(
|
|
492
|
+
existingState && existingState.pendingDirtyPaths
|
|
493
|
+
);
|
|
229
494
|
|
|
230
495
|
state.init(ralphDir, {
|
|
231
496
|
active: true,
|
|
@@ -234,6 +499,7 @@ async function run(opts) {
|
|
|
234
499
|
maxIterations,
|
|
235
500
|
completionPromise,
|
|
236
501
|
taskPromise,
|
|
502
|
+
blockedHandoffPromise,
|
|
237
503
|
tasksMode: options.tasksMode,
|
|
238
504
|
tasksFile: options.tasksFile || null,
|
|
239
505
|
promptFile: options.promptFile || null,
|
|
@@ -245,6 +511,7 @@ async function run(opts) {
|
|
|
245
511
|
completedAt: null,
|
|
246
512
|
stoppedAt: null,
|
|
247
513
|
exitReason: null,
|
|
514
|
+
pendingDirtyPaths,
|
|
248
515
|
});
|
|
249
516
|
stateInitialized = true;
|
|
250
517
|
|
|
@@ -269,6 +536,20 @@ async function run(opts) {
|
|
|
269
536
|
: [];
|
|
270
537
|
const currentTask = _getCurrentTaskDescription(tasksBefore);
|
|
271
538
|
const currentTaskMeta = _getCurrentTaskMeta(tasksBefore);
|
|
539
|
+
pendingDirtyPaths = _refreshPendingDirtyPaths(pendingDirtyPaths);
|
|
540
|
+
state.update(ralphDir, { pendingDirtyPaths });
|
|
541
|
+
|
|
542
|
+
if (
|
|
543
|
+
pendingDirtyPaths &&
|
|
544
|
+
!_samePendingTask(pendingDirtyPaths, currentTaskMeta, currentTask)
|
|
545
|
+
) {
|
|
546
|
+
reporter.note(
|
|
547
|
+
_formatPendingDirtyPathsBlock(pendingDirtyPaths, currentTaskMeta, currentTask),
|
|
548
|
+
'error'
|
|
549
|
+
);
|
|
550
|
+
exitReason = 'pending_dirty_paths';
|
|
551
|
+
break;
|
|
552
|
+
}
|
|
272
553
|
|
|
273
554
|
reporter.iterationStarted({
|
|
274
555
|
iteration: iterationCount,
|
|
@@ -279,6 +560,7 @@ async function run(opts) {
|
|
|
279
560
|
let result;
|
|
280
561
|
let promptSize = null;
|
|
281
562
|
let responseSize = { bytes: 0, chars: 0, tokens: 0 };
|
|
563
|
+
let baselineGateConflict = null;
|
|
282
564
|
|
|
283
565
|
try {
|
|
284
566
|
// Build the prompt for this iteration
|
|
@@ -294,8 +576,27 @@ async function run(opts) {
|
|
|
294
576
|
// dedup collapses identical entries into a single "same failure as
|
|
295
577
|
// iteration N" line, so the 3-entry window is sufficient to surface
|
|
296
578
|
// recurring patterns without bloating the prompt.
|
|
579
|
+
const recentHistory = history.recent(ralphDir, 3);
|
|
580
|
+
const fullHistory = history.read(ralphDir);
|
|
297
581
|
const errorEntries = errors.readEntries(ralphDir, 3);
|
|
298
|
-
const
|
|
582
|
+
const blockerArtifacts = _detectBlockerArtifacts(ralphDir, {
|
|
583
|
+
repoRoot: process.cwd(),
|
|
584
|
+
includeStaleHandoff:
|
|
585
|
+
priorRunWasBlockedHandoff ||
|
|
586
|
+
recentHistory.some((entry) => entry && entry.blockedHandoffDetected),
|
|
587
|
+
});
|
|
588
|
+
const iterationFeedback = _buildIterationFeedback(
|
|
589
|
+
recentHistory,
|
|
590
|
+
errorEntries,
|
|
591
|
+
blockerArtifacts,
|
|
592
|
+
);
|
|
593
|
+
baselineGateConflict = _analyzeBaselineGateConflict(
|
|
594
|
+
ralphDir,
|
|
595
|
+
options.tasksFile,
|
|
596
|
+
currentTaskMeta,
|
|
597
|
+
fullHistory,
|
|
598
|
+
);
|
|
599
|
+
const baselineGateFeedback = _formatBaselineGateFeedback(baselineGateConflict);
|
|
299
600
|
|
|
300
601
|
// Inject any pending context
|
|
301
602
|
const pendingContext = context.consume(ralphDir);
|
|
@@ -303,6 +604,10 @@ async function run(opts) {
|
|
|
303
604
|
const lessonsSection = lessons.inject(ralphDir, { limit: 15 });
|
|
304
605
|
const promptSections = [renderedPrompt];
|
|
305
606
|
|
|
607
|
+
if (baselineGateFeedback) {
|
|
608
|
+
promptSections.push(`## Baseline Gate Conflict\n\n${baselineGateFeedback}`);
|
|
609
|
+
}
|
|
610
|
+
|
|
306
611
|
if (iterationFeedback) {
|
|
307
612
|
promptSections.push(`## Recent Loop Signals\n\n${iterationFeedback}`);
|
|
308
613
|
}
|
|
@@ -392,6 +697,14 @@ async function run(opts) {
|
|
|
392
697
|
const iterationSucceeded = _wasSuccessfulIteration(result);
|
|
393
698
|
const hasCompletion = iterationSucceeded && _containsPromise(outputText, completionPromise);
|
|
394
699
|
const hasTask = iterationSucceeded && _containsPromise(outputText, taskPromise);
|
|
700
|
+
// Blocked-handoff is also a successful-iteration signal (the agent
|
|
701
|
+
// followed protocol and explicitly emitted a structured exit). We
|
|
702
|
+
// treat it as a third top-level outcome alongside completion/task.
|
|
703
|
+
const hasBlockedHandoff = iterationSucceeded
|
|
704
|
+
&& _containsPromise(outputText, blockedHandoffPromise);
|
|
705
|
+
const blockerNote = hasBlockedHandoff
|
|
706
|
+
? _extractBlockerNote(outputText, blockedHandoffPromise)
|
|
707
|
+
: '';
|
|
395
708
|
const tasksAfter = options.tasksMode && options.tasksFile
|
|
396
709
|
? tasks.parseTasks(options.tasksFile)
|
|
397
710
|
: [];
|
|
@@ -419,13 +732,42 @@ async function run(opts) {
|
|
|
419
732
|
result.filesChanged.length > 0 &&
|
|
420
733
|
(hasCompletion || (options.tasksMode && hasTask))
|
|
421
734
|
) {
|
|
735
|
+
const filesToStage = _buildAutoCommitAllowlist(
|
|
736
|
+
_mergePathLists(result.filesChanged, pendingDirtyPaths ? pendingDirtyPaths.files : []),
|
|
737
|
+
completedTasks,
|
|
738
|
+
options.tasksFile
|
|
739
|
+
);
|
|
422
740
|
commitResult = _autoCommit(iterationCount, {
|
|
423
741
|
completedTasks,
|
|
424
|
-
filesToStage
|
|
742
|
+
filesToStage,
|
|
425
743
|
tasksFile: options.tasksFile,
|
|
426
744
|
verbose: options.verbose,
|
|
427
745
|
reporter,
|
|
428
746
|
});
|
|
747
|
+
if (commitResult.committed && pendingDirtyPaths) {
|
|
748
|
+
pendingDirtyPaths = _remainingPendingDirtyPathsAfterCommit(
|
|
749
|
+
pendingDirtyPaths,
|
|
750
|
+
commitResult.anomaly
|
|
751
|
+
);
|
|
752
|
+
state.update(ralphDir, { pendingDirtyPaths });
|
|
753
|
+
}
|
|
754
|
+
}
|
|
755
|
+
|
|
756
|
+
if (
|
|
757
|
+
!commitResult.committed &&
|
|
758
|
+
Array.isArray(result.filesChanged) &&
|
|
759
|
+
result.filesChanged.length > 0 &&
|
|
760
|
+
(_isFailedIteration(result) || hasBlockedHandoff)
|
|
761
|
+
) {
|
|
762
|
+
pendingDirtyPaths = _recordPendingDirtyPaths(pendingDirtyPaths, {
|
|
763
|
+
iteration: iterationCount,
|
|
764
|
+
reason: hasBlockedHandoff ? 'blocked_handoff' : 'failed_iteration',
|
|
765
|
+
task: currentTask,
|
|
766
|
+
taskNumber: currentTaskMeta.number,
|
|
767
|
+
taskDescription: currentTaskMeta.description,
|
|
768
|
+
files: result.filesChanged,
|
|
769
|
+
});
|
|
770
|
+
state.update(ralphDir, { pendingDirtyPaths });
|
|
429
771
|
}
|
|
430
772
|
|
|
431
773
|
// Record iteration in history after commit handling so operator-visible
|
|
@@ -435,6 +777,10 @@ async function run(opts) {
|
|
|
435
777
|
duration,
|
|
436
778
|
completionDetected: hasCompletion,
|
|
437
779
|
taskDetected: hasTask,
|
|
780
|
+
blockedHandoffDetected: hasBlockedHandoff,
|
|
781
|
+
...(blockerNote ? { blockedHandoffNote: _summarizeBlockerNote(blockerNote) } : {}),
|
|
782
|
+
taskNumber: currentTaskMeta.number,
|
|
783
|
+
taskDescription: currentTaskMeta.description,
|
|
438
784
|
toolUsage: result.toolUsage || [],
|
|
439
785
|
filesChanged: result.filesChanged || [],
|
|
440
786
|
exitCode: result.exitCode,
|
|
@@ -446,6 +792,16 @@ async function run(opts) {
|
|
|
446
792
|
commitAnomaly: commitResult.anomaly ? commitResult.anomaly.message : '',
|
|
447
793
|
commitAnomalyType: commitResult.anomaly ? commitResult.anomaly.type : '',
|
|
448
794
|
protectedArtifacts: commitResult.anomaly ? commitResult.anomaly.protectedArtifacts || [] : [],
|
|
795
|
+
...(baselineGateConflict
|
|
796
|
+
? {
|
|
797
|
+
baselineGateConflictMode: baselineGateConflict.mode,
|
|
798
|
+
baselineGateRepairAllowedFiles: baselineGateConflict.allowedFiles || [],
|
|
799
|
+
baselineGateRepairAttempted: _baselineGateRepairAttempted(
|
|
800
|
+
baselineGateConflict,
|
|
801
|
+
result.filesChanged || []
|
|
802
|
+
),
|
|
803
|
+
}
|
|
804
|
+
: {}),
|
|
449
805
|
...(commitResult.anomaly && commitResult.anomaly.ignoredPaths && commitResult.anomaly.ignoredPaths.length > 0
|
|
450
806
|
? { ignoredPaths: commitResult.anomaly.ignoredPaths }
|
|
451
807
|
: {}),
|
|
@@ -472,6 +828,7 @@ async function run(opts) {
|
|
|
472
828
|
iterationFailed,
|
|
473
829
|
hasCompletion,
|
|
474
830
|
hasTask,
|
|
831
|
+
hasBlockedHandoff,
|
|
475
832
|
completedTasksCount: completedTasks.length,
|
|
476
833
|
filesChangedCount: Array.isArray(result.filesChanged) ? result.filesChanged.length : 0,
|
|
477
834
|
});
|
|
@@ -487,12 +844,15 @@ async function run(opts) {
|
|
|
487
844
|
durationMs: duration,
|
|
488
845
|
outcome: iterationFailed
|
|
489
846
|
? 'failure'
|
|
490
|
-
:
|
|
491
|
-
? '
|
|
492
|
-
:
|
|
847
|
+
: hasBlockedHandoff
|
|
848
|
+
? 'blocked'
|
|
849
|
+
: stalledThisIteration
|
|
850
|
+
? 'stalled'
|
|
851
|
+
: 'success',
|
|
493
852
|
committed: commitResult.committed === true,
|
|
494
853
|
hasCompletion,
|
|
495
854
|
hasTask,
|
|
855
|
+
hasBlockedHandoff,
|
|
496
856
|
completedTasksCount: completedTasks.length,
|
|
497
857
|
filesChangedCount: Array.isArray(result.filesChanged) ? result.filesChanged.length : 0,
|
|
498
858
|
stallStreak,
|
|
@@ -508,6 +868,44 @@ async function run(opts) {
|
|
|
508
868
|
break;
|
|
509
869
|
}
|
|
510
870
|
|
|
871
|
+
// Blocked-handoff exits the loop *immediately* (no minIterations
|
|
872
|
+
// floor). The agent has signaled an external decision is required;
|
|
873
|
+
// we want the operator unblocked as fast as possible. We persist the
|
|
874
|
+
// agent's note before breaking so it survives even a hard-kill on
|
|
875
|
+
// the parent process (e.g. the operator hits Ctrl-C right after).
|
|
876
|
+
if (hasBlockedHandoff) {
|
|
877
|
+
let handoffPath = '';
|
|
878
|
+
try {
|
|
879
|
+
handoffPath = _writeHandoff(ralphDir, {
|
|
880
|
+
iteration: iterationCount,
|
|
881
|
+
task: currentTask,
|
|
882
|
+
note: blockerNote,
|
|
883
|
+
completionPromise,
|
|
884
|
+
taskPromise,
|
|
885
|
+
});
|
|
886
|
+
} catch (writeErr) {
|
|
887
|
+
// Don't let a HANDOFF.md write failure mask the original signal —
|
|
888
|
+
// we still want to exit cleanly with `blocked_handoff`. Surface
|
|
889
|
+
// the write error to stderr so it's diagnosable.
|
|
890
|
+
process.stderr.write(
|
|
891
|
+
`[mini-ralph] warning: failed to write HANDOFF.md: ${writeErr.message}\n`
|
|
892
|
+
);
|
|
893
|
+
}
|
|
894
|
+
reporter.note(
|
|
895
|
+
handoffPath
|
|
896
|
+
? `agent emitted ${blockedHandoffPromise}; blocker note saved to ${handoffPath}.`
|
|
897
|
+
: `agent emitted ${blockedHandoffPromise}; halting (HANDOFF.md write failed; see stderr).`,
|
|
898
|
+
'warn'
|
|
899
|
+
);
|
|
900
|
+
if (options.verbose) {
|
|
901
|
+
process.stderr.write(
|
|
902
|
+
`[mini-ralph] ${blockedHandoffPromise} detected at iteration ${iterationCount}; halting.\n`
|
|
903
|
+
);
|
|
904
|
+
}
|
|
905
|
+
exitReason = 'blocked_handoff';
|
|
906
|
+
break;
|
|
907
|
+
}
|
|
908
|
+
|
|
511
909
|
if (stallThreshold > 0 && stallStreak >= stallThreshold) {
|
|
512
910
|
reporter.note(
|
|
513
911
|
`stall detector: ${stallStreak} consecutive no-op iteration(s); halting.`,
|
|
@@ -587,6 +985,145 @@ function _containsPromise(text, promiseName) {
|
|
|
587
985
|
.some((line) => line.trim() === expectedTag);
|
|
588
986
|
}
|
|
589
987
|
|
|
988
|
+
/**
 * Coerce a pending-dirty-paths record (typically read back from persisted
 * loop state) into its canonical shape.
 *
 * The path list is taken from `files`, falling back to `paths`, and passed
 * through `_mergePathLists` (defined elsewhere in this file). Missing
 * metadata is defaulted: `iteration` to null unless it is a number, `reason`
 * to 'blocked_handoff', the task fields to '', and `recordedAt` to the
 * current time.
 *
 * @param {object|null|undefined} pending
 * @returns {object|null} the normalized record, or null when `pending` is
 *                        not an object or yields no paths
 */
function _normalizePendingDirtyPaths(pending) {
  const usable = Boolean(pending) && typeof pending === 'object';
  if (!usable) return null;

  const files = _mergePathLists(pending.files || pending.paths || []);
  if (files.length === 0) return null;

  const {
    iteration,
    reason,
    task,
    taskNumber,
    taskDescription,
    recordedAt,
  } = pending;

  return {
    iteration: typeof iteration === 'number' ? iteration : null,
    reason: reason || 'blocked_handoff',
    task: task || '',
    taskNumber: taskNumber || '',
    taskDescription: taskDescription || '',
    files,
    recordedAt: recordedAt || new Date().toISOString(),
  };
}
|
|
1003
|
+
|
|
1004
|
+
/**
 * Build the pending-dirty-paths record after an iteration left uncommitted
 * changes behind.
 *
 * Paths already tracked in `existing` are merged with the new ones from
 * `update`; all metadata (iteration, reason, task fields, timestamp) comes
 * from `update` alone, with the same defaults `_normalizePendingDirtyPaths`
 * applies.
 *
 * @param {object|null} existing  current record, if any (only `files` is read)
 * @param {object} update  { iteration, reason, task, taskNumber, taskDescription, files, recordedAt }
 * @returns {object|null} normalized record, or null when no paths result
 */
function _recordPendingDirtyPaths(existing, update) {
  const incoming = update || {};
  const carriedFiles = (existing && existing.files) || [];
  const newFiles = incoming.files || [];

  return _normalizePendingDirtyPaths({
    iteration: typeof incoming.iteration === 'number' ? incoming.iteration : null,
    reason: incoming.reason || 'blocked_handoff',
    task: incoming.task || '',
    taskNumber: incoming.taskNumber || '',
    taskDescription: incoming.taskDescription || '',
    files: _mergePathLists(carriedFiles, newFiles),
    recordedAt: incoming.recordedAt || new Date().toISOString(),
  });
}
|
|
1020
|
+
|
|
1021
|
+
/**
 * Work out which pending dirty paths are still outstanding after a commit.
 *
 * Only paths that the commit reported as ignored (`anomaly.ignoredPaths`,
 * each mapped through `_repoRelativePath`) can remain pending; everything
 * else is treated as resolved by the commit.
 *
 * @param {object|null} pending  current pending-dirty-paths record
 * @param {object|null} anomaly  commit anomaly; `ignoredPaths` is read if it
 *                               is an array
 * @returns {object|null} the record narrowed to the ignored paths, or null
 *                        when nothing remains pending
 */
function _remainingPendingDirtyPathsAfterCommit(pending, anomaly) {
  const record = _normalizePendingDirtyPaths(pending);
  if (!record) return null;

  const hasIgnoredList = Boolean(anomaly) && Array.isArray(anomaly.ignoredPaths);
  if (!hasIgnoredList) return null;

  const skipped = new Set(anomaly.ignoredPaths.map(_repoRelativePath).filter(Boolean));
  if (skipped.size === 0) return null;

  const leftover = record.files.filter((file) => skipped.has(file));
  return leftover.length === 0
    ? null
    : Object.assign({}, record, { files: leftover });
}
|
|
1035
|
+
|
|
1036
|
+
/**
 * Drop pending paths that are no longer dirty in the working tree.
 *
 * When the dirty set cannot be determined (`_currentDirtyPathSet` returns
 * null) the normalized record is returned unchanged rather than assuming the
 * paths were cleaned up.
 *
 * @param {object|null} pending  current pending-dirty-paths record
 * @returns {object|null} the record restricted to paths that are still
 *                        dirty, or null when there is no record or none of
 *                        its paths remain dirty
 */
function _refreshPendingDirtyPaths(pending) {
  const record = _normalizePendingDirtyPaths(pending);
  if (!record) return null;

  const dirtyNow = _currentDirtyPathSet();
  if (!dirtyNow) return record;

  const stillDirty = record.files.filter((file) => dirtyNow.has(file));
  return stillDirty.length > 0
    ? Object.assign({}, record, { files: stillDirty })
    : null;
}
|
|
1047
|
+
|
|
1048
|
+
/**
 * Decide whether the task that produced the pending dirty paths is the task
 * the loop is about to work on.
 *
 * Identity is checked on the most specific key that both sides provide:
 * task number first, then task description, and finally the full task
 * string. A missing `pending` record counts as "same" (nothing to conflict
 * with); if no key is available on both sides the tasks count as different.
 *
 * @param {object|null} pending          pending-dirty-paths record
 * @param {object|null} currentTaskMeta  { number, description } of the current task
 * @param {string} currentTask           full description of the current task
 * @returns {boolean}
 */
function _samePendingTask(pending, currentTaskMeta, currentTask) {
  if (!pending) return true;

  const meta = currentTaskMeta || {};
  const keyedPairs = [
    [pending.taskNumber, meta.number || ''],
    [pending.taskDescription, meta.description || ''],
  ];
  for (const [recorded, active] of keyedPairs) {
    if (recorded && active) return recorded === active;
  }

  const activeFull = currentTask || '';
  return Boolean(pending.task && activeFull && pending.task === activeFull);
}
|
|
1064
|
+
|
|
1065
|
+
/**
 * Render the operator-facing message explaining that dirty paths left by an
 * earlier task are still unresolved while the loop has moved to another task.
 *
 * Tasks are labelled "<number> <description>" when a number is known, and
 * otherwise by their full text (or a generic placeholder). At most 8 pending
 * paths are listed; the rest are summarised as "(+N more)".
 *
 * @param {object} pending               pending-dirty-paths record
 * @param {object|null} currentTaskMeta  { number, description } of the current task
 * @param {string} currentTask           full description of the current task
 * @returns {string} multi-line message
 */
function _formatPendingDirtyPathsBlock(pending, currentTaskMeta, currentTask) {
  const label = (number, description, fallback) => (
    number ? `${number} ${description || ''}`.trim() : fallback
  );

  const meta = currentTaskMeta || {};
  const currentStamp = label(
    meta.number,
    meta.description,
    currentTask || 'the current task'
  );
  const pendingStamp = label(
    pending.taskNumber,
    pending.taskDescription,
    pending.task || 'a prior blocked handoff'
  );

  const allFiles = pending.files || [];
  const bullets = allFiles.slice(0, 8).map((file) => ` - ${file}`);
  const hiddenCount = allFiles.length - bullets.length;
  if (hiddenCount > 0) {
    bullets.push(` - (+${hiddenCount} more)`);
  }

  const lines = [];
  lines.push(`pending dirty paths from ${pending.reason || 'blocked_handoff'} iteration ${pending.iteration || 'unknown'} remain unresolved.`);
  lines.push(`Prior task: ${pendingStamp}`);
  lines.push(`Current task: ${currentStamp}`);
  lines.push('Resolve the prior patch before Ralph can safely continue: commit it with the same task, revert it, or move it to a separate change.');
  lines.push('Pending paths:');
  lines.push(bullets.join('\n'));
  return lines.join('\n');
}
|
|
1086
|
+
|
|
1087
|
+
/**
 * Collect the paths `git status --porcelain` currently reports as dirty.
 *
 * NOTE(review): with default git settings an untracked directory is reported
 * as a single `dir/` entry rather than one entry per file; confirm callers
 * that look up individual files can tolerate that.
 *
 * @returns {Set<string>|null} Set of reported paths, or `null` when the git
 *   command (or parsing of its output) fails.
 */
function _currentDirtyPathSet() {
  let dirty;
  try {
    const porcelain = childProcess.execFileSync('git', ['status', '--porcelain'], {
      encoding: 'utf8',
      stdio: ['pipe', 'pipe', 'pipe'],
    });

    dirty = new Set();
    porcelain.split('\n').forEach((statusLine) => {
      for (const entry of _parseGitStatusPaths(statusLine)) {
        if (entry) {
          dirty.add(entry);
        }
      }
    });
  } catch (_) {
    // Best-effort: any failure is reported to the caller as `null`.
    return null;
  }
  return dirty;
}
|
|
1104
|
+
|
|
1105
|
+
/**
 * Extract the path(s) named on one line of `git status --porcelain` output.
 *
 * The first three characters (two status letters plus a space) are dropped.
 * Rename/copy lines of the form `old -> new` yield both paths. The ` -> `
 * separator is only honoured outside double quotes, so a quoted path whose
 * name itself contains ` -> ` is kept intact instead of being split in two.
 *
 * @param {string} line - A single porcelain status line.
 * @returns {string[]} Zero, one, or two unquoted paths.
 */
function _parseGitStatusPaths(line) {
  if (!line || typeof line !== 'string') return [];

  const rawPath = line.slice(3).trim();
  if (!rawPath) return [];

  const ARROW = ' -> ';
  const fields = [];
  let current = '';
  let inQuotes = false;

  for (let i = 0; i < rawPath.length; i++) {
    const ch = rawPath[i];

    if (inQuotes) {
      if (ch === '\\' && i + 1 < rawPath.length) {
        // Keep the escape pair together so an escaped quote cannot close the field.
        current += ch + rawPath[i + 1];
        i++;
        continue;
      }
      if (ch === '"') inQuotes = false;
      current += ch;
      continue;
    }

    if (ch === '"' && current === '') {
      // A quoted field always starts with the quote character.
      inQuotes = true;
      current += ch;
      continue;
    }

    if (rawPath.startsWith(ARROW, i)) {
      fields.push(current);
      current = '';
      i += ARROW.length - 1;
      continue;
    }

    current += ch;
  }
  fields.push(current);

  return fields.map((field) => _stripGitStatusQuotes(field)).filter(Boolean);
}
|
|
1114
|
+
|
|
1115
|
+
/**
 * Undo the C-style quoting git applies to unusual paths in status output.
 *
 * Unquoted values are returned trimmed and otherwise untouched. Quoted values
 * have the surrounding double quotes removed and their escapes decoded in a
 * single pass: `\"`, `\\`, the control escapes (`\a \b \f \n \r \t \v`) and
 * three-digit octal byte escapes. Octal bytes are reassembled and decoded as
 * UTF-8, so a non-ASCII name such as `"caf\303\251.txt"` resolves to the real
 * file name. Unknown escapes are preserved verbatim.
 *
 * @param {string} value - Raw path field from a status line.
 * @returns {string} The decoded path, or `''` for empty input.
 */
function _stripGitStatusQuotes(value) {
  if (!value) return '';

  const trimmed = value.trim();
  if (!(trimmed.startsWith('"') && trimmed.endsWith('"'))) {
    return trimmed;
  }

  const escapeBytes = new Map([
    ['a', 0x07],
    ['b', 0x08],
    ['f', 0x0c],
    ['n', 0x0a],
    ['r', 0x0d],
    ['t', 0x09],
    ['v', 0x0b],
    ['"', 0x22],
    ['\\', 0x5c],
  ]);

  // Tokens: octal byte escape | backslash + one code point | literal run | lone trailing backslash.
  const tokenPattern = /\\([0-3][0-7]{2})|\\([\s\S])|[^\\]+|\\/gu;
  const inner = trimmed.slice(1, -1);
  const chunks = [];

  let token;
  while ((token = tokenPattern.exec(inner)) !== null) {
    if (token[1] !== undefined) {
      chunks.push(Buffer.from([Number.parseInt(token[1], 8)]));
    } else if (token[2] !== undefined && escapeBytes.has(token[2])) {
      chunks.push(Buffer.from([escapeBytes.get(token[2])]));
    } else {
      // Literal text, or an escape we do not recognise: keep it as written.
      chunks.push(Buffer.from(token[0], 'utf8'));
    }
  }

  // Decode once at the end so multi-byte sequences split across escapes join up.
  return Buffer.concat(chunks).toString('utf8');
}
|
|
1126
|
+
|
|
590
1127
|
/**
|
|
591
1128
|
* Validate required options and throw descriptive errors.
|
|
592
1129
|
*
|
|
@@ -835,6 +1372,19 @@ function _filterGitignored(paths, cwd) {
|
|
|
835
1372
|
}
|
|
836
1373
|
}
|
|
837
1374
|
|
|
1375
|
+
/**
 * Merge any number of path lists into one de-duplicated array.
 *
 * Every entry is passed through `_repoRelativePath`; entries for which that
 * helper returns a falsy value are dropped. First-seen order is preserved.
 *
 * @param {...(Iterable<string>|null|undefined)} lists - Path lists; falsy lists are ignored.
 * @returns {string[]} Unique converted paths.
 */
function _mergePathLists(...lists) {
  const unique = new Set();

  lists.forEach((entries) => {
    if (!entries) return;
    for (const entry of entries) {
      const converted = _repoRelativePath(entry);
      if (converted) {
        unique.add(converted);
      }
    }
  });

  return [...unique];
}
|
|
1387
|
+
|
|
838
1388
|
/**
|
|
839
1389
|
* Build the explicit per-iteration git staging allowlist.
|
|
840
1390
|
*
|
|
@@ -976,16 +1526,19 @@ function _failureFingerprint(entry, errorEntries) {
|
|
|
976
1526
|
stderrHead = _firstNonEmptyLine(match && match.stderr, 120);
|
|
977
1527
|
}
|
|
978
1528
|
// A "no promise emitted" iteration is also a distinguishable failure mode
|
|
979
|
-
// even when exitCode===0 and there's no stderr (e.g. the agent
|
|
980
|
-
//
|
|
981
|
-
//
|
|
982
|
-
|
|
983
|
-
|
|
1529
|
+
// even when exitCode===0 and there's no stderr (e.g. the agent refuses to
|
|
1530
|
+
// continue without using the control protocol). Encoding it separately keeps
|
|
1531
|
+
// no-progress stalls distinct from explicit BLOCKED_HANDOFF stops.
|
|
1532
|
+
const noPromise =
|
|
1533
|
+
!entry.completionDetected &&
|
|
1534
|
+
!entry.taskDetected &&
|
|
1535
|
+
!entry.blockedHandoffDetected;
|
|
984
1536
|
return JSON.stringify({
|
|
985
1537
|
failureStage: entry.failureStage || '',
|
|
986
1538
|
exitCode: entry.exitCode,
|
|
987
1539
|
stderrHead,
|
|
988
1540
|
noPromise,
|
|
1541
|
+
blockedHandoff: Boolean(entry.blockedHandoffDetected),
|
|
989
1542
|
commitAnomalyType: entry.commitAnomalyType || '',
|
|
990
1543
|
});
|
|
991
1544
|
}
|
|
@@ -998,6 +1551,7 @@ function _isEmptyFingerprint(fingerprint) {
|
|
|
998
1551
|
obj.exitCode === 0 &&
|
|
999
1552
|
!obj.stderrHead &&
|
|
1000
1553
|
!obj.noPromise &&
|
|
1554
|
+
!obj.blockedHandoff &&
|
|
1001
1555
|
!obj.commitAnomalyType
|
|
1002
1556
|
);
|
|
1003
1557
|
} catch {
|
|
@@ -1005,14 +1559,23 @@ function _isEmptyFingerprint(fingerprint) {
|
|
|
1005
1559
|
}
|
|
1006
1560
|
}
|
|
1007
1561
|
|
|
1008
|
-
function _buildIterationFeedback(recentHistory, errorEntries) {
|
|
1009
|
-
|
|
1562
|
+
function _buildIterationFeedback(recentHistory, errorEntries, blockerArtifacts) {
|
|
1563
|
+
const hasArtifacts = Array.isArray(blockerArtifacts) && blockerArtifacts.length > 0;
|
|
1564
|
+
if ((!Array.isArray(recentHistory) || recentHistory.length === 0) && !hasArtifacts) {
|
|
1010
1565
|
return '';
|
|
1011
1566
|
}
|
|
1567
|
+
if (!Array.isArray(recentHistory)) recentHistory = [];
|
|
1012
1568
|
|
|
1013
1569
|
const problemLines = [];
|
|
1014
1570
|
// Track fingerprint -> first iteration number for dedup
|
|
1015
1571
|
const fingerprintSeen = new Map();
|
|
1572
|
+
// Track which task each *problematic* iteration was working when it failed
|
|
1573
|
+
// / produced no progress. The same `taskNumber|taskDescription` repeating
|
|
1574
|
+
// across the recent window is the strongest livelock signal we have — the
|
|
1575
|
+
// agent is hitting the same wall with no new information. Persist the run
|
|
1576
|
+
// length so we can emit a HARD prefix above the per-iteration list when
|
|
1577
|
+
// the streak crosses the noise floor (3+ consecutive on the same task).
|
|
1578
|
+
const recentTasks = [];
|
|
1016
1579
|
|
|
1017
1580
|
for (const entry of recentHistory) {
|
|
1018
1581
|
const issues = [];
|
|
@@ -1029,11 +1592,28 @@ function _buildIterationFeedback(recentHistory, errorEntries) {
|
|
|
1029
1592
|
issues.push(`commit anomaly: ${entry.commitAnomaly}`);
|
|
1030
1593
|
}
|
|
1031
1594
|
|
|
1032
|
-
if (
|
|
1595
|
+
if (entry.blockedHandoffDetected) {
|
|
1596
|
+
issues.push('agent emitted BLOCKED_HANDOFF and requested operator handoff');
|
|
1597
|
+
} else if (!entry.completionDetected && !entry.taskDetected) {
|
|
1033
1598
|
issues.push('no loop promise emitted');
|
|
1034
1599
|
}
|
|
1035
1600
|
|
|
1036
1601
|
if (issues.length > 0) {
|
|
1602
|
+
// Build the task-identity stamp (used both for the per-line prefix and
|
|
1603
|
+
// for streak detection). Empty when the runner had no task context for
|
|
1604
|
+
// the iteration (non-tasks-mode, or pre-resume entries written by an
|
|
1605
|
+
// older runner version).
|
|
1606
|
+
const rawTaskId = entry.taskNumber
|
|
1607
|
+
? `${entry.taskNumber}|${entry.taskDescription || ''}`
|
|
1608
|
+
: (entry.taskDescription || '');
|
|
1609
|
+
const taskStamp = entry.taskNumber
|
|
1610
|
+
? `Task ${entry.taskNumber}` +
|
|
1611
|
+
(entry.taskDescription ? ` (${entry.taskDescription})` : '')
|
|
1612
|
+
: (entry.taskDescription
|
|
1613
|
+
? `Task ${entry.taskDescription}`
|
|
1614
|
+
: '');
|
|
1615
|
+
if (rawTaskId) recentTasks.push(rawTaskId);
|
|
1616
|
+
|
|
1037
1617
|
// Compute fingerprint for dedup
|
|
1038
1618
|
const fp = _failureFingerprint(entry, errorEntries);
|
|
1039
1619
|
const isRealFailure = !_isEmptyFingerprint(fp);
|
|
@@ -1047,13 +1627,19 @@ function _buildIterationFeedback(recentHistory, errorEntries) {
|
|
|
1047
1627
|
|
|
1048
1628
|
if (isRealFailure && fingerprintSeen.has(fp) && !isIgnoreFilterAnomaly) {
|
|
1049
1629
|
const firstIteration = fingerprintSeen.get(fp);
|
|
1630
|
+
const stampSuffix = taskStamp ? ` [${taskStamp}]` : '';
|
|
1050
1631
|
problemLines.push(
|
|
1051
|
-
`- Iteration ${entry.iteration}: same failure as iteration ${firstIteration} (see above).`
|
|
1632
|
+
`- Iteration ${entry.iteration}${stampSuffix}: same failure as iteration ${firstIteration} (see above).`
|
|
1052
1633
|
);
|
|
1053
1634
|
} else {
|
|
1054
1635
|
if (isRealFailure && !isIgnoreFilterAnomaly) fingerprintSeen.set(fp, entry.iteration);
|
|
1055
1636
|
|
|
1056
|
-
|
|
1637
|
+
const stampPrefix = taskStamp ? ` [${taskStamp}]` : '';
|
|
1638
|
+
let line = `- Iteration ${entry.iteration}${stampPrefix}: ${issues.join('; ')}.`;
|
|
1639
|
+
|
|
1640
|
+
if (entry.blockedHandoffDetected && entry.blockedHandoffNote) {
|
|
1641
|
+
line += ` Blocker note: ${entry.blockedHandoffNote}`;
|
|
1642
|
+
}
|
|
1057
1643
|
|
|
1058
1644
|
// For paths_ignored_filtered / all_paths_ignored, append the first two
|
|
1059
1645
|
// ignored paths inline (with a (+N more) suffix) so the agent can see
|
|
@@ -1116,16 +1702,472 @@ function _buildIterationFeedback(recentHistory, errorEntries) {
|
|
|
1116
1702
|
}
|
|
1117
1703
|
}
|
|
1118
1704
|
|
|
1119
|
-
if (problemLines.length === 0) {
|
|
1705
|
+
if (problemLines.length === 0 && !hasArtifacts) {
|
|
1706
|
+
return '';
|
|
1707
|
+
}
|
|
1708
|
+
|
|
1709
|
+
// Detect the longest *trailing* run of the same task identity in the
|
|
1710
|
+
// problematic-iteration window. Trailing because the only thing that
|
|
1711
|
+
// matters is "is the most recent stretch still the same task?" — a stale
|
|
1712
|
+
// streak from earlier in the window is irrelevant once the task changed.
|
|
1713
|
+
let sameTaskStreak = 0;
|
|
1714
|
+
let stuckTaskId = '';
|
|
1715
|
+
if (recentTasks.length > 0) {
|
|
1716
|
+
const last = recentTasks[recentTasks.length - 1];
|
|
1717
|
+
if (last) {
|
|
1718
|
+
stuckTaskId = last;
|
|
1719
|
+
for (let i = recentTasks.length - 1; i >= 0; i--) {
|
|
1720
|
+
if (recentTasks[i] === last) {
|
|
1721
|
+
sameTaskStreak++;
|
|
1722
|
+
} else {
|
|
1723
|
+
break;
|
|
1724
|
+
}
|
|
1725
|
+
}
|
|
1726
|
+
}
|
|
1727
|
+
}
|
|
1728
|
+
|
|
1729
|
+
const sections = [];
|
|
1730
|
+
// The 3-iteration threshold matches the default `stallThreshold` so the
|
|
1731
|
+
// hard-prefix and the eventual stall halt are aligned: the agent sees the
|
|
1732
|
+
// warning one iteration before the stall detector fires, giving it a final
|
|
1733
|
+
// chance to hand off cleanly via BLOCKED_HANDOFF rather than livelock.
|
|
1734
|
+
if (sameTaskStreak >= 3 && stuckTaskId) {
|
|
1735
|
+
const display = stuckTaskId.includes('|')
|
|
1736
|
+
? stuckTaskId.replace('|', ' — ')
|
|
1737
|
+
: stuckTaskId;
|
|
1738
|
+
sections.push(
|
|
1739
|
+
[
|
|
1740
|
+
'⚠ STUCK ON SAME TASK',
|
|
1741
|
+
`You have failed to make progress on the same task ${sameTaskStreak} iterations in a row: ${display}.`,
|
|
1742
|
+
'Stop retrying the same approach. Re-read the task spec, then either:',
|
|
1743
|
+
' 1. Pick a materially different approach (different files, different invariant).',
|
|
1744
|
+
' 2. If the task spec authorizes it (e.g. a "Stop and hand off if:" clause fired), emit <promise>BLOCKED_HANDOFF</promise> with a structured Blocker Note and stop. The runner will save it to .ralph/HANDOFF.md.',
|
|
1745
|
+
'',
|
|
1746
|
+
].join('\n')
|
|
1747
|
+
);
|
|
1748
|
+
}
|
|
1749
|
+
|
|
1750
|
+
if (problemLines.length > 0) {
|
|
1751
|
+
sections.push(
|
|
1752
|
+
[
|
|
1753
|
+
'Use these signals to avoid repeating the same failed approach:',
|
|
1754
|
+
...problemLines,
|
|
1755
|
+
].join('\n')
|
|
1756
|
+
);
|
|
1757
|
+
}
|
|
1758
|
+
|
|
1759
|
+
if (hasArtifacts) {
|
|
1760
|
+
const artifactBlocks = blockerArtifacts.map((art) => {
|
|
1761
|
+
const header = `### ${art.path}${art.truncated ? ' (truncated)' : ''}`;
|
|
1762
|
+
// Code-fence the body so MDX-y artifacts (` ` `, `<promise>`) don't
|
|
1763
|
+
// collide with the surrounding prompt markdown.
|
|
1764
|
+
return [
|
|
1765
|
+
header,
|
|
1766
|
+
'```',
|
|
1767
|
+
art.content,
|
|
1768
|
+
'```',
|
|
1769
|
+
].join('\n');
|
|
1770
|
+
});
|
|
1771
|
+
|
|
1772
|
+
sections.push(
|
|
1773
|
+
[
|
|
1774
|
+
'Prior-iteration blocker artifacts (read these BEFORE re-deriving the same diagnosis):',
|
|
1775
|
+
...artifactBlocks,
|
|
1776
|
+
].join('\n\n')
|
|
1777
|
+
);
|
|
1778
|
+
}
|
|
1779
|
+
|
|
1780
|
+
return sections.join('\n');
|
|
1781
|
+
}
|
|
1782
|
+
|
|
1783
|
+
/**
 * Analyze the current task for a baseline-gate conflict and render the
 * resulting guidance text in one step.
 *
 * @param {string} ralphDir - Directory passed through to the analysis step.
 * @param {string} tasksFile - Tasks file passed through to the analysis step.
 * @param {object} currentTaskMeta - Metadata of the current task.
 * @param {object[]} recentHistory - Recent iteration history entries.
 * @returns {string} Whatever `_formatBaselineGateFeedback` returns for the analysis result.
 */
function _buildBaselineGateFeedback(ralphDir, tasksFile, currentTaskMeta, recentHistory) {
  const analysis = _analyzeBaselineGateConflict(
    ralphDir,
    tasksFile,
    currentTaskMeta,
    recentHistory
  );
  return _formatBaselineGateFeedback(analysis);
}
|
|
1788
|
+
|
|
1789
|
+
/**
 * Work out whether the current task's strict "exits 0" gates collide with
 * recorded baseline results, and classify the situation.
 *
 * Possible `mode` values of the returned object:
 *  - `missing_baseline`: a baseline is expected for a strict gate but none is recorded.
 *  - `authorized_cleanup`: failing baselines exist and the task names files it may fix.
 *  - `baseline_classification`: failing baselines exist and the task text handles them.
 *  - `missing_policy`: failing baselines exist and the task text says nothing about them.
 *
 * @param {string} ralphDir - Directory that holds the recorded baselines.
 * @param {string} tasksFile - Path of the tasks file.
 * @param {object} currentTaskMeta - Current task metadata; `description` is required.
 * @param {object[]} recentHistory - Recent iteration history, used for the repair budget.
 * @returns {object|null} Conflict description, or `null` when there is nothing to report.
 */
function _analyzeBaselineGateConflict(ralphDir, tasksFile, currentTaskMeta, recentHistory) {
  const hasInputs = Boolean(
    ralphDir && tasksFile && currentTaskMeta && currentTaskMeta.description
  );
  if (!hasInputs) return null;

  const block = _extractCurrentTaskBlock(tasksFile, currentTaskMeta);
  if (!block) return null;

  const cleanGates = _detectStrictCleanGates(block);
  if (cleanGates.length === 0) return null;

  const recorded = _detectRecordedBaselineGates(ralphDir);
  const absent = _detectMissingBaselineGates(cleanGates, recorded, block, tasksFile);
  if (absent.length > 0) {
    return {
      mode: 'missing_baseline',
      conflicts: [],
      missingBaselines: absent,
      allowedFiles: [],
      budgetUsed: false,
    };
  }

  // Index failing baselines by gate name; a later entry for the same name wins.
  const failingByName = new Map();
  for (const baseline of recorded) {
    if (baseline.exitCode !== 0) {
      failingByName.set(baseline.name, baseline);
    }
  }
  if (failingByName.size === 0) return null;

  const conflicts = [];
  for (const gate of cleanGates) {
    const baseline = failingByName.get(gate.name);
    if (baseline) {
      conflicts.push({ gate, baseline });
    }
  }
  if (conflicts.length === 0) return null;

  const { allowedFiles } = _detectAuthorizedBaselineCleanup(block);
  if (allowedFiles.length > 0) {
    return {
      mode: 'authorized_cleanup',
      conflicts,
      allowedFiles,
      budgetUsed: _baselineGateRepairBudgetUsed(recentHistory, currentTaskMeta, allowedFiles),
    };
  }

  const mode = _taskExplicitlyHandlesBaselineFailures(block)
    ? 'baseline_classification'
    : 'missing_policy';

  return {
    mode,
    conflicts,
    allowedFiles: [],
    budgetUsed: false,
  };
}
|
|
1854
|
+
|
|
1855
|
+
/**
 * Turn a baseline-gate conflict description into guidance text for the agent.
 *
 * The wording depends on `conflict.mode` (`missing_baseline`,
 * `authorized_cleanup`, `baseline_classification`; anything else is treated
 * as a missing policy). Each variant is followed by one bullet per affected gate.
 *
 * @param {object|null} conflict - Object with `mode`, `conflicts`,
 *   `missingBaselines`, `allowedFiles` and `budgetUsed`.
 * @returns {string} Guidance text, or `''` when there is nothing to report.
 */
function _formatBaselineGateFeedback(conflict) {
  const conflicts = Array.isArray(conflict && conflict.conflicts) ? conflict.conflicts : [];
  const missingBaselines = Array.isArray(conflict && conflict.missingBaselines)
    ? conflict.missingBaselines
    : [];

  const nothingToReport = conflicts.length === 0 && missingBaselines.length === 0;
  if (!conflict || nothingToReport) {
    return '';
  }

  const conflictLines = conflicts.map(
    ({ gate, baseline }) => `- ${gate.command}: baseline ${baseline.file} exits ${baseline.exitCode}.`
  );
  const missingLines = missingBaselines.map(
    (gate) => `- ${gate.command}: no matching baseline artifact found under .ralph/baselines.`
  );

  let guidance;
  switch (conflict.mode) {
    case 'missing_baseline':
      guidance = [
        'The current task uses a strict clean quality gate and the task plan indicates a pre-flight baseline should exist, but the matching baseline artifact is missing.',
        'Do not classify failures as pre-existing or spend an implementation iteration trying to satisfy an impossible task contract.',
        'emit BLOCKED_HANDOFF and ask the operator to rerun or restore the pre-flight baseline artifact, or update the task spec to authorize a different gate policy.',
        '',
        ...missingLines,
      ];
      break;

    case 'authorized_cleanup':
      if (conflict.budgetUsed) {
        guidance = [
          'The current task explicitly authorized cleanup for baseline gate failures, but its one repair attempt has already been used.',
          'Do not keep iterating on cleanup or broaden the edit scope.',
          'If the gate is still failing, emit BLOCKED_HANDOFF with the remaining failing identifiers and ask for either a broader cleanup task or a task-spec change.',
          '',
          `Authorized cleanup files: ${conflict.allowedFiles.join(', ')}`,
          ...conflictLines,
        ];
      } else {
        guidance = [
          'The current task explicitly authorizes cleanup for baseline gate failures in named files.',
          'You have exactly one repair attempt for this task. Limit edits to compiler/lint-only fixes in the authorized files; do not change behavior or edit other files for this cleanup.',
          'If this attempt does not clear the gate, emit BLOCKED_HANDOFF instead of continuing to retry.',
          '',
          `Authorized cleanup files: ${conflict.allowedFiles.join(', ')}`,
          ...conflictLines,
        ];
      }
      break;

    case 'baseline_classification':
      guidance = [
        'The current task has strict quality-gate checks, and matching pre-flight baselines are already failing.',
        'The task text appears to authorize baseline classification, so do not repair unrelated baseline failures unless the task explicitly names those files.',
        'Complete the task only if the current run has no new failures beyond the named baseline failures.',
        '',
        ...conflictLines,
      ];
      break;

    default:
      // Any other mode: the task text gives no policy for the failing baseline.
      guidance = [
        'The current task requires a clean gate that already has a failing pre-flight baseline, but the task text does not say whether baseline-matching failures may be classified.',
        'Do not spend iterations repairing unrelated files outside the current task scope.',
        'If the only remaining gate failures match the baseline, emit BLOCKED_HANDOFF with a task-spec correction request: either allow baseline classification for this gate, or explicitly authorize the named out-of-scope repair.',
        '',
        ...conflictLines,
      ];
      break;
  }

  return guidance.join('\n');
}
|
|
1128
1922
|
|
|
1923
|
+
/**
 * Return the text of the current task from the tasks file: its checkbox
 * header line plus every following line up to (not including) the next
 * checkbox header.
 *
 * Matching rules:
 *  - When the target has a number, the first header carrying that exact
 *    number wins, wherever it is in the file.
 *  - Otherwise (or when no header carries that number) the first header whose
 *    description equals the target description is used.
 *
 * Number matches take precedence so that two tasks sharing a description
 * (e.g. "1.1 Run tests" and "2.1 Run tests") are not confused.
 *
 * @param {string} tasksFile - Path of the markdown tasks file.
 * @param {object} currentTaskMeta - Target task (`number`, `description`).
 * @returns {string} The task block, or `''` when the file or task is not found.
 */
function _extractCurrentTaskBlock(tasksFile, currentTaskMeta) {
  const fs = require('fs');
  if (!tasksFile || !currentTaskMeta || !fs.existsSync(tasksFile)) return '';

  const lines = fs.readFileSync(tasksFile, 'utf8').split(/\r?\n/);
  const taskHeader = /^-\s+\[[ x/]\]\s+(.+)$/;
  const targetNumber = currentTaskMeta.number || '';
  const targetDescription = (currentTaskMeta.description || '').trim();

  let numberHit = -1;
  let descriptionHit = -1;

  for (let i = 0; i < lines.length; i++) {
    const match = lines[i].match(taskHeader);
    if (!match) continue;

    const fullDescription = match[1].trim();
    const numMatch = fullDescription.match(/^(\d+\.\d+)\s+(.+)$/);
    const number = numMatch ? numMatch[1] : '';
    const description = (numMatch ? numMatch[2] : fullDescription).trim();

    if (targetNumber && number === targetNumber) {
      numberHit = i;
      break;
    }

    // Remember only the first description match; it is the fallback.
    if (descriptionHit === -1 && description === targetDescription) {
      descriptionHit = i;
      // Without a target number nothing can outrank this match.
      if (!targetNumber) break;
    }
  }

  const start = numberHit !== -1 ? numberHit : descriptionHit;
  if (start === -1) return '';

  let end = lines.length;
  for (let i = start + 1; i < lines.length; i++) {
    if (taskHeader.test(lines[i])) {
      end = i;
      break;
    }
  }

  return lines.slice(start, end).join('\n');
}
|
|
1964
|
+
|
|
1965
|
+
/**
 * Find which quality gates the task text requires to finish with exit code 0.
 *
 * A gate counts as strict when a single line mentions its `pnpm` command
 * followed later on that line by "exit(s) 0" or "return(s) 0".
 *
 * @param {string} taskBlock - Text of the current task.
 * @returns {{name: string, command: string, pattern: RegExp}[]} Matching gates,
 *   in the fixed order typecheck, lint, test.
 */
function _detectStrictCleanGates(taskBlock) {
  if (!taskBlock) return [];

  const candidates = [
    ['typecheck', /`?pnpm\s+typecheck`?[^\n]*(?:exits?|returns?)\s+0/i],
    ['lint', /`?pnpm\s+lint`?[^\n]*(?:exits?|returns?)\s+0/i],
    ['test', /`?pnpm\s+test`?[^\n]*(?:exits?|returns?)\s+0/i],
  ];

  const detected = [];
  for (const [name, pattern] of candidates) {
    if (pattern.test(taskBlock)) {
      detected.push({ name, command: `pnpm ${name}`, pattern });
    }
  }
  return detected;
}
|
|
1988
|
+
|
|
1989
|
+
/**
 * List the recorded baseline gates whose exit code is not zero.
 *
 * @param {string} ralphDir - Directory handed to `_detectRecordedBaselineGates`.
 * @returns {object[]} The recorded gates with a non-zero `exitCode`.
 */
function _detectFailingBaselineGates(ralphDir) {
  const recorded = _detectRecordedBaselineGates(ralphDir);
  return recorded.filter(({ exitCode }) => exitCode !== 0);
}
|
|
1992
|
+
|
|
1993
|
+
/**
 * Read the recorded baseline results from `<ralphDir>/baselines`.
 *
 * Only `.txt` files whose name maps to a known gate are considered. The exit
 * code is taken from the first `EXIT=<digits>` line found in the last 16 KiB
 * of the file; files without such a line are ignored. The marker line may end
 * in LF or CRLF.
 *
 * @param {string} ralphDir - Directory containing the `baselines` folder.
 * @returns {{name: string, file: string, exitCode: number}[]} Recorded gates,
 *   ordered typecheck, lint, test, then by file path. `file` is relative to
 *   `ralphDir`.
 */
function _detectRecordedBaselineGates(ralphDir) {
  const fs = require('fs');
  const fsPath = require('path');
  const baselinesDir = fsPath.join(ralphDir, 'baselines');
  if (!fs.existsSync(baselinesDir) || !fs.statSync(baselinesDir).isDirectory()) {
    return [];
  }

  const gates = [];
  for (const name of fs.readdirSync(baselinesDir)) {
    if (!/\.txt$/i.test(name)) continue;

    const gateName = _gateNameFromBaselineFile(name);
    if (!gateName) continue;

    const file = fsPath.join(baselinesDir, name);
    const tail = _readFileTail(file, 16384);
    // `\r?` keeps the marker recognisable in files written with CRLF endings.
    const exitMatch = tail.match(/(?:^|\n)EXIT=(\d+)\r?(?:\n|$)/);
    if (!exitMatch) continue;

    const exitCode = Number(exitMatch[1]);
    if (!Number.isInteger(exitCode)) continue;

    gates.push({ name: gateName, file: fsPath.join('baselines', name), exitCode });
  }

  const priority = { typecheck: 1, lint: 2, test: 3 };
  return gates.sort((a, b) =>
    (priority[a.name] || 99) - (priority[b.name] || 99) ||
    a.file.localeCompare(b.file)
  );
}
|
|
2025
|
+
|
|
2026
|
+
/**
 * Determine which strict gates lack a recorded baseline even though one is
 * expected.
 *
 * A baseline is expected when the task text itself deals with baseline
 * failures, or when the tasks file contains a completed pre-flight baseline
 * task. If neither holds, no gate is reported as missing.
 *
 * @param {object[]} strictGates - Gates the task requires to exit 0.
 * @param {object[]|null} recordedBaselines - Baselines found on disk.
 * @param {string} taskBlock - Text of the current task.
 * @param {string} tasksFile - Path of the tasks file.
 * @returns {object[]} The strict gates with no recorded baseline of the same name.
 */
function _detectMissingBaselineGates(strictGates, recordedBaselines, taskBlock, tasksFile) {
  if (!Array.isArray(strictGates) || strictGates.length === 0) {
    return [];
  }

  // The tasks file is only consulted when the task text is not conclusive.
  if (
    !_taskExplicitlyHandlesBaselineFailures(taskBlock) &&
    !_completedPreflightBaselineExists(tasksFile)
  ) {
    return [];
  }

  const recordedNames = new Set();
  for (const baseline of recordedBaselines || []) {
    recordedNames.add(baseline.name);
  }

  return strictGates.filter(({ name }) => !recordedNames.has(name));
}
|
|
2038
|
+
|
|
2039
|
+
/**
 * Check whether the tasks file contains a checked-off task line that mentions
 * a pre-flight baseline ("pre-flight"/"preflight" followed by "baseline(s)").
 *
 * @param {string} tasksFile - Path of the markdown tasks file.
 * @returns {boolean} `true` when such a completed task line exists; `false`
 *   when it does not or the file is missing.
 */
function _completedPreflightBaselineExists(tasksFile) {
  const fs = require('fs');
  if (!tasksFile || !fs.existsSync(tasksFile)) {
    return false;
  }

  const content = fs.readFileSync(tasksFile, 'utf8');
  for (const line of content.split(/\r?\n/)) {
    if (/^-\s+\[x\]\s+.*\bpre-?flight\b.*\bbaselines?\b/i.test(line)) {
      return true;
    }
  }
  return false;
}
|
|
2048
|
+
|
|
2049
|
+
/**
 * Map a baseline file name to the gate it belongs to.
 *
 * The gate word must appear as its own segment of the lower-cased name,
 * bounded by the start/end of the name or one of `-`, `_`, `.`. Gates are
 * tried in the order typecheck, lint, test; the first hit wins.
 *
 * @param {string} fileName - Base name of the baseline file.
 * @returns {string} `'typecheck'`, `'lint'`, `'test'`, or `''` when none match.
 */
function _gateNameFromBaselineFile(fileName) {
  const lowered = fileName.toLowerCase();

  const matchers = [
    ['typecheck', /(^|[-_.])typecheck([-_.]|\.|$)/],
    ['lint', /(^|[-_.])lint([-_.]|\.|$)/],
    ['test', /(^|[-_.])test([-_.]|\.|$)/],
  ];

  for (const [gate, pattern] of matchers) {
    if (pattern.test(lowered)) {
      return gate;
    }
  }
  return '';
}
|
|
2056
|
+
|
|
2057
|
+
/**
 * Read up to the last `maxBytes` bytes of a file as UTF-8 text.
 *
 * This is a best-effort helper: any failure (missing file, unreadable file,
 * invalid size) yields `''` instead of throwing. The file is opened first and
 * sized through its descriptor, and only the bytes actually read are decoded,
 * so a short read cannot leave NUL padding in the result.
 *
 * @param {string} file - Path of the file to read.
 * @param {number} maxBytes - Maximum number of trailing bytes to return.
 * @returns {string} The decoded tail, or `''` on any error.
 */
function _readFileTail(file, maxBytes) {
  const fs = require('fs');
  let fd = null;
  try {
    fd = fs.openSync(file, 'r');
    // Size the open descriptor rather than the path so both refer to the same file.
    const size = fs.fstatSync(fd).size;
    const length = Math.min(size, maxBytes);
    const offset = Math.max(0, size - length);
    const buffer = Buffer.alloc(length);

    let filled = 0;
    while (filled < length) {
      const bytesRead = fs.readSync(fd, buffer, filled, length - filled, offset + filled);
      if (bytesRead === 0) break; // File shrank underneath us; keep what we have.
      filled += bytesRead;
    }

    return buffer.toString('utf8', 0, filled);
  } catch {
    return '';
  } finally {
    if (fd !== null) {
      try {
        fs.closeSync(fd);
      } catch {
        // Ignore close failures while building best-effort feedback.
      }
    }
  }
}
|
|
2080
|
+
|
|
2081
|
+
/**
 * Heuristic: does the task text talk about handling baseline failures?
 *
 * The text must contain the word "baseline" and, anywhere else, one of the
 * classification cues (match/matches/matching, classify/classified/
 * classification, pre-existing/preexisting, "no new failure(s)").
 *
 * @param {string} taskBlock - Text of the current task.
 * @returns {boolean} `true` when both signals are present.
 */
function _taskExplicitlyHandlesBaselineFailures(taskBlock) {
  const mentionsBaseline = /\bbaseline\b/i.test(taskBlock);
  if (!mentionsBaseline) {
    return false;
  }
  return /\b(match|matches|matching|classif(?:y|ied|ication)|pre-existing|preexisting|no new failures?)\b/i.test(taskBlock);
}
|
|
2085
|
+
|
|
2086
|
+
/**
 * Extract the files a task explicitly allows to be cleaned up to clear a
 * failing baseline gate.
 *
 * Nothing is returned unless the task text contains an authorization cue
 * ("authorized/authorised cleanup", "after fixing", "fixing the named
 * baseline failure(s)"). When it does, every backtick-quoted span that looks
 * like a path is collected, with backslashes normalized to `/` and duplicates
 * removed (first occurrence kept).
 *
 * @param {string} taskBlock - Text of the current task.
 * @returns {{allowedFiles: string[]}} The authorized files, possibly empty.
 */
function _detectAuthorizedBaselineCleanup(taskBlock) {
  const authorizationCue = /\b(authori[sz]ed cleanup|after fixing|fixing the named baseline failures?)\b/i;
  if (!taskBlock || !authorizationCue.test(taskBlock)) {
    return { allowedFiles: [] };
  }

  // A Set keeps insertion order, which gives first-seen ordering for free.
  const collected = new Set();
  const backtickPattern = /`([^`]+)`/g;

  for (
    let hit = backtickPattern.exec(taskBlock);
    hit !== null;
    hit = backtickPattern.exec(taskBlock)
  ) {
    const candidate = hit[1].trim();
    if (_looksLikeCleanupPath(candidate)) {
      collected.add(candidate.replace(/\\/g, '/'));
    }
  }

  return { allowedFiles: Array.from(collected) };
}
|
|
2109
|
+
|
|
2110
|
+
/**
 * Heuristic: does a backtick-quoted token look like a concrete file path
 * rather than a command, a flag, or a glob?
 *
 * Rejected: empty values, anything containing whitespace, tokens that are a
 * known command name, tokens starting with `-`, and tokens containing `*`,
 * `{` or `}`. Of the rest, a token qualifies if it contains `/` or ends in a
 * `.ext`-style suffix.
 *
 * @param {string} value - Candidate token.
 * @returns {boolean} `true` when the token looks like a file path.
 */
function _looksLikeCleanupPath(value) {
  if (!value) return false;

  const disqualified =
    /\s/.test(value) ||
    /^(pnpm|npm|yarn|node|gtimeout|timeout|rg|git)(\s|$)/i.test(value) ||
    /^--?/.test(value) ||
    /[*{}]/.test(value);
  if (disqualified) return false;

  const hasDirectory = value.includes('/');
  return hasDirectory || /\.[A-Za-z0-9]+$/.test(value);
}
|
|
2117
|
+
|
|
2118
|
+
/**
 * Report whether the current task has already spent its single authorized
 * baseline-cleanup attempt.
 *
 * A history entry counts when it belongs to the current task and either
 * carries `baselineGateRepairAttempted === true` or its `filesChanged`
 * qualify as a repair attempt against the authorized files.
 *
 * @param {object[]} recentHistory - Recent iteration history entries.
 * @param {object} currentTaskMeta - Metadata of the current task.
 * @param {string[]} allowedFiles - Files the task authorizes for cleanup.
 * @returns {boolean} `true` when a prior attempt is found.
 */
function _baselineGateRepairBudgetUsed(recentHistory, currentTaskMeta, allowedFiles) {
  const hasHistory = Array.isArray(recentHistory) && recentHistory.length > 0;
  if (!hasHistory) return false;

  return recentHistory.some((entry) => {
    const sameTask = _historyEntryMatchesTask(entry, currentTaskMeta);
    if (!sameTask) return false;

    // An explicit flag on the entry is decisive; otherwise infer from the files it touched.
    const flagged = entry.baselineGateRepairAttempted === true;
    return flagged || _baselineGateRepairAttempted(
      { mode: 'authorized_cleanup', allowedFiles },
      entry.filesChanged || []
    );
  });
}
|
|
2131
|
+
|
|
2132
|
+
/**
 * Decide whether an iteration's changed files amount to an attempt at the
 * authorized baseline cleanup.
 *
 * Only `authorized_cleanup` conflicts with a non-empty `allowedFiles` list
 * are considered, and only when at least one file was changed.
 *
 * @param {object|null} conflict - Conflict description (`mode`, `allowedFiles`).
 * @param {string[]} filesChanged - Files changed by the iteration.
 * @returns {boolean} `true` when a changed file is one of the authorized files.
 */
function _baselineGateRepairAttempted(conflict, filesChanged) {
  const cleanupAuthorized =
    Boolean(conflict) &&
    conflict.mode === 'authorized_cleanup' &&
    Array.isArray(conflict.allowedFiles) &&
    conflict.allowedFiles.length !== 0;

  const touchedAnything = Array.isArray(filesChanged) && filesChanged.length !== 0;

  if (!cleanupAuthorized || !touchedAnything) {
    return false;
  }
  return _pathsIntersect(conflict.allowedFiles, filesChanged);
}
|
|
2146
|
+
|
|
2147
|
+
/**
 * Check whether a history entry was recorded for the given task.
 *
 * When the task has a number, only the number is compared. The description is
 * compared only for tasks without a number.
 *
 * @param {object|null} entry - History entry (`taskNumber`, `taskDescription`).
 * @param {object|null} currentTaskMeta - Task metadata (`number`, `description`).
 * @returns {boolean} `true` when the entry belongs to the task.
 */
function _historyEntryMatchesTask(entry, currentTaskMeta) {
  if (!entry || !currentTaskMeta) {
    return false;
  }

  const number = currentTaskMeta.number || '';
  if (number) {
    return entry.taskNumber === number;
  }

  const description = currentTaskMeta.description || '';
  return Boolean(description) && entry.taskDescription === description;
}
|
|
2158
|
+
|
|
2159
|
+
/**
 * Test whether two path lists share at least one path once both sides have
 * been run through `_normalizeComparablePath`.
 *
 * @param {string[]|null} left - First list of paths.
 * @param {string[]|null} right - Second list of paths.
 * @returns {boolean} `true` when any normalized path occurs in both lists.
 */
function _pathsIntersect(left, right) {
  const leftPaths = left || [];
  const rightPaths = right || [];

  const known = new Set(leftPaths.map(_normalizeComparablePath));
  return rightPaths.some((candidate) => {
    const comparable = _normalizeComparablePath(candidate);
    return known.has(comparable);
  });
}
|
|
2163
|
+
|
|
2164
|
+
/**
 * Reduce a path to a form suitable for equality comparison.
 *
 * Backslashes become `/`, one leading `./` is removed, and trailing slashes
 * are stripped. Falsy input becomes `''`.
 *
 * @param {*} pathValue - Path to normalize; coerced to a string.
 * @returns {string} The comparable form of the path.
 */
function _normalizeComparablePath(pathValue) {
  const text = String(pathValue || '');
  const forwardSlashed = text.replace(/\\/g, '/');
  const withoutDotPrefix = forwardSlashed.replace(/^\.\//, '');
  return withoutDotPrefix.replace(/\/+$/, '');
}
|
|
2170
|
+
|
|
1129
2171
|
function _extractErrorForIteration(errorEntries, iteration) {
|
|
1130
2172
|
if (!Array.isArray(errorEntries) || errorEntries.length === 0) return null;
|
|
1131
2173
|
|
|
@@ -1337,12 +2379,30 @@ module.exports = {
|
|
|
1337
2379
|
_validateOptions,
|
|
1338
2380
|
_autoCommit,
|
|
1339
2381
|
_buildAutoCommitAllowlist,
|
|
2382
|
+
_mergePathLists,
|
|
2383
|
+
_normalizePendingDirtyPaths,
|
|
2384
|
+
_recordPendingDirtyPaths,
|
|
2385
|
+
_remainingPendingDirtyPathsAfterCommit,
|
|
2386
|
+
_refreshPendingDirtyPaths,
|
|
2387
|
+
_samePendingTask,
|
|
2388
|
+
_currentDirtyPathSet,
|
|
1340
2389
|
_filterGitignored,
|
|
1341
2390
|
_resolveStartIteration,
|
|
1342
2391
|
_completedTaskDelta,
|
|
1343
2392
|
_formatAutoCommitMessage,
|
|
1344
2393
|
_truncateSubjectSummary,
|
|
1345
2394
|
_buildIterationFeedback,
|
|
2395
|
+
_buildBaselineGateFeedback,
|
|
2396
|
+
_analyzeBaselineGateConflict,
|
|
2397
|
+
_formatBaselineGateFeedback,
|
|
2398
|
+
_extractCurrentTaskBlock,
|
|
2399
|
+
_detectStrictCleanGates,
|
|
2400
|
+
_detectFailingBaselineGates,
|
|
2401
|
+
_detectRecordedBaselineGates,
|
|
2402
|
+
_detectMissingBaselineGates,
|
|
2403
|
+
_detectAuthorizedBaselineCleanup,
|
|
2404
|
+
_baselineGateRepairBudgetUsed,
|
|
2405
|
+
_baselineGateRepairAttempted,
|
|
1346
2406
|
_extractErrorForIteration,
|
|
1347
2407
|
_getCurrentTaskDescription,
|
|
1348
2408
|
_getCurrentTaskMeta,
|
|
@@ -1358,4 +2418,7 @@ module.exports = {
|
|
|
1358
2418
|
_failureFingerprint,
|
|
1359
2419
|
_firstNonEmptyLine,
|
|
1360
2420
|
_iterationIsStalled,
|
|
2421
|
+
_extractBlockerNote,
|
|
2422
|
+
_writeHandoff,
|
|
2423
|
+
_detectBlockerArtifacts,
|
|
1361
2424
|
};
|