spec-and-loop 3.3.3 → 3.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,431 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * runner-baseline-gate.js — Strict-clean baseline-gate detection and
5
+ * feedback formatting, split out of runner.js.
6
+ *
7
+ * The baseline-gate engine inspects the current task block for `pnpm
8
+ * typecheck/lint/test exit 0` style strict gates, cross-references the
9
+ * `.ralph/baselines/*` artifacts written by pre-flight tasks, and produces
10
+ * an iteration-feedback paragraph that tells the agent which gate-handling
11
+ * mode the task is in:
12
+ *
13
+ * - `missing_baseline` — strict gate but no baseline → handoff
14
+ * - `authorized_cleanup` — task explicitly authorizes named-file repair
15
+ * - `baseline_classification` — task allows classifying baseline failures
16
+ * - `missing_policy` — task is silent → ask operator to clarify
17
+ *
18
+ * Everything in this module is a pure helper over filesystem snapshots and
19
+ * the recent-history window — no I/O outside `fs`, no internal runner state.
20
+ * Moved verbatim from runner.js so the existing baseline-gate unit tests in
21
+ * `tests/unit/javascript/mini-ralph-runner-baseline-gate.test.js` keep
22
+ * passing without modification.
23
+ */
24
+
25
+ const fs = require('fs');
26
+ const fsPath = require('path');
27
+
28
/**
 * Analyze the current task for a baseline-gate conflict and render the
 * result as an iteration-feedback paragraph.
 *
 * @param {string} ralphDir - Directory that contains the `baselines` folder.
 * @param {string} tasksFile - Path to the tasks markdown file.
 * @param {object} currentTaskMeta - Metadata for the task being worked on.
 * @param {Array<object>} recentHistory - Recent iteration history entries.
 * @returns {string} Feedback text, or '' when there is nothing to report.
 */
function _buildBaselineGateFeedback(ralphDir, tasksFile, currentTaskMeta, recentHistory) {
  const analysis = _analyzeBaselineGateConflict(
    ralphDir,
    tasksFile,
    currentTaskMeta,
    recentHistory
  );
  return _formatBaselineGateFeedback(analysis);
}
33
+
34
/**
 * Decide which gate-handling mode the current task is in.
 *
 * Returns null when required inputs are missing, the task block cannot be
 * found, the block contains no strict gates, or no strict gate lines up with
 * a failing recorded baseline. Otherwise returns a descriptor whose `mode` is
 * one of `missing_baseline`, `authorized_cleanup`, `baseline_classification`
 * or `missing_policy`.
 *
 * @param {string} ralphDir - Directory that contains the `baselines` folder.
 * @param {string} tasksFile - Path to the tasks markdown file.
 * @param {object} currentTaskMeta - Task metadata; `description` is required.
 * @param {Array<object>} recentHistory - Recent iteration history entries.
 * @returns {object|null} Conflict descriptor or null.
 */
function _analyzeBaselineGateConflict(ralphDir, tasksFile, currentTaskMeta, recentHistory) {
  const hasInputs = Boolean(
    ralphDir && tasksFile && currentTaskMeta && currentTaskMeta.description
  );
  if (!hasInputs) return null;

  const block = _extractCurrentTaskBlock(tasksFile, currentTaskMeta);
  if (!block) return null;

  const requiredGates = _detectStrictCleanGates(block);
  if (requiredGates.length === 0) return null;

  const recorded = _detectRecordedBaselineGates(ralphDir);
  const absent = _detectMissingBaselineGates(requiredGates, recorded, block, tasksFile);
  if (absent.length > 0) {
    return {
      mode: 'missing_baseline',
      conflicts: [],
      missingBaselines: absent,
      allowedFiles: [],
      budgetUsed: false,
    };
  }

  // Index failing baselines by gate name; when several artifacts share a
  // gate name the last one in sorted order wins.
  const failingByName = new Map();
  for (const baseline of recorded) {
    if (baseline.exitCode !== 0) failingByName.set(baseline.name, baseline);
  }
  if (failingByName.size === 0) return null;

  const conflicts = [];
  for (const gate of requiredGates) {
    const baseline = failingByName.get(gate.name);
    if (baseline) conflicts.push({ gate, baseline });
  }
  if (conflicts.length === 0) return null;

  const { allowedFiles } = _detectAuthorizedBaselineCleanup(block);
  if (allowedFiles.length > 0) {
    const budgetUsed = _baselineGateRepairBudgetUsed(recentHistory, currentTaskMeta, allowedFiles);
    return { mode: 'authorized_cleanup', conflicts, allowedFiles, budgetUsed };
  }

  const mode = _taskExplicitlyHandlesBaselineFailures(block)
    ? 'baseline_classification'
    : 'missing_policy';
  return { mode, conflicts, allowedFiles: [], budgetUsed: false };
}
99
+
100
/**
 * Render a conflict descriptor as the feedback paragraph shown to the agent.
 *
 * The leading sentences depend on `conflict.mode`; any mode other than
 * `missing_baseline`, `authorized_cleanup` or `baseline_classification`
 * falls through to the "missing policy" wording. One bullet line per
 * conflicting or missing gate is appended after a blank line.
 *
 * @param {object|null} conflict - Descriptor with `mode`, `conflicts`,
 *   and optionally `missingBaselines`, `allowedFiles`, `budgetUsed`.
 * @returns {string} Newline-joined feedback, or '' when there is no
 *   descriptor or it lists neither conflicts nor missing baselines.
 */
function _formatBaselineGateFeedback(conflict) {
  if (!conflict) return '';

  const conflicts = Array.isArray(conflict.conflicts) ? conflict.conflicts : [];
  const missingBaselines = Array.isArray(conflict.missingBaselines)
    ? conflict.missingBaselines
    : [];
  if (conflicts.length === 0 && missingBaselines.length === 0) return '';

  // Normalised like the two lists above so a hand-built descriptor without
  // `allowedFiles` cannot crash the `.join` in authorized_cleanup mode.
  const allowedFiles = Array.isArray(conflict.allowedFiles) ? conflict.allowedFiles : [];

  const conflictLines = conflicts.map(({ gate, baseline }) =>
    `- ${gate.command}: baseline ${baseline.file} exits ${baseline.exitCode}.`
  );
  const missingLines = missingBaselines.map((gate) =>
    `- ${gate.command}: no matching baseline artifact found under .ralph/baselines.`
  );

  if (conflict.mode === 'missing_baseline') {
    return [
      'The current task uses a strict clean quality gate and the task plan indicates a pre-flight baseline should exist, but the matching baseline artifact is missing.',
      'Do not classify failures as pre-existing or spend an implementation iteration trying to satisfy an impossible task contract.',
      'emit BLOCKED_HANDOFF and ask the operator to rerun or restore the pre-flight baseline artifact, or update the task spec to authorize a different gate policy.',
      '',
      ...missingLines,
    ].join('\n');
  }

  if (conflict.mode === 'authorized_cleanup') {
    const authorizedLine = `Authorized cleanup files: ${allowedFiles.join(', ')}`;

    // Only a strict `true` counts as a spent repair budget.
    if (conflict.budgetUsed === true) {
      return [
        'The current task explicitly authorized cleanup for baseline gate failures, but its one repair attempt has already been used.',
        'Do not keep iterating on cleanup or broaden the edit scope.',
        'If the gate is still failing, emit BLOCKED_HANDOFF with the remaining failing identifiers and ask for either a broader cleanup task or a task-spec change.',
        '',
        authorizedLine,
        ...conflictLines,
      ].join('\n');
    }

    return [
      'The current task explicitly authorizes cleanup for baseline gate failures in named files.',
      'You have exactly one repair attempt for this task. Limit edits to compiler/lint-only fixes in the authorized files; do not change behavior or edit other files for this cleanup.',
      'If this attempt does not clear the gate, emit BLOCKED_HANDOFF instead of continuing to retry.',
      '',
      authorizedLine,
      ...conflictLines,
    ].join('\n');
  }

  if (conflict.mode === 'baseline_classification') {
    return [
      'The current task has strict quality-gate checks, and matching pre-flight baselines are already failing.',
      'The task text appears to authorize baseline classification, so do not repair unrelated baseline failures unless the task explicitly names those files.',
      'Complete the task only if the current run has no new failures beyond the named baseline failures.',
      '',
      ...conflictLines,
    ].join('\n');
  }

  return [
    'The current task requires a clean gate that already has a failing pre-flight baseline, but the task text does not say whether baseline-matching failures may be classified.',
    'Do not spend iterations repairing unrelated files outside the current task scope.',
    'If the only remaining gate failures match the baseline, emit BLOCKED_HANDOFF with a task-spec correction request: either allow baseline classification for this gate, or explicitly authorize the named out-of-scope repair.',
    '',
    ...conflictLines,
  ].join('\n');
}
167
+
168
/**
 * Return the markdown lines belonging to the current task: its checkbox
 * header line plus everything up to (not including) the next checkbox header.
 *
 * A header is a line of the form `- [ ] …`, `- [x] …` or `- [/] …`. A task
 * matches when its `N.M` number equals `currentTaskMeta.number`, or when its
 * description text (number stripped) equals `currentTaskMeta.description`.
 * The first matching header wins.
 *
 * @param {string} tasksFile - Path to the tasks markdown file.
 * @param {{number?: string, description?: string}} currentTaskMeta
 * @returns {string} The task block joined with '\n', or '' when the file
 *   cannot be read, the metadata is missing, or no header matches.
 */
function _extractCurrentTaskBlock(tasksFile, currentTaskMeta) {
  if (!tasksFile || !currentTaskMeta) return '';

  let content;
  try {
    // Reading directly (rather than existsSync + read) avoids crashing when
    // the path is a directory or the file disappears between the two calls.
    content = fs.readFileSync(tasksFile, 'utf8');
  } catch {
    return '';
  }

  const lines = content.split(/\r?\n/);
  const taskHeader = /^-\s+\[[ x/]\]\s+(.+)$/;
  const targetNumber = currentTaskMeta.number || '';
  const targetDescription = (currentTaskMeta.description || '').trim();

  const start = lines.findIndex((line) => {
    const match = line.match(taskHeader);
    if (!match) return false;

    const fullDescription = match[1].trim();
    const numMatch = fullDescription.match(/^(\d+\.\d+)\s+(.+)$/);
    const number = numMatch ? numMatch[1] : '';
    const description = (numMatch ? numMatch[2] : fullDescription).trim();

    // A description match counts whether or not a target number was given.
    return Boolean(targetNumber && number === targetNumber) ||
      description === targetDescription;
  });
  if (start === -1) return '';

  // The block runs until the next task header, or to the end of the file.
  let end = lines.length;
  for (let i = start + 1; i < lines.length; i++) {
    if (taskHeader.test(lines[i])) {
      end = i;
      break;
    }
  }

  return lines.slice(start, end).join('\n');
}
208
+
209
/**
 * Find which strict "exits 0" / "returns 0" quality gates a task block names.
 *
 * A gate is detected when `pnpm typecheck`, `pnpm lint` or `pnpm test`
 * (optionally wrapped in backticks) is followed on the same line by
 * `exit(s) 0` or `return(s) 0`.
 *
 * @param {string} taskBlock - Markdown text of the current task.
 * @returns {Array<{name: string, command: string, pattern: RegExp}>}
 *   Detected gates in typecheck, lint, test order.
 */
function _detectStrictCleanGates(taskBlock) {
  const detected = [];
  if (!taskBlock) return detected;

  const candidates = [
    {
      name: 'typecheck',
      command: 'pnpm typecheck',
      pattern: /`?pnpm\s+typecheck`?[^\n]*(?:exits?|returns?)\s+0/i,
    },
    {
      name: 'lint',
      command: 'pnpm lint',
      pattern: /`?pnpm\s+lint`?[^\n]*(?:exits?|returns?)\s+0/i,
    },
    {
      name: 'test',
      command: 'pnpm test',
      pattern: /`?pnpm\s+test`?[^\n]*(?:exits?|returns?)\s+0/i,
    },
  ];

  for (const candidate of candidates) {
    if (candidate.pattern.test(taskBlock)) {
      detected.push(candidate);
    }
  }
  return detected;
}
232
+
233
/**
 * List the recorded baseline gates whose exit code is non-zero.
 *
 * @param {string} ralphDir - Directory that contains the `baselines` folder.
 * @returns {Array<{name: string, file: string, exitCode: number}>}
 */
function _detectFailingBaselineGates(ralphDir) {
  const failing = [];
  for (const gate of _detectRecordedBaselineGates(ralphDir)) {
    if (gate.exitCode !== 0) {
      failing.push(gate);
    }
  }
  return failing;
}
236
+
237
/**
 * Scan `<ralphDir>/baselines` for `.txt` artifacts and report the exit code
 * each one recorded.
 *
 * An artifact is kept only when its file name maps to a known gate and the
 * last 16 KiB of the file contain an `EXIT=<digits>` line. Both LF and CRLF
 * line endings are accepted for that line.
 *
 * @param {string} ralphDir - Directory that contains the `baselines` folder.
 * @returns {Array<{name: string, file: string, exitCode: number}>} Entries
 *   sorted typecheck, lint, test, then by relative file path. `file` is
 *   relative to `ralphDir`. Returns [] when the folder cannot be listed.
 */
function _detectRecordedBaselineGates(ralphDir) {
  const baselinesDir = fsPath.join(ralphDir, 'baselines');

  let names;
  try {
    // readdirSync fails for a missing path or a non-directory, which covers
    // the existence and is-directory checks without a race between them.
    names = fs.readdirSync(baselinesDir);
  } catch {
    return [];
  }

  const gates = [];
  for (const name of names) {
    if (!/\.txt$/i.test(name)) continue;

    const gateName = _gateNameFromBaselineFile(name);
    if (!gateName) continue;

    const file = fsPath.join(baselinesDir, name);
    const tail = _readFileTail(file, 16384);
    // `\r?` lets artifacts written with CRLF line endings be recognised.
    const exitMatch = tail.match(/(?:^|\n)EXIT=(\d+)\r?(?:\n|$)/);
    if (!exitMatch) continue;

    const exitCode = Number(exitMatch[1]);
    if (!Number.isInteger(exitCode)) continue;

    gates.push({ name: gateName, file: fsPath.join('baselines', name), exitCode });
  }

  const priority = { typecheck: 1, lint: 2, test: 3 };
  return gates.sort((a, b) =>
    (priority[a.name] || 99) - (priority[b.name] || 99) ||
    a.file.localeCompare(b.file)
  );
}
267
+
268
/**
 * Report strict gates that have no recorded baseline, but only when a
 * baseline is expected: either the task text talks about handling baseline
 * failures, or the tasks file has a completed pre-flight baseline task.
 *
 * @param {Array<{name: string}>} strictGates - Gates the task requires.
 * @param {Array<{name: string}>} recordedBaselines - Baselines found on disk.
 * @param {string} taskBlock - Markdown text of the current task.
 * @param {string} tasksFile - Path to the tasks markdown file.
 * @returns {Array<object>} The subset of `strictGates` lacking a baseline.
 */
function _detectMissingBaselineGates(strictGates, recordedBaselines, taskBlock, tasksFile) {
  const hasStrictGates = Array.isArray(strictGates) && strictGates.length > 0;
  if (!hasStrictGates) return [];

  // The tasks file is consulted only when the task text itself is silent.
  if (
    !_taskExplicitlyHandlesBaselineFailures(taskBlock) &&
    !_completedPreflightBaselineExists(tasksFile)
  ) {
    return [];
  }

  const covered = new Set((recordedBaselines || []).map((baseline) => baseline.name));
  return strictGates.filter(({ name }) => !covered.has(name));
}
280
+
281
/**
 * Tell whether the tasks file contains a checked-off (`- [x]`) task line
 * that mentions both "pre-flight"/"preflight" and "baseline(s)", in that
 * order. Matching is case-insensitive.
 *
 * @param {string} tasksFile - Path to the tasks markdown file.
 * @returns {boolean} False when no such line exists or the file cannot be
 *   read (missing, a directory, permission error, ...).
 */
function _completedPreflightBaselineExists(tasksFile) {
  if (!tasksFile) return false;

  let content;
  try {
    // Reading directly (rather than existsSync + read) avoids crashing when
    // the path is a directory or the file disappears between the two calls.
    content = fs.readFileSync(tasksFile, 'utf8');
  } catch {
    return false;
  }

  return content
    .split(/\r?\n/)
    .some((line) => /^-\s+\[x\]\s+.*\bpre-?flight\b.*\bbaselines?\b/i.test(line));
}
289
+
290
/**
 * Map a baseline artifact file name to the gate it records.
 *
 * The lower-cased name must contain `typecheck`, `lint` or `test` as a
 * token delimited by `-`, `_`, `.` or the start/end of the name. Gates are
 * tried in that order and the first hit wins.
 *
 * @param {string} fileName - Base name of the artifact file.
 * @returns {string} 'typecheck', 'lint', 'test', or '' when nothing matches.
 */
function _gateNameFromBaselineFile(fileName) {
  const lowered = fileName.toLowerCase();
  const matchers = [
    ['typecheck', /(^|[-_.])typecheck([-_.]|\.|$)/],
    ['lint', /(^|[-_.])lint([-_.]|\.|$)/],
    ['test', /(^|[-_.])test([-_.]|\.|$)/],
  ];

  for (const [gateName, matcher] of matchers) {
    if (matcher.test(lowered)) {
      return gateName;
    }
  }
  return '';
}
297
+
298
/**
 * Read at most the last `maxBytes` bytes of a file as UTF-8 text.
 *
 * This is a best-effort helper: any failure (missing file, permission
 * error, invalid size) yields '' instead of throwing.
 *
 * @param {string} file - Path of the file to read.
 * @param {number} maxBytes - Upper bound on the number of bytes read.
 * @returns {string} The decoded tail, or '' on any error.
 */
function _readFileTail(file, maxBytes) {
  let fd = null;
  try {
    const stat = fs.statSync(file);
    const length = Math.min(stat.size, maxBytes);
    const offset = Math.max(0, stat.size - length);
    const buffer = Buffer.alloc(length);
    fd = fs.openSync(file, 'r');
    // readSync may deliver fewer bytes than requested (e.g. the file shrank
    // after stat); decode only what was read so no NUL padding leaks out.
    const bytesRead = fs.readSync(fd, buffer, 0, length, offset);
    return buffer.toString('utf8', 0, bytesRead);
  } catch {
    return '';
  } finally {
    if (fd !== null) {
      try {
        fs.closeSync(fd);
      } catch {
        // Ignore close failures while building best-effort feedback.
      }
    }
  }
}
320
+
321
/**
 * Heuristic: does the task text talk about classifying baseline failures?
 *
 * True when the text contains the word "baseline" and also one of the
 * classification cue words (match/matches/matching, classify/classified/
 * classification, pre-existing/preexisting, "no new failure(s)").
 *
 * @param {string} taskBlock - Markdown text of the current task.
 * @returns {boolean}
 */
function _taskExplicitlyHandlesBaselineFailures(taskBlock) {
  const mentionsBaseline = /\bbaseline\b/i.test(taskBlock);
  if (!mentionsBaseline) return false;

  return /\b(match|matches|matching|classif(?:y|ied|ication)|pre-existing|preexisting|no new failures?)\b/i.test(taskBlock);
}
325
+
326
/**
 * Collect the files a task authorizes for baseline cleanup.
 *
 * The task must contain one of the authorizing phrases ("authorized
 * cleanup" / "authorised cleanup", "after fixing", "fixing the named
 * baseline failure(s)"). Every backtick-quoted span that looks like a path
 * is then returned with backslashes converted to forward slashes,
 * de-duplicated and in first-seen order.
 *
 * @param {string} taskBlock - Markdown text of the current task.
 * @returns {{allowedFiles: string[]}}
 */
function _detectAuthorizedBaselineCleanup(taskBlock) {
  const authorizes = Boolean(taskBlock) &&
    /\b(authori[sz]ed cleanup|after fixing|fixing the named baseline failures?)\b/i.test(taskBlock);
  if (!authorizes) {
    return { allowedFiles: [] };
  }

  // A Set keeps insertion order, so it handles both de-duplication and ordering.
  const unique = new Set();
  for (const [, quoted] of taskBlock.matchAll(/`([^`]+)`/g)) {
    const candidate = quoted.trim();
    if (_looksLikeCleanupPath(candidate)) {
      unique.add(candidate.replace(/\\/g, '/'));
    }
  }

  return { allowedFiles: [...unique] };
}
349
+
350
/**
 * Heuristic: does a backtick-quoted token look like a file path rather than
 * a shell command, a flag or a glob?
 *
 * Rejected: empty values, anything containing whitespace, tokens that are
 * exactly a known tool name, tokens starting with `-`, and tokens
 * containing `*`, `{` or `}`. Accepted: what remains, provided it contains
 * a `/` or ends in a `.ext` suffix.
 *
 * @param {string} value - Candidate token.
 * @returns {boolean}
 */
function _looksLikeCleanupPath(value) {
  if (!value) return false;

  const disqualifiers = [
    /\s/,
    /^(pnpm|npm|yarn|node|gtimeout|timeout|rg|git)(\s|$)/i,
    /^--?/,
    /[*{}]/,
  ];
  if (disqualifiers.some((pattern) => pattern.test(value))) return false;

  if (value.includes('/')) return true;
  return /\.[A-Za-z0-9]+$/.test(value);
}
357
+
358
/**
 * Tell whether the single authorized-cleanup repair attempt for the current
 * task has already been spent.
 *
 * A history entry counts when it belongs to the current task and either
 * carries `baselineGateRepairAttempted === true` or changed at least one of
 * the authorized files.
 *
 * @param {Array<object>} recentHistory - Recent iteration history entries.
 * @param {object} currentTaskMeta - Metadata for the current task.
 * @param {string[]} allowedFiles - Files the task authorizes for cleanup.
 * @returns {boolean}
 */
function _baselineGateRepairBudgetUsed(recentHistory, currentTaskMeta, allowedFiles) {
  const hasHistory = Array.isArray(recentHistory) && recentHistory.length > 0;
  if (!hasHistory) return false;

  const cleanupProbe = { mode: 'authorized_cleanup', allowedFiles };
  const spentBudget = (entry) => {
    if (!_historyEntryMatchesTask(entry, currentTaskMeta)) return false;
    if (entry.baselineGateRepairAttempted === true) return true;
    return _baselineGateRepairAttempted(cleanupProbe, entry.filesChanged || []);
  };

  return recentHistory.some(spentBudget);
}
371
+
372
/**
 * Tell whether a set of changed files touches any file authorized for
 * cleanup by an `authorized_cleanup` conflict.
 *
 * @param {{mode: string, allowedFiles: string[]}} conflict - Conflict descriptor.
 * @param {string[]} filesChanged - Paths changed during an iteration.
 * @returns {boolean} False unless the conflict is in `authorized_cleanup`
 *   mode, both lists are non-empty arrays, and they share a path.
 */
function _baselineGateRepairAttempted(conflict, filesChanged) {
  const isAuthorizedCleanup = Boolean(conflict) && conflict.mode === 'authorized_cleanup';
  if (!isAuthorizedCleanup) return false;

  const { allowedFiles } = conflict;
  const hasAllowed = Array.isArray(allowedFiles) && allowedFiles.length > 0;
  const hasChanged = Array.isArray(filesChanged) && filesChanged.length > 0;
  if (!hasAllowed || !hasChanged) return false;

  return _pathsIntersect(allowedFiles, filesChanged);
}
386
+
387
/**
 * Tell whether a history entry belongs to the current task.
 *
 * When the task has a number, only `entry.taskNumber` is compared. When it
 * has no number, a non-empty description is compared against
 * `entry.taskDescription`.
 *
 * @param {{taskNumber?: string, taskDescription?: string}} entry
 * @param {{number?: string, description?: string}} currentTaskMeta
 * @returns {boolean}
 */
function _historyEntryMatchesTask(entry, currentTaskMeta) {
  if (!entry || !currentTaskMeta) return false;

  const wantedNumber = currentTaskMeta.number || '';
  if (wantedNumber) {
    return entry.taskNumber === wantedNumber;
  }

  const wantedDescription = currentTaskMeta.description || '';
  return Boolean(wantedDescription) && entry.taskDescription === wantedDescription;
}
398
+
399
/**
 * Tell whether two path lists share at least one path after normalization.
 *
 * @param {string[]} left - First list of paths (null/undefined means empty).
 * @param {string[]} right - Second list of paths (null/undefined means empty).
 * @returns {boolean}
 */
function _pathsIntersect(left, right) {
  const leftPaths = left || [];
  const rightPaths = right || [];

  const leftKeys = new Set(leftPaths.map(_normalizeComparablePath));
  return rightPaths.some((candidate) => {
    const key = _normalizeComparablePath(candidate);
    return leftKeys.has(key);
  });
}
403
+
404
/**
 * Reduce a path to a canonical form for equality comparison: backslashes
 * become forward slashes, one leading `./` is dropped, and trailing slashes
 * are removed. Falsy input becomes ''.
 *
 * @param {*} pathValue - Path-like value; coerced with `String`.
 * @returns {string}
 */
function _normalizeComparablePath(pathValue) {
  const raw = String(pathValue || '');
  const forwardSlashed = raw.replace(/\\/g, '/');
  const withoutDotPrefix = forwardSlashed.replace(/^\.\//, '');
  return withoutDotPrefix.replace(/\/+$/, '');
}
410
+
411
// Export table: every function defined in this module is exported under its
// own underscore-prefixed name.
module.exports = {
  _buildBaselineGateFeedback,
  _analyzeBaselineGateConflict,
  _formatBaselineGateFeedback,
  _extractCurrentTaskBlock,
  _detectStrictCleanGates,
  _detectFailingBaselineGates,
  _detectRecordedBaselineGates,
  _detectMissingBaselineGates,
  _completedPreflightBaselineExists,
  _gateNameFromBaselineFile,
  _readFileTail,
  _taskExplicitlyHandlesBaselineFailures,
  _detectAuthorizedBaselineCleanup,
  _looksLikeCleanupPath,
  _baselineGateRepairBudgetUsed,
  _baselineGateRepairAttempted,
  _historyEntryMatchesTask,
  _pathsIntersect,
  _normalizeComparablePath,
};