nightytidy 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +314 -0
- package/bin/nightytidy.js +3 -0
- package/package.json +55 -0
- package/src/checks.js +367 -0
- package/src/claude.js +655 -0
- package/src/cli.js +1012 -0
- package/src/consolidation.js +81 -0
- package/src/dashboard-html.js +496 -0
- package/src/dashboard-standalone.js +167 -0
- package/src/dashboard-tui.js +208 -0
- package/src/dashboard.js +427 -0
- package/src/env.js +100 -0
- package/src/executor.js +550 -0
- package/src/git.js +348 -0
- package/src/lock.js +186 -0
- package/src/logger.js +111 -0
- package/src/notifications.js +33 -0
- package/src/orchestrator.js +919 -0
- package/src/prompts/loader.js +55 -0
- package/src/prompts/manifest.json +138 -0
- package/src/prompts/specials/changelog.md +28 -0
- package/src/prompts/specials/consolidation.md +61 -0
- package/src/prompts/specials/doc-update.md +1 -0
- package/src/prompts/specials/report.md +95 -0
- package/src/prompts/steps/01-documentation.md +173 -0
- package/src/prompts/steps/02-test-coverage.md +181 -0
- package/src/prompts/steps/03-test-hardening.md +181 -0
- package/src/prompts/steps/04-test-architecture.md +130 -0
- package/src/prompts/steps/05-test-consolidation.md +165 -0
- package/src/prompts/steps/06-test-quality.md +211 -0
- package/src/prompts/steps/07-api-design.md +165 -0
- package/src/prompts/steps/08-security-sweep.md +207 -0
- package/src/prompts/steps/09-dependency-health.md +217 -0
- package/src/prompts/steps/10-codebase-cleanup.md +189 -0
- package/src/prompts/steps/11-crosscutting-concerns.md +196 -0
- package/src/prompts/steps/12-file-decomposition.md +263 -0
- package/src/prompts/steps/13-code-elegance.md +329 -0
- package/src/prompts/steps/14-architectural-complexity.md +297 -0
- package/src/prompts/steps/15-type-safety.md +192 -0
- package/src/prompts/steps/16-logging-error-message.md +173 -0
- package/src/prompts/steps/17-data-integrity.md +139 -0
- package/src/prompts/steps/18-performance.md +183 -0
- package/src/prompts/steps/19-cost-resource-optimization.md +136 -0
- package/src/prompts/steps/20-error-recovery.md +145 -0
- package/src/prompts/steps/21-race-condition-audit.md +178 -0
- package/src/prompts/steps/22-bug-hunt.md +229 -0
- package/src/prompts/steps/23-frontend-quality.md +210 -0
- package/src/prompts/steps/24-uiux-audit.md +284 -0
- package/src/prompts/steps/25-state-management.md +170 -0
- package/src/prompts/steps/26-perceived-performance.md +190 -0
- package/src/prompts/steps/27-devops.md +165 -0
- package/src/prompts/steps/28-scheduled-job-chron-jobs.md +141 -0
- package/src/prompts/steps/29-observability.md +152 -0
- package/src/prompts/steps/30-backup-check.md +155 -0
- package/src/prompts/steps/31-product-polish-ux-friction.md +122 -0
- package/src/prompts/steps/32-feature-discovery-opportunity.md +128 -0
- package/src/prompts/steps/33-strategic-opportunities.md +217 -0
- package/src/report.js +540 -0
- package/src/setup.js +133 -0
- package/src/sync.js +536 -0
package/src/executor.js
ADDED
|
@@ -0,0 +1,550 @@
|
|
|
1
|
+
import { createHash } from 'crypto';
|
|
2
|
+
import { writeFileSync, mkdirSync, readdirSync, unlinkSync } from 'fs';
|
|
3
|
+
import path from 'path';
|
|
4
|
+
import { runPrompt, ERROR_TYPE, sleep } from './claude.js';
|
|
5
|
+
import { getHeadHash, hasNewCommit, fallbackCommit } from './git.js';
|
|
6
|
+
import { STEPS, DOC_UPDATE_PROMPT } from './prompts/loader.js';
|
|
7
|
+
import { notify } from './notifications.js';
|
|
8
|
+
import { info, warn, error as logError } from './logger.js';
|
|
9
|
+
|
|
10
|
+
/**
|
|
11
|
+
* @fileoverview Core step execution loop for NightyTidy.
|
|
12
|
+
*
|
|
13
|
+
* Executes improvement prompts sequentially, handles retries, rate-limit
|
|
14
|
+
* pause/resume, fast-completion detection, and doc updates.
|
|
15
|
+
*
|
|
16
|
+
* Error contract: This module NEVER throws. Failed steps are recorded in
|
|
17
|
+
* results. Rate-limit failures trigger pause/auto-resume with exponential backoff.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
/**
|
|
21
|
+
* @typedef {import('./claude.js').CostData} CostData
|
|
22
|
+
* @typedef {import('./claude.js').ErrorType} ErrorType
|
|
23
|
+
*/
|
|
24
|
+
|
|
25
|
+
/**
|
|
26
|
+
* @typedef {Object} Step
|
|
27
|
+
* @property {number} number - Step number (1-based)
|
|
28
|
+
* @property {string} name - Human-readable step name
|
|
29
|
+
* @property {string} prompt - The improvement prompt text
|
|
30
|
+
*/
|
|
31
|
+
|
|
32
|
+
/**
|
|
33
|
+
* @typedef {Object} StepResult
|
|
34
|
+
* @property {{number: number, name: string}} step - Step identifier
|
|
35
|
+
* @property {'completed' | 'failed' | 'skipped'} status - Step completion status
|
|
36
|
+
* @property {string} output - Claude's output text
|
|
37
|
+
* @property {number} duration - Step duration in milliseconds
|
|
38
|
+
* @property {number} attempts - Number of attempts made
|
|
39
|
+
* @property {string|null} error - Error message if failed
|
|
40
|
+
* @property {CostData|null} cost - Cost and token usage data
|
|
41
|
+
* @property {boolean} [suspiciousFast] - True if step was retried for fast completion
|
|
42
|
+
* @property {ErrorType} [errorType] - Error type if failed
|
|
43
|
+
* @property {number|null} [retryAfterMs] - Suggested retry delay for rate limits
|
|
44
|
+
*/
|
|
45
|
+
|
|
46
|
+
/**
|
|
47
|
+
* @typedef {Object} ExecutionResults
|
|
48
|
+
* @property {StepResult[]} results - Array of step results
|
|
49
|
+
* @property {number} totalDuration - Total execution time in milliseconds
|
|
50
|
+
* @property {number} completedCount - Number of successfully completed steps
|
|
51
|
+
* @property {number} failedCount - Number of failed steps
|
|
52
|
+
* @property {number} skippedCount - Number of skipped steps (prompt not applicable)
|
|
53
|
+
*/
|
|
54
|
+
|
|
55
|
+
/**
|
|
56
|
+
* @typedef {Object} ExecuteStepsOptions
|
|
57
|
+
* @property {AbortSignal} [signal] - Abort signal for cancellation
|
|
58
|
+
* @property {number} [timeout] - Timeout per step in milliseconds
|
|
59
|
+
* @property {(step: Step, index: number, total: number) => void} [onStepStart]
|
|
60
|
+
* @property {(step: Step, index: number, total: number) => void} [onStepComplete]
|
|
61
|
+
* @property {(step: Step, index: number, total: number) => void} [onStepFail]
|
|
62
|
+
* @property {(step: Step, index: number, total: number) => void} [onStepSkip]
|
|
63
|
+
* @property {(chunk: string) => void} [onOutput] - Streaming output callback
|
|
64
|
+
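* @property {(retryAfterMs: number|null|undefined, progress: {results: StepResult[], completedCount: number, failedCount: number, skippedCount: number, currentStepIndex: number}) => void} [onRateLimitPause] - Called when a rate limit pauses the run; receives the suggested delay and a snapshot of progress so far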
|
|
65
|
+
* @property {() => void} [onRateLimitResume]
|
|
66
|
+
*/
|
|
67
|
+
|
|
68
|
+
// Expected SHA-256 (hex) of every STEPS[].prompt concatenated in order.
// Refresh this value whenever the prompt files change. It exists to flag
// unexpected modification of prompt data before that data is handed to
// Claude Code running with --dangerously-skip-permissions.
const STEPS_HASH = 'c341ed4301dc1600d848da5457d319e7f1c5a51c215e1142d3889aa3684fd7cf';

// Ceiling on the total duration of one step (every retry plus the doc-update
// combined); without it, retries across phases can outlast the timeout the
// user expects. Must match claude.js DEFAULT_TIMEOUT — duplicated here on
// purpose so test files do not need to mock claude.js just for this value.
const DEFAULT_STEP_TIMEOUT_MS = 45 * 60_000; // 45 minutes

// Finishing a step in under 2 minutes is suspicious — Claude probably bailed
// without doing real work. Triggers one automatic retry with extra context.
export const FAST_COMPLETION_THRESHOLD_MS = 2 * 60_000;

// A fast completion whose output is shorter than this suggests the prompt did
// not apply to the codebase (e.g., UX audit on a static site): skip, don't retry.
export const SKIP_OUTPUT_THRESHOLD_CHARS = 500;

// Text placed in front of the step prompt on the fast-completion retry.
const FAST_RETRY_PREFIX = [
  'IMPORTANT CONTEXT: You were asked to perform the task below previously, but you ',
  'completed it in under 2 minutes. For a codebase improvement step, this is too fast ',
  'and likely means you did not perform thorough work. This time, please:\n',
  '- Read and understand the relevant code before making changes\n',
  '- Make substantive, meaningful improvements\n',
  '- If truly no changes are needed, provide a detailed explanation of what you reviewed and why\n',
  '- Commit your changes when done\n\n',
  'Here is the original task:\n\n',
].join('');

// Recovery preamble exported for callers that resume an interrupted task in
// the same session (it is not referenced elsewhere in this module).
export const PROD_PREAMBLE = [
  'RECOVERY CONTEXT: Your previous attempt at this task was interrupted. ',
  'You are resuming in the same session. Before starting fresh:\n',
  '- Check what work was already done (look at recent git changes, modified files)\n',
  '- Continue from where you left off rather than starting over\n',
  '- If substantial work was already committed, focus on completing remaining items\n',
  '- If no meaningful work was done, proceed with the full task\n\n',
  'Here is the task:\n\n',
].join('');
|
|
105
|
+
|
|
106
|
+
/**
 * Compare the prompt text of the given steps with the pinned STEPS_HASH.
 *
 * All prompts are concatenated (no separator) and hashed with SHA-256. A
 * mismatch is only logged as a warning — it never blocks execution, because
 * the prompts may have been changed legitimately.
 *
 * @param {Step[]} steps - Steps whose `prompt` fields are hashed, in order
 * @returns {boolean} True when the digest equals STEPS_HASH, false otherwise
 */
function verifyStepsIntegrity(steps) {
  const joinedPrompts = steps.map((step) => step.prompt).join('');
  const actual = createHash('sha256').update(joinedPrompts).digest('hex');

  if (actual === STEPS_HASH) {
    info('Steps integrity check passed');
    return true;
  }

  // Only the first 16 hex characters of each digest are shown in the log.
  warn(
    'Steps integrity check: prompt content hash mismatch. ' +
    'If you regenerated prompts, update STEPS_HASH in executor.js. ' +
    `Expected: ${STEPS_HASH.slice(0, 16)}... Got: ${actual.slice(0, 16)}...`
  );
  return false;
}
|
|
127
|
+
|
|
128
|
+
// Constraints placed in front of the prompts this module sends to the Claude
// subprocess, so that it avoids destructive operations that would conflict
// with NightyTidy's own orchestration (branching, commits, file deletion).
export const SAFETY_PREAMBLE = [
  'IMPORTANT CONSTRAINTS (from the NightyTidy orchestrator — always follow these):\n',
  '- Do NOT delete any existing files. Create new files or modify existing ones only.\n',
  '- Do NOT create, switch, or merge git branches. The orchestrator manages all branching.\n',
  '- Do NOT run destructive git commands (reset, clean, checkout, rm).\n',
  '- Commit your changes with a descriptive message when done.\n',
  '- IGNORE the `audit-reports/refactor-prompts/` directory — it contains internal artifacts from the NightyTidy analysis tool, not part of this project\'s codebase. Do not read, analyze, or reference any files in that subdirectory. Other files in `audit-reports/` (step reports) ARE part of your output.\n',
  '---\n\n',
].join('');
|
|
138
|
+
|
|
139
|
+
/**
 * Add two cost records together, tolerating missing records.
 *
 * When only one record is present it is returned as-is (same object, not a
 * copy). Token totals stay null only when neither record carries a value for
 * that field; a total of zero is real data and is kept. The session id of
 * `b` wins when it is set, otherwise `a`'s is used.
 *
 * @param {CostData|null} a - First cost record
 * @param {CostData|null} b - Second cost record
 * @returns {CostData|null} Combined record, or null when both inputs are missing
 */
export function sumCosts(a, b) {
  if (!a) return b || null;
  if (!b) return a;

  // Nullable counters: null means "no data", so only null + null stays null.
  const addNullable = (left, right) =>
    (left == null && right == null ? null : (left ?? 0) + (right ?? 0));
  // Plain counters: any falsy value counts as zero.
  const addPlain = (left, right) => (left || 0) + (right || 0);

  return {
    costUSD: addPlain(a.costUSD, b.costUSD),
    inputTokens: addNullable(a.inputTokens, b.inputTokens),
    outputTokens: addNullable(a.outputTokens, b.outputTokens),
    numTurns: addPlain(a.numTurns, b.numTurns),
    durationApiMs: addPlain(a.durationApiMs, b.durationApiMs),
    sessionId: b.sessionId || a.sessionId,
  };
}
|
|
165
|
+
|
|
166
|
+
/**
 * Build the normalized result record for one executed step.
 *
 * Only the step's number and name are copied into the record. The `error`
 * field is taken from `result` solely for the 'failed' status and is null
 * otherwise; a missing cost becomes null. Fields in `extra` are applied last
 * and therefore override any base field with the same key.
 *
 * @param {Step} step - The step that was executed
 * @param {'completed' | 'failed' | 'skipped'} status - Completion status
 * @param {import('./claude.js').RunPromptResult} result - Claude result
 * @param {number} duration - Step duration in milliseconds
 * @param {Object} [extra={}] - Additional fields to merge into the record
 * @returns {StepResult} Normalized step result
 */
function makeStepResult(step, status, result, duration, extra = {}) {
  const { number, name } = step;
  const failed = status === 'failed';

  const base = {
    step: { number, name },
    status,
    output: result.output,
    duration,
    attempts: result.attempts,
    error: failed ? result.error : null,
    cost: result.cost || null,
  };

  return { ...base, ...extra };
}
|
|
188
|
+
|
|
189
|
+
/**
 * Run one improvement step end to end.
 *
 * Sequence: run the improvement prompt; on failure return a 'failed' result.
 * If the run finished faster than FAST_COMPLETION_THRESHOLD_MS (and this is
 * not a continued session), either mark the step 'skipped' (output shorter
 * than SKIP_OUTPUT_THRESHOLD_CHARS) or retry once with FAST_RETRY_PREFIX.
 * Then run the doc-update prompt with `continueSession: true`, verify that a
 * commit was made (falling back to an automatic commit), and sweep up any
 * files left uncommitted.
 *
 * A step-level timer aborts all in-flight work once the step has run for
 * `timeout` (or DEFAULT_STEP_TIMEOUT_MS) in total.
 *
 * Prompt failures are reported through the returned result rather than
 * thrown. NOTE(review): getHeadHash() and hasNewCommit() are awaited without
 * a catch here, so a rejection from them would propagate — confirm they
 * never reject.
 *
 * @param {Step} step - The step to execute
 * @param {string} projectDir - Target project directory
 * @param {Object} [options] - Execution options
 * @param {AbortSignal} [options.signal] - External abort signal, merged with the step timeout
 * @param {number} [options.timeout] - Timeout in milliseconds (per prompt and for the whole step)
 * @param {(chunk: string) => void} [options.onOutput] - Streaming callback
 * @param {boolean} [options.continueSession] - Forwarded to the improvement
 *   run; also tags its label with " (prod)" and disables fast-completion detection
 * @param {string} [options.promptOverride] - Used verbatim as the improvement
 *   prompt instead of SAFETY_PREAMBLE + step.prompt
 * @returns {Promise<StepResult>} Step result
 */
export async function executeSingleStep(step, projectDir, { signal, timeout, onOutput, continueSession, promptOverride } = {}) {
  const stepLabel = `Step ${step.number}: ${step.name}`;
  info(`${stepLabel} — starting`);

  // Hard cap on the whole step (improvement + retries + fast-retry +
  // doc-update). Each runPrompt call has its own timeout, so without this cap
  // the phases could add up to far more than the user expects.
  const stepTimeoutMs = timeout || DEFAULT_STEP_TIMEOUT_MS;
  const stepAbort = new AbortController();
  const onStepTimeout = () => {
    warn(`${stepLabel} — step timeout (${Math.round(stepTimeoutMs / 60000)} min) reached. Aborting step.`);
    stepAbort.abort();
  };
  const stepTimer = setTimeout(onStepTimeout, stepTimeoutMs);
  stepTimer.unref();

  // Either the caller's signal (e.g., SIGINT) or the step timer can cancel.
  const effectiveSignal = signal
    ? AbortSignal.any([signal, stepAbort.signal])
    : stepAbort.signal;

  try {
    const startedAt = Date.now();
    const headBefore = await getHeadHash();

    // Options shared by every Claude invocation in this step.
    const sharedOptions = { signal: effectiveSignal, timeout, onOutput };

    // Phase 1: the improvement prompt itself.
    const firstRun = await runPrompt(promptOverride || (SAFETY_PREAMBLE + step.prompt), projectDir, {
      label: `Step ${step.number} — ${step.name}${continueSession ? ' (prod)' : ''}`,
      ...sharedOptions,
      continueSession: continueSession || false,
    });

    if (!firstRun.success) {
      const duration = Date.now() - startedAt;
      logError(`${stepLabel} — failed after ${firstRun.attempts} attempts`);
      notify(
        `NightyTidy: Step ${step.number} Failed`,
        `Step ${step.number} (${step.name}) failed after ${firstRun.attempts} attempts. Skipped — run continuing.`
      );
      // Only forward error details that are actually present.
      const failureDetails = {};
      if (firstRun.errorType) failureDetails.errorType = firstRun.errorType;
      if (firstRun.retryAfterMs) failureDetails.retryAfterMs = firstRun.retryAfterMs;
      return makeStepResult(step, 'failed', firstRun, duration, failureDetails);
    }

    // Phase 2: fast-completion detection — skip on minimal output, otherwise
    // retry once with extra context.
    let improvement = firstRun;
    let fastRetried = false;
    const finishedTooFast = !continueSession && firstRun.duration < FAST_COMPLETION_THRESHOLD_MS;

    if (finishedTooFast) {
      const outputLen = (firstRun.output || '').length;

      if (outputLen < SKIP_OUTPUT_THRESHOLD_CHARS) {
        // Fast and nearly silent: the prompt most likely does not apply here.
        warn(
          `${stepLabel}: completed in ${Math.round(firstRun.duration / 1000)}s ` +
          `with only ${outputLen} chars of output — marking as skipped ` +
          `(prompt likely not applicable to this codebase)`
        );
        return makeStepResult(step, 'skipped', firstRun, Date.now() - startedAt);
      }

      // Fast but talkative: give it one more try with a nudge.
      warn(
        `${stepLabel}: completed in ${Math.round(firstRun.duration / 1000)}s — ` +
        `suspiciously fast (threshold: ${FAST_COMPLETION_THRESHOLD_MS / 1000}s). Retrying with context.`
      );
      fastRetried = true;

      const secondRun = await runPrompt(
        SAFETY_PREAMBLE + FAST_RETRY_PREFIX + step.prompt,
        projectDir,
        { label: `Step ${step.number} — ${step.name} (fast-retry)`, ...sharedOptions },
      );

      if (!secondRun.success) {
        warn(`${stepLabel}: fast-retry failed — falling back to original fast result`);
      } else {
        info(`${stepLabel}: fast-retry succeeded — using retry result`);
        // Keep the retry's output but account for both runs.
        improvement = {
          ...secondRun,
          cost: sumCosts(firstRun.cost, secondRun.cost),
          attempts: firstRun.attempts + secondRun.attempts,
        };
      }
    }

    // Phase 3: doc update, continuing the Claude session that made the changes.
    const docRun = await runPrompt(SAFETY_PREAMBLE + DOC_UPDATE_PROMPT, projectDir, {
      label: `Step ${step.number} — doc update`,
      ...sharedOptions,
      continueSession: true,
    });

    if (!docRun.success) {
      warn(`${stepLabel}: Doc update failed after retries — improvement changes preserved but docs may be stale`);
    }

    // Cost of the step = improvement run(s) + doc update.
    const totalCost = sumCosts(improvement.cost, docRun.cost);

    // Phase 4: make sure the work is committed.
    const committed = await hasNewCommit(headBefore);
    if (!committed) {
      // Claude made no commit — commit on its behalf.
      try {
        await fallbackCommit(step.number, step.name);
      } catch (err) {
        warn(`${stepLabel}: automatic commit failed (${err.message}) — changes remain staged`);
      }
    } else {
      info(`${stepLabel}: committed by Claude Code \u2713`);
      // Sweep: Claude often commits its code changes but forgets to git-add
      // report or audit files it created. Commit whatever is left so those
      // files are not lost if the user stops the run.
      try {
        const swept = await fallbackCommit(step.number, step.name);
        if (swept) info(`${stepLabel}: swept uncommitted files \u2713`);
      } catch (err) {
        warn(`${stepLabel}: sweep commit failed (${err.message}) — some files may remain unstaged`);
      }
    }

    const duration = Date.now() - startedAt;
    info(`${stepLabel} — completed (${Math.round(duration / 1000)}s)`);

    return makeStepResult(
      step,
      'completed',
      { ...improvement, cost: totalCost },
      duration,
      fastRetried ? { suspiciousFast: true } : {},
    );
  } finally {
    // Always disarm the step timer, whichever way the step ended.
    clearTimeout(stepTimer);
  }
}
|
|
344
|
+
|
|
345
|
+
// Waits between rate-limit probes, in milliseconds. Grows from 2 minutes up
// to a 2-hour cap; the cap repeats four times so the schedule spans roughly
// 9.9 hours in total, enough to cover 5hr+ usage caps.
const BACKOFF_SCHEDULE_MS = [2, 5, 15, 30, 60, 120, 120, 120, 120]
  .map((minutes) => minutes * 60_000);
|
|
357
|
+
|
|
358
|
+
/**
 * Wait for a rate limit to clear.
 *
 * If the API supplied a positive retry-after delay, sleep for that long plus
 * a 10-second buffer and report success without probing. Otherwise walk the
 * BACKOFF_SCHEDULE_MS waits; after each wait a tiny probe prompt is sent to
 * see whether the API answers again.
 *
 * The abort signal is honored during the sleeps and during the probe: the
 * probe receives the signal, and an abort observed after it ends the wait
 * instead of being reported as "resumed".
 *
 * @param {number|null} retryAfterMs - Suggested retry delay from API, or null
 * @param {AbortSignal|undefined} signal - Abort signal for cancellation
 * @param {string} projectDir - Project directory for probe prompts
 * @returns {Promise<boolean>} True if the run should resume, false if the
 *   schedule was exhausted or the wait was aborted
 */
async function waitForRateLimit(retryAfterMs, signal, projectDir) {
  if (signal?.aborted) return false;

  // If API gave us a retry-after, use it (plus 10s buffer)
  if (retryAfterMs && retryAfterMs > 0) {
    info(`Rate limit: API says retry after ${Math.ceil(retryAfterMs / 1000)}s`);
    await sleep(retryAfterMs + 10_000, signal);
    return !signal?.aborted;
  }

  // Otherwise, exponential backoff with probe attempts
  for (let attempt = 0; attempt < BACKOFF_SCHEDULE_MS.length; attempt++) {
    const waitMs = BACKOFF_SCHEDULE_MS[attempt];
    info(`Rate limit: waiting ${Math.ceil(waitMs / 60_000)} minutes before probe (attempt ${attempt + 1}/${BACKOFF_SCHEDULE_MS.length})`);

    await sleep(waitMs, signal);
    if (signal?.aborted) return false;

    // Probe: run a tiny prompt to check if rate limit is lifted. The signal
    // is forwarded so a cancellation does not have to wait out the probe.
    info('Rate limit: probing API availability...');
    const probe = await runPrompt('Reply with the single word OK.', projectDir, {
      label: 'rate-limit-probe',
      retries: 0,
      timeout: 60_000,
      signal,
    });

    // An abort during the probe must stop the run, not look like a
    // "non-rate-limit error" that resumes it.
    if (signal?.aborted) return false;

    if (probe.success) {
      info('Rate limit: probe succeeded — API available again');
      return true;
    }

    if (probe.errorType !== ERROR_TYPE.RATE_LIMIT) {
      // Different error — let the main loop handle it
      info('Rate limit: probe returned non-rate-limit error — resuming');
      return true;
    }

    warn(`Rate limit: probe still rate-limited (attempt ${attempt + 1}/${BACKOFF_SCHEDULE_MS.length})`);
  }

  // Exhausted all backoff attempts
  logError('Rate limit: exhausted all retry attempts — stopping run');
  return false;
}
|
|
413
|
+
|
|
414
|
+
/**
 * Execute multiple improvement steps sequentially.
 *
 * For each step: fire `onStepStart`, run it with executeSingleStep, record
 * the result, then fire `onStepComplete`, `onStepSkip` or `onStepFail`
 * according to its status. The loop stops early when the abort signal is set.
 *
 * Rate limits: a step that failed with ERROR_TYPE.RATE_LIMIT pauses the run
 * (`onRateLimitPause` receives the suggested delay and a progress snapshot).
 * If the wait succeeds, the failed result is discarded and the same step is
 * retried. If the wait gives up or is aborted, the step is counted and
 * reported as failed so the counters stay consistent with `results`, and the
 * run stops.
 *
 * Step failures are recorded in the results rather than thrown.
 *
 * @param {Step[]} selectedSteps - Steps to execute
 * @param {string} projectDir - Target project directory
 * @param {ExecuteStepsOptions} [options] - Execution options
 * @returns {Promise<ExecutionResults>} Results and per-status counters
 */
export async function executeSteps(selectedSteps, projectDir, { signal, timeout, onStepStart, onStepComplete, onStepFail, onStepSkip, onOutput, onRateLimitPause, onRateLimitResume } = {}) {
  // Integrity is checked against the full prompt set, not just the selection;
  // a mismatch only logs a warning.
  verifyStepsIntegrity(STEPS);

  const results = [];
  const totalSteps = selectedSteps.length;
  const runStart = Date.now();
  let completedCount = 0;
  let failedCount = 0;
  let skippedCount = 0;

  for (let i = 0; i < totalSteps; i++) {
    if (signal?.aborted) {
      info('Abort signal received — stopping after previous step');
      break;
    }

    const step = selectedSteps[i];
    onStepStart?.(step, i, totalSteps);

    const stepResult = await executeSingleStep(step, projectDir, { signal, timeout, onOutput });
    results.push(stepResult);

    // Success path — increment and notify
    if (stepResult.status === 'completed') {
      completedCount++;
      onStepComplete?.(step, i, totalSteps);
      continue;
    }

    // Skipped — prompt not applicable to this codebase
    if (stepResult.status === 'skipped') {
      skippedCount++;
      onStepSkip?.(step, i, totalSteps);
      continue;
    }

    // Rate-limit: pause and wait, then retry the same step
    if (stepResult.errorType === ERROR_TYPE.RATE_LIMIT) {
      info('Rate limit detected — pausing run');
      onRateLimitPause?.(stepResult.retryAfterMs, {
        results: [...results],
        completedCount,
        failedCount,
        skippedCount,
        currentStepIndex: i,
      });

      const resumed = await waitForRateLimit(stepResult.retryAfterMs, signal, projectDir);
      if (!resumed) {
        info('Rate limit wait ended — stopping run');
        // The rate-limited result stays in `results` as 'failed', so count
        // and report it; otherwise the counters would not add up to the
        // number of recorded results.
        failedCount++;
        onStepFail?.(step, i, totalSteps);
        break;
      }
      onRateLimitResume?.();
      info('Rate limit cleared — resuming run');

      // Remove the failed result and retry the same step
      results.pop();
      i--;
      continue;
    }

    // Other failure — record and notify
    failedCount++;
    onStepFail?.(step, i, totalSteps);
  }

  const totalDuration = Date.now() - runStart;

  return {
    results,
    totalDuration,
    completedCount,
    failedCount,
    skippedCount,
  };
}
|
|
506
|
+
|
|
507
|
+
/**
 * Build the markdown filename for a step prompt.
 *
 * The name is the zero-padded (2 digit) step number, a dash, and a slug of
 * the step name: lower-cased, with every run of characters outside a-z/0-9
 * collapsed to a single dash. Separator dashes are trimmed from both ends of
 * the slug, so a name that starts or ends with punctuation does not produce
 * a doubled or dangling dash.
 *
 * @param {Step} step - Step providing `number` and `name`
 * @returns {string} Filename such as "01-documentation.md"
 */
function promptFilename(step) {
  const prefix = String(step.number).padStart(2, '0');
  const slug = step.name
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '-')
    // Runs are already collapsed, so at most one dash sits at each end.
    .replace(/^-|-$/g, '');
  return `${prefix}-${slug}.md`;
}
|
|
515
|
+
|
|
516
|
+
/**
 * Mirror every step prompt into `<projectDir>/audit-reports/refactor-prompts/`.
 *
 * Creates the folder if needed, deletes any `.md` file in it whose name does
 * not correspond to a current step (e.g. left over from a rename), then
 * writes one file per entry in STEPS, overwriting existing files so updates
 * are always reflected.
 *
 * Synchronous. Any filesystem error is caught and logged as a warning, so
 * this function does not throw; a failure part-way leaves the folder
 * partially synced.
 *
 * @param {string} projectDir - Target project root directory
 */
export function copyPromptsToProject(projectDir) {
  try {
    const targetDir = path.join(projectDir, 'audit-reports', 'refactor-prompts');
    mkdirSync(targetDir, { recursive: true });

    // Names every current step maps to; any other .md file here is stale.
    const expectedNames = new Set(STEPS.map((step) => promptFilename(step)));
    const staleNames = readdirSync(targetDir)
      .filter((entry) => entry.endsWith('.md') && !expectedNames.has(entry));

    staleNames.forEach((entry) => {
      unlinkSync(path.join(targetDir, entry));
      info(`Removed stale prompt file: ${entry}`);
    });

    // Create missing files and overwrite existing ones.
    STEPS.forEach((step) => {
      const destination = path.join(targetDir, promptFilename(step));
      writeFileSync(destination, step.prompt, 'utf8');
    });

    info(`Synced ${STEPS.length} prompts to audit-reports/refactor-prompts/`);
  } catch (err) {
    warn(`Failed to copy prompts to project: ${err.message}`);
  }
}
|