@sebastianandreasson/pi-autonomous-agents 0.3.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -2
- package/SETUP.md +3 -0
- package/docs/PI_SUPERVISOR.md +4 -2
- package/package.json +1 -1
- package/src/index.mjs +1 -0
- package/src/pi-client.mjs +1 -1
- package/src/pi-config.mjs +4 -1
- package/src/pi-prompts.mjs +47 -0
- package/src/pi-repo.mjs +246 -12
- package/src/pi-report.mjs +11 -0
- package/src/pi-rpc-adapter.mjs +48 -4
- package/src/pi-supervisor.mjs +209 -24
- package/src/pi-telemetry.mjs +26 -1
- package/templates/DEVELOPER.md +3 -0
- package/templates/TESTER.md +7 -4
- package/templates/pi.config.example.json +2 -0
package/README.md
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
- a fast verification step
|
|
7
7
|
- a skeptical `tester` pass
|
|
8
8
|
- optional periodic multimodal visual review
|
|
9
|
-
-
|
|
9
|
+
- tester-owned final commit by default
|
|
10
10
|
|
|
11
11
|
The package is intentionally generic. It does not know how to navigate or test a specific app on its own.
|
|
12
12
|
|
|
@@ -18,7 +18,7 @@ The package is intentionally generic. It does not know how to navigate or test a
|
|
|
18
18
|
- telemetry
|
|
19
19
|
- loop guards, timeout guards, and retries
|
|
20
20
|
- tester feedback + visual feedback handoff
|
|
21
|
-
- harness
|
|
21
|
+
- optional legacy harness git finalize step for `commitMode: "plan"`
|
|
22
22
|
- multimodal visual review client
|
|
23
23
|
|
|
24
24
|
## What Stays Per Project
|
|
@@ -93,6 +93,7 @@ The command removes configured harness history/runtime files and verifies that n
|
|
|
93
93
|
|
|
94
94
|
For prompt debugging, the harness also writes the exact assembled prompt for the current role to `.pi-last-prompt.txt` by default.
|
|
95
95
|
For flow debugging, it also writes a machine-readable `.pi-last-iteration.json` summary with the selected task, tester verdict, commit-plan state, and terminal reason.
|
|
96
|
+
For run isolation, the supervisor also maintains `.pi-runtime/active-run.json` and stores PI sessions plus per-run telemetry under `.pi-runtime/runs/<runId>/`.
|
|
96
97
|
|
|
97
98
|
## Generic Contracts
|
|
98
99
|
|
|
@@ -113,10 +114,16 @@ Keep TODO items extremely small and implementation-shaped when using weaker loca
|
|
|
113
114
|
|
|
114
115
|
The adapter heartbeat is PI-RPC-event based. Streaming shell output does not count as progress on its own, so long-running tools should rely on the tool-aware watchdog thresholds rather than terminal streaming.
|
|
115
116
|
|
|
117
|
+
The supervisor now enforces single-run ownership per repo/config. If a stale run crashed mid-iteration, the next run recovers the unfinished iteration number from `.pi-state.json` instead of silently rolling forward.
|
|
118
|
+
|
|
116
119
|
`piModel` remains the default text model, but you can override specific roles with `roleModels` such as `developer`, `developerRetry`, `developerFix`, `tester`, and `visualReview`. `testerCommit` is only relevant if you opt back into `commitMode: "plan"`.
|
|
117
120
|
|
|
118
121
|
By default, successful tester passes should stage and create the commit directly in the same PI turn. The old commit-plan parsing flow is still available as `commitMode: "plan"`, but it is now a compatibility mode rather than the default.
|
|
119
122
|
|
|
120
123
|
Prompt/context handoff is compact by default. The harness now caps prior feedback excerpts, changed-file lists, verification excerpts, and prompt note handoff. If needed, tune `maxPromptChangedFiles`, `maxVisualFeedbackLines`, `maxTesterFeedbackLines`, `maxPromptNotesLines`, and `maxVerificationExcerptLines`.
|
|
121
124
|
|
|
125
|
+
The default coding tool mix is now safer for local models: `read,edit,write,find,ls,bash`. Prompts explicitly steer source inspection toward `read` and reserve shell usage for `git`, tests, and narrow diagnostics.
|
|
126
|
+
|
|
127
|
+
The harness also emits lightweight large-file warnings for touched source/spec files and carries them into `.pi-last-iteration.json`, `pi-harness report`, and relevant prompts. Tune `largeFileWarningLines` and `largeSpecWarningLines` if needed.
|
|
128
|
+
|
|
122
129
|
The harness expects screenshot capture to produce a `manifest.json` plus image files under the configured visual capture directory.
|
package/SETUP.md
CHANGED
|
@@ -47,6 +47,7 @@ If the repo uses another package manager already, use the repo-native equivalent
|
|
|
47
47
|
- `developerInstructionsFile`: `pi/DEVELOPER.md`
|
|
48
48
|
- `testerInstructionsFile`: `pi/TESTER.md`
|
|
49
49
|
- `commitMode`: normally `agent`
|
|
50
|
+
- `promptMode`: normally `compact`
|
|
50
51
|
- `testCommand`: a fast bounded verification command for this repo
|
|
51
52
|
- `visualCaptureCommand`: only if this repo has a real screenshot capture flow
|
|
52
53
|
- `models` / `piModel` / `visualReviewModel` / `roleModels`: configure the models actually available in this environment
|
|
@@ -125,6 +126,7 @@ Recommended pattern:
|
|
|
125
126
|
- local or slightly stronger model for `tester`
|
|
126
127
|
- stronger frontier model for `visualReview` only if available
|
|
127
128
|
- keep `commitMode` as `agent` unless the repo explicitly needs legacy harness-managed commit-plan parsing
|
|
129
|
+
- keep large-file thresholds sensible for local models (`largeFileWarningLines`, `largeSpecWarningLines`)
|
|
128
130
|
|
|
129
131
|
Example shape:
|
|
130
132
|
|
|
@@ -192,6 +194,7 @@ For flow debugging, inspect `.pi-last-iteration.json` after a run. It summarizes
|
|
|
192
194
|
- Do not enable visual review unless the repo actually has a usable capture command and model config.
|
|
193
195
|
- Keep changes minimal and local to harness setup.
|
|
194
196
|
- Prefer very small, implementation-shaped TODO items for local models. Broad tasks tend to create long turns, retries, and weak tester behavior.
|
|
197
|
+
- Prefer `read` for code inspection and keep shell usage focused on `git`, tests, and narrow diagnostics, especially for weaker local models.
|
|
195
198
|
|
|
196
199
|
## What To Report Back
|
|
197
200
|
|
package/docs/PI_SUPERVISOR.md
CHANGED
|
@@ -30,7 +30,7 @@ Main package files:
|
|
|
30
30
|
- `src/pi-client.mjs`: transport layer
|
|
31
31
|
- `src/pi-rpc-adapter.mjs`: built-in adapter from supervisor JSON to `pi --mode rpc`
|
|
32
32
|
- `src/pi-config.mjs`: config loader
|
|
33
|
-
- `src/pi-repo.mjs`: repo helpers, verification runner, git finalize step
|
|
33
|
+
- `src/pi-repo.mjs`: repo helpers, verification runner, and optional legacy git finalize step
|
|
34
34
|
- `src/pi-telemetry.mjs`: telemetry writer/reader
|
|
35
35
|
- `src/pi-prompts.mjs`: default prompt builders
|
|
36
36
|
- `src/pi-visual-review.mjs`: multimodal visual-review worker
|
|
@@ -126,7 +126,7 @@ Request shape:
|
|
|
126
126
|
"runtimeDir": "/absolute/repo/path/.pi-runtime",
|
|
127
127
|
"piCli": "pi",
|
|
128
128
|
"model": "local/model-name",
|
|
129
|
-
"tools": "read,
|
|
129
|
+
"tools": "read,edit,write,find,ls,bash",
|
|
130
130
|
"thinking": "",
|
|
131
131
|
"noExtensions": false,
|
|
132
132
|
"noSkills": false,
|
|
@@ -170,6 +170,8 @@ The default flow keeps commit ownership with the active agent:
|
|
|
170
170
|
|
|
171
171
|
If a repo explicitly needs the older harness-managed commit-plan flow, set `commitMode` to `plan`. In that mode, `testerCommit` and parsed commit plans are used as a compatibility path rather than the default.
|
|
172
172
|
|
|
173
|
+
For source inspection, prompts prefer `read` and reserve shell usage for `git`, tests, and narrow diagnostics. Large shell file reads are more likely to be truncated under context pressure than focused `read` calls.
|
|
174
|
+
|
|
173
175
|
## Persistent Handoffs
|
|
174
176
|
|
|
175
177
|
The harness persists two cross-iteration handoff files:
|
package/package.json
CHANGED
package/src/index.mjs
CHANGED
package/src/pi-client.mjs
CHANGED
|
@@ -103,7 +103,7 @@ async function runAdapterTurn({ config, model, sessionId, sessionFile, prompt, i
|
|
|
103
103
|
instructionsFile: config.instructionsFile,
|
|
104
104
|
developerInstructionsFile: config.developerInstructionsFile,
|
|
105
105
|
testerInstructionsFile: config.testerInstructionsFile,
|
|
106
|
-
runtimeDir: config.piRuntimeDir,
|
|
106
|
+
runtimeDir: config.runRuntimeDir || config.piRuntimeDir,
|
|
107
107
|
piCli: config.piCli,
|
|
108
108
|
model: model ?? config.piModel,
|
|
109
109
|
tools: config.piTools,
|
package/src/pi-config.mjs
CHANGED
|
@@ -246,6 +246,7 @@ export function loadConfig(mode = 'once') {
|
|
|
246
246
|
lastPromptFile: resolveFromCwd(cwd, 'PI_LAST_PROMPT_FILE', file.lastPromptFile, '.pi-last-prompt.txt'),
|
|
247
247
|
lastIterationSummaryFile: resolveFromCwd(cwd, 'PI_LAST_ITERATION_SUMMARY_FILE', file.lastIterationSummaryFile, '.pi-last-iteration.json'),
|
|
248
248
|
piRuntimeDir: resolveFromCwd(cwd, 'PI_RUNTIME_DIR', file.piRuntimeDir, '.pi-runtime'),
|
|
249
|
+
activeRunFile: resolveFromCwd(cwd, 'PI_ACTIVE_RUN_FILE', file.activeRunFile, '.pi-runtime/active-run.json'),
|
|
249
250
|
piCli: readString('PI_CLI', file.piCli, 'pi'),
|
|
250
251
|
piModel,
|
|
251
252
|
piModelProfile: resolvedPiModel,
|
|
@@ -258,7 +259,9 @@ export function loadConfig(mode = 'once') {
|
|
|
258
259
|
maxTesterFeedbackLines: readInt('PI_MAX_TESTER_FEEDBACK_LINES', file.maxTesterFeedbackLines, 32),
|
|
259
260
|
maxPromptNotesLines: readInt('PI_MAX_PROMPT_NOTES_LINES', file.maxPromptNotesLines, 16),
|
|
260
261
|
maxVerificationExcerptLines: readInt('PI_MAX_VERIFICATION_EXCERPT_LINES', file.maxVerificationExcerptLines, 40),
|
|
261
|
-
|
|
262
|
+
largeFileWarningLines: readInt('PI_LARGE_FILE_WARNING_LINES', file.largeFileWarningLines, 500),
|
|
263
|
+
largeSpecWarningLines: readInt('PI_LARGE_SPEC_WARNING_LINES', file.largeSpecWarningLines, 300),
|
|
264
|
+
piTools: readString('PI_TOOLS', file.piTools, 'read,edit,write,find,ls,bash'),
|
|
262
265
|
piThinking: readString('PI_THINKING', file.piThinking, ''),
|
|
263
266
|
piNoExtensions: readBool('PI_NO_EXTENSIONS', file.piNoExtensions, false),
|
|
264
267
|
piNoSkills: readBool('PI_NO_SKILLS', file.piNoSkills, false),
|
package/src/pi-prompts.mjs
CHANGED
|
@@ -40,6 +40,20 @@ function formatChangedFilesSection(files, maxFiles) {
|
|
|
40
40
|
return lines.join('\n')
|
|
41
41
|
}
|
|
42
42
|
|
|
43
|
+
/**
 * Render the optional "large file risk" section that is spliced into prompts.
 *
 * @param {Array<{file: string, lineCount: number, kind: string}>} warnings
 *   Warning records. Anything that is not an array is treated as empty and
 *   falsy entries are ignored.
 * @returns {string} An empty string when there is nothing to report; otherwise
 *   a section wrapped in leading/trailing newlines that lists at most the
 *   first three usable warnings.
 */
function formatLargeFileRiskHint(warnings) {
	if (!Array.isArray(warnings)) {
		return ''
	}

	// Keep only the first three truthy entries, in their incoming order.
	const bullets = []
	for (const entry of warnings) {
		if (!entry) {
			continue
		}
		if (bullets.length === 3) {
			break
		}
		const specSuffix = entry.kind === 'large_spec' ? ', spec' : ''
		bullets.push(`- ${entry.file} (${entry.lineCount} lines${specSuffix})`)
	}

	if (bullets.length === 0) {
		return ''
	}

	return [
		'',
		'Large file risk in touched files:',
		...bullets,
		'Prefer helper extraction, smaller scoped edits, or test splitting over broad in-place edits.',
		'',
	].join('\n')
}
|
|
56
|
+
|
|
43
57
|
function displayPath(config, filePath) {
|
|
44
58
|
const relativePath = path.relative(config.cwd, filePath)
|
|
45
59
|
if (
|
|
@@ -160,6 +174,9 @@ Harness rules:
|
|
|
160
174
|
- Start by checking git status so you know whether unrelated changes already exist.
|
|
161
175
|
- Update code, config, and docs only as needed for the selected task.
|
|
162
176
|
- Tick only the checkbox items that are actually completed.
|
|
177
|
+
- Use read for source inspection. Use bash only for git, tests, and narrow diagnostics.
|
|
178
|
+
- Do not build edits from large sed/grep output or from memory after partial shell reads.
|
|
179
|
+
- If a snippet seems incomplete, reread a smaller exact window with read instead of another large overlapping shell range.
|
|
163
180
|
- If blocked, add a brief note directly under the relevant task in ${taskFile} explaining the blocker, then stop.
|
|
164
181
|
- Do not create the final commit during the developer pass.
|
|
165
182
|
${staleEditRecoveryRules()}
|
|
@@ -180,6 +197,9 @@ Rules:
|
|
|
180
197
|
- Start with git status.
|
|
181
198
|
- Select the first unchecked actionable checkbox in phase order.
|
|
182
199
|
- Keep changes minimal and scoped.
|
|
200
|
+
- Use read for source inspection. Use bash only for git, tests, and narrow diagnostics.
|
|
201
|
+
- If a snippet seems incomplete, reread a smaller exact window with read instead of another large overlapping shell range.
|
|
202
|
+
- Do not edit from memory after partial shell output.
|
|
183
203
|
- Tick only completed items.
|
|
184
204
|
- If blocked, note it under the task in ${taskFile} and stop.
|
|
185
205
|
- Do not touch lockfiles, generated files, or unrelated assets.
|
|
@@ -203,11 +223,13 @@ export function buildFixPrompt(config, recentVerificationOutput, options = {}) {
|
|
|
203
223
|
config.usingBundledDeveloperInstructions,
|
|
204
224
|
)
|
|
205
225
|
const findings = clampLines(recentVerificationOutput, configMaxLines(config, 'maxVerificationExcerptLines', 40))
|
|
226
|
+
const largeFileRiskHint = formatLargeFileRiskHint(options.largeFileWarnings)
|
|
206
227
|
|
|
207
228
|
if (!config.usingBundledDeveloperInstructions) {
|
|
208
229
|
return `Read ${taskFile} and ${instructionsFile}.
|
|
209
230
|
${authorityLine}${visualFeedbackSection}
|
|
210
231
|
${testerFeedbackSection}
|
|
232
|
+
${largeFileRiskHint}
|
|
211
233
|
|
|
212
234
|
The tester step found a real problem in the current implementation. Fix only the product behavior related to the current phase and current task.
|
|
213
235
|
|
|
@@ -218,6 +240,9 @@ Harness rules:
|
|
|
218
240
|
- Start by checking git status so you know which files are already dirty.
|
|
219
241
|
- Do not paper over product bugs by weakening tests.
|
|
220
242
|
- Keep changes minimal and focused on the failing behavior.
|
|
243
|
+
- Use read for source inspection. Use bash only for git, tests, and narrow diagnostics.
|
|
244
|
+
- If a snippet seems incomplete, reread a smaller exact window with read instead of another large overlapping shell range.
|
|
245
|
+
- Do not edit from memory after partial shell output.
|
|
221
246
|
- Do not perform speculative cleanup or unrelated refactors in this pass.
|
|
222
247
|
- Do not create the final commit during the developer fix pass.
|
|
223
248
|
${staleEditRecoveryRules()}
|
|
@@ -230,6 +255,7 @@ Before stopping:
|
|
|
230
255
|
return `Read ${taskFile} and ${instructionsFile}.
|
|
231
256
|
${authorityLine}${visualFeedbackSection}
|
|
232
257
|
${testerFeedbackSection}
|
|
258
|
+
${largeFileRiskHint}
|
|
233
259
|
|
|
234
260
|
The tester step found a real problem in the current implementation. Fix only the product behavior related to the current phase and current task.
|
|
235
261
|
|
|
@@ -240,6 +266,9 @@ Rules:
|
|
|
240
266
|
- Start with git status.
|
|
241
267
|
- Keep the fix narrow.
|
|
242
268
|
- Do not weaken tests to hide product bugs.
|
|
269
|
+
- Use read for source inspection. Use bash only for git, tests, and narrow diagnostics.
|
|
270
|
+
- If a snippet seems incomplete, reread a smaller exact window with read instead of another large overlapping shell range.
|
|
271
|
+
- Do not edit from memory after partial shell output.
|
|
243
272
|
- Do not perform speculative cleanup or unrelated refactors.
|
|
244
273
|
- Do not create the final commit.
|
|
245
274
|
${staleEditRecoveryRules()}
|
|
@@ -259,12 +288,14 @@ export function buildSteeringPrompt(config, reason, options = {}) {
|
|
|
259
288
|
config.developerInstructionsFile,
|
|
260
289
|
config.usingBundledDeveloperInstructions,
|
|
261
290
|
)
|
|
291
|
+
const largeFileRiskHint = formatLargeFileRiskHint(options.largeFileWarnings)
|
|
262
292
|
|
|
263
293
|
if (!config.usingBundledDeveloperInstructions) {
|
|
264
294
|
return `Continue from the current repo state.
|
|
265
295
|
Read ${taskFile} and ${instructionsFile}.
|
|
266
296
|
${authorityLine}${visualFeedbackSection}
|
|
267
297
|
${testerFeedbackSection}
|
|
298
|
+
${largeFileRiskHint}
|
|
268
299
|
|
|
269
300
|
Reason for this follow-up: ${reason}
|
|
270
301
|
|
|
@@ -272,9 +303,11 @@ Select the first unchecked actionable checkbox in the current phase, complete on
|
|
|
272
303
|
|
|
273
304
|
Additional harness guardrails:
|
|
274
305
|
- Start by checking git status.
|
|
306
|
+
- Use read for source inspection. Use bash only for git, tests, and narrow diagnostics.
|
|
275
307
|
- Do not repeat the same tool call over and over.
|
|
276
308
|
- If you already read a file, use that context instead of rereading it unless something changed.
|
|
277
309
|
- If an edit fails once, reread the file before retrying. Do not repeat the same exact edit attempt.
|
|
310
|
+
- If a snippet seems incomplete, reread a smaller exact window with read instead of another large overlapping shell range.
|
|
278
311
|
- If you are stuck, make the smallest decisive next action or stop and state the blocker.`
|
|
279
312
|
}
|
|
280
313
|
|
|
@@ -282,15 +315,18 @@ Additional harness guardrails:
|
|
|
282
315
|
Read ${taskFile} and ${instructionsFile}.
|
|
283
316
|
${authorityLine}${visualFeedbackSection}
|
|
284
317
|
${testerFeedbackSection}
|
|
318
|
+
${largeFileRiskHint}
|
|
285
319
|
|
|
286
320
|
Reason for this follow-up: ${reason}
|
|
287
321
|
|
|
288
322
|
Select the first unchecked actionable checkbox in the current phase, complete one coherent task, tick completed items, run verification, and stop.
|
|
289
323
|
|
|
290
324
|
Additional guardrails:
|
|
325
|
+
- Use read for source inspection. Use bash only for git, tests, and narrow diagnostics.
|
|
291
326
|
- Do not repeat the same tool call over and over.
|
|
292
327
|
- If you already read a file, use that context instead of rereading it unless something changed.
|
|
293
328
|
- If an edit fails once, reread the file before retrying. Do not repeat the same exact edit attempt.
|
|
329
|
+
- If a snippet seems incomplete, reread a smaller exact window with read instead of another large overlapping shell range.
|
|
294
330
|
- Prefer the configured smoke verification path and one narrow targeted check over long full-flow Playwright specs.
|
|
295
331
|
- If you are stuck, make the smallest decisive next action or stop and state the blocker.`
|
|
296
332
|
}
|
|
@@ -303,6 +339,7 @@ export function buildTesterPrompt(config, {
|
|
|
303
339
|
reason = 'tester_review',
|
|
304
340
|
visualFeedback = '',
|
|
305
341
|
testerFeedback = '',
|
|
342
|
+
largeFileWarnings = [],
|
|
306
343
|
}) {
|
|
307
344
|
const taskFile = displayPath(config, config.taskFile)
|
|
308
345
|
const instructionsFile = displayPath(config, config.testerInstructionsFile)
|
|
@@ -326,11 +363,13 @@ export function buildTesterPrompt(config, {
|
|
|
326
363
|
config.usingBundledTesterInstructions,
|
|
327
364
|
)
|
|
328
365
|
const passOwnership = testerPassOwnershipRules(config)
|
|
366
|
+
const largeFileRiskHint = formatLargeFileRiskHint(largeFileWarnings)
|
|
329
367
|
|
|
330
368
|
if (!config.usingBundledTesterInstructions) {
|
|
331
369
|
return `Read ${taskFile} and ${instructionsFile}.
|
|
332
370
|
${authorityLine}${visualFeedbackSection}
|
|
333
371
|
${testerFeedbackSection}
|
|
372
|
+
${largeFileRiskHint}
|
|
334
373
|
|
|
335
374
|
You are the TESTER role. You are reviewing the most recent developer work from an independent quality and functionality perspective.
|
|
336
375
|
|
|
@@ -348,6 +387,8 @@ Rules:
|
|
|
348
387
|
- Start with git status.
|
|
349
388
|
- Follow repo-local tester instructions for what to verify and which commands to run.
|
|
350
389
|
- Prefer one focused review pass.
|
|
390
|
+
- Use read for source inspection. Use bash only for git, tests, and narrow diagnostics.
|
|
391
|
+
- If a snippet seems incomplete, reread a smaller exact window with read instead of another large overlapping shell range.
|
|
351
392
|
- If blocked or inconclusive, return VERDICT: BLOCKED.
|
|
352
393
|
- Do not hide real bugs with brittle tests.
|
|
353
394
|
- ${passOwnership.successRule.slice(2)}
|
|
@@ -370,6 +411,7 @@ Before stopping, end your final response with exactly one verdict line:
|
|
|
370
411
|
return `Read ${taskFile} and ${instructionsFile}.
|
|
371
412
|
${authorityLine}${visualFeedbackSection}
|
|
372
413
|
${testerFeedbackSection}
|
|
414
|
+
${largeFileRiskHint}
|
|
373
415
|
|
|
374
416
|
You are the TESTER role. You are reviewing the most recent developer work from an independent quality and functionality perspective.
|
|
375
417
|
|
|
@@ -385,9 +427,11 @@ ${changedFilesSection}
|
|
|
385
427
|
|
|
386
428
|
Rules:
|
|
387
429
|
- Start with git status.
|
|
430
|
+
- Use read for source inspection. Use bash only for git, tests, and narrow diagnostics.
|
|
388
431
|
- Run the repo verification command yourself: ${verificationCommand}
|
|
389
432
|
${indentBlock(innerLoopValidationRules(verificationCommand), '\t')}
|
|
390
433
|
- Prefer one focused browser-driven review pass.
|
|
434
|
+
- If a snippet seems incomplete, reread a smaller exact window with read instead of another large overlapping shell range.
|
|
391
435
|
- Do not hide real bugs with brittle tests.
|
|
392
436
|
- If blocked or inconclusive, return VERDICT: BLOCKED.
|
|
393
437
|
${indentBlock(passOwnership.successRule, '\t')}
|
|
@@ -415,6 +459,7 @@ export function buildCommitPrompt(config, {
|
|
|
415
459
|
reason = 'tester_passed_without_commit',
|
|
416
460
|
visualFeedback = '',
|
|
417
461
|
testerFeedback = '',
|
|
462
|
+
largeFileWarnings = [],
|
|
418
463
|
}) {
|
|
419
464
|
const taskFile = displayPath(config, config.taskFile)
|
|
420
465
|
const instructionsFile = displayPath(config, config.testerInstructionsFile)
|
|
@@ -433,10 +478,12 @@ export function buildCommitPrompt(config, {
|
|
|
433
478
|
developerNotes || '(none provided)',
|
|
434
479
|
configMaxLines(config, 'maxPromptNotesLines', 16),
|
|
435
480
|
)
|
|
481
|
+
const largeFileRiskHint = formatLargeFileRiskHint(largeFileWarnings)
|
|
436
482
|
|
|
437
483
|
return `Read ${taskFile} and ${instructionsFile}.
|
|
438
484
|
${authorityLine}${visualFeedbackSection}
|
|
439
485
|
${testerFeedbackSection}
|
|
486
|
+
${largeFileRiskHint}
|
|
440
487
|
|
|
441
488
|
You are the TESTER role. The implementation already passed functional review, but the final commit was not created.
|
|
442
489
|
|
package/src/pi-repo.mjs
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import fs from 'node:fs/promises'
|
|
2
2
|
import { readFileSync } from 'node:fs'
|
|
3
3
|
import process from 'node:process'
|
|
4
|
+
import { randomUUID } from 'node:crypto'
|
|
4
5
|
import { execFileSync, spawn } from 'node:child_process'
|
|
5
6
|
import path from 'node:path'
|
|
6
7
|
|
|
@@ -9,7 +10,17 @@ export function timestamp() {
|
|
|
9
10
|
}
|
|
10
11
|
|
|
11
12
|
/**
 * Append one timestamped line to the harness log and, when configured, mirror
 * the same line into a per-run log.
 *
 * Reads two environment variables:
 * - `PI_RUN_ID`: when non-blank, the line is tagged with `[run:<id>] `.
 * - `PI_RUN_LOG_FILE`: when non-blank and different from `logFile`, the line
 *   is appended there as well.
 *
 * Parent directories are created on demand for both targets.
 *
 * @param {string} logFile Path of the primary log file.
 * @param {string} message Text to log (a trailing newline is added).
 * @returns {Promise<void>}
 */
export async function appendLog(logFile, message) {
	const runId = String(process.env.PI_RUN_ID ?? '').trim()
	const runTag = runId === '' ? '' : `[run:${runId}] `
	const entry = `[${timestamp()}] ${runTag}${message}\n`

	const appendEntry = async (target) => {
		await fs.mkdir(path.dirname(target), { recursive: true })
		await fs.appendFile(target, entry, 'utf8')
	}

	await appendEntry(logFile)

	const runLogFile = String(process.env.PI_RUN_LOG_FILE ?? '').trim()
	if (runLogFile === '' || runLogFile === logFile) {
		// No separate per-run log, or it is the same file we just wrote.
		return
	}
	await appendEntry(runLogFile)
}
|
|
14
25
|
|
|
15
26
|
export function ensureRepo(cwd) {
|
|
@@ -30,7 +41,27 @@ export async function ensureFileExists(filePath, label) {
|
|
|
30
41
|
export async function readState(stateFile) {
|
|
31
42
|
try {
|
|
32
43
|
const raw = await fs.readFile(stateFile, 'utf8')
|
|
33
|
-
|
|
44
|
+
const parsed = JSON.parse(raw)
|
|
45
|
+
if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
|
|
46
|
+
throw new Error('Invalid state file payload')
|
|
47
|
+
}
|
|
48
|
+
return {
|
|
49
|
+
iteration: 0,
|
|
50
|
+
lastTransport: '',
|
|
51
|
+
lastPiModel: '',
|
|
52
|
+
sessionId: '',
|
|
53
|
+
sessionFile: '',
|
|
54
|
+
consecutiveFailures: 0,
|
|
55
|
+
successfulIterations: 0,
|
|
56
|
+
lastPhase: '',
|
|
57
|
+
lastStatus: '',
|
|
58
|
+
lastVerificationStatus: '',
|
|
59
|
+
lastVisualStatus: '',
|
|
60
|
+
lastRunAt: '',
|
|
61
|
+
runId: '',
|
|
62
|
+
inProgress: null,
|
|
63
|
+
...parsed,
|
|
64
|
+
}
|
|
34
65
|
} catch {
|
|
35
66
|
return {
|
|
36
67
|
iteration: 0,
|
|
@@ -38,22 +69,165 @@ export async function readState(stateFile) {
|
|
|
38
69
|
lastPiModel: '',
|
|
39
70
|
sessionId: '',
|
|
40
71
|
sessionFile: '',
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
72
|
+
consecutiveFailures: 0,
|
|
73
|
+
successfulIterations: 0,
|
|
74
|
+
lastPhase: '',
|
|
75
|
+
lastStatus: '',
|
|
76
|
+
lastVerificationStatus: '',
|
|
77
|
+
lastVisualStatus: '',
|
|
78
|
+
lastRunAt: '',
|
|
79
|
+
runId: '',
|
|
80
|
+
inProgress: null,
|
|
81
|
+
}
|
|
49
82
|
}
|
|
50
83
|
}
|
|
51
84
|
|
|
52
85
|
/**
 * Persist the supervisor state as pretty-printed JSON.
 *
 * The payload is first written to a temporary sibling file and then renamed
 * over `stateFile`, so a crash in the middle of the write cannot leave a
 * truncated state file behind (a truncated file would fail to parse on the
 * next read and lose the recorded iteration).
 *
 * @param {string} stateFile Destination path; parent directories are created.
 * @param {unknown} state JSON-serialisable state object.
 * @returns {Promise<void>}
 * @throws Propagates serialisation and filesystem errors.
 */
export async function writeState(stateFile, state) {
	const formatted = `${JSON.stringify(state, null, 2)}\n`
	const directory = path.dirname(stateFile)
	await fs.mkdir(directory, { recursive: true })

	// Same directory as the target so the rename stays on one filesystem.
	const tempFile = path.join(
		directory,
		`.${path.basename(stateFile)}.${process.pid}.${Date.now()}.tmp`,
	)

	try {
		await fs.writeFile(tempFile, formatted, 'utf8')
		await fs.rename(tempFile, stateFile)
	} catch (error) {
		// Best-effort cleanup; the original failure is the one worth reporting.
		await fs.rm(tempFile, { force: true }).catch(() => {})
		throw error
	}
}
|
|
56
90
|
|
|
91
|
+
/**
 * Generate a fresh identifier for a supervisor run.
 *
 * @returns {string} A random UUID string produced by `node:crypto`.
 */
export function createRunId() {
	const runId = randomUUID()
	return runId
}
|
|
94
|
+
|
|
95
|
+
/**
 * Coerce an arbitrary value into a usable process id.
 *
 * The value is stringified and parsed as a base-10 integer, so numeric
 * strings and numbers with a fractional part are accepted (the fraction is
 * dropped by the parse).
 *
 * @param {unknown} raw Candidate pid (number, string, null, ...).
 * @returns {number} The positive integer pid, or 0 when the input does not
 *   yield one.
 */
function normalizePid(raw) {
	const text = String(raw ?? '')
	const parsed = Number.parseInt(text, 10)
	if (!Number.isInteger(parsed)) {
		return 0
	}
	if (parsed <= 0) {
		return 0
	}
	return parsed
}
|
|
99
|
+
|
|
100
|
+
/**
 * Probe whether a process with the given pid currently exists.
 *
 * Uses `process.kill(pid, 0)`, which performs the existence/permission check
 * without delivering a signal.
 *
 * @param {unknown} pid Candidate pid; normalised via `normalizePid`.
 * @returns {boolean} `true` when the probe succeeds or fails with `EPERM`
 *   (the process exists but we may not signal it); `false` for an unusable
 *   pid or any other probe failure.
 */
export function isProcessRunning(pid) {
	const targetPid = normalizePid(pid)
	if (targetPid <= 0) {
		return false
	}

	try {
		process.kill(targetPid, 0)
	} catch (error) {
		const carriesCode = Boolean(error) && typeof error === 'object' && 'code' in error
		// EPERM means the pid is alive but owned by someone we cannot signal.
		return carriesCode && error.code === 'EPERM'
	}

	return true
}
|
|
116
|
+
|
|
117
|
+
/**
 * Read and parse a JSON file, tolerating any failure.
 *
 * @param {string} filePath File to read as UTF-8.
 * @param {unknown} [fallback=null] Value returned when the file cannot be
 *   read or its content is not valid JSON.
 * @returns {Promise<unknown>} The parsed value, or `fallback`.
 */
export async function readJsonFile(filePath, fallback = null) {
	let parsed
	try {
		const text = await fs.readFile(filePath, 'utf8')
		parsed = JSON.parse(text)
	} catch {
		// Deliberately best-effort: missing and malformed files both fall back.
		return fallback
	}
	return parsed
}
|
|
125
|
+
|
|
126
|
+
/**
 * Serialise a value as pretty-printed JSON (2-space indent, trailing newline)
 * and write it to disk, creating parent directories as needed.
 *
 * @param {string} filePath Destination path.
 * @param {unknown} value JSON-serialisable value.
 * @param {string} [flags] File-system open flag forwarded to `fs.writeFile`
 *   (for example `'wx'` to fail when the file already exists).
 * @returns {Promise<void>}
 * @throws Propagates serialisation and filesystem errors.
 */
async function writeJsonFile(filePath, value, flags) {
	// Serialise first so an unserialisable value fails before touching disk.
	const payload = `${JSON.stringify(value, null, 2)}\n`
	const writeOptions = { encoding: 'utf8', flag: flags }
	const parentDir = path.dirname(filePath)

	await fs.mkdir(parentDir, { recursive: true })
	await fs.writeFile(filePath, payload, writeOptions)
}
|
|
131
|
+
|
|
132
|
+
/**
 * Take the single-run lock by exclusively creating `lockFile`.
 *
 * Flow:
 * 1. Try to create the lock file with the `wx` flag.
 * 2. If it already exists, read it. When it names a different pid that is
 *    still running, refuse with an error.
 * 3. Otherwise treat the lock as stale: remove it and create it again with
 *    `wx`; losing that second creation to another process is an error.
 *
 * @param {string} lockFile Path of the lock file.
 * @param {object} [lockState] Initial lock fields (`runId`, `pid`,
 *   `startedAt`, `heartbeatAt`, `status`, `iteration`, `phase`, `task`,
 *   `mode`, `configFile`, `cwd`); missing fields get defaults.
 * @returns {Promise<{acquired: true, staleLock: unknown}>} `staleLock` is
 *   `null` on a clean acquisition, otherwise the content read from the lock
 *   that was replaced.
 * @throws {Error} When another live process owns the lock, when another
 *   process wins the re-creation, or on unexpected filesystem errors.
 */
export async function acquireRunLock(lockFile, lockState) {
	const isAlreadyExists = (error) =>
		Boolean(error) && typeof error === 'object' && 'code' in error && error.code === 'EEXIST'

	const requestedIteration = Number(lockState?.iteration)
	const desired = {
		runId: String(lockState?.runId ?? ''),
		pid: normalizePid(lockState?.pid),
		startedAt: String(lockState?.startedAt ?? timestamp()),
		heartbeatAt: String(lockState?.heartbeatAt ?? timestamp()),
		status: String(lockState?.status ?? 'starting'),
		iteration: Number.isFinite(requestedIteration) ? requestedIteration : 0,
		phase: String(lockState?.phase ?? ''),
		task: String(lockState?.task ?? ''),
		mode: String(lockState?.mode ?? ''),
		configFile: String(lockState?.configFile ?? ''),
		cwd: String(lockState?.cwd ?? ''),
	}

	await fs.mkdir(path.dirname(lockFile), { recursive: true })

	// Fast path: nobody holds the lock.
	try {
		await writeJsonFile(lockFile, desired, 'wx')
		return { acquired: true, staleLock: null }
	} catch (error) {
		if (!isAlreadyExists(error)) {
			throw error
		}
	}

	const existing = await readJsonFile(lockFile, null)
	const existingPid = normalizePid(existing?.pid)
	const heldByOtherLiveProcess = existing
		&& existingPid > 0
		&& isProcessRunning(existingPid)
		&& existingPid !== process.pid
	if (heldByOtherLiveProcess) {
		throw new Error(
			`Another pi-harness run is active (runId=${String(existing.runId ?? '')} pid=${existingPid} startedAt=${String(existing.startedAt ?? '')}).`
		)
	}

	// NOTE(review): an unreadable lock is treated as stale here, and the
	// remove-then-create sequence below is not atomic; two contenders racing
	// on the same stale lock may need a closer look.
	await fs.rm(lockFile, { force: true })

	try {
		await writeJsonFile(lockFile, desired, 'wx')
	} catch (error) {
		if (!isAlreadyExists(error)) {
			throw error
		}
		const current = await readJsonFile(lockFile, null)
		throw new Error(
			`Another pi-harness run acquired the lock first (runId=${String(current?.runId ?? '')} pid=${String(current?.pid ?? '')}).`
		)
	}

	return { acquired: true, staleLock: existing }
}
|
|
182
|
+
|
|
183
|
+
/**
 * Merge new fields into an existing run lock and refresh its heartbeat.
 *
 * @param {string} lockFile Path of the lock file.
 * @param {object} [lockState] Fields to overlay on the stored lock. `pid`
 *   falls back to the stored pid and `heartbeatAt` to the current timestamp.
 * @returns {Promise<boolean>} `false` when the lock file is missing,
 *   unreadable or holds a falsy value; `true` after the merged lock was
 *   written.
 */
export async function updateRunLock(lockFile, lockState) {
	const stored = await readJsonFile(lockFile, null)
	if (!stored) {
		return false
	}

	const pid = normalizePid(lockState?.pid ?? stored.pid)
	const heartbeatAt = String(lockState?.heartbeatAt ?? timestamp())

	// Later keys win: caller fields override stored ones, then the normalised
	// pid and heartbeat override both.
	const merged = { ...stored, ...lockState, pid, heartbeatAt }
	await writeJsonFile(lockFile, merged)
	return true
}
|
|
198
|
+
|
|
199
|
+
/**
 * Remove the run lock, but only when it belongs to the given run.
 *
 * @param {string} lockFile Path of the lock file.
 * @param {string} runId Identifier of the run that wants to release the lock.
 * @returns {Promise<boolean>} `true` when the lock file was removed; `false`
 *   when it is missing/unreadable or records a different `runId`.
 */
export async function releaseRunLock(lockFile, runId) {
	const stored = await readJsonFile(lockFile, null)
	const ownedByCaller = Boolean(stored)
		&& String(stored.runId ?? '') === String(runId ?? '')

	if (!ownedByCaller) {
		return false
	}

	await fs.rm(lockFile, { force: true })
	return true
}
|
|
212
|
+
|
|
213
|
+
/**
 * Send a signal to a process and, off Windows, to its whole process group.
 *
 * On non-Windows platforms the pid is negated before calling `process.kill`,
 * which addresses the process group with that id; on Windows the pid itself
 * is signalled.
 *
 * @param {unknown} pid Target pid; normalised via `normalizePid`.
 * @param {string|number} signal Signal forwarded to `process.kill`.
 * @returns {boolean} `true` when `process.kill` did not throw; `false` for an
 *   unusable pid or when signalling failed.
 */
export function signalProcessTree(pid, signal) {
	const targetPid = normalizePid(pid)
	if (targetPid <= 0) {
		return false
	}

	const killTarget = process.platform === 'win32' ? targetPid : -targetPid
	try {
		process.kill(killTarget, signal)
	} catch {
		return false
	}
	return true
}
|
|
230
|
+
|
|
57
231
|
export async function readSessionId(sessionFile) {
|
|
58
232
|
try {
|
|
59
233
|
return (await fs.readFile(sessionFile, 'utf8')).trim()
|
|
@@ -225,6 +399,65 @@ export function findFirstUncheckedTaskInfo(taskFile) {
|
|
|
225
399
|
}
|
|
226
400
|
}
|
|
227
401
|
|
|
402
|
+
/**
 * Count the lines in a piece of text.
 *
 * A trailing newline terminates the last line rather than starting a new,
 * empty one, so `"a\nb\n"` and `"a\nb"` both count as two lines. (Counting
 * the empty segment after the final newline would over-report every
 * newline-terminated file by one.)
 *
 * @param {unknown} text Text to measure; `null`/`undefined` count as empty.
 * @returns {number} Number of lines; 0 for empty input.
 */
function countLines(text) {
	const normalized = String(text ?? '')
	if (normalized === '') {
		return 0
	}

	const segments = normalized.split('\n').length
	// Drop the empty segment that follows a terminating newline.
	return normalized.endsWith('\n') ? segments - 1 : segments
}
|
|
409
|
+
|
|
410
|
+
/**
 * Decide whether a path looks like a test/spec file.
 *
 * A path qualifies when, after converting backslashes to forward slashes, it
 * either has a directory segment named `e2e`, `test`, `tests`, `spec` or
 * `specs`, or ends in `.spec.*` / `.test.*` with a JS/TS extension
 * (`js`, `ts`, `jsx`, `tsx`, optionally prefixed with `c` or `m`).
 *
 * @param {unknown} filePath Path to classify; `null`/`undefined` count as ''.
 * @returns {boolean} `true` when the path matches either rule.
 */
function isSpecLikeFile(filePath) {
	const posixPath = String(filePath ?? '').split('\\').join('/')

	const inTestDirectory = /(^|\/)(e2e|test|tests|spec|specs)\//
	if (inTestDirectory.test(posixPath)) {
		return true
	}

	const hasSpecSuffix = /\.(spec|test)\.[cm]?[jt]sx?$/
	return hasSpecSuffix.test(posixPath)
}
|
|
415
|
+
|
|
416
|
+
/**
 * Inspect touched files and report the ones that exceed the line thresholds.
 *
 * Each distinct, non-blank entry of `files` is resolved against `cwd` and
 * read synchronously as UTF-8; files that cannot be read are skipped.
 * Spec-like files (per `isSpecLikeFile`) at or above `largeSpecWarningLines`
 * are reported as `large_spec`; any other file at or above
 * `largeFileWarningLines` is reported as `large_file`.
 *
 * @param {string} cwd Base directory used to resolve relative paths.
 * @param {unknown} files List of paths; non-array input is treated as empty.
 * @param {object} [options]
 * @param {number} [options.largeFileWarningLines=500] Threshold for any file.
 * @param {number} [options.largeSpecWarningLines=300] Threshold for spec files.
 * @returns {Array<{file: string, lineCount: number, kind: string}>} Warnings
 *   sorted by descending line count.
 */
export function collectLargeFileWarnings(cwd, files, {
	largeFileWarningLines = 500,
	largeSpecWarningLines = 300,
} = {}) {
	const candidates = Array.isArray(files) ? files : []
	const visited = new Set()
	const warnings = []

	for (const candidate of candidates) {
		const relativePath = String(candidate ?? '').trim()
		if (relativePath === '' || visited.has(relativePath)) {
			continue
		}
		visited.add(relativePath)

		const absolutePath = path.resolve(cwd, relativePath)
		let content = ''
		try {
			content = readFileSync(absolutePath, 'utf8')
		} catch {
			// Unreadable files cannot be measured; skip them.
			continue
		}

		const lineCount = countLines(content)
		const specLike = isSpecLikeFile(relativePath)

		// The spec threshold is checked first; spec files below it still fall
		// through to the generic threshold.
		let kind = ''
		if (specLike && lineCount >= largeSpecWarningLines) {
			kind = 'large_spec'
		} else if (lineCount >= largeFileWarningLines) {
			kind = 'large_file'
		}

		if (kind !== '') {
			warnings.push({ file: relativePath, lineCount, kind })
		}
	}

	return warnings.sort((left, right) => right.lineCount - left.lineCount)
}
|
|
460
|
+
|
|
228
461
|
export async function runShellCommand({
|
|
229
462
|
cwd,
|
|
230
463
|
command,
|
|
@@ -238,6 +471,7 @@ export async function runShellCommand({
|
|
|
238
471
|
const child = spawn('/bin/zsh', ['-lc', command], {
|
|
239
472
|
cwd,
|
|
240
473
|
env: process.env,
|
|
474
|
+
detached: process.platform !== 'win32',
|
|
241
475
|
stdio: ['pipe', 'pipe', 'pipe'],
|
|
242
476
|
})
|
|
243
477
|
|
|
@@ -249,9 +483,9 @@ export async function runShellCommand({
|
|
|
249
483
|
|
|
250
484
|
killTimer = setTimeout(() => {
|
|
251
485
|
timedOut = true
|
|
252
|
-
child.
|
|
486
|
+
signalProcessTree(child.pid, 'SIGTERM')
|
|
253
487
|
forceKillTimer = setTimeout(() => {
|
|
254
|
-
child.
|
|
488
|
+
signalProcessTree(child.pid, 'SIGKILL')
|
|
255
489
|
}, 10000)
|
|
256
490
|
}, timeoutSeconds * 1000)
|
|
257
491
|
|
package/src/pi-report.mjs
CHANGED
|
@@ -35,6 +35,17 @@ async function main() {
|
|
|
35
35
|
console.log(`- ${kind}: ${count}`)
|
|
36
36
|
}
|
|
37
37
|
|
|
38
|
+
const iterationSummaries = recent.filter((event) => event.kind === 'iteration_summary')
|
|
39
|
+
const warningsByIteration = iterationSummaries
|
|
40
|
+
.filter((event) => String(event.riskWarnings ?? '').trim() !== '')
|
|
41
|
+
|
|
42
|
+
if (warningsByIteration.length > 0) {
|
|
43
|
+
console.log('\nLarge file warnings:')
|
|
44
|
+
for (const event of warningsByIteration.slice(-5)) {
|
|
45
|
+
console.log(`- iteration ${event.iteration}: ${event.riskWarnings}`)
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
|
|
38
49
|
const last = recent.at(-1)
|
|
39
50
|
if (!last) {
|
|
40
51
|
return
|