@sebastianandreasson/pi-autonomous-agents 0.2.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +18 -3
- package/SETUP.md +9 -0
- package/docs/PI_SUPERVISOR.md +11 -11
- package/package.json +3 -3
- package/pi.config.json +1 -0
- package/src/index.mjs +1 -0
- package/src/pi-client.mjs +37 -0
- package/src/pi-config.mjs +51 -18
- package/src/pi-history.mjs +2 -0
- package/src/pi-preflight.mjs +48 -17
- package/src/pi-prompts.mjs +339 -103
- package/src/pi-repo.mjs +65 -3
- package/src/pi-report.mjs +11 -0
- package/src/pi-rpc-adapter.mjs +73 -0
- package/src/pi-supervisor.mjs +465 -26
- package/src/pi-telemetry.mjs +15 -1
- package/templates/DEVELOPER.md +3 -0
- package/templates/TESTER.md +7 -4
- package/templates/pi.config.example.json +4 -1
package/src/pi-prompts.mjs
CHANGED
|
@@ -1,5 +1,59 @@
|
|
|
1
1
|
import path from 'node:path'
|
|
2
2
|
|
|
3
|
+
/**
 * Trim `text` and cap it at a whole number of lines.
 *
 * @param {unknown} text - Value to clamp; null/undefined are treated as an empty string.
 * @param {number} maxLines - Maximum number of lines to keep. Fractions are rounded down.
 *   Non-finite values and values below 1 mean "no limit".
 * @returns {string} The trimmed text, or its first `maxLines` lines followed by a
 *   "... (N more lines omitted)" marker when lines were dropped.
 */
function clampLines(text, maxLines) {
  const normalized = String(text ?? '').trim()
  if (normalized === '') {
    return ''
  }

  if (!Number.isFinite(maxLines)) {
    return normalized
  }

  // Work in whole lines: a fractional limit (e.g. 2.5) would otherwise slice 2 lines
  // but report a fractional "1.5 more lines omitted" count.
  const limit = Math.floor(maxLines)
  if (limit < 1) {
    return normalized
  }

  const lines = normalized.split('\n')
  if (lines.length <= limit) {
    return normalized
  }

  const remaining = lines.length - limit
  return `${lines.slice(0, limit).join('\n')}\n... (${remaining} more lines omitted)`
}
|
|
17
|
+
|
|
18
|
+
/**
 * Render a labelled feedback block for a prompt.
 *
 * @param {string} label - Heading printed before the excerpt (a colon is appended).
 * @param {unknown} text - Feedback text; it is clamped with `clampLines`.
 * @param {number} maxLines - Line cap forwarded to `clampLines`.
 * @returns {string} `\n<label>:\n<excerpt>\n`, or an empty string when the excerpt is empty.
 */
function formatFeedbackSection(label, text, maxLines) {
  const body = clampLines(text, maxLines)
  return body === '' ? '' : `\n${label}:\n${body}\n`
}
|
|
26
|
+
|
|
27
|
+
/**
 * Render the changed-file list as markdown-style bullets.
 *
 * @param {unknown} files - Expected to be an array of file paths; falsy entries are dropped
 *   and a non-array is treated as an empty list.
 * @param {number} maxFiles - Maximum bullets to show; a non-finite or non-positive value shows all.
 * @returns {string} One bullet per visible file, plus a trailing "... and N more files"
 *   bullet when the list was cut, or a fixed "no changes" bullet for an empty list.
 */
function formatChangedFilesSection(files, maxFiles) {
  const entries = Array.isArray(files) ? files.filter((entry) => Boolean(entry)) : []
  if (entries.length === 0) {
    return '- No file changes were detected from the prior turn.'
  }

  const hasCap = Number.isFinite(maxFiles) && maxFiles > 0
  const shown = hasCap ? entries.slice(0, maxFiles) : entries.slice(0, entries.length)
  const hiddenCount = entries.length - shown.length

  const bullets = []
  for (const entry of shown) {
    bullets.push(`- ${entry}`)
  }
  if (hiddenCount > 0) {
    bullets.push(`- ... and ${hiddenCount} more files`)
  }

  return bullets.join('\n')
}
|
|
42
|
+
|
|
43
|
+
/**
 * Render a prompt hint listing large files among the touched files.
 *
 * @param {unknown} warnings - Expected to be an array of `{ file, lineCount, kind }` objects;
 *   falsy entries are dropped and a non-array is treated as empty.
 * @param {number} [maxWarnings=3] - Maximum entries to list. Fractions are rounded down;
 *   non-finite values or values below 1 fall back to 3.
 * @returns {string} The hint block, or an empty string when there are no warnings.
 */
function formatLargeFileRiskHint(warnings, maxWarnings = 3) {
  const list = Array.isArray(warnings) ? warnings.filter(Boolean) : []
  if (list.length === 0) {
    return ''
  }

  const limit = Number.isFinite(maxWarnings) && maxWarnings >= 1 ? Math.floor(maxWarnings) : 3
  const lines = list
    .slice(0, limit)
    .map((warning) => `- ${warning.file} (${warning.lineCount} lines${warning.kind === 'large_spec' ? ', spec' : ''})`)

  // Say how many entries were cut so the reader knows the list is partial,
  // mirroring the "... and N more files" bullet used for the changed-files list.
  const remaining = list.length - lines.length
  if (remaining > 0) {
    lines.push(`- ... and ${remaining} more files`)
  }

  return `\nLarge file risk in touched files:\n${lines.join('\n')}\nPrefer helper extraction, smaller scoped edits, or test splitting over broad in-place edits.\n`
}
|
|
56
|
+
|
|
3
57
|
function displayPath(config, filePath) {
|
|
4
58
|
const relativePath = path.relative(config.cwd, filePath)
|
|
5
59
|
if (
|
|
@@ -13,22 +67,25 @@ function displayPath(config, filePath) {
|
|
|
13
67
|
return path.basename(filePath)
|
|
14
68
|
}
|
|
15
69
|
|
|
16
|
-
function formatVisualFeedback(visualFeedback) {
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
return `\nLatest visual feedback from prior runs:\n${text}\n`
|
|
70
|
+
/**
 * Build the "visual feedback" prompt section.
 *
 * The line cap is read from `config.maxVisualFeedbackLines` via `configMaxLines`,
 * with 20 as the fallback.
 *
 * @param {object} config - Harness configuration.
 * @param {unknown} visualFeedback - Raw visual feedback text.
 * @returns {string} The result of `formatFeedbackSection` for this label and cap.
 */
function formatVisualFeedback(config, visualFeedback) {
  const lineCap = configMaxLines(config, 'maxVisualFeedbackLines', 20)
  return formatFeedbackSection('Latest visual feedback from prior runs', visualFeedback, lineCap)
}
|
|
24
77
|
|
|
25
|
-
function formatTesterFeedback(testerFeedback) {
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
78
|
+
/**
 * Build the "tester feedback" prompt section.
 *
 * The line cap is read from `config.maxTesterFeedbackLines` via `configMaxLines`,
 * with 32 as the fallback.
 *
 * @param {object} config - Harness configuration.
 * @param {unknown} testerFeedback - Raw tester feedback text.
 * @returns {string} The result of `formatFeedbackSection` for this label and cap.
 */
function formatTesterFeedback(config, testerFeedback) {
  const lineCap = configMaxLines(config, 'maxTesterFeedbackLines', 32)
  return formatFeedbackSection('Latest tester feedback from prior runs', testerFeedback, lineCap)
}
|
|
30
85
|
|
|
31
|
-
|
|
86
|
+
/**
 * Read a positive whole-number limit from `config`.
 *
 * @param {object|null|undefined} config - Configuration object; may be missing.
 * @param {string} key - Property name to read.
 * @param {number} fallback - Value returned when the setting is absent or unusable.
 * @returns {number} `config[key]` coerced to a number and rounded down, or `fallback`
 *   when the result is not a finite number of at least 1.
 */
function configMaxLines(config, key, fallback) {
  // Limits are used as line/file counts, so keep them integral: a setting such as 2.7
  // would otherwise leak into slice() bounds and "N more lines" arithmetic.
  const value = Math.floor(Number(config?.[key]))
  return Number.isFinite(value) && value >= 1 ? value : fallback
}
|
|
33
90
|
|
|
34
91
|
function indentBlock(text, prefix = '') {
|
|
@@ -54,97 +111,222 @@ function staleEditRecoveryRules() {
|
|
|
54
111
|
].join('\n')
|
|
55
112
|
}
|
|
56
113
|
|
|
114
|
+
/**
 * Produce the prompt line that gives repo-local instructions priority over package defaults.
 *
 * @param {object} config - Harness configuration, forwarded to `displayPath`.
 * @param {string} instructionsFile - Path of the instructions file to mention.
 * @param {boolean} usesBundledInstructions - When truthy, no line is emitted.
 * @returns {string} The authority sentence ending in a newline, or an empty string.
 */
function repoInstructionsAuthorityLine(config, instructionsFile, usesBundledInstructions) {
  if (!usesBundledInstructions) {
    const shownPath = displayPath(config, instructionsFile)
    return `Repo-local instructions in ${shownPath} are the primary role contract. Follow them over package defaults when they differ.\n`
  }

  return ''
}
|
|
121
|
+
|
|
122
|
+
/**
 * Select the tester prompt rules describing who creates the commit after a PASS verdict.
 *
 * When `config.commitMode` is `'plan'` the rules ask for a commit plan and forbid running
 * git add/commit; for any other value they ask the tester to create the commit itself.
 *
 * @param {object} config - Harness configuration; only `commitMode` is read.
 * @returns {{successRule: string, isolationRule: string, extraRule: string, successFormat: string}}
 *   Rule bullets plus the multi-line response-format block.
 */
function testerPassOwnershipRules(config) {
  const harnessCommits = config.commitMode === 'plan'

  if (!harnessCommits) {
    const selfCommitFormat = [
      'If and only if your verdict is PASS, include exactly this block before the verdict line after creating the commit:',
      '- COMMIT_CREATED: true',
      '- COMMIT_MESSAGE: <one-line commit message>',
      '- COMMIT_SHA: <short-or-full-sha>',
    ]

    return {
      successRule: '- If your verdict is PASS, stage only the files related to this task and create the git commit yourself before the verdict line.',
      isolationRule: '- If the working tree is too messy to isolate safely, use VERDICT: BLOCKED instead of guessing.',
      extraRule: '- Use git status before committing, stage only the related files, and create one concise commit message in the format <type>(<scope>): <summary> when possible.',
      successFormat: selfCommitFormat.join('\n'),
    }
  }

  const commitPlanFormat = [
    'If and only if your verdict is PASS, also include exactly this commit plan block before the verdict line:',
    '- COMMIT_MESSAGE: <one-line commit message>',
    '- COMMIT_FILES:',
    '- path/to/file-one',
    '- path/to/file-two',
    '',
    'Do not add commentary on the same lines as COMMIT_MESSAGE or COMMIT_FILES. Put only the message value after COMMIT_MESSAGE:, then one file path per line under COMMIT_FILES:.',
  ]

  return {
    successRule: '- If your verdict is PASS, do not run git add or git commit yourself. Provide a commit plan for the harness to execute.',
    isolationRule: '- The commit plan must include only the files related to this task. If the working tree is too messy to isolate safely, use VERDICT: BLOCKED instead of guessing.',
    extraRule: '- If you can produce a PASS, include the commit plan in the same response. Avoid making the harness ask for a second commit-only pass.',
    successFormat: commitPlanFormat.join('\n'),
  }
}
|
|
152
|
+
|
|
57
153
|
export function buildMainPrompt(config, options = {}) {
|
|
58
154
|
const taskFile = displayPath(config, config.taskFile)
|
|
59
155
|
const instructionsFile = displayPath(config, config.developerInstructionsFile)
|
|
60
|
-
const visualFeedbackSection = formatVisualFeedback(options.visualFeedback)
|
|
61
|
-
const testerFeedbackSection = formatTesterFeedback(options.testerFeedback)
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
156
|
+
const visualFeedbackSection = formatVisualFeedback(config, options.visualFeedback)
|
|
157
|
+
const testerFeedbackSection = formatTesterFeedback(config, options.testerFeedback)
|
|
158
|
+
const authorityLine = repoInstructionsAuthorityLine(
|
|
159
|
+
config,
|
|
160
|
+
config.developerInstructionsFile,
|
|
161
|
+
config.usingBundledDeveloperInstructions,
|
|
162
|
+
)
|
|
163
|
+
|
|
164
|
+
if (!config.usingBundledDeveloperInstructions) {
|
|
165
|
+
return `Read ${taskFile} and ${instructionsFile}.
|
|
166
|
+
${authorityLine}${visualFeedbackSection}
|
|
65
167
|
${testerFeedbackSection}
|
|
66
168
|
|
|
67
169
|
Work only on the current phase.
|
|
68
170
|
Select the first unchecked actionable checkbox in phase order.
|
|
69
|
-
Complete
|
|
171
|
+
Complete one coherent task, or at most 2 tightly related unchecked tasks if they are naturally done together.
|
|
70
172
|
|
|
71
|
-
|
|
173
|
+
Harness rules:
|
|
72
174
|
- Start by checking git status so you know whether unrelated changes already exist.
|
|
73
175
|
- Update code, config, and docs only as needed for the selected task.
|
|
74
176
|
- Tick only the checkbox items that are actually completed.
|
|
75
|
-
-
|
|
76
|
-
-
|
|
77
|
-
-
|
|
177
|
+
- Use read for source inspection. Use bash only for git, tests, and narrow diagnostics.
|
|
178
|
+
- Do not build edits from large sed/grep output or from memory after partial shell reads.
|
|
179
|
+
- If a snippet seems incomplete, reread a smaller exact window with read instead of another large overlapping shell range.
|
|
78
180
|
- If blocked, add a brief note directly under the relevant task in ${taskFile} explaining the blocker, then stop.
|
|
79
|
-
- Do not create
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
181
|
+
- Do not create the final commit during the developer pass.
|
|
182
|
+
${staleEditRecoveryRules()}
|
|
183
|
+
|
|
184
|
+
Before stopping:
|
|
185
|
+
- Tick completed checkbox items in ${taskFile}.
|
|
186
|
+
- Keep changes scoped to one coherent step.
|
|
187
|
+
- Stop after finishing that step.`
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
return `Read ${taskFile} and ${instructionsFile}.
|
|
191
|
+
${authorityLine}${visualFeedbackSection}
|
|
192
|
+
${testerFeedbackSection}
|
|
193
|
+
|
|
194
|
+
Do one current-phase unchecked task.
|
|
195
|
+
|
|
196
|
+
Rules:
|
|
197
|
+
- Start with git status.
|
|
198
|
+
- Select the first unchecked actionable checkbox in phase order.
|
|
199
|
+
- Keep changes minimal and scoped.
|
|
200
|
+
- Use read for source inspection. Use bash only for git, tests, and narrow diagnostics.
|
|
201
|
+
- If a snippet seems incomplete, reread a smaller exact window with read instead of another large overlapping shell range.
|
|
202
|
+
- Do not edit from memory after partial shell output.
|
|
203
|
+
- Tick only completed items.
|
|
204
|
+
- If blocked, note it under the task in ${taskFile} and stop.
|
|
205
|
+
- Do not touch lockfiles, generated files, or unrelated assets.
|
|
206
|
+
- Do not commit in the developer pass.
|
|
207
|
+
${innerLoopValidationRules(config.testCommand)}
|
|
208
|
+
${staleEditRecoveryRules()}
|
|
89
209
|
|
|
90
210
|
Before stopping:
|
|
91
211
|
- Tick completed checkbox items in ${taskFile}.
|
|
92
|
-
|
|
93
|
-
- Stop after finishing that step.`
|
|
212
|
+
- Stop after one coherent step.`
|
|
94
213
|
}
|
|
95
214
|
|
|
96
215
|
export function buildFixPrompt(config, recentVerificationOutput, options = {}) {
|
|
97
216
|
const taskFile = displayPath(config, config.taskFile)
|
|
98
217
|
const instructionsFile = displayPath(config, config.developerInstructionsFile)
|
|
99
|
-
const visualFeedbackSection = formatVisualFeedback(options.visualFeedback)
|
|
100
|
-
const testerFeedbackSection = formatTesterFeedback(options.testerFeedback)
|
|
218
|
+
const visualFeedbackSection = formatVisualFeedback(config, options.visualFeedback)
|
|
219
|
+
const testerFeedbackSection = formatTesterFeedback(config, options.testerFeedback)
|
|
220
|
+
const authorityLine = repoInstructionsAuthorityLine(
|
|
221
|
+
config,
|
|
222
|
+
config.developerInstructionsFile,
|
|
223
|
+
config.usingBundledDeveloperInstructions,
|
|
224
|
+
)
|
|
225
|
+
const findings = clampLines(recentVerificationOutput, configMaxLines(config, 'maxVerificationExcerptLines', 40))
|
|
226
|
+
const largeFileRiskHint = formatLargeFileRiskHint(options.largeFileWarnings)
|
|
227
|
+
|
|
228
|
+
if (!config.usingBundledDeveloperInstructions) {
|
|
229
|
+
return `Read ${taskFile} and ${instructionsFile}.
|
|
230
|
+
${authorityLine}${visualFeedbackSection}
|
|
231
|
+
${testerFeedbackSection}
|
|
232
|
+
${largeFileRiskHint}
|
|
233
|
+
|
|
234
|
+
The tester step found a real problem in the current implementation. Fix only the product behavior related to the current phase and current task.
|
|
235
|
+
|
|
236
|
+
Recent tester findings:
|
|
237
|
+
${findings}
|
|
238
|
+
|
|
239
|
+
Harness rules:
|
|
240
|
+
- Start by checking git status so you know which files are already dirty.
|
|
241
|
+
- Do not paper over product bugs by weakening tests.
|
|
242
|
+
- Keep changes minimal and focused on the failing behavior.
|
|
243
|
+
- Use read for source inspection. Use bash only for git, tests, and narrow diagnostics.
|
|
244
|
+
- If a snippet seems incomplete, reread a smaller exact window with read instead of another large overlapping shell range.
|
|
245
|
+
- Do not edit from memory after partial shell output.
|
|
246
|
+
- Do not perform speculative cleanup or unrelated refactors in this pass.
|
|
247
|
+
- Do not create the final commit during the developer fix pass.
|
|
248
|
+
${staleEditRecoveryRules()}
|
|
249
|
+
|
|
250
|
+
Before stopping:
|
|
251
|
+
- Tick any checkbox in ${taskFile} only if it is now actually complete.
|
|
252
|
+
- Stop after one coherent fix.`
|
|
253
|
+
}
|
|
101
254
|
|
|
102
255
|
return `Read ${taskFile} and ${instructionsFile}.
|
|
103
|
-
${visualFeedbackSection}
|
|
256
|
+
${authorityLine}${visualFeedbackSection}
|
|
104
257
|
${testerFeedbackSection}
|
|
258
|
+
${largeFileRiskHint}
|
|
105
259
|
|
|
106
260
|
The tester step found a real problem in the current implementation. Fix only the product behavior related to the current phase and current task.
|
|
107
261
|
|
|
108
262
|
Recent tester findings:
|
|
109
|
-
${
|
|
263
|
+
${findings}
|
|
110
264
|
|
|
111
265
|
Rules:
|
|
112
|
-
- Start
|
|
113
|
-
-
|
|
114
|
-
-
|
|
115
|
-
-
|
|
116
|
-
-
|
|
117
|
-
- Do not edit
|
|
118
|
-
-
|
|
119
|
-
|
|
120
|
-
${
|
|
121
|
-
- Trust tool results over your own guesses. If a read tool shows file contents, use that exact output instead of arguing with it.
|
|
122
|
-
- Do not repeatedly rewrite the same file because you suspect a formatting issue. Read once, identify the exact mismatch, then make one focused fix.
|
|
123
|
-
${indentBlock(staleEditRecoveryRules(), '\t')}
|
|
124
|
-
- Do not create the final commit during the developer fix pass. Leave the repaired diff for the tester to re-check and commit if it passes.
|
|
266
|
+
- Start with git status.
|
|
267
|
+
- Keep the fix narrow.
|
|
268
|
+
- Do not weaken tests to hide product bugs.
|
|
269
|
+
- Use read for source inspection. Use bash only for git, tests, and narrow diagnostics.
|
|
270
|
+
- If a snippet seems incomplete, reread a smaller exact window with read instead of another large overlapping shell range.
|
|
271
|
+
- Do not edit from memory after partial shell output.
|
|
272
|
+
- Do not perform speculative cleanup or unrelated refactors.
|
|
273
|
+
- Do not create the final commit.
|
|
274
|
+
${staleEditRecoveryRules()}
|
|
125
275
|
|
|
126
276
|
Before stopping:
|
|
127
|
-
|
|
128
|
-
|
|
277
|
+
- Tick any checkbox in ${taskFile} only if it is now actually complete.
|
|
278
|
+
- Stop after one coherent fix.`
|
|
129
279
|
}
|
|
130
280
|
|
|
131
281
|
export function buildSteeringPrompt(config, reason, options = {}) {
|
|
132
282
|
const taskFile = displayPath(config, config.taskFile)
|
|
133
|
-
const
|
|
134
|
-
const
|
|
283
|
+
const instructionsFile = displayPath(config, config.developerInstructionsFile)
|
|
284
|
+
const visualFeedbackSection = formatVisualFeedback(config, options.visualFeedback)
|
|
285
|
+
const testerFeedbackSection = formatTesterFeedback(config, options.testerFeedback)
|
|
286
|
+
const authorityLine = repoInstructionsAuthorityLine(
|
|
287
|
+
config,
|
|
288
|
+
config.developerInstructionsFile,
|
|
289
|
+
config.usingBundledDeveloperInstructions,
|
|
290
|
+
)
|
|
291
|
+
const largeFileRiskHint = formatLargeFileRiskHint(options.largeFileWarnings)
|
|
292
|
+
|
|
293
|
+
if (!config.usingBundledDeveloperInstructions) {
|
|
294
|
+
return `Continue from the current repo state.
|
|
295
|
+
Read ${taskFile} and ${instructionsFile}.
|
|
296
|
+
${authorityLine}${visualFeedbackSection}
|
|
297
|
+
${testerFeedbackSection}
|
|
298
|
+
${largeFileRiskHint}
|
|
299
|
+
|
|
300
|
+
Reason for this follow-up: ${reason}
|
|
301
|
+
|
|
302
|
+
Select the first unchecked actionable checkbox in the current phase, complete one coherent task, tick completed items, run any repo-local verification required by your role instructions, and stop.
|
|
303
|
+
|
|
304
|
+
Additional harness guardrails:
|
|
305
|
+
- Start by checking git status.
|
|
306
|
+
- Use read for source inspection. Use bash only for git, tests, and narrow diagnostics.
|
|
307
|
+
- Do not repeat the same tool call over and over.
|
|
308
|
+
- If you already read a file, use that context instead of rereading it unless something changed.
|
|
309
|
+
- If an edit fails once, reread the file before retrying. Do not repeat the same exact edit attempt.
|
|
310
|
+
- If a snippet seems incomplete, reread a smaller exact window with read instead of another large overlapping shell range.
|
|
311
|
+
- If you are stuck, make the smallest decisive next action or stop and state the blocker.`
|
|
312
|
+
}
|
|
135
313
|
|
|
136
314
|
return `Continue from the current repo state.
|
|
137
|
-
${
|
|
315
|
+
Read ${taskFile} and ${instructionsFile}.
|
|
316
|
+
${authorityLine}${visualFeedbackSection}
|
|
138
317
|
${testerFeedbackSection}
|
|
318
|
+
${largeFileRiskHint}
|
|
139
319
|
|
|
140
320
|
Reason for this follow-up: ${reason}
|
|
141
321
|
|
|
142
|
-
|
|
322
|
+
Select the first unchecked actionable checkbox in the current phase, complete one coherent task, tick completed items, run verification, and stop.
|
|
143
323
|
|
|
144
324
|
Additional guardrails:
|
|
325
|
+
- Use read for source inspection. Use bash only for git, tests, and narrow diagnostics.
|
|
145
326
|
- Do not repeat the same tool call over and over.
|
|
146
327
|
- If you already read a file, use that context instead of rereading it unless something changed.
|
|
147
328
|
- If an edit fails once, reread the file before retrying. Do not repeat the same exact edit attempt.
|
|
329
|
+
- If a snippet seems incomplete, reread a smaller exact window with read instead of another large overlapping shell range.
|
|
148
330
|
- Prefer the configured smoke verification path and one narrow targeted check over long full-flow Playwright specs.
|
|
149
331
|
- If you are stuck, make the smallest decisive next action or stop and state the blocker.`
|
|
150
332
|
}
|
|
@@ -157,22 +339,79 @@ export function buildTesterPrompt(config, {
|
|
|
157
339
|
reason = 'tester_review',
|
|
158
340
|
visualFeedback = '',
|
|
159
341
|
testerFeedback = '',
|
|
342
|
+
largeFileWarnings = [],
|
|
160
343
|
}) {
|
|
161
344
|
const taskFile = displayPath(config, config.taskFile)
|
|
162
345
|
const instructionsFile = displayPath(config, config.testerInstructionsFile)
|
|
163
|
-
const visualFeedbackSection = formatVisualFeedback(visualFeedback)
|
|
164
|
-
const testerFeedbackSection = formatTesterFeedback(testerFeedback)
|
|
165
|
-
const changedFilesSection =
|
|
166
|
-
|
|
167
|
-
|
|
346
|
+
const visualFeedbackSection = formatVisualFeedback(config, visualFeedback)
|
|
347
|
+
const testerFeedbackSection = formatTesterFeedback(config, testerFeedback)
|
|
348
|
+
const changedFilesSection = formatChangedFilesSection(
|
|
349
|
+
changedFiles,
|
|
350
|
+
configMaxLines(config, 'maxPromptChangedFiles', 10),
|
|
351
|
+
)
|
|
352
|
+
const compactDeveloperNotes = clampLines(
|
|
353
|
+
developerNotes || '(none provided)',
|
|
354
|
+
configMaxLines(config, 'maxPromptNotesLines', 16),
|
|
355
|
+
)
|
|
168
356
|
const verificationCommand = config.testCommand.trim() === '' ? '(not configured)' : config.testCommand
|
|
169
357
|
const visualCaptureNote = config.visualReviewEnabled
|
|
170
|
-
? `\n-
|
|
358
|
+
? `\n- Keep the screenshot capture flow working so the harness still produces current visual artifacts for review.`
|
|
171
359
|
: ''
|
|
360
|
+
const authorityLine = repoInstructionsAuthorityLine(
|
|
361
|
+
config,
|
|
362
|
+
config.testerInstructionsFile,
|
|
363
|
+
config.usingBundledTesterInstructions,
|
|
364
|
+
)
|
|
365
|
+
const passOwnership = testerPassOwnershipRules(config)
|
|
366
|
+
const largeFileRiskHint = formatLargeFileRiskHint(largeFileWarnings)
|
|
367
|
+
|
|
368
|
+
if (!config.usingBundledTesterInstructions) {
|
|
369
|
+
return `Read ${taskFile} and ${instructionsFile}.
|
|
370
|
+
${authorityLine}${visualFeedbackSection}
|
|
371
|
+
${testerFeedbackSection}
|
|
372
|
+
${largeFileRiskHint}
|
|
373
|
+
|
|
374
|
+
You are the TESTER role. You are reviewing the most recent developer work from an independent quality and functionality perspective.
|
|
375
|
+
|
|
376
|
+
Current phase: ${phase}
|
|
377
|
+
Current task: ${task}
|
|
378
|
+
Reason for this tester pass: ${reason}
|
|
379
|
+
|
|
380
|
+
Developer notes:
|
|
381
|
+
${compactDeveloperNotes}
|
|
382
|
+
|
|
383
|
+
Files changed by the developer:
|
|
384
|
+
${changedFilesSection}
|
|
385
|
+
|
|
386
|
+
Rules:
|
|
387
|
+
- Start with git status.
|
|
388
|
+
- Follow repo-local tester instructions for what to verify and which commands to run.
|
|
389
|
+
- Prefer one focused review pass.
|
|
390
|
+
- Use read for source inspection. Use bash only for git, tests, and narrow diagnostics.
|
|
391
|
+
- If a snippet seems incomplete, reread a smaller exact window with read instead of another large overlapping shell range.
|
|
392
|
+
- If blocked or inconclusive, return VERDICT: BLOCKED.
|
|
393
|
+
- Do not hide real bugs with brittle tests.
|
|
394
|
+
- ${passOwnership.successRule.slice(2)}
|
|
395
|
+
- ${passOwnership.isolationRule.slice(2)}
|
|
396
|
+
- ${passOwnership.extraRule.slice(2)}${visualCaptureNote}
|
|
397
|
+
|
|
398
|
+
Before the verdict line, include a short section in plain text with:
|
|
399
|
+
- Observed flow:
|
|
400
|
+
- Player-facing result:
|
|
401
|
+
- Regression check:
|
|
402
|
+
|
|
403
|
+
${passOwnership.successFormat}
|
|
404
|
+
|
|
405
|
+
Before stopping, end your final response with exactly one verdict line:
|
|
406
|
+
- VERDICT: PASS
|
|
407
|
+
- VERDICT: FAIL
|
|
408
|
+
- VERDICT: BLOCKED`
|
|
409
|
+
}
|
|
172
410
|
|
|
173
411
|
return `Read ${taskFile} and ${instructionsFile}.
|
|
174
|
-
${visualFeedbackSection}
|
|
412
|
+
${authorityLine}${visualFeedbackSection}
|
|
175
413
|
${testerFeedbackSection}
|
|
414
|
+
${largeFileRiskHint}
|
|
176
415
|
|
|
177
416
|
You are the TESTER role. You are reviewing the most recent developer work from an independent quality and functionality perspective.
|
|
178
417
|
|
|
@@ -181,47 +420,30 @@ Current task: ${task}
|
|
|
181
420
|
Reason for this tester pass: ${reason}
|
|
182
421
|
|
|
183
422
|
Developer notes:
|
|
184
|
-
${
|
|
423
|
+
${compactDeveloperNotes}
|
|
185
424
|
|
|
186
425
|
Files changed by the developer:
|
|
187
426
|
${changedFilesSection}
|
|
188
427
|
|
|
189
|
-
|
|
190
|
-
-
|
|
191
|
-
-
|
|
192
|
-
- Prefer browser-driven checks and targeted tests over broad rewrites.
|
|
428
|
+
Rules:
|
|
429
|
+
- Start with git status.
|
|
430
|
+
- Use read for source inspection. Use bash only for git, tests, and narrow diagnostics.
|
|
193
431
|
- Run the repo verification command yourself: ${verificationCommand}
|
|
194
432
|
${indentBlock(innerLoopValidationRules(verificationCommand), '\t')}
|
|
195
|
-
-
|
|
196
|
-
-
|
|
197
|
-
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
- Prefer editing tests, fixtures, and minimal observability hooks.
|
|
203
|
-
- Avoid editing product code unless a tiny testability hook is essential and does not change user-facing behavior.
|
|
204
|
-
- If you find a real product bug or incomplete functionality, do not hide it with brittle tests.
|
|
205
|
-
- If blocked by tooling or environment, state the blocker clearly.
|
|
206
|
-
- Trust tool results over your own guesses. If a read tool shows file contents, use that exact output instead of arguing with it.
|
|
207
|
-
${indentBlock(staleEditRecoveryRules(), '\t')}
|
|
208
|
-
- Treat "the player cannot start, continue, select, buy, unlock, or exit correctly" as a FAIL even if the code compiles.
|
|
209
|
-
- Before PASS, identify at least one concrete player-visible success path you exercised and one thing you checked for regressions.
|
|
210
|
-
- If your verdict is PASS and the verification command succeeded, do not run git add or git commit yourself. Instead, provide a commit plan for the harness to execute.
|
|
211
|
-
- The commit plan must include only the files related to this task. If the working tree is too messy to isolate safely, use VERDICT: BLOCKED instead of guessing.
|
|
212
|
-
- Use a concise commit message in the format <type>(<scope>): <summary> when possible.
|
|
213
|
-
- Stop after one coherent tester pass.
|
|
433
|
+
- Prefer one focused browser-driven review pass.
|
|
434
|
+
- If a snippet seems incomplete, reread a smaller exact window with read instead of another large overlapping shell range.
|
|
435
|
+
- Do not hide real bugs with brittle tests.
|
|
436
|
+
- If blocked or inconclusive, return VERDICT: BLOCKED.
|
|
437
|
+
${indentBlock(passOwnership.successRule, '\t')}
|
|
438
|
+
${indentBlock(passOwnership.isolationRule, '\t')}
|
|
439
|
+
${indentBlock(passOwnership.extraRule, '\t')}${visualCaptureNote}
|
|
214
440
|
|
|
215
441
|
Before the verdict line, include a short section in plain text with:
|
|
216
442
|
- Observed flow:
|
|
217
443
|
- Player-facing result:
|
|
218
444
|
- Regression check:
|
|
219
445
|
|
|
220
|
-
|
|
221
|
-
- COMMIT_MESSAGE: <one-line commit message>
|
|
222
|
-
- COMMIT_FILES:
|
|
223
|
-
- path/to/file-one
|
|
224
|
-
- path/to/file-two
|
|
446
|
+
${indentBlock(passOwnership.successFormat, '\t')}
|
|
225
447
|
|
|
226
448
|
Before stopping, end your final response with exactly one verdict line:
|
|
227
449
|
- VERDICT: PASS
|
|
@@ -237,18 +459,31 @@ export function buildCommitPrompt(config, {
|
|
|
237
459
|
reason = 'tester_passed_without_commit',
|
|
238
460
|
visualFeedback = '',
|
|
239
461
|
testerFeedback = '',
|
|
462
|
+
largeFileWarnings = [],
|
|
240
463
|
}) {
|
|
241
464
|
const taskFile = displayPath(config, config.taskFile)
|
|
242
465
|
const instructionsFile = displayPath(config, config.testerInstructionsFile)
|
|
243
|
-
const visualFeedbackSection = formatVisualFeedback(visualFeedback)
|
|
244
|
-
const testerFeedbackSection = formatTesterFeedback(testerFeedback)
|
|
245
|
-
const
|
|
246
|
-
|
|
247
|
-
|
|
466
|
+
const visualFeedbackSection = formatVisualFeedback(config, visualFeedback)
|
|
467
|
+
const testerFeedbackSection = formatTesterFeedback(config, testerFeedback)
|
|
468
|
+
const authorityLine = repoInstructionsAuthorityLine(
|
|
469
|
+
config,
|
|
470
|
+
config.testerInstructionsFile,
|
|
471
|
+
config.usingBundledTesterInstructions,
|
|
472
|
+
)
|
|
473
|
+
const changedFilesSection = formatChangedFilesSection(
|
|
474
|
+
changedFiles,
|
|
475
|
+
configMaxLines(config, 'maxPromptChangedFiles', 10),
|
|
476
|
+
)
|
|
477
|
+
const compactDeveloperNotes = clampLines(
|
|
478
|
+
developerNotes || '(none provided)',
|
|
479
|
+
configMaxLines(config, 'maxPromptNotesLines', 16),
|
|
480
|
+
)
|
|
481
|
+
const largeFileRiskHint = formatLargeFileRiskHint(largeFileWarnings)
|
|
248
482
|
|
|
249
483
|
return `Read ${taskFile} and ${instructionsFile}.
|
|
250
|
-
${visualFeedbackSection}
|
|
484
|
+
${authorityLine}${visualFeedbackSection}
|
|
251
485
|
${testerFeedbackSection}
|
|
486
|
+
${largeFileRiskHint}
|
|
252
487
|
|
|
253
488
|
You are the TESTER role. The implementation already passed functional review, but the final commit was not created.
|
|
254
489
|
|
|
@@ -257,7 +492,7 @@ Current task: ${task}
|
|
|
257
492
|
Reason for this follow-up: ${reason}
|
|
258
493
|
|
|
259
494
|
Developer/tester notes:
|
|
260
|
-
${
|
|
495
|
+
${compactDeveloperNotes}
|
|
261
496
|
|
|
262
497
|
Files currently dirty:
|
|
263
498
|
${changedFilesSection}
|
|
@@ -265,10 +500,9 @@ ${changedFilesSection}
|
|
|
265
500
|
Your job now is commit-plan finalization only. Do not run git commands yourself.
|
|
266
501
|
|
|
267
502
|
Rules:
|
|
268
|
-
- Start
|
|
503
|
+
- Start with git status.
|
|
269
504
|
- Do not change product code, tests, docs, or TODO items in this pass.
|
|
270
505
|
- Select only the files related to this task.
|
|
271
|
-
- Use a concise commit message in the format <type>(<scope>): <summary> when possible.
|
|
272
506
|
- If the working tree is too messy to isolate safely, do not guess. End with VERDICT: BLOCKED.
|
|
273
507
|
|
|
274
508
|
If you can isolate the correct commit, include exactly this block before the verdict line:
|
|
@@ -277,6 +511,8 @@ If you can isolate the correct commit, include exactly this block before the ver
|
|
|
277
511
|
- path/to/file-one
|
|
278
512
|
- path/to/file-two
|
|
279
513
|
|
|
514
|
+
Do not add commentary on the same lines as COMMIT_MESSAGE or COMMIT_FILES. Put only the message value after COMMIT_MESSAGE:, then one file path per line under COMMIT_FILES:.
|
|
515
|
+
|
|
280
516
|
Before stopping, end your final response with exactly one verdict line:
|
|
281
517
|
- VERDICT: PASS
|
|
282
518
|
- VERDICT: BLOCKED`
|
package/src/pi-repo.mjs
CHANGED
|
@@ -74,13 +74,16 @@ function normalizeStatusPath(statusPath) {
|
|
|
74
74
|
}
|
|
75
75
|
|
|
76
76
|
function parseStatusLine(line) {
|
|
77
|
-
|
|
78
|
-
|
|
77
|
+
if (line.trim() === '') {
|
|
78
|
+
return null
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
if (line.length < 4 || line[2] !== ' ') {
|
|
79
82
|
return null
|
|
80
83
|
}
|
|
81
84
|
|
|
82
85
|
const renamedMarker = ' -> '
|
|
83
|
-
const pathText =
|
|
86
|
+
const pathText = line.slice(3)
|
|
84
87
|
if (pathText.includes(renamedMarker)) {
|
|
85
88
|
const [, nextPath] = pathText.split(renamedMarker)
|
|
86
89
|
return normalizeStatusPath(nextPath)
|
|
@@ -222,6 +225,65 @@ export function findFirstUncheckedTaskInfo(taskFile) {
|
|
|
222
225
|
}
|
|
223
226
|
}
|
|
224
227
|
|
|
228
|
+
/**
 * Count the lines in `text`.
 *
 * @param {unknown} text - Text to measure; null/undefined are treated as an empty string.
 * @returns {number} 0 for empty text, otherwise the number of "\n"-separated lines.
 *   A single trailing newline terminates the last line and does not add a line.
 */
function countLines(text) {
  const normalized = String(text ?? '')
  if (normalized === '') {
    return 0
  }

  const segments = normalized.split('\n').length
  // split() yields an empty final segment for newline-terminated text; counting it
  // would report one line too many for the common "file ends with \n" case.
  return normalized.endsWith('\n') ? segments - 1 : segments
}
|
|
235
|
+
|
|
236
|
+
/**
 * Decide whether a path looks like a test/spec file.
 *
 * Backslashes are converted to forward slashes first. A path matches when it contains an
 * `e2e`, `test`, `tests`, `spec` or `specs` directory segment, or when the file name ends
 * in `.spec.*` / `.test.*` with a js/ts/jsx/tsx extension (optionally prefixed by `c` or `m`).
 *
 * @param {unknown} filePath - Path to classify; null/undefined are treated as an empty string.
 * @returns {boolean} True when either pattern matches.
 */
function isSpecLikeFile(filePath) {
  const posixPath = String(filePath ?? '').replaceAll('\\', '/')

  const inTestDirectory = /(^|\/)(e2e|test|tests|spec|specs)\//.test(posixPath)
  if (inTestDirectory) {
    return true
  }

  return /\.(spec|test)\.[cm]?[jt]sx?$/.test(posixPath)
}
|
|
241
|
+
|
|
242
|
+
/**
 * Scan the given files and report those whose line count reaches a warning threshold.
 *
 * Each distinct, non-blank path is resolved against `cwd` and read as UTF-8; paths that
 * cannot be read are skipped. Spec-like files (per `isSpecLikeFile`) are checked against
 * `largeSpecWarningLines` first, then every file is checked against `largeFileWarningLines`.
 *
 * @param {string} cwd - Base directory used to resolve each relative path.
 * @param {unknown} files - Expected to be an array of relative paths; a non-array is treated as empty.
 * @param {object} [thresholds]
 * @param {number} [thresholds.largeFileWarningLines=500] - Line count at which any file is flagged.
 * @param {number} [thresholds.largeSpecWarningLines=300] - Line count at which a spec-like file is flagged.
 * @returns {Array<{file: string, lineCount: number, kind: 'large_spec'|'large_file'}>}
 *   Warnings sorted by `lineCount`, largest first.
 */
export function collectLargeFileWarnings(cwd, files, {
  largeFileWarningLines = 500,
  largeSpecWarningLines = 300,
} = {}) {
  const candidates = Array.isArray(files) ? files : []
  const visited = new Set()
  const found = []

  candidates.forEach((entry) => {
    const relativePath = String(entry ?? '').trim()
    if (relativePath === '' || visited.has(relativePath)) {
      return
    }
    visited.add(relativePath)

    const absolutePath = path.resolve(cwd, relativePath)
    let contents = ''
    try {
      contents = readFileSync(absolutePath, 'utf8')
    } catch {
      // Unreadable paths are skipped rather than reported.
      return
    }

    const lineCount = countLines(contents)
    let kind = null
    if (isSpecLikeFile(relativePath) && lineCount >= largeSpecWarningLines) {
      kind = 'large_spec'
    } else if (lineCount >= largeFileWarningLines) {
      kind = 'large_file'
    }

    if (kind !== null) {
      found.push({
        file: relativePath,
        lineCount,
        kind,
      })
    }
  })

  found.sort((left, right) => right.lineCount - left.lineCount)
  return found
}
|
|
286
|
+
|
|
225
287
|
export async function runShellCommand({
|
|
226
288
|
cwd,
|
|
227
289
|
command,
|