@sebastianandreasson/pi-autonomous-agents 0.10.0 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -0
- package/docs/PI_SUPERVISOR.md +1 -1
- package/package.json +3 -2
- package/src/pi-debug-live.mjs +52 -16
- package/src/pi-prompts.mjs +38 -2
- package/src/pi-supervisor.mjs +37 -6
- package/src/pi-visualizer-server.mjs +23 -5
- package/src/pi-visualizer-shared.mjs +29 -10
- package/visualizer-ui/dist/assets/index-C5V0jXPE.css +1 -0
- package/visualizer-ui/dist/assets/index-CpHvuv0C.js +12 -0
- package/visualizer-ui/dist/index.html +2 -2
- package/visualizer-ui/dist/assets/index-C398cGuP.js +0 -12
- package/visualizer-ui/dist/assets/index-DuJxYqkl.css +0 -1
package/README.md
CHANGED
|
@@ -326,6 +326,13 @@ For local visualizer iteration against fake live SDK agent:
|
|
|
326
326
|
npm run debug:live-ui
|
|
327
327
|
```
|
|
328
328
|
|
|
329
|
+
Scenario variants:
|
|
330
|
+
|
|
331
|
+
```bash
|
|
332
|
+
node src/cli.mjs debug-live --reset --scenario noisy --task-count 24
|
|
333
|
+
node src/cli.mjs debug-live --reset --scenario retry
|
|
334
|
+
```
|
|
335
|
+
|
|
329
336
|
For React/Vite visualizer UI dev loop:
|
|
330
337
|
|
|
331
338
|
```bash
|
|
@@ -338,6 +345,8 @@ For production visualizer UI build:
|
|
|
338
345
|
npm run build:visualizer:ui
|
|
339
346
|
```
|
|
340
347
|
|
|
348
|
+
Publishing now automatically runs the check, the tests, and the UI build via the `prepublishOnly` script.
|
|
349
|
+
|
|
341
350
|
`npm run debug:live-ui` seeds `.pi-debug/live-ui/`, runs the harness there with a streaming fake SDK fixture, hosts the visualizer, and gives a stable local repro loop for UI work. The React app lives in `visualizer-ui/`. The visualizer server now serves built assets from `visualizer-ui/dist/` and falls back to a build-instructions page if build artifacts are missing.
|
|
342
351
|
|
|
343
352
|
See `docs/VISUALIZER_UI_PLAN.md` for migration plan.
|
package/docs/PI_SUPERVISOR.md
CHANGED
|
@@ -62,7 +62,7 @@ The package reads `PI_CONFIG_FILE` if provided. Otherwise it falls back to the b
|
|
|
62
62
|
|
|
63
63
|
Visualizer reads active-run lock, TODO file, per-run state, per-run iteration summary, per-run last output snapshot, live feed JSONL, and telemetry to show current stage plus historical runs.
|
|
64
64
|
|
|
65
|
-
For local UI iteration in this package repo, use `pi-harness debug-live` to run against seeded fake live SDK sandbox.
|
|
65
|
+
For local UI iteration in this package repo, use `pi-harness debug-live` to run against seeded fake live SDK sandbox. Useful variants: `--scenario noisy`, `--scenario retry`, `--task-count 24`.
|
|
66
66
|
|
|
67
67
|
## Config Contract
|
|
68
68
|
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@sebastianandreasson/pi-autonomous-agents",
|
|
3
3
|
"private": false,
|
|
4
|
-
"version": "0.10.0",
|
|
4
|
+
"version": "0.11.0",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"description": "Portable unattended PI harness for developer/tester/visual-review loops.",
|
|
7
7
|
"license": "MIT",
|
|
@@ -23,7 +23,8 @@
|
|
|
23
23
|
"test": "node --test test/pi-heartbeat.test.mjs test/pi-lifecycle.test.mjs test/pi-role-models.test.mjs test/pi-flow.test.mjs test/pi-history.test.mjs test/pi-prompts.test.mjs test/pi-preflight.test.mjs test/pi-repo.test.mjs test/pi-sdk-supervisor.test.mjs test/pi-sdk-turn.test.mjs test/pi-telemetry.test.mjs test/pi-visualizer-shared.test.mjs",
|
|
24
24
|
"debug:live-ui": "node src/cli.mjs debug-live --reset",
|
|
25
25
|
"dev:visualizer:ui": "npm --prefix visualizer-ui run dev",
|
|
26
|
-
"build:visualizer:ui": "npm --prefix visualizer-ui run build"
|
|
26
|
+
"build:visualizer:ui": "npm --prefix visualizer-ui run build",
|
|
27
|
+
"prepublishOnly": "npm run check && npm test && npm run build:visualizer:ui"
|
|
27
28
|
},
|
|
28
29
|
"files": [
|
|
29
30
|
"src",
|
package/src/pi-debug-live.mjs
CHANGED
|
@@ -12,11 +12,51 @@ const cliFile = path.join(scriptDir, 'cli.mjs')
|
|
|
12
12
|
const fakePiFile = path.join(packageRoot, 'test', 'fixtures', 'fake-pi.mjs')
|
|
13
13
|
const fakeLiveSdkFile = path.join(packageRoot, 'test', 'fixtures', 'fake-live-pi-sdk.mjs')
|
|
14
14
|
const sandboxDir = path.join(packageRoot, '.pi-debug', 'live-ui')
|
|
15
|
+
const DEFAULT_TASK_COUNT = 12
|
|
15
16
|
|
|
16
17
|
function shellQuote(value) {
|
|
17
18
|
return JSON.stringify(String(value))
|
|
18
19
|
}
|
|
19
20
|
|
|
21
|
+
/**
 * Read the value supplied for a command-line flag from `process.argv`.
 *
 * Supports both `--flag value` and `--flag=value`. The first occurrence wins.
 *
 * @param {string} flag - Flag name including leading dashes, e.g. `--scenario`.
 * @returns {string} Trimmed value, or `''` when the flag is absent or has no value.
 */
function readFlagValue(flag) {
  const args = process.argv
  for (let index = 0; index < args.length; index += 1) {
    const arg = String(args[index])
    if (arg.startsWith(`${flag}=`)) {
      return arg.slice(flag.length + 1).trim()
    }
    if (arg === flag) {
      const next = String(args[index + 1] ?? '').trim()
      // A following `--other-flag` is a separate option, not this flag's value.
      return next.startsWith('--') ? '' : next
    }
  }
  return ''
}
|
|
28
|
+
|
|
29
|
+
/**
 * Resolve the fake-live scenario name.
 *
 * Precedence: `--scenario` CLI flag, then the `PI_FAKE_LIVE_SCENARIO`
 * environment variable, then `'default'`.
 *
 * @returns {string} Non-empty, trimmed scenario name.
 */
function readScenario() {
  const fallback = 'default'
  const candidates = [
    readFlagValue('--scenario'),
    process.env.PI_FAKE_LIVE_SCENARIO,
    fallback,
  ]
  // First truthy candidate wins; the fallback guarantees a match.
  const chosen = candidates.find((candidate) => Boolean(candidate))
  const cleaned = String(chosen).trim()
  return cleaned === '' ? fallback : cleaned
}
|
|
33
|
+
|
|
34
|
+
/**
 * Resolve how many fake TODO tasks to seed.
 *
 * Precedence: `--task-count` CLI flag, then the `PI_DEBUG_TASK_COUNT`
 * environment variable, then `DEFAULT_TASK_COUNT`.
 *
 * @returns {number} Positive integer; `DEFAULT_TASK_COUNT` when the input
 *   does not parse to a finite number greater than zero.
 */
function readTaskCount() {
  const source = readFlagValue('--task-count')
    || process.env.PI_DEBUG_TASK_COUNT
    || `${DEFAULT_TASK_COUNT}`
  const parsed = Number.parseInt(source, 10)
  if (!Number.isFinite(parsed) || parsed <= 0) {
    return DEFAULT_TASK_COUNT
  }
  return parsed
}
|
|
38
|
+
|
|
39
|
+
/**
 * Build the markdown body of the seeded TODO file.
 *
 * Tasks are numbered from 1 and split into up to three phases by thirds of
 * `taskCount`. Each phase heading is emitted once, followed by a blank line
 * and that phase's unchecked task items.
 *
 * @param {number} taskCount - Number of tasks to generate.
 * @returns {string} Markdown text ending with a newline.
 */
function buildTodoLines(taskCount) {
  const firstCutoff = Math.ceil(taskCount / 3)
  const secondCutoff = Math.ceil((taskCount * 2) / 3)
  const lines = []
  // Track the phase explicitly: the last pushed line is always a blank or a
  // task item, so it cannot be used to detect whether the heading was written.
  let currentPhase = ''

  for (let index = 1; index <= taskCount; index += 1) {
    let phase = 'Phase 3'
    if (index <= firstCutoff) {
      phase = 'Phase 1'
    } else if (index <= secondCutoff) {
      phase = 'Phase 2'
    }

    if (phase !== currentPhase) {
      if (lines.length > 0) {
        lines.push('')
      }
      lines.push(`## ${phase}`, '')
      currentPhase = phase
    }

    lines.push(`- [ ] Fake live task ${index}`)
  }

  return `${lines.join('\n')}\n`
}
|
|
59
|
+
|
|
20
60
|
async function ensureRepo(cwd) {
|
|
21
61
|
try {
|
|
22
62
|
execFileSync('git', ['rev-parse', '--is-inside-work-tree'], { cwd, stdio: 'ignore' })
|
|
@@ -27,21 +67,11 @@ async function ensureRepo(cwd) {
|
|
|
27
67
|
}
|
|
28
68
|
}
|
|
29
69
|
|
|
30
|
-
async function seedFiles(cwd) {
|
|
70
|
+
async function seedFiles(cwd, { taskCount, scenario }) {
|
|
31
71
|
await fs.mkdir(path.join(cwd, 'pi'), { recursive: true })
|
|
32
|
-
await fs.writeFile(path.join(cwd, 'TODOS.md'),
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
'- [ ] Fake live task one',
|
|
36
|
-
'- [ ] Fake live task two',
|
|
37
|
-
'- [ ] Fake live task three',
|
|
38
|
-
'',
|
|
39
|
-
'## Phase 2',
|
|
40
|
-
'',
|
|
41
|
-
'- [ ] Fake live task four',
|
|
42
|
-
].join('\n') + '\n', 'utf8')
|
|
43
|
-
await fs.writeFile(path.join(cwd, 'DEVELOPER.md'), 'Developer instructions for local visualizer debugging.\n', 'utf8')
|
|
44
|
-
await fs.writeFile(path.join(cwd, 'TESTER.md'), 'Tester instructions for local visualizer debugging.\n', 'utf8')
|
|
72
|
+
await fs.writeFile(path.join(cwd, 'TODOS.md'), buildTodoLines(taskCount), 'utf8')
|
|
73
|
+
await fs.writeFile(path.join(cwd, 'DEVELOPER.md'), `Developer instructions for local visualizer debugging.\nScenario: ${scenario}\n`, 'utf8')
|
|
74
|
+
await fs.writeFile(path.join(cwd, 'TESTER.md'), `Tester instructions for local visualizer debugging.\nScenario: ${scenario}\n`, 'utf8')
|
|
45
75
|
await fs.writeFile(path.join(cwd, 'pi.config.json'), `${JSON.stringify({
|
|
46
76
|
transport: 'sdk',
|
|
47
77
|
taskFile: 'TODOS.md',
|
|
@@ -63,7 +93,7 @@ async function seedFiles(cwd) {
|
|
|
63
93
|
toolContinueAfterSeconds: 3600,
|
|
64
94
|
toolNoEventTimeoutSeconds: 3600,
|
|
65
95
|
sleepBetweenSeconds: 1,
|
|
66
|
-
maxIterations: 20,
|
|
96
|
+
maxIterations: Math.max(taskCount * 3, 20),
|
|
67
97
|
}, null, 2)}\n`, 'utf8')
|
|
68
98
|
}
|
|
69
99
|
|
|
@@ -78,17 +108,22 @@ async function ensureInitialCommit(cwd) {
|
|
|
78
108
|
|
|
79
109
|
async function main() {
|
|
80
110
|
const reset = process.argv.includes('--reset')
|
|
111
|
+
const scenario = readScenario()
|
|
112
|
+
const taskCount = readTaskCount()
|
|
113
|
+
|
|
81
114
|
if (reset) {
|
|
82
115
|
await fs.rm(sandboxDir, { recursive: true, force: true })
|
|
83
116
|
}
|
|
84
117
|
|
|
85
118
|
await fs.mkdir(sandboxDir, { recursive: true })
|
|
86
119
|
await ensureRepo(sandboxDir)
|
|
87
|
-
await seedFiles(sandboxDir)
|
|
120
|
+
await seedFiles(sandboxDir, { taskCount, scenario })
|
|
88
121
|
await ensureInitialCommit(sandboxDir)
|
|
89
122
|
|
|
90
123
|
process.stdout.write(`PI debug sandbox: ${sandboxDir}\n`)
|
|
91
124
|
process.stdout.write(`Using fake live SDK fixture: ${fakeLiveSdkFile}\n`)
|
|
125
|
+
process.stdout.write(`Scenario: ${scenario}\n`)
|
|
126
|
+
process.stdout.write(`Task count: ${taskCount}\n`)
|
|
92
127
|
|
|
93
128
|
const child = spawn(process.execPath, [cliFile, 'run'], {
|
|
94
129
|
cwd: sandboxDir,
|
|
@@ -96,6 +131,7 @@ async function main() {
|
|
|
96
131
|
...process.env,
|
|
97
132
|
PI_CONFIG_FILE: 'pi.config.json',
|
|
98
133
|
PI_SDK_MODULE: fakeLiveSdkFile,
|
|
134
|
+
PI_FAKE_LIVE_SCENARIO: scenario,
|
|
99
135
|
PI_VISUALIZER_HOST: process.env.PI_VISUALIZER_HOST || '127.0.0.1',
|
|
100
136
|
PI_VISUALIZER_PORT: process.env.PI_VISUALIZER_PORT || '4317',
|
|
101
137
|
},
|
package/src/pi-prompts.mjs
CHANGED
|
@@ -119,6 +119,36 @@ function repoInstructionsAuthorityLine(config, instructionsFile, usesBundledInst
|
|
|
119
119
|
return `Repo-local instructions in ${displayPath(config, instructionsFile)} are the primary role contract. Follow them over package defaults when they differ.\n`
|
|
120
120
|
}
|
|
121
121
|
|
|
122
|
+
/**
 * Classify a TODO task as test-focused or general from its text.
 *
 * A task is `'test'` when an action verb (write/add/create/implement/expand/
 * improve/fix/update) and a testing noun (test/tests/coverage/spec/specs)
 * both appear as whole words, in either order. Matching is case-insensitive.
 *
 * @param {unknown} task - Task text; nullish values are treated as empty.
 * @returns {'test' | 'general'} Task type label.
 */
export function classifyTaskType(task) {
  const normalized = String(task ?? '').trim().toLowerCase()
  const verbThenTest = /\b(write|add|create|implement|expand|improve|fix|update)\b.*\b(test|tests|coverage|regression test|spec|specs)\b/
  const testThenVerb = /\b(test|tests|coverage|regression test|spec|specs)\b.*\b(write|add|create|implement|expand|improve|fix|update)\b/
  const looksTestFocused = normalized !== ''
    && (verbThenTest.test(normalized) || testThenVerb.test(normalized))
  return looksTestFocused ? 'test' : 'general'
}
|
|
137
|
+
|
|
138
|
+
/**
 * Produce the extra tester guidance text for a task type.
 *
 * @param {string} taskType - Task type label; only `'test'` yields guidance.
 * @returns {string} Newline-joined guidance block, or `''` for any other type.
 */
function formatTaskTypeGuidance(taskType) {
  const guidance = taskType === 'test'
    ? [
      'Test-task guidance:',
      '- This TODO is primarily test-focused. Do not fail solely because changes are mostly or entirely tests.',
      '- PASS if the new or updated test adds meaningful behavioral or regression coverage and verification passes.',
      '- FAIL if the test is brittle, redundant, weakly asserted, or not tied to real behavior.',
      '- Prefer checking whether the test would have failed before the change, or whether developer notes justify why missing coverage mattered.',
    ]
    : []
  return guidance.join('\n')
}
|
|
151
|
+
|
|
122
152
|
function testerPassOwnershipRules(config) {
|
|
123
153
|
if (config.commitMode === 'plan') {
|
|
124
154
|
return {
|
|
@@ -353,6 +383,9 @@ export function buildTesterPrompt(config, {
|
|
|
353
383
|
developerNotes || '(none provided)',
|
|
354
384
|
configMaxLines(config, 'maxPromptNotesLines', 16),
|
|
355
385
|
)
|
|
386
|
+
const taskType = classifyTaskType(task)
|
|
387
|
+
const taskTypeLabel = taskType === 'test' ? 'test-focused' : 'general'
|
|
388
|
+
const taskTypeGuidance = formatTaskTypeGuidance(taskType)
|
|
356
389
|
const verificationCommand = config.testCommand.trim() === '' ? '(not configured)' : config.testCommand
|
|
357
390
|
const visualCaptureNote = config.visualReviewEnabled
|
|
358
391
|
? `\n- Keep the screenshot capture flow working so the harness still produces current visual artifacts for review.`
|
|
@@ -364,6 +397,7 @@ export function buildTesterPrompt(config, {
|
|
|
364
397
|
)
|
|
365
398
|
const passOwnership = testerPassOwnershipRules(config)
|
|
366
399
|
const largeFileRiskHint = formatLargeFileRiskHint(largeFileWarnings)
|
|
400
|
+
const taskTypeRuleBlock = taskTypeGuidance === '' ? '' : `${taskTypeGuidance}\n`
|
|
367
401
|
|
|
368
402
|
if (!config.usingBundledTesterInstructions) {
|
|
369
403
|
return `Read ${taskFile} and ${instructionsFile}.
|
|
@@ -375,6 +409,7 @@ You are the TESTER role. You are reviewing the most recent developer work from a
|
|
|
375
409
|
|
|
376
410
|
Current phase: ${phase}
|
|
377
411
|
Current task: ${task}
|
|
412
|
+
Current task type: ${taskTypeLabel}
|
|
378
413
|
Reason for this tester pass: ${reason}
|
|
379
414
|
|
|
380
415
|
Developer notes:
|
|
@@ -391,7 +426,7 @@ Rules:
|
|
|
391
426
|
- If a snippet seems incomplete, reread a smaller exact window with read instead of another large overlapping shell range.
|
|
392
427
|
- If blocked or inconclusive, return VERDICT: BLOCKED.
|
|
393
428
|
- Do not hide real bugs with brittle tests.
|
|
394
|
-
- ${passOwnership.successRule.slice(2)}
|
|
429
|
+
${taskTypeRuleBlock}- ${passOwnership.successRule.slice(2)}
|
|
395
430
|
- ${passOwnership.isolationRule.slice(2)}
|
|
396
431
|
- ${passOwnership.extraRule.slice(2)}${visualCaptureNote}
|
|
397
432
|
|
|
@@ -417,6 +452,7 @@ You are the TESTER role. You are reviewing the most recent developer work from a
|
|
|
417
452
|
|
|
418
453
|
Current phase: ${phase}
|
|
419
454
|
Current task: ${task}
|
|
455
|
+
Current task type: ${taskTypeLabel}
|
|
420
456
|
Reason for this tester pass: ${reason}
|
|
421
457
|
|
|
422
458
|
Developer notes:
|
|
@@ -433,7 +469,7 @@ ${indentBlock(innerLoopValidationRules(verificationCommand), '\t')}
|
|
|
433
469
|
- Prefer one focused browser-driven review pass.
|
|
434
470
|
- If a snippet seems incomplete, reread a smaller exact window with read instead of another large overlapping shell range.
|
|
435
471
|
- Do not hide real bugs with brittle tests.
|
|
436
|
-
- If blocked or inconclusive, return VERDICT: BLOCKED.
|
|
472
|
+
${taskTypeGuidance === '' ? '' : `${indentBlock(taskTypeGuidance, '\t')}\n`} - If blocked or inconclusive, return VERDICT: BLOCKED.
|
|
437
473
|
${indentBlock(passOwnership.successRule, '\t')}
|
|
438
474
|
${indentBlock(passOwnership.isolationRule, '\t')}
|
|
439
475
|
${indentBlock(passOwnership.extraRule, '\t')}${visualCaptureNote}
|
package/src/pi-supervisor.mjs
CHANGED
|
@@ -318,6 +318,18 @@ function isInfrastructureVerificationFailure(output) {
|
|
|
318
318
|
].some((pattern) => text.includes(pattern))
|
|
319
319
|
}
|
|
320
320
|
|
|
321
|
+
/**
 * Reduce command output to a bounded excerpt of its tail.
 *
 * The text is trimmed, limited to its last `maxLines` lines, and then, if
 * still longer than `maxChars`, cut down to its final characters. Because the
 * beginning is what gets dropped, the truncation marker is placed at the
 * start of the excerpt.
 *
 * @param {unknown} output - Raw output; nullish values are treated as empty.
 * @param {number} [maxChars=4000] - Upper bound on excerpt length when the
 *   marker fits; smaller budgets yield the marker alone.
 * @param {number} [maxLines=40] - Maximum number of trailing lines to keep.
 * @returns {string} Excerpt, or `''` when the output is empty.
 */
function formatOutputExcerpt(output, maxChars = 4000, maxLines = 40) {
  const text = String(output ?? '').trim()
  if (text === '') {
    return ''
  }

  const excerpt = text.split('\n').slice(-maxLines).join('\n')
  if (excerpt.length <= maxChars) {
    return excerpt
  }

  const marker = '... [truncated]\n'
  // Reserve room for the marker; never let the kept length go negative.
  const keep = Math.max(0, maxChars - marker.length)
  return `${marker}${excerpt.slice(excerpt.length - keep)}`
}
|
|
332
|
+
|
|
321
333
|
async function recordEvent(config, event) {
|
|
322
334
|
await appendTelemetry(config, {
|
|
323
335
|
timestamp: timestamp(),
|
|
@@ -345,6 +357,7 @@ async function runAgentInvocation({
|
|
|
345
357
|
activeKind: kind,
|
|
346
358
|
activeRole: role,
|
|
347
359
|
activeReason: reason,
|
|
360
|
+
activeStartedAt: timestamp(),
|
|
348
361
|
})
|
|
349
362
|
|
|
350
363
|
const beforeSnapshot = getRepoSnapshot(config.cwd)
|
|
@@ -533,6 +546,7 @@ async function runHarnessGitFinalize({
|
|
|
533
546
|
activeKind: 'git_finalize',
|
|
534
547
|
activeRole: '',
|
|
535
548
|
activeReason: '',
|
|
549
|
+
activeStartedAt: timestamp(),
|
|
536
550
|
})
|
|
537
551
|
|
|
538
552
|
const beforeSnapshot = getRepoSnapshot(config.cwd)
|
|
@@ -653,6 +667,7 @@ async function runVerificationStep({ config, iteration, phase, kind }) {
|
|
|
653
667
|
activeKind: kind,
|
|
654
668
|
activeRole: '',
|
|
655
669
|
activeReason: '',
|
|
670
|
+
activeStartedAt: timestamp(),
|
|
656
671
|
})
|
|
657
672
|
|
|
658
673
|
const beforeSnapshot = getRepoSnapshot(config.cwd)
|
|
@@ -696,6 +711,7 @@ async function runVerificationStep({ config, iteration, phase, kind }) {
|
|
|
696
711
|
commitPlanFound: '',
|
|
697
712
|
terminalReason: `verification_${verification.status}`,
|
|
698
713
|
notes: verificationNotes,
|
|
714
|
+
outputExcerpt: formatOutputExcerpt(verification.output),
|
|
699
715
|
})
|
|
700
716
|
|
|
701
717
|
return verification
|
|
@@ -820,6 +836,10 @@ async function runDeveloperVerificationAndFix({
|
|
|
820
836
|
let nextSessionId = sessionId
|
|
821
837
|
let nextSessionFile = sessionFile
|
|
822
838
|
let verificationStatus = verification.status
|
|
839
|
+
let verificationOutput = verification.output
|
|
840
|
+
let feedbackSource = (verification.status === 'failed' || verification.status === 'timed_out')
|
|
841
|
+
? 'developer_verification'
|
|
842
|
+
: ''
|
|
823
843
|
|
|
824
844
|
if (verification.status === 'failed' || verification.status === 'timed_out') {
|
|
825
845
|
if (isInfrastructureVerificationFailure(verification.output)) {
|
|
@@ -831,8 +851,8 @@ async function runDeveloperVerificationAndFix({
|
|
|
831
851
|
verificationStatus,
|
|
832
852
|
sessionId: nextSessionId,
|
|
833
853
|
sessionFile: nextSessionFile,
|
|
834
|
-
verificationOutput
|
|
835
|
-
feedbackSource
|
|
854
|
+
verificationOutput,
|
|
855
|
+
feedbackSource,
|
|
836
856
|
}
|
|
837
857
|
}
|
|
838
858
|
|
|
@@ -859,8 +879,11 @@ async function runDeveloperVerificationAndFix({
|
|
|
859
879
|
})
|
|
860
880
|
|
|
861
881
|
verificationStatus = reverify.status
|
|
882
|
+
verificationOutput = reverify.output
|
|
883
|
+
feedbackSource = reverify.status === 'passed' ? '' : 'developer_reverification'
|
|
862
884
|
} else {
|
|
863
885
|
verificationStatus = 'not_run'
|
|
886
|
+
feedbackSource = 'developer_verification'
|
|
864
887
|
}
|
|
865
888
|
}
|
|
866
889
|
|
|
@@ -869,10 +892,8 @@ async function runDeveloperVerificationAndFix({
|
|
|
869
892
|
verificationStatus,
|
|
870
893
|
sessionId: nextSessionId,
|
|
871
894
|
sessionFile: nextSessionFile,
|
|
872
|
-
verificationOutput
|
|
873
|
-
feedbackSource
|
|
874
|
-
? 'developer_verification'
|
|
875
|
-
: '',
|
|
895
|
+
verificationOutput,
|
|
896
|
+
feedbackSource,
|
|
876
897
|
}
|
|
877
898
|
}
|
|
878
899
|
|
|
@@ -1026,6 +1047,7 @@ async function runVisualReview({ config, iteration, phase, task, changedFiles })
|
|
|
1026
1047
|
activeKind: 'visual_capture',
|
|
1027
1048
|
activeRole: '',
|
|
1028
1049
|
activeReason: '',
|
|
1050
|
+
activeStartedAt: timestamp(),
|
|
1029
1051
|
})
|
|
1030
1052
|
|
|
1031
1053
|
const capture = await runVisualCapture(config, {
|
|
@@ -1082,6 +1104,7 @@ async function runVisualReview({ config, iteration, phase, task, changedFiles })
|
|
|
1082
1104
|
activeKind: 'visual_review',
|
|
1083
1105
|
activeRole: 'visualReview',
|
|
1084
1106
|
activeReason: '',
|
|
1107
|
+
activeStartedAt: timestamp(),
|
|
1085
1108
|
})
|
|
1086
1109
|
|
|
1087
1110
|
const visualReviewModel = resolveRoleModel(config, 'visualReview')
|
|
@@ -1180,6 +1203,10 @@ async function runIteration({ config, state, iteration }) {
|
|
|
1180
1203
|
phase: taskInfo.phase || 'complete',
|
|
1181
1204
|
task: '',
|
|
1182
1205
|
lastCompletedIteration: iteration,
|
|
1206
|
+
activeKind: '',
|
|
1207
|
+
activeRole: '',
|
|
1208
|
+
activeReason: '',
|
|
1209
|
+
activeStartedAt: '',
|
|
1183
1210
|
})
|
|
1184
1211
|
await appendLog(config.logFile, 'No unchecked tasks remain in TODOS.md')
|
|
1185
1212
|
return {
|
|
@@ -1248,6 +1275,7 @@ async function runIteration({ config, state, iteration }) {
|
|
|
1248
1275
|
activeKind: '',
|
|
1249
1276
|
activeRole: '',
|
|
1250
1277
|
activeReason: '',
|
|
1278
|
+
activeStartedAt: '',
|
|
1251
1279
|
})
|
|
1252
1280
|
const canResumePriorSession = (
|
|
1253
1281
|
state.lastTransport === config.transport
|
|
@@ -1631,6 +1659,7 @@ async function runIteration({ config, state, iteration }) {
|
|
|
1631
1659
|
activeKind: '',
|
|
1632
1660
|
activeRole: '',
|
|
1633
1661
|
activeReason: '',
|
|
1662
|
+
activeStartedAt: '',
|
|
1634
1663
|
})
|
|
1635
1664
|
|
|
1636
1665
|
await appendLog(
|
|
@@ -1799,6 +1828,7 @@ async function main() {
|
|
|
1799
1828
|
activeKind: '',
|
|
1800
1829
|
activeRole: '',
|
|
1801
1830
|
activeReason: '',
|
|
1831
|
+
activeStartedAt: '',
|
|
1802
1832
|
})
|
|
1803
1833
|
const result = await runIteration({ config, state, iteration })
|
|
1804
1834
|
await writeIterationSummary(config, result.iterationSummary ?? result.summary)
|
|
@@ -1828,6 +1858,7 @@ async function main() {
|
|
|
1828
1858
|
activeKind: '',
|
|
1829
1859
|
activeRole: '',
|
|
1830
1860
|
activeReason: '',
|
|
1861
|
+
activeStartedAt: '',
|
|
1831
1862
|
})
|
|
1832
1863
|
if (visualizer) {
|
|
1833
1864
|
await visualizer.close().catch(() => {})
|
|
@@ -192,6 +192,17 @@ function readRepoDiff(cwd) {
|
|
|
192
192
|
}
|
|
193
193
|
}
|
|
194
194
|
|
|
195
|
+
/**
 * Sort comparator for feed entries.
 *
 * When both entries carry a finite `seq` and the values differ, order by
 * `seq`. Otherwise order by the string form of `timestamp`.
 *
 * @param {{ seq?: unknown, timestamp?: unknown } | null | undefined} left
 * @param {{ seq?: unknown, timestamp?: unknown } | null | undefined} right
 * @returns {number} Negative, zero, or positive per the usual comparator contract.
 */
function compareSequencedEntries(left, right) {
  const [leftOrder, rightOrder] = [left, right].map((entry) => Number(entry?.seq ?? Number.NaN))
  const bothSequenced = Number.isFinite(leftOrder) && Number.isFinite(rightOrder)

  if (!bothSequenced || leftOrder === rightOrder) {
    const leftStamp = String(left?.timestamp ?? '')
    const rightStamp = String(right?.timestamp ?? '')
    return leftStamp.localeCompare(rightStamp)
  }

  return leftOrder - rightOrder
}
|
|
205
|
+
|
|
195
206
|
function getRunDir(config, runId) {
|
|
196
207
|
return path.join(config.piRuntimeDir, 'runs', runId)
|
|
197
208
|
}
|
|
@@ -274,19 +285,26 @@ export async function buildSnapshot(config, queryRunId = '') {
|
|
|
274
285
|
readJsonlTail(selectedConfig.liveFeedFile, { maxItems: 300, maxBytes: 768 * 1024 }),
|
|
275
286
|
])
|
|
276
287
|
|
|
277
|
-
const
|
|
288
|
+
const flowOptions = {
|
|
289
|
+
includeVisualReview: config.visualReviewEnabled === true,
|
|
290
|
+
}
|
|
291
|
+
const telemetryWithVizIds = telemetry.map((event, index) => ({
|
|
278
292
|
...event,
|
|
279
293
|
_vizId: `telemetry-${index}`,
|
|
280
294
|
}))
|
|
295
|
+
const sortedLiveFeed = [...liveFeed].sort(compareSequencedEntries)
|
|
296
|
+
const recentTelemetry = telemetryWithVizIds.slice(-160)
|
|
281
297
|
const flow = deriveFlowSnapshot({
|
|
282
298
|
activeRun: selectedRunId !== '' && String(activeRun?.runId ?? '') === selectedRunId ? activeRun : state?.inProgress ?? null,
|
|
283
299
|
summary,
|
|
284
|
-
telemetry,
|
|
300
|
+
telemetry: telemetryWithVizIds,
|
|
301
|
+
options: flowOptions,
|
|
285
302
|
})
|
|
286
303
|
const graph = deriveStageGraph({
|
|
287
304
|
activeRun: selectedRunId !== '' && String(activeRun?.runId ?? '') === selectedRunId ? activeRun : state?.inProgress ?? null,
|
|
288
305
|
summary,
|
|
289
|
-
telemetry,
|
|
306
|
+
telemetry: telemetryWithVizIds,
|
|
307
|
+
options: flowOptions,
|
|
290
308
|
})
|
|
291
309
|
|
|
292
310
|
const selectedRunIsActive = selectedRunId !== '' && String(activeRun?.runId ?? '') === selectedRunId
|
|
@@ -313,13 +331,13 @@ export async function buildSnapshot(config, queryRunId = '') {
|
|
|
313
331
|
summary,
|
|
314
332
|
flow: {
|
|
315
333
|
...flow,
|
|
316
|
-
activeLabel: formatActiveLabel(activeRun, flow),
|
|
334
|
+
activeLabel: formatActiveLabel(activeRun, flow, flowOptions),
|
|
317
335
|
},
|
|
318
336
|
graph,
|
|
319
337
|
todos,
|
|
320
338
|
currentEdits,
|
|
321
339
|
lastOutput: currentOutput,
|
|
322
|
-
liveFeed,
|
|
340
|
+
liveFeed: sortedLiveFeed,
|
|
323
341
|
recentTelemetry,
|
|
324
342
|
}
|
|
325
343
|
}
|
|
@@ -4,8 +4,8 @@ const FLOW_STEPS = [
|
|
|
4
4
|
{ key: 'tester', label: 'Tester' },
|
|
5
5
|
{ key: 'fix', label: 'Fix' },
|
|
6
6
|
{ key: 'git_finalize', label: 'Git Finalize' },
|
|
7
|
-
{ key: 'visual_capture', label: 'Visual Capture' },
|
|
8
|
-
{ key: 'visual_review', label: 'Visual Review' },
|
|
7
|
+
{ key: 'visual_capture', label: 'Visual Capture', feature: 'visualReview' },
|
|
8
|
+
{ key: 'visual_review', label: 'Visual Review', feature: 'visualReview' },
|
|
9
9
|
{ key: 'summary', label: 'Summary' },
|
|
10
10
|
]
|
|
11
11
|
|
|
@@ -27,8 +27,17 @@ const SUCCESS_STATUSES = new Set(['success', 'passed', 'complete'])
|
|
|
27
27
|
const ERROR_STATUSES = new Set(['failed', 'timed_out', 'stalled', 'blocked', 'canceled'])
|
|
28
28
|
const SKIP_STATUSES = new Set(['skipped', 'not_run', 'not_needed'])
|
|
29
29
|
|
|
30
|
-
|
|
31
|
-
|
|
30
|
+
/**
 * Decide whether a flow step is shown under the given options.
 *
 * Steps tagged with `feature: 'visualReview'` are included only when
 * `options.includeVisualReview` is strictly `true`; all other steps are
 * always included.
 *
 * @param {{ feature?: string }} step - Flow step definition.
 * @param {{ includeVisualReview?: boolean }} [options={}] - Feature toggles.
 * @returns {boolean} `true` when the step should be kept.
 */
function shouldIncludeStep(step, options = {}) {
  const needsVisualReview = step.feature === 'visualReview'
  return !needsVisualReview || options.includeVisualReview === true
}
|
|
36
|
+
|
|
37
|
+
/**
 * List the flow steps enabled by the given options.
 *
 * Each returned step is a shallow copy, so callers can modify the result
 * without touching the shared `FLOW_STEPS` definitions.
 *
 * @param {{ includeVisualReview?: boolean }} [options={}] - Feature toggles
 *   passed through to `shouldIncludeStep`.
 * @returns {Array<object>} Copies of the included steps, in `FLOW_STEPS` order.
 */
export function getFlowSteps(options = {}) {
  const included = []
  for (const step of FLOW_STEPS) {
    if (shouldIncludeStep(step, options)) {
      included.push({ ...step })
    }
  }
  return included
}
|
|
33
42
|
|
|
34
43
|
export function getLabelForKind(kind) {
|
|
@@ -124,13 +133,13 @@ export function deriveCurrentIteration({ activeRun, summary, telemetry }) {
|
|
|
124
133
|
return 0
|
|
125
134
|
}
|
|
126
135
|
|
|
127
|
-
export function deriveFlowSnapshot({ activeRun, summary, telemetry }) {
|
|
136
|
+
export function deriveFlowSnapshot({ activeRun, summary, telemetry, options = {} }) {
|
|
128
137
|
const currentIteration = deriveCurrentIteration({ activeRun, summary, telemetry })
|
|
129
138
|
const iterationTelemetry = Array.isArray(telemetry)
|
|
130
139
|
? telemetry.filter((event) => Number(event?.iteration) === currentIteration)
|
|
131
140
|
: []
|
|
132
141
|
const activeStepKey = getStepKeyForActiveRun(activeRun)
|
|
133
|
-
const steps =
|
|
142
|
+
const steps = getFlowSteps(options).map((step) => {
|
|
134
143
|
const matchingEvents = iterationTelemetry.filter((event) => getStepKeyForKind(event?.kind) === step.key)
|
|
135
144
|
const latestEvent = matchingEvents.at(-1) ?? null
|
|
136
145
|
const status = activeStepKey === step.key
|
|
@@ -139,10 +148,20 @@ export function deriveFlowSnapshot({ activeRun, summary, telemetry }) {
|
|
|
139
148
|
? normalizeEventStatus(latestEvent.status)
|
|
140
149
|
: 'pending'
|
|
141
150
|
|
|
151
|
+
const activeStartedAt = activeStepKey === step.key
|
|
152
|
+
? String(activeRun?.activeStartedAt ?? '')
|
|
153
|
+
: ''
|
|
154
|
+
const durationSeconds = latestEvent && Number.isFinite(Number(latestEvent.durationSeconds))
|
|
155
|
+
? Number(latestEvent.durationSeconds)
|
|
156
|
+
: null
|
|
157
|
+
|
|
142
158
|
return {
|
|
143
159
|
...step,
|
|
144
160
|
status,
|
|
145
161
|
latestEvent,
|
|
162
|
+
latestEventId: String(latestEvent?._vizId ?? ''),
|
|
163
|
+
activeStartedAt,
|
|
164
|
+
durationSeconds,
|
|
146
165
|
}
|
|
147
166
|
})
|
|
148
167
|
|
|
@@ -153,8 +172,8 @@ export function deriveFlowSnapshot({ activeRun, summary, telemetry }) {
|
|
|
153
172
|
}
|
|
154
173
|
}
|
|
155
174
|
|
|
156
|
-
export function deriveStageGraph({ activeRun, summary, telemetry }) {
|
|
157
|
-
const flow = deriveFlowSnapshot({ activeRun, summary, telemetry })
|
|
175
|
+
export function deriveStageGraph({ activeRun, summary, telemetry, options = {} }) {
|
|
176
|
+
const flow = deriveFlowSnapshot({ activeRun, summary, telemetry, options })
|
|
158
177
|
const currentIteration = flow.iteration
|
|
159
178
|
const iterationTelemetry = Array.isArray(telemetry)
|
|
160
179
|
? telemetry.filter((event) => Number(event?.iteration) === currentIteration)
|
|
@@ -196,10 +215,10 @@ export function deriveStageGraph({ activeRun, summary, telemetry }) {
|
|
|
196
215
|
}
|
|
197
216
|
}
|
|
198
217
|
|
|
199
|
-
export function formatActiveLabel(activeRun, flow) {
|
|
218
|
+
export function formatActiveLabel(activeRun, flow, options = {}) {
|
|
200
219
|
const activeStepKey = flow?.activeStepKey || getStepKeyForActiveRun(activeRun)
|
|
201
220
|
if (activeStepKey !== '') {
|
|
202
|
-
const step =
|
|
221
|
+
const step = getFlowSteps(options).find((entry) => entry.key === activeStepKey)
|
|
203
222
|
if (step) {
|
|
204
223
|
return step.label
|
|
205
224
|
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
:root{--bg: #0b1020;--panel: #121a30;--panel2: #17213d;--text: #e6edf7;--muted: #95a3bf;--line: #263252;--active: #6ee7ff;--done: #53d18d;--error: #ff6b81;--skip: #f0b35a;--pending: #4b5675}*{box-sizing:border-box}*{scrollbar-width:none;-ms-overflow-style:none}*::-webkit-scrollbar{width:0;height:0}html,body,#root{min-height:100%}body{margin:0;font:14px/1.4 ui-sans-serif,system-ui,-apple-system,BlinkMacSystemFont,Segoe UI,sans-serif;background:linear-gradient(180deg,#08101d,#0b1020 180px);color:var(--text)}button,input,select,textarea{font:inherit}button{color:inherit}.wrap{max-width:1400px;margin:0 auto;padding:20px}.header{display:flex;justify-content:space-between;gap:16px;align-items:flex-start;margin-bottom:20px}.title{font-size:28px;font-weight:700}.subtitle{color:var(--muted);margin-top:4px}.toolbar{display:flex;gap:12px;align-items:center;flex-wrap:wrap}.badge,select{display:inline-flex;align-items:center;gap:8px;padding:8px 12px;border-radius:999px;border:1px solid var(--line);background:#ffffff08;color:var(--text)}select{min-width:260px}.dot{width:10px;height:10px;border-radius:50%;background:var(--pending)}.dot.active{background:var(--active);box-shadow:0 0 18px #6ee7ff99}.grid{display:grid;gap:16px}.main-grid{grid-template-columns:minmax(320px,420px) 1fr;align-items:start}.side-grid{gap:16px}.detail-split{display:grid;grid-template-columns:1fr 1fr;gap:16px;margin-top:16px}.card{background:linear-gradient(180deg,#ffffff05,#ffffff03);border:1px solid var(--line);border-radius:16px;padding:16px;box-shadow:0 12px 40px #0000002e}.card-tight{padding:12px}.no-margin{margin:0}.label{color:var(--muted);font-size:12px;text-transform:uppercase;letter-spacing:.08em}.value{margin-top:8px;font-size:22px;font-weight:700}.value.small{font-size:16px}.todo-list{max-height:calc(100vh - 
140px);overflow:auto;padding-right:4px;display:grid;gap:12px;margin-top:12px}.todo-group{display:grid;gap:6px}.todo-group-heading{color:var(--muted);font-size:12px;font-weight:700;text-transform:uppercase;letter-spacing:.08em;padding:0 2px}.todo-group-items{display:grid;gap:6px}.todo-item{width:100%;border:1px solid var(--line);border-radius:12px;background:var(--panel);display:flex;gap:8px;align-items:flex-start;padding:9px 11px;text-align:left;cursor:pointer}.todo-item.active{border-color:var(--active);box-shadow:0 0 0 1px #6ee7ff40 inset}.todo-line{color:var(--muted);font-size:11px;min-width:36px;line-height:1.2}.todo-content{flex:1;min-width:0}.todo-task{font-weight:600;line-height:1.25}.todo-checked{color:var(--done)}.flow{display:grid;grid-template-columns:repeat(8,minmax(0,1fr));gap:10px;margin-top:14px}.step,.graph-node{border:1px solid var(--line);border-radius:14px;padding:12px;background:var(--panel);min-height:96px;position:relative;overflow:hidden}.step{width:100%;text-align:left}.step.clickable{cursor:pointer}.step.selected{box-shadow:0 0 0 1px #ffffff2e inset}.step:before,.graph-node:before{content:"";position:absolute;inset:0 auto 0 0;width:4px;background:var(--pending)}.step.active,.graph-node.active{border-color:var(--active);box-shadow:0 0 0 1px #6ee7ff59 inset,0 0 28px 
#6ee7ff1f}.step.active:before,.graph-node.active:before{background:var(--active)}.step.done:before,.graph-node.done:before{background:var(--done)}.step.error:before,.graph-node.error:before{background:var(--error)}.step.skipped:before,.graph-node.skipped:before{background:var(--skip)}.step-name{font-weight:700;margin-bottom:6px}.step-status{font-size:12px;text-transform:uppercase;letter-spacing:.08em;color:var(--muted)}.step-meta{margin-top:8px;color:var(--muted);font-size:12px;white-space:pre-wrap}.state-bar{display:flex;gap:10px;flex-wrap:wrap;margin-top:12px}.step-details-card{margin-top:12px}.step-details-summary{margin-top:8px;font-weight:700}.step-details-meta,.step-details-section{margin-top:10px;color:var(--muted);font-size:12px;text-transform:uppercase;letter-spacing:.08em}.state-chip{border:1px solid var(--line);border-radius:999px;padding:6px 10px;color:var(--muted);background:#ffffff08}.feed-toolbar{display:flex;gap:12px;align-items:center;flex-wrap:wrap;margin-top:12px;margin-bottom:10px}.feed-toggle{display:flex;gap:6px;align-items:center;color:var(--muted);font-size:12px}.feed-jump-row{display:flex;justify-content:flex-end;margin-bottom:8px}.feed-jump-button{border:1px solid var(--line);background:#6ee7ff14;color:var(--active);border-radius:999px;padding:6px 10px;cursor:pointer}.feed-jump-button:hover{background:#6ee7ff24}.feed{background:#0a1325;border:1px solid var(--line);border-radius:12px;padding:12px;max-height:320px;overflow:auto;font-family:ui-monospace,SFMono-Regular,Menlo,monospace}.feed-item{padding:8px 0;border-bottom:1px solid rgba(255,255,255,.06)}.feed-item:last-child{border-bottom:0}.feed-head{display:flex;gap:8px;align-items:center;flex-wrap:wrap}.feed-type{display:inline-flex;align-items:center;border:1px solid var(--line);border-radius:999px;padding:2px 
8px;font-size:11px;text-transform:uppercase;letter-spacing:.08em}.feed-type.agent_start,.feed-type.agent_end{color:var(--active)}.feed-type.thinking_delta{color:#b392f0}.feed-type.text_delta{color:var(--done)}.feed-type.tool_start,.feed-type.tool_update,.feed-type.tool_end{color:var(--skip)}.feed-meta{color:var(--muted);font-size:12px}.feed-text,.pinned-tool-text{white-space:pre-wrap;word-break:break-word;margin-top:6px}.feed-count{color:var(--muted);font-size:11px}.pinned-tool{background:#0a1325;border:1px solid var(--line);border-radius:12px;padding:12px;margin-top:12px}.pinned-tool-name{font-weight:700}.pinned-tool-meta{color:var(--muted);font-size:12px;margin-top:4px}.kv{display:grid;grid-template-columns:140px 1fr;gap:6px 10px;margin-top:12px}.kv div:nth-child(odd){color:var(--muted)}pre{margin:0;white-space:pre-wrap;word-break:break-word;background:#0a1325;border:1px solid var(--line);border-radius:12px;padding:12px;max-height:320px;overflow:auto}.graph{display:grid;grid-template-columns:repeat(auto-fit,minmax(180px,1fr));gap:12px;margin-top:12px}.graph-node{width:100%;min-height:120px;text-align:left;cursor:pointer}.table-wrap{margin-top:12px;overflow:auto;max-height:360px}table{width:100%;border-collapse:collapse}th,td{padding:10px 8px;border-bottom:1px solid var(--line);vertical-align:top;text-align:left}th{color:var(--muted);font-size:12px;text-transform:uppercase;letter-spacing:.08em}td{font-size:13px}tr[data-clickable="1"]{cursor:pointer}.status-pill{display:inline-block;border-radius:999px;padding:3px 8px;font-size:12px;font-weight:700;border:1px solid var(--line);background:var(--panel2)}.status-pill.done{color:var(--done)}.status-pill.error{color:var(--error)}.status-pill.skipped{color:var(--skip)}.status-pill.active{color:var(--active)}.edit-list{max-height:360px;overflow:auto;margin-top:12px}.edit-item{border:1px solid var(--line);border-radius:12px;margin-bottom:10px;overflow:hidden}.edit-head{display:block;list-style:none;padding:10px 
12px;background:#ffffff08;font-weight:600;cursor:pointer}.edit-head::-webkit-details-marker{display:none}.muted{color:var(--muted)}.bottom{margin-top:16px}.bottom summary{cursor:pointer;color:var(--muted);margin-bottom:10px}.diagnostics-grid{gap:16px}@media(max-width:1100px){.main-grid,.detail-split,.flow{grid-template-columns:1fr}.todo-list{max-height:none}}
|