@sebastianandreasson/pi-autonomous-agents 0.10.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -326,6 +326,13 @@ For local visualizer iteration against fake live SDK agent:
326
326
  npm run debug:live-ui
327
327
  ```
328
328
 
329
+ Scenario variants:
330
+
331
+ ```bash
332
+ node src/cli.mjs debug-live --reset --scenario noisy --task-count 24
333
+ node src/cli.mjs debug-live --reset --scenario retry
334
+ ```
335
+
329
336
  For React/Vite visualizer UI dev loop:
330
337
 
331
338
  ```bash
@@ -338,6 +345,8 @@ For production visualizer UI build:
338
345
  npm run build:visualizer:ui
339
346
  ```
340
347
 
348
+ Publish now auto-runs check, tests, and UI build via `prepublishOnly`.
349
+
341
350
  This seeds `.pi-debug/live-ui/`, runs harness there with streaming fake SDK fixture, hosts visualizer, and gives stable local repro loop for UI work. React app lives in `visualizer-ui/`. Visualizer server now serves built assets from `visualizer-ui/dist/` and falls back to build-instructions page if build artifacts are missing.
342
351
 
343
352
  See `docs/VISUALIZER_UI_PLAN.md` for migration plan.
@@ -62,7 +62,7 @@ The package reads `PI_CONFIG_FILE` if provided. Otherwise it falls back to the b
62
62
 
63
63
  Visualizer reads active-run lock, TODO file, per-run state, per-run iteration summary, per-run last output snapshot, live feed JSONL, and telemetry to show current stage plus historical runs.
64
64
 
65
- For local UI iteration in this package repo, use `pi-harness debug-live` to run against seeded fake live SDK sandbox.
65
+ For local UI iteration in this package repo, use `pi-harness debug-live` to run against seeded fake live SDK sandbox. Useful variants: `--scenario noisy`, `--scenario retry`, `--task-count 24`.
66
66
 
67
67
  ## Config Contract
68
68
 
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@sebastianandreasson/pi-autonomous-agents",
3
3
  "private": false,
4
- "version": "0.10.0",
4
+ "version": "0.11.0",
5
5
  "type": "module",
6
6
  "description": "Portable unattended PI harness for developer/tester/visual-review loops.",
7
7
  "license": "MIT",
@@ -23,7 +23,8 @@
23
23
  "test": "node --test test/pi-heartbeat.test.mjs test/pi-lifecycle.test.mjs test/pi-role-models.test.mjs test/pi-flow.test.mjs test/pi-history.test.mjs test/pi-prompts.test.mjs test/pi-preflight.test.mjs test/pi-repo.test.mjs test/pi-sdk-supervisor.test.mjs test/pi-sdk-turn.test.mjs test/pi-telemetry.test.mjs test/pi-visualizer-shared.test.mjs",
24
24
  "debug:live-ui": "node src/cli.mjs debug-live --reset",
25
25
  "dev:visualizer:ui": "npm --prefix visualizer-ui run dev",
26
- "build:visualizer:ui": "npm --prefix visualizer-ui run build"
26
+ "build:visualizer:ui": "npm --prefix visualizer-ui run build",
27
+ "prepublishOnly": "npm run check && npm test && npm run build:visualizer:ui"
27
28
  },
28
29
  "files": [
29
30
  "src",
@@ -12,11 +12,51 @@ const cliFile = path.join(scriptDir, 'cli.mjs')
12
12
  const fakePiFile = path.join(packageRoot, 'test', 'fixtures', 'fake-pi.mjs')
13
13
  const fakeLiveSdkFile = path.join(packageRoot, 'test', 'fixtures', 'fake-live-pi-sdk.mjs')
14
14
  const sandboxDir = path.join(packageRoot, '.pi-debug', 'live-ui')
15
+ const DEFAULT_TASK_COUNT = 12
15
16
 
16
17
  function shellQuote(value) {
17
18
  return JSON.stringify(String(value))
18
19
  }
19
20
 
21
/**
 * Read the value supplied for a CLI flag from `process.argv`.
 *
 * Supports both `--flag value` and `--flag=value`. A following token that is
 * itself a long flag (starts with `--`) is NOT consumed as the value, so
 * `--scenario --reset` yields an empty value instead of the string "--reset".
 *
 * @param {string} flag - Flag name including leading dashes, e.g. `--scenario`.
 * @returns {string} Trimmed value, or `''` when the flag is absent or has no value.
 */
function readFlagValue(flag) {
  const inlinePrefix = `${flag}=`
  const args = process.argv
  for (let index = 0; index < args.length; index += 1) {
    const arg = String(args[index])
    if (arg.startsWith(inlinePrefix)) {
      return arg.slice(inlinePrefix.length).trim()
    }
    if (arg === flag) {
      const next = String(args[index + 1] ?? '').trim()
      // A following long flag means this flag was given without a value.
      return next.startsWith('--') ? '' : next
    }
  }
  return ''
}

/**
 * Resolve the fake-live scenario name.
 *
 * Precedence: `--scenario` flag, then `PI_FAKE_LIVE_SCENARIO`, then `'default'`.
 *
 * @returns {string} Non-empty, trimmed scenario name.
 */
function readScenario() {
  const value = readFlagValue('--scenario') || process.env.PI_FAKE_LIVE_SCENARIO || 'default'
  return String(value).trim() || 'default'
}

/**
 * Resolve how many fake TODO tasks to seed.
 *
 * Precedence: `--task-count` flag, then `PI_DEBUG_TASK_COUNT`, then
 * `DEFAULT_TASK_COUNT`. Anything that does not parse to a positive integer
 * falls back to `DEFAULT_TASK_COUNT`.
 *
 * @returns {number} Positive integer task count.
 */
function readTaskCount() {
  const source = readFlagValue('--task-count') || process.env.PI_DEBUG_TASK_COUNT || `${DEFAULT_TASK_COUNT}`
  const parsed = Number.parseInt(source, 10)
  return Number.isFinite(parsed) && parsed > 0 ? parsed : DEFAULT_TASK_COUNT
}
38
+
39
/**
 * Build the contents of the seeded TODO file: `taskCount` unchecked tasks
 * split across up to three phase sections.
 *
 * Tasks 1..ceil(n/3) go to "Phase 1", the next ones up to ceil(2n/3) go to
 * "Phase 2", and the rest go to "Phase 3". Each phase heading is written once,
 * followed by a blank line and that phase's tasks; sections are separated by
 * a blank line.
 *
 * @param {number} taskCount - Number of tasks to generate.
 * @returns {string} Markdown text terminated by a newline.
 */
function buildTodoLines(taskCount) {
  const phaseOneEnd = Math.ceil(taskCount / 3)
  const phaseTwoEnd = Math.ceil((taskCount * 2) / 3)
  const lines = []
  // Track the phase of the current section. Comparing against the last pushed
  // line does not work because that line is always a blank or a task line,
  // which would emit a heading before every single task.
  let currentPhase = ''

  for (let index = 1; index <= taskCount; index += 1) {
    let phase = 'Phase 3'
    if (index <= phaseOneEnd) {
      phase = 'Phase 1'
    } else if (index <= phaseTwoEnd) {
      phase = 'Phase 2'
    }

    if (phase !== currentPhase) {
      if (lines.length > 0) {
        lines.push('')
      }
      lines.push(`## ${phase}`, '')
      currentPhase = phase
    }
    lines.push(`- [ ] Fake live task ${index}`)
  }

  return `${lines.join('\n')}\n`
}
59
+
20
60
  async function ensureRepo(cwd) {
21
61
  try {
22
62
  execFileSync('git', ['rev-parse', '--is-inside-work-tree'], { cwd, stdio: 'ignore' })
@@ -27,21 +67,11 @@ async function ensureRepo(cwd) {
27
67
  }
28
68
  }
29
69
 
30
- async function seedFiles(cwd) {
70
+ async function seedFiles(cwd, { taskCount, scenario }) {
31
71
  await fs.mkdir(path.join(cwd, 'pi'), { recursive: true })
32
- await fs.writeFile(path.join(cwd, 'TODOS.md'), [
33
- '## Phase 1',
34
- '',
35
- '- [ ] Fake live task one',
36
- '- [ ] Fake live task two',
37
- '- [ ] Fake live task three',
38
- '',
39
- '## Phase 2',
40
- '',
41
- '- [ ] Fake live task four',
42
- ].join('\n') + '\n', 'utf8')
43
- await fs.writeFile(path.join(cwd, 'DEVELOPER.md'), 'Developer instructions for local visualizer debugging.\n', 'utf8')
44
- await fs.writeFile(path.join(cwd, 'TESTER.md'), 'Tester instructions for local visualizer debugging.\n', 'utf8')
72
+ await fs.writeFile(path.join(cwd, 'TODOS.md'), buildTodoLines(taskCount), 'utf8')
73
+ await fs.writeFile(path.join(cwd, 'DEVELOPER.md'), `Developer instructions for local visualizer debugging.\nScenario: ${scenario}\n`, 'utf8')
74
+ await fs.writeFile(path.join(cwd, 'TESTER.md'), `Tester instructions for local visualizer debugging.\nScenario: ${scenario}\n`, 'utf8')
45
75
  await fs.writeFile(path.join(cwd, 'pi.config.json'), `${JSON.stringify({
46
76
  transport: 'sdk',
47
77
  taskFile: 'TODOS.md',
@@ -63,7 +93,7 @@ async function seedFiles(cwd) {
63
93
  toolContinueAfterSeconds: 3600,
64
94
  toolNoEventTimeoutSeconds: 3600,
65
95
  sleepBetweenSeconds: 1,
66
- maxIterations: 20,
96
+ maxIterations: Math.max(taskCount * 3, 20),
67
97
  }, null, 2)}\n`, 'utf8')
68
98
  }
69
99
 
@@ -78,17 +108,22 @@ async function ensureInitialCommit(cwd) {
78
108
 
79
109
  async function main() {
80
110
  const reset = process.argv.includes('--reset')
111
+ const scenario = readScenario()
112
+ const taskCount = readTaskCount()
113
+
81
114
  if (reset) {
82
115
  await fs.rm(sandboxDir, { recursive: true, force: true })
83
116
  }
84
117
 
85
118
  await fs.mkdir(sandboxDir, { recursive: true })
86
119
  await ensureRepo(sandboxDir)
87
- await seedFiles(sandboxDir)
120
+ await seedFiles(sandboxDir, { taskCount, scenario })
88
121
  await ensureInitialCommit(sandboxDir)
89
122
 
90
123
  process.stdout.write(`PI debug sandbox: ${sandboxDir}\n`)
91
124
  process.stdout.write(`Using fake live SDK fixture: ${fakeLiveSdkFile}\n`)
125
+ process.stdout.write(`Scenario: ${scenario}\n`)
126
+ process.stdout.write(`Task count: ${taskCount}\n`)
92
127
 
93
128
  const child = spawn(process.execPath, [cliFile, 'run'], {
94
129
  cwd: sandboxDir,
@@ -96,6 +131,7 @@ async function main() {
96
131
  ...process.env,
97
132
  PI_CONFIG_FILE: 'pi.config.json',
98
133
  PI_SDK_MODULE: fakeLiveSdkFile,
134
+ PI_FAKE_LIVE_SCENARIO: scenario,
99
135
  PI_VISUALIZER_HOST: process.env.PI_VISUALIZER_HOST || '127.0.0.1',
100
136
  PI_VISUALIZER_PORT: process.env.PI_VISUALIZER_PORT || '4317',
101
137
  },
@@ -119,6 +119,36 @@ function repoInstructionsAuthorityLine(config, instructionsFile, usesBundledInst
119
119
  return `Repo-local instructions in ${displayPath(config, instructionsFile)} are the primary role contract. Follow them over package defaults when they differ.\n`
120
120
  }
121
121
 
122
/**
 * Classify a TODO task as `'test'` (primarily about writing or improving
 * tests) or `'general'`.
 *
 * A task is test-focused when an action verb (write/add/create/implement/
 * expand/improve/fix/update) and a test noun (test(s)/coverage/regression
 * test/spec(s)) both appear as whole words, in either order.
 *
 * @param {unknown} task - Task text; nullish values are treated as empty.
 * @returns {'test' | 'general'} Detected task type.
 */
export function classifyTaskType(task) {
  // Collapse all whitespace (including newlines) to single spaces so the
  // `.*` in the patterns below can bridge a verb and noun on different lines.
  const text = String(task ?? '').trim().toLowerCase().replace(/\s+/g, ' ')
  if (text === '') {
    return 'general'
  }

  const verbThenTestNoun = /\b(write|add|create|implement|expand|improve|fix|update)\b.*\b(test|tests|coverage|regression test|spec|specs)\b/
  const testNounThenVerb = /\b(test|tests|coverage|regression test|spec|specs)\b.*\b(write|add|create|implement|expand|improve|fix|update)\b/
  if (verbThenTestNoun.test(text) || testNounThenVerb.test(text)) {
    return 'test'
  }

  return 'general'
}

/**
 * Return extra tester-prompt guidance for a task type.
 *
 * @param {string} taskType - Value produced by `classifyTaskType`.
 * @returns {string} Multi-line guidance for `'test'` tasks, `''` otherwise.
 */
function formatTaskTypeGuidance(taskType) {
  if (taskType !== 'test') {
    return ''
  }

  return [
    'Test-task guidance:',
    '- This TODO is primarily test-focused. Do not fail solely because changes are mostly or entirely tests.',
    '- PASS if the new or updated test adds meaningful behavioral or regression coverage and verification passes.',
    '- FAIL if the test is brittle, redundant, weakly asserted, or not tied to real behavior.',
    '- Prefer checking whether the test would have failed before the change, or whether developer notes justify why missing coverage mattered.',
  ].join('\n')
}
151
+
122
152
  function testerPassOwnershipRules(config) {
123
153
  if (config.commitMode === 'plan') {
124
154
  return {
@@ -353,6 +383,9 @@ export function buildTesterPrompt(config, {
353
383
  developerNotes || '(none provided)',
354
384
  configMaxLines(config, 'maxPromptNotesLines', 16),
355
385
  )
386
+ const taskType = classifyTaskType(task)
387
+ const taskTypeLabel = taskType === 'test' ? 'test-focused' : 'general'
388
+ const taskTypeGuidance = formatTaskTypeGuidance(taskType)
356
389
  const verificationCommand = config.testCommand.trim() === '' ? '(not configured)' : config.testCommand
357
390
  const visualCaptureNote = config.visualReviewEnabled
358
391
  ? `\n- Keep the screenshot capture flow working so the harness still produces current visual artifacts for review.`
@@ -364,6 +397,7 @@ export function buildTesterPrompt(config, {
364
397
  )
365
398
  const passOwnership = testerPassOwnershipRules(config)
366
399
  const largeFileRiskHint = formatLargeFileRiskHint(largeFileWarnings)
400
+ const taskTypeRuleBlock = taskTypeGuidance === '' ? '' : `${taskTypeGuidance}\n`
367
401
 
368
402
  if (!config.usingBundledTesterInstructions) {
369
403
  return `Read ${taskFile} and ${instructionsFile}.
@@ -375,6 +409,7 @@ You are the TESTER role. You are reviewing the most recent developer work from a
375
409
 
376
410
  Current phase: ${phase}
377
411
  Current task: ${task}
412
+ Current task type: ${taskTypeLabel}
378
413
  Reason for this tester pass: ${reason}
379
414
 
380
415
  Developer notes:
@@ -391,7 +426,7 @@ Rules:
391
426
  - If a snippet seems incomplete, reread a smaller exact window with read instead of another large overlapping shell range.
392
427
  - If blocked or inconclusive, return VERDICT: BLOCKED.
393
428
  - Do not hide real bugs with brittle tests.
394
- - ${passOwnership.successRule.slice(2)}
429
+ ${taskTypeRuleBlock}- ${passOwnership.successRule.slice(2)}
395
430
  - ${passOwnership.isolationRule.slice(2)}
396
431
  - ${passOwnership.extraRule.slice(2)}${visualCaptureNote}
397
432
 
@@ -417,6 +452,7 @@ You are the TESTER role. You are reviewing the most recent developer work from a
417
452
 
418
453
  Current phase: ${phase}
419
454
  Current task: ${task}
455
+ Current task type: ${taskTypeLabel}
420
456
  Reason for this tester pass: ${reason}
421
457
 
422
458
  Developer notes:
@@ -433,7 +469,7 @@ ${indentBlock(innerLoopValidationRules(verificationCommand), '\t')}
433
469
  - Prefer one focused browser-driven review pass.
434
470
  - If a snippet seems incomplete, reread a smaller exact window with read instead of another large overlapping shell range.
435
471
  - Do not hide real bugs with brittle tests.
436
- - If blocked or inconclusive, return VERDICT: BLOCKED.
472
+ ${taskTypeGuidance === '' ? '' : `${indentBlock(taskTypeGuidance, '\t')}\n`} - If blocked or inconclusive, return VERDICT: BLOCKED.
437
473
  ${indentBlock(passOwnership.successRule, '\t')}
438
474
  ${indentBlock(passOwnership.isolationRule, '\t')}
439
475
  ${indentBlock(passOwnership.extraRule, '\t')}${visualCaptureNote}
@@ -318,6 +318,18 @@ function isInfrastructureVerificationFailure(output) {
318
318
  ].some((pattern) => text.includes(pattern))
319
319
  }
320
320
 
321
/**
 * Produce a bounded excerpt of command output, keeping the END of the text
 * (the last `maxLines` lines, then at most `maxChars` characters).
 *
 * Whenever earlier content was dropped, the excerpt is prefixed with a
 * `[truncated] ...` marker line so readers can tell the beginning is missing.
 * The marker counts toward the `maxChars` budget.
 *
 * @param {unknown} output - Raw output; nullish values are treated as empty.
 * @param {number} [maxChars=4000] - Maximum length of the returned string.
 * @param {number} [maxLines=40] - Maximum number of trailing lines to keep.
 * @returns {string} Trimmed excerpt, or `''` for empty output.
 */
function formatOutputExcerpt(output, maxChars = 4000, maxLines = 40) {
  const text = String(output ?? '').trim()
  if (text === '') {
    return ''
  }

  const marker = '[truncated] ...\n'
  const allLines = text.split('\n')
  const tailLines = allLines.slice(-maxLines)
  const tail = tailLines.join('\n')
  const droppedLines = tailLines.length < allLines.length

  if (!droppedLines && tail.length <= maxChars) {
    return tail
  }

  // Reserve room for the marker; guard the zero case because slice(-0)
  // would return the whole string instead of nothing.
  const budget = Math.max(maxChars - marker.length, 0)
  const kept = budget === 0 ? '' : tail.slice(-budget)
  return `${marker}${kept}`
}
332
+
321
333
  async function recordEvent(config, event) {
322
334
  await appendTelemetry(config, {
323
335
  timestamp: timestamp(),
@@ -345,6 +357,7 @@ async function runAgentInvocation({
345
357
  activeKind: kind,
346
358
  activeRole: role,
347
359
  activeReason: reason,
360
+ activeStartedAt: timestamp(),
348
361
  })
349
362
 
350
363
  const beforeSnapshot = getRepoSnapshot(config.cwd)
@@ -533,6 +546,7 @@ async function runHarnessGitFinalize({
533
546
  activeKind: 'git_finalize',
534
547
  activeRole: '',
535
548
  activeReason: '',
549
+ activeStartedAt: timestamp(),
536
550
  })
537
551
 
538
552
  const beforeSnapshot = getRepoSnapshot(config.cwd)
@@ -653,6 +667,7 @@ async function runVerificationStep({ config, iteration, phase, kind }) {
653
667
  activeKind: kind,
654
668
  activeRole: '',
655
669
  activeReason: '',
670
+ activeStartedAt: timestamp(),
656
671
  })
657
672
 
658
673
  const beforeSnapshot = getRepoSnapshot(config.cwd)
@@ -696,6 +711,7 @@ async function runVerificationStep({ config, iteration, phase, kind }) {
696
711
  commitPlanFound: '',
697
712
  terminalReason: `verification_${verification.status}`,
698
713
  notes: verificationNotes,
714
+ outputExcerpt: formatOutputExcerpt(verification.output),
699
715
  })
700
716
 
701
717
  return verification
@@ -820,6 +836,10 @@ async function runDeveloperVerificationAndFix({
820
836
  let nextSessionId = sessionId
821
837
  let nextSessionFile = sessionFile
822
838
  let verificationStatus = verification.status
839
+ let verificationOutput = verification.output
840
+ let feedbackSource = (verification.status === 'failed' || verification.status === 'timed_out')
841
+ ? 'developer_verification'
842
+ : ''
823
843
 
824
844
  if (verification.status === 'failed' || verification.status === 'timed_out') {
825
845
  if (isInfrastructureVerificationFailure(verification.output)) {
@@ -831,8 +851,8 @@ async function runDeveloperVerificationAndFix({
831
851
  verificationStatus,
832
852
  sessionId: nextSessionId,
833
853
  sessionFile: nextSessionFile,
834
- verificationOutput: verification.output,
835
- feedbackSource: 'developer_verification',
854
+ verificationOutput,
855
+ feedbackSource,
836
856
  }
837
857
  }
838
858
 
@@ -859,8 +879,11 @@ async function runDeveloperVerificationAndFix({
859
879
  })
860
880
 
861
881
  verificationStatus = reverify.status
882
+ verificationOutput = reverify.output
883
+ feedbackSource = reverify.status === 'passed' ? '' : 'developer_reverification'
862
884
  } else {
863
885
  verificationStatus = 'not_run'
886
+ feedbackSource = 'developer_verification'
864
887
  }
865
888
  }
866
889
 
@@ -869,10 +892,8 @@ async function runDeveloperVerificationAndFix({
869
892
  verificationStatus,
870
893
  sessionId: nextSessionId,
871
894
  sessionFile: nextSessionFile,
872
- verificationOutput: verification.output,
873
- feedbackSource: verification.status === 'failed' || verification.status === 'timed_out'
874
- ? 'developer_verification'
875
- : '',
895
+ verificationOutput,
896
+ feedbackSource,
876
897
  }
877
898
  }
878
899
 
@@ -1026,6 +1047,7 @@ async function runVisualReview({ config, iteration, phase, task, changedFiles })
1026
1047
  activeKind: 'visual_capture',
1027
1048
  activeRole: '',
1028
1049
  activeReason: '',
1050
+ activeStartedAt: timestamp(),
1029
1051
  })
1030
1052
 
1031
1053
  const capture = await runVisualCapture(config, {
@@ -1082,6 +1104,7 @@ async function runVisualReview({ config, iteration, phase, task, changedFiles })
1082
1104
  activeKind: 'visual_review',
1083
1105
  activeRole: 'visualReview',
1084
1106
  activeReason: '',
1107
+ activeStartedAt: timestamp(),
1085
1108
  })
1086
1109
 
1087
1110
  const visualReviewModel = resolveRoleModel(config, 'visualReview')
@@ -1180,6 +1203,10 @@ async function runIteration({ config, state, iteration }) {
1180
1203
  phase: taskInfo.phase || 'complete',
1181
1204
  task: '',
1182
1205
  lastCompletedIteration: iteration,
1206
+ activeKind: '',
1207
+ activeRole: '',
1208
+ activeReason: '',
1209
+ activeStartedAt: '',
1183
1210
  })
1184
1211
  await appendLog(config.logFile, 'No unchecked tasks remain in TODOS.md')
1185
1212
  return {
@@ -1248,6 +1275,7 @@ async function runIteration({ config, state, iteration }) {
1248
1275
  activeKind: '',
1249
1276
  activeRole: '',
1250
1277
  activeReason: '',
1278
+ activeStartedAt: '',
1251
1279
  })
1252
1280
  const canResumePriorSession = (
1253
1281
  state.lastTransport === config.transport
@@ -1631,6 +1659,7 @@ async function runIteration({ config, state, iteration }) {
1631
1659
  activeKind: '',
1632
1660
  activeRole: '',
1633
1661
  activeReason: '',
1662
+ activeStartedAt: '',
1634
1663
  })
1635
1664
 
1636
1665
  await appendLog(
@@ -1799,6 +1828,7 @@ async function main() {
1799
1828
  activeKind: '',
1800
1829
  activeRole: '',
1801
1830
  activeReason: '',
1831
+ activeStartedAt: '',
1802
1832
  })
1803
1833
  const result = await runIteration({ config, state, iteration })
1804
1834
  await writeIterationSummary(config, result.iterationSummary ?? result.summary)
@@ -1828,6 +1858,7 @@ async function main() {
1828
1858
  activeKind: '',
1829
1859
  activeRole: '',
1830
1860
  activeReason: '',
1861
+ activeStartedAt: '',
1831
1862
  })
1832
1863
  if (visualizer) {
1833
1864
  await visualizer.close().catch(() => {})
@@ -192,6 +192,17 @@ function readRepoDiff(cwd) {
192
192
  }
193
193
  }
194
194
 
195
/**
 * Comparator for feed entries: order by `seq` when both entries carry a
 * distinct, real sequence number; otherwise fall back to comparing their
 * `timestamp` strings.
 *
 * Only finite numbers and non-empty numeric strings count as a sequence.
 * Blank strings, booleans and other values are treated as "no seq" rather
 * than being coerced to 0/1 by `Number()`.
 *
 * @param {{ seq?: unknown, timestamp?: unknown } | null | undefined} left
 * @param {{ seq?: unknown, timestamp?: unknown } | null | undefined} right
 * @returns {number} Negative, zero, or positive, as expected by `Array.prototype.sort`.
 */
function compareSequencedEntries(left, right) {
  const toSeq = (entry) => {
    const raw = entry?.seq
    if (typeof raw === 'number') {
      return Number.isFinite(raw) ? raw : null
    }
    if (typeof raw === 'string' && raw.trim() !== '') {
      const parsed = Number(raw)
      return Number.isFinite(parsed) ? parsed : null
    }
    return null
  }

  const leftSeq = toSeq(left)
  const rightSeq = toSeq(right)
  if (leftSeq !== null && rightSeq !== null && leftSeq !== rightSeq) {
    return leftSeq - rightSeq
  }
  return String(left?.timestamp ?? '').localeCompare(String(right?.timestamp ?? ''))
}
205
+
195
206
  function getRunDir(config, runId) {
196
207
  return path.join(config.piRuntimeDir, 'runs', runId)
197
208
  }
@@ -274,19 +285,26 @@ export async function buildSnapshot(config, queryRunId = '') {
274
285
  readJsonlTail(selectedConfig.liveFeedFile, { maxItems: 300, maxBytes: 768 * 1024 }),
275
286
  ])
276
287
 
277
- const recentTelemetry = telemetry.slice(-160).map((event, index) => ({
288
+ const flowOptions = {
289
+ includeVisualReview: config.visualReviewEnabled === true,
290
+ }
291
+ const telemetryWithVizIds = telemetry.map((event, index) => ({
278
292
  ...event,
279
293
  _vizId: `telemetry-${index}`,
280
294
  }))
295
+ const sortedLiveFeed = [...liveFeed].sort(compareSequencedEntries)
296
+ const recentTelemetry = telemetryWithVizIds.slice(-160)
281
297
  const flow = deriveFlowSnapshot({
282
298
  activeRun: selectedRunId !== '' && String(activeRun?.runId ?? '') === selectedRunId ? activeRun : state?.inProgress ?? null,
283
299
  summary,
284
- telemetry,
300
+ telemetry: telemetryWithVizIds,
301
+ options: flowOptions,
285
302
  })
286
303
  const graph = deriveStageGraph({
287
304
  activeRun: selectedRunId !== '' && String(activeRun?.runId ?? '') === selectedRunId ? activeRun : state?.inProgress ?? null,
288
305
  summary,
289
- telemetry,
306
+ telemetry: telemetryWithVizIds,
307
+ options: flowOptions,
290
308
  })
291
309
 
292
310
  const selectedRunIsActive = selectedRunId !== '' && String(activeRun?.runId ?? '') === selectedRunId
@@ -313,13 +331,13 @@ export async function buildSnapshot(config, queryRunId = '') {
313
331
  summary,
314
332
  flow: {
315
333
  ...flow,
316
- activeLabel: formatActiveLabel(activeRun, flow),
334
+ activeLabel: formatActiveLabel(activeRun, flow, flowOptions),
317
335
  },
318
336
  graph,
319
337
  todos,
320
338
  currentEdits,
321
339
  lastOutput: currentOutput,
322
- liveFeed,
340
+ liveFeed: sortedLiveFeed,
323
341
  recentTelemetry,
324
342
  }
325
343
  }
@@ -4,8 +4,8 @@ const FLOW_STEPS = [
4
4
  { key: 'tester', label: 'Tester' },
5
5
  { key: 'fix', label: 'Fix' },
6
6
  { key: 'git_finalize', label: 'Git Finalize' },
7
- { key: 'visual_capture', label: 'Visual Capture' },
8
- { key: 'visual_review', label: 'Visual Review' },
7
+ { key: 'visual_capture', label: 'Visual Capture', feature: 'visualReview' },
8
+ { key: 'visual_review', label: 'Visual Review', feature: 'visualReview' },
9
9
  { key: 'summary', label: 'Summary' },
10
10
  ]
11
11
 
@@ -27,8 +27,17 @@ const SUCCESS_STATUSES = new Set(['success', 'passed', 'complete'])
27
27
  const ERROR_STATUSES = new Set(['failed', 'timed_out', 'stalled', 'blocked', 'canceled'])
28
28
  const SKIP_STATUSES = new Set(['skipped', 'not_run', 'not_needed'])
29
29
 
30
- export function getFlowSteps() {
31
- return FLOW_STEPS.map((step) => ({ ...step }))
30
+ function shouldIncludeStep(step, options = {}) {
31
+ if (step.feature === 'visualReview' && options.includeVisualReview !== true) {
32
+ return false
33
+ }
34
+ return true
35
+ }
36
+
37
+ export function getFlowSteps(options = {}) {
38
+ return FLOW_STEPS
39
+ .filter((step) => shouldIncludeStep(step, options))
40
+ .map((step) => ({ ...step }))
32
41
  }
33
42
 
34
43
  export function getLabelForKind(kind) {
@@ -124,13 +133,13 @@ export function deriveCurrentIteration({ activeRun, summary, telemetry }) {
124
133
  return 0
125
134
  }
126
135
 
127
- export function deriveFlowSnapshot({ activeRun, summary, telemetry }) {
136
+ export function deriveFlowSnapshot({ activeRun, summary, telemetry, options = {} }) {
128
137
  const currentIteration = deriveCurrentIteration({ activeRun, summary, telemetry })
129
138
  const iterationTelemetry = Array.isArray(telemetry)
130
139
  ? telemetry.filter((event) => Number(event?.iteration) === currentIteration)
131
140
  : []
132
141
  const activeStepKey = getStepKeyForActiveRun(activeRun)
133
- const steps = FLOW_STEPS.map((step) => {
142
+ const steps = getFlowSteps(options).map((step) => {
134
143
  const matchingEvents = iterationTelemetry.filter((event) => getStepKeyForKind(event?.kind) === step.key)
135
144
  const latestEvent = matchingEvents.at(-1) ?? null
136
145
  const status = activeStepKey === step.key
@@ -139,10 +148,20 @@ export function deriveFlowSnapshot({ activeRun, summary, telemetry }) {
139
148
  ? normalizeEventStatus(latestEvent.status)
140
149
  : 'pending'
141
150
 
151
+ const activeStartedAt = activeStepKey === step.key
152
+ ? String(activeRun?.activeStartedAt ?? '')
153
+ : ''
154
+ const durationSeconds = latestEvent && Number.isFinite(Number(latestEvent.durationSeconds))
155
+ ? Number(latestEvent.durationSeconds)
156
+ : null
157
+
142
158
  return {
143
159
  ...step,
144
160
  status,
145
161
  latestEvent,
162
+ latestEventId: String(latestEvent?._vizId ?? ''),
163
+ activeStartedAt,
164
+ durationSeconds,
146
165
  }
147
166
  })
148
167
 
@@ -153,8 +172,8 @@ export function deriveFlowSnapshot({ activeRun, summary, telemetry }) {
153
172
  }
154
173
  }
155
174
 
156
- export function deriveStageGraph({ activeRun, summary, telemetry }) {
157
- const flow = deriveFlowSnapshot({ activeRun, summary, telemetry })
175
+ export function deriveStageGraph({ activeRun, summary, telemetry, options = {} }) {
176
+ const flow = deriveFlowSnapshot({ activeRun, summary, telemetry, options })
158
177
  const currentIteration = flow.iteration
159
178
  const iterationTelemetry = Array.isArray(telemetry)
160
179
  ? telemetry.filter((event) => Number(event?.iteration) === currentIteration)
@@ -196,10 +215,10 @@ export function deriveStageGraph({ activeRun, summary, telemetry }) {
196
215
  }
197
216
  }
198
217
 
199
- export function formatActiveLabel(activeRun, flow) {
218
+ export function formatActiveLabel(activeRun, flow, options = {}) {
200
219
  const activeStepKey = flow?.activeStepKey || getStepKeyForActiveRun(activeRun)
201
220
  if (activeStepKey !== '') {
202
- const step = FLOW_STEPS.find((entry) => entry.key === activeStepKey)
221
+ const step = getFlowSteps(options).find((entry) => entry.key === activeStepKey)
203
222
  if (step) {
204
223
  return step.label
205
224
  }
@@ -0,0 +1 @@
1
+ :root{--bg: #0b1020;--panel: #121a30;--panel2: #17213d;--text: #e6edf7;--muted: #95a3bf;--line: #263252;--active: #6ee7ff;--done: #53d18d;--error: #ff6b81;--skip: #f0b35a;--pending: #4b5675}*{box-sizing:border-box}*{scrollbar-width:none;-ms-overflow-style:none}*::-webkit-scrollbar{width:0;height:0}html,body,#root{min-height:100%}body{margin:0;font:14px/1.4 ui-sans-serif,system-ui,-apple-system,BlinkMacSystemFont,Segoe UI,sans-serif;background:linear-gradient(180deg,#08101d,#0b1020 180px);color:var(--text)}button,input,select,textarea{font:inherit}button{color:inherit}.wrap{max-width:1400px;margin:0 auto;padding:20px}.header{display:flex;justify-content:space-between;gap:16px;align-items:flex-start;margin-bottom:20px}.title{font-size:28px;font-weight:700}.subtitle{color:var(--muted);margin-top:4px}.toolbar{display:flex;gap:12px;align-items:center;flex-wrap:wrap}.badge,select{display:inline-flex;align-items:center;gap:8px;padding:8px 12px;border-radius:999px;border:1px solid var(--line);background:#ffffff08;color:var(--text)}select{min-width:260px}.dot{width:10px;height:10px;border-radius:50%;background:var(--pending)}.dot.active{background:var(--active);box-shadow:0 0 18px #6ee7ff99}.grid{display:grid;gap:16px}.main-grid{grid-template-columns:minmax(320px,420px) 1fr;align-items:start}.side-grid{gap:16px}.detail-split{display:grid;grid-template-columns:1fr 1fr;gap:16px;margin-top:16px}.card{background:linear-gradient(180deg,#ffffff05,#ffffff03);border:1px solid var(--line);border-radius:16px;padding:16px;box-shadow:0 12px 40px #0000002e}.card-tight{padding:12px}.no-margin{margin:0}.label{color:var(--muted);font-size:12px;text-transform:uppercase;letter-spacing:.08em}.value{margin-top:8px;font-size:22px;font-weight:700}.value.small{font-size:16px}.todo-list{max-height:calc(100vh - 
140px);overflow:auto;padding-right:4px;display:grid;gap:12px;margin-top:12px}.todo-group{display:grid;gap:6px}.todo-group-heading{color:var(--muted);font-size:12px;font-weight:700;text-transform:uppercase;letter-spacing:.08em;padding:0 2px}.todo-group-items{display:grid;gap:6px}.todo-item{width:100%;border:1px solid var(--line);border-radius:12px;background:var(--panel);display:flex;gap:8px;align-items:flex-start;padding:9px 11px;text-align:left;cursor:pointer}.todo-item.active{border-color:var(--active);box-shadow:0 0 0 1px #6ee7ff40 inset}.todo-line{color:var(--muted);font-size:11px;min-width:36px;line-height:1.2}.todo-content{flex:1;min-width:0}.todo-task{font-weight:600;line-height:1.25}.todo-checked{color:var(--done)}.flow{display:grid;grid-template-columns:repeat(8,minmax(0,1fr));gap:10px;margin-top:14px}.step,.graph-node{border:1px solid var(--line);border-radius:14px;padding:12px;background:var(--panel);min-height:96px;position:relative;overflow:hidden}.step{width:100%;text-align:left}.step.clickable{cursor:pointer}.step.selected{box-shadow:0 0 0 1px #ffffff2e inset}.step:before,.graph-node:before{content:"";position:absolute;inset:0 auto 0 0;width:4px;background:var(--pending)}.step.active,.graph-node.active{border-color:var(--active);box-shadow:0 0 0 1px #6ee7ff59 inset,0 0 28px 
#6ee7ff1f}.step.active:before,.graph-node.active:before{background:var(--active)}.step.done:before,.graph-node.done:before{background:var(--done)}.step.error:before,.graph-node.error:before{background:var(--error)}.step.skipped:before,.graph-node.skipped:before{background:var(--skip)}.step-name{font-weight:700;margin-bottom:6px}.step-status{font-size:12px;text-transform:uppercase;letter-spacing:.08em;color:var(--muted)}.step-meta{margin-top:8px;color:var(--muted);font-size:12px;white-space:pre-wrap}.state-bar{display:flex;gap:10px;flex-wrap:wrap;margin-top:12px}.step-details-card{margin-top:12px}.step-details-summary{margin-top:8px;font-weight:700}.step-details-meta,.step-details-section{margin-top:10px;color:var(--muted);font-size:12px;text-transform:uppercase;letter-spacing:.08em}.state-chip{border:1px solid var(--line);border-radius:999px;padding:6px 10px;color:var(--muted);background:#ffffff08}.feed-toolbar{display:flex;gap:12px;align-items:center;flex-wrap:wrap;margin-top:12px;margin-bottom:10px}.feed-toggle{display:flex;gap:6px;align-items:center;color:var(--muted);font-size:12px}.feed-jump-row{display:flex;justify-content:flex-end;margin-bottom:8px}.feed-jump-button{border:1px solid var(--line);background:#6ee7ff14;color:var(--active);border-radius:999px;padding:6px 10px;cursor:pointer}.feed-jump-button:hover{background:#6ee7ff24}.feed{background:#0a1325;border:1px solid var(--line);border-radius:12px;padding:12px;max-height:320px;overflow:auto;font-family:ui-monospace,SFMono-Regular,Menlo,monospace}.feed-item{padding:8px 0;border-bottom:1px solid rgba(255,255,255,.06)}.feed-item:last-child{border-bottom:0}.feed-head{display:flex;gap:8px;align-items:center;flex-wrap:wrap}.feed-type{display:inline-flex;align-items:center;border:1px solid var(--line);border-radius:999px;padding:2px 
8px;font-size:11px;text-transform:uppercase;letter-spacing:.08em}.feed-type.agent_start,.feed-type.agent_end{color:var(--active)}.feed-type.thinking_delta{color:#b392f0}.feed-type.text_delta{color:var(--done)}.feed-type.tool_start,.feed-type.tool_update,.feed-type.tool_end{color:var(--skip)}.feed-meta{color:var(--muted);font-size:12px}.feed-text,.pinned-tool-text{white-space:pre-wrap;word-break:break-word;margin-top:6px}.feed-count{color:var(--muted);font-size:11px}.pinned-tool{background:#0a1325;border:1px solid var(--line);border-radius:12px;padding:12px;margin-top:12px}.pinned-tool-name{font-weight:700}.pinned-tool-meta{color:var(--muted);font-size:12px;margin-top:4px}.kv{display:grid;grid-template-columns:140px 1fr;gap:6px 10px;margin-top:12px}.kv div:nth-child(odd){color:var(--muted)}pre{margin:0;white-space:pre-wrap;word-break:break-word;background:#0a1325;border:1px solid var(--line);border-radius:12px;padding:12px;max-height:320px;overflow:auto}.graph{display:grid;grid-template-columns:repeat(auto-fit,minmax(180px,1fr));gap:12px;margin-top:12px}.graph-node{width:100%;min-height:120px;text-align:left;cursor:pointer}.table-wrap{margin-top:12px;overflow:auto;max-height:360px}table{width:100%;border-collapse:collapse}th,td{padding:10px 8px;border-bottom:1px solid var(--line);vertical-align:top;text-align:left}th{color:var(--muted);font-size:12px;text-transform:uppercase;letter-spacing:.08em}td{font-size:13px}tr[data-clickable="1"]{cursor:pointer}.status-pill{display:inline-block;border-radius:999px;padding:3px 8px;font-size:12px;font-weight:700;border:1px solid var(--line);background:var(--panel2)}.status-pill.done{color:var(--done)}.status-pill.error{color:var(--error)}.status-pill.skipped{color:var(--skip)}.status-pill.active{color:var(--active)}.edit-list{max-height:360px;overflow:auto;margin-top:12px}.edit-item{border:1px solid var(--line);border-radius:12px;margin-bottom:10px;overflow:hidden}.edit-head{display:block;list-style:none;padding:10px 
12px;background:#ffffff08;font-weight:600;cursor:pointer}.edit-head::-webkit-details-marker{display:none}.muted{color:var(--muted)}.bottom{margin-top:16px}.bottom summary{cursor:pointer;color:var(--muted);margin-bottom:10px}.diagnostics-grid{gap:16px}@media(max-width:1100px){.main-grid,.detail-split,.flow{grid-template-columns:1fr}.todo-list{max-height:none}}