npm - @sebastianandreasson/pi-autonomous-agents - Versions diffs - 0.10.0 → 0.11.0 - Mend

@sebastianandreasson/pi-autonomous-agents 0.10.0 → 0.11.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (13)

package/README.md +9 -0
package/docs/PI_SUPERVISOR.md +1 -1
package/package.json +3 -2
package/src/pi-debug-live.mjs +52 -16
package/src/pi-prompts.mjs +38 -2
package/src/pi-supervisor.mjs +37 -6
package/src/pi-visualizer-server.mjs +23 -5
package/src/pi-visualizer-shared.mjs +29 -10
package/visualizer-ui/dist/assets/index-C5V0jXPE.css +1 -0
package/visualizer-ui/dist/assets/index-CpHvuv0C.js +12 -0
package/visualizer-ui/dist/index.html +2 -2
package/visualizer-ui/dist/assets/index-C398cGuP.js +0 -12
package/visualizer-ui/dist/assets/index-DuJxYqkl.css +0 -1

package/README.md CHANGED Viewed

@@ -326,6 +326,13 @@ For local visualizer iteration against fake live SDK agent:
 npm run debug:live-ui
 ```
+Scenario variants:
+```bash
+node src/cli.mjs debug-live --reset --scenario noisy --task-count 24
+node src/cli.mjs debug-live --reset --scenario retry
+```
 For React/Vite visualizer UI dev loop:
 ```bash
@@ -338,6 +345,8 @@ For production visualizer UI build:
 npm run build:visualizer:ui
 ```
+Publish now auto-runs check, tests, and UI build via `prepublishOnly`.
 This seeds `.pi-debug/live-ui/`, runs harness there with streaming fake SDK fixture, hosts visualizer, and gives stable local repro loop for UI work. React app lives in `visualizer-ui/`. Visualizer server now serves built assets from `visualizer-ui/dist/` and falls back to build-instructions page if build artifacts are missing.
 See `docs/VISUALIZER_UI_PLAN.md` for migration plan.

package/docs/PI_SUPERVISOR.md CHANGED Viewed

@@ -62,7 +62,7 @@ The package reads `PI_CONFIG_FILE` if provided. Otherwise it falls back to the b
 Visualizer reads active-run lock, TODO file, per-run state, per-run iteration summary, per-run last output snapshot, live feed JSONL, and telemetry to show current stage plus historical runs.
-For local UI iteration in this package repo, use `pi-harness debug-live` to run against seeded fake live SDK sandbox.
+For local UI iteration in this package repo, use `pi-harness debug-live` to run against seeded fake live SDK sandbox. Useful variants: `--scenario noisy`, `--scenario retry`, `--task-count 24`.
 ## Config Contract

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "@sebastianandreasson/pi-autonomous-agents",
   "private": false,
-  "version": "0.10.0",
+  "version": "0.11.0",
   "type": "module",
   "description": "Portable unattended PI harness for developer/tester/visual-review loops.",
   "license": "MIT",
@@ -23,7 +23,8 @@
     "test": "node --test test/pi-heartbeat.test.mjs test/pi-lifecycle.test.mjs test/pi-role-models.test.mjs test/pi-flow.test.mjs test/pi-history.test.mjs test/pi-prompts.test.mjs test/pi-preflight.test.mjs test/pi-repo.test.mjs test/pi-sdk-supervisor.test.mjs test/pi-sdk-turn.test.mjs test/pi-telemetry.test.mjs test/pi-visualizer-shared.test.mjs",
     "debug:live-ui": "node src/cli.mjs debug-live --reset",
     "dev:visualizer:ui": "npm --prefix visualizer-ui run dev",
-    "build:visualizer:ui": "npm --prefix visualizer-ui run build"
+    "build:visualizer:ui": "npm --prefix visualizer-ui run build",
+    "prepublishOnly": "npm run check && npm test && npm run build:visualizer:ui"
   },
   "files": [
     "src",

package/src/pi-debug-live.mjs CHANGED Viewed

@@ -12,11 +12,51 @@ const cliFile = path.join(scriptDir, 'cli.mjs')
 const fakePiFile = path.join(packageRoot, 'test', 'fixtures', 'fake-pi.mjs')
 const fakeLiveSdkFile = path.join(packageRoot, 'test', 'fixtures', 'fake-live-pi-sdk.mjs')
 const sandboxDir = path.join(packageRoot, '.pi-debug', 'live-ui')
+const DEFAULT_TASK_COUNT = 12
 function shellQuote(value) {
   return JSON.stringify(String(value))
 }
+function readFlagValue(flag) {
+  const index = process.argv.indexOf(flag)
+  if (index === -1) {
+    return ''
+  }
+  return String(process.argv[index + 1] ?? '').trim()
+}
+function readScenario() {
+  const value = readFlagValue('--scenario') || process.env.PI_FAKE_LIVE_SCENARIO || 'default'
+  return String(value).trim() || 'default'
+}
+function readTaskCount() {
+  const raw = Number.parseInt(readFlagValue('--task-count') || process.env.PI_DEBUG_TASK_COUNT || `${DEFAULT_TASK_COUNT}`, 10)
+  return Number.isFinite(raw) && raw > 0 ? raw : DEFAULT_TASK_COUNT
+}
+function buildTodoLines(taskCount) {
+  const lines = []
+  for (let index = 1; index <= taskCount; index += 1) {
+    const phase = index <= Math.ceil(taskCount / 3)
+      ? 'Phase 1'
+      : index <= Math.ceil((taskCount * 2) / 3)
+        ? 'Phase 2'
+        : 'Phase 3'
+    const label = `Fake live task ${index}`
+    if (lines.length === 0 || lines[lines.length - 1] !== `## ${phase}`) {
+      if (lines.length > 0) {
+        lines.push('')
+      }
+      lines.push(`## ${phase}`)
+      lines.push('')
+    }
+    lines.push(`- [ ] ${label}`)
+  }
+  return `${lines.join('\n')}\n`
+}
 async function ensureRepo(cwd) {
   try {
     execFileSync('git', ['rev-parse', '--is-inside-work-tree'], { cwd, stdio: 'ignore' })
@@ -27,21 +67,11 @@ async function ensureRepo(cwd) {
   }
 }
-async function seedFiles(cwd) {
+async function seedFiles(cwd, { taskCount, scenario }) {
   await fs.mkdir(path.join(cwd, 'pi'), { recursive: true })
-  await fs.writeFile(path.join(cwd, 'TODOS.md'), [
-    '## Phase 1',
-    '',
-    '- [ ] Fake live task one',
-    '- [ ] Fake live task two',
-    '- [ ] Fake live task three',
-    '',
-    '## Phase 2',
-    '',
-    '- [ ] Fake live task four',
-  ].join('\n') + '\n', 'utf8')
-  await fs.writeFile(path.join(cwd, 'DEVELOPER.md'), 'Developer instructions for local visualizer debugging.\n', 'utf8')
-  await fs.writeFile(path.join(cwd, 'TESTER.md'), 'Tester instructions for local visualizer debugging.\n', 'utf8')
+  await fs.writeFile(path.join(cwd, 'TODOS.md'), buildTodoLines(taskCount), 'utf8')
+  await fs.writeFile(path.join(cwd, 'DEVELOPER.md'), `Developer instructions for local visualizer debugging.\nScenario: ${scenario}\n`, 'utf8')
+  await fs.writeFile(path.join(cwd, 'TESTER.md'), `Tester instructions for local visualizer debugging.\nScenario: ${scenario}\n`, 'utf8')
   await fs.writeFile(path.join(cwd, 'pi.config.json'), `${JSON.stringify({
     transport: 'sdk',
     taskFile: 'TODOS.md',
@@ -63,7 +93,7 @@ async function seedFiles(cwd) {
     toolContinueAfterSeconds: 3600,
     toolNoEventTimeoutSeconds: 3600,
     sleepBetweenSeconds: 1,
-    maxIterations: 20,
+    maxIterations: Math.max(taskCount * 3, 20),
   }, null, 2)}\n`, 'utf8')
 }
@@ -78,17 +108,22 @@ async function ensureInitialCommit(cwd) {
 async function main() {
   const reset = process.argv.includes('--reset')
+  const scenario = readScenario()
+  const taskCount = readTaskCount()
   if (reset) {
     await fs.rm(sandboxDir, { recursive: true, force: true })
   }
   await fs.mkdir(sandboxDir, { recursive: true })
   await ensureRepo(sandboxDir)
-  await seedFiles(sandboxDir)
+  await seedFiles(sandboxDir, { taskCount, scenario })
   await ensureInitialCommit(sandboxDir)
   process.stdout.write(`PI debug sandbox: ${sandboxDir}\n`)
   process.stdout.write(`Using fake live SDK fixture: ${fakeLiveSdkFile}\n`)
+  process.stdout.write(`Scenario: ${scenario}\n`)
+  process.stdout.write(`Task count: ${taskCount}\n`)
   const child = spawn(process.execPath, [cliFile, 'run'], {
     cwd: sandboxDir,
@@ -96,6 +131,7 @@ async function main() {
       ...process.env,
       PI_CONFIG_FILE: 'pi.config.json',
       PI_SDK_MODULE: fakeLiveSdkFile,
+      PI_FAKE_LIVE_SCENARIO: scenario,
       PI_VISUALIZER_HOST: process.env.PI_VISUALIZER_HOST || '127.0.0.1',
       PI_VISUALIZER_PORT: process.env.PI_VISUALIZER_PORT || '4317',
     },

package/src/pi-prompts.mjs CHANGED Viewed

@@ -119,6 +119,36 @@ function repoInstructionsAuthorityLine(config, instructionsFile, usesBundledInst
   return `Repo-local instructions in ${displayPath(config, instructionsFile)} are the primary role contract. Follow them over package defaults when they differ.\n`
 }
+export function classifyTaskType(task) {
+  const text = String(task ?? '').trim().toLowerCase()
+  if (text === '') {
+    return 'general'
+  }
+  if (
+    /\b(write|add|create|implement|expand|improve|fix|update)\b.*\b(test|tests|coverage|regression test|spec|specs)\b/.test(text)
+    || /\b(test|tests|coverage|regression test|spec|specs)\b.*\b(write|add|create|implement|expand|improve|fix|update)\b/.test(text)
+  ) {
+    return 'test'
+  }
+  return 'general'
+}
+function formatTaskTypeGuidance(taskType) {
+  if (taskType !== 'test') {
+    return ''
+  }
+  return [
+    'Test-task guidance:',
+    '- This TODO is primarily test-focused. Do not fail solely because changes are mostly or entirely tests.',
+    '- PASS if the new or updated test adds meaningful behavioral or regression coverage and verification passes.',
+    '- FAIL if the test is brittle, redundant, weakly asserted, or not tied to real behavior.',
+    '- Prefer checking whether the test would have failed before the change, or whether developer notes justify why missing coverage mattered.',
+  ].join('\n')
+}
 function testerPassOwnershipRules(config) {
   if (config.commitMode === 'plan') {
     return {
@@ -353,6 +383,9 @@ export function buildTesterPrompt(config, {
     developerNotes || '(none provided)',
     configMaxLines(config, 'maxPromptNotesLines', 16),
   )
+  const taskType = classifyTaskType(task)
+  const taskTypeLabel = taskType === 'test' ? 'test-focused' : 'general'
+  const taskTypeGuidance = formatTaskTypeGuidance(taskType)
   const verificationCommand = config.testCommand.trim() === '' ? '(not configured)' : config.testCommand
   const visualCaptureNote = config.visualReviewEnabled
     ? `\n- Keep the screenshot capture flow working so the harness still produces current visual artifacts for review.`
@@ -364,6 +397,7 @@ export function buildTesterPrompt(config, {
   )
   const passOwnership = testerPassOwnershipRules(config)
   const largeFileRiskHint = formatLargeFileRiskHint(largeFileWarnings)
+  const taskTypeRuleBlock = taskTypeGuidance === '' ? '' : `${taskTypeGuidance}\n`
   if (!config.usingBundledTesterInstructions) {
     return `Read ${taskFile} and ${instructionsFile}.
@@ -375,6 +409,7 @@ You are the TESTER role. You are reviewing the most recent developer work from a
 Current phase: ${phase}
 Current task: ${task}
+Current task type: ${taskTypeLabel}
 Reason for this tester pass: ${reason}
 Developer notes:
@@ -391,7 +426,7 @@ Rules:
 - If a snippet seems incomplete, reread a smaller exact window with read instead of another large overlapping shell range.
 - If blocked or inconclusive, return VERDICT: BLOCKED.
 - Do not hide real bugs with brittle tests.
-- ${passOwnership.successRule.slice(2)}
+${taskTypeRuleBlock}- ${passOwnership.successRule.slice(2)}
 - ${passOwnership.isolationRule.slice(2)}
 - ${passOwnership.extraRule.slice(2)}${visualCaptureNote}
@@ -417,6 +452,7 @@ You are the TESTER role. You are reviewing the most recent developer work from a
 Current phase: ${phase}
 Current task: ${task}
+Current task type: ${taskTypeLabel}
 Reason for this tester pass: ${reason}
 Developer notes:
@@ -433,7 +469,7 @@ ${indentBlock(innerLoopValidationRules(verificationCommand), '\t')}
 	- Prefer one focused browser-driven review pass.
 	- If a snippet seems incomplete, reread a smaller exact window with read instead of another large overlapping shell range.
 	- Do not hide real bugs with brittle tests.
-	- If blocked or inconclusive, return VERDICT: BLOCKED.
+${taskTypeGuidance === '' ? '' : `${indentBlock(taskTypeGuidance, '\t')}\n`}	- If blocked or inconclusive, return VERDICT: BLOCKED.
 ${indentBlock(passOwnership.successRule, '\t')}
 ${indentBlock(passOwnership.isolationRule, '\t')}
 ${indentBlock(passOwnership.extraRule, '\t')}${visualCaptureNote}

package/src/pi-supervisor.mjs CHANGED Viewed

@@ -318,6 +318,18 @@ function isInfrastructureVerificationFailure(output) {
   ].some((pattern) => text.includes(pattern))
 }
+function formatOutputExcerpt(output, maxChars = 4000, maxLines = 40) {
+  const text = String(output ?? '').trim()
+  if (text === '') {
+    return ''
+  }
+  const excerpt = text.split('\n').slice(-maxLines).join('\n')
+  if (excerpt.length <= maxChars) {
+    return excerpt
+  }
+  return `${excerpt.slice(excerpt.length - maxChars + 16)}\n... [truncated]`
+}
 async function recordEvent(config, event) {
   await appendTelemetry(config, {
     timestamp: timestamp(),
@@ -345,6 +357,7 @@ async function runAgentInvocation({
     activeKind: kind,
     activeRole: role,
     activeReason: reason,
+    activeStartedAt: timestamp(),
   })
   const beforeSnapshot = getRepoSnapshot(config.cwd)
@@ -533,6 +546,7 @@ async function runHarnessGitFinalize({
     activeKind: 'git_finalize',
     activeRole: '',
     activeReason: '',
+    activeStartedAt: timestamp(),
   })
   const beforeSnapshot = getRepoSnapshot(config.cwd)
@@ -653,6 +667,7 @@ async function runVerificationStep({ config, iteration, phase, kind }) {
     activeKind: kind,
     activeRole: '',
     activeReason: '',
+    activeStartedAt: timestamp(),
   })
   const beforeSnapshot = getRepoSnapshot(config.cwd)
@@ -696,6 +711,7 @@ async function runVerificationStep({ config, iteration, phase, kind }) {
     commitPlanFound: '',
     terminalReason: `verification_${verification.status}`,
     notes: verificationNotes,
+    outputExcerpt: formatOutputExcerpt(verification.output),
   })
   return verification
@@ -820,6 +836,10 @@ async function runDeveloperVerificationAndFix({
   let nextSessionId = sessionId
   let nextSessionFile = sessionFile
   let verificationStatus = verification.status
+  let verificationOutput = verification.output
+  let feedbackSource = (verification.status === 'failed' || verification.status === 'timed_out')
+    ? 'developer_verification'
+    : ''
   if (verification.status === 'failed' || verification.status === 'timed_out') {
     if (isInfrastructureVerificationFailure(verification.output)) {
@@ -831,8 +851,8 @@ async function runDeveloperVerificationAndFix({
         verificationStatus,
         sessionId: nextSessionId,
         sessionFile: nextSessionFile,
-        verificationOutput: verification.output,
-        feedbackSource: 'developer_verification',
+        verificationOutput,
+        feedbackSource,
       }
     }
@@ -859,8 +879,11 @@ async function runDeveloperVerificationAndFix({
       })
       verificationStatus = reverify.status
+      verificationOutput = reverify.output
+      feedbackSource = reverify.status === 'passed' ? '' : 'developer_reverification'
     } else {
       verificationStatus = 'not_run'
+      feedbackSource = 'developer_verification'
     }
   }
@@ -869,10 +892,8 @@ async function runDeveloperVerificationAndFix({
     verificationStatus,
     sessionId: nextSessionId,
     sessionFile: nextSessionFile,
-    verificationOutput: verification.output,
-    feedbackSource: verification.status === 'failed' || verification.status === 'timed_out'
-      ? 'developer_verification'
-      : '',
+    verificationOutput,
+    feedbackSource,
   }
 }
@@ -1026,6 +1047,7 @@ async function runVisualReview({ config, iteration, phase, task, changedFiles })
     activeKind: 'visual_capture',
     activeRole: '',
     activeReason: '',
+    activeStartedAt: timestamp(),
   })
   const capture = await runVisualCapture(config, {
@@ -1082,6 +1104,7 @@ async function runVisualReview({ config, iteration, phase, task, changedFiles })
     activeKind: 'visual_review',
     activeRole: 'visualReview',
     activeReason: '',
+    activeStartedAt: timestamp(),
   })
   const visualReviewModel = resolveRoleModel(config, 'visualReview')
@@ -1180,6 +1203,10 @@ async function runIteration({ config, state, iteration }) {
       phase: taskInfo.phase || 'complete',
       task: '',
       lastCompletedIteration: iteration,
+      activeKind: '',
+      activeRole: '',
+      activeReason: '',
+      activeStartedAt: '',
     })
     await appendLog(config.logFile, 'No unchecked tasks remain in TODOS.md')
     return {
@@ -1248,6 +1275,7 @@ async function runIteration({ config, state, iteration }) {
     activeKind: '',
     activeRole: '',
     activeReason: '',
+    activeStartedAt: '',
   })
   const canResumePriorSession = (
     state.lastTransport === config.transport
@@ -1631,6 +1659,7 @@ async function runIteration({ config, state, iteration }) {
     activeKind: '',
     activeRole: '',
     activeReason: '',
+    activeStartedAt: '',
   })
   await appendLog(
@@ -1799,6 +1828,7 @@ async function main() {
         activeKind: '',
         activeRole: '',
         activeReason: '',
+        activeStartedAt: '',
       })
       const result = await runIteration({ config, state, iteration })
       await writeIterationSummary(config, result.iterationSummary ?? result.summary)
@@ -1828,6 +1858,7 @@ async function main() {
       activeKind: '',
       activeRole: '',
       activeReason: '',
+      activeStartedAt: '',
     })
     if (visualizer) {
       await visualizer.close().catch(() => {})

package/src/pi-visualizer-server.mjs CHANGED Viewed

@@ -192,6 +192,17 @@ function readRepoDiff(cwd) {
   }
 }
+function compareSequencedEntries(left, right) {
+  const leftSeq = Number(left?.seq ?? Number.NaN)
+  const rightSeq = Number(right?.seq ?? Number.NaN)
+  const leftHasSeq = Number.isFinite(leftSeq)
+  const rightHasSeq = Number.isFinite(rightSeq)
+  if (leftHasSeq && rightHasSeq && leftSeq !== rightSeq) {
+    return leftSeq - rightSeq
+  }
+  return String(left?.timestamp ?? '').localeCompare(String(right?.timestamp ?? ''))
+}
 function getRunDir(config, runId) {
   return path.join(config.piRuntimeDir, 'runs', runId)
 }
@@ -274,19 +285,26 @@ export async function buildSnapshot(config, queryRunId = '') {
     readJsonlTail(selectedConfig.liveFeedFile, { maxItems: 300, maxBytes: 768 * 1024 }),
   ])
-  const recentTelemetry = telemetry.slice(-160).map((event, index) => ({
+  const flowOptions = {
+    includeVisualReview: config.visualReviewEnabled === true,
+  }
+  const telemetryWithVizIds = telemetry.map((event, index) => ({
     ...event,
     _vizId: `telemetry-${index}`,
   }))
+  const sortedLiveFeed = [...liveFeed].sort(compareSequencedEntries)
+  const recentTelemetry = telemetryWithVizIds.slice(-160)
   const flow = deriveFlowSnapshot({
     activeRun: selectedRunId !== '' && String(activeRun?.runId ?? '') === selectedRunId ? activeRun : state?.inProgress ?? null,
     summary,
-    telemetry,
+    telemetry: telemetryWithVizIds,
+    options: flowOptions,
   })
   const graph = deriveStageGraph({
     activeRun: selectedRunId !== '' && String(activeRun?.runId ?? '') === selectedRunId ? activeRun : state?.inProgress ?? null,
     summary,
-    telemetry,
+    telemetry: telemetryWithVizIds,
+    options: flowOptions,
   })
   const selectedRunIsActive = selectedRunId !== '' && String(activeRun?.runId ?? '') === selectedRunId
@@ -313,13 +331,13 @@ export async function buildSnapshot(config, queryRunId = '') {
     summary,
     flow: {
       ...flow,
-      activeLabel: formatActiveLabel(activeRun, flow),
+      activeLabel: formatActiveLabel(activeRun, flow, flowOptions),
     },
     graph,
     todos,
     currentEdits,
     lastOutput: currentOutput,
-    liveFeed,
+    liveFeed: sortedLiveFeed,
     recentTelemetry,
   }
 }

package/src/pi-visualizer-shared.mjs CHANGED Viewed

@@ -4,8 +4,8 @@ const FLOW_STEPS = [
   { key: 'tester', label: 'Tester' },
   { key: 'fix', label: 'Fix' },
   { key: 'git_finalize', label: 'Git Finalize' },
-  { key: 'visual_capture', label: 'Visual Capture' },
-  { key: 'visual_review', label: 'Visual Review' },
+  { key: 'visual_capture', label: 'Visual Capture', feature: 'visualReview' },
+  { key: 'visual_review', label: 'Visual Review', feature: 'visualReview' },
   { key: 'summary', label: 'Summary' },
 ]
@@ -27,8 +27,17 @@ const SUCCESS_STATUSES = new Set(['success', 'passed', 'complete'])
 const ERROR_STATUSES = new Set(['failed', 'timed_out', 'stalled', 'blocked', 'canceled'])
 const SKIP_STATUSES = new Set(['skipped', 'not_run', 'not_needed'])
-export function getFlowSteps() {
-  return FLOW_STEPS.map((step) => ({ ...step }))
+function shouldIncludeStep(step, options = {}) {
+  if (step.feature === 'visualReview' && options.includeVisualReview !== true) {
+    return false
+  }
+  return true
+}
+export function getFlowSteps(options = {}) {
+  return FLOW_STEPS
+    .filter((step) => shouldIncludeStep(step, options))
+    .map((step) => ({ ...step }))
 }
 export function getLabelForKind(kind) {
@@ -124,13 +133,13 @@ export function deriveCurrentIteration({ activeRun, summary, telemetry }) {
   return 0
 }
-export function deriveFlowSnapshot({ activeRun, summary, telemetry }) {
+export function deriveFlowSnapshot({ activeRun, summary, telemetry, options = {} }) {
   const currentIteration = deriveCurrentIteration({ activeRun, summary, telemetry })
   const iterationTelemetry = Array.isArray(telemetry)
     ? telemetry.filter((event) => Number(event?.iteration) === currentIteration)
     : []
   const activeStepKey = getStepKeyForActiveRun(activeRun)
-  const steps = FLOW_STEPS.map((step) => {
+  const steps = getFlowSteps(options).map((step) => {
     const matchingEvents = iterationTelemetry.filter((event) => getStepKeyForKind(event?.kind) === step.key)
     const latestEvent = matchingEvents.at(-1) ?? null
     const status = activeStepKey === step.key
@@ -139,10 +148,20 @@ export function deriveFlowSnapshot({ activeRun, summary, telemetry }) {
         ? normalizeEventStatus(latestEvent.status)
         : 'pending'
+    const activeStartedAt = activeStepKey === step.key
+      ? String(activeRun?.activeStartedAt ?? '')
+      : ''
+    const durationSeconds = latestEvent && Number.isFinite(Number(latestEvent.durationSeconds))
+      ? Number(latestEvent.durationSeconds)
+      : null
     return {
       ...step,
       status,
       latestEvent,
+      latestEventId: String(latestEvent?._vizId ?? ''),
+      activeStartedAt,
+      durationSeconds,
     }
   })
@@ -153,8 +172,8 @@ export function deriveFlowSnapshot({ activeRun, summary, telemetry }) {
   }
 }
-export function deriveStageGraph({ activeRun, summary, telemetry }) {
-  const flow = deriveFlowSnapshot({ activeRun, summary, telemetry })
+export function deriveStageGraph({ activeRun, summary, telemetry, options = {} }) {
+  const flow = deriveFlowSnapshot({ activeRun, summary, telemetry, options })
   const currentIteration = flow.iteration
   const iterationTelemetry = Array.isArray(telemetry)
     ? telemetry.filter((event) => Number(event?.iteration) === currentIteration)
@@ -196,10 +215,10 @@ export function deriveStageGraph({ activeRun, summary, telemetry }) {
   }
 }
-export function formatActiveLabel(activeRun, flow) {
+export function formatActiveLabel(activeRun, flow, options = {}) {
   const activeStepKey = flow?.activeStepKey || getStepKeyForActiveRun(activeRun)
   if (activeStepKey !== '') {
-    const step = FLOW_STEPS.find((entry) => entry.key === activeStepKey)
+    const step = getFlowSteps(options).find((entry) => entry.key === activeStepKey)
     if (step) {
       return step.label
     }

package/visualizer-ui/dist/assets/index-C5V0jXPE.css ADDED Viewed

@@ -0,0 +1 @@

+ :root{--bg: #0b1020;--panel: #121a30;--panel2: #17213d;--text: #e6edf7;--muted: #95a3bf;--line: #263252;--active: #6ee7ff;--done: #53d18d;--error: #ff6b81;--skip: #f0b35a;--pending: #4b5675}*{box-sizing:border-box}*{scrollbar-width:none;-ms-overflow-style:none}*::-webkit-scrollbar{width:0;height:0}html,body,#root{min-height:100%}body{margin:0;font:14px/1.4 ui-sans-serif,system-ui,-apple-system,BlinkMacSystemFont,Segoe UI,sans-serif;background:linear-gradient(180deg,#08101d,#0b1020 180px);color:var(--text)}button,input,select,textarea{font:inherit}button{color:inherit}.wrap{max-width:1400px;margin:0 auto;padding:20px}.header{display:flex;justify-content:space-between;gap:16px;align-items:flex-start;margin-bottom:20px}.title{font-size:28px;font-weight:700}.subtitle{color:var(--muted);margin-top:4px}.toolbar{display:flex;gap:12px;align-items:center;flex-wrap:wrap}.badge,select{display:inline-flex;align-items:center;gap:8px;padding:8px 12px;border-radius:999px;border:1px solid var(--line);background:#ffffff08;color:var(--text)}select{min-width:260px}.dot{width:10px;height:10px;border-radius:50%;background:var(--pending)}.dot.active{background:var(--active);box-shadow:0 0 18px #6ee7ff99}.grid{display:grid;gap:16px}.main-grid{grid-template-columns:minmax(320px,420px) 1fr;align-items:start}.side-grid{gap:16px}.detail-split{display:grid;grid-template-columns:1fr 1fr;gap:16px;margin-top:16px}.card{background:linear-gradient(180deg,#ffffff05,#ffffff03);border:1px solid var(--line);border-radius:16px;padding:16px;box-shadow:0 12px 40px #0000002e}.card-tight{padding:12px}.no-margin{margin:0}.label{color:var(--muted);font-size:12px;text-transform:uppercase;letter-spacing:.08em}.value{margin-top:8px;font-size:22px;font-weight:700}.value.small{font-size:16px}.todo-list{max-height:calc(100vh - 
140px);overflow:auto;padding-right:4px;display:grid;gap:12px;margin-top:12px}.todo-group{display:grid;gap:6px}.todo-group-heading{color:var(--muted);font-size:12px;font-weight:700;text-transform:uppercase;letter-spacing:.08em;padding:0 2px}.todo-group-items{display:grid;gap:6px}.todo-item{width:100%;border:1px solid var(--line);border-radius:12px;background:var(--panel);display:flex;gap:8px;align-items:flex-start;padding:9px 11px;text-align:left;cursor:pointer}.todo-item.active{border-color:var(--active);box-shadow:0 0 0 1px #6ee7ff40 inset}.todo-line{color:var(--muted);font-size:11px;min-width:36px;line-height:1.2}.todo-content{flex:1;min-width:0}.todo-task{font-weight:600;line-height:1.25}.todo-checked{color:var(--done)}.flow{display:grid;grid-template-columns:repeat(8,minmax(0,1fr));gap:10px;margin-top:14px}.step,.graph-node{border:1px solid var(--line);border-radius:14px;padding:12px;background:var(--panel);min-height:96px;position:relative;overflow:hidden}.step{width:100%;text-align:left}.step.clickable{cursor:pointer}.step.selected{box-shadow:0 0 0 1px #ffffff2e inset}.step:before,.graph-node:before{content:"";position:absolute;inset:0 auto 0 0;width:4px;background:var(--pending)}.step.active,.graph-node.active{border-color:var(--active);box-shadow:0 0 0 1px #6ee7ff59 inset,0 0 28px 
#6ee7ff1f}.step.active:before,.graph-node.active:before{background:var(--active)}.step.done:before,.graph-node.done:before{background:var(--done)}.step.error:before,.graph-node.error:before{background:var(--error)}.step.skipped:before,.graph-node.skipped:before{background:var(--skip)}.step-name{font-weight:700;margin-bottom:6px}.step-status{font-size:12px;text-transform:uppercase;letter-spacing:.08em;color:var(--muted)}.step-meta{margin-top:8px;color:var(--muted);font-size:12px;white-space:pre-wrap}.state-bar{display:flex;gap:10px;flex-wrap:wrap;margin-top:12px}.step-details-card{margin-top:12px}.step-details-summary{margin-top:8px;font-weight:700}.step-details-meta,.step-details-section{margin-top:10px;color:var(--muted);font-size:12px;text-transform:uppercase;letter-spacing:.08em}.state-chip{border:1px solid var(--line);border-radius:999px;padding:6px 10px;color:var(--muted);background:#ffffff08}.feed-toolbar{display:flex;gap:12px;align-items:center;flex-wrap:wrap;margin-top:12px;margin-bottom:10px}.feed-toggle{display:flex;gap:6px;align-items:center;color:var(--muted);font-size:12px}.feed-jump-row{display:flex;justify-content:flex-end;margin-bottom:8px}.feed-jump-button{border:1px solid var(--line);background:#6ee7ff14;color:var(--active);border-radius:999px;padding:6px 10px;cursor:pointer}.feed-jump-button:hover{background:#6ee7ff24}.feed{background:#0a1325;border:1px solid var(--line);border-radius:12px;padding:12px;max-height:320px;overflow:auto;font-family:ui-monospace,SFMono-Regular,Menlo,monospace}.feed-item{padding:8px 0;border-bottom:1px solid rgba(255,255,255,.06)}.feed-item:last-child{border-bottom:0}.feed-head{display:flex;gap:8px;align-items:center;flex-wrap:wrap}.feed-type{display:inline-flex;align-items:center;border:1px solid var(--line);border-radius:999px;padding:2px 
8px;font-size:11px;text-transform:uppercase;letter-spacing:.08em}.feed-type.agent_start,.feed-type.agent_end{color:var(--active)}.feed-type.thinking_delta{color:#b392f0}.feed-type.text_delta{color:var(--done)}.feed-type.tool_start,.feed-type.tool_update,.feed-type.tool_end{color:var(--skip)}.feed-meta{color:var(--muted);font-size:12px}.feed-text,.pinned-tool-text{white-space:pre-wrap;word-break:break-word;margin-top:6px}.feed-count{color:var(--muted);font-size:11px}.pinned-tool{background:#0a1325;border:1px solid var(--line);border-radius:12px;padding:12px;margin-top:12px}.pinned-tool-name{font-weight:700}.pinned-tool-meta{color:var(--muted);font-size:12px;margin-top:4px}.kv{display:grid;grid-template-columns:140px 1fr;gap:6px 10px;margin-top:12px}.kv div:nth-child(odd){color:var(--muted)}pre{margin:0;white-space:pre-wrap;word-break:break-word;background:#0a1325;border:1px solid var(--line);border-radius:12px;padding:12px;max-height:320px;overflow:auto}.graph{display:grid;grid-template-columns:repeat(auto-fit,minmax(180px,1fr));gap:12px;margin-top:12px}.graph-node{width:100%;min-height:120px;text-align:left;cursor:pointer}.table-wrap{margin-top:12px;overflow:auto;max-height:360px}table{width:100%;border-collapse:collapse}th,td{padding:10px 8px;border-bottom:1px solid var(--line);vertical-align:top;text-align:left}th{color:var(--muted);font-size:12px;text-transform:uppercase;letter-spacing:.08em}td{font-size:13px}tr[data-clickable="1"]{cursor:pointer}.status-pill{display:inline-block;border-radius:999px;padding:3px 8px;font-size:12px;font-weight:700;border:1px solid var(--line);background:var(--panel2)}.status-pill.done{color:var(--done)}.status-pill.error{color:var(--error)}.status-pill.skipped{color:var(--skip)}.status-pill.active{color:var(--active)}.edit-list{max-height:360px;overflow:auto;margin-top:12px}.edit-item{border:1px solid var(--line);border-radius:12px;margin-bottom:10px;overflow:hidden}.edit-head{display:block;list-style:none;padding:10px 
12px;background:#ffffff08;font-weight:600;cursor:pointer}.edit-head::-webkit-details-marker{display:none}.muted{color:var(--muted)}.bottom{margin-top:16px}.bottom summary{cursor:pointer;color:var(--muted);margin-bottom:10px}.diagnostics-grid{gap:16px}@media(max-width:1100px){.main-grid,.detail-split,.flow{grid-template-columns:1fr}.todo-list{max-height:none}}