npm - @sebastianandreasson/pi-autonomous-agents - Versions diffs - 0.11.0 → 0.13.0 - Mend

@sebastianandreasson/pi-autonomous-agents 0.11.0 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

package/README.md +17 -1
package/SETUP.md +7 -0
package/docs/PI_SUPERVISOR.md +21 -0
package/docs/TOKEN_USAGE_ARTIFACTS.md +215 -0
package/package.json +3 -3
package/pi.config.json +2 -0
package/src/cli.mjs +4 -1
package/src/index.mjs +13 -0
package/src/pi-client.mjs +68 -10
package/src/pi-config.mjs +21 -0
package/src/pi-history.mjs +2 -0
package/src/pi-prompts.mjs +19 -0
package/src/pi-repo.mjs +5 -2
package/src/pi-report.mjs +83 -29
package/src/pi-sdk-turn.mjs +88 -0
package/src/pi-supervisor.mjs +554 -28
package/src/pi-telemetry.mjs +14 -1
package/src/pi-token-analysis.mjs +480 -0
package/src/pi-visualizer-server.mjs +6 -1
package/templates/PROJECT_SETUP.md +4 -1
package/templates/pi.config.example.json +3 -1
package/visualizer-ui/dist/assets/index-Bsli4-ve.css +1 -0
package/visualizer-ui/dist/assets/index-DCGArR7-.js +12 -0
package/visualizer-ui/dist/index.html +2 -2
package/visualizer-ui/dist/assets/index-C5V0jXPE.css +0 -1
package/visualizer-ui/dist/assets/index-CpHvuv0C.js +0 -12

package/README.md CHANGED Viewed

@@ -190,10 +190,13 @@ Common fields in `pi.config.json`:
 - `testCommand`
 - `visualReviewEnabled`
 - `visualCaptureCommand`
+- `failureArtifactDir`
 - `continueAfterSeconds`
 - `toolContinueAfterSeconds`
 - `noEventTimeoutSeconds`
 - `toolNoEventTimeoutSeconds`
+- `sameFileLoopBudget`
+- `loopHistoryLimit`
 - `largeFileWarningLines`
 - `largeSpecWarningLines`
@@ -207,6 +210,8 @@ Key defaults:
 - `toolContinueAfterSeconds`: `900`
 - `noEventTimeoutSeconds`: `900`
 - `toolNoEventTimeoutSeconds`: `1800`
+- `sameFileLoopBudget`: `2`
+- `loopHistoryLimit`: `25`
 ## Prompt and Tooling Behavior
@@ -217,6 +222,7 @@ The package is optimized for local models by default:
 - prompts prefer `read` for source inspection
 - shell is intended for `git`, tests, and narrow diagnostics
 - SDK transport carries forward oversized shell-read warnings and loop/timeout guards
+- repeated same-file loop failures are remembered across iterations and escalate the next edit strategy
 - the supervisor emits large-file/spec warnings when touched files are getting risky
 This is deliberate. Large monolith files, huge e2e specs, and broad TODO items are one of the main causes of local-model drift and retry loops.
@@ -255,16 +261,24 @@ Useful files during a run:
   Latest verification output snapshot.
 - `.pi-last-iteration.json`
   Structured summary of the last completed iteration.
+- `pi-output/failure-artifacts/`
+  Compact failure artifacts with command, exit code, changed files, tester summary, and output excerpt.
 - `.pi-state.json`
   Persistent harness state, including in-progress iteration data.
 - `pi.log`
   Main run log.
 - `pi_telemetry.jsonl`
 - `pi_telemetry.csv`
+- `pi-output/token-usage/events.jsonl`
+  Normalized token-attribution event stream for downstream tools. Each row includes phase, role, kind, session/model, attribution bucket, tool/file context, and token counts.
+- `pi-output/token-usage/summary.json`
+  Derived structured token summary with totals plus breakdowns by phase, model, session, attribution, tool, file, and directory.
 - `.pi-runtime/active-run.json`
 - `.pi-runtime/runs/<runId>/...`
-`pi-harness report` summarizes recent telemetry and surfaces things like terminal reasons and large-file warnings.
+Each run also gets run-scoped token artifacts under `.pi-runtime/runs/<runId>/token-usage.events.jsonl` and `.pi-runtime/runs/<runId>/token-usage.summary.json`.
+`pi-harness report` summarizes recent telemetry and token artifacts and surfaces things like terminal reasons, large-file warnings, failure artifacts, and top token hotspots.
 `pi-harness run` now also starts a lightweight local web UI for the orchestration flow. By default it listens on `127.0.0.1:4317`. Override with `PI_VISUALIZER_HOST` and `PI_VISUALIZER_PORT`. Set `PI_VISUALIZER=0` to disable the embedded web UI for a run.
@@ -308,6 +322,8 @@ That clears configured harness runtime/history artifacts and verifies they are g
   Agent-facing setup instructions for consuming repos.
 - [docs/PI_SUPERVISOR.md](./docs/PI_SUPERVISOR.md)
   More detailed flow, transport, telemetry, and runtime documentation.
+- [docs/TOKEN_USAGE_ARTIFACTS.md](./docs/TOKEN_USAGE_ARTIFACTS.md)
+  Agent-facing contract and usage guidance for token-usage artifacts and downstream tooling.
 - [templates/PROJECT_SETUP.md](./templates/PROJECT_SETUP.md)
   Minimal consuming-repo layout summary.

package/SETUP.md CHANGED Viewed

@@ -67,6 +67,12 @@ Important:
   - mention project-specific constraints, startup flow, or directories
   - keep the harness workflow intact
+Recommended:
+- If the repo wants agents to learn from harness token data, also reference:
+  - `node_modules/@sebastianandreasson/pi-autonomous-agents/docs/TOKEN_USAGE_ARTIFACTS.md`
+- Add a short repo-local instruction snippet telling agents to read `pi-output/token-usage/summary.json` before investigating retries, hotspots, or large turns.
 4. Ensure `TODOS.md` exists.
 - If the repo already uses a task file, keep it.
@@ -191,6 +197,7 @@ The harness should fail fast if:
 For prompt debugging, inspect `.pi-last-prompt.txt` after a run. It contains the exact assembled prompt that was sent for the active role.
 For flow debugging, inspect `.pi-last-iteration.json` after a run. It summarizes the selected task, repo-change outcome, tester verdict, commit-plan state, and terminal reason.
+For token-hotspot debugging, inspect `pi-output/token-usage/summary.json` first and only read `pi-output/token-usage/events.jsonl` when the summary is not enough.
 ## Agent Rules

package/docs/PI_SUPERVISOR.md CHANGED Viewed

@@ -80,10 +80,13 @@ Projects typically provide their own `pi.config.json` with fields such as:
 - `visualCaptureCommand`
 - `visualFeedbackFile`
 - `testerFeedbackFile`
+- `failureArtifactDir`
 - `models`
 - `piModel`
 - `visualReviewModel`
 - `commitMode`
+- `sameFileLoopBudget`
+- `loopHistoryLimit`
 Model entries may carry their own OpenAI-compatible endpoint settings, so the PI text loop and the multimodal visual reviewer can point at different backends without changing code.
@@ -124,6 +127,10 @@ The default flow keeps commit ownership with the active agent:
 2. `tester` should review functionality and, on `PASS`, stage only the task-related files and create the commit directly.
 3. If the working tree is too messy to isolate safely, tester should return `VERDICT: BLOCKED` instead of guessing.
+If tester returns `PASS` but leaves a dirty tree without creating the commit, the harness now treats that as a protocol error and automatically falls back to a commit-plan follow-up instead of stalling the iteration.
+If tester edits files before finalization, the harness re-runs the configured smoke verification command immediately and records which files tester touched.
 If a repo explicitly needs the older harness-managed commit-plan flow, set `commitMode` to `plan`. In that mode, `testerCommit` and parsed commit plans are used as a compatibility path rather than the default.
 For source inspection, prompts prefer `read` and reserve shell usage for `git`, tests, and narrow diagnostics. Large shell file reads are more likely to truncate under context pressure than focused `read` calls.
@@ -175,6 +182,7 @@ SDK transport mitigates obvious local loops by watching agent and tool events:
 - repeated identical tool calls are aborted
 - repeated same-path churn is aborted
+- repeated same-file loop targets are persisted in harness state and escalate the next retry strategy
 - a soft `continue` can be sent after inactivity
 - a separate tool-aware watchdog can tolerate long-running `bash` or browser work without treating the turn as dead
 - a hard no-event timeout aborts a wedged turn instead of hanging indefinitely
@@ -200,4 +208,17 @@ Each step records:
 - changed file count
 - verification status
 - retry count
+- artifact path for compact failure diagnostics when available
+- output excerpt for failed verification-style events
 - notes
+The harness also produces structured token-usage artifacts intended for downstream tooling:
+- `pi-output/token-usage/events.jsonl`
+- `pi-output/token-usage/summary.json`
+- `.pi-runtime/runs/<runId>/token-usage.events.jsonl`
+- `.pi-runtime/runs/<runId>/token-usage.summary.json`
+These artifacts are the stable machine-readable token contract. The visualizer and report command are consumers of those files, not the source of truth.
+For agent-facing guidance on how to interpret and use those files in consuming repos, see [TOKEN_USAGE_ARTIFACTS.md](./TOKEN_USAGE_ARTIFACTS.md).

package/docs/TOKEN_USAGE_ARTIFACTS.md ADDED Viewed

@@ -0,0 +1,215 @@
+# Token Usage Artifacts
+This document is written for autonomous coding agents and repo maintainers who want to use the token-usage artifacts produced by `pi-harness` in their own projects.
+The goal is not just to visualize token usage. The goal is to expose a stable machine-readable contract that other tools, prompts, reports, or project-specific scripts can reuse.
+## Produced Artifacts
+Repo-scoped artifacts:
+- `pi-output/token-usage/events.jsonl`
+- `pi-output/token-usage/summary.json`
+Run-scoped artifacts:
+- `.pi-runtime/runs/<runId>/token-usage.events.jsonl`
+- `.pi-runtime/runs/<runId>/token-usage.summary.json`
+Use repo-scoped files when you want the latest cumulative view for the repository.
+Use run-scoped files when you want to inspect one specific harness run in isolation.
+## Recommended Consumption Order
+When an agent wants to use token data, prefer this order:
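+1. Read the summary artifact first (`summary.json` for the repo, `token-usage.summary.json` for a single run).
+2. Only read the events artifact (`events.jsonl` for the repo, `token-usage.events.jsonl` for a single run) if the summary is not enough.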
+3. Prefer run-scoped artifacts when analyzing one run.
+4. Prefer repo-scoped artifacts when looking for long-term hotspots.
+This keeps token-analysis prompts compact and avoids spending more tokens just to inspect token data.
+## Event Schema
+Each line in `events.jsonl` is one normalized token-attribution event.
+Important fields:
+- `schemaVersion`
+- `timestamp`
+- `runId`
+- `transport`
+- `sessionId`
+- `model`
+- `iteration`
+- `retryCount`
+- `reason`
+- `phase`
+- `role`
+- `kind`
+- `attributionKind`
+- `toolNames`
+- `files`
+- `primaryFile`
+- `inputTokens`
+- `outputTokens`
+- `totalTokens`
+- `cacheReadTokens`
+- `cacheWriteTokens`
+Semantics:
+- `kind`, `phase`, and `role` identify the harness stage where the tokens were spent.
+- `toolNames` and `files` capture the nearby tool/file context seen around the token event.
+- `attributionKind` explains how the event was classified:
+  - `thinking`
+  - `response`
+  - `tool_context`
+  - `tool_running`
+  - `agent`
+Important:
+- file and directory attribution are inferred from nearby tool context
+- they are useful for hotspot detection, not exact provider-native accounting
+- if one token event touches multiple files, downstream summaries may split that event across those files
+## Summary Schema
+`summary.json` contains:
+- `schemaVersion`
+- `generatedAt`
+- `source.eventCount`
+- `totals`
+- `coverage`
+- `breakdowns`
+`totals` contains:
+- `inputTokens`
+- `outputTokens`
+- `totalTokens`
+- `cacheReadTokens`
+- `cacheWriteTokens`
+- `eventCount`
+`coverage` contains:
+- `fileAttributedTokens`
+- `unattributedTokens`
+- `fileAttributionRatio`
+`breakdowns` contains:
+- `byKind`
+- `byRole`
+- `byPhase`
+- `byModel`
+- `bySession`
+- `byAttribution`
+- `byTool`
+- `byFile`
+- `byDirectory`
+Each breakdown item contains:
+- `key`
+- `label`
+- `inputTokens`
+- `outputTokens`
+- `totalTokens`
+- `cacheReadTokens`
+- `cacheWriteTokens`
+- `eventCount`
+## How Agents Should Use This Data
+Use token artifacts to answer questions like:
+- Which harness phases are spending the most tokens?
+- Which files or directories repeatedly consume tokens?
+- Are retries concentrated in one hotspot?
+- Is token usage dominated by thinking, tool-context, or response generation?
+- Are certain models or sessions much more expensive than others?
+Good uses:
+- splitting a large TODO item into narrower tasks
+- identifying files that should be decomposed before another agent pass
+- deciding whether a hot directory needs refactor work
+- comparing whether `developer` or `tester` is driving most cost
+- checking whether a local model is wasting tokens on repeated tool/file churn
+Bad uses:
+- treating `byFile` values as exact per-file billing
+- assuming all unattributed tokens are waste
+- optimizing for raw token count while ignoring correctness
+## Agent Workflow Guidance
+When an agent is asked to improve harness efficiency in a repo:
+1. Read `summary.json`.
+2. Inspect `breakdowns.byFile`, `breakdowns.byDirectory`, `breakdowns.byTool`, and `breakdowns.byAttribution`.
+3. If one file or directory dominates, inspect the related source only after confirming the hotspot from the summary.
+4. If `fileAttributionRatio` is low, rely more on `byKind`, `byRole`, `byModel`, and `byAttribution` than on `byFile`.
+5. When proposing changes, explicitly distinguish:
+   - exact token totals from artifacts
+   - inferred file attribution from nearby context
+## Recommended Interpretation Rules
+Use these heuristics:
+- High `byFile` and high `fileAttributionRatio`:
+  Strong signal that the file is a real hotspot.
+- High `byDirectory` with spread across many files:
+  The problem is probably architectural or task-shaping, not one file only.
+- High `byAttribution.tool_context` or `tool_running`:
+  The agent may be rereading, diffing, or patching inefficiently.
+- High `byAttribution.thinking` with low file coverage:
+  The problem may be task ambiguity or prompt shape rather than one code hotspot.
+- High `byModel` on one role:
+  That role may need a smaller scope, different model, or clearer repo instructions.
+## Instruction Snippet For Consuming Repos
+If a consuming repo wants its own agents to use the artifacts, add guidance like this to repo-local instructions:
+```md
+## Token Usage Data
+This repo may contain `pi-harness` token artifacts:
+- `pi-output/token-usage/summary.json`
+- `pi-output/token-usage/events.jsonl`
+When investigating repeated retries, large agent turns, or code hotspots:
+1. Read `summary.json` first.
+2. Use `breakdowns.byFile`, `breakdowns.byDirectory`, `breakdowns.byTool`, and `breakdowns.byAttribution` to locate hotspots.
+3. Treat file and directory token attribution as inferred context, not exact billing.
+4. If one file is a clear hotspot, prefer smaller TODOs, narrower reads, or structural refactors over brute-force retries.
+5. If file attribution is weak, rely more on `byKind`, `byRole`, `byModel`, and `byAttribution`.
+```
+## Project-Specific Extensions
+Projects can build their own tooling on top of these artifacts, for example:
+- nightly regression reports that flag rising token hotspots
+- CI checks that warn when one file dominates token spend
+- repo-specific dashboards
+- prompt builders that mention known hotspots before starting a developer turn
+- scripts that compare token patterns before and after a refactor
+When doing that, depend on:
+- `schemaVersion`
+- the named summary fields
+- the normalized event fields
+Do not depend on the visualizer UI structure or CSS. Those are consumers, not the contract.

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "@sebastianandreasson/pi-autonomous-agents",
   "private": false,
-  "version": "0.11.0",
+  "version": "0.13.0",
   "type": "module",
   "description": "Portable unattended PI harness for developer/tester/visual-review loops.",
   "license": "MIT",
@@ -19,8 +19,8 @@
     "@mariozechner/pi-coding-agent": "^0.66.1"
   },
   "scripts": {
-    "check": "node --check src/cli.mjs && node --check src/pi-clear-history.mjs && node --check src/pi-client.mjs && node --check src/pi-config.mjs && node --check src/pi-debug-live.mjs && node --check src/pi-flow.mjs && node --check src/pi-heartbeat.mjs && node --check src/pi-history.mjs && node --check src/pi-preflight.mjs && node --check src/pi-prompts.mjs && node --check src/pi-repo.mjs && node --check src/pi-report.mjs && node --check src/pi-sdk-turn.mjs && node --check src/pi-supervisor.mjs && node --check src/pi-telemetry.mjs && node --check src/pi-visual-once.mjs && node --check src/pi-visual-review.mjs && node --check src/pi-visualizer.mjs && node --check src/pi-visualizer-server.mjs && node --check src/pi-visualizer-shared.mjs && node --check src/index.mjs && node --check test/pi-heartbeat.test.mjs && node --check test/pi-lifecycle.test.mjs && node --check test/pi-role-models.test.mjs && node --check test/pi-flow.test.mjs && node --check test/pi-history.test.mjs && node --check test/pi-prompts.test.mjs && node --check test/pi-preflight.test.mjs && node --check test/pi-repo.test.mjs && node --check test/pi-sdk-supervisor.test.mjs && node --check test/pi-sdk-turn.test.mjs && node --check test/pi-telemetry.test.mjs && node --check test/pi-visualizer-shared.test.mjs && node --check test/fixtures/fake-pi.mjs && node --check test/fixtures/fake-pi-sdk.mjs && node --check test/fixtures/fake-live-pi-sdk.mjs",
-    "test": "node --test test/pi-heartbeat.test.mjs test/pi-lifecycle.test.mjs test/pi-role-models.test.mjs test/pi-flow.test.mjs test/pi-history.test.mjs test/pi-prompts.test.mjs test/pi-preflight.test.mjs test/pi-repo.test.mjs test/pi-sdk-supervisor.test.mjs test/pi-sdk-turn.test.mjs test/pi-telemetry.test.mjs test/pi-visualizer-shared.test.mjs",
+    "check": "node --check src/cli.mjs && node --check src/pi-clear-history.mjs && node --check src/pi-client.mjs && node --check src/pi-config.mjs && node --check src/pi-debug-live.mjs && node --check src/pi-flow.mjs && node --check src/pi-heartbeat.mjs && node --check src/pi-history.mjs && node --check src/pi-preflight.mjs && node --check src/pi-prompts.mjs && node --check src/pi-repo.mjs && node --check src/pi-report.mjs && node --check src/pi-sdk-turn.mjs && node --check src/pi-supervisor.mjs && node --check src/pi-telemetry.mjs && node --check src/pi-token-analysis.mjs && node --check src/pi-visual-once.mjs && node --check src/pi-visual-review.mjs && node --check src/pi-visualizer.mjs && node --check src/pi-visualizer-server.mjs && node --check src/pi-visualizer-shared.mjs && node --check src/index.mjs && node --check test/pi-heartbeat.test.mjs && node --check test/pi-lifecycle.test.mjs && node --check test/pi-role-models.test.mjs && node --check test/pi-flow.test.mjs && node --check test/pi-history.test.mjs && node --check test/pi-prompts.test.mjs && node --check test/pi-preflight.test.mjs && node --check test/pi-repo.test.mjs && node --check test/pi-sdk-supervisor.test.mjs && node --check test/pi-sdk-turn.test.mjs && node --check test/pi-telemetry.test.mjs && node --check test/pi-token-analysis.test.mjs && node --check test/pi-visualizer-shared.test.mjs && node --check test/fixtures/fake-pi.mjs && node --check test/fixtures/fake-pi-sdk.mjs && node --check test/fixtures/fake-live-pi-sdk.mjs",
+    "test": "node --test test/pi-heartbeat.test.mjs test/pi-lifecycle.test.mjs test/pi-role-models.test.mjs test/pi-flow.test.mjs test/pi-history.test.mjs test/pi-prompts.test.mjs test/pi-preflight.test.mjs test/pi-repo.test.mjs test/pi-sdk-supervisor.test.mjs test/pi-sdk-turn.test.mjs test/pi-telemetry.test.mjs test/pi-token-analysis.test.mjs test/pi-visualizer-shared.test.mjs",
     "debug:live-ui": "node src/cli.mjs debug-live --reset",
     "dev:visualizer:ui": "npm --prefix visualizer-ui run dev",
     "build:visualizer:ui": "npm --prefix visualizer-ui run build",

package/pi.config.json CHANGED Viewed

@@ -17,6 +17,8 @@
   "testerFeedbackHistoryDir": "pi-output/tester-feedback/history",
   "visualReviewHistoryDir": "pi-output/visual-review/history",
   "visualCaptureDir": "pi-output/visual-capture",
+  "tokenUsageEventsFile": "pi-output/token-usage/events.jsonl",
+  "tokenUsageSummaryFile": "pi-output/token-usage/summary.json",
   "visualCaptureCommand": "",
   "visualCaptureTimeoutSeconds": 300,
   "visualReviewEnabled": false,

package/src/cli.mjs CHANGED Viewed

@@ -36,11 +36,14 @@ function main() {
   if (subcommand === 'once' || subcommand === 'run') {
     childArgs.push(subcommand)
   }
+  const childStdio = subcommand === 'once' || subcommand === 'run'
+    ? ['pipe', 'inherit', 'inherit']
+    : 'inherit'
   const child = spawn(process.execPath, childArgs, {
     cwd: process.cwd(),
     env: process.env,
-    stdio: 'inherit',
+    stdio: childStdio,
   })
   registerOwnedChildProcess(child)

package/src/index.mjs CHANGED Viewed

@@ -13,5 +13,18 @@ export { clearHarnessHistory, collectHistoryTargets } from './pi-history.mjs'
 export { collectLargeFileWarnings } from './pi-repo.mjs'
 export { runAgentTurn } from './pi-client.mjs'
 export { createSdkSession, createTools, normalizeToolNames, resolveModel, runSdkTurn, runSdkTurnWithPi, splitModelSpec } from './pi-sdk-turn.mjs'
+export {
+  appendTokenUsageEvent,
+  applyTokenAttributionEvent,
+  createEmptyTokenBreakdown,
+  createEmptyTokenUsage,
+  deriveTokenBreakdown,
+  ensureTokenUsageFiles,
+  formatTokenUsageSummary,
+  normalizeTokenAttributionEvent,
+  normalizeTokenUsage,
+  readTokenUsageEvents,
+  readTokenUsageSummary,
+} from './pi-token-analysis.mjs'
 export { deriveCurrentIteration, deriveFlowSnapshot, deriveStageGraph, formatActiveLabel, getFlowSteps, getLabelForKind, getStepKeyForActiveRun, getStepKeyForKind } from './pi-visualizer-shared.mjs'
 export { buildSnapshot, readVisualizerHost, readVisualizerPort, renderHtml, startVisualizerServer } from './pi-visualizer-server.mjs'

package/src/pi-client.mjs CHANGED Viewed

@@ -11,6 +11,7 @@ import {
   writeTextFile,
 } from './pi-repo.mjs'
 import { runSdkTurn } from './pi-sdk-turn.mjs'
+import { appendTokenUsageEvent } from './pi-token-analysis.mjs'
 function truncateForNotes(text) {
   const trimmed = text.trim()
@@ -26,6 +27,7 @@ function formatLastAgentOutput(response) {
     `sessionId: ${String(response.sessionId ?? '')}`,
     `sessionFile: ${String(response.sessionFile ?? '')}`,
     `terminalReason: ${String(response.terminalReason ?? '')}`,
+    `tokens: total=${Number(response.totalTokens ?? 0)} input=${Number(response.inputTokens ?? 0)} output=${Number(response.outputTokens ?? 0)} cacheRead=${Number(response.cacheReadTokens ?? 0)} cacheWrite=${Number(response.cacheWriteTokens ?? 0)}`,
     `notes: ${String(response.notes ?? '').trim()}`,
   ]
@@ -81,10 +83,49 @@ function sanitizeLiveFeedEvent(filePath, event) {
     kind: String(event?.kind ?? ''),
     type: String(event?.type ?? 'event'),
     toolName: String(event?.toolName ?? ''),
+    sessionId: String(event?.sessionId ?? ''),
+    model: String(event?.model ?? ''),
     isError: event?.isError === true,
     text: truncateText(event?.text ?? '', MAX_LIVE_FEED_TEXT),
   }
+  const numericFields = {
+    inputTokens: Number(event?.inputTokens),
+    outputTokens: Number(event?.outputTokens),
+    totalTokens: Number(event?.totalTokens),
+    cacheReadTokens: Number(event?.cacheReadTokens),
+    cacheWriteTokens: Number(event?.cacheWriteTokens),
+  }
+  for (const [key, value] of Object.entries(numericFields)) {
+    if (Number.isFinite(value) && value > 0) {
+      normalized[key] = value
+    }
+  }
+  const attributionKind = String(event?.attributionKind ?? '').trim()
+  if (attributionKind !== '') {
+    normalized.attributionKind = attributionKind
+  }
+  const primaryFile = String(event?.primaryFile ?? '').trim()
+  if (primaryFile !== '') {
+    normalized.primaryFile = primaryFile
+  }
+  const toolNames = Array.isArray(event?.toolNames)
+    ? [...new Set(event.toolNames.map((value) => String(value ?? '').trim()).filter(Boolean))]
+    : []
+  if (toolNames.length > 0) {
+    normalized.toolNames = toolNames
+  }
+  const files = Array.isArray(event?.files)
+    ? [...new Set(event.files.map((value) => String(value ?? '').trim()).filter(Boolean))]
+    : []
+  if (files.length > 0) {
+    normalized.files = files
+  }
   const argsSummary = summarizeValue(event?.args)
   const partialSummary = summarizeValue(event?.partialResult)
   const resultSummary = summarizeValue(event?.result)
@@ -102,21 +143,23 @@ function sanitizeLiveFeedEvent(filePath, event) {
 }
 async function appendLiveFeedEvent(config, event) {
-  if (!config.runLiveFeedFile) {
-    return
-  }
-  const filePath = config.runLiveFeedFile
-  const previous = liveFeedWriteQueues.get(filePath) ?? Promise.resolve()
+  const filePath = String(config.runLiveFeedFile ?? '').trim()
+  const queueKey = filePath || String(config.runTokenUsageEventsFile ?? config.tokenUsageEventsFile ?? 'token-usage')
+  const previous = liveFeedWriteQueues.get(queueKey) ?? Promise.resolve()
   const next = previous
     .catch(() => {})
     .then(async () => {
-      const sanitized = sanitizeLiveFeedEvent(filePath, event)
-      await fs.mkdir(path.dirname(filePath), { recursive: true })
-      await fs.appendFile(filePath, `${JSON.stringify(sanitized)}\n`, 'utf8')
+      const sanitized = sanitizeLiveFeedEvent(queueKey, event)
+      if (filePath !== '') {
+        await fs.mkdir(path.dirname(filePath), { recursive: true })
+        await fs.appendFile(filePath, `${JSON.stringify(sanitized)}\n`, 'utf8')
+      }
+      if (sanitized.type === 'token_usage') {
+        await appendTokenUsageEvent(config, sanitized)
+      }
     })
-  liveFeedWriteQueues.set(filePath, next)
+  liveFeedWriteQueues.set(queueKey, next)
   await next
 }
@@ -154,6 +197,11 @@ async function runMockTurn({ config, sessionId, sessionFile, prompt, reason }) {
     toolCalls: 0,
     toolErrors: 0,
     messageUpdates: 0,
+    inputTokens: 0,
+    outputTokens: 0,
+    totalTokens: 0,
+    cacheReadTokens: 0,
+    cacheWriteTokens: 0,
     stopReason: '',
     loopDetected: false,
     loopSignature: '',
@@ -224,6 +272,11 @@ async function runSdkTransportTurn({ config, model, sessionId, sessionFile, prom
       toolCalls: 0,
       toolErrors: 0,
       messageUpdates: 0,
+      inputTokens: 0,
+      outputTokens: 0,
+      totalTokens: 0,
+      cacheReadTokens: 0,
+      cacheWriteTokens: 0,
       stopReason: '',
       loopDetected: false,
       loopSignature: '',
@@ -248,6 +301,11 @@ async function runSdkTransportTurn({ config, model, sessionId, sessionFile, prom
     toolCalls: Number.isFinite(Number(response.toolCalls)) ? Number(response.toolCalls) : 0,
     toolErrors: Number.isFinite(Number(response.toolErrors)) ? Number(response.toolErrors) : 0,
     messageUpdates: Number.isFinite(Number(response.messageUpdates)) ? Number(response.messageUpdates) : 0,
+    inputTokens: Number.isFinite(Number(response.inputTokens)) ? Number(response.inputTokens) : 0,
+    outputTokens: Number.isFinite(Number(response.outputTokens)) ? Number(response.outputTokens) : 0,
+    totalTokens: Number.isFinite(Number(response.totalTokens)) ? Number(response.totalTokens) : 0,
+    cacheReadTokens: Number.isFinite(Number(response.cacheReadTokens)) ? Number(response.cacheReadTokens) : 0,
+    cacheWriteTokens: Number.isFinite(Number(response.cacheWriteTokens)) ? Number(response.cacheWriteTokens) : 0,
     stopReason: String(response.stopReason ?? ''),
     loopDetected: response.loopDetected === true,
     loopSignature: String(response.loopSignature ?? ''),

package/src/pi-config.mjs CHANGED Viewed

@@ -259,6 +259,7 @@ export function loadConfig(mode = 'once') {
     maxTesterFeedbackLines: readInt('PI_MAX_TESTER_FEEDBACK_LINES', file.maxTesterFeedbackLines, 32),
     maxPromptNotesLines: readInt('PI_MAX_PROMPT_NOTES_LINES', file.maxPromptNotesLines, 16),
     maxVerificationExcerptLines: readInt('PI_MAX_VERIFICATION_EXCERPT_LINES', file.maxVerificationExcerptLines, 40),
+    maxFailureArtifactLines: readInt('PI_MAX_FAILURE_ARTIFACT_LINES', file.maxFailureArtifactLines, 80),
     largeFileWarningLines: readInt('PI_LARGE_FILE_WARNING_LINES', file.largeFileWarningLines, 500),
     largeSpecWarningLines: readInt('PI_LARGE_SPEC_WARNING_LINES', file.largeSpecWarningLines, 300),
     piTools: readString('PI_TOOLS', file.piTools, 'read,edit,write,find,ls,bash'),
@@ -280,6 +281,8 @@ export function loadConfig(mode = 'once') {
     verificationTimeoutSeconds: readInt('PI_VERIFICATION_TIMEOUT', file.verificationTimeoutSeconds, 300),
     idleRetryLimit: readInt('PI_IDLE_RETRY_LIMIT', file.idleRetryLimit, 1),
     noChangeRetryLimit: readInt('PI_NO_CHANGE_RETRY_LIMIT', file.noChangeRetryLimit, 1),
+    sameFileLoopBudget: readInt('PI_SAME_FILE_LOOP_BUDGET', file.sameFileLoopBudget, 2),
+    loopHistoryLimit: readInt('PI_LOOP_HISTORY_LIMIT', file.loopHistoryLimit, 25),
     visualFeedbackFile: resolveFromCwd(
       cwd,
       'PI_VISUAL_FEEDBACK_FILE',
@@ -298,6 +301,12 @@ export function loadConfig(mode = 'once') {
       file.testerFeedbackHistoryDir,
       'pi-output/tester-feedback/history'
     ),
+    failureArtifactDir: resolveFromCwd(
+      cwd,
+      'PI_FAILURE_ARTIFACT_DIR',
+      file.failureArtifactDir,
+      'pi-output/failure-artifacts'
+    ),
     visualReviewHistoryDir: resolveFromCwd(
       cwd,
       'PI_VISUAL_REVIEW_HISTORY_DIR',
@@ -310,6 +319,18 @@ export function loadConfig(mode = 'once') {
       file.visualCaptureDir,
       'pi-output/visual-capture'
     ),
+    tokenUsageEventsFile: resolveFromCwd(
+      cwd,
+      'PI_TOKEN_USAGE_EVENTS_FILE',
+      file.tokenUsageEventsFile,
+      'pi-output/token-usage/events.jsonl'
+    ),
+    tokenUsageSummaryFile: resolveFromCwd(
+      cwd,
+      'PI_TOKEN_USAGE_SUMMARY_FILE',
+      file.tokenUsageSummaryFile,
+      'pi-output/token-usage/summary.json'
+    ),
     visualCaptureCommand: readString('PI_VISUAL_CAPTURE_CMD', file.visualCaptureCommand, ''),
     visualCaptureTimeoutSeconds: readInt('PI_VISUAL_CAPTURE_TIMEOUT', file.visualCaptureTimeoutSeconds, 300),
     visualReviewEnabled: readBool('PI_VISUAL_REVIEW_ENABLED', file.visualReviewEnabled, false),

package/src/pi-history.mjs CHANGED Viewed

@@ -22,6 +22,8 @@ export function collectHistoryTargets(config) {
     config.changedFilesFile,
     config.lastPromptFile,
     config.lastIterationSummaryFile,
+    config.tokenUsageEventsFile,
+    config.tokenUsageSummaryFile,
     config.piRuntimeDir,
     config.visualFeedbackFile,
     config.testerFeedbackFile,