@sebastianandreasson/pi-autonomous-agents 0.11.0 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -1
- package/SETUP.md +7 -0
- package/docs/PI_SUPERVISOR.md +21 -0
- package/docs/TOKEN_USAGE_ARTIFACTS.md +215 -0
- package/package.json +3 -3
- package/pi.config.json +2 -0
- package/src/cli.mjs +4 -1
- package/src/index.mjs +13 -0
- package/src/pi-client.mjs +68 -10
- package/src/pi-config.mjs +21 -0
- package/src/pi-history.mjs +2 -0
- package/src/pi-prompts.mjs +19 -0
- package/src/pi-repo.mjs +5 -2
- package/src/pi-report.mjs +83 -29
- package/src/pi-sdk-turn.mjs +88 -0
- package/src/pi-supervisor.mjs +554 -28
- package/src/pi-telemetry.mjs +14 -1
- package/src/pi-token-analysis.mjs +480 -0
- package/src/pi-visualizer-server.mjs +6 -1
- package/templates/PROJECT_SETUP.md +4 -1
- package/templates/pi.config.example.json +3 -1
- package/visualizer-ui/dist/assets/index-Bsli4-ve.css +1 -0
- package/visualizer-ui/dist/assets/index-DCGArR7-.js +12 -0
- package/visualizer-ui/dist/index.html +2 -2
- package/visualizer-ui/dist/assets/index-C5V0jXPE.css +0 -1
- package/visualizer-ui/dist/assets/index-CpHvuv0C.js +0 -12
package/README.md
CHANGED
|
@@ -190,10 +190,13 @@ Common fields in `pi.config.json`:
|
|
|
190
190
|
- `testCommand`
|
|
191
191
|
- `visualReviewEnabled`
|
|
192
192
|
- `visualCaptureCommand`
|
|
193
|
+
- `failureArtifactDir`
|
|
193
194
|
- `continueAfterSeconds`
|
|
194
195
|
- `toolContinueAfterSeconds`
|
|
195
196
|
- `noEventTimeoutSeconds`
|
|
196
197
|
- `toolNoEventTimeoutSeconds`
|
|
198
|
+
- `sameFileLoopBudget`
|
|
199
|
+
- `loopHistoryLimit`
|
|
197
200
|
- `largeFileWarningLines`
|
|
198
201
|
- `largeSpecWarningLines`
|
|
199
202
|
|
|
@@ -207,6 +210,8 @@ Key defaults:
|
|
|
207
210
|
- `toolContinueAfterSeconds`: `900`
|
|
208
211
|
- `noEventTimeoutSeconds`: `900`
|
|
209
212
|
- `toolNoEventTimeoutSeconds`: `1800`
|
|
213
|
+
- `sameFileLoopBudget`: `2`
|
|
214
|
+
- `loopHistoryLimit`: `25`
|
|
210
215
|
|
|
211
216
|
## Prompt and Tooling Behavior
|
|
212
217
|
|
|
@@ -217,6 +222,7 @@ The package is optimized for local models by default:
|
|
|
217
222
|
- prompts prefer `read` for source inspection
|
|
218
223
|
- shell is intended for `git`, tests, and narrow diagnostics
|
|
219
224
|
- SDK transport carries forward oversized shell-read warnings and loop/timeout guards
|
|
225
|
+
- repeated same-file loop failures are remembered across iterations and escalate the next edit strategy
|
|
220
226
|
- the supervisor emits large-file/spec warnings when touched files are getting risky
|
|
221
227
|
|
|
222
228
|
This is deliberate. Large monolith files, huge e2e specs, and broad TODO items are one of the main causes of local-model drift and retry loops.
|
|
@@ -255,16 +261,24 @@ Useful files during a run:
|
|
|
255
261
|
Latest verification output snapshot.
|
|
256
262
|
- `.pi-last-iteration.json`
|
|
257
263
|
Structured summary of the last completed iteration.
|
|
264
|
+
- `pi-output/failure-artifacts/`
|
|
265
|
+
Compact failure artifacts with command, exit code, changed files, tester summary, and output excerpt.
|
|
258
266
|
- `.pi-state.json`
|
|
259
267
|
Persistent harness state, including in-progress iteration data.
|
|
260
268
|
- `pi.log`
|
|
261
269
|
Main run log.
|
|
262
270
|
- `pi_telemetry.jsonl`
|
|
263
271
|
- `pi_telemetry.csv`
|
|
272
|
+
- `pi-output/token-usage/events.jsonl`
|
|
273
|
+
Normalized token-attribution event stream for downstream tools. Each row includes phase, role, kind, session/model, attribution bucket, tool/file context, and token counts.
|
|
274
|
+
- `pi-output/token-usage/summary.json`
|
|
275
|
+
Derived structured token summary with totals plus breakdowns by phase, model, session, attribution, tool, file, and directory.
|
|
264
276
|
- `.pi-runtime/active-run.json`
|
|
265
277
|
- `.pi-runtime/runs/<runId>/...`
|
|
266
278
|
|
|
267
|
-
|
|
279
|
+
Each run also gets run-scoped token artifacts under `.pi-runtime/runs/<runId>/token-usage.events.jsonl` and `.pi-runtime/runs/<runId>/token-usage.summary.json`.
|
|
280
|
+
|
|
281
|
+
`pi-harness report` summarizes recent telemetry and token artifacts and surfaces things like terminal reasons, large-file warnings, failure artifacts, and top token hotspots.
|
|
268
282
|
|
|
269
283
|
`pi-harness run` now also starts a lightweight local web UI for the orchestration flow. By default it listens on `127.0.0.1:4317`. Override with `PI_VISUALIZER_HOST` and `PI_VISUALIZER_PORT`. Set `PI_VISUALIZER=0` to disable the embedded web UI for a run.
|
|
270
284
|
|
|
@@ -308,6 +322,8 @@ That clears configured harness runtime/history artifacts and verifies they are g
|
|
|
308
322
|
Agent-facing setup instructions for consuming repos.
|
|
309
323
|
- [docs/PI_SUPERVISOR.md](./docs/PI_SUPERVISOR.md)
|
|
310
324
|
More detailed flow, transport, telemetry, and runtime documentation.
|
|
325
|
+
- [docs/TOKEN_USAGE_ARTIFACTS.md](./docs/TOKEN_USAGE_ARTIFACTS.md)
|
|
326
|
+
Agent-facing contract and usage guidance for token-usage artifacts and downstream tooling.
|
|
311
327
|
- [templates/PROJECT_SETUP.md](./templates/PROJECT_SETUP.md)
|
|
312
328
|
Minimal consuming-repo layout summary.
|
|
313
329
|
|
package/SETUP.md
CHANGED
|
@@ -67,6 +67,12 @@ Important:
|
|
|
67
67
|
- mention project-specific constraints, startup flow, or directories
|
|
68
68
|
- keep the harness workflow intact
|
|
69
69
|
|
|
70
|
+
Recommended:
|
|
71
|
+
|
|
72
|
+
- If the repo wants agents to learn from harness token data, also reference:
|
|
73
|
+
- `node_modules/@sebastianandreasson/pi-autonomous-agents/docs/TOKEN_USAGE_ARTIFACTS.md`
|
|
74
|
+
- Add a short repo-local instruction snippet telling agents to read `pi-output/token-usage/summary.json` before investigating retries, hotspots, or large turns.
|
|
75
|
+
|
|
70
76
|
4. Ensure `TODOS.md` exists.
|
|
71
77
|
|
|
72
78
|
- If the repo already uses a task file, keep it.
|
|
@@ -191,6 +197,7 @@ The harness should fail fast if:
|
|
|
191
197
|
|
|
192
198
|
For prompt debugging, inspect `.pi-last-prompt.txt` after a run. It contains the exact assembled prompt that was sent for the active role.
|
|
193
199
|
For flow debugging, inspect `.pi-last-iteration.json` after a run. It summarizes the selected task, repo-change outcome, tester verdict, commit-plan state, and terminal reason.
|
|
200
|
+
For token-hotspot debugging, inspect `pi-output/token-usage/summary.json` first and only read `pi-output/token-usage/events.jsonl` when the summary is not enough.
|
|
194
201
|
|
|
195
202
|
## Agent Rules
|
|
196
203
|
|
package/docs/PI_SUPERVISOR.md
CHANGED
|
@@ -80,10 +80,13 @@ Projects typically provide their own `pi.config.json` with fields such as:
|
|
|
80
80
|
- `visualCaptureCommand`
|
|
81
81
|
- `visualFeedbackFile`
|
|
82
82
|
- `testerFeedbackFile`
|
|
83
|
+
- `failureArtifactDir`
|
|
83
84
|
- `models`
|
|
84
85
|
- `piModel`
|
|
85
86
|
- `visualReviewModel`
|
|
86
87
|
- `commitMode`
|
|
88
|
+
- `sameFileLoopBudget`
|
|
89
|
+
- `loopHistoryLimit`
|
|
87
90
|
|
|
88
91
|
Model entries may carry their own OpenAI-compatible endpoint settings, so the PI text loop and the multimodal visual reviewer can point at different backends without changing code.
|
|
89
92
|
|
|
@@ -124,6 +127,10 @@ The default flow keeps commit ownership with the active agent:
|
|
|
124
127
|
2. `tester` should review functionality and, on `PASS`, stage only the task-related files and create the commit directly.
|
|
125
128
|
3. If the working tree is too messy to isolate safely, tester should return `VERDICT: BLOCKED` instead of guessing.
|
|
126
129
|
|
|
130
|
+
If tester returns `PASS` but leaves a dirty tree without creating the commit, the harness now treats that as a protocol error and automatically falls back to a commit-plan follow-up instead of stalling the iteration.
|
|
131
|
+
|
|
132
|
+
If tester edits files before finalization, the harness re-runs the configured smoke verification command immediately and records which files tester touched.
|
|
133
|
+
|
|
127
134
|
If a repo explicitly needs the older harness-managed commit-plan flow, set `commitMode` to `plan`. In that mode, `testerCommit` and parsed commit plans are used as a compatibility path rather than the default.
|
|
128
135
|
|
|
129
136
|
For source inspection, prompts prefer `read` and reserve shell usage for `git`, tests, and narrow diagnostics. Large shell file reads are more likely to truncate under context pressure than focused `read` calls.
|
|
@@ -175,6 +182,7 @@ SDK transport mitigates obvious local loops by watching agent and tool events:
|
|
|
175
182
|
|
|
176
183
|
- repeated identical tool calls are aborted
|
|
177
184
|
- repeated same-path churn is aborted
|
|
185
|
+
- repeated same-file loop targets are persisted in harness state and escalate the next retry strategy
|
|
178
186
|
- a soft `continue` can be sent after inactivity
|
|
179
187
|
- a separate tool-aware watchdog can tolerate long-running `bash` or browser work without treating the turn as dead
|
|
180
188
|
- a hard no-event timeout aborts a wedged turn instead of hanging indefinitely
|
|
@@ -200,4 +208,17 @@ Each step records:
|
|
|
200
208
|
- changed file count
|
|
201
209
|
- verification status
|
|
202
210
|
- retry count
|
|
211
|
+
- artifact path for compact failure diagnostics when available
|
|
212
|
+
- output excerpt for failed verification-style events
|
|
203
213
|
- notes
|
|
214
|
+
|
|
215
|
+
The harness also produces structured token-usage artifacts intended for downstream tooling:
|
|
216
|
+
|
|
217
|
+
- `pi-output/token-usage/events.jsonl`
|
|
218
|
+
- `pi-output/token-usage/summary.json`
|
|
219
|
+
- `.pi-runtime/runs/<runId>/token-usage.events.jsonl`
|
|
220
|
+
- `.pi-runtime/runs/<runId>/token-usage.summary.json`
|
|
221
|
+
|
|
222
|
+
These artifacts are the stable machine-readable token contract. The visualizer and report command are consumers of those files, not the source of truth.
|
|
223
|
+
|
|
224
|
+
For agent-facing guidance on how to interpret and use those files in consuming repos, see [TOKEN_USAGE_ARTIFACTS.md](./TOKEN_USAGE_ARTIFACTS.md).
|
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
# Token Usage Artifacts
|
|
2
|
+
|
|
3
|
+
This document is written for autonomous coding agents and repo maintainers who want to use the token-usage artifacts produced by `pi-harness` in their own projects.
|
|
4
|
+
|
|
5
|
+
The goal is not just to visualize token usage. The goal is to expose a stable machine-readable contract that other tools, prompts, reports, or project-specific scripts can reuse.
|
|
6
|
+
|
|
7
|
+
## Produced Artifacts
|
|
8
|
+
|
|
9
|
+
Repo-scoped artifacts:
|
|
10
|
+
|
|
11
|
+
- `pi-output/token-usage/events.jsonl`
|
|
12
|
+
- `pi-output/token-usage/summary.json`
|
|
13
|
+
|
|
14
|
+
Run-scoped artifacts:
|
|
15
|
+
|
|
16
|
+
- `.pi-runtime/runs/<runId>/token-usage.events.jsonl`
|
|
17
|
+
- `.pi-runtime/runs/<runId>/token-usage.summary.json`
|
|
18
|
+
|
|
19
|
+
Use repo-scoped files when you want the latest cumulative view for the repository.
|
|
20
|
+
Use run-scoped files when you want to inspect one specific harness run in isolation.
|
|
21
|
+
|
|
22
|
+
## Recommended Consumption Order
|
|
23
|
+
|
|
24
|
+
When an agent wants to use token data, prefer this order:
|
|
25
|
+
|
|
26
|
+
1. Read `token-usage.summary.json` first.
|
|
27
|
+
2. Only read `token-usage.events.jsonl` if the summary is not enough.
|
|
28
|
+
3. Prefer run-scoped artifacts when analyzing one run.
|
|
29
|
+
4. Prefer repo-scoped artifacts when looking for long-term hotspots.
|
|
30
|
+
|
|
31
|
+
This keeps token-analysis prompts compact and avoids spending more tokens just to inspect token data.
|
|
32
|
+
|
|
33
|
+
## Event Schema
|
|
34
|
+
|
|
35
|
+
Each line in `events.jsonl` is one normalized token-attribution event.
|
|
36
|
+
|
|
37
|
+
Important fields:
|
|
38
|
+
|
|
39
|
+
- `schemaVersion`
|
|
40
|
+
- `timestamp`
|
|
41
|
+
- `runId`
|
|
42
|
+
- `transport`
|
|
43
|
+
- `sessionId`
|
|
44
|
+
- `model`
|
|
45
|
+
- `iteration`
|
|
46
|
+
- `retryCount`
|
|
47
|
+
- `reason`
|
|
48
|
+
- `phase`
|
|
49
|
+
- `role`
|
|
50
|
+
- `kind`
|
|
51
|
+
- `attributionKind`
|
|
52
|
+
- `toolNames`
|
|
53
|
+
- `files`
|
|
54
|
+
- `primaryFile`
|
|
55
|
+
- `inputTokens`
|
|
56
|
+
- `outputTokens`
|
|
57
|
+
- `totalTokens`
|
|
58
|
+
- `cacheReadTokens`
|
|
59
|
+
- `cacheWriteTokens`
|
|
60
|
+
|
|
61
|
+
Semantics:
|
|
62
|
+
|
|
63
|
+
- `kind`, `phase`, and `role` identify the harness stage where the tokens were spent.
|
|
64
|
+
- `toolNames` and `files` capture the nearby tool/file context seen around the token event.
|
|
65
|
+
- `attributionKind` explains how the event was classified:
|
|
66
|
+
- `thinking`
|
|
67
|
+
- `response`
|
|
68
|
+
- `tool_context`
|
|
69
|
+
- `tool_running`
|
|
70
|
+
- `agent`
|
|
71
|
+
|
|
72
|
+
Important:
|
|
73
|
+
|
|
74
|
+
- file and directory attribution are inferred from nearby tool context
|
|
75
|
+
- they are useful for hotspot detection, not exact provider-native accounting
|
|
76
|
+
- if one token event touches multiple files, downstream summaries may split that event across those files
|
|
77
|
+
|
|
78
|
+
## Summary Schema
|
|
79
|
+
|
|
80
|
+
`summary.json` contains:
|
|
81
|
+
|
|
82
|
+
- `schemaVersion`
|
|
83
|
+
- `generatedAt`
|
|
84
|
+
- `source.eventCount`
|
|
85
|
+
- `totals`
|
|
86
|
+
- `coverage`
|
|
87
|
+
- `breakdowns`
|
|
88
|
+
|
|
89
|
+
`totals` contains:
|
|
90
|
+
|
|
91
|
+
- `inputTokens`
|
|
92
|
+
- `outputTokens`
|
|
93
|
+
- `totalTokens`
|
|
94
|
+
- `cacheReadTokens`
|
|
95
|
+
- `cacheWriteTokens`
|
|
96
|
+
- `eventCount`
|
|
97
|
+
|
|
98
|
+
`coverage` contains:
|
|
99
|
+
|
|
100
|
+
- `fileAttributedTokens`
|
|
101
|
+
- `unattributedTokens`
|
|
102
|
+
- `fileAttributionRatio`
|
|
103
|
+
|
|
104
|
+
`breakdowns` contains:
|
|
105
|
+
|
|
106
|
+
- `byKind`
|
|
107
|
+
- `byRole`
|
|
108
|
+
- `byPhase`
|
|
109
|
+
- `byModel`
|
|
110
|
+
- `bySession`
|
|
111
|
+
- `byAttribution`
|
|
112
|
+
- `byTool`
|
|
113
|
+
- `byFile`
|
|
114
|
+
- `byDirectory`
|
|
115
|
+
|
|
116
|
+
Each breakdown item contains:
|
|
117
|
+
|
|
118
|
+
- `key`
|
|
119
|
+
- `label`
|
|
120
|
+
- `inputTokens`
|
|
121
|
+
- `outputTokens`
|
|
122
|
+
- `totalTokens`
|
|
123
|
+
- `cacheReadTokens`
|
|
124
|
+
- `cacheWriteTokens`
|
|
125
|
+
- `eventCount`
|
|
126
|
+
|
|
127
|
+
## How Agents Should Use This Data
|
|
128
|
+
|
|
129
|
+
Use token artifacts to answer questions like:
|
|
130
|
+
|
|
131
|
+
- Which harness phases are spending the most tokens?
|
|
132
|
+
- Which files or directories repeatedly consume tokens?
|
|
133
|
+
- Are retries concentrated in one hotspot?
|
|
134
|
+
- Is token usage dominated by thinking, tool-context, or response generation?
|
|
135
|
+
- Are certain models or sessions much more expensive than others?
|
|
136
|
+
|
|
137
|
+
Good uses:
|
|
138
|
+
|
|
139
|
+
- splitting a large TODO item into narrower tasks
|
|
140
|
+
- identifying files that should be decomposed before another agent pass
|
|
141
|
+
- deciding whether a hot directory needs refactor work
|
|
142
|
+
- comparing whether `developer` or `tester` is driving most cost
|
|
143
|
+
- checking whether a local model is wasting tokens on repeated tool/file churn
|
|
144
|
+
|
|
145
|
+
Bad uses:
|
|
146
|
+
|
|
147
|
+
- treating `byFile` values as exact per-file billing
|
|
148
|
+
- assuming all unattributed tokens are waste
|
|
149
|
+
- optimizing for raw token count while ignoring correctness
|
|
150
|
+
|
|
151
|
+
## Agent Workflow Guidance
|
|
152
|
+
|
|
153
|
+
When an agent is asked to improve harness efficiency in a repo:
|
|
154
|
+
|
|
155
|
+
1. Read `summary.json`.
|
|
156
|
+
2. Inspect `breakdowns.byFile`, `breakdowns.byDirectory`, `breakdowns.byTool`, and `breakdowns.byAttribution`.
|
|
157
|
+
3. If one file or directory dominates, inspect the related source only after confirming the hotspot from the summary.
|
|
158
|
+
4. If `fileAttributionRatio` is low, rely more on `byKind`, `byRole`, `byModel`, and `byAttribution` than on `byFile`.
|
|
159
|
+
5. When proposing changes, explicitly distinguish:
|
|
160
|
+
- exact token totals from artifacts
|
|
161
|
+
- inferred file attribution from nearby context
|
|
162
|
+
|
|
163
|
+
## Recommended Interpretation Rules
|
|
164
|
+
|
|
165
|
+
Use these heuristics:
|
|
166
|
+
|
|
167
|
+
- High `byFile` and high `fileAttributionRatio`:
|
|
168
|
+
Strong signal that the file is a real hotspot.
|
|
169
|
+
- High `byDirectory` with spread across many files:
|
|
170
|
+
The problem is probably architectural or task-shaping rather than confined to a single file.
|
|
171
|
+
- High `byAttribution.tool_context` or `tool_running`:
|
|
172
|
+
The agent may be rereading, diffing, or patching inefficiently.
|
|
173
|
+
- High `byAttribution.thinking` with low file coverage:
|
|
174
|
+
The problem may be task ambiguity or prompt shape rather than one code hotspot.
|
|
175
|
+
- High `byModel` on one role:
|
|
176
|
+
That role may need a smaller scope, different model, or clearer repo instructions.
|
|
177
|
+
|
|
178
|
+
## Instruction Snippet For Consuming Repos
|
|
179
|
+
|
|
180
|
+
If a consuming repo wants its own agents to use the artifacts, add guidance like this to repo-local instructions:
|
|
181
|
+
|
|
182
|
+
```md
|
|
183
|
+
## Token Usage Data
|
|
184
|
+
|
|
185
|
+
This repo may contain `pi-harness` token artifacts:
|
|
186
|
+
|
|
187
|
+
- `pi-output/token-usage/summary.json`
|
|
188
|
+
- `pi-output/token-usage/events.jsonl`
|
|
189
|
+
|
|
190
|
+
When investigating repeated retries, large agent turns, or code hotspots:
|
|
191
|
+
|
|
192
|
+
1. Read `summary.json` first.
|
|
193
|
+
2. Use `breakdowns.byFile`, `breakdowns.byDirectory`, `breakdowns.byTool`, and `breakdowns.byAttribution` to locate hotspots.
|
|
194
|
+
3. Treat file and directory token attribution as inferred context, not exact billing.
|
|
195
|
+
4. If one file is a clear hotspot, prefer smaller TODOs, narrower reads, or structural refactors over brute-force retries.
|
|
196
|
+
5. If file attribution is weak, rely more on `byKind`, `byRole`, `byModel`, and `byAttribution`.
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
## Project-Specific Extensions
|
|
200
|
+
|
|
201
|
+
Projects can build their own tooling on top of these artifacts, for example:
|
|
202
|
+
|
|
203
|
+
- nightly regression reports that flag rising token hotspots
|
|
204
|
+
- CI checks that warn when one file dominates token spend
|
|
205
|
+
- repo-specific dashboards
|
|
206
|
+
- prompt builders that mention known hotspots before starting a developer turn
|
|
207
|
+
- scripts that compare token patterns before and after a refactor
|
|
208
|
+
|
|
209
|
+
When doing that, depend on:
|
|
210
|
+
|
|
211
|
+
- `schemaVersion`
|
|
212
|
+
- the named summary fields
|
|
213
|
+
- the normalized event fields
|
|
214
|
+
|
|
215
|
+
Do not depend on the visualizer UI structure or CSS. Those are consumers, not the contract.
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@sebastianandreasson/pi-autonomous-agents",
|
|
3
3
|
"private": false,
|
|
4
|
-
"version": "0.11.0",
|
|
4
|
+
"version": "0.13.0",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"description": "Portable unattended PI harness for developer/tester/visual-review loops.",
|
|
7
7
|
"license": "MIT",
|
|
@@ -19,8 +19,8 @@
|
|
|
19
19
|
"@mariozechner/pi-coding-agent": "^0.66.1"
|
|
20
20
|
},
|
|
21
21
|
"scripts": {
|
|
22
|
-
"check": "node --check src/cli.mjs && node --check src/pi-clear-history.mjs && node --check src/pi-client.mjs && node --check src/pi-config.mjs && node --check src/pi-debug-live.mjs && node --check src/pi-flow.mjs && node --check src/pi-heartbeat.mjs && node --check src/pi-history.mjs && node --check src/pi-preflight.mjs && node --check src/pi-prompts.mjs && node --check src/pi-repo.mjs && node --check src/pi-report.mjs && node --check src/pi-sdk-turn.mjs && node --check src/pi-supervisor.mjs && node --check src/pi-telemetry.mjs && node --check src/pi-visual-once.mjs && node --check src/pi-visual-review.mjs && node --check src/pi-visualizer.mjs && node --check src/pi-visualizer-server.mjs && node --check src/pi-visualizer-shared.mjs && node --check src/index.mjs && node --check test/pi-heartbeat.test.mjs && node --check test/pi-lifecycle.test.mjs && node --check test/pi-role-models.test.mjs && node --check test/pi-flow.test.mjs && node --check test/pi-history.test.mjs && node --check test/pi-prompts.test.mjs && node --check test/pi-preflight.test.mjs && node --check test/pi-repo.test.mjs && node --check test/pi-sdk-supervisor.test.mjs && node --check test/pi-sdk-turn.test.mjs && node --check test/pi-telemetry.test.mjs && node --check test/pi-visualizer-shared.test.mjs && node --check test/fixtures/fake-pi.mjs && node --check test/fixtures/fake-pi-sdk.mjs && node --check test/fixtures/fake-live-pi-sdk.mjs",
|
|
23
|
-
"test": "node --test test/pi-heartbeat.test.mjs test/pi-lifecycle.test.mjs test/pi-role-models.test.mjs test/pi-flow.test.mjs test/pi-history.test.mjs test/pi-prompts.test.mjs test/pi-preflight.test.mjs test/pi-repo.test.mjs test/pi-sdk-supervisor.test.mjs test/pi-sdk-turn.test.mjs test/pi-telemetry.test.mjs test/pi-visualizer-shared.test.mjs",
|
|
22
|
+
"check": "node --check src/cli.mjs && node --check src/pi-clear-history.mjs && node --check src/pi-client.mjs && node --check src/pi-config.mjs && node --check src/pi-debug-live.mjs && node --check src/pi-flow.mjs && node --check src/pi-heartbeat.mjs && node --check src/pi-history.mjs && node --check src/pi-preflight.mjs && node --check src/pi-prompts.mjs && node --check src/pi-repo.mjs && node --check src/pi-report.mjs && node --check src/pi-sdk-turn.mjs && node --check src/pi-supervisor.mjs && node --check src/pi-telemetry.mjs && node --check src/pi-token-analysis.mjs && node --check src/pi-visual-once.mjs && node --check src/pi-visual-review.mjs && node --check src/pi-visualizer.mjs && node --check src/pi-visualizer-server.mjs && node --check src/pi-visualizer-shared.mjs && node --check src/index.mjs && node --check test/pi-heartbeat.test.mjs && node --check test/pi-lifecycle.test.mjs && node --check test/pi-role-models.test.mjs && node --check test/pi-flow.test.mjs && node --check test/pi-history.test.mjs && node --check test/pi-prompts.test.mjs && node --check test/pi-preflight.test.mjs && node --check test/pi-repo.test.mjs && node --check test/pi-sdk-supervisor.test.mjs && node --check test/pi-sdk-turn.test.mjs && node --check test/pi-telemetry.test.mjs && node --check test/pi-token-analysis.test.mjs && node --check test/pi-visualizer-shared.test.mjs && node --check test/fixtures/fake-pi.mjs && node --check test/fixtures/fake-pi-sdk.mjs && node --check test/fixtures/fake-live-pi-sdk.mjs",
|
|
23
|
+
"test": "node --test test/pi-heartbeat.test.mjs test/pi-lifecycle.test.mjs test/pi-role-models.test.mjs test/pi-flow.test.mjs test/pi-history.test.mjs test/pi-prompts.test.mjs test/pi-preflight.test.mjs test/pi-repo.test.mjs test/pi-sdk-supervisor.test.mjs test/pi-sdk-turn.test.mjs test/pi-telemetry.test.mjs test/pi-token-analysis.test.mjs test/pi-visualizer-shared.test.mjs",
|
|
24
24
|
"debug:live-ui": "node src/cli.mjs debug-live --reset",
|
|
25
25
|
"dev:visualizer:ui": "npm --prefix visualizer-ui run dev",
|
|
26
26
|
"build:visualizer:ui": "npm --prefix visualizer-ui run build",
|
package/pi.config.json
CHANGED
|
@@ -17,6 +17,8 @@
|
|
|
17
17
|
"testerFeedbackHistoryDir": "pi-output/tester-feedback/history",
|
|
18
18
|
"visualReviewHistoryDir": "pi-output/visual-review/history",
|
|
19
19
|
"visualCaptureDir": "pi-output/visual-capture",
|
|
20
|
+
"tokenUsageEventsFile": "pi-output/token-usage/events.jsonl",
|
|
21
|
+
"tokenUsageSummaryFile": "pi-output/token-usage/summary.json",
|
|
20
22
|
"visualCaptureCommand": "",
|
|
21
23
|
"visualCaptureTimeoutSeconds": 300,
|
|
22
24
|
"visualReviewEnabled": false,
|
package/src/cli.mjs
CHANGED
|
@@ -36,11 +36,14 @@ function main() {
|
|
|
36
36
|
if (subcommand === 'once' || subcommand === 'run') {
|
|
37
37
|
childArgs.push(subcommand)
|
|
38
38
|
}
|
|
39
|
+
const childStdio = subcommand === 'once' || subcommand === 'run'
|
|
40
|
+
? ['pipe', 'inherit', 'inherit']
|
|
41
|
+
: 'inherit'
|
|
39
42
|
|
|
40
43
|
const child = spawn(process.execPath, childArgs, {
|
|
41
44
|
cwd: process.cwd(),
|
|
42
45
|
env: process.env,
|
|
43
|
-
stdio:
|
|
46
|
+
stdio: childStdio,
|
|
44
47
|
})
|
|
45
48
|
registerOwnedChildProcess(child)
|
|
46
49
|
|
package/src/index.mjs
CHANGED
|
@@ -13,5 +13,18 @@ export { clearHarnessHistory, collectHistoryTargets } from './pi-history.mjs'
|
|
|
13
13
|
export { collectLargeFileWarnings } from './pi-repo.mjs'
|
|
14
14
|
export { runAgentTurn } from './pi-client.mjs'
|
|
15
15
|
export { createSdkSession, createTools, normalizeToolNames, resolveModel, runSdkTurn, runSdkTurnWithPi, splitModelSpec } from './pi-sdk-turn.mjs'
|
|
16
|
+
export {
|
|
17
|
+
appendTokenUsageEvent,
|
|
18
|
+
applyTokenAttributionEvent,
|
|
19
|
+
createEmptyTokenBreakdown,
|
|
20
|
+
createEmptyTokenUsage,
|
|
21
|
+
deriveTokenBreakdown,
|
|
22
|
+
ensureTokenUsageFiles,
|
|
23
|
+
formatTokenUsageSummary,
|
|
24
|
+
normalizeTokenAttributionEvent,
|
|
25
|
+
normalizeTokenUsage,
|
|
26
|
+
readTokenUsageEvents,
|
|
27
|
+
readTokenUsageSummary,
|
|
28
|
+
} from './pi-token-analysis.mjs'
|
|
16
29
|
export { deriveCurrentIteration, deriveFlowSnapshot, deriveStageGraph, formatActiveLabel, getFlowSteps, getLabelForKind, getStepKeyForActiveRun, getStepKeyForKind } from './pi-visualizer-shared.mjs'
|
|
17
30
|
export { buildSnapshot, readVisualizerHost, readVisualizerPort, renderHtml, startVisualizerServer } from './pi-visualizer-server.mjs'
|
package/src/pi-client.mjs
CHANGED
|
@@ -11,6 +11,7 @@ import {
|
|
|
11
11
|
writeTextFile,
|
|
12
12
|
} from './pi-repo.mjs'
|
|
13
13
|
import { runSdkTurn } from './pi-sdk-turn.mjs'
|
|
14
|
+
import { appendTokenUsageEvent } from './pi-token-analysis.mjs'
|
|
14
15
|
|
|
15
16
|
function truncateForNotes(text) {
|
|
16
17
|
const trimmed = text.trim()
|
|
@@ -26,6 +27,7 @@ function formatLastAgentOutput(response) {
|
|
|
26
27
|
`sessionId: ${String(response.sessionId ?? '')}`,
|
|
27
28
|
`sessionFile: ${String(response.sessionFile ?? '')}`,
|
|
28
29
|
`terminalReason: ${String(response.terminalReason ?? '')}`,
|
|
30
|
+
`tokens: total=${Number(response.totalTokens ?? 0)} input=${Number(response.inputTokens ?? 0)} output=${Number(response.outputTokens ?? 0)} cacheRead=${Number(response.cacheReadTokens ?? 0)} cacheWrite=${Number(response.cacheWriteTokens ?? 0)}`,
|
|
29
31
|
`notes: ${String(response.notes ?? '').trim()}`,
|
|
30
32
|
]
|
|
31
33
|
|
|
@@ -81,10 +83,49 @@ function sanitizeLiveFeedEvent(filePath, event) {
|
|
|
81
83
|
kind: String(event?.kind ?? ''),
|
|
82
84
|
type: String(event?.type ?? 'event'),
|
|
83
85
|
toolName: String(event?.toolName ?? ''),
|
|
86
|
+
sessionId: String(event?.sessionId ?? ''),
|
|
87
|
+
model: String(event?.model ?? ''),
|
|
84
88
|
isError: event?.isError === true,
|
|
85
89
|
text: truncateText(event?.text ?? '', MAX_LIVE_FEED_TEXT),
|
|
86
90
|
}
|
|
87
91
|
|
|
92
|
+
const numericFields = {
|
|
93
|
+
inputTokens: Number(event?.inputTokens),
|
|
94
|
+
outputTokens: Number(event?.outputTokens),
|
|
95
|
+
totalTokens: Number(event?.totalTokens),
|
|
96
|
+
cacheReadTokens: Number(event?.cacheReadTokens),
|
|
97
|
+
cacheWriteTokens: Number(event?.cacheWriteTokens),
|
|
98
|
+
}
|
|
99
|
+
for (const [key, value] of Object.entries(numericFields)) {
|
|
100
|
+
if (Number.isFinite(value) && value > 0) {
|
|
101
|
+
normalized[key] = value
|
|
102
|
+
}
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
const attributionKind = String(event?.attributionKind ?? '').trim()
|
|
106
|
+
if (attributionKind !== '') {
|
|
107
|
+
normalized.attributionKind = attributionKind
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
const primaryFile = String(event?.primaryFile ?? '').trim()
|
|
111
|
+
if (primaryFile !== '') {
|
|
112
|
+
normalized.primaryFile = primaryFile
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
const toolNames = Array.isArray(event?.toolNames)
|
|
116
|
+
? [...new Set(event.toolNames.map((value) => String(value ?? '').trim()).filter(Boolean))]
|
|
117
|
+
: []
|
|
118
|
+
if (toolNames.length > 0) {
|
|
119
|
+
normalized.toolNames = toolNames
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
const files = Array.isArray(event?.files)
|
|
123
|
+
? [...new Set(event.files.map((value) => String(value ?? '').trim()).filter(Boolean))]
|
|
124
|
+
: []
|
|
125
|
+
if (files.length > 0) {
|
|
126
|
+
normalized.files = files
|
|
127
|
+
}
|
|
128
|
+
|
|
88
129
|
const argsSummary = summarizeValue(event?.args)
|
|
89
130
|
const partialSummary = summarizeValue(event?.partialResult)
|
|
90
131
|
const resultSummary = summarizeValue(event?.result)
|
|
@@ -102,21 +143,23 @@ function sanitizeLiveFeedEvent(filePath, event) {
|
|
|
102
143
|
}
|
|
103
144
|
|
|
104
145
|
async function appendLiveFeedEvent(config, event) {
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
const filePath = config.runLiveFeedFile
|
|
110
|
-
const previous = liveFeedWriteQueues.get(filePath) ?? Promise.resolve()
|
|
146
|
+
const filePath = String(config.runLiveFeedFile ?? '').trim()
|
|
147
|
+
const queueKey = filePath || String(config.runTokenUsageEventsFile ?? config.tokenUsageEventsFile ?? 'token-usage')
|
|
148
|
+
const previous = liveFeedWriteQueues.get(queueKey) ?? Promise.resolve()
|
|
111
149
|
const next = previous
|
|
112
150
|
.catch(() => {})
|
|
113
151
|
.then(async () => {
|
|
114
|
-
const sanitized = sanitizeLiveFeedEvent(
|
|
115
|
-
|
|
116
|
-
|
|
152
|
+
const sanitized = sanitizeLiveFeedEvent(queueKey, event)
|
|
153
|
+
if (filePath !== '') {
|
|
154
|
+
await fs.mkdir(path.dirname(filePath), { recursive: true })
|
|
155
|
+
await fs.appendFile(filePath, `${JSON.stringify(sanitized)}\n`, 'utf8')
|
|
156
|
+
}
|
|
157
|
+
if (sanitized.type === 'token_usage') {
|
|
158
|
+
await appendTokenUsageEvent(config, sanitized)
|
|
159
|
+
}
|
|
117
160
|
})
|
|
118
161
|
|
|
119
|
-
liveFeedWriteQueues.set(
|
|
162
|
+
liveFeedWriteQueues.set(queueKey, next)
|
|
120
163
|
await next
|
|
121
164
|
}
|
|
122
165
|
|
|
@@ -154,6 +197,11 @@ async function runMockTurn({ config, sessionId, sessionFile, prompt, reason }) {
|
|
|
154
197
|
toolCalls: 0,
|
|
155
198
|
toolErrors: 0,
|
|
156
199
|
messageUpdates: 0,
|
|
200
|
+
inputTokens: 0,
|
|
201
|
+
outputTokens: 0,
|
|
202
|
+
totalTokens: 0,
|
|
203
|
+
cacheReadTokens: 0,
|
|
204
|
+
cacheWriteTokens: 0,
|
|
157
205
|
stopReason: '',
|
|
158
206
|
loopDetected: false,
|
|
159
207
|
loopSignature: '',
|
|
@@ -224,6 +272,11 @@ async function runSdkTransportTurn({ config, model, sessionId, sessionFile, prom
|
|
|
224
272
|
toolCalls: 0,
|
|
225
273
|
toolErrors: 0,
|
|
226
274
|
messageUpdates: 0,
|
|
275
|
+
inputTokens: 0,
|
|
276
|
+
outputTokens: 0,
|
|
277
|
+
totalTokens: 0,
|
|
278
|
+
cacheReadTokens: 0,
|
|
279
|
+
cacheWriteTokens: 0,
|
|
227
280
|
stopReason: '',
|
|
228
281
|
loopDetected: false,
|
|
229
282
|
loopSignature: '',
|
|
@@ -248,6 +301,11 @@ async function runSdkTransportTurn({ config, model, sessionId, sessionFile, prom
|
|
|
248
301
|
toolCalls: Number.isFinite(Number(response.toolCalls)) ? Number(response.toolCalls) : 0,
|
|
249
302
|
toolErrors: Number.isFinite(Number(response.toolErrors)) ? Number(response.toolErrors) : 0,
|
|
250
303
|
messageUpdates: Number.isFinite(Number(response.messageUpdates)) ? Number(response.messageUpdates) : 0,
|
|
304
|
+
inputTokens: Number.isFinite(Number(response.inputTokens)) ? Number(response.inputTokens) : 0,
|
|
305
|
+
outputTokens: Number.isFinite(Number(response.outputTokens)) ? Number(response.outputTokens) : 0,
|
|
306
|
+
totalTokens: Number.isFinite(Number(response.totalTokens)) ? Number(response.totalTokens) : 0,
|
|
307
|
+
cacheReadTokens: Number.isFinite(Number(response.cacheReadTokens)) ? Number(response.cacheReadTokens) : 0,
|
|
308
|
+
cacheWriteTokens: Number.isFinite(Number(response.cacheWriteTokens)) ? Number(response.cacheWriteTokens) : 0,
|
|
251
309
|
stopReason: String(response.stopReason ?? ''),
|
|
252
310
|
loopDetected: response.loopDetected === true,
|
|
253
311
|
loopSignature: String(response.loopSignature ?? ''),
|
package/src/pi-config.mjs
CHANGED
|
@@ -259,6 +259,7 @@ export function loadConfig(mode = 'once') {
|
|
|
259
259
|
maxTesterFeedbackLines: readInt('PI_MAX_TESTER_FEEDBACK_LINES', file.maxTesterFeedbackLines, 32),
|
|
260
260
|
maxPromptNotesLines: readInt('PI_MAX_PROMPT_NOTES_LINES', file.maxPromptNotesLines, 16),
|
|
261
261
|
maxVerificationExcerptLines: readInt('PI_MAX_VERIFICATION_EXCERPT_LINES', file.maxVerificationExcerptLines, 40),
|
|
262
|
+
maxFailureArtifactLines: readInt('PI_MAX_FAILURE_ARTIFACT_LINES', file.maxFailureArtifactLines, 80),
|
|
262
263
|
largeFileWarningLines: readInt('PI_LARGE_FILE_WARNING_LINES', file.largeFileWarningLines, 500),
|
|
263
264
|
largeSpecWarningLines: readInt('PI_LARGE_SPEC_WARNING_LINES', file.largeSpecWarningLines, 300),
|
|
264
265
|
piTools: readString('PI_TOOLS', file.piTools, 'read,edit,write,find,ls,bash'),
|
|
@@ -280,6 +281,8 @@ export function loadConfig(mode = 'once') {
|
|
|
280
281
|
verificationTimeoutSeconds: readInt('PI_VERIFICATION_TIMEOUT', file.verificationTimeoutSeconds, 300),
|
|
281
282
|
idleRetryLimit: readInt('PI_IDLE_RETRY_LIMIT', file.idleRetryLimit, 1),
|
|
282
283
|
noChangeRetryLimit: readInt('PI_NO_CHANGE_RETRY_LIMIT', file.noChangeRetryLimit, 1),
|
|
284
|
+
sameFileLoopBudget: readInt('PI_SAME_FILE_LOOP_BUDGET', file.sameFileLoopBudget, 2),
|
|
285
|
+
loopHistoryLimit: readInt('PI_LOOP_HISTORY_LIMIT', file.loopHistoryLimit, 25),
|
|
283
286
|
visualFeedbackFile: resolveFromCwd(
|
|
284
287
|
cwd,
|
|
285
288
|
'PI_VISUAL_FEEDBACK_FILE',
|
|
@@ -298,6 +301,12 @@ export function loadConfig(mode = 'once') {
|
|
|
298
301
|
file.testerFeedbackHistoryDir,
|
|
299
302
|
'pi-output/tester-feedback/history'
|
|
300
303
|
),
|
|
304
|
+
failureArtifactDir: resolveFromCwd(
|
|
305
|
+
cwd,
|
|
306
|
+
'PI_FAILURE_ARTIFACT_DIR',
|
|
307
|
+
file.failureArtifactDir,
|
|
308
|
+
'pi-output/failure-artifacts'
|
|
309
|
+
),
|
|
301
310
|
visualReviewHistoryDir: resolveFromCwd(
|
|
302
311
|
cwd,
|
|
303
312
|
'PI_VISUAL_REVIEW_HISTORY_DIR',
|
|
@@ -310,6 +319,18 @@ export function loadConfig(mode = 'once') {
|
|
|
310
319
|
file.visualCaptureDir,
|
|
311
320
|
'pi-output/visual-capture'
|
|
312
321
|
),
|
|
322
|
+
tokenUsageEventsFile: resolveFromCwd(
|
|
323
|
+
cwd,
|
|
324
|
+
'PI_TOKEN_USAGE_EVENTS_FILE',
|
|
325
|
+
file.tokenUsageEventsFile,
|
|
326
|
+
'pi-output/token-usage/events.jsonl'
|
|
327
|
+
),
|
|
328
|
+
tokenUsageSummaryFile: resolveFromCwd(
|
|
329
|
+
cwd,
|
|
330
|
+
'PI_TOKEN_USAGE_SUMMARY_FILE',
|
|
331
|
+
file.tokenUsageSummaryFile,
|
|
332
|
+
'pi-output/token-usage/summary.json'
|
|
333
|
+
),
|
|
313
334
|
visualCaptureCommand: readString('PI_VISUAL_CAPTURE_CMD', file.visualCaptureCommand, ''),
|
|
314
335
|
visualCaptureTimeoutSeconds: readInt('PI_VISUAL_CAPTURE_TIMEOUT', file.visualCaptureTimeoutSeconds, 300),
|
|
315
336
|
visualReviewEnabled: readBool('PI_VISUAL_REVIEW_ENABLED', file.visualReviewEnabled, false),
|
package/src/pi-history.mjs
CHANGED
|
@@ -22,6 +22,8 @@ export function collectHistoryTargets(config) {
|
|
|
22
22
|
config.changedFilesFile,
|
|
23
23
|
config.lastPromptFile,
|
|
24
24
|
config.lastIterationSummaryFile,
|
|
25
|
+
config.tokenUsageEventsFile,
|
|
26
|
+
config.tokenUsageSummaryFile,
|
|
25
27
|
config.piRuntimeDir,
|
|
26
28
|
config.visualFeedbackFile,
|
|
27
29
|
config.testerFeedbackFile,
|