@sebastianandreasson/pi-autonomous-agents 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +102 -0
- package/SETUP.md +171 -0
- package/docs/PI_SUPERVISOR.md +246 -0
- package/package.json +37 -0
- package/pi.config.json +28 -0
- package/src/cli.mjs +48 -0
- package/src/index.mjs +7 -0
- package/src/pi-client.mjs +195 -0
- package/src/pi-config.mjs +296 -0
- package/src/pi-flow.mjs +42 -0
- package/src/pi-heartbeat.mjs +152 -0
- package/src/pi-prompts.mjs +274 -0
- package/src/pi-repo.mjs +496 -0
- package/src/pi-report.mjs +55 -0
- package/src/pi-rpc-adapter.mjs +531 -0
- package/src/pi-supervisor.mjs +1156 -0
- package/src/pi-telemetry.mjs +63 -0
- package/src/pi-visual-once.mjs +86 -0
- package/src/pi-visual-review.mjs +236 -0
- package/templates/DEVELOPER.md +34 -0
- package/templates/PROJECT_SETUP.md +42 -0
- package/templates/TESTER.md +37 -0
- package/templates/gitignore.fragment +11 -0
- package/templates/pi.config.example.json +53 -0
package/README.md
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
# PI Harness
|
|
2
|
+
|
|
3
|
+
`pi-harness` is a portable CLI/workflow package for running a local PI-based unattended loop with:
|
|
4
|
+
|
|
5
|
+
- a `developer` pass
|
|
6
|
+
- a fast verification step
|
|
7
|
+
- a skeptical `tester` pass
|
|
8
|
+
- optional periodic multimodal visual review
|
|
9
|
+
- harness-owned git finalization
|
|
10
|
+
|
|
11
|
+
The package is intentionally generic. It does not know how to navigate or test a specific app on its own.
|
|
12
|
+
|
|
13
|
+
## What Belongs In The Package
|
|
14
|
+
|
|
15
|
+
- supervisor/orchestration
|
|
16
|
+
- PI adapter/runtime integration
|
|
17
|
+
- config loading
|
|
18
|
+
- telemetry
|
|
19
|
+
- loop guards, timeout guards, and retries
|
|
20
|
+
- tester feedback + visual feedback handoff
|
|
21
|
+
- harness-owned git finalize step
|
|
22
|
+
- multimodal visual review client
|
|
23
|
+
|
|
24
|
+
## What Stays Per Project
|
|
25
|
+
|
|
26
|
+
- `TODOS.md`
|
|
27
|
+
- project instructions
|
|
28
|
+
- browser tests
|
|
29
|
+
- visual capture flow
|
|
30
|
+
- app-specific verification commands
|
|
31
|
+
- app/server startup scripts
|
|
32
|
+
|
|
33
|
+
## Layout
|
|
34
|
+
|
|
35
|
+
```text
|
|
36
|
+
packages/pi-harness/
|
|
37
|
+
package.json
|
|
38
|
+
pi.config.json
|
|
39
|
+
templates/DEVELOPER.md
|
|
40
|
+
templates/TESTER.md
|
|
41
|
+
docs/PI_SUPERVISOR.md
|
|
42
|
+
src/
|
|
43
|
+
cli.mjs
|
|
44
|
+
pi-client.mjs
|
|
45
|
+
pi-config.mjs
|
|
46
|
+
pi-prompts.mjs
|
|
47
|
+
pi-repo.mjs
|
|
48
|
+
pi-report.mjs
|
|
49
|
+
pi-rpc-adapter.mjs
|
|
50
|
+
pi-supervisor.mjs
|
|
51
|
+
pi-telemetry.mjs
|
|
52
|
+
pi-visual-once.mjs
|
|
53
|
+
pi-visual-review.mjs
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## CLI
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
pi-harness once
|
|
60
|
+
pi-harness run
|
|
61
|
+
pi-harness report
|
|
62
|
+
pi-harness visual-once
|
|
63
|
+
pi-harness adapter
|
|
64
|
+
pi-harness visual-review-worker
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
Use `PI_CONFIG_FILE` to point the harness at a project-local config file. If you do not provide one, the bundled generic `pi.config.json` is used as a fallback.
|
|
68
|
+
|
|
69
|
+
## Setup In Another Repo
|
|
70
|
+
|
|
71
|
+
After installing the package:
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
npm install -D @sebastianandreasson/pi-autonomous-agents
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
you can tell another agent in that repo:
|
|
78
|
+
|
|
79
|
+
```text
|
|
80
|
+
Find SETUP.md in @sebastianandreasson/pi-autonomous-agents and set everything up for this repository.
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
The package ships a top-level [SETUP.md](./SETUP.md) specifically for that workflow.
|
|
84
|
+
|
|
85
|
+
## Generic Contracts
|
|
86
|
+
|
|
87
|
+
- `taskFile`: usually `TODOS.md`
|
|
88
|
+
- `developerInstructionsFile`: per-project developer instructions
|
|
89
|
+
- `testerInstructionsFile`: per-project tester instructions
|
|
90
|
+
- `roleModels`: optional per-role model overrides
|
|
91
|
+
- `testCommand`: fast verification command
|
|
92
|
+
- `visualCaptureCommand`: project-defined screenshot capture command
|
|
93
|
+
- `visualFeedbackFile`: latest visual-review handoff
|
|
94
|
+
- `testerFeedbackFile`: latest tester-review handoff
|
|
95
|
+
|
|
96
|
+
For unattended loops, keep `testCommand` fast and bounded, such as a smoke suite. Long real-time Playwright happy-path specs belong in an explicit nightly or post-run lane, not the default developer/tester inner loop.
|
|
97
|
+
|
|
98
|
+
The adapter heartbeat is PI-RPC-event based. Streaming shell output does not count as progress on its own, so long-running tools should rely on the tool-aware watchdog thresholds rather than terminal streaming.
|
|
99
|
+
|
|
100
|
+
`piModel` remains the default text model, but you can override specific roles with `roleModels` such as `developer`, `developerRetry`, `developerFix`, `tester`, `testerCommit`, and `visualReview`.
|
|
101
|
+
|
|
102
|
+
The harness expects screenshot capture to produce a `manifest.json` plus image files under the configured visual capture directory.
|
package/SETUP.md
ADDED
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
# Setup Guide
|
|
2
|
+
|
|
3
|
+
This file is written for an autonomous coding agent that has been asked to set up `@sebastianandreasson/pi-autonomous-agents` in the current repository.
|
|
4
|
+
|
|
5
|
+
## Goal
|
|
6
|
+
|
|
7
|
+
Install and configure the package so this repo can run:
|
|
8
|
+
|
|
9
|
+
- `pi-harness once`
|
|
10
|
+
- `pi-harness run`
|
|
11
|
+
- `pi-harness report`
|
|
12
|
+
- `pi-harness visual-once`
|
|
13
|
+
|
|
14
|
+
Do not stop after installing the dependency. Complete the repo-local setup as well.
|
|
15
|
+
|
|
16
|
+
## Required End State
|
|
17
|
+
|
|
18
|
+
When you are done, the consuming repo should contain:
|
|
19
|
+
|
|
20
|
+
- `pi.config.json`
|
|
21
|
+
- `TODOS.md` if the repo does not already have one
|
|
22
|
+
- `pi/DEVELOPER.md`
|
|
23
|
+
- `pi/TESTER.md`
|
|
24
|
+
- package scripts for the harness commands
|
|
25
|
+
- `.gitignore` entries for harness runtime/output files
|
|
26
|
+
|
|
27
|
+
If the repo already has equivalent files, update them instead of creating duplicates.
|
|
28
|
+
|
|
29
|
+
## Setup Steps
|
|
30
|
+
|
|
31
|
+
1. Install the package.
|
|
32
|
+
|
|
33
|
+
Preferred command:
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
npm install -D @sebastianandreasson/pi-autonomous-agents
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
If the repo uses another package manager already, use the repo-native equivalent instead.
|
|
40
|
+
|
|
41
|
+
2. Create `pi.config.json`.
|
|
42
|
+
|
|
43
|
+
- Start from `node_modules/@sebastianandreasson/pi-autonomous-agents/templates/pi.config.example.json`.
|
|
44
|
+
- Copy it into the repo root as `pi.config.json`.
|
|
45
|
+
- Update it for this repo:
|
|
46
|
+
- `taskFile`: usually `TODOS.md`
|
|
47
|
+
- `developerInstructionsFile`: `pi/DEVELOPER.md`
|
|
48
|
+
- `testerInstructionsFile`: `pi/TESTER.md`
|
|
49
|
+
- `testCommand`: a fast bounded verification command for this repo
|
|
50
|
+
- `visualCaptureCommand`: only if this repo has a real screenshot capture flow
|
|
51
|
+
- `models` / `piModel` / `visualReviewModel` / `roleModels`: configure the models actually available in this environment
|
|
52
|
+
|
|
53
|
+
3. Create role instruction files.
|
|
54
|
+
|
|
55
|
+
- Copy `node_modules/@sebastianandreasson/pi-autonomous-agents/templates/DEVELOPER.md` to `pi/DEVELOPER.md`.
|
|
56
|
+
- Copy `node_modules/@sebastianandreasson/pi-autonomous-agents/templates/TESTER.md` to `pi/TESTER.md`.
|
|
57
|
+
- Customize both files for the repo:
|
|
58
|
+
- name the actual product/app
|
|
59
|
+
- describe the real verification expectations
|
|
60
|
+
- mention project-specific constraints, startup flow, or directories
|
|
61
|
+
- keep the harness workflow intact
|
|
62
|
+
|
|
63
|
+
4. Ensure `TODOS.md` exists.
|
|
64
|
+
|
|
65
|
+
- If the repo already uses a task file, keep it.
|
|
66
|
+
- Otherwise create a minimal `TODOS.md` with at least one phase heading and one unchecked actionable checkbox.
|
|
67
|
+
|
|
68
|
+
Minimal example:
|
|
69
|
+
|
|
70
|
+
```md
|
|
71
|
+
## Phase 1
|
|
72
|
+
|
|
73
|
+
- [ ] Define the first real task for this repo
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
5. Add package scripts.
|
|
77
|
+
|
|
78
|
+
Add these scripts to the consuming repo `package.json`, adapting only if necessary:
|
|
79
|
+
|
|
80
|
+
```json
|
|
81
|
+
{
|
|
82
|
+
"scripts": {
|
|
83
|
+
"pi:mock": "PI_CONFIG_FILE=pi.config.json PI_TRANSPORT=mock PI_TEST_CMD= pi-harness once",
|
|
84
|
+
"pi:once": "PI_CONFIG_FILE=pi.config.json pi-harness once",
|
|
85
|
+
"pi:run": "PI_CONFIG_FILE=pi.config.json pi-harness run",
|
|
86
|
+
"pi:report": "PI_CONFIG_FILE=pi.config.json pi-harness report",
|
|
87
|
+
"pi:visual:once": "PI_CONFIG_FILE=pi.config.json pi-harness visual-once"
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
If the repo already has scripts with those names, update them instead of duplicating.
|
|
93
|
+
|
|
94
|
+
6. Update `.gitignore`.
|
|
95
|
+
|
|
96
|
+
Add the entries from:
|
|
97
|
+
|
|
98
|
+
- `node_modules/@sebastianandreasson/pi-autonomous-agents/templates/gitignore.fragment`
|
|
99
|
+
|
|
100
|
+
Merge them into the repo `.gitignore` without duplicating existing lines.
|
|
101
|
+
|
|
102
|
+
7. Pick a safe default verification command.
|
|
103
|
+
|
|
104
|
+
Important:
|
|
105
|
+
|
|
106
|
+
- `testCommand` must be fast and bounded.
|
|
107
|
+
- Do not use a long end-to-end happy-path spec as the inner-loop default.
|
|
108
|
+
- Prefer smoke tests or a narrow targeted command.
|
|
109
|
+
|
|
110
|
+
If the repo does not yet have a good smoke command, set `testCommand` to an empty string and note that setup is incomplete.
|
|
111
|
+
|
|
112
|
+
8. Configure models conservatively.
|
|
113
|
+
|
|
114
|
+
Recommended pattern:
|
|
115
|
+
|
|
116
|
+
- local model for `developer`
|
|
117
|
+
- local model for `developerRetry`
|
|
118
|
+
- local model for `developerFix`
|
|
119
|
+
- local or slightly stronger model for `tester`
|
|
120
|
+
- stronger frontier model for `visualReview` only if available
|
|
121
|
+
|
|
122
|
+
Example shape:
|
|
123
|
+
|
|
124
|
+
```json
|
|
125
|
+
{
|
|
126
|
+
"piModel": "local/dev-model",
|
|
127
|
+
"visualReviewModel": "cloud/vision-model",
|
|
128
|
+
"roleModels": {
|
|
129
|
+
"developer": "local/dev-model",
|
|
130
|
+
"developerRetry": "local/dev-model",
|
|
131
|
+
"developerFix": "local/dev-model",
|
|
132
|
+
"tester": "local/tester-model",
|
|
133
|
+
"testerCommit": "local/tester-model",
|
|
134
|
+
"visualReview": "cloud/vision-model"
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
9. Validate the setup.
|
|
140
|
+
|
|
141
|
+
Run at least:
|
|
142
|
+
|
|
143
|
+
```bash
|
|
144
|
+
PI_CONFIG_FILE=pi.config.json pi-harness once
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
If the repo is not ready for a real run yet, at minimum run:
|
|
148
|
+
|
|
149
|
+
```bash
|
|
150
|
+
PI_CONFIG_FILE=pi.config.json PI_TRANSPORT=mock PI_TEST_CMD= pi-harness once
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
If setup validation fails, fix the config rather than leaving a half-configured repo.
|
|
154
|
+
|
|
155
|
+
## Agent Rules
|
|
156
|
+
|
|
157
|
+
- Reuse existing repo conventions where possible.
|
|
158
|
+
- Do not replace project-specific instructions with generic text if good instructions already exist.
|
|
159
|
+
- Do not invent fake test commands or model endpoints.
|
|
160
|
+
- Do not enable visual review unless the repo actually has a usable capture command and model config.
|
|
161
|
+
- Keep changes minimal and local to harness setup.
|
|
162
|
+
|
|
163
|
+
## What To Report Back
|
|
164
|
+
|
|
165
|
+
When setup is complete, report:
|
|
166
|
+
|
|
167
|
+
- which files were created or updated
|
|
168
|
+
- which verification command was configured
|
|
169
|
+
- whether visual review was enabled
|
|
170
|
+
- which roles were mapped to which models
|
|
171
|
+
- whether validation was run successfully
|
|
package/docs/PI_SUPERVISOR.md
ADDED
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
# PI Harness Supervisor
|
|
2
|
+
|
|
3
|
+
`pi-harness` provides a bounded unattended-work supervisor for TODO-driven local-agent loops.
|
|
4
|
+
|
|
5
|
+
The package is generic. It orchestrates the loop, but each consuming repo defines its own:
|
|
6
|
+
|
|
7
|
+
- `TODOS.md`
|
|
8
|
+
- project instructions
|
|
9
|
+
- verification command
|
|
10
|
+
- visual capture flow
|
|
11
|
+
- model/backend configuration
|
|
12
|
+
|
|
13
|
+
## Core Flow
|
|
14
|
+
|
|
15
|
+
Each real iteration follows this sequence:
|
|
16
|
+
|
|
17
|
+
1. `developer` implements one coherent task from `TODOS.md`.
|
|
18
|
+
2. A fast local verification command runs immediately after the developer round.
|
|
19
|
+
3. If verification passes, `tester` reviews the change independently from a skeptical user-facing perspective.
|
|
20
|
+
4. If tester or verification finds a real issue, the supervisor gives the findings back to `developer` for one focused repair pass.
|
|
21
|
+
5. If tester reaches `PASS`, tester provides a commit plan and the harness performs the actual git finalization.
|
|
22
|
+
6. Optionally, every `N` successful iterations, the harness runs a read-only visual review over screenshots and persists the feedback for later runs.
|
|
23
|
+
7. If that visual review returns `FAIL`, `BLOCKED`, or times out, the iteration is not counted as a success and the feedback is carried into later prompts.
|
|
24
|
+
|
|
25
|
+
## Package Contents
|
|
26
|
+
|
|
27
|
+
Main package files:
|
|
28
|
+
|
|
29
|
+
- `src/pi-supervisor.mjs`: controller
|
|
30
|
+
- `src/pi-client.mjs`: transport layer
|
|
31
|
+
- `src/pi-rpc-adapter.mjs`: built-in adapter from supervisor JSON to `pi --mode rpc`
|
|
32
|
+
- `src/pi-config.mjs`: config loader
|
|
33
|
+
- `src/pi-repo.mjs`: repo helpers, verification runner, git finalize step
|
|
34
|
+
- `src/pi-telemetry.mjs`: telemetry writer/reader
|
|
35
|
+
- `src/pi-prompts.mjs`: default prompt builders
|
|
36
|
+
- `src/pi-visual-review.mjs`: multimodal visual-review worker
|
|
37
|
+
- `src/pi-visual-once.mjs`: one-shot manual visual review runner
|
|
38
|
+
- `src/pi-report.mjs`: telemetry summary report
|
|
39
|
+
- `templates/DEVELOPER.md`: default developer-role instructions template
|
|
40
|
+
- `templates/TESTER.md`: default tester-role instructions template
|
|
41
|
+
|
|
42
|
+
## CLI
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
pi-harness once
|
|
46
|
+
pi-harness run
|
|
47
|
+
pi-harness report
|
|
48
|
+
pi-harness visual-once
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
The package reads `PI_CONFIG_FILE` if provided. Otherwise it falls back to the bundled generic `pi.config.json`.
|
|
52
|
+
|
|
53
|
+
## Config Contract
|
|
54
|
+
|
|
55
|
+
Projects typically provide their own `pi.config.json` with fields such as:
|
|
56
|
+
|
|
57
|
+
- `taskFile`
|
|
58
|
+
- `developerInstructionsFile`
|
|
59
|
+
- `testerInstructionsFile`
|
|
60
|
+
- `roleModels`
|
|
61
|
+
- `testCommand`
|
|
62
|
+
- `continueAfterSeconds`
|
|
63
|
+
- `toolContinueAfterSeconds`
|
|
64
|
+
- `noEventTimeoutSeconds`
|
|
65
|
+
- `toolNoEventTimeoutSeconds`
|
|
66
|
+
- `visualCaptureCommand`
|
|
67
|
+
- `visualFeedbackFile`
|
|
68
|
+
- `testerFeedbackFile`
|
|
69
|
+
- `models`
|
|
70
|
+
- `piModel`
|
|
71
|
+
- `visualReviewModel`
|
|
72
|
+
|
|
73
|
+
Model entries may carry their own OpenAI-compatible endpoint settings, so the PI text loop and the multimodal visual reviewer can point at different backends without changing code.
|
|
74
|
+
|
|
75
|
+
`piModel` is the default text model. Projects can optionally override specific roles through `roleModels`, for example:
|
|
76
|
+
|
|
77
|
+
```json
|
|
78
|
+
{
|
|
79
|
+
"piModel": "local/dev-model",
|
|
80
|
+
"visualReviewModel": "cloud/vision-model",
|
|
81
|
+
"roleModels": {
|
|
82
|
+
"developer": "local/dev-model",
|
|
83
|
+
"developerRetry": "local/dev-model",
|
|
84
|
+
"developerFix": "local/dev-model",
|
|
85
|
+
"tester": "local/tester-model",
|
|
86
|
+
"testerCommit": "local/tester-model",
|
|
87
|
+
"visualReview": "cloud/vision-model"
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
This lets the main developer/tester loop stay on local models while a stronger frontier model is reserved for periodic review roles.
|
|
93
|
+
|
|
94
|
+
For unattended inner-loop work, `testCommand` should be a bounded smoke gate rather than a long real-time end-to-end happy-path run. Reserve full-flow Playwright journeys for explicit nightly or post-run lanes.
|
|
95
|
+
|
|
96
|
+
## Transport Contract
|
|
97
|
+
|
|
98
|
+
The supervisor supports:
|
|
99
|
+
|
|
100
|
+
- `PI_TRANSPORT=mock`
|
|
101
|
+
- `PI_TRANSPORT=adapter`
|
|
102
|
+
|
|
103
|
+
The built-in adapter command is typically:
|
|
104
|
+
|
|
105
|
+
```bash
|
|
106
|
+
pi-harness adapter
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
When using `adapter`, set `PI_ADAPTER_COMMAND` to a command that:
|
|
110
|
+
|
|
111
|
+
1. Reads one JSON request from `stdin`
|
|
112
|
+
2. Talks to PI RPC or your own PI wrapper
|
|
113
|
+
3. Writes one JSON response to `stdout`
|
|
114
|
+
4. Exits with code `0` on success
|
|
115
|
+
|
|
116
|
+
Request shape:
|
|
117
|
+
|
|
118
|
+
```json
|
|
119
|
+
{
|
|
120
|
+
"sessionId": "existing-or-empty",
|
|
121
|
+
"sessionFile": "/absolute/path/to/session.jsonl",
|
|
122
|
+
"prompt": "controller prompt",
|
|
123
|
+
"cwd": "/absolute/repo/path",
|
|
124
|
+
"taskFile": "/absolute/repo/path/TODOS.md",
|
|
125
|
+
"instructionsFile": "/absolute/repo/path/pi/DEVELOPER.md",
|
|
126
|
+
"runtimeDir": "/absolute/repo/path/.pi-runtime",
|
|
127
|
+
"piCli": "pi",
|
|
128
|
+
"model": "local/model-name",
|
|
129
|
+
"tools": "read,bash,edit,write,grep,find,ls",
|
|
130
|
+
"thinking": "",
|
|
131
|
+
"noExtensions": false,
|
|
132
|
+
"noSkills": false,
|
|
133
|
+
"noPromptTemplates": false,
|
|
134
|
+
"noThemes": true,
|
|
135
|
+
"metadata": {
|
|
136
|
+
"iteration": 1,
|
|
137
|
+
"retryCount": 0,
|
|
138
|
+
"reason": "main_workflow"
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
Response shape:
|
|
144
|
+
|
|
145
|
+
```json
|
|
146
|
+
{
|
|
147
|
+
"sessionId": "stable-session-id",
|
|
148
|
+
"sessionFile": "/absolute/path/to/session.jsonl",
|
|
149
|
+
"status": "success",
|
|
150
|
+
"output": "agent output text",
|
|
151
|
+
"notes": "short controller note"
|
|
152
|
+
}
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
Allowed response `status` values:
|
|
156
|
+
|
|
157
|
+
- `success`
|
|
158
|
+
- `stalled`
|
|
159
|
+
- `timed_out`
|
|
160
|
+
- `failed`
|
|
161
|
+
- `canceled`
|
|
162
|
+
|
|
163
|
+
## Git Finalization
|
|
164
|
+
|
|
165
|
+
The harness is designed to keep commit history structured:
|
|
166
|
+
|
|
167
|
+
1. `developer` should leave a clean, reviewable diff and should not commit.
|
|
168
|
+
2. `tester` should review functionality and, on `PASS`, provide a commit plan:
|
|
169
|
+
- `COMMIT_MESSAGE: ...`
|
|
170
|
+
- `COMMIT_FILES:`
|
|
171
|
+
- `- path/to/file`
|
|
172
|
+
3. The harness stages only those requested files and performs the commit itself.
|
|
173
|
+
4. If the requested plan cannot be isolated safely, the iteration is blocked or failed instead of committing unrelated work.
|
|
174
|
+
|
|
175
|
+
## Persistent Handoffs
|
|
176
|
+
|
|
177
|
+
The harness persists two cross-iteration handoff files:
|
|
178
|
+
|
|
179
|
+
- visual review feedback:
|
|
180
|
+
- `pi-output/visual-review/FEEDBACK.md`
|
|
181
|
+
- tester feedback:
|
|
182
|
+
- `pi-output/tester-feedback/FEEDBACK.md`
|
|
183
|
+
|
|
184
|
+
These files are included in later developer/tester prompts, so new runs start with the latest review context.
|
|
185
|
+
|
|
186
|
+
Commit-plan follow-up passes still write history entries, but they do not replace the latest substantive tester feedback file. That keeps later developer turns grounded in the last real functional review rather than commit bookkeeping.
|
|
187
|
+
|
|
188
|
+
## Visual Capture Contract
|
|
189
|
+
|
|
190
|
+
The visual-review layer is intentionally generic. The harness does not know how to navigate a specific project.
|
|
191
|
+
|
|
192
|
+
Instead, when `PI_VISUAL_CAPTURE_CMD` is configured, the harness runs that command with the following environment variables set:
|
|
193
|
+
|
|
194
|
+
- `PI_VISUAL_ITERATION`
|
|
195
|
+
- `PI_VISUAL_PHASE`
|
|
196
|
+
- `PI_VISUAL_CAPTURE_DIR`
|
|
197
|
+
- `PI_VISUAL_MANIFEST_FILE`
|
|
198
|
+
- `PI_VISUAL_CHANGED_FILES`
|
|
199
|
+
|
|
200
|
+
The capture command must write a JSON manifest at `PI_VISUAL_MANIFEST_FILE` with this shape:
|
|
201
|
+
|
|
202
|
+
```json
|
|
203
|
+
{
|
|
204
|
+
"screens": [
|
|
205
|
+
{
|
|
206
|
+
"id": "main_menu",
|
|
207
|
+
"label": "Main menu",
|
|
208
|
+
"path": "main-menu.png"
|
|
209
|
+
}
|
|
210
|
+
]
|
|
211
|
+
}
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
`path` is resolved relative to `PI_VISUAL_CAPTURE_DIR`. The harness validates that each referenced image exists before calling the multimodal visual reviewer.
|
|
215
|
+
|
|
216
|
+
## Loop Mitigation
|
|
217
|
+
|
|
218
|
+
The built-in adapter mitigates obvious local loops by watching PI RPC tool events:
|
|
219
|
+
|
|
220
|
+
- repeated identical tool calls are aborted
|
|
221
|
+
- repeated same-path churn is aborted
|
|
222
|
+
- a soft `continue` can be sent after inactivity
|
|
223
|
+
- a separate tool-aware watchdog can tolerate long-running `bash` or browser work without treating the turn as dead
|
|
224
|
+
- a hard no-event timeout aborts a wedged turn instead of hanging indefinitely
|
|
225
|
+
|
|
226
|
+
Important: terminal streaming does not reset the heartbeat by itself. The watchdog keys off PI RPC events and active tool state, not raw shell output.
|
|
227
|
+
|
|
228
|
+
## Telemetry
|
|
229
|
+
|
|
230
|
+
Each step records:
|
|
231
|
+
|
|
232
|
+
- timestamp
|
|
233
|
+
- iteration
|
|
234
|
+
- phase
|
|
235
|
+
- kind
|
|
236
|
+
- status
|
|
237
|
+
- transport
|
|
238
|
+
- session id
|
|
239
|
+
- timeout flag
|
|
240
|
+
- exit code
|
|
241
|
+
- duration
|
|
242
|
+
- commit before and after
|
|
243
|
+
- changed file count
|
|
244
|
+
- verification status
|
|
245
|
+
- retry count
|
|
246
|
+
- notes
|
package/package.json
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@sebastianandreasson/pi-autonomous-agents",
|
|
3
|
+
"private": false,
|
|
4
|
+
"version": "0.1.0",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"description": "Portable unattended PI harness for developer/tester/visual-review loops.",
|
|
7
|
+
"license": "MIT",
|
|
8
|
+
"exports": {
|
|
9
|
+
".": "./src/index.mjs",
|
|
10
|
+
"./cli": "./src/cli.mjs",
|
|
11
|
+
"./pi-config": "./src/pi-config.mjs",
|
|
12
|
+
"./pi-supervisor": "./src/pi-supervisor.mjs",
|
|
13
|
+
"./pi-visual-review": "./src/pi-visual-review.mjs"
|
|
14
|
+
},
|
|
15
|
+
"bin": {
|
|
16
|
+
"pi-harness": "./src/cli.mjs"
|
|
17
|
+
},
|
|
18
|
+
"scripts": {
|
|
19
|
+
"check": "node --check src/cli.mjs && node --check src/pi-client.mjs && node --check src/pi-config.mjs && node --check src/pi-flow.mjs && node --check src/pi-heartbeat.mjs && node --check src/pi-prompts.mjs && node --check src/pi-repo.mjs && node --check src/pi-report.mjs && node --check src/pi-rpc-adapter.mjs && node --check src/pi-supervisor.mjs && node --check src/pi-telemetry.mjs && node --check src/pi-visual-once.mjs && node --check src/pi-visual-review.mjs && node --check src/index.mjs && node --check test/pi-heartbeat.test.mjs && node --check test/pi-role-models.test.mjs && node --check test/pi-flow.test.mjs",
|
|
20
|
+
"test": "node --test test/pi-heartbeat.test.mjs test/pi-role-models.test.mjs test/pi-flow.test.mjs"
|
|
21
|
+
},
|
|
22
|
+
"files": [
|
|
23
|
+
"src",
|
|
24
|
+
"templates",
|
|
25
|
+
"docs",
|
|
26
|
+
"pi.config.json",
|
|
27
|
+
"SETUP.md",
|
|
28
|
+
"README.md"
|
|
29
|
+
],
|
|
30
|
+
"engines": {
|
|
31
|
+
"node": ">=20"
|
|
32
|
+
},
|
|
33
|
+
"repository": {
|
|
34
|
+
"type": "git",
|
|
35
|
+
"url": "git+https://github.com/sebastianandreasson/pi-autonomous-agents.git"
|
|
36
|
+
}
|
|
37
|
+
}
|
package/pi.config.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
{
|
|
2
|
+
"transport": "adapter",
|
|
3
|
+
"adapterCommand": "pi-harness adapter",
|
|
4
|
+
"instructionsFile": "",
|
|
5
|
+
"taskFile": "TODOS.md",
|
|
6
|
+
"streamTerminal": false,
|
|
7
|
+
"loopRepeatThreshold": 12,
|
|
8
|
+
"samePathRepeatThreshold": 8,
|
|
9
|
+
"continueAfterSeconds": 300,
|
|
10
|
+
"toolContinueAfterSeconds": 900,
|
|
11
|
+
"continueMessage": "continue",
|
|
12
|
+
"noEventTimeoutSeconds": 900,
|
|
13
|
+
"toolNoEventTimeoutSeconds": 1800,
|
|
14
|
+
"testCommand": "",
|
|
15
|
+
"visualFeedbackFile": "pi-output/visual-review/FEEDBACK.md",
|
|
16
|
+
"testerFeedbackFile": "pi-output/tester-feedback/FEEDBACK.md",
|
|
17
|
+
"testerFeedbackHistoryDir": "pi-output/tester-feedback/history",
|
|
18
|
+
"visualReviewHistoryDir": "pi-output/visual-review/history",
|
|
19
|
+
"visualCaptureDir": "pi-output/visual-capture",
|
|
20
|
+
"visualCaptureCommand": "",
|
|
21
|
+
"visualCaptureTimeoutSeconds": 300,
|
|
22
|
+
"visualReviewEnabled": false,
|
|
23
|
+
"visualReviewEveryNSuccesses": 5,
|
|
24
|
+
"visualReviewModel": "",
|
|
25
|
+
"visualReviewCommand": "pi-harness visual-review-worker",
|
|
26
|
+
"visualReviewMaxImages": 8,
|
|
27
|
+
"visualReviewTimeoutSeconds": 300
|
|
28
|
+
}
|
package/src/cli.mjs
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
import path from 'node:path'
|
|
4
|
+
import { spawn } from 'node:child_process'
|
|
5
|
+
import process from 'node:process'
|
|
6
|
+
import { fileURLToPath } from 'node:url'
|
|
7
|
+
|
|
8
|
+
// Absolute directory of this CLI entry file; sub-command scripts are resolved against it.
const scriptDir = path.dirname(fileURLToPath(import.meta.url))

// Lookup table: sub-command name -> script file name (joined onto scriptDir by main()).
// `once` and `run` both map to the same supervisor script.
const COMMANDS = new Map(
  Object.entries({
    once: 'pi-supervisor.mjs',
    run: 'pi-supervisor.mjs',
    report: 'pi-report.mjs',
    'visual-once': 'pi-visual-once.mjs',
    adapter: 'pi-rpc-adapter.mjs',
    'visual-review-worker': 'pi-visual-review.mjs',
  }),
)
|
|
18
|
+
|
|
19
|
+
/**
 * Entry point for the `pi-harness` CLI.
 *
 * Reads the sub-command from `process.argv[2]` (falling back to `once` when it
 * is missing or empty), looks up the matching script in `COMMANDS`, and runs
 * that script in a child Node process that inherits this process's working
 * directory, environment, and stdio.
 *
 * Outcome is reported through the parent process:
 * - unknown sub-command: prints the list of valid commands and sets exit code 1
 * - child could not be started: prints the reason and sets exit code 1
 * - child killed by a signal: the same signal is sent to this process
 * - otherwise: the child's exit code becomes this process's exit code
 *
 * @returns {void}
 */
function main() {
  const subcommand = process.argv[2] || 'once'
  const scriptName = COMMANDS.get(subcommand)
  if (!scriptName) {
    console.error(`Unknown pi-harness command "${subcommand}". Expected one of: ${[...COMMANDS.keys()].join(', ')}`)
    process.exitCode = 1
    return
  }

  const childArgs = [path.join(scriptDir, scriptName)]
  // `once` and `run` share one script, so the chosen mode is passed as its first argument.
  if (subcommand === 'once' || subcommand === 'run') {
    childArgs.push(subcommand)
  }

  const child = spawn(process.execPath, childArgs, {
    cwd: process.cwd(),
    env: process.env,
    stdio: 'inherit',
  })

  // A ChildProcess emits 'error' when it cannot be spawned. Without a listener
  // that event is thrown as an uncaught exception, so report it and fail cleanly.
  child.on('error', (error) => {
    console.error(`Failed to start pi-harness command "${subcommand}": ${error.message}`)
    process.exitCode = 1
  })

  child.on('exit', (code, signal) => {
    if (signal) {
      // Re-raise the child's terminating signal on this process so the caller
      // observes the same signal rather than a plain exit code.
      process.kill(process.pid, signal)
      return
    }
    // `code` is null only when a signal ended the child; treat that as failure.
    process.exitCode = code ?? 1
  })
}
|
|
47
|
+
|
|
48
|
+
main()
|
package/src/index.mjs
ADDED