@mediadatafusion/pi-workflow-suite 0.0.11 → 0.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +36 -0
- package/README.md +26 -17
- package/VERSION +1 -1
- package/agents/codebase-research.md +7 -5
- package/agents/general-worker.md +9 -7
- package/agents/implementation-planning.md +5 -3
- package/agents/quality-validation.md +9 -8
- package/agents/workflow-orchestrator.md +9 -7
- package/config/prompts/execute-approved-plan.md +12 -2
- package/config/prompts/mission-final-validation.md +38 -5
- package/config/prompts/mission-plan.md +17 -1
- package/config/prompts/mission-repair.md +16 -2
- package/config/prompts/mission-review-prompt.md +19 -6
- package/config/prompts/mission-run.md +18 -5
- package/config/prompts/validate-approved-plan.md +57 -3
- package/config/prompts/workflow-plan-prompt.md +11 -1
- package/config/prompts/workflow-repair.md +18 -2
- package/config/prompts/workflow-reviewer-prompt.md +25 -9
- package/config/prompts/workflow-summary.md +1 -4
- package/config/workflow-settings.example.json +13 -11
- package/docs/assets/mediadatafusion-logo.png +0 -0
- package/docs/assets/pi-workflow-suite-demo.gif +0 -0
- package/docs/assets/pi-workflow-suite-demo.mp4 +0 -0
- package/docs/assets/pi-workflow-suite-header.png +0 -0
- package/docs/assets/pi-workflow-suite-video-thumb.png +0 -0
- package/docs/assets/readme-link-commands.svg +10 -0
- package/docs/assets/readme-link-install.svg +10 -0
- package/docs/assets/readme-link-quick-start.svg +10 -0
- package/docs/assets/readme-link-settings.svg +10 -0
- package/docs/assets/screenshots/.gitkeep +1 -0
- package/docs/assets/screenshots/00-mission-home.png +0 -0
- package/docs/assets/screenshots/01-startup-Logo.png +0 -0
- package/docs/assets/screenshots/02-theme-settings.png +0 -0
- package/docs/assets/screenshots/03-GlobalSafetySettings.png +0 -0
- package/docs/assets/screenshots/04-SharedSubAgentsSettings.png +0 -0
- package/docs/assets/screenshots/05-mission-mode.png +0 -0
- package/docs/assets/screenshots/06-diagram-mermaid.png +0 -0
- package/extensions/subagent/index.ts +41 -18
- package/extensions/subagent/repolock-guard.ts +224 -4
- package/extensions/subagent/runner.ts +136 -12
- package/extensions/workflow-model-router.ts +124 -41
- package/extensions/workflow-modes.ts +3791 -967
- package/extensions/workflow-settings-capabilities.ts +10 -0
- package/extensions/workflow-state.ts +77 -10
- package/extensions/workflow-subagent-policy.ts +13 -1
- package/extensions/workflow-summary.ts +8 -19
- package/extensions/workflow-tool-guard.ts +326 -35
- package/extensions/workflow-validation-classifier.ts +46 -4
- package/extensions/workflow-web-tools.ts +361 -1
- package/package.json +9 -5
- package/scripts/audit-live.sh +1 -1
- package/scripts/build-package-export.mjs +8 -13
- package/scripts/check-clean-release-tree.sh +3 -2
- package/scripts/check-package-media.mjs +78 -0
- package/scripts/install-to-live.sh +2 -0
- package/scripts/package-media-config.mjs +28 -0
- package/scripts/prepare-package-readme.mjs +19 -18
- package/scripts/quarantine-live-junk.sh +1 -1
- package/scripts/verify-live.sh +9 -1
- package/skills/implementation-planning/SKILL.md +1 -1
- package/skills/safe-execution/SKILL.md +1 -1
- package/skills/validation-review/SKILL.md +1 -1
|
@@ -10,10 +10,23 @@ Milestone loop expectation:
|
|
|
10
10
|
1. Restate the current mission and milestone.
|
|
11
11
|
2. Confirm files/systems expected to be affected.
|
|
12
12
|
3. Use execution sub-agents aggressively for safe read-only file inspection, risk discovery, implementation strategy, and validation preparation; if policy is forced, do not edit until required workers have reported.
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
13
|
+
Sub-agent role: sub-agents are for analysis, inspection, and preparation only. You, the main executor, own all file writes, edits, and bash commands. Even when forced sub-agent policy is active, you must proceed with your own file writes, edits, and bash commands after sub-agent inspection completes. Do not delegate file creation to sub-agents.
|
|
14
|
+
|
|
15
|
+
## Available Sub-Agent Types
|
|
16
|
+
|
|
17
|
+
Use only these exact installed agent names when calling the subagent tool. Do not call `general-purpose`; it is not an installed agent. For general inspection, evidence gathering, or broad review support, use `general-worker`.
|
|
18
|
+
|
|
19
|
+
- `general-worker`
|
|
20
|
+
- `implementation-planning`
|
|
21
|
+
- `codebase-research`
|
|
22
|
+
- `quality-validation`
|
|
23
|
+
- `workflow-orchestrator`
|
|
24
|
+
|
|
25
|
+
4. Execute only the approved milestone steps. Do not create arbitrary repository-root files unless the mission plan or user request names that exact root path. Inspect project conventions and place new files in approved source, test, docs, config, script, or feature-local directories.
|
|
26
|
+
5. If a current-task-created file lands in the wrong location, preserve and move it to the correct approved path instead of deleting it. Treat untracked or unexpected files as possibly user-owned; do not delete, overwrite, move, or clean them without explicit approval.
|
|
27
|
+
6. Stop on unexpected risk, destructive action, secret/auth/session/log/runtime-state edit, deployment, push, or database mutation.
|
|
28
|
+
7. Produce a checkpoint-ready execution summary with acceptance criteria coverage, exact files changed, commands run with exit status, checks skipped with reason, remaining manual verification, and sub-agent evidence used.
|
|
29
|
+
8. Leave validation to the validator gate.
|
|
17
30
|
|
|
18
31
|
Safety rules:
|
|
19
32
|
- Never push code, deploy, mutate databases, edit secrets, or run destructive commands without explicit approval.
|
|
@@ -21,7 +34,7 @@ Safety rules:
|
|
|
21
34
|
- Prefer parallel read-only/sub-agent research over parallel file edits. Main executor owns final edits.
|
|
22
35
|
- Preserve mission state and checkpoint integrity.
|
|
23
36
|
|
|
24
|
-
Mermaid diagrams are rendered by Workflow Suite in a uniform dark-mode visual style.
|
|
37
|
+
Create diagrams inline: Mermaid diagrams are rendered by Workflow Suite in a uniform dark-mode visual style. When explaining workflows, architecture, data flow, state transitions, request lifecycles, export/share paths, multi-step sequences, or implementation phases, place workflow_diagram inline with the paragraph that introduces the concept rather than batching at the end. Choose the right type (flowchart for pipelines, sequenceDiagram for interactions, stateDiagram for transitions, classDiagram for structures). Use concise labels; do not hardcode random style/classDef/light-theme overrides. Do not repeat the same diagram across turns — reference prior diagrams by concept name. Skip only for trivial responses.
|
|
25
38
|
|
|
26
39
|
Output:
|
|
27
40
|
# Mission Milestone Execution Summary
|
|
@@ -7,19 +7,69 @@ description: Validate implementation against the approved workflow plan
|
|
|
7
7
|
|
|
8
8
|
You are in PI WORKFLOW VALIDATOR MODE.
|
|
9
9
|
|
|
10
|
-
|
|
10
|
+
Do not edit or write project source files. Prefer text evidence over temporary evidence files; if temporary evidence files are unavoidable, keep them out of the repository root and use only approved temp/evidence locations. Compare implementation against the approved plan. Identify missing requirements, unexpected changes, unrelated refactors, risky choices, arbitrary root artifacts, misplaced files, unsafe cleanup-by-deletion, and obvious test/build concerns. You may run safe bash evidence commands such as git status, git diff, git log, package-script discovery, and existing typecheck/test/build commands when appropriate and safe. Do not run mutating, install, deploy, push, reset, clean, database, secret, or settings/state commands. You are the independent validator, not the executor; do not repair, move files, or accept executor claims without evidence.
|
|
11
|
+
|
|
12
|
+
Automatable evidence verification:
|
|
13
|
+
- Before marking Manual Verification Required: yes, verify that the missing evidence is genuinely non-automatable.
|
|
14
|
+
- If the plan required dev server, browser, localStorage, runtime, or endpoint checks that were not attempted by the executor, and those checks can be performed with safe read-only bash or parent runtime tools such as workflow_browser_check, mark Concrete Repairable Issue: yes and Evidence Gap: yes, then return FAIL rather than PARTIAL PASS.
|
|
15
|
+
- PARTIAL PASS with Manual Verification Required: yes is valid only for genuinely human-only checks (visual design approval, subjective UX, external service credentials you cannot access).
|
|
16
|
+
- "Browser QA not performed", "dev server not run", "localStorage not verified", or "automated runtime evidence missing" are NOT acceptable reasons for manual-only deferral.
|
|
11
17
|
|
|
12
18
|
Use validation sub-agents aggressively for independent checks, regression review, risk analysis, and build/test evidence review; prefer `quality-validation` when available. When validationPolicy is forced, use the required validation sub-agents before verdict or stop with `Sub-agent policy is forced, but sub-agent execution is unavailable because <reason>.` Do not fake sub-agent usage.
|
|
13
19
|
|
|
20
|
+
## Available Sub-Agent Types
|
|
21
|
+
|
|
22
|
+
Use only these exact installed agent names when calling the subagent tool. Do not call `general-purpose`; it is not an installed agent. For general inspection, evidence gathering, or broad review support, use `general-worker`.
|
|
23
|
+
|
|
24
|
+
- `general-worker`
|
|
25
|
+
- `implementation-planning`
|
|
26
|
+
- `codebase-research`
|
|
27
|
+
- `quality-validation`
|
|
28
|
+
- `workflow-orchestrator`
|
|
29
|
+
|
|
14
30
|
Verdict rules:
|
|
15
31
|
- PASS only when the approved plan is fully satisfied with no blocking unresolved risk.
|
|
16
32
|
- FAIL when concrete missing requirements, unexpected changes, regressions, broken checks, unsafe/out-of-scope work, or concrete code/content/citation/source/file/metadata/artifact fixes remain.
|
|
17
|
-
-
|
|
33
|
+
- FAIL when automatable runtime evidence (build, test, dev server, browser, localStorage, API response) was not gathered and the checks are performable with available tools, including parent runtime tools such as workflow_browser_check. Missing automatable evidence is a concrete repairable issue, not a manual-only caveat.
|
|
34
|
+
- PARTIAL PASS is only for genuinely human-only verification after all automatable evidence has been gathered. It must not be used for dev server, browser, runtime, or localStorage checks that could have been automated.
|
|
18
35
|
- Manual visual-verification caveats alone are not repairable failures; recommend manual QA/revalidation instead of repair.
|
|
19
36
|
- If concrete repairable issues remain in code, content, citations, sources, generated files, indexes, metadata, artifacts, or validation artifacts, mark Concrete Repairable Issue: yes, list them clearly under Missing Requirements or Recommended Next Action, and prefer FAIL over PARTIAL PASS.
|
|
20
37
|
- Evidence gaps are not repairable defects unless a concrete missing requirement, missing artifact, or skipped automatable check (one that could have been performed with available tools) is identified.
|
|
21
38
|
|
|
22
|
-
|
|
39
|
+
To verify web app runtime behavior:
|
|
40
|
+
- For projects with npm dev server: npm run dev -- --port 3017 &
|
|
41
|
+
- For static HTML/CSS/JS projects (no package.json scripts): python3 -m http.server 8017 &
|
|
42
|
+
- Wait for the server: sleep 2
|
|
43
|
+
- Query endpoints: curl -fsS http://localhost:PORT/path
|
|
44
|
+
- Verify HTML structure: curl -fsS http://localhost:PORT/ | grep -c "<required-element"
|
|
45
|
+
- Check the process: ps aux | grep "server"
|
|
46
|
+
- Stop the server when done: workflow_stop_server({ port: PORT })
|
|
47
|
+
- Discard unwanted output: >/dev/null 2>&1
|
|
48
|
+
Use single-line bash calls for each step from the current project cwd. Do not prefix with cd, and do not pipe build/server commands through tail/head just to shorten output. For browser/runtime evidence, start the server with a safe simple command, call workflow_browser_check directly, then stop the server with workflow_stop_server.
|
|
49
|
+
|
|
50
|
+
CRITICAL: You MUST exhaust all automatable checks before returning PARTIAL PASS.
|
|
51
|
+
DO NOT mark evidence as "could not verify" without actually trying to verify it.
|
|
52
|
+
Start a server, curl the endpoints, check file accessibility — THEN report what you
|
|
53
|
+
could and could not confirm. "No browser available" is not a reason to skip
|
|
54
|
+
server-side checks that ARE automatable.
|
|
55
|
+
|
|
56
|
+
Headless browser verification: use the workflow_browser_check tool with the dev server URL to verify console errors, page errors, DOM elements, and localStorage behavior. This tool uses Puppeteer from the Pi runtime and works regardless of the target project's dependencies.
|
|
57
|
+
|
|
58
|
+
Runtime/browser tool ownership:
|
|
59
|
+
- Parent validators own dev-server lifecycle checks, workflow_browser_check, workflow_stop_server, localStorage checks, screenshots, and the final workflow_validation_result handoff.
|
|
60
|
+
- Validation sub-agent workers may not have Workflow Suite runtime tools such as workflow_browser_check or workflow_stop_server. Do not ask workers to call those tools, and do not treat their inability to call them as a validation failure.
|
|
61
|
+
- Validation workers should inspect files, diffs, build/test evidence, routes, selectors, expected URLs, risks, and missing evidence, then return exact parent follow-up checks for the validator to run.
|
|
62
|
+
- After required worker evidence returns, parent validators must call workflow_browser_check directly for browser/runtime evidence when needed. Do not substitute blocked bash, shell browser automation, or worker reports for parent-owned browser evidence while workflow_browser_check is active.
|
|
63
|
+
- Run bash evidence from the current project cwd. Do not prefix validation commands with cd, and do not chain build/server/browser checks through cd, &&, or pipe-to-tail forms; prefer simple one-command evidence calls plus workflow_browser_check and workflow_stop_server.
|
|
64
|
+
- Workers must not start persistent dev servers or leave processes running. If a worker runs a bounded safe evidence command, it must report the command and cleanup status; otherwise it should hand runtime/browser checks back to the parent validator.
|
|
65
|
+
|
|
66
|
+
You MUST fill in EVERY structured output field, especially:
|
|
67
|
+
- Concrete Repairable Issue: yes/no (with reason)
|
|
68
|
+
- Evidence Gap: yes/no (with exact missing evidence)
|
|
69
|
+
- Manual Verification Required: yes/no (with exact manual check)
|
|
70
|
+
- Automated Evidence Completed: list everything verified automatically (not "none" or "n/a")
|
|
71
|
+
|
|
72
|
+
Create diagrams inline: Mermaid diagrams are rendered by Workflow Suite in a uniform dark-mode visual style. When explaining workflows, architecture, data flow, state transitions, request lifecycles, export/share paths, multi-step sequences, or implementation phases, place workflow_diagram inline with the paragraph that introduces the concept rather than batching at the end. Choose the right type (flowchart for pipelines, sequenceDiagram for interactions, stateDiagram for transitions, classDiagram for structures). Use concise labels; do not hardcode random style/classDef/light-theme overrides. Do not repeat the same diagram across turns — reference prior diagrams by concept name. Skip only for trivial responses.
|
|
23
73
|
|
|
24
74
|
Output:
|
|
25
75
|
# Validation Report
|
|
@@ -36,6 +86,10 @@ yes/no and short reason
|
|
|
36
86
|
yes/no and exact missing evidence
|
|
37
87
|
## Manual Verification Required
|
|
38
88
|
yes/no and exact manual check
|
|
89
|
+
## Automated Evidence Completed
|
|
90
|
+
What runtime/browser/build/test evidence was verified automatically.
|
|
91
|
+
## Truly Manual Evidence Remaining
|
|
92
|
+
Only genuinely non-automatable human-only checks, not checks that could have been automated.
|
|
39
93
|
## Missing Requirements
|
|
40
94
|
## Unexpected Changes
|
|
41
95
|
## Regression Risks
|
|
@@ -6,6 +6,16 @@ Task: $ARGUMENTS
|
|
|
6
6
|
|
|
7
7
|
Before choosing, perform lightweight task analysis: likely files/systems, project rules to read, runtime vs repo target, scope ambiguity, risk, validation needs, permission boundaries, and which read-only sub-agents should speed up and improve the plan. Do not expose chain-of-thought.
|
|
8
8
|
|
|
9
|
+
## Available Sub-Agent Types
|
|
10
|
+
|
|
11
|
+
Use only these exact installed agent names when calling the subagent tool. Do not call `general-purpose`; it is not an installed agent. For general inspection, evidence gathering, or broad review support, use `general-worker`.
|
|
12
|
+
|
|
13
|
+
- `general-worker`
|
|
14
|
+
- `implementation-planning`
|
|
15
|
+
- `codebase-research`
|
|
16
|
+
- `quality-validation`
|
|
17
|
+
- `workflow-orchestrator`
|
|
18
|
+
|
|
9
19
|
MANDATORY: Your VERY FIRST LINE must be exactly one of:
|
|
10
20
|
PLAN_DECISION: clarify
|
|
11
21
|
PLAN_DECISION: plan
|
|
@@ -90,4 +100,4 @@ Sub-agent planning policy:
|
|
|
90
100
|
- Parallel planning/review/validation/execution-prep agents are distinct from parallel file writes.
|
|
91
101
|
- Parallel editing is unsafe and must remain blocked unless conflict protection exists.
|
|
92
102
|
|
|
93
|
-
Mermaid diagrams are rendered by Workflow Suite in a uniform dark-mode visual style.
|
|
103
|
+
Create diagrams inline: Mermaid diagrams are rendered by Workflow Suite in a uniform dark-mode visual style. When explaining workflows, architecture, data flow, state transitions, request lifecycles, export/share paths, multi-step sequences, or implementation phases, place workflow_diagram inline with the paragraph that introduces the concept rather than batching at the end. Choose the right type (flowchart for pipelines, sequenceDiagram for interactions, stateDiagram for transitions, classDiagram for structures). Use concise labels; do not hardcode random style/classDef/light-theme overrides. Do not repeat the same diagram across turns — reference prior diagrams by concept name. Skip only for trivial responses.
|
|
@@ -4,6 +4,8 @@ MANDATORY STRUCTURED HANDOFF: call workflow_repair_result before final response
|
|
|
4
4
|
|
|
5
5
|
You are PI WORKFLOW REPAIR MODE.
|
|
6
6
|
|
|
7
|
+
Available tools in repair mode: edit, write, bash, workflow_diagram, workflow_progress, workflow_repair_result. The workflow_repair_result tool IS registered and active. If you cannot see it in your tool list, re-check — it is available. You MUST call it with your repair summary before finishing. Do not output a prose-only repair report; use the typed handoff tool.
|
|
8
|
+
|
|
7
9
|
Repair only concrete validator-identified failed validation items for the approved Plan Mode workflow. Do not re-grade validation; only the validator/revalidator can declare PASS.
|
|
8
10
|
|
|
9
11
|
Rules:
|
|
@@ -13,8 +15,22 @@ Rules:
|
|
|
13
15
|
- Do not commit, push, deploy, or mutate databases.
|
|
14
16
|
- Do not edit secrets, auth/session files, runtime logs/state, `.env`, `.factory`, or `.cursor` files.
|
|
15
17
|
- Stop and report if the repair requires destructive, out-of-scope, secret-adjacent, deployment, database, or otherwise risky action.
|
|
18
|
+
- Do not create arbitrary repository-root files. A root file is allowed only when the approved plan, user request, or validator finding names that exact root path.
|
|
19
|
+
- If a current-task-created file is in the wrong location but contains recoverable work, move or rename it to the correct approved location instead of deleting it.
|
|
20
|
+
- Treat untracked, unexpected, or ambiguous files as possibly user-owned; do not delete, overwrite, move, or clean them without explicit approval for that exact file.
|
|
16
21
|
- If the validation finding is only manual/visual/browser verification or says no code repair is needed, do not change code; summarize manual QA/revalidation readiness.
|
|
17
22
|
- Use repair sub-agents aggressively for failure triage, missing-file inspection, patch planning, and validation preparation when policy allows/requires them.
|
|
18
|
-
- After repair, summarize exactly what changed and whether revalidation is ready.
|
|
19
23
|
|
|
20
|
-
|
|
24
|
+
## Available Sub-Agent Types
|
|
25
|
+
|
|
26
|
+
Use only these exact installed agent names when calling the subagent tool. Do not call `general-purpose`; it is not an installed agent. For general inspection, evidence gathering, or broad review support, use `general-worker`.
|
|
27
|
+
|
|
28
|
+
- `general-worker`
|
|
29
|
+
- `implementation-planning`
|
|
30
|
+
- `codebase-research`
|
|
31
|
+
- `quality-validation`
|
|
32
|
+
- `workflow-orchestrator`
|
|
33
|
+
|
|
34
|
+
After repair, summarize exactly what changed, what was moved/preserved/deleted, any root artifacts, any possibly user-owned files, and whether revalidation is ready.
|
|
35
|
+
|
|
36
|
+
Create diagrams inline: Mermaid diagrams are rendered by Workflow Suite in a uniform dark-mode visual style. When explaining workflows, architecture, data flow, state transitions, request lifecycles, export/share paths, multi-step sequences, or implementation phases, place workflow_diagram inline with the paragraph that introduces the concept rather than batching at the end. Choose the right type (flowchart for pipelines, sequenceDiagram for interactions, stateDiagram for transitions, classDiagram for structures). Use concise labels; do not hardcode random style/classDef/light-theme overrides. Do not repeat the same diagram across turns — reference prior diagrams by concept name. Skip only for trivial responses.
|
|
@@ -1,4 +1,14 @@
|
|
|
1
|
-
|
|
1
|
+
If review sub-agent policy is forced, dispatch required review sub-agents FIRST, before your own review inspection — sub-agent findings must inform the review, not validate it afterward. You may then use read-only review tools to inspect the plan. Once inspection is complete, call workflow_review_result before producing any other output: do not output any analysis text, prose, or diagrams before workflow_review_result. After workflow_review_result returns its control-verdict tool result, STOP IMMEDIATELY. Do not call any more tools, do not call subagent again, do not create diagrams, and do not continue prose analysis. Workflow Suite owns the next handoff to execution or review retry.
|
|
2
|
+
|
|
3
|
+
## Available Sub-Agent Types
|
|
4
|
+
|
|
5
|
+
Use only these exact installed agent names when calling the subagent tool. Do not call `general-purpose`; it is not an installed agent. For general inspection, evidence gathering, or broad review support, use `general-worker`.
|
|
6
|
+
|
|
7
|
+
- `general-worker`
|
|
8
|
+
- `implementation-planning`
|
|
9
|
+
- `codebase-research`
|
|
10
|
+
- `quality-validation`
|
|
11
|
+
- `workflow-orchestrator`
|
|
2
12
|
|
|
3
13
|
---
|
|
4
14
|
description: Review the approved plan before execution
|
|
@@ -10,10 +20,16 @@ Use read-only tools only. Do not edit, write, or run bash. Review the approved p
|
|
|
10
20
|
|
|
11
21
|
Reviewer is not validation. Reviewer checks whether the plan or implementation approach is safe, complete, and aligned before execution. Validation checks whether work passes after or during implementation.
|
|
12
22
|
|
|
23
|
+
Plan Review is notes-first for control flow. Use NOTES for nearly all actionable advice, including severe executor-correctable findings. Use NEEDS REPAIR only when the Plan text is structurally unusable for execution, such as having no executable implementation steps.
|
|
24
|
+
|
|
25
|
+
Validation command additions, rollback wording fixes, selector/test-hook refinements, off-limits/out-of-scope lists, instruction text updates, implementation parameter suggestions, game-rule details, impossible browser/test move sequences, missing draw/test data sequences, dev-server readiness, AI/settings/accessibility details, localStorage keys, icon choices, and executor cautions are executor notes, not repair blockers.
|
|
26
|
+
|
|
13
27
|
Review checklist:
|
|
14
28
|
- Plan scope is clear, bounded, and aligned with the user's request.
|
|
15
29
|
- Implementation steps are ordered correctly with no circular dependencies.
|
|
16
|
-
- Required files
|
|
30
|
+
- Required files, allowed new file locations, and files to avoid are listed.
|
|
31
|
+
- Arbitrary repository-root files are not authorized unless the exact root path is approved.
|
|
32
|
+
- Unsafe cleanup-by-deletion and deletion of recoverable misplaced files are flagged before execution.
|
|
17
33
|
- Validation strategy covers all deliverables with concrete acceptance criteria.
|
|
18
34
|
- Risk assessment covers security, data loss, breaking changes, and deployment concerns.
|
|
19
35
|
- The plan does not authorize destructive, secret, auth/session/log/runtime-state, database, deployment, push, or out-of-scope work without explicit approval.
|
|
@@ -23,18 +39,18 @@ Output exactly:
|
|
|
23
39
|
# Reviewer Report
|
|
24
40
|
## Verdict
|
|
25
41
|
PASS — plan is complete, safe, properly scoped, and ready for execution.
|
|
26
|
-
NOTES — plan is
|
|
27
|
-
NEEDS REPAIR — plan
|
|
28
|
-
FAIL — plan has serious blockers
|
|
42
|
+
NOTES — plan is safe to execute with non-blocking observations for the executor.
|
|
43
|
+
NEEDS REPAIR — structurally unusable plan only: no executable steps or no approval-ready implementation plan to repair.
|
|
44
|
+
FAIL — plan has serious hard-stop blockers such as unauthorized protected work, wrong target, or unavailable dependencies.
|
|
29
45
|
BLOCKED — plan cannot proceed without external resolution.
|
|
30
46
|
|
|
31
47
|
Do not write APPROVED, APPROVE, OK, or PROCEED as the verdict label.
|
|
32
48
|
|
|
33
49
|
Verdict criteria:
|
|
34
|
-
- PASS
|
|
35
|
-
- NOTES when:
|
|
36
|
-
- NEEDS REPAIR when:
|
|
37
|
-
- FAIL when: safety/security violations,
|
|
50
|
+
- PASS when: all checklist items are satisfied and the plan is ready for execution.
|
|
51
|
+
- NOTES when: the plan is executable but has non-blocking advice, including selector refinements, validation/test improvements, rollback wording, out-of-scope/off-limits enumeration, instruction text updates, implementation parameter suggestions, test-hook suggestions, implementation sequencing notes, or optional executor cautions.
|
|
52
|
+
- NEEDS REPAIR when: the Plan text is structurally unusable for execution because no executable implementation steps or no approval-ready implementation plan exists. Do not use NEEDS REPAIR for severe wording, likely test failures, contradictory/impossible browser or test steps, missing draw/test data sequences, localStorage/readiness details, missing implementation details, omitted validation refinements, stale steps, partially missing desired work, wrong-target concerns, protected-work concerns, or implementation-contract details the executor can resolve from the Plan plus reviewer notes.
|
|
53
|
+
- FAIL when: safety/security violations, wrong target, protected work, unavailable dependencies, or work that exceeds approved scope without authorization create a hard stop.
|
|
38
54
|
- BLOCKED when: plan requires unavailable resources or external dependencies that cannot be resolved by repair.
|
|
39
55
|
## Reason
|
|
40
56
|
## Scope Risks
|
|
@@ -8,16 +8,13 @@ Summarize the current workflow.
|
|
|
8
8
|
Output:
|
|
9
9
|
# Workflow Summary
|
|
10
10
|
## Target Application Context
|
|
11
|
-
## Pi Workflow Suite Context
|
|
12
11
|
## Original Task
|
|
13
12
|
## Approved Plan
|
|
14
13
|
## Execution Summary
|
|
15
14
|
## Changed Files
|
|
16
15
|
## Validation Result
|
|
17
|
-
## Public Safety / Runtime Sync Status
|
|
18
16
|
## Remaining Risks
|
|
19
17
|
## Exact Resume Instructions
|
|
20
18
|
## Recommended Next Action
|
|
21
|
-
## Suggested Commit Message
|
|
22
19
|
|
|
23
|
-
|
|
20
|
+
Summarize the workflow outcome clearly. Include only user-relevant project context: target repo, branch, changed files, validation results, and actionable next steps.
|
|
@@ -161,7 +161,7 @@
|
|
|
161
161
|
"missions": {
|
|
162
162
|
"enabled": true,
|
|
163
163
|
"defaultAutonomy": "approval_gated",
|
|
164
|
-
"maxRuntimeHours":
|
|
164
|
+
"maxRuntimeHours": 13,
|
|
165
165
|
"checkpointIntervalMinutes": 30,
|
|
166
166
|
"requireApprovalForDestructiveActions": true,
|
|
167
167
|
"requireValidationPerMilestone": true,
|
|
@@ -170,14 +170,15 @@
|
|
|
170
170
|
"autoRunAfterApproval": true,
|
|
171
171
|
"offerReviewerBeforeApprove": false,
|
|
172
172
|
"autoRunReviewerBeforeApprove": false,
|
|
173
|
-
"autoRepairReviewFailures":
|
|
174
|
-
"reviewRetryMode": "
|
|
175
|
-
"maxReviewRetriesPerMission":
|
|
173
|
+
"autoRepairReviewFailures": true,
|
|
174
|
+
"reviewRetryMode": "safe_only",
|
|
175
|
+
"maxReviewRetriesPerMission": 2,
|
|
176
176
|
"continueAcrossMilestones": true,
|
|
177
177
|
"pauseBetweenMilestones": false,
|
|
178
178
|
"progressWidgetEnabled": true,
|
|
179
179
|
"progressOutputMode": "compact",
|
|
180
180
|
"showProgressBar": true,
|
|
181
|
+
"missionHistoryLimit": 50,
|
|
181
182
|
"heartbeatEnabled": true,
|
|
182
183
|
"watchdogEnabled": false,
|
|
183
184
|
"watchdogStaleMinutes": 30,
|
|
@@ -232,10 +233,11 @@
|
|
|
232
233
|
"useSubagentsBeforeClarification": true
|
|
233
234
|
},
|
|
234
235
|
"safety": {
|
|
235
|
-
"repoLockEnabled":
|
|
236
|
-
"disableBashInPlanMode":
|
|
236
|
+
"repoLockEnabled": true,
|
|
237
|
+
"disableBashInPlanMode": false,
|
|
237
238
|
"disableBashInValidatorMode": true,
|
|
238
|
-
"blockDestructiveCommands": true
|
|
239
|
+
"blockDestructiveCommands": true,
|
|
240
|
+
"allowPackageInstallInExecution": true
|
|
239
241
|
},
|
|
240
242
|
"ui": {
|
|
241
243
|
"showWorkflowStatus": true,
|
|
@@ -263,7 +265,8 @@
|
|
|
263
265
|
"startupVisualOnSessionStart": true,
|
|
264
266
|
"customBrandEnabled": false,
|
|
265
267
|
"customBrandText": "",
|
|
266
|
-
"customBrandBaseVisual": "mission_control"
|
|
268
|
+
"customBrandBaseVisual": "mission_control",
|
|
269
|
+
"debugPlanStepTracking": false
|
|
267
270
|
},
|
|
268
271
|
"shortcuts": {
|
|
269
272
|
"planMode": null
|
|
@@ -303,7 +306,8 @@
|
|
|
303
306
|
"requireParallelEditConflictProtection": true,
|
|
304
307
|
"planningOrchestrationPolicy": "orchestrator_first",
|
|
305
308
|
"subagentTimeoutMinutes": 20,
|
|
306
|
-
"subagentStaleMinutes": 8
|
|
309
|
+
"subagentStaleMinutes": 8,
|
|
310
|
+
"allowBackgroundSubagents": true
|
|
307
311
|
},
|
|
308
312
|
"planning": {
|
|
309
313
|
"clarificationMode": "auto",
|
|
@@ -321,8 +325,6 @@
|
|
|
321
325
|
"compactionModel": "",
|
|
322
326
|
"compactionAgent": "",
|
|
323
327
|
"customCompactionEnabled": false,
|
|
324
|
-
"autoCompactionEnabled": false,
|
|
325
|
-
"compactionTriggerPercent": 85,
|
|
326
328
|
"compactionCooldownMinutes": 5,
|
|
327
329
|
"customCompactionReserveTokens": 16384,
|
|
328
330
|
"customCompactionKeepRecentTokens": 20000,
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
<svg xmlns="http://www.w3.org/2000/svg" width="144" height="42" viewBox="0 0 144 42" role="img" aria-label="Commands">
|
|
2
|
+
<defs>
|
|
3
|
+
<linearGradient id="g" x1="0" y1="0" x2="1" y2="1">
|
|
4
|
+
<stop offset="0" stop-color="#781d6b"/>
|
|
5
|
+
<stop offset="1" stop-color="#217598"/>
|
|
6
|
+
</linearGradient>
|
|
7
|
+
</defs>
|
|
8
|
+
<rect x="1" y="1" width="142" height="40" rx="20" fill="url(#g)" stroke="#1a3c57" stroke-width="2"/>
|
|
9
|
+
<text x="72.0" y="27" text-anchor="middle" font-family="Arial, Helvetica, sans-serif" font-size="16" font-weight="700" fill="#ffffff">Commands</text>
|
|
10
|
+
</svg>
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
<svg xmlns="http://www.w3.org/2000/svg" width="112" height="42" viewBox="0 0 112 42" role="img" aria-label="Install">
|
|
2
|
+
<defs>
|
|
3
|
+
<linearGradient id="g" x1="0" y1="0" x2="1" y2="1">
|
|
4
|
+
<stop offset="0" stop-color="#781d6b"/>
|
|
5
|
+
<stop offset="1" stop-color="#217598"/>
|
|
6
|
+
</linearGradient>
|
|
7
|
+
</defs>
|
|
8
|
+
<rect x="1" y="1" width="110" height="40" rx="20" fill="url(#g)" stroke="#1a3c57" stroke-width="2"/>
|
|
9
|
+
<text x="56.0" y="27" text-anchor="middle" font-family="Arial, Helvetica, sans-serif" font-size="16" font-weight="700" fill="#ffffff">Install</text>
|
|
10
|
+
</svg>
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
<svg xmlns="http://www.w3.org/2000/svg" width="156" height="42" viewBox="0 0 156 42" role="img" aria-label="Quick Start">
|
|
2
|
+
<defs>
|
|
3
|
+
<linearGradient id="g" x1="0" y1="0" x2="1" y2="1">
|
|
4
|
+
<stop offset="0" stop-color="#781d6b"/>
|
|
5
|
+
<stop offset="1" stop-color="#217598"/>
|
|
6
|
+
</linearGradient>
|
|
7
|
+
</defs>
|
|
8
|
+
<rect x="1" y="1" width="154" height="40" rx="20" fill="url(#g)" stroke="#1a3c57" stroke-width="2"/>
|
|
9
|
+
<text x="78.0" y="27" text-anchor="middle" font-family="Arial, Helvetica, sans-serif" font-size="16" font-weight="700" fill="#ffffff">Quick Start</text>
|
|
10
|
+
</svg>
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
<svg xmlns="http://www.w3.org/2000/svg" width="126" height="42" viewBox="0 0 126 42" role="img" aria-label="Settings">
|
|
2
|
+
<defs>
|
|
3
|
+
<linearGradient id="g" x1="0" y1="0" x2="1" y2="1">
|
|
4
|
+
<stop offset="0" stop-color="#781d6b"/>
|
|
5
|
+
<stop offset="1" stop-color="#217598"/>
|
|
6
|
+
</linearGradient>
|
|
7
|
+
</defs>
|
|
8
|
+
<rect x="1" y="1" width="124" height="40" rx="20" fill="url(#g)" stroke="#1a3c57" stroke-width="2"/>
|
|
9
|
+
<text x="63.0" y="27" text-anchor="middle" font-family="Arial, Helvetica, sans-serif" font-size="16" font-weight="700" fill="#ffffff">Settings</text>
|
|
10
|
+
</svg>
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -26,6 +26,7 @@ import { StringEnum } from "@earendil-works/pi-ai";
|
|
|
26
26
|
import { type ExtensionAPI, getAgentDir, getMarkdownTheme, withFileMutationQueue } from "@earendil-works/pi-coding-agent";
|
|
27
27
|
import { Type } from "typebox";
|
|
28
28
|
import { loadWorkflowSettings } from "../workflow-model-router.js";
|
|
29
|
+
import { trackSubagentPid, untrackSubagentPid } from "./runner.js";
|
|
29
30
|
import { type AgentConfig, type AgentScope, type AgentSource, discoverAgents } from "./agents.js";
|
|
30
31
|
|
|
31
32
|
const requireFromExtension = createRequire(import.meta.url);
|
|
@@ -106,8 +107,8 @@ class SafeContainer {
|
|
|
106
107
|
}
|
|
107
108
|
}
|
|
108
109
|
|
|
109
|
-
const MAX_PARALLEL_TASKS =
|
|
110
|
-
const
|
|
110
|
+
const MAX_PARALLEL_TASKS = 16;
|
|
111
|
+
const DEFAULT_CONCURRENCY = 8;
|
|
111
112
|
const COLLAPSED_ITEM_COUNT = 10;
|
|
112
113
|
const REPOLOCK_GUARD_EXTENSION = path.join(path.dirname(new URL(import.meta.url).pathname), "repolock-guard.ts");
|
|
113
114
|
|
|
@@ -359,6 +360,7 @@ async function runSingleAgent(
|
|
|
359
360
|
agentName: string,
|
|
360
361
|
task: string,
|
|
361
362
|
cwd: string | undefined,
|
|
363
|
+
workflowPhase: string | undefined,
|
|
362
364
|
step: number | undefined,
|
|
363
365
|
signal: AbortSignal | undefined,
|
|
364
366
|
limits: { timeoutMinutes?: number; staleMinutes?: number } | undefined,
|
|
@@ -449,9 +451,11 @@ async function runSingleAgent(
|
|
|
449
451
|
...process.env,
|
|
450
452
|
PI_SUBAGENT_WORKER: "1",
|
|
451
453
|
PI_SUBAGENT_NAME: agent.name,
|
|
454
|
+
...(workflowPhase ? { PI_WORKFLOW_SUBAGENT_PHASE: workflowPhase } : {}),
|
|
452
455
|
...(lockRoot ? { PI_WORKFLOW_REPO_LOCK_ENABLED: "1", PI_WORKFLOW_REPO_LOCK_ROOT: lockRoot } : {}),
|
|
453
456
|
},
|
|
454
457
|
});
|
|
458
|
+
if (proc.pid) trackSubagentPid(proc.pid);
|
|
455
459
|
let buffer = "";
|
|
456
460
|
let lastOutputAt = Date.now();
|
|
457
461
|
let settled = false;
|
|
@@ -460,9 +464,9 @@ async function runSingleAgent(
|
|
|
460
464
|
timeoutReason = reason;
|
|
461
465
|
wasAborted = true;
|
|
462
466
|
currentResult.errorMessage = reason;
|
|
463
|
-
proc.kill("SIGTERM");
|
|
467
|
+
try { process.kill(-proc.pid!, "SIGTERM"); } catch { proc.kill("SIGTERM"); }
|
|
464
468
|
setTimeout(() => {
|
|
465
|
-
if (!proc.killed) proc.kill("SIGKILL");
|
|
469
|
+
if (!proc.killed) { try { process.kill(-proc.pid!, "SIGKILL"); } catch { proc.kill("SIGKILL"); } }
|
|
466
470
|
}, 5000);
|
|
467
471
|
};
|
|
468
472
|
const timeoutTimer = setTimeout(() => stopProcess(`Sub-agent timed out after ${Math.round(timeoutMs / 60000)} minute(s).`), timeoutMs);
|
|
@@ -520,11 +524,15 @@ async function runSingleAgent(
|
|
|
520
524
|
currentResult.stderr += data.toString();
|
|
521
525
|
});
|
|
522
526
|
|
|
523
|
-
proc.on("close", (code) => {
|
|
527
|
+
proc.on("close", (code) => { if (proc.pid) untrackSubagentPid(proc.pid);
|
|
524
528
|
settled = true;
|
|
525
529
|
clearTimeout(timeoutTimer);
|
|
526
530
|
clearInterval(staleTimer);
|
|
527
531
|
if (buffer.trim()) processLine(buffer);
|
|
532
|
+
// Kill process group to clean up background child processes
|
|
533
|
+
// (dev servers, static servers, tools — any program the sub-agent started).
|
|
534
|
+
// process.kill(-pid) signals the entire process group; works on all Unix.
|
|
535
|
+
try { if (proc.pid) process.kill(-proc.pid, "SIGTERM"); } catch { /* group empty */ }
|
|
528
536
|
resolve(code ?? 0);
|
|
529
537
|
});
|
|
530
538
|
|
|
@@ -598,7 +606,8 @@ const SubagentParams = Type.Object({
|
|
|
598
606
|
Type.Boolean({ description: "Prompt before running project-local agents. Default: true.", default: true }),
|
|
599
607
|
),
|
|
600
608
|
cwd: Type.Optional(Type.String({ description: "Working directory for the agent process (single mode)" })),
|
|
601
|
-
})
|
|
609
|
+
concurrency: Type.Optional(Type.Number({ description: "Max concurrent sub-agents for parallel mode. Default: 8.", minimum: 1, maximum: 16 })),
|
|
610
|
+
failFast: Type.Optional(Type.Boolean({ description: "Stop remaining tasks on first failure. Default: false.", default: false })),});
|
|
602
611
|
|
|
603
612
|
export default function (pi: ExtensionAPI) {
|
|
604
613
|
pi.registerTool({
|
|
@@ -713,10 +722,13 @@ export default function (pi: ExtensionAPI) {
|
|
|
713
722
|
if (params.chain && params.chain.length > 0) {
|
|
714
723
|
const results: SingleResult[] = [];
|
|
715
724
|
let previousOutput = "";
|
|
725
|
+
// Named step outputs keyed by the step's `as` label. A null-prototype object is used so that
// labels such as "constructor", "toString" or "__proto__" behave like ordinary keys instead of
// resolving to (or being swallowed by) members inherited from Object.prototype.
const chainOutputs: Record<string, string> = Object.create(null);
|
|
716
726
|
|
|
717
727
|
for (let i = 0; i < params.chain.length; i++) {
|
|
718
728
|
const step = params.chain[i];
|
|
719
|
-
|
|
729
|
+
// Substitute {previous} with the prior step's output. A replacer function is used (not a string)
// so that "$$", "$&", "$`" and "$'" inside the output are inserted verbatim rather than being
// interpreted as String.prototype.replace substitution patterns.
let taskWithContext = step.task.replace(/\{previous\}/g, () => previousOutput);
|
|
730
|
+
// Replace {outputs.name} with named outputs from prior steps
|
|
731
|
+
taskWithContext = taskWithContext.replace(/\{outputs\.([^}]+)\}/g, (_match, name: string) => chainOutputs[name.trim()] ?? `{outputs.${name}}`);
|
|
720
732
|
|
|
721
733
|
// Create update callback that includes all previous results
|
|
722
734
|
const chainUpdate: OnUpdateCallback | undefined = onUpdate
|
|
@@ -739,6 +751,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
739
751
|
step.agent,
|
|
740
752
|
taskWithContext,
|
|
741
753
|
step.cwd,
|
|
754
|
+
params.workflowPhase,
|
|
742
755
|
i + 1,
|
|
743
756
|
signal,
|
|
744
757
|
subagentLimits,
|
|
@@ -747,22 +760,29 @@ export default function (pi: ExtensionAPI) {
|
|
|
747
760
|
);
|
|
748
761
|
results.push(result);
|
|
749
762
|
|
|
763
|
+
// ── Chain mode resiliency (#2): continue on individual failure ──
|
|
750
764
|
const isError =
|
|
751
765
|
result.exitCode !== 0 || result.stopReason === "error" || result.stopReason === "aborted";
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
766
|
+
const stepOutput = isError
|
|
767
|
+
? result.errorMessage || result.stderr || getFinalOutput(result.messages) || "(step failed)"
|
|
768
|
+
: getFinalOutput(result.messages);
|
|
769
|
+
previousOutput = stepOutput;
|
|
770
|
+
// Store named output for downstream {outputs.name} references
|
|
771
|
+
const stepAs = (step as Record<string, unknown>).as;
|
|
772
|
+
if (typeof stepAs === "string" && stepAs.trim()) {
|
|
773
|
+
chainOutputs[stepAs.trim()] = stepOutput;
|
|
760
774
|
}
|
|
761
|
-
previousOutput = getFinalOutput(result.messages);
|
|
762
775
|
}
|
|
776
|
+
// Report all results — successes and failures
|
|
777
|
+
const failedSteps = results.filter((r) => r.exitCode !== 0 || r.stopReason === "error" || r.stopReason === "aborted");
|
|
778
|
+
const successCount = results.length - failedSteps.length;
|
|
779
|
+
const summaryText = successCount === results.length
|
|
780
|
+
? getFinalOutput(results[results.length - 1].messages) || "(no output)"
|
|
781
|
+
// Number each failure by its position in the full chain (results), not by its index inside
// the filtered failedSteps array, so the reported step matches the step that actually failed.
: `${successCount}/${results.length} steps succeeded. Failed: ${failedSteps.map((r) => `step ${results.indexOf(r) + 1} (${r.agent}): ${r.errorMessage || r.stderr || "(no output)"}`).join("; ")}`;
|
|
763
782
|
return {
|
|
764
|
-
content: [{ type: "text", text:
|
|
783
|
+
content: [{ type: "text", text: summaryText }],
|
|
765
784
|
details: makeDetails("chain")(results),
|
|
785
|
+
isError: failedSteps.length > 0 ? true : undefined,
|
|
766
786
|
};
|
|
767
787
|
}
|
|
768
788
|
|
|
@@ -807,13 +827,15 @@ export default function (pi: ExtensionAPI) {
|
|
|
807
827
|
}
|
|
808
828
|
};
|
|
809
829
|
|
|
810
|
-
const
|
|
830
|
+
// Worker count for parallel mode. The schema accepts any number, so floor fractional input
// (e.g. 2.5) to a whole worker count; missing or < 1 values fall back to DEFAULT_CONCURRENCY.
const concurrency = typeof params.concurrency === "number" && params.concurrency >= 1 ? Math.floor(params.concurrency) : DEFAULT_CONCURRENCY;
|
|
831
|
+
const results = await mapWithConcurrencyLimit(params.tasks, concurrency, async (t, index) => {
|
|
811
832
|
const result = await runSingleAgent(
|
|
812
833
|
ctx.cwd,
|
|
813
834
|
agents,
|
|
814
835
|
t.agent,
|
|
815
836
|
t.task,
|
|
816
837
|
t.cwd,
|
|
838
|
+
params.workflowPhase,
|
|
817
839
|
undefined,
|
|
818
840
|
signal,
|
|
819
841
|
subagentLimits,
|
|
@@ -855,6 +877,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
855
877
|
params.agent,
|
|
856
878
|
params.task,
|
|
857
879
|
params.cwd,
|
|
880
|
+
params.workflowPhase,
|
|
858
881
|
undefined,
|
|
859
882
|
signal,
|
|
860
883
|
subagentLimits,
|