@wnlen/agent-execution-template 0.8.16 → 0.8.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +26 -0
- package/README.zh-CN.md +24 -0
- package/bin/agent-execution-template.js +10 -1
- package/docs/SPEC.md +9 -4
- package/package.json +1 -1
- package/template/en/ai/README.md +1 -0
- package/template/en/ai/project/task.md +26 -3
- package/template/en/ai/template/VERSION +1 -1
- package/template/en/ai/template/execution-policy.md +109 -0
- package/template/en/ai/template/prompt.md +19 -12
- package/template/en/ai/template/protocol.md +9 -64
- package/template/en/ai/template/rules/core.md +10 -27
- package/template/zh/ai/README.md +1 -0
- package/template/zh/ai/project/task.md +20 -2
- package/template/zh/ai/template/VERSION +1 -1
- package/template/zh/ai/template/execution-policy.md +95 -0
- package/template/zh/ai/template/prompt.md +13 -8
- package/template/zh/ai/template/protocol.md +7 -50
- package/template/zh/ai/template/rules/core.md +9 -20
- package/test/selftest.js +41 -10
package/README.md
CHANGED
|
@@ -153,6 +153,31 @@ npx -y @wnlen/agent-execution-template strategy --lang en
|
|
|
153
153
|
| Upgradeable template | Reuse protocol improvements without losing local project memory. |
|
|
154
154
|
| Doctor checks | Validate required files and template version before running the agent. |
|
|
155
155
|
|
|
156
|
+
## How Automatic Continuous Execution Works
|
|
157
|
+
|
|
158
|
+
The user can still give a natural-language goal, for example:
|
|
159
|
+
|
|
160
|
+
```text
|
|
161
|
+
Build the settings page with profile editing, notification toggles, and export entrypoint
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
Before execution, the AI decomposes the goal into L1 tasks:
|
|
165
|
+
|
|
166
|
+
```text
|
|
167
|
+
- [ ] L1-1 Profile editing Green
|
|
168
|
+
- [ ] L1-2 Notification toggles Green
|
|
169
|
+
- [ ] L1-3 Export entrypoint Yellow
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
Because there are two or more L1 tasks, the protocol automatically uses bounded
|
|
173
|
+
continuous execution. Before each L1, the AI plans naturally derived L2/L3 work.
|
|
174
|
+
After completing an L1, it checks and strikes the item, then writes status back
|
|
175
|
+
to `execution_policy.task_tree` in `ai/project/task.md`.
|
|
176
|
+
|
|
177
|
+
Only Red risk stops for confirmation. Green continues automatically, and Yellow
|
|
178
|
+
continues after local low-risk correction. Every checkpoint must include
|
|
179
|
+
evidence: changed files, commands run, and verification results.
|
|
180
|
+
|
|
156
181
|
## Installed Layout
|
|
157
182
|
|
|
158
183
|
```text
|
|
@@ -162,6 +187,7 @@ ai/
|
|
|
162
187
|
template/
|
|
163
188
|
VERSION
|
|
164
189
|
bootstrap.md
|
|
190
|
+
execution-policy.md
|
|
165
191
|
prompt.md
|
|
166
192
|
reconcile.md
|
|
167
193
|
protocol.md
|
package/README.zh-CN.md
CHANGED
|
@@ -163,6 +163,29 @@ npx -y @wnlen/agent-execution-template strategy
|
|
|
163
163
|
| 可升级模板 | 协议可以持续改进,不丢失项目本地记忆。 |
|
|
164
164
|
| Doctor 检查 | 执行前检查必要文件和模板版本。 |
|
|
165
165
|
|
|
166
|
+
## 自动连续执行怎么工作
|
|
167
|
+
|
|
168
|
+
用户仍然只需要说自然语言目标,例如:
|
|
169
|
+
|
|
170
|
+
```text
|
|
171
|
+
实现设置页,包括资料编辑、通知开关和导出入口
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
AI 会在执行前先拆 L1 任务:
|
|
175
|
+
|
|
176
|
+
```text
|
|
177
|
+
- [ ] L1-1 资料编辑 Green
|
|
178
|
+
- [ ] L1-2 通知开关 Green
|
|
179
|
+
- [ ] L1-3 导出入口 Yellow
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
因为 L1 为 2 个或更多,协议会自动使用边界内连续执行。执行每个 L1 前,AI 再规划
|
|
183
|
+
自然衍生的 L2/L3;完成一个 L1 后,在清单中打勾并划掉,同时把状态写回
|
|
184
|
+
`ai/project/task.md` 的 `execution_policy.task_tree`。
|
|
185
|
+
|
|
186
|
+
只有 Red 风险会停下来让你确认。Green 自动继续,Yellow 只做局部低风险修正后继续。
|
|
187
|
+
每个 Checkpoint 都必须带证据:改了哪些文件、跑了哪些命令、验证结果是什么。
|
|
188
|
+
|
|
166
189
|
## 安装后的结构
|
|
167
190
|
|
|
168
191
|
```text
|
|
@@ -172,6 +195,7 @@ ai/
|
|
|
172
195
|
template/
|
|
173
196
|
VERSION
|
|
174
197
|
bootstrap.md
|
|
198
|
+
execution-policy.md
|
|
175
199
|
prompt.md
|
|
176
200
|
reconcile.md
|
|
177
201
|
protocol.md
|
|
@@ -13,6 +13,7 @@ const REQUIRED_FILES = [
|
|
|
13
13
|
"ai/template/LANG",
|
|
14
14
|
"ai/template/VERSION",
|
|
15
15
|
"ai/template/bootstrap.md",
|
|
16
|
+
"ai/template/execution-policy.md",
|
|
16
17
|
"ai/template/prompt.md",
|
|
17
18
|
"ai/template/reconcile.md",
|
|
18
19
|
"ai/template/protocol.md",
|
|
@@ -48,7 +49,13 @@ const TASK_HEALTH_PATTERNS = [
|
|
|
48
49
|
/^type:\s*/m,
|
|
49
50
|
/^priority:\s*/m,
|
|
50
51
|
/^risk_level:\s*/m,
|
|
52
|
+
/^readiness:\s*/m,
|
|
51
53
|
/^execution_policy:/m,
|
|
54
|
+
/^\s+mode:\s*/m,
|
|
55
|
+
/^\s+activation_rule:\s*/m,
|
|
56
|
+
/^\s+task_tree:/m,
|
|
57
|
+
/^\s+risk_gate:/m,
|
|
58
|
+
/^\s+evidence_required:\s*/m,
|
|
52
59
|
/^model_policy:/m,
|
|
53
60
|
/^refs:/m,
|
|
54
61
|
/^permission:/m
|
|
@@ -146,6 +153,7 @@ const TEXT = {
|
|
|
146
153
|
nextTellAgent: "把这句话发给你的 AI coding 工具:",
|
|
147
154
|
nextRunCommand: "运行这个命令:",
|
|
148
155
|
nextReviewProposal: "已有方向修订提案。先审查提案;确认后对 AI 说:",
|
|
156
|
+
nextContinuePrompt: "继续推进这个项目。执行前先拆 L1 任务;若 L1 >= 2,自动启用边界内连续执行;只有 Red 风险停下来确认。",
|
|
149
157
|
repairHint: "缺失的 project 推荐文件可通过重新运行 init 安全补齐;已有 ai/project/** 不会被覆盖。",
|
|
150
158
|
permissionDenied: "无法写入目标路径",
|
|
151
159
|
permissionHint: `请检查 ai/** 的归属和权限。常见修复:
|
|
@@ -250,6 +258,7 @@ Usage:
|
|
|
250
258
|
nextTellAgent: "Send this to your AI coding tool:",
|
|
251
259
|
nextRunCommand: "Run this command:",
|
|
252
260
|
nextReviewProposal: "A direction amendment proposal exists. Review it first; after confirmation, tell the AI:",
|
|
261
|
+
nextContinuePrompt: "Continue this project. Before execution, decompose L1 tasks; if L1 >= 2, automatically use bounded continuous execution; only Red risk stops for confirmation.",
|
|
253
262
|
repairHint: "Missing recommended project files can be safely added by running init again; existing ai/project/** files are not overwritten.",
|
|
254
263
|
permissionDenied: "Cannot write target path",
|
|
255
264
|
permissionHint: `Check ownership and permissions under ai/**. Common fix:
|
|
@@ -583,7 +592,7 @@ function next({ lang = readInstalledLang() } = {}) {
|
|
|
583
592
|
}
|
|
584
593
|
|
|
585
594
|
console.log(`${text.nextTellAgent}
|
|
586
|
-
${
|
|
595
|
+
${text.nextContinuePrompt}
|
|
587
596
|
`);
|
|
588
597
|
}
|
|
589
598
|
|
package/docs/SPEC.md
CHANGED
|
@@ -22,7 +22,7 @@ npx 安装协议 -> AI 整理项目上下文 -> 人类确认 -> AI 生成任务
|
|
|
22
22
|
|
|
23
23
|
```text
|
|
24
24
|
Protocol: v0.8
|
|
25
|
-
Package: @wnlen/agent-execution-template@0.8.16
|
|
25
|
+
Package: @wnlen/agent-execution-template@0.8.17
|
|
26
26
|
中文安装: npx -y @wnlen/agent-execution-template init
|
|
27
27
|
英文安装: npx -y @wnlen/agent-execution-template init --lang en
|
|
28
28
|
```
|
|
@@ -181,6 +181,7 @@ ai/
|
|
|
181
181
|
template/
|
|
182
182
|
VERSION
|
|
183
183
|
bootstrap.md
|
|
184
|
+
execution-policy.md
|
|
184
185
|
prompt.md
|
|
185
186
|
reconcile.md
|
|
186
187
|
protocol.md
|
|
@@ -243,6 +244,7 @@ project 是现场。
|
|
|
243
244
|
```text
|
|
244
245
|
ai/template/VERSION
|
|
245
246
|
ai/template/bootstrap.md
|
|
247
|
+
ai/template/execution-policy.md
|
|
246
248
|
ai/template/prompt.md
|
|
247
249
|
ai/template/reconcile.md
|
|
248
250
|
ai/template/protocol.md
|
|
@@ -390,12 +392,13 @@ npx -y @wnlen/agent-execution-template doctor
|
|
|
390
392
|
```text
|
|
391
393
|
Agent Execution Template 检查
|
|
392
394
|
|
|
393
|
-
模板版本: 0.8.16
|
|
395
|
+
模板版本: 0.8.17
|
|
394
396
|
模板语言: zh
|
|
395
397
|
|
|
396
398
|
[通过] ai/template/LANG
|
|
397
399
|
[通过] ai/template/VERSION
|
|
398
400
|
[通过] ai/template/bootstrap.md
|
|
401
|
+
[通过] ai/template/execution-policy.md
|
|
399
402
|
[通过] ai/template/prompt.md
|
|
400
403
|
[通过] ai/template/reconcile.md
|
|
401
404
|
[通过] ai/template/protocol.md
|
|
@@ -648,10 +651,11 @@ apply_strategy_update
|
|
|
648
651
|
|
|
649
652
|
## 14. 执行授权策略
|
|
650
653
|
|
|
651
|
-
|
|
654
|
+
执行策略入口写在:
|
|
652
655
|
|
|
653
656
|
```text
|
|
654
657
|
ai/project/task.md.execution_policy
|
|
658
|
+
ai/template/execution-policy.md
|
|
655
659
|
```
|
|
656
660
|
|
|
657
661
|
默认模式是 `auto`。AI 每次执行前先做任务分解和风险判断,再决定使用
|
|
@@ -665,12 +669,13 @@ ai/project/task.md.execution_policy
|
|
|
665
669
|
- L1 为 2 个或更多时自动使用 `bounded_continuous`;
|
|
666
670
|
- 任一 L1 为 Red 时停止等待人类确认;Green 和 Yellow 不阻塞启动。
|
|
667
671
|
|
|
668
|
-
`bounded_continuous`
|
|
672
|
+
`bounded_continuous` 规则集中在 `ai/template/execution-policy.md`。核心要求:
|
|
669
673
|
|
|
670
674
|
- 按 L1 -> L2 -> L3 执行,执行 L1 前规划 L2,执行 L2 前按需规划 L3;
|
|
671
675
|
- 默认最多 3 层,只有当 L3 仍过大、不可验证或不可回退时才动态增加 L4;
|
|
672
676
|
- 每个任务节点必须有风险评级、预期改动范围、验收方式和证据要求;
|
|
673
677
|
- L1 清单必须用待办列表展示,每完成一个 L1 就打勾并划掉;
|
|
678
|
+
- 执行前和执行中必须把任务树写回 `ai/project/task.md.execution_policy.task_tree`;
|
|
674
679
|
- 默认按 `vertical_slice` 推进,每轮都要产生可检查增量;
|
|
675
680
|
- 每个 Checkpoint 必须包含证据:已改文件、已运行命令、验证结果或无法验证原因;
|
|
676
681
|
- Green 可自动继续;
|
package/package.json
CHANGED
package/template/en/ai/README.md
CHANGED
|
@@ -11,6 +11,7 @@ project is the field workspace
|
|
|
11
11
|
|
|
12
12
|
- `template/prompt.md`: AI startup prompt.
|
|
13
13
|
- `template/bootstrap.md`: project discovery and context bootstrap prompt.
|
|
14
|
+
- `template/execution-policy.md`: automatic continuous execution, task tree, risk rubric, and checkpoint rules.
|
|
14
15
|
- `template/reconcile.md`: merge new authoritative material into existing project context.
|
|
15
16
|
- `template/VERSION`: installed template version.
|
|
16
17
|
- `template/protocol.md`: bootstrap flow, execution flow, model division, sync rules.
|
|
@@ -3,6 +3,7 @@ task_id: ""
|
|
|
3
3
|
type: "bugfix | feature | refactor | docs | config | test | research | strategy_update | apply_strategy_update"
|
|
4
4
|
priority: "P0 | P1 | P2 | P3"
|
|
5
5
|
risk_level: "low | medium | high"
|
|
6
|
+
readiness: "draft_for_confirmation | ready_to_execute | blocked"
|
|
6
7
|
depends_on_previous_result: false
|
|
7
8
|
execution_policy:
|
|
8
9
|
mode: "auto | normal | bounded_continuous"
|
|
@@ -10,7 +11,17 @@ execution_policy:
|
|
|
10
11
|
max_depth: 3
|
|
11
12
|
allow_depth_4_when_needed: true
|
|
12
13
|
progress_unit: "vertical_slice"
|
|
13
|
-
task_tree:
|
|
14
|
+
task_tree:
|
|
15
|
+
- id: "L1-1"
|
|
16
|
+
title: ""
|
|
17
|
+
risk: "Green | Yellow | Red"
|
|
18
|
+
status: "pending | running | done | blocked"
|
|
19
|
+
scope:
|
|
20
|
+
allowed: []
|
|
21
|
+
denied: []
|
|
22
|
+
acceptance: []
|
|
23
|
+
evidence: []
|
|
24
|
+
children: []
|
|
14
25
|
checkpoint_budget:
|
|
15
26
|
l1: 0
|
|
16
27
|
l2: 0
|
|
@@ -70,8 +81,11 @@ This file is the current execution contract. Prefer generating it in Bootstrap
|
|
|
70
81
|
Mode from a short human goal plus repository context, then have a human review
|
|
71
82
|
it before execution.
|
|
72
83
|
|
|
73
|
-
Prefer safe assumptions over extra questions
|
|
74
|
-
permissions,
|
|
84
|
+
Prefer safe assumptions over extra questions. The AI should infer scope, risk,
|
|
85
|
+
permissions, and acceptance from the human goal, project context, and repository
|
|
86
|
+
facts. If inference would cross permission or safety boundaries, or acceptance
|
|
87
|
+
cannot be defined, set `readiness` to `blocked` or mark the relevant task node
|
|
88
|
+
`Red` and wait for human confirmation.
|
|
75
89
|
|
|
76
90
|
## Goal
|
|
77
91
|
|
|
@@ -123,6 +137,13 @@ fewer than 2 L1 tasks, use `normal`; if it finds 2 or more L1 tasks, use
|
|
|
123
137
|
- The AI infers goal, scope, acceptance, permissions, and risk from the human
|
|
124
138
|
goal, project context, and repository facts; the human does not need to
|
|
125
139
|
provide each field upfront.
|
|
140
|
+
- `readiness = ready_to_execute` means no Red preflight item exists and the task
|
|
141
|
+
may execute.
|
|
142
|
+
- `readiness = draft_for_confirmation` means human confirmation is required
|
|
143
|
+
before execution.
|
|
144
|
+
- `readiness = blocked` means the task cannot execute and must produce a
|
|
145
|
+
blocked result.
|
|
146
|
+
- Before execution, write the L1 checklist to `execution_policy.task_tree`.
|
|
126
147
|
- Before execution, list the L1 task checklist; mark each L1 complete with a
|
|
127
148
|
checked and struck-through item.
|
|
128
149
|
- Before executing an L1, plan the naturally derived L2 tasks; if an L2 still
|
|
@@ -140,6 +161,8 @@ fewer than 2 L1 tasks, use `normal`; if it finds 2 or more L1 tasks, use
|
|
|
140
161
|
review is about to start.
|
|
141
162
|
- Every checkpoint must include evidence: changed files, commands run,
|
|
142
163
|
verification results, or why verification was not possible.
|
|
164
|
+
- During execution, update `task_tree` node status: `pending`, `running`,
|
|
165
|
+
`done`, or `blocked`.
|
|
143
166
|
- After completion, run one final review; only re-check Yellow, Red, failed
|
|
144
167
|
verification, or high-impact modules.
|
|
145
168
|
- Continuous execution does not change model policy; escalate through
|
|
@@ -1 +1 @@
|
|
|
1
|
-
0.8.16
|
|
1
|
+
0.8.17
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# Execution Policy
|
|
2
|
+
|
|
3
|
+
Do not summarize this file.
|
|
4
|
+
During task execution, use this file to choose `normal` or `bounded_continuous`.
|
|
5
|
+
|
|
6
|
+
## Default Policy
|
|
7
|
+
|
|
8
|
+
The default execution policy is `auto`: before each execution, the AI first
|
|
9
|
+
decomposes the task and judges risk, then chooses `normal` or
|
|
10
|
+
`bounded_continuous`. Continuous execution does not depend on a human keyword.
|
|
11
|
+
|
|
12
|
+
Pre-execution planning must:
|
|
13
|
+
|
|
14
|
+
- Infer goal, scope, acceptance, permissions, and verification method from the
|
|
15
|
+
human goal, project context, and repository facts.
|
|
16
|
+
- List the L1 task checklist and assign Green / Yellow / Red risk to each L1.
|
|
17
|
+
- Use `normal` if there are fewer than 2 L1 tasks.
|
|
18
|
+
- Automatically use `bounded_continuous` if there are 2 or more L1 tasks.
|
|
19
|
+
- Stop for human confirmation first if any L1 is Red; Green and Yellow do not
|
|
20
|
+
block startup.
|
|
21
|
+
- Write the task tree to `execution_policy.task_tree` in `ai/project/task.md`.
|
|
22
|
+
|
|
23
|
+
## Task Tree
|
|
24
|
+
|
|
25
|
+
Execute the task tree in L1 -> L2 -> L3 order.
|
|
26
|
+
|
|
27
|
+
- Before executing an L1, plan its naturally derived L2 tasks.
|
|
28
|
+
- Before executing an L2, plan L3 tasks if it still needs decomposition.
|
|
29
|
+
- Default to at most 3 levels. Add L4 dynamically only when L3 would otherwise
|
|
30
|
+
be too large, unverifiable, or hard to revert.
|
|
31
|
+
- Every L1/L2/L3/L4 node must have risk, expected edit scope, acceptance method,
|
|
32
|
+
and evidence requirements.
|
|
33
|
+
- Show the L1 checklist as task items; when an L1 is complete, check it off and
|
|
34
|
+
strike it through.
|
|
35
|
+
- During execution, update each `task_tree` node status: `pending`, `running`,
|
|
36
|
+
`done`, or `blocked`.
|
|
37
|
+
|
|
38
|
+
Recommended node shape:
|
|
39
|
+
|
|
40
|
+
```yaml
|
|
41
|
+
id: "L1-1"
|
|
42
|
+
title: ""
|
|
43
|
+
risk: "Green | Yellow | Red"
|
|
44
|
+
status: "pending | running | done | blocked"
|
|
45
|
+
scope:
|
|
46
|
+
allowed: []
|
|
47
|
+
denied: []
|
|
48
|
+
acceptance: []
|
|
49
|
+
evidence: []
|
|
50
|
+
children: []
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## Risk Rubric
|
|
54
|
+
|
|
55
|
+
Green:
|
|
56
|
+
|
|
57
|
+
- Inside current task scope;
|
|
58
|
+
- no new permission, command, network access, or destructive action is needed;
|
|
59
|
+
- acceptance is clear;
|
|
60
|
+
- no product direction, core architecture, data structure, security boundary,
|
|
61
|
+
payment, account, or permission change is needed.
|
|
62
|
+
|
|
63
|
+
Yellow:
|
|
64
|
+
|
|
65
|
+
- Still inside current task scope;
|
|
66
|
+
- local uncertainty or local verification failure exists;
|
|
67
|
+
- a low-risk local correction can continue the work;
|
|
68
|
+
- no permission, scope, command, or acceptance expansion is needed.
|
|
69
|
+
|
|
70
|
+
Red:
|
|
71
|
+
|
|
72
|
+
- Permission expansion, unallowed command, network access, or destructive action
|
|
73
|
+
is needed;
|
|
74
|
+
- product direction, core architecture, data structure, security boundary,
|
|
75
|
+
payment, account, or permission would change;
|
|
76
|
+
- many files must be deleted, a core module must be rewritten, or multiple
|
|
77
|
+
high-cost options require judgment;
|
|
78
|
+
- acceptance cannot be defined, or task goal materially conflicts with project direction.
|
|
79
|
+
|
|
80
|
+
Only Red stops for human confirmation. Green continues automatically. Yellow
|
|
81
|
+
continues after local low-risk correction.
|
|
82
|
+
|
|
83
|
+
## Checkpoint
|
|
84
|
+
|
|
85
|
+
Emit checkpoints only when risk rises, a boundary is about to change, a vertical
|
|
86
|
+
slice is complete, or final review is about to start. Do not report just to
|
|
87
|
+
spend checkpoint budget.
|
|
88
|
+
|
|
89
|
+
Every checkpoint must include:
|
|
90
|
+
|
|
91
|
+
```text
|
|
92
|
+
## Checkpoint
|
|
93
|
+
### Task Tree
|
|
94
|
+
### Progress
|
|
95
|
+
### Completed
|
|
96
|
+
### Evidence
|
|
97
|
+
### Drift Risk: Green / Yellow / Red
|
|
98
|
+
### Recommended Next Step
|
|
99
|
+
### Auto-Continue Decision
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
Evidence must include changed files, commands run, verification results, or why
|
|
103
|
+
verification was not possible. A purely subjective Green is not valid.
|
|
104
|
+
|
|
105
|
+
## Model Policy
|
|
106
|
+
|
|
107
|
+
Continuous execution does not change `model_policy`. Still escalate through
|
|
108
|
+
`model_policy` for planning, architecture, failure review, or acceptance
|
|
109
|
+
disputes, and record the reason in `ai/project/metrics.json`.
|
|
@@ -9,6 +9,7 @@ First read:
|
|
|
9
9
|
|
|
10
10
|
1. `ai/template/protocol.md`
|
|
11
11
|
2. `ai/template/rules/core.md`
|
|
12
|
+
3. `ai/template/execution-policy.md`
|
|
12
13
|
|
|
13
14
|
Then choose the mode:
|
|
14
15
|
|
|
@@ -64,13 +65,17 @@ In Task Draft Mode:
|
|
|
64
65
|
risk from the user's current goal, project context, and repository facts; do
|
|
65
66
|
not require the human to provide each field upfront.
|
|
66
67
|
3. Draft `ai/project/task.md` and set `execution_policy.mode` to `auto`.
|
|
67
|
-
4. Before execution, list the L1 checklist
|
|
68
|
-
|
|
69
|
-
`bounded_continuous` if there are 2
|
|
70
|
-
|
|
68
|
+
4. Before execution, list the L1 checklist, mark each L1 Green / Yellow / Red,
|
|
69
|
+
and write it to `execution_policy.task_tree`. Use `normal` if there are
|
|
70
|
+
fewer than 2 L1 tasks; automatically use `bounded_continuous` if there are 2
|
|
71
|
+
or more L1 tasks.
|
|
72
|
+
5. If no Red preflight item exists, set `readiness` to `ready_to_execute`; if
|
|
73
|
+
human confirmation is needed, set it to `draft_for_confirmation`; if the task
|
|
74
|
+
cannot execute, set it to `blocked`.
|
|
75
|
+
6. Stop for human confirmation only when a Red preflight item appears. If the
|
|
71
76
|
human asked to execute or continue, and preflight contains only Green /
|
|
72
77
|
Yellow, proceed directly to Execution Mode.
|
|
73
|
-
|
|
78
|
+
7. Do not modify source or business files in Task Draft Mode.
|
|
74
79
|
|
|
75
80
|
End Task Draft Mode with:
|
|
76
81
|
|
|
@@ -121,13 +126,15 @@ In Execution Mode, read:
|
|
|
121
126
|
2. `ai/project/runtime.md`
|
|
122
127
|
3. `ai/project/task.md`
|
|
123
128
|
|
|
124
|
-
Then
|
|
125
|
-
/ Yellow / Red, and
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
129
|
+
Then follow `ai/template/execution-policy.md` for pre-execution planning: list
|
|
130
|
+
the L1 checklist, mark each L1 Green / Yellow / Red, and write it to
|
|
131
|
+
`execution_policy.task_tree`. Automatically choose `normal` or
|
|
132
|
+
`bounded_continuous` from the L1 count. Plan L2 before executing an L1, and
|
|
133
|
+
plan L3 as needed before executing an L2; default to at most 3 levels, with L4
|
|
134
|
+
allowed when needed. When an L1 is complete, check it off, strike it through,
|
|
135
|
+
and update the `task_tree` node status. Only Red stops for human confirmation;
|
|
136
|
+
Green continues automatically, and Yellow continues after local low-risk
|
|
137
|
+
correction. Write results to:
|
|
131
138
|
|
|
132
139
|
- `ai/project/result.json`
|
|
133
140
|
- `ai/project/result.md`
|
|
@@ -49,71 +49,16 @@ Project Bootstrap / Context Reconcile / Strategy Update -> Project Confirm -> Ta
|
|
|
49
49
|
|
|
50
50
|
## Execution Authorization Modes
|
|
51
51
|
|
|
52
|
-
|
|
53
|
-
decomposes the task and judges risk, then chooses `normal` or
|
|
54
|
-
`bounded_continuous`. Continuous execution does not depend on a human keyword.
|
|
55
|
-
|
|
56
|
-
Pre-execution planning must:
|
|
57
|
-
|
|
58
|
-
- Infer goal, scope, acceptance, permissions, and verification method from the
|
|
59
|
-
human goal, project context, and repository facts.
|
|
60
|
-
- List the L1 task checklist and assign Green / Yellow / Red risk to each L1.
|
|
61
|
-
- Use `normal` if there are fewer than 2 L1 tasks.
|
|
62
|
-
- Automatically use `bounded_continuous` if there are 2 or more L1 tasks.
|
|
63
|
-
- Stop for human confirmation first if any L1 is Red; Green and Yellow do not
|
|
64
|
-
block startup.
|
|
65
|
-
|
|
66
|
-
Bounded continuous execution rules:
|
|
67
|
-
|
|
68
|
-
- Execute the task tree in L1 -> L2 -> L3 order. Before executing an L1, plan
|
|
69
|
-
its naturally derived L2 tasks; before executing an L2, plan L3 tasks if it
|
|
70
|
-
still needs decomposition.
|
|
71
|
-
- Default to at most 3 levels. Add L4 dynamically only when L3 would otherwise
|
|
72
|
-
be too large, unverifiable, or hard to revert.
|
|
73
|
-
- Every L1/L2/L3/L4 node must have risk, expected edit scope, acceptance method,
|
|
74
|
-
and evidence requirements.
|
|
75
|
-
- Show the L1 checklist as task items; when an L1 is complete, check it off and
|
|
76
|
-
strike it through.
|
|
77
|
-
- Default to `vertical_slice` progress: each loop should produce a runnable,
|
|
78
|
-
reviewable, or reversible increment.
|
|
79
|
-
- The AI infers goal, scope, acceptance, and permissions, but must not cross
|
|
80
|
-
project rules, explicit human limits, `permission.modify.denied`, security
|
|
81
|
-
boundaries, or destructive-action limits.
|
|
82
|
-
- `Green` may continue automatically.
|
|
83
|
-
- `Yellow` may continue after local low-risk correction.
|
|
84
|
-
- `Red` must stop for human confirmation.
|
|
85
|
-
- If permission must expand, an unallowed command must run, network access is
|
|
86
|
-
needed, a destructive action is needed, or product direction / core
|
|
87
|
-
architecture would change, the current node must be Red.
|
|
88
|
-
- After all work is complete, run one final review; re-check only Yellow, Red,
|
|
89
|
-
failed verification, or high-impact modules.
|
|
90
|
-
- Every checkpoint must include evidence; a purely subjective Green is not valid.
|
|
91
|
-
- Continuous execution does not change model policy; still escalate through
|
|
92
|
-
`model_policy` for planning, architecture, failure review, or acceptance disputes.
|
|
93
|
-
|
|
94
|
-
Must stop when:
|
|
95
|
-
|
|
96
|
-
- The task would change product direction, core architecture, data structures,
|
|
97
|
-
security boundaries, payment, accounts, or permissions.
|
|
98
|
-
- The task would delete many files or rewrite a core module.
|
|
99
|
-
- The task outline, acceptance, or permission contains a material conflict.
|
|
100
|
-
- The current implementation affects multiple later modules and the task
|
|
101
|
-
contract does not cover that impact.
|
|
102
|
-
- Tests fail and cannot be fixed locally.
|
|
103
|
-
- There are two or more high-cost options that need human judgment.
|
|
104
|
-
|
|
105
|
-
Use this compact checkpoint format:
|
|
52
|
+
Before task execution, read `ai/template/execution-policy.md`.
|
|
106
53
|
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
### Auto-Continue Decision
|
|
116
|
-
```
|
|
54
|
+
The default execution policy is `auto`: the AI first decomposes L1 tasks and
|
|
55
|
+
judges Green / Yellow / Red risk, then chooses `normal` or `bounded_continuous`.
|
|
56
|
+
Use `normal` when there are fewer than 2 L1 tasks; automatically use
|
|
57
|
+
`bounded_continuous` when there are 2 or more L1 tasks. Only Red stops for
|
|
58
|
+
human confirmation.
|
|
59
|
+
|
|
60
|
+
Task tree, risk rubric, checkpoint evidence, and `task_tree` status update
|
|
61
|
+
rules are defined in `ai/template/execution-policy.md`.
|
|
117
62
|
|
|
118
63
|
## Bootstrap Mode
|
|
119
64
|
|
|
@@ -119,33 +119,16 @@ or dependency files unless the human explicitly authorizes it.
|
|
|
119
119
|
|
|
120
120
|
## Bounded Continuous Execution Gate
|
|
121
121
|
|
|
122
|
-
Before every execution, the AI must
|
|
123
|
-
of waiting for the human to
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
-
|
|
130
|
-
-
|
|
131
|
-
|
|
132
|
-
- Stop for human confirmation if any L1 is Red; Green and Yellow may continue.
|
|
133
|
-
|
|
134
|
-
When enabled:
|
|
135
|
-
|
|
136
|
-
- Execute in L1 -> L2 -> L3 order; plan L2 before executing an L1, and plan L3
|
|
137
|
-
as needed before executing an L2.
|
|
138
|
-
- Default to at most 3 levels; add L4 dynamically only when L3 is still too
|
|
139
|
-
large, unverifiable, or hard to revert.
|
|
140
|
-
- Show the L1 checklist as task items; when an L1 is complete, check it off and
|
|
141
|
-
strike it through.
|
|
142
|
-
- Every task node must have risk, expected edit scope, acceptance method, and
|
|
143
|
-
evidence requirements.
|
|
144
|
-
- The checkpoint budget is a maximum, not a required count.
|
|
145
|
-
- Every checkpoint must include evidence.
|
|
146
|
-
- `Green` may continue automatically.
|
|
147
|
-
- `Yellow` continues after local low-risk correction.
|
|
148
|
-
- `Red` must stop for human confirmation.
|
|
122
|
+
Before every execution, the AI must read `ai/template/execution-policy.md`,
|
|
123
|
+
decompose the task, and judge risk instead of waiting for the human to
|
|
124
|
+
explicitly say "enable continuous execution".
|
|
125
|
+
|
|
126
|
+
Hard gates:
|
|
127
|
+
|
|
128
|
+
- `execution_policy.task_tree` must record the L1 checklist and execution state.
|
|
129
|
+
- Every task node must have Green / Yellow / Red risk.
|
|
130
|
+
- Every checkpoint must include evidence; a purely subjective Green is not valid.
|
|
131
|
+
- Red must stop for human confirmation.
|
|
149
132
|
- Any product direction, core architecture, data structure, security, payment,
|
|
150
133
|
account, permission, large deletion, core rewrite, or high-cost option choice
|
|
151
134
|
must stop.
|
package/template/zh/ai/README.md
CHANGED
|
@@ -11,6 +11,7 @@ project 是现场工作区
|
|
|
11
11
|
|
|
12
12
|
- `template/prompt.md`:AI 启动提示。
|
|
13
13
|
- `template/bootstrap.md`:项目发现和上下文引导提示。
|
|
14
|
+
- `template/execution-policy.md`:自动连续执行、任务树、风险分级和 Checkpoint 规则。
|
|
14
15
|
- `template/reconcile.md`:把新的权威资料合并进现有项目上下文。
|
|
15
16
|
- `template/VERSION`:已安装模板版本。
|
|
16
17
|
- `template/protocol.md`:引导流程、执行流程、模型分工、同步规则。
|
|
@@ -3,6 +3,7 @@ task_id: ""
|
|
|
3
3
|
type: "bugfix | feature | refactor | docs | config | test | research | strategy_update | apply_strategy_update"
|
|
4
4
|
priority: "P0 | P1 | P2 | P3"
|
|
5
5
|
risk_level: "low | medium | high"
|
|
6
|
+
readiness: "draft_for_confirmation | ready_to_execute | blocked"
|
|
6
7
|
depends_on_previous_result: false
|
|
7
8
|
execution_policy:
|
|
8
9
|
mode: "auto | normal | bounded_continuous"
|
|
@@ -10,7 +11,17 @@ execution_policy:
|
|
|
10
11
|
max_depth: 3
|
|
11
12
|
allow_depth_4_when_needed: true
|
|
12
13
|
progress_unit: "vertical_slice"
|
|
13
|
-
task_tree:
|
|
14
|
+
task_tree:
|
|
15
|
+
- id: "L1-1"
|
|
16
|
+
title: ""
|
|
17
|
+
risk: "Green | Yellow | Red"
|
|
18
|
+
status: "pending | running | done | blocked"
|
|
19
|
+
scope:
|
|
20
|
+
allowed: []
|
|
21
|
+
denied: []
|
|
22
|
+
acceptance: []
|
|
23
|
+
evidence: []
|
|
24
|
+
children: []
|
|
14
25
|
checkpoint_budget:
|
|
15
26
|
l1: 0
|
|
16
27
|
l2: 0
|
|
@@ -69,7 +80,9 @@ permission:
|
|
|
69
80
|
这个文件是当前执行契约。优先在引导模式中,根据简短人类目标和仓库上下文生成,
|
|
70
81
|
然后由人类在执行前检查。
|
|
71
82
|
|
|
72
|
-
|
|
83
|
+
优先使用安全假设,少问额外问题。AI 应基于用户目标、项目上下文和仓库事实推断
|
|
84
|
+
范围、风险、权限和验收;如果推断会越过权限、安全边界或验收无法定义,将
|
|
85
|
+
`readiness` 标为 `blocked` 或将相关任务节点标为 `Red`,等待人类确认。
|
|
73
86
|
|
|
74
87
|
## 目标
|
|
75
88
|
|
|
@@ -116,6 +129,10 @@ permission:
|
|
|
116
129
|
|
|
117
130
|
- 目标、范围、验收、权限和风险评级由 AI 基于用户目标、项目上下文和仓库事实推断;
|
|
118
131
|
不要求用户预先逐项提供。
|
|
132
|
+
- `readiness = ready_to_execute` 表示没有 Red 预检项,可以执行。
|
|
133
|
+
- `readiness = draft_for_confirmation` 表示需要人类确认后才能执行。
|
|
134
|
+
- `readiness = blocked` 表示当前任务不可执行,必须写 blocked 结果。
|
|
135
|
+
- 执行前必须把 L1 任务清单写入 `execution_policy.task_tree`。
|
|
119
136
|
- 执行前必须列出 L1 任务清单;每个 L1 用待办列表表示,完成后打勾并划掉。
|
|
120
137
|
- 执行某个 L1 前,AI 先规划自然衍生出的 L2;如果 L2 仍需拆分,再规划 L3。
|
|
121
138
|
- 默认最多 3 层;只有当不拆 L4 会导致 L3 过大或不可验证时,才允许动态增加 L4。
|
|
@@ -125,6 +142,7 @@ permission:
|
|
|
125
142
|
- `checkpoint_budget` 是最多可用检查点预算,不是必须用完的次数;不要为了消耗预算而汇报。
|
|
126
143
|
- 只有在触发 `checkpoint_triggers`、风险升高或准备收尾时才输出 Checkpoint。
|
|
127
144
|
- 每个 Checkpoint 必须包含证据:已改文件、已运行命令、验证结果或无法验证的原因。
|
|
145
|
+
- 执行中必须更新 `task_tree` 节点状态:`pending`、`running`、`done` 或 `blocked`。
|
|
128
146
|
- 完成后只做一次总复盘;只对 Yellow、Red、失败验证或高影响模块做二次抽检。
|
|
129
147
|
- 连续执行不改变模型策略;涉及判断、架构、失败复盘或验收争议时仍按 `model_policy` 升级。
|
|
130
148
|
|
|
@@ -1 +1 @@
|
|
|
1
|
-
0.8.16
|
|
1
|
+
0.8.17
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
# 执行策略
|
|
2
|
+
|
|
3
|
+
不要总结这个文件。
|
|
4
|
+
任务执行时按本文件选择 `normal` 或 `bounded_continuous`。
|
|
5
|
+
|
|
6
|
+
## 默认策略
|
|
7
|
+
|
|
8
|
+
默认执行策略是 `auto`:AI 在每次执行前先做任务分解和风险判断,再决定使用
|
|
9
|
+
`normal` 还是 `bounded_continuous`。启用连续执行不依赖用户说出特定口令。
|
|
10
|
+
|
|
11
|
+
执行前规划必须:
|
|
12
|
+
|
|
13
|
+
- 根据用户目标、项目上下文和仓库事实,推断目标、范围、验收、权限和验证方式。
|
|
14
|
+
- 列出 L1 任务清单,并为每个 L1 生成 Green / Yellow / Red 风险评级。
|
|
15
|
+
- 如果 L1 少于 2 个,使用 `normal`。
|
|
16
|
+
- 如果 L1 为 2 个或更多,自动启用 `bounded_continuous`。
|
|
17
|
+
- 如果任一 L1 为 Red,先停止并让人类确认;Green 和 Yellow 不阻塞启动。
|
|
18
|
+
- 将任务树写入 `ai/project/task.md` 的 `execution_policy.task_tree`。
|
|
19
|
+
|
|
20
|
+
## 任务树
|
|
21
|
+
|
|
22
|
+
任务树按 L1 -> L2 -> L3 执行。
|
|
23
|
+
|
|
24
|
+
- 执行某个 L1 前,先规划它自然衍生出的 L2。
|
|
25
|
+
- 执行某个 L2 前,如果仍需拆分,再规划 L3。
|
|
26
|
+
- 默认最多 3 层。只有当 L3 仍过大、不可验证或不可回退时,才动态增加 L4。
|
|
27
|
+
- L1/L2/L3/L4 都必须有风险评级、预期改动范围、验收方式和证据要求。
|
|
28
|
+
- L1 清单必须用待办列表展示;每完成一个 L1,就打勾并划掉。
|
|
29
|
+
- 执行中必须更新 `task_tree` 节点状态:`pending`、`running`、`done` 或 `blocked`。
|
|
30
|
+
|
|
31
|
+
推荐节点结构:
|
|
32
|
+
|
|
33
|
+
```yaml
|
|
34
|
+
id: "L1-1"
|
|
35
|
+
title: ""
|
|
36
|
+
risk: "Green | Yellow | Red"
|
|
37
|
+
status: "pending | running | done | blocked"
|
|
38
|
+
scope:
|
|
39
|
+
allowed: []
|
|
40
|
+
denied: []
|
|
41
|
+
acceptance: []
|
|
42
|
+
evidence: []
|
|
43
|
+
children: []
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## 风险分级
|
|
47
|
+
|
|
48
|
+
Green:
|
|
49
|
+
|
|
50
|
+
- 在当前任务范围内;
|
|
51
|
+
- 不需要新增权限、命令、网络或破坏性操作;
|
|
52
|
+
- 验收方式明确;
|
|
53
|
+
- 不改变产品方向、核心架构、数据结构、安全边界、支付、账号或权限。
|
|
54
|
+
|
|
55
|
+
Yellow:
|
|
56
|
+
|
|
57
|
+
- 仍在当前任务范围内;
|
|
58
|
+
- 存在局部不确定或局部验证失败;
|
|
59
|
+
- 可以用低风险修正继续;
|
|
60
|
+
- 不需要扩大权限、范围、命令或验收。
|
|
61
|
+
|
|
62
|
+
Red:
|
|
63
|
+
|
|
64
|
+
- 需要扩大权限、运行未允许命令、访问网络或执行破坏性操作;
|
|
65
|
+
- 需要改变产品方向、核心架构、数据结构、安全边界、支付、账号或权限;
|
|
66
|
+
- 需要删除大量文件、重写核心模块或在多个高成本方案之间取舍;
|
|
67
|
+
- 验收不可定义,或任务目标和项目方向发生实质冲突。
|
|
68
|
+
|
|
69
|
+
只有 Red 停止等待人类确认。Green 自动继续。Yellow 做局部低风险修正后继续。
|
|
70
|
+
|
|
71
|
+
## Checkpoint
|
|
72
|
+
|
|
73
|
+
Checkpoint 只在风险升高、边界即将变化、完成垂直切片或准备收尾时输出。
|
|
74
|
+
不要为了消耗预算而汇报。
|
|
75
|
+
|
|
76
|
+
每个 Checkpoint 必须包含:
|
|
77
|
+
|
|
78
|
+
```text
|
|
79
|
+
## Checkpoint
|
|
80
|
+
### 任务树
|
|
81
|
+
### 当前完成度
|
|
82
|
+
### 已完成
|
|
83
|
+
### 证据
|
|
84
|
+
### 偏离风险:Green / Yellow / Red
|
|
85
|
+
### 下一步建议
|
|
86
|
+
### 是否自动继续
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
证据必须包含已改文件、已运行命令、验证结果,或无法验证的原因。
|
|
90
|
+
不接受只有主观判断的 Green。
|
|
91
|
+
|
|
92
|
+
## 模型策略
|
|
93
|
+
|
|
94
|
+
连续执行不改变 `model_policy`。遇到规划、架构、失败复盘或验收争议,
|
|
95
|
+
仍按 `model_policy` 升级,并在 `ai/project/metrics.json` 中记录原因。
|
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
|
|
10
10
|
1. `ai/template/protocol.md`
|
|
11
11
|
2. `ai/template/rules/core.md`
|
|
12
|
+
3. `ai/template/execution-policy.md`
|
|
12
13
|
|
|
13
14
|
然后选择模式:
|
|
14
15
|
|
|
@@ -51,11 +52,14 @@
|
|
|
51
52
|
2. 根据用户当前目标、项目上下文和仓库事实,推断目标、范围、验收、权限、
|
|
52
53
|
验证方式和初始风险;不要要求用户逐项提供。
|
|
53
54
|
3. 起草 `ai/project/task.md`,并将 `execution_policy.mode` 设为 `auto`。
|
|
54
|
-
4. 执行前列出 L1 任务清单并标注 Green / Yellow / Red
|
|
55
|
-
`normal`;L1 为 2
|
|
56
|
-
|
|
55
|
+
4. 执行前列出 L1 任务清单并标注 Green / Yellow / Red,同时写入
|
|
56
|
+
`execution_policy.task_tree`。L1 少于 2 个时使用 `normal`;L1 为 2 个或更多时
|
|
57
|
+
自动使用 `bounded_continuous`。
|
|
58
|
+
5. 如果没有 Red 预检项,将 `readiness` 设为 `ready_to_execute`;如果需要人类确认,
|
|
59
|
+
设为 `draft_for_confirmation`;如果不可执行,设为 `blocked`。
|
|
60
|
+
6. 只有出现 Red 预检项时才停止等待人类确认。若用户要求的是执行或继续,且预检
|
|
57
61
|
只有 Green / Yellow,可以直接进入执行模式。
|
|
58
|
-
|
|
62
|
+
7. 不要在任务草稿模式中修改源码或业务文件。
|
|
59
63
|
|
|
60
64
|
任务草稿模式必须以下面结构结束:
|
|
61
65
|
|
|
@@ -104,10 +108,11 @@
|
|
|
104
108
|
2. `ai/project/runtime.md`
|
|
105
109
|
3. `ai/project/task.md`
|
|
106
110
|
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
+
然后按 `ai/template/execution-policy.md` 做执行前规划:列出 L1 清单,给每个 L1
|
|
112
|
+
标注 Green / Yellow / Red,并写入 `execution_policy.task_tree`。根据 L1 数量自动选择
|
|
113
|
+
`normal` 或 `bounded_continuous`。执行 L1 前规划 L2,执行 L2 前按需规划 L3;
|
|
114
|
+
默认最多 3 层,必要时允许 L4。每完成一个 L1,在清单中打勾并划掉,并更新
|
|
115
|
+
`task_tree` 节点状态。只有 Red 停止等待人类确认;Green 自动继续,Yellow 做局部
|
|
111
116
|
低风险修正后继续。最后把结果写入:
|
|
112
117
|
|
|
113
118
|
- `ai/project/result.json`
|
|
@@ -43,57 +43,14 @@ ai/project/task.md = 当前执行契约
|
|
|
43
43
|
|
|
44
44
|
## 执行授权模式
|
|
45
45
|
|
|
46
|
-
|
|
47
|
-
`normal` 还是 `bounded_continuous`。启用连续执行不依赖用户说出特定口令。
|
|
48
|
-
|
|
49
|
-
执行前规划必须:
|
|
50
|
-
|
|
51
|
-
- 根据用户目标、项目上下文和仓库事实,推断目标、范围、验收、权限和验证方式。
|
|
52
|
-
- 列出 L1 任务清单,并为每个 L1 生成 Green / Yellow / Red 风险评级。
|
|
53
|
-
- 如果 L1 少于 2 个,使用 `normal`。
|
|
54
|
-
- 如果 L1 为 2 个或更多,自动启用 `bounded_continuous`。
|
|
55
|
-
- 如果任一 L1 为 Red,先停止并让人类确认;Green 和 Yellow 不阻塞启动。
|
|
56
|
-
|
|
57
|
-
边界内连续执行规则:
|
|
58
|
-
|
|
59
|
-
- 任务树按 L1 -> L2 -> L3 执行。执行某个 L1 前,先规划它自然衍生出的 L2;
|
|
60
|
-
执行某个 L2 前,如果仍需拆分,再规划 L3。
|
|
61
|
-
- 默认最多 3 层。只有当 L3 仍过大、不可验证或不可回退时,才动态增加 L4。
|
|
62
|
-
- L1/L2/L3/L4 都必须有风险评级、预期改动范围、验收方式和证据要求。
|
|
63
|
-
- L1 清单必须用待办列表展示;每完成一个 L1,就打勾并划掉。
|
|
64
|
-
- 默认按 `vertical_slice` 推进:每轮都产出可运行、可检查或可回退的增量。
|
|
65
|
-
- 目标、范围、验收和权限由 AI 推断,但不能越过项目规则、显式用户限制、
|
|
66
|
-
`permission.modify.denied`、安全边界或破坏性操作限制。
|
|
67
|
-
- `Green` 可以自动继续。
|
|
68
|
-
- `Yellow` 可以在局部低风险修正后继续。
|
|
69
|
-
- `Red` 必须停止等待人类确认。
|
|
70
|
-
- 如果需要扩大权限、运行未允许命令、访问网络、执行破坏性操作、改变产品方向或核心架构,
|
|
71
|
-
当前节点必须标为 Red。
|
|
72
|
-
- 全部完成后只做一次总复盘;只对 Yellow、Red、验证失败或高影响模块做二次抽检。
|
|
73
|
-
- 每个 Checkpoint 必须给出证据,不接受只有主观判断的 Green。
|
|
74
|
-
- 连续执行不改变模型策略;遇到规划、架构、失败复盘或验收争议,仍按 `model_policy` 升级。
|
|
75
|
-
|
|
76
|
-
必须停止的情况:
|
|
77
|
-
|
|
78
|
-
- 需要改变产品方向、核心架构、数据结构、安全边界、支付、账号或权限。
|
|
79
|
-
- 需要删除大量文件或重写核心模块。
|
|
80
|
-
- 发现任务大纲、验收或权限之间存在实质冲突。
|
|
81
|
-
- 当前实现会影响多个后续模块,且任务契约没有覆盖该影响。
|
|
82
|
-
- 测试失败且无法局部修复。
|
|
83
|
-
- 出现两个以上高成本方案,需要人类裁决。
|
|
84
|
-
|
|
85
|
-
检查点使用紧凑格式:
|
|
46
|
+
任务执行前必须读取 `ai/template/execution-policy.md`。
|
|
86
47
|
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
### 偏离风险:Green / Yellow / Red
|
|
94
|
-
### 下一步建议
|
|
95
|
-
### 是否自动继续
|
|
96
|
-
```
|
|
48
|
+
执行策略默认是 `auto`:AI 先拆 L1 任务并判断 Green / Yellow / Red,再决定使用
|
|
49
|
+
`normal` 或 `bounded_continuous`。L1 少于 2 个使用 `normal`;L1 为 2 个或更多
|
|
50
|
+
自动启用 `bounded_continuous`。只有 Red 才需要停止并等待人类确认。
|
|
51
|
+
|
|
52
|
+
任务树、风险分级、Checkpoint 证据和 `task_tree` 状态更新规则由
|
|
53
|
+
`ai/template/execution-policy.md` 定义。
|
|
97
54
|
|
|
98
55
|
## 引导模式
|
|
99
56
|
|
|
@@ -103,26 +103,15 @@
|
|
|
103
103
|
|
|
104
104
|
## 边界内连续执行门
|
|
105
105
|
|
|
106
|
-
每次执行前,AI
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
-
|
|
112
|
-
-
|
|
113
|
-
-
|
|
114
|
-
|
|
115
|
-
启用后:
|
|
116
|
-
|
|
117
|
-
- 按 L1 -> L2 -> L3 执行;执行某个 L1 前规划 L2,执行某个 L2 前按需规划 L3。
|
|
118
|
-
- 默认最多 3 层;只有当 L3 仍过大、不可验证或不可回退时,才动态增加 L4。
|
|
119
|
-
- L1 清单必须用待办列表展示;每完成一个 L1,就打勾并划掉。
|
|
120
|
-
- 每个任务节点必须有风险评级、预期改动范围、验收方式和证据要求。
|
|
121
|
-
- 检查点预算是上限,不是必须用完的次数。
|
|
122
|
-
- 每个 Checkpoint 必须包含证据。
|
|
123
|
-
- `Green` 可自动继续。
|
|
124
|
-
- `Yellow` 做局部低风险修正后继续。
|
|
125
|
-
- `Red` 必须停止等待人类确认。
|
|
106
|
+
每次执行前,AI 必须读取 `ai/template/execution-policy.md`,先做任务分解和风险判断,
|
|
107
|
+
而不是等待用户显式说“启用连续执行”。
|
|
108
|
+
|
|
109
|
+
硬门禁:
|
|
110
|
+
|
|
111
|
+
- `execution_policy.task_tree` 必须记录 L1 清单和执行状态。
|
|
112
|
+
- 每个任务节点必须有 Green / Yellow / Red 风险评级。
|
|
113
|
+
- 每个 Checkpoint 必须包含证据;不接受只有主观判断的 Green。
|
|
114
|
+
- Red 必须停止等待人类确认。
|
|
126
115
|
- 任何方向、核心架构、数据结构、安全、支付、账号、权限、大量删除、
|
|
127
116
|
核心重写或高成本方案取舍,都必须停止。
|
|
128
117
|
- 需要扩大范围、权限、命令、网络或验收时,必须停止。
|
package/test/selftest.js
CHANGED
|
@@ -51,6 +51,7 @@ function testInitUpdateDoctor() {
|
|
|
51
51
|
assert(read(cwd, "ai/template/LANG") === "zh\n", "init should default to zh template");
|
|
52
52
|
assert(exists(cwd, "ai/template/VERSION"), "init should create template VERSION");
|
|
53
53
|
assert(exists(cwd, "ai/template/bootstrap.md"), "init should create template bootstrap prompt");
|
|
54
|
+
assert(exists(cwd, "ai/template/execution-policy.md"), "init should create execution policy prompt");
|
|
54
55
|
assert(exists(cwd, "ai/template/prompt.md"), "init should create template prompt");
|
|
55
56
|
assert(exists(cwd, "ai/template/reconcile.md"), "init should create template reconcile prompt");
|
|
56
57
|
assert(exists(cwd, "ai/project/inbox/.gitkeep"), "init should create inbox directory");
|
|
@@ -77,16 +78,21 @@ function testInitUpdateDoctor() {
|
|
|
77
78
|
assert(read(cwd, "ai/template/bootstrap.md").includes("未吸收资料"), "bootstrap handoff should audit unabsorbed material");
|
|
78
79
|
assert(read(cwd, "ai/template/bootstrap.md").includes("冲突处理"), "bootstrap handoff should audit conflict handling");
|
|
79
80
|
assert(read(cwd, "ai/template/prompt.md").includes("任务草稿交接"), "execution prompt should include task handoff");
|
|
81
|
+
assert(read(cwd, "ai/template/prompt.md").includes("ai/template/execution-policy.md"), "execution prompt should read execution policy");
|
|
82
|
+
assert(read(cwd, "ai/template/execution-policy.md").includes("风险分级"), "execution policy should include risk rubric");
|
|
83
|
+
assert(read(cwd, "ai/template/execution-policy.md").includes("execution_policy.task_tree"), "execution policy should require task tree persistence");
|
|
80
84
|
assert(read(cwd, "ai/template/prompt.md").includes("默认也只处理 `ai/project/inbox/*.md`"), "execution prompt should narrow inbox reconciliation");
|
|
81
|
-
assert(read(cwd, "ai/template/protocol.md").includes("
|
|
82
|
-
assert(read(cwd, "ai/template/
|
|
83
|
-
assert(read(cwd, "ai/template/
|
|
84
|
-
assert(read(cwd, "ai/template/
|
|
85
|
+
assert(read(cwd, "ai/template/protocol.md").includes("`bounded_continuous`"), "protocol should include bounded continuous execution");
|
|
86
|
+
assert(read(cwd, "ai/template/execution-policy.md").includes("垂直切片"), "protocol should require vertical-slice progress for continuous execution");
|
|
87
|
+
assert(read(cwd, "ai/template/execution-policy.md").includes("L1 为 2 个或更多,自动启用"), "protocol should auto-enable continuous execution from L1 count");
|
|
88
|
+
assert(read(cwd, "ai/template/execution-policy.md").includes("每个 Checkpoint 必须包含"), "protocol should require evidence-backed checkpoints");
|
|
85
89
|
assert(read(cwd, "ai/template/rules/core.md").includes("边界内连续执行门"), "core rules should include bounded continuous execution gate");
|
|
86
90
|
assert(read(cwd, "ai/template/rules/core.md").includes("需要扩大范围、权限、命令、网络或验收时"), "core rules should stop continuous execution before boundary expansion");
|
|
87
91
|
assert(read(cwd, "ai/project/task.md").includes("execution_policy:"), "task template should include execution policy");
|
|
92
|
+
assert(read(cwd, "ai/project/task.md").includes("readiness:"), "task template should include readiness state");
|
|
88
93
|
assert(read(cwd, "ai/project/task.md").includes("activation_rule: \"auto_enable_when_l1_count_gte_2\""), "task template should define automatic activation rule");
|
|
89
94
|
assert(read(cwd, "ai/project/task.md").includes("risk_gate:"), "task template should define risk gate");
|
|
95
|
+
assert(read(cwd, "ai/project/task.md").includes("status: \"pending | running | done | blocked\""), "task template should define task tree node status");
|
|
90
96
|
assert(read(cwd, "ai/project/task.md").includes("progress_unit: \"vertical_slice\""), "task template should define continuous progress unit");
|
|
91
97
|
assert(read(cwd, "ai/template/prompt.md").includes("开始初始化这个项目"), "execution prompt should route natural bootstrap entry");
|
|
92
98
|
assert(read(cwd, "ai/template/prompt.md").includes("开始初始化这个项目,并吸收 ai/project/inbox/ 里的资料"), "execution prompt should route bootstrap with inbox material");
|
|
@@ -142,6 +148,7 @@ function testEnglishInitUpdateDoctor() {
|
|
|
142
148
|
|
|
143
149
|
const initOutput = run(["init", "--lang", "en"], cwd);
|
|
144
150
|
assert(read(cwd, "ai/template/LANG") === "en\n", "init --lang en should install English template");
|
|
151
|
+
assert(exists(cwd, "ai/template/execution-policy.md"), "English init should create execution policy prompt");
|
|
145
152
|
assert(read(cwd, "ai/template/bootstrap.md").includes("Confirmation Dimensions"), "English init should install English bootstrap prompt");
|
|
146
153
|
assert(read(cwd, "ai/template/bootstrap.md").includes("Do not summarize this file"), "English bootstrap prompt should prevent summary-only behavior");
|
|
147
154
|
assert(read(cwd, "ai/template/bootstrap.md").includes("ai/project/refs/final-shape.md"), "English bootstrap prompt should initialize the North Star");
|
|
@@ -154,16 +161,21 @@ function testEnglishInitUpdateDoctor() {
|
|
|
154
161
|
assert(read(cwd, "ai/template/bootstrap.md").includes("Unabsorbed material"), "English bootstrap handoff should audit unabsorbed material");
|
|
155
162
|
assert(read(cwd, "ai/template/bootstrap.md").includes("Conflict handling"), "English bootstrap handoff should audit conflict handling");
|
|
156
163
|
assert(read(cwd, "ai/template/prompt.md").includes("Start initializing this project"), "English execution prompt should route natural bootstrap entry");
|
|
164
|
+
assert(read(cwd, "ai/template/prompt.md").includes("ai/template/execution-policy.md"), "English execution prompt should read execution policy");
|
|
165
|
+
assert(read(cwd, "ai/template/execution-policy.md").includes("Risk Rubric"), "English execution policy should include risk rubric");
|
|
166
|
+
assert(read(cwd, "ai/template/execution-policy.md").includes("execution_policy.task_tree"), "English execution policy should require task tree persistence");
|
|
157
167
|
assert(read(cwd, "ai/template/prompt.md").includes("default to only `ai/project/inbox/*.md`"), "English execution prompt should narrow inbox reconciliation");
|
|
158
168
|
assert(read(cwd, "ai/template/protocol.md").includes("`bounded_continuous`"), "English protocol should include bounded continuous execution");
|
|
159
|
-
assert(read(cwd, "ai/template/
|
|
160
|
-
assert(read(cwd, "ai/template/
|
|
161
|
-
assert(read(cwd, "ai/template/
|
|
169
|
+
assert(read(cwd, "ai/template/execution-policy.md").includes("vertical"), "English protocol should require vertical-slice progress for continuous execution");
|
|
170
|
+
assert(read(cwd, "ai/template/execution-policy.md").includes("Automatically use `bounded_continuous`"), "English protocol should auto-enable continuous execution from L1 count");
|
|
171
|
+
assert(read(cwd, "ai/template/execution-policy.md").includes("Every checkpoint must include"), "English protocol should require evidence-backed checkpoints");
|
|
162
172
|
assert(read(cwd, "ai/template/rules/core.md").includes("Bounded Continuous Execution Gate"), "English core rules should include bounded continuous execution gate");
|
|
163
173
|
assert(read(cwd, "ai/template/rules/core.md").includes("expand scope, permission, commands, network access, or acceptance"), "English core rules should stop continuous execution before boundary expansion");
|
|
164
174
|
assert(read(cwd, "ai/project/task.md").includes("execution_policy:"), "English task template should include execution policy");
|
|
175
|
+
assert(read(cwd, "ai/project/task.md").includes("readiness:"), "English task template should include readiness state");
|
|
165
176
|
assert(read(cwd, "ai/project/task.md").includes("activation_rule: \"auto_enable_when_l1_count_gte_2\""), "English task template should define automatic activation rule");
|
|
166
177
|
assert(read(cwd, "ai/project/task.md").includes("risk_gate:"), "English task template should define risk gate");
|
|
178
|
+
assert(read(cwd, "ai/project/task.md").includes("status: \"pending | running | done | blocked\""), "English task template should define task tree node status");
|
|
167
179
|
assert(read(cwd, "ai/project/task.md").includes("progress_unit: \"vertical_slice\""), "English task template should define continuous progress unit");
|
|
168
180
|
assert(read(cwd, "ai/template/prompt.md").includes("Start initializing this project and absorb the material in ai/project/inbox/"), "English execution prompt should route bootstrap with inbox material");
|
|
169
181
|
assert(read(cwd, "ai/template/prompt.md").includes("instead of bootstrapping again"), "English execution prompt should reconcile inbox material when project context already exists");
|
|
@@ -236,6 +248,25 @@ function testDoctorFailureAndWarning() {
|
|
|
236
248
|
write(taskWarnCwd, "ai/project/task.md", "# Task only\n");
|
|
237
249
|
const taskWarnOutput = run(["doctor"], taskWarnCwd);
|
|
238
250
|
assert(taskWarnOutput.includes("任务 front matter 缺少关键字段"), "doctor should warn incomplete task front matter");
|
|
251
|
+
|
|
252
|
+
const taskPolicyWarnCwd = createTempProject("agent-execution-template-task-policy");
|
|
253
|
+
run(["init"], taskPolicyWarnCwd);
|
|
254
|
+
write(taskPolicyWarnCwd, "ai/project/task.md", `---
|
|
255
|
+
task_id: ""
|
|
256
|
+
type: "feature"
|
|
257
|
+
priority: "P2"
|
|
258
|
+
risk_level: "low"
|
|
259
|
+
readiness: "ready_to_execute"
|
|
260
|
+
execution_policy:
|
|
261
|
+
mode: "auto"
|
|
262
|
+
model_policy: {}
|
|
263
|
+
refs: {}
|
|
264
|
+
permission: {}
|
|
265
|
+
---
|
|
266
|
+
# Task
|
|
267
|
+
`);
|
|
268
|
+
const taskPolicyWarnOutput = run(["doctor"], taskPolicyWarnCwd);
|
|
269
|
+
assert(taskPolicyWarnOutput.includes("任务 front matter 缺少关键字段"), "doctor should warn when execution policy fields are incomplete");
|
|
239
270
|
}
|
|
240
271
|
|
|
241
272
|
function testRefreshBacksUpAndImportsOldProject() {
|
|
@@ -271,14 +302,14 @@ function testNextCommandRoutesByProjectState() {
|
|
|
271
302
|
assert(run(["next"], cwd).includes("开始初始化这个项目"), "next should bootstrap a freshly installed project");
|
|
272
303
|
|
|
273
304
|
write(cwd, "ai/project/project.md", "USER PROJECT MARKER\n");
|
|
274
|
-
assert(run(["next"], cwd).includes("
|
|
305
|
+
assert(run(["next"], cwd).includes("执行前先拆 L1 任务"), "next should continue with automatic execution guidance when no intake is waiting");
|
|
275
306
|
|
|
276
307
|
write(cwd, "ai/project/inbox/product.md", "# Product material\n");
|
|
277
308
|
assert(run(["next"], cwd).includes("整合 ai/project/inbox/ 里的新资料"), "next should route material inbox to reconcile");
|
|
278
309
|
fs.unlinkSync(path.join(cwd, "ai/project/inbox/product.md"));
|
|
279
310
|
|
|
280
311
|
write(cwd, "ai/project/inbox/processed/product.md", "# Processed material\n");
|
|
281
|
-
assert(run(["next"], cwd).includes("
|
|
312
|
+
assert(run(["next"], cwd).includes("执行前先拆 L1 任务"), "next should ignore processed inbox material");
|
|
282
313
|
fs.unlinkSync(path.join(cwd, "ai/project/inbox/processed/product.md"));
|
|
283
314
|
|
|
284
315
|
write(cwd, "ai/project/inbox/ideas/new-direction.md", "# Direction idea\n");
|
|
@@ -286,7 +317,7 @@ function testNextCommandRoutesByProjectState() {
|
|
|
286
317
|
fs.unlinkSync(path.join(cwd, "ai/project/inbox/ideas/new-direction.md"));
|
|
287
318
|
|
|
288
319
|
write(cwd, "ai/project/proposals/final-shape-updates/proposal.md", "---\nstatus: \"applied\"\n---\n");
|
|
289
|
-
assert(run(["next"], cwd).includes("
|
|
320
|
+
assert(run(["next"], cwd).includes("执行前先拆 L1 任务"), "next should ignore already applied proposals");
|
|
290
321
|
|
|
291
322
|
write(cwd, "ai/project/proposals/final-shape-updates/proposal.md", "---\nstatus: \"proposed\"\n---\n");
|
|
292
323
|
assert(run(["next"], cwd).includes("已有方向修订提案"), "next should route existing proposals to human review");
|