workflow-supervisor 0.1.3 → 0.2.0
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +139 -0
- package/README.md +125 -28
- package/bin/workflow-skills.mjs +201 -1
- package/docs/artifacts.md +9 -0
- package/docs/cli.md +3 -1
- package/docs/portable-delegation.md +19 -1
- package/docs/skill-reference.md +12 -2
- package/docs/troubleshooting.md +34 -0
- package/package.json +8 -2
- package/schemas/dossier-v1.schema.json +38 -0
- package/schemas/worker-report-v1.schema.json +120 -12
- package/skills/acceptance-matrix/SKILL.md +114 -2
- package/skills/acceptance-matrix/agents/openai.yaml +1 -1
- package/skills/dossier-builder/SKILL.md +28 -0
- package/skills/loop-policy/SKILL.md +29 -6
- package/skills/work-unit/SKILL.md +46 -6
- package/skills/workflow-docs/SKILL.md +2 -1
- package/skills/workflow-docs/references/workflow-control.md +93 -6
- package/skills/workflow-supervisor/SKILL.md +195 -46
- package/skills/workflow-supervisor/agents/openai.yaml +2 -2
|
@@ -49,27 +49,19 @@
|
|
|
49
49
|
},
|
|
50
50
|
"evidence": {
|
|
51
51
|
"type": "array",
|
|
52
|
-
"items": {
|
|
53
|
-
"type": "string"
|
|
54
|
-
}
|
|
52
|
+
"items": { "$ref": "#/$defs/evidenceEntry" }
|
|
55
53
|
},
|
|
56
54
|
"checks_run": {
|
|
57
55
|
"type": "array",
|
|
58
|
-
"items": {
|
|
59
|
-
"type": "string"
|
|
60
|
-
}
|
|
56
|
+
"items": { "$ref": "#/$defs/evidenceEntry" }
|
|
61
57
|
},
|
|
62
58
|
"skipped_checks": {
|
|
63
59
|
"type": "array",
|
|
64
|
-
"items": {
|
|
65
|
-
"type": "string"
|
|
66
|
-
}
|
|
60
|
+
"items": { "$ref": "#/$defs/evidenceEntry" }
|
|
67
61
|
},
|
|
68
62
|
"findings": {
|
|
69
63
|
"type": "array",
|
|
70
|
-
"items": {
|
|
71
|
-
"type": "string"
|
|
72
|
-
}
|
|
64
|
+
"items": { "$ref": "#/$defs/evidenceEntry" }
|
|
73
65
|
},
|
|
74
66
|
"blocking_question": {
|
|
75
67
|
"type": ["string", "null"]
|
|
@@ -77,6 +69,13 @@
|
|
|
77
69
|
"next_action": {
|
|
78
70
|
"type": "string"
|
|
79
71
|
},
|
|
72
|
+
"verification_environment": {
|
|
73
|
+
"$ref": "#/$defs/verificationEnvironment"
|
|
74
|
+
},
|
|
75
|
+
"outcome_evaluations": {
|
|
76
|
+
"type": "array",
|
|
77
|
+
"items": { "$ref": "#/$defs/outcomeEvaluation" }
|
|
78
|
+
},
|
|
80
79
|
"adapter": {
|
|
81
80
|
"type": ["object", "null"],
|
|
82
81
|
"additionalProperties": false,
|
|
@@ -115,5 +114,114 @@
|
|
|
115
114
|
"reason": {
|
|
116
115
|
"type": ["string", "null"]
|
|
117
116
|
}
|
|
117
|
+
},
|
|
118
|
+
"$defs": {
|
|
119
|
+
"evidenceEntry": {
|
|
120
|
+
"anyOf": [
|
|
121
|
+
{ "type": "string" },
|
|
122
|
+
{
|
|
123
|
+
"type": "object",
|
|
124
|
+
"additionalProperties": true
|
|
125
|
+
}
|
|
126
|
+
]
|
|
127
|
+
},
|
|
128
|
+
"verificationCapability": {
|
|
129
|
+
"type": "string",
|
|
130
|
+
"enum": [
|
|
131
|
+
"static_diff_inspection",
|
|
132
|
+
"diff_inspection",
|
|
133
|
+
"shell_command",
|
|
134
|
+
"unit_test",
|
|
135
|
+
"integration_test",
|
|
136
|
+
"contract_test",
|
|
137
|
+
"data_contract_test",
|
|
138
|
+
"jsdom_render",
|
|
139
|
+
"api_probe",
|
|
140
|
+
"file_snapshot",
|
|
141
|
+
"generated_html_snapshot",
|
|
142
|
+
"component_tree_snapshot",
|
|
143
|
+
"accessibility_tree_snapshot",
|
|
144
|
+
"state_machine_test",
|
|
145
|
+
"browser_snapshot",
|
|
146
|
+
"human_required",
|
|
147
|
+
"manual_review"
|
|
148
|
+
]
|
|
149
|
+
},
|
|
150
|
+
"capabilityList": {
|
|
151
|
+
"type": "array",
|
|
152
|
+
"items": { "$ref": "#/$defs/verificationCapability" }
|
|
153
|
+
},
|
|
154
|
+
"verificationEnvironment": {
|
|
155
|
+
"type": "object",
|
|
156
|
+
"additionalProperties": false,
|
|
157
|
+
"properties": {
|
|
158
|
+
"shell": { "type": "boolean" },
|
|
159
|
+
"filesystem": { "type": "boolean" },
|
|
160
|
+
"git_diff": { "type": "boolean" },
|
|
161
|
+
"browser": { "type": "boolean" },
|
|
162
|
+
"playwright_mcp": { "type": "boolean" },
|
|
163
|
+
"network": { "type": "boolean" },
|
|
164
|
+
"capabilities": { "$ref": "#/$defs/capabilityList" },
|
|
165
|
+
"limitations": {
|
|
166
|
+
"type": "array",
|
|
167
|
+
"items": { "type": "string" }
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
},
|
|
171
|
+
"evidenceStrength": {
|
|
172
|
+
"type": "object",
|
|
173
|
+
"additionalProperties": false,
|
|
174
|
+
"required": ["strongest_possible", "strongest_available"],
|
|
175
|
+
"properties": {
|
|
176
|
+
"strongest_possible": { "$ref": "#/$defs/capabilityList" },
|
|
177
|
+
"strongest_available": { "$ref": "#/$defs/capabilityList" },
|
|
178
|
+
"limitation": { "type": "string" }
|
|
179
|
+
}
|
|
180
|
+
},
|
|
181
|
+
"outcomeEvaluation": {
|
|
182
|
+
"type": "object",
|
|
183
|
+
"additionalProperties": false,
|
|
184
|
+
"required": [
|
|
185
|
+
"id",
|
|
186
|
+
"source_requirement",
|
|
187
|
+
"expected_outcome",
|
|
188
|
+
"preferred_verification",
|
|
189
|
+
"available_verification",
|
|
190
|
+
"evidence_strength",
|
|
191
|
+
"evidence",
|
|
192
|
+
"invalid_pass_conditions",
|
|
193
|
+
"verdict"
|
|
194
|
+
],
|
|
195
|
+
"properties": {
|
|
196
|
+
"id": { "type": "string", "minLength": 1 },
|
|
197
|
+
"source_requirement": { "type": "string", "minLength": 1 },
|
|
198
|
+
"expected_outcome": { "type": "string", "minLength": 1 },
|
|
199
|
+
"preferred_verification": { "$ref": "#/$defs/capabilityList" },
|
|
200
|
+
"available_verification": { "$ref": "#/$defs/capabilityList" },
|
|
201
|
+
"evidence_strength": { "$ref": "#/$defs/evidenceStrength" },
|
|
202
|
+
"evidence": {
|
|
203
|
+
"type": "array",
|
|
204
|
+
"items": { "$ref": "#/$defs/evidenceEntry" }
|
|
205
|
+
},
|
|
206
|
+
"invalid_pass_conditions": {
|
|
207
|
+
"type": "array",
|
|
208
|
+
"items": { "type": "string" }
|
|
209
|
+
},
|
|
210
|
+
"verdict": {
|
|
211
|
+
"type": "string",
|
|
212
|
+
"enum": ["PASS", "FAIL", "BLOCKED", "CONDITIONAL_PASS"]
|
|
213
|
+
},
|
|
214
|
+
"limitation": { "type": "string" },
|
|
215
|
+
"capability_limitations": {
|
|
216
|
+
"type": "array",
|
|
217
|
+
"items": { "type": "string" }
|
|
218
|
+
},
|
|
219
|
+
"required_external_check": {
|
|
220
|
+
"type": "array",
|
|
221
|
+
"items": { "type": "string" }
|
|
222
|
+
},
|
|
223
|
+
"finding": { "type": "string" }
|
|
224
|
+
}
|
|
225
|
+
}
|
|
118
226
|
}
|
|
119
227
|
}
|
|
@@ -22,6 +22,10 @@ This skill owns evidence rows and supervisor verdict mapping. `$work-unit` may d
|
|
|
22
22
|
- BLOCKED applies when evidence cannot be obtained or sources conflict.
|
|
23
23
|
- Residual risks must not be hidden inside PASS.
|
|
24
24
|
- If residual risks, skipped checks, future work, or next recommended actions contain an unimplemented material source requirement, the matrix status is FAIL or BLOCKED, not PASS.
|
|
25
|
+
- Bug fixes and risky behavior changes require a red-capable feedback loop, or an explicit waiver explaining why no correct loop exists.
|
|
26
|
+
- Treat implementer output as a claim. Verification must map source requirement -> acceptance row -> outcome evidence -> verifier verdict -> supervisor audit.
|
|
27
|
+
- Tests, typecheck, lint, and build are evidence types, not automatic proof. They can satisfy a row only when the row is explicitly technical or the command observes the expected outcome.
|
|
28
|
+
- Outcome rows may use `CONDITIONAL_PASS` only as a row-level verdict for behavior that is strongly inferred but not fully observable in the current environment. A final supervisor PASS still requires material rows to be fully observed as PASS or explicitly waived.
|
|
25
29
|
|
|
26
30
|
## Source Fidelity Rules
|
|
27
31
|
|
|
@@ -46,13 +50,110 @@ If a requirement cannot be verified in the current environment, mark it BLOCKED
|
|
|
46
50
|
|
|
47
51
|
## Row Shape
|
|
48
52
|
|
|
49
|
-
| ID | Source Ref | Requirement | Evidence Required | Verification Method | Adversarial Check | Status | Evidence |
|
|
50
|
-
|
|
53
|
+
| ID | Source Ref | Requirement | Expected Outcome | Evidence Required | Preferred Verification | Available Verification | Evidence Strength | Verification Method | Feedback Loop | Evidence Classification | Adversarial Check | Invalid PASS Conditions | Status | Evidence |
|
|
54
|
+
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
51
55
|
|
|
52
56
|
Use statuses: Pending, PASS, FAIL, BLOCKED, Waived.
|
|
53
57
|
|
|
58
|
+
For outcome evaluation, each material row should also be expressible as:
|
|
59
|
+
|
|
60
|
+
```yaml
|
|
61
|
+
outcome_evaluation:
|
|
62
|
+
id:
|
|
63
|
+
source_requirement:
|
|
64
|
+
expected_outcome:
|
|
65
|
+
preferred_verification:
|
|
66
|
+
- browser_snapshot
|
|
67
|
+
- jsdom_render
|
|
68
|
+
- integration_test
|
|
69
|
+
- api_probe
|
|
70
|
+
- static_diff_inspection
|
|
71
|
+
available_verification:
|
|
72
|
+
- integration_test
|
|
73
|
+
- api_probe
|
|
74
|
+
- static_diff_inspection
|
|
75
|
+
evidence_strength:
|
|
76
|
+
strongest_possible:
|
|
77
|
+
- browser_snapshot
|
|
78
|
+
strongest_available:
|
|
79
|
+
- jsdom_render
|
|
80
|
+
- api_probe
|
|
81
|
+
- static_diff_inspection
|
|
82
|
+
limitation:
|
|
83
|
+
invalid_pass_conditions:
|
|
84
|
+
- tests only
|
|
85
|
+
- typecheck only
|
|
86
|
+
- mocked behavior only
|
|
87
|
+
- hardcoded fixture
|
|
88
|
+
- requirement moved to future work
|
|
89
|
+
- verifier did not inspect diff
|
|
90
|
+
verdict: PASS | FAIL | BLOCKED | CONDITIONAL_PASS
|
|
91
|
+
evidence:
|
|
92
|
+
- exact command, artifact, file, trace, UI state, or inspection result
|
|
93
|
+
limitation:
|
|
94
|
+
required_external_check:
|
|
95
|
+
- manual browser review
|
|
96
|
+
finding:
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
`CONDITIONAL_PASS` is not a final workflow status. It means the behavior is strongly inferred through the strongest available substitute evidence, while a stronger material capability remains unavailable. If that unavailable capability is required to prove the source requirement, the supervisor must mark the material row or workflow BLOCKED unless the user explicitly accepts a waiver or narrower scope.
|
|
100
|
+
|
|
101
|
+
## Capability Manifest
|
|
102
|
+
|
|
103
|
+
Before judging outcome rows, record the verification environment when capability limits are material:
|
|
104
|
+
|
|
105
|
+
```yaml
|
|
106
|
+
verification_environment:
|
|
107
|
+
shell: true | false
|
|
108
|
+
filesystem: true | false
|
|
109
|
+
git_diff: true | false
|
|
110
|
+
browser: true | false
|
|
111
|
+
playwright_mcp: true | false
|
|
112
|
+
network: true | false
|
|
113
|
+
capabilities:
|
|
114
|
+
- static_diff_inspection
|
|
115
|
+
- shell_command
|
|
116
|
+
- unit_test
|
|
117
|
+
- integration_test
|
|
118
|
+
- contract_test
|
|
119
|
+
- data_contract_test
|
|
120
|
+
- jsdom_render
|
|
121
|
+
- api_probe
|
|
122
|
+
- file_snapshot
|
|
123
|
+
- browser_snapshot
|
|
124
|
+
- human_required
|
|
125
|
+
limitations:
|
|
126
|
+
- "Responsive visual layout not verified because browser capability is unavailable"
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
Do not require browser snapshots as the core verifier. Use the strongest available observable predicate. If the source requirement truly depends on unavailable browser, visual, service, credential, network, or human-review capability, mark the row BLOCKED or `CONDITIONAL_PASS` with the limitation and required external check. Do not mark the row PASS.
|
|
130
|
+
|
|
54
131
|
For documentation and review workflows, also record a domain-specific review state when useful: Needs Revision, Approved With Caveats, Ready To Publish, SME Review Needed, Legal Review Needed, Stale, or Deferred. Map it back to PASS/FAIL/BLOCKED for supervisor decisions.
|
|
55
132
|
|
|
133
|
+
## Red-Capable Feedback Loops
|
|
134
|
+
|
|
135
|
+
For bug fixes and risky behavior changes, each material acceptance row must name a feedback loop:
|
|
136
|
+
|
|
137
|
+
```yaml
|
|
138
|
+
feedback_loop:
|
|
139
|
+
command_or_evidence:
|
|
140
|
+
red_capable: yes | no | not_applicable
|
|
141
|
+
exact_symptom_or_behavior:
|
|
142
|
+
deterministic: yes | no
|
|
143
|
+
expected_runtime:
|
|
144
|
+
agent_runnable: yes | no
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
`red_capable: yes` means the loop would have failed, or visibly shown the wrong behavior, before the fix. A related check is not red-capable unless it catches the exact symptom or behavior under review.
|
|
148
|
+
|
|
149
|
+
Classify every row's evidence as one of:
|
|
150
|
+
|
|
151
|
+
- `behavior_was_tested`: a red-capable command, test, UI state, artifact check, or reviewer action exercised the exact behavior.
|
|
152
|
+
- `related_check_ran`: a nearby test, build, lint, static check, or inspection ran but does not catch the exact behavior by itself.
|
|
153
|
+
- `substitute_evidence_accepted`: the correct loop is unavailable and the user or governing source accepted substitute evidence.
|
|
154
|
+
|
|
155
|
+
For bug fixes and risky behavior changes, PASS requires either `behavior_was_tested`, or `substitute_evidence_accepted` backed by waiver evidence. If no correct test surface exists, record that as an architecture or verification finding. Do not turn it into a quiet skipped check.
|
|
156
|
+
|
|
56
157
|
## Adversarial Checks
|
|
57
158
|
|
|
58
159
|
Consider:
|
|
@@ -81,6 +182,7 @@ Consider:
|
|
|
81
182
|
status: PASS|FAIL|BLOCKED
|
|
82
183
|
verified_work_unit:
|
|
83
184
|
verified_worker:
|
|
185
|
+
verification_environment:
|
|
84
186
|
matrix:
|
|
85
187
|
- id:
|
|
86
188
|
requirement:
|
|
@@ -88,6 +190,14 @@ matrix:
|
|
|
88
190
|
evidence:
|
|
89
191
|
verification_method:
|
|
90
192
|
finding:
|
|
193
|
+
outcome_evaluations:
|
|
194
|
+
- id:
|
|
195
|
+
source_requirement:
|
|
196
|
+
expected_outcome:
|
|
197
|
+
verdict:
|
|
198
|
+
evidence_strength:
|
|
199
|
+
evidence:
|
|
200
|
+
limitation:
|
|
91
201
|
findings:
|
|
92
202
|
residual_risks:
|
|
93
203
|
skipped_checks:
|
|
@@ -102,3 +212,5 @@ After repairs, verification must rerun against the affected rows and any regress
|
|
|
102
212
|
## Rubber-Stamp Guard
|
|
103
213
|
|
|
104
214
|
Reject verification that says only "looks good", "tests pass", or "implemented" without row-by-row evidence. Ask for exact evidence or mark BLOCKED.
|
|
215
|
+
|
|
216
|
+
Reject PASS when the evidence is only tests/typecheck/lint/build unless the row is explicitly scoped as a purely technical requirement or the command observes the expected user/system-visible outcome.
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
interface:
|
|
2
2
|
display_name: "Acceptance Matrix"
|
|
3
3
|
short_description: "Turn goals into verifiable criteria"
|
|
4
|
-
default_prompt: "Use $acceptance-matrix to define evidence-backed pass, fail, and
|
|
4
|
+
default_prompt: "Use $acceptance-matrix to define evidence-backed pass, fail, blocked, and row-level conditional outcome criteria with expected outcomes, verification capabilities, evidence strength, invalid PASS conditions, and waiver handling."
|
|
5
5
|
|
|
6
6
|
policy:
|
|
7
7
|
allow_implicit_invocation: false
|
|
@@ -22,10 +22,13 @@ The dossier does not own acceptance design. It references or embeds acceptance r
|
|
|
22
22
|
- known allowed and forbidden surfaces or artifacts
|
|
23
23
|
- acceptance criteria or acceptance draft
|
|
24
24
|
- required checks or evidence
|
|
25
|
+
- expected outcomes, capability limits, and invalid PASS conditions for outcome-bearing work
|
|
25
26
|
- worker role and report expectations
|
|
26
27
|
|
|
27
28
|
If these inputs are missing, create a discovery dossier or return BLOCKED.
|
|
28
29
|
|
|
30
|
+
For bug-fix dossiers and risky behavior-change dossiers, include a red-capable feedback loop or explain why no correct loop exists. The `feedback_loop` field is optional in `DossierV1` during the compatibility phase, but `validate-dossier` emits warnings when risky work omits it.
|
|
31
|
+
|
|
29
32
|
Before delegation, validate the dossier with:
|
|
30
33
|
|
|
31
34
|
```bash
|
|
@@ -66,6 +69,28 @@ reviewers:
|
|
|
66
69
|
acceptance_matrix:
|
|
67
70
|
adversarial_checks:
|
|
68
71
|
required_commands_or_evidence:
|
|
72
|
+
verification_environment:
|
|
73
|
+
shell: true | false
|
|
74
|
+
filesystem: true | false
|
|
75
|
+
git_diff: true | false
|
|
76
|
+
browser: true | false
|
|
77
|
+
playwright_mcp: true | false
|
|
78
|
+
network: true | false
|
|
79
|
+
outcome_evaluations:
|
|
80
|
+
- id:
|
|
81
|
+
source_requirement:
|
|
82
|
+
expected_outcome:
|
|
83
|
+
preferred_verification:
|
|
84
|
+
available_verification:
|
|
85
|
+
evidence_strength:
|
|
86
|
+
invalid_pass_conditions:
|
|
87
|
+
feedback_loop:
|
|
88
|
+
command_or_evidence:
|
|
89
|
+
red_capable: yes | no | not_applicable
|
|
90
|
+
exact_symptom_or_behavior:
|
|
91
|
+
deterministic: yes | no
|
|
92
|
+
expected_runtime:
|
|
93
|
+
agent_runnable: yes | no
|
|
69
94
|
worker_role:
|
|
70
95
|
worker_prompt:
|
|
71
96
|
supervisor_checkpoints:
|
|
@@ -85,6 +110,9 @@ The machine gate requires concrete strings or arrays for the core fields. Use `o
|
|
|
85
110
|
- Include forbidden surfaces even when the worker seems trustworthy.
|
|
86
111
|
- Convert unknowns into open questions, not hidden assumptions.
|
|
87
112
|
- Include adversarial checks for malformed input, stale state, authorization, schema drift, replay, no-op implementation, and untrusted sources when relevant.
|
|
113
|
+
- For outcome-bearing work, require workers to report row-mapped outcome evidence. The worker must not treat tests/typecheck/build as sufficient unless the row is explicitly technical or those commands observe the expected outcome.
|
|
114
|
+
- Include capability limitations and required external checks when an expected outcome depends on browser, visual, live-service, credential, network, or human-review capability that may be unavailable.
|
|
115
|
+
- For bug fixes and risky behavior changes, require a feedback loop that would catch the exact symptom or behavior. A related build, lint, or broad test run is not enough unless waiver evidence accepts it as substitute evidence.
|
|
88
116
|
- Require workers to report skipped checks and assumptions.
|
|
89
117
|
- For non-code work, use evidence such as citations, before/after excerpts, review rubrics, examples, artifact diffs, or explicit user decisions instead of commands.
|
|
90
118
|
- Require repair tickets to cite the verification finding or acceptance row they repair.
|
|
@@ -28,11 +28,12 @@ Do not create goals for small direct tasks. A goal is the state container for op
|
|
|
28
28
|
|
|
29
29
|
## Policy Dimensions
|
|
30
30
|
|
|
31
|
+
- Profile: `lean_work_unit_runner`, `strict_full_workflow`, or `planning_only`; choose before heavy artifacts, goals, workers, or implementation.
|
|
31
32
|
- Intake: whether every required intake decision has an explicit user answer, and which unanswered items must be re-asked before any work can start.
|
|
32
33
|
- Execution path: autonomous_goal or human_in_loop, from completed intake only.
|
|
33
34
|
- Mode: sequential, parallel, staged parallel, or discovery-first, from completed intake only.
|
|
34
35
|
- Approval: none, before worker delegation, before implementation, before verification, before repair, before publication, before irreversible action, before each unit, or path-gated.
|
|
35
|
-
- Delegation orchestration: selected transport, adapter availability, naming scheme, start timing, supervisor checkpoint cadence, terminal report collection, and stop behavior when automated delegation is unavailable.
|
|
36
|
+
- Delegation orchestration: selected transport, adapter availability, naming scheme, start timing, supervisor checkpoint cadence, terminal report collection, native resource close behavior, and stop behavior when automated delegation is unavailable.
|
|
36
37
|
- Repair limit: maximum repair loops per unit.
|
|
37
38
|
- Budget: time, token, command, cost, or file-change limits.
|
|
38
39
|
- Escalation: when to ask the user, delegate to a specialist worker, or stop.
|
|
@@ -45,20 +46,32 @@ Do not create goals for small direct tasks. A goal is the state container for op
|
|
|
45
46
|
Use this default unless the task says otherwise:
|
|
46
47
|
|
|
47
48
|
```yaml
|
|
49
|
+
profile_selection: before goal creation, heavy planning, worker delegation, implementation, verification, repair, final disposition, publication, or irreversible action
|
|
50
|
+
profiles:
|
|
51
|
+
lean_work_unit_runner: large bounded backlog, pure work units, low-footprint direct execution, compact ledger, no default subagents
|
|
52
|
+
strict_full_workflow: ambiguous, high-risk, delegated, source-of-truth, security, external-service, publication, or broad interpretation work
|
|
53
|
+
planning_only: intake, scope review, sequencing, risks, and recommendations without implementation
|
|
48
54
|
intake_required_when: every supervisor invocation before goal creation, planning beyond intake, worker delegation, implementation, verification, repair, final disposition, publication, or irreversible action
|
|
49
55
|
intake_question_count: ask the complete intake packet first; on follow-up ask every unanswered or ambiguous item
|
|
50
|
-
required_intake_decisions: objective_and_source, execution_path, mode, delegation, final_disposition, mutation_boundaries, state_artifacts
|
|
56
|
+
required_intake_decisions: objective_and_source, profile, execution_path, mode, delegation, final_disposition, mutation_boundaries, state_artifacts
|
|
51
57
|
use_judgment_defaults: none; user must answer every required intake decision explicitly
|
|
52
|
-
keyword_shortcuts: forbidden
|
|
58
|
+
keyword_shortcuts: forbidden for path, mode, delegation, final disposition, and boundaries; profile may be selected only from explicit user intent plus controlling source
|
|
53
59
|
mode: from completed intake only
|
|
54
60
|
execution_path: from completed intake only
|
|
55
61
|
approval_gate: path-gated; complete intake before any path-specific plan; human approval for human_in_loop plans; explicit completed-intake authorization for autonomous_goal; explicit completed-intake authorization for irreversible or user-visible publication
|
|
56
62
|
repair_limit_per_unit: 2
|
|
57
63
|
parallel_allowed_when: units do not share mutable surfaces
|
|
58
|
-
worker_delegation_rule: after complete intake, path gate, concrete dossier, and supported
|
|
59
|
-
native_transport_rule: after complete intake, path gate,
|
|
64
|
+
worker_delegation_rule: strict mode after complete intake, path gate, concrete dossier, supported automated transport, and supported resource close; lean mode only after explicit authorization or escalation trigger
|
|
65
|
+
native_transport_rule: after complete intake, path gate, concrete dossier, and confirmed close operation when the environment exposes approved thread or subagent tools
|
|
60
66
|
worker_name_template: wf/<workflow-slug>/<unit-id>-<role>-<dossier-slug>
|
|
61
|
-
supervisor_checkpoint_cadence: after worker start, terminal report, repair ticket creation, re-verification, and final disposition
|
|
67
|
+
supervisor_checkpoint_cadence: after worker start, native resource id capture, terminal report, native resource close, repair ticket creation, re-verification, and final disposition
|
|
68
|
+
native_worker_lifecycle:
|
|
69
|
+
required_fields: worker_name, transport, native_resource_id, status, terminal_report, close_action, close_result
|
|
70
|
+
codex_close_action: close_agent
|
|
71
|
+
final_outcome_gate: blocked if any native worker lacks close_result
|
|
72
|
+
lean_checkpoint_cadence: after each unit ledger update, at user-selected batch size, on blocker, on risk escalation, and final outcome
|
|
73
|
+
lean_unit_readiness: id, source_ref, scope, done, check, status
|
|
74
|
+
lean_resource_gates: no unapproved subagents, no broad scans unless needed for current unit, one active unit by default, stop or ask when memory/process churn threatens throughput
|
|
62
75
|
final_disposition_policy: from completed intake only; if set to ask_at_end, stop and ask at final disposition
|
|
63
76
|
workflow_unit_blocked_after: first material blocker may stop the unit while the Codex goal remains active
|
|
64
77
|
codex_goal_blocked_after: same material blocker across 3 consecutive goal turns and no meaningful progress
|
|
@@ -88,11 +101,21 @@ Do not run units in parallel when they edit the same files, documents, datasets,
|
|
|
88
101
|
|
|
89
102
|
```yaml
|
|
90
103
|
workflow:
|
|
104
|
+
profile:
|
|
91
105
|
intake:
|
|
92
106
|
execution_path:
|
|
93
107
|
mode:
|
|
94
108
|
approval_gates:
|
|
95
109
|
delegation_policy:
|
|
110
|
+
native_worker_lifecycle:
|
|
111
|
+
required_close_action:
|
|
112
|
+
open_native_workers:
|
|
113
|
+
lean_policy:
|
|
114
|
+
ledger:
|
|
115
|
+
unit_readiness:
|
|
116
|
+
batch_checkpoint:
|
|
117
|
+
focused_check:
|
|
118
|
+
escalation_triggers:
|
|
96
119
|
repair_limit:
|
|
97
120
|
parallel_rules:
|
|
98
121
|
budgets:
|
|
@@ -11,12 +11,43 @@ Use this skill to make work small enough that another agent can complete and ver
|
|
|
11
11
|
|
|
12
12
|
Work units can be bounded by code package, document section, source set, stakeholder decision, research question, design screen, workflow step, data slice, risk class, or output artifact. Do not force repository terminology onto non-code work.
|
|
13
13
|
|
|
14
|
+
## Product And Integration Slices
|
|
15
|
+
|
|
16
|
+
When work describes user-facing behavior or integration behavior, prefer tracer-bullet work units. A tracer-bullet unit cuts through the smallest useful set of layers needed to make one behavior observable, demonstrable, and verifiable.
|
|
17
|
+
|
|
18
|
+
Use `slice_type: tracer_bullet` for product implementation that can expose behavior to a user, API caller, integration partner, workflow operator, evaluator, or verifier.
|
|
19
|
+
|
|
20
|
+
Horizontal units are valid only for prefactoring, migration safety, infrastructure, documentation, research, or a dependency that cannot yet be verified as behavior. Use one of these non-product slice types when a tracer bullet is not the right shape:
|
|
21
|
+
|
|
22
|
+
- `prefactor`
|
|
23
|
+
- `migration`
|
|
24
|
+
- `research`
|
|
25
|
+
- `document`
|
|
26
|
+
- `risk_boundary`
|
|
27
|
+
|
|
28
|
+
Every product or integration implementation unit must name:
|
|
29
|
+
|
|
30
|
+
```yaml
|
|
31
|
+
slice_type: tracer_bullet | prefactor | migration | research | document | risk_boundary
|
|
32
|
+
observable_behavior:
|
|
33
|
+
expected_outcome:
|
|
34
|
+
demo_or_verification:
|
|
35
|
+
layers_touched:
|
|
36
|
+
horizontal_slice_justification:
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
For `tracer_bullet`, `observable_behavior`, `expected_outcome`, and `demo_or_verification` are required and `layers_touched` should name the smallest layers needed for that behavior. For horizontal or non-product slice types, set `observable_behavior` and `expected_outcome` to `not_applicable` only when the unit names a concrete `horizontal_slice_justification`.
|
|
40
|
+
|
|
41
|
+
Reject vague horizontal feature phases such as "backend foundation", "frontend pass", "data model work", or "integration prep" unless the unit has a valid non-product `slice_type`, a concrete dependency it unlocks, and a verification method for that slice.
|
|
42
|
+
|
|
14
43
|
## Unit Quality Bar
|
|
15
44
|
|
|
16
45
|
A good work unit has:
|
|
17
46
|
|
|
18
47
|
- one objective
|
|
19
48
|
- a stable unit ID suitable for dossier and worker naming
|
|
49
|
+
- a `slice_type` that matches the work shape
|
|
50
|
+
- observable behavior, expected outcome, and demo or verification for product or integration behavior
|
|
20
51
|
- named dependencies
|
|
21
52
|
- explicit in-scope and out-of-scope surfaces
|
|
22
53
|
- known sources or source gaps
|
|
@@ -34,12 +65,13 @@ Work-unit drafts coarse done criteria only. Use `$acceptance-matrix` when those
|
|
|
34
65
|
|
|
35
66
|
1. Restate the parent objective.
|
|
36
67
|
2. Identify natural boundaries: user workflow, package, document, API contract, risk class, or dependency layer.
|
|
37
|
-
3.
|
|
38
|
-
4.
|
|
39
|
-
5. Mark
|
|
40
|
-
6.
|
|
41
|
-
7.
|
|
42
|
-
8.
|
|
68
|
+
3. For product or integration behavior, split into tracer-bullet units before horizontal layers.
|
|
69
|
+
4. Split remaining work into units that can be verified independently.
|
|
70
|
+
5. Mark dependencies and ordering constraints.
|
|
71
|
+
6. Mark which units can run in parallel only when they do not mutate the same surfaces.
|
|
72
|
+
7. Define readiness and done criteria for each unit.
|
|
73
|
+
8. If sources are absent, create a discovery/intake unit before production work.
|
|
74
|
+
9. Identify the first unit that is safe to dossier.
|
|
43
75
|
|
|
44
76
|
For over-broad one-pass requests, produce a sequencing recommendation and invoke or mirror `$loop-policy` fields for mode, parallel safety, approval gates, and repair limits.
|
|
45
77
|
|
|
@@ -69,6 +101,12 @@ units:
|
|
|
69
101
|
worker_slug:
|
|
70
102
|
title:
|
|
71
103
|
objective:
|
|
104
|
+
slice_type:
|
|
105
|
+
observable_behavior:
|
|
106
|
+
expected_outcome:
|
|
107
|
+
demo_or_verification:
|
|
108
|
+
layers_touched:
|
|
109
|
+
horizontal_slice_justification:
|
|
72
110
|
in_scope:
|
|
73
111
|
out_of_scope:
|
|
74
112
|
dependencies:
|
|
@@ -89,3 +127,5 @@ first_recommended_unit:
|
|
|
89
127
|
## Stop Gates
|
|
90
128
|
|
|
91
129
|
Stop when a unit cannot name a done criterion, required source, or boundary. Ask for a decision or return a smaller discovery unit.
|
|
130
|
+
|
|
131
|
+
Stop when a product or integration implementation unit lacks `observable_behavior`, `expected_outcome`, or `demo_or_verification`. Return a tracer-bullet split instead of a horizontal phase unless the unit has a valid non-product `slice_type` and `horizontal_slice_justification`.
|
|
@@ -61,11 +61,12 @@ For documentation work, start with `DOCUMENTATION-BRIEF.md` unless the user prov
|
|
|
61
61
|
## Artifact Selection
|
|
62
62
|
|
|
63
63
|
- `.workflow/WORKFLOW.md`: overall objective, policy, state, units, and next action.
|
|
64
|
+
- `.workflow/LEDGER.md`: compact lean-runner state for large bounded backlogs, with one row per work unit and targeted check evidence.
|
|
64
65
|
- `.workflow/SOURCE-CORPUS.md`: source map, authority ranking, contradictions, gaps.
|
|
65
66
|
- `.workflow/SPEC.md`: human-reviewable interpretation contract, requirement coverage, Q&A, and approval decision before final work units.
|
|
66
67
|
- `.workflow/WORK-UNITS.md`: decomposition and sequencing.
|
|
67
68
|
- `.workflow/DOSSIER.md`: delegation contract for one unit.
|
|
68
|
-
- `.workflow/WORKER-MAP.md`: worker names, roles, transports, dossiers, dependencies, start conditions, report status, and supervisor checkpoints.
|
|
69
|
+
- `.workflow/WORKER-MAP.md`: worker names, roles, transports, native resource ids, dossiers, dependencies, start conditions, report status, close actions, close results, and supervisor checkpoints.
|
|
69
70
|
- `.workflow/ACCEPTANCE-MATRIX.md`: verifiable done criteria.
|
|
70
71
|
- `.workflow/VERIFICATION-REPORT.md`: evidence-backed PASS/FAIL/BLOCKED report.
|
|
71
72
|
- `.workflow/REPAIR-TICKETS.md`: actionable repair tasks from verifier findings.
|