workflow-supervisor 0.1.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -49,27 +49,19 @@
49
49
  },
50
50
  "evidence": {
51
51
  "type": "array",
52
- "items": {
53
- "type": "string"
54
- }
52
+ "items": { "$ref": "#/$defs/evidenceEntry" }
55
53
  },
56
54
  "checks_run": {
57
55
  "type": "array",
58
- "items": {
59
- "type": "string"
60
- }
56
+ "items": { "$ref": "#/$defs/evidenceEntry" }
61
57
  },
62
58
  "skipped_checks": {
63
59
  "type": "array",
64
- "items": {
65
- "type": "string"
66
- }
60
+ "items": { "$ref": "#/$defs/evidenceEntry" }
67
61
  },
68
62
  "findings": {
69
63
  "type": "array",
70
- "items": {
71
- "type": "string"
72
- }
64
+ "items": { "$ref": "#/$defs/evidenceEntry" }
73
65
  },
74
66
  "blocking_question": {
75
67
  "type": ["string", "null"]
@@ -77,6 +69,13 @@
77
69
  "next_action": {
78
70
  "type": "string"
79
71
  },
72
+ "verification_environment": {
73
+ "$ref": "#/$defs/verificationEnvironment"
74
+ },
75
+ "outcome_evaluations": {
76
+ "type": "array",
77
+ "items": { "$ref": "#/$defs/outcomeEvaluation" }
78
+ },
80
79
  "adapter": {
81
80
  "type": ["object", "null"],
82
81
  "additionalProperties": false,
@@ -115,5 +114,114 @@
115
114
  "reason": {
116
115
  "type": ["string", "null"]
117
116
  }
117
+ },
118
+ "$defs": {
119
+ "evidenceEntry": {
120
+ "anyOf": [
121
+ { "type": "string" },
122
+ {
123
+ "type": "object",
124
+ "additionalProperties": true
125
+ }
126
+ ]
127
+ },
128
+ "verificationCapability": {
129
+ "type": "string",
130
+ "enum": [
131
+ "static_diff_inspection",
132
+ "diff_inspection",
133
+ "shell_command",
134
+ "unit_test",
135
+ "integration_test",
136
+ "contract_test",
137
+ "data_contract_test",
138
+ "jsdom_render",
139
+ "api_probe",
140
+ "file_snapshot",
141
+ "generated_html_snapshot",
142
+ "component_tree_snapshot",
143
+ "accessibility_tree_snapshot",
144
+ "state_machine_test",
145
+ "browser_snapshot",
146
+ "human_required",
147
+ "manual_review"
148
+ ]
149
+ },
150
+ "capabilityList": {
151
+ "type": "array",
152
+ "items": { "$ref": "#/$defs/verificationCapability" }
153
+ },
154
+ "verificationEnvironment": {
155
+ "type": "object",
156
+ "additionalProperties": false,
157
+ "properties": {
158
+ "shell": { "type": "boolean" },
159
+ "filesystem": { "type": "boolean" },
160
+ "git_diff": { "type": "boolean" },
161
+ "browser": { "type": "boolean" },
162
+ "playwright_mcp": { "type": "boolean" },
163
+ "network": { "type": "boolean" },
164
+ "capabilities": { "$ref": "#/$defs/capabilityList" },
165
+ "limitations": {
166
+ "type": "array",
167
+ "items": { "type": "string" }
168
+ }
169
+ }
170
+ },
171
+ "evidenceStrength": {
172
+ "type": "object",
173
+ "additionalProperties": false,
174
+ "required": ["strongest_possible", "strongest_available"],
175
+ "properties": {
176
+ "strongest_possible": { "$ref": "#/$defs/capabilityList" },
177
+ "strongest_available": { "$ref": "#/$defs/capabilityList" },
178
+ "limitation": { "type": "string" }
179
+ }
180
+ },
181
+ "outcomeEvaluation": {
182
+ "type": "object",
183
+ "additionalProperties": false,
184
+ "required": [
185
+ "id",
186
+ "source_requirement",
187
+ "expected_outcome",
188
+ "preferred_verification",
189
+ "available_verification",
190
+ "evidence_strength",
191
+ "evidence",
192
+ "invalid_pass_conditions",
193
+ "verdict"
194
+ ],
195
+ "properties": {
196
+ "id": { "type": "string", "minLength": 1 },
197
+ "source_requirement": { "type": "string", "minLength": 1 },
198
+ "expected_outcome": { "type": "string", "minLength": 1 },
199
+ "preferred_verification": { "$ref": "#/$defs/capabilityList" },
200
+ "available_verification": { "$ref": "#/$defs/capabilityList" },
201
+ "evidence_strength": { "$ref": "#/$defs/evidenceStrength" },
202
+ "evidence": {
203
+ "type": "array",
204
+ "items": { "$ref": "#/$defs/evidenceEntry" }
205
+ },
206
+ "invalid_pass_conditions": {
207
+ "type": "array",
208
+ "items": { "type": "string" }
209
+ },
210
+ "verdict": {
211
+ "type": "string",
212
+ "enum": ["PASS", "FAIL", "BLOCKED", "CONDITIONAL_PASS"]
213
+ },
214
+ "limitation": { "type": "string" },
215
+ "capability_limitations": {
216
+ "type": "array",
217
+ "items": { "type": "string" }
218
+ },
219
+ "required_external_check": {
220
+ "type": "array",
221
+ "items": { "type": "string" }
222
+ },
223
+ "finding": { "type": "string" }
224
+ }
225
+ }
118
226
  }
119
227
  }
@@ -22,6 +22,10 @@ This skill owns evidence rows and supervisor verdict mapping. `$work-unit` may d
22
22
  - BLOCKED applies when evidence cannot be obtained or sources conflict.
23
23
  - Residual risks must not be hidden inside PASS.
24
24
  - If residual risks, skipped checks, future work, or next recommended actions contain an unimplemented material source requirement, the matrix status is FAIL or BLOCKED, not PASS.
25
+ - Bug fixes and risky behavior changes require a red-capable feedback loop, or an explicit waiver explaining why no correct loop exists.
26
+ - Treat implementer output as a claim. Verification must map source requirement -> acceptance row -> outcome evidence -> verifier verdict -> supervisor audit.
27
+ - Tests, typecheck, lint, and build are evidence types, not automatic proof. They can satisfy a row only when the row is explicitly technical or the command observes the expected outcome.
28
+ - Outcome rows may use `CONDITIONAL_PASS` only as a row-level verdict for behavior that is strongly inferred but not fully observable in the current environment. A final supervisor PASS still requires material rows to be fully observed as PASS or explicitly waived.
25
29
 
26
30
  ## Source Fidelity Rules
27
31
 
@@ -46,13 +50,110 @@ If a requirement cannot be verified in the current environment, mark it BLOCKED
46
50
 
47
51
  ## Row Shape
48
52
 
49
- | ID | Source Ref | Requirement | Evidence Required | Verification Method | Adversarial Check | Status | Evidence |
50
- |---|---|---|---|---|---|---|---|
53
+ | ID | Source Ref | Requirement | Expected Outcome | Evidence Required | Preferred Verification | Available Verification | Evidence Strength | Verification Method | Feedback Loop | Evidence Classification | Adversarial Check | Invalid PASS Conditions | Status | Evidence |
54
+ |---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
51
55
 
52
56
  Use statuses: Pending, PASS, FAIL, BLOCKED, Waived.
53
57
 
58
+ For outcome evaluation, each material row should also be expressible as:
59
+
60
+ ```yaml
61
+ outcome_evaluation:
62
+ id:
63
+ source_requirement:
64
+ expected_outcome:
65
+ preferred_verification:
66
+ - browser_snapshot
67
+ - jsdom_render
68
+ - integration_test
69
+ - api_probe
70
+ - static_diff_inspection
71
+ available_verification:
72
+ - integration_test
73
+ - api_probe
74
+ - static_diff_inspection
75
+ evidence_strength:
76
+ strongest_possible:
77
+ - browser_snapshot
78
+ strongest_available:
79
+ - integration_test
80
+ - api_probe
81
+ - static_diff_inspection
82
+ limitation:
83
+ invalid_pass_conditions:
84
+ - tests only
85
+ - typecheck only
86
+ - mocked behavior only
87
+ - hardcoded fixture
88
+ - requirement moved to future work
89
+ - verifier did not inspect diff
90
+ verdict: PASS | FAIL | BLOCKED | CONDITIONAL_PASS
91
+ evidence:
92
+ - exact command, artifact, file, trace, UI state, or inspection result
93
+ limitation:
94
+ required_external_check:
95
+ - manual browser review
96
+ finding:
97
+ ```
98
+
99
+ `CONDITIONAL_PASS` is not a final workflow status. It means the behavior is strongly inferred through the strongest available substitute evidence, while a stronger material capability remains unavailable. If that unavailable capability is required to prove the source requirement, the supervisor must mark the material row or workflow BLOCKED unless the user explicitly accepts a waiver or narrower scope.
100
+
101
+ ## Capability Manifest
102
+
103
+ Before judging outcome rows, record the verification environment when capability limits are material:
104
+
105
+ ```yaml
106
+ verification_environment:
107
+ shell: true | false
108
+ filesystem: true | false
109
+ git_diff: true | false
110
+ browser: true | false
111
+ playwright_mcp: true | false
112
+ network: true | false
113
+ capabilities:
114
+ - static_diff_inspection
115
+ - shell_command
116
+ - unit_test
117
+ - integration_test
118
+ - contract_test
119
+ - data_contract_test
120
+ - jsdom_render
121
+ - api_probe
122
+ - file_snapshot
123
+ - browser_snapshot
124
+ - human_required
125
+ limitations:
126
+ - "Responsive visual layout not verified because browser capability is unavailable"
127
+ ```
128
+
129
+ Do not require browser snapshots as the core verifier. Use the strongest available observable predicate. If the source requirement truly depends on unavailable browser, visual, service, credential, network, or human-review capability, mark the row BLOCKED or `CONDITIONAL_PASS` with the limitation and required external check. Do not mark the row PASS.
130
+
54
131
  For documentation and review workflows, also record a domain-specific review state when useful: Needs Revision, Approved With Caveats, Ready To Publish, SME Review Needed, Legal Review Needed, Stale, or Deferred. Map it back to PASS/FAIL/BLOCKED for supervisor decisions.
55
132
 
133
+ ## Red-Capable Feedback Loops
134
+
135
+ For bug fixes and risky behavior changes, each material acceptance row must name a feedback loop:
136
+
137
+ ```yaml
138
+ feedback_loop:
139
+ command_or_evidence:
140
+ red_capable: yes | no | not_applicable
141
+ exact_symptom_or_behavior:
142
+ deterministic: yes | no
143
+ expected_runtime:
144
+ agent_runnable: yes | no
145
+ ```
146
+
147
+ `red_capable: yes` means the loop would have failed, or visibly shown the wrong behavior, before the fix. A related check is not red-capable unless it catches the exact symptom or behavior under review.
148
+
149
+ Classify every row's evidence as one of:
150
+
151
+ - `behavior_was_tested`: a red-capable command, test, UI state, artifact check, or reviewer action exercised the exact behavior.
152
+ - `related_check_ran`: a nearby test, build, lint, static check, or inspection ran but does not catch the exact behavior by itself.
153
+ - `substitute_evidence_accepted`: the correct loop is unavailable and the user or governing source accepted substitute evidence.
154
+
155
+ For bug fixes and risky behavior changes, PASS requires either `behavior_was_tested`, or `substitute_evidence_accepted` backed by waiver evidence. If no correct test surface exists, record that as an architecture or verification finding. Do not turn it into a quiet skipped check.
156
+
56
157
  ## Adversarial Checks
57
158
 
58
159
  Consider:
@@ -81,6 +182,7 @@ Consider:
81
182
  status: PASS|FAIL|BLOCKED
82
183
  verified_work_unit:
83
184
  verified_worker:
185
+ verification_environment:
84
186
  matrix:
85
187
  - id:
86
188
  requirement:
@@ -88,6 +190,14 @@ matrix:
88
190
  evidence:
89
191
  verification_method:
90
192
  finding:
193
+ outcome_evaluations:
194
+ - id:
195
+ source_requirement:
196
+ expected_outcome:
197
+ verdict:
198
+ evidence_strength:
199
+ evidence:
200
+ limitation:
91
201
  findings:
92
202
  residual_risks:
93
203
  skipped_checks:
@@ -102,3 +212,5 @@ After repairs, verification must rerun against the affected rows and any regress
102
212
  ## Rubber-Stamp Guard
103
213
 
104
214
  Reject verification that says only "looks good", "tests pass", or "implemented" without row-by-row evidence. Ask for exact evidence or mark BLOCKED.
215
+
216
+ Reject PASS when the evidence is only tests/typecheck/build unless the row is explicitly scoped as a purely technical requirement or the command observes the expected user/system-visible outcome.
@@ -1,7 +1,7 @@
1
1
  interface:
2
2
  display_name: "Acceptance Matrix"
3
3
  short_description: "Turn goals into verifiable criteria"
4
- default_prompt: "Use $acceptance-matrix to define evidence-backed pass, fail, and blocked criteria."
4
+ default_prompt: "Use $acceptance-matrix to define evidence-backed pass, fail, blocked, and row-level conditional outcome criteria with expected outcomes, verification capabilities, evidence strength, invalid PASS conditions, and waiver handling."
5
5
 
6
6
  policy:
7
7
  allow_implicit_invocation: false
@@ -22,10 +22,13 @@ The dossier does not own acceptance design. It references or embeds acceptance r
22
22
  - known allowed and forbidden surfaces or artifacts
23
23
  - acceptance criteria or acceptance draft
24
24
  - required checks or evidence
25
+ - expected outcomes, capability limits, and invalid PASS conditions for outcome-bearing work
25
26
  - worker role and report expectations
26
27
 
27
28
  If these inputs are missing, create a discovery dossier or return BLOCKED.
28
29
 
30
+ For bug-fix dossiers and risky behavior-change dossiers, include a red-capable feedback loop or explain why no correct loop exists. The `feedback_loop` field is optional in `DossierV1` during the compatibility phase, but `validate-dossier` emits warnings when risky work omits it.
31
+
29
32
  Before delegation, validate the dossier with:
30
33
 
31
34
  ```bash
@@ -66,6 +69,28 @@ reviewers:
66
69
  acceptance_matrix:
67
70
  adversarial_checks:
68
71
  required_commands_or_evidence:
72
+ verification_environment:
73
+ shell: true | false
74
+ filesystem: true | false
75
+ git_diff: true | false
76
+ browser: true | false
77
+ playwright_mcp: true | false
78
+ network: true | false
79
+ outcome_evaluations:
80
+ - id:
81
+ source_requirement:
82
+ expected_outcome:
83
+ preferred_verification:
84
+ available_verification:
85
+ evidence_strength:
86
+ invalid_pass_conditions:
87
+ feedback_loop:
88
+ command_or_evidence:
89
+ red_capable: yes | no | not_applicable
90
+ exact_symptom_or_behavior:
91
+ deterministic: yes | no
92
+ expected_runtime:
93
+ agent_runnable: yes | no
69
94
  worker_role:
70
95
  worker_prompt:
71
96
  supervisor_checkpoints:
@@ -85,6 +110,9 @@ The machine gate requires concrete strings or arrays for the core fields. Use `o
85
110
  - Include forbidden surfaces even when the worker seems trustworthy.
86
111
  - Convert unknowns into open questions, not hidden assumptions.
87
112
  - Include adversarial checks for malformed input, stale state, authorization, schema drift, replay, no-op implementation, and untrusted sources when relevant.
113
+ - For outcome-bearing work, require workers to report row-mapped outcome evidence. The worker must not treat tests/typecheck/build as sufficient unless the row is explicitly technical or those commands observe the expected outcome.
114
+ - Include capability limitations and required external checks when an expected outcome depends on browser, visual, live-service, credential, network, or human-review capability that may be unavailable.
115
+ - For bug fixes and risky behavior changes, require a feedback loop that would catch the exact symptom or behavior. A related build, lint, or broad test run is not enough unless waiver evidence accepts it as substitute evidence.
88
116
  - Require workers to report skipped checks and assumptions.
89
117
  - For non-code work, use evidence such as citations, before/after excerpts, review rubrics, examples, artifact diffs, or explicit user decisions instead of commands.
90
118
  - Require repair tickets to cite the verification finding or acceptance row they repair.
@@ -28,11 +28,12 @@ Do not create goals for small direct tasks. A goal is the state container for op
28
28
 
29
29
  ## Policy Dimensions
30
30
 
31
+ - Profile: `lean_work_unit_runner`, `strict_full_workflow`, or `planning_only`; choose before heavy artifacts, goals, workers, or implementation.
31
32
  - Intake: whether every required intake decision has an explicit user answer, and which unanswered items must be re-asked before any work can start.
32
33
  - Execution path: autonomous_goal or human_in_loop, from completed intake only.
33
34
  - Mode: sequential, parallel, staged parallel, or discovery-first, from completed intake only.
34
35
  - Approval: none, before worker delegation, before implementation, before verification, before repair, before publication, before irreversible action, before each unit, or path-gated.
35
- - Delegation orchestration: selected transport, adapter availability, naming scheme, start timing, supervisor checkpoint cadence, terminal report collection, and stop behavior when automated delegation is unavailable.
36
+ - Delegation orchestration: selected transport, adapter availability, naming scheme, start timing, supervisor checkpoint cadence, terminal report collection, native resource close behavior, and stop behavior when automated delegation is unavailable.
36
37
  - Repair limit: maximum repair loops per unit.
37
38
  - Budget: time, token, command, cost, or file-change limits.
38
39
  - Escalation: when to ask the user, delegate to a specialist worker, or stop.
@@ -45,20 +46,32 @@ Do not create goals for small direct tasks. A goal is the state container for op
45
46
  Use this default unless the task says otherwise:
46
47
 
47
48
  ```yaml
49
+ profile_selection: before goal creation, heavy planning, worker delegation, implementation, verification, repair, final disposition, publication, or irreversible action
50
+ profiles:
51
+ lean_work_unit_runner: large bounded backlog, pure work units, low-footprint direct execution, compact ledger, no default subagents
52
+ strict_full_workflow: ambiguous, high-risk, delegated, source-of-truth, security, external-service, publication, or broad interpretation work
53
+ planning_only: intake, scope review, sequencing, risks, and recommendations without implementation
48
54
  intake_required_when: every supervisor invocation before goal creation, planning beyond intake, worker delegation, implementation, verification, repair, final disposition, publication, or irreversible action
49
55
  intake_question_count: ask the complete intake packet first; on follow-up ask every unanswered or ambiguous item
50
- required_intake_decisions: objective_and_source, execution_path, mode, delegation, final_disposition, mutation_boundaries, state_artifacts
56
+ required_intake_decisions: objective_and_source, profile, execution_path, mode, delegation, final_disposition, mutation_boundaries, state_artifacts
51
57
  use_judgment_defaults: none; user must answer every required intake decision explicitly
52
- keyword_shortcuts: forbidden; do not infer path, mode, delegation, final disposition, or boundaries from prompt keywords
58
+ keyword_shortcuts: forbidden for path, mode, delegation, final disposition, and boundaries; profile may be selected only from explicit user intent plus controlling source
53
59
  mode: from completed intake only
54
60
  execution_path: from completed intake only
55
61
  approval_gate: path-gated; complete intake before any path-specific plan; human approval for human_in_loop plans; explicit completed-intake authorization for autonomous_goal; explicit completed-intake authorization for irreversible or user-visible publication
56
62
  repair_limit_per_unit: 2
57
63
  parallel_allowed_when: units do not share mutable surfaces
58
- worker_delegation_rule: after complete intake, path gate, concrete dossier, and supported automated transport
59
- native_transport_rule: after complete intake, path gate, and concrete dossier when the environment exposes approved thread or subagent tools
64
+ worker_delegation_rule: strict_full_workflow profile after complete intake, path gate, concrete dossier, supported automated transport, and supported resource close; lean_work_unit_runner profile only after explicit authorization or escalation trigger
65
+ native_transport_rule: after complete intake, path gate, concrete dossier, and confirmed close operation when the environment exposes approved thread or subagent tools
60
66
  worker_name_template: wf/<workflow-slug>/<unit-id>-<role>-<dossier-slug>
61
- supervisor_checkpoint_cadence: after worker start, terminal report, repair ticket creation, re-verification, and final disposition
67
+ supervisor_checkpoint_cadence: after worker start, native resource id capture, terminal report, native resource close, repair ticket creation, re-verification, and final disposition
68
+ native_worker_lifecycle:
69
+ required_fields: worker_name, transport, native_resource_id, status, terminal_report, close_action, close_result
70
+ codex_close_action: close_agent
71
+ final_outcome_gate: blocked if any native worker lacks close_result
72
+ lean_checkpoint_cadence: after each unit ledger update, at user-selected batch size, on blocker, on risk escalation, and final outcome
73
+ lean_unit_readiness: id, source_ref, scope, done, check, status
74
+ lean_resource_gates: no unapproved subagents, no broad scans unless needed for current unit, one active unit by default, stop or ask when memory/process churn threatens throughput
62
75
  final_disposition_policy: from completed intake only; if set to ask_at_end, stop and ask at final disposition
63
76
  workflow_unit_blocked_after: first material blocker may stop the unit while the Codex goal remains active
64
77
  codex_goal_blocked_after: same material blocker across 3 consecutive goal turns and no meaningful progress
@@ -88,11 +101,21 @@ Do not run units in parallel when they edit the same files, documents, datasets,
88
101
 
89
102
  ```yaml
90
103
  workflow:
104
+ profile:
91
105
  intake:
92
106
  execution_path:
93
107
  mode:
94
108
  approval_gates:
95
109
  delegation_policy:
110
+ native_worker_lifecycle:
111
+ required_close_action:
112
+ open_native_workers:
113
+ lean_policy:
114
+ ledger:
115
+ unit_readiness:
116
+ batch_checkpoint:
117
+ focused_check:
118
+ escalation_triggers:
96
119
  repair_limit:
97
120
  parallel_rules:
98
121
  budgets:
@@ -11,12 +11,43 @@ Use this skill to make work small enough that another agent can complete and ver
11
11
 
12
12
  Work units can be bounded by code package, document section, source set, stakeholder decision, research question, design screen, workflow step, data slice, risk class, or output artifact. Do not force repository terminology onto non-code work.
13
13
 
14
+ ## Product And Integration Slices
15
+
16
+ When work describes user-facing behavior or integration behavior, prefer tracer-bullet work units. A tracer-bullet unit cuts through the smallest useful set of layers needed to make one behavior observable, demonstrable, and verifiable.
17
+
18
+ Use `slice_type: tracer_bullet` for product implementation that can expose behavior to a user, API caller, integration partner, workflow operator, evaluator, or verifier.
19
+
20
+ Horizontal units are valid only for prefactoring, migration safety, infrastructure, documentation, research, or a dependency that cannot yet be verified as behavior. Use one of these non-product slice types when a tracer bullet is not the right shape:
21
+
22
+ - `prefactor`
23
+ - `migration`
24
+ - `research`
25
+ - `document`
26
+ - `risk_boundary`
27
+
28
+ Every product or integration implementation unit must name:
29
+
30
+ ```yaml
31
+ slice_type: tracer_bullet | prefactor | migration | research | document | risk_boundary
32
+ observable_behavior:
33
+ expected_outcome:
34
+ demo_or_verification:
35
+ layers_touched:
36
+ horizontal_slice_justification:
37
+ ```
38
+
39
+ For `tracer_bullet`, `observable_behavior`, `expected_outcome`, and `demo_or_verification` are required and `layers_touched` should name the smallest layers needed for that behavior. For horizontal or non-product slice types, set `observable_behavior` and `expected_outcome` to `not_applicable` only when the unit names a concrete `horizontal_slice_justification`.
40
+
41
+ Reject vague horizontal feature phases such as "backend foundation", "frontend pass", "data model work", or "integration prep" unless the unit has a valid non-product `slice_type`, a concrete dependency it unlocks, and a verification method for that slice.
42
+
14
43
  ## Unit Quality Bar
15
44
 
16
45
  A good work unit has:
17
46
 
18
47
  - one objective
19
48
  - a stable unit ID suitable for dossier and worker naming
49
+ - a `slice_type` that matches the work shape
50
+ - observable behavior, expected outcome, and demo or verification for product or integration behavior
20
51
  - named dependencies
21
52
  - explicit in-scope and out-of-scope surfaces
22
53
  - known sources or source gaps
@@ -34,12 +65,13 @@ Work-unit drafts coarse done criteria only. Use `$acceptance-matrix` when those
34
65
 
35
66
  1. Restate the parent objective.
36
67
  2. Identify natural boundaries: user workflow, package, document, API contract, risk class, or dependency layer.
37
- 3. Split into units that can be verified independently.
38
- 4. Mark dependencies and ordering constraints.
39
- 5. Mark which units can run in parallel only when they do not mutate the same surfaces.
40
- 6. Define readiness and done criteria for each unit.
41
- 7. If sources are absent, create a discovery/intake unit before production work.
42
- 8. Identify the first unit that is safe to dossier.
68
+ 3. For product or integration behavior, split into tracer-bullet units before horizontal layers.
69
+ 4. Split remaining work into units that can be verified independently.
70
+ 5. Mark dependencies and ordering constraints.
71
+ 6. Mark which units can run in parallel only when they do not mutate the same surfaces.
72
+ 7. Define readiness and done criteria for each unit.
73
+ 8. If sources are absent, create a discovery/intake unit before production work.
74
+ 9. Identify the first unit that is safe to dossier.
43
75
 
44
76
  For over-broad one-pass requests, produce a sequencing recommendation and invoke or mirror `$loop-policy` fields for mode, parallel safety, approval gates, and repair limits.
45
77
 
@@ -69,6 +101,12 @@ units:
69
101
  worker_slug:
70
102
  title:
71
103
  objective:
104
+ slice_type:
105
+ observable_behavior:
106
+ expected_outcome:
107
+ demo_or_verification:
108
+ layers_touched:
109
+ horizontal_slice_justification:
72
110
  in_scope:
73
111
  out_of_scope:
74
112
  dependencies:
@@ -89,3 +127,5 @@ first_recommended_unit:
89
127
  ## Stop Gates
90
128
 
91
129
  Stop when a unit cannot name a done criterion, required source, or boundary. Ask for a decision or return a smaller discovery unit.
130
+
131
+ Stop when a product or integration implementation unit lacks `observable_behavior`, `expected_outcome`, or `demo_or_verification`. Return a tracer-bullet split instead of a horizontal phase unless the unit has a valid non-product `slice_type` and `horizontal_slice_justification`.
@@ -61,11 +61,12 @@ For documentation work, start with `DOCUMENTATION-BRIEF.md` unless the user prov
61
61
  ## Artifact Selection
62
62
 
63
63
  - `.workflow/WORKFLOW.md`: overall objective, policy, state, units, and next action.
64
+ - `.workflow/LEDGER.md`: compact lean-runner state for large bounded backlogs, with one row per work unit and targeted check evidence.
64
65
  - `.workflow/SOURCE-CORPUS.md`: source map, authority ranking, contradictions, gaps.
65
66
  - `.workflow/SPEC.md`: human-reviewable interpretation contract, requirement coverage, Q&A, and approval decision before final work units.
66
67
  - `.workflow/WORK-UNITS.md`: decomposition and sequencing.
67
68
  - `.workflow/DOSSIER.md`: delegation contract for one unit.
68
- - `.workflow/WORKER-MAP.md`: worker names, roles, transports, dossiers, dependencies, start conditions, report status, and supervisor checkpoints.
69
+ - `.workflow/WORKER-MAP.md`: worker names, roles, transports, native resource ids, dossiers, dependencies, start conditions, report status, close actions, close results, and supervisor checkpoints.
69
70
  - `.workflow/ACCEPTANCE-MATRIX.md`: verifiable done criteria.
70
71
  - `.workflow/VERIFICATION-REPORT.md`: evidence-backed PASS/FAIL/BLOCKED report.
71
72
  - `.workflow/REPAIR-TICKETS.md`: actionable repair tasks from verifier findings.