okstra 0.45.1 → 0.47.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/docs/superpowers/plans/2026-06-04-adversarial-verification.md +570 -0
- package/docs/superpowers/plans/2026-06-04-stage-cohesion-planner.md +351 -0
- package/docs/superpowers/plans/2026-06-04-stage-run-batching.md +457 -0
- package/docs/superpowers/specs/2026-05-20-implementation-planning-multi-stage-design.md +2 -0
- package/docs/superpowers/specs/2026-06-04-adversarial-verification-design.md +176 -0
- package/docs/superpowers/specs/2026-06-04-stage-splitting-cost-aware-design.md +98 -0
- package/package.json +1 -1
- package/runtime/BUILD.json +2 -2
- package/runtime/agents/SKILL.md +2 -1
- package/runtime/prompts/launch.template.md +1 -0
- package/runtime/prompts/profiles/_common-contract.md +1 -1
- package/runtime/prompts/profiles/_implementation-deliverable.md +4 -3
- package/runtime/prompts/profiles/_implementation-executor.md +10 -12
- package/runtime/prompts/profiles/error-analysis.md +2 -0
- package/runtime/prompts/profiles/implementation-planning.md +3 -2
- package/runtime/prompts/profiles/implementation.md +1 -0
- package/runtime/prompts/profiles/requirements-discovery.md +2 -0
- package/runtime/python/okstra_ctl/render.py +13 -5
- package/runtime/python/okstra_ctl/run.py +69 -42
- package/runtime/skills/okstra-convergence/SKILL.md +114 -5
- package/runtime/validators/validate-implementation-plan-stages.py +61 -13
|
@@ -46,6 +46,7 @@ Configure this in the `convergence` block of `task-manifest.json`. If the block
|
|
|
46
46
|
| `enabled` | `true` | If `false`, skip the convergence loop and use the existing consensus/divergence method |
|
|
47
47
|
| `maxRounds` | phase-aware: `1` for `requirements-discovery`, `2` otherwise (range 1–3) | Maximum number of re-verification rounds. Discovery's routing/missing-input outputs gain little from a second round; other phases (especially `error-analysis`) keep `2`. Lead resolves the effective value when the manifest omits the key and records it in `config.maxRounds` of the convergence state artifact. |
|
|
48
48
|
| `verificationMode` | `"lightweight"` | `"lightweight"` or `"full-reanalysis"` |
|
|
49
|
+
| `adversarial` | phase-aware: `true` for `requirements-discovery` / `error-analysis`, `false` otherwise | When `true`, Phase 5.5 runs in **adversarial mode** (see §"Adversarial Verification Mode"): verifiers actively try to refute each finding, the burden of proof sits on the claim, and `verificationMode` is forced to `"full-reanalysis"` scoped to the finding's cited evidence. Resolved by `scripts/okstra_ctl/render.py` `_build_convergence_block` and recorded in `config.adversarial` of the convergence state artifact. |
|
|
49
50
|
|
|
50
51
|
**Auto-disable rule (BLOCKING).** Convergence requires ≥2 analyser workers to produce a meaningful consensus tally. When the active profile's `Required workers:` block (see `prompts/profiles/*.md`) resolves to fewer than 2 analyser workers — e.g. `release-handoff` (zero analyser workers, lead-only) — the lead MUST treat `convergence.enabled` as `false` for that run regardless of manifest configuration, skip Phase 5.5 and the plan-body verification round, and record `finalState: "converged"` with `totalRounds: 0` and an explanatory note in `config` (e.g. `"autoDisabled": "fewer-than-two-analysers"`). The plan-body round inherits the same rule via its `gating=false` advisory path.
|
|
51
52
|
|
|
@@ -192,6 +193,62 @@ Use the findings as a guide, but reanalyze the original code/data yourself.
|
|
|
192
193
|
Advantages: High accuracy
|
|
193
194
|
Disadvantages: 2–3 times the cost, increased time
|
|
194
195
|
|
|
196
|
+
## Adversarial Verification Mode
|
|
197
|
+
|
|
198
|
+
Active only when `config.adversarial == true` (default for `requirements-discovery` and `error-analysis`; see §"Configuration"). When `false`, every rule in this section is inert and the collaborative behaviour documented elsewhere in this skill applies unchanged.
|
|
199
|
+
|
|
200
|
+
In adversarial mode the verifier's job inverts: instead of confirming a peer's finding, the verifier **tries to break it**, and the burden of proof sits on the claim — a finding survives only if refutation attempts fail.
|
|
201
|
+
|
|
202
|
+
### Scoped full-reanalysis (BLOCKING)
|
|
203
|
+
|
|
204
|
+
Adversarial mode forces `verificationMode = "full-reanalysis"`, but the re-analysis is **scoped to the evidence the finding under attack cites** (the file paths / line ranges / log lines in its `originEvidence`), plus the immediately surrounding context. The verifier MUST NOT re-read the whole task brief, instruction-set, or `final-report-template.md`. This keeps the documented "single largest avoidable cost in requirements-discovery and error-analysis" (see §"Reverify prompt: required-reading suppression") bounded while making the refutation real rather than a text-only argument.
|
|
205
|
+
|
|
206
|
+
### Adversarial verdict semantics
|
|
207
|
+
|
|
208
|
+
The persisted `verdict` enum is unchanged (`agree | disagree | supplement | verification-error`). The prompt-facing labels are adversarial and map down on persistence:
|
|
209
|
+
|
|
210
|
+
| Prompt label | Persisted `verdict` | Meaning |
|
|
211
|
+
|---|---|---|
|
|
212
|
+
| SURVIVES | `agree` | Actively tried to refute and failed — the claim withstood the attack. |
|
|
213
|
+
| SURVIVES-WITH-CAVEAT | `supplement` | Holds, but a scope limit / extra condition / precondition was found. |
|
|
214
|
+
| REFUTED | `disagree` | The claim was broken (or failed to prove itself). MUST carry a `disagreeBasis`. |
|
|
215
|
+
|
|
216
|
+
Each `disagree` vote records a new field `disagreeBasis`:
|
|
217
|
+
|
|
218
|
+
| `disagreeBasis` | Meaning |
|
|
219
|
+
|---|---|
|
|
220
|
+
| `counter-evidence` | The verifier cited contradicting evidence (`file:line` / log line) in `explanation`. A **hard refute**. |
|
|
221
|
+
| `burden-not-met` | The verifier re-inspected the cited evidence and could neither confirm nor refute → the claim failed to prove itself ("when uncertain, lean to rejection"). |
|
|
222
|
+
|
|
223
|
+
A `disagree` with `disagreeBasis == null` is a contract violation in adversarial mode — every refutation must state which of the two grounds it rests on. Bare "I disagree" without re-inspection is not allowed.
|
|
224
|
+
|
|
225
|
+
### Adversarial classification (replaces the §"Convergence Algorithm" per-round classifier when `adversarial == true`)
|
|
226
|
+
|
|
227
|
+
`verification-error` votes are excluded from numerator and denominator exactly as in the collaborative classifier. For each finding `F` in the queue at a round:
|
|
228
|
+
|
|
229
|
+
```text
|
|
230
|
+
disagrees = [v for v in non-error votes if v.verdict == "disagree"]
|
|
231
|
+
hard_refutes = [v for v in disagrees if v.disagreeBasis == "counter-evidence"]
|
|
232
|
+
all_others_disagree = (every non-discoverer non-error vote is "disagree")
|
|
233
|
+
|
|
234
|
+
IF len(disagrees) == 0:
|
|
235
|
+
resolve F as "full-consensus" (or "partial-consensus" if any SUPPLEMENT/caveat)
|
|
236
|
+
ELIF all_others_disagree:
|
|
237
|
+
resolve F as "worker-unique" # only the discoverer still holds it
|
|
238
|
+
ELIF len(hard_refutes) >= 1:
|
|
239
|
+
# an evidence-backed refute exists and the roster is split → the claim is disputed
|
|
240
|
+
carry F forward; at the LAST executed round classify it "contested"
|
|
241
|
+
ELIF burden-not-met disagrees are a majority of non-error votes (per the Majority definition in the Convergence Algorithm section):
|
|
242
|
+
carry F forward; at the LAST executed round classify it "contested"
|
|
243
|
+
ELSE:
|
|
244
|
+
# a lone weak (burden-not-met) doubt against an otherwise-surviving claim
|
|
245
|
+
resolve F as "partial-consensus"
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
`contested` remains a **final classification only** (per §"Scope and Terminology"): a disputed finding is carried forward through intermediate rounds and labelled `contested` only at the last executed round. For `requirements-discovery` (`effectiveMaxRounds = 1`) the single round IS the last round, so a split-with-hard-refute finding is labelled `contested` in that one round. The final-classifier block of §"Convergence Algorithm" is unchanged; this section only changes how each round's verdicts resolve into queue actions.
|
|
249
|
+
|
|
250
|
+
Design intent: one `counter-evidence` refute is enough to deny a claim consensus (it cannot rise above `contested` no matter how many others AGREE), while a single `burden-not-met` doubt does not by itself sink an otherwise-surviving claim — only a majority of burden-not-met doubts does. When every non-discoverer refutes (all_others_disagree), the finding is worker-unique regardless of whether those refutes were counter-evidence or burden-not-met — only the discoverer still holds it. A SUPPLEMENT/caveat with zero disagrees lands partial-consensus rather than full-consensus, because a caveat means the claim does not pass cleanly (this differs from the collaborative classifier, where SUPPLEMENT counts as full agreement).
|
|
251
|
+
|
|
195
252
|
## Re-verification Agent Dispatch
|
|
196
253
|
|
|
197
254
|
### Sponsorship Optimization
|
|
@@ -282,6 +339,55 @@ For each finding, respond as:
|
|
|
282
339
|
**Verdict**: ...
|
|
283
340
|
```
|
|
284
341
|
|
|
342
|
+
### Adversarial Re-verification Prompt
|
|
343
|
+
|
|
344
|
+
Used instead of the lightweight/full-reanalysis prompt when `config.adversarial == true`. The required anchor headers (§"Required reverify-prompt anchor headers") are identical. The `[Required reading]` clause is suppressed; only the cited-evidence paths of the items under attack are injected (see §"Adversarial Verification Mode" → Scoped full-reanalysis).
|
|
345
|
+
|
|
346
|
+
```
|
|
347
|
+
You are <worker-role> performing ADVERSARIAL re-verification for <task-key> (round <N>).
|
|
348
|
+
|
|
349
|
+
## Instructions
|
|
350
|
+
|
|
351
|
+
Your job is to BREAK each finding below, not to confirm it. For EACH finding,
|
|
352
|
+
open the cited evidence directly and actively search for evidence that the claim
|
|
353
|
+
is wrong, overstated, or unproven. Then respond with exactly one verdict:
|
|
354
|
+
|
|
355
|
+
- **REFUTED**: You broke the claim. State the basis:
|
|
356
|
+
- counter-evidence — you found contradicting evidence (give file:line or log line), OR
|
|
357
|
+
- burden-not-met — you re-inspected the cited evidence and could neither confirm
|
|
358
|
+
nor refute it (the claim has not proven itself).
|
|
359
|
+
- **SURVIVES**: You actively tried to refute it and failed — the claim withstood the attack.
|
|
360
|
+
- **SURVIVES-WITH-CAVEAT**: It holds, but a scope limit / extra condition / missing
|
|
361
|
+
precondition exists (state it).
|
|
362
|
+
|
|
363
|
+
The burden of proof is on the claim. If after inspecting the cited evidence you remain
|
|
364
|
+
uncertain, your verdict is REFUTED with basis = burden-not-met.
|
|
365
|
+
|
|
366
|
+
Inspect ONLY the evidence each finding cites and its immediate surroundings. Do NOT
|
|
367
|
+
re-read the task brief, instruction-set, or report template.
|
|
368
|
+
|
|
369
|
+
## Findings to verify
|
|
370
|
+
|
|
371
|
+
### F-001: <one-line summary>
|
|
372
|
+
**Origin**: <worker role>
|
|
373
|
+
**Cited evidence**: <file paths, line numbers, log lines from origin worker>
|
|
374
|
+
|
|
375
|
+
### F-002: <one-line summary>
|
|
376
|
+
...
|
|
377
|
+
|
|
378
|
+
## Response format
|
|
379
|
+
|
|
380
|
+
### F-001
|
|
381
|
+
**Verdict**: REFUTED | SURVIVES | SURVIVES-WITH-CAVEAT
|
|
382
|
+
**Basis** (only if REFUTED): counter-evidence | burden-not-met
|
|
383
|
+
**Explanation**: <2-3 sentences; for counter-evidence include the file:line you found>
|
|
384
|
+
|
|
385
|
+
### F-002
|
|
386
|
+
...
|
|
387
|
+
```
|
|
388
|
+
|
|
389
|
+
When persisting votes, map SURVIVES→`agree`, SURVIVES-WITH-CAVEAT→`supplement`, REFUTED→`disagree`, and copy the stated Basis into `votes.<worker>.disagreeBasis` (null for non-REFUTED verdicts).
|
|
390
|
+
|
|
285
391
|
### Full Re-analysis Re-verification Prompt
|
|
286
392
|
|
|
287
393
|
```
|
|
@@ -324,10 +430,11 @@ Save it to `runs/<task-type>/state/convergence-<task-type>-<seq>.json`.
|
|
|
324
430
|
|
|
325
431
|
```json
|
|
326
432
|
{
|
|
327
|
-
"schemaVersion": "1.1",
|
|
433
|
+
"schemaVersion": "1.2",
|
|
328
434
|
"taskKey": "<task-key>",
|
|
329
435
|
"config": {
|
|
330
436
|
"enabled": true,
|
|
437
|
+
"adversarial": false,
|
|
331
438
|
"maxRounds": 2,
|
|
332
439
|
"effectiveMaxRounds": 2,
|
|
333
440
|
"verificationMode": "lightweight"
|
|
@@ -345,7 +452,7 @@ Save it to `runs/<task-type>/state/convergence-<task-type>-<seq>.json`.
|
|
|
345
452
|
{
|
|
346
453
|
"round": 1,
|
|
347
454
|
"votes": {
|
|
348
|
-
"codex-worker": { "verdict": "agree", "explanation": "<brief>" },
|
|
455
|
+
"codex-worker": { "verdict": "agree", "disagreeBasis": null, "explanation": "<brief>" },
|
|
349
456
|
"gemini-worker": { "verdict": "supplement", "explanation": "<brief>" }
|
|
350
457
|
}
|
|
351
458
|
}
|
|
@@ -385,11 +492,13 @@ Save it to `runs/<task-type>/state/convergence-<task-type>-<seq>.json`.
|
|
|
385
492
|
|
|
386
493
|
Schema rules:
|
|
387
494
|
|
|
388
|
-
- `schemaVersion`: literal string `"1.
|
|
495
|
+
- `schemaVersion`: literal string `"1.2"` for all new runs — both adversarial and collaborative. v1.2 adds `config.adversarial` and `votes.<worker>.disagreeBasis`, written as `false` / `null` respectively on collaborative runs. Readers MUST accept `"1.0"` / `"1.1"` / `"1.2"` for historical artifacts and treat any missing field as `null`.
|
|
496
|
+
- `config.adversarial`: boolean. `true` when this run used adversarial verification (default for `requirements-discovery` / `error-analysis`). When `true`, `config.verificationMode` is `"full-reanalysis"` (scoped) and every `disagree` vote carries a non-null `disagreeBasis`.
|
|
389
497
|
- `config.effectiveMaxRounds`: the integer the lead actually used after resolving the phase-aware default (`1` for `requirements-discovery`, `2` otherwise). MUST equal `config.maxRounds` when the manifest explicitly set it.
|
|
390
498
|
- `findings[].ticketIds`: array of ticket keys from Phase 4 grouping (parsed per the Round 0 step 5 rule). MAY be empty when the discovering worker tagged the finding `unknown`.
|
|
391
499
|
- `findings[].rounds[].votes.<worker>.verdict`: enum, one of `agree | disagree | supplement | verification-error`. Lower-case tokens; map upper-case AGREE/DISAGREE/SUPPLEMENT verdicts emitted by workers to their lower-case form before persisting. `verification-error` is reserved for terminal non-result dispatches (§"Worker failure handling in reverify").
|
|
392
|
-
- `findings[].
|
|
500
|
+
- `findings[].rounds[].votes.<worker>.disagreeBasis`: enum `counter-evidence | burden-not-met | null`. Non-null only when `verdict == "disagree"` AND `config.adversarial == true`; `null` (or absent, treated as null) otherwise. See §"Adversarial Verification Mode".
|
|
501
|
+
- `findings[].classification`: enum, one of `full-consensus | partial-consensus | worker-unique | contested`. No other value is permitted.
|
|
393
502
|
- `roundHistory[].inputQueueSize`: queue size at the start of this round.
|
|
394
503
|
- `roundHistory[].resolvedCount`: number of findings that exited the queue this round (sum of full+partial+worker-unique classifications produced this round).
|
|
395
504
|
- `roundHistory[].carriedForwardCount`: queue size at the END of this round — the single definition. In-round insertions into the queue are forbidden, so this always equals `inputQueueSize - resolvedCount`. The pseudocode's per-item `carriedForwardCount += 1` accumulator is a counting convenience that lands on the same value; persist the post-round queue length, not the loop accumulator, if the two ever diverge.
|
|
@@ -397,7 +506,7 @@ Schema rules:
|
|
|
397
506
|
- `roundHistory[].skippedWorkers[]`: per-worker `{worker, reason}` for workers with no items to verify OR with a non-result dispatch.
|
|
398
507
|
- `round2SkippedReason`: literal enum `queue-empty | max-rounds-1 | all-reverify-non-result | not-skipped`. Always present. Use `"not-skipped"` when Round 2 actually ran. Use `"max-rounds-1"` when `effectiveMaxRounds == 1` (Round 2 was never attempted). Use `"queue-empty"` when Round 1 fully drained the queue. Use `"all-reverify-non-result"` when all Round 1 dispatches terminated as non-result.
|
|
399
508
|
- `finalClassificationCounts`: post-loop counts. Required field with keys `fullConsensus`, `partialConsensus`, `contested`, `workerUnique`.
|
|
400
|
-
- `finalState ∈ {converged, max-rounds-reached, aborted-non-result}`. Assigned by the lead at WHILE-loop exit: `converged` when the queue is empty at the end of any round; `max-rounds-reached` when the loop exits because `roundIndex == effectiveMaxRounds` with the queue still non-empty; `aborted-non-result` when the loop exits via the Worker-failure BREAK (
|
|
509
|
+
- `finalState ∈ {converged, max-rounds-reached, aborted-non-result}`. Assigned by the lead at WHILE-loop exit: `converged` when the queue is empty at the end of any round; `max-rounds-reached` when the loop exits because `roundIndex == effectiveMaxRounds` with the queue still non-empty; `aborted-non-result` when the loop exits via the Worker-failure BREAK (per the "Worker failure handling in reverify" section, rule 4). `aborted-non-result` is the new v1.1 value.
|
|
401
510
|
- `totalRounds`: count of rounds actually executed (not `effectiveMaxRounds`). May be `0` when Round 0 produced no queue items (all findings reached consensus during grouping).
|
|
402
511
|
|
|
403
512
|
## Output
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
|
-
"""S1–S8 checks for the Stage Map structure of an approved
|
|
2
|
+
"""S1–S9 checks for the Stage Map structure of an approved
|
|
3
3
|
implementation-planning final-report.md. Run from prepare_task_bundle
|
|
4
4
|
of `implementation` task or standalone."""
|
|
5
5
|
|
|
@@ -23,6 +23,11 @@ REQUIRED_SUBSECTIONS = (
|
|
|
23
23
|
"Stage Validation",
|
|
24
24
|
)
|
|
25
25
|
|
|
26
|
+
EXIT_CONTRACT_HEADING = re.compile(r"^###\s+Stage Exit Contract\b", re.M)
|
|
27
|
+
# best-effort path token: only slash-containing paths count as files, so
|
|
28
|
+
# endpoints (`/bar`), env vars (`BAZ_MODE`), and extensionless tokens are skipped.
|
|
29
|
+
PATH_TOKEN = re.compile(r"(?:[\w.@-]+/)+[\w.@-]+")
|
|
30
|
+
|
|
26
31
|
|
|
27
32
|
@dataclass
|
|
28
33
|
class StageMeta:
|
|
@@ -35,7 +40,7 @@ class StageMeta:
|
|
|
35
40
|
|
|
36
41
|
@dataclass
|
|
37
42
|
class ValidationError:
|
|
38
|
-
code: str # S1..S8
|
|
43
|
+
code: str # S1..S9
|
|
39
44
|
stage: int # 0 = global
|
|
40
45
|
message: str
|
|
41
46
|
|
|
@@ -85,6 +90,20 @@ def _parse_stage_map(text: str) -> Tuple[List[StageMeta], List[ValidationError]]
|
|
|
85
90
|
return rows, errors
|
|
86
91
|
|
|
87
92
|
|
|
93
|
+
def _slice_stage_section(text: str, stage_number: int) -> str:
|
|
94
|
+
"""Return the body of `## 4.5.<n> Stage <n>:` up to the next stage heading."""
|
|
95
|
+
start_m = re.search(
|
|
96
|
+
rf"^##\s+4\.5\.{stage_number}\s+Stage\s+{stage_number}\s*:", text, re.M
|
|
97
|
+
)
|
|
98
|
+
if not start_m:
|
|
99
|
+
return ""
|
|
100
|
+
start = start_m.end()
|
|
101
|
+
nxt = re.search(
|
|
102
|
+
rf"^##\s+4\.5\.{stage_number + 1}\s+Stage\s+", text[start:], re.M
|
|
103
|
+
)
|
|
104
|
+
return text[start: start + nxt.start()] if nxt else text[start:]
|
|
105
|
+
|
|
106
|
+
|
|
88
107
|
def _count_effective_steps(section: str) -> int:
|
|
89
108
|
m = re.search(r"^###\s+Stepwise Execution Order\b", section, re.M)
|
|
90
109
|
if not m:
|
|
@@ -114,19 +133,13 @@ def _count_effective_steps(section: str) -> int:
|
|
|
114
133
|
def _check_each_stage_section(text: str, stages: List[StageMeta]) -> List[ValidationError]:
|
|
115
134
|
errs: List[ValidationError] = []
|
|
116
135
|
for s in stages:
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
136
|
+
if not re.search(
|
|
137
|
+
rf"^##\s+4\.5\.{s.stage_number}\s+Stage\s+{s.stage_number}\s*:", text, re.M
|
|
138
|
+
):
|
|
120
139
|
errs.append(ValidationError("S3", s.stage_number,
|
|
121
140
|
f"stage section '## 4.5.{s.stage_number} Stage {s.stage_number}:' missing"))
|
|
122
141
|
continue
|
|
123
|
-
|
|
124
|
-
start = start_m.end()
|
|
125
|
-
nxt = re.search(
|
|
126
|
-
rf"^##\s+4\.5\.{s.stage_number + 1}\s+Stage\s+",
|
|
127
|
-
text[start:], re.M,
|
|
128
|
-
)
|
|
129
|
-
section = text[start: start + nxt.start()] if nxt else text[start:]
|
|
142
|
+
section = _slice_stage_section(text, s.stage_number)
|
|
130
143
|
|
|
131
144
|
for sub in REQUIRED_SUBSECTIONS:
|
|
132
145
|
if not re.search(rf"^###\s+{re.escape(sub)}\b", section, re.M):
|
|
@@ -181,8 +194,42 @@ def _check_depends_on(stages: List[StageMeta]) -> List[ValidationError]:
|
|
|
181
194
|
return errs
|
|
182
195
|
|
|
183
196
|
|
|
197
|
+
def _extract_exit_contract_files(section: str) -> set:
    """Collect the file paths mentioned under `### Stage Exit Contract`.

    Only the text between the `### Stage Exit Contract` heading and the next
    `###` subsection heading (or the end of `section`) is scanned. Tokens are
    recognised with `PATH_TOKEN`, i.e. only slash-containing tokens count.

    Each token is normalised before it is collected so that the same file
    compares equal however it was written: trailing `.` / `/` characters
    (typically sentence punctuation swallowed by the token pattern) and
    leading `./` prefixes are removed.

    Args:
        section: Text of a single stage section.

    Returns:
        Set of normalised path strings; empty when the section has no
        `### Stage Exit Contract` heading.
    """
    m = EXIT_CONTRACT_HEADING.search(section)
    if not m:
        return set()
    body = section[m.end():]
    nxt = re.search(r"^###\s+\w", body, re.M)
    if nxt:
        body = body[: nxt.start()]
    files = set()
    for token in PATH_TOKEN.findall(body):
        # The token pattern accepts ".", so "edit src/a.py." yields
        # "src/a.py."; strip that so it matches a bare "src/a.py" elsewhere.
        token = token.rstrip("./")
        while token.startswith("./"):
            token = token[2:]
        if token:
            files.add(token)
    return files
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def _check_parallel_safety(text: str, stages: List[StageMeta]) -> List[ValidationError]:
    """S9: two `depends-on (none)` stages must not predict the same file —
    otherwise two parallel implementation runs would edit it concurrently.

    Only stages whose `depends_on` is empty are considered. For each of them
    the files named in its Stage Exit Contract are extracted, and every pair
    of such stages is compared.

    Args:
        text: Full report text.
        stages: Stage rows parsed from the Stage Map.

    Returns:
        One global (stage 0) S9 error per pair of stages that share at least
        one predicted file, ordered by ascending stage numbers.
    """
    from itertools import combinations

    files = {
        s.stage_number: _extract_exit_contract_files(
            _slice_stage_section(text, s.stage_number)
        )
        for s in stages
        if not s.depends_on
    }
    errs: List[ValidationError] = []
    # combinations() over the sorted stage numbers visits each unordered pair
    # once, in the same (lower, higher) order as a nested index loop.
    for a, b in combinations(sorted(files), 2):
        shared = files[a] & files[b]
        if shared:
            errs.append(ValidationError("S9", 0,
                f"parallel stages {a} and {b} share predicted file(s): "
                f"{', '.join(sorted(shared))}"))
    return errs
|
|
229
|
+
|
|
230
|
+
|
|
184
231
|
def collect_validation_errors(text: str) -> List[ValidationError]:
|
|
185
|
-
"""All S1–S8 checks against the report text; empty list means valid.
|
|
232
|
+
"""All S1–S9 checks against the report text; empty list means valid.
|
|
186
233
|
|
|
187
234
|
S1 (missing `## 4.5 Stage Map` heading) makes the rest unparseable, so it
|
|
188
235
|
short-circuits. Shared by `main()` (CLI / implementation entry) and the
|
|
@@ -198,6 +245,7 @@ def collect_validation_errors(text: str) -> List[ValidationError]:
|
|
|
198
245
|
if stages:
|
|
199
246
|
errors.extend(_check_each_stage_section(text, stages))
|
|
200
247
|
errors.extend(_check_depends_on(stages))
|
|
248
|
+
errors.extend(_check_parallel_safety(text, stages))
|
|
201
249
|
return errors
|
|
202
250
|
|
|
203
251
|
|