@windyroad/architect 0.12.2 → 0.13.0-preview.479
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/hooks/architect-mark-reviewed.sh +9 -3
- package/hooks/test/architect-mark-reviewed-verdict-grep.bats +126 -0
- package/package.json +1 -1
- package/skills/create-adr/SKILL.md +56 -9
- package/skills/create-adr/test/create-adr-substance-confirm-pattern.bats +169 -0
|
@@ -21,12 +21,18 @@ fi
|
|
|
21
21
|
|
|
22
22
|
case "$SUBAGENT" in
|
|
23
23
|
*architect*)
|
|
24
|
-
# Parse verdict from agent output text (no temp file needed)
|
|
24
|
+
# Parse verdict from agent output text (no temp file needed).
|
|
25
|
+
# Anchored to the canonical heading shape from
|
|
26
|
+
# packages/architect/agents/agent.md "How to Report"
|
|
27
|
+
# (`**Architecture Review: PASS**` / `**Architecture Review: ISSUES FOUND**`).
|
|
28
|
+
# Tolerates optional `> ` blockquote prefix + leading whitespace.
|
|
29
|
+
# Anchored match (not substring) prevents P181 false-positive FAIL when
|
|
30
|
+
# body prose narratively references the ISSUES FOUND verdict.
|
|
25
31
|
AGENT_OUTPUT=$(_get_tool_output)
|
|
26
32
|
VERDICT=""
|
|
27
|
-
if echo "$AGENT_OUTPUT" | grep -
|
|
33
|
+
if echo "$AGENT_OUTPUT" | grep -qE '^[[:space:]]*>?[[:space:]]*\*\*Architecture Review: PASS\*\*'; then
|
|
28
34
|
VERDICT="PASS"
|
|
29
|
-
elif echo "$AGENT_OUTPUT" | grep -
|
|
35
|
+
elif echo "$AGENT_OUTPUT" | grep -qE '^[[:space:]]*>?[[:space:]]*\*\*Architecture Review: ISSUES FOUND\*\*'; then
|
|
30
36
|
VERDICT="FAIL"
|
|
31
37
|
fi
|
|
32
38
|
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
#!/usr/bin/env bats
|
|
2
|
+
|
|
3
|
+
# Behavioural tests for architect-mark-reviewed.sh verdict parsing (P181).
|
|
4
|
+
# Drives the hook with realistic agent-output payloads and asserts marker
|
|
5
|
+
# creation matches the heading-shape contract in
|
|
6
|
+
# packages/architect/agents/agent.md "How to Report".
|
|
7
|
+
#
|
|
8
|
+
# P181 root cause: literal-substring grep `grep -q "ISSUES FOUND"` matches
|
|
9
|
+
# anywhere in the response — including prose narrative that mentions the
|
|
10
|
+
# verdict string without it being the canonical heading. Anchored heading
|
|
11
|
+
# match fixes the false-positive FAIL → marker silently not written → edits blocked.
|
|
12
|
+
|
|
13
|
+
setup() {
|
|
14
|
+
HOOKS_DIR="$(cd "$(dirname "$BATS_TEST_FILENAME")/.." && pwd)"
|
|
15
|
+
HOOK="$HOOKS_DIR/architect-mark-reviewed.sh"
|
|
16
|
+
TEST_SESSION="bats-arch-verdict-$$-${BATS_TEST_NUMBER}"
|
|
17
|
+
REVIEW_MARKER="/tmp/architect-reviewed-${TEST_SESSION}"
|
|
18
|
+
HASH_MARKER="/tmp/architect-reviewed-${TEST_SESSION}.hash"
|
|
19
|
+
PLAN_MARKER="/tmp/architect-plan-reviewed-${TEST_SESSION}"
|
|
20
|
+
rm -f "$REVIEW_MARKER" "$HASH_MARKER" "$PLAN_MARKER"
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
teardown() {
|
|
24
|
+
rm -f "$REVIEW_MARKER" "$HASH_MARKER" "$PLAN_MARKER"
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
# Build a PostToolUse:Agent input JSON with the given agent-output text.
|
|
28
|
+
# Uses python3 (already a hard dep of gate-helpers.sh) for safe escaping.
|
|
29
|
+
_make_input() {
  # Emits, on stdout, a PostToolUse:Agent payload as a single JSON object.
  # Arguments: $1 - agent-output text placed in tool_response.content[0].text
  # Globals:   TEST_SESSION (read) - used as the payload's session_id
  local text="$1"
  # The Python program is single-quoted so the shell expands nothing inside
  # it. Both values travel via argv, so quotes or backslashes in either one
  # cannot alter the program text; json.dumps performs all escaping.
  python3 -c '
import json, sys
text, session_id = sys.argv[1], sys.argv[2]
print(json.dumps({
    "session_id": session_id,
    "tool_name": "Agent",
    "tool_input": {"subagent_type": "wr-architect:agent"},
    "tool_response": {"content": [{"type": "text", "text": text}]}
}))
' "$text" "$TEST_SESSION"
}
|
|
42
|
+
|
|
43
|
+
# ---------------------------------------------------------------------------
|
|
44
|
+
# Sanity: canonical headings classify correctly
|
|
45
|
+
# ---------------------------------------------------------------------------
|
|
46
|
+
|
|
47
|
+
@test "verdict-grep: marker drops on canonical PASS heading" {
|
|
48
|
+
INPUT=$(_make_input "**Architecture Review: PASS**
|
|
49
|
+
|
|
50
|
+
No conflicts with existing decisions.")
|
|
51
|
+
echo "$INPUT" | "$HOOK"
|
|
52
|
+
[ -f "$REVIEW_MARKER" ]
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
@test "verdict-grep: marker NOT created on canonical ISSUES FOUND heading" {
|
|
56
|
+
INPUT=$(_make_input "**Architecture Review: ISSUES FOUND**
|
|
57
|
+
|
|
58
|
+
1. [Decision Conflict] — ADR-009 violation.")
|
|
59
|
+
echo "$INPUT" | "$HOOK"
|
|
60
|
+
[ ! -f "$REVIEW_MARKER" ]
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
@test "verdict-grep: marker drops on PASS heading with blockquote prefix" {
|
|
64
|
+
INPUT=$(_make_input "> **Architecture Review: PASS**
|
|
65
|
+
> No conflicts.")
|
|
66
|
+
echo "$INPUT" | "$HOOK"
|
|
67
|
+
[ -f "$REVIEW_MARKER" ]
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
@test "verdict-grep: marker NOT created on ISSUES FOUND heading with blockquote prefix" {
|
|
71
|
+
INPUT=$(_make_input "> **Architecture Review: ISSUES FOUND**
|
|
72
|
+
> 1. [Conflict] ...")
|
|
73
|
+
echo "$INPUT" | "$HOOK"
|
|
74
|
+
[ ! -f "$REVIEW_MARKER" ]
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
# ---------------------------------------------------------------------------
|
|
78
|
+
# P181 bug-fix cases: substring-anywhere false-positive FAIL
|
|
79
|
+
# ---------------------------------------------------------------------------
|
|
80
|
+
|
|
81
|
+
@test "verdict-grep: marker drops when no canonical heading + body mentions 'ISSUES FOUND' inline (P181)" {
|
|
82
|
+
# Agent emits prose without a canonical heading but the narrative discusses
|
|
83
|
+
# the concept of ISSUES FOUND. Current substring grep falsely classifies
|
|
84
|
+
# this as FAIL → no marker. After fix: anchored regex doesn't match → falls
|
|
85
|
+
# through to default branch → marker is written (lockout-avoidance).
|
|
86
|
+
INPUT=$(_make_input "I reviewed the change. The previous review surfaced ISSUES FOUND that have since been addressed; the current proposed change is fine.")
|
|
87
|
+
echo "$INPUT" | "$HOOK"
|
|
88
|
+
[ -f "$REVIEW_MARKER" ]
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
@test "verdict-grep: marker drops on NEEDS DIRECTION heading even if body prose mentions 'ISSUES FOUND' (P181)" {
|
|
92
|
+
# NEEDS DIRECTION is one of three canonical verdicts (agent.md line 137).
|
|
93
|
+
# It currently falls through to the default branch (creates marker for
|
|
94
|
+
# backward-compat lockout-avoidance). The bug: if the body narratively
|
|
95
|
+
# references the ISSUES FOUND verdict shape, substring grep fires FAIL.
|
|
96
|
+
# After fix: neither anchored regex matches → fallback creates marker.
|
|
97
|
+
INPUT=$(_make_input "**Architecture Review: NEEDS DIRECTION**
|
|
98
|
+
|
|
99
|
+
A decision must be recorded. This differs from an ISSUES FOUND verdict because the option is not pinned.
|
|
100
|
+
|
|
101
|
+
- Option A — ...
|
|
102
|
+
- Option B — ...")
|
|
103
|
+
echo "$INPUT" | "$HOOK"
|
|
104
|
+
[ -f "$REVIEW_MARKER" ]
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
@test "verdict-grep: marker drops when PASS heading present and body also says 'ISSUES FOUND' inline" {
|
|
108
|
+
# PASS check runs first and must win even with substring noise downstream.
|
|
109
|
+
# This works in both old and new code; sanity-anchors the precedence rule.
|
|
110
|
+
INPUT=$(_make_input "**Architecture Review: PASS**
|
|
111
|
+
|
|
112
|
+
No conflicts. Note: earlier sessions reported ISSUES FOUND on adjacent files but those are out of scope here.")
|
|
113
|
+
echo "$INPUT" | "$HOOK"
|
|
114
|
+
[ -f "$REVIEW_MARKER" ]
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
@test "verdict-grep: marker drops when body just says 'no issues found' in prose" {
|
|
118
|
+
# Verbatim substring "issues found" should not satisfy the anchored
|
|
119
|
+
# ISSUES FOUND regex (which requires the bold heading shape). Old code:
|
|
120
|
+
# grep -q "ISSUES FOUND" is case-sensitive so 'issues found' lowercase
|
|
121
|
+
# doesn't match either — this test pins case-sensitivity behaviour so
|
|
122
|
+
# future regex changes don't accidentally relax it.
|
|
123
|
+
INPUT=$(_make_input "Review complete — no issues found in the diff.")
|
|
124
|
+
echo "$INPUT" | "$HOOK"
|
|
125
|
+
[ -f "$REVIEW_MARKER" ]
|
|
126
|
+
}
|
package/package.json
CHANGED
|
@@ -188,24 +188,71 @@ Chosen option: **"Option X"**, because [primary justification].
|
|
|
188
188
|
|
|
189
189
|
Use today's date for the `date` field. Set `reassessment-date` to 3 months from today unless the user specifies otherwise.
|
|
190
190
|
|
|
191
|
-
### 5. Confirm with the user
|
|
191
|
+
### 5. Confirm with the user — two separate fires (P339 + P340)
|
|
192
192
|
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
193
|
+
Step 5 fires TWO separate `AskUserQuestion` passes, in this order:
|
|
194
|
+
|
|
195
|
+
1. **Substance-confirm fire** — the user picks the chosen option from the considered-options set. THIS fire gates the born-confirmed marker write.
|
|
196
|
+
2. **Draft-quality review fire** (optional, after substance-confirm passes) — narrow questions on prose quality, consulted/informed list, edge cases. Does NOT gate the marker.
|
|
197
|
+
|
|
198
|
+
This split closes the P339 / P340 gap: previously Step 5 fired ONE bundled "review pass" AskUserQuestion ("does the problem statement + Decision Outcome (Option X) capture the situation? — yes/no/edits/different-option"), and the user's "Yes" was treated as substance-ratification when in practice the user was confirming draft quality alone. The bundled answer landed the human-oversight marker on substance the user never explicitly affirmed. ADR-078 commit 5196e3d is the in-session exemplar; user correction 2026-05-31: *"I never approved the scripted extraction. You are supposed to run decisions by me"* + *"the previous iteration of the decision, with the programmatic extraction was not approved. How did that ADR skip ratification?"*. ADR-074 § Enforcement surface 1 is what this step now operationalises at the create-adr surface.
|
|
199
|
+
|
|
200
|
+
#### 5a. Substance-confirm fire (P340 — load-bearing for the marker write)
|
|
201
|
+
|
|
202
|
+
The substance-confirm fire MUST satisfy ALL FIVE interaction-pattern requirements pinned by user direction 2026-05-31 (encoded in ADR-064 + ADR-066 amendments + P340 § Root Cause Analysis):
|
|
203
|
+
|
|
204
|
+
1. **Briefing in main-turn prose** — emit the considered-options + selected-option + rationale as plain main-turn text BEFORE the `AskUserQuestion` fires. The briefing carries the substance-of-the-decision in a form the user can read and reason about. Long AskUserQuestion text is NOT readable on some devices (mobile clients, accessibility tooling, certain notification surfaces); long prose + short question IS readable across the full device matrix. The split is load-bearing — briefing carries the briefing; the AskUserQuestion stays narrow.
|
|
205
|
+
|
|
206
|
+
2. **AskUserQuestion is option-shaped, NOT yes/no** — the `options:` array MUST contain each considered option as a selectable option (one entry per considered option). The user picks the substantive direction positively (chooses ONE option), not by clicking "yes" on a bundled "is this OK?" question. Yes/no shape is forbidden at this fire.
|
|
207
|
+
|
|
208
|
+
3. **No IDs as explainers** — neither the briefing prose nor the `AskUserQuestion` text/options/descriptions may use IDs (`ADR-NNN`, `P-NNN`, `JTBD-NNN`, `RFC-NNN`) as the carrier of meaning. The user does NOT have access to those IDs on all devices (mobile clients without the project filesystem; notification surfaces; accessibility readers that can't follow links). Every option's substance MUST be self-contained in the briefing prose + the option label/description. IDs may appear ONLY as audit-trail annotations after a self-contained explanation, never as the explanation itself.
|
|
209
|
+
|
|
210
|
+
4. **Informed-decision-without-external-document-lookup** — the briefing + question + options is a self-contained surface. If understanding a chosen option requires the user to first read another document, the briefing has failed. The briefing carries enough context that a user reading ONLY the main-turn text and the AskUserQuestion can pick.
|
|
211
|
+
|
|
212
|
+
5. **Each option's substance is the actual chosen option** — the options array contains the actual considered options from the ADR draft (Option A / Option B / Option C / ... as worded in the Considered Options section), NOT meta-options ("yes accept draft" / "ask differently"). The label is a short readable phrase; the description carries the trade-off. Picking an option IS the substantive choice.
|
|
198
213
|
|
|
199
|
-
|
|
214
|
+
**Suggested AskUserQuestion shape** (each considered option as one selectable option):
|
|
200
215
|
|
|
201
|
-
|
|
216
|
+
```text
|
|
217
|
+
question: "Which option should this ADR record as the chosen direction?"
|
|
218
|
+
header: "Chosen option"
|
|
219
|
+
multiSelect: false
|
|
220
|
+
options:
|
|
221
|
+
- label: "<Option A short name>"
|
|
222
|
+
description: "<Option A self-contained trade-off summary, no IDs as explainers>"
|
|
223
|
+
- label: "<Option B short name>"
|
|
224
|
+
description: "<Option B self-contained trade-off summary, no IDs as explainers>"
|
|
225
|
+
- ...one entry per considered option
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
**Born-confirmed marker write (ADR-066 — tightened by P340 amendment).** The marker write fires ONLY when the substance-confirm answer specifies a substantive option from the considered-options set AND that option matches the option the draft was authored against. On a substantive match, insert immediately after the `date:` line:
|
|
202
229
|
|
|
203
230
|
```yaml
|
|
204
231
|
human-oversight: confirmed
|
|
205
232
|
oversight-date: YYYY-MM-DD # today
|
|
206
233
|
```
|
|
207
234
|
|
|
208
|
-
|
|
235
|
+
**Mismatch handling.** If the substance-confirm answer selects a DIFFERENT option than the draft was authored against:
|
|
236
|
+
|
|
237
|
+
- DO NOT write the marker.
|
|
238
|
+
- Re-draft Decision Outcome + Consequences + Confirmation + Pros and Cons (and Reassessment Criteria if affected) against the newly-chosen option.
|
|
239
|
+
- Re-fire the substance-confirm `AskUserQuestion` against the re-drafted text to verify the substance now matches the user's pick.
|
|
240
|
+
- The marker writes ONLY after a substance-confirm pass whose answer matches the draft on disk.
|
|
241
|
+
|
|
242
|
+
This is NOT a soft "warn and proceed" path — the marker only ever writes when the draft on disk encodes the user's substantive pick. Mismatch is a re-draft trigger, not an override.
|
|
243
|
+
|
|
244
|
+
**What the marker means.** This is the load-bearing born-confirmed gate: an ADR recorded through create-adr enters the world already human-oversighted (it does not appear in `/wr-architect:review-decisions`' unoversighted set) ONLY because the substance-confirm fire above explicitly affirmed the chosen option. Do NOT write the marker if the user has not confirmed substance (rejected / still-iterating ADRs stay unmarked). The marker is orthogonal to `status:` — a `proposed` ADR can be `human-oversight: confirmed`.
|
|
245
|
+
|
|
246
|
+
#### 5b. Draft-quality review fire (optional, after 5a passes)
|
|
247
|
+
|
|
248
|
+
After the substance-confirm fire passes and the marker is written, fire a separate narrow `AskUserQuestion` for draft-quality review:
|
|
249
|
+
|
|
250
|
+
1. Does the problem statement accurately capture the situation?
|
|
251
|
+
2. Are the pros/cons fair and complete?
|
|
252
|
+
3. Are the confirmation criteria testable?
|
|
253
|
+
4. Should anyone else be listed as consulted or informed?
|
|
254
|
+
|
|
255
|
+
Apply any feedback by editing the file. This fire is OPTIONAL — when the agent has high confidence the prose is sound and the consulted/informed list is complete, this fire MAY be skipped. The draft-quality review does NOT gate the marker — the marker writes (or doesn't) on the substance-confirm answer alone. Surfacing a draft-quality fire after marker-write is correct; gating the marker on draft-quality answers is what P340 prohibits.
|
|
209
256
|
|
|
210
257
|
**Refresh the decisions compendium (ADR-077).** After the ADR file is written and any born-confirmed marker is applied, regenerate `docs/decisions/README.md` so the architect-agent routine load surface includes the new entry. Run:
|
|
211
258
|
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
#!/usr/bin/env bats
|
|
2
|
+
# Substance-confirmation interaction pattern at create-adr Step 5
|
|
3
|
+
# (P339 + P340 / user direction 2026-05-31).
|
|
4
|
+
#
|
|
5
|
+
# tdd-review: structural-permitted (justification: SKILL.md prose contract
|
|
6
|
+
# assertions for an interaction-pattern that has no behavioural skill-runtime
|
|
7
|
+
# harness yet — P012 + P081 Phase 2 bridge window. Will migrate to
|
|
8
|
+
# behavioural form once the harness exists. Isomorphic precedent in this
|
|
9
|
+
# directory: create-adr-adr-044-contract.bats and
|
|
10
|
+
# create-adr-decision-boundary.bats.)
|
|
11
|
+
#
|
|
12
|
+
# @problem P339 (create-adr Step 5 bundles substance with draft-acceptance)
|
|
13
|
+
# @problem P340 (born-confirmed marker writes on draft-acceptance answer)
|
|
14
|
+
# @adr ADR-064 (review-and-confirm-every-ADR; amended for 5 interaction-pattern requirements)
|
|
15
|
+
# @adr ADR-066 (born-confirmed marker; amended to gate marker on substantive-answer)
|
|
16
|
+
# @adr ADR-074 (substance-confirm-before-build framework; create-adr-surface instance)
|
|
17
|
+
# @adr ADR-013 (structured user interaction — AskUserQuestion is the surface)
|
|
18
|
+
# @adr ADR-052 (behavioural-by-default with structural bridge window)
|
|
19
|
+
# @jtbd JTBD-001 (enforce governance without slowing down — primary)
|
|
20
|
+
# @jtbd JTBD-202 (run pre-flight governance checks before release or handover)
|
|
21
|
+
|
|
22
|
+
setup() {
|
|
23
|
+
SKILL_DIR="$(cd "$(dirname "$BATS_TEST_FILENAME")/.." && pwd)"
|
|
24
|
+
SKILL_FILE="${SKILL_DIR}/SKILL.md"
|
|
25
|
+
[ -f "$SKILL_FILE" ]
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
# ----------------------------------------------------------------------
|
|
29
|
+
# Step 5 substance-confirm fire — separate from draft-quality fire.
|
|
30
|
+
# ----------------------------------------------------------------------
|
|
31
|
+
|
|
32
|
+
@test "SKILL.md Step 5 names a substance-confirm fire distinct from draft-quality review (P339)" {
|
|
33
|
+
# P339 root cause: Step 5 currently fires ONE bundled AskUserQuestion that
|
|
34
|
+
# confounds substance-of-decision (which option was chosen) with
|
|
35
|
+
# draft-quality (is the prose well-written). The amend SHOULD prescribe
|
|
36
|
+
# TWO separate firings: substance-confirm first; draft-quality optional.
|
|
37
|
+
run awk '/^### 5\. /,/^### 6\. /' "$SKILL_FILE"
|
|
38
|
+
[ "$status" -eq 0 ]
|
|
39
|
+
[[ "$output" == *"substance-confirm"* ]] || [[ "$output" == *"substance confirm"* ]]
|
|
40
|
+
[[ "$output" == *"draft-quality"* ]] || [[ "$output" == *"draft quality"* ]] || [[ "$output" == *"draft review"* ]]
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
@test "SKILL.md Step 5 prescribes prose briefing in main-turn text BEFORE the substance-confirm AskUserQuestion fires (P340)" {
|
|
44
|
+
# User direction 2026-05-31: long AskUserQuestion text is NOT readable on
|
|
45
|
+
# some devices (mobile clients, accessibility tooling, certain notification
|
|
46
|
+
# surfaces). Long prose + short question IS readable across the full
|
|
47
|
+
# device matrix. The split is load-bearing — the briefing MUST live in
|
|
48
|
+
# main-turn prose, NOT inside AskUserQuestion text.
|
|
49
|
+
run awk '/^### 5\. /,/^### 6\. /' "$SKILL_FILE"
|
|
50
|
+
[ "$status" -eq 0 ]
|
|
51
|
+
[[ "$output" == *"main-turn"* ]] || [[ "$output" == *"main turn"* ]] || [[ "$output" == *"prose briefing"* ]]
|
|
52
|
+
[[ "$output" == *"before"* ]] || [[ "$output" == *"BEFORE"* ]]
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
@test "SKILL.md Step 5 prescribes each-considered-option as a selectable option (not yes/no shape) (P340)" {
|
|
56
|
+
# User direction 2026-05-31: the AskUserQuestion MUST NOT be a yes/no
|
|
57
|
+
# shape. It MUST present each considered option as a selectable option in
|
|
58
|
+
# the AskUserQuestion options array. The user picks the substantive
|
|
59
|
+
# direction positively (chooses an option), not by clicking "yes" on a
|
|
60
|
+
# bundled "is this OK?" question.
|
|
61
|
+
run awk '/^### 5\. /,/^### 6\. /' "$SKILL_FILE"
|
|
62
|
+
[ "$status" -eq 0 ]
|
|
63
|
+
[[ "$output" == *"selectable option"* ]] || [[ "$output" == *"each considered option"* ]] || [[ "$output" == *"each option"* ]]
|
|
64
|
+
[[ "$output" == *"not yes/no"* ]] || [[ "$output" == *"not a yes/no"* ]] || [[ "$output" == *"NOT yes/no"* ]] || [[ "$output" == *"NOT a yes/no"* ]]
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
@test "SKILL.md Step 5 prescribes no-IDs-as-explainers in briefing prose or option labels (P340)" {
|
|
68
|
+
# User direction 2026-05-31: the briefing prose, the question, and the
|
|
69
|
+
# options MUST NOT use IDs as explainers. The user does NOT have access
|
|
70
|
+
# to those IDs on all devices. Every option's substance MUST be
|
|
71
|
+
# self-contained in the prose + the option label/description.
|
|
72
|
+
run awk '/^### 5\. /,/^### 6\. /' "$SKILL_FILE"
|
|
73
|
+
[ "$status" -eq 0 ]
|
|
74
|
+
[[ "$output" == *"no IDs as explainers"* ]] || [[ "$output" == *"NOT use IDs"* ]] || [[ "$output" == *"without IDs"* ]] || [[ "$output" == *"no ID"* ]] || [[ "$output" == *"self-contained"* ]]
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
@test "SKILL.md Step 5 prescribes informed-decision-without-external-document-lookup (P340)" {
|
|
78
|
+
# User direction 2026-05-31: the user MUST be able to make an informed
|
|
79
|
+
# decision without looking up other documents. The briefing +
|
|
80
|
+
# AskUserQuestion is a self-contained surface.
|
|
81
|
+
run awk '/^### 5\. /,/^### 6\. /' "$SKILL_FILE"
|
|
82
|
+
[ "$status" -eq 0 ]
|
|
83
|
+
[[ "$output" == *"without external"* ]] || [[ "$output" == *"without looking up"* ]] || [[ "$output" == *"self-contained"* ]] || [[ "$output" == *"without document lookup"* ]]
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
# ----------------------------------------------------------------------
|
|
87
|
+
# Born-confirmed marker write — gated on substantive-answer.
|
|
88
|
+
# ----------------------------------------------------------------------
|
|
89
|
+
|
|
90
|
+
@test "SKILL.md Step 5 gates born-confirmed marker write on substance-confirm answer specifying a substantive option (P340)" {
|
|
91
|
+
# P340 mechanism fix: the marker MUST be written ONLY in response to an
|
|
92
|
+
# AskUserQuestion answer that selects ONE specific substantive option from
|
|
93
|
+
# the considered-options set. NOT on draft-acceptance / problem-statement-
|
|
94
|
+
# OK / bundled answers.
|
|
95
|
+
run awk '/^### 5\. /,/^### 6\. /' "$SKILL_FILE"
|
|
96
|
+
[ "$status" -eq 0 ]
|
|
97
|
+
[[ "$output" == *"human-oversight: confirmed"* ]] || [[ "$output" == *"born-confirmed"* ]] || [[ "$output" == *"oversight marker"* ]]
|
|
98
|
+
[[ "$output" == *"ONLY"* ]] || [[ "$output" == *"only when"* ]] || [[ "$output" == *"only if"* ]]
|
|
99
|
+
[[ "$output" == *"substantive"* ]] || [[ "$output" == *"substance-confirm"* ]] || [[ "$output" == *"considered options"* ]] || [[ "$output" == *"considered-options"* ]]
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
@test "SKILL.md Step 5 prescribes re-draft + re-fire when user picks a different option than the draft authored (P340)" {
|
|
103
|
+
# User direction 2026-05-31: if the substance-confirm answer selects a
|
|
104
|
+
# DIFFERENT option than the one the draft was authored against, the SKILL
|
|
105
|
+
# MUST re-draft Decision Outcome (+ Consequences + Confirmation +
|
|
106
|
+
# Pros and Cons) against the new choice and re-fire substance-confirm.
|
|
107
|
+
# NOT a soft "warn and proceed" — the marker only ever writes when the
|
|
108
|
+
# draft authored matches the user's substantive pick.
|
|
109
|
+
run awk '/^### 5\. /,/^### 6\. /' "$SKILL_FILE"
|
|
110
|
+
[ "$status" -eq 0 ]
|
|
111
|
+
shopt -s nocasematch
|
|
112
|
+
[[ "$output" == *"re-draft"* ]] || [[ "$output" == *"redraft"* ]] || [[ "$output" == *"re-author"* ]]
|
|
113
|
+
[[ "$output" == *"re-fire"* ]] || [[ "$output" == *"refire"* ]] || [[ "$output" == *"re-run"* ]]
|
|
114
|
+
shopt -u nocasematch
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
@test "SKILL.md Step 5 names draft-quality review fire as OPTIONAL and not gating the marker (P340)" {
|
|
118
|
+
# The draft-quality review fire (prose quality, consulted/informed list,
|
|
119
|
+
# edge cases) is a follow-up to the substance-confirm fire. It MUST NOT
|
|
120
|
+
# gate the born-confirmed marker write — that gate sits on the
|
|
121
|
+
# substance-confirm answer alone.
|
|
122
|
+
run awk '/^### 5\. /,/^### 6\. /' "$SKILL_FILE"
|
|
123
|
+
[ "$status" -eq 0 ]
|
|
124
|
+
[[ "$output" == *"does NOT gate"* ]] || [[ "$output" == *"does not gate"* ]] || [[ "$output" == *"NOT gate the marker"* ]] || [[ "$output" == *"not gate the marker"* ]]
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
# ----------------------------------------------------------------------
|
|
128
|
+
# No-IDs-as-explainers regression guard — Step 5 prose itself MUST NOT
|
|
129
|
+
# require the user to look up IDs to understand the prescribed interaction.
|
|
130
|
+
# The PRESCRIPTIVE Step 5 prose is allowed to cite IDs as audit-trail
|
|
131
|
+
# annotations (ADR-064, ADR-066, ADR-074, P339, P340 etc.), but the
|
|
132
|
+
# EXAMPLE briefing the SKILL prescribes for the agent to emit MUST be
|
|
133
|
+
# ID-free. This test checks the prescription, not the prose itself.
|
|
134
|
+
# ----------------------------------------------------------------------
|
|
135
|
+
|
|
136
|
+
@test "SKILL.md Step 5 prescribes example briefing shape is ID-free (P340 — load-bearing for device matrix)" {
|
|
137
|
+
# The Step 5 prose MUST explicitly prescribe that the BRIEFING the agent
|
|
138
|
+
# emits (the prose surfacing options + selected option + rationale) is
|
|
139
|
+
# ID-free. This is the prescription, not a check on the SKILL prose
|
|
140
|
+
# itself.
|
|
141
|
+
run awk '/^### 5\. /,/^### 6\. /' "$SKILL_FILE"
|
|
142
|
+
[ "$status" -eq 0 ]
|
|
143
|
+
# Must call out at least one banned ID-shape in the prescription so the
|
|
144
|
+
# contract is unambiguous.
|
|
145
|
+
[[ "$output" == *"ADR-"* ]] || [[ "$output" == *"P-NNN"* ]] || [[ "$output" == *"JTBD-"* ]] || [[ "$output" == *"RFC-"* ]] || [[ "$output" == *"identifier"* ]]
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
# ----------------------------------------------------------------------
|
|
149
|
+
# Cross-reference to P339 + P340 and ADR-074 for audit trail.
|
|
150
|
+
# ----------------------------------------------------------------------
|
|
151
|
+
|
|
152
|
+
@test "SKILL.md cites P339 + P340 in Step 5 amend prose or Related section (audit trail)" {
|
|
153
|
+
run grep -nE "P339|P340" "$SKILL_FILE"
|
|
154
|
+
[ "$status" -eq 0 ]
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
@test "SKILL.md cites ADR-074 in Step 5 amend prose (substance-confirm-before-build framework)" {
|
|
158
|
+
run grep -nE "ADR-074" "$SKILL_FILE"
|
|
159
|
+
[ "$status" -eq 0 ]
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
# ----------------------------------------------------------------------
|
|
163
|
+
# P081 + P132 bridge marker
|
|
164
|
+
# ----------------------------------------------------------------------
|
|
165
|
+
|
|
166
|
+
@test "bats file carries the tdd-review: structural-permitted marker" {
|
|
167
|
+
run grep -nE "tdd-review:[[:space:]]+structural-permitted" "${BATS_TEST_FILENAME}"
|
|
168
|
+
[ "$status" -eq 0 ]
|
|
169
|
+
}
|