@tekyzinc/gsd-t 4.4.11 → 4.6.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +7 -7
- package/README.md +56 -2
- package/bin/gsd-t-model-profile.cjs +480 -0
- package/bin/gsd-t-model-tier-policy.cjs +156 -1
- package/bin/gsd-t.js +128 -26
- package/commands/gsd-t-debug.md +36 -3
- package/commands/gsd-t-design-decompose.md +31 -3
- package/commands/gsd-t-doc-ripple.md +31 -3
- package/commands/gsd-t-help.md +15 -1
- package/commands/gsd-t-impact.md +31 -3
- package/commands/gsd-t-milestone.md +31 -3
- package/commands/gsd-t-partition.md +39 -3
- package/commands/gsd-t-plan.md +31 -3
- package/commands/gsd-t-prd.md +31 -3
- package/commands/gsd-t-status.md +14 -0
- package/commands/gsd-t-verify.md +35 -2
- package/commands/gsd-t-wave.md +34 -3
- package/docs/requirements.md +8 -1
- package/package.json +1 -1
- package/scripts/gsd-t-auto-route.js +106 -5
- package/scripts/gsd-t-statusline.js +72 -1
- package/templates/CLAUDE-global.md +75 -284
- package/templates/workflows/gsd-t-debug.workflow.js +7 -1
- package/templates/workflows/gsd-t-phase.workflow.js +11 -4
- package/templates/workflows/gsd-t-verify.workflow.js +7 -1
- package/templates/workflows/gsd-t-wave.workflow.js +7 -2
- package/templates/test-helpers/launch-extension.ts +0 -81
|
@@ -7,6 +7,22 @@
|
|
|
7
7
|
4. ALWAYS work autonomously. ONLY ask for user input when truly blocked.
|
|
8
8
|
|
|
9
9
|
|
|
10
|
+
# Output Style (default: CONCISE)
|
|
11
|
+
|
|
12
|
+
**Default to concise output. Optimize for fast scanning, not completeness of prose.** The user wants ALL the information — quickly, organized, scannable. Override per-project by setting `Output Style: verbose` in the project CLAUDE.md.
|
|
13
|
+
|
|
14
|
+
Concise rules (this is the DEFAULT):
|
|
15
|
+
- **Answer first.** First line is the result/verdict. NO preamble ("Let me…", "Great question", "I'll now…"), NO postamble ("Let me know if…").
|
|
16
|
+
- **Bullets over paragraphs.** Default to scannable lists. Use a **table/grid** whenever comparing ≥2 items across dimensions — the user finds grids ideal.
|
|
17
|
+
- **Bold the keywords** so the eye can skip-scan.
|
|
18
|
+
- **Say it once.** Cut hyperbole and filler ("importantly", "it's worth noting", "as you can see", "basically"). No restating the question back.
|
|
19
|
+
- **Layman-first.** Plain words; use a precise technical term only when it IS the right word, then gloss it in one short clause.
|
|
20
|
+
- **Detail on demand.** Put deep "why / how it works internally" behind a one-line offer ("Want the reasoning?") rather than dumping it inline — unless the user asked why.
|
|
21
|
+
- **Keep load-bearing structure:** the dated status banner (first line), any verdict, and explicit warnings stay. Only the *explanatory body* gets tightened.
|
|
22
|
+
|
|
23
|
+
Verbose mode (opt-in, `Output Style: verbose`): full narrative prose, inline rationale, the longer style. Don't apply verbose unless a project requests it.
|
|
24
|
+
|
|
25
|
+
|
|
10
26
|
# GSD-T: Contract-Driven Development
|
|
11
27
|
|
|
12
28
|
## Work Hierarchy
|
|
@@ -78,6 +94,21 @@ GSD-T tracks project version in `.gsd-t/progress.md` using semantic versioning:
|
|
|
78
94
|
- Version is reflected in: `progress.md`, `README.md`, package manifest (if any), and git tags (`v{version}`)
|
|
79
95
|
|
|
80
96
|
|
|
97
|
+
# Git Worktree Location (MANDATORY)
|
|
98
|
+
|
|
99
|
+
**NEVER create a git worktree inside the project's own folder.** A worktree placed under the project tree pollutes `git status`, risks accidental commits/deletes, and breaks tooling that walks the project directory.
|
|
100
|
+
|
|
101
|
+
```
|
|
102
|
+
WHEN creating a worktree directly (git worktree add, isolation: "worktree", etc.):
|
|
103
|
+
└── Path MUST be: ~/Worktrees/<project-name>/<branch-or-task>/
|
|
104
|
+
e.g. /Users/david/Worktrees/GSD-T/fix-context-window-1m
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
- One predictable home for all worktrees: `~/Worktrees/`, namespaced by project name.
|
|
108
|
+
- Create `~/Worktrees/<project-name>/` on demand (`mkdir -p`) before `git worktree add`.
|
|
109
|
+
- Clean up with `git worktree remove` when the branch/task is done — don't leave prunable stragglers.
|
|
110
|
+
- **Exception**: harness-managed worktrees the Agent/Workflow runtime creates under the project's gitignored `.claude/worktrees/` path are the harness's own convention — leave those alone. This rule governs worktrees *you* create directly via Bash or the `isolation: "worktree"` option.
|
|
111
|
+
|
|
81
112
|
# Destructive Action Guard (MANDATORY)
|
|
82
113
|
|
|
83
114
|
**NEVER perform destructive or structural changes without explicit user approval.** This applies at ALL autonomy levels, including Level 3.
|
|
@@ -120,61 +151,20 @@ Even in development, the user may have:
|
|
|
120
151
|
|
|
121
152
|
## Update Notices
|
|
122
153
|
|
|
123
|
-
The hook output
|
|
154
|
+
The session-start hook output is NOT user-visible — so emit a dated status banner as the **first line of every response** (every turn), above any routing header. Source the date from the most recent `[GSD-T NOW]` signal (live clock; the UserPromptSubmit hook emits it each turn). While `[GSD-T NOW]` is available, NEVER use `currentDate` or the SessionStart banner (both frozen), or intuition. Only if `[GSD-T NOW]` is absent, fall back to `currentDate` and flag the gap. Trim seconds in the display (`HH:MM TZ`).
|
|
124
155
|
|
|
125
|
-
|
|
126
|
-
-
|
|
127
|
-
-
|
|
128
|
-
-
|
|
129
|
-
|
|
130
|
-
If `[GSD-T NOW]` is absent for any reason, fall back to `currentDate` and flag the gap.
|
|
131
|
-
|
|
132
|
-
**Format** — one line, no changelog noise in steady state:
|
|
133
|
-
|
|
134
|
-
- Steady state (`[GSD-T]` token seen at session start, or no version-check token — default):
|
|
135
|
-
```
|
|
136
|
-
Day: Mon DD, YYYY HH:MM TZ — GSD-T v{version} — CURRENT
|
|
137
|
-
```
|
|
138
|
-
Example: `Sun: May 3, 2026 12:21 PDT — GSD-T v3.19.00 — CURRENT`
|
|
139
|
-
|
|
140
|
-
- Auto-updated this session (`[GSD-T AUTO-UPDATE]` token seen at session start):
|
|
141
|
-
```
|
|
142
|
-
Day: Mon DD, YYYY HH:MM TZ — GSD-T v{old} → v{new} ✅ AUTO-UPDATED
|
|
143
|
-
Changelog: https://github.com/Tekyz-Inc/get-stuff-done-teams/blob/main/CHANGELOG.md
|
|
144
|
-
```
|
|
145
|
-
(The changelog link earns its place here — there's new code to read about.)
|
|
146
|
-
|
|
147
|
-
- Update available, auto-update failed (`[GSD-T UPDATE]` token seen at session start):
|
|
148
|
-
```
|
|
149
|
-
Day: Mon DD, YYYY HH:MM TZ — GSD-T v{installed} → v{latest} ⬆️ UPDATE AVAILABLE (auto-update failed)
|
|
150
|
-
Run: /gsd-t-version-update-all
|
|
151
|
-
Changelog: https://github.com/Tekyz-Inc/get-stuff-done-teams/blob/main/CHANGELOG.md
|
|
152
|
-
```
|
|
153
|
-
Also repeat at the **end** of your first response.
|
|
154
|
-
|
|
155
|
-
(Drop seconds from the displayed banner — keep it to `HH:MM TZ` for readability. The hook emits seconds; you trim.)
|
|
156
|
-
|
|
157
|
-
**Why every response, not just the first**: long sessions span multiple days. A dated header on every turn means the user can scroll back and immediately see when any exchange happened, without inferring from context.
|
|
158
|
-
|
|
159
|
-
**Order**: dated status banner FIRST. Then routing header (if any). Then your response body.
|
|
156
|
+
Format by session-start token:
|
|
157
|
+
- `[GSD-T]` / none (steady state): `Day: Mon DD, YYYY HH:MM TZ — GSD-T v{version} — CURRENT`
|
|
158
|
+
- `[GSD-T AUTO-UPDATE]`: `Day: … — GSD-T v{old} → v{new} ✅ AUTO-UPDATED` + `Changelog: https://github.com/Tekyz-Inc/get-stuff-done-teams/blob/main/CHANGELOG.md`
|
|
159
|
+
- `[GSD-T UPDATE]` (auto-update failed): `Day: … — GSD-T v{installed} → v{latest} ⬆️ UPDATE AVAILABLE (auto-update failed)` + `Run: /gsd-t-version-update-all` + changelog link; also repeat at the END of your first response.
|
|
160
160
|
|
|
161
161
|
## Live Clock Rule (MANDATORY)
|
|
162
162
|
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
**How to obtain the live clock**:
|
|
166
|
-
1. Read the most recent `[GSD-T NOW]` signal from your context (UserPromptSubmit hook emits it every turn).
|
|
167
|
-
2. If absent, run `node -e "console.log(new Date().toISOString())"` via Bash before writing.
|
|
168
|
-
|
|
169
|
-
**Enforcement**: a PreToolUse hook (`scripts/gsd-t-date-guard.js`) blocks Write/Edit calls whose content contains timestamps drifting more than ±5 minutes from the live system clock. The guard:
|
|
170
|
-
- Validates decision-log entries (`- YYYY-MM-DD HH:MM:`), filename timestamps (`continue-here-YYYY-MM-DDTHHMMSS`), banners (`Day: Mon DD, YYYY HH:MM`), labeled stamps (`Date:`, `Updated:`, `Created:`, etc., with optional TZ abbr / numeric offset / `Z`), and **progress.md table cells carrying `YYYY-MM-DD HH:MM TZ`** (M59, v3.29.10+ — Completed Milestones + Session Log).
|
|
171
|
-
- For Edit, ignores timestamps that appear in BOTH `old_string` and `new_string` (pre-existing context, not new writes).
|
|
172
|
-
- Allowlists machine-written paths (`.gsd-t/events/`, `.gsd-t/transcripts/`, `.gsd-t/metrics/`, `.git/`, `node_modules/`, archives, log files).
|
|
173
|
-
- Fails open on internal error — broken tool calls would be worse than drift.
|
|
163
|
+
Every date/timestamp you write to ANY file (progress.md log, `continue-here-{ts}` filenames, memory, banners, `Date:`/`Updated:` frontmatter, archive headings) MUST come from the live clock: the latest `[GSD-T NOW]`, or `node -e "console.log(new Date().toISOString())"` if absent. Never use `currentDate`, the frozen banner, or intuition.
|
|
174
164
|
|
|
175
|
-
|
|
165
|
+
A PreToolUse hook (`scripts/gsd-t-date-guard.js`) blocks Write/Edit calls whose timestamps drift more than ±5 min from the live clock (decision-log lines, filename stamps, banners, labeled `Date:`/`Updated:` stamps, and progress.md table cells); it ignores stamps present in both `old_string` and `new_string` on Edit, allowlists machine-written paths, and fails open on internal error. If blocked, re-read `[GSD-T NOW]`, regenerate, retry — do NOT bypass.
|
|
176
166
|
|
|
177
|
-
|
|
167
|
+
**progress.md timestamp precision (M59+)**: `## Date:`, the Completed-Milestones "Completed" cell, and the Session-Log "Date" cell use `YYYY-MM-DD HH:MM TZ`; forward-only (older date-only rows stay; readers accept both). `archive-meta.json::completedAt` uses `localIsoWithOffset()` from `bin/gsd-t-time-format.cjs` (local-offset ISO), not `toISOString()`.
|
|
178
168
|
|
|
179
169
|
## Conversation vs. Work
|
|
180
170
|
|
|
@@ -196,122 +186,16 @@ If any are missing:
|
|
|
196
186
|
|
|
197
187
|
**Exempt commands** (do not trigger auto-init): `gsd-t-init`, `gsd-t-init-scan-setup`, `gsd-t-help`, `gsd-t-version-update`, `gsd-t-version-update-all`.
|
|
198
188
|
|
|
199
|
-
## Playwright
|
|
200
|
-
|
|
201
|
-
Playwright readiness is enforced by executable code, not prose. Three layers:
|
|
202
|
-
|
|
203
|
-
1. **Bootstrap library** — `bin/playwright-bootstrap.cjs` exports `hasPlaywright`, `detectPackageManager`, `installPlaywright`, `verifyPlaywrightHealth`. `bin/ui-detection.cjs` exports `hasUI`, `detectUIFlavor`. See `.gsd-t/contracts/playwright-bootstrap-contract.md`.
|
|
204
|
-
2. **Workflow-stage gate** — the verify/execute Workflow scripts call `playwright-bootstrap.cjs::installPlaywright()` before any E2E stage when `hasUI(projectDir)` AND `!hasPlaywright(projectDir)`. On install failure the stage halts with a structured `blocked-needs-human` result. (M61: replaced the retired `headless-auto-spawn.cjs` spawn-time gate — the Workflow runtime owns spawning now.)
|
|
205
|
-
3. **Commit-time gate** — `scripts/hooks/pre-commit-playwright-gate` (opt-in via `gsd-t doctor --install-hooks`) blocks commits that touch viewer/UI source files when Playwright tests have not passed since the most recent change. Reads `.gsd-t/.last-playwright-pass`; fails open on missing/corrupt timestamps.
|
|
206
|
-
|
|
207
|
-
Operator overrides:
|
|
208
|
-
- Manual install: `gsd-t setup-playwright [path]` (or `gsd-t doctor --install-playwright`).
|
|
209
|
-
- Health check: `gsd-t doctor` reports `playwright missing` for any UI project without `playwright.config.*`.
|
|
210
|
-
|
|
211
|
-
You no longer need to run a check yourself before testing commands — the Workflow stage runs the readiness gate before E2E.
|
|
212
|
-
|
|
213
|
-
### Playwright No-Focus-Steal Invariant (MANDATORY — all projects)
|
|
214
|
-
|
|
215
|
-
**E2E tests must NEVER steal keyboard focus or pop visible windows during a normal run.** The developer keeps typing in their terminal while tests run. This is non-negotiable on every project.
|
|
216
|
-
|
|
217
|
-
```
|
|
218
|
-
RULES:
|
|
219
|
-
├── Headless is the DEFAULT everywhere. A visible browser is opt-in only (HEADED=1).
|
|
220
|
-
├── Specs/configs MUST NOT hardcode `headless: false` — visibility is decided in ONE
|
|
221
|
-
│ launch helper (or the config), controlled by env var, never per-spec.
|
|
222
|
-
├── MV3 Chrome extensions are NOT an exception: Chrome's NEW headless loads
|
|
223
|
-
│ extensions. The load-bearing invocation is `channel: 'chromium'` + `headless: true`
|
|
224
|
-
│ (Playwright ≥1.49). PITFALL: `headless: true` alone launches the
|
|
225
|
-
│ chromium_headless_shell (OLD headless — silently cannot load extensions), and
|
|
226
|
-
│ passing `--headless=new` as a raw arg fights that binary. Template:
|
|
227
|
-
│ `templates/test-helpers/launch-extension.ts`.
|
|
228
|
-
└── Off-screen windows (`--window-position=-2400,-2400`) are NOT a focus fix on
|
|
229
|
-
macOS — a headed launch ACTIVATES the app and steals the cursor regardless of
|
|
230
|
-
window position. Off-screen is a screen-takeover mitigation only; use it solely
|
|
231
|
-
as a fallback if new-headless regresses extension support.
|
|
232
|
-
```
|
|
233
|
-
|
|
234
|
-
Origin: binvoice 2026-06-10 — extension specs forced `headless: false`, commandeering the cursor on every spec; the prior "off-screen" mitigation stopped the window takeover but not the focus theft. Fixed permanently by the channel-selected new headless (21/21 in 9.8s, zero windows).
|
|
235
|
-
|
|
236
|
-
### Playwright Cleanup
|
|
237
|
-
|
|
238
|
-
After Playwright tests finish (pass or fail), **kill any app/server processes that were started for the tests**. Playwright often launches a dev server (via `webServer` config or manually). These processes must not be left running:
|
|
239
|
-
1. Check for any dev server processes spawned during the test run
|
|
240
|
-
2. Kill them (e.g., `npx kill-port`, or terminate the process directly)
|
|
241
|
-
3. Verify the port is free before proceeding
|
|
242
|
-
|
|
243
|
-
This applies everywhere Playwright tests are executed: execute, test-sync, verify, quick, wave, debug, complete-milestone, and integrate.
|
|
244
|
-
|
|
245
|
-
### E2E Enforcement Rule (MANDATORY)
|
|
246
|
-
|
|
247
|
-
**Running only unit tests when E2E tests exist is a test failure.** This is non-negotiable.
|
|
248
|
-
|
|
249
|
-
```
|
|
250
|
-
BEFORE reporting "tests pass" for ANY task:
|
|
251
|
-
├── Does playwright.config.* or cypress.config.* exist?
|
|
252
|
-
│ YES → You MUST run the full E2E suite. Unit-only results are INCOMPLETE.
|
|
253
|
-
│ NO → Unit/integration tests are sufficient.
|
|
254
|
-
├── Did you run every detected test runner?
|
|
255
|
-
│ NO → Run it now. Do not commit until ALL suites pass.
|
|
256
|
-
└── Report format MUST include all suites:
|
|
257
|
-
"Unit: X/Y pass | E2E: X/Y pass" (or "E2E: N/A — no config")
|
|
258
|
-
```
|
|
259
|
-
|
|
260
|
-
The conditional "if UI/routes/flows changed" in command files applies to **writing new E2E specs**, not to **running existing ones**. You always run existing E2E specs. Always.
|
|
261
|
-
|
|
262
|
-
### E2E Test Quality Standard (MANDATORY)
|
|
189
|
+
## Playwright / E2E Guards (MANDATORY)
|
|
263
190
|
|
|
264
|
-
|
|
191
|
+
Readiness is enforced by code, not by you — the verify/execute Workflow installs Playwright before any E2E stage when `hasUI && !hasPlaywright`, halting `blocked-needs-human` on failure. You do NOT run a pre-check yourself. Manual: `gsd-t setup-playwright [path]`; `gsd-t doctor` reports `playwright missing`. Internals + the commit-time gate: `.gsd-t/contracts/playwright-bootstrap-contract.md`.
|
|
265
192
|
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
// ← No assertion that the tab's content actually loaded
|
|
273
|
-
|
|
274
|
-
FUNCTIONAL TEST (RIGHT — fails if the feature is broken):
|
|
275
|
-
await page.click('#tab-sessions');
|
|
276
|
-
await expect(page.locator('.session-list')).toContainText('Session 1');
|
|
277
|
-
// ← Proves clicking the tab loaded the session data
|
|
278
|
-
```
|
|
279
|
-
|
|
280
|
-
Every Playwright assertion must verify one of:
|
|
281
|
-
- **State changed**: After click/type/submit, the app state is different (new content, updated data, changed status)
|
|
282
|
-
- **Data flowed**: User input → API call → response rendered (use `page.waitForResponse` or assert on rendered data)
|
|
283
|
-
- **Content loaded**: Navigation/tab switch → destination content appeared (assert on text/data unique to destination)
|
|
284
|
-
- **Widget responded**: Terminal accepted keystrokes and produced output, editor saved changes, form submitted and data persisted
|
|
285
|
-
|
|
286
|
-
**If a test would pass on an empty HTML page with the correct element IDs and no JavaScript, it is not a functional test.** Rewrite it.
|
|
287
|
-
|
|
288
|
-
### Test Data Cleanup (MANDATORY — M58)
|
|
289
|
-
|
|
290
|
-
**Tests that insert data into a project's stores MUST register those inserts with the GSD-T test-data ledger so Verify can purge them.** Tests that leave orphaned `E2E_*` records in production data violate this rule.
|
|
291
|
-
|
|
292
|
-
The supported mechanism is the `withTestData()` Playwright fixture:
|
|
293
|
-
|
|
294
|
-
```ts
|
|
295
|
-
import { test as base } from '@playwright/test';
|
|
296
|
-
import { withTestData } from '@tekyzinc/gsd-t/templates/test-helpers/test-data-fixture';
|
|
297
|
-
|
|
298
|
-
export const test = base.extend(withTestData());
|
|
299
|
-
|
|
300
|
-
test('drag idea creates new column', async ({ page, testData }) => {
|
|
301
|
-
const id = testData.tag('E2E_DRAG'); // → "E2E_DRAG_{runId}_{counter}"
|
|
302
|
-
await testData.register({
|
|
303
|
-
kind: 'localStorage-key-prefix',
|
|
304
|
-
store: 'gsd-t-board:idea:',
|
|
305
|
-
id,
|
|
306
|
-
taggedPrefix: 'E2E_',
|
|
307
|
-
});
|
|
308
|
-
// … UI interactions that insert a row keyed by `${store}${id}` …
|
|
309
|
-
});
|
|
310
|
-
```
|
|
311
|
-
|
|
312
|
-
Three built-in adapters: `localStorage-key-prefix`, `file-json-array`, `sqlite-table-where`. Extend via `registerAdapter(kind, adapter)`. Each adapter refuses to delete a record whose id does not start with the ledger row's `taggedPrefix` (defense in depth — see `.gsd-t/contracts/test-data-tagging-contract.md`).
|
|
313
|
-
|
|
314
|
-
After the E2E suite, `gsd-t-verify` Step 4.5 runs `gsd-t test-data --purge --run "$GSD_T_VERIFY_RUN_ID"`. If any adapter throws or refuses, verify FAILs the gate (block-promotion semantics — equivalent to a failing CI-Parity Gate). Contract: `.gsd-t/contracts/test-data-ledger-contract.md` v1.0.0 STABLE.
|
|
193
|
+
The hard rules (non-negotiable, all projects):
|
|
194
|
+
- **No focus-steal.** E2E must never steal keyboard focus or pop visible windows. Headless is the DEFAULT everywhere; visible is opt-in (`HEADED=1`). Never hardcode `headless: false` in a spec/config — decide visibility in ONE env-controlled launch helper. MV3 extensions: use `channel: 'chromium'` + `headless: true` (Playwright ≥1.49; the new headless loads extensions, whereas bare `headless: true` launches the old headless shell, which silently cannot load them). Template: `templates/test-helpers/launch-extension.ts`. See [[feedback_playwright_no_focus_steal]].
|
|
195
|
+
- **Cleanup.** After tests (pass or fail), kill any dev-server/app processes started for them and free the port. Applies to execute, test-sync, verify, quick, wave, debug, complete-milestone, integrate.
|
|
196
|
+
- **E2E enforcement.** If `playwright.config.*`/`cypress.config.*` exists, running unit-only is a test FAILURE — run the full E2E suite, every runner, before reporting pass. Report `Unit: X/Y | E2E: X/Y` (or `E2E: N/A`). You always run existing specs; the "if UI changed" conditional governs *writing new* specs only.
|
|
197
|
+
- **Functional, not layout.** Every assertion must prove state changed / data flowed / content loaded / widget responded — not mere existence (`isVisible`/`toBeAttached`). If a test would pass on empty HTML with the right IDs and no JS, rewrite it.
|
|
198
|
+
- **Test-data cleanup.** Tests that insert data MUST register it via the `withTestData()` fixture so verify Step 4.5 (`gsd-t test-data --purge`) can remove it; an adapter throw/refusal FAILs the gate. Adapters refuse to delete ids lacking the ledger `taggedPrefix`. See [[feedback_test_data_cleanup_convention]] + `.gsd-t/contracts/test-data-ledger-contract.md`.
|
|
315
199
|
|
|
316
200
|
## Orthogonal Validation Triad (Mandatory)
|
|
317
201
|
|
|
@@ -337,61 +221,18 @@ Synthesis stage merges results without category collapse. Verdict: `VERIFIED` /
|
|
|
337
221
|
|
|
338
222
|
**Context budget:** Workflow scripts receive a `budget` global (`budget.total`, `budget.spent()`, `budget.remaining()`) tied to the user's per-turn token target. Use it for dynamic loops (`while (budget.total && budget.remaining() > 50_000) { ... }`) or to scale fleet size. Opus 4.7/4.8 ship 1M context windows; the legacy meter at `bin/token-budget.cjs` was retired in M61 — use native `/context` for live in-session usage.
|
|
339
223
|
|
|
340
|
-
##
|
|
341
|
-
|
|
342
|
-
Routine GSD-T actions (milestone → partition → plan → execute → verify → deliver) run from the Claude Code desktop app via Workflows + Skills. CLI residue is intentional and limited to: (a) background hooks the harness fires automatically, (b) jobs that must outlive the desktop session. No routine build / rebuild / debug / deliver action should require terminal keystrokes.
|
|
343
|
-
|
|
344
|
-
## GSD-T Workflows (M61 — v4.0.10+)
|
|
345
|
-
|
|
346
|
-
GSD-T workflows live at `templates/workflows/`. Each workflow is a self-contained native Workflow script that handles one phase of the GSD-T lifecycle. Command files (`commands/gsd-t-*.md`) are thin invokers that call `Workflow({scriptPath, args})`. The `scriptPath` MUST be resolved to an absolute path at invoke time via `gsd-t workflow-path <name>` (M69) — the workflow ships inside the installed `@tekyzinc/gsd-t` package, not the consumer project, so a bare relative `templates/workflows/...` path only resolves from the GSD-T source repo and silently breaks `Workflow()` everywhere else.
|
|
347
|
-
|
|
348
|
-
Canonical scripts:
|
|
349
|
-
- `gsd-t-execute.workflow.js` — preflight → brief → file-disjointness → parallel(domain workers) → integrate → verify-gate
|
|
350
|
-
- `gsd-t-verify.workflow.js` — preflight → verify-gate → M57 CI-parity → M58 test-data purge → parallel(/code-review ultra ∥ Red Team ∥ QA) → synthesis
|
|
351
|
-
- `gsd-t-wave.workflow.js` — composes execute + verify as sub-workflows
|
|
352
|
-
- `gsd-t-integrate.workflow.js` — cross-domain wire-up + light verify-gate
|
|
353
|
-
- `gsd-t-debug.workflow.js` — 2-cycle diagnose/fix/verify (CLAUDE.md Prime Rule)
|
|
354
|
-
- `gsd-t-quick.workflow.js` — preflight + brief + single-task + verify-gate (M56-D4)
|
|
355
|
-
- `gsd-t-phase.workflow.js` — generic upper-stage runner (partition / plan / discuss / impact / milestone / prd / design-decompose / doc-ripple). **M82/M84 Competition Mode (AUTOMATIC):** on eligible upstream phases (partition / milestone / discuss / design-decompose) an Opus solution-space probe runs at phase start and self-decides whether to compete (biased toward competing — a better upstream artifact lowers total downstream cost); when it fires, 3 parallel Self-MoA producers → a judge stage → a finalizer. No flag needed; override with `competition: N` / `competition: 0` / `noCompetition: true`. Partition's judge is the OBJECTIVE file-disjointness oracle (`gsd-t competition-judge --kind partition` — a calculator, not an LLM critic, immune to judge bias, the v1 beachhead); subjective phases use a blind + shuffled + different-model + rubric judge whose pick is finalized deterministically by `--kind generic`. The generative dual of the orthogonal validation triad; watershed rule = generate-and-judge ABOVE the contract, attack-and-filter BELOW. Default off. Contract: `competition-mode-contract.md` v1.0.0. **M83 Plan Hardening:** the `plan` phase runs two blocking gates before execute — a deterministic acceptance-traceability gate (`gsd-t traceability-gate`: every AC binds to a code path + a killing test; the `Headline:` task needs both impl and test) and an adversarial pre-mortem agent (opus, fresh-context, protocol `pre-mortem-subagent.md`: predicts edge-case/dead-deliverable/NFR failures, each → a required test). The temporal dual of the Red Team (attack the design at plan, not just code at verify). Contract: `plan-hardening-contract.md` v1.0.0.
|
|
356
|
-
- `gsd-t-scan.workflow.js` — preflight → volume-probe → pipeline(per-slice deep finder → single verify) → synthesis → document → render (M66: fans out by codebase VOLUME, not a fixed 5-teammate dimension count; M67: deep document phase deterministically produces the full living-doc set + dimension files, per-doc fan-out)
|
|
357
|
-
|
|
358
|
-
**Runtime-native invariant (M81 — v4.0.29+):** the Workflow sandbox provides ONLY `agent/parallel/pipeline/log/phase/budget/args` — NO `require`/`fs`/`path`/`child_process`/`process`, and `args` arrives as a JSON STRING. Each workflow is self-contained: it `JSON.parse`s `args` and delegates every CLI call (preflight, verify-gate, brief, build-coverage, ci-parity, test-data, disjointness) to inline `async` helpers that run the command via an `agent()`'s Bash (preferring project-local `bin/<tool>.cjs`, else the global `gsd-t` PATH binary) and parse the JSON envelope — preserving the M55-D5 project-local-bin invariant. The old `require("./_lib.js")` pattern threw `ReferenceError` on first eval and silently broke every workflow except scan (TD-113, fixed M81); `_lib.js` is retired as a workflow dependency.
|
|
359
|
-
|
|
360
|
-
## Preflight Gate (KEPT from M55)
|
|
361
|
-
|
|
362
|
-
Every Workflow script begins with `lib.runPreflight({projectDir})`. Hard-fails on any `severity:"error"` check (wrong branch, occupied required port). Non-error checks record but do not block. Same envelope feeds `verify-gate` Track 1.
|
|
224
|
+
## GSD-T Workflows (M61+ — v4.0.10+)
|
|
363
225
|
|
|
364
|
-
|
|
365
|
-
Contract: `.gsd-t/contracts/cli-preflight-contract.md` v1.0.0 STABLE.
|
|
226
|
+
Routine actions (milestone → partition → plan → execute → verify → deliver) run from the desktop app via Workflows + Skills — no terminal keystrokes for routine build/debug/deliver. Phase orchestration lives in `templates/workflows/*.workflow.js`; command files are thin invokers calling `Workflow({scriptPath, args})`, where `scriptPath` is resolved to an ABSOLUTE path at invoke time via `gsd-t workflow-path <name>` (a bare relative path silently breaks `Workflow()` outside the source repo).
|
|
366
227
|
|
|
367
|
-
|
|
228
|
+
The deterministic gates each verify-producing Workflow runs (all FAIL-blocking; you don't self-attest them):
|
|
229
|
+
- **Preflight** (`bin/cli-preflight.cjs`) — hard-fails on wrong branch / occupied required port.
|
|
230
|
+
- **Brief-first** — each `agent()` threads `$BRIEF_PATH` (≤2,500-tok snapshot); workers grep it before re-walking the repo. `.gsd-t/briefs/` gitignored.
|
|
231
|
+
- **Verify-gate** (`bin/gsd-t-verify-gate.cjs`) — Track 1 preflight + Track 2 CLI substrate (`tsc`, `biome`/`ruff`, `npm test`, `knip`, `gitleaks`, `scc`/`lizard`); non-zero halts before the triad.
|
|
232
|
+
- **M57 CI-parity** (`build-coverage` + `ci-parity`) + **M58 test-data purge** — then the orthogonal triad (see "Orthogonal Validation Triad" above) → synthesis.
|
|
233
|
+
- **Competition (M82/M84, auto)** on partition/milestone/discuss/design-decompose; **Plan hardening (M83)** = traceability-gate + pre-mortem before execute. Contracts: `competition-mode-contract.md`, `plan-hardening-contract.md`.
|
|
368
234
|
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
The 3 validation-subagent protocols (`templates/prompts/{qa,red-team,design-verify}-subagent.md`) carry the canonical instruction "If you're about to grep, read, or run a test, check the brief first at `$BRIEF_PATH`."
|
|
372
|
-
|
|
373
|
-
`.gsd-t/briefs/` is gitignored — briefs are per-spawn ephemera, not committed artifacts.
|
|
374
|
-
|
|
375
|
-
Library: `bin/gsd-t-context-brief.cjs::generateBrief(...)`.
|
|
376
|
-
Contract: `.gsd-t/contracts/context-brief-contract.md` v1.0.0 STABLE.
|
|
377
|
-
|
|
378
|
-
## Verify-Gate + Orthogonal Validation Triad (M61 v4.0.10+)
|
|
379
|
-
|
|
380
|
-
`gsd-t verify-gate --json` runs as a deterministic stage inside `gsd-t-verify.workflow.js`. Track 1 = preflight envelope. Track 2 = parallel-CLI substrate (`tsc`, `biome`/`ruff`, `npm test`, `knip`, `gitleaks`, `scc`/`lizard`). Both tracks always run; non-zero exit halts before the orthogonal triad.
|
|
381
|
-
|
|
382
|
-
After verify-gate, `verify.workflow.js` runs two FAIL-blocking gates inherited from M57 + M58:
|
|
383
|
-
- M57 CI-Parity: `gsd-t build-coverage` + `gsd-t ci-parity` (origin TimeTracking v1.10.12 Dockerfile incident)
|
|
384
|
-
- M58 Test-Data Purge: `gsd-t test-data --purge --run <id>` (origin GSD-T-Board v0.1.10 2442 E2E orphans incident)
|
|
385
|
-
|
|
386
|
-
Then the orthogonal validation triad runs as `parallel()` `agent()` stages per `.gsd-t/contracts/orthogonal-validation-contract.md` v1.0.0 STABLE:
|
|
387
|
-
- `/code-review ultra` — cooperative correctness + cleanup (skippable; requires `skipUltraReason`)
|
|
388
|
-
- Red Team — adversarial / security / boundaries (non-skippable; verdict `FAIL` / `GRUDGING-PASS`)
|
|
389
|
-
- QA — test mechanics + shallow-test detection + contract compliance (non-skippable)
|
|
390
|
-
|
|
391
|
-
Synthesis stage merges results WITHOUT collapsing categories. Verdict: `VERIFIED` / `VERIFIED-WITH-WARNINGS` / `VERIFY-FAILED`. `skipUltra=true` is INELIGIBLE for `VERIFIED`.
|
|
392
|
-
|
|
393
|
-
Library: `bin/gsd-t-verify-gate.cjs::runVerifyGate(...)` + `bin/gsd-t-verify-gate-judge.cjs::buildJudgePrompt(...)`.
|
|
394
|
-
Contracts: `verify-gate-contract.md` v1.0.0 STABLE + `orthogonal-validation-contract.md` v1.0.0 STABLE.
|
|
235
|
+
**Runtime-native invariant (M81):** the Workflow sandbox provides ONLY `agent/parallel/pipeline/log/phase/budget/args`; NO `require`/`fs`/`path`/`child_process`/`process`, and `args` is a JSON STRING. Each workflow `JSON.parse`s `args` and delegates every CLI call to an inline `async` helper running it via an `agent()`'s Bash (project-local `bin/<tool>.cjs` first, else global `gsd-t`). `budget` global (`total`/`spent()`/`remaining()`) drives dynamic loops; use native `/context` for live usage.
|
|
395
236
|
|
|
396
237
|
## API Documentation Guard (Swagger/OpenAPI)
|
|
397
238
|
|
|
@@ -421,58 +262,22 @@ KEEP GOING. Only stop for:
|
|
|
421
262
|
|
|
422
263
|
## Pre-Commit Gate (MANDATORY)
|
|
423
264
|
|
|
424
|
-
NEVER commit
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
│ YES → Update .gsd-t/contracts/component-contract.md
|
|
441
|
-
├── Did I add new files or directories?
|
|
442
|
-
│ YES → Update the owning domain's scope.md
|
|
443
|
-
├── Did I implement or change a requirement?
|
|
444
|
-
│ YES → Update docs/requirements.md (mark complete or revise)
|
|
445
|
-
├── Did I add/change/remove a component or change data flow?
|
|
446
|
-
│ YES → Update docs/architecture.md
|
|
447
|
-
├── Did I modify any document, script, or code file?
|
|
448
|
-
│ YES → Add timestamped entry to .gsd-t/progress.md Decision Log
|
|
449
|
-
│ Format: `- YYYY-MM-DD HH:MM: {what was done} — {brief context or result}`
|
|
450
|
-
│ This includes ALL file-modifying activities:
|
|
451
|
-
│ project, feature, scan, gap-analysis, milestone, partition, discuss,
|
|
452
|
-
│ plan, impact, execute, test-sync, integrate, verify, complete-milestone,
|
|
453
|
-
│ wave, quick, debug, promote-debt, populate, setup, init, init-scan-setup,
|
|
454
|
-
│ backlog-add/edit/move/remove/promote/settings, and any manual code changes
|
|
455
|
-
├── Did I make an architectural or design decision?
|
|
456
|
-
│ YES → Also include decision rationale in the progress.md entry
|
|
457
|
-
├── Did I discover or fix tech debt?
|
|
458
|
-
│ YES → Update .gsd-t/techdebt.md
|
|
459
|
-
├── Did I establish a pattern future work should follow?
|
|
460
|
-
│ YES → Update CLAUDE.md or domain constraints.md
|
|
461
|
-
├── Did I add/change tests?
|
|
462
|
-
│ YES → Verify test names and paths are referenced in requirements
|
|
463
|
-
├── Did I change UI, routes, or user flows?
|
|
464
|
-
│ YES → Update affected E2E test specs (Playwright/Cypress)
|
|
465
|
-
├── Did I add a new top-level dir, or change build/CI config?
|
|
466
|
-
│ This is ENFORCED MECHANICALLY by `gsd-t-verify` Step 2.6
|
|
467
|
-
│ (CI-Parity Gate: `gsd-t build-coverage` + `gsd-t ci-parity`,
|
|
468
|
-
│ FAIL-blocking). You do NOT self-attest this — verify runs the
|
|
469
|
-
│ real CI build. It exists because TimeTracking v1.10.12 shipped
|
|
470
|
-
│ VERIFIED+tagged with a new dir absent from the Dockerfile COPY.
|
|
471
|
-
└── Did I run the affected tests?
|
|
472
|
-
YES → Verify they pass. NO → Run them now.
|
|
473
|
-
```
|
|
474
|
-
|
|
475
|
-
If ANY answer is YES and the doc is NOT updated, update it BEFORE committing. No exceptions.
|
|
265
|
+
NEVER commit without running this checklist. For each trigger that fired, do the action BEFORE committing — no exceptions.
|
|
266
|
+
|
|
267
|
+
- **Branch** — `git branch --show-current` must match the project CLAUDE.md "Expected branch". Wrong branch → STOP, do not commit, switch first. No guard set → proceed but warn.
|
|
268
|
+
- **API endpoint/response shape changed** → update `.gsd-t/contracts/api-contract.md` + Swagger/OpenAPI spec + verify Swagger URL in CLAUDE.md & README.md.
|
|
269
|
+
- **DB schema changed** → `.gsd-t/contracts/schema-contract.md` + `docs/schema.md`.
|
|
270
|
+
- **UI component interface changed** → `.gsd-t/contracts/component-contract.md`.
|
|
271
|
+
- **New files/dirs** → owning domain's `scope.md`.
|
|
272
|
+
- **Requirement implemented/changed** → `docs/requirements.md`.
|
|
273
|
+
- **Component or data-flow changed** → `docs/architecture.md`.
|
|
274
|
+
- **ANY document/script/code file modified** → timestamped `.gsd-t/progress.md` Decision Log entry (`- YYYY-MM-DD HH:MM: {what} — {result}`); covers every workflow command AND manual edits. Architectural decision → include rationale in that entry.
|
|
275
|
+
- **Tech debt found/fixed** → `.gsd-t/techdebt.md`.
|
|
276
|
+
- **New pattern for future work** → CLAUDE.md or domain `constraints.md`.
|
|
277
|
+
- **Tests added/changed** → verify test names/paths are referenced in requirements.
|
|
278
|
+
- **UI/routes/flows changed** → affected E2E specs.
|
|
279
|
+
- **Affected tests** → run them, confirm pass.
|
|
280
|
+
- **New top-level dir or build/CI config change** → ENFORCED mechanically by the `gsd-t-verify` CI-Parity Gate (`build-coverage` + `ci-parity`, FAIL-blocking) — you do NOT self-attest; verify runs the real CI build.
|
|
476
281
|
|
|
477
282
|
## Document Ripple Completion Gate (MANDATORY)
|
|
478
283
|
|
|
@@ -577,31 +382,17 @@ If in doubt, skip research and proceed — research if execution reveals gaps.
|
|
|
577
382
|
|
|
578
383
|
### Next Command Hint
|
|
579
384
|
|
|
580
|
-
When a GSD-T command completes
|
|
581
|
-
|
|
582
|
-
**MANDATORY format** — use this exact structure:
|
|
385
|
+
When a GSD-T command completes and does NOT auto-advance, end your response with a "Next Up" block (triggers the prompt-suggestion ghost text). Exact format:
|
|
583
386
|
|
|
584
387
|
```
|
|
585
|
-
───────────────────────────────────────────────────────────────
|
|
586
|
-
|
|
587
388
|
## ▶ Next Up
|
|
588
389
|
|
|
589
|
-
**{Phase Name}** — {one-line description
|
|
390
|
+
**{Phase Name}** — {one-line description}
|
|
590
391
|
|
|
591
392
|
`/gsd-t-{command}`
|
|
592
|
-
|
|
593
|
-
───────────────────────────────────────────────────────────────
|
|
594
|
-
```
|
|
595
|
-
|
|
596
|
-
If there are alternative commands that also make sense, add them:
|
|
597
|
-
|
|
598
|
-
```
|
|
599
|
-
**Also available:**
|
|
600
|
-
- `/gsd-t-{alt-1}` — {description}
|
|
601
|
-
- `/gsd-t-{alt-2}` — {description}
|
|
602
393
|
```
|
|
603
394
|
|
|
604
|
-
Successor mapping:
|
|
395
|
+
Add `**Also available:**` with `- /gsd-t-{alt} — {desc}` lines if alternatives make sense. Successor mapping:
|
|
605
396
|
| Completed | Next | Also available |
|
|
606
397
|
|-----------|------|----------------|
|
|
607
398
|
| `project` | `milestone` | |
|
|
@@ -20,6 +20,12 @@ export const meta = {
|
|
|
20
20
|
// require("./_lib.js") crashed this workflow on first eval, TD-113). Delegate CLI calls
|
|
21
21
|
// to an agent's Bash; args arrives as a JSON STRING in this runtime. See gsd-t-scan.workflow.js.
|
|
22
22
|
const _args = (typeof args === "string") ? (() => { try { return JSON.parse(args); } catch { return {}; } })() : (args || {});
|
|
23
|
+
// M86: resolved overrides map injected by the invoker (invoke-time injection, M69).
|
|
24
|
+
// Default to {} so the premium fallback literals apply when no invoker injects overrides.
|
|
25
|
+
// overrides values are CONCRETE model ids (resolver envelope); the bare literals below
|
|
26
|
+
// are tier ALIASES. The sandbox runtime accepts BOTH forms in the `model:` option — proven live for
|
|
27
|
+
// the concrete-id fable path by probe wf_c9faf817-373 (no HTTP 400).
|
|
28
|
+
const overrides = (_args.overrides && typeof _args.overrides === "object") ? _args.overrides : {};
|
|
23
29
|
const _CLI_ENVELOPE_SCHEMA = {
|
|
24
30
|
type: "object", required: ["ok", "exitCode"], additionalProperties: true,
|
|
25
31
|
properties: { ok: { type: "boolean" }, exitCode: { type: "integer" }, envelope: {}, stdout: { type: "string" }, stderr: { type: "string" }, via: { type: "string" } },
|
|
@@ -94,7 +100,7 @@ for (let cycle = 1; cycle <= 2; cycle++) {
|
|
|
94
100
|
label: `debug-cycle-${cycle}`,
|
|
95
101
|
phase: `Cycle ${cycle}`,
|
|
96
102
|
schema: DEBUG_CYCLE_SCHEMA,
|
|
97
|
-
model: cycle === 1 ? "opus" : "fable",
|
|
103
|
+
model: cycle === 1 ? "opus" : (overrides["debug-cycle-2"] ?? "fable"),
|
|
98
104
|
}).catch((e) => ({
|
|
99
105
|
resolved: false,
|
|
100
106
|
rootCause: `agent error: ${e && e.message}`,
|
|
@@ -51,6 +51,13 @@ export const meta = {
|
|
|
51
51
|
// require("./_lib.js") crashed this workflow on first eval, TD-113). Delegate CLI calls
|
|
52
52
|
// to an agent's Bash; args arrives as a JSON STRING in this runtime. See gsd-t-scan.workflow.js.
|
|
53
53
|
const _args = (typeof args === "string") ? (() => { try { return JSON.parse(args); } catch { return {}; } })() : (args || {});
|
|
54
|
+
// M86: resolved overrides map injected by the invoker (invoke-time injection, M69).
|
|
55
|
+
// Default to {} so the premium fallback literals apply when no invoker injects overrides
|
|
56
|
+
// (preserves byte-identical M85 behavior for callers that have not been updated yet).
|
|
57
|
+
// overrides values are CONCRETE model ids (resolver envelope); the bare literals below
|
|
58
|
+
// are tier ALIASES. The sandbox runtime accepts BOTH forms in the `model:` option — proven live for
|
|
59
|
+
// the concrete-id fable path by probe wf_c9faf817-373 (no HTTP 400).
|
|
60
|
+
const overrides = (_args.overrides && typeof _args.overrides === "object") ? _args.overrides : {};
|
|
54
61
|
const _CLI_ENVELOPE_SCHEMA = {
|
|
55
62
|
type: "object", required: ["ok", "exitCode"], additionalProperties: true,
|
|
56
63
|
properties: { ok: { type: "boolean" }, exitCode: { type: "integer" }, envelope: {}, stdout: { type: "string" }, stderr: { type: "string" }, via: { type: "string" } },
|
|
@@ -169,7 +176,7 @@ async function runSolutionSpaceProbe(projectDir, phaseName, { milestone, briefPa
|
|
|
169
176
|
`BIAS TOWARD COMPETING: if you are uncertain, or can name even two plausibly-different approaches, choose compete=true. A wasted competition costs ~3× this one phase; a missed-better-approach costs far more downstream (more pre-mortem blocks, more bugs, more verify cycles). Err on the side of generating options.`,
|
|
170
177
|
`Return JSON per the schema: { "compete": true|false, "reason": "<one sentence>", "approaches": ["<a>","<b>",...] }.`,
|
|
171
178
|
].filter(Boolean).join("\n");
|
|
172
|
-
const opts = { label: "solution-space-probe", schema: _PROBE_SCHEMA, model: "fable" };
|
|
179
|
+
const opts = { label: "solution-space-probe", schema: _PROBE_SCHEMA, model: overrides["solution-space-probe"] ?? "fable" };
|
|
173
180
|
if (phaseNameOpt) opts.phase = phaseNameOpt;
|
|
174
181
|
const r = await agent(prompt, opts).catch(() => null);
|
|
175
182
|
// Probe failure → bias toward competing (fail-toward-options, per the cost logic).
|
|
@@ -195,7 +202,7 @@ async function runPartitionProbe(projectDir, { milestone, briefPath, userInput,
|
|
|
195
202
|
`BIAS TOWARD COMPETING: if ≥3 files/areas are in play or you're unsure, choose compete=true — the file-disjointness oracle will objectively pick the most-parallelizable valid carving among the candidates, so competing is low-risk and high-reward.`,
|
|
196
203
|
`Return JSON per the schema.`,
|
|
197
204
|
].filter(Boolean).join("\n");
|
|
198
|
-
const opts = { label: "partition-probe", schema: _PROBE_SCHEMA, model: "fable" };
|
|
205
|
+
const opts = { label: "partition-probe", schema: _PROBE_SCHEMA, model: overrides["partition-probe"] ?? "fable" };
|
|
199
206
|
if (phaseNameOpt) opts.phase = phaseNameOpt;
|
|
200
207
|
const r = await agent(prompt, opts).catch(() => null);
|
|
201
208
|
if (!r || typeof r.compete !== "boolean") {
|
|
@@ -473,7 +480,7 @@ if (!competitionOn) {
|
|
|
473
480
|
`IMPORTANT: use the CANDIDATE LABEL (A, B, C…) shown above as the "id" in your scores.`,
|
|
474
481
|
].join("\n"),
|
|
475
482
|
{
|
|
476
|
-
label: "judge:rubric", phase: "Judge", model: "fable",
|
|
483
|
+
label: "judge:rubric", phase: "Judge", model: overrides["competition-judge"] ?? "fable",
|
|
477
484
|
schema: {
|
|
478
485
|
type: "object", required: ["scores"], additionalProperties: true,
|
|
479
486
|
properties: { scores: { type: "array", items: { type: "object", additionalProperties: true } } },
|
|
@@ -653,7 +660,7 @@ if (phaseName === "plan" && result && result.status !== "failed") {
|
|
|
653
660
|
`Every blocking finding MUST convert to a concrete requiredTest the plan must adopt. Advisory notes are forbidden.`,
|
|
654
661
|
`Verdict BLOCK if any concrete, falsifiable failure condition lacks a named required test; else CLEARED. Return JSON per the schema.`,
|
|
655
662
|
].join("\n"),
|
|
656
|
-
{ label: "pre-mortem", phase: "Plan Hardening", schema: PRE_MORTEM_SCHEMA, model: "fable" }
|
|
663
|
+
{ label: "pre-mortem", phase: "Plan Hardening", schema: PRE_MORTEM_SCHEMA, model: overrides["pre-mortem"] ?? "fable" }
|
|
657
664
|
).catch((e) => ({ verdict: "BLOCK", findings: [{ severity: "HIGH", condition: `pre-mortem agent error: ${e && e.message}`, requiredTest: "re-run pre-mortem" }], notes: "agent-error" }));
|
|
658
665
|
|
|
659
666
|
result.preMortem = preMortem;
|
|
@@ -37,6 +37,12 @@ export const meta = {
|
|
|
37
37
|
// QA/Red-Team protocol bodies are read by an agent (Read) instead of fs. args arrives as
|
|
38
38
|
// a JSON STRING in this runtime. See gsd-t-scan.workflow.js.
|
|
39
39
|
const _args = (typeof args === "string") ? (() => { try { return JSON.parse(args); } catch { return {}; } })() : (args || {});
|
|
40
|
+
// M86: resolved overrides map injected by the invoker (invoke-time injection, M69).
|
|
41
|
+
// Default to {} so the premium fallback literals apply when no invoker injects overrides.
|
|
42
|
+
// overrides values are CONCRETE model ids (resolver envelope); the bare literals below
|
|
43
|
+
// are tier ALIASES. The sandbox runtime accepts BOTH forms in the `model:` option — proven live for
|
|
44
|
+
// the concrete-id fable path by probe wf_c9faf817-373 (no HTTP 400).
|
|
45
|
+
const overrides = (_args.overrides && typeof _args.overrides === "object") ? _args.overrides : {};
|
|
40
46
|
const _CLI_ENVELOPE_SCHEMA = {
|
|
41
47
|
type: "object", required: ["ok", "exitCode"], additionalProperties: true,
|
|
42
48
|
properties: { ok: { type: "boolean" }, exitCode: { type: "integer" }, envelope: {}, stdout: { type: "string" }, stderr: { type: "string" }, via: { type: "string" } },
|
|
@@ -304,7 +310,7 @@ const stages = [
|
|
|
304
310
|
`Verdict is FAIL if you found any CRITICAL or HIGH severity bug; GRUDGING-PASS`,
|
|
305
311
|
`if you searched exhaustively and found nothing. Return JSON per the schema.`,
|
|
306
312
|
].join("\n"),
|
|
307
|
-
{ label: "red-team", phase: "Orthogonal Triad", schema: RED_TEAM_SCHEMA, model: "fable" }
|
|
313
|
+
{ label: "red-team", phase: "Orthogonal Triad", schema: RED_TEAM_SCHEMA, model: overrides["red-team"] ?? "fable" }
|
|
308
314
|
),
|
|
309
315
|
|
|
310
316
|
// Stage C — QA (test execution + shallow-test detection + contract compliance)
|
|
@@ -18,6 +18,11 @@ export const meta = {
|
|
|
18
18
|
// lib.*, but the `require("./_lib.js")` import alone crashed it on first eval in the
|
|
19
19
|
// sandbox (TD-113). Removed. args arrives as a JSON STRING in this runtime, so parse it.
|
|
20
20
|
const _args = (typeof args === "string") ? (() => { try { return JSON.parse(args); } catch { return {}; } })() : (args || {});
|
|
21
|
+
// M86: resolved overrides map injected by the wave invoker (invoke-time injection, M69).
|
|
22
|
+
// Forward to BOTH sub-workflow calls so the profile-tier assignments propagate through
|
|
23
|
+
// the full cycle (pre-mortem r1 #1 CRITICAL: wave was the only entry point that never
|
|
24
|
+
// forwarded overrides, leaving red-team on the premium fallback regardless of profile).
|
|
25
|
+
const overrides = (_args.overrides && typeof _args.overrides === "object") ? _args.overrides : {};
|
|
21
26
|
|
|
22
27
|
const projectDir = _args.projectDir || ".";
|
|
23
28
|
const milestone = _args.milestone || null;
|
|
@@ -29,14 +34,14 @@ if (!milestone || !domains.length) {
|
|
|
29
34
|
}
|
|
30
35
|
|
|
31
36
|
phase("Execute");
|
|
32
|
-
const execResult = await workflow("gsd-t-execute", { milestone, domains, projectDir });
|
|
37
|
+
const execResult = await workflow("gsd-t-execute", { milestone, domains, projectDir, overrides });
|
|
33
38
|
if (execResult.status !== "complete") {
|
|
34
39
|
log(`execute status=${execResult.status} — halting before verify`);
|
|
35
40
|
return { status: execResult.status, stage: "execute", execResult };
|
|
36
41
|
}
|
|
37
42
|
|
|
38
43
|
phase("Verify");
|
|
39
|
-
const verifyResult = await workflow("gsd-t-verify", { milestone, projectDir });
|
|
44
|
+
const verifyResult = await workflow("gsd-t-verify", { milestone, projectDir, overrides });
|
|
40
45
|
|
|
41
46
|
return {
|
|
42
47
|
status: verifyResult.status === "complete" ? "complete" : "verify-failed",
|