@tekyzinc/gsd-t 3.26.10 → 3.27.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,99 @@
1
+ /**
2
+ * bin/model-windows.cjs
3
+ *
4
+ * Single source of truth for Claude model → context-window size (in input
5
+ * tokens). The context meter and every downstream budget module must size the
6
+ * window from the MODEL ACTUALLY RUNNING, not a hardcoded constant.
7
+ *
8
+ * Why this exists
9
+ * ---------------
10
+ * Prior to this module every budget site hardcoded `200000` with a comment
11
+ * "claude-opus-4-6 default". Opus 4.6 and 4.7 (and Sonnet 4.x) ship a
12
+ * 1,000,000-token context window. Hardcoding 200k made the context meter
13
+ * overcount usage 5× and fire the headless handoff at ~64% of context
14
+ * REMAINING. This map fixes that at the source.
15
+ *
16
+ * Resolution strategy
17
+ * -------------------
18
+ * GSD-T jumps between models per-subagent, so a static config value is wrong.
19
+ * The orchestrator session whose transcript the meter reads, however, runs a
20
+ * single model for its lifetime, and every assistant message in the transcript
21
+ * records its `model` id. `windowForModel(modelId)` maps that id to a window.
22
+ *
23
+ * Matching is by longest-prefix so versioned ids resolve even if a future
24
+ * dated suffix appears (e.g. "claude-opus-4-7-20260115" → "claude-opus-4-7" entry).
25
+ * Unknown / missing model → SAFE_DEFAULT_WINDOW (the large 1M window). A guard
26
+ * that triggers a little late is better than one that fires far too early
27
+ * (see the constant's comment below), so we deliberately pick the large default:
28
+ * the meter must NOT regress to premature handoffs on an unrecognized new model.
29
+ *
30
+ * Zero dependencies. CommonJS. Pure functions.
31
+ */
32
+
33
+ "use strict";
34
+
35
+ // The conservative fallback when a model can't be resolved. We choose the
36
+ // LARGE window (1M) on purpose: the bug we are fixing is premature handoff
37
+ // from a too-SMALL assumed window. An unknown future model is far more likely
38
+ // to have a >=1M window than a 200k one, and a too-large assumed window degrades
39
+ // gracefully (handoff a little late) whereas a too-small one breaks the
40
+ // workflow (handoff way too early, the reported symptom).
41
+ const SAFE_DEFAULT_WINDOW = 1_000_000;
42
+
43
+ // The legacy small window, kept as a named export for the few call sites that
44
+ // must preserve old behavior explicitly (e.g. fixtures, back-compat configs).
45
+ const LEGACY_SMALL_WINDOW = 200_000;
46
+
47
+ // Longest-prefix map: key is a model-id prefix, value is the input-token
48
+ // context window for that model family. Order does not matter — resolution
49
+ // picks the LONGEST matching prefix.
50
+ const MODEL_WINDOWS = Object.freeze({
51
+ // Opus 4.6 / 4.7 — 1M context window.
52
+ "claude-opus-4-6": 1_000_000,
53
+ "claude-opus-4-7": 1_000_000,
54
+ // Generic opus-4 fallback (covers any 4.x point release not listed above).
55
+ "claude-opus-4": 1_000_000,
56
+
57
+ // Sonnet 4.x — 1M context window.
58
+ "claude-sonnet-4": 1_000_000,
59
+
60
+ // Haiku 4.x — 200k context window.
61
+ "claude-haiku-4": 200_000,
62
+
63
+ // Pre-4 families (defensive — older long sessions / replayed transcripts).
64
+ "claude-3-7-sonnet": 200_000,
65
+ "claude-3-5-sonnet": 200_000,
66
+ "claude-3-5-haiku": 200_000,
67
+ "claude-3-opus": 200_000,
68
+ });
69
+
70
+ /**
71
+ * Resolve a context-window size (input tokens) for a Claude model id.
72
+ *
73
+ * @param {string|null|undefined} modelId e.g. "claude-opus-4-7" or
74
+ * "claude-opus-4-7-20260115". Non-string / empty → SAFE_DEFAULT_WINDOW.
75
+ * @returns {number} positive integer window size
76
+ */
77
+ function windowForModel(modelId) {
78
+ if (typeof modelId !== "string" || modelId.length === 0) {
79
+ return SAFE_DEFAULT_WINDOW;
80
+ }
81
+ const id = modelId.trim().toLowerCase();
82
+
83
+ let best = null;
84
+ let bestLen = -1;
85
+ for (const prefix of Object.keys(MODEL_WINDOWS)) {
86
+ if (id.startsWith(prefix) && prefix.length > bestLen) {
87
+ best = MODEL_WINDOWS[prefix];
88
+ bestLen = prefix.length;
89
+ }
90
+ }
91
+ return best != null ? best : SAFE_DEFAULT_WINDOW;
92
+ }
93
+
94
+ module.exports = {
95
+ windowForModel,
96
+ MODEL_WINDOWS,
97
+ SAFE_DEFAULT_WINDOW,
98
+ LEGACY_SMALL_WINDOW,
99
+ };
@@ -0,0 +1,75 @@
1
+ /**
2
+ * Tests for bin/model-windows.cjs — model → context-window resolution.
3
+ *
4
+ * The bug this fixes: the context meter hardcoded a 200k window so an Opus 4.7
5
+ * session (1M window) read as 5× over budget, firing the headless handoff at
6
+ * ~64% of context REMAINING. These tests pin the corrected windows.
7
+ */
8
+
9
+ "use strict";
10
+
11
+ const test = require("node:test");
12
+ const assert = require("node:assert/strict");
13
+
14
+ const {
15
+ windowForModel,
16
+ MODEL_WINDOWS,
17
+ SAFE_DEFAULT_WINDOW,
18
+ LEGACY_SMALL_WINDOW,
19
+ } = require("./model-windows.cjs");
20
+
21
+ test("Opus 4.7 resolves to a 1M window (the reported regression)", () => {
22
+ assert.equal(windowForModel("claude-opus-4-7"), 1_000_000);
23
+ });
24
+
25
+ test("Opus 4.6 resolves to a 1M window", () => {
26
+ assert.equal(windowForModel("claude-opus-4-6"), 1_000_000);
27
+ });
28
+
29
+ test("dated/versioned suffix still resolves via longest-prefix", () => {
30
+ assert.equal(windowForModel("claude-opus-4-7-20260115"), 1_000_000);
31
+ assert.equal(windowForModel("claude-sonnet-4-6-20251201"), 1_000_000);
32
+ });
33
+
34
+ test("Sonnet 4.x resolves to a 1M window", () => {
35
+ assert.equal(windowForModel("claude-sonnet-4-6"), 1_000_000);
36
+ assert.equal(windowForModel("claude-sonnet-4"), 1_000_000);
37
+ });
38
+
39
+ test("Haiku 4.x resolves to the 200k window", () => {
40
+ assert.equal(windowForModel("claude-haiku-4-5-20251001"), 200_000);
41
+ assert.equal(windowForModel("claude-haiku-4"), 200_000);
42
+ });
43
+
44
+ test("longest-prefix wins over a shorter generic prefix", () => {
45
+ // "claude-opus-4-7" (15 chars) must beat "claude-opus-4" (13 chars). Both map
46
+ // to 1M today, so this cannot tell the two entries apart by value; comparing
47
+ // against the map entry means the test starts guarding once they diverge.
48
+ assert.equal(windowForModel("claude-opus-4-7"), MODEL_WINDOWS["claude-opus-4-7"]);
49
+ });
50
+
51
+ test("case-insensitive and whitespace-tolerant", () => {
52
+ assert.equal(windowForModel(" CLAUDE-OPUS-4-7 "), 1_000_000);
53
+ });
54
+
55
+ test("unknown / missing model falls back to the SAFE large default", () => {
56
+ assert.equal(windowForModel("claude-future-99"), SAFE_DEFAULT_WINDOW);
57
+ assert.equal(windowForModel(""), SAFE_DEFAULT_WINDOW);
58
+ assert.equal(windowForModel(null), SAFE_DEFAULT_WINDOW);
59
+ assert.equal(windowForModel(undefined), SAFE_DEFAULT_WINDOW);
60
+ assert.equal(windowForModel(42), SAFE_DEFAULT_WINDOW);
61
+ });
62
+
63
+ test("SAFE_DEFAULT_WINDOW is the large (1M) window, not the legacy 200k", () => {
64
+ // Core anti-regression assertion: the fallback must NOT reintroduce the
65
+ // premature-handoff bug for an unrecognized model.
66
+ assert.equal(SAFE_DEFAULT_WINDOW, 1_000_000);
67
+ assert.equal(LEGACY_SMALL_WINDOW, 200_000);
68
+ assert.notEqual(SAFE_DEFAULT_WINDOW, LEGACY_SMALL_WINDOW);
69
+ });
70
+
71
+ test("every mapped window is a positive integer", () => {
72
+ for (const [k, v] of Object.entries(MODEL_WINDOWS)) {
73
+ assert.ok(Number.isInteger(v) && v > 0, `${k} → ${v} must be a positive int`);
74
+ }
75
+ });
@@ -26,12 +26,43 @@
26
26
 
27
27
  const fs = require('fs');
28
28
  const path = require('path');
29
+ const { SAFE_DEFAULT_WINDOW } = require('./model-windows.cjs');
29
30
 
30
31
  const DEFAULT_K = 5;
31
- const DEFAULT_MODEL_CONTEXT_CAP = 200000;
32
+ // The model context cap IS the model's true window. Default to the model-aware
33
+ // safe LARGE window (1M) — the old 200K literal was correct only for pre-4
34
+ // models and made turn-to-compact predictions fire 5× too early on Opus/Sonnet.
35
+ // Callers may still pass an explicit `modelContextCap`, and resolveContextCap()
36
+ // below prefers a fresh Context Meter reading when available.
37
+ const DEFAULT_MODEL_CONTEXT_CAP = SAFE_DEFAULT_WINDOW;
32
38
  // Claude Code starts auto-compacting ~8% before the model window fills, so the
33
39
  // effective dialog ceiling is 0.92 × modelContextCap.
34
40
  const PRE_COMPACT_HEADROOM = 0.92;
41
+ // Context Meter state — its modelWindowSize is model-aware (bin/model-windows.cjs).
42
+ const METER_STATE_REL = '.gsd-t/.context-meter-state.json';
43
+ const METER_STATE_STALE_MS = 5 * 60 * 1000;
44
+
45
+ /**
46
+ * Resolve the effective model context cap. Priority:
47
+ * 1. explicit opts.modelContextCap (caller override)
48
+ * 2. Context Meter state modelWindowSize, if its timestamp is no older than
49
+ * METER_STATE_STALE_MS (model-aware, written by the meter hook from the running model)
50
+ * 3. DEFAULT_MODEL_CONTEXT_CAP (safe large fallback)
51
+ */
52
+ function resolveContextCap(projectDir, optCap) {
53
+ if (Number.isFinite(optCap) && optCap > 0) return optCap;
54
+ try {
55
+ const fp = path.join(projectDir || '.', METER_STATE_REL);
56
+ const s = JSON.parse(fs.readFileSync(fp, 'utf8'));
57
+ if (s && typeof s.modelWindowSize === 'number' && s.modelWindowSize > 0 && s.timestamp) {
58
+ const age = Date.now() - Date.parse(s.timestamp);
59
+ if (!isNaN(age) && age >= 0 && age <= METER_STATE_STALE_MS) {
60
+ return s.modelWindowSize;
61
+ }
62
+ }
63
+ } catch (_) { /* fall through to default */ }
64
+ return DEFAULT_MODEL_CONTEXT_CAP;
65
+ }
35
66
  const DEFAULT_WARN_THRESHOLD_TURNS = 5;
36
67
  const MIN_HISTORY = 3;
37
68
 
@@ -107,7 +138,8 @@ function _sortTurns(rows) {
107
138
  * @param {string} opts.projectDir
108
139
  * @param {string} opts.sessionId required
109
140
  * @param {number} [opts.k] default 5 (last K turns)
110
- * @param {number} [opts.modelContextCap] default 200000
141
+ * @param {number} [opts.modelContextCap] default: fresh Context Meter
142
+ * modelWindowSize if available, else the model-aware safe window (1M)
111
143
  * @param {number} [opts.warnThresholdTurns] default 5
112
144
  * @returns {{
113
145
  * shouldWarn: boolean,
@@ -124,9 +156,7 @@ function estimateDialogGrowth(opts) {
124
156
  const projectDir = (opts && opts.projectDir) || '.';
125
157
  const sessionId = opts && opts.sessionId;
126
158
  const k = (opts && Number.isFinite(opts.k) && opts.k > 0) ? Math.floor(opts.k) : DEFAULT_K;
127
- const cap = (opts && Number.isFinite(opts.modelContextCap) && opts.modelContextCap > 0)
128
- ? opts.modelContextCap
129
- : DEFAULT_MODEL_CONTEXT_CAP;
159
+ const cap = resolveContextCap(projectDir, opts && opts.modelContextCap);
130
160
  const warnThreshold = (opts && Number.isFinite(opts.warnThresholdTurns) && opts.warnThresholdTurns > 0)
131
161
  ? opts.warnThresholdTurns
132
162
  : DEFAULT_WARN_THRESHOLD_TURNS;
@@ -26,11 +26,18 @@
26
26
 
27
27
  const fs = require("fs");
28
28
  const path = require("path");
29
+ const { SAFE_DEFAULT_WINDOW } = require("./model-windows.cjs");
29
30
 
30
31
  // ── Constants ────────────────────────────────────────────────────────────────
31
32
 
32
33
  const MODEL_RATIOS = { haiku: 1, sonnet: 5, opus: 25 };
33
34
 
35
+ // Fallback context window when no per-model signal is available. Uses the
36
+ // model-windows safe LARGE default (1M) rather than a bare 200K literal: a
37
+ // too-small fallback re-introduces the premature-headless-handoff bug the
38
+ // model-aware sizing was added to fix. See bin/model-windows.cjs.
39
+ const FALLBACK_WINDOW = SAFE_DEFAULT_WINDOW;
40
+
34
41
  // Base token estimates per task type (in haiku-equivalent units)
35
42
  const BASE_ESTIMATES = {
36
43
  execute: 8000,
@@ -106,7 +113,9 @@ function getSessionStatus(projectDir) {
106
113
  const real = readContextMeterState(dir);
107
114
  if (real) {
108
115
  const consumed = real.inputTokens;
109
- const window = real.modelWindowSize > 0 ? real.modelWindowSize : 200000;
116
+ // Primary: the Context Meter writes a model-aware modelWindowSize into
117
+ // state (bin/model-windows.cjs). Fallback only when state predates that.
118
+ const window = real.modelWindowSize > 0 ? real.modelWindowSize : FALLBACK_WINDOW;
110
119
  const estimated_remaining = Math.max(0, window - consumed);
111
120
  const pct = Math.round(real.pct * 10) / 10;
112
121
  const threshold = bandFor(pct, thresholdPct);
@@ -145,7 +154,7 @@ function resolveThresholdPct(dir) {
145
154
  }
146
155
 
147
156
  function getSessionStatusHeuristic(dir, thresholdPct) {
148
- const window = 200000;
157
+ const window = FALLBACK_WINDOW;
149
158
  const consumed = readSessionConsumed(dir);
150
159
  const estimated_remaining = Math.max(0, window - consumed);
151
160
  const pct = window > 0 ? Math.round((consumed / window) * 100 * 10) / 10 : 0;
@@ -180,7 +189,7 @@ function recordUsage(usage) {
180
189
  */
181
190
  function estimateMilestoneCost(remainingTasks, projectDir) {
182
191
  const status = getSessionStatus(projectDir);
183
- const window = status.consumed + status.estimated_remaining || 200000;
192
+ const window = status.consumed + status.estimated_remaining || FALLBACK_WINDOW;
184
193
  const estimatedTokens = remainingTasks.reduce((sum, t) => {
185
194
  return sum + estimateCost(t.model, t.taskType, { complexity: t.complexity, projectDir });
186
195
  }, 0);
@@ -455,6 +455,20 @@ Use these when user asks for help on a specific command:
455
455
  - **Use when**: Final pre-merge gate. Both tracks always run; both report. `ok` is purely deterministic — LLM verdict is advisory.
456
456
  - **CLI**: `gsd-t verify-gate [--skip-track1] [--skip-track2] [--max-concurrency N] [--fail-fast] [--json]`. Exit 0/4/2/3.
457
457
 
458
+ ### build-coverage (M57)
459
+ - **Summary**: Detects new top-level paths in a milestone commit range not referenced by a real CI build input. Coverage is decided by STRUCTURALLY parsing CI files (Dockerfile COPY/ADD source args incl. relative `--from=`; cloudbuild `args`-positional; workflow `run`-positional via a block-scalar-aware YAML walker) — never substring-matching. `node_modules` never counts. No documented false-negative residual.
460
+ - **Auto-invoked**: Yes — by `gsd-t-verify` Step 2.6 (FAIL-blocking, never warning-only)
461
+ - **Files**: `bin/gsd-t-build-coverage.cjs`
462
+ - **Use when**: Final pre-merge gate. Catches the TimeTracking v1.10.12 class (new `hooks/` dir committed, absent from Dockerfile COPY, shipped broken while verify passed).
463
+ - **CLI**: `gsd-t build-coverage [--json] [--base REF] [--head REF] [--project-dir PATH]`. Exit 0/4/2.
464
+
465
+ ### ci-parity (M57)
466
+ - **Summary**: Reproduces the project's actual CI build locally instead of assuming warm-cache local tsc/test parity. Auto-detects CI config (cloudbuild → workflows → Dockerfile RUN → package.json scripts), clears build caches (containment-safe — refuses any config-derived delete resolving outside OR equal-to projectRoot), and auto-runs the real `docker build` when a Dockerfile is present (presence is the trigger, no opt-in flag).
467
+ - **Auto-invoked**: Yes — by `gsd-t-verify` Step 2.6 (FAIL-blocking, never warning-only)
468
+ - **Files**: `bin/gsd-t-ci-parity.cjs`
469
+ - **Use when**: Final pre-merge gate. Catches the TimeTracking v1.10.12 class (noImplicitAny regressions passed a warm-cache local tsc but failed CI's cold build).
470
+ - **CLI**: `gsd-t ci-parity [--project-dir PATH] [--timeout-ms MS] [--json]`. Exit 0/4/2.
471
+
458
472
  ## Unknown Command
459
473
 
460
474
  If user asks for help on unrecognized command:
@@ -68,13 +68,29 @@ Ask user: "Milestone {N-1} is still {status}. Archive it and start new? Or compl
68
68
  node scripts/gsd-t-watch-state.js advance --agent-id "$GSD_T_AGENT_ID" --parent-id "${GSD_T_PARENT_AGENT_ID:-null}" --command gsd-t-milestone --step 4 --step-label "Pre-Partition Assessment" 2>/dev/null || true
69
69
  ```
70
70
 
71
- Before formal partitioning, do a quick assessment:
71
+ Before formal partitioning, do a quick assessment.
72
72
 
73
- - **Complexity estimate**: Simple (1-2 domains), Medium (3-4), Complex (5+)
73
+ **Express scope in GSD-T-native units only.** Per `feedback_no_human_hour_estimates.md` (memory): never use developer-hours, dev-days, sprints, story points, or person-weeks. Use these instead:
74
+
75
+ | Unit | Use for |
76
+ |------|---------|
77
+ | **Domain count** | Partition coarseness (1-2 / 3-4 / 5+) |
78
+ | **Wave count** | How many serial gates the milestone needs |
79
+ | **Spawn count** | Estimated `claude -p` / Task subagent invocations |
80
+ | **Token-spend range** | `$X-Y` based on prior comparable milestones |
81
+ | **Rate-limit-window count** | If the milestone might span > 1 5h Claude Max window |
82
+ | **Parallel-domain count** | How many domains can run concurrently (file-disjoint) |
83
+
84
+ Assessment template:
85
+
86
+ - **Domain count**: Simple (1-2 domains), Medium (3-4), Complex (5+)
87
+ - **Wave count**: estimated based on cross-domain dependencies
88
+ - **Parallel-domain count**: how many can run in the same wave
89
+ - **Token-spend range**: $X-Y based on prior comparable milestones (read `.gsd-t/token-log.md` for trailing-3 comparison)
74
90
  - **Recommended approach**:
75
- - Simple: Consider using /gsd-t-quick for each piece
76
- - Medium: Standard partition → plan → execute flow
77
- - Complex: Partition → discuss → plan → execute → integrate → verify
91
+ - 1-2 domains: Consider using /gsd-t-quick for each piece
92
+ - 3-4 domains: Standard partition → plan → execute flow
93
+ - 5+ domains: Partition → discuss → plan → execute → integrate → verify
78
94
 
79
95
  Present the assessment and ask: "Ready to partition into domains now, or want to discuss first?"
80
96
 
@@ -67,7 +67,7 @@ Append to `.gsd-t/roadmap.md`:
67
67
  - [ ] {each debt item resolved and verified}
68
68
  - [ ] No regression in existing functionality
69
69
  - [ ] {item-specific criteria from techdebt.md}
70
- **Estimated effort**: {combined effort assessment}
70
+ **Estimated scope**: {N domains} / {N waves} / $X-Y token-spend (express in GSD-T units only — never developer-hours/days/sprints per `feedback_no_human_hour_estimates.md`)
71
71
  **Priority**: {CRITICAL — before next feature | HIGH — soon | MEDIUM — planned}
72
72
  ```
73
73
 
@@ -351,7 +351,7 @@ Synthesize ALL findings into a **fresh** `.gsd-t/techdebt.md` (the previous vers
351
351
  - High priority: {N}
352
352
  - Medium priority: {N}
353
353
  - Low priority: {N}
354
- - Total estimated effort: {rough assessment}
354
+ - Total estimated scope: {N domains} / {N waves} / $X-Y token-spend (GSD-T units only — see `feedback_no_human_hour_estimates.md`)
355
355
  - Previous scan archive: techdebt_{previous-date}.md
356
356
 
357
357
  ---
@@ -461,22 +461,22 @@ Review all items marked `Milestone candidate: YES` and group them into logical m
461
461
 
462
462
  ### Suggested: Security Hardening (Critical)
463
463
  Combines: TD-001, TD-003, TD-005
464
- Estimated effort: {assessment}
464
+ Estimated scope: {domain-count} domains, {wave-count} waves, $X-Y token-spend (express in GSD-T units — domain/wave/spawn/token — never human-hours/days/sprints per `feedback_no_human_hour_estimates.md`)
465
465
  Should be prioritized: BEFORE next feature milestone
466
466
 
467
467
  ### Suggested: Performance Optimization (High)
468
468
  Combines: TD-010, TD-012, TD-015
469
- Estimated effort: {assessment}
469
+ Estimated scope: {domain-count} domains, {wave-count} waves, $X-Y token-spend (express in GSD-T units — domain/wave/spawn/token — never human-hours/days/sprints per `feedback_no_human_hour_estimates.md`)
470
470
  Can be scheduled: AFTER current feature work
471
471
 
472
- ### Suggested: Dependency Update Sprint (Medium)
472
+ ### Suggested: Dependency Update (Medium)
473
473
  Combines: TD-020, dependency table items with breaking=yes
474
- Estimated effort: {assessment}
474
+ Estimated scope: {domain-count} domains, {wave-count} waves, $X-Y token-spend (express in GSD-T units — domain/wave/spawn/token — never human-hours/days/sprints per `feedback_no_human_hour_estimates.md`)
475
475
  Can be scheduled: During next maintenance window
476
476
 
477
477
  ### Suggested: Shared Service Extraction (if candidates found)
478
478
  Combines: all "Shared Service Candidates" from quality.md Reusability Analysis
479
- Estimated effort: {assessment}
479
+ Estimated scope: {domain-count} domains, {wave-count} waves, $X-Y token-spend (express in GSD-T units — domain/wave/spawn/token — never human-hours/days/sprints per `feedback_no_human_hour_estimates.md`)
480
480
  Should be prioritized: BEFORE adding new consumer surfaces to the system
481
481
  Note: Use `/gsd-t-partition` Step 1.6 to design the SharedCore domain
482
482
  ```
@@ -102,6 +102,52 @@ Defensive on missing `.gsd-t/ratelimit-map.json` — verify-gate falls back to
102
102
 
103
103
  Contract: `.gsd-t/contracts/verify-gate-contract.md` v1.0.0 STABLE.
104
104
 
105
+ <!-- M57: CI-parity FAIL-blocking gate -->
106
+ ## Step 2.6: CI-Parity Gate (MANDATORY — FAIL-blocking, never warning-only)
107
+
108
+ ```bash
109
+ node scripts/gsd-t-watch-state.js advance --agent-id "$GSD_T_AGENT_ID" --parent-id "${GSD_T_PARENT_AGENT_ID:-null}" --command gsd-t-verify --step 2 --step-label ".6: CI-Parity Gate" 2>/dev/null || true
110
+ ```
111
+
112
+ Origin: TimeTracking v1.10.12 shipped VERIFIED + tagged while Cloud Build
113
+ failed (a new top-level `hooks/` dir was committed but never added to the
114
+ Dockerfile `COPY` directives, and `noImplicitAny` regressions passed a
115
+ warm-cache local `tsc` but failed CI's cold build). `gsd-t-verify` must
116
+ reproduce the project's *actual* CI build, not assume local parity.
117
+
118
+ Run BOTH checks. **Either failing is a verify FAIL — it blocks
119
+ complete-milestone. This is never a warning-only signal.**
120
+
121
+ ```bash
122
+ # 1. Build-coverage — every new top-level path in the milestone range must be
123
+ # referenced by a real CI build input (structural parse, not substring).
124
+ gsd-t build-coverage --json > /tmp/gsd-t-build-coverage.json
125
+ BC_EXIT=$?
126
+
127
+ # 2. CI-parity — reproduce the project's actual CI build locally with caches
128
+ # cleared; auto-runs `docker build` when a Dockerfile is present.
129
+ gsd-t ci-parity --json > /tmp/gsd-t-ci-parity.json
130
+ CP_EXIT=$?
131
+ ```
132
+
133
+ - `build-coverage` exit **4** (`ok:false`, `missing[]` non-empty) → verify
134
+ FAIL. Report each uncovered path; the fix is to add the path to the
135
+ Dockerfile `COPY` / cloudbuild artifact / workflow build input.
136
+ - `ci-parity` exit **4** (`ok:false` — a detected CI command or the real
137
+ `docker build` failed) → verify FAIL. Report the failing command.
138
+ - exit **0** from both → gate passes.
139
+ - exit **2** (usage error, e.g. not a git repo) → record as a structured
140
+ note. This is NOT a pass by default — investigate before proceeding.
141
+
142
+ Both are pure-deterministic CLI checks (no LLM). They consume the same
143
+ preflight envelope as the M55 verify-gate Track 1, so failing here at
144
+ verify mirrors what CI would do — catching the TimeTracking class before
145
+ the milestone is tagged.
146
+
147
+ Contracts: `.gsd-t/contracts/cli-build-coverage-contract.md` v2.0.0 STABLE,
148
+ `.gsd-t/contracts/ci-parity-contract.md` v2.0.0 STABLE.
149
+ <!-- /M57: CI-parity FAIL-blocking gate -->
150
+
105
151
  ## Step 2.5: High-Risk Domain Gate (MANDATORY — Categories 2 and 7)
106
152
 
107
153
  ```bash
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tekyzinc/gsd-t",
3
- "version": "3.26.10",
3
+ "version": "3.27.10",
4
4
  "description": "GSD-T: Contract-Driven Development for Claude Code — 54 slash commands with headless-by-default workflow spawning, unattended supervisor relay with event stream, graph-powered code analysis, real-time agent dashboard, task telemetry, doc-ripple enforcement, backlog management, impact analysis, test sync, milestone archival, and PRD generation",
5
5
  "author": "Tekyz, Inc.",
6
6
  "license": "MIT",
@@ -82,9 +82,12 @@ const readline = require("readline");
82
82
  * Parse a Claude Code transcript JSONL file.
83
83
  *
84
84
  * @param {string} transcriptPath - absolute path to a Claude Code transcript .jsonl
85
- * @returns {Promise<{system: string, messages: Array<{role: string, content: Array}>} | null>}
85
+ * @returns {Promise<{system: string, messages: Array<{role: string, content: Array}>, model: string|null} | null>}
86
86
  * Resolves to the reconstructed body, or `null` on unreadable file /
87
87
  * catastrophic parse failure. Caller treats `null` as "bail out, fail open".
88
+ * `model` is the last-seen assistant `message.model` id (the model the
89
+ * orchestrator session is running) or `null` if none observed — the
90
+ * context meter uses it to size the context window correctly.
88
91
  */
89
92
  async function parseTranscript(transcriptPath) {
90
93
  if (typeof transcriptPath !== "string" || transcriptPath.length === 0) {
@@ -101,6 +104,10 @@ async function parseTranscript(transcriptPath) {
101
104
 
102
105
  const messages = [];
103
106
  let system = "";
107
+ // Last-seen assistant model id. Claude Code records `message.model` on every
108
+ // assistant turn; the orchestrator session runs one model for its lifetime,
109
+ // so the last value is authoritative for sizing the context window.
110
+ let model = null;
104
111
 
105
112
  let stream;
106
113
  try {
@@ -136,6 +143,9 @@ async function parseTranscript(transcriptPath) {
136
143
  if (!msg || typeof msg !== "object") continue;
137
144
 
138
145
  const role = msg.role || type;
146
+ if (type === "assistant" && typeof msg.model === "string" && msg.model.length > 0) {
147
+ model = msg.model;
148
+ }
139
149
  const content = normalizeContent(msg.content, role);
140
150
  if (content === null) continue;
141
151
 
@@ -152,7 +162,7 @@ async function parseTranscript(transcriptPath) {
152
162
  return null;
153
163
  }
154
164
 
155
- return { system, messages: sanitizeToolPairs(messages) };
165
+ return { system, messages: sanitizeToolPairs(messages), model };
156
166
  }
157
167
 
158
168
  /**
@@ -39,17 +39,17 @@ test("empty path / non-string → returns null", async () => {
39
39
  assert.equal(await parseTranscript(undefined), null);
40
40
  });
41
41
 
42
- test("empty file → returns { system:'', messages:[] }", async () => {
42
+ test("empty file → returns { system:'', messages:[], model:null }", async () => {
43
43
  const { dir, file } = mkTmpFile([]);
44
44
  try {
45
45
  const got = await parseTranscript(file);
46
- assert.deepEqual(got, { system: "", messages: [] });
46
+ assert.deepEqual(got, { system: "", messages: [], model: null });
47
47
  } finally {
48
48
  cleanup(dir);
49
49
  }
50
50
  });
51
51
 
52
- test("file with only unknown event types → { system:'', messages:[] }", async () => {
52
+ test("file with only unknown event types → { system:'', messages:[], model:null }", async () => {
53
53
  const { dir, file } = mkTmpFile([
54
54
  { type: "summary", foo: "bar" },
55
55
  { type: "system", subtype: "hook", hookInfos: [] },
@@ -60,7 +60,7 @@ test("file with only unknown event types → { system:'', messages:[] }", async
60
60
  ]);
61
61
  try {
62
62
  const got = await parseTranscript(file);
63
- assert.deepEqual(got, { system: "", messages: [] });
63
+ assert.deepEqual(got, { system: "", messages: [], model: null });
64
64
  } finally {
65
65
  cleanup(dir);
66
66
  }
@@ -104,6 +104,53 @@ test("normal conversation — string-content user + text assistant", async () =>
104
104
  }
105
105
  });
106
106
 
107
+ test("captures last-seen assistant model id (for window sizing)", async () => {
108
+ const { dir, file } = mkTmpFile([
109
+ { type: "user", message: { role: "user", content: "hi" } },
110
+ {
111
+ type: "assistant",
112
+ message: {
113
+ role: "assistant",
114
+ model: "claude-opus-4-7",
115
+ content: [{ type: "text", text: "first" }],
116
+ },
117
+ },
118
+ { type: "user", message: { role: "user", content: "more" } },
119
+ {
120
+ type: "assistant",
121
+ message: {
122
+ role: "assistant",
123
+ model: "claude-opus-4-7-20260115",
124
+ content: [{ type: "text", text: "second" }],
125
+ },
126
+ },
127
+ ]);
128
+ try {
129
+ const got = await parseTranscript(file);
130
+ // Last assistant model wins (orchestrator session is single-model, but the
131
+ // last value is authoritative if a dated id supersedes a bare one).
132
+ assert.equal(got.model, "claude-opus-4-7-20260115");
133
+ } finally {
134
+ cleanup(dir);
135
+ }
136
+ });
137
+
138
+ test("model stays null when no assistant message carries one", async () => {
139
+ const { dir, file } = mkTmpFile([
140
+ { type: "user", message: { role: "user", content: "hi" } },
141
+ {
142
+ type: "assistant",
143
+ message: { role: "assistant", content: [{ type: "text", text: "no model field" }] },
144
+ },
145
+ ]);
146
+ try {
147
+ const got = await parseTranscript(file);
148
+ assert.equal(got.model, null);
149
+ } finally {
150
+ cleanup(dir);
151
+ }
152
+ });
153
+
107
154
  test("tool_use / tool_result pairing by tool_use_id preserved in order", async () => {
108
155
  const TOOL_ID = "toolu_01ABC";
109
156
  const { dir, file } = mkTmpFile([
@@ -39,9 +39,16 @@
39
39
  const fs = require("fs");
40
40
  const path = require("path");
41
41
 
42
+ const { SAFE_DEFAULT_WINDOW } = require("../bin/model-windows.cjs");
43
+
42
44
  const MAX_STDIN = 1024 * 1024; // 1 MiB
43
45
  const SCHEMA_VERSION = 1;
44
- const DEFAULT_CW_CEILING_TOKENS = 200000; // input-token budget per CW
46
+ // Input-token budget per CW = the model context window. Default to the
47
+ // model-aware safe window (1M); the old 200K literal was correct only for
48
+ // pre-4 models and skewed every actualCwPct calibration ratio 5× on
49
+ // Opus/Sonnet. Event state may still override via cwCeilingTokens (the
50
+ // economics estimator records the model-aware ceiling it actually used).
51
+ const DEFAULT_CW_CEILING_TOKENS = SAFE_DEFAULT_WINDOW;
45
52
 
46
53
  if (require.main === module) {
47
54
  let input = "";