npm - @tekyzinc/gsd-t - Versions diffs - 3.26.11 → 3.27.10 - Mend

@tekyzinc/gsd-t 3.26.11 → 3.27.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

package/CHANGELOG.md +47 -0
package/README.md +2 -0
package/bin/context-budget-audit.cjs +17 -2
package/bin/gsd-t-build-coverage.cjs +438 -0
package/bin/gsd-t-ci-parity.cjs +500 -0
package/bin/gsd-t-economics.cjs +37 -9
package/bin/gsd-t.js +21 -0
package/bin/model-windows.cjs +99 -0
package/bin/model-windows.test.cjs +75 -0
package/bin/runway-estimator.cjs +35 -5
package/bin/token-budget.cjs +12 -3
package/commands/gsd-t-help.md +14 -0
package/commands/gsd-t-verify.md +46 -0
package/package.json +1 -1
package/scripts/context-meter/transcript-parser.js +12 -2
package/scripts/context-meter/transcript-parser.test.js +51 -4
package/scripts/gsd-t-calibration-hook.js +8 -1
package/scripts/gsd-t-context-meter.e2e.test.js +45 -6
package/scripts/gsd-t-context-meter.js +17 -3
package/scripts/gsd-t-context-meter.test.js +85 -0
package/templates/CLAUDE-global.md +6 -0

package/scripts/gsd-t-context-meter.test.js CHANGED Viewed

@@ -384,3 +384,88 @@ test("12. clock injection — timestamp uses injected clock", async () => {
   const state = JSON.parse(fs.readFileSync(stateFile(tmpRoot), "utf8"));
   assert.equal(state.timestamp, fixed.toISOString());
 });
+/* ── M-fix: model-aware context window (the reported regression) ───────── */
+test("13. Opus 4.7 @ ~36% of a 1M window stays 'normal' (regression repro)", async () => {
+  // The exact reported symptom: ~360k tokens used on an Opus 4.7 session.
+  // With the old hardcoded 200k window this computed 180% → premature
+  // headless handoff at ~64% of context REMAINING. With model-aware sizing
+  // the window is 1M, so 360k = 36% = normal, no handoff.
+  seedState(tmpRoot, { checkCount: 4 }); // NOTE(review): presumably primes the counter so this call performs a measurement — confirm against seedState/runMeter
+  const out = await runMeter({
+    payload: makePayload(),
+    projectRoot: tmpRoot,
+    _loadConfig: () => makeConfig(), // config still says 200k — must be overridden
+    _parseTranscript: async () => ({ ...FAKE_PARSED, model: "claude-opus-4-7" }), // stubbed transcript that reports a model id
+    _estimateTokens: () => ({ inputTokens: 360000 }), // 360k / 1M = 36%
+  });
+  // No handoff marker — this is the whole point of the fix.
+  assert.deepEqual(out, {}); // an empty result object means no additionalContext was emitted
+  const state = JSON.parse(fs.readFileSync(stateFile(tmpRoot), "utf8")); // re-read the state file to check what was recorded
+  assert.equal(state.modelWindowSize, 1_000_000, "window resolved from model, not config");
+  assert.equal(state.pct, 36, "360k / 1M = 36%");
+  assert.equal(state.threshold, "normal");
+});
+test("14. Opus 4.7 @ 80% of the true 1M window DOES hand off", async () => {
+  // The handoff must still fire at the real 75% threshold against the
+  // corrected window — we keep the guard, we just size it correctly.
+  seedState(tmpRoot, { checkCount: 4 });
+  const out = await runMeter({
+    payload: makePayload(),
+    projectRoot: tmpRoot,
+    _loadConfig: () => makeConfig(),
+    _parseTranscript: async () => ({ ...FAKE_PARSED, model: "claude-opus-4-7-20260115" }), // date-suffixed id: must resolve to the same 1M window as the bare id
+    _estimateTokens: () => ({ inputTokens: 800000 }), // 80% of 1M > 75%
+  });
+  assert.equal(out.additionalContext, "next-spawn-headless:true"); // the handoff marker is emitted
+  const state = JSON.parse(fs.readFileSync(stateFile(tmpRoot), "utf8")); // re-read the state file to check what was recorded
+  assert.equal(state.modelWindowSize, 1_000_000);
+  assert.equal(state.pct, 80); // 800k / 1M = 80%
+  assert.equal(state.threshold, "threshold"); // band label recorded when the handoff fires (vs "normal" at 36%)
+});
+test("15. no model in transcript → falls back to config window (back-compat)", async () => {
+  // Existing transcripts / stubs without a model field must behave exactly
+  // as before: config's modelWindowSize governs.
+  seedState(tmpRoot, { checkCount: 4 });
+  const out = await runMeter({
+    payload: makePayload(),
+    projectRoot: tmpRoot,
+    _loadConfig: () => makeConfig({ modelWindowSize: 200000 }), // window size set explicitly so the fallback value is unambiguous
+    _parseTranscript: async () => FAKE_PARSED, // no `model` key
+    _estimateTokens: () => ({ inputTokens: 160000 }), // 80% of 200k
+  });
+  assert.equal(out.additionalContext, "next-spawn-headless:true"); // the handoff marker is emitted
+  const state = JSON.parse(fs.readFileSync(stateFile(tmpRoot), "utf8")); // re-read the state file to check what was recorded
+  assert.equal(state.modelWindowSize, 200000); // the config value is recorded unchanged
+  assert.equal(state.pct, 80); // 160k / 200k = 80%
+});
+test("16. Haiku session correctly sized at 200k (not over-large 1M)", async () => { // a Haiku model id must yield a 200k window, so 170k used triggers the handoff
+  seedState(tmpRoot, { checkCount: 4 });
+  const out = await runMeter({
+    payload: makePayload(),
+    projectRoot: tmpRoot,
+    _loadConfig: () => makeConfig(), // NOTE(review): the default config also appears to be 200k (test 13 says "config still says 200k"),
+    _parseTranscript: async () => ({ // so these assertions pass whether 200k comes from the Haiku id or from the config fallback.
+      ...FAKE_PARSED, // Consider a non-200k config here to pin model-derived sizing — confirm model-vs-config precedence first.
+      model: "claude-haiku-4-5-20251001",
+    }),
+    _estimateTokens: () => ({ inputTokens: 170000 }), // 85% of 200k
+  });
+  assert.equal(out.additionalContext, "next-spawn-headless:true"); // the handoff marker is emitted
+  const state = JSON.parse(fs.readFileSync(stateFile(tmpRoot), "utf8")); // re-read the state file to check what was recorded
+  assert.equal(state.modelWindowSize, 200000);
+  assert.equal(state.pct, 85); // 170k / 200k = 85%
+});

package/templates/CLAUDE-global.md CHANGED Viewed

@@ -537,6 +537,12 @@ BEFORE EVERY COMMIT:
   │     YES → Verify test names and paths are referenced in requirements
   ├── Did I change UI, routes, or user flows?
   │     YES → Update affected E2E test specs (Playwright/Cypress)
+  ├── Did I add a new top-level dir, or change build/CI config?
+  │     This is ENFORCED MECHANICALLY by `gsd-t-verify` Step 2.6
+  │     (CI-Parity Gate: `gsd-t build-coverage` + `gsd-t ci-parity`,
+  │     FAIL-blocking). You do NOT self-attest this — verify runs the
+  │     real CI build. It exists because TimeTracking v1.10.12 shipped
+  │     VERIFIED+tagged with a new dir absent from the Dockerfile COPY.
   └── Did I run the affected tests?
         YES → Verify they pass. NO → Run them now.
 ```