@tekyzinc/gsd-t 2.74.13 → 2.76.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +116 -0
- package/README.md +71 -1
- package/bin/advisor-integration.js +93 -0
- package/bin/check-headless-sessions.js +140 -0
- package/bin/context-meter-config.cjs +101 -0
- package/bin/context-meter-config.test.cjs +101 -0
- package/bin/gsd-t.js +709 -16
- package/bin/headless-auto-spawn.js +290 -0
- package/bin/model-selector.js +224 -0
- package/bin/runway-estimator.js +242 -0
- package/bin/token-budget.js +96 -89
- package/bin/token-optimizer.js +471 -0
- package/bin/token-telemetry.js +246 -0
- package/commands/gsd-t-audit.md +3 -3
- package/commands/gsd-t-backlog-list.md +38 -0
- package/commands/gsd-t-brainstorm.md +3 -3
- package/commands/gsd-t-complete-milestone.md +24 -0
- package/commands/gsd-t-debug.md +124 -7
- package/commands/gsd-t-discuss.md +10 -3
- package/commands/gsd-t-doc-ripple.md +32 -4
- package/commands/gsd-t-execute.md +107 -52
- package/commands/gsd-t-help.md +19 -0
- package/commands/gsd-t-integrate.md +67 -4
- package/commands/gsd-t-optimization-apply.md +91 -0
- package/commands/gsd-t-optimization-reject.md +94 -0
- package/commands/gsd-t-partition.md +7 -0
- package/commands/gsd-t-pause.md +3 -0
- package/commands/gsd-t-plan.md +10 -3
- package/commands/gsd-t-prd.md +3 -3
- package/commands/gsd-t-quick.md +71 -9
- package/commands/gsd-t-reflect.md +3 -7
- package/commands/gsd-t-resume.md +36 -0
- package/commands/gsd-t-status.md +31 -0
- package/commands/gsd-t-test-sync.md +7 -0
- package/commands/gsd-t-verify.md +12 -5
- package/commands/gsd-t-visualize.md +3 -7
- package/commands/gsd-t-wave.md +82 -18
- package/docs/GSD-T-README.md +52 -0
- package/docs/architecture.md +95 -0
- package/docs/infrastructure.md +117 -0
- package/docs/methodology.md +36 -0
- package/docs/prd-harness-evolution.md +51 -37
- package/docs/requirements.md +66 -0
- package/package.json +1 -1
- package/scripts/context-meter/count-tokens-client.js +221 -0
- package/scripts/context-meter/count-tokens-client.test.js +308 -0
- package/scripts/context-meter/test-injector.js +55 -0
- package/scripts/context-meter/threshold.js +88 -0
- package/scripts/context-meter/threshold.test.js +255 -0
- package/scripts/context-meter/transcript-parser.js +252 -0
- package/scripts/context-meter/transcript-parser.test.js +320 -0
- package/scripts/gsd-t-context-meter.e2e.test.js +415 -0
- package/scripts/gsd-t-context-meter.js +350 -0
- package/scripts/gsd-t-context-meter.test.js +417 -0
- package/scripts/gsd-t-heartbeat.js +2 -2
- package/scripts/gsd-t-statusline.js +23 -8
- package/templates/CLAUDE-global.md +5 -1
- package/templates/CLAUDE-project.md +26 -6
- package/templates/context-meter-config.json +10 -0
- package/templates/prompts/README.md +1 -1
- package/bin/task-counter.cjs +0 -161
|
@@ -0,0 +1,417 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Unit tests for scripts/gsd-t-context-meter.js (M34 Task 4 — CP2 satisfaction).
|
|
5
|
+
*
|
|
6
|
+
* Covers 10 scenarios from the task spec:
|
|
7
|
+
* 1. check-frequency skip
|
|
8
|
+
* 2. check-frequency hit — under threshold
|
|
9
|
+
* 3. check-frequency hit — over threshold
|
|
10
|
+
* 4. missing API key
|
|
11
|
+
* 5. transcript parse failure
|
|
12
|
+
* 6. API timeout / failure
|
|
13
|
+
* 7. state file corruption
|
|
14
|
+
* 8. missing transcript_path in payload
|
|
15
|
+
* 9. atomic write — no stale .tmp file after success
|
|
16
|
+
* 10. fail-open on unexpected throw (loadConfig throws)
|
|
17
|
+
*
|
|
18
|
+
* All dependencies are injected via runMeter's test seams so no real network
|
|
19
|
+
* calls, no real Anthropic API, and no real config-file reads are needed.
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
const { test, beforeEach, afterEach } = require("node:test");
|
|
23
|
+
const assert = require("node:assert/strict");
|
|
24
|
+
const fs = require("fs");
|
|
25
|
+
const path = require("path");
|
|
26
|
+
const os = require("os");
|
|
27
|
+
|
|
28
|
+
const { runMeter, defaultState } = require("./gsd-t-context-meter");
|
|
29
|
+
|
|
30
|
+
/* ─────────────────────────── test harness ─────────────────────────── */
|
|
31
|
+
|
|
32
|
+
// Scratch project root. A fresh directory is created before every test and
// removed afterwards so state and log files never carry over between cases.
let tmpRoot;

beforeEach(() => {
  const prefix = path.join(os.tmpdir(), "gsd-t-cm-hook-");
  tmpRoot = fs.mkdtempSync(prefix);
});

afterEach(() => {
  // Best-effort cleanup: a failed removal must not fail the test itself.
  const removal = { recursive: true, force: true };
  try {
    fs.rmSync(tmpRoot, removal);
  } catch (_) {
    /* ignore */
  }
});
|
|
45
|
+
|
|
46
|
+
/**
 * Build a context-meter config object for tests.
 *
 * @param {object} [overrides] - Keys that replace (or extend) the defaults below.
 * @returns {object} A new config object; override values win over defaults.
 */
function makeConfig(overrides = {}) {
  const defaults = {
    version: 1,
    thresholdPct: 75,
    modelWindowSize: 200000,
    checkFrequency: 5,
    apiKeyEnvVar: "ANTHROPIC_API_KEY",
    statePath: ".gsd-t/.context-meter-state.json",
    logPath: ".gsd-t/context-meter.log",
    timeoutMs: 2000,
  };
  return Object.assign({}, defaults, overrides);
}
|
|
59
|
+
|
|
60
|
+
/**
 * Path of the context-meter state file under a project root.
 *
 * @param {string} root - Project root directory.
 * @returns {string} `<root>/.gsd-t/.context-meter-state.json`
 */
function stateFile(root) {
  const segments = [root, ".gsd-t", ".context-meter-state.json"];
  return path.join(...segments);
}
|
|
63
|
+
|
|
64
|
+
/**
 * Path of the context-meter log file under a project root.
 *
 * @param {string} root - Project root directory.
 * @returns {string} `<root>/.gsd-t/context-meter.log`
 */
function logFile(root) {
  const segments = [root, ".gsd-t", "context-meter.log"];
  return path.join(...segments);
}
|
|
67
|
+
|
|
68
|
+
/**
 * Write a state file under `root` before a test runs.
 *
 * The file holds `defaultState()` with `partial` spread on top, serialized as
 * 2-space-indented JSON. The parent directory is created if needed.
 *
 * @param {string} root - Project root directory.
 * @param {object} partial - State fields that override the defaults.
 */
function seedState(root, partial) {
  const target = stateFile(root);
  const parentDir = path.dirname(target);
  fs.mkdirSync(parentDir, { recursive: true });

  const seeded = { ...defaultState(), ...partial };
  const serialized = JSON.stringify(seeded, null, 2);
  fs.writeFileSync(target, serialized);
}
|
|
76
|
+
|
|
77
|
+
/**
 * Build a hook payload for a "Bash" tool call in session "test-session".
 *
 * The transcript path points inside the per-test temp root but is never
 * created: tests inject their own parseTranscript stub, so nothing reads it.
 *
 * @returns {object} A fresh payload object.
 */
function makePayload() {
  const transcriptPath = path.join(tmpRoot, "fake-transcript.jsonl");
  const payload = {
    session_id: "test-session",
    transcript_path: transcriptPath,
    tool_name: "Bash",
    tool_input: {},
    tool_response: {},
  };
  return payload;
}
|
|
88
|
+
|
|
89
|
+
// Canned parseTranscript result shared by the tests: an empty system prompt and
// a single user message containing one text part.
const FAKE_PARSED = {
  system: "",
  messages: [
    {
      role: "user",
      content: [{ type: "text", text: "hi" }],
    },
  ],
};
|
|
93
|
+
|
|
94
|
+
/* ───────────────────────────── tests ───────────────────────────── */
|
|
95
|
+
|
|
96
|
+
test("1. check-frequency skip — API NOT called, counter incremented, stdout {}", async () => {
  // Seeded at 3 with checkFrequency 5: the counter is expected to reach 4,
  // which is a skip, so both stubs throw if they are ever invoked.
  seedState(tmpRoot, { checkCount: 3 });

  let countTokensCalls = 0;
  const options = {
    payload: makePayload(),
    projectRoot: tmpRoot,
    env: { ANTHROPIC_API_KEY: "sk-test" },
    _loadConfig: () => makeConfig({ checkFrequency: 5 }),
    _parseTranscript: async () => {
      throw new Error("parseTranscript should not be called on skip");
    },
    _countTokens: async () => {
      countTokensCalls += 1;
      throw new Error("countTokens should not be called on skip");
    },
  };

  const result = await runMeter(options);

  assert.deepStrictEqual(result, {});
  assert.strictEqual(countTokensCalls, 0);

  const persisted = JSON.parse(fs.readFileSync(stateFile(tmpRoot), "utf8"));
  assert.strictEqual(persisted.checkCount, 4);
  assert.strictEqual(persisted.lastError, null);
});
|
|
120
|
+
|
|
121
|
+
test("2. check-frequency hit — under threshold → {} + state updated", async () => {
  // Seeded at 4 with the default checkFrequency of 5, so this call measures.
  seedState(tmpRoot, { checkCount: 4 });

  const options = {
    payload: makePayload(),
    projectRoot: tmpRoot,
    env: { ANTHROPIC_API_KEY: "sk-test" },
    _loadConfig: () => makeConfig(),
    _parseTranscript: async () => FAKE_PARSED,
    _countTokens: async () => ({ inputTokens: 10000 }),
  };
  const result = await runMeter(options);

  // Under threshold: nothing is emitted to the caller.
  assert.deepStrictEqual(result, {});

  const persisted = JSON.parse(fs.readFileSync(stateFile(tmpRoot), "utf8"));
  assert.strictEqual(persisted.checkCount, 5);
  assert.strictEqual(persisted.inputTokens, 10000);
  // 10000 tokens of a 200000-token window.
  assert.strictEqual(persisted.pct, 5);
  assert.strictEqual(persisted.threshold, "normal");
  assert.strictEqual(persisted.lastError, null);
  assert.strictEqual(persisted.modelWindowSize, 200000);
});
|
|
142
|
+
|
|
143
|
+
test("3. check-frequency hit — over threshold → additionalContext emitted", async () => {
  seedState(tmpRoot, { checkCount: 4 });

  const options = {
    payload: makePayload(),
    projectRoot: tmpRoot,
    env: { ANTHROPIC_API_KEY: "sk-test" },
    _loadConfig: () => makeConfig(),
    _parseTranscript: async () => FAKE_PARSED,
    _countTokens: async () => ({ inputTokens: 160000 }),
  };
  const result = await runMeter(options);

  // The emitted message must name the percentage, the window size and the
  // pause command.
  const { additionalContext } = result;
  assert.strictEqual(typeof additionalContext, "string");
  assert.match(additionalContext, /80\.0%/);
  assert.match(additionalContext, /200000/);
  assert.match(additionalContext, /\/user:gsd-t-pause/);

  const persisted = JSON.parse(fs.readFileSync(stateFile(tmpRoot), "utf8"));
  assert.strictEqual(persisted.checkCount, 5);
  assert.strictEqual(persisted.pct, 80);
  // v3.0.0 three-band (M35): 80% ∈ [70, 85) → warn
  assert.strictEqual(persisted.threshold, "warn");
  assert.strictEqual(persisted.inputTokens, 160000);
});
|
|
167
|
+
|
|
168
|
+
test("4. missing API key — stdout {}, lastError.code='missing_key', no API call", async () => {
  seedState(tmpRoot, { checkCount: 4 });

  let countTokensCalls = 0;
  const options = {
    payload: makePayload(),
    projectRoot: tmpRoot,
    env: {}, // no ANTHROPIC_API_KEY
    _loadConfig: () => makeConfig(),
    _parseTranscript: async () => FAKE_PARSED,
    _countTokens: async () => {
      countTokensCalls += 1;
      return { inputTokens: 1 };
    },
  };
  const result = await runMeter(options);

  assert.deepStrictEqual(result, {});
  assert.strictEqual(countTokensCalls, 0);

  const persisted = JSON.parse(fs.readFileSync(stateFile(tmpRoot), "utf8"));
  assert.strictEqual(persisted.checkCount, 5);
  assert.ok(persisted.lastError, "lastError populated");
  assert.strictEqual(persisted.lastError.code, "missing_key");

  // The diagnostic must also reach the log file.
  const logPath = logFile(tmpRoot);
  assert.ok(fs.existsSync(logPath));
  const logText = fs.readFileSync(logPath, "utf8");
  assert.match(logText, /missing_key/);
  // Guard against key leakage. No key is supplied in this scenario, so this
  // only catches a literal "sk-test" appearing from some other source.
  assert.ok(!logText.includes("sk-test"));
});
|
|
199
|
+
|
|
200
|
+
test("5. transcript parse failure — returns null → lastError 'parse_failure'", async () => {
  seedState(tmpRoot, { checkCount: 4 });

  const options = {
    payload: makePayload(),
    projectRoot: tmpRoot,
    env: { ANTHROPIC_API_KEY: "sk-test" },
    _loadConfig: () => makeConfig(),
    // A null parse result is the failure signal under test.
    _parseTranscript: async () => null,
    _countTokens: async () => {
      throw new Error("should not call API when parse fails");
    },
  };
  const result = await runMeter(options);

  assert.deepStrictEqual(result, {});

  const persisted = JSON.parse(fs.readFileSync(stateFile(tmpRoot), "utf8"));
  assert.strictEqual(persisted.checkCount, 5);
  assert.strictEqual(persisted.lastError.code, "parse_failure");
});
|
|
219
|
+
|
|
220
|
+
test("6. API timeout / failure — countTokens null → lastError 'api_error', inputTokens reset", async () => {
  // Start from a non-zero reading so the reset to 0 is observable.
  seedState(tmpRoot, { checkCount: 4, inputTokens: 99999 });

  const options = {
    payload: makePayload(),
    projectRoot: tmpRoot,
    env: { ANTHROPIC_API_KEY: "sk-test" },
    _loadConfig: () => makeConfig({ timeoutMs: 50 }),
    _parseTranscript: async () => FAKE_PARSED,
    _countTokens: async () => null,
  };
  const result = await runMeter(options);

  assert.deepStrictEqual(result, {});

  const persisted = JSON.parse(fs.readFileSync(stateFile(tmpRoot), "utf8"));
  assert.strictEqual(persisted.lastError.code, "api_error");
  // Choice documented in hook: reset inputTokens to 0 on failure to avoid stale
  // readings tripping threshold false-positives.
  assert.strictEqual(persisted.inputTokens, 0);
  assert.strictEqual(persisted.pct, 0);
  assert.strictEqual(persisted.threshold, "normal");
});
|
|
241
|
+
|
|
242
|
+
test("7. state file corruption — overwritten with valid defaults + fresh count", async () => {
  // Plant an unparseable state file before the run.
  const sp = stateFile(tmpRoot);
  fs.mkdirSync(path.dirname(sp), { recursive: true });
  fs.writeFileSync(sp, "not json{");

  // Record every countTokens invocation so the skip path can be asserted
  // instead of merely described in a comment.
  const apiCalls = [];
  const out = await runMeter({
    payload: makePayload(),
    projectRoot: tmpRoot,
    env: { ANTHROPIC_API_KEY: "sk-test" },
    _loadConfig: () => makeConfig({ checkFrequency: 5 }),
    _parseTranscript: async () => FAKE_PARSED,
    _countTokens: async () => {
      apiCalls.push("called");
      return { inputTokens: 100 };
    },
  });

  assert.deepEqual(out, {});
  // Post-write must be valid JSON with defaults + checkCount == 1
  const state = JSON.parse(fs.readFileSync(sp, "utf8"));
  assert.equal(state.version, 1);
  assert.equal(state.checkCount, 1);
  // checkCount=1 % checkFrequency=5 !== 0, so this was a skip path; the API
  // must not have been called.
  assert.equal(apiCalls.length, 0);
});
|
|
264
|
+
|
|
265
|
+
test("7b. state file corruption + frequency hit — API called once, state valid", async () => {
  // Plant an unparseable state file before the run.
  const corruptPath = stateFile(tmpRoot);
  fs.mkdirSync(path.dirname(corruptPath), { recursive: true });
  fs.writeFileSync(corruptPath, "not json{");

  const options = {
    payload: makePayload(),
    projectRoot: tmpRoot,
    env: { ANTHROPIC_API_KEY: "sk-test" },
    // checkFrequency 1 makes the very first call a measurement.
    _loadConfig: () => makeConfig({ checkFrequency: 1 }),
    _parseTranscript: async () => FAKE_PARSED,
    _countTokens: async () => ({ inputTokens: 500 }),
  };
  const result = await runMeter(options);

  assert.deepStrictEqual(result, {});

  const persisted = JSON.parse(fs.readFileSync(corruptPath, "utf8"));
  assert.strictEqual(persisted.version, 1);
  assert.strictEqual(persisted.checkCount, 1);
  assert.strictEqual(persisted.inputTokens, 500);
  assert.strictEqual(persisted.lastError, null);
});
|
|
286
|
+
|
|
287
|
+
test("8. missing transcript_path in payload — lastError 'no_transcript', counter increments", async () => {
  seedState(tmpRoot, { checkCount: 4 });

  const payloadWithoutTranscript = { session_id: "x" }; // no transcript_path
  const options = {
    payload: payloadWithoutTranscript,
    projectRoot: tmpRoot,
    env: { ANTHROPIC_API_KEY: "sk-test" },
    _loadConfig: () => makeConfig(),
    // Neither stub may run when there is no transcript to read.
    _parseTranscript: async () => {
      throw new Error("should not parse when transcript_path missing");
    },
    _countTokens: async () => {
      throw new Error("should not call API when transcript_path missing");
    },
  };
  const result = await runMeter(options);

  assert.deepStrictEqual(result, {});

  const persisted = JSON.parse(fs.readFileSync(stateFile(tmpRoot), "utf8"));
  assert.strictEqual(persisted.checkCount, 5);
  assert.strictEqual(persisted.lastError.code, "no_transcript");
});
|
|
308
|
+
|
|
309
|
+
test("9. atomic write — no .tmp file on disk after successful run", async () => {
  seedState(tmpRoot, { checkCount: 4 });

  const options = {
    payload: makePayload(),
    projectRoot: tmpRoot,
    env: { ANTHROPIC_API_KEY: "sk-test" },
    _loadConfig: () => makeConfig(),
    _parseTranscript: async () => FAKE_PARSED,
    _countTokens: async () => ({ inputTokens: 1000 }),
  };
  await runMeter(options);

  // Only the final state file may remain; its ".tmp" sibling must be gone.
  const finalPath = stateFile(tmpRoot);
  const tempPath = finalPath + ".tmp";
  assert.strictEqual(fs.existsSync(tempPath), false, ".tmp file should not exist after rename");
  assert.strictEqual(fs.existsSync(stateFile(tmpRoot)), true, "state file should exist");
});
|
|
325
|
+
|
|
326
|
+
test("10. fail-open on unexpected throw — loadConfig throws → runMeter returns {}", async () => {
  // The config loader blows up; runMeter must still resolve to an empty object.
  const explodingLoadConfig = () => {
    throw new Error("boom");
  };

  const result = await runMeter({
    payload: makePayload(),
    projectRoot: tmpRoot,
    env: { ANTHROPIC_API_KEY: "sk-test" },
    _loadConfig: explodingLoadConfig,
    _parseTranscript: async () => FAKE_PARSED,
    _countTokens: async () => ({ inputTokens: 1 }),
  });

  assert.deepStrictEqual(result, {});
});
|
|
340
|
+
|
|
341
|
+
test("10b. fail-open — parseTranscript throws synchronously → {}", async () => {
  seedState(tmpRoot, { checkCount: 4 });

  // Deliberately a plain (non-async) function so the throw is synchronous.
  const explodingParse = () => {
    throw new Error("sync boom");
  };

  const result = await runMeter({
    payload: makePayload(),
    projectRoot: tmpRoot,
    env: { ANTHROPIC_API_KEY: "sk-test" },
    _loadConfig: () => makeConfig(),
    _parseTranscript: explodingParse,
    _countTokens: async () => ({ inputTokens: 1 }),
  });

  assert.deepStrictEqual(result, {});

  const persisted = JSON.parse(fs.readFileSync(stateFile(tmpRoot), "utf8"));
  assert.strictEqual(persisted.lastError.code, "parse_failure");
});
|
|
358
|
+
|
|
359
|
+
test("10c. fail-open — countTokens throws → {}", async () => {
  seedState(tmpRoot, { checkCount: 4 });

  // Deliberately a plain (non-async) function so the throw is synchronous.
  const explodingCount = () => {
    throw new Error("sync boom");
  };

  const result = await runMeter({
    payload: makePayload(),
    projectRoot: tmpRoot,
    env: { ANTHROPIC_API_KEY: "sk-test" },
    _loadConfig: () => makeConfig(),
    _parseTranscript: async () => FAKE_PARSED,
    _countTokens: explodingCount,
  });

  assert.deepStrictEqual(result, {});

  const persisted = JSON.parse(fs.readFileSync(stateFile(tmpRoot), "utf8"));
  assert.strictEqual(persisted.lastError.code, "api_error");
});
|
|
376
|
+
|
|
377
|
+
test("11. log never contains message content — only categories/counts", async () => {
  seedState(tmpRoot, { checkCount: 4 });

  // Marker string placed in the transcript; it must never reach the log.
  const secretText = "SECRET_MESSAGE_CONTENT_XYZ";
  const parsedWithSecret = {
    system: "",
    messages: [
      { role: "user", content: [{ type: "text", text: secretText }] },
    ],
  };

  await runMeter({
    payload: makePayload(),
    projectRoot: tmpRoot,
    env: { ANTHROPIC_API_KEY: "sk-test" },
    _loadConfig: () => makeConfig(),
    _parseTranscript: async () => parsedWithSecret,
    _countTokens: async () => ({ inputTokens: 42 }),
  });

  const logText = fs.readFileSync(logFile(tmpRoot), "utf8");
  assert.ok(!logText.includes(secretText), "log must not contain message content");
  assert.match(logText, /measure/);
  assert.match(logText, /tokens=42/);
});
|
|
400
|
+
|
|
401
|
+
test("12. clock injection — timestamp uses injected clock", async () => {
  seedState(tmpRoot, { checkCount: 4 });

  // Fixed instant returned by the injected clock on every call.
  const frozenNow = new Date("2026-04-14T18:00:00.000Z");

  const options = {
    payload: makePayload(),
    projectRoot: tmpRoot,
    env: { ANTHROPIC_API_KEY: "sk-test" },
    clock: () => frozenNow,
    _loadConfig: () => makeConfig(),
    _parseTranscript: async () => FAKE_PARSED,
    _countTokens: async () => ({ inputTokens: 1000 }),
  };
  await runMeter(options);

  const persisted = JSON.parse(fs.readFileSync(stateFile(tmpRoot), "utf8"));
  assert.strictEqual(persisted.timestamp, frozenNow.toISOString());
});
|
|
@@ -97,8 +97,8 @@ function cleanupOldHeartbeats(gsdtDir) {
|
|
|
97
97
|
const EVENT_HANDLERS = {
|
|
98
98
|
SessionStart: (h) => ({ evt: "session_start", data: { source: h.source, model: h.model } }),
|
|
99
99
|
PostToolUse: (h) => ({ evt: "tool", tool: h.tool_name, agent_id: h.agent_id || null, data: summarize(h.tool_name, h.tool_input) }),
|
|
100
|
-
SubagentStart: (h) => ({ evt: "agent_spawn", data: { agent_id: h.agent_id, agent_type: h.agent_type, parent_id: h.parent_agent_id || h.session_id, tokens:
|
|
101
|
-
SubagentStop: (h) => ({ evt: "agent_stop", data: { agent_id: h.agent_id, agent_type: h.agent_type, parent_id: h.parent_agent_id || h.session_id, tokens:
|
|
100
|
+
SubagentStart: (h) => ({ evt: "agent_spawn", data: { agent_id: h.agent_id, agent_type: h.agent_type, parent_id: h.parent_agent_id || h.session_id, tokens: null } }),
|
|
101
|
+
SubagentStop: (h) => ({ evt: "agent_stop", data: { agent_id: h.agent_id, agent_type: h.agent_type, parent_id: h.parent_agent_id || h.session_id, tokens: null } }),
|
|
102
102
|
TaskCompleted: (h) => ({ evt: "task_done", data: { task: h.task_subject, agent: h.teammate_name } }),
|
|
103
103
|
TeammateIdle: (h) => ({ evt: "agent_idle", data: { agent: h.teammate_name, team: h.team_name } }),
|
|
104
104
|
Notification: (h) => ({ evt: "notification", data: { message: scrubSecrets(h.message), title: scrubSecrets(h.title) } }),
|
|
@@ -6,9 +6,11 @@
|
|
|
6
6
|
// Configure in ~/.claude/settings.json:
|
|
7
7
|
// "statusLine": "node ~/.claude/scripts/gsd-t-statusline.js"
|
|
8
8
|
//
|
|
9
|
-
// Context usage is read from
|
|
10
|
-
//
|
|
11
|
-
//
|
|
9
|
+
// Context usage is read from .gsd-t/.context-meter-state.json (produced by
|
|
10
|
+
// the Context Meter PostToolUse hook). v2.0.0 (M34) — the legacy
|
|
11
|
+
// environment-variable-based context check is retired because Claude Code
|
|
12
|
+
// never populated those env vars. When the state file is absent or stale
|
|
13
|
+
// (>5min), the context segment is omitted.
|
|
12
14
|
//
|
|
13
15
|
// Zero external dependencies.
|
|
14
16
|
|
|
@@ -63,11 +65,24 @@ function contextBar(pct) {
|
|
|
63
65
|
|
|
64
66
|
const root = findProjectRoot();
|
|
65
67
|
|
|
66
|
-
// Context usage from
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
68
|
+
// Context usage from the Context Meter state file.
|
|
69
|
+
// Stale-window check mirrors bin/token-budget.js (5 minutes).
|
|
70
|
+
/**
 * Read the current context-usage percentage from the Context Meter state file
 * at `<projectRoot>/.gsd-t/.context-meter-state.json`.
 *
 * @param {string} projectRoot - Project root directory; a falsy value yields null.
 * @returns {number|null} Whole-number percentage clamped to 0–100, or null when
 *   the file is missing, unreadable or not valid JSON, when `pct` is not a
 *   finite number, when `timestamp` is absent or unparseable, or when the
 *   reading is older than 5 minutes or timestamped in the future.
 */
function readContextPct(projectRoot) {
  if (!projectRoot) return null;
  const maxAgeMs = 5 * 60 * 1000;
  try {
    const fp = path.join(projectRoot, '.gsd-t', '.context-meter-state.json');
    const raw = fs.readFileSync(fp, 'utf8');
    const s = JSON.parse(raw);
    // Number.isFinite rejects non-numbers as well as NaN and ±Infinity
    // (JSON such as 1e999 parses to Infinity).
    if (!s || !Number.isFinite(s.pct) || !s.timestamp) return null;
    const age = Date.now() - Date.parse(s.timestamp);
    if (Number.isNaN(age) || age > maxAgeMs || age < 0) return null;
    // Clamp on both sides so a bad reading can never yield a negative percentage.
    return Math.max(0, Math.min(100, Math.round(s.pct)));
  } catch {
    // Any read or parse failure means there is no usable reading.
    return null;
  }
}
|
|
84
|
+
|
|
85
|
+
const ctxPct = readContextPct(root);
|
|
71
86
|
|
|
72
87
|
// GSD-T project state
|
|
73
88
|
let projectPart = '';
|
|
@@ -366,7 +366,11 @@ This gives the user real-time visibility into which model is handling each opera
|
|
|
366
366
|
- `model: sonnet` — mid-tier reasoning: routine code changes, standard refactors, test writing, QA evaluation, straightforward synthesis
|
|
367
367
|
- `model: opus` — high-stakes reasoning: architecture decisions, security analysis, complex debugging, cross-module refactors, Red Team adversarial QA, quality judgment on critical paths
|
|
368
368
|
|
|
369
|
-
**
|
|
369
|
+
**Context Gate — No Silent Degradation (M35, v2.76.10+)** — The orchestrator reads session context consumption via `bin/token-budget.js` `getSessionStatus()` before each subagent spawn and applies a **three-band model** (`token-budget-contract.md` v3.0.0): `normal` (< 70%, proceed at full quality), `warn` (70–85%, log and proceed at **full quality** — never downgrade models, never skip Red Team / doc-ripple / Design Verification), `stop` (≥ 85%, halt cleanly and hand off to the runway estimator). The older v2.x `downgrade` / `conserve` bands are deleted. Model choice is made surgically per-phase via `bin/model-selector.js` (`model-selection-contract.md` v1.0.0) — sonnet for routine work, opus at declared escalation points, haiku strictly for mechanical tasks — not as a fallback for context pressure. Per-spawn telemetry is captured to `.gsd-t/token-metrics.jsonl` via `bin/token-telemetry.js` (`token-telemetry-contract.md` v1.0.0) and surfaced via `gsd-t metrics --tokens|--halts|--context-window`.
|
|
370
|
+
|
|
371
|
+
**Context Meter (M34, v2.75.10+)** — The real context-window measurement feeding the context gate above. A PostToolUse hook (`scripts/gsd-t-context-meter.js`) runs after every tool call, streams the current Claude Code transcript to the Anthropic `count_tokens` API, and writes the real input-token count plus the resolved threshold band into `.gsd-t/.context-meter-state.json`. `getSessionStatus()` reads that state file (fresh window = 5 minutes) with a historical heuristic fallback when the file is missing or stale. Command files consume the signal via a small bash shim (`CTX_PCT=$(node -e "…tb.getSessionStatus('.').pct")`). Threshold bands (lower-bound inclusive) as of M35: `normal` < 70, `warn` ≥ 70, `stop` ≥ 85. Requires `ANTHROPIC_API_KEY` — `gsd-t doctor` hard-gates on its presence.
|
|
372
|
+
|
|
373
|
+
*Historical note (v2.74.12–v2.74.13)*: between 2026-03 and 2026-04 the orchestrator used `bin/task-counter.cjs` — a proxy that assumed N tasks ≈ M% context used. That itself replaced an earlier env-var-based check (`CLAUDE_CONTEXT_TOKENS_USED` / `CLAUDE_CONTEXT_TOKENS_MAX`) that never worked because Claude Code does not export those variables. Both approaches are retired: the env-var path is removed entirely; `task-counter.cjs` is deleted from the package and from downstream projects via a one-shot migration in `gsd-t update-all`. Do not reintroduce either check.
|
|
370
374
|
|
|
371
375
|
## API Documentation Guard (Swagger/OpenAPI)
|
|
372
376
|
|
|
@@ -26,12 +26,32 @@
|
|
|
26
26
|
<!-- Claude will verify the branch before every commit. -->
|
|
27
27
|
**Expected branch**: {main | master | feature-branch-name}
|
|
28
28
|
|
|
29
|
-
##
|
|
30
|
-
<!--
|
|
31
|
-
<!--
|
|
32
|
-
<!--
|
|
33
|
-
<!--
|
|
34
|
-
<!--
|
|
29
|
+
## Context Gate — No Silent Degradation (M35, v2.76.10+)
|
|
30
|
+
<!-- Three-band context gate per .gsd-t/contracts/token-budget-contract.md v3.0.0: -->
|
|
31
|
+
<!-- - normal (<70%): proceed at full quality -->
|
|
32
|
+
<!-- - warn (70–85%): log to .gsd-t/token-log.md and proceed at FULL quality -->
|
|
33
|
+
<!-- (never downgrade models, never skip Red Team / doc-ripple / Design Verification) -->
|
|
34
|
+
<!-- - stop (≥85%): halt cleanly, runway estimator / headless auto-spawn handles the handoff -->
|
|
35
|
+
<!-- Model choice is made surgically per-phase via bin/model-selector.js (model-selection-contract.md v1.0.0): -->
|
|
36
|
+
<!-- - haiku — mechanical: test runners, file-existence checks, JSON validation, branch guards -->
|
|
37
|
+
<!-- - sonnet — routine: execute, test-sync, doc-ripple wiring, quick, integrate, debug fix-apply -->
|
|
38
|
+
<!-- - opus — high-stakes: partition, discuss, Red Team, verify judgment, debug root-cause, contracts -->
|
|
39
|
+
<!-- Per-spawn telemetry is captured to .gsd-t/token-metrics.jsonl via bin/token-telemetry.js -->
|
|
40
|
+
<!-- (token-telemetry-contract.md v1.0.0) and surfaced via: -->
|
|
41
|
+
<!-- - gsd-t metrics --tokens [--by model,command,phase,milestone] -->
|
|
42
|
+
<!-- - gsd-t metrics --halts -->
|
|
43
|
+
<!-- - gsd-t metrics --context-window -->
|
|
44
|
+
|
|
45
|
+
## Context Meter (M34, v2.75.10+)
|
|
46
|
+
<!-- The Context Meter is a PostToolUse hook that streams the current transcript -->
|
|
47
|
+
<!-- to Anthropic count_tokens and writes the real context % to -->
|
|
48
|
+
<!-- .gsd-t/.context-meter-state.json. bin/token-budget.js reads that file as the -->
|
|
49
|
+
<!-- authoritative session-stop signal, feeding the three-band context gate above. -->
|
|
50
|
+
<!-- Requires ANTHROPIC_API_KEY in the shell environment. -->
|
|
51
|
+
<!-- Threshold bands (lower-bound inclusive) as of M35: normal<70, warn≥70, stop≥85. -->
|
|
52
|
+
<!-- Config: .gsd-t/context-meter-config.json — apiKeyEnvVar, modelWindowSize, thresholdPct, checkFrequency. -->
|
|
53
|
+
<!-- Verify: `npx @tekyzinc/gsd-t doctor` hard-gates on API key + hook + live count_tokens dry-run. -->
|
|
54
|
+
<!-- Historical: v2.74.12–v2.74.13 used bin/task-counter.cjs (proxy); v2.74.11 and earlier used CLAUDE_CONTEXT_TOKENS_* env vars (never worked). Both retired in v2.75.10. M35 removed the v2.x downgrade/conserve bands that silently degraded quality. -->
|
|
35
55
|
|
|
36
56
|
<!-- For multi-branch parallel work (e.g., web + mobile in separate terminals), -->
|
|
37
57
|
<!-- each terminal's CLAUDE.md should declare its own expected branch. -->
|
|
@@ -14,7 +14,7 @@ Now command files reference these by path. The orchestrator passes the file path
|
|
|
14
14
|
|
|
15
15
|
## Why per-domain instead of per-task
|
|
16
16
|
|
|
17
|
-
Red Team and Design Verification were originally per-domain. They were promoted to per-task by commits `da6d3ae` and `b68353e`, on the assumption that the orchestrator's
|
|
17
|
+
Red Team and Design Verification were originally per-domain. They were promoted to per-task by commits `da6d3ae` and `b68353e`, on the assumption that the orchestrator's environment-variable-based context self-check would catch context drain before it got bad. That env-var path was never populated by Claude Code — the self-check was vaporware. With it inert, per-task spawning of ~10k-token Red Team subagents drained sessions in 5-10 tasks. Reverting them to per-domain raises the safe task count from ~5 to ~15+. v2.0.0 (M34) replaced the broken check with real `count_tokens` measurements via the Context Meter PostToolUse hook.
|
|
18
18
|
|
|
19
19
|
QA stays per-task because (a) it's much smaller, (b) it grounds against contracts which can drift task by task.
|
|
20
20
|
|