@tekyzinc/gsd-t 3.11.10 → 3.11.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/gsd-t.js +26 -55
- package/package.json +1 -1
- package/scripts/context-meter/estimate-tokens.js +96 -0
- package/scripts/context-meter/estimate-tokens.test.js +158 -0
- package/scripts/gsd-t-context-meter.e2e.test.js +35 -128
- package/scripts/gsd-t-context-meter.js +12 -35
- package/scripts/gsd-t-context-meter.test.js +59 -98
package/bin/gsd-t.js
CHANGED
|
@@ -1232,9 +1232,6 @@ async function doInstall(opts = {}) {
|
|
|
1232
1232
|
saveInstalledVersion();
|
|
1233
1233
|
|
|
1234
1234
|
showInstallSummary(gsdtCommands.length, utilityCommands.length);
|
|
1235
|
-
|
|
1236
|
-
// Interactive prompt (skipped silently in non-TTY shells)
|
|
1237
|
-
await promptForApiKeyIfMissing(resolveApiKeyEnvVar(process.cwd()));
|
|
1238
1235
|
}
|
|
1239
1236
|
|
|
1240
1237
|
function showInstallSummary(gsdtCount, utilCount) {
|
|
@@ -1444,9 +1441,6 @@ async function doInit(projectName) {
|
|
|
1444
1441
|
if (registerProject(projectDir)) success("Registered in ~/.claude/.gsd-t-projects");
|
|
1445
1442
|
|
|
1446
1443
|
showInitTree(projectDir);
|
|
1447
|
-
|
|
1448
|
-
// Interactive prompt (skipped silently in non-TTY shells)
|
|
1449
|
-
await promptForApiKeyIfMissing(resolveApiKeyEnvVar(projectDir));
|
|
1450
1444
|
}
|
|
1451
1445
|
|
|
1452
1446
|
function showInitTree(projectDir) {
|
|
@@ -1523,12 +1517,7 @@ function showStatusContextMeter() {
|
|
|
1523
1517
|
const rel = state.timestamp ? formatRelativeTime(state.timestamp) : "never measured";
|
|
1524
1518
|
log(` ${RED}${BOLD}✗ CONTEXT METER DEAD${RESET} ${RED}— error: ${code}, last check: ${rel}${RESET}`);
|
|
1525
1519
|
log(` ${RED}The context-window guardrail is NOT working. Long sessions will hit /compact.${RESET}`);
|
|
1526
|
-
|
|
1527
|
-
log(` ${YELLOW}Fix: export ANTHROPIC_API_KEY in your shell profile${RESET}`);
|
|
1528
|
-
log(` ${YELLOW} (measurement only — inference stays on Claude Code subscription)${RESET}`);
|
|
1529
|
-
} else {
|
|
1530
|
-
log(` ${YELLOW}Fix: run 'gsd-t doctor' for diagnostics${RESET}`);
|
|
1531
|
-
}
|
|
1520
|
+
log(` ${YELLOW}Fix: run 'gsd-t doctor' for diagnostics${RESET}`);
|
|
1532
1521
|
return;
|
|
1533
1522
|
}
|
|
1534
1523
|
|
|
@@ -2308,8 +2297,8 @@ function checkDoctorCgc() {
|
|
|
2308
2297
|
return issues;
|
|
2309
2298
|
}
|
|
2310
2299
|
|
|
2311
|
-
// Verify context meter wiring:
|
|
2312
|
-
//
|
|
2300
|
+
// Verify context meter wiring: hook registration, hook script presence,
|
|
2301
|
+
// config validity, and a local estimation dry-run.
|
|
2313
2302
|
// Returns number of issues (RED results). Mirrors checkDoctorCgc shape.
|
|
2314
2303
|
async function checkDoctorContextMeter(projectDir) {
|
|
2315
2304
|
let issues = 0;
|
|
@@ -2317,8 +2306,8 @@ async function checkDoctorContextMeter(projectDir) {
|
|
|
2317
2306
|
|
|
2318
2307
|
const cwd = projectDir || process.cwd();
|
|
2319
2308
|
|
|
2320
|
-
// Load config (used by checks 4 and 5). Missing file → defaults; invalid
|
|
2321
|
-
// JSON or schema-mismatch → throws (handled in Check 4).
|
|
2309
|
+
// Load config (used by checks 3 and 4). Missing file → defaults; invalid
|
|
2310
|
+
// JSON or schema-mismatch → throws (handled in Check 3).
|
|
2322
2311
|
let cfg = null;
|
|
2323
2312
|
let cfgLoadErr = null;
|
|
2324
2313
|
try {
|
|
@@ -2327,19 +2316,8 @@ async function checkDoctorContextMeter(projectDir) {
|
|
|
2327
2316
|
} catch (e) {
|
|
2328
2317
|
cfgLoadErr = e;
|
|
2329
2318
|
}
|
|
2330
|
-
const apiKeyEnvVar = (cfg && cfg.apiKeyEnvVar) || "ANTHROPIC_API_KEY";
|
|
2331
|
-
|
|
2332
|
-
// Check 1: API key env var present
|
|
2333
|
-
const apiKeyValue = process.env[apiKeyEnvVar];
|
|
2334
|
-
const apiKeyPresent = typeof apiKeyValue === "string" && apiKeyValue.length > 0;
|
|
2335
|
-
if (apiKeyPresent) {
|
|
2336
|
-
success(`API key present ($${apiKeyEnvVar})`);
|
|
2337
|
-
} else {
|
|
2338
|
-
error(`Missing API key: set $${apiKeyEnvVar} — https://console.anthropic.com/settings/keys`);
|
|
2339
|
-
issues++;
|
|
2340
|
-
}
|
|
2341
2319
|
|
|
2342
|
-
// Check 2: Hook registered in ~/.claude/settings.json
|
|
2320
|
+
// Check 1: Hook registered in ~/.claude/settings.json
|
|
2343
2321
|
let hookRegistered = false;
|
|
2344
2322
|
try {
|
|
2345
2323
|
if (fs.existsSync(SETTINGS_JSON)) {
|
|
@@ -2367,7 +2345,7 @@ async function checkDoctorContextMeter(projectDir) {
|
|
|
2367
2345
|
issues++;
|
|
2368
2346
|
}
|
|
2369
2347
|
|
|
2370
|
-
// Check 3: Hook script file exists in project
|
|
2348
|
+
// Check 2: Hook script file exists in project
|
|
2371
2349
|
const scriptPath = path.join(cwd, "scripts", CONTEXT_METER_SCRIPT);
|
|
2372
2350
|
if (fs.existsSync(scriptPath)) {
|
|
2373
2351
|
success("Hook script present");
|
|
@@ -2376,7 +2354,7 @@ async function checkDoctorContextMeter(projectDir) {
|
|
|
2376
2354
|
issues++;
|
|
2377
2355
|
}
|
|
2378
2356
|
|
|
2379
|
-
// Check 4: Config file parses via loader
|
|
2357
|
+
// Check 3: Config file parses via loader
|
|
2380
2358
|
const configPath = path.join(cwd, CONTEXT_METER_CONFIG_DEST);
|
|
2381
2359
|
if (cfgLoadErr) {
|
|
2382
2360
|
error(`Config file invalid: ${cfgLoadErr.message} — fix ${CONTEXT_METER_CONFIG_DEST}`);
|
|
@@ -2387,34 +2365,27 @@ async function checkDoctorContextMeter(projectDir) {
|
|
|
2387
2365
|
warn("Using default config — run gsd-t install to copy template");
|
|
2388
2366
|
}
|
|
2389
2367
|
|
|
2390
|
-
// Check
|
|
2391
|
-
|
|
2392
|
-
|
|
2368
|
+
// Check 4: Dry-run local token estimation
|
|
2369
|
+
const estimatorPath = path.join(cwd, "scripts", "context-meter", "estimate-tokens.js");
|
|
2370
|
+
if (!fs.existsSync(estimatorPath)) {
|
|
2371
|
+
error("Token estimator missing at scripts/context-meter/estimate-tokens.js — run gsd-t update");
|
|
2372
|
+
issues++;
|
|
2393
2373
|
} else {
|
|
2394
|
-
|
|
2395
|
-
|
|
2396
|
-
|
|
2397
|
-
|
|
2398
|
-
|
|
2399
|
-
|
|
2400
|
-
|
|
2401
|
-
|
|
2402
|
-
|
|
2403
|
-
|
|
2404
|
-
system: "",
|
|
2405
|
-
messages: [{ role: "user", content: [{ type: "text", text: "ping" }] }],
|
|
2406
|
-
timeoutMs: 5000,
|
|
2407
|
-
});
|
|
2408
|
-
if (result && typeof result.inputTokens === "number") {
|
|
2409
|
-
success(`count_tokens dry-run OK (${result.inputTokens} tokens)`);
|
|
2410
|
-
} else {
|
|
2411
|
-
error("count_tokens API call failed — check API key and network");
|
|
2412
|
-
issues++;
|
|
2413
|
-
}
|
|
2414
|
-
} catch (e) {
|
|
2415
|
-
error(`count_tokens dry-run threw: ${e.message}`);
|
|
2374
|
+
try {
|
|
2375
|
+
const { estimateTokens } = require(estimatorPath);
|
|
2376
|
+
const result = estimateTokens({
|
|
2377
|
+
system: "",
|
|
2378
|
+
messages: [{ role: "user", content: [{ type: "text", text: "ping" }] }],
|
|
2379
|
+
});
|
|
2380
|
+
if (result && typeof result.inputTokens === "number") {
|
|
2381
|
+
success(`Token estimator dry-run OK (${result.inputTokens} tokens)`);
|
|
2382
|
+
} else {
|
|
2383
|
+
error("Token estimator returned null");
|
|
2416
2384
|
issues++;
|
|
2417
2385
|
}
|
|
2386
|
+
} catch (e) {
|
|
2387
|
+
error(`Token estimator dry-run threw: ${e.message}`);
|
|
2388
|
+
issues++;
|
|
2418
2389
|
}
|
|
2419
2390
|
}
|
|
2420
2391
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@tekyzinc/gsd-t",
|
|
3
|
-
"version": "3.11.10",
|
|
3
|
+
"version": "3.11.11",
|
|
4
4
|
"description": "GSD-T: Contract-Driven Development for Claude Code — 61 slash commands with unattended supervisor relay, headless CI/CD mode, graph-powered code analysis, real-time agent dashboard, execution intelligence, task telemetry, doc-ripple enforcement, backlog management, impact analysis, test sync, milestone archival, and PRD generation",
|
|
5
5
|
"author": "Tekyz, Inc.",
|
|
6
6
|
"license": "MIT",
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* estimate-tokens.js
|
|
3
|
+
*
|
|
4
|
+
* Local token estimator — replaces the Anthropic count_tokens API call.
|
|
5
|
+
* Uses character-length heuristics (String.length, i.e. UTF-16 code units) to estimate token count from a parsed transcript.
|
|
6
|
+
*
|
|
7
|
+
* Claude's BPE tokenizer averages ~3.5 chars per token for English text/code
|
|
8
|
+
* (range: 3.0 for dense prose, 4.5 for simple ASCII). We use 3.5 as the
|
|
9
|
+
* divisor, which slightly overestimates token count — this is the safe
|
|
10
|
+
* direction for a context-window guard (triggers pause earlier, not later).
|
|
11
|
+
*
|
|
12
|
+
* The estimate counts content payloads only: text, tool names, JSON-serialized
|
|
13
|
+
* tool inputs, tool results and image sources. Message-envelope keys and brackets are not counted.
|
|
14
|
+
*
|
|
15
|
+
* Accuracy: within ~5-10% of the real count_tokens API. For threshold bands
|
|
16
|
+
* with 15-point gaps (normal < 70%, warn < 85%), this is more than sufficient.
|
|
17
|
+
*
|
|
18
|
+
* @module scripts/context-meter/estimate-tokens
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
"use strict";
|
|
22
|
+
|
|
23
|
+
const CHARS_PER_TOKEN = 3.5;
|
|
24
|
+
|
|
25
|
+
/**
|
|
26
|
+
* Estimate token count from a parsed transcript.
|
|
27
|
+
*
|
|
28
|
+
* @param {object} opts
|
|
29
|
+
* @param {string} opts.system - system prompt text
|
|
30
|
+
* @param {Array} opts.messages - messages array from transcript-parser.js
|
|
31
|
+
* @returns {{ inputTokens: number } | null}
|
|
32
|
+
*/
|
|
33
|
+
function estimateTokens(opts) {
|
|
34
|
+
try {
|
|
35
|
+
if (!opts || typeof opts !== "object") return null;
|
|
36
|
+
|
|
37
|
+
const { system, messages } = opts;
|
|
38
|
+
if (!Array.isArray(messages)) return null;
|
|
39
|
+
|
|
40
|
+
let totalChars = 0;
|
|
41
|
+
|
|
42
|
+
if (typeof system === "string") {
|
|
43
|
+
totalChars += system.length;
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
for (const msg of messages) {
|
|
47
|
+
if (!msg || typeof msg !== "object") continue;
|
|
48
|
+
totalChars += measureContent(msg.content);
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
const inputTokens = Math.ceil(totalChars / CHARS_PER_TOKEN);
|
|
52
|
+
return { inputTokens };
|
|
53
|
+
} catch (_) {
|
|
54
|
+
return null;
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
/**
|
|
59
|
+
* Recursively measure character length of a message content value.
|
|
60
|
+
* Handles strings, arrays of blocks, and nested tool_result content.
|
|
61
|
+
*/
|
|
62
|
+
function measureContent(content) {
|
|
63
|
+
if (typeof content === "string") return content.length;
|
|
64
|
+
if (!Array.isArray(content)) return 0;
|
|
65
|
+
|
|
66
|
+
let chars = 0;
|
|
67
|
+
for (const block of content) {
|
|
68
|
+
if (!block || typeof block !== "object") continue;
|
|
69
|
+
|
|
70
|
+
if (block.type === "text" && typeof block.text === "string") {
|
|
71
|
+
chars += block.text.length;
|
|
72
|
+
} else if (block.type === "tool_use") {
|
|
73
|
+
chars += (typeof block.name === "string" ? block.name.length : 0);
|
|
74
|
+
if (block.input != null) {
|
|
75
|
+
try {
|
|
76
|
+
chars += JSON.stringify(block.input).length;
|
|
77
|
+
} catch (_) {
|
|
78
|
+
// skip
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
} else if (block.type === "tool_result") {
|
|
82
|
+
chars += measureContent(block.content);
|
|
83
|
+
} else if (block.type === "image" && block.source) {
|
|
84
|
+
// base64 images: ~0.75 bytes per base64 char, tokenized differently
|
|
85
|
+
// but we count the source data length as a rough proxy
|
|
86
|
+
try {
|
|
87
|
+
chars += JSON.stringify(block.source).length;
|
|
88
|
+
} catch (_) {
|
|
89
|
+
// skip
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
return chars;
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
module.exports = { estimateTokens, CHARS_PER_TOKEN };
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* estimate-tokens.test.js — unit tests for the local token estimator.
|
|
3
|
+
*
|
|
4
|
+
* @module scripts/context-meter/estimate-tokens.test
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
"use strict";
|
|
8
|
+
|
|
9
|
+
const { test } = require("node:test");
|
|
10
|
+
const assert = require("node:assert/strict");
|
|
11
|
+
const { estimateTokens, CHARS_PER_TOKEN } = require("./estimate-tokens");
|
|
12
|
+
|
|
13
|
+
test("null/undefined opts returns null", () => {
|
|
14
|
+
assert.equal(estimateTokens(null), null);
|
|
15
|
+
assert.equal(estimateTokens(undefined), null);
|
|
16
|
+
});
|
|
17
|
+
|
|
18
|
+
test("missing messages returns null", () => {
|
|
19
|
+
assert.equal(estimateTokens({ system: "hi" }), null);
|
|
20
|
+
assert.equal(estimateTokens({ system: "hi", messages: "not-array" }), null);
|
|
21
|
+
});
|
|
22
|
+
|
|
23
|
+
test("empty messages returns 0 tokens (system empty)", () => {
|
|
24
|
+
const r = estimateTokens({ system: "", messages: [] });
|
|
25
|
+
assert.ok(r);
|
|
26
|
+
assert.equal(r.inputTokens, 0);
|
|
27
|
+
});
|
|
28
|
+
|
|
29
|
+
test("system-only content counted", () => {
|
|
30
|
+
const sys = "a".repeat(350);
|
|
31
|
+
const r = estimateTokens({ system: sys, messages: [] });
|
|
32
|
+
assert.ok(r);
|
|
33
|
+
assert.equal(r.inputTokens, Math.ceil(350 / CHARS_PER_TOKEN));
|
|
34
|
+
});
|
|
35
|
+
|
|
36
|
+
test("text message content counted", () => {
|
|
37
|
+
const r = estimateTokens({
|
|
38
|
+
system: "",
|
|
39
|
+
messages: [
|
|
40
|
+
{ role: "user", content: [{ type: "text", text: "a".repeat(700) }] },
|
|
41
|
+
],
|
|
42
|
+
});
|
|
43
|
+
assert.ok(r);
|
|
44
|
+
assert.equal(r.inputTokens, Math.ceil(700 / CHARS_PER_TOKEN));
|
|
45
|
+
});
|
|
46
|
+
|
|
47
|
+
test("string content (user shorthand) counted", () => {
|
|
48
|
+
const r = estimateTokens({
|
|
49
|
+
system: "",
|
|
50
|
+
messages: [{ role: "user", content: "hello world" }],
|
|
51
|
+
});
|
|
52
|
+
assert.ok(r);
|
|
53
|
+
assert.equal(r.inputTokens, Math.ceil(11 / CHARS_PER_TOKEN));
|
|
54
|
+
});
|
|
55
|
+
|
|
56
|
+
test("tool_use input JSON counted", () => {
|
|
57
|
+
const input = { file_path: "/some/long/path/to/file.js" };
|
|
58
|
+
const inputJson = JSON.stringify(input);
|
|
59
|
+
const toolName = "Read";
|
|
60
|
+
const r = estimateTokens({
|
|
61
|
+
system: "",
|
|
62
|
+
messages: [
|
|
63
|
+
{
|
|
64
|
+
role: "assistant",
|
|
65
|
+
content: [{ type: "tool_use", id: "t1", name: toolName, input }],
|
|
66
|
+
},
|
|
67
|
+
],
|
|
68
|
+
});
|
|
69
|
+
assert.ok(r);
|
|
70
|
+
assert.equal(r.inputTokens, Math.ceil((toolName.length + inputJson.length) / CHARS_PER_TOKEN));
|
|
71
|
+
});
|
|
72
|
+
|
|
73
|
+
test("tool_result content counted (string)", () => {
|
|
74
|
+
const resultText = "file contents here".repeat(10);
|
|
75
|
+
const r = estimateTokens({
|
|
76
|
+
system: "",
|
|
77
|
+
messages: [
|
|
78
|
+
{
|
|
79
|
+
role: "user",
|
|
80
|
+
content: [
|
|
81
|
+
{ type: "tool_result", tool_use_id: "t1", content: resultText },
|
|
82
|
+
],
|
|
83
|
+
},
|
|
84
|
+
],
|
|
85
|
+
});
|
|
86
|
+
assert.ok(r);
|
|
87
|
+
assert.equal(r.inputTokens, Math.ceil(resultText.length / CHARS_PER_TOKEN));
|
|
88
|
+
});
|
|
89
|
+
|
|
90
|
+
test("tool_result content counted (array of text blocks)", () => {
|
|
91
|
+
const r = estimateTokens({
|
|
92
|
+
system: "",
|
|
93
|
+
messages: [
|
|
94
|
+
{
|
|
95
|
+
role: "user",
|
|
96
|
+
content: [
|
|
97
|
+
{
|
|
98
|
+
type: "tool_result",
|
|
99
|
+
tool_use_id: "t1",
|
|
100
|
+
content: [
|
|
101
|
+
{ type: "text", text: "abc" },
|
|
102
|
+
{ type: "text", text: "defgh" },
|
|
103
|
+
],
|
|
104
|
+
},
|
|
105
|
+
],
|
|
106
|
+
},
|
|
107
|
+
],
|
|
108
|
+
});
|
|
109
|
+
assert.ok(r);
|
|
110
|
+
assert.equal(r.inputTokens, Math.ceil(8 / CHARS_PER_TOKEN));
|
|
111
|
+
});
|
|
112
|
+
|
|
113
|
+
test("multiple messages accumulate", () => {
|
|
114
|
+
const r = estimateTokens({
|
|
115
|
+
system: "sys".repeat(100),
|
|
116
|
+
messages: [
|
|
117
|
+
{ role: "user", content: [{ type: "text", text: "a".repeat(200) }] },
|
|
118
|
+
{ role: "assistant", content: [{ type: "text", text: "b".repeat(300) }] },
|
|
119
|
+
],
|
|
120
|
+
});
|
|
121
|
+
assert.ok(r);
|
|
122
|
+
assert.equal(r.inputTokens, Math.ceil((300 + 200 + 300) / CHARS_PER_TOKEN));
|
|
123
|
+
});
|
|
124
|
+
|
|
125
|
+
test("skips blocks with missing type", () => {
|
|
126
|
+
const r = estimateTokens({
|
|
127
|
+
system: "",
|
|
128
|
+
messages: [
|
|
129
|
+
{ role: "user", content: [{ text: "no type field" }, { type: "text", text: "ok" }] },
|
|
130
|
+
],
|
|
131
|
+
});
|
|
132
|
+
assert.ok(r);
|
|
133
|
+
assert.equal(r.inputTokens, Math.ceil(2 / CHARS_PER_TOKEN));
|
|
134
|
+
});
|
|
135
|
+
|
|
136
|
+
test("handles null/non-object messages gracefully", () => {
|
|
137
|
+
const r = estimateTokens({
|
|
138
|
+
system: "",
|
|
139
|
+
messages: [null, undefined, 42, { role: "user", content: [{ type: "text", text: "ok" }] }],
|
|
140
|
+
});
|
|
141
|
+
assert.ok(r);
|
|
142
|
+
assert.equal(r.inputTokens, Math.ceil(2 / CHARS_PER_TOKEN));
|
|
143
|
+
});
|
|
144
|
+
|
|
145
|
+
test("realistic conversation produces reasonable estimate", () => {
|
|
146
|
+
const msgs = [];
|
|
147
|
+
for (let i = 0; i < 20; i++) {
|
|
148
|
+
msgs.push({ role: "user", content: [{ type: "text", text: "Tell me about X. ".repeat(5) }] });
|
|
149
|
+
msgs.push({
|
|
150
|
+
role: "assistant",
|
|
151
|
+
content: [{ type: "text", text: "Here is info about X. ".repeat(20) }],
|
|
152
|
+
});
|
|
153
|
+
}
|
|
154
|
+
const r = estimateTokens({ system: "You are a helpful assistant.", messages: msgs });
|
|
155
|
+
assert.ok(r);
|
|
156
|
+
assert.ok(r.inputTokens > 500, `expected >500 tokens, got ${r.inputTokens}`);
|
|
157
|
+
assert.ok(r.inputTokens < 10000, `expected <10000 tokens, got ${r.inputTokens}`);
|
|
158
|
+
});
|
|
@@ -4,30 +4,20 @@
|
|
|
4
4
|
* TEST-ONLY FILE. Not shipped to users. Does not participate in production
|
|
5
5
|
* require graphs. Spawned as part of `node --test` only.
|
|
6
6
|
*
|
|
7
|
-
*
|
|
8
|
-
* dependency injection. This test exercises the real child-process hook as
|
|
9
|
-
* Claude Code would invoke it:
|
|
7
|
+
* Exercises the real child-process hook as Claude Code would invoke it:
|
|
10
8
|
*
|
|
11
9
|
* 1. A temporary project root is constructed under os.tmpdir() containing:
|
|
12
10
|
* - .gsd-t/context-meter-config.json (real config loader target)
|
|
13
11
|
* - transcript.jsonl (minimal Claude-Code-shaped transcript)
|
|
14
|
-
* 2.
|
|
15
|
-
*
|
|
16
|
-
* 3.
|
|
17
|
-
* with cwd = tempdir, NODE_OPTIONS = --require <test-injector>, and
|
|
18
|
-
* GSD_T_CONTEXT_METER_TEST_BASE_URL pointing at the stub.
|
|
19
|
-
* 4. We write the PostToolUse JSON payload to the child's stdin, close
|
|
12
|
+
* 2. `node scripts/gsd-t-context-meter.js` is spawned as a child process
|
|
13
|
+
* with cwd = tempdir.
|
|
14
|
+
* 3. We write the PostToolUse JSON payload to the child's stdin, close
|
|
20
15
|
* stdin, collect stdout, and assert both the stdout shape and the
|
|
21
16
|
* on-disk state file.
|
|
22
17
|
*
|
|
23
|
-
*
|
|
24
|
-
*
|
|
25
|
-
*
|
|
26
|
-
* so redirecting HTTP in a black-box test requires a --require-level
|
|
27
|
-
* monkey-patch inside the child process. See that file's comment block.
|
|
28
|
-
*
|
|
29
|
-
* Timing budget: each test < 2s, whole suite < 10s. Hard timeouts on every
|
|
30
|
-
* async wait prevent suite hangs on unclosed sockets or child processes.
|
|
18
|
+
* Since v3.11.11 the context meter uses local token estimation (no API call),
|
|
19
|
+
* so no stub HTTP server is needed. The transcript content determines the
|
|
20
|
+
* estimated token count via chars/3.5 heuristic.
|
|
31
21
|
*
|
|
32
22
|
* @module scripts/gsd-t-context-meter.e2e.test
|
|
33
23
|
*/
|
|
@@ -37,27 +27,18 @@
|
|
|
37
27
|
const { test, beforeEach, afterEach } = require("node:test");
|
|
38
28
|
const assert = require("node:assert/strict");
|
|
39
29
|
const { spawn } = require("node:child_process");
|
|
40
|
-
const http = require("node:http");
|
|
41
30
|
const fs = require("node:fs");
|
|
42
31
|
const path = require("node:path");
|
|
43
32
|
const os = require("node:os");
|
|
44
33
|
|
|
45
34
|
const HOOK_SCRIPT = path.resolve(__dirname, "gsd-t-context-meter.js");
|
|
46
|
-
const INJECTOR = path.resolve(__dirname, "context-meter", "test-injector.js");
|
|
47
35
|
const HARD_TIMEOUT_MS = 12000;
|
|
48
36
|
|
|
49
37
|
/* ──────────────────────────── test fixtures ──────────────────────────── */
|
|
50
38
|
|
|
51
|
-
/**
|
|
52
|
-
* Sandbox state for a single test. Holds the tempdir, stub server, and a
|
|
53
|
-
* dispose() that guarantees everything is torn down — even on failure.
|
|
54
|
-
*/
|
|
55
39
|
class Sandbox {
|
|
56
40
|
constructor() {
|
|
57
41
|
this.tempdir = null;
|
|
58
|
-
this.server = null;
|
|
59
|
-
this.serverUrl = null;
|
|
60
|
-
this.hitCount = 0;
|
|
61
42
|
this.childProcs = [];
|
|
62
43
|
}
|
|
63
44
|
|
|
@@ -89,15 +70,17 @@ class Sandbox {
|
|
|
89
70
|
}
|
|
90
71
|
|
|
91
72
|
/**
|
|
92
|
-
* Write a
|
|
93
|
-
*
|
|
94
|
-
*
|
|
73
|
+
* Write a Claude-Code transcript JSONL with configurable content size.
|
|
74
|
+
* The charCount parameter controls how many characters of text content
|
|
75
|
+
* are in the transcript, which determines the estimated token count.
|
|
95
76
|
*/
|
|
96
|
-
writeTranscript(filename = "transcript.jsonl") {
|
|
77
|
+
writeTranscript(filename = "transcript.jsonl", charCount = 100) {
|
|
78
|
+
const userText = "x".repeat(Math.floor(charCount / 2));
|
|
79
|
+
const assistantText = "y".repeat(Math.ceil(charCount / 2));
|
|
97
80
|
const lines = [
|
|
98
81
|
JSON.stringify({
|
|
99
82
|
type: "user",
|
|
100
|
-
message: { role: "user", content:
|
|
83
|
+
message: { role: "user", content: userText },
|
|
101
84
|
uuid: "u1",
|
|
102
85
|
sessionId: "sess-1",
|
|
103
86
|
}),
|
|
@@ -105,7 +88,7 @@ class Sandbox {
|
|
|
105
88
|
type: "assistant",
|
|
106
89
|
message: {
|
|
107
90
|
role: "assistant",
|
|
108
|
-
content: [{ type: "text", text:
|
|
91
|
+
content: [{ type: "text", text: assistantText }],
|
|
109
92
|
model: "claude-opus-4-6",
|
|
110
93
|
},
|
|
111
94
|
uuid: "a1",
|
|
@@ -117,10 +100,6 @@ class Sandbox {
|
|
|
117
100
|
return p;
|
|
118
101
|
}
|
|
119
102
|
|
|
120
|
-
/**
|
|
121
|
-
* Optional: pre-seed the state file so we can test the checkFrequency skip
|
|
122
|
-
* path (where runMeter increments but does not call the API).
|
|
123
|
-
*/
|
|
124
103
|
writeState(state) {
|
|
125
104
|
const full = Object.assign(
|
|
126
105
|
{
|
|
@@ -142,52 +121,8 @@ class Sandbox {
|
|
|
142
121
|
);
|
|
143
122
|
}
|
|
144
123
|
|
|
145
|
-
/**
|
|
146
|
-
* Start a local stub HTTP server that responds to every request with the
|
|
147
|
-
* given inputTokens value. Tracks hit count so tests can assert the API
|
|
148
|
-
* was (or was not) called.
|
|
149
|
-
*/
|
|
150
|
-
async startStub({ inputTokens }) {
|
|
151
|
-
this.server = http.createServer((req, res) => {
|
|
152
|
-
this.hitCount++;
|
|
153
|
-
// Drain the request body (even though we don't inspect it) so the
|
|
154
|
-
// client sees a clean close.
|
|
155
|
-
req.on("data", () => {});
|
|
156
|
-
req.on("end", () => {
|
|
157
|
-
res.writeHead(200, { "content-type": "application/json" });
|
|
158
|
-
res.end(JSON.stringify({ input_tokens: inputTokens }));
|
|
159
|
-
});
|
|
160
|
-
});
|
|
161
|
-
await new Promise((resolve, reject) => {
|
|
162
|
-
const t = setTimeout(
|
|
163
|
-
() => reject(new Error("stub server listen timeout")),
|
|
164
|
-
HARD_TIMEOUT_MS
|
|
165
|
-
);
|
|
166
|
-
this.server.on("error", (err) => {
|
|
167
|
-
clearTimeout(t);
|
|
168
|
-
reject(err);
|
|
169
|
-
});
|
|
170
|
-
this.server.listen(0, "127.0.0.1", () => {
|
|
171
|
-
clearTimeout(t);
|
|
172
|
-
const { port } = this.server.address();
|
|
173
|
-
this.serverUrl = `http://127.0.0.1:${port}`;
|
|
174
|
-
resolve();
|
|
175
|
-
});
|
|
176
|
-
});
|
|
177
|
-
}
|
|
178
|
-
|
|
179
|
-
/**
|
|
180
|
-
* Spawn the real hook as a child process, write a payload to stdin, and
|
|
181
|
-
* resolve with { stdout, stderr, code }. Enforces a hard timeout so the
|
|
182
|
-
* test can never hang the suite.
|
|
183
|
-
*/
|
|
184
124
|
async runHook({ payload, env }) {
|
|
185
|
-
const fullEnv = Object.assign({}, process.env, {
|
|
186
|
-
ANTHROPIC_API_KEY: "test-key-ignored",
|
|
187
|
-
GSD_T_CONTEXT_METER_TEST_BASE_URL: this.serverUrl || "",
|
|
188
|
-
NODE_OPTIONS: `--require ${INJECTOR}`,
|
|
189
|
-
});
|
|
190
|
-
// Allow caller to override any env (including unsetting ANTHROPIC_API_KEY).
|
|
125
|
+
const fullEnv = Object.assign({}, process.env, {});
|
|
191
126
|
if (env) {
|
|
192
127
|
for (const [k, v] of Object.entries(env)) {
|
|
193
128
|
if (v === null || v === undefined) {
|
|
@@ -257,7 +192,6 @@ class Sandbox {
|
|
|
257
192
|
}
|
|
258
193
|
|
|
259
194
|
async dispose() {
|
|
260
|
-
// Kill any lingering children first.
|
|
261
195
|
for (const c of this.childProcs) {
|
|
262
196
|
try {
|
|
263
197
|
if (!c.killed) c.kill("SIGKILL");
|
|
@@ -267,17 +201,6 @@ class Sandbox {
|
|
|
267
201
|
}
|
|
268
202
|
this.childProcs = [];
|
|
269
203
|
|
|
270
|
-
if (this.server) {
|
|
271
|
-
await new Promise((resolve) => {
|
|
272
|
-
try {
|
|
273
|
-
this.server.close(() => resolve());
|
|
274
|
-
} catch (_) {
|
|
275
|
-
resolve();
|
|
276
|
-
}
|
|
277
|
-
});
|
|
278
|
-
this.server = null;
|
|
279
|
-
}
|
|
280
|
-
|
|
281
204
|
if (this.tempdir) {
|
|
282
205
|
try {
|
|
283
206
|
fs.rmSync(this.tempdir, { recursive: true, force: true });
|
|
@@ -307,10 +230,10 @@ afterEach(async () => {
|
|
|
307
230
|
|
|
308
231
|
/* ──────────────────────────── tests ──────────────────────────── */
|
|
309
232
|
|
|
310
|
-
test("E2E 1. below threshold — stdout {} and state reflects 25%", async () => {
|
|
233
|
+
test("E2E 1. below threshold — stdout {} and state reflects estimate", async () => {
|
|
234
|
+
// 100 chars of text content → ~29 tokens (100/3.5) → 0.014% of 200K window
|
|
311
235
|
sandbox.writeConfig({ thresholdPct: 75, modelWindowSize: 200000, checkFrequency: 1 });
|
|
312
|
-
const transcriptPath = sandbox.writeTranscript();
|
|
313
|
-
await sandbox.startStub({ inputTokens: 50000 });
|
|
236
|
+
const transcriptPath = sandbox.writeTranscript("transcript.jsonl", 100);
|
|
314
237
|
|
|
315
238
|
const { stdout, code } = await sandbox.runHook({
|
|
316
239
|
payload: { session_id: "test-below", transcript_path: transcriptPath },
|
|
@@ -323,22 +246,21 @@ test("E2E 1. below threshold — stdout {} and state reflects 25%", async () =>
|
|
|
323
246
|
const state = sandbox.readState();
|
|
324
247
|
assert.ok(state, "state file should exist");
|
|
325
248
|
assert.equal(state.version, 1);
|
|
326
|
-
assert.
|
|
249
|
+
assert.ok(state.inputTokens > 0, "should have estimated some tokens");
|
|
250
|
+
assert.ok(state.inputTokens < 1000, "small transcript should estimate < 1K tokens");
|
|
327
251
|
assert.equal(state.modelWindowSize, 200000);
|
|
328
|
-
assert.ok(
|
|
252
|
+
assert.ok(state.pct < 1, "pct should be well below threshold");
|
|
329
253
|
assert.equal(state.threshold, "normal");
|
|
330
254
|
assert.equal(state.checkCount, 1);
|
|
331
255
|
assert.equal(state.lastError, null);
|
|
332
256
|
assert.ok(typeof state.timestamp === "string" && state.timestamp.length > 0);
|
|
333
|
-
|
|
334
257
|
assert.equal(sandbox.tmpFileExists(), false, "no leftover .tmp file");
|
|
335
|
-
assert.equal(sandbox.hitCount, 1, "stub server should have been called exactly once");
|
|
336
258
|
});
|
|
337
259
|
|
|
338
|
-
test("E2E 2. above threshold — stdout additionalContext and state reflects 80%", async () => {
|
|
260
|
+
test("E2E 2. above threshold — stdout additionalContext with large transcript", async () => {
|
|
261
|
+
// 600K chars → ~171K tokens → 85.7% of 200K window → above the warn band (≥ 85%) and the 75% threshold → additionalContext
|
|
339
262
|
sandbox.writeConfig({ thresholdPct: 75, modelWindowSize: 200000, checkFrequency: 1 });
|
|
340
|
-
const transcriptPath = sandbox.writeTranscript();
|
|
341
|
-
await sandbox.startStub({ inputTokens: 160000 });
|
|
263
|
+
const transcriptPath = sandbox.writeTranscript("transcript.jsonl", 600000);
|
|
342
264
|
|
|
343
265
|
const { stdout, code } = await sandbox.runHook({
|
|
344
266
|
payload: { session_id: "test-above", transcript_path: transcriptPath },
|
|
@@ -348,33 +270,26 @@ test("E2E 2. above threshold — stdout additionalContext and state reflects 80%
|
|
|
348
270
|
const parsed = JSON.parse(stdout || "{}");
|
|
349
271
|
assert.ok(parsed.additionalContext, "must emit additionalContext");
|
|
350
272
|
assert.ok(parsed.additionalContext.includes("MANDATORY STOP"), "must be MANDATORY STOP");
|
|
351
|
-
assert.ok(parsed.additionalContext.includes("80.0%"), "must include pct");
|
|
352
|
-
assert.ok(parsed.additionalContext.includes("200000"), "must include window size");
|
|
353
273
|
assert.ok(parsed.additionalContext.includes("/user:gsd-t-pause"), "must instruct pause");
|
|
354
274
|
assert.ok(parsed.additionalContext.includes("/user:gsd-t-resume"), "must instruct resume");
|
|
355
275
|
|
|
356
276
|
const state = sandbox.readState();
|
|
357
277
|
assert.ok(state);
|
|
358
|
-
assert.
|
|
359
|
-
assert.
|
|
360
|
-
assert.ok(Math.abs(state.pct - 80) < 0.0001, `pct ${state.pct} should ≈ 80`);
|
|
361
|
-
// v3.0.0 three-band (M35): 80% ∈ [70, 85) → warn
|
|
362
|
-
assert.equal(state.threshold, "warn");
|
|
278
|
+
assert.ok(state.inputTokens > 100000, "large transcript should estimate >100K tokens");
|
|
279
|
+
assert.ok(state.pct > 50, "pct should be above threshold");
|
|
363
280
|
assert.equal(state.checkCount, 1);
|
|
364
281
|
assert.equal(state.lastError, null);
|
|
365
|
-
|
|
366
282
|
assert.equal(sandbox.tmpFileExists(), false);
|
|
367
|
-
assert.equal(sandbox.hitCount, 1);
|
|
368
283
|
});
|
|
369
284
|
|
|
370
|
-
test("E2E 3. API key missing — stdout {}, state has lastError.code='missing_key'", async () => {
|
|
285
|
+
test("E2E 3. missing transcript — stdout {}, state has parse error", async () => {
|
|
371
286
|
sandbox.writeConfig({ thresholdPct: 75, checkFrequency: 1 });
|
|
372
|
-
const transcriptPath = sandbox.writeTranscript();
|
|
373
|
-
await sandbox.startStub({ inputTokens: 50000 });
|
|
374
287
|
|
|
375
288
|
const { stdout, code } = await sandbox.runHook({
|
|
376
|
-
payload: {
|
|
377
|
-
|
|
289
|
+
payload: {
|
|
290
|
+
session_id: "test-nofile",
|
|
291
|
+
transcript_path: path.join(sandbox.tempdir, "nonexistent.jsonl"),
|
|
292
|
+
},
|
|
378
293
|
});
|
|
379
294
|
|
|
380
295
|
assert.equal(code, 0);
|
|
@@ -385,18 +300,13 @@ test("E2E 3. API key missing — stdout {}, state has lastError.code='missing_ke
|
|
|
385
300
|
assert.ok(state);
|
|
386
301
|
assert.equal(state.checkCount, 1);
|
|
387
302
|
assert.ok(state.lastError && typeof state.lastError === "object");
|
|
388
|
-
assert.equal(state.lastError.code, "missing_key");
|
|
389
|
-
|
|
390
|
-
// API must NOT have been called.
|
|
391
|
-
assert.equal(sandbox.hitCount, 0, "stub server must not be hit when key is missing");
|
|
303
|
+
assert.equal(state.lastError.code, "parse_failure");
|
|
392
304
|
});
|
|
393
305
|
|
|
394
|
-
test("E2E 4. checkFrequency skip — API not called, checkCount increments", async () => {
|
|
306
|
+
test("E2E 4. checkFrequency skip — estimation not run, checkCount increments", async () => {
|
|
395
307
|
sandbox.writeConfig({ thresholdPct: 75, checkFrequency: 5 });
|
|
396
|
-
const transcriptPath = sandbox.writeTranscript();
|
|
397
|
-
// Pre-seed state so that checkCount goes 3 → 4, which is NOT a multiple of 5.
|
|
308
|
+
const transcriptPath = sandbox.writeTranscript("transcript.jsonl", 100);
|
|
398
309
|
sandbox.writeState({ checkCount: 3 });
|
|
399
|
-
await sandbox.startStub({ inputTokens: 50000 });
|
|
400
310
|
|
|
401
311
|
const { stdout, code } = await sandbox.runHook({
|
|
402
312
|
payload: { session_id: "test-skip", transcript_path: transcriptPath },
|
|
@@ -409,9 +319,6 @@ test("E2E 4. checkFrequency skip — API not called, checkCount increments", asy
|
|
|
409
319
|
const state = sandbox.readState();
|
|
410
320
|
assert.ok(state);
|
|
411
321
|
assert.equal(state.checkCount, 4, "counter increments even on skipped turn");
|
|
412
|
-
// lastError/inputTokens unchanged from seed on skipped turn.
|
|
413
322
|
assert.equal(state.inputTokens, 0);
|
|
414
|
-
|
|
415
|
-
assert.equal(sandbox.hitCount, 0, "stub server must not be hit on skipped turn");
|
|
416
323
|
assert.equal(sandbox.tmpFileExists(), false);
|
|
417
324
|
});
|
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
* Wires together:
|
|
8
8
|
* - bin/context-meter-config.cjs (loadConfig)
|
|
9
9
|
* - scripts/context-meter/transcript-parser.js (parseTranscript)
|
|
10
|
-
* - scripts/context-meter/
|
|
10
|
+
* - scripts/context-meter/estimate-tokens.js (estimateTokens — local, zero API cost)
|
|
11
11
|
* - scripts/context-meter/threshold.js (computePct/bandFor/buildAdditionalContext)
|
|
12
12
|
*
|
|
13
13
|
* Contract: .gsd-t/contracts/context-meter-contract.md
|
|
@@ -20,8 +20,8 @@
|
|
|
20
20
|
* failure of its own stdin. See contract rule #1.
|
|
21
21
|
*
|
|
22
22
|
* Testability:
|
|
23
|
-
* `runMeter({ payload, projectRoot, env, clock?,
|
|
24
|
-
*
|
|
23
|
+
* `runMeter({ payload, projectRoot, env, clock?, _parseTranscript?,
|
|
24
|
+
* _estimateTokens?, _loadConfig? })` is the pure async core. Tests
|
|
25
25
|
* fabricate payloads and inject stubs; production code uses only the CLI
|
|
26
26
|
* shim at the bottom of the file (runs when `require.main === module`).
|
|
27
27
|
*
|
|
@@ -35,11 +35,10 @@ const path = require("path");
|
|
|
35
35
|
|
|
36
36
|
const { loadConfig: realLoadConfig } = require("../bin/context-meter-config.cjs");
|
|
37
37
|
const { parseTranscript: realParseTranscript } = require("./context-meter/transcript-parser");
|
|
38
|
-
const {
|
|
38
|
+
const { estimateTokens: realEstimateTokens } = require("./context-meter/estimate-tokens");
|
|
39
39
|
const { computePct, bandFor, buildAdditionalContext } = require("./context-meter/threshold");
|
|
40
40
|
|
|
41
41
|
const STATE_VERSION = 1;
|
|
42
|
-
const MODEL_ID = "claude-opus-4-6";
|
|
43
42
|
|
|
44
43
|
/* ─────────────────────────── state file helpers ─────────────────────────── */
|
|
45
44
|
|
|
@@ -119,10 +118,9 @@ function appendLog(logPath, level, category, detail, clock) {
|
|
|
119
118
|
* @param {string} opts.projectRoot normally process.cwd()
|
|
120
119
|
* @param {object} opts.env normally process.env
|
|
121
120
|
* @param {Function} [opts.clock] optional () => Date (test seam)
|
|
122
|
-
* @param {string} [opts.baseUrl] optional countTokens _baseUrl override (test seam)
|
|
123
121
|
* @param {Function} [opts._loadConfig] optional loadConfig stub (test seam)
|
|
124
122
|
* @param {Function} [opts._parseTranscript] optional parseTranscript stub (test seam)
|
|
125
|
-
* @param {Function} [opts.
|
|
123
|
+
* @param {Function} [opts._estimateTokens] optional estimateTokens stub (test seam)
|
|
126
124
|
* @returns {Promise<object>} `{}` or `{ additionalContext: "..." }`
|
|
127
125
|
*/
|
|
128
126
|
async function runMeter(opts) {
|
|
@@ -133,10 +131,9 @@ async function runMeter(opts) {
|
|
|
133
131
|
projectRoot,
|
|
134
132
|
env,
|
|
135
133
|
clock,
|
|
136
|
-
baseUrl,
|
|
137
134
|
_loadConfig = realLoadConfig,
|
|
138
135
|
_parseTranscript = realParseTranscript,
|
|
139
|
-
|
|
136
|
+
_estimateTokens = realEstimateTokens,
|
|
140
137
|
} = opts || {};
|
|
141
138
|
|
|
142
139
|
const root = projectRoot || process.cwd();
|
|
@@ -187,20 +184,7 @@ async function runMeter(opts) {
|
|
|
187
184
|
return {};
|
|
188
185
|
}
|
|
189
186
|
|
|
190
|
-
// 5.
|
|
191
|
-
const apiKey = envObj[cfg.apiKeyEnvVar];
|
|
192
|
-
if (typeof apiKey !== "string" || apiKey.length === 0) {
|
|
193
|
-
state.lastError = {
|
|
194
|
-
code: "missing_key",
|
|
195
|
-
message: `env var ${cfg.apiKeyEnvVar} not set`,
|
|
196
|
-
timestamp: now().toISOString(),
|
|
197
|
-
};
|
|
198
|
-
writeStateAtomic(statePath, state);
|
|
199
|
-
appendLog(logPath, "ERROR", "missing_key", `env var ${cfg.apiKeyEnvVar} unset`, clock);
|
|
200
|
-
return {};
|
|
201
|
-
}
|
|
202
|
-
|
|
203
|
-
// 6. Parse transcript (streaming, async). null → bail out.
|
|
187
|
+
// 5. Parse transcript (streaming, async). null → bail out.
|
|
204
188
|
let parsed;
|
|
205
189
|
try {
|
|
206
190
|
parsed = await _parseTranscript(transcriptPath);
|
|
@@ -224,19 +208,12 @@ async function runMeter(opts) {
|
|
|
224
208
|
return {};
|
|
225
209
|
}
|
|
226
210
|
|
|
227
|
-
//
|
|
228
|
-
// CHOICE: reset inputTokens to 0 on failure to avoid stale-reading-based
|
|
229
|
-
// false-positive threshold trips. lastError still records the failure so
|
|
230
|
-
// consumers can see we didn't get a fresh count.
|
|
211
|
+
// 6. Estimate tokens locally (no API call, zero cost).
|
|
231
212
|
let tokenResp;
|
|
232
213
|
try {
|
|
233
|
-
tokenResp =
|
|
234
|
-
apiKey,
|
|
235
|
-
model: MODEL_ID,
|
|
214
|
+
tokenResp = _estimateTokens({
|
|
236
215
|
system: parsed.system || "",
|
|
237
216
|
messages: parsed.messages,
|
|
238
|
-
timeoutMs: cfg.timeoutMs,
|
|
239
|
-
_baseUrl: baseUrl,
|
|
240
217
|
});
|
|
241
218
|
} catch (_) {
|
|
242
219
|
tokenResp = null;
|
|
@@ -248,12 +225,12 @@ async function runMeter(opts) {
|
|
|
248
225
|
state.threshold = "normal";
|
|
249
226
|
state.timestamp = now().toISOString();
|
|
250
227
|
state.lastError = {
|
|
251
|
-
code: "
|
|
252
|
-
message: "
|
|
228
|
+
code: "estimate_error",
|
|
229
|
+
message: "estimateTokens returned null",
|
|
253
230
|
timestamp: state.timestamp,
|
|
254
231
|
};
|
|
255
232
|
writeStateAtomic(statePath, state);
|
|
256
|
-
appendLog(logPath, "ERROR", "
|
|
233
|
+
appendLog(logPath, "ERROR", "estimate_error", "estimateTokens null", clock);
|
|
257
234
|
return {};
|
|
258
235
|
}
|
|
259
236
|
|
|
@@ -3,20 +3,21 @@
|
|
|
3
3
|
/**
|
|
4
4
|
* Unit tests for scripts/gsd-t-context-meter.js (M34 Task 4 — CP2 satisfaction).
|
|
5
5
|
*
|
|
6
|
-
* Covers
|
|
6
|
+
* Covers scenarios:
|
|
7
7
|
* 1. check-frequency skip
|
|
8
8
|
* 2. check-frequency hit — under threshold
|
|
9
9
|
* 3. check-frequency hit — over threshold
|
|
10
|
-
* 4.
|
|
11
|
-
* 5.
|
|
12
|
-
* 6.
|
|
13
|
-
* 7.
|
|
14
|
-
* 8.
|
|
15
|
-
* 9.
|
|
16
|
-
* 10.
|
|
10
|
+
* 4. transcript parse failure
|
|
11
|
+
* 5. estimation failure — returns null
|
|
12
|
+
* 6. state file corruption
|
|
13
|
+
* 7. missing transcript_path in payload
|
|
14
|
+
* 8. atomic write — no stale .tmp file after success
|
|
15
|
+
* 9. fail-open on unexpected throw (loadConfig throws)
|
|
16
|
+
* 10. log never contains message content
|
|
17
|
+
* 11. clock injection
|
|
17
18
|
*
|
|
18
19
|
* All dependencies are injected via runMeter's test seams so no real network
|
|
19
|
-
* calls
|
|
20
|
+
* calls and no real config-file reads are needed.
|
|
20
21
|
*/
|
|
21
22
|
|
|
22
23
|
const { test, beforeEach, afterEach } = require("node:test");
|
|
@@ -75,8 +76,6 @@ function seedState(root, partial) {
|
|
|
75
76
|
}
|
|
76
77
|
|
|
77
78
|
function makePayload() {
|
|
78
|
-
// A phony transcript path — tests inject a fake parseTranscript, so the path
|
|
79
|
-
// doesn't actually need to exist.
|
|
80
79
|
return {
|
|
81
80
|
session_id: "test-session",
|
|
82
81
|
transcript_path: path.join(tmpRoot, "fake-transcript.jsonl"),
|
|
@@ -93,26 +92,26 @@ const FAKE_PARSED = {
|
|
|
93
92
|
|
|
94
93
|
/* ───────────────────────────── tests ───────────────────────────── */
|
|
95
94
|
|
|
96
|
-
test("1. check-frequency skip —
|
|
95
|
+
test("1. check-frequency skip — estimator NOT called, counter incremented, stdout {}", async () => {
|
|
97
96
|
seedState(tmpRoot, { checkCount: 3 });
|
|
98
97
|
|
|
99
|
-
const
|
|
98
|
+
const estimateCalls = [];
|
|
100
99
|
const out = await runMeter({
|
|
101
100
|
payload: makePayload(),
|
|
102
101
|
projectRoot: tmpRoot,
|
|
103
|
-
env: {
|
|
102
|
+
env: {},
|
|
104
103
|
_loadConfig: () => makeConfig({ checkFrequency: 5 }),
|
|
105
104
|
_parseTranscript: async () => {
|
|
106
105
|
throw new Error("parseTranscript should not be called on skip");
|
|
107
106
|
},
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
throw new Error("
|
|
107
|
+
_estimateTokens: () => {
|
|
108
|
+
estimateCalls.push("called");
|
|
109
|
+
throw new Error("estimateTokens should not be called on skip");
|
|
111
110
|
},
|
|
112
111
|
});
|
|
113
112
|
|
|
114
113
|
assert.deepEqual(out, {});
|
|
115
|
-
assert.equal(
|
|
114
|
+
assert.equal(estimateCalls.length, 0);
|
|
116
115
|
const state = JSON.parse(fs.readFileSync(stateFile(tmpRoot), "utf8"));
|
|
117
116
|
assert.equal(state.checkCount, 4);
|
|
118
117
|
assert.equal(state.lastError, null);
|
|
@@ -124,10 +123,10 @@ test("2. check-frequency hit — under threshold → {} + state updated", async
|
|
|
124
123
|
const out = await runMeter({
|
|
125
124
|
payload: makePayload(),
|
|
126
125
|
projectRoot: tmpRoot,
|
|
127
|
-
env: {
|
|
126
|
+
env: {},
|
|
128
127
|
_loadConfig: () => makeConfig(),
|
|
129
128
|
_parseTranscript: async () => FAKE_PARSED,
|
|
130
|
-
|
|
129
|
+
_estimateTokens: () => ({ inputTokens: 10000 }),
|
|
131
130
|
});
|
|
132
131
|
|
|
133
132
|
assert.deepEqual(out, {});
|
|
@@ -146,10 +145,10 @@ test("3. check-frequency hit — over threshold → additionalContext emitted",
|
|
|
146
145
|
const out = await runMeter({
|
|
147
146
|
payload: makePayload(),
|
|
148
147
|
projectRoot: tmpRoot,
|
|
149
|
-
env: {
|
|
148
|
+
env: {},
|
|
150
149
|
_loadConfig: () => makeConfig(),
|
|
151
150
|
_parseTranscript: async () => FAKE_PARSED,
|
|
152
|
-
|
|
151
|
+
_estimateTokens: () => ({ inputTokens: 160000 }),
|
|
153
152
|
});
|
|
154
153
|
|
|
155
154
|
assert.equal(typeof out.additionalContext, "string");
|
|
@@ -160,54 +159,21 @@ test("3. check-frequency hit — over threshold → additionalContext emitted",
|
|
|
160
159
|
const state = JSON.parse(fs.readFileSync(stateFile(tmpRoot), "utf8"));
|
|
161
160
|
assert.equal(state.checkCount, 5);
|
|
162
161
|
assert.equal(state.pct, 80);
|
|
163
|
-
// v3.0.0 three-band (M35): 80% ∈ [70, 85) → warn
|
|
164
162
|
assert.equal(state.threshold, "warn");
|
|
165
163
|
assert.equal(state.inputTokens, 160000);
|
|
166
164
|
});
|
|
167
165
|
|
|
168
|
-
test("4.
|
|
166
|
+
test("4. transcript parse failure — returns null → lastError 'parse_failure'", async () => {
|
|
169
167
|
seedState(tmpRoot, { checkCount: 4 });
|
|
170
168
|
|
|
171
|
-
const apiCalls = [];
|
|
172
169
|
const out = await runMeter({
|
|
173
170
|
payload: makePayload(),
|
|
174
171
|
projectRoot: tmpRoot,
|
|
175
|
-
env: {},
|
|
176
|
-
_loadConfig: () => makeConfig(),
|
|
177
|
-
_parseTranscript: async () => FAKE_PARSED,
|
|
178
|
-
_countTokens: async () => {
|
|
179
|
-
apiCalls.push("x");
|
|
180
|
-
return { inputTokens: 1 };
|
|
181
|
-
},
|
|
182
|
-
});
|
|
183
|
-
|
|
184
|
-
assert.deepEqual(out, {});
|
|
185
|
-
assert.equal(apiCalls.length, 0);
|
|
186
|
-
|
|
187
|
-
const state = JSON.parse(fs.readFileSync(stateFile(tmpRoot), "utf8"));
|
|
188
|
-
assert.equal(state.checkCount, 5);
|
|
189
|
-
assert.ok(state.lastError, "lastError populated");
|
|
190
|
-
assert.equal(state.lastError.code, "missing_key");
|
|
191
|
-
|
|
192
|
-
// Log file exists and contains the missing_key diagnostic
|
|
193
|
-
assert.ok(fs.existsSync(logFile(tmpRoot)));
|
|
194
|
-
const log = fs.readFileSync(logFile(tmpRoot), "utf8");
|
|
195
|
-
assert.match(log, /missing_key/);
|
|
196
|
-
// And NEVER the API key itself
|
|
197
|
-
assert.ok(!log.includes("sk-test"));
|
|
198
|
-
});
|
|
199
|
-
|
|
200
|
-
test("5. transcript parse failure — returns null → lastError 'parse_failure'", async () => {
|
|
201
|
-
seedState(tmpRoot, { checkCount: 4 });
|
|
202
|
-
|
|
203
|
-
const out = await runMeter({
|
|
204
|
-
payload: makePayload(),
|
|
205
|
-
projectRoot: tmpRoot,
|
|
206
|
-
env: { ANTHROPIC_API_KEY: "sk-test" },
|
|
172
|
+
env: {},
|
|
207
173
|
_loadConfig: () => makeConfig(),
|
|
208
174
|
_parseTranscript: async () => null,
|
|
209
|
-
|
|
210
|
-
throw new Error("should not
|
|
175
|
+
_estimateTokens: () => {
|
|
176
|
+
throw new Error("should not estimate when parse fails");
|
|
211
177
|
},
|
|
212
178
|
});
|
|
213
179
|
|
|
@@ -217,29 +183,27 @@ test("5. transcript parse failure — returns null → lastError 'parse_failure'
|
|
|
217
183
|
assert.equal(state.lastError.code, "parse_failure");
|
|
218
184
|
});
|
|
219
185
|
|
|
220
|
-
test("
|
|
186
|
+
test("5. estimation failure — returns null → lastError 'estimate_error', inputTokens reset", async () => {
|
|
221
187
|
seedState(tmpRoot, { checkCount: 4, inputTokens: 99999 });
|
|
222
188
|
|
|
223
189
|
const out = await runMeter({
|
|
224
190
|
payload: makePayload(),
|
|
225
191
|
projectRoot: tmpRoot,
|
|
226
|
-
env: {
|
|
192
|
+
env: {},
|
|
227
193
|
_loadConfig: () => makeConfig({ timeoutMs: 50 }),
|
|
228
194
|
_parseTranscript: async () => FAKE_PARSED,
|
|
229
|
-
|
|
195
|
+
_estimateTokens: () => null,
|
|
230
196
|
});
|
|
231
197
|
|
|
232
198
|
assert.deepEqual(out, {});
|
|
233
199
|
const state = JSON.parse(fs.readFileSync(stateFile(tmpRoot), "utf8"));
|
|
234
|
-
assert.equal(state.lastError.code, "
|
|
235
|
-
// Choice documented in hook: reset inputTokens to 0 on failure to avoid stale
|
|
236
|
-
// readings tripping threshold false-positives.
|
|
200
|
+
assert.equal(state.lastError.code, "estimate_error");
|
|
237
201
|
assert.equal(state.inputTokens, 0);
|
|
238
202
|
assert.equal(state.pct, 0);
|
|
239
203
|
assert.equal(state.threshold, "normal");
|
|
240
204
|
});
|
|
241
205
|
|
|
242
|
-
test("
|
|
206
|
+
test("6. state file corruption — overwritten with valid defaults + fresh count", async () => {
|
|
243
207
|
const sp = stateFile(tmpRoot);
|
|
244
208
|
fs.mkdirSync(path.dirname(sp), { recursive: true });
|
|
245
209
|
fs.writeFileSync(sp, "not json{");
|
|
@@ -247,22 +211,19 @@ test("7. state file corruption — overwritten with valid defaults + fresh count
|
|
|
247
211
|
const out = await runMeter({
|
|
248
212
|
payload: makePayload(),
|
|
249
213
|
projectRoot: tmpRoot,
|
|
250
|
-
env: {
|
|
214
|
+
env: {},
|
|
251
215
|
_loadConfig: () => makeConfig({ checkFrequency: 5 }),
|
|
252
216
|
_parseTranscript: async () => FAKE_PARSED,
|
|
253
|
-
|
|
217
|
+
_estimateTokens: () => ({ inputTokens: 100 }),
|
|
254
218
|
});
|
|
255
219
|
|
|
256
220
|
assert.deepEqual(out, {});
|
|
257
|
-
// Post-write must be valid JSON with defaults + checkCount == 1
|
|
258
221
|
const state = JSON.parse(fs.readFileSync(sp, "utf8"));
|
|
259
222
|
assert.equal(state.version, 1);
|
|
260
223
|
assert.equal(state.checkCount, 1);
|
|
261
|
-
// checkCount=1 % checkFrequency=5 !== 0, so this was a skip path; API not called.
|
|
262
|
-
// Verify API was NOT called on this path by re-running with a throwing stub.
|
|
263
224
|
});
|
|
264
225
|
|
|
265
|
-
test("
|
|
226
|
+
test("6b. state file corruption + frequency hit — estimator called once, state valid", async () => {
|
|
266
227
|
const sp = stateFile(tmpRoot);
|
|
267
228
|
fs.mkdirSync(path.dirname(sp), { recursive: true });
|
|
268
229
|
fs.writeFileSync(sp, "not json{");
|
|
@@ -270,10 +231,10 @@ test("7b. state file corruption + frequency hit — API called once, state valid
|
|
|
270
231
|
const out = await runMeter({
|
|
271
232
|
payload: makePayload(),
|
|
272
233
|
projectRoot: tmpRoot,
|
|
273
|
-
env: {
|
|
234
|
+
env: {},
|
|
274
235
|
_loadConfig: () => makeConfig({ checkFrequency: 1 }),
|
|
275
236
|
_parseTranscript: async () => FAKE_PARSED,
|
|
276
|
-
|
|
237
|
+
_estimateTokens: () => ({ inputTokens: 500 }),
|
|
277
238
|
});
|
|
278
239
|
|
|
279
240
|
assert.deepEqual(out, {});
|
|
@@ -284,19 +245,19 @@ test("7b. state file corruption + frequency hit — API called once, state valid
|
|
|
284
245
|
assert.equal(state.lastError, null);
|
|
285
246
|
});
|
|
286
247
|
|
|
287
|
-
test("
|
|
248
|
+
test("7. missing transcript_path in payload — lastError 'no_transcript', counter increments", async () => {
|
|
288
249
|
seedState(tmpRoot, { checkCount: 4 });
|
|
289
250
|
|
|
290
251
|
const out = await runMeter({
|
|
291
|
-
payload: { session_id: "x" },
|
|
252
|
+
payload: { session_id: "x" },
|
|
292
253
|
projectRoot: tmpRoot,
|
|
293
|
-
env: {
|
|
254
|
+
env: {},
|
|
294
255
|
_loadConfig: () => makeConfig(),
|
|
295
256
|
_parseTranscript: async () => {
|
|
296
257
|
throw new Error("should not parse when transcript_path missing");
|
|
297
258
|
},
|
|
298
|
-
|
|
299
|
-
throw new Error("should not
|
|
259
|
+
_estimateTokens: () => {
|
|
260
|
+
throw new Error("should not estimate when transcript_path missing");
|
|
300
261
|
},
|
|
301
262
|
});
|
|
302
263
|
|
|
@@ -306,16 +267,16 @@ test("8. missing transcript_path in payload — lastError 'no_transcript', count
|
|
|
306
267
|
assert.equal(state.lastError.code, "no_transcript");
|
|
307
268
|
});
|
|
308
269
|
|
|
309
|
-
test("
|
|
270
|
+
test("8. atomic write — no .tmp file on disk after successful run", async () => {
|
|
310
271
|
seedState(tmpRoot, { checkCount: 4 });
|
|
311
272
|
|
|
312
273
|
await runMeter({
|
|
313
274
|
payload: makePayload(),
|
|
314
275
|
projectRoot: tmpRoot,
|
|
315
|
-
env: {
|
|
276
|
+
env: {},
|
|
316
277
|
_loadConfig: () => makeConfig(),
|
|
317
278
|
_parseTranscript: async () => FAKE_PARSED,
|
|
318
|
-
|
|
279
|
+
_estimateTokens: () => ({ inputTokens: 1000 }),
|
|
319
280
|
});
|
|
320
281
|
|
|
321
282
|
const tmp = stateFile(tmpRoot) + ".tmp";
|
|
@@ -323,32 +284,32 @@ test("9. atomic write — no .tmp file on disk after successful run", async () =
|
|
|
323
284
|
assert.equal(fs.existsSync(stateFile(tmpRoot)), true, "state file should exist");
|
|
324
285
|
});
|
|
325
286
|
|
|
326
|
-
test("
|
|
287
|
+
test("9. fail-open on unexpected throw — loadConfig throws → runMeter returns {}", async () => {
|
|
327
288
|
const out = await runMeter({
|
|
328
289
|
payload: makePayload(),
|
|
329
290
|
projectRoot: tmpRoot,
|
|
330
|
-
env: {
|
|
291
|
+
env: {},
|
|
331
292
|
_loadConfig: () => {
|
|
332
293
|
throw new Error("boom");
|
|
333
294
|
},
|
|
334
295
|
_parseTranscript: async () => FAKE_PARSED,
|
|
335
|
-
|
|
296
|
+
_estimateTokens: () => ({ inputTokens: 1 }),
|
|
336
297
|
});
|
|
337
298
|
|
|
338
299
|
assert.deepEqual(out, {});
|
|
339
300
|
});
|
|
340
301
|
|
|
341
|
-
test("
|
|
302
|
+
test("9b. fail-open — parseTranscript throws synchronously → {}", async () => {
|
|
342
303
|
seedState(tmpRoot, { checkCount: 4 });
|
|
343
304
|
const out = await runMeter({
|
|
344
305
|
payload: makePayload(),
|
|
345
306
|
projectRoot: tmpRoot,
|
|
346
|
-
env: {
|
|
307
|
+
env: {},
|
|
347
308
|
_loadConfig: () => makeConfig(),
|
|
348
309
|
_parseTranscript: () => {
|
|
349
310
|
throw new Error("sync boom");
|
|
350
311
|
},
|
|
351
|
-
|
|
312
|
+
_estimateTokens: () => ({ inputTokens: 1 }),
|
|
352
313
|
});
|
|
353
314
|
|
|
354
315
|
assert.deepEqual(out, {});
|
|
@@ -356,32 +317,32 @@ test("10b. fail-open — parseTranscript throws synchronously → {}", async ()
|
|
|
356
317
|
assert.equal(state.lastError.code, "parse_failure");
|
|
357
318
|
});
|
|
358
319
|
|
|
359
|
-
test("
|
|
320
|
+
test("9c. fail-open — estimateTokens throws → {}", async () => {
|
|
360
321
|
seedState(tmpRoot, { checkCount: 4 });
|
|
361
322
|
const out = await runMeter({
|
|
362
323
|
payload: makePayload(),
|
|
363
324
|
projectRoot: tmpRoot,
|
|
364
|
-
env: {
|
|
325
|
+
env: {},
|
|
365
326
|
_loadConfig: () => makeConfig(),
|
|
366
327
|
_parseTranscript: async () => FAKE_PARSED,
|
|
367
|
-
|
|
328
|
+
_estimateTokens: () => {
|
|
368
329
|
throw new Error("sync boom");
|
|
369
330
|
},
|
|
370
331
|
});
|
|
371
332
|
|
|
372
333
|
assert.deepEqual(out, {});
|
|
373
334
|
const state = JSON.parse(fs.readFileSync(stateFile(tmpRoot), "utf8"));
|
|
374
|
-
assert.equal(state.lastError.code, "
|
|
335
|
+
assert.equal(state.lastError.code, "estimate_error");
|
|
375
336
|
});
|
|
376
337
|
|
|
377
|
-
test("
|
|
338
|
+
test("10. log never contains message content — only categories/counts", async () => {
|
|
378
339
|
seedState(tmpRoot, { checkCount: 4 });
|
|
379
340
|
const secretText = "SECRET_MESSAGE_CONTENT_XYZ";
|
|
380
341
|
|
|
381
342
|
await runMeter({
|
|
382
343
|
payload: makePayload(),
|
|
383
344
|
projectRoot: tmpRoot,
|
|
384
|
-
env: {
|
|
345
|
+
env: {},
|
|
385
346
|
_loadConfig: () => makeConfig(),
|
|
386
347
|
_parseTranscript: async () => ({
|
|
387
348
|
system: "",
|
|
@@ -389,7 +350,7 @@ test("11. log never contains message content — only categories/counts", async
|
|
|
389
350
|
{ role: "user", content: [{ type: "text", text: secretText }] },
|
|
390
351
|
],
|
|
391
352
|
}),
|
|
392
|
-
|
|
353
|
+
_estimateTokens: () => ({ inputTokens: 42 }),
|
|
393
354
|
});
|
|
394
355
|
|
|
395
356
|
const log = fs.readFileSync(logFile(tmpRoot), "utf8");
|
|
@@ -398,18 +359,18 @@ test("11. log never contains message content — only categories/counts", async
|
|
|
398
359
|
assert.match(log, /tokens=42/);
|
|
399
360
|
});
|
|
400
361
|
|
|
401
|
-
test("
|
|
362
|
+
test("11. clock injection — timestamp uses injected clock", async () => {
|
|
402
363
|
seedState(tmpRoot, { checkCount: 4 });
|
|
403
364
|
const fixed = new Date("2026-04-14T18:00:00.000Z");
|
|
404
365
|
|
|
405
366
|
await runMeter({
|
|
406
367
|
payload: makePayload(),
|
|
407
368
|
projectRoot: tmpRoot,
|
|
408
|
-
env: {
|
|
369
|
+
env: {},
|
|
409
370
|
clock: () => fixed,
|
|
410
371
|
_loadConfig: () => makeConfig(),
|
|
411
372
|
_parseTranscript: async () => FAKE_PARSED,
|
|
412
|
-
|
|
373
|
+
_estimateTokens: () => ({ inputTokens: 1000 }),
|
|
413
374
|
});
|
|
414
375
|
|
|
415
376
|
const state = JSON.parse(fs.readFileSync(stateFile(tmpRoot), "utf8"));
|