@hegemonart/get-design-done 1.59.7 → 1.59.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +2 -2
- package/.claude-plugin/plugin.json +1 -1
- package/CHANGELOG.md +59 -0
- package/README.md +2 -2
- package/SKILL.md +1 -1
- package/agents/design-authority-watcher.md +24 -5
- package/bin/gdd-graph +4 -1
- package/hooks/_hook-emit.js +113 -29
- package/hooks/budget-enforcer.ts +104 -5
- package/hooks/gdd-mcp-circuit-breaker.js +72 -3
- package/hooks/gdd-sessionstart-recap.js +23 -14
- package/hooks/hooks.json +2 -2
- package/package.json +2 -2
- package/reference/bandit-integration.md +13 -2
- package/reference/prices/claude.md +11 -0
- package/reference/runtime-models.md +9 -9
- package/reference/schemas/generated.d.ts +4 -0
- package/reference/schemas/runtime-models.schema.json +5 -0
- package/scripts/bootstrap.cjs +40 -8
- package/scripts/install.cjs +23 -1
- package/scripts/lib/bandit-router.cjs +47 -5
- package/scripts/lib/budget-enforcer.cjs +34 -5
- package/scripts/lib/detect/cli.cjs +13 -3
- package/scripts/lib/install/converters/cursor.cjs +11 -19
- package/scripts/lib/install/installer.cjs +72 -21
- package/scripts/lib/install/merge.cjs +31 -3
- package/scripts/lib/install/parse-runtime-models.cjs +9 -1
- package/scripts/lib/install/runtime-artifact-layout.cjs +42 -8
- package/scripts/lib/manifest/harnesses.json +29 -1
- package/scripts/lib/manifest/skills.json +1 -1
- package/scripts/lib/model-id.cjs +141 -0
- package/scripts/lib/session-runner/index.ts +87 -16
- package/scripts/skill-templates/bandit-reset/SKILL.md +2 -0
- package/scripts/skill-templates/bandit-status/SKILL.md +4 -1
- package/scripts/skill-templates/darkmode/SKILL.md +1 -1
- package/scripts/skill-templates/graphify/SKILL.md +6 -6
- package/scripts/skill-templates/quick/SKILL.md +3 -1
- package/scripts/skill-templates/reflect/SKILL.md +1 -1
- package/scripts/skill-templates/router/SKILL.md +4 -2
- package/sdk/cli/index.js +132 -55
- package/sdk/dashboard/data/source.cjs +50 -4
- package/sdk/event-stream/writer.ts +112 -30
- package/sdk/mcp/gdd-mcp/server.js +49 -36
- package/sdk/mcp/gdd-mcp/tools/shared.ts +20 -2
- package/sdk/mcp/gdd-state/server.js +107 -41
- package/sdk/primitives/lockfile.cjs +26 -5
- package/sdk/state/index.ts +91 -17
- package/sdk/state/lockfile.ts +47 -8
- package/skills/bandit-reset/SKILL.md +2 -0
- package/skills/bandit-status/SKILL.md +4 -1
- package/skills/darkmode/SKILL.md +1 -1
- package/skills/graphify/SKILL.md +6 -6
- package/skills/quick/SKILL.md +3 -1
- package/skills/reflect/SKILL.md +1 -1
- package/skills/router/SKILL.md +4 -2
|
@@ -25,6 +25,32 @@ const DEFAULT_FILE = path.join(REPO_ROOT, 'reference', 'mcp-budget.default.json'
|
|
|
25
25
|
|
|
26
26
|
const TRACKED_TOOL_RE = /^mcp__.*use_(figma|paper|pencil)$/;
|
|
27
27
|
|
|
28
|
+
// Bounded fallback window (ms) for counting volume when no session id is
|
|
29
|
+
// available on the payload. Without this, `total_calls` would count every row
|
|
30
|
+
// ever appended to the ledger — so after `max_calls_per_task` cumulative calls
|
|
31
|
+
// across ALL sessions for the lifetime of the file, every mutation is blocked
|
|
32
|
+
// forever (and a BLOCKER is appended to STATE.md each time). The volume gate is
|
|
33
|
+
// meant to be PER-TASK; this window keeps the fallback path per-task-ish so a
|
|
34
|
+
// long-lived user is never permanently locked out.
|
|
35
|
+
const SESSIONLESS_WINDOW_MS = 6 * 60 * 60 * 1000; // 6 hours
|
|
36
|
+
|
|
37
|
+
/**
 * Resolve the current session id for this hook invocation.
 *
 * Candidates are tried in priority order and the first non-empty string wins:
 *   1. `payload.session_id` (the field Claude Code passes),
 *   2. `payload.sessionId`  (tolerated camelCase alias),
 *   3. the `GDD_SESSION_ID` environment variable.
 *
 * Each candidate is validated on its own, so a malformed `session_id`
 * (e.g. a number or an object) cannot mask a usable `sessionId`.
 *
 * A non-null id lets the ledger reader match rows by session; null makes it
 * fall back to the bounded time window.
 *
 * @param {any} payload - Parsed hook payload; may be null or undefined.
 * @returns {string|null} The session id, or null when none is available.
 */
function resolveSessionId(payload) {
  const candidates = [
    payload?.session_id,
    payload?.sessionId,
    process.env.GDD_SESSION_ID,
  ];
  for (const candidate of candidates) {
    // Only a non-empty string is a usable id; anything else is skipped so the
    // next source in the priority list still gets a chance.
    if (typeof candidate === 'string' && candidate.length > 0) return candidate;
  }
  return null;
}
|
|
53
|
+
|
|
28
54
|
function loadBudget(cwd) {
|
|
29
55
|
let defaults = { max_calls_per_task: 30, max_consecutive_timeouts: 3, reset_on_success: true };
|
|
30
56
|
try {
|
|
@@ -106,7 +132,25 @@ function classifyOutcome(toolResponse) {
|
|
|
106
132
|
return 'error';
|
|
107
133
|
}
|
|
108
134
|
|
|
109
|
-
|
|
135
|
+
/**
|
|
136
|
+
* Read the ledger and compute the prior volume + consecutive-timeout state
|
|
137
|
+
* for the CURRENT task window only — not the whole-file lifetime.
|
|
138
|
+
*
|
|
139
|
+
* Window membership for a row:
|
|
140
|
+
* - If a current session id is known AND the row carries a `session` field:
|
|
141
|
+
* the row counts iff `row.session === sessionId`.
|
|
142
|
+
* - Otherwise (sessionless harness/tests, or legacy rows without `session`):
|
|
143
|
+
* the row counts iff its timestamp is within SESSIONLESS_WINDOW_MS of now.
|
|
144
|
+
*
|
|
145
|
+
* This bounds the volume count so a long-lived ledger can never permanently
|
|
146
|
+
* trip `volumeBreak`, while keeping rapid same-task calls (the common case and
|
|
147
|
+
* the existing test scenario) counted together.
|
|
148
|
+
*
|
|
149
|
+
* @param {string} filePath
|
|
150
|
+
* @param {string|null} sessionId
|
|
151
|
+
* @param {number} nowMs
|
|
152
|
+
*/
|
|
153
|
+
function readJsonlTail(filePath, sessionId, nowMs) {
|
|
110
154
|
if (!fs.existsSync(filePath)) return { lastRow: null, total_calls: 0, consecutive_timeouts: 0 };
|
|
111
155
|
let total = 0;
|
|
112
156
|
let lastTimeoutsChain = 0;
|
|
@@ -118,6 +162,25 @@ function readJsonlTail(filePath) {
|
|
|
118
162
|
if (!t) continue;
|
|
119
163
|
let row;
|
|
120
164
|
try { row = JSON.parse(t); } catch { continue; }
|
|
165
|
+
|
|
166
|
+
// Decide whether this row belongs to the current task window.
|
|
167
|
+
let inWindow;
|
|
168
|
+
if (sessionId !== null && typeof row.session === 'string' && row.session.length > 0) {
|
|
169
|
+
inWindow = row.session === sessionId;
|
|
170
|
+
} else {
|
|
171
|
+
const rowMs = typeof row.ts === 'string' ? Date.parse(row.ts) : NaN;
|
|
172
|
+
// Unparseable timestamps fall back to "in window" so we never
|
|
173
|
+
// under-count; a malformed-ts row is treated as recent.
|
|
174
|
+
inWindow = Number.isNaN(rowMs) ? true : (nowMs - rowMs) <= SESSIONLESS_WINDOW_MS;
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
if (!inWindow) {
|
|
178
|
+
// Out-of-window rows reset the streak — a new task/session must not
|
|
179
|
+
// inherit a stale consecutive-timeout chain.
|
|
180
|
+
lastTimeoutsChain = 0;
|
|
181
|
+
continue;
|
|
182
|
+
}
|
|
183
|
+
|
|
121
184
|
total++;
|
|
122
185
|
if (row.outcome === 'timeout') lastTimeoutsChain++;
|
|
123
186
|
else lastTimeoutsChain = 0;
|
|
@@ -158,7 +221,9 @@ async function main() {
|
|
|
158
221
|
const budget = loadBudget(cwd);
|
|
159
222
|
const ledgerPath = path.join(cwd, '.design', 'telemetry', 'mcp-budget.jsonl');
|
|
160
223
|
|
|
161
|
-
const
|
|
224
|
+
const sessionId = resolveSessionId(payload);
|
|
225
|
+
const nowMs = Date.now();
|
|
226
|
+
const prior = readJsonlTail(ledgerPath, sessionId, nowMs);
|
|
162
227
|
const outcome = classifyOutcome(payload?.tool_response);
|
|
163
228
|
const total_calls = prior.total_calls + 1;
|
|
164
229
|
const consecutive_timeouts = outcome === 'timeout'
|
|
@@ -166,12 +231,16 @@ async function main() {
|
|
|
166
231
|
: (budget.reset_on_success && outcome === 'success' ? 0 : prior.consecutive_timeouts);
|
|
167
232
|
|
|
168
233
|
const row = {
|
|
169
|
-
ts: new Date().toISOString(),
|
|
234
|
+
ts: new Date(nowMs).toISOString(),
|
|
170
235
|
tool,
|
|
171
236
|
outcome,
|
|
172
237
|
consecutive_timeouts,
|
|
173
238
|
total_calls,
|
|
174
239
|
};
|
|
240
|
+
// Stamp the session id so future calls can scope the volume window exactly.
|
|
241
|
+
// Omitted when unknown (keeps the row schema stable for the sessionless path,
|
|
242
|
+
// which relies on the time window instead).
|
|
243
|
+
if (sessionId !== null) row.session = sessionId;
|
|
175
244
|
appendJsonl(ledgerPath, row);
|
|
176
245
|
|
|
177
246
|
const timeoutBreak = consecutive_timeouts >= budget.max_consecutive_timeouts;
|
|
@@ -57,17 +57,21 @@ function detectHarness() {
|
|
|
57
57
|
}
|
|
58
58
|
|
|
59
59
|
// ---------------------------------------------------------------------------
|
|
60
|
-
//
|
|
60
|
+
// Event emit (best-effort) — delegate to the shared _hook-emit helper, which
|
|
61
|
+
// uses the SDK writer when loadable (modern Node) and an inline JSONL appender
|
|
62
|
+
// otherwise. The previous direct `require('../sdk/event-stream')` resolved to
|
|
63
|
+
// the `.ts` ESM index and threw under plain `node` on Node 22.0–22.17, leaving
|
|
64
|
+
// recap.emitted permanently no-op'd. emitEvent lands the line on every Node.
|
|
61
65
|
// ---------------------------------------------------------------------------
|
|
62
66
|
|
|
63
|
-
function
|
|
67
|
+
function getEmitEvent() {
|
|
64
68
|
try {
|
|
65
|
-
const m = require('
|
|
66
|
-
if (m && typeof m.
|
|
69
|
+
const m = require('./_hook-emit.js');
|
|
70
|
+
if (m && typeof m.emitEvent === 'function') return m.emitEvent;
|
|
67
71
|
} catch {
|
|
68
|
-
/* swallow —
|
|
72
|
+
/* swallow — telemetry is optional infrastructure */
|
|
69
73
|
}
|
|
70
|
-
return function
|
|
74
|
+
return function noopEmit(_ev) {
|
|
71
75
|
/* no-op */
|
|
72
76
|
};
|
|
73
77
|
}
|
|
@@ -87,9 +91,12 @@ function readStateMd(paths) {
|
|
|
87
91
|
}
|
|
88
92
|
|
|
89
93
|
const frontmatter = {};
|
|
90
|
-
|
|
94
|
+
// Tolerate CRLF line endings — the STATE.md mutator preserves CRLF, so a
|
|
95
|
+
// strict `\n`-only anchor fails to match the frontmatter block on Windows
|
|
96
|
+
// checkouts and the recap silently reports an empty cycle/decisions diff.
|
|
97
|
+
const fmMatch = body.match(/^---\r?\n([\s\S]*?)\r?\n---\r?\n/);
|
|
91
98
|
if (fmMatch) {
|
|
92
|
-
for (const line of fmMatch[1].split(
|
|
99
|
+
for (const line of fmMatch[1].split(/\r?\n/)) {
|
|
93
100
|
const m = line.match(/^(\w+):\s*(.+)$/);
|
|
94
101
|
if (m) frontmatter[m[1]] = m[2].trim();
|
|
95
102
|
}
|
|
@@ -273,9 +280,9 @@ async function main() {
|
|
|
273
280
|
}
|
|
274
281
|
|
|
275
282
|
// Best-effort event emit.
|
|
276
|
-
const
|
|
283
|
+
const emitEvent = getEmitEvent();
|
|
277
284
|
try {
|
|
278
|
-
|
|
285
|
+
emitEvent({
|
|
279
286
|
type: 'recap.emitted',
|
|
280
287
|
timestamp: new Date().toISOString(),
|
|
281
288
|
sessionId: process.env.GDD_SESSION_ID || 'sessionstart-hook',
|
|
@@ -300,9 +307,11 @@ async function main() {
|
|
|
300
307
|
process.exit(0);
|
|
301
308
|
}
|
|
302
309
|
|
|
303
|
-
try
|
|
304
|
-
|
|
305
|
-
|
|
310
|
+
// `main` is async: a sync try/catch cannot observe a rejected promise, so a
|
|
311
|
+
// throw inside an `await` boundary would escape as an unhandled rejection and
|
|
312
|
+
// exit non-zero — violating the silent-exit-0 contract for SessionStart hooks.
|
|
313
|
+
// Attach `.catch` so every failure mode is swallowed and we exit 0.
|
|
314
|
+
main().catch((err) => {
|
|
306
315
|
try {
|
|
307
316
|
process.stderr.write(
|
|
308
317
|
'[gdd-sessionstart-recap] uncaught: ' +
|
|
@@ -313,4 +322,4 @@ try {
|
|
|
313
322
|
/* swallow */
|
|
314
323
|
}
|
|
315
324
|
process.exit(0);
|
|
316
|
-
}
|
|
325
|
+
});
|
package/hooks/hooks.json
CHANGED
|
@@ -45,7 +45,7 @@
|
|
|
45
45
|
],
|
|
46
46
|
"PreToolUse": [
|
|
47
47
|
{
|
|
48
|
-
"matcher": "Agent",
|
|
48
|
+
"matcher": "Task|Agent",
|
|
49
49
|
"hooks": [
|
|
50
50
|
{
|
|
51
51
|
"type": "command",
|
|
@@ -119,7 +119,7 @@
|
|
|
119
119
|
]
|
|
120
120
|
},
|
|
121
121
|
{
|
|
122
|
-
"matcher": "Agent",
|
|
122
|
+
"matcher": "Task|Agent",
|
|
123
123
|
"hooks": [
|
|
124
124
|
{
|
|
125
125
|
"type": "command",
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@hegemonart/get-design-done",
|
|
3
|
-
"version": "1.59.
|
|
3
|
+
"version": "1.59.9",
|
|
4
4
|
"description": "A design-quality pipeline for AI coding agents: brief, explore, plan, design, and verify UI work against your design system.",
|
|
5
5
|
"author": "Hegemon",
|
|
6
6
|
"homepage": "https://github.com/hegemonart/get-design-done",
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
},
|
|
11
11
|
"license": "MIT",
|
|
12
12
|
"engines": {
|
|
13
|
-
"node": ">=22"
|
|
13
|
+
"node": ">=22.6.0"
|
|
14
14
|
},
|
|
15
15
|
"files": [
|
|
16
16
|
".claude-plugin/",
|
|
@@ -10,7 +10,7 @@ description: Bandit posterior + production-integration shim cheat sheet - signat
|
|
|
10
10
|
|
|
11
11
|
**Phase 27.5 (v1.27.5).** Reference for the bandit production-integration surface. Authoring or modifying a caller of the bandit posterior? Debugging a routing decision at the code level? Start here.
|
|
12
12
|
|
|
13
|
-
For ops-level guidance (when bandit fires, how to disable, posterior inspection),
|
|
13
|
+
For ops-level guidance (when bandit fires, how to disable, posterior inspection), use the operator surfaces: `/gdd:bandit-status` (read-only per-arm posterior snapshots) and `/gdd:bandit-reset` (confirm-then-reset; not read-only). The `adaptive_mode` gate below covers enable/disable.
|
|
14
14
|
|
|
15
15
|
In-scope modules:
|
|
16
16
|
|
|
@@ -104,6 +104,17 @@ Phase 27.5 passes `wallTimeMs: 0` always (D-08 unchanged from Phase 23.5).
|
|
|
104
104
|
|
|
105
105
|
---
|
|
106
106
|
|
|
107
|
+
## Where adaptive routing actually learns
|
|
108
|
+
|
|
109
|
+
This is a deliberate design boundary, not a bug - read it before assuming the bandit "learns" in every runtime.
|
|
110
|
+
|
|
111
|
+
- **The posterior is updated only on the SDK / headless path.** `recordOutcome` (the learning update that moves `alpha`/`beta`) is called from `scripts/lib/session-runner/index.ts` after a session terminates. That path runs in the SDK / headless `session-runner` execution model. It is the only place a reward is folded back into the posterior.
|
|
112
|
+
- **In interactive Claude Code with `adaptive_mode: full`, the bandit samples but does not currently learn from in-session outcomes.** When a plugin/interactive run consults the bandit, `consultBandit` performs a Thompson sample from the *configured priors* (and whatever the SDK path has already written), and `pull()` bumps `last_used` + `count` - but no `recordOutcome` fires from an interactive Claude Code hook, so the success/fail posterior does not move within the interactive session. With an un-seeded posterior, sampling therefore reflects the informed `TIER_PRIOR` (which leans toward the higher tiers, e.g. opus). Wiring `recordOutcome` into an interactive hook is intentionally out of scope for this phase.
|
|
113
|
+
- **`adaptive_mode` defaults to `static` - the feature is opt-in.** Per `scripts/lib/adaptive-mode.cjs`, the default mode is `static`, in which the bandit is fully silent (no reads, no writes) and `default-tier:` is authoritative. Adaptive routing only engages when an operator explicitly sets `adaptive_mode: full` in `.design/budget.json`.
|
|
114
|
+
- **Contextual dimensions are supplied by the caller, not inferred here.** The `bin` (glob-count bucket via `binForGlobCount`) and `delegate` dimensions are passed in at the call site; the router does not derive them from ambient session state.
|
|
115
|
+
|
|
116
|
+
Net: enable `adaptive_mode: full` and run the SDK/headless `session-runner` path to accumulate a posterior that genuinely reflects observed outcomes. In interactive Claude Code, `full` mode gives you prior-driven Thompson sampling, not in-session reinforcement.
|
|
117
|
+
|
|
107
118
|
## `adaptive_mode` gate semantics
|
|
108
119
|
|
|
109
120
|
Phase 23.5 ladder (D-07):
|
|
@@ -154,7 +165,7 @@ Phase 27.5 wires these consumers:
|
|
|
154
165
|
|
|
155
166
|
## Cross-references
|
|
156
167
|
|
|
157
|
-
- `
|
|
168
|
+
- `/gdd:bandit-status` (read-only) + `/gdd:bandit-reset` (confirm-then-reset) - operator surfaces (when bandit fires, posterior inspection, reset). Disable/enable is the `adaptive_mode` gate in `.design/budget.json` (see above).
|
|
158
169
|
- `reference/peer-protocols.md` - Phase 27 ACP/ASP cheat sheet (peer-CLI delegation transport).
|
|
159
170
|
- `scripts/lib/bandit-router.cjs` - Phase 23.5 primitives surface.
|
|
160
171
|
- `scripts/lib/bandit-router/integration.cjs` - Phase 27.5 production shim.
|
|
@@ -13,6 +13,17 @@
|
|
|
13
13
|
| claude-sonnet-4-7 | sonnet | 3.00 | 15.00 | 0.30 |
|
|
14
14
|
| claude-sonnet-4-6 | sonnet | 3.00 | 15.00 | 0.30 |
|
|
15
15
|
| claude-opus-4-7 | opus | 15.00 | 75.00 | 1.50 |
|
|
16
|
+
| claude-opus-4-8 | opus | 15.00 | 75.00 | 1.50 |
|
|
17
|
+
|
|
18
|
+
> **>200k-input (1M-context) pricing note.** The rates above are the
|
|
19
|
+
> standard (≤200k-input) per-1M-token prices. Anthropic's flagship
|
|
20
|
+
> `claude-opus-4-8` ships a 1M-context (`[1m]`) variant; long-context
|
|
21
|
+
> requests above the 200k-input threshold may be billed at a higher
|
|
22
|
+
> tiered rate. This table tracks only the standard tier today; the
|
|
23
|
+
> >200k tiered figure will be added as a separate row/column once
|
|
24
|
+
> Anthropic publishes it. The parser is positional-by-header and
|
|
25
|
+
> tolerates right-edge columns, so a future `>200k_input_per_1m`
|
|
26
|
+
> column can be appended without breaking cost lookups.
|
|
16
27
|
|
|
17
28
|
## size_budget → conservative token ranges
|
|
18
29
|
|
|
@@ -44,21 +44,21 @@ Anthropic's first-party runtime. Public tier docs at https://docs.anthropic.com/
|
|
|
44
44
|
{
|
|
45
45
|
"id": "claude",
|
|
46
46
|
"tier_to_model": {
|
|
47
|
-
"opus": { "model": "claude-opus-4-
|
|
48
|
-
"sonnet": { "model": "claude-sonnet-4-6" },
|
|
49
|
-
"haiku": { "model": "claude-haiku-4-5" }
|
|
47
|
+
"opus": { "model": "claude-opus-4-8", "context_window": 1000000 },
|
|
48
|
+
"sonnet": { "model": "claude-sonnet-4-6", "context_window": 200000 },
|
|
49
|
+
"haiku": { "model": "claude-haiku-4-5", "context_window": 200000 }
|
|
50
50
|
},
|
|
51
51
|
"reasoning_class_to_model": {
|
|
52
|
-
"high": { "model": "claude-opus-4-
|
|
53
|
-
"medium": { "model": "claude-sonnet-4-6" },
|
|
54
|
-
"low": { "model": "claude-haiku-4-5" }
|
|
52
|
+
"high": { "model": "claude-opus-4-8", "context_window": 1000000 },
|
|
53
|
+
"medium": { "model": "claude-sonnet-4-6", "context_window": 200000 },
|
|
54
|
+
"low": { "model": "claude-haiku-4-5", "context_window": 200000 }
|
|
55
55
|
},
|
|
56
56
|
"provenance": [
|
|
57
57
|
{
|
|
58
58
|
"source_url": "https://docs.anthropic.com/en/docs/about-claude/models",
|
|
59
|
-
"retrieved_at": "2026-
|
|
60
|
-
"last_validated_cycle": "2026-
|
|
61
|
-
"note": "Anthropic public model catalog — first-party runtime."
|
|
59
|
+
"retrieved_at": "2026-06-10T00:00:00.000Z",
|
|
60
|
+
"last_validated_cycle": "2026-06-10-v1.59.9",
|
|
61
|
+
"note": "Anthropic public model catalog — first-party runtime. Opus tier moved to claude-opus-4-8 (1M-context [1m] variant) this cycle."
|
|
62
62
|
}
|
|
63
63
|
]
|
|
64
64
|
}
|
|
@@ -1115,6 +1115,10 @@ export interface ModelRow {
|
|
|
1115
1115
|
* Optional internal/provider model ID for runtimes whose API identifiers differ from the public name (D-03).
|
|
1116
1116
|
*/
|
|
1117
1117
|
provider_model_id?: string;
|
|
1118
|
+
/**
|
|
1119
|
+
* Optional context-window size (max tokens) for this model, recorded as machine-readable metadata. The Anthropic opus tier ships the 1M-context [1m] variant (1000000). Recorded as fact this cycle; not yet a budgeting driver (deferred — no consumer is wired).
|
|
1120
|
+
*/
|
|
1121
|
+
context_window?: number;
|
|
1118
1122
|
}
|
|
1119
1123
|
|
|
1120
1124
|
export type RuntimeModelsSchema = RuntimeModelsTierToModelMap;
|
|
@@ -121,6 +121,11 @@
|
|
|
121
121
|
"type": "string",
|
|
122
122
|
"minLength": 1,
|
|
123
123
|
"description": "Optional internal/provider model ID for runtimes whose API identifiers differ from the public name (D-03)."
|
|
124
|
+
},
|
|
125
|
+
"context_window": {
|
|
126
|
+
"type": "integer",
|
|
127
|
+
"minimum": 1,
|
|
128
|
+
"description": "Optional context-window size (max tokens) for this model, recorded as machine-readable metadata. The Anthropic opus tier ships the 1M-context [1m] variant (1000000). Recorded as fact this cycle; not yet a budgeting driver (deferred — no consumer is wired)."
|
|
124
129
|
}
|
|
125
130
|
}
|
|
126
131
|
}
|
package/scripts/bootstrap.cjs
CHANGED
|
@@ -148,6 +148,14 @@ function filesEqual(a, b) {
|
|
|
148
148
|
}
|
|
149
149
|
}
|
|
150
150
|
|
|
151
|
+
/**
|
|
152
|
+
* Network timeout (ms) for the git clone/pull. SessionStart hooks must never
|
|
153
|
+
* block the harness: without a timeout, a hung network connection would stall
|
|
154
|
+
* the whole session-start sequence indefinitely. spawnSync kills the child
|
|
155
|
+
* with `killSignal` once this elapses and reports it as a failure.
|
|
156
|
+
*/
|
|
157
|
+
const GIT_TIMEOUT_MS = 15000;
|
|
158
|
+
|
|
151
159
|
/**
|
|
152
160
|
* Match the .sh `clone_or_update`:
|
|
153
161
|
* - target/.git exists → `git -C target pull --quiet --ff-only`, log on fail
|
|
@@ -157,8 +165,14 @@ function filesEqual(a, b) {
|
|
|
157
165
|
* We invoke the `git` CLI directly via spawnSync. spawnSync('git', …) is fine —
|
|
158
166
|
* the prohibition is on spawnSync('bash', …).
|
|
159
167
|
*
|
|
168
|
+
* Returns true ONLY when the repo is in a good post-condition (pull/clone
|
|
169
|
+
* succeeded, or a pre-existing non-git dir we intentionally skip). Returns
|
|
170
|
+
* false when a network op failed or timed out (or when the clone arguments were refused as suspicious) — so the caller can withhold the
|
|
171
|
+
* success marker and retry next session instead of recording failure as done.
|
|
172
|
+
*
|
|
160
173
|
* @param {string} repoUrl
|
|
161
174
|
* @param {string} target
|
|
175
|
+
* @returns {boolean} success
|
|
162
176
|
*/
|
|
163
177
|
function cloneOrUpdate(repoUrl, target) {
|
|
164
178
|
let isGitCheckout = false;
|
|
@@ -177,16 +191,22 @@ function cloneOrUpdate(repoUrl, target) {
|
|
|
177
191
|
const r = spawnSync('git', ['-C', target, 'pull', '--quiet', '--ff-only'], {
|
|
178
192
|
stdio: ['ignore', 'ignore', 'ignore'],
|
|
179
193
|
windowsHide: true,
|
|
194
|
+
timeout: GIT_TIMEOUT_MS,
|
|
195
|
+
killSignal: 'SIGKILL',
|
|
180
196
|
});
|
|
181
197
|
if (r.error || r.status !== 0) {
|
|
182
|
-
|
|
198
|
+
const why = r.error && r.error.code === 'ETIMEDOUT' ? 'timed out' : 'failed';
|
|
199
|
+
log(`pull ${why} for ${target} (continuing)`);
|
|
200
|
+
return false;
|
|
183
201
|
}
|
|
184
|
-
return;
|
|
202
|
+
return true;
|
|
185
203
|
}
|
|
186
204
|
|
|
187
205
|
if (targetExists) {
|
|
188
206
|
log(`${target} exists and is not a git checkout — skipping`);
|
|
189
|
-
|
|
207
|
+
// A pre-existing non-git dir is a stable post-condition, not a failure:
|
|
208
|
+
// re-running won't change it, so don't force a retry every session.
|
|
209
|
+
return true;
|
|
190
210
|
}
|
|
191
211
|
|
|
192
212
|
// Defense in depth: refuse repoUrl / target arguments that look like git
|
|
@@ -196,7 +216,7 @@ function cloneOrUpdate(repoUrl, target) {
|
|
|
196
216
|
if (typeof repoUrl !== 'string' || repoUrl.startsWith('-') ||
|
|
197
217
|
typeof target !== 'string' || target.startsWith('-')) {
|
|
198
218
|
log(`refusing suspicious clone args for ${repoUrl} -> ${target}`);
|
|
199
|
-
return;
|
|
219
|
+
return false;
|
|
200
220
|
}
|
|
201
221
|
|
|
202
222
|
log(`cloning ${repoUrl} -> ${target}`);
|
|
@@ -205,10 +225,15 @@ function cloneOrUpdate(repoUrl, target) {
|
|
|
205
225
|
const r = spawnSync('git', ['clone', '--quiet', '--depth', '1', '--', repoUrl, target], {
|
|
206
226
|
stdio: ['ignore', 'ignore', 'ignore'],
|
|
207
227
|
windowsHide: true,
|
|
228
|
+
timeout: GIT_TIMEOUT_MS,
|
|
229
|
+
killSignal: 'SIGKILL',
|
|
208
230
|
});
|
|
209
231
|
if (r.error || r.status !== 0) {
|
|
210
|
-
|
|
232
|
+
const why = r.error && r.error.code === 'ETIMEDOUT' ? 'timed out' : 'failed';
|
|
233
|
+
log(`clone ${why} for ${repoUrl}`);
|
|
234
|
+
return false;
|
|
211
235
|
}
|
|
236
|
+
return true;
|
|
212
237
|
}
|
|
213
238
|
|
|
214
239
|
/**
|
|
@@ -315,7 +340,7 @@ function run(opts = {}) {
|
|
|
315
340
|
}
|
|
316
341
|
|
|
317
342
|
// Required library: VoltAgent/awesome-design-md.
|
|
318
|
-
cloneOrUpdate(
|
|
343
|
+
const repoOk = cloneOrUpdate(
|
|
319
344
|
'https://github.com/VoltAgent/awesome-design-md.git',
|
|
320
345
|
ctx.awesomeRepoTarget
|
|
321
346
|
);
|
|
@@ -332,8 +357,15 @@ function run(opts = {}) {
|
|
|
332
357
|
// Phase 10.1: .design/budget.json + .design/telemetry/ (D-12).
|
|
333
358
|
ensureDesignDir(cwd);
|
|
334
359
|
|
|
335
|
-
// Record success
|
|
336
|
-
|
|
360
|
+
// Record success ONLY when the network provisioning actually succeeded.
|
|
361
|
+
// Writing the marker unconditionally records a failed clone as "done" and
|
|
362
|
+
// never retries — leaving the required library permanently absent. Gating on
|
|
363
|
+
// repoOk means a transient network failure/timeout is retried next session.
|
|
364
|
+
if (repoOk) {
|
|
365
|
+
copyManifestToMarker(ctx.manifest, ctx.marker);
|
|
366
|
+
} else {
|
|
367
|
+
log('skipping success marker — provisioning incomplete, will retry next session');
|
|
368
|
+
}
|
|
337
369
|
|
|
338
370
|
return 0;
|
|
339
371
|
}
|
package/scripts/install.cjs
CHANGED
|
@@ -211,6 +211,28 @@ async function main() {
|
|
|
211
211
|
}
|
|
212
212
|
runtimes = picked.runtimes;
|
|
213
213
|
if (picked.location) location = picked.location;
|
|
214
|
+
} else if (uninstall) {
|
|
215
|
+
// B4 fix (Phase 59.8): bare `--uninstall` in a non-TTY context must NOT
|
|
216
|
+
// silently default to removing claude. The interactive path is the only
|
|
217
|
+
// safe way to pick what to remove without an explicit flag; in non-TTY
|
|
218
|
+
// we refuse and require an explicit runtime flag so a scripted/CI
|
|
219
|
+
// invocation can never destroy an install the operator didn't name.
|
|
220
|
+
// (See the comment at shouldUseInteractive: bare --uninstall is meant to
|
|
221
|
+
// trigger the interactive select-which-to-remove flow.)
|
|
222
|
+
process.stderr.write(
|
|
223
|
+
[
|
|
224
|
+
'Refusing to uninstall: no runtime specified and not running in an',
|
|
225
|
+
'interactive terminal.',
|
|
226
|
+
'',
|
|
227
|
+
'Re-run with an explicit runtime flag, e.g.:',
|
|
228
|
+
' npx @hegemonart/get-design-done --uninstall --claude',
|
|
229
|
+
' npx @hegemonart/get-design-done --uninstall --all',
|
|
230
|
+
'',
|
|
231
|
+
'Run with --help to list available runtime flags.',
|
|
232
|
+
'',
|
|
233
|
+
].join('\n'),
|
|
234
|
+
);
|
|
235
|
+
process.exit(2);
|
|
214
236
|
} else {
|
|
215
237
|
// Non-TTY zero-flag fallback: back-compat with v1.23.5 behaviour.
|
|
216
238
|
runtimes = ['claude'];
|
|
@@ -359,7 +381,7 @@ async function maybeNudgePeerCli({ flags }) {
|
|
|
359
381
|
'✓ Detected peer CLIs: ' + detectedDisplay,
|
|
360
382
|
'',
|
|
361
383
|
'gdd v1.27.0 introduced optional peer-CLI delegation. With your',
|
|
362
|
-
'
|
|
384
|
+
"agents' frontmatter `delegate_to:` set, gdd can route specific",
|
|
363
385
|
'roles through these peer CLIs (cost or quality wins per Phase 23.5',
|
|
364
386
|
'bandit). You can change this anytime via .design/config.json.',
|
|
365
387
|
'',
|
|
@@ -38,7 +38,9 @@
|
|
|
38
38
|
* - The `prior_class` value is persisted on the arm so subsequent
|
|
39
39
|
* reads + decay calculations preserve it (forward-compat).
|
|
40
40
|
*
|
|
41
|
-
* Atomic .tmp + rename
|
|
41
|
+
* Atomic per-pid-unique .tmp + rename (Phase 59-8 C2: unique tmp name per
|
|
42
|
+
* process so parallel waves never interleave writes on one scratch file).
|
|
43
|
+
* Discounted Thompson via per-arm time-decay
|
|
42
44
|
* factor `rho^days_since_last_use` applied at sample time, not stored.
|
|
43
45
|
*
|
|
44
46
|
* Reward computation (D-06): two-stage lexicographic — UNCHANGED.
|
|
@@ -57,6 +59,17 @@ const path = require('node:path');
|
|
|
57
59
|
const DEFAULT_POSTERIOR_PATH = '.design/telemetry/posterior.json';
|
|
58
60
|
const SCHEMA_VERSION = '1.0.0';
|
|
59
61
|
|
|
62
|
+
// C2 fix (Phase 59-8): monotonic per-process counter for tmp-file naming.
|
|
63
|
+
// Combined with process.pid it guarantees that two concurrent writers — even
|
|
64
|
+
// within the same process, even firing in the same millisecond — never target
|
|
65
|
+
// the same `.tmp` path. The old fixed `p + '.tmp'` name let parallel agent
|
|
66
|
+
// waves interleave partial writes on one tmp file, producing truncated JSON
|
|
67
|
+
// that loadPosterior() then silently reset to an empty posterior (losing all
|
|
68
|
+
// learned arms). Unique tmp + atomic rename makes a half-written file
|
|
69
|
+
// invisible to readers: rename is atomic on the same filesystem, so a reader
|
|
70
|
+
// sees either the old complete file or the new complete file, never a partial.
|
|
71
|
+
let _tmpCounter = 0;
|
|
72
|
+
|
|
60
73
|
// Decay factor — roughly a 60-day half-life (0.988^57 ≈ 0.5 with the per-day exponent).
|
|
61
74
|
const DEFAULT_DECAY = 0.988;
|
|
62
75
|
|
|
@@ -136,6 +149,12 @@ function loadPosterior(opts = {}) {
|
|
|
136
149
|
}
|
|
137
150
|
return data;
|
|
138
151
|
} catch {
|
|
152
|
+
// Corrupt-JSON recovery (preserved, Phase 59-8 C2): fall back to an empty
|
|
153
|
+
// posterior. With the per-pid unique-tmp + atomic-rename write discipline
|
|
154
|
+
// (see savePosterior), a reader can no longer observe a half-written file
|
|
155
|
+
// — rename publishes the complete file in one step — so this branch should
|
|
156
|
+
// now only fire on genuine on-disk corruption (e.g. external truncation),
|
|
157
|
+
// not on a write/read race during a parallel agent wave.
|
|
139
158
|
return { schema_version: SCHEMA_VERSION, generated_at: new Date().toISOString(), arms: [] };
|
|
140
159
|
}
|
|
141
160
|
}
|
|
@@ -159,9 +178,19 @@ function savePosterior(posterior, opts = {}) {
|
|
|
159
178
|
const p = resolvePath(opts);
|
|
160
179
|
fs.mkdirSync(path.dirname(p), { recursive: true });
|
|
161
180
|
posterior.generated_at = new Date().toISOString();
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
181
|
+
// C2 fix (Phase 59-8): per-process-unique tmp name (pid + monotonic
|
|
182
|
+
// counter) so concurrent writers never collide on the same scratch file.
|
|
183
|
+
// The atomic rename then publishes the fully-written file in one step.
|
|
184
|
+
const tmp = `${p}.${process.pid}.${_tmpCounter++}.tmp`;
|
|
185
|
+
try {
|
|
186
|
+
fs.writeFileSync(tmp, JSON.stringify(posterior, null, 2));
|
|
187
|
+
fs.renameSync(tmp, p);
|
|
188
|
+
} catch (err) {
|
|
189
|
+
// Best-effort cleanup of the orphaned tmp on failure so a crashed
|
|
190
|
+
// write never leaves stale scratch files behind. ENOENT is fine.
|
|
191
|
+
try { fs.unlinkSync(tmp); } catch { /* already gone */ }
|
|
192
|
+
throw err;
|
|
193
|
+
}
|
|
165
194
|
return p;
|
|
166
195
|
}
|
|
167
196
|
|
|
@@ -347,7 +376,20 @@ function decayArm(arm, opts = {}) {
|
|
|
347
376
|
const factor = Math.pow(decay, days);
|
|
348
377
|
// Decay shrinks both α and β toward the prior. We never go below the
|
|
349
378
|
// initial prior strength — caller can rebuild a fresh prior via reset().
|
|
350
|
-
|
|
379
|
+
//
|
|
380
|
+
// C1 fix (Phase 59-8): decay MUST target the SAME prior the arm was
|
|
381
|
+
// bootstrapped with. The arm persists `prior_class` (Phase 29 Plan 06 /
|
|
382
|
+
// D-04), so pass it through to priorFor — otherwise a promoted-incubator
|
|
383
|
+
// arm (Beta(2,8)) would drift back toward the informed TIER_PRIOR while
|
|
384
|
+
// idle, undoing the D-04 preferential-selection suppression. Default-class
|
|
385
|
+
// arms have no `prior_class` field, so `arm.prior_class` is undefined and
|
|
386
|
+
// priorFor falls through to the Phase 23.5 informed prior (byte-for-byte
|
|
387
|
+
// unchanged).
|
|
388
|
+
const { alpha: pa, beta: pb } = priorFor(
|
|
389
|
+
arm.tier,
|
|
390
|
+
opts.strength ?? PRIOR_STRENGTH,
|
|
391
|
+
arm.prior_class,
|
|
392
|
+
);
|
|
351
393
|
return {
|
|
352
394
|
alpha: pa + factor * Math.max(0, arm.alpha - pa),
|
|
353
395
|
beta: pb + factor * Math.max(0, arm.beta - pb),
|
|
@@ -52,6 +52,8 @@
|
|
|
52
52
|
const fs = require('node:fs');
|
|
53
53
|
const path = require('node:path');
|
|
54
54
|
|
|
55
|
+
const { normalizeModelId, tierForModelId } = require('./model-id.cjs');
|
|
56
|
+
|
|
55
57
|
const REPO_ROOT_GUESS = path.resolve(__dirname, '..', '..');
|
|
56
58
|
const DEFAULT_RUNTIME_ID = 'claude';
|
|
57
59
|
const VALID_TIERS = Object.freeze(['opus', 'sonnet', 'haiku']);
|
|
@@ -326,10 +328,18 @@ function computeCost(args, opts) {
|
|
|
326
328
|
tokens_out: Number(args.tokens_out || 0),
|
|
327
329
|
cache_hit: args.cache_hit === true,
|
|
328
330
|
};
|
|
331
|
+
// Normalize the model id (strip a trailing `[1m]`/`[200k]` variant suffix)
|
|
332
|
+
// BEFORE table lookup so e.g. `claude-opus-4-8[1m]` matches the
|
|
333
|
+
// `claude-opus-4-8` row. The variant encodes a context-window SKU; the
|
|
334
|
+
// current price tables are keyed on the base id.
|
|
335
|
+
const rawModelId = typeof args.model_id === 'string' && args.model_id.length > 0
|
|
336
|
+
? args.model_id
|
|
337
|
+
: null;
|
|
338
|
+
const normalizedModelId = rawModelId !== null
|
|
339
|
+
? (normalizeModelId(rawModelId).base || rawModelId)
|
|
340
|
+
: null;
|
|
329
341
|
const q = {
|
|
330
|
-
model_id:
|
|
331
|
-
? args.model_id
|
|
332
|
-
: null,
|
|
342
|
+
model_id: normalizedModelId,
|
|
333
343
|
tier: typeof args.tier === 'string' && args.tier.length > 0
|
|
334
344
|
? args.tier
|
|
335
345
|
: null,
|
|
@@ -365,14 +375,33 @@ function computeCost(args, opts) {
|
|
|
365
375
|
}
|
|
366
376
|
}
|
|
367
377
|
|
|
368
|
-
// Branch 5: nothing matched.
|
|
378
|
+
// Branch 5: nothing matched. Rather than silently returning a null cost
|
|
379
|
+
// (which downstream aggregators treat as $0 — a frontier model billed as
|
|
380
|
+
// free), compute a CONSERVATIVE CEILING at the OPUS rate from the claude
|
|
381
|
+
// price table. An unknown/new model is thus priced LOUDLY (cost_estimated)
|
|
382
|
+
// and CONSERVATIVELY (opus ceiling), never $0 and never the sonnet rate.
|
|
383
|
+
const reason = rows.length === 0 ? 'runtime_table_missing' : 'model_not_found';
|
|
384
|
+
const claudeRows = loadPriceTable(DEFAULT_RUNTIME_ID, opts);
|
|
385
|
+
const opusRow = findPriceRow(claudeRows, { tier: 'opus' });
|
|
386
|
+
if (opusRow !== null) {
|
|
387
|
+
return {
|
|
388
|
+
cost_usd: applyFormula(opusRow, tokens),
|
|
389
|
+
model: normalizedModelId,
|
|
390
|
+
tier: 'opus',
|
|
391
|
+
runtime_used: DEFAULT_RUNTIME_ID,
|
|
392
|
+
fallback: true,
|
|
393
|
+
reason,
|
|
394
|
+
cost_estimated: true,
|
|
395
|
+
};
|
|
396
|
+
}
|
|
397
|
+
// Even the opus row is unavailable → genuinely cannot price. Keep null.
|
|
369
398
|
return {
|
|
370
399
|
cost_usd: null,
|
|
371
400
|
model: null,
|
|
372
401
|
tier: q.tier,
|
|
373
402
|
runtime_used: null,
|
|
374
403
|
fallback: false,
|
|
375
|
-
reason
|
|
404
|
+
reason,
|
|
376
405
|
};
|
|
377
406
|
}
|
|
378
407
|
|