@hegemonart/get-design-done 1.32.0 → 1.33.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +2 -2
- package/.claude-plugin/plugin.json +1 -1
- package/CHANGELOG.md +57 -0
- package/NOTICE +43 -5
- package/README.md +13 -0
- package/package.json +4 -2
- package/reference/gdd-runtime-audit.md +111 -0
- package/reference/gdd-threat-model.md +336 -0
- package/reference/registry.json +14 -0
- package/reference/schemas/pressure-scenario.schema.json +69 -0
- package/scripts/lib/peer-cli/acp-client.cjs +9 -1
- package/scripts/lib/peer-cli/asp-client.cjs +10 -1
- package/scripts/lib/peer-cli/sanitize-env.cjs +198 -0
- package/scripts/lib/redact.cjs +20 -1
- package/scripts/lib/skill-behavior/runner.cjs +187 -0
- package/scripts/lib/skill-behavior/stub-invoker.cjs +95 -0
- package/scripts/lib/skill-behavior/telemetry.cjs +379 -0
- package/scripts/lib/transports/ws.cjs +67 -3
- package/sdk/mcp/gdd-state/schemas/add_blocker.schema.json +2 -0
- package/sdk/mcp/gdd-state/schemas/add_decision.schema.json +1 -0
- package/sdk/mcp/gdd-state/schemas/add_must_have.schema.json +1 -0
- package/sdk/mcp/gdd-state/schemas/checkpoint.schema.json +1 -0
- package/sdk/mcp/gdd-state/schemas/frontmatter_update.schema.json +1 -1
- package/sdk/mcp/gdd-state/schemas/get.schema.json +2 -1
- package/sdk/mcp/gdd-state/schemas/probe_connections.schema.json +2 -0
- package/sdk/mcp/gdd-state/schemas/resolve_blocker.schema.json +1 -0
- package/sdk/mcp/gdd-state/server.js +137 -48
- package/sdk/mcp/gdd-state/tools/add_blocker.ts +2 -0
- package/sdk/mcp/gdd-state/tools/add_decision.ts +2 -0
- package/sdk/mcp/gdd-state/tools/add_must_have.ts +2 -0
- package/sdk/mcp/gdd-state/tools/checkpoint.ts +2 -0
- package/sdk/mcp/gdd-state/tools/frontmatter_update.ts +2 -0
- package/sdk/mcp/gdd-state/tools/get.ts +2 -0
- package/sdk/mcp/gdd-state/tools/probe_connections.ts +2 -0
- package/sdk/mcp/gdd-state/tools/resolve_blocker.ts +2 -0
- package/sdk/mcp/gdd-state/tools/set_status.ts +2 -0
- package/sdk/mcp/gdd-state/tools/shared.ts +117 -7
- package/sdk/mcp/gdd-state/tools/transition_stage.ts +2 -0
- package/sdk/mcp/gdd-state/tools/update_progress.ts +2 -0
- package/scripts/lib/cli/index.ts +0 -29
- package/scripts/lib/error-classifier.cjs +0 -29
- package/scripts/lib/event-stream/index.ts +0 -29
- package/scripts/lib/gdd-errors/index.ts +0 -29
- package/scripts/lib/gdd-state/index.ts +0 -29
- package/scripts/lib/iteration-budget.cjs +0 -29
- package/scripts/lib/jittered-backoff.cjs +0 -29
- package/scripts/lib/lockfile.cjs +0 -29
- package/scripts/mcp-servers/gdd-mcp/server.ts +0 -35
- package/scripts/mcp-servers/gdd-state/server.ts +0 -34
|
@@ -0,0 +1,379 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* telemetry.cjs — reflector-telemetry layer for the pressure-scenario harness
|
|
3
|
+
* (Plan 33-05). The third leg of Phase 33: it CONSUMES the 33-01 runner result
|
|
4
|
+
* ({ scenario, target_skill, pass, compliance_hits, violation_hits }), records a
|
|
5
|
+
* scenario-failure event to a JSONL artifact, detects SUSTAINED failure, and on
|
|
6
|
+
* sustained failure produces a PROPOSE-ONLY reflector content-edit draft via the
|
|
7
|
+
* same incubator/apply-reflections surface the shipped reflector-kfm-proposer
|
|
8
|
+
* uses.
|
|
9
|
+
*
|
|
10
|
+
* Why this module exists: behavior tests only matter if a sustained failure
|
|
11
|
+
* prompts a content fix. This closes that loop — a failing run is recorded; when
|
|
12
|
+
* a scenario fails ≥3 of its last 10 runs (D-07 threshold), the reflector
|
|
13
|
+
* proposes a skill-content edit for human review via /gdd:apply-reflections. The
|
|
14
|
+
* proposal NEVER auto-edits a skill (Phase 11/29 propose-only SC; Phase 33
|
|
15
|
+
* out-of-scope: "Auto-applying reflector-proposed skill edits — propose-only").
|
|
16
|
+
*
|
|
17
|
+
* Decisions honored:
|
|
18
|
+
* * D-07 — telemetry → .design/telemetry/skill-behavior.jsonl (runtime
|
|
19
|
+
* artifact, gitignored, local); sustained-failure signal = ≥3 of the last 10
|
|
20
|
+
* runs failing for a scenario; reflector consumption is STUB-tested (no live
|
|
21
|
+
* runs — all paths + the clock are injectable so tests use a tmp dir).
|
|
22
|
+
* * D-06 — this module is exercised by the DEFAULT suite (no API key / no LLM).
|
|
23
|
+
*
|
|
24
|
+
* Injectability / purity:
|
|
25
|
+
* The JSONL path, the incubator root, `fs`, and the clock (`now`) are ALL
|
|
26
|
+
* injectable via opts so every test writes to an os.tmpdir() dir and NOTHING
|
|
27
|
+
* touches the real .design/ tree. The runner (33-01) does NOT stamp a `ts`;
|
|
28
|
+
* the timestamp is stamped HERE via the injected `now`.
|
|
29
|
+
*
|
|
30
|
+
* Pattern references (style mirrored, NOT imported):
|
|
31
|
+
* * scripts/lib/event-chain.cjs — house JSONL append (defensive mkdir -p +
|
|
32
|
+
* append, never-throw) + findRepoRoot + line-by-line read idiom.
|
|
33
|
+
* * scripts/lib/reflector-kfm-proposer.cjs — shouldPropose-style stability gate
|
|
34
|
+
* + proposeKfmDraft writing a proposal-only draft under
|
|
35
|
+
* .design/reflections/incubator/<slug>/CATALOGUE-ENTRY.md.
|
|
36
|
+
*
|
|
37
|
+
* Public API:
|
|
38
|
+
* recordRun(result, opts) → event | null (append on pass:false)
|
|
39
|
+
* readRuns(scenario, opts) → Array<event> (tail JSONL, filter)
|
|
40
|
+
* isSustainedFailure(scenario, opts) → boolean (≥3 of last 10 failed)
|
|
41
|
+
* maybeProposeReflection(scenario, opts) → { action:'drafted', path, slug }
|
|
42
|
+
* | { action:'skipped', reason }
|
|
43
|
+
*
|
|
44
|
+
* Pure CommonJS, deps = node:fs + node:path ONLY. No npm dependencies.
|
|
45
|
+
*/
|
|
46
|
+
|
|
47
|
+
'use strict';
|
|
48
|
+
|
|
49
|
+
const nodeFs = require('node:fs');
|
|
50
|
+
const path = require('node:path');
|
|
51
|
+
|
|
52
|
+
// -------------------------------------------------------------------
|
|
53
|
+
// Constants
|
|
54
|
+
// -------------------------------------------------------------------
|
|
55
|
+
|
|
56
|
+
const EVENT_TYPE = 'skill_behavior_failure';
|
|
57
|
+
const DEFAULT_JSONL_REL = '.design/telemetry/skill-behavior.jsonl';
|
|
58
|
+
const DEFAULT_INCUBATOR_REL = '.design/reflections/incubator';
|
|
59
|
+
const SUSTAINED_WINDOW = 10; // D-07: look at the last N runs
|
|
60
|
+
const SUSTAINED_THRESHOLD = 3; // D-07: ≥3 failures of the last 10 == sustained
|
|
61
|
+
const INCUBATOR_PREFIX = 'skill-edit-';
|
|
62
|
+
|
|
63
|
+
// -------------------------------------------------------------------
|
|
64
|
+
// Helpers
|
|
65
|
+
// -------------------------------------------------------------------
|
|
66
|
+
|
|
67
|
+
/**
 * Locate the repository root by climbing parent directories until one of them
 * holds a package.json (same idiom the original comment attributes to
 * reflector-kfm-proposer / event-chain).
 *
 * The climb inspects at most 12 directories and stops early once the
 * filesystem root is reached. When no package.json is found, the directory
 * three levels above this module is returned instead.
 *
 * @param {string} [startDir] directory to start from; defaults to __dirname
 * @returns {string} the first directory (startDir or an ancestor) containing
 *   a package.json, or the three-levels-up fallback
 */
function findRepoRoot(startDir) {
  const MAX_LEVELS = 12;
  let current = startDir || __dirname;
  let level = 0;
  while (level < MAX_LEVELS) {
    const manifest = path.join(current, 'package.json');
    if (nodeFs.existsSync(manifest)) {
      return current;
    }
    const up = path.dirname(current);
    if (up === current) {
      break; // dirname() is a fixed point only at the filesystem root
    }
    current = up;
    level += 1;
  }
  return path.resolve(__dirname, '..', '..', '..');
}
|
|
84
|
+
|
|
85
|
+
/**
 * Work out where the telemetry JSONL lives.
 *
 *   - opts.jsonlPath absolute  → used as-is
 *   - opts.jsonlPath relative  → resolved against opts.repoRoot, or the
 *                                process cwd when no repoRoot is given
 *   - no opts.jsonlPath        → <repoRoot>/.design/telemetry/skill-behavior.jsonl
 *                                (repoRoot from opts, else findRepoRoot())
 *
 * @param {{ jsonlPath?:string, repoRoot?:string }} [opts]
 * @returns {string}
 */
function resolveJsonlPath(opts) {
  const { jsonlPath, repoRoot } = opts || {};
  if (!jsonlPath) {
    return path.join(repoRoot || findRepoRoot(), DEFAULT_JSONL_REL);
  }
  if (path.isAbsolute(jsonlPath)) {
    return jsonlPath;
  }
  return path.resolve(repoRoot || process.cwd(), jsonlPath);
}
|
|
98
|
+
|
|
99
|
+
/**
 * Work out the directory that holds incubator drafts.
 *
 *   - opts.incubatorRoot absolute → used as-is
 *   - opts.incubatorRoot relative → resolved against opts.repoRoot, or the
 *                                   process cwd when no repoRoot is given
 *   - no opts.incubatorRoot       → <repoRoot>/.design/reflections/incubator
 *                                   (repoRoot from opts, else findRepoRoot())
 *
 * @param {{ incubatorRoot?:string, repoRoot?:string }} [opts]
 * @returns {string}
 */
function resolveIncubatorRoot(opts) {
  const settings = opts || {};
  const explicit = settings.incubatorRoot;
  if (explicit) {
    if (path.isAbsolute(explicit)) {
      return explicit;
    }
    const base = settings.repoRoot || process.cwd();
    return path.resolve(base, explicit);
  }
  const root = settings.repoRoot || findRepoRoot();
  return path.join(root, DEFAULT_INCUBATOR_REL);
}
|
|
112
|
+
|
|
113
|
+
/**
 * Turn a free-text scenario name into a kebab-case slug.
 *
 * Pipeline: lowercase → drop characters outside printable ASCII → turn every
 * run of non-alphanumerics into a single dash → trim dashes at both ends →
 * cap at 40 characters → trim any dash the cut left at the end. Non-string
 * input and inputs that reduce to nothing yield 'unnamed'.
 *
 * @param {unknown} text
 * @returns {string} slug of at most 40 characters, never empty
 */
function deriveSlug(text) {
  const source = typeof text === 'string' ? text : '';
  const slug = source
    .toLowerCase()
    .replace(/[^\x20-\x7e]+/g, '')
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/-+/g, '-')
    .replace(/^-+|-+$/g, '')
    .slice(0, 40)
    .replace(/-+$/g, ''); // truncation can expose a trailing dash
  return slug || 'unnamed';
}
|
|
128
|
+
|
|
129
|
+
// -------------------------------------------------------------------
|
|
130
|
+
// recordRun — emit a scenario-failure event to the JSONL artifact
|
|
131
|
+
// -------------------------------------------------------------------
|
|
132
|
+
|
|
133
|
+
/**
 * Persist ONE scenario-failure event for a runner result whose `pass` is
 * exactly `false`. Anything else (non-object result, passing or pass-less
 * result) is ignored and yields null.
 *
 * The `ts` field is stamped here from the injected clock (`opts.now`), falling
 * back to the current time as an ISO string. Non-finite hit counts are stored
 * as 0, and `target_skill` is copied only when the result defines it.
 *
 * Writing is best-effort: the parent directory is created recursively, the
 * event is appended as one JSON line, and any failure is reported on stderr
 * and swallowed. The event object is returned even when the write failed.
 *
 * EVENT SHAPE:
 *   { event_type:'skill_behavior_failure', scenario, pass:false,
 *     compliance_hits, violation_hits, ts, target_skill? }
 *
 * @param {{ scenario:string, target_skill?:string, pass:boolean,
 *           compliance_hits?:number, violation_hits?:number }} result
 * @param {{ jsonlPath?:string, fs?:typeof import('node:fs'),
 *           now?:() => number|string, repoRoot?:string }} [opts]
 * @returns {object | null} the event that was built, or null when nothing
 *   was recorded
 */
function recordRun(result, opts) {
  const options = opts || {};
  const fsImpl = options.fs || nodeFs;
  const clock = typeof options.now === 'function' ? options.now : () => new Date().toISOString();

  // Only explicit failures are recorded; the detector reads failures only.
  const isFailure = Boolean(result) && typeof result === 'object' && result.pass === false;
  if (!isFailure) {
    return null;
  }

  const hitCount = (value) => (Number.isFinite(value) ? value : 0);
  const event = {
    event_type: EVENT_TYPE,
    scenario: result.scenario,
    pass: false,
    compliance_hits: hitCount(result.compliance_hits),
    violation_hits: hitCount(result.violation_hits),
    ts: clock(),
  };
  // Carry the skill name along when the runner provided one.
  if (result.target_skill !== undefined) {
    event.target_skill = result.target_skill;
  }

  const target = resolveJsonlPath(options);
  try {
    fsImpl.mkdirSync(path.dirname(target), { recursive: true });
    fsImpl.appendFileSync(target, `${JSON.stringify(event)}\n`, { flag: 'a' });
  } catch (err) {
    // Telemetry must never take a run down: report and carry on.
    try {
      const detail = err && err.message ? err.message : String(err);
      process.stderr.write(`[skill-behavior-telemetry] write failed: ${detail}\n`);
    } catch (_e) {
      /* swallow */
    }
  }
  return event;
}
|
|
188
|
+
|
|
189
|
+
// -------------------------------------------------------------------
|
|
190
|
+
// readRuns — tail the JSONL, filter by scenario
|
|
191
|
+
// -------------------------------------------------------------------
|
|
192
|
+
|
|
193
|
+
/**
 * Load every recorded event whose `scenario` equals the given name, preserving
 * file order (oldest first).
 *
 * A missing or unreadable file yields []. Blank lines and lines that are not
 * valid JSON are skipped.
 *
 * @param {string} scenario
 * @param {{ jsonlPath?:string, fs?:typeof import('node:fs'), repoRoot?:string }} [opts]
 * @returns {Array<object>}
 */
function readRuns(scenario, opts) {
  const options = opts || {};
  const fsImpl = options.fs || nodeFs;
  const source = resolveJsonlPath(options);

  if (!fsImpl.existsSync(source)) {
    return [];
  }

  let text;
  try {
    text = fsImpl.readFileSync(source, 'utf8');
  } catch (_e) {
    return [];
  }

  const matches = [];
  text.split('\n').forEach((row) => {
    if (row.trim() === '') {
      return;
    }
    let parsed;
    try {
      parsed = JSON.parse(row);
    } catch (_e) {
      return; // malformed line — ignore it
    }
    if (parsed && parsed.scenario === scenario) {
      matches.push(parsed);
    }
  });
  return matches;
}
|
|
228
|
+
|
|
229
|
+
// -------------------------------------------------------------------
|
|
230
|
+
// isSustainedFailure — ≥3 of the last 10 runs failed for a scenario (D-07)
|
|
231
|
+
// -------------------------------------------------------------------
|
|
232
|
+
|
|
233
|
+
/**
 * Decide whether a scenario is failing in a sustained way: look at the most
 * recent `window_size` runs (default SUSTAINED_WINDOW) and report true when at
 * least `threshold` of them (default SUSTAINED_THRESHOLD) are failures (D-07).
 *
 * Runs come from `opts.window` when it is an array (in-memory, for unit
 * tests); otherwise they are read from the JSONL artifact via readRuns().
 *
 * A run counts as a failure unless its `pass` is exactly `true`, so rows with
 * no `pass` field count as failures; falsy entries (e.g. null) are ignored.
 * Non-integer or non-positive `window_size` / `threshold` overrides fall back
 * to the defaults.
 *
 * @param {string} scenario
 * @param {{ window?:Array<{pass:boolean}>, jsonlPath?:string,
 *           fs?:typeof import('node:fs'), window_size?:number,
 *           threshold?:number, repoRoot?:string }} [opts]
 * @returns {boolean}
 */
function isSustainedFailure(scenario, opts) {
  const options = opts || {};

  let span;
  if (Number.isInteger(options.window_size) && options.window_size > 0) {
    span = options.window_size;
  } else {
    span = SUSTAINED_WINDOW;
  }

  let limit;
  if (Number.isInteger(options.threshold) && options.threshold > 0) {
    limit = options.threshold;
  } else {
    limit = SUSTAINED_THRESHOLD;
  }

  const history = Array.isArray(options.window) ? options.window : readRuns(scenario, options);

  // Only the newest `span` runs take part in the count.
  let failed = 0;
  for (const run of history.slice(-span)) {
    if (run && run.pass !== true) {
      failed += 1;
    }
  }
  return failed >= limit;
}
|
|
272
|
+
|
|
273
|
+
// -------------------------------------------------------------------
|
|
274
|
+
// maybeProposeReflection — propose-only reflector content-edit draft
|
|
275
|
+
// -------------------------------------------------------------------
|
|
276
|
+
|
|
277
|
+
/**
 * Reflector consumption point (mirrors reflector-kfm-proposer's shouldPropose +
 * proposeKfmDraft idiom): gate on isSustainedFailure(scenario); if NOT sustained
 * return { action:'skipped', reason:'below_sustained_threshold' }; if sustained,
 * write a PROPOSE-ONLY draft under the (injectable) incubator root at
 * <incubatorRoot>/skill-edit-<slug>/CATALOGUE-ENTRY.md naming the failing
 * scenario/skill + the sustained-failure signal + a TODO for the content edit,
 * and return { action:'drafted', path, slug }.
 *
 * The "Signal" line of the draft reports the EFFECTIVE threshold and window:
 * opts.threshold / opts.window_size when they are positive integers (the same
 * rule isSustainedFailure applies to them), otherwise the D-07 defaults. With
 * no overrides the draft text is unchanged.
 *
 * This draft lands in the SAME incubator tree that
 * scripts/lib/apply-reflections/incubator-proposals.cjs surfaces in
 * /gdd:apply-reflections — so a maintainer reviews + accepts/rejects the proposed
 * skill edit there. It NEVER auto-edits a skill (Phase 11/29 propose-only SC;
 * Phase 33 out-of-scope).
 *
 * A failure while creating the directory or writing the draft does not throw;
 * it is surfaced as { action:'skipped', reason:'draft_write_failed: …' }.
 *
 * @param {string} scenario
 * @param {{ window?:Array<{pass:boolean}>, jsonlPath?:string,
 *           incubatorRoot?:string, fs?:typeof import('node:fs'),
 *           now?:() => number|string, target_skill?:string,
 *           window_size?:number, threshold?:number,
 *           repoRoot?:string }} [opts]
 * @returns {{ action:'drafted', path:string, slug:string }
 *           | { action:'skipped', reason:string }}
 */
function maybeProposeReflection(scenario, opts) {
  const o = opts || {};
  const fs = o.fs || nodeFs;
  const now = typeof o.now === 'function' ? o.now : () => new Date().toISOString();

  // Stability gate — the ≥3/10 sustained-failure threshold (analogous to the
  // reflector-kfm ≥K gate). Overrides in `o` are honored by the gate itself.
  if (!isSustainedFailure(scenario, o)) {
    return { action: 'skipped', reason: 'below_sustained_threshold' };
  }

  // Effective gate parameters, resolved with the same rule the gate uses, so
  // the draft describes the signal that actually fired.
  const windowSize = Number.isInteger(o.window_size) && o.window_size > 0 ? o.window_size : SUSTAINED_WINDOW;
  const threshold = Number.isInteger(o.threshold) && o.threshold > 0 ? o.threshold : SUSTAINED_THRESHOLD;

  const slug = `${INCUBATOR_PREFIX}${deriveSlug(scenario)}`;
  const incubatorRoot = resolveIncubatorRoot(o);
  const draftDir = path.join(incubatorRoot, slug);
  const draftPath = path.join(draftDir, 'CATALOGUE-ENTRY.md');

  // Best-effort target_skill: prefer an injected hint, else the latest recorded
  // failure event for this scenario (recordRun stamps target_skill). The disk
  // lookup is skipped when an in-memory window was supplied.
  let targetSkill = o.target_skill;
  if (!targetSkill && !Array.isArray(o.window)) {
    const recorded = readRuns(scenario, o);
    const last = recorded.length ? recorded[recorded.length - 1] : null;
    if (last && last.target_skill) targetSkill = last.target_skill;
  }

  const body = [
    `# Skill-edit proposal — ${scenario}`,
    '',
    `**Source:** skill-behavior-telemetry (pressure-scenario harness)`,
    `**Failing scenario:** ${scenario}`,
    `**Target skill:** ${targetSkill || 'TODO: <skill that failed under pressure>'}`,
    `**Signal:** sustained failure — ≥${threshold} of the last ${windowSize} runs failed (D-07).`,
    '',
    `Drafted ${now()}. **PROPOSE-ONLY** — review via \`/gdd:apply-reflections\`.`,
    'This draft NEVER auto-edits a skill (Phase 11/29 propose-only SC; Phase 33 out-of-scope).',
    '',
    '## Rationalization signal',
    '',
    `The "${scenario}" pressure scenario is failing repeatedly: the target skill is`,
    'not holding under pressure (an agent is rationalizing past its HARD-GATE /',
    'rationalization table). A content edit is proposed to close the loophole.',
    '',
    '## Proposed content edit',
    '',
    `- TODO: identify which rationalization the "${scenario}" scenario exploits.`,
    '- TODO: add / strengthen the counter-rationalization row in the target skill',
    " (the '| Thought | Reality |' table) OR tighten its <HARD-GATE> wording.",
    '- TODO: re-run `npm run test:behavior` for this scenario to confirm GREEN.',
    '',
  ].join('\n');

  try {
    fs.mkdirSync(draftDir, { recursive: true });
    fs.writeFileSync(draftPath, body);
  } catch (err) {
    // A draft-write failure must not crash the harness; surface as skipped.
    return { action: 'skipped', reason: `draft_write_failed: ${err && err.message ? err.message : String(err)}` };
  }

  return { action: 'drafted', path: draftPath, slug };
}
|
|
361
|
+
|
|
362
|
+
// -------------------------------------------------------------------
|
|
363
|
+
// Exports
|
|
364
|
+
// -------------------------------------------------------------------
|
|
365
|
+
|
|
366
|
+
// Public API of the telemetry layer.
const publicApi = {
  recordRun,
  readRuns,
  isSustainedFailure,
  maybeProposeReflection,
};

// Constants and underscore-prefixed helpers exposed for tests / higher-level
// integration.
const exposedInternals = {
  EVENT_TYPE,
  DEFAULT_JSONL_REL,
  DEFAULT_INCUBATOR_REL,
  SUSTAINED_WINDOW,
  SUSTAINED_THRESHOLD,
  _deriveSlug: deriveSlug,
  _findRepoRoot: findRepoRoot,
};

module.exports = { ...publicApi, ...exposedInternals };
|
|
@@ -24,7 +24,9 @@
|
|
|
24
24
|
'use strict';
|
|
25
25
|
|
|
26
26
|
const http = require('node:http');
|
|
27
|
+
const crypto = require('node:crypto');
|
|
27
28
|
const { readFileSync, existsSync } = require('node:fs');
|
|
29
|
+
const path = require('node:path');
|
|
28
30
|
const { probeOptional } = require('../probe-optional.cjs');
|
|
29
31
|
|
|
30
32
|
const ws = probeOptional('ws');
|
|
@@ -56,16 +58,62 @@ function* readEventsSync(path) {
|
|
|
56
58
|
}
|
|
57
59
|
}
|
|
58
60
|
|
|
61
|
+
/**
 * Best-effort loader for `<cwd>/.design/config.json`.
 *
 * Returns the parsed value when the file exists and parses to a truthy
 * object; in every other case (no file, unreadable file, invalid JSON, a
 * JSON primitive or null) it returns a fresh `{}`. It never throws, so the
 * transport can start without any config present.
 *
 * @returns {Record<string, any>}
 */
function readDesignConfig() {
  let config = {};
  try {
    const location = path.join(process.cwd(), '.design', 'config.json');
    if (existsSync(location)) {
      const decoded = JSON.parse(readFileSync(location, 'utf8'));
      if (decoded && typeof decoded === 'object') {
        config = decoded;
      }
    }
  } catch {
    // Any read/parse problem degrades to "no config".
    config = {};
  }
  return config;
}
|
|
79
|
+
|
|
80
|
+
/**
 * Pick the host the server binds to (D-04). The first non-blank string wins,
 * checked in this order:
 *   1. opts.host
 *   2. env GDD_WS_BIND_HOST
 *   3. .design/config.json → event_stream.bind_host
 *   4. '127.0.0.1'
 * Every candidate is trimmed; non-string or whitespace-only values are
 * skipped. With nothing configured the result is loopback only, so binding to
 * any other interface is an explicit operator opt-in.
 *
 * @param {{ host?: unknown }} opts
 * @returns {string}
 */
function resolveBindHost(opts) {
  // Trimmed string, or '' for anything that is not a string.
  const clean = (candidate) => (typeof candidate === 'string' ? candidate.trim() : '');

  const fromOpts = clean(opts.host);
  if (fromOpts) {
    return fromOpts;
  }

  const fromEnv = clean(process.env['GDD_WS_BIND_HOST']);
  if (fromEnv) {
    return fromEnv;
  }

  const cfg = readDesignConfig();
  const fromConfig = cfg && cfg.event_stream ? clean(cfg.event_stream.bind_host) : '';
  return fromConfig || '127.0.0.1';
}
|
|
105
|
+
|
|
59
106
|
/**
|
|
60
107
|
* Start the WebSocket server. Returns a handle with `close()`.
|
|
61
108
|
*
|
|
62
109
|
* @param {{
|
|
63
110
|
* port: number,
|
|
64
111
|
* token: string,
|
|
112
|
+
* host?: string,
|
|
65
113
|
* tailFrom?: string,
|
|
66
114
|
* subscribe?: (handler: (ev: unknown) => void) => () => void,
|
|
67
115
|
* }} opts
|
|
68
|
-
* @returns {Promise<{close: () => void, port: number}>}
|
|
116
|
+
* @returns {Promise<{close: () => void, port: number, host: string}>}
|
|
69
117
|
*/
|
|
70
118
|
async function startServer(opts) {
|
|
71
119
|
if (typeof opts.port !== 'number' || !Number.isFinite(opts.port)) {
|
|
@@ -75,6 +123,9 @@ async function startServer(opts) {
|
|
|
75
123
|
throw new TypeError('startServer: token (string, ≥8 chars) required');
|
|
76
124
|
}
|
|
77
125
|
|
|
126
|
+
// Resolve the bind host once (D-04): default 127.0.0.1 (loopback only).
|
|
127
|
+
const host = resolveBindHost(opts);
|
|
128
|
+
|
|
78
129
|
const httpServer = http.createServer((_req, res) => {
|
|
79
130
|
res.statusCode = 426; // Upgrade Required
|
|
80
131
|
res.setHeader('Content-Type', 'text/plain');
|
|
@@ -109,7 +160,16 @@ async function startServer(opts) {
|
|
|
109
160
|
|
|
110
161
|
httpServer.on('upgrade', (req, socket, head) => {
|
|
111
162
|
const auth = req.headers['authorization'];
|
|
112
|
-
|
|
163
|
+
const expected = `Bearer ${opts.token}`;
|
|
164
|
+
// Constant-time compare (D-04, D-12 node:crypto built-in). The length
|
|
165
|
+
// pre-check is REQUIRED — timingSafeEqual throws on a length mismatch —
|
|
166
|
+
// and is acceptable here because the secret is the TOKEN bytes, not its
|
|
167
|
+
// length. A missing/short/mismatched token still yields the 401 close.
|
|
168
|
+
const ok =
|
|
169
|
+
typeof auth === 'string' &&
|
|
170
|
+
Buffer.byteLength(auth) === Buffer.byteLength(expected) &&
|
|
171
|
+
crypto.timingSafeEqual(Buffer.from(auth), Buffer.from(expected));
|
|
172
|
+
if (!ok) {
|
|
113
173
|
socket.write('HTTP/1.1 401 Unauthorized\r\nConnection: close\r\n\r\n');
|
|
114
174
|
socket.destroy();
|
|
115
175
|
return;
|
|
@@ -142,12 +202,16 @@ async function startServer(opts) {
|
|
|
142
202
|
|
|
143
203
|
await new Promise((resolve, reject) => {
|
|
144
204
|
httpServer.once('error', reject);
|
|
145
|
-
httpServer.listen(opts.port, () => resolve(undefined));
|
|
205
|
+
httpServer.listen(opts.port, host, () => resolve(undefined));
|
|
146
206
|
});
|
|
147
207
|
|
|
148
208
|
const addr = httpServer.address();
|
|
149
209
|
return {
|
|
150
210
|
port: typeof addr === 'object' && addr ? addr.port : opts.port,
|
|
211
|
+
host:
|
|
212
|
+
typeof addr === 'object' && addr && typeof addr.address === 'string'
|
|
213
|
+
? addr.address
|
|
214
|
+
: host,
|
|
151
215
|
close() {
|
|
152
216
|
try {
|
|
153
217
|
unsub();
|
|
@@ -15,10 +15,12 @@
|
|
|
15
15
|
"text": {
|
|
16
16
|
"type": "string",
|
|
17
17
|
"minLength": 1,
|
|
18
|
+
"maxLength": 8192,
|
|
18
19
|
"description": "Human-readable blocker description."
|
|
19
20
|
},
|
|
20
21
|
"stage": {
|
|
21
22
|
"type": "string",
|
|
23
|
+
"maxLength": 64,
|
|
22
24
|
"description": "Optional. Defaults to <position>.stage."
|
|
23
25
|
},
|
|
24
26
|
"date": {
|
|
@@ -13,7 +13,8 @@
|
|
|
13
13
|
"properties": {
|
|
14
14
|
"fields": {
|
|
15
15
|
"type": "array",
|
|
16
|
-
"
|
|
16
|
+
"maxItems": 64,
|
|
17
|
+
"items": { "type": "string", "minLength": 1, "maxLength": 256 },
|
|
17
18
|
"description": "Optional projection. When present, limit data.state to these top-level keys. Unknown keys are ignored (no error)."
|
|
18
19
|
}
|
|
19
20
|
}
|
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
"probe_results": {
|
|
16
16
|
"type": "array",
|
|
17
17
|
"minItems": 1,
|
|
18
|
+
"maxItems": 256,
|
|
18
19
|
"items": {
|
|
19
20
|
"type": "object",
|
|
20
21
|
"additionalProperties": false,
|
|
@@ -23,6 +24,7 @@
|
|
|
23
24
|
"name": {
|
|
24
25
|
"type": "string",
|
|
25
26
|
"minLength": 1,
|
|
27
|
+
"maxLength": 256,
|
|
26
28
|
"description": "Connection name (e.g. \"figma\", \"refero\")."
|
|
27
29
|
},
|
|
28
30
|
"status": {
|