@tekyzinc/gsd-t 3.22.11 → 3.23.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +96 -0
- package/README.md +1 -0
- package/bin/gsd-t.js +91 -1
- package/bin/journey-coverage-cli.cjs +130 -0
- package/bin/journey-coverage.cjs +347 -0
- package/docs/architecture.md +26 -0
- package/docs/requirements.md +22 -0
- package/package.json +1 -1
- package/scripts/gsd-t-transcript.html +34 -23
- package/scripts/hooks/gsd-t-conversation-capture.js +186 -5
- package/scripts/hooks/pre-commit-journey-coverage +81 -0
- package/templates/prompts/red-team-subagent.md +53 -0
|
@@ -16,16 +16,20 @@
|
|
|
16
16
|
* - Never throws to the caller — catches all errors, logs to stderr, exits 0.
|
|
17
17
|
* - `content` is capped at 16 KB per frame; over-cap writes `truncated: true`.
|
|
18
18
|
* - Append-only; never overwrites an existing in-session NDJSON file.
|
|
19
|
-
* - Project-dir discovery: prefers `GSD_T_PROJECT_DIR`, then
|
|
19
|
+
* - Project-dir discovery: prefers `GSD_T_PROJECT_DIR`, then decodes
|
|
20
|
+
* `payload.transcript_path`'s `~/.claude/projects/{slug}` to a real project
|
|
21
|
+
* root (session-specific signal — required when multiple parallel Claude
|
|
22
|
+
* Code sessions share one node-runtime hook process), then `payload.cwd`,
|
|
20
23
|
* then walks up from `process.cwd()` looking for `.gsd-t/progress.md`.
|
|
21
24
|
* Silent no-op if no project dir found.
|
|
22
25
|
*
|
|
23
|
-
* Contract: .gsd-t/contracts/conversation-capture-contract.md v1.
|
|
26
|
+
* Contract: .gsd-t/contracts/conversation-capture-contract.md v1.2.0
|
|
24
27
|
*/
|
|
25
28
|
|
|
26
29
|
const fs = require('fs');
|
|
27
30
|
const path = require('path');
|
|
28
31
|
const crypto = require('crypto');
|
|
32
|
+
const os = require('os');
|
|
29
33
|
|
|
30
34
|
const DEFAULT_SCRIPT_GUARD_MS = 5000;
|
|
31
35
|
const CONTENT_CAP_BYTES = 16 * 1024; // 16 KB
|
|
@@ -69,15 +73,102 @@ function _walkUpForProject(startDir) {
|
|
|
69
73
|
return null;
|
|
70
74
|
}
|
|
71
75
|
|
|
76
|
+
// Map a Claude Code project slug (the directory name found under
// `~/.claude/projects/`) back to the absolute project root it was derived
// from. Only roots that contain a `.gsd-t` entry are accepted.
//
// The encoding cannot be inverted mechanically: a `-` in the slug may stand
// for a path separator or for a literal `-` inside a directory name. The
// decoder therefore splits the slug on `-` and searches the real filesystem
// depth-first, trying every way of grouping consecutive tokens into one
// directory name. Shorter groupings are tried first, so when two readings both
// exist on disk the one with more path separators (the deeper path) wins.
//
// Rationale: the Stop hook payload's `transcript_path` has the form
// `~/.claude/projects/{slug}/{sessionId}.jsonl`, and the slug is the only
// per-session routing signal available when several sessions share one hook
// process (in which case `process.cwd()` may belong to another project).
//
// NOTE(review): slugs with consecutive `-` (empty tokens) are rejected. If
// Claude Code also encodes other characters (e.g. the `.` of a hidden
// directory) as `-`, such projects can never be decoded here — confirm
// against the real encoder.
//
// @param {string} slug  Directory name such as `-Users-me-my-app`.
// @returns {string|null} Absolute project root, or null when the slug is
//   malformed or no matching directory with `.gsd-t` exists.
function _slugToProjectDir(slug) {
  // A valid slug is a non-empty string whose first '-' encodes the leading '/'.
  if (typeof slug !== 'string' || !slug.startsWith('-')) return null;

  // Refuse anything that could escape the intended tree once decoded.
  const forbidden = ['/', '\\', '\0', '..'];
  if (forbidden.some((piece) => slug.includes(piece))) return null;

  // Drop the leading '-' and split; an empty token means '--' or a trailing '-'.
  const tokens = slug.slice(1).split('-');
  if (tokens.length === 0 || tokens.includes('')) return null;

  // Existence probe that can never throw.
  const present = (candidate) => {
    try {
      return fs.existsSync(candidate);
    } catch (_) {
      return false;
    }
  };

  // Depth-first search: `cursor` is the index of the first unconsumed token,
  // `dir` is the directory decoded so far.
  const descend = (dir, cursor) => {
    if (cursor >= tokens.length) {
      // Every token consumed: accept only a directory carrying the marker.
      return present(path.join(dir, '.gsd-t')) ? dir : null;
    }
    for (let end = cursor + 1; end <= tokens.length; end += 1) {
      const segment = tokens.slice(cursor, end).join('-');
      const candidate = path.join(dir, segment);
      // Re-validate after path.join as defense against odd inputs.
      if (candidate.includes('..')) continue;
      if (!present(candidate)) continue;
      const hit = descend(candidate, end);
      if (hit) return hit;
    }
    return null;
  };

  return descend('/', 0);
}
|
|
121
|
+
|
|
122
|
+
// Return the project slug embedded in a transcript path, i.e. the first path
// segment that follows `<home>/.claude/projects/`.
//
// `<home>` is `process.env.HOME` when set, otherwise `os.homedir()`. The input
// is normalised with `path.resolve` first, so `..` segments cannot be used to
// make a path outside the projects root look like it is inside.
//
// @param {string} p  Absolute transcript path.
// @returns {string|null} The slug, or null when `p` is not a string, is not
//   absolute, lies outside the projects root, or no home dir can be found.
function _slugFromTranscriptPath(p) {
  if (typeof p !== 'string') return null;
  if (!path.isAbsolute(p)) return null;

  const homeDir = process.env.HOME || os.homedir();
  if (!homeDir) return null;

  // The trailing separator prevents sibling names such as `projects-old`
  // from passing the prefix test.
  const projectsRoot = path.resolve(homeDir, '.claude', 'projects') + path.sep;
  const absolute = path.resolve(p);
  if (!absolute.startsWith(projectsRoot)) return null;

  // Everything up to the first separator after the root is the slug.
  const [slug] = absolute.slice(projectsRoot.length).split(path.sep);
  return slug || null;
}
|
|
138
|
+
|
|
72
139
|
// Decide which project directory a hook payload belongs to. Sources are tried
// in priority order and the first one that yields a directory wins:
//
//   1. `GSD_T_PROJECT_DIR` — explicit override (tests, operators); used only
//      if `<dir>/.gsd-t` exists.
//   2. `payload.transcript_path` — its slug is decoded back to a project
//      root. This is the per-session signal, preferred over cwd-based
//      sources because those reflect whatever directory the hook process
//      inherited, which can belong to a different parallel session.
//   3. `payload.cwd` — accepted when absolute and `<cwd>/.gsd-t` exists.
//   4. Walk-up from `process.cwd()` — last resort; a one-line warning is
//      written to stderr so misrouted frames can be diagnosed.
//
// @param {object} [payload]  Hook payload; may be null/undefined.
// @returns {string|null} Project directory, or null if none was found.
function _resolveProjectDir(payload) {
  const hasMarker = (dir) => fs.existsSync(path.join(dir, '.gsd-t'));
  const { transcript_path: transcriptPath, cwd: payloadCwd } = payload || {};

  // 1. Explicit env override.
  const override = process.env.GSD_T_PROJECT_DIR;
  if (override && hasMarker(override)) return override;

  // 2. Session-specific signal: slug decoded from the transcript path.
  if (typeof transcriptPath === 'string') {
    const slug = _slugFromTranscriptPath(transcriptPath);
    const decoded = slug ? _slugToProjectDir(slug) : null;
    if (decoded) return decoded;
  }

  // 3. Working directory carried by the payload, when present.
  if (typeof payloadCwd === 'string' && path.isAbsolute(payloadCwd) && hasMarker(payloadCwd)) {
    return payloadCwd;
  }

  // 4. Last resort: walk up from the hook process's own cwd.
  const walked = _walkUpForProject(process.cwd());
  if (!walked) return null;
  try {
    const warning = 'gsd-t-conversation-capture: project-dir resolved via cwd walk-up ('
      + walked
      + ') — unreliable for parallel sessions\n';
    process.stderr.write(warning);
  } catch (_) { /* a failed diagnostic write must not affect resolution */ }
  return walked;
}
|
|
83
174
|
|
|
@@ -134,9 +225,93 @@ function _extractUserContent(payload) {
|
|
|
134
225
|
return null;
|
|
135
226
|
}
|
|
136
227
|
|
|
228
|
+
// Read at most the last `bytes` bytes of a regular file and decode them as
// UTF-8, so multi-MB transcripts are never loaded into memory in full.
//
// When the read window starts past the beginning of the file it almost
// certainly begins mid-line, so everything up to and including the first
// newline is discarded. If the window contains no newline it is returned
// unchanged.
//
// The file can change between `statSync` and `readSync` (transcripts are
// appended to, and may be truncated or rotated). `readSync` may then deliver
// fewer bytes than requested, so only the bytes actually read are decoded —
// otherwise the zero-filled remainder of the buffer would leak into the
// result as NUL characters.
//
// @param {string} filePath  File to read.
// @param {number} bytes     Maximum number of trailing bytes to read.
// @returns {string} Decoded tail, or '' on any error, for non-regular files,
//   and for empty files. Never throws.
function _readFileTail(filePath, bytes) {
  let fd = -1;
  try {
    // stat before open so non-regular files are rejected without opening them.
    const st = fs.statSync(filePath);
    if (!st.isFile()) return '';
    const size = st.size;
    if (size === 0) return '';
    const want = Math.min(bytes, size);
    const start = size - want;
    fd = fs.openSync(filePath, 'r');
    const buf = Buffer.alloc(want);
    // Keep reading until the window is full or EOF is hit.
    let filled = 0;
    while (filled < want) {
      const n = fs.readSync(fd, buf, filled, want - filled, start + filled);
      if (n <= 0) break; // EOF: the file is shorter than stat reported
      filled += n;
    }
    let str = buf.toString('utf8', 0, filled);
    // If we sliced mid-line, drop the (possibly malformed) leading partial.
    if (start > 0) {
      const nl = str.indexOf('\n');
      if (nl >= 0) str = str.slice(nl + 1);
    }
    return str;
  } catch (_) {
    return '';
  } finally {
    if (fd >= 0) { try { fs.closeSync(fd); } catch (_) { /* noop */ } }
  }
}
|
|
255
|
+
|
|
256
|
+
// Sanitise a `transcript_path` taken from a hook payload before it is read.
//
// Only paths that, after `path.resolve` normalisation, live underneath
// `<home>/.claude/projects/` are accepted, which blocks `..` traversal out of
// that tree. `<home>` is `process.env.HOME` when set, else `os.homedir()`.
// Anything suspicious is rejected by returning null; this function never
// reads the file itself.
//
// NOTE(review): symlinks are not resolved, so a link inside the projects
// tree that points elsewhere is still accepted — confirm this is acceptable.
//
// @param {string} p  Candidate transcript path.
// @returns {string|null} The normalised absolute path, or null if rejected.
function _safeTranscriptPath(p) {
  const usable = typeof p === 'string' && p.length > 0 && path.isAbsolute(p);
  if (!usable) return null;

  const homeDir = process.env.HOME || os.homedir();
  if (!homeDir) return null;

  // Trailing separator so `projects-evil/...` cannot pass the prefix check.
  const allowedPrefix = path.resolve(homeDir, '.claude', 'projects') + path.sep;
  const candidate = path.resolve(p);
  return candidate.startsWith(allowedPrefix) ? candidate : null;
}
|
|
270
|
+
|
|
271
|
+
// Recover the most recent assistant message body from a transcript JSONL
// file (one JSON event per line).
//
// The path is first validated by `_safeTranscriptPath`; only the last 64 KiB
// of the file are read. Lines are then examined newest-first and the first
// one that qualifies is returned. A line qualifies when it parses as JSON, has
// `type === 'assistant'`, is not a sidechain (subagent) turn, and carries
// text: either `message.content` is a string, or it is an array containing at
// least one `{ type: 'text', text: <string> }` block. Text blocks are
// concatenated in order; all other block types (tool_use, tool_result,
// thinking, ...) are ignored. Rows without any text are skipped and the scan
// continues to older lines.
//
// @param {string} transcriptPath  Path from the hook payload.
// @returns {string|null} Assistant text, or null when the path is rejected,
//   the tail is empty/unreadable, or no qualifying row is found.
function _readAssistantFromTranscript(transcriptPath) {
  const safePath = _safeTranscriptPath(transcriptPath);
  if (!safePath) return null;

  const tail = _readFileTail(safePath, 64 * 1024);
  if (!tail) return null;

  const pending = tail.split('\n');
  while (pending.length > 0) {
    const raw = pending.pop(); // newest remaining line
    if (!raw) continue;

    let entry;
    try {
      entry = JSON.parse(raw);
    } catch (_) {
      continue; // unparsable line (e.g. a truncated row) — ignore it
    }

    // Only the orchestrator's own assistant turns belong in the in-session
    // file; transcripts also record subagent (sidechain) turns.
    const ownAssistantTurn = Boolean(entry)
      && entry.type === 'assistant'
      && entry.isSidechain !== true;
    if (!ownAssistantTurn) continue;

    const content = entry.message ? entry.message.content : undefined;
    if (typeof content === 'string') return content;
    if (!Array.isArray(content)) continue;

    const pieces = content
      .filter((block) => block && block.type === 'text' && typeof block.text === 'string')
      .map((block) => block.text);
    // A turn with no text blocks (e.g. tool_use only) — keep scanning.
    if (pieces.length > 0) return pieces.join('');
  }
  return null;
}
|
|
305
|
+
|
|
137
306
|
function _extractAssistantContent(payload) {
|
|
138
|
-
//
|
|
139
|
-
//
|
|
307
|
+
// PRIMARY: Claude Code Stop hook payload carries `transcript_path` to the
|
|
308
|
+
// orchestrator's JSONL. Read the most recent assistant row from the tail.
|
|
309
|
+
if (payload && typeof payload.transcript_path === 'string') {
|
|
310
|
+
const fromTranscript = _readAssistantFromTranscript(payload.transcript_path);
|
|
311
|
+
if (fromTranscript != null) return fromTranscript;
|
|
312
|
+
}
|
|
313
|
+
// Fallback shapes — kept so older / non-Claude-Code payload shapes still
|
|
314
|
+
// work (and so unit tests can exercise the hook without a real transcript).
|
|
140
315
|
if (payload && typeof payload.assistant_message === 'string') return payload.assistant_message;
|
|
141
316
|
if (payload && payload.message && typeof payload.message.content === 'string') {
|
|
142
317
|
return payload.message.content;
|
|
@@ -253,6 +428,12 @@ module.exports = {
|
|
|
253
428
|
_buildToolUseFrame,
|
|
254
429
|
_appendFrame,
|
|
255
430
|
_handle,
|
|
431
|
+
_extractAssistantContent,
|
|
432
|
+
_readAssistantFromTranscript,
|
|
433
|
+
_safeTranscriptPath,
|
|
434
|
+
_readFileTail,
|
|
435
|
+
_slugFromTranscriptPath,
|
|
436
|
+
_slugToProjectDir,
|
|
256
437
|
CONTENT_CAP_BYTES,
|
|
257
438
|
},
|
|
258
439
|
};
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# GSD-T journey-coverage gate (M52 D1)
# Blocks commits that touch viewer-source files when uncovered listeners are
# detected by `bin/journey-coverage-cli.cjs --staged-only`.
#
# Install (opt-in):  gsd-t doctor --install-journey-hook
# Remove:            rm .git/hooks/pre-commit  (or remove the marker block if
#                    merged into an existing hook)
#
# Exit codes:
#   0 — clean (no viewer-source files staged, OR coverage clean, OR fail-open)
#   1 — blocked (detector exit 4: uncovered listener in staged viewer-source
#       files; OR detector exit 2: manifest missing/unreadable)
#
# Fail-open philosophy: a broken hook is worse than a permissive one. Detector
# internal exception (any exit code other than 0, 2, 4) → exit 0 with a stderr
# warning, never block.

set -e

ROOT="$(git rev-parse --show-toplevel 2>/dev/null || pwd)"

# Path prefixes that count as viewer source. Entries ending in '/' match a
# whole directory; the others match any path starting with the given string.
VIEWER_SOURCE_PATTERNS=(
  "scripts/gsd-t-transcript.html"
  "scripts/gsd-t-dashboard-server.js"
  "bin/gsd-t-dashboard"
  "e2e/journeys/"
  "e2e/viewer/"
)

# Staged paths, one per line.
#  - core.quotepath=off: by default git C-quotes names containing non-ASCII
#    bytes ("e2e/journeys/caf\303\251.spec.ts"), and the leading quote would
#    stop them from ever matching a prefix pattern below.
#  - ACMR: include copied and renamed files (reported under their new name) so
#    renaming a viewer file while editing it does not bypass the gate.
STAGED="$(git -c core.quotepath=off diff --cached --name-only --diff-filter=ACMR 2>/dev/null || true)"
[ -z "$STAGED" ] && exit 0

# Collect staged files that fall under any viewer-source prefix. Only the
# emptiness of $matches is used afterwards.
matches=""
while IFS= read -r f; do
  for p in "${VIEWER_SOURCE_PATTERNS[@]}"; do
    case "$f" in
      "$p"*) matches="$matches $f"; break ;;
    esac
  done
done <<EOF
$STAGED
EOF

if [ -z "$matches" ]; then
  exit 0
fi

CLI="$ROOT/bin/journey-coverage-cli.cjs"
if [ ! -f "$CLI" ]; then
  echo "[journey-coverage] WARNING: $CLI not found — fail-open." >&2
  exit 0
fi

# Run the CLI in --staged-only mode. Capture stdout+stderr; rely on exit code.
# errexit is suspended so a non-zero detector exit reaches the case below.
set +e
OUT="$(node "$CLI" --staged-only --project-dir "$ROOT" 2>&1)"
RC=$?
set -e

# printf (not echo) for detector output: echo may treat a leading -n/-e in
# arbitrary text as an option.
case "$RC" in
  0)
    exit 0
    ;;
  4)
    echo "[journey-coverage] BLOCKED: uncovered viewer listener in staged files." >&2
    printf '%s\n' "$OUT" >&2
    echo "" >&2
    echo "  Add a journey spec under e2e/journeys/ and update .gsd-t/journey-manifest.json." >&2
    exit 1
    ;;
  2)
    echo "[journey-coverage] BLOCKED: manifest missing/unreadable." >&2
    printf '%s\n' "$OUT" >&2
    exit 1
    ;;
  *)
    # Unknown exit — fail-open so a detector bug doesn't break workflow.
    echo "[journey-coverage] WARNING: detector exited $RC (fail-open):" >&2
    printf '%s\n' "$OUT" >&2
    exit 0
    ;;
esac
|
|
@@ -42,3 +42,56 @@ Summary:
|
|
|
42
42
|
- VERDICT: `FAIL` ({N} bugs found) | `GRUDGING PASS` (exhaustive search, nothing found)
|
|
43
43
|
|
|
44
44
|
Write findings to `.gsd-t/red-team-report.md`. If bugs found, also append to `.gsd-t/qa-issues.md`.
|
|
45
|
+
|
|
46
|
+
## Test Pass-Through — Journey Edition (M52)
|
|
47
|
+
|
|
48
|
+
**Activates when**: `.gsd-t/journey-manifest.json` exists AND `e2e/journeys/` is non-empty (M52 D2 has landed).
|
|
49
|
+
|
|
50
|
+
**Goal**: Prove the journey specs catch real regressions in the journeys they
|
|
51
|
+
claim to cover. A journey spec that only checks "the button exists and is
|
|
52
|
+
clickable" passes through any breakage to the user — that is what this
|
|
53
|
+
category attacks.
|
|
54
|
+
|
|
55
|
+
**Protocol**:
|
|
56
|
+
|
|
57
|
+
1. For each spec in `.gsd-t/journey-manifest.json`, identify the listener(s) it covers.
|
|
58
|
+
2. Write a deliberately-broken patch to `scripts/gsd-t-transcript.html` that
|
|
59
|
+
targets that listener — examples:
|
|
60
|
+
- Remove the listener entirely (`addEventListener` line stripped).
|
|
61
|
+
- Comment out the side-effect inside the handler (e.g. `_ssSet` call).
|
|
62
|
+
- Swap a sessionStorage key name (e.g. splitterPct key → `'XXX'`).
|
|
63
|
+
- Stub the handler to early-return (`if (true) return;` at top).
|
|
64
|
+
- Reverse a state mutation (`next ? 'true' : 'false'` → `next ? 'false' : 'true'`).
|
|
65
|
+
3. Run the journey spec against the broken viewer.
|
|
66
|
+
4. **PASS**: spec FAILS (red) → revert patch → spec PASSES (green). Record `caught`.
|
|
67
|
+
5. **FAIL**: spec PASSES with broken viewer → SHALLOW SPEC, must be tightened.
|
|
68
|
+
Record `pass-through` and rewrite the assertion to verify state change /
|
|
69
|
+
data flow / content load.
|
|
70
|
+
6. Write at least 5 broken patches across different specs. Each pass-through
|
|
71
|
+
is a verdict-level FAIL until rewritten.
|
|
72
|
+
|
|
73
|
+
**Hook end-to-end exercise** (also part of this category):
|
|
74
|
+
- Stage a viewer-source diff that adds a NEW listener with no manifest entry.
|
|
75
|
+
- Confirm `pre-commit-journey-coverage` blocks the commit (exit 1).
|
|
76
|
+
- Update `.gsd-t/journey-manifest.json` with a covering entry.
|
|
77
|
+
- Confirm the hook now allows the commit (exit 0).
|
|
78
|
+
- Both transitions logged in `.gsd-t/red-team-report.md` § "M52 JOURNEY-EDITION RED TEAM".
|
|
79
|
+
|
|
80
|
+
**Findings format** in `.gsd-t/red-team-report.md` (append-only section):
|
|
81
|
+
```
|
|
82
|
+
## M52 JOURNEY-EDITION RED TEAM — {date}
|
|
83
|
+
|
|
84
|
+
### Patch {N}: {short-name}
|
|
85
|
+
- **Spec**: {spec-name}
|
|
86
|
+
- **Broken-line(s)**: file:line — {one-line description of patch}
|
|
87
|
+
- **Expected**: spec FAILS, caught the regression
|
|
88
|
+
- **Actual**: {fail|pass-through}
|
|
89
|
+
- **Verdict**: caught | PASS-THROUGH (must rewrite spec)
|
|
90
|
+
|
|
91
|
+
### Hook end-to-end
|
|
92
|
+
- Block exercise: {git diff details, exit code, stderr summary}
|
|
93
|
+
- Unblock exercise: {manifest update, exit code, stderr summary}
|
|
94
|
+
|
|
95
|
+
### VERDICT
|
|
96
|
+
{GRUDGING PASS — N patches written, all caught | FAIL — {M} pass-through(s)}
|
|
97
|
+
```
|