@tekyzinc/gsd-t 3.13.16 → 3.16.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54):
  1. package/CHANGELOG.md +44 -0
  2. package/README.md +1 -0
  3. package/bin/gsd-t-benchmark-orchestrator.js +437 -0
  4. package/bin/gsd-t-capture-lint.cjs +276 -0
  5. package/bin/gsd-t-completion-check.cjs +106 -0
  6. package/bin/gsd-t-orchestrator-config.cjs +64 -0
  7. package/bin/gsd-t-orchestrator-queue.cjs +180 -0
  8. package/bin/gsd-t-orchestrator-recover.cjs +231 -0
  9. package/bin/gsd-t-orchestrator-worker.cjs +219 -0
  10. package/bin/gsd-t-orchestrator.js +534 -0
  11. package/bin/gsd-t-stream-feed-client.cjs +151 -0
  12. package/bin/gsd-t-task-brief-compactor.cjs +89 -0
  13. package/bin/gsd-t-task-brief-template.cjs +96 -0
  14. package/bin/gsd-t-task-brief.js +249 -0
  15. package/bin/gsd-t-token-backfill.cjs +366 -0
  16. package/bin/gsd-t-token-capture.cjs +306 -0
  17. package/bin/gsd-t-token-dashboard.cjs +318 -0
  18. package/bin/gsd-t-token-regenerate-log.cjs +129 -0
  19. package/bin/gsd-t-transcript-tee.cjs +246 -0
  20. package/bin/gsd-t-unattended-heartbeat.cjs +188 -0
  21. package/bin/gsd-t-unattended-platform.cjs +191 -27
  22. package/bin/gsd-t-unattended-safety.cjs +8 -1
  23. package/bin/gsd-t-unattended.cjs +192 -31
  24. package/bin/gsd-t.js +329 -2
  25. package/bin/supervisor-pid-fingerprint.cjs +126 -0
  26. package/commands/gsd-t-debug.md +63 -51
  27. package/commands/gsd-t-design-decompose.md +2 -7
  28. package/commands/gsd-t-doc-ripple.md +20 -11
  29. package/commands/gsd-t-execute.md +82 -50
  30. package/commands/gsd-t-integrate.md +43 -16
  31. package/commands/gsd-t-plan.md +20 -7
  32. package/commands/gsd-t-prd.md +19 -12
  33. package/commands/gsd-t-quick.md +64 -29
  34. package/commands/gsd-t-resume.md +51 -4
  35. package/commands/gsd-t-unattended.md +19 -20
  36. package/commands/gsd-t-verify.md +48 -32
  37. package/commands/gsd-t-visualize.md +19 -17
  38. package/commands/gsd-t-wave.md +29 -27
  39. package/docs/architecture.md +16 -0
  40. package/docs/m40-benchmark-report.md +35 -0
  41. package/docs/requirements.md +20 -0
  42. package/package.json +1 -1
  43. package/scripts/gsd-t-dashboard-server.js +291 -4
  44. package/scripts/gsd-t-dashboard.html +31 -1
  45. package/scripts/gsd-t-design-review-server.js +3 -1
  46. package/scripts/gsd-t-stream-feed-server.js +428 -0
  47. package/scripts/gsd-t-stream-feed.html +1168 -0
  48. package/scripts/gsd-t-token-aggregator.js +373 -0
  49. package/scripts/gsd-t-transcript.html +422 -0
  50. package/scripts/hooks/gsd-t-in-session-probe.js +62 -0
  51. package/scripts/hooks/pre-commit-capture-lint +26 -0
  52. package/templates/CLAUDE-global.md +69 -0
  53. package/scripts/gsd-t-agent-dashboard-server.js +0 -424
  54. package/scripts/gsd-t-agent-dashboard.html +0 -1043
@@ -0,0 +1,231 @@
1
+ 'use strict';
2
+ /**
3
+ * M40 D6-T1 — Orchestrator Recovery
4
+ *
5
+ * Reconstructs run state from an existing `.gsd-t/orchestrator/state.json`
6
+ * so the orchestrator can be resumed after a crash/SIGINT/kill.
7
+ *
8
+ * Contract: stream-json-sink v1.1.0 (for workerPid/taskId attribution),
9
+ * completion-signal v1.x (assertCompletion reconciliation),
10
+ * wave-join v1.x (wave ordering + second-fail-halt policy).
11
+ *
12
+ * Rules:
13
+ * - state.json absent → { mode: 'fresh' }
14
+ * - status ∈ {done,failed,stopped,interrupted,completed} → { mode: 'terminal' }
15
+ * - any other (non-terminal) status, e.g. 'running' → { mode: 'resume', ... }
16
+ * - Every task with status === 'running' is reconciled via assertCompletion:
17
+ * ok → DONE (status rewritten, retry count preserved)
18
+ * missing only progress → flagged for operator triage (status: 'ambiguous')
19
+ * missing commit+test+etc → FAILED (respects existing retryCount — if already
20
+ * retried once, second-fail-halt is triggered by caller)
21
+ * - A live `workerPid` on a task gets a best-effort `kill -0` liveness check; stale
22
+ * pid is logged but treated as crashed (not silently reclaimed).
23
+ * - currentWave = first wave with any task not in {done, ambiguous}; null if all done.
24
+ *
25
+ * The caller (the orchestrator `--resume` CLI flag, D6-T2) decides what to do with
26
+ * ambiguous tasks — this module never silently claims completion.
27
+ */
28
+
29
+ const fs = require('fs');
30
+ const path = require('path');
31
+
32
// Where the orchestrator persists its run state, relative to the project root.
const STATE_DIR = path.join('.gsd-t', 'orchestrator');
const STATE_FILE = 'state.json';

// Run-level statuses that recoverRunState reports as mode 'terminal'
// (the previous run is over; nothing is reconciled).
const TERMINAL_STATUSES = new Set([
  'done',
  'failed',
  'stopped',
  'interrupted',
  'completed',
]);
36
+
37
+ // ── Public API ───────────────────────────────────────────────────────────────
38
+
39
/**
 * Rebuild the orchestrator's run state from `<projectDir>/.gsd-t/orchestrator/state.json`.
 *
 * Outcomes:
 *   - state.json missing                     → mode 'fresh'
 *   - state.json unparseable / not an object → mode 'terminal' (state: null, reason in `notes`)
 *   - state.status is a terminal status      → mode 'terminal' (tasks returned untouched)
 *   - anything else                          → mode 'resume'; each task whose status is
 *     'running' is reconciled through assertCompletion:
 *       result.ok                            → 'done' (existing endedAt is kept)
 *       only 'no_progress_entry' missing     → 'ambiguous' (id also listed in `ambiguous`)
 *       assertCompletion threw / other gaps  → 'failed'
 *
 * Task entries are copied before being rewritten, so the `state` object returned is
 * exactly what was read from disk; the reconciled view lives in `tasks`.
 *
 * @param {object} [opts]
 * @param {string} [opts.projectDir] - project root; defaults to process.cwd()
 * @param {string} [opts.expectedBranch] - branch passed to assertCompletion; falls back
 *   to state.expectedBranch, then 'main'
 * @param {Function} [opts.assertCompletionImpl] - for testing (default: real impl)
 * @param {Function} [opts.pidLivenessCheck] - for testing (default: process.kill(pid, 0))
 * @param {Function} [opts.now] - returns an ISO string; default new Date().toISOString()
 * @returns {object} { mode, currentWave, tasks, state, ambiguous, notes }
 */
function recoverRunState(opts) {
  const options = opts || {};
  const projectDir = options.projectDir || process.cwd();
  const statePath = path.join(projectDir, STATE_DIR, STATE_FILE);
  const notes = [];
  const timestamp = () => (options.now ? options.now() : new Date().toISOString());
  const terminal = (tasks, state) => ({
    mode: 'terminal', currentWave: null, tasks, state, ambiguous: [], notes,
  });

  if (!fs.existsSync(statePath)) {
    return { mode: 'fresh', currentWave: null, tasks: {}, state: null, ambiguous: [], notes };
  }

  let state;
  try {
    state = JSON.parse(fs.readFileSync(statePath, 'utf8'));
  } catch (err) {
    notes.push(`state.json parse failed: ${err.message} — treating as terminal/corrupt`);
    return terminal({}, null);
  }
  // typeof [] is 'object', so arrays need an explicit rejection.
  if (!state || typeof state !== 'object' || Array.isArray(state)) {
    notes.push('state.json is not an object — treating as terminal/corrupt');
    return terminal({}, null);
  }

  if (TERMINAL_STATUSES.has(state.status)) {
    return terminal(state.tasks || {}, state);
  }

  // Non-terminal: reconcile every in-flight task.
  const assertCompletionImpl = options.assertCompletionImpl || defaultAssertCompletion;
  const pidCheck = options.pidLivenessCheck || defaultPidLiveness;
  const expectedBranch = options.expectedBranch || state.expectedBranch || 'main';

  const tasks = { ...(state.tasks || {}) };
  const ambiguous = [];

  for (const [taskId, task] of Object.entries(tasks)) {
    // A hand-edited or truncated state.json can hold null/primitive entries; reading
    // `.status` on null would abort the whole recovery, so skip and report instead.
    if (!task || typeof task !== 'object') {
      notes.push(`task ${taskId}: malformed entry in state.json — skipped`);
      continue;
    }
    if (task.status !== 'running') continue;

    // PID liveness — informational only; the task is reconciled either way.
    if (task.workerPid) {
      const alive = pidCheck(task.workerPid);
      if (!alive) notes.push(`task ${taskId}: workerPid ${task.workerPid} no longer alive (crashed)`);
    }

    // Reconcile via assertCompletion
    let result;
    try {
      result = assertCompletionImpl({
        taskId: task.canonicalId || taskId,
        projectDir,
        expectedBranch,
        taskStart: task.startedAt,
        ownedPatterns: task.ownedPatterns || [],
        skipTest: true, // recovery shouldn't rerun the test suite per task
      });
    } catch (err) {
      notes.push(`task ${taskId}: assertCompletion threw — ${err.message}; treating as failed`);
      tasks[taskId] = {
        ...task,
        status: 'failed',
        endedAt: timestamp(),
        missing: ['recovery_assert_threw'],
        recoverySource: 'assert_error',
      };
      continue;
    }

    const missing = Array.isArray(result && result.missing) ? result.missing : [];

    if (result && result.ok) {
      tasks[taskId] = {
        ...task,
        status: 'done',
        endedAt: task.endedAt || timestamp(),
        missing: [],
        recoverySource: 'recovered_ok',
      };
      continue;
    }

    // Ambiguous case: the progress entry is the only thing missing.
    // Don't silently claim done; caller must triage.
    const onlyProgressMissing = missing.length === 1 && missing[0] === 'no_progress_entry';
    if (onlyProgressMissing) {
      tasks[taskId] = {
        ...task,
        status: 'ambiguous',
        missing,
        recoverySource: 'commit_without_progress',
      };
      ambiguous.push(taskId);
      notes.push(`task ${taskId}: commit found but no progress.md entry — flagged for triage`);
      continue;
    }

    // Otherwise: treat as failed. The spread keeps retryCount and every other field.
    tasks[taskId] = {
      ...task,
      status: 'failed',
      endedAt: timestamp(),
      missing,
      recoverySource: 'reconcile_failed',
    };
  }

  return {
    mode: 'resume',
    currentWave: firstIncompleteWave(tasks),
    tasks,
    state,
    ambiguous,
    notes,
  };
}
161
+
162
/**
 * Persist a reconciled state object to `<projectDir>/.gsd-t/orchestrator/state.json`.
 *
 * The JSON is written to a sibling `state.json.tmp` first and then renamed over the
 * real file, so a reader does not see a half-written state.json. If the write or the
 * rename fails, the temp file is removed (best-effort) and the original error is rethrown.
 *
 * @param {string} projectDir - project root
 * @param {object} reconciled - full state object to serialize
 * @throws {TypeError} when `reconciled` is not an object (it would otherwise be written
 *   as the literal text `undefined`/`null` and read back as a corrupt state)
 */
function writeRecoveredState(projectDir, reconciled) {
  if (reconciled === null || typeof reconciled !== 'object') {
    throw new TypeError('writeRecoveredState requires a state object');
  }
  const statePath = path.join(projectDir, STATE_DIR, STATE_FILE);
  // Best-effort: if the directory truly cannot be created, the write below reports it.
  try { fs.mkdirSync(path.dirname(statePath), { recursive: true }); } catch { /* exists */ }
  const tmp = statePath + '.tmp';
  try {
    fs.writeFileSync(tmp, JSON.stringify(reconciled, null, 2) + '\n');
    fs.renameSync(tmp, statePath);
  } catch (err) {
    // Don't leave a stale temp file next to state.json after a failed write/rename.
    try { fs.unlinkSync(tmp); } catch { /* nothing to clean up */ }
    throw err;
  }
}
174
+
175
/**
 * Move the current state.json into `.gsd-t/orchestrator/archive/<ts>/state.json`.
 *
 * `<ts>` is `opts.timestamp` when provided, otherwise the current ISO time with
 * ':' and '.' replaced by '-'. After a successful call the original state.json
 * no longer exists at its usual path.
 *
 * @param {string} projectDir - project root
 * @param {object} [opts]
 * @param {string} [opts.timestamp] - archive folder name override
 * @returns {object} { archived: true, archivePath } or { archived: false } when
 *   there is no state.json to archive
 */
function archiveState(projectDir, opts) {
  const liveStatePath = path.join(projectDir, STATE_DIR, STATE_FILE);
  const hasState = fs.existsSync(liveStatePath);
  if (!hasState) {
    return { archived: false };
  }

  let stamp = opts && opts.timestamp;
  if (!stamp) {
    stamp = new Date().toISOString().replace(/[:.]/g, '-');
  }

  const targetDir = path.join(projectDir, STATE_DIR, 'archive', stamp);
  const archivePath = path.join(targetDir, STATE_FILE);
  fs.mkdirSync(targetDir, { recursive: true });
  fs.renameSync(liveStatePath, archivePath);

  return { archived: true, archivePath };
}
190
+
191
+ // ── Helpers ──────────────────────────────────────────────────────────────────
192
+
193
/**
 * Find the lowest wave number that still has unfinished work.
 *
 * A task counts as unfinished unless its status is 'done' or 'ambiguous'.
 * Entries that are not objects, and tasks whose `wave` is null/undefined or
 * does not convert to a finite number, are ignored.
 *
 * @param {object} tasks - map of taskId → task record
 * @returns {number|null} the smallest pending wave, or null when none qualifies
 */
function firstIncompleteWave(tasks) {
  const isRecord = (entry) => Boolean(entry) && typeof entry === 'object';
  const isPending = (entry) => entry.status !== 'done' && entry.status !== 'ambiguous';
  const hasNumericWave = (entry) => entry.wave != null && Number.isFinite(Number(entry.wave));

  const lowest = Object.values(tasks)
    .filter((entry) => isRecord(entry) && isPending(entry) && hasNumericWave(entry))
    .map((entry) => Number(entry.wave))
    .reduce((smallest, wave) => (wave < smallest ? wave : smallest), Infinity);

  return lowest === Infinity ? null : lowest;
}
205
+
206
/**
 * Production completion check: forwards `opts` to assertCompletion from
 * `./gsd-t-completion-check.cjs`. The module is required on first call rather
 * than at load time, so callers that inject `assertCompletionImpl` never load it.
 */
function defaultAssertCompletion(opts) {
  const completionCheck = require('./gsd-t-completion-check.cjs');
  return completionCheck.assertCompletion(opts);
}
210
+
211
/**
 * Best-effort check that a process with the given pid currently exists.
 *
 * Only positive integers (or strings holding one) are probed. Anything else —
 * including negative values, which process.kill would interpret as a process
 * group / "every process" target, and booleans, which would coerce to pid 1 —
 * is reported as not alive without issuing a signal.
 *
 * @param {number|string} pid
 * @returns {boolean} true when the process exists (even if owned by another user)
 */
function defaultPidLiveness(pid) {
  const numericPid = typeof pid === 'string' && pid.trim() !== '' ? Number(pid) : pid;
  if (typeof numericPid !== 'number' || !Number.isInteger(numericPid) || numericPid <= 0) {
    return false;
  }
  try {
    // signal 0 doesn't deliver a signal — only checks existence/permission.
    process.kill(numericPid, 0);
    return true;
  } catch (err) {
    // ESRCH = no such process; EPERM = exists but not ours (treat as alive)
    return Boolean(err) && err.code === 'EPERM';
  }
}
223
+
224
+ module.exports = {
225
+ recoverRunState,
226
+ writeRecoveredState,
227
+ archiveState,
228
+ // exported for tests
229
+ _firstIncompleteWave: firstIncompleteWave,
230
+ _TERMINAL_STATUSES: TERMINAL_STATUSES,
231
+ };
@@ -0,0 +1,219 @@
1
+ 'use strict';
2
+
3
+ const { spawn } = require('child_process');
4
+ const path = require('path');
5
+ const { assertCompletion } = require('./gsd-t-completion-check.cjs');
6
+ const transcriptTee = require('./gsd-t-transcript-tee.cjs');
7
+
8
// Executable name used when the environment does not override it.
const DEFAULT_CLAUDE_BIN = 'claude';

/** Current time as an ISO-8601 string. */
function nowIso() {
  const stamp = new Date();
  return stamp.toISOString();
}

/**
 * Choose the binary to spawn: a non-empty `GSD_T_CLAUDE_BIN` in `env` wins,
 * otherwise DEFAULT_CLAUDE_BIN.
 */
function pickClaudeBin(env) {
  const override = env.GSD_T_CLAUDE_BIN;
  return override ? override : DEFAULT_CLAUDE_BIN;
}
17
+
18
/**
 * Build the CLI argument list for one worker invocation.
 *
 * @param {object} [task] - only `task.model` is read; falsy/missing means 'sonnet'
 * @returns {string[]} flags in the order the worker is spawned with
 */
function buildArgs(task) {
  let model = 'sonnet';
  if (task && task.model) {
    model = task.model;
  }
  const streamFlags = ['--output-format', 'stream-json', '--verbose'];
  return ['-p', '--dangerously-skip-permissions', ...streamFlags, '--model', model];
}
28
+
29
/**
 * Send a synthetic `task-boundary` frame to `onFrame`.
 *
 * Does nothing when `onFrame` is not a function. Fields from `extra` are merged
 * over the base frame. Exceptions thrown by `onFrame` are swallowed so a faulty
 * listener cannot take the worker down.
 *
 * @param {Function} onFrame - frame listener
 * @param {object} task - reads `id`, `domain`, `wave`
 * @param {string} state - boundary state label
 * @param {object} [extra] - additional fields to merge into the frame
 */
function emitBoundary(onFrame, task, state, extra) {
  if (typeof onFrame !== 'function') return;

  const boundary = Object.assign(
    {
      type: 'task-boundary',
      taskId: task.id,
      domain: task.domain || null,
      wave: task.wave == null ? null : task.wave,
      state,
      ts: nowIso()
    },
    extra || {}
  );

  try {
    onFrame(boundary);
  } catch (_) {
    /* onFrame must not kill worker */
  }
}
42
+
43
/**
 * Split `buffer` on '\n', pass every complete non-empty line to `onLine`, and
 * return whatever follows the last newline (the still-incomplete tail).
 *
 * @param {string} buffer - accumulated text
 * @param {Function} onLine - called once per non-empty line, without the newline
 * @returns {string} the unterminated remainder
 */
function parseLines(buffer, onLine) {
  let cursor = 0;
  let newlineAt = buffer.indexOf('\n', cursor);
  while (newlineAt !== -1) {
    const segment = buffer.slice(cursor, newlineAt);
    if (segment) onLine(segment);
    cursor = newlineAt + 1;
    newlineAt = buffer.indexOf('\n', cursor);
  }
  return buffer.slice(cursor);
}
54
+
55
+ function runWorker(opts) {
56
+ const {
57
+ task,
58
+ brief,
59
+ config,
60
+ onFrame,
61
+ env = process.env,
62
+ spawnImpl = spawn
63
+ } = opts || {};
64
+
65
+ if (!task || !task.id) throw new Error('runWorker requires task with id');
66
+ if (typeof brief !== 'string' || !brief.length) throw new Error('runWorker requires non-empty brief');
67
+ if (!config || !config.projectDir) throw new Error('runWorker requires config.projectDir');
68
+ if (!config.workerTimeoutMs) throw new Error('runWorker requires config.workerTimeoutMs');
69
+
70
+ const startMs = Date.now();
71
+ const taskStart = nowIso();
72
+ const bin = pickClaudeBin(env);
73
+ const args = buildArgs(task);
74
+
75
+ // M42 D1 — allocate a spawn-id and open the transcript registry entry
76
+ const parentSpawnId = (opts && opts.parentSpawnId) || env.GSD_T_SPAWN_ID || null;
77
+ const spawnId = (opts && opts.spawnId) || transcriptTee.allocateSpawnId({ parentId: parentSpawnId });
78
+ let transcriptOpened = false;
79
+ try {
80
+ transcriptTee.openTranscript({
81
+ spawnId,
82
+ projectDir: config.projectDir,
83
+ meta: {
84
+ parentId: parentSpawnId,
85
+ command: 'orchestrator-worker',
86
+ description: `task=${task.id} domain=${task.domain || '-'} wave=${task.wave == null ? '-' : task.wave}`,
87
+ model: (task && task.model) || 'sonnet',
88
+ },
89
+ });
90
+ transcriptOpened = true;
91
+ } catch (_) { /* tee is best-effort */ }
92
+
93
+ emitBoundary(onFrame, task, 'start');
94
+
95
+ return new Promise((resolve) => {
96
+ const child = spawnImpl(bin, args, {
97
+ cwd: config.projectDir,
98
+ env: { ...env, GSD_T_PROJECT_DIR: config.projectDir, GSD_T_SPAWN_ID: spawnId },
99
+ stdio: ['pipe', 'pipe', 'pipe']
100
+ });
101
+
102
+ const workerPid = child && typeof child.pid === 'number' ? child.pid : null;
103
+ emitBoundary(onFrame, task, 'pid', { workerPid });
104
+ if (transcriptOpened && workerPid != null) {
105
+ try {
106
+ const idx = transcriptTee._readIndex(config.projectDir);
107
+ const i = idx.spawns.findIndex((s) => s.spawnId === spawnId);
108
+ if (i >= 0) { idx.spawns[i].workerPid = workerPid; transcriptTee._writeIndex(config.projectDir, idx); }
109
+ } catch (_) {}
110
+ }
111
+ if (typeof opts.onSpawn === 'function') {
112
+ try { opts.onSpawn({ child, pid: workerPid, spawnId }); } catch (_) {}
113
+ }
114
+
115
+ let stdoutBuf = '';
116
+ let stderrBuf = '';
117
+ let timedOut = false;
118
+ let killTimer = null;
119
+
120
+ const handleLine = (line) => {
121
+ if (transcriptOpened) {
122
+ try { transcriptTee.appendFrame({ spawnId, projectDir: config.projectDir, frame: line }); } catch (_) {}
123
+ }
124
+ try {
125
+ const frame = JSON.parse(line);
126
+ if (typeof onFrame === 'function') onFrame(frame);
127
+ } catch (_) {
128
+ if (typeof onFrame === 'function') {
129
+ onFrame({ type: 'raw', line });
130
+ }
131
+ }
132
+ };
133
+
134
+ child.stdout.on('data', (chunk) => {
135
+ stdoutBuf += chunk.toString('utf8');
136
+ stdoutBuf = parseLines(stdoutBuf, handleLine);
137
+ });
138
+ child.stderr.on('data', (chunk) => {
139
+ stderrBuf += chunk.toString('utf8');
140
+ });
141
+
142
+ const timeout = setTimeout(() => {
143
+ timedOut = true;
144
+ const elapsed = Date.now() - startMs;
145
+ if (config.logger && typeof config.logger.log === 'function') {
146
+ config.logger.log(`[worker_timeout] task=${task.id} budget=${config.workerTimeoutMs}ms elapsed=${elapsed}ms`);
147
+ }
148
+ try { child.kill('SIGTERM'); } catch (_) {}
149
+ killTimer = setTimeout(() => { try { child.kill('SIGKILL'); } catch (_) {} }, 5000);
150
+ }, config.workerTimeoutMs);
151
+
152
+ child.on('error', (err) => {
153
+ clearTimeout(timeout);
154
+ if (killTimer) clearTimeout(killTimer);
155
+ const durationMs = Date.now() - startMs;
156
+ emitBoundary(onFrame, task, 'failed', { reason: 'spawn_error', error: String(err), workerPid });
157
+ resolve({
158
+ result: { ok: false, missing: ['spawn_error'], details: { error: String(err) } },
159
+ exitCode: -1,
160
+ durationMs,
161
+ timedOut: false,
162
+ stderr: stderrBuf,
163
+ workerPid
164
+ });
165
+ });
166
+
167
+ child.on('exit', async (code, signal) => {
168
+ clearTimeout(timeout);
169
+ if (killTimer) clearTimeout(killTimer);
170
+ if (stdoutBuf.length) {
171
+ stdoutBuf = parseLines(stdoutBuf + '\n', handleLine);
172
+ }
173
+ const durationMs = Date.now() - startMs;
174
+ const exitCode = code == null ? (signal ? -1 : 0) : code;
175
+
176
+ let result;
177
+ try {
178
+ result = assertCompletion({
179
+ taskId: task.canonicalId || task.id,
180
+ projectDir: config.projectDir,
181
+ expectedBranch: task.expectedBranch || config.expectedBranch || 'main',
182
+ taskStart,
183
+ skipTest: !!task.skipTest,
184
+ ownedPatterns: task.ownedPatterns || []
185
+ });
186
+ } catch (err) {
187
+ result = { ok: false, missing: ['completion_check_error'], details: { error: String(err) } };
188
+ }
189
+
190
+ if (timedOut) {
191
+ result = {
192
+ ok: false,
193
+ missing: ['worker_exited_via_timeout', ...(result.missing || [])],
194
+ details: { ...(result.details || {}), timedOut: true, budget: config.workerTimeoutMs }
195
+ };
196
+ } else if (exitCode !== 0) {
197
+ const missing = result.missing ? [...result.missing] : [];
198
+ if (!missing.includes('worker_exit_nonzero')) missing.unshift('worker_exit_nonzero');
199
+ result = { ok: false, missing, details: { ...(result.details || {}), exitCode, signal: signal || null, stderr: stderrBuf.slice(-2000) } };
200
+ }
201
+
202
+ emitBoundary(onFrame, task, result.ok ? 'done' : 'failed', { exitCode, durationMs, workerPid });
203
+ if (transcriptOpened) {
204
+ try { transcriptTee.closeTranscript({ spawnId, projectDir: config.projectDir, status: result.ok ? 'done' : 'failed' }); } catch (_) {}
205
+ }
206
+ resolve({ result, exitCode, durationMs, timedOut, stderr: stderrBuf, workerPid, spawnId });
207
+ });
208
+
209
+ child.stdin.on('error', () => { /* ignore — covered by child exit */ });
210
+ child.stdin.write(brief);
211
+ child.stdin.end();
212
+ });
213
+ }
214
+
215
+ module.exports = {
216
+ runWorker,
217
+ buildArgs,
218
+ DEFAULT_CLAUDE_BIN
219
+ };