wogiflow 2.33.0 → 2.34.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -244,10 +244,15 @@ function enforceTokenBudget(usageLog, dailyBudget, now, jobName, estimatedTokens
244
244
  }
245
245
  const key = d.toISOString().slice(0, 10); // YYYY-MM-DD
246
246
  const dayLog = (usageLog && usageLog[key]) || {};
247
- const usedToday = Object.values(dayLog).reduce((a, b) => a + (Number(b) || 0), 0);
247
+ // F17 (R-379): use explicit Number.isFinite guard so a legitimate 0 isn't
248
+ // collapsed by `|| 0` falsy-fallthrough (per naming-conventions.md).
249
+ const usedToday = Object.values(dayLog).reduce(
250
+ (a, b) => a + (Number.isFinite(Number(b)) ? Number(b) : 0),
251
+ 0
252
+ );
248
253
  const estimated = Number.isFinite(estimatedTokens)
249
254
  ? estimatedTokens
250
- : (DEFAULT_TOKENS_PER_INVOCATION[jobName] || 0);
255
+ : (DEFAULT_TOKENS_PER_INVOCATION[jobName] ?? 0);
251
256
  const projectedAfter = usedToday + estimated;
252
257
  if (!Number.isFinite(dailyBudget) || dailyBudget <= 0) {
253
258
  return {
@@ -344,18 +349,11 @@ function isTransientError(err) {
344
349
  return false;
345
350
  }
346
351
 
347
- /**
348
- * Compute the cron-friendly "yesterday" anchor for `git diff @{yesterday}..HEAD`.
349
- * Returns the ISO date 24h ago.
350
- *
351
- * @param {Date|number|string} [now]
352
- * @returns {string}
353
- */
354
- function yesterdayIsoDate(now = Date.now()) {
355
- const d = new Date(now);
356
- d.setUTCDate(d.getUTCDate() - 1);
357
- return d.toISOString();
358
- }
352
+ // F20 (R-379): removed `yesterdayIsoDate(now)` — exported but never imported
353
+ // anywhere in scope. The runner uses `git log --since="24 hours ago"` for
354
+ // CI-portability reasons (shallow checkouts don't have reflog state for
355
+ // `@{yesterday}`), and no other consumer wants the ISO-date form. Removed
356
+ // to avoid a maintenance-trap export. Re-add if a real consumer materializes.
359
357
 
360
358
  module.exports = {
361
359
  JOB_NAMES,
@@ -373,5 +371,4 @@ module.exports = {
373
371
  updateDedupIssue,
374
372
  validateModelName,
375
373
  isTransientError,
376
- yesterdayIsoDate,
377
374
  };
@@ -39,6 +39,41 @@ const { parseFrontmatter } = require('./skill-portability');
39
39
  const { listBundleFiles } = require('./skill-export-agentskills');
40
40
 
41
41
  const DEFAULT_LICENSE = 'MIT';
42
+
43
/**
 * Sanitize a skill name for use in path construction.
 *
 * F9 (R-379): the previous code passed `frontmatter.name` directly into
 * `path.join(outDir, \`skills/${name}/...\`)` — a malicious skill with
 * `name: ../../../etc` would escape the output directory.
 *
 * Strategy: replace path separators and `..` sequences with dashes (so a
 * useful name survives), drop NUL bytes, then strip the whole leading run of
 * whitespace, dots and dashes so the result can never be a hidden directory
 * (`.foo`) or look like a CLI flag (`-foo`).
 *
 * Leading whitespace is stripped in the same pass as the dots/dashes on
 * purpose: stripping dots first and trimming afterwards let `" .hidden"`
 * through as `.hidden`.
 *
 * @param {string} raw
 * @returns {string} sanitized name
 * @throws {Error} if `raw` is not a non-empty string, or if nothing is left
 *   after sanitization.
 */
function sanitizePluginName(raw) {
  if (typeof raw !== 'string' || !raw.trim()) {
    throw new Error('sanitizePluginName: name must be a non-empty string');
  }
  const s = raw
    .replace(/[/\\]/g, '-') // path separators → dash
    .replace(/\.\./g, '--') // .. sequences → dash-dash
    .replace(/\0/g, '') // strip nulls just in case
    .replace(/^[\s.-]+/, '') // no leading whitespace / dot / dash, in any order
    .trim(); // drop trailing whitespace
  if (!s) {
    throw new Error(`sanitizePluginName: name "${raw}" sanitizes to empty`);
  }
  return s;
}
42
77
  const DEFAULT_AUTHOR = 'wogiflow';
43
78
 
44
79
  /**
@@ -89,7 +124,12 @@ function exportToClaudePlugin(skillDir, opts = {}) {
89
124
  frontmatter = parseFrontmatter(safeReadFile(skillMdPath));
90
125
  }
91
126
 
92
- const name = opts.name ?? frontmatter.name ?? path.basename(skillDir);
127
+ // F9 (R-379): sanitize `name` before using it in path construction. A
128
+ // skill author who sets `name: ../../../etc` in frontmatter could escape
129
+ // the output directory via `path.join(outDir, 'skills/../../../etc/SKILL.md')`.
130
+ // Strip path separators and `..` sequences; reject empty/dotfile names.
131
+ const rawName = opts.name ?? frontmatter.name ?? path.basename(skillDir);
132
+ const name = sanitizePluginName(rawName);
93
133
  const version = opts.version ?? frontmatter.version ?? '0.0.0';
94
134
  const description = frontmatter.description ?? '';
95
135
  const license = frontmatter.license ?? DEFAULT_LICENSE;
@@ -50,8 +50,13 @@ const BLOCKER_PATTERNS = [
50
50
  { pattern: /\bflow-utils\b/, label: 'flow-utils import/reference' },
51
51
  { pattern: /require\(['"][^'"]*\/scripts\/flow[-/]/, label: 'WogiFlow scripts/ require()' },
52
52
  { pattern: /from\s+['"][^'"]*\/scripts\/flow[-/]/, label: 'WogiFlow scripts/ import' },
53
- // Slash-command invocations (any /wogi-* with a word char after)
54
- { pattern: /\/wogi-[a-z][a-z0-9-]*/i, label: '/wogi-* slash command' },
53
+ // Slash-command invocations (any /wogi-* with a word char after).
54
+ // F7 (R-379): require a lookbehind for start-of-line, whitespace, or
55
+ // quote/bracket — so legitimate file paths like
56
+ // `.claude/skills/wogi-start/skill.md` or `/workflows/wogi-status` don't
57
+ // trip a false-positive blocker. Lookbehind (not capturing group) so the
58
+ // matched substring is the slash-command itself, e.g. `/wogi-finalize`.
59
+ { pattern: /(?<=^|[\s`'"(\[])\/wogi-[a-z][a-z0-9-]*\b/im, label: '/wogi-* slash command' },
55
60
  // Shell invocations of the local flow CLI
56
61
  { pattern: /\.\/scripts\/flow\b/, label: 'local ./scripts/flow CLI call' },
57
62
  { pattern: /\bflow\s+(?:wogi-|skill\s+|story\s+|start\s+|status\b|ready\b|finalize\b)/, label: 'flow CLI subcommand specific to WogiFlow' },
@@ -237,7 +242,12 @@ function assessSkillPortability(skillDir, opts = {}) {
237
242
  });
238
243
  }
239
244
 
240
- // Explicit author declaration: portable: false short-circuits scanning.
245
+ // Explicit author declaration. F14 (R-379): previously the comment claimed
246
+ // `portable: false` "short-circuits scanning" — but there was no early
247
+ // return; the function scanned anyway, producing a needlessly long blocker
248
+ // list for skills the author already marked non-portable. Short-circuit
249
+ // now matches the comment: return early so the caller gets a single,
250
+ // clear blocker ("author opted out") instead of dozens of pattern hits.
241
251
  const declaredPortable = typeof manifest.portable === 'string'
242
252
  ? manifest.portable.toLowerCase() === 'true'
243
253
  : null;
@@ -248,6 +258,14 @@ function assessSkillPortability(skillDir, opts = {}) {
248
258
  match: 'portable: false',
249
259
  label: 'manifest declares portable: false',
250
260
  });
261
+ // Short-circuit: author opted out, no need to enumerate every pattern hit.
262
+ return {
263
+ portable: false,
264
+ blockers,
265
+ manifest,
266
+ scannedFiles: [],
267
+ skillMdPath,
268
+ };
251
269
  }
252
270
 
253
271
  // Compose pattern list: builtin + extras.
@@ -22,6 +22,21 @@ const http = require('node:http');
22
22
  const readline = require('node:readline');
23
23
  const { safeJsonParseContent } = require('./utils');
24
24
 
25
+ // S5 (wf-ee87a24e): the version this long-lived server process loaded at boot.
26
+ // Compared against the on-disk package.json to detect a mid-session
27
+ // `npm i wogiflow@latest` that left this process running stale code.
28
+ const SERVER_VERSION = (() => {
29
+ try { return require('../package.json').version || null; } catch (_err) { return null; }
30
+ })();
31
/**
 * Read the `version` field of the package.json one directory above this
 * file, straight from disk (a fresh read, so the `require` cache is not
 * consulted).
 *
 * @returns {string|null} the on-disk version, or null when the file cannot be
 *   read or parsed, or when its `version` is missing/empty.
 */
function readDiskVersion() {
  try {
    const { readFileSync } = require('node:fs');
    const { join } = require('node:path');
    const manifestText = readFileSync(join(__dirname, '..', 'package.json'), 'utf-8');
    const manifest = JSON.parse(manifestText);
    return manifest.version || null;
  } catch (_err) {
    return null;
  }
}
39
+
25
40
  // ============================================================
26
41
  // Constants
27
42
  // ============================================================
@@ -129,8 +144,11 @@ When you receive a message:
129
144
  2. If it's a question or investigation request → do the work, then ALWAYS send results back
130
145
  3. If it's a status check → respond with your current task status
131
146
 
132
- CRITICAL — ALWAYS REPLY TO THE MANAGER:
133
- After completing ANY work triggered by a channel message, you MUST send results back using the workspace_send_message tool with to: "manager". The user only sees the manager terminal — if you don't reply, they never see your results.
147
+ SUSTAINED EXECUTION — a task dispatch runs to COMPLETION across turns:
148
+ A "/wogi-" dispatch (especially one you decompose into sub-tasks) is NOT a one-turn request. Work through ALL sub-tasks in the same session; the Stop hook's continuation gate will keep you going while the task is in-progress with work remaining. Do NOT stop to "report progress" mid-task — only reply when the task is COMPLETE or you are ESCALATING a blocker.
149
+
150
+ CRITICAL — REPLY TO THE MANAGER WHEN THE TASK IS DONE OR BLOCKED:
151
+ When the dispatched task is complete (or you must escalate), you MUST send results back using the workspace_send_message tool with to: "manager". The user only sees the manager terminal — if you don't reply, they never see your results.
134
152
 
135
153
  Example: workspace_send_message(to: "manager", message: "## Investigation Results\\n\\n1. Found the bug in X\\n2. Root cause: Y\\n3. Fix: Z")
136
154
 
@@ -466,18 +484,60 @@ function broadcastSSE(event) {
466
484
 
467
485
  const channelTracking = require('./workspace-channel-tracking');
468
486
 
487
+ // S4 (wf-87611c5e): the channel server is the only process that sees every
488
+ // inbound dispatch, so it owns the "ack-received" timestamp used by GET /status.
489
+ let _lastInboundAt = 0;
490
+ const STATUS_STALENESS_MS = (() => {
491
+ const raw = parseInt(process.env.WOGI_STATUS_STALENESS_MS || '', 10);
492
+ return Number.isFinite(raw) && raw > 0 ? raw : 300000;
493
+ })();
494
+
469
495
  // ============================================================
470
496
  // HTTP Server
471
497
  // ============================================================
472
498
 
473
499
  const server = http.createServer(async (req, res) => {
474
- // Health check — minimal info, no topology exposure
500
+ // Health check — minimal info, no topology exposure. PURE liveness: "the
501
+ // server process is up." Says nothing about whether the agent is working —
502
+ // use /status for that (S4).
475
503
  if (req.method === 'GET' && req.url === '/health') {
476
504
  res.writeHead(200, { 'Content-Type': 'application/json' });
477
505
  res.end(JSON.stringify({ status: 'ok', repo: REPO_NAME, port: PORT }));
478
506
  return;
479
507
  }
480
508
 
509
+ // Activity status (S4 / wf-87611c5e) — the real execution state, so a manager
510
+ // can never mistake a channel POST `ok` for "work happening". Derived from the
511
+ // worker's own state files + the last inbound dispatch this server saw.
512
+ if (req.method === 'GET' && req.url === '/status') {
513
+ let body;
514
+ try {
515
+ const path = require('node:path');
516
+ const stateDir = path.join(process.cwd(), '.workflow', 'state');
517
+ body = channelTracking.computeWorkerStatus({
518
+ stateDir,
519
+ repoName: REPO_NAME,
520
+ lastInboundAt: _lastInboundAt || undefined,
521
+ stalenessMs: STATUS_STALENESS_MS
522
+ });
523
+ body.port = PORT;
524
+ // S5: version-drift signal — if the on-disk wogiflow differs from what this
525
+ // long-lived server loaded, a `flow workspace restart` is required to load it.
526
+ const diskVersion = readDiskVersion();
527
+ body.serverVersion = SERVER_VERSION;
528
+ body.diskVersion = diskVersion;
529
+ body.versionDrift = Boolean(SERVER_VERSION && diskVersion && SERVER_VERSION !== diskVersion);
530
+ if (body.versionDrift) {
531
+ body.restartRequired = `Server is running ${SERVER_VERSION} but ${diskVersion} is on disk — run 'flow workspace restart ${REPO_NAME}'`;
532
+ }
533
+ } catch (_err) {
534
+ body = { repo: REPO_NAME, port: PORT, state: 'unknown' };
535
+ }
536
+ res.writeHead(200, { 'Content-Type': 'application/json' });
537
+ res.end(JSON.stringify(body));
538
+ return;
539
+ }
540
+
481
541
  // SSE endpoint for event subscriptions
482
542
  if (req.method === 'GET' && req.url?.startsWith('/events')) {
483
543
  const lastEventId = req.headers['last-event-id'] || '';
@@ -485,6 +545,44 @@ const server = http.createServer(async (req, res) => {
485
545
  return;
486
546
  }
487
547
 
548
+ // Manager-triggered restart (S5 / wf-ee87a24e). Writes the wogi-claude
549
+ // wrapper's restart flag and SIGTERMs this server's parent (the claude
550
+ // process). The wrapper relaunches claude with a FRESH require cache —
551
+ // reloading any upgraded wogiflow code, and claude respawns this MCP server.
552
+ // No PID tracking needed; reuses the proven task-boundary restart loop.
553
+ if (req.method === 'POST' && (req.url === '/restart' || req.url === '/control/restart')) {
554
+ const rawFrom = req.headers['x-wogi-from'] || '';
555
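+ // localhost-bound already; additionally reject senders whose x-wogi-from header names anyone other than the manager. NOTE: a request with NO x-wogi-from header passes this check.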
556
+ if (rawFrom && rawFrom !== 'manager' && rawFrom !== 'workspace-manager') {
557
+ res.writeHead(403, { 'Content-Type': 'application/json' });
558
+ res.end(JSON.stringify({ ok: false, error: 'restart may only be triggered by the manager' }));
559
+ return;
560
+ }
561
+ let scheduled = false;
562
+ try {
563
+ const fs = require('node:fs');
564
+ const nodePath = require('node:path');
565
+ const flagPath = process.env.WOGI_RESTART_FLAG ||
566
+ nodePath.join(process.cwd(), '.workflow', 'state', 'restart-requested');
567
+ fs.mkdirSync(nodePath.dirname(flagPath), { recursive: true });
568
+ fs.writeFileSync(flagPath, JSON.stringify({
569
+ version: 1, reason: 'manager-restart', repo: REPO_NAME,
570
+ triggeredAt: new Date().toISOString()
571
+ }, null, 2));
572
+ // Defer the SIGTERM briefly so the HTTP response flushes first.
573
+ const ppid = process.ppid;
574
+ setTimeout(() => { try { process.kill(ppid, 'SIGTERM'); } catch (_err) { /* parent gone */ } }, 150);
575
+ scheduled = true;
576
+ } catch (err) {
577
+ res.writeHead(500, { 'Content-Type': 'application/json' });
578
+ res.end(JSON.stringify({ ok: false, error: err.message }));
579
+ return;
580
+ }
581
+ res.writeHead(202, { 'Content-Type': 'application/json' });
582
+ res.end(JSON.stringify({ ok: true, scheduled, repo: REPO_NAME, note: 'worker restarting; channel server will respawn with fresh code' }));
583
+ return;
584
+ }
585
+
488
586
  // Receive webhook (POST)
489
587
  if (req.method === 'POST') {
490
588
  const { body, truncated } = await collectBody(req, MAX_BODY_BYTES);
@@ -508,6 +606,11 @@ const server = http.createServer(async (req, res) => {
508
606
  cleanBody = body.substring(effortMatch[0].length);
509
607
  }
510
608
 
609
+ // S4: record when a dispatch arrived (ack-received signal for /status).
610
+ if (channelTracking.DISPATCH_BODY_PATTERN.test(cleanBody)) {
611
+ _lastInboundAt = Date.now();
612
+ }
613
+
511
614
  // Forward as channel notification to Claude Code
512
615
  const meta = {
513
616
  from,
@@ -115,11 +115,112 @@ function tryReconcileInboundCompletion(ctx, tracking) {
115
115
  }
116
116
  }
117
117
 
118
+ // ============================================================
119
+ // Worker activity status (epic-workspace-sustained-exec / S4, wf-87611c5e)
120
+ // ============================================================
121
+
122
+ const fsNode = require('node:fs');
123
+ const pathNode = require('node:path');
124
+
125
+ const ACTIVE_PHASES = new Set(['coding', 'validating']);
126
+ const DEFAULT_STALENESS_MS = 300000; // 5 min
127
+
128
/**
 * Read `p` as UTF-8 and parse it as JSON.
 *
 * @param {string} p file path
 * @returns {*} the parsed value, or null if reading or parsing throws.
 */
function _safeRead(p) {
  try {
    const text = fsNode.readFileSync(p, 'utf-8');
    return JSON.parse(text);
  } catch (_err) {
    return null;
  }
}
131
/**
 * Modification time of `p` in milliseconds.
 *
 * @param {string} p file path
 * @returns {number} `mtimeMs` from `statSync`, or 0 if the stat call throws.
 */
function _mtimeMs(p) {
  try {
    const { mtimeMs } = fsNode.statSync(p);
    return mtimeMs;
  } catch (_err) {
    return 0;
  }
}
134
+
135
/**
 * Derive the worker's real execution state for GET /status. Distinguishes
 * ack-received / work-started / in-progress / complete / blocked / idle so the
 * manager can never mistake a channel POST `ok` (or `/health` ok) for progress.
 *
 * Pure-ish (reads files from stateDir); injectable for tests. Never throws:
 * any failure yields the partially-filled default result.
 *
 * State derivation, in order:
 *  - nothing in `ready.json#inProgress`: `complete` if the first
 *    `recentlyCompleted` entry finished within the staleness window,
 *    otherwise `idle`;
 *  - `worker-continuation.json` is escalated for the same task: `blocked`;
 *  - phase is one of ACTIVE_PHASES: `in-progress` when a state file was
 *    touched within the staleness window, else `work-started`;
 *  - otherwise: `ack-received`.
 *
 * `lastSha` is part of the result shape but nothing in this function sets it;
 * it is always null here.
 *
 * @param {Object} [opts] a null / non-object value is treated as `{}`
 * @param {string} opts.stateDir worker .workflow/state dir
 * @param {string} [opts.repoName]
 * @param {number} [opts.lastInboundAt] ms epoch of the last dispatch POST the
 *   server saw (accepted but not read by this function)
 * @param {number} [opts.stalenessMs] heartbeat freshness window; values that
 *   are not finite and > 0 fall back to DEFAULT_STALENESS_MS
 * @param {number} [opts.now]
 * @returns {{repo, state, taskId, subtasks:{total,remaining}, lastHeartbeatAt, lastSha, phase}}
 */
function computeWorkerStatus(opts = {}) {
  // The default parameter only covers `undefined`; an explicit null (or a
  // non-object) must not throw before the fail-open try block is entered.
  const options = opts !== null && typeof opts === 'object' ? opts : {};
  const out = {
    repo: options.repoName || null,
    state: 'idle',
    taskId: null,
    subtasks: { total: 0, remaining: 0 },
    lastHeartbeatAt: null,
    lastSha: null,
    phase: null
  };
  try {
    const stateDir = options.stateDir;
    if (!stateDir) return out;

    const now = options.now || Date.now();
    // A zero/negative window would make every comparison below false, so it
    // is treated like "not provided".
    const stalenessMs = Number.isFinite(options.stalenessMs) && options.stalenessMs > 0
      ? options.stalenessMs
      : DEFAULT_STALENESS_MS;

    const ready = _safeRead(pathNode.join(stateDir, 'ready.json')) || {};
    const phaseData = _safeRead(pathNode.join(stateDir, 'workflow-phase.json')) || {};
    const ledger = _safeRead(pathNode.join(stateDir, 'subtask-state.json'));
    const counter = _safeRead(pathNode.join(stateDir, 'worker-continuation.json'));
    const phase = typeof phaseData.phase === 'string' ? phaseData.phase : null;
    out.phase = phase;

    // Only real arrays are treated as lists; a malformed state file (string,
    // object, ...) counts as empty instead of being indexed.
    const firstOf = (list) => (Array.isArray(list) && list[0]) || null;
    const inProgress = firstOf(ready.inProgress);

    // Activity freshness: newest mtime of the files a working worker touches.
    const lastActivityMs = Math.max(
      _mtimeMs(pathNode.join(stateDir, 'workflow-phase.json')),
      _mtimeMs(pathNode.join(stateDir, 'subtask-state.json')),
      _mtimeMs(pathNode.join(stateDir, 'worker-continuation.json'))
    );
    if (lastActivityMs > 0) out.lastHeartbeatAt = new Date(lastActivityMs).toISOString();
    const activityFresh = lastActivityMs > 0 && (now - lastActivityMs) < stalenessMs;

    if (!inProgress) {
      const recent = firstOf(ready.recentlyCompleted);
      const completedTs = recent && recent.completedAt ? Date.parse(recent.completedAt) : NaN;
      if (Number.isFinite(completedTs) && (now - completedTs) < stalenessMs) {
        out.state = 'complete';
        out.taskId = recent.id || null;
      } else {
        out.state = 'idle';
      }
      return out;
    }

    out.taskId = inProgress.id || null;
    // The ledger counts only when it is unscoped or scoped to the current task.
    if (ledger && (!ledger.taskId || ledger.taskId === out.taskId) && Array.isArray(ledger.subtasks)) {
      const open = ledger.subtasks.filter(s => s && (s.status === 'pending' || s.status === 'in_progress')).length;
      out.subtasks = { total: ledger.subtasks.length, remaining: open };
    }

    const escalated = counter && counter.taskId === out.taskId && counter.escalated === true;
    if (escalated) {
      out.state = 'blocked';
    } else if (ACTIVE_PHASES.has(phase)) {
      out.state = activityFresh ? 'in-progress' : 'work-started';
    } else {
      // Picked up (in inProgress) but not yet in active-work phase.
      out.state = 'ack-received';
    }
    return out;
  } catch (_err) {
    return out; // fail-open: never 500
  }
}
215
+
118
216
  module.exports = {
119
217
  TASK_ID_PATTERN,
120
218
  DISPATCH_BODY_PATTERN,
121
219
  QUESTION_BODY_PATTERN,
122
220
  COMPLETION_BODY_PATTERN,
123
221
  tryRecordInboundDispatch,
124
- tryReconcileInboundCompletion
222
+ tryReconcileInboundCompletion,
223
+ computeWorkerStatus,
224
+ ACTIVE_PHASES,
225
+ DEFAULT_STALENESS_MS
125
226
  };
@@ -136,6 +136,33 @@ function reconcileDispatch(workspaceRoot, taskId, status, reason) {
136
136
  return null;
137
137
  }
138
138
 
139
/**
 * Push out the deadline of a pending dispatch when a worker-progress
 * heartbeat arrives (epic-workspace-sustained-exec / S3), so that a worker
 * still grinding through a decomposed task is not flagged as a silent halt
 * once the original expectedDeadline passes.
 *
 * Searches the dispatch list newest-first for a record with the given taskId
 * whose status is 'pending'. On a hit it stamps lastHeartbeatAt, sets
 * expectedDeadline to now + the extension, bumps heartbeatCount, persists the
 * state and returns the record. The status is left untouched.
 *
 * @param {string} workspaceRoot
 * @param {string} taskId
 * @param {number} [extendMs=DEFAULT_DURATION_MS] used only when finite and > 0
 * @returns {Object|null} the updated record, or null when no pending dispatch
 *   matches taskId
 */
function refreshDispatchDeadline(workspaceRoot, taskId, extendMs) {
  const state = loadState(workspaceRoot);

  let extension = DEFAULT_DURATION_MS;
  if (Number.isFinite(extendMs) && extendMs > 0) {
    extension = extendMs;
  }

  let idx = state.dispatches.length;
  while (idx-- > 0) {
    const record = state.dispatches[idx];
    const isPendingMatch = Boolean(record) && record.taskId === taskId && record.status === 'pending';
    if (!isPendingMatch) continue;

    record.lastHeartbeatAt = new Date().toISOString();
    record.expectedDeadline = new Date(Date.now() + extension).toISOString();
    record.heartbeatCount = (record.heartbeatCount || 0) + 1;
    saveState(workspaceRoot, state);
    return record;
  }
  return null;
}
165
+
139
166
  /**
140
167
  * Read all currently-active dispatch records (not archived).
141
168
  *
@@ -306,6 +333,7 @@ module.exports = {
306
333
  MAX_ACTIVE,
307
334
  recordDispatch,
308
335
  reconcileDispatch,
336
+ refreshDispatchDeadline,
309
337
  readDispatches,
310
338
  getOverdueDispatches,
311
339
  attachCompletionSummary,
@@ -25,6 +25,10 @@ const MESSAGE_TYPES = [
25
25
  'task-complete', // "I finished my side of feature Z"
26
26
  'worker-stopped', // Graceful Stop hook — worker session ending, not necessarily at task completion
27
27
  'worker-ready', // Fresh worker session with empty queue — "got anything for me?" (wf-restart-handoff)
28
+ 'worker-progress', // Heartbeat on a forced continuation — work ongoing, NOT a stop (epic-workspace-sustained-exec S3)
29
+ 'worker-blocked', // Escalation: gate hit a cap / no-progress / validation failure — needs manager (S2/S3)
30
+ 'worker-idle', // Real terminal stop: nothing in progress and nothing queued (S3)
31
+ 'worker-awaiting-approval', // Spec written, in spec_review — waiting on manager GO, NOT done (S3)
28
32
  'needs-help', // "I'm stuck, can you check X on your side?"
29
33
  'heads-up', // "I'm about to change Y, just FYI"
30
34
  'impact-query', // Pre-dev: "I'm about to change X, will this break you?"
@@ -96,6 +100,32 @@ function createMessage({ from, to, type, subject, body, priority, diff, suggeste
96
100
  // Message Persistence (Criterion 2 — lifecycle)
97
101
  // ============================================================
98
102
 
103
/**
 * Atomically write a file: tmp + fsync(file) + rename (+ best-effort dir
 * fsync). Guarantees a concurrent reader sees old-or-new, never torn JSON, and
 * survives the SIGTERM/relaunch boundary. (epic-workspace-sustained-exec / S3 —
 * the manager was reading partial worker→manager messages off the bus.)
 *
 * The temp file is created next to the target (same directory, so the rename
 * stays on one filesystem). If writing, syncing or renaming fails, the temp
 * file is removed before the original error is rethrown, so failed writes do
 * not accumulate `*.tmp.*` litter beside the target.
 *
 * @param {string} filePath destination path; parent directories are created
 * @param {string} data file contents
 * @throws {Error} whatever the underlying fs call threw (mkdir, open, write,
 *   fsync, close or rename)
 */
function atomicWriteFile(filePath, data) {
  const dir = path.dirname(filePath);
  fs.mkdirSync(dir, { recursive: true });
  const tmp = `${filePath}.tmp.${process.pid}.${Math.random().toString(36).slice(2, 8)}`;
  try {
    const fd = fs.openSync(tmp, 'w');
    try {
      // writeFileSync on an fd keeps writing until all of `data` is out; a
      // single writeSync may legally write only part of it.
      fs.writeFileSync(fd, data);
      fs.fsyncSync(fd);
    } finally {
      fs.closeSync(fd);
    }
    fs.renameSync(tmp, filePath);
  } catch (err) {
    try {
      fs.unlinkSync(tmp);
    } catch (_cleanupErr) { /* tmp was never created, or is already gone */ }
    throw err;
  }
  try {
    const dfd = fs.openSync(dir, 'r');
    try { fs.fsyncSync(dfd); } finally { fs.closeSync(dfd); }
  } catch (_err) { /* directory fsync best-effort (not supported on all FS) */ }
}
128
+
99
129
  /**
100
130
  * Save a message to the workspace message bus
101
131
  * @param {string} workspaceRoot
@@ -104,10 +134,8 @@ function createMessage({ from, to, type, subject, body, priority, diff, suggeste
104
134
  */
105
135
  function saveMessage(workspaceRoot, message) {
106
136
  const messagesDir = path.join(workspaceRoot, '.workspace', 'messages');
107
- fs.mkdirSync(messagesDir, { recursive: true });
108
-
109
137
  const filePath = path.join(messagesDir, `${message.id}.json`);
110
- fs.writeFileSync(filePath, JSON.stringify(message, null, 2));
138
+ atomicWriteFile(filePath, JSON.stringify(message, null, 2));
111
139
  return filePath;
112
140
  }
113
141
 
@@ -178,7 +206,7 @@ function updateMessageStatus(workspaceRoot, messageId, newStatus, extra = {}) {
178
206
  }
179
207
  }
180
208
  }
181
- fs.writeFileSync(filePath, JSON.stringify(message, null, 2));
209
+ atomicWriteFile(filePath, JSON.stringify(message, null, 2));
182
210
  return message;
183
211
  } catch (_err) {
184
212
  return null;