wogiflow 2.33.0 → 2.34.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. package/.workflow/templates/partials/methodology-rules.hbs +3 -1
  2. package/lib/scheduled-mode.js +12 -15
  3. package/lib/skill-export-claude-plugin.js +41 -1
  4. package/lib/skill-portability.js +21 -3
  5. package/lib/workspace-channel-server.js +116 -3
  6. package/lib/workspace-channel-tracking.js +102 -1
  7. package/lib/workspace-dispatch-tracking.js +28 -0
  8. package/lib/workspace-messages.js +32 -4
  9. package/lib/workspace-subtask-state.js +215 -0
  10. package/lib/workspace.js +81 -0
  11. package/package.json +2 -2
  12. package/scripts/flow +17 -0
  13. package/scripts/flow-constants.js +3 -1
  14. package/scripts/flow-io.js +17 -0
  15. package/scripts/flow-paths.js +81 -0
  16. package/scripts/flow-schedule.js +23 -6
  17. package/scripts/flow-scheduled-runner.js +53 -8
  18. package/scripts/flow-standards-checker.js +37 -0
  19. package/scripts/flow-utils.js +2 -0
  20. package/scripts/hooks/adapters/claude-code.js +6 -2
  21. package/scripts/hooks/core/git-safety-gate.js +34 -15
  22. package/scripts/hooks/core/long-input-enforcement.js +49 -39
  23. package/scripts/hooks/core/overdue-dispatches.js +28 -6
  24. package/scripts/hooks/core/phase-gate.js +34 -5
  25. package/scripts/hooks/core/phase-read-gate.js +62 -10
  26. package/scripts/hooks/core/session-start-worker.js +52 -0
  27. package/scripts/hooks/core/stop-orchestrator.js +17 -2
  28. package/scripts/hooks/core/validation.js +8 -0
  29. package/scripts/hooks/core/worker-continuation-gate.js +487 -0
  30. package/scripts/hooks/core/workspace-stop-gates.js +21 -0
  31. package/scripts/hooks/core/workspace-stop-notify.js +174 -59
  32. package/scripts/hooks/entry/claude-code/post-tool-use.js +26 -0
  33. package/.claude/rules/README.md +0 -36
  34. package/.claude/rules/_internal/README.md +0 -64
  35. package/.claude/rules/_internal/document-structure.md +0 -77
  36. package/.claude/rules/_internal/dual-repo-management.md +0 -174
  37. package/.claude/rules/_internal/feature-refactoring-cleanup.md +0 -87
  38. package/.claude/rules/_internal/github-releases.md +0 -71
  39. package/.claude/rules/_internal/model-management.md +0 -35
  40. package/.claude/rules/_internal/self-maintenance.md +0 -87
  41. package/.claude/rules/_internal/worker-tool-first-turn.md +0 -82
  42. package/.claude/rules/alternative-execpolicy-toml-command-policy.md +0 -11
  43. package/.claude/rules/alternative-hand-edit-ready-json-to-register-orpha.md +0 -11
  44. package/.claude/rules/alternative-hook-args-exec-form.md +0 -6
  45. package/.claude/rules/alternative-permission-ruleset-per-phase.md +0 -11
  46. package/.claude/rules/alternative-short-name.md +0 -12
  47. package/.claude/rules/alternative-wogi-flow-as-mcp-client-oauth-manager.md +0 -11
  48. package/.claude/rules/architecture/component-reuse.md +0 -38
  49. package/.claude/rules/architecture/hook-three-layer.md +0 -68
  50. package/.claude/rules/code-style/naming-conventions.md +0 -107
  51. package/.claude/rules/dual-repo-architecture-2026-02-28.md +0 -18
  52. package/.claude/rules/github-release-workflow-2026-01-30.md +0 -16
  53. package/.claude/rules/operations/git-workflows.md +0 -92
  54. package/.claude/rules/operations/scratch-directory.md +0 -54
  55. package/.claude/skills/figma-analyzer/knowledge/learnings.md +0 -11
  56. package/.workflow/specs/architecture.md.template +0 -24
  57. package/.workflow/specs/stack.md.template +0 -33
  58. package/.workflow/specs/testing.md.template +0 -36
@@ -48,10 +48,12 @@ All fail-open. Bypass for tests via `--skip-gates`. Config: `storyFlow.*`.
48
48
 
49
49
  - **Tool-First Turn**: every turn after `UserPromptSubmit` must contain ≥1 tool call. In strict mode (default), the first content block must be `tool_use`. Pure-text responses are invisible to the user.
50
50
  - **Three-State End-of-Turn**: exactly one of ACTION (`/wogi-start <nextId>`), ESCALATION (channel-dispatch `## QUESTION:`), or IDLE.
51
+ - **Never idle while in-progress**: IDLE is valid ONLY when nothing is in-progress. If a task is in-progress and you hit an approval / phase-read / architect / research gate, that is NOT a stopping point — PROCEED by satisfying the gate legitimately (read the phase doc, decompose, provide evidence; autonomous = pre-approved), or ESCALATE via channel. The Stop-hook continuation gate drives a proceed-or-escalate continuation and auto-escalates to the manager after repeated no-progress turns.
52
+ - **Gate circumvention is PROHIBITED**: never create a git worktree to reach an "ungated" context, never hand-write gate-satisfying markers, never edit `.workflow/state` files to fake gate satisfaction, never change directory to dodge a gate. Gates resolve phase from the canonical main-repo state, not your cwd — circumvention is both forbidden and ineffective.
51
53
  - **Hedging forbidden**: "awaiting your signal", "let me know", "standing by", "should I continue".
52
54
  - **No direct user prompts**: `AskUserQuestion` is blocked; questions go through channel dispatch.
53
55
 
54
- Enforced by: `worker-tool-first-gate.js` (G1/G4/Gap B), `worker-boundary-gate.js`, `flow-worker-question-classifier.js`. Config: `workspace.toolFirstTurnGate.{enabled,strict}`, `workspace.blockAskUserQuestionInWorker`, `workspace.aiWorkerQuestionClassifier.*`. Long-form: `.claude/rules/_internal/worker-tool-first-turn.md`.
56
+ Enforced by: `worker-tool-first-gate.js` (G1/G4/Gap B), `worker-continuation-gate.js` (in-progress stall fallback — RC1, wf-e5e57361), `worker-boundary-gate.js`, `flow-worker-question-classifier.js`; phase resolved worktree-stably via `getCanonicalStateDir()` with fail-closed for in-progress tasks (RC2). Config: `workspace.toolFirstTurnGate.{enabled,strict}`, `workspace.continuationGate.*`, `workspace.blockAskUserQuestionInWorker`, `workspace.aiWorkerQuestionClassifier.*`. Long-form: `.claude/rules/_internal/worker-tool-first-turn.md`.
55
57
 
56
58
  ---
57
59
 
@@ -244,10 +244,15 @@ function enforceTokenBudget(usageLog, dailyBudget, now, jobName, estimatedTokens
244
244
  }
245
245
  const key = d.toISOString().slice(0, 10); // YYYY-MM-DD
246
246
  const dayLog = (usageLog && usageLog[key]) || {};
247
- const usedToday = Object.values(dayLog).reduce((a, b) => a + (Number(b) || 0), 0);
247
+ // F17 (R-379): use an explicit Number.isFinite guard instead of `|| 0`. Both forms map
248
+ // NaN to 0 (and 0 stays 0), but the guard also discards ±Infinity so a single bad entry cannot poison the daily total (per naming-conventions.md).
249
+ const usedToday = Object.values(dayLog).reduce(
250
+ (a, b) => a + (Number.isFinite(Number(b)) ? Number(b) : 0),
251
+ 0
252
+ );
248
253
  const estimated = Number.isFinite(estimatedTokens)
249
254
  ? estimatedTokens
250
- : (DEFAULT_TOKENS_PER_INVOCATION[jobName] || 0);
255
+ : (DEFAULT_TOKENS_PER_INVOCATION[jobName] ?? 0);
251
256
  const projectedAfter = usedToday + estimated;
252
257
  if (!Number.isFinite(dailyBudget) || dailyBudget <= 0) {
253
258
  return {
@@ -344,18 +349,11 @@ function isTransientError(err) {
344
349
  return false;
345
350
  }
346
351
 
347
- /**
348
- * Compute the cron-friendly "yesterday" anchor for `git diff @{yesterday}..HEAD`.
349
- * Returns the ISO date 24h ago.
350
- *
351
- * @param {Date|number|string} [now]
352
- * @returns {string}
353
- */
354
- function yesterdayIsoDate(now = Date.now()) {
355
- const d = new Date(now);
356
- d.setUTCDate(d.getUTCDate() - 1);
357
- return d.toISOString();
358
- }
352
+ // F20 (R-379): removed `yesterdayIsoDate(now)` — exported but never imported
353
+ // anywhere in scope. The runner uses `git log --since="24 hours ago"` for
354
+ // CI-portability reasons (shallow checkouts don't have reflog state for
355
+ // `@{yesterday}`), and no other consumer wants the ISO-date form. Removed
356
+ // to avoid a maintenance-trap export. Re-add if a real consumer materializes.
359
357
 
360
358
  module.exports = {
361
359
  JOB_NAMES,
@@ -373,5 +371,4 @@ module.exports = {
373
371
  updateDedupIssue,
374
372
  validateModelName,
375
373
  isTransientError,
376
- yesterdayIsoDate,
377
374
  };
@@ -39,6 +39,41 @@ const { parseFrontmatter } = require('./skill-portability');
39
39
  const { listBundleFiles } = require('./skill-export-agentskills');
40
40
 
41
41
  const DEFAULT_LICENSE = 'MIT';
42
+
43
/**
 * Sanitize a skill name so it is safe to use as a single path segment.
 *
 * F9 (R-379): the name used to be interpolated straight into
 * `path.join(outDir, \`skills/${name}/...\`)`, so a skill declaring
 * `name: ../../../etc` could escape the output directory.
 *
 * Steps, in order:
 *   1. drop NUL bytes first, so they cannot hide a `..` from step 3
 *      (e.g. ".\0." would otherwise re-form ".." after the check);
 *   2. replace path separators (`/` and `\`) with `-`;
 *   3. replace every `..` with `--`;
 *   4. strip ANY leading run of whitespace, dots and dashes, then trailing
 *      whitespace. Whitespace is part of the leading class on purpose:
 *      trimming only after the dot/dash strip let " .hidden" come back as
 *      ".hidden" and " ." come back as ".".
 *
 * @param {string} raw  candidate name (from opts, frontmatter, or a dir name)
 * @returns {string} sanitized, non-empty name that never starts with `.` or `-`
 * @throws {Error} if `raw` is not a non-blank string, or nothing is left
 *   after sanitization.
 */
function sanitizePluginName(raw) {
  if (typeof raw !== 'string' || !raw.trim()) {
    throw new Error('sanitizePluginName: name must be a non-empty string');
  }
  const cleaned = raw
    .replace(/\0/g, '')          // strip NULs before any pattern checks
    .replace(/[/\\]/g, '-')      // path separators → dash
    .replace(/\.\./g, '--')      // .. sequences → dash-dash
    .replace(/^[\s.\-]+/, '')    // no leading whitespace / dot (hidden dir) / dash (CLI flag)
    .trimEnd();
  if (!cleaned) {
    throw new Error(`sanitizePluginName: name "${raw}" sanitizes to empty`);
  }
  return cleaned;
}
42
77
  const DEFAULT_AUTHOR = 'wogiflow';
43
78
 
44
79
  /**
@@ -89,7 +124,12 @@ function exportToClaudePlugin(skillDir, opts = {}) {
89
124
  frontmatter = parseFrontmatter(safeReadFile(skillMdPath));
90
125
  }
91
126
 
92
- const name = opts.name ?? frontmatter.name ?? path.basename(skillDir);
127
+ // F9 (R-379): sanitize `name` before using it in path construction. A
128
+ // skill author who sets `name: ../../../etc` in frontmatter could escape
129
+ // the output directory via `path.join(outDir, 'skills/../../../etc/SKILL.md')`.
130
+ // Strip path separators and `..` sequences; reject empty/dotfile names.
131
+ const rawName = opts.name ?? frontmatter.name ?? path.basename(skillDir);
132
+ const name = sanitizePluginName(rawName);
93
133
  const version = opts.version ?? frontmatter.version ?? '0.0.0';
94
134
  const description = frontmatter.description ?? '';
95
135
  const license = frontmatter.license ?? DEFAULT_LICENSE;
@@ -50,8 +50,13 @@ const BLOCKER_PATTERNS = [
50
50
  { pattern: /\bflow-utils\b/, label: 'flow-utils import/reference' },
51
51
  { pattern: /require\(['"][^'"]*\/scripts\/flow[-/]/, label: 'WogiFlow scripts/ require()' },
52
52
  { pattern: /from\s+['"][^'"]*\/scripts\/flow[-/]/, label: 'WogiFlow scripts/ import' },
53
- // Slash-command invocations (any /wogi-* with a word char after)
54
- { pattern: /\/wogi-[a-z][a-z0-9-]*/i, label: '/wogi-* slash command' },
53
+ // Slash-command invocations (any /wogi-* with a word char after).
54
+ // F7 (R-379): require a lookbehind for start-of-line, whitespace, or
55
+ // quote/bracket — so legitimate file paths like
56
+ // `.claude/skills/wogi-start/skill.md` or `/workflows/wogi-status` don't
57
+ // trip a false-positive blocker. Lookbehind (not capturing group) so the
58
+ // matched substring is the slash-command itself, e.g. `/wogi-finalize`.
59
+ { pattern: /(?<=^|[\s`'"(\[])\/wogi-[a-z][a-z0-9-]*\b/im, label: '/wogi-* slash command' },
55
60
  // Shell invocations of the local flow CLI
56
61
  { pattern: /\.\/scripts\/flow\b/, label: 'local ./scripts/flow CLI call' },
57
62
  { pattern: /\bflow\s+(?:wogi-|skill\s+|story\s+|start\s+|status\b|ready\b|finalize\b)/, label: 'flow CLI subcommand specific to WogiFlow' },
@@ -237,7 +242,12 @@ function assessSkillPortability(skillDir, opts = {}) {
237
242
  });
238
243
  }
239
244
 
240
- // Explicit author declaration: portable: false short-circuits scanning.
245
+ // Explicit author declaration. F14 (R-379): previously the comment claimed
246
+ // `portable: false` "short-circuits scanning" — but there was no early
247
+ // return; the function scanned anyway, producing a needlessly long blocker
248
+ // list for skills the author already marked non-portable. Short-circuit
249
+ // now matches the comment: return early so the caller gets a single,
250
+ // clear blocker ("author opted out") instead of dozens of pattern hits.
241
251
  const declaredPortable = typeof manifest.portable === 'string'
242
252
  ? manifest.portable.toLowerCase() === 'true'
243
253
  : null;
@@ -248,6 +258,14 @@ function assessSkillPortability(skillDir, opts = {}) {
248
258
  match: 'portable: false',
249
259
  label: 'manifest declares portable: false',
250
260
  });
261
+ // Short-circuit: author opted out, no need to enumerate every pattern hit.
262
+ return {
263
+ portable: false,
264
+ blockers,
265
+ manifest,
266
+ scannedFiles: [],
267
+ skillMdPath,
268
+ };
251
269
  }
252
270
 
253
271
  // Compose pattern list: builtin + extras.
@@ -22,6 +22,21 @@ const http = require('node:http');
22
22
  const readline = require('node:readline');
23
23
  const { safeJsonParseContent } = require('./utils');
24
24
 
25
+ // S5 (wf-ee87a24e): the version this long-lived server process loaded at boot.
26
+ // Compared against the on-disk package.json to detect a mid-session
27
+ // `npm i wogiflow@latest` that left this process running stale code.
28
+ const SERVER_VERSION = (() => {
29
+ try { return require('../package.json').version || null; } catch (_err) { return null; }
30
+ })();
31
+ function readDiskVersion() {
32
+ try {
33
+ const fs = require('node:fs');
34
+ const pkgPath = require('node:path').join(__dirname, '..', 'package.json');
35
+ const raw = fs.readFileSync(pkgPath, 'utf-8'); // fresh read, bypasses require cache
36
+ return JSON.parse(raw).version || null;
37
+ } catch (_err) { return null; }
38
+ }
39
+
25
40
  // ============================================================
26
41
  // Constants
27
42
  // ============================================================
@@ -129,8 +144,21 @@ When you receive a message:
129
144
  2. If it's a question or investigation request → do the work, then ALWAYS send results back
130
145
  3. If it's a status check → respond with your current task status
131
146
 
132
- CRITICALALWAYS REPLY TO THE MANAGER:
133
- After completing ANY work triggered by a channel message, you MUST send results back using the workspace_send_message tool with to: "manager". The user only sees the manager terminal if you don't reply, they never see your results.
147
+ SUSTAINED EXECUTION a task dispatch runs to COMPLETION across turns:
148
+ A "/wogi-" dispatch (especially one you decompose into sub-tasks) is NOT a one-turn request. Work through ALL sub-tasks in the same session; the Stop hook's continuation gate will keep you going while the task is in-progress with work remaining. Do NOT stop to "report progress" mid-task only reply when the task is COMPLETE or you are ESCALATING a blocker.
149
+
150
+ NEVER IDLE WHILE A TASK IS IN-PROGRESS: If you hit an approval / phase-read / architect / research gate, that is NOT a stopping point. Exactly one of these must happen:
151
+ • PROCEED by SATISFYING the gate legitimately — read the required phase doc, decompose the task, provide the required evidence. In autonomous mode you are PRE-APPROVED; do not wait for approval that no one is there to give.
152
+ • ESCALATE to the manager via channel ("## QUESTION: <blocker>") if you genuinely need the manager/user, then end the turn.
153
+
154
+ GATE CIRCUMVENTION IS PROHIBITED (and pointless — gates resolve phase from the canonical main-repo state, not your working directory):
155
+ ✗ Do NOT create a git worktree to reach an "ungated" context.
156
+ ✗ Do NOT hand-write gate-satisfying markers, or edit .workflow/state files, to fake gate satisfaction.
157
+ ✗ Do NOT change directory to dodge a gate.
158
+ A blocked tool call is an instruction to satisfy the gate, never a puzzle to route around.
159
+
160
+ CRITICAL — REPLY TO THE MANAGER WHEN THE TASK IS DONE OR BLOCKED:
161
+ When the dispatched task is complete (or you must escalate), you MUST send results back using the workspace_send_message tool with to: "manager". The user only sees the manager terminal — if you don't reply, they never see your results.
134
162
 
135
163
  Example: workspace_send_message(to: "manager", message: "## Investigation Results\\n\\n1. Found the bug in X\\n2. Root cause: Y\\n3. Fix: Z")
136
164
 
@@ -466,18 +494,60 @@ function broadcastSSE(event) {
466
494
 
467
495
  const channelTracking = require('./workspace-channel-tracking');
468
496
 
497
+ // S4 (wf-87611c5e): the channel server is the only process that sees every
498
+ // inbound dispatch, so it owns the "ack-received" timestamp used by GET /status.
499
+ let _lastInboundAt = 0;
500
+ const STATUS_STALENESS_MS = (() => {
501
+ const raw = parseInt(process.env.WOGI_STATUS_STALENESS_MS || '', 10);
502
+ return Number.isFinite(raw) && raw > 0 ? raw : 300000;
503
+ })();
504
+
469
505
  // ============================================================
470
506
  // HTTP Server
471
507
  // ============================================================
472
508
 
473
509
  const server = http.createServer(async (req, res) => {
474
- // Health check — minimal info, no topology exposure
510
+ // Health check — minimal info, no topology exposure. PURE liveness: "the
511
+ // server process is up." Says nothing about whether the agent is working —
512
+ // use /status for that (S4).
475
513
  if (req.method === 'GET' && req.url === '/health') {
476
514
  res.writeHead(200, { 'Content-Type': 'application/json' });
477
515
  res.end(JSON.stringify({ status: 'ok', repo: REPO_NAME, port: PORT }));
478
516
  return;
479
517
  }
480
518
 
519
+ // Activity status (S4 / wf-87611c5e) — the real execution state, so a manager
520
+ // can never mistake a channel POST `ok` for "work happening". Derived from the
521
+ // worker's own state files + the last inbound dispatch this server saw.
522
+ if (req.method === 'GET' && req.url === '/status') {
523
+ let body;
524
+ try {
525
+ const path = require('node:path');
526
+ const stateDir = path.join(process.cwd(), '.workflow', 'state');
527
+ body = channelTracking.computeWorkerStatus({
528
+ stateDir,
529
+ repoName: REPO_NAME,
530
+ lastInboundAt: _lastInboundAt || undefined,
531
+ stalenessMs: STATUS_STALENESS_MS
532
+ });
533
+ body.port = PORT;
534
+ // S5: version-drift signal — if the on-disk wogiflow differs from what this
535
+ // long-lived server loaded, a `flow workspace restart` is required to load it.
536
+ const diskVersion = readDiskVersion();
537
+ body.serverVersion = SERVER_VERSION;
538
+ body.diskVersion = diskVersion;
539
+ body.versionDrift = Boolean(SERVER_VERSION && diskVersion && SERVER_VERSION !== diskVersion);
540
+ if (body.versionDrift) {
541
+ body.restartRequired = `Server is running ${SERVER_VERSION} but ${diskVersion} is on disk — run 'flow workspace restart ${REPO_NAME}'`;
542
+ }
543
+ } catch (_err) {
544
+ body = { repo: REPO_NAME, port: PORT, state: 'unknown' };
545
+ }
546
+ res.writeHead(200, { 'Content-Type': 'application/json' });
547
+ res.end(JSON.stringify(body));
548
+ return;
549
+ }
550
+
481
551
  // SSE endpoint for event subscriptions
482
552
  if (req.method === 'GET' && req.url?.startsWith('/events')) {
483
553
  const lastEventId = req.headers['last-event-id'] || '';
@@ -485,6 +555,44 @@ const server = http.createServer(async (req, res) => {
485
555
  return;
486
556
  }
487
557
 
558
+ // Manager-triggered restart (S5 / wf-ee87a24e). Writes the wogi-claude
559
+ // wrapper's restart flag and SIGTERMs this server's parent (the claude
560
+ // process). The wrapper relaunches claude with a FRESH require cache —
561
+ // reloading any upgraded wogiflow code, and claude respawns this MCP server.
562
+ // No PID tracking needed; reuses the proven task-boundary restart loop.
563
+ if (req.method === 'POST' && (req.url === '/restart' || req.url === '/control/restart')) {
564
+ const rawFrom = req.headers['x-wogi-from'] || '';
565
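+ // localhost-bound already; additionally reject any caller whose x-wogi-from header names a sender other than the manager. NOTE: a request with NO x-wogi-from header is still accepted by the check below.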
566
+ if (rawFrom && rawFrom !== 'manager' && rawFrom !== 'workspace-manager') {
567
+ res.writeHead(403, { 'Content-Type': 'application/json' });
568
+ res.end(JSON.stringify({ ok: false, error: 'restart may only be triggered by the manager' }));
569
+ return;
570
+ }
571
+ let scheduled = false;
572
+ try {
573
+ const fs = require('node:fs');
574
+ const nodePath = require('node:path');
575
+ const flagPath = process.env.WOGI_RESTART_FLAG ||
576
+ nodePath.join(process.cwd(), '.workflow', 'state', 'restart-requested');
577
+ fs.mkdirSync(nodePath.dirname(flagPath), { recursive: true });
578
+ fs.writeFileSync(flagPath, JSON.stringify({
579
+ version: 1, reason: 'manager-restart', repo: REPO_NAME,
580
+ triggeredAt: new Date().toISOString()
581
+ }, null, 2));
582
+ // Defer the SIGTERM briefly so the HTTP response flushes first.
583
+ const ppid = process.ppid;
584
+ setTimeout(() => { try { process.kill(ppid, 'SIGTERM'); } catch (_err) { /* parent gone */ } }, 150);
585
+ scheduled = true;
586
+ } catch (err) {
587
+ res.writeHead(500, { 'Content-Type': 'application/json' });
588
+ res.end(JSON.stringify({ ok: false, error: err.message }));
589
+ return;
590
+ }
591
+ res.writeHead(202, { 'Content-Type': 'application/json' });
592
+ res.end(JSON.stringify({ ok: true, scheduled, repo: REPO_NAME, note: 'worker restarting; channel server will respawn with fresh code' }));
593
+ return;
594
+ }
595
+
488
596
  // Receive webhook (POST)
489
597
  if (req.method === 'POST') {
490
598
  const { body, truncated } = await collectBody(req, MAX_BODY_BYTES);
@@ -508,6 +616,11 @@ const server = http.createServer(async (req, res) => {
508
616
  cleanBody = body.substring(effortMatch[0].length);
509
617
  }
510
618
 
619
+ // S4: record when a dispatch arrived (ack-received signal for /status).
620
+ if (channelTracking.DISPATCH_BODY_PATTERN.test(cleanBody)) {
621
+ _lastInboundAt = Date.now();
622
+ }
623
+
511
624
  // Forward as channel notification to Claude Code
512
625
  const meta = {
513
626
  from,
@@ -115,11 +115,112 @@ function tryReconcileInboundCompletion(ctx, tracking) {
115
115
  }
116
116
  }
117
117
 
118
+ // ============================================================
119
+ // Worker activity status (epic-workspace-sustained-exec / S4, wf-87611c5e)
120
+ // ============================================================
121
+
122
+ const fsNode = require('node:fs');
123
+ const pathNode = require('node:path');
124
+
125
+ const ACTIVE_PHASES = new Set(['coding', 'validating']);
126
+ const DEFAULT_STALENESS_MS = 300000; // 5 min
127
+
128
+ function _safeRead(p) {
129
+ try { return JSON.parse(fsNode.readFileSync(p, 'utf-8')); } catch (_err) { return null; }
130
+ }
131
+ function _mtimeMs(p) {
132
+ try { return fsNode.statSync(p).mtimeMs; } catch (_err) { return 0; }
133
+ }
134
+
135
+ /**
136
+ * Derive the worker's real execution state for GET /status. Distinguishes
137
+ * ack-received / work-started / in-progress / complete / blocked / idle so the
138
+ * manager can never mistake a channel POST `ok` (or `/health` ok) for progress.
139
+ *
140
+ * Pure-ish (reads files from stateDir); injectable for tests.
141
+ *
142
+ * @param {Object} opts
143
+ * @param {string} opts.stateDir worker .workflow/state dir
144
+ * @param {string} [opts.repoName]
145
+ * @param {number} [opts.lastInboundAt] ms epoch of the last dispatch POST the server saw
146
+ * @param {number} [opts.stalenessMs] heartbeat freshness window
147
+ * @param {number} [opts.now]
148
+ * @returns {{repo, state, taskId, subtasks:{total,remaining}, lastHeartbeatAt, lastSha, phase}}
149
+ */
150
+ function computeWorkerStatus(opts = {}) {
151
+ const stateDir = opts.stateDir;
152
+ const now = opts.now || Date.now();
153
+ const stalenessMs = Number.isFinite(opts.stalenessMs) ? opts.stalenessMs : DEFAULT_STALENESS_MS;
154
+ const out = {
155
+ repo: opts.repoName || null,
156
+ state: 'idle',
157
+ taskId: null,
158
+ subtasks: { total: 0, remaining: 0 },
159
+ lastHeartbeatAt: null,
160
+ lastSha: null,
161
+ phase: null
162
+ };
163
+ try {
164
+ if (!stateDir) return out;
165
+ const ready = _safeRead(pathNode.join(stateDir, 'ready.json')) || {};
166
+ const phaseData = _safeRead(pathNode.join(stateDir, 'workflow-phase.json')) || {};
167
+ const ledger = _safeRead(pathNode.join(stateDir, 'subtask-state.json'));
168
+ const counter = _safeRead(pathNode.join(stateDir, 'worker-continuation.json'));
169
+ const phase = typeof phaseData.phase === 'string' ? phaseData.phase : null;
170
+ out.phase = phase;
171
+
172
+ const inProgress = (ready.inProgress || [])[0] || null;
173
+
174
+ // Activity freshness: newest mtime of the files a working worker touches.
175
+ const lastActivityMs = Math.max(
176
+ _mtimeMs(pathNode.join(stateDir, 'workflow-phase.json')),
177
+ _mtimeMs(pathNode.join(stateDir, 'subtask-state.json')),
178
+ _mtimeMs(pathNode.join(stateDir, 'worker-continuation.json'))
179
+ );
180
+ if (lastActivityMs > 0) out.lastHeartbeatAt = new Date(lastActivityMs).toISOString();
181
+ const activityFresh = lastActivityMs > 0 && (now - lastActivityMs) < stalenessMs;
182
+
183
+ if (!inProgress) {
184
+ const recent = (ready.recentlyCompleted || [])[0] || null;
185
+ const completedTs = recent && recent.completedAt ? Date.parse(recent.completedAt) : NaN;
186
+ if (Number.isFinite(completedTs) && (now - completedTs) < stalenessMs) {
187
+ out.state = 'complete';
188
+ out.taskId = recent.id || null;
189
+ } else {
190
+ out.state = 'idle';
191
+ }
192
+ return out;
193
+ }
194
+
195
+ out.taskId = inProgress.id || null;
196
+ if (ledger && (!ledger.taskId || ledger.taskId === out.taskId) && Array.isArray(ledger.subtasks)) {
197
+ const open = ledger.subtasks.filter(s => s && (s.status === 'pending' || s.status === 'in_progress')).length;
198
+ out.subtasks = { total: ledger.subtasks.length, remaining: open };
199
+ }
200
+
201
+ const escalated = counter && counter.taskId === out.taskId && counter.escalated === true;
202
+ if (escalated) {
203
+ out.state = 'blocked';
204
+ } else if (ACTIVE_PHASES.has(phase)) {
205
+ out.state = activityFresh ? 'in-progress' : 'work-started';
206
+ } else {
207
+ // Picked up (in inProgress) but not yet in active-work phase.
208
+ out.state = 'ack-received';
209
+ }
210
+ return out;
211
+ } catch (_err) {
212
+ return out; // fail-open: never 500
213
+ }
214
+ }
215
+
118
216
  module.exports = {
119
217
  TASK_ID_PATTERN,
120
218
  DISPATCH_BODY_PATTERN,
121
219
  QUESTION_BODY_PATTERN,
122
220
  COMPLETION_BODY_PATTERN,
123
221
  tryRecordInboundDispatch,
124
- tryReconcileInboundCompletion
222
+ tryReconcileInboundCompletion,
223
+ computeWorkerStatus,
224
+ ACTIVE_PHASES,
225
+ DEFAULT_STALENESS_MS
125
226
  };
@@ -136,6 +136,33 @@ function reconcileDispatch(workspaceRoot, taskId, status, reason) {
136
136
  return null;
137
137
  }
138
138
 
139
+ /**
140
+ * Refresh a pending dispatch's deadline on a worker-progress heartbeat
141
+ * (epic-workspace-sustained-exec / S3). A worker grinding through a decomposed
142
+ * task across many turns would otherwise blow past expectedDeadline and be
143
+ * misclassified as a silent-halt. Each heartbeat pushes the deadline out and
144
+ * records lastHeartbeatAt. Keeps status 'pending'. Returns the record or null.
145
+ *
146
+ * @param {string} workspaceRoot
147
+ * @param {string} taskId
148
+ * @param {number} [extendMs=DEFAULT_DURATION_MS]
149
+ */
150
+ function refreshDispatchDeadline(workspaceRoot, taskId, extendMs) {
151
+ const state = loadState(workspaceRoot);
152
+ const ms = Number.isFinite(extendMs) && extendMs > 0 ? extendMs : DEFAULT_DURATION_MS;
153
+ for (let i = state.dispatches.length - 1; i >= 0; i--) {
154
+ const r = state.dispatches[i];
155
+ if (r && r.taskId === taskId && r.status === 'pending') {
156
+ r.lastHeartbeatAt = new Date().toISOString();
157
+ r.expectedDeadline = new Date(Date.now() + ms).toISOString();
158
+ r.heartbeatCount = (r.heartbeatCount || 0) + 1;
159
+ saveState(workspaceRoot, state);
160
+ return r;
161
+ }
162
+ }
163
+ return null;
164
+ }
165
+
139
166
  /**
140
167
  * Read all currently-active dispatch records (not archived).
141
168
  *
@@ -306,6 +333,7 @@ module.exports = {
306
333
  MAX_ACTIVE,
307
334
  recordDispatch,
308
335
  reconcileDispatch,
336
+ refreshDispatchDeadline,
309
337
  readDispatches,
310
338
  getOverdueDispatches,
311
339
  attachCompletionSummary,
@@ -25,6 +25,10 @@ const MESSAGE_TYPES = [
25
25
  'task-complete', // "I finished my side of feature Z"
26
26
  'worker-stopped', // Graceful Stop hook — worker session ending, not necessarily at task completion
27
27
  'worker-ready', // Fresh worker session with empty queue — "got anything for me?" (wf-restart-handoff)
28
+ 'worker-progress', // Heartbeat on a forced continuation — work ongoing, NOT a stop (epic-workspace-sustained-exec S3)
29
+ 'worker-blocked', // Escalation: gate hit a cap / no-progress / validation failure — needs manager (S2/S3)
30
+ 'worker-idle', // Real terminal stop: nothing in progress and nothing queued (S3)
31
+ 'worker-awaiting-approval', // Spec written, in spec_review — waiting on manager GO, NOT done (S3)
28
32
  'needs-help', // "I'm stuck, can you check X on your side?"
29
33
  'heads-up', // "I'm about to change Y, just FYI"
30
34
  'impact-query', // Pre-dev: "I'm about to change X, will this break you?"
@@ -96,6 +100,32 @@ function createMessage({ from, to, type, subject, body, priority, diff, suggeste
96
100
  // Message Persistence (Criterion 2 — lifecycle)
97
101
  // ============================================================
98
102
 
103
/**
 * Atomically write a file: temp file + fsync(file) + rename, followed by a
 * best-effort fsync of the containing directory. A concurrent reader sees
 * either the old content or the new content, never a torn write.
 * (epic-workspace-sustained-exec / S3 — the manager was reading partial
 * worker→manager messages off the bus.)
 *
 * The temp file lives next to the target (same directory, so the rename stays
 * on one filesystem) and is removed again if any step before or including the
 * rename fails, so failed writes do not accumulate `*.tmp.*` litter.
 *
 * @param {string} filePath  destination path; parent directories are created
 * @param {string} data      content to write
 * @throws {Error} any error from open/write/fsync/rename is rethrown unchanged
 */
function atomicWriteFile(filePath, data) {
  const dir = path.dirname(filePath);
  fs.mkdirSync(dir, { recursive: true });
  const tmp = `${filePath}.tmp.${process.pid}.${Math.random().toString(36).slice(2, 8)}`;
  try {
    const fd = fs.openSync(tmp, 'w');
    try {
      // writeFileSync on a descriptor keeps writing until all of `data` is
      // flushed, unlike a single writeSync call which may write short.
      fs.writeFileSync(fd, data);
      fs.fsyncSync(fd);
    } finally {
      fs.closeSync(fd);
    }
    fs.renameSync(tmp, filePath);
  } catch (err) {
    // Do not leave the temp file behind; the original error is what matters.
    try { fs.unlinkSync(tmp); } catch (_cleanupErr) { /* never created or already renamed */ }
    throw err;
  }
  try {
    const dfd = fs.openSync(dir, 'r');
    try { fs.fsyncSync(dfd); } finally { fs.closeSync(dfd); }
  } catch (_err) { /* directory fsync best-effort (not supported on all FS) */ }
}
128
+
99
129
  /**
100
130
  * Save a message to the workspace message bus
101
131
  * @param {string} workspaceRoot
@@ -104,10 +134,8 @@ function createMessage({ from, to, type, subject, body, priority, diff, suggeste
104
134
  */
105
135
  function saveMessage(workspaceRoot, message) {
106
136
  const messagesDir = path.join(workspaceRoot, '.workspace', 'messages');
107
- fs.mkdirSync(messagesDir, { recursive: true });
108
-
109
137
  const filePath = path.join(messagesDir, `${message.id}.json`);
110
- fs.writeFileSync(filePath, JSON.stringify(message, null, 2));
138
+ atomicWriteFile(filePath, JSON.stringify(message, null, 2));
111
139
  return filePath;
112
140
  }
113
141
 
@@ -178,7 +206,7 @@ function updateMessageStatus(workspaceRoot, messageId, newStatus, extra = {}) {
178
206
  }
179
207
  }
180
208
  }
181
- fs.writeFileSync(filePath, JSON.stringify(message, null, 2));
209
+ atomicWriteFile(filePath, JSON.stringify(message, null, 2));
182
210
  return message;
183
211
  } catch (_err) {
184
212
  return null;