wogiflow 2.32.0 → 2.34.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. package/.claude/docs/claude-code-compatibility.md +51 -0
  2. package/.claude/docs/scheduled-mode.md +213 -0
  3. package/.claude/docs/skill-portability.md +190 -0
  4. package/.claude/rules/alternative-hook-args-exec-form.md +6 -0
  5. package/.claude/settings.json +2 -1
  6. package/.claude/skills/_template/skill.md +1 -0
  7. package/.claude/skills/conventional-commit/knowledge/examples.md +65 -0
  8. package/.claude/skills/conventional-commit/skill.md +76 -0
  9. package/bin/flow +16 -0
  10. package/lib/scheduled-mode.js +374 -0
  11. package/lib/skill-export-agentskills.js +211 -0
  12. package/lib/skill-export-claude-plugin.js +183 -0
  13. package/lib/skill-portability.js +342 -0
  14. package/lib/skill-registry.js +32 -2
  15. package/lib/workspace-channel-server.js +106 -3
  16. package/lib/workspace-channel-tracking.js +102 -1
  17. package/lib/workspace-dispatch-tracking.js +28 -0
  18. package/lib/workspace-messages.js +32 -4
  19. package/lib/workspace-subtask-state.js +215 -0
  20. package/lib/workspace.js +81 -0
  21. package/package.json +2 -2
  22. package/scripts/flow +25 -0
  23. package/scripts/flow-config-defaults.js +20 -0
  24. package/scripts/flow-constants.js +3 -1
  25. package/scripts/flow-schedule.js +486 -0
  26. package/scripts/flow-scheduled-runner.js +659 -0
  27. package/scripts/flow-skill-export.js +334 -0
  28. package/scripts/flow-standards-checker.js +37 -0
  29. package/scripts/hooks/adapters/claude-code.js +18 -3
  30. package/scripts/hooks/core/git-safety-gate.js +118 -27
  31. package/scripts/hooks/core/long-input-enforcement.js +139 -4
  32. package/scripts/hooks/core/overdue-dispatches.js +28 -6
  33. package/scripts/hooks/core/session-start-worker.js +52 -0
  34. package/scripts/hooks/core/stop-orchestrator.js +17 -2
  35. package/scripts/hooks/core/validation.js +8 -0
  36. package/scripts/hooks/core/worker-continuation-gate.js +326 -0
  37. package/scripts/hooks/core/workspace-stop-gates.js +21 -0
  38. package/scripts/hooks/core/workspace-stop-notify.js +174 -59
  39. package/scripts/hooks/entry/claude-code/post-tool-use.js +26 -0
@@ -71,6 +71,83 @@ const SOURCE_LINK_PATTERNS = [
71
71
  /\bwf-[a-f0-9]{8}\b/i // bare wf-ID reference
72
72
  ];
73
73
 
74
/**
 * Remove quoted or pasted material from a prompt so that line, item, and
 * imperative counts reflect what the USER is asking for, not what they pasted
 * to illustrate a point (a transcript, an error log, a diff, ...).
 *
 * Removed:
 *   - Fenced code blocks (``` ... ```), including empty blocks and fences
 *     indented by up to three spaces. A fence that is never closed is left
 *     untouched so it cannot swallow the rest of the prompt.
 *   - Lines whose first non-blank character is `⏺` (Claude Code transcript
 *     bullet).
 *   - Lines whose first non-blank character is `⎿` (Claude Code tool-result
 *     continuation marker).
 *   - Lines whose first non-blank character is `>` (markdown blockquote, at
 *     any indent).
 *   - Lines indented by 4+ spaces ("code by indentation": git diff output,
 *     REPL traces, ...), unless the line is a markdown list item
 *     (`- `, `* `, `1. `, `1) `).
 *
 * This function always strips; it does not compare the result against the raw
 * text. Callers that want thresholds based on the user's own words simply run
 * their counts on the returned string.
 *
 * Motivating case: a short narrative followed by a ~70-line pasted transcript
 * inside a fenced block. The raw line count and the imperatives inside the
 * transcript ("fix", "add", "rm") would otherwise trip the long-input gate
 * even though the paste was illustrative, not a list of work items.
 *
 * @param {string} text - Raw prompt text.
 * @returns {string} The prompt with quoted/pasted content removed. Always a
 *   string; '' when `text` is not a string.
 */
function stripQuotedContent(text) {
  if (typeof text !== 'string') return '';

  // 1. Fenced code blocks. The body match is lazy, so each opening fence pairs
  //    with the nearest closing fence (one match per block). The body group is
  //    optional so that an empty block (opening fence immediately followed by
  //    the closing fence) is removed too. Up to three leading spaces are
  //    accepted on either fence; deeper indentation is handled by the
  //    4+ space rule below.
  const withoutFences = text.replace(
    /^ {0,3}```[^\n]*\n(?:[\s\S]*?\n)? {0,3}```\s*$/gm,
    ''
  );

  // 2. Line-level filters for pasted transcripts, blockquotes, and
  //    indentation-style code.
  const isQuotedLine = (line) => {
    // ⏺ — Claude Code transcript bullet
    if (/^\s*⏺/.test(line)) return true;
    // ⎿ — Claude Code tool-result continuation marker
    if (/^\s*⎿/.test(line)) return true;
    // > — markdown blockquote (any indent level)
    if (/^\s*>/.test(line)) return true;
    // 4+ leading spaces that are NOT a nested markdown list item.
    const indentedCode = /^ {4,}\S/.test(line);
    const listItem = /^\s*(?:[-*]|\d+[.)])\s+/.test(line);
    return indentedCode && !listItem;
  };

  return withoutFences
    .split('\n')
    .filter((line) => !isQuotedLine(line))
    .join('\n');
}
124
+
125
/**
 * Decide whether a prompt is a Claude Code skill body echoed back to the
 * model rather than text the user typed.
 *
 * Background: after the AI calls `Skill(...)`, the harness surfaces the full
 * skill prompt plus its args as a "user message" through UserPromptSubmit.
 * That content is AI-composed. Running the long-input gate on it deadlocks
 * the session (the AI cannot dismiss its own skill args, and extract-review
 * needs Bash, which the gate also blocks).
 *
 * Heuristic: the text is at least 500 characters long and contains two or
 * more of the structural strings listed in SKILL_BODY_MARKERS (skill-body
 * headings, the "ARGUMENTS: {args}" template, ...), which are very unlikely
 * to show up in hand-written prose.
 *
 * @param {string} text - Prompt text to inspect.
 * @returns {boolean} true when the prompt looks like a skill-body echo;
 *   false otherwise, including for non-string or short input.
 */
function isSkillBodyEcho(text) {
  const longEnough = typeof text === 'string' && text.length >= 500;
  if (!longEnough) return false;

  // Two distinct markers are required; a single hit could be a coincidence.
  const matchedMarkers = SKILL_BODY_MARKERS.filter((marker) => text.includes(marker));
  return matchedMarkers.length >= 2;
}
150
+
74
151
  function countDiscreteItems(text) {
75
152
  if (typeof text !== 'string') return 0;
76
153
  let count = 0;
@@ -83,9 +160,12 @@ function countDiscreteItems(text) {
83
160
 
84
161
  function detectLongFormPrompt(text) {
85
162
  if (typeof text !== 'string' || !text.trim()) return false;
86
- const lineCount = text.split('\n').filter(l => l.trim()).length;
163
+ // Strip quoted/pasted content before counting — only the USER's own words
164
+ // contribute to thresholds (otherwise the gate fires on illustrative pastes).
165
+ const stripped = stripQuotedContent(text);
166
+ const lineCount = stripped.split('\n').filter(l => l.trim()).length;
87
167
  if (lineCount > LONG_LINE_THRESHOLD) return true;
88
- if (countDiscreteItems(text) >= LONG_ITEM_THRESHOLD) return true;
168
+ if (countDiscreteItems(stripped) >= LONG_ITEM_THRESHOLD) return true;
89
169
  return false;
90
170
  }
91
171
 
@@ -116,6 +196,27 @@ const SYSTEM_CONTENT_PREFIXES = [
116
196
  '<bash-stderr>'
117
197
  ];
118
198
 
199
+ // Skill-body markers that indicate the prompt is a Claude Code skill body
200
+ // being echoed back to the model after an AI Skill(...) invocation. When
201
+ // the AI calls `Skill(skill="wogi-start", args="...long...")`, Claude Code
202
+ // surfaces the full skill prompt + args as the next "user message" — going
203
+ // through UserPromptSubmit. The args are AI-composed, not user-typed, so
204
+ // the gate must NOT fire on them. We detect this by the structural markers
205
+ // that only ever appear in skill bodies (not in regular user prose).
206
+ // Treating it as a user prompt was the deadlock shape from the wogiflow-cli
207
+ // 2026-05-13 incident — see the bug report transcript in this commit's body.
208
+ const SKILL_BODY_MARKERS = [
209
+ '**UNIVERSAL ENTRY POINT**',
210
+ '## Request Triage (AI-Driven Routing',
211
+ '### Command Catalog',
212
+ '### Pre-Routing Checks (Automatic)',
213
+ 'Routing order: Task ID',
214
+ '## Phase Execution (MANDATORY)',
215
+ '## Mandatory Rules',
216
+ 'ARGUMENTS: {args}',
217
+ '## How It Works (MANDATORY',
218
+ ];
219
+
119
220
  /**
120
221
  * Detect content that originates from the system (tool results, sub-agent
121
222
  * notifications, slash-command framings) rather than user typing. These
@@ -137,9 +238,14 @@ function isSystemOriginatedContent(text) {
137
238
 
138
239
  function hasTaskSignals(text) {
139
240
  if (typeof text !== 'string') return false;
241
+ // Imperatives inside pasted code/transcript/blockquotes are illustrative,
242
+ // not the user's own work-creating instructions. Count only on the USER's
243
+ // own words. (Without this, pasted error logs containing "fix" / "add"
244
+ // / "remove" trip the gate as if the user were ordering 5 tasks.)
245
+ const stripped = stripQuotedContent(text);
140
246
  let imperativeHits = 0;
141
247
  for (const re of TASK_IMPERATIVES) {
142
- const m = text.match(new RegExp(re.source, 'gi'));
248
+ const m = stripped.match(new RegExp(re.source, 'gi'));
143
249
  if (m) imperativeHits += m.length;
144
250
  }
145
251
  return imperativeHits >= 2;
@@ -176,6 +282,13 @@ function shouldForceExtractReview({ text, source, env = process.env } = {}) {
176
282
  if (isSystemOriginatedContent(text)) {
177
283
  return { forced: false, level: 'pass', reason: 'system-originated-content' };
178
284
  }
285
+ // Deadlock fix (2026-05-13): AI-composed Skill args get surfaced back as
286
+ // a "user message" by the harness. Detect the skill-body echo signature
287
+ // and skip the gate — the args are AI-decomposed, not user-typed, so
288
+ // item-reconciliation has no source to reconcile against.
289
+ if (isSkillBodyEcho(text)) {
290
+ return { forced: false, level: 'pass', reason: 'skill-body-echo' };
291
+ }
179
292
  if (!detectLongFormPrompt(text)) {
180
293
  return { forced: false, level: 'pass', reason: 'below-long-input-threshold' };
181
294
  }
@@ -308,6 +421,20 @@ function checkLongInputPendingGate(toolName, toolInput) {
308
421
  if (/flow\s+extract-zero-loss/.test(cmd)) return { blocked: false };
309
422
  if (/flow\s+long-input/.test(cmd)) return { blocked: false };
310
423
  if (/flow-source-fidelity\.js/.test(cmd)) return { blocked: false };
424
+ // EMERGENCY ESCAPE (2026-05-13 deadlock fix): when the `flow` CLI is
425
+ // unavailable (e.g., target project has no node_modules/wogiflow on PATH,
426
+ // or the CLI itself is broken), allow the user to manually clear the
427
+ // marker file via `rm`. Scoped narrowly to the exact marker path so it
428
+ // can't be used as a general-purpose Bash escape.
429
+ if (/^\s*rm\s+(?:-[a-zA-Z]+\s+)?(?:["']?)\.workflow\/state\/long-input-pending\.json(?:["']?)\s*$/.test(cmd)) {
430
+ return { blocked: false };
431
+ }
432
+ // Also allow the node-script equivalent (for sessions where `rm` is
433
+ // unavailable, e.g. some Windows shells). Matches both `fs.unlinkSync(...)`
434
+ // and `require('fs').unlinkSync(...)` forms.
435
+ if (/unlinkSync\s*\(\s*['"]\.workflow\/state\/long-input-pending\.json['"]\s*\)/.test(cmd)) {
436
+ return { blocked: false };
437
+ }
311
438
  // Falls through to block for everything else
312
439
  }
313
440
 
@@ -334,6 +461,11 @@ function checkLongInputPendingGate(toolName, toolInput) {
334
461
  ' 2. (ESCAPE HATCH) If this prompt genuinely does NOT create work',
335
462
  ' (e.g., it\'s a log dump or pure question), dismiss with:',
336
463
  ' `flow long-input-pending dismiss --reason="<concrete reason>"`',
464
+ ' 3. (EMERGENCY) If both paths above fail (e.g., `flow` CLI missing',
465
+ ' or broken), manually clear the marker file:',
466
+ ' `rm .workflow/state/long-input-pending.json`',
467
+ ' (This Bash command is explicitly allowed by the gate as a',
468
+ ' deadlock escape.)',
337
469
  '',
338
470
  'Read/Glob/Grep tools remain available for investigation.'
339
471
  ].join('\n')
@@ -345,10 +477,12 @@ module.exports = {
345
477
  LONG_LINE_THRESHOLD,
346
478
  LONG_ITEM_THRESHOLD,
347
479
  SYSTEM_CONTENT_PREFIXES,
480
+ SKILL_BODY_MARKERS,
348
481
  detectLongFormPrompt,
349
482
  hasSourceLink,
350
483
  hasTaskSignals,
351
484
  isSystemOriginatedContent,
485
+ isSkillBodyEcho,
352
486
  isChannelDispatchInWorker,
353
487
  shouldForceExtractReview,
354
488
  buildEnforcementMessage,
@@ -357,5 +491,6 @@ module.exports = {
357
491
  isLongInputPending,
358
492
  readLongInputPending,
359
493
  checkLongInputPendingGate,
360
- countDiscreteItems
494
+ countDiscreteItems,
495
+ stripQuotedContent
361
496
  };
@@ -53,7 +53,7 @@ function formatLine(record, now) {
53
53
  */
54
54
  function sweepAndReconcile(workspaceRoot) {
55
55
  let reconciled = 0;
56
- let readMessages, reconcileDispatch, readDispatches;
56
+ let readMessages, reconcileDispatch, readDispatches, refreshDispatchDeadline;
57
57
  try {
58
58
  const libMessages = path.resolve(__dirname, '..', '..', '..', 'lib', 'workspace-messages.js');
59
59
  const libTracking = path.resolve(__dirname, '..', '..', '..', 'lib', 'workspace-dispatch-tracking.js');
@@ -61,6 +61,7 @@ function sweepAndReconcile(workspaceRoot) {
61
61
  const tracking = require(libTracking);
62
62
  reconcileDispatch = tracking.reconcileDispatch;
63
63
  readDispatches = tracking.readDispatches;
64
+ refreshDispatchDeadline = tracking.refreshDispatchDeadline;
64
65
  } catch (_err) {
65
66
  return 0; // Fail-open
66
67
  }
@@ -78,13 +79,32 @@ function sweepAndReconcile(workspaceRoot) {
78
79
  if (r.taskId && !byTaskId.has(r.taskId)) byTaskId.set(r.taskId, r);
79
80
  }
80
81
 
81
- // Pull both message types. readMessages throws on missing dir internally
82
- // but guards with existsSync, so it's safe.
82
+ // S3 (wf-d3ae1717): heartbeats refresh the deadline (work ongoing, NOT a
83
+ // silent halt); terminal types resolve the dispatch. worker-progress is
84
+ // applied FIRST so a heartbeat that arrived before a terminal doesn't keep a
85
+ // since-resolved dispatch alive.
86
+ try {
87
+ const heartbeats = readMessages(workspaceRoot, { type: 'worker-progress' });
88
+ if (refreshDispatchDeadline) {
89
+ for (const hb of heartbeats) {
90
+ const taskId = hb.taskId;
91
+ if (!taskId || !byTaskId.has(taskId)) continue;
92
+ try { refreshDispatchDeadline(workspaceRoot, taskId); } catch (_err) { /* per-record */ }
93
+ }
94
+ }
95
+ } catch (_err) { /* heartbeats are best-effort */ }
96
+
97
+ // Pull terminal message types. readMessages throws on missing dir internally
98
+ // but guards with existsSync, so it's safe. worker-blocked / worker-idle /
99
+ // worker-awaiting-approval are terminal stops alongside the legacy pair.
83
100
  let messages = [];
84
101
  try {
85
102
  const completes = readMessages(workspaceRoot, { type: 'task-complete' });
86
103
  const stops = readMessages(workspaceRoot, { type: 'worker-stopped' });
87
- messages = completes.concat(stops);
104
+ const blocked = readMessages(workspaceRoot, { type: 'worker-blocked' });
105
+ const idle = readMessages(workspaceRoot, { type: 'worker-idle' });
106
+ const awaiting = readMessages(workspaceRoot, { type: 'worker-awaiting-approval' });
107
+ messages = completes.concat(stops, blocked, idle, awaiting);
88
108
  } catch (_err) {
89
109
  return 0;
90
110
  }
@@ -93,8 +113,10 @@ function sweepAndReconcile(workspaceRoot) {
93
113
  const taskId = msg.taskId || (msg.type === 'task-complete' ? msg.subject : null);
94
114
  if (!taskId || !byTaskId.has(taskId)) continue;
95
115
  try {
96
- const status = msg.type === 'worker-stopped' ? 'graceful-stop' : 'completed';
97
- const reason = msg.type === 'worker-stopped' ? (msg.reason || 'graceful') : null;
116
+ // task-complete maps to 'completed'; everything else is a non-overdue graceful
117
+ // stop (the reason field distinguishes blocked / awaiting / idle / graceful).
118
+ const status = msg.type === 'task-complete' ? 'completed' : 'graceful-stop';
119
+ const reason = msg.type === 'task-complete' ? null : (msg.reason || msg.type);
98
120
  const result = reconcileDispatch(workspaceRoot, taskId, status, reason);
99
121
  if (result) {
100
122
  reconciled++;
@@ -40,6 +40,58 @@ function handleWorkerSessionStart() {
40
40
  const { isWorker, shouldAnnounceReady, announceWorkerReady } = require(WORKER_READY_LIB);
41
41
  if (!isWorker()) return { branch: 'skip', reason: 'not-worker' };
42
42
 
43
+ // S5 (wf-ee87a24e): RESUME-IN-PROGRESS. If this restarted session has a task
44
+ // still in `inProgress` with sub-tasks remaining (durable S1 ledger), resume
45
+ // THAT task — do NOT fall through to "announce idle" (which would orphan it)
46
+ // or pick a different next task. The durable ledger means completed sub-tasks
47
+ // are NOT redone. Also post a worker-ready ack so the manager actively
48
+ // re-triggers if the resume wake-up was missed.
49
+ try {
50
+ const { PATHS, safeJsonParse } = require('../../flow-utils');
51
+ const ready = safeJsonParse(path.join(PATHS.state, 'ready.json'), { inProgress: [] });
52
+ const inProgress = (ready.inProgress || [])[0] || null;
53
+ if (inProgress && inProgress.id) {
54
+ let remaining = null, total = null;
55
+ try {
56
+ const subtaskState = require(path.join(__dirname, '..', '..', '..', 'lib', 'workspace-subtask-state.js'));
57
+ const summary = subtaskState.summary(inProgress.id);
58
+ remaining = summary.remaining; total = summary.total;
59
+ } catch (_err) { /* ledger optional */ }
60
+ // Only treat as resumable if there is remaining decomposed work, OR no
61
+ // ledger exists at all (single-step task interrupted mid-flight).
62
+ if (remaining === null || remaining > 0) {
63
+ // Best-effort ack so the manager knows the worker is back on this task.
64
+ // Bypass shouldAnnounceReady's empty-queue gating (it returns
65
+ // 'in-progress-not-empty' here by design) — for a resume we WANT the
66
+ // manager pinged. announceWorkerReady dedups via hasPendingAnnounce.
67
+ try {
68
+ const wr = require(WORKER_READY_LIB);
69
+ const wsRoot = process.env.WOGI_WORKSPACE_ROOT;
70
+ const repoName = process.env.WOGI_REPO_NAME;
71
+ if (wsRoot && repoName && repoName !== 'manager') {
72
+ wr.announceWorkerReady(wsRoot, repoName);
73
+ }
74
+ } catch (_err) { /* ack is best-effort */ }
75
+ const ctx = [
76
+ `⚡ WORKSPACE SESSION START — RESUMING IN-PROGRESS TASK`,
77
+ '',
78
+ `This worker restarted with task ${inProgress.id} still in progress${total != null ? ` (${remaining} of ${total} sub-task(s) remaining)` : ''}.`,
79
+ `Durable sub-task state is on disk — completed sub-tasks are recorded and must NOT be redone.`,
80
+ '',
81
+ 'AUTONOMOUS MODE CONTRACT (workspace worker):',
82
+ ' • Resume the SAME task — do not pick a different one, do not go idle.',
83
+ ' • Read .workflow/state/subtask-state.json to see which sub-tasks remain.',
84
+ ' • Grind to completion; only stop when done (flow done) or genuinely blocked.',
85
+ '',
86
+ `ACT NOW: Invoke Skill(skill="wogi-start", args="${inProgress.id}")`
87
+ ].join('\n');
88
+ return { branch: 'resume-in-progress', context: ctx, taskId: inProgress.id, remaining, total };
89
+ }
90
+ }
91
+ } catch (err) {
92
+ if (process.env.DEBUG) console.error(`[session-start-worker] resume-in-progress check failed (fail-open): ${err.message}`);
93
+ }
94
+
43
95
  // Check for queued work first — if any, tell the model to pick it up
44
96
  // instead of announcing idle readiness.
45
97
  let pickup;
@@ -87,8 +87,12 @@ async function orchestrateStop({ parsedInput }) {
87
87
  };
88
88
  }
89
89
 
90
+ // S3 (wf-d3ae1717): the worker-stopped emission used to fire HERE,
91
+ // unconditionally, before any gate decided to continue — so the manager saw
92
+ // "stopped mid-work" on every turn boundary. It now fires only at a genuine
93
+ // stop (end of this function) with a precise terminal type, and a
94
+ // worker-progress heartbeat fires from the continuation gate instead.
90
95
  const workspaceNotify = require('./workspace-stop-notify');
91
- await workspaceNotify.notifyWorkerStopped();
92
96
 
93
97
  const restartCoordinator = require('./task-boundary-restart-coordinator');
94
98
  const restartResult = await restartCoordinator.handleTaskBoundaryRestart({ parsedInput });
@@ -120,7 +124,18 @@ async function orchestrateStop({ parsedInput }) {
120
124
  const wsResult = await workspaceGates.checkWorkspaceStopGates({ parsedInput });
121
125
  if (wsResult?.shouldReturn) return wsResult.result;
122
126
 
123
- return await checkLoopExit();
127
+ // Genuine stop path: no gate forced continuation. Emit a precise terminal
128
+ // worker signal ONLY when we're actually allowing the turn to end (canExit).
129
+ // continueToNext / blocked-continue are not terminal stops.
130
+ const loopResult = await checkLoopExit();
131
+ try {
132
+ if (loopResult?.canExit === true) {
133
+ await workspaceNotify.notifyWorkerTerminal();
134
+ }
135
+ } catch (err) {
136
+ if (process.env.DEBUG) console.error(`[Stop] terminal notify error (fail-open): ${err.message}`);
137
+ }
138
+ return loopResult;
124
139
  }
125
140
 
126
141
  module.exports = { orchestrateStop };
@@ -222,6 +222,14 @@ async function runValidation(options = {}) {
222
222
 
223
223
  return {
224
224
  passed: allPassed,
225
+ // F6 (R-379): signal `blocked` so the adapter's `decision: 'block'` path
226
+ // actually fires when validation fails. Without this, the `continueOnBlock`
227
+ // wiring in transformPostToolUse is inert (decision is always undefined).
228
+ // With it, lint/typecheck failure after Edit/Write feeds back to Claude
229
+ // and (per the continueOnBlock setting) the turn continues so Claude can
230
+ // fix the error in-loop — which is what CLAUDE.md's "validate after every
231
+ // file edit" rule needs.
232
+ blocked: !allPassed,
225
233
  skipped: false,
226
234
  results,
227
235
  summary: generateValidationSummary(results, filePath)