@united-workforce/cli 0.2.1-rc.9 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (219)
  1. package/README.md +15 -8
  2. package/dist/__tests__/adapter-json-roundtrip.test.js +1 -1
  3. package/dist/__tests__/adapter-json-roundtrip.test.js.map +1 -1
  4. package/dist/__tests__/agent-resolution-llm-free.test.d.ts +2 -0
  5. package/dist/__tests__/agent-resolution-llm-free.test.d.ts.map +1 -0
  6. package/dist/__tests__/agent-resolution-llm-free.test.js +30 -0
  7. package/dist/__tests__/agent-resolution-llm-free.test.js.map +1 -0
  8. package/dist/__tests__/build-step-entry.test.d.ts +2 -0
  9. package/dist/__tests__/build-step-entry.test.d.ts.map +1 -0
  10. package/dist/__tests__/build-step-entry.test.js +173 -0
  11. package/dist/__tests__/build-step-entry.test.js.map +1 -0
  12. package/dist/__tests__/clear-thread-failed-attempts.test.d.ts +2 -0
  13. package/dist/__tests__/clear-thread-failed-attempts.test.d.ts.map +1 -0
  14. package/dist/__tests__/clear-thread-failed-attempts.test.js +93 -0
  15. package/dist/__tests__/clear-thread-failed-attempts.test.js.map +1 -0
  16. package/dist/__tests__/config.test.js +26 -302
  17. package/dist/__tests__/config.test.js.map +1 -1
  18. package/dist/__tests__/current-role.test.js +7 -6
  19. package/dist/__tests__/current-role.test.js.map +1 -1
  20. package/dist/__tests__/e2e-mock-agent.test.js +20 -23
  21. package/dist/__tests__/e2e-mock-agent.test.js.map +1 -1
  22. package/dist/__tests__/issue-180-workflow-ref-removed.test.d.ts +2 -0
  23. package/dist/__tests__/issue-180-workflow-ref-removed.test.d.ts.map +1 -0
  24. package/dist/__tests__/issue-180-workflow-ref-removed.test.js +40 -0
  25. package/dist/__tests__/issue-180-workflow-ref-removed.test.js.map +1 -0
  26. package/dist/__tests__/moderator-evaluate.test.js +9 -50
  27. package/dist/__tests__/moderator-evaluate.test.js.map +1 -1
  28. package/dist/__tests__/pid-recycling.test.d.ts +2 -0
  29. package/dist/__tests__/pid-recycling.test.d.ts.map +1 -0
  30. package/dist/__tests__/pid-recycling.test.js +271 -0
  31. package/dist/__tests__/pid-recycling.test.js.map +1 -0
  32. package/dist/__tests__/prompt.test.js +321 -0
  33. package/dist/__tests__/prompt.test.js.map +1 -1
  34. package/dist/__tests__/resolve-head-hash.test.js +4 -4
  35. package/dist/__tests__/resolve-head-hash.test.js.map +1 -1
  36. package/dist/__tests__/setup-agent-discovery.test.js +21 -30
  37. package/dist/__tests__/setup-agent-discovery.test.js.map +1 -1
  38. package/dist/__tests__/setup-complexity.test.js +2 -168
  39. package/dist/__tests__/setup-complexity.test.js.map +1 -1
  40. package/dist/__tests__/setup-no-llm.test.d.ts +2 -0
  41. package/dist/__tests__/setup-no-llm.test.d.ts.map +1 -0
  42. package/dist/__tests__/setup-no-llm.test.js +52 -0
  43. package/dist/__tests__/setup-no-llm.test.js.map +1 -0
  44. package/dist/__tests__/solve-issue-tea-worktree.test.js +24 -27
  45. package/dist/__tests__/solve-issue-tea-worktree.test.js.map +1 -1
  46. package/dist/__tests__/step-ask.test.d.ts +2 -0
  47. package/dist/__tests__/step-ask.test.d.ts.map +1 -0
  48. package/dist/__tests__/step-ask.test.js +499 -0
  49. package/dist/__tests__/step-ask.test.js.map +1 -0
  50. package/dist/__tests__/step-show-json.test.js +1 -0
  51. package/dist/__tests__/step-show-json.test.js.map +1 -1
  52. package/dist/__tests__/step-timing.test.js +2 -0
  53. package/dist/__tests__/step-timing.test.js.map +1 -1
  54. package/dist/__tests__/store-global-cas.test.js +2 -2
  55. package/dist/__tests__/store-global-cas.test.js.map +1 -1
  56. package/dist/__tests__/store-unified-threads.test.js +9 -9
  57. package/dist/__tests__/store-unified-threads.test.js.map +1 -1
  58. package/dist/__tests__/thread-cancel-status.test.js +6 -6
  59. package/dist/__tests__/thread-cancel-status.test.js.map +1 -1
  60. package/dist/__tests__/thread-list-filters.test.js +344 -9
  61. package/dist/__tests__/thread-list-filters.test.js.map +1 -1
  62. package/dist/__tests__/thread-poke.test.d.ts +2 -0
  63. package/dist/__tests__/thread-poke.test.d.ts.map +1 -0
  64. package/dist/__tests__/thread-poke.test.js +412 -0
  65. package/dist/__tests__/thread-poke.test.js.map +1 -0
  66. package/dist/__tests__/thread-resume.test.js +10 -14
  67. package/dist/__tests__/thread-resume.test.js.map +1 -1
  68. package/dist/__tests__/thread-show-status.test.js +17 -28
  69. package/dist/__tests__/thread-show-status.test.js.map +1 -1
  70. package/dist/__tests__/thread-suspend-step.test.js +8 -14
  71. package/dist/__tests__/thread-suspend-step.test.js.map +1 -1
  72. package/dist/__tests__/thread-suspended-display.test.js +10 -22
  73. package/dist/__tests__/thread-suspended-display.test.js.map +1 -1
  74. package/dist/__tests__/thread.test.js +4 -4
  75. package/dist/__tests__/thread.test.js.map +1 -1
  76. package/dist/__tests__/validate-semantic.test.js +49 -21
  77. package/dist/__tests__/validate-semantic.test.js.map +1 -1
  78. package/dist/__tests__/workflow-list-recursive.test.d.ts +2 -0
  79. package/dist/__tests__/workflow-list-recursive.test.d.ts.map +1 -0
  80. package/dist/__tests__/workflow-list-recursive.test.js +283 -0
  81. package/dist/__tests__/workflow-list-recursive.test.js.map +1 -0
  82. package/dist/__tests__/workflow-resolution.test.js +36 -21
  83. package/dist/__tests__/workflow-resolution.test.js.map +1 -1
  84. package/dist/__tests__/workflow-show-resolution.test.d.ts +2 -0
  85. package/dist/__tests__/workflow-show-resolution.test.d.ts.map +1 -0
  86. package/dist/__tests__/workflow-show-resolution.test.js +210 -0
  87. package/dist/__tests__/workflow-show-resolution.test.js.map +1 -0
  88. package/dist/__tests__/workflow-validate.test.d.ts +2 -0
  89. package/dist/__tests__/workflow-validate.test.d.ts.map +1 -0
  90. package/dist/__tests__/workflow-validate.test.js +687 -0
  91. package/dist/__tests__/workflow-validate.test.js.map +1 -0
  92. package/dist/background/background.d.ts +22 -1
  93. package/dist/background/background.d.ts.map +1 -1
  94. package/dist/background/background.js +83 -6
  95. package/dist/background/background.js.map +1 -1
  96. package/dist/background/index.d.ts +1 -1
  97. package/dist/background/index.d.ts.map +1 -1
  98. package/dist/background/index.js +1 -1
  99. package/dist/background/index.js.map +1 -1
  100. package/dist/background/types.d.ts +1 -0
  101. package/dist/background/types.d.ts.map +1 -1
  102. package/dist/cli.js +66 -31
  103. package/dist/cli.js.map +1 -1
  104. package/dist/commands/config.d.ts +3 -1
  105. package/dist/commands/config.d.ts.map +1 -1
  106. package/dist/commands/config.js +7 -33
  107. package/dist/commands/config.js.map +1 -1
  108. package/dist/commands/prompt.d.ts.map +1 -1
  109. package/dist/commands/prompt.js +15 -2
  110. package/dist/commands/prompt.js.map +1 -1
  111. package/dist/commands/setup.d.ts +7 -39
  112. package/dist/commands/setup.d.ts.map +1 -1
  113. package/dist/commands/setup.js +27 -302
  114. package/dist/commands/setup.js.map +1 -1
  115. package/dist/commands/step.d.ts +44 -1
  116. package/dist/commands/step.d.ts.map +1 -1
  117. package/dist/commands/step.js +255 -11
  118. package/dist/commands/step.js.map +1 -1
  119. package/dist/commands/thread.d.ts +16 -3
  120. package/dist/commands/thread.d.ts.map +1 -1
  121. package/dist/commands/thread.js +379 -140
  122. package/dist/commands/thread.js.map +1 -1
  123. package/dist/commands/workflow.d.ts +9 -1
  124. package/dist/commands/workflow.d.ts.map +1 -1
  125. package/dist/commands/workflow.js +130 -6
  126. package/dist/commands/workflow.js.map +1 -1
  127. package/dist/moderator/__tests__/evaluate.test.js +31 -17
  128. package/dist/moderator/__tests__/evaluate.test.js.map +1 -1
  129. package/dist/moderator/evaluate.d.ts.map +1 -1
  130. package/dist/moderator/evaluate.js +4 -16
  131. package/dist/moderator/evaluate.js.map +1 -1
  132. package/dist/moderator/index.d.ts +1 -2
  133. package/dist/moderator/index.d.ts.map +1 -1
  134. package/dist/moderator/index.js +0 -1
  135. package/dist/moderator/index.js.map +1 -1
  136. package/dist/moderator/types.d.ts +6 -10
  137. package/dist/moderator/types.d.ts.map +1 -1
  138. package/dist/moderator/types.js +1 -3
  139. package/dist/moderator/types.js.map +1 -1
  140. package/dist/schemas.d.ts +2 -0
  141. package/dist/schemas.d.ts.map +1 -1
  142. package/dist/schemas.js +5 -3
  143. package/dist/schemas.js.map +1 -1
  144. package/dist/store.d.ts +28 -9
  145. package/dist/store.d.ts.map +1 -1
  146. package/dist/store.js +75 -16
  147. package/dist/store.js.map +1 -1
  148. package/dist/validate-semantic.d.ts.map +1 -1
  149. package/dist/validate-semantic.js +83 -66
  150. package/dist/validate-semantic.js.map +1 -1
  151. package/dist/validate.d.ts +6 -0
  152. package/dist/validate.d.ts.map +1 -1
  153. package/dist/validate.js +24 -0
  154. package/dist/validate.js.map +1 -1
  155. package/package.json +8 -10
  156. package/src/__tests__/adapter-json-roundtrip.test.ts +1 -1
  157. package/src/__tests__/agent-resolution-llm-free.test.ts +39 -0
  158. package/src/__tests__/build-step-entry.test.ts +203 -0
  159. package/src/__tests__/clear-thread-failed-attempts.test.ts +122 -0
  160. package/src/__tests__/config.test.ts +33 -321
  161. package/src/__tests__/current-role.test.ts +7 -6
  162. package/src/__tests__/e2e-mock-agent.test.ts +20 -23
  163. package/src/__tests__/fixtures/e2e-count.workflow.yaml +1 -0
  164. package/src/__tests__/fixtures/e2e-linear.workflow.yaml +1 -0
  165. package/src/__tests__/fixtures/{e2e-mustache.workflow.yaml → e2e-liquid.workflow.yaml} +3 -2
  166. package/src/__tests__/fixtures/e2e-loop.workflow.yaml +1 -0
  167. package/src/__tests__/fixtures/e2e-suspend.mock.yaml +2 -2
  168. package/src/__tests__/fixtures/e2e-suspend.workflow.yaml +6 -10
  169. package/src/__tests__/issue-180-workflow-ref-removed.test.ts +43 -0
  170. package/src/__tests__/moderator-evaluate.test.ts +9 -52
  171. package/src/__tests__/pid-recycling.test.ts +328 -0
  172. package/src/__tests__/prompt.test.ts +397 -0
  173. package/src/__tests__/resolve-head-hash.test.ts +4 -4
  174. package/src/__tests__/setup-agent-discovery.test.ts +26 -51
  175. package/src/__tests__/setup-complexity.test.ts +1 -203
  176. package/src/__tests__/setup-no-llm.test.ts +68 -0
  177. package/src/__tests__/solve-issue-tea-worktree.test.ts +24 -30
  178. package/src/__tests__/step-ask.test.ts +670 -0
  179. package/src/__tests__/step-show-json.test.ts +1 -0
  180. package/src/__tests__/step-timing.test.ts +2 -0
  181. package/src/__tests__/store-global-cas.test.ts +2 -2
  182. package/src/__tests__/store-unified-threads.test.ts +9 -9
  183. package/src/__tests__/thread-cancel-status.test.ts +6 -6
  184. package/src/__tests__/thread-list-filters.test.ts +434 -8
  185. package/src/__tests__/thread-poke.test.ts +545 -0
  186. package/src/__tests__/thread-resume.test.ts +10 -14
  187. package/src/__tests__/thread-show-status.test.ts +17 -29
  188. package/src/__tests__/thread-suspend-step.test.ts +8 -14
  189. package/src/__tests__/thread-suspended-display.test.ts +10 -22
  190. package/src/__tests__/thread.test.ts +4 -4
  191. package/src/__tests__/validate-semantic.test.ts +59 -31
  192. package/src/__tests__/workflow-list-recursive.test.ts +370 -0
  193. package/src/__tests__/workflow-resolution.test.ts +39 -21
  194. package/src/__tests__/workflow-show-resolution.test.ts +285 -0
  195. package/src/__tests__/workflow-validate.test.ts +806 -0
  196. package/src/background/background.ts +88 -6
  197. package/src/background/index.ts +2 -0
  198. package/src/background/types.ts +1 -0
  199. package/src/cli.ts +97 -47
  200. package/src/commands/config.ts +7 -35
  201. package/src/commands/prompt.ts +15 -2
  202. package/src/commands/setup.ts +29 -357
  203. package/src/commands/step.ts +339 -12
  204. package/src/commands/thread.ts +463 -169
  205. package/src/commands/workflow.ts +159 -4
  206. package/src/moderator/__tests__/evaluate.test.ts +34 -17
  207. package/src/moderator/evaluate.ts +5 -17
  208. package/src/moderator/index.ts +1 -6
  209. package/src/moderator/types.ts +6 -14
  210. package/src/schemas.ts +13 -3
  211. package/src/store.ts +86 -20
  212. package/src/validate-semantic.ts +109 -78
  213. package/src/validate.ts +27 -0
  214. package/dist/__tests__/setup-validate.test.d.ts +0 -2
  215. package/dist/__tests__/setup-validate.test.d.ts.map +0 -1
  216. package/dist/__tests__/setup-validate.test.js +0 -108
  217. package/dist/__tests__/setup-validate.test.js.map +0 -1
  218. package/src/__tests__/setup-validate.test.ts +0 -148
  219. package/src/__tests__/fixtures/{e2e-mustache.mock.yaml → e2e-liquid.mock.yaml} +0 -0
@@ -1,16 +1,16 @@
1
1
  import { execFileSync, spawn } from "node:child_process";
2
2
  import { access, readFile } from "node:fs/promises";
3
- import { dirname, isAbsolute, resolve as resolvePath } from "node:path";
3
+ import { dirname, isAbsolute, join, resolve as resolvePath } from "node:path";
4
4
  import { validate } from "@ocas/core";
5
- import { createThreadIndexEntry, markThreadSuspended, updateThreadHead, } from "@united-workforce/protocol";
5
+ import { createThreadIndexEntry, markThreadSuspended, SUSPEND_STATUS, updateThreadHead, } from "@united-workforce/protocol";
6
6
  import { createProcessLogger, extractUlidTimestamp, generateUlid, } from "@united-workforce/util";
7
7
  import { getEnvPath, loadWorkflowConfig } from "@united-workforce/util-agent";
8
8
  import { config as loadDotenv } from "dotenv";
9
9
  import { parse } from "yaml";
10
- import { createMarker, deleteMarker, isThreadRunning } from "../background/index.js";
10
+ import { createMarker, deleteMarker, getProcessStartTime, isMarkerValid, isThreadRunning, readMarker, } from "../background/index.js";
11
11
  import { createIncludeTag } from "../include.js";
12
- import { evaluate, isSuspendResult } from "../moderator/index.js";
13
- import { completeThread, createUwfStore, getThread, loadActiveThreads, loadHistoryThreads, loadWorkflowRegistry, resolveWorkflowHash, setThread, } from "../store.js";
12
+ import { evaluate } from "../moderator/index.js";
13
+ import { completeThread, createUwfStore, findRegistryName, getThread, loadActiveThreads, loadHistoryThreads, loadWorkflowRegistry, resolveWorkflowHash, setThread, } from "../store.js";
14
14
  import { checkWorkflowFilenameConsistency, isCasRef, parseWorkflowPayload } from "../validate.js";
15
15
  import { validateWorkflow } from "../validate-semantic.js";
16
16
  import { collectOrderedSteps, expandOutput, fail, walkChain, } from "./shared.js";
@@ -18,56 +18,52 @@ import { materializeWorkflowPayload } from "./workflow.js";
18
18
  const END_ROLE = "$END";
19
19
  const START_ROLE = "$START";
20
20
  export const THREAD_READ_DEFAULT_QUOTA = 4000;
21
- function buildStepOutputFromEvaluation(workflowHash, threadId, head, status, evaluation, background) {
22
- const done = status === "completed";
23
- let currentRole = null;
24
- let suspendedRole = null;
25
- let suspendMessage = null;
26
- if (evaluation.ok) {
27
- if (isSuspendResult(evaluation.value)) {
28
- suspendedRole = evaluation.value.suspendedRole;
29
- suspendMessage = evaluation.value.prompt;
30
- }
31
- else if (evaluation.value.role !== END_ROLE) {
32
- currentRole = evaluation.value.role;
33
- }
21
+ /**
22
+ * Read the suspend reason from an agent output if it is an engine-level suspend
23
+ * (coroutine yield). Returns the reason string when `$status === "$SUSPEND"`,
24
+ * or `null` otherwise. A suspend output with no `reason` yields an empty string.
25
+ */
26
+ function readSuspendReason(lastOutput) {
27
+ if (lastOutput[STATUS_KEY] !== SUSPEND_STATUS) {
28
+ return null;
34
29
  }
30
+ const reason = lastOutput.reason;
31
+ return typeof reason === "string" ? reason : "";
32
+ }
33
+ function buildSuspendStepOutput(workflowHash, threadId, head, suspendedRole, suspendMessage) {
35
34
  return {
36
35
  workflow: workflowHash,
37
36
  thread: threadId,
38
37
  head,
39
- status,
40
- currentRole,
38
+ status: "suspended",
39
+ currentRole: null,
41
40
  suspendedRole,
42
41
  suspendMessage,
43
- done,
44
- background,
42
+ done: false,
43
+ background: null,
44
+ error: null,
45
45
  };
46
46
  }
47
- function resolveSuspendFieldsFromGraph(uwf, head, workflowRef) {
47
+ function resolveSuspendFieldsFromOutput(uwf, head) {
48
48
  const chain = walkChain(uwf, head);
49
49
  const { lastRole, lastOutput } = resolveEvaluateArgs(uwf, chain);
50
- const workflow = loadWorkflowPayload(uwf, workflowRef);
51
- const result = evaluate(workflow.graph, lastRole, lastOutput);
52
- if (result.ok && isSuspendResult(result.value)) {
53
- return {
54
- suspendedRole: result.value.suspendedRole,
55
- suspendMessage: result.value.prompt,
56
- };
50
+ const reason = readSuspendReason(lastOutput);
51
+ if (reason !== null) {
52
+ return { suspendedRole: lastRole, suspendMessage: reason };
57
53
  }
58
54
  return { suspendedRole: null, suspendMessage: null };
59
55
  }
60
- function resolveSuspendFieldsForShow(entry, status, uwf, head, workflowRef) {
56
+ function resolveSuspendFieldsForShow(entry, status, uwf, head) {
61
57
  if (status !== "suspended") {
62
58
  return { suspendedRole: null, suspendMessage: null };
63
59
  }
64
60
  if (entry.suspendedRole !== null && entry.suspendMessage !== null) {
65
61
  return { suspendedRole: entry.suspendedRole, suspendMessage: entry.suspendMessage };
66
62
  }
67
- const fromGraph = resolveSuspendFieldsFromGraph(uwf, head, workflowRef);
63
+ const fromOutput = resolveSuspendFieldsFromOutput(uwf, head);
68
64
  return {
69
- suspendedRole: entry.suspendedRole ?? fromGraph.suspendedRole,
70
- suspendMessage: entry.suspendMessage ?? fromGraph.suspendMessage,
65
+ suspendedRole: entry.suspendedRole ?? fromOutput.suspendedRole,
66
+ suspendMessage: entry.suspendMessage ?? fromOutput.suspendMessage,
71
67
  };
72
68
  }
73
69
  async function ensureThreadSuspendMetadata(varStore, threadId, entry, suspendedRole, suspendMessage) {
@@ -78,16 +74,14 @@ async function ensureThreadSuspendMetadata(varStore, threadId, entry, suspendedR
78
74
  setThread(varStore, threadId, updated);
79
75
  return updated;
80
76
  }
81
- async function resolveActiveThreadStatus(storageRoot, threadId, uwf, head, workflowRef) {
77
+ async function resolveActiveThreadStatus(storageRoot, threadId, uwf, head) {
82
78
  const runningMarker = await isThreadRunning(storageRoot, threadId);
83
79
  if (runningMarker !== null) {
84
80
  return "running";
85
81
  }
86
82
  const chain = walkChain(uwf, head);
87
- const { lastRole, lastOutput } = resolveEvaluateArgs(uwf, chain);
88
- const workflow = loadWorkflowPayload(uwf, workflowRef);
89
- const result = evaluate(workflow.graph, lastRole, lastOutput);
90
- if (result.ok && isSuspendResult(result.value)) {
83
+ const { lastOutput } = resolveEvaluateArgs(uwf, chain);
84
+ if (readSuspendReason(lastOutput) !== null) {
91
85
  return "suspended";
92
86
  }
93
87
  return "idle";
@@ -99,12 +93,15 @@ async function resolveActiveThreadStatus(storageRoot, threadId, uwf, head, workf
99
93
  function resolveCurrentRole(uwf, head, workflowRef) {
100
94
  const chain = walkChain(uwf, head);
101
95
  const { lastRole, lastOutput } = resolveEvaluateArgs(uwf, chain);
96
+ if (readSuspendReason(lastOutput) !== null) {
97
+ return null;
98
+ }
102
99
  const workflow = loadWorkflowPayload(uwf, workflowRef);
103
100
  const result = evaluate(workflow.graph, lastRole, lastOutput);
104
101
  if (!result.ok) {
105
102
  return null;
106
103
  }
107
- if (isSuspendResult(result.value) || result.value.role === END_ROLE) {
104
+ if (result.value.role === END_ROLE) {
108
105
  return null;
109
106
  }
110
107
  return result.value.role;
@@ -113,10 +110,12 @@ const PL_THREAD_START = "7HNQ4B2X";
113
110
  const PL_MODERATOR = "M3K8V9T1";
114
111
  const PL_AGENT_SPAWN = "R5J2W8N4";
115
112
  const PL_AGENT_DONE = "C6P9E3H7";
113
+ const PL_AGENT_ERROR = "Z3F7K8M2";
116
114
  const PL_THREAD_ARCHIVED = "F4D8Q2K5";
117
115
  const PL_STEP_ERROR = "B8T5N1V6";
118
116
  const PL_BACKGROUND_START = "X7Q4W9M2";
119
117
  const PL_THREAD_RESUME = "K2R7M4N8";
118
+ const PL_THREAD_POKE = "P4Q9R3X7";
120
119
  function buildResumePrompt(graphPrompt, supplement) {
121
120
  if (supplement === null || supplement === "") {
122
121
  return graphPrompt;
@@ -147,18 +146,19 @@ async function workflowFileExists(dir, name, ext) {
147
146
  }
148
147
  }
149
148
  /**
150
- * Search for a workflow file in a given directory (checks both .workflow/ and .workflows/).
149
+ * Search for a workflow file in a given directory (checks both .workflows/ and .workflow/).
150
+ * `.workflows/` (primary) takes priority over `.workflow/` (legacy fallback).
151
151
  */
152
152
  async function findWorkflowInDir(dir, name) {
153
- // Check .workflow/ directory first (preferred)
153
+ // Check .workflows/ directory first (primary)
154
154
  for (const ext of [".yaml", ".yml"]) {
155
- const result = await workflowFileExists(resolvePath(dir, ".workflow"), name, ext);
155
+ const result = await workflowFileExists(resolvePath(dir, ".workflows"), name, ext);
156
156
  if (result !== null) {
157
157
  return result;
158
158
  }
159
159
  }
160
160
  for (const indexName of ["index.yaml", "index.yml"]) {
161
- const candidate = resolvePath(dir, ".workflow", name, indexName);
161
+ const candidate = resolvePath(dir, ".workflows", name, indexName);
162
162
  try {
163
163
  await access(candidate);
164
164
  return candidate;
@@ -167,15 +167,15 @@ async function findWorkflowInDir(dir, name) {
167
167
  /* not found */
168
168
  }
169
169
  }
170
- // Check .workflows/ directory as fallback (legacy)
170
+ // Check .workflow/ directory as fallback (legacy)
171
171
  for (const ext of [".yaml", ".yml"]) {
172
- const result = await workflowFileExists(resolvePath(dir, ".workflows"), name, ext);
172
+ const result = await workflowFileExists(resolvePath(dir, ".workflow"), name, ext);
173
173
  if (result !== null) {
174
174
  return result;
175
175
  }
176
176
  }
177
177
  for (const indexName of ["index.yaml", "index.yml"]) {
178
- const candidate = resolvePath(dir, ".workflows", name, indexName);
178
+ const candidate = resolvePath(dir, ".workflow", name, indexName);
179
179
  try {
180
180
  await access(candidate);
181
181
  return candidate;
@@ -186,8 +186,21 @@ async function findWorkflowInDir(dir, name) {
186
186
  }
187
187
  return null;
188
188
  }
189
+ /** Check if a directory contains a .git marker (directory or file). */
190
+ async function hasGitMarker(dir) {
191
+ try {
192
+ await access(join(dir, ".git"));
193
+ return true;
194
+ }
195
+ catch {
196
+ return false;
197
+ }
198
+ }
189
199
  /**
190
- * Traverse parent directories looking for `.workflow/<name>.yaml` or `.workflow/<name>.yml`.
200
+ * Traverse parent directories looking for a workflow named `name` under
201
+ * `.workflows/` (primary) or `.workflow/` (legacy fallback). Within each
202
+ * directory the lookup checks flat YAML files (`<name>.yaml`/`.yml`) and
203
+ * folder-based layouts (`<name>/index.yaml`/`.yml`).
191
204
  * Returns the absolute path if found, otherwise null.
192
205
  * Stops at filesystem root or .git directory.
193
206
  */
@@ -199,6 +212,10 @@ async function findWorkflowInParents(startDir, name) {
199
212
  if (found !== null) {
200
213
  return found;
201
214
  }
215
+ // Stop at .git boundary (repo root)
216
+ if (await hasGitMarker(currentDir)) {
217
+ break;
218
+ }
202
219
  // Stop at filesystem root
203
220
  if (currentDir === root) {
204
221
  break;
@@ -346,8 +363,8 @@ export async function cmdThreadShow(storageRoot, threadId) {
346
363
  if (workflow === null) {
347
364
  fail(`failed to resolve workflow from head: ${activeHead}`);
348
365
  }
349
- // Determine if this is a completed/cancelled thread
350
- if (entry.status === "completed" || entry.status === "cancelled") {
366
+ // Determine if this is an ended/cancelled thread
367
+ if (entry.status === "end" || entry.status === "cancelled") {
351
368
  const hint = null;
352
369
  return {
353
370
  workflow,
@@ -359,13 +376,14 @@ export async function cmdThreadShow(storageRoot, threadId) {
359
376
  suspendMessage: null,
360
377
  done: true,
361
378
  background: null,
379
+ error: null,
362
380
  hint,
363
381
  };
364
382
  }
365
383
  // Active thread
366
- const status = await resolveActiveThreadStatus(storageRoot, threadId, uwf, activeHead, workflow);
384
+ const status = await resolveActiveThreadStatus(storageRoot, threadId, uwf, activeHead);
367
385
  const currentRole = resolveCurrentRole(uwf, activeHead, workflow);
368
- const suspendFields = resolveSuspendFieldsForShow(entry, status, uwf, activeHead, workflow);
386
+ const suspendFields = resolveSuspendFieldsForShow(entry, status, uwf, activeHead);
369
387
  const hint = status === "suspended"
370
388
  ? `Thread is suspended. Resume with: uwf thread resume ${threadId}`
371
389
  : null;
@@ -379,15 +397,25 @@ export async function cmdThreadShow(storageRoot, threadId) {
379
397
  suspendMessage: suspendFields.suspendMessage,
380
398
  done: false,
381
399
  background: null,
400
+ error: null,
382
401
  hint,
383
402
  };
384
403
  }
385
- async function threadListItemFromActive(storageRoot, uwf, threadId, head) {
404
+ async function threadListItemFromActive(storageRoot, uwf, threadId, head, registry) {
386
405
  const workflow = resolveWorkflowFromHead(uwf, head);
387
406
  if (workflow === null) {
388
- return null;
407
+ // Head CAS node missing or unrecognized — treat as corrupt rather than silently skipping
408
+ return {
409
+ thread: threadId,
410
+ workflow: "",
411
+ head,
412
+ status: "corrupt",
413
+ currentRole: null,
414
+ statusDisplay: "corrupt",
415
+ workflowName: null,
416
+ };
389
417
  }
390
- const status = await resolveActiveThreadStatus(storageRoot, threadId, uwf, head, workflow);
418
+ const status = await resolveActiveThreadStatus(storageRoot, threadId, uwf, head);
391
419
  const statusDisplay = status === "suspended" ? `${status} [suspended]` : status;
392
420
  return {
393
421
  thread: threadId,
@@ -396,35 +424,67 @@ async function threadListItemFromActive(storageRoot, uwf, threadId, head) {
396
424
  status,
397
425
  currentRole: resolveCurrentRole(uwf, head, workflow),
398
426
  statusDisplay,
427
+ workflowName: findRegistryName(registry, workflow),
399
428
  };
400
429
  }
401
- async function collectActiveThreads(storageRoot, uwf, index) {
430
+ async function collectActiveThreads(storageRoot, uwf, index, registry) {
402
431
  const items = [];
403
432
  for (const [threadId, entry] of Object.entries(index)) {
404
- const item = await threadListItemFromActive(storageRoot, uwf, threadId, entry.head);
405
- if (item !== null) {
406
- items.push(item);
433
+ try {
434
+ const item = await threadListItemFromActive(storageRoot, uwf, threadId, entry.head, registry);
435
+ if (item !== null) {
436
+ items.push(item);
437
+ }
438
+ }
439
+ catch (err) {
440
+ const message = err instanceof Error ? err.message : String(err);
441
+ process.stderr.write(`warning: thread ${threadId} is corrupt: ${message}\n`);
442
+ items.push({
443
+ thread: threadId,
444
+ workflow: "",
445
+ head: entry.head,
446
+ status: "corrupt",
447
+ currentRole: null,
448
+ statusDisplay: "corrupt",
449
+ workflowName: null,
450
+ });
407
451
  }
408
452
  }
409
453
  return items;
410
454
  }
411
- function collectCompletedThreads(uwf, activeIds) {
455
+ function collectCompletedThreads(uwf, activeIds, registry) {
412
456
  const items = [];
413
457
  const history = loadHistoryThreads(uwf.varStore);
414
458
  const seen = new Set(); // Deduplication (issue #470)
415
459
  for (const [threadId, entry] of Object.entries(history)) {
416
460
  if (!activeIds.has(threadId) && !seen.has(threadId)) {
417
461
  seen.add(threadId);
418
- const status = entry.status;
419
- const workflow = resolveWorkflowFromHead(uwf, entry.head);
420
- items.push({
421
- thread: threadId,
422
- workflow: workflow ?? "",
423
- head: entry.head,
424
- status,
425
- currentRole: null,
426
- statusDisplay: status,
427
- });
462
+ try {
463
+ const status = entry.status;
464
+ const workflow = resolveWorkflowFromHead(uwf, entry.head);
465
+ items.push({
466
+ thread: threadId,
467
+ workflow: workflow ?? "",
468
+ head: entry.head,
469
+ status,
470
+ currentRole: null,
471
+ statusDisplay: status,
472
+ workflowName: workflow !== null ? findRegistryName(registry, workflow) : null,
473
+ });
474
+ }
475
+ catch (err) {
476
+ const message = err instanceof Error ? err.message : String(err);
477
+ process.stderr.write(`warning: completed thread ${threadId} is corrupt: ${message}\n`);
478
+ items.push({
479
+ thread: threadId,
480
+ workflow: "",
481
+ head: entry.head,
482
+ status: "corrupt",
483
+ currentRole: null,
484
+ statusDisplay: "corrupt",
485
+ workflowName: null,
486
+ });
487
+ }
428
488
  }
429
489
  }
430
490
  return items;
@@ -455,23 +515,28 @@ function applyPagination(items, skip, take) {
455
515
  const takeCount = take ?? items.length;
456
516
  return items.slice(skipCount, skipCount + takeCount);
457
517
  }
458
- export async function cmdThreadList(storageRoot, statusFilter, afterMs, beforeMs, skip, take) {
518
+ export async function cmdThreadList(storageRoot, statusFilter, afterMs, beforeMs, skip, take, showAll = false) {
459
519
  const uwf = await createUwfStore(storageRoot);
460
520
  const index = loadActiveThreads(uwf.varStore);
521
+ const registry = loadWorkflowRegistry(uwf.varStore);
522
+ // Resolve the effective filter:
523
+ // - explicit --status wins (showAll has no effect)
524
+ // - otherwise: --all → no filter; default → ["idle", "running", "corrupt"]
525
+ const effectiveFilter = statusFilter !== null ? statusFilter : showAll ? null : ["idle", "running", "corrupt"];
461
526
  // Collect active threads
462
- let items = await collectActiveThreads(storageRoot, uwf, index);
527
+ let items = await collectActiveThreads(storageRoot, uwf, index, registry);
463
528
  // Collect completed threads (if relevant for status filter)
464
- const includeCompleted = statusFilter === null ||
465
- statusFilter.includes("completed") ||
466
- statusFilter.includes("cancelled");
529
+ const includeCompleted = effectiveFilter === null ||
530
+ effectiveFilter.includes("end") ||
531
+ effectiveFilter.includes("cancelled");
467
532
  if (includeCompleted) {
468
533
  const activeIds = new Set(items.map((i) => i.thread));
469
- const completedItems = collectCompletedThreads(uwf, activeIds);
534
+ const completedItems = collectCompletedThreads(uwf, activeIds, registry);
470
535
  items = items.concat(completedItems);
471
536
  }
472
537
  // Apply status filter
473
- if (statusFilter !== null) {
474
- items = items.filter((item) => statusFilter.includes(item.status));
538
+ if (effectiveFilter !== null) {
539
+ items = items.filter((item) => effectiveFilter.includes(item.status));
475
540
  }
476
541
  // Apply time range filters
477
542
  items = applyTimeFilters(items, afterMs, beforeMs);
@@ -697,11 +762,9 @@ function resolveAgentConfig(config, workflow, role, agentOverride) {
697
762
  }
698
763
  return agentConfig;
699
764
  }
700
- function spawnAgent(plog, agent, threadId, role, edgePrompt, cwd) {
701
- const argv = [...agent.args, "--thread", threadId, "--role", role, "--prompt", edgePrompt];
702
- let stdout;
765
+ function executeAgentCommand(agent, argv, cwd, plog) {
703
766
  try {
704
- stdout = execFileSync(agent.command, argv, {
767
+ return execFileSync(agent.command, argv, {
705
768
  encoding: "utf8",
706
769
  stdio: ["ignore", "pipe", "pipe"],
707
770
  maxBuffer: 50 * 1024 * 1024, // 50 MB — stream-json output can be large
@@ -721,14 +784,17 @@ function spawnAgent(plog, agent, threadId, role, edgePrompt, cwd) {
721
784
  const detail = stderr.trim() !== "" ? `: ${stderr.trim()}` : "";
722
785
  failStep(plog, `agent command failed (${agent.command})${detail}`);
723
786
  }
787
+ }
788
+ function parseAgentOutput(stdout, plog) {
724
789
  const line = stdout.trim().split("\n").pop()?.trim() ?? "";
725
- let parsed;
726
790
  try {
727
- parsed = JSON.parse(line);
791
+ return JSON.parse(line);
728
792
  }
729
793
  catch {
730
794
  failStep(plog, `agent stdout last line is not valid JSON: ${line || "(empty)"}`);
731
795
  }
796
+ }
797
+ function validateAndNormalizeOutput(parsed, line, plog) {
732
798
  const obj = parsed;
733
799
  if (typeof obj !== "object" ||
734
800
  obj === null ||
@@ -736,10 +802,33 @@ function spawnAgent(plog, agent, threadId, role, edgePrompt, cwd) {
736
802
  !isCasRef(obj.stepHash)) {
737
803
  failStep(plog, `agent stdout JSON missing valid stepHash: ${line}`);
738
804
  }
805
+ // Normalize isError / errorMessage so downstream code can rely on them.
806
+ // Legacy adapters that don't emit these fields default to isError=false.
807
+ if (obj.isError !== undefined && typeof obj.isError !== "boolean") {
808
+ failStep(plog, `agent stdout JSON has non-boolean isError: ${line}`);
809
+ }
810
+ if (obj.isError === undefined) {
811
+ obj.isError = false;
812
+ }
813
+ if (obj.errorMessage !== undefined &&
814
+ obj.errorMessage !== null &&
815
+ typeof obj.errorMessage !== "string") {
816
+ failStep(plog, `agent stdout JSON has non-string errorMessage: ${line}`);
817
+ }
818
+ if (obj.errorMessage === undefined) {
819
+ obj.errorMessage = null;
820
+ }
739
821
  return obj;
740
822
  }
823
+ function spawnAgent(plog, agent, threadId, role, edgePrompt, cwd) {
824
+ const argv = [...agent.args, "--thread", threadId, "--role", role, "--prompt", edgePrompt];
825
+ const stdout = executeAgentCommand(agent, argv, cwd, plog);
826
+ const line = stdout.trim().split("\n").pop()?.trim() ?? "";
827
+ const parsed = parseAgentOutput(stdout, plog);
828
+ return validateAndNormalizeOutput(parsed, line, plog);
829
+ }
741
830
  function archiveThread(uwf, threadId, _workflow, _head) {
742
- completeThread(uwf.varStore, threadId, "completed");
831
+ completeThread(uwf.varStore, threadId, "end");
743
832
  }
744
833
  export async function cmdThreadResume(storageRoot, threadId, supplement, agentOverride) {
745
834
  const runningMarker = await isThreadRunning(storageRoot, threadId);
@@ -754,15 +843,15 @@ export async function cmdThreadResume(storageRoot, threadId, supplement, agentOv
754
843
  const headHash = entry.head;
755
844
  const chain = walkChain(uwf, headHash);
756
845
  const workflowHash = chain.start.workflow;
757
- // Check entry.status first for completed/cancelled (like in cmdThreadShow)
846
+ // Check entry.status first for end/cancelled (like in cmdThreadShow)
758
847
  let status;
759
- if (entry.status === "completed" || entry.status === "cancelled") {
848
+ if (entry.status === "end" || entry.status === "cancelled") {
760
849
  status = entry.status;
761
850
  }
762
851
  else {
763
- status = await resolveActiveThreadStatus(storageRoot, threadId, uwf, headHash, workflowHash);
852
+ status = await resolveActiveThreadStatus(storageRoot, threadId, uwf, headHash);
764
853
  }
765
- if (status !== "suspended" && status !== "completed") {
854
+ if (status !== "suspended" && status !== "end") {
766
855
  fail(`thread cannot be resumed: ${threadId} (status: ${status})`);
767
856
  }
768
857
  const plog = createProcessLogger({
@@ -770,7 +859,7 @@ export async function cmdThreadResume(storageRoot, threadId, supplement, agentOv
770
859
  context: { thread: threadId, workflow: workflowHash },
771
860
  });
772
861
  if (status === "suspended") {
773
- const suspendFields = resolveSuspendFieldsForShow(entry, status, uwf, headHash, workflowHash);
862
+ const suspendFields = resolveSuspendFieldsForShow(entry, status, uwf, headHash);
774
863
  if (suspendFields.suspendedRole === null) {
775
864
  fail(`thread is suspended but suspendedRole is missing: ${threadId}`);
776
865
  }
@@ -784,27 +873,136 @@ export async function cmdThreadResume(storageRoot, threadId, supplement, agentOv
784
873
  prompt: resumePrompt,
785
874
  });
786
875
  }
787
- // status === "completed"
876
+ // status === "end"
788
877
  const workflow = loadWorkflowPayload(uwf, workflowHash);
789
878
  const startResult = evaluate(workflow.graph, START_ROLE, { [STATUS_KEY]: "resume" });
790
879
  if (!startResult.ok) {
791
880
  fail(`failed to evaluate $START: ${startResult.error.message}`);
792
881
  }
793
- if (isSuspendResult(startResult.value)) {
794
- fail("workflow cannot start with $SUSPEND");
795
- }
796
882
  if (startResult.value.role === END_ROLE) {
797
883
  fail("workflow cannot start with $END");
798
884
  }
799
885
  const startRole = startResult.value.role;
800
- const completedResumePrompt = buildResumePrompt(startResult.value.prompt, supplement);
886
+ const endResumePrompt = buildResumePrompt(startResult.value.prompt, supplement);
801
887
  const updatedEntry = { ...entry, status: "idle", completedAt: null };
802
888
  setThread(uwf.varStore, threadId, updatedEntry);
803
889
  plog.log(PL_THREAD_RESUME, `resume completed role=${startRole} supplement=${supplement !== null}`, null);
804
890
  return cmdThreadStepOnce(storageRoot, threadId, agentOverride, plog, {
805
891
  role: startRole,
806
- prompt: completedResumePrompt,
892
+ prompt: endResumePrompt,
893
+ });
894
+ }
895
+ /**
896
+ * Validate that a thread can be poked. Returns the existing entry and the head StepNode payload.
897
+ * Fails (process exit) when the thread is missing, running, completed, cancelled, or has no
898
+ * StepNode at its head.
899
+ */
900
+ async function validatePokePreconditions(storageRoot, uwf, threadId) {
901
+ const runningMarker = await isThreadRunning(storageRoot, threadId);
902
+ if (runningMarker !== null) {
903
+ fail(`thread already executing in background (PID: ${runningMarker.pid})`);
904
+ }
905
+ const entry = getThread(uwf.varStore, threadId);
906
+ if (entry === null) {
907
+ fail(`thread not active: ${threadId}`);
908
+ }
909
+ if (entry.status === "end" || entry.status === "cancelled") {
910
+ fail(`thread cannot be poked: ${threadId} (status: ${entry.status})`);
911
+ }
912
+ const oldHead = entry.head;
913
+ const oldHeadNode = uwf.store.cas.get(oldHead);
914
+ if (oldHeadNode === null) {
915
+ fail(`CAS node not found: ${oldHead}`);
916
+ }
917
+ if (oldHeadNode.type !== uwf.schemas.stepNode) {
918
+ fail("thread cannot be poked: no step to replace (head is StartNode)");
919
+ }
920
+ return { entry, oldHead, oldHeadPayload: oldHeadNode.payload };
921
+ }
922
+ /**
923
+ * Resolve the next role from the post-poke chain state, used for the StepOutput.currentRole field.
924
+ * Returns null when the next role is $END, evaluation fails, or the result is a suspend.
925
+ */
926
+ function resolveCurrentRoleFromChain(uwfAfter, workflow, replacedHash) {
927
+ const chainAfter = walkChain(uwfAfter, replacedHash);
928
+ const { lastRole, lastOutput } = resolveEvaluateArgs(uwfAfter, chainAfter);
929
+ if (readSuspendReason(lastOutput) !== null) {
930
+ return null;
931
+ }
932
+ const afterResult = evaluate(workflow.graph, lastRole, lastOutput);
933
+ if (!afterResult.ok) {
934
+ return null;
935
+ }
936
+ if (afterResult.value.role === END_ROLE) {
937
+ return null;
938
+ }
939
+ return afterResult.value.role;
940
+ }
941
+ /**
942
+ * Poke a thread: re-run the agent on the head step with a supplementary prompt,
943
+ * replacing the head step's output. The new step's `prev` points to the OLD head's
944
+ * `prev` — semantically replacing (not appending to) the head. The moderator is NOT
945
+ * re-evaluated for routing; the role of the head step is re-used.
946
+ */
947
+ export async function cmdThreadPoke(storageRoot, threadId, prompt, agentOverride) {
948
+ const uwf = await createUwfStore(storageRoot);
949
+ const { entry, oldHeadPayload } = await validatePokePreconditions(storageRoot, uwf, threadId);
950
+ const chain = walkChain(uwf, entry.head);
951
+ const workflowHash = chain.start.workflow;
952
+ const threadCwd = chain.start.cwd;
953
+ const plog = createProcessLogger({
954
+ storageRoot,
955
+ context: { thread: threadId, workflow: workflowHash },
807
956
  });
957
+ // Resolve the agent: --agent override wins; otherwise read from old head step's `agent` field.
958
+ const config = await loadWorkflowConfig(storageRoot);
959
+ const workflow = loadWorkflowPayload(uwf, workflowHash);
960
+ const role = oldHeadPayload.role;
961
+ const agent = agentOverride !== null
962
+ ? resolveAgentConfig(config, workflow, role, agentOverride)
963
+ : parseAgentOverride(oldHeadPayload.agent);
964
+ const effectiveCwd = oldHeadPayload.cwd !== "" ? oldHeadPayload.cwd : threadCwd;
965
+ plog.log(PL_THREAD_POKE, `poke role=${role} agent=${agent.command}`, null);
966
+ plog.log(PL_AGENT_SPAWN, `spawning agent command=${agent.command}`, {
967
+ args: [...agent.args, threadId, role].join(" "),
968
+ });
969
+ loadDotenv({ path: getEnvPath(storageRoot) });
970
+ // Spawn the agent. The agent will create a new StepNode with prev=oldHead (it reads
971
+ // the active thread head). After the agent returns, we rewrite that node's prev so
972
+ // that the new head replaces the old head instead of appending after it.
973
+ const agentResult = spawnAgent(plog, agent, threadId, role, prompt, effectiveCwd);
974
+ const agentStepHash = agentResult.stepHash;
975
+ plog.log(PL_AGENT_DONE, `agent returned head=${agentStepHash}`, null);
976
+ const uwfAfter = await createUwfStore(storageRoot);
977
+ const agentNode = uwfAfter.store.cas.get(agentStepHash);
978
+ if (agentNode === null || agentNode.type !== uwfAfter.schemas.stepNode) {
979
+ failStep(plog, `agent returned hash that is not a StepNode: ${agentStepHash}`);
980
+ }
981
+ const agentPayload = agentNode.payload;
982
+ // Rewrite the new step so that its `prev` points to the OLD head's prev (replace semantics).
983
+ const replacedPayload = {
984
+ ...agentPayload,
985
+ prev: oldHeadPayload.prev,
986
+ };
987
+ const replacedHash = await uwfAfter.store.cas.put(uwfAfter.schemas.stepNode, replacedPayload);
988
+ const replacedNode = uwfAfter.store.cas.get(replacedHash);
989
+ if (replacedNode === null || !validate(uwfAfter.store, replacedNode)) {
990
+ failStep(plog, "rewritten StepNode failed schema validation");
991
+ }
992
+ // Update thread head to the replaced step. Status becomes idle (no moderator re-route).
993
+ setThread(uwfAfter.varStore, threadId, updateThreadHead(entry, replacedHash));
994
+ return {
995
+ workflow: workflowHash,
996
+ thread: threadId,
997
+ head: replacedHash,
998
+ status: "idle",
999
+ currentRole: resolveCurrentRoleFromChain(uwfAfter, workflow, replacedHash),
1000
+ suspendedRole: null,
1001
+ suspendMessage: null,
1002
+ done: false,
1003
+ background: null,
1004
+ error: null,
1005
+ };
808
1006
  }
809
1007
  export function validateCount(count) {
810
1008
  if (count < 1 || !Number.isInteger(count)) {
@@ -813,11 +1011,12 @@ export function validateCount(count) {
813
1011
  }
814
1012
  export async function cmdThreadExec(storageRoot, threadId, agentOverride, count, background, backgroundWorker) {
815
1013
  validateCount(count);
816
- // Check if thread is already running in background (unless we ARE the background worker)
1014
+ // Reject concurrent exec on the same thread (unless we ARE the background worker,
1015
+ // which hasn't created its own marker yet at this point).
817
1016
  if (!backgroundWorker) {
818
1017
  const runningMarker = await isThreadRunning(storageRoot, threadId);
819
1018
  if (runningMarker !== null) {
820
- fail(`thread already executing in background (PID: ${runningMarker.pid})`);
1019
+ fail(`thread ${threadId} is already being executed by PID ${runningMarker.pid}`);
821
1020
  }
822
1021
  }
823
1022
  const workflowHash = await resolveActiveThreadWorkflowHash(storageRoot, threadId);
@@ -829,17 +1028,15 @@ export async function cmdThreadExec(storageRoot, threadId, agentOverride, count,
829
1028
  // Spawn background process
830
1029
  return cmdThreadStepBackground(storageRoot, threadId, agentOverride, count, plog, workflowHash);
831
1030
  }
832
- // If we're the background worker, create marker before execution
833
- let markerCreated = false;
834
- if (backgroundWorker) {
835
- await createMarker(storageRoot, {
836
- thread: threadId,
837
- workflow: workflowHash,
838
- pid: process.pid,
839
- startedAt: Date.now(),
840
- });
841
- markerCreated = true;
842
- }
1031
+ // Create running marker so `thread list` shows "running" during execution
1032
+ // and concurrent `exec` on the same thread is rejected (see check above).
1033
+ await createMarker(storageRoot, {
1034
+ thread: threadId,
1035
+ workflow: workflowHash,
1036
+ pid: process.pid,
1037
+ startedAt: Date.now(),
1038
+ processStartTime: getProcessStartTime(process.pid),
1039
+ });
843
1040
  try {
844
1041
  const results = [];
845
1042
  for (let i = 0; i < count; i++) {
@@ -852,10 +1049,7 @@ export async function cmdThreadExec(storageRoot, threadId, agentOverride, count,
852
1049
  return results;
853
1050
  }
854
1051
  finally {
855
- // Cleanup marker if we created one
856
- if (markerCreated) {
857
- await deleteMarker(storageRoot, threadId);
858
- }
1052
+ await deleteMarker(storageRoot, threadId);
859
1053
  }
860
1054
  }
861
1055
  async function resolveActiveThreadWorkflowHash(storageRoot, threadId) {
@@ -903,6 +1097,7 @@ async function cmdThreadStepBackground(storageRoot, threadId, agentOverride, cou
903
1097
  suspendMessage: null,
904
1098
  done: false,
905
1099
  background: true,
1100
+ error: null,
906
1101
  },
907
1102
  ];
908
1103
  }
@@ -917,17 +1112,19 @@ function resolveResumeStepTarget(resume, chain, threadCwd, plog) {
917
1112
  }
918
1113
  async function resolveModeratorStepTarget(_storageRoot, threadId, entry, headHash, workflowHash, workflow, uwf, chain, threadCwd, plog) {
919
1114
  const { lastRole, lastOutput } = resolveEvaluateArgs(uwf, chain);
1115
+ // Intercept an already-suspended head before the moderator: a thread whose
1116
+ // head step yielded `$status: "$SUSPEND"` stays suspended (idempotent re-exec).
1117
+ const suspendReason = readSuspendReason(lastOutput);
1118
+ if (suspendReason !== null) {
1119
+ await ensureThreadSuspendMetadata(uwf.varStore, threadId, entry, lastRole, suspendReason);
1120
+ plog.log(PL_MODERATOR, `moderator action=suspend suspendedRole=${lastRole}`, null);
1121
+ return buildSuspendStepOutput(workflowHash, threadId, headHash, lastRole, suspendReason);
1122
+ }
920
1123
  const nextResult = evaluate(workflow.graph, lastRole, lastOutput);
921
1124
  if (!nextResult.ok) {
922
1125
  failStep(plog, `moderator evaluate failed: ${nextResult.error.message}`);
923
1126
  }
924
- plog.log(PL_MODERATOR, `moderator ${isSuspendResult(nextResult.value)
925
- ? `action=suspend suspendedRole=${nextResult.value.suspendedRole}`
926
- : `role=${nextResult.value.role}`} prompt=${nextResult.value.prompt}`, null);
927
- if (isSuspendResult(nextResult.value)) {
928
- await ensureThreadSuspendMetadata(uwf.varStore, threadId, entry, nextResult.value.suspendedRole, nextResult.value.prompt);
929
- return buildStepOutputFromEvaluation(workflowHash, threadId, headHash, "suspended", nextResult, null);
930
- }
1127
+ plog.log(PL_MODERATOR, `moderator role=${nextResult.value.role} prompt=${nextResult.value.prompt}`, null);
931
1128
  if (nextResult.value.role === END_ROLE) {
932
1129
  plog.log(PL_THREAD_ARCHIVED, `thread archived head=${headHash}`, null);
933
1130
  archiveThread(uwf, threadId, workflowHash, headHash);
@@ -935,12 +1132,13 @@ async function resolveModeratorStepTarget(_storageRoot, threadId, entry, headHas
935
1132
  workflow: workflowHash,
936
1133
  thread: threadId,
937
1134
  head: headHash,
938
- status: "completed",
1135
+ status: "end",
939
1136
  currentRole: null,
940
1137
  suspendedRole: null,
941
1138
  suspendMessage: null,
942
1139
  done: true,
943
1140
  background: null,
1141
+ error: null,
944
1142
  };
945
1143
  }
946
1144
  return {
@@ -954,20 +1152,24 @@ async function finalizeAgentStep(_storageRoot, threadId, workflowHash, workflow,
954
1152
  setThread(uwfAfter.varStore, threadId, updateThreadHead(priorEntry, newHead));
955
1153
  const chainAfter = walkChain(uwfAfter, newHead);
956
1154
  const { lastRole: lastRoleAfter, lastOutput: lastOutputAfter } = resolveEvaluateArgs(uwfAfter, chainAfter);
1155
+ // Intercept `$status: "$SUSPEND"` before the moderator (coroutine yield): the
1156
+ // step is already in CAS and the head has advanced — mark the thread suspended
1157
+ // and return without routing through the graph.
1158
+ const suspendReason = readSuspendReason(lastOutputAfter);
1159
+ if (suspendReason !== null) {
1160
+ setThread(uwfAfter.varStore, threadId, markThreadSuspended(getThread(uwfAfter.varStore, threadId) ?? createThreadIndexEntry(newHead), lastRoleAfter, suspendReason));
1161
+ return buildSuspendStepOutput(workflowHash, threadId, newHead, lastRoleAfter, suspendReason);
1162
+ }
957
1163
  const afterResult = evaluate(workflow.graph, lastRoleAfter, lastOutputAfter);
958
1164
  if (!afterResult.ok) {
959
1165
  failStep(plog, `post-step moderator evaluate failed: ${afterResult.error.message}`);
960
1166
  }
961
- if (isSuspendResult(afterResult.value)) {
962
- setThread(uwfAfter.varStore, threadId, markThreadSuspended(getThread(uwfAfter.varStore, threadId) ?? createThreadIndexEntry(newHead), afterResult.value.suspendedRole, afterResult.value.prompt));
963
- return buildStepOutputFromEvaluation(workflowHash, threadId, newHead, "suspended", afterResult, null);
964
- }
965
1167
  const done = afterResult.value.role === END_ROLE;
966
1168
  if (done) {
967
1169
  plog.log(PL_THREAD_ARCHIVED, `thread archived head=${newHead}`, null);
968
1170
  archiveThread(uwfAfter, threadId, workflowHash, newHead);
969
1171
  }
970
- const status = done ? "completed" : "idle";
1172
+ const status = done ? "end" : "idle";
971
1173
  const currentRole = done ? null : afterResult.value.role;
972
1174
  return {
973
1175
  workflow: workflowHash,
@@ -979,6 +1181,7 @@ async function finalizeAgentStep(_storageRoot, threadId, workflowHash, workflow,
979
1181
  suspendMessage: null,
980
1182
  done,
981
1183
  background: null,
1184
+ error: null,
982
1185
  };
983
1186
  }
984
1187
  async function cmdThreadStepOnce(storageRoot, threadId, agentOverride, plog, resume = null) {
@@ -1013,6 +1216,26 @@ async function cmdThreadStepOnce(storageRoot, threadId, agentOverride, plog, res
1013
1216
  if (newNode === null || newNode.type !== uwfAfter.schemas.stepNode) {
1014
1217
  failStep(plog, `agent returned hash that is not a StepNode: ${newHead}`);
1015
1218
  }
1219
+ // Recoverable failure: agent persisted a failed StepNode (e.g. frontmatter
1220
+ // validation exhausted retries) but the engine MUST NOT advance head. The
1221
+ // moderator graph is also untouched — the same role will be replayed on the
1222
+ // next exec (until eventual success records `previousAttempts` linking the
1223
+ // failed step hashes).
1224
+ if (agentResult.isError === true) {
1225
+ plog.log(PL_AGENT_ERROR, `agent reported recoverable failure stepHash=${newHead} message=${agentResult.errorMessage ?? ""}`, null);
1226
+ return {
1227
+ workflow: workflowHash,
1228
+ thread: threadId,
1229
+ head: headHash,
1230
+ status: "idle",
1231
+ currentRole: role,
1232
+ suspendedRole: null,
1233
+ suspendMessage: null,
1234
+ done: false,
1235
+ background: null,
1236
+ error: { stepHash: newHead, message: agentResult.errorMessage ?? "agent reported error" },
1237
+ };
1238
+ }
1016
1239
  return finalizeAgentStep(storageRoot, threadId, workflowHash, workflow, newHead, uwfAfter, plog);
1017
1240
  }
1018
1241
  async function resolveHeadHash(storageRoot, threadId) {
@@ -1043,7 +1266,9 @@ export async function cmdThreadRead(storageRoot, threadId, quota = THREAD_READ_D
1043
1266
  });
1044
1267
  }
1045
1268
  /**
1046
- * Stop background execution of a thread (but keep thread active)
1269
+ * Stop background execution of a thread (but keep thread active).
1270
+ * Validates process identity before sending signals to prevent killing
1271
+ * unrelated processes when PIDs are recycled.
1047
1272
  */
1048
1273
  export async function cmdThreadStop(storageRoot, threadId) {
1049
1274
  const uwf = await createUwfStore(storageRoot);
@@ -1051,14 +1276,22 @@ export async function cmdThreadStop(storageRoot, threadId) {
1051
1276
  if (entry === null) {
1052
1277
  fail(`thread not active: ${threadId}`);
1053
1278
  }
1054
- // Check if thread is running in background and terminate it
1055
- const runningMarker = await isThreadRunning(storageRoot, threadId);
1056
- if (runningMarker === null) {
1279
+ // Read the raw marker to check process identity
1280
+ const marker = await readMarker(storageRoot, threadId);
1281
+ if (marker === null) {
1057
1282
  process.stderr.write(`Warning: thread ${threadId} is not currently running\n`);
1058
1283
  return { thread: threadId, stopped: false };
1059
1284
  }
1285
+ // Validate that the marker's PID still belongs to the same process
1286
+ if (!isMarkerValid(marker)) {
1287
+ // Stale marker — PID was recycled or process died. Do NOT send a signal.
1288
+ process.stderr.write(`Warning: thread ${threadId} was not actually running (stale marker cleaned up)\n`);
1289
+ await deleteMarker(storageRoot, threadId);
1290
+ return { thread: threadId, stopped: false };
1291
+ }
1292
+ // Process identity confirmed — safe to send SIGTERM
1060
1293
  try {
1061
- process.kill(runningMarker.pid, "SIGTERM");
1294
+ process.kill(marker.pid, "SIGTERM");
1062
1295
  }
1063
1296
  catch {
1064
1297
  // Process may have already exited, ignore error
@@ -1067,7 +1300,9 @@ export async function cmdThreadStop(storageRoot, threadId) {
1067
1300
  return { thread: threadId, stopped: true };
1068
1301
  }
1069
1302
  /**
1070
- * Cancel a thread (stop execution + move to history)
1303
+ * Cancel a thread (stop execution + move to history).
1304
+ * Validates process identity before sending signals to prevent killing
1305
+ * unrelated processes when PIDs are recycled.
1071
1306
  */
1072
1307
  export async function cmdThreadCancel(storageRoot, threadId) {
1073
1308
  const uwf = await createUwfStore(storageRoot);
@@ -1075,15 +1310,19 @@ export async function cmdThreadCancel(storageRoot, threadId) {
1075
1310
  if (entry === null) {
1076
1311
  fail(`thread not active: ${threadId}`);
1077
1312
  }
1078
- // Check if thread is running in background and terminate it
1079
- const runningMarker = await isThreadRunning(storageRoot, threadId);
1080
- if (runningMarker !== null) {
1081
- try {
1082
- process.kill(runningMarker.pid, "SIGTERM");
1083
- }
1084
- catch {
1085
- // Process may have already exited, ignore error
1313
+ // Read the raw marker and validate process identity before sending signals
1314
+ const marker = await readMarker(storageRoot, threadId);
1315
+ if (marker !== null) {
1316
+ if (isMarkerValid(marker)) {
1317
+ // Process identity confirmed — safe to send SIGTERM
1318
+ try {
1319
+ process.kill(marker.pid, "SIGTERM");
1320
+ }
1321
+ catch {
1322
+ // Process may have already exited, ignore error
1323
+ }
1086
1324
  }
1325
+ // Always delete the marker (stale or not) — cancellation proceeds
1087
1326
  await deleteMarker(storageRoot, threadId);
1088
1327
  }
1089
1328
  completeThread(uwf.varStore, threadId, "cancelled");