@united-workforce/cli 0.2.1-rc.9 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (219)
  1. package/README.md +15 -8
  2. package/dist/__tests__/adapter-json-roundtrip.test.js +1 -1
  3. package/dist/__tests__/adapter-json-roundtrip.test.js.map +1 -1
  4. package/dist/__tests__/agent-resolution-llm-free.test.d.ts +2 -0
  5. package/dist/__tests__/agent-resolution-llm-free.test.d.ts.map +1 -0
  6. package/dist/__tests__/agent-resolution-llm-free.test.js +30 -0
  7. package/dist/__tests__/agent-resolution-llm-free.test.js.map +1 -0
  8. package/dist/__tests__/build-step-entry.test.d.ts +2 -0
  9. package/dist/__tests__/build-step-entry.test.d.ts.map +1 -0
  10. package/dist/__tests__/build-step-entry.test.js +173 -0
  11. package/dist/__tests__/build-step-entry.test.js.map +1 -0
  12. package/dist/__tests__/clear-thread-failed-attempts.test.d.ts +2 -0
  13. package/dist/__tests__/clear-thread-failed-attempts.test.d.ts.map +1 -0
  14. package/dist/__tests__/clear-thread-failed-attempts.test.js +93 -0
  15. package/dist/__tests__/clear-thread-failed-attempts.test.js.map +1 -0
  16. package/dist/__tests__/config.test.js +26 -302
  17. package/dist/__tests__/config.test.js.map +1 -1
  18. package/dist/__tests__/current-role.test.js +7 -6
  19. package/dist/__tests__/current-role.test.js.map +1 -1
  20. package/dist/__tests__/e2e-mock-agent.test.js +20 -23
  21. package/dist/__tests__/e2e-mock-agent.test.js.map +1 -1
  22. package/dist/__tests__/issue-180-workflow-ref-removed.test.d.ts +2 -0
  23. package/dist/__tests__/issue-180-workflow-ref-removed.test.d.ts.map +1 -0
  24. package/dist/__tests__/issue-180-workflow-ref-removed.test.js +40 -0
  25. package/dist/__tests__/issue-180-workflow-ref-removed.test.js.map +1 -0
  26. package/dist/__tests__/moderator-evaluate.test.js +9 -50
  27. package/dist/__tests__/moderator-evaluate.test.js.map +1 -1
  28. package/dist/__tests__/pid-recycling.test.d.ts +2 -0
  29. package/dist/__tests__/pid-recycling.test.d.ts.map +1 -0
  30. package/dist/__tests__/pid-recycling.test.js +271 -0
  31. package/dist/__tests__/pid-recycling.test.js.map +1 -0
  32. package/dist/__tests__/prompt.test.js +321 -0
  33. package/dist/__tests__/prompt.test.js.map +1 -1
  34. package/dist/__tests__/resolve-head-hash.test.js +4 -4
  35. package/dist/__tests__/resolve-head-hash.test.js.map +1 -1
  36. package/dist/__tests__/setup-agent-discovery.test.js +21 -30
  37. package/dist/__tests__/setup-agent-discovery.test.js.map +1 -1
  38. package/dist/__tests__/setup-complexity.test.js +2 -168
  39. package/dist/__tests__/setup-complexity.test.js.map +1 -1
  40. package/dist/__tests__/setup-no-llm.test.d.ts +2 -0
  41. package/dist/__tests__/setup-no-llm.test.d.ts.map +1 -0
  42. package/dist/__tests__/setup-no-llm.test.js +52 -0
  43. package/dist/__tests__/setup-no-llm.test.js.map +1 -0
  44. package/dist/__tests__/solve-issue-tea-worktree.test.js +24 -27
  45. package/dist/__tests__/solve-issue-tea-worktree.test.js.map +1 -1
  46. package/dist/__tests__/step-ask.test.d.ts +2 -0
  47. package/dist/__tests__/step-ask.test.d.ts.map +1 -0
  48. package/dist/__tests__/step-ask.test.js +499 -0
  49. package/dist/__tests__/step-ask.test.js.map +1 -0
  50. package/dist/__tests__/step-show-json.test.js +1 -0
  51. package/dist/__tests__/step-show-json.test.js.map +1 -1
  52. package/dist/__tests__/step-timing.test.js +2 -0
  53. package/dist/__tests__/step-timing.test.js.map +1 -1
  54. package/dist/__tests__/store-global-cas.test.js +2 -2
  55. package/dist/__tests__/store-global-cas.test.js.map +1 -1
  56. package/dist/__tests__/store-unified-threads.test.js +9 -9
  57. package/dist/__tests__/store-unified-threads.test.js.map +1 -1
  58. package/dist/__tests__/thread-cancel-status.test.js +6 -6
  59. package/dist/__tests__/thread-cancel-status.test.js.map +1 -1
  60. package/dist/__tests__/thread-list-filters.test.js +344 -9
  61. package/dist/__tests__/thread-list-filters.test.js.map +1 -1
  62. package/dist/__tests__/thread-poke.test.d.ts +2 -0
  63. package/dist/__tests__/thread-poke.test.d.ts.map +1 -0
  64. package/dist/__tests__/thread-poke.test.js +412 -0
  65. package/dist/__tests__/thread-poke.test.js.map +1 -0
  66. package/dist/__tests__/thread-resume.test.js +10 -14
  67. package/dist/__tests__/thread-resume.test.js.map +1 -1
  68. package/dist/__tests__/thread-show-status.test.js +17 -28
  69. package/dist/__tests__/thread-show-status.test.js.map +1 -1
  70. package/dist/__tests__/thread-suspend-step.test.js +8 -14
  71. package/dist/__tests__/thread-suspend-step.test.js.map +1 -1
  72. package/dist/__tests__/thread-suspended-display.test.js +10 -22
  73. package/dist/__tests__/thread-suspended-display.test.js.map +1 -1
  74. package/dist/__tests__/thread.test.js +4 -4
  75. package/dist/__tests__/thread.test.js.map +1 -1
  76. package/dist/__tests__/validate-semantic.test.js +49 -21
  77. package/dist/__tests__/validate-semantic.test.js.map +1 -1
  78. package/dist/__tests__/workflow-list-recursive.test.d.ts +2 -0
  79. package/dist/__tests__/workflow-list-recursive.test.d.ts.map +1 -0
  80. package/dist/__tests__/workflow-list-recursive.test.js +283 -0
  81. package/dist/__tests__/workflow-list-recursive.test.js.map +1 -0
  82. package/dist/__tests__/workflow-resolution.test.js +36 -21
  83. package/dist/__tests__/workflow-resolution.test.js.map +1 -1
  84. package/dist/__tests__/workflow-show-resolution.test.d.ts +2 -0
  85. package/dist/__tests__/workflow-show-resolution.test.d.ts.map +1 -0
  86. package/dist/__tests__/workflow-show-resolution.test.js +210 -0
  87. package/dist/__tests__/workflow-show-resolution.test.js.map +1 -0
  88. package/dist/__tests__/workflow-validate.test.d.ts +2 -0
  89. package/dist/__tests__/workflow-validate.test.d.ts.map +1 -0
  90. package/dist/__tests__/workflow-validate.test.js +687 -0
  91. package/dist/__tests__/workflow-validate.test.js.map +1 -0
  92. package/dist/background/background.d.ts +22 -1
  93. package/dist/background/background.d.ts.map +1 -1
  94. package/dist/background/background.js +83 -6
  95. package/dist/background/background.js.map +1 -1
  96. package/dist/background/index.d.ts +1 -1
  97. package/dist/background/index.d.ts.map +1 -1
  98. package/dist/background/index.js +1 -1
  99. package/dist/background/index.js.map +1 -1
  100. package/dist/background/types.d.ts +1 -0
  101. package/dist/background/types.d.ts.map +1 -1
  102. package/dist/cli.js +66 -31
  103. package/dist/cli.js.map +1 -1
  104. package/dist/commands/config.d.ts +3 -1
  105. package/dist/commands/config.d.ts.map +1 -1
  106. package/dist/commands/config.js +7 -33
  107. package/dist/commands/config.js.map +1 -1
  108. package/dist/commands/prompt.d.ts.map +1 -1
  109. package/dist/commands/prompt.js +15 -2
  110. package/dist/commands/prompt.js.map +1 -1
  111. package/dist/commands/setup.d.ts +7 -39
  112. package/dist/commands/setup.d.ts.map +1 -1
  113. package/dist/commands/setup.js +27 -302
  114. package/dist/commands/setup.js.map +1 -1
  115. package/dist/commands/step.d.ts +44 -1
  116. package/dist/commands/step.d.ts.map +1 -1
  117. package/dist/commands/step.js +255 -11
  118. package/dist/commands/step.js.map +1 -1
  119. package/dist/commands/thread.d.ts +16 -3
  120. package/dist/commands/thread.d.ts.map +1 -1
  121. package/dist/commands/thread.js +379 -140
  122. package/dist/commands/thread.js.map +1 -1
  123. package/dist/commands/workflow.d.ts +9 -1
  124. package/dist/commands/workflow.d.ts.map +1 -1
  125. package/dist/commands/workflow.js +130 -6
  126. package/dist/commands/workflow.js.map +1 -1
  127. package/dist/moderator/__tests__/evaluate.test.js +31 -17
  128. package/dist/moderator/__tests__/evaluate.test.js.map +1 -1
  129. package/dist/moderator/evaluate.d.ts.map +1 -1
  130. package/dist/moderator/evaluate.js +4 -16
  131. package/dist/moderator/evaluate.js.map +1 -1
  132. package/dist/moderator/index.d.ts +1 -2
  133. package/dist/moderator/index.d.ts.map +1 -1
  134. package/dist/moderator/index.js +0 -1
  135. package/dist/moderator/index.js.map +1 -1
  136. package/dist/moderator/types.d.ts +6 -10
  137. package/dist/moderator/types.d.ts.map +1 -1
  138. package/dist/moderator/types.js +1 -3
  139. package/dist/moderator/types.js.map +1 -1
  140. package/dist/schemas.d.ts +2 -0
  141. package/dist/schemas.d.ts.map +1 -1
  142. package/dist/schemas.js +5 -3
  143. package/dist/schemas.js.map +1 -1
  144. package/dist/store.d.ts +28 -9
  145. package/dist/store.d.ts.map +1 -1
  146. package/dist/store.js +75 -16
  147. package/dist/store.js.map +1 -1
  148. package/dist/validate-semantic.d.ts.map +1 -1
  149. package/dist/validate-semantic.js +83 -66
  150. package/dist/validate-semantic.js.map +1 -1
  151. package/dist/validate.d.ts +6 -0
  152. package/dist/validate.d.ts.map +1 -1
  153. package/dist/validate.js +24 -0
  154. package/dist/validate.js.map +1 -1
  155. package/package.json +8 -10
  156. package/src/__tests__/adapter-json-roundtrip.test.ts +1 -1
  157. package/src/__tests__/agent-resolution-llm-free.test.ts +39 -0
  158. package/src/__tests__/build-step-entry.test.ts +203 -0
  159. package/src/__tests__/clear-thread-failed-attempts.test.ts +122 -0
  160. package/src/__tests__/config.test.ts +33 -321
  161. package/src/__tests__/current-role.test.ts +7 -6
  162. package/src/__tests__/e2e-mock-agent.test.ts +20 -23
  163. package/src/__tests__/fixtures/e2e-count.workflow.yaml +1 -0
  164. package/src/__tests__/fixtures/e2e-linear.workflow.yaml +1 -0
  165. package/src/__tests__/fixtures/{e2e-mustache.workflow.yaml → e2e-liquid.workflow.yaml} +3 -2
  166. package/src/__tests__/fixtures/e2e-loop.workflow.yaml +1 -0
  167. package/src/__tests__/fixtures/e2e-suspend.mock.yaml +2 -2
  168. package/src/__tests__/fixtures/e2e-suspend.workflow.yaml +6 -10
  169. package/src/__tests__/issue-180-workflow-ref-removed.test.ts +43 -0
  170. package/src/__tests__/moderator-evaluate.test.ts +9 -52
  171. package/src/__tests__/pid-recycling.test.ts +328 -0
  172. package/src/__tests__/prompt.test.ts +397 -0
  173. package/src/__tests__/resolve-head-hash.test.ts +4 -4
  174. package/src/__tests__/setup-agent-discovery.test.ts +26 -51
  175. package/src/__tests__/setup-complexity.test.ts +1 -203
  176. package/src/__tests__/setup-no-llm.test.ts +68 -0
  177. package/src/__tests__/solve-issue-tea-worktree.test.ts +24 -30
  178. package/src/__tests__/step-ask.test.ts +670 -0
  179. package/src/__tests__/step-show-json.test.ts +1 -0
  180. package/src/__tests__/step-timing.test.ts +2 -0
  181. package/src/__tests__/store-global-cas.test.ts +2 -2
  182. package/src/__tests__/store-unified-threads.test.ts +9 -9
  183. package/src/__tests__/thread-cancel-status.test.ts +6 -6
  184. package/src/__tests__/thread-list-filters.test.ts +434 -8
  185. package/src/__tests__/thread-poke.test.ts +545 -0
  186. package/src/__tests__/thread-resume.test.ts +10 -14
  187. package/src/__tests__/thread-show-status.test.ts +17 -29
  188. package/src/__tests__/thread-suspend-step.test.ts +8 -14
  189. package/src/__tests__/thread-suspended-display.test.ts +10 -22
  190. package/src/__tests__/thread.test.ts +4 -4
  191. package/src/__tests__/validate-semantic.test.ts +59 -31
  192. package/src/__tests__/workflow-list-recursive.test.ts +370 -0
  193. package/src/__tests__/workflow-resolution.test.ts +39 -21
  194. package/src/__tests__/workflow-show-resolution.test.ts +285 -0
  195. package/src/__tests__/workflow-validate.test.ts +806 -0
  196. package/src/background/background.ts +88 -6
  197. package/src/background/index.ts +2 -0
  198. package/src/background/types.ts +1 -0
  199. package/src/cli.ts +97 -47
  200. package/src/commands/config.ts +7 -35
  201. package/src/commands/prompt.ts +15 -2
  202. package/src/commands/setup.ts +29 -357
  203. package/src/commands/step.ts +339 -12
  204. package/src/commands/thread.ts +463 -169
  205. package/src/commands/workflow.ts +159 -4
  206. package/src/moderator/__tests__/evaluate.test.ts +34 -17
  207. package/src/moderator/evaluate.ts +5 -17
  208. package/src/moderator/index.ts +1 -6
  209. package/src/moderator/types.ts +6 -14
  210. package/src/schemas.ts +13 -3
  211. package/src/store.ts +86 -20
  212. package/src/validate-semantic.ts +109 -78
  213. package/src/validate.ts +27 -0
  214. package/dist/__tests__/setup-validate.test.d.ts +0 -2
  215. package/dist/__tests__/setup-validate.test.d.ts.map +0 -1
  216. package/dist/__tests__/setup-validate.test.js +0 -108
  217. package/dist/__tests__/setup-validate.test.js.map +0 -1
  218. package/src/__tests__/setup-validate.test.ts +0 -148
  219. package/src/__tests__/fixtures/{e2e-mustache.mock.yaml → e2e-liquid.mock.yaml} +0 -0
@@ -1,6 +1,6 @@
1
1
  import { execFileSync, spawn } from "node:child_process";
2
2
  import { access, readFile } from "node:fs/promises";
3
- import { dirname, isAbsolute, resolve as resolvePath } from "node:path";
3
+ import { dirname, isAbsolute, join, resolve as resolvePath } from "node:path";
4
4
  import type { VarStore } from "@ocas/core";
5
5
  import { validate } from "@ocas/core";
6
6
  import type {
@@ -22,6 +22,7 @@ import type {
22
22
  import {
23
23
  createThreadIndexEntry,
24
24
  markThreadSuspended,
25
+ SUSPEND_STATUS,
25
26
  updateThreadHead,
26
27
  } from "@united-workforce/protocol";
27
28
  import {
@@ -34,12 +35,20 @@ import type { AdapterOutput } from "@united-workforce/util-agent";
34
35
  import { getEnvPath, loadWorkflowConfig } from "@united-workforce/util-agent";
35
36
  import { config as loadDotenv } from "dotenv";
36
37
  import { parse } from "yaml";
37
- import { createMarker, deleteMarker, isThreadRunning } from "../background/index.js";
38
+ import {
39
+ createMarker,
40
+ deleteMarker,
41
+ getProcessStartTime,
42
+ isMarkerValid,
43
+ isThreadRunning,
44
+ readMarker,
45
+ } from "../background/index.js";
38
46
  import { createIncludeTag } from "../include.js";
39
- import { evaluate, isSuspendResult } from "../moderator/index.js";
47
+ import { evaluate } from "../moderator/index.js";
40
48
  import {
41
49
  completeThread,
42
50
  createUwfStore,
51
+ findRegistryName,
43
52
  getThread,
44
53
  loadActiveThreads,
45
54
  loadHistoryThreads,
@@ -47,6 +56,7 @@ import {
47
56
  resolveWorkflowHash,
48
57
  setThread,
49
58
  type UwfStore,
59
+ type WorkflowRegistry,
50
60
  } from "../store.js";
51
61
  import { checkWorkflowFilenameConsistency, isCasRef, parseWorkflowPayload } from "../validate.js";
52
62
  import { validateWorkflow } from "../validate-semantic.js";
@@ -64,53 +74,49 @@ const END_ROLE = "$END";
64
74
  const START_ROLE = "$START";
65
75
  export const THREAD_READ_DEFAULT_QUOTA = 4000;
66
76
 
67
- function buildStepOutputFromEvaluation(
77
+ /**
78
+ * Read the suspend reason from an agent output if it is an engine-level suspend
79
+ * (coroutine yield). Returns the reason string when `$status === "$SUSPEND"`,
80
+ * or `null` otherwise. A suspend output with no `reason` yields an empty string.
81
+ */
82
+ function readSuspendReason(lastOutput: Record<string, unknown>): string | null {
83
+ if (lastOutput[STATUS_KEY] !== SUSPEND_STATUS) {
84
+ return null;
85
+ }
86
+ const reason = lastOutput.reason;
87
+ return typeof reason === "string" ? reason : "";
88
+ }
89
+
90
+ function buildSuspendStepOutput(
68
91
  workflowHash: CasRef,
69
92
  threadId: ThreadId,
70
93
  head: CasRef,
71
- status: ThreadStatus,
72
- evaluation: ReturnType<typeof evaluate>,
73
- background: boolean | null,
94
+ suspendedRole: string,
95
+ suspendMessage: string,
74
96
  ): StepOutput {
75
- const done = status === "completed";
76
- let currentRole: string | null = null;
77
- let suspendedRole: string | null = null;
78
- let suspendMessage: string | null = null;
79
- if (evaluation.ok) {
80
- if (isSuspendResult(evaluation.value)) {
81
- suspendedRole = evaluation.value.suspendedRole;
82
- suspendMessage = evaluation.value.prompt;
83
- } else if (evaluation.value.role !== END_ROLE) {
84
- currentRole = evaluation.value.role;
85
- }
86
- }
87
97
  return {
88
98
  workflow: workflowHash,
89
99
  thread: threadId,
90
100
  head,
91
- status,
92
- currentRole,
101
+ status: "suspended",
102
+ currentRole: null,
93
103
  suspendedRole,
94
104
  suspendMessage,
95
- done,
96
- background,
105
+ done: false,
106
+ background: null,
107
+ error: null,
97
108
  };
98
109
  }
99
110
 
100
- function resolveSuspendFieldsFromGraph(
111
+ function resolveSuspendFieldsFromOutput(
101
112
  uwf: UwfStore,
102
113
  head: CasRef,
103
- workflowRef: CasRef,
104
114
  ): { suspendedRole: string | null; suspendMessage: string | null } {
105
115
  const chain = walkChain(uwf, head);
106
116
  const { lastRole, lastOutput } = resolveEvaluateArgs(uwf, chain);
107
- const workflow = loadWorkflowPayload(uwf, workflowRef);
108
- const result = evaluate(workflow.graph, lastRole, lastOutput);
109
- if (result.ok && isSuspendResult(result.value)) {
110
- return {
111
- suspendedRole: result.value.suspendedRole,
112
- suspendMessage: result.value.prompt,
113
- };
117
+ const reason = readSuspendReason(lastOutput);
118
+ if (reason !== null) {
119
+ return { suspendedRole: lastRole, suspendMessage: reason };
114
120
  }
115
121
  return { suspendedRole: null, suspendMessage: null };
116
122
  }
@@ -120,7 +126,6 @@ function resolveSuspendFieldsForShow(
120
126
  status: ThreadStatus,
121
127
  uwf: UwfStore,
122
128
  head: CasRef,
123
- workflowRef: CasRef,
124
129
  ): { suspendedRole: string | null; suspendMessage: string | null } {
125
130
  if (status !== "suspended") {
126
131
  return { suspendedRole: null, suspendMessage: null };
@@ -128,10 +133,10 @@ function resolveSuspendFieldsForShow(
128
133
  if (entry.suspendedRole !== null && entry.suspendMessage !== null) {
129
134
  return { suspendedRole: entry.suspendedRole, suspendMessage: entry.suspendMessage };
130
135
  }
131
- const fromGraph = resolveSuspendFieldsFromGraph(uwf, head, workflowRef);
136
+ const fromOutput = resolveSuspendFieldsFromOutput(uwf, head);
132
137
  return {
133
- suspendedRole: entry.suspendedRole ?? fromGraph.suspendedRole,
134
- suspendMessage: entry.suspendMessage ?? fromGraph.suspendMessage,
138
+ suspendedRole: entry.suspendedRole ?? fromOutput.suspendedRole,
139
+ suspendMessage: entry.suspendMessage ?? fromOutput.suspendMessage,
135
140
  };
136
141
  }
137
142
 
@@ -155,7 +160,6 @@ async function resolveActiveThreadStatus(
155
160
  threadId: ThreadId,
156
161
  uwf: UwfStore,
157
162
  head: CasRef,
158
- workflowRef: CasRef,
159
163
  ): Promise<ThreadStatus> {
160
164
  const runningMarker = await isThreadRunning(storageRoot, threadId);
161
165
  if (runningMarker !== null) {
@@ -163,10 +167,8 @@ async function resolveActiveThreadStatus(
163
167
  }
164
168
 
165
169
  const chain = walkChain(uwf, head);
166
- const { lastRole, lastOutput } = resolveEvaluateArgs(uwf, chain);
167
- const workflow = loadWorkflowPayload(uwf, workflowRef);
168
- const result = evaluate(workflow.graph, lastRole, lastOutput);
169
- if (result.ok && isSuspendResult(result.value)) {
170
+ const { lastOutput } = resolveEvaluateArgs(uwf, chain);
171
+ if (readSuspendReason(lastOutput) !== null) {
170
172
  return "suspended";
171
173
  }
172
174
 
@@ -180,12 +182,15 @@ async function resolveActiveThreadStatus(
180
182
  function resolveCurrentRole(uwf: UwfStore, head: CasRef, workflowRef: CasRef): string | null {
181
183
  const chain = walkChain(uwf, head);
182
184
  const { lastRole, lastOutput } = resolveEvaluateArgs(uwf, chain);
185
+ if (readSuspendReason(lastOutput) !== null) {
186
+ return null;
187
+ }
183
188
  const workflow = loadWorkflowPayload(uwf, workflowRef);
184
189
  const result = evaluate(workflow.graph, lastRole, lastOutput);
185
190
  if (!result.ok) {
186
191
  return null;
187
192
  }
188
- if (isSuspendResult(result.value) || result.value.role === END_ROLE) {
193
+ if (result.value.role === END_ROLE) {
189
194
  return null;
190
195
  }
191
196
  return result.value.role;
@@ -195,10 +200,12 @@ const PL_THREAD_START = "7HNQ4B2X";
195
200
  const PL_MODERATOR = "M3K8V9T1";
196
201
  const PL_AGENT_SPAWN = "R5J2W8N4";
197
202
  const PL_AGENT_DONE = "C6P9E3H7";
203
+ const PL_AGENT_ERROR = "Z3F7K8M2";
198
204
  const PL_THREAD_ARCHIVED = "F4D8Q2K5";
199
205
  const PL_STEP_ERROR = "B8T5N1V6";
200
206
  const PL_BACKGROUND_START = "X7Q4W9M2";
201
207
  const PL_THREAD_RESUME = "K2R7M4N8";
208
+ const PL_THREAD_POKE = "P4Q9R3X7";
202
209
 
203
210
  type ResumeStepConfig = {
204
211
  role: string;
@@ -246,18 +253,19 @@ async function workflowFileExists(dir: string, name: string, ext: string): Promi
246
253
  }
247
254
 
248
255
  /**
249
- * Search for a workflow file in a given directory (checks both .workflow/ and .workflows/).
256
+ * Search for a workflow file in a given directory (checks both .workflows/ and .workflow/).
257
+ * `.workflows/` (primary) takes priority over `.workflow/` (legacy fallback).
250
258
  */
251
259
  async function findWorkflowInDir(dir: string, name: string): Promise<string | null> {
252
- // Check .workflow/ directory first (preferred)
260
+ // Check .workflows/ directory first (primary)
253
261
  for (const ext of [".yaml", ".yml"]) {
254
- const result = await workflowFileExists(resolvePath(dir, ".workflow"), name, ext);
262
+ const result = await workflowFileExists(resolvePath(dir, ".workflows"), name, ext);
255
263
  if (result !== null) {
256
264
  return result;
257
265
  }
258
266
  }
259
267
  for (const indexName of ["index.yaml", "index.yml"]) {
260
- const candidate = resolvePath(dir, ".workflow", name, indexName);
268
+ const candidate = resolvePath(dir, ".workflows", name, indexName);
261
269
  try {
262
270
  await access(candidate);
263
271
  return candidate;
@@ -266,15 +274,15 @@ async function findWorkflowInDir(dir: string, name: string): Promise<string | nu
266
274
  }
267
275
  }
268
276
 
269
- // Check .workflows/ directory as fallback (legacy)
277
+ // Check .workflow/ directory as fallback (legacy)
270
278
  for (const ext of [".yaml", ".yml"]) {
271
- const result = await workflowFileExists(resolvePath(dir, ".workflows"), name, ext);
279
+ const result = await workflowFileExists(resolvePath(dir, ".workflow"), name, ext);
272
280
  if (result !== null) {
273
281
  return result;
274
282
  }
275
283
  }
276
284
  for (const indexName of ["index.yaml", "index.yml"]) {
277
- const candidate = resolvePath(dir, ".workflows", name, indexName);
285
+ const candidate = resolvePath(dir, ".workflow", name, indexName);
278
286
  try {
279
287
  await access(candidate);
280
288
  return candidate;
@@ -286,8 +294,21 @@ async function findWorkflowInDir(dir: string, name: string): Promise<string | nu
286
294
  return null;
287
295
  }
288
296
 
297
+ /** Check if a directory contains a .git marker (directory or file). */
298
+ async function hasGitMarker(dir: string): Promise<boolean> {
299
+ try {
300
+ await access(join(dir, ".git"));
301
+ return true;
302
+ } catch {
303
+ return false;
304
+ }
305
+ }
306
+
289
307
  /**
290
- * Traverse parent directories looking for `.workflow/<name>.yaml` or `.workflow/<name>.yml`.
308
+ * Traverse parent directories looking for a workflow named `name` under
309
+ * `.workflows/` (primary) or `.workflow/` (legacy fallback). Within each
310
+ * directory the lookup checks flat YAML files (`<name>.yaml`/`.yml`) and
311
+ * folder-based layouts (`<name>/index.yaml`/`.yml`).
291
312
  * Returns the absolute path if found, otherwise null.
292
313
  * Stops at filesystem root or .git directory.
293
314
  */
@@ -301,6 +322,11 @@ async function findWorkflowInParents(startDir: string, name: string): Promise<st
301
322
  return found;
302
323
  }
303
324
 
325
+ // Stop at .git boundary (repo root)
326
+ if (await hasGitMarker(currentDir)) {
327
+ break;
328
+ }
329
+
304
330
  // Stop at filesystem root
305
331
  if (currentDir === root) {
306
332
  break;
@@ -492,8 +518,8 @@ export async function cmdThreadShow(
492
518
  fail(`failed to resolve workflow from head: ${activeHead}`);
493
519
  }
494
520
 
495
- // Determine if this is a completed/cancelled thread
496
- if (entry.status === "completed" || entry.status === "cancelled") {
521
+ // Determine if this is an ended/cancelled thread
522
+ if (entry.status === "end" || entry.status === "cancelled") {
497
523
  const hint = null;
498
524
  return {
499
525
  workflow,
@@ -505,14 +531,15 @@ export async function cmdThreadShow(
505
531
  suspendMessage: null,
506
532
  done: true,
507
533
  background: null,
534
+ error: null,
508
535
  hint,
509
536
  };
510
537
  }
511
538
 
512
539
  // Active thread
513
- const status = await resolveActiveThreadStatus(storageRoot, threadId, uwf, activeHead, workflow);
540
+ const status = await resolveActiveThreadStatus(storageRoot, threadId, uwf, activeHead);
514
541
  const currentRole = resolveCurrentRole(uwf, activeHead, workflow);
515
- const suspendFields = resolveSuspendFieldsForShow(entry, status, uwf, activeHead, workflow);
542
+ const suspendFields = resolveSuspendFieldsForShow(entry, status, uwf, activeHead);
516
543
 
517
544
  const hint =
518
545
  status === "suspended"
@@ -529,6 +556,7 @@ export async function cmdThreadShow(
529
556
  suspendMessage: suspendFields.suspendMessage,
530
557
  done: false,
531
558
  background: null,
559
+ error: null,
532
560
  hint,
533
561
  };
534
562
  }
@@ -538,6 +566,8 @@ export type ThreadListItemWithStatus = ThreadListItem & {
538
566
  currentRole: string | null;
539
567
  /** Display label with status marker for suspended threads */
540
568
  statusDisplay: string;
569
+ /** Resolved workflow name from registry, or null if orphaned (hash not in registry) */
570
+ workflowName: string | null;
541
571
  };
542
572
 
543
573
  export type ThreadShowOutput = StepOutput & {
@@ -550,13 +580,23 @@ async function threadListItemFromActive(
550
580
  uwf: UwfStore,
551
581
  threadId: ThreadId,
552
582
  head: CasRef,
583
+ registry: WorkflowRegistry,
553
584
  ): Promise<ThreadListItemWithStatus | null> {
554
585
  const workflow = resolveWorkflowFromHead(uwf, head);
555
586
  if (workflow === null) {
556
- return null;
587
+ // Head CAS node missing or unrecognized — treat as corrupt rather than silently skipping
588
+ return {
589
+ thread: threadId,
590
+ workflow: "" as CasRef,
591
+ head,
592
+ status: "corrupt",
593
+ currentRole: null,
594
+ statusDisplay: "corrupt",
595
+ workflowName: null,
596
+ };
557
597
  }
558
598
 
559
- const status = await resolveActiveThreadStatus(storageRoot, threadId, uwf, head, workflow);
599
+ const status = await resolveActiveThreadStatus(storageRoot, threadId, uwf, head);
560
600
  const statusDisplay = status === "suspended" ? `${status} [suspended]` : status;
561
601
 
562
602
  return {
@@ -566,6 +606,7 @@ async function threadListItemFromActive(
566
606
  status,
567
607
  currentRole: resolveCurrentRole(uwf, head, workflow),
568
608
  statusDisplay,
609
+ workflowName: findRegistryName(registry, workflow),
569
610
  };
570
611
  }
571
612
 
@@ -573,12 +614,33 @@ async function collectActiveThreads(
573
614
  storageRoot: string,
574
615
  uwf: UwfStore,
575
616
  index: ThreadsIndex,
617
+ registry: WorkflowRegistry,
576
618
  ): Promise<ThreadListItemWithStatus[]> {
577
619
  const items: ThreadListItemWithStatus[] = [];
578
620
  for (const [threadId, entry] of Object.entries(index)) {
579
- const item = await threadListItemFromActive(storageRoot, uwf, threadId as ThreadId, entry.head);
580
- if (item !== null) {
581
- items.push(item);
621
+ try {
622
+ const item = await threadListItemFromActive(
623
+ storageRoot,
624
+ uwf,
625
+ threadId as ThreadId,
626
+ entry.head,
627
+ registry,
628
+ );
629
+ if (item !== null) {
630
+ items.push(item);
631
+ }
632
+ } catch (err) {
633
+ const message = err instanceof Error ? err.message : String(err);
634
+ process.stderr.write(`warning: thread ${threadId} is corrupt: ${message}\n`);
635
+ items.push({
636
+ thread: threadId as ThreadId,
637
+ workflow: "" as CasRef,
638
+ head: entry.head,
639
+ status: "corrupt",
640
+ currentRole: null,
641
+ statusDisplay: "corrupt",
642
+ workflowName: null,
643
+ });
582
644
  }
583
645
  }
584
646
  return items;
@@ -587,6 +649,7 @@ async function collectActiveThreads(
587
649
  function collectCompletedThreads(
588
650
  uwf: UwfStore,
589
651
  activeIds: Set<ThreadId>,
652
+ registry: WorkflowRegistry,
590
653
  ): ThreadListItemWithStatus[] {
591
654
  const items: ThreadListItemWithStatus[] = [];
592
655
  const history = loadHistoryThreads(uwf.varStore);
@@ -594,16 +657,31 @@ function collectCompletedThreads(
594
657
  for (const [threadId, entry] of Object.entries(history)) {
595
658
  if (!activeIds.has(threadId as ThreadId) && !seen.has(threadId as ThreadId)) {
596
659
  seen.add(threadId as ThreadId);
597
- const status = entry.status;
598
- const workflow = resolveWorkflowFromHead(uwf, entry.head);
599
- items.push({
600
- thread: threadId as ThreadId,
601
- workflow: workflow ?? "",
602
- head: entry.head,
603
- status,
604
- currentRole: null,
605
- statusDisplay: status,
606
- });
660
+ try {
661
+ const status = entry.status;
662
+ const workflow = resolveWorkflowFromHead(uwf, entry.head);
663
+ items.push({
664
+ thread: threadId as ThreadId,
665
+ workflow: workflow ?? "",
666
+ head: entry.head,
667
+ status,
668
+ currentRole: null,
669
+ statusDisplay: status,
670
+ workflowName: workflow !== null ? findRegistryName(registry, workflow) : null,
671
+ });
672
+ } catch (err) {
673
+ const message = err instanceof Error ? err.message : String(err);
674
+ process.stderr.write(`warning: completed thread ${threadId} is corrupt: ${message}\n`);
675
+ items.push({
676
+ thread: threadId as ThreadId,
677
+ workflow: "" as CasRef,
678
+ head: entry.head,
679
+ status: "corrupt",
680
+ currentRole: null,
681
+ statusDisplay: "corrupt",
682
+ workflowName: null,
683
+ });
684
+ }
607
685
  }
608
686
  }
609
687
  return items;
@@ -649,27 +727,35 @@ export async function cmdThreadList(
649
727
  beforeMs: number | null,
650
728
  skip: number | null,
651
729
  take: number | null,
730
+ showAll: boolean = false,
652
731
  ): Promise<ThreadListItemWithStatus[]> {
653
732
  const uwf = await createUwfStore(storageRoot);
654
733
  const index = loadActiveThreads(uwf.varStore);
734
+ const registry = loadWorkflowRegistry(uwf.varStore);
735
+
736
+ // Resolve the effective filter:
737
+ // - explicit --status wins (showAll has no effect)
738
+ // - otherwise: --all → no filter; default → ["idle", "running", "corrupt"]
739
+ const effectiveFilter: ThreadStatus[] | null =
740
+ statusFilter !== null ? statusFilter : showAll ? null : ["idle", "running", "corrupt"];
655
741
 
656
742
  // Collect active threads
657
- let items = await collectActiveThreads(storageRoot, uwf, index);
743
+ let items = await collectActiveThreads(storageRoot, uwf, index, registry);
658
744
 
659
745
  // Collect completed threads (if relevant for status filter)
660
746
  const includeCompleted =
661
- statusFilter === null ||
662
- statusFilter.includes("completed") ||
663
- statusFilter.includes("cancelled");
747
+ effectiveFilter === null ||
748
+ effectiveFilter.includes("end") ||
749
+ effectiveFilter.includes("cancelled");
664
750
  if (includeCompleted) {
665
751
  const activeIds = new Set(items.map((i) => i.thread));
666
- const completedItems = collectCompletedThreads(uwf, activeIds);
752
+ const completedItems = collectCompletedThreads(uwf, activeIds, registry);
667
753
  items = items.concat(completedItems);
668
754
  }
669
755
 
670
756
  // Apply status filter
671
- if (statusFilter !== null) {
672
- items = items.filter((item) => statusFilter.includes(item.status));
757
+ if (effectiveFilter !== null) {
758
+ items = items.filter((item) => effectiveFilter.includes(item.status));
673
759
  }
674
760
 
675
761
  // Apply time range filters
@@ -985,18 +1071,14 @@ function resolveAgentConfig(
985
1071
  return agentConfig;
986
1072
  }
987
1073
 
988
- function spawnAgent(
989
- plog: ProcessLogger,
1074
+ function executeAgentCommand(
990
1075
  agent: AgentConfig,
991
- threadId: ThreadId,
992
- role: string,
993
- edgePrompt: string,
1076
+ argv: readonly string[],
994
1077
  cwd: string,
995
- ): AdapterOutput {
996
- const argv = [...agent.args, "--thread", threadId, "--role", role, "--prompt", edgePrompt];
997
- let stdout: string;
1078
+ plog: ProcessLogger,
1079
+ ): string {
998
1080
  try {
999
- stdout = execFileSync(agent.command, argv, {
1081
+ return execFileSync(agent.command, argv, {
1000
1082
  encoding: "utf8",
1001
1083
  stdio: ["ignore", "pipe", "pipe"],
1002
1084
  maxBuffer: 50 * 1024 * 1024, // 50 MB — stream-json output can be large
@@ -1019,14 +1101,22 @@ function spawnAgent(
1019
1101
  const detail = stderr.trim() !== "" ? `: ${stderr.trim()}` : "";
1020
1102
  failStep(plog, `agent command failed (${agent.command})${detail}`);
1021
1103
  }
1104
+ }
1022
1105
 
1106
+ function parseAgentOutput(stdout: string, plog: ProcessLogger): unknown {
1023
1107
  const line = stdout.trim().split("\n").pop()?.trim() ?? "";
1024
- let parsed: unknown;
1025
1108
  try {
1026
- parsed = JSON.parse(line);
1109
+ return JSON.parse(line);
1027
1110
  } catch {
1028
1111
  failStep(plog, `agent stdout last line is not valid JSON: ${line || "(empty)"}`);
1029
1112
  }
1113
+ }
1114
+
1115
+ function validateAndNormalizeOutput(
1116
+ parsed: unknown,
1117
+ line: string,
1118
+ plog: ProcessLogger,
1119
+ ): AdapterOutput {
1030
1120
  const obj = parsed as Record<string, unknown>;
1031
1121
  if (
1032
1122
  typeof obj !== "object" ||
@@ -1036,11 +1126,44 @@ function spawnAgent(
1036
1126
  ) {
1037
1127
  failStep(plog, `agent stdout JSON missing valid stepHash: ${line}`);
1038
1128
  }
1129
+ // Normalize isError / errorMessage so downstream code can rely on them.
1130
+ // Legacy adapters that don't emit these fields default to isError=false.
1131
+ if (obj.isError !== undefined && typeof obj.isError !== "boolean") {
1132
+ failStep(plog, `agent stdout JSON has non-boolean isError: ${line}`);
1133
+ }
1134
+ if (obj.isError === undefined) {
1135
+ obj.isError = false;
1136
+ }
1137
+ if (
1138
+ obj.errorMessage !== undefined &&
1139
+ obj.errorMessage !== null &&
1140
+ typeof obj.errorMessage !== "string"
1141
+ ) {
1142
+ failStep(plog, `agent stdout JSON has non-string errorMessage: ${line}`);
1143
+ }
1144
+ if (obj.errorMessage === undefined) {
1145
+ obj.errorMessage = null;
1146
+ }
1039
1147
  return obj as unknown as AdapterOutput;
1040
1148
  }
1041
1149
 
1150
+ function spawnAgent(
1151
+ plog: ProcessLogger,
1152
+ agent: AgentConfig,
1153
+ threadId: ThreadId,
1154
+ role: string,
1155
+ edgePrompt: string,
1156
+ cwd: string,
1157
+ ): AdapterOutput {
1158
+ const argv = [...agent.args, "--thread", threadId, "--role", role, "--prompt", edgePrompt];
1159
+ const stdout = executeAgentCommand(agent, argv, cwd, plog);
1160
+ const line = stdout.trim().split("\n").pop()?.trim() ?? "";
1161
+ const parsed = parseAgentOutput(stdout, plog);
1162
+ return validateAndNormalizeOutput(parsed, line, plog);
1163
+ }
1164
+
1042
1165
  function archiveThread(uwf: UwfStore, threadId: ThreadId, _workflow: CasRef, _head: CasRef): void {
1043
- completeThread(uwf.varStore, threadId, "completed");
1166
+ completeThread(uwf.varStore, threadId, "end");
1044
1167
  }
1045
1168
 
1046
1169
  export async function cmdThreadResume(
@@ -1064,15 +1187,15 @@ export async function cmdThreadResume(
1064
1187
  const chain = walkChain(uwf, headHash);
1065
1188
  const workflowHash = chain.start.workflow;
1066
1189
 
1067
- // Check entry.status first for completed/cancelled (like in cmdThreadShow)
1190
+ // Check entry.status first for end/cancelled (like in cmdThreadShow)
1068
1191
  let status: ThreadStatus;
1069
- if (entry.status === "completed" || entry.status === "cancelled") {
1192
+ if (entry.status === "end" || entry.status === "cancelled") {
1070
1193
  status = entry.status;
1071
1194
  } else {
1072
- status = await resolveActiveThreadStatus(storageRoot, threadId, uwf, headHash, workflowHash);
1195
+ status = await resolveActiveThreadStatus(storageRoot, threadId, uwf, headHash);
1073
1196
  }
1074
1197
 
1075
- if (status !== "suspended" && status !== "completed") {
1198
+ if (status !== "suspended" && status !== "end") {
1076
1199
  fail(`thread cannot be resumed: ${threadId} (status: ${status})`);
1077
1200
  }
1078
1201
 
@@ -1082,7 +1205,7 @@ export async function cmdThreadResume(
1082
1205
  });
1083
1206
 
1084
1207
  if (status === "suspended") {
1085
- const suspendFields = resolveSuspendFieldsForShow(entry, status, uwf, headHash, workflowHash);
1208
+ const suspendFields = resolveSuspendFieldsForShow(entry, status, uwf, headHash);
1086
1209
  if (suspendFields.suspendedRole === null) {
1087
1210
  fail(`thread is suspended but suspendedRole is missing: ${threadId}`);
1088
1211
  }
@@ -1104,21 +1227,18 @@ export async function cmdThreadResume(
1104
1227
  });
1105
1228
  }
1106
1229
 
1107
- // status === "completed"
1230
+ // status === "end"
1108
1231
  const workflow = loadWorkflowPayload(uwf, workflowHash);
1109
1232
  const startResult = evaluate(workflow.graph, START_ROLE, { [STATUS_KEY]: "resume" });
1110
1233
  if (!startResult.ok) {
1111
1234
  fail(`failed to evaluate $START: ${startResult.error.message}`);
1112
1235
  }
1113
- if (isSuspendResult(startResult.value)) {
1114
- fail("workflow cannot start with $SUSPEND");
1115
- }
1116
1236
  if (startResult.value.role === END_ROLE) {
1117
1237
  fail("workflow cannot start with $END");
1118
1238
  }
1119
1239
 
1120
1240
  const startRole = startResult.value.role;
1121
- const completedResumePrompt = buildResumePrompt(startResult.value.prompt, supplement);
1241
+ const endResumePrompt = buildResumePrompt(startResult.value.prompt, supplement);
1122
1242
 
1123
1243
  const updatedEntry = { ...entry, status: "idle" as const, completedAt: null };
1124
1244
  setThread(uwf.varStore, threadId, updatedEntry);
@@ -1131,10 +1251,155 @@ export async function cmdThreadResume(
1131
1251
 
1132
1252
  return cmdThreadStepOnce(storageRoot, threadId, agentOverride, plog, {
1133
1253
  role: startRole,
1134
- prompt: completedResumePrompt,
1254
+ prompt: endResumePrompt,
1135
1255
  });
1136
1256
  }
1137
1257
 
1258
+ /**
1259
+ * Validate that a thread can be poked. Returns the existing entry, the old head ref, and the head StepNode payload.
1260
+ * Fails (process exit) when the thread is missing, running, ended ("end"), cancelled, when the head
1261
+ * CAS node cannot be found, or when the head is not a StepNode (i.e. there is no step to replace).
1262
+ */
1263
+ async function validatePokePreconditions(
1264
+ storageRoot: string,
1265
+ uwf: UwfStore,
1266
+ threadId: ThreadId,
1267
+ ): Promise<{ entry: ThreadIndexEntry; oldHead: CasRef; oldHeadPayload: StepNodePayload }> {
1268
+ const runningMarker = await isThreadRunning(storageRoot, threadId);
1269
+ if (runningMarker !== null) {
1270
+ fail(`thread already executing in background (PID: ${runningMarker.pid})`);
1271
+ }
1272
+
1273
+ const entry = getThread(uwf.varStore, threadId);
1274
+ if (entry === null) {
1275
+ fail(`thread not active: ${threadId}`);
1276
+ }
1277
+
1278
+ if (entry.status === "end" || entry.status === "cancelled") {
1279
+ fail(`thread cannot be poked: ${threadId} (status: ${entry.status})`);
1280
+ }
1281
+
1282
+ const oldHead = entry.head;
1283
+ const oldHeadNode = uwf.store.cas.get(oldHead);
1284
+ if (oldHeadNode === null) {
1285
+ fail(`CAS node not found: ${oldHead}`);
1286
+ }
1287
+ if (oldHeadNode.type !== uwf.schemas.stepNode) {
1288
+ fail("thread cannot be poked: no step to replace (head is StartNode)");
1289
+ }
1290
+
1291
+ return { entry, oldHead, oldHeadPayload: oldHeadNode.payload as StepNodePayload };
1292
+ }
1293
+
1294
+ /**
1295
+ * Resolve the next role from the post-poke chain state, used for the StepOutput.currentRole field.
1296
+ * Returns null when the next role is $END, evaluation fails, or the result is a suspend.
1297
+ */
1298
+ function resolveCurrentRoleFromChain(
1299
+ uwfAfter: UwfStore,
1300
+ workflow: WorkflowPayload,
1301
+ replacedHash: CasRef,
1302
+ ): string | null {
1303
+ const chainAfter = walkChain(uwfAfter, replacedHash);
1304
+ const { lastRole, lastOutput } = resolveEvaluateArgs(uwfAfter, chainAfter);
1305
+ if (readSuspendReason(lastOutput) !== null) {
1306
+ return null;
1307
+ }
1308
+ const afterResult = evaluate(workflow.graph, lastRole, lastOutput);
1309
+ if (!afterResult.ok) {
1310
+ return null;
1311
+ }
1312
+ if (afterResult.value.role === END_ROLE) {
1313
+ return null;
1314
+ }
1315
+ return afterResult.value.role;
1316
+ }
1317
+
1318
+ /**
1319
+ * Poke a thread: re-run the agent on the head step with a supplementary prompt,
1320
+ * replacing the head step's output. The new step's `prev` points to the OLD head's
1321
+ * `prev` — semantically replacing (not appending to) the head. The moderator is NOT
1322
+ * re-evaluated for routing; the role of the head step is re-used.
1323
+ */
1324
+ export async function cmdThreadPoke(
1325
+ storageRoot: string,
1326
+ threadId: ThreadId,
1327
+ prompt: string,
1328
+ agentOverride: string | null,
1329
+ ): Promise<StepOutput> {
1330
+ const uwf = await createUwfStore(storageRoot);
1331
+ const { entry, oldHeadPayload } = await validatePokePreconditions(storageRoot, uwf, threadId);
1332
+
1333
+ const chain = walkChain(uwf, entry.head);
1334
+ const workflowHash = chain.start.workflow;
1335
+ const threadCwd = chain.start.cwd;
1336
+
1337
+ const plog = createProcessLogger({
1338
+ storageRoot,
1339
+ context: { thread: threadId, workflow: workflowHash },
1340
+ });
1341
+
1342
+ // Resolve the agent: --agent override wins; otherwise read from old head step's `agent` field.
1343
+ const config = await loadWorkflowConfig(storageRoot);
1344
+ const workflow = loadWorkflowPayload(uwf, workflowHash);
1345
+ const role = oldHeadPayload.role;
1346
+ const agent =
1347
+ agentOverride !== null
1348
+ ? resolveAgentConfig(config, workflow, role, agentOverride)
1349
+ : parseAgentOverride(oldHeadPayload.agent);
1350
+
1351
+ const effectiveCwd = oldHeadPayload.cwd !== "" ? oldHeadPayload.cwd : threadCwd;
1352
+
1353
+ plog.log(PL_THREAD_POKE, `poke role=${role} agent=${agent.command}`, null);
1354
+ plog.log(PL_AGENT_SPAWN, `spawning agent command=${agent.command}`, {
1355
+ args: [...agent.args, threadId, role].join(" "),
1356
+ });
1357
+
1358
+ loadDotenv({ path: getEnvPath(storageRoot) });
1359
+
1360
+ // Spawn the agent. The agent will create a new StepNode with prev=oldHead (it reads
1361
+ // the active thread head). After the agent returns, we rewrite that node's prev so
1362
+ // that the new head replaces the old head instead of appending after it.
1363
+ const agentResult = spawnAgent(plog, agent, threadId, role, prompt, effectiveCwd);
1364
+ const agentStepHash = agentResult.stepHash as CasRef;
1365
+
1366
+ plog.log(PL_AGENT_DONE, `agent returned head=${agentStepHash}`, null);
1367
+
1368
+ const uwfAfter = await createUwfStore(storageRoot);
1369
+ const agentNode = uwfAfter.store.cas.get(agentStepHash);
1370
+ if (agentNode === null || agentNode.type !== uwfAfter.schemas.stepNode) {
1371
+ failStep(plog, `agent returned hash that is not a StepNode: ${agentStepHash}`);
1372
+ }
1373
+ const agentPayload = agentNode.payload as StepNodePayload;
1374
+
1375
+ // Rewrite the new step so that its `prev` points to the OLD head's prev (replace semantics).
1376
+ const replacedPayload: StepNodePayload = {
1377
+ ...agentPayload,
1378
+ prev: oldHeadPayload.prev,
1379
+ };
1380
+ const replacedHash = await uwfAfter.store.cas.put(uwfAfter.schemas.stepNode, replacedPayload);
1381
+ const replacedNode = uwfAfter.store.cas.get(replacedHash);
1382
+ if (replacedNode === null || !validate(uwfAfter.store, replacedNode)) {
1383
+ failStep(plog, "rewritten StepNode failed schema validation");
1384
+ }
1385
+
1386
+ // Update thread head to the replaced step. Status becomes idle (no moderator re-route).
1387
+ setThread(uwfAfter.varStore, threadId, updateThreadHead(entry, replacedHash));
1388
+
1389
+ return {
1390
+ workflow: workflowHash,
1391
+ thread: threadId,
1392
+ head: replacedHash,
1393
+ status: "idle",
1394
+ currentRole: resolveCurrentRoleFromChain(uwfAfter, workflow, replacedHash),
1395
+ suspendedRole: null,
1396
+ suspendMessage: null,
1397
+ done: false,
1398
+ background: null,
1399
+ error: null,
1400
+ };
1401
+ }
1402
+
1138
1403
  export function validateCount(count: number): void {
1139
1404
  if (count < 1 || !Number.isInteger(count)) {
1140
1405
  throw new Error(`--count must be a positive integer, got: ${count}`);
@@ -1151,11 +1416,12 @@ export async function cmdThreadExec(
1151
1416
  ): Promise<StepOutput[]> {
1152
1417
  validateCount(count);
1153
1418
 
1154
- // Check if thread is already running in background (unless we ARE the background worker)
1419
+ // Reject concurrent exec on the same thread (unless we ARE the background worker,
1420
+ // which hasn't created its own marker yet at this point).
1155
1421
  if (!backgroundWorker) {
1156
1422
  const runningMarker = await isThreadRunning(storageRoot, threadId);
1157
1423
  if (runningMarker !== null) {
1158
- fail(`thread already executing in background (PID: ${runningMarker.pid})`);
1424
+ fail(`thread ${threadId} is already being executed by PID ${runningMarker.pid}`);
1159
1425
  }
1160
1426
  }
1161
1427
 
@@ -1170,17 +1436,15 @@ export async function cmdThreadExec(
1170
1436
  return cmdThreadStepBackground(storageRoot, threadId, agentOverride, count, plog, workflowHash);
1171
1437
  }
1172
1438
 
1173
- // If we're the background worker, create marker before execution
1174
- let markerCreated = false;
1175
- if (backgroundWorker) {
1176
- await createMarker(storageRoot, {
1177
- thread: threadId,
1178
- workflow: workflowHash,
1179
- pid: process.pid,
1180
- startedAt: Date.now(),
1181
- });
1182
- markerCreated = true;
1183
- }
1439
+ // Create running marker so `thread list` shows "running" during execution
1440
+ // and concurrent `exec` on the same thread is rejected (see check above).
1441
+ await createMarker(storageRoot, {
1442
+ thread: threadId,
1443
+ workflow: workflowHash,
1444
+ pid: process.pid,
1445
+ startedAt: Date.now(),
1446
+ processStartTime: getProcessStartTime(process.pid),
1447
+ });
1184
1448
 
1185
1449
  try {
1186
1450
  const results: StepOutput[] = [];
@@ -1193,10 +1457,7 @@ export async function cmdThreadExec(
1193
1457
  }
1194
1458
  return results;
1195
1459
  } finally {
1196
- // Cleanup marker if we created one
1197
- if (markerCreated) {
1198
- await deleteMarker(storageRoot, threadId);
1199
- }
1460
+ await deleteMarker(storageRoot, threadId);
1200
1461
  }
1201
1462
  }
1202
1463
 
@@ -1264,6 +1525,7 @@ async function cmdThreadStepBackground(
1264
1525
  suspendMessage: null,
1265
1526
  done: false,
1266
1527
  background: true,
1528
+ error: null,
1267
1529
  },
1268
1530
  ];
1269
1531
  }
@@ -1296,6 +1558,16 @@ async function resolveModeratorStepTarget(
1296
1558
  plog: ProcessLogger,
1297
1559
  ): Promise<StepOutput | AgentStepTarget> {
1298
1560
  const { lastRole, lastOutput } = resolveEvaluateArgs(uwf, chain);
1561
+
1562
+ // Intercept an already-suspended head before the moderator: a thread whose
1563
+ // head step yielded `$status: "$SUSPEND"` stays suspended (idempotent re-exec).
1564
+ const suspendReason = readSuspendReason(lastOutput);
1565
+ if (suspendReason !== null) {
1566
+ await ensureThreadSuspendMetadata(uwf.varStore, threadId, entry, lastRole, suspendReason);
1567
+ plog.log(PL_MODERATOR, `moderator action=suspend suspendedRole=${lastRole}`, null);
1568
+ return buildSuspendStepOutput(workflowHash, threadId, headHash, lastRole, suspendReason);
1569
+ }
1570
+
1299
1571
  const nextResult = evaluate(workflow.graph, lastRole, lastOutput);
1300
1572
  if (!nextResult.ok) {
1301
1573
  failStep(plog, `moderator evaluate failed: ${nextResult.error.message}`);
@@ -1303,32 +1575,10 @@ async function resolveModeratorStepTarget(
1303
1575
 
1304
1576
  plog.log(
1305
1577
  PL_MODERATOR,
1306
- `moderator ${
1307
- isSuspendResult(nextResult.value)
1308
- ? `action=suspend suspendedRole=${nextResult.value.suspendedRole}`
1309
- : `role=${nextResult.value.role}`
1310
- } prompt=${nextResult.value.prompt}`,
1578
+ `moderator role=${nextResult.value.role} prompt=${nextResult.value.prompt}`,
1311
1579
  null,
1312
1580
  );
1313
1581
 
1314
- if (isSuspendResult(nextResult.value)) {
1315
- await ensureThreadSuspendMetadata(
1316
- uwf.varStore,
1317
- threadId,
1318
- entry,
1319
- nextResult.value.suspendedRole,
1320
- nextResult.value.prompt,
1321
- );
1322
- return buildStepOutputFromEvaluation(
1323
- workflowHash,
1324
- threadId,
1325
- headHash,
1326
- "suspended",
1327
- nextResult,
1328
- null,
1329
- );
1330
- }
1331
-
1332
1582
  if (nextResult.value.role === END_ROLE) {
1333
1583
  plog.log(PL_THREAD_ARCHIVED, `thread archived head=${headHash}`, null);
1334
1584
  archiveThread(uwf, threadId, workflowHash, headHash);
@@ -1336,12 +1586,13 @@ async function resolveModeratorStepTarget(
1336
1586
  workflow: workflowHash,
1337
1587
  thread: threadId,
1338
1588
  head: headHash,
1339
- status: "completed",
1589
+ status: "end",
1340
1590
  currentRole: null,
1341
1591
  suspendedRole: null,
1342
1592
  suspendMessage: null,
1343
1593
  done: true,
1344
1594
  background: null,
1595
+ error: null,
1345
1596
  };
1346
1597
  }
1347
1598
 
@@ -1369,29 +1620,27 @@ async function finalizeAgentStep(
1369
1620
  uwfAfter,
1370
1621
  chainAfter,
1371
1622
  );
1372
- const afterResult = evaluate(workflow.graph, lastRoleAfter, lastOutputAfter);
1373
- if (!afterResult.ok) {
1374
- failStep(plog, `post-step moderator evaluate failed: ${afterResult.error.message}`);
1375
- }
1376
1623
 
1377
- if (isSuspendResult(afterResult.value)) {
1624
+ // Intercept `$status: "$SUSPEND"` before the moderator (coroutine yield): the
1625
+ // step is already in CAS and the head has advanced — mark the thread suspended
1626
+ // and return without routing through the graph.
1627
+ const suspendReason = readSuspendReason(lastOutputAfter);
1628
+ if (suspendReason !== null) {
1378
1629
  setThread(
1379
1630
  uwfAfter.varStore,
1380
1631
  threadId,
1381
1632
  markThreadSuspended(
1382
1633
  getThread(uwfAfter.varStore, threadId) ?? createThreadIndexEntry(newHead),
1383
- afterResult.value.suspendedRole,
1384
- afterResult.value.prompt,
1634
+ lastRoleAfter,
1635
+ suspendReason,
1385
1636
  ),
1386
1637
  );
1387
- return buildStepOutputFromEvaluation(
1388
- workflowHash,
1389
- threadId,
1390
- newHead,
1391
- "suspended",
1392
- afterResult,
1393
- null,
1394
- );
1638
+ return buildSuspendStepOutput(workflowHash, threadId, newHead, lastRoleAfter, suspendReason);
1639
+ }
1640
+
1641
+ const afterResult = evaluate(workflow.graph, lastRoleAfter, lastOutputAfter);
1642
+ if (!afterResult.ok) {
1643
+ failStep(plog, `post-step moderator evaluate failed: ${afterResult.error.message}`);
1395
1644
  }
1396
1645
 
1397
1646
  const done = afterResult.value.role === END_ROLE;
@@ -1400,7 +1649,7 @@ async function finalizeAgentStep(
1400
1649
  archiveThread(uwfAfter, threadId, workflowHash, newHead);
1401
1650
  }
1402
1651
 
1403
- const status: ThreadStatus = done ? "completed" : "idle";
1652
+ const status: ThreadStatus = done ? "end" : "idle";
1404
1653
  const currentRole = done ? null : afterResult.value.role;
1405
1654
 
1406
1655
  return {
@@ -1413,6 +1662,7 @@ async function finalizeAgentStep(
1413
1662
  suspendMessage: null,
1414
1663
  done,
1415
1664
  background: null,
1665
+ error: null,
1416
1666
  };
1417
1667
  }
1418
1668
 
@@ -1476,6 +1726,31 @@ async function cmdThreadStepOnce(
1476
1726
  failStep(plog, `agent returned hash that is not a StepNode: ${newHead}`);
1477
1727
  }
1478
1728
 
1729
+ // Recoverable failure: agent persisted a failed StepNode (e.g. frontmatter
1730
+ // validation exhausted retries) but the engine MUST NOT advance head. The
1731
+ // moderator graph is also untouched — the same role will be replayed on the
1732
+ // next exec (until eventual success records `previousAttempts` linking the
1733
+ // failed step hashes).
1734
+ if (agentResult.isError === true) {
1735
+ plog.log(
1736
+ PL_AGENT_ERROR,
1737
+ `agent reported recoverable failure stepHash=${newHead} message=${agentResult.errorMessage ?? ""}`,
1738
+ null,
1739
+ );
1740
+ return {
1741
+ workflow: workflowHash,
1742
+ thread: threadId,
1743
+ head: headHash,
1744
+ status: "idle",
1745
+ currentRole: role,
1746
+ suspendedRole: null,
1747
+ suspendMessage: null,
1748
+ done: false,
1749
+ background: null,
1750
+ error: { stepHash: newHead, message: agentResult.errorMessage ?? "agent reported error" },
1751
+ };
1752
+ }
1753
+
1479
1754
  return finalizeAgentStep(storageRoot, threadId, workflowHash, workflow, newHead, uwfAfter, plog);
1480
1755
  }
1481
1756
 
@@ -1526,7 +1801,9 @@ export type CancelOutput = {
1526
1801
  };
1527
1802
 
1528
1803
  /**
1529
- * Stop background execution of a thread (but keep thread active)
1804
+ * Stop background execution of a thread (but keep thread active).
1805
+ * Validates process identity before sending signals to prevent killing
1806
+ * unrelated processes when PIDs are recycled.
1530
1807
  */
1531
1808
  export async function cmdThreadStop(storageRoot: string, threadId: ThreadId): Promise<StopOutput> {
1532
1809
  const uwf = await createUwfStore(storageRoot);
@@ -1535,15 +1812,26 @@ export async function cmdThreadStop(storageRoot: string, threadId: ThreadId): Pr
1535
1812
  fail(`thread not active: ${threadId}`);
1536
1813
  }
1537
1814
 
1538
- // Check if thread is running in background and terminate it
1539
- const runningMarker = await isThreadRunning(storageRoot, threadId);
1540
- if (runningMarker === null) {
1815
+ // Read the raw marker to check process identity
1816
+ const marker = await readMarker(storageRoot, threadId);
1817
+ if (marker === null) {
1541
1818
  process.stderr.write(`Warning: thread ${threadId} is not currently running\n`);
1542
1819
  return { thread: threadId, stopped: false };
1543
1820
  }
1544
1821
 
1822
+ // Validate that the marker's PID still belongs to the same process
1823
+ if (!isMarkerValid(marker)) {
1824
+ // Stale marker — PID was recycled or process died. Do NOT send a signal.
1825
+ process.stderr.write(
1826
+ `Warning: thread ${threadId} was not actually running (stale marker cleaned up)\n`,
1827
+ );
1828
+ await deleteMarker(storageRoot, threadId);
1829
+ return { thread: threadId, stopped: false };
1830
+ }
1831
+
1832
+ // Process identity confirmed — safe to send SIGTERM
1545
1833
  try {
1546
- process.kill(runningMarker.pid, "SIGTERM");
1834
+ process.kill(marker.pid, "SIGTERM");
1547
1835
  } catch {
1548
1836
  // Process may have already exited, ignore error
1549
1837
  }
@@ -1553,7 +1841,9 @@ export async function cmdThreadStop(storageRoot: string, threadId: ThreadId): Pr
1553
1841
  }
1554
1842
 
1555
1843
  /**
1556
- * Cancel a thread (stop execution + move to history)
1844
+ * Cancel a thread (stop execution + move to history).
1845
+ * Validates process identity before sending signals to prevent killing
1846
+ * unrelated processes when PIDs are recycled.
1557
1847
  */
1558
1848
  export async function cmdThreadCancel(
1559
1849
  storageRoot: string,
@@ -1565,14 +1855,18 @@ export async function cmdThreadCancel(
1565
1855
  fail(`thread not active: ${threadId}`);
1566
1856
  }
1567
1857
 
1568
- // Check if thread is running in background and terminate it
1569
- const runningMarker = await isThreadRunning(storageRoot, threadId);
1570
- if (runningMarker !== null) {
1571
- try {
1572
- process.kill(runningMarker.pid, "SIGTERM");
1573
- } catch {
1574
- // Process may have already exited, ignore error
1858
+ // Read the raw marker and validate process identity before sending signals
1859
+ const marker = await readMarker(storageRoot, threadId);
1860
+ if (marker !== null) {
1861
+ if (isMarkerValid(marker)) {
1862
+ // Process identity confirmed — safe to send SIGTERM
1863
+ try {
1864
+ process.kill(marker.pid, "SIGTERM");
1865
+ } catch {
1866
+ // Process may have already exited, ignore error
1867
+ }
1575
1868
  }
1869
+ // Always delete the marker (stale or not) — cancellation proceeds
1576
1870
  await deleteMarker(storageRoot, threadId);
1577
1871
  }
1578
1872