@united-workforce/cli 0.6.1 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (167) hide show
  1. package/README.md +120 -5
  2. package/dist/.build-fingerprint +1 -1
  3. package/dist/__tests__/agent-resolution-llm-free.test.js +9 -2
  4. package/dist/__tests__/agent-resolution-llm-free.test.js.map +1 -1
  5. package/dist/__tests__/broker-prompt.test.d.ts +10 -0
  6. package/dist/__tests__/broker-prompt.test.d.ts.map +1 -0
  7. package/dist/__tests__/broker-prompt.test.js +129 -0
  8. package/dist/__tests__/broker-prompt.test.js.map +1 -0
  9. package/dist/__tests__/broker-step-active-turns.test.d.ts +20 -0
  10. package/dist/__tests__/broker-step-active-turns.test.d.ts.map +1 -0
  11. package/dist/__tests__/broker-step-active-turns.test.js +428 -0
  12. package/dist/__tests__/broker-step-active-turns.test.js.map +1 -0
  13. package/dist/__tests__/broker-step-turn-chain-phase2.test.d.ts +13 -0
  14. package/dist/__tests__/broker-step-turn-chain-phase2.test.d.ts.map +1 -0
  15. package/dist/__tests__/broker-step-turn-chain-phase2.test.js +429 -0
  16. package/dist/__tests__/broker-step-turn-chain-phase2.test.js.map +1 -0
  17. package/dist/__tests__/config.test.js +33 -37
  18. package/dist/__tests__/config.test.js.map +1 -1
  19. package/dist/__tests__/e2e-broker-step-suspend.test.d.ts +18 -0
  20. package/dist/__tests__/e2e-broker-step-suspend.test.d.ts.map +1 -0
  21. package/dist/__tests__/e2e-broker-step-suspend.test.js +313 -0
  22. package/dist/__tests__/e2e-broker-step-suspend.test.js.map +1 -0
  23. package/dist/__tests__/e2e-broker-step.test.d.ts +13 -0
  24. package/dist/__tests__/e2e-broker-step.test.d.ts.map +1 -0
  25. package/dist/__tests__/e2e-broker-step.test.js +278 -0
  26. package/dist/__tests__/e2e-broker-step.test.js.map +1 -0
  27. package/dist/__tests__/e2e-mock-agent.test.js +1 -1
  28. package/dist/__tests__/e2e-mock-agent.test.js.map +1 -1
  29. package/dist/__tests__/e2e-thread-resume-timeout-suspend.test.d.ts +28 -0
  30. package/dist/__tests__/e2e-thread-resume-timeout-suspend.test.d.ts.map +1 -0
  31. package/dist/__tests__/e2e-thread-resume-timeout-suspend.test.js +322 -0
  32. package/dist/__tests__/e2e-thread-resume-timeout-suspend.test.js.map +1 -0
  33. package/dist/__tests__/log-tag-validity.test.d.ts +2 -0
  34. package/dist/__tests__/log-tag-validity.test.d.ts.map +1 -0
  35. package/dist/__tests__/log-tag-validity.test.js +110 -0
  36. package/dist/__tests__/log-tag-validity.test.js.map +1 -0
  37. package/dist/__tests__/setup-agent-discovery.test.js +35 -23
  38. package/dist/__tests__/setup-agent-discovery.test.js.map +1 -1
  39. package/dist/__tests__/setup-no-llm.test.js +5 -2
  40. package/dist/__tests__/setup-no-llm.test.js.map +1 -1
  41. package/dist/__tests__/step-ask.test.js +9 -6
  42. package/dist/__tests__/step-ask.test.js.map +1 -1
  43. package/dist/__tests__/step-show-json.test.js +5 -5
  44. package/dist/__tests__/step-show-json.test.js.map +1 -1
  45. package/dist/__tests__/step-show-text.test.d.ts +2 -0
  46. package/dist/__tests__/step-show-text.test.d.ts.map +1 -0
  47. package/dist/__tests__/step-show-text.test.js +192 -0
  48. package/dist/__tests__/step-show-text.test.js.map +1 -0
  49. package/dist/__tests__/step-turns-cli-subprocess.test.d.ts +21 -0
  50. package/dist/__tests__/step-turns-cli-subprocess.test.d.ts.map +1 -0
  51. package/dist/__tests__/step-turns-cli-subprocess.test.js +356 -0
  52. package/dist/__tests__/step-turns-cli-subprocess.test.js.map +1 -0
  53. package/dist/__tests__/step-turns-panorama-phase3.test.d.ts +21 -0
  54. package/dist/__tests__/step-turns-panorama-phase3.test.d.ts.map +1 -0
  55. package/dist/__tests__/step-turns-panorama-phase3.test.js +476 -0
  56. package/dist/__tests__/step-turns-panorama-phase3.test.js.map +1 -0
  57. package/dist/__tests__/step-turns.test.d.ts +24 -0
  58. package/dist/__tests__/step-turns.test.d.ts.map +1 -0
  59. package/dist/__tests__/step-turns.test.js +646 -0
  60. package/dist/__tests__/step-turns.test.js.map +1 -0
  61. package/dist/__tests__/store-turn-chain.test.d.ts +2 -0
  62. package/dist/__tests__/store-turn-chain.test.d.ts.map +1 -0
  63. package/dist/__tests__/store-turn-chain.test.js +341 -0
  64. package/dist/__tests__/store-turn-chain.test.js.map +1 -0
  65. package/dist/__tests__/thread-agent-failure-suspended.test.js +3 -3
  66. package/dist/__tests__/thread-agent-failure-suspended.test.js.map +1 -1
  67. package/dist/__tests__/thread-list-limit-offset.test.d.ts +24 -0
  68. package/dist/__tests__/thread-list-limit-offset.test.d.ts.map +1 -0
  69. package/dist/__tests__/thread-list-limit-offset.test.js +254 -0
  70. package/dist/__tests__/thread-list-limit-offset.test.js.map +1 -0
  71. package/dist/__tests__/thread-list-template-ms-date.test.js +7 -2
  72. package/dist/__tests__/thread-list-template-ms-date.test.js.map +1 -1
  73. package/dist/__tests__/thread-poke.test.js +6 -6
  74. package/dist/__tests__/thread-poke.test.js.map +1 -1
  75. package/dist/__tests__/thread-resume.test.js +2 -2
  76. package/dist/__tests__/thread-resume.test.js.map +1 -1
  77. package/dist/__tests__/thread-suspend-step.test.js +1 -1
  78. package/dist/__tests__/thread-suspend-step.test.js.map +1 -1
  79. package/dist/__tests__/thread.test.js +28 -14
  80. package/dist/__tests__/thread.test.js.map +1 -1
  81. package/dist/cli.js +910 -344
  82. package/dist/cli.js.map +1 -1
  83. package/dist/commands/broker-step.d.ts +117 -0
  84. package/dist/commands/broker-step.d.ts.map +1 -0
  85. package/dist/commands/broker-step.js +654 -0
  86. package/dist/commands/broker-step.js.map +1 -0
  87. package/dist/commands/config.d.ts.map +1 -1
  88. package/dist/commands/config.js +2 -23
  89. package/dist/commands/config.js.map +1 -1
  90. package/dist/commands/prompt.d.ts.map +1 -1
  91. package/dist/commands/prompt.js +43 -51
  92. package/dist/commands/prompt.js.map +1 -1
  93. package/dist/commands/setup.d.ts +6 -4
  94. package/dist/commands/setup.d.ts.map +1 -1
  95. package/dist/commands/setup.js +24 -27
  96. package/dist/commands/setup.js.map +1 -1
  97. package/dist/commands/step.d.ts +54 -6
  98. package/dist/commands/step.d.ts.map +1 -1
  99. package/dist/commands/step.js +484 -134
  100. package/dist/commands/step.js.map +1 -1
  101. package/dist/commands/thread.d.ts +4 -0
  102. package/dist/commands/thread.d.ts.map +1 -1
  103. package/dist/commands/thread.js +77 -151
  104. package/dist/commands/thread.js.map +1 -1
  105. package/dist/output-mappers.d.ts +8 -0
  106. package/dist/output-mappers.d.ts.map +1 -1
  107. package/dist/output-mappers.js +72 -18
  108. package/dist/output-mappers.js.map +1 -1
  109. package/dist/schemas.d.ts +3 -0
  110. package/dist/schemas.d.ts.map +1 -1
  111. package/dist/schemas.js +17 -3
  112. package/dist/schemas.js.map +1 -1
  113. package/dist/store.d.ts +147 -1
  114. package/dist/store.d.ts.map +1 -1
  115. package/dist/store.js +254 -1
  116. package/dist/store.js.map +1 -1
  117. package/dist/text-renderers.d.ts.map +1 -1
  118. package/dist/text-renderers.js +27 -2
  119. package/dist/text-renderers.js.map +1 -1
  120. package/package.json +7 -5
  121. package/src/__tests__/agent-resolution-llm-free.test.ts +14 -2
  122. package/src/__tests__/broker-prompt.test.ts +142 -0
  123. package/src/__tests__/broker-step-active-turns.test.ts +509 -0
  124. package/src/__tests__/broker-step-turn-chain-phase2.test.ts +525 -0
  125. package/src/__tests__/config.test.ts +35 -39
  126. package/src/__tests__/e2e-broker-step-suspend.test.ts +351 -0
  127. package/src/__tests__/e2e-broker-step.test.ts +320 -0
  128. package/src/__tests__/e2e-mock-agent.test.ts +1 -1
  129. package/src/__tests__/e2e-thread-resume-timeout-suspend.test.ts +360 -0
  130. package/src/__tests__/log-tag-validity.test.ts +124 -0
  131. package/src/__tests__/setup-agent-discovery.test.ts +35 -23
  132. package/src/__tests__/setup-no-llm.test.ts +5 -2
  133. package/src/__tests__/step-ask.test.ts +9 -6
  134. package/src/__tests__/step-show-json.test.ts +5 -5
  135. package/src/__tests__/step-show-text.test.ts +236 -0
  136. package/src/__tests__/step-turns-cli-subprocess.test.ts +411 -0
  137. package/src/__tests__/step-turns-panorama-phase3.test.ts +579 -0
  138. package/src/__tests__/step-turns.test.ts +734 -0
  139. package/src/__tests__/store-turn-chain.test.ts +386 -0
  140. package/src/__tests__/thread-agent-failure-suspended.test.ts +3 -3
  141. package/src/__tests__/thread-list-limit-offset.test.ts +305 -0
  142. package/src/__tests__/thread-list-template-ms-date.test.ts +7 -2
  143. package/src/__tests__/thread-poke.test.ts +6 -6
  144. package/src/__tests__/thread-resume.test.ts +2 -2
  145. package/src/__tests__/thread-suspend-step.test.ts +1 -1
  146. package/src/__tests__/thread.test.ts +29 -15
  147. package/src/cli.ts +1056 -483
  148. package/src/commands/broker-step.ts +913 -0
  149. package/src/commands/config.ts +2 -24
  150. package/src/commands/prompt.ts +43 -51
  151. package/src/commands/setup.ts +25 -29
  152. package/src/commands/step.ts +645 -176
  153. package/src/commands/thread.ts +87 -192
  154. package/src/output-mappers.ts +99 -21
  155. package/src/schemas.ts +32 -2
  156. package/src/store.ts +297 -2
  157. package/src/text-renderers.ts +35 -2
  158. package/dist/__tests__/adapter-json-roundtrip.test.d.ts +0 -2
  159. package/dist/__tests__/adapter-json-roundtrip.test.d.ts.map +0 -1
  160. package/dist/__tests__/adapter-json-roundtrip.test.js +0 -160
  161. package/dist/__tests__/adapter-json-roundtrip.test.js.map +0 -1
  162. package/dist/__tests__/spawn-agent-json.test.d.ts +0 -2
  163. package/dist/__tests__/spawn-agent-json.test.d.ts.map +0 -1
  164. package/dist/__tests__/spawn-agent-json.test.js +0 -79
  165. package/dist/__tests__/spawn-agent-json.test.js.map +0 -1
  166. package/src/__tests__/adapter-json-roundtrip.test.ts +0 -193
  167. package/src/__tests__/spawn-agent-json.test.ts +0 -100
@@ -0,0 +1,360 @@
1
+ /**
2
+ * Spec 4 (issue #435, Phase 2) — verification contract for the RFC #95 loop
3
+ * `timeout → suspend (checkpoint) → resume`.
4
+ *
5
+ * This is verification-only: NO resume code changed in Phase 2. The test proves
6
+ * the *existing* `uwf thread resume` path already satisfies the timeout-suspend
7
+ * resume contract by wiring the spec-3 producer to the resume consumer:
8
+ *
9
+ * 1. Drive a real sumeru send-timeout through `executeBrokerStep` (the SSE
10
+ * stream ends in `suspend`, exactly as Spec 3 verifies) so the thread's
11
+ * head step is a genuine `$status: "$SUSPEND"` node and the `(threadId,
12
+ * role)` broker session is mapped to the sumeru session.
13
+ * 2. Seed the thread to `suspended` (mirroring what `finalizeAgentStep` does
14
+ * after a suspended broker step) and assert `cmdThreadShow` reports
15
+ * `suspended` with the timeout reason — a valid resume precondition.
16
+ * 3. Call `cmdThreadResume`. Assert it is accepted, issues a FRESH
17
+ * `broker.send()` for the suspended role on the SAME mapped session (so the
18
+ * sumeru adapter resumes by `nativeId` rather than starting over), delivers
19
+ * the `-p` supplement as the continuation prompt, and — when that resumed
20
+ * send now completes (`kind:"completed"`) — advances the thread out of
21
+ * `suspended` (here straight to `end`).
22
+ *
23
+ * The second send is a `done` stream, so the gate opens and the thread proceeds;
24
+ * if it had timed out again it would simply re-arm `suspended` (Spec 3 path),
25
+ * never an error.
26
+ */
27
+
28
+ import { mkdtemp, rm, writeFile } from "node:fs/promises";
29
+ import { tmpdir } from "node:os";
30
+ import { join } from "node:path";
31
+ import { putSchema } from "@ocas/core";
32
+ import type { CasRef, ThreadId, WorkflowConfig, WorkflowPayload } from "@united-workforce/protocol";
33
+ import { createProcessLogger } from "@united-workforce/util";
34
+ import { getConfigPath } from "@united-workforce/util-agent";
35
+ import { afterEach, beforeEach, describe, expect, test, vi } from "vitest";
36
+ import { executeBrokerStep, openBrokerSessionStore } from "../commands/broker-step.js";
37
+ import { cmdThreadResume, cmdThreadShow } from "../commands/thread.js";
38
+ import { createUwfStore, type UwfStore } from "../store.js";
39
+ import { seedThreads } from "./thread-test-helpers.js";
40
+
41
/** One recorded invocation of the stubbed global `fetch`. */
type FetchCall = {
  /** Request URL, as resolved by `resolveFetchUrl`. */
  url: string;
  /** HTTP method taken from the init object ("GET" when none was given). */
  method: string;
  /** Request body when it was passed as a string, otherwise "". */
  body: string;
};
42
+
43
/**
 * Render a single Server-Sent-Events frame.
 *
 * @param id    Value written to the `id:` field.
 * @param event Value written to the `event:` field.
 * @param data  Payload; JSON-serialised into the `data:` field.
 * @returns The three fields, one per line, terminated by a blank line.
 */
function sseFrame(id: number, event: string, data: unknown): string {
  const fields = [`id: ${id}`, `event: ${event}`, `data: ${JSON.stringify(data)}`];
  return `${fields.join("\n")}\n\n`;
}
46
+
47
/**
 * Wrap pre-rendered SSE frames in a 200 `text/event-stream` Response.
 *
 * Every frame is UTF-8 encoded and enqueued as its own chunk, then the
 * stream is closed, so a reader sees the frames in order followed by EOF.
 *
 * @param frames Already-formatted SSE frames (see `sseFrame`).
 */
function buildSseResponse(frames: string[]): Response {
  const encoder = new TextEncoder();
  const chunks = frames.map((frame) => encoder.encode(frame));
  const body = new ReadableStream<Uint8Array>({
    start(controller) {
      for (const chunk of chunks) {
        controller.enqueue(chunk);
      }
      controller.close();
    },
  });
  const headers = { "Content-Type": "text/event-stream; charset=utf-8" };
  return new Response(body, { status: 200, headers });
}
60
+
61
/**
 * Build a Response whose body is `body` serialised as JSON.
 *
 * @param status HTTP status code for the response.
 * @param body   Value to serialise with `JSON.stringify`.
 */
function buildJsonResponse(status: number, body: unknown): Response {
  const payload = JSON.stringify(body);
  const headers = { "Content-Type": "application/json" };
  return new Response(payload, { status, headers });
}
67
+
68
// Literal-typed "string" shared by both frontmatter fields below.
const PLANNER_STRING_FIELD = "string" as const;

/**
 * JSON schema for the planner role's frontmatter: a closed object carrying a
 * `$status` of "done" or "failed" plus a free-text `plan`.
 *
 * Key order is kept exactly as declared — the object is stored through
 * `putSchema`, and reordering keys would change its serialised form.
 */
const PLANNER_OUTPUT_SCHEMA = {
  title: "planner-output",
  type: "object" as const,
  required: ["$status", "plan"],
  properties: {
    $status: { type: PLANNER_STRING_FIELD, enum: ["done", "failed"] },
    plan: { type: PLANNER_STRING_FIELD },
  },
  additionalProperties: false,
};
78
+
79
/**
 * Raw assistant turn returned by the resumed send: a frontmatter block that
 * satisfies `PLANNER_OUTPUT_SCHEMA`, followed by a one-line body.
 */
const PLANNER_RAW_OUTPUT = [
  "---",
  "$status: done",
  "plan: ship it",
  "---",
  "the plan body",
].join("\n");
84
+
85
// Where the stubbed sumeru gateway "lives" and the agent alias that maps to it.
const HOST = "http://127.0.0.1:7900";
const GATEWAY = "planner-gw";
const ALIAS = "planner-agent";

// Session id handed out by the stubbed createSession endpoint.
const SESSION_ID = "ses_resume_e2e";

// Thread / role / workflow under test.
const THREAD_ID = "06FCBROKERRESUMESTEP0001" as ThreadId;
const ROLE = "planner";
const WORKFLOW_NAME = "broker-resume-e2e";

// Payload of the `suspend` SSE frame: native session id and elapsed time
// (30 minutes, in milliseconds).
const NATIVE_ID = "ses_native_abc";
const ELAPSED_MS = 30 * 60 * 1000;

// Operator supplement passed to `cmdThreadResume`
// (Chinese for "continue the previously unfinished task").
const SUPPLEMENT = "继续上次未完成的任务";
95
+
96
/**
 * In-memory workflow config handed to `executeBrokerStep`: a single agent
 * alias pointing at the stubbed host/gateway, used as the default agent, with
 * no overrides.
 */
function buildConfig(): WorkflowConfig {
  const config: WorkflowConfig = {
    agents: { [ALIAS]: { host: HOST, gateway: GATEWAY } },
    defaultAgent: ALIAS,
    agentOverrides: null,
  };
  return config;
}
103
+
104
/**
 * Write the on-disk `config.yaml` that `cmdThreadResume` reloads via
 * `loadWorkflowConfig`. Must use the Phase-3 `{host, gateway}` shape (the
 * normalizer rejects the legacy `{command}` form).
 *
 * The document is assembled line by line with explicit indentation so that
 * `host` and `gateway` are nested under the alias key (4 spaces) and the alias
 * is nested under `agents` (2 spaces). With all three at the same indent they
 * would parse as sibling keys of `agents` rather than as fields of the alias.
 *
 * @param storageRoot Storage root whose config path (`getConfigPath`) is written.
 */
async function writeConfig(storageRoot: string): Promise<void> {
  const yaml = [
    `defaultAgent: ${ALIAS}`,
    "agentOverrides: null",
    "agents:",
    `  ${ALIAS}:`,
    `    host: ${HOST}`,
    `    gateway: ${GATEWAY}`,
    "", // trailing newline
  ].join("\n");
  await writeFile(getConfigPath(storageRoot), yaml, "utf8");
}
113
+
114
/**
 * Store the planner frontmatter schema, a one-role workflow and its start node
 * in the CAS.
 *
 * @param uwf Store the schema, workflow and start node are written to.
 * @returns The workflow payload and the hash of the stored start node.
 */
async function buildWorkflow(uwf: UwfStore): Promise<{
  workflow: WorkflowPayload;
  startHash: CasRef;
}> {
  const frontmatterHash = (await putSchema(uwf.store, PLANNER_OUTPUT_SCHEMA)) as CasRef;

  const workflow: WorkflowPayload = {
    version: 1,
    name: WORKFLOW_NAME,
    description: "broker step resume end-to-end",
    roles: {
      planner: {
        description: "plans things",
        goal: "produce a plan",
        capabilities: [],
        procedure: "think hard",
        output: "frontmatter+body",
        frontmatter: frontmatterHash,
      },
    },
    graph: {
      planner: {
        // The $END edge needs a non-empty prompt: the resumed `done` stream is
        // routed through the post-step moderator, which rejects an empty edge
        // template.
        done: { role: "$END", prompt: "done", location: null },
      },
    },
  };

  // Persist the workflow first; the start node references it by hash.
  const workflowHash = await uwf.store.cas.put(uwf.schemas.workflow, workflow);
  const startNode = { workflow: workflowHash, prompt: "p", cwd: "/tmp/work" };
  const startHash = (await uwf.store.cas.put(uwf.schemas.startNode, startNode)) as CasRef;

  return { workflow, startHash };
}
148
+
149
/**
 * SSE response for the first send: a user turn, an assistant draft, then a
 * terminal `suspend` frame with reason "timeout" carrying `NATIVE_ID` and
 * `ELAPSED_MS`.
 */
function suspendStream(): Response {
  const turn = (index: number, role: string, content: string) => ({
    type: "@sumeru/turn",
    value: { index, role, content, timestamp: "", toolCalls: null },
  });
  const suspend = {
    type: "@sumeru/suspend",
    value: { reason: "timeout", nativeId: NATIVE_ID, elapsedMs: ELAPSED_MS },
  };
  return buildSseResponse([
    sseFrame(1, "turn", turn(0, "user", "edge prompt")),
    sseFrame(2, "turn", turn(1, "assistant", "draft1")),
    sseFrame(3, "suspend", suspend),
  ]);
}
165
+
166
/**
 * SSE response for every send after the first: one assistant turn whose
 * content is `PLANNER_RAW_OUTPUT`, followed by a terminal `done` summary frame.
 */
function completedStream(): Response {
  const assistantTurn = {
    type: "@sumeru/turn",
    value: {
      index: 1,
      role: "assistant",
      content: PLANNER_RAW_OUTPUT,
      timestamp: "",
      toolCalls: null,
    },
  };
  const summary = {
    type: "@sumeru/summary",
    value: { turnCount: 2, tokens: { in: 9, out: 4 }, durationMs: 42 },
  };
  const frames = [sseFrame(1, "turn", assistantTurn), sseFrame(2, "done", summary)];
  return buildSseResponse(frames);
}
184
+
185
/**
 * Reduce any of the three `fetch` input forms to its URL string.
 *
 * @param input A URL string (returned as-is), a `URL` (its `href`) or a
 *              `Request` (its `url`).
 */
function resolveFetchUrl(input: string | URL | Request): string {
  if (typeof input === "string") {
    return input;
  }
  return input instanceof URL ? input.href : input.url;
}
190
+
191
/**
 * Create a process logger rooted at `tmpDir` and tagged with the test's thread
 * id and workflow name.
 *
 * @param tmpDir Storage root passed to `createProcessLogger`.
 */
function makePlog(tmpDir: string) {
  const context = { thread: THREAD_ID, workflow: WORKFLOW_NAME };
  return createProcessLogger({ storageRoot: tmpDir, context });
}
197
+
198
describe("uwf thread resume — timeout-suspended thread resumes via fresh send (issue #435)", () => {
  // URL suffixes the fetch stub answers; anything else gets a 500.
  const createSessionSuffix = `/gateways/${GATEWAY}/sessions`;
  const sessionMessagesSuffix = `/sessions/${SESSION_ID}/messages`;

  let tmpDir: string;
  let savedOcasHome: string | undefined;
  // Every request the stub has seen, in arrival order.
  let calls: FetchCall[];
  // Number of sends so far. The first send (the step that suspends) gets the
  // suspend stream; every later send (the resume) gets the completed stream.
  // Counting, rather than swapping the stub, means the resume genuinely
  // re-enters the same stub.
  let messageCallCount: number;

  /** Stand-in for global `fetch`: records the call, then answers by URL suffix. */
  async function fakeFetch(
    input: string | URL | Request,
    init: RequestInit | undefined,
  ): Promise<Response> {
    const url = resolveFetchUrl(input);
    calls.push({
      url,
      method: init?.method ?? "GET",
      body: typeof init?.body === "string" ? init.body : "",
    });

    if (url.endsWith(createSessionSuffix)) {
      return buildJsonResponse(201, {
        type: "@sumeru/session",
        value: { id: SESSION_ID, gateway: GATEWAY },
      });
    }
    if (url.endsWith(sessionMessagesSuffix)) {
      messageCallCount += 1;
      return messageCallCount === 1 ? suspendStream() : completedStream();
    }
    return buildJsonResponse(500, { error: "unexpected url", url });
  }

  beforeEach(async () => {
    savedOcasHome = process.env.OCAS_HOME;
    tmpDir = await mkdtemp(join(tmpdir(), "broker-resume-e2e-"));
    process.env.OCAS_HOME = join(tmpDir, "cas");
    calls = [];
    messageCallCount = 0;
    vi.stubGlobal("fetch", fakeFetch);
  });

  afterEach(async () => {
    vi.unstubAllGlobals();
    // Restore OCAS_HOME exactly, including the "was not set" case.
    if (savedOcasHome === undefined) {
      delete process.env.OCAS_HOME;
    } else {
      process.env.OCAS_HOME = savedOcasHome;
    }
    await rm(tmpDir, { recursive: true, force: true });
  });

  /**
   * Drive a real send-timeout through `executeBrokerStep`, then seed the thread
   * to `suspended` at the produced `$SUSPEND` step (mirroring `finalizeAgentStep`
   * after a suspended broker step). Returns the suspend step hash and reason.
   */
  async function suspendThread(
    uwf: UwfStore,
    workflow: WorkflowPayload,
    startHash: CasRef,
  ): Promise<{ suspendHash: CasRef; reason: string }> {
    const step = await executeBrokerStep({
      storageRoot: tmpDir,
      uwf,
      config: buildConfig(),
      workflow,
      threadId: THREAD_ID,
      role: ROLE,
      edgePrompt: "make a plan",
      effectiveCwd: "/tmp/work",
      startHash,
      prevHash: null,
      agentOverride: null,
      previousAttempts: null,
      plog: makePlog(tmpDir),
    });

    const frontmatter = step.frontmatter as Record<string, unknown>;
    const reason = frontmatter.reason as string;

    await seedThreads(tmpDir, {
      [THREAD_ID]: {
        head: step.stepHash,
        status: "suspended",
        suspendedRole: ROLE,
        suspendMessage: reason,
        completedAt: null,
      },
    });
    return { suspendHash: step.stepHash, reason };
  }

  /**
   * Shared arrange step: open the store, persist the workflow, write
   * `config.yaml`, then leave the thread suspended on a timeout.
   */
  async function arrangeSuspendedThread(): Promise<{ suspendHash: CasRef; reason: string }> {
    const uwf = await createUwfStore(tmpDir);
    const { workflow, startHash } = await buildWorkflow(uwf);
    await writeConfig(tmpDir);
    return suspendThread(uwf, workflow, startHash);
  }

  test("a timeout-suspended thread is shown as suspended, then resume advances it past the gate", async () => {
    const { suspendHash, reason } = await arrangeSuspendedThread();

    // Precondition: the thread sits in `suspended` carrying the timeout reason.
    const before = await cmdThreadShow(tmpDir, THREAD_ID);
    expect(before.status).toBe("suspended");
    expect(before.suspendedRole).toBe(ROLE);
    expect(before.suspendMessage).toBe(reason);
    expect(before.suspendMessage as string).toContain(NATIVE_ID);

    // Resume is accepted and the resumed send completes, so the thread leaves
    // `suspended` and advances (here straight to `end` via the `done` edge).
    const resumed = await cmdThreadResume(tmpDir, THREAD_ID, SUPPLEMENT, null);
    expect(resumed.status).toBe("end");
    expect(resumed.done).toBe(true);
    expect(resumed.error).toBeNull();
    expect(resumed.head).not.toBe(suspendHash);

    // And `thread show` agrees the gate is gone.
    const after = await cmdThreadShow(tmpDir, THREAD_ID);
    expect(after.status).toBe("end");
    expect(after.suspendedRole).toBeNull();
    expect(after.suspendMessage).toBeNull();
  });

  test("resume issues a FRESH send reusing the SAME mapped session (no new createSession)", async () => {
    await arrangeSuspendedThread();
    await cmdThreadResume(tmpDir, THREAD_ID, SUPPLEMENT, null);

    // Exactly ONE createSession (during the suspend) — resume reuses the cached
    // (threadId, role) → sessionId mapping rather than spawning a new session.
    const createCalls = calls.filter((c) => c.url.endsWith(createSessionSuffix));
    expect(createCalls).toHaveLength(1);

    // TWO sends, both addressed to the SAME session id: the suspended send and
    // the resume continuation. The sumeru adapter resumes by nativeId off this
    // shared session.
    const messageCalls = calls.filter((c) => c.url.endsWith("/messages"));
    expect(messageCalls).toHaveLength(2);
    for (const { url } of messageCalls) {
      expect(url).toContain(sessionMessagesSuffix);
    }

    // The broker session row still points at the same session for a future resume.
    const sessionStore = openBrokerSessionStore(tmpDir);
    try {
      const row = sessionStore.getSession(THREAD_ID, ROLE);
      expect(row?.sessionId).toBe(SESSION_ID);
      expect(row?.host).toBe(HOST);
      expect(row?.gateway).toBe(GATEWAY);
    } finally {
      sessionStore.close();
    }
  });

  test("the -p supplement is delivered as the continuation prompt on the resume send", async () => {
    await arrangeSuspendedThread();
    await cmdThreadResume(tmpDir, THREAD_ID, SUPPLEMENT, null);

    // Second `/messages` request is the resume continuation.
    const [, resumeSend] = calls.filter((c) => c.url.endsWith("/messages"));
    expect(resumeSend).toBeDefined();
    // The resume prompt = suspend reason + the operator supplement; both ride
    // the assembled prompt body of the continuation send.
    expect(resumeSend?.body).toContain(SUPPLEMENT);
    expect(resumeSend?.body).toContain(NATIVE_ID);
  });
});
@@ -0,0 +1,124 @@
1
+ import { readdir, readFile, stat } from "node:fs/promises";
2
+ import { join } from "node:path";
3
+ import { describe, expect, test } from "vitest";
4
+
5
+ /**
6
+ * Static regression guard for log tags (#426).
7
+ *
8
+ * Every `log()` call site uses a hand-written 8-char Crockford Base32 tag.
9
+ * Crockford Base32 excludes I, L, O, U to avoid visual ambiguity, and
10
+ * `assertValidLogTag()` (util/process-logger/log-tag.ts) throws at runtime
11
+ * when a tag contains an illegal character.
12
+ *
13
+ * The bug: `PL_FRONTMATTER_FAIL = "F4FA1L7Z"` (a leet spelling of
14
+ * "FRONTMATTER FAIL") smuggled an `L` into the tag. It only fires on the
15
+ * frontmatter-extraction-failure path, so it stayed dormant until a planner
16
+ * step failed extraction — then the failure logger itself crashed the process,
17
+ * masking the real error.
18
+ *
19
+ * This test scans the source of the cli + broker packages and asserts that
20
+ * EVERY literal tag — whether written inline as `log("XXXXXXXX", ...)` or as a
21
+ * `const PL_* = "XXXXXXXX"` constant — is a valid Crockford Base32 tag. A new
22
+ * illegal tag, in any file, fails here at build time instead of at runtime.
23
+ */
24
+
25
// Crockford Base32 alphabet — no I, L, O, U (mirrors util/src/base32.ts).
const CROCKFORD_BASE32_ALPHABET = "0123456789ABCDEFGHJKMNPQRSTVWXYZ";
// Membership set over the alphabet's individual characters.
const TAG_CHAR_SET = new Set(CROCKFORD_BASE32_ALPHABET.split(""));
// A log tag is exactly this many characters long.
const TAG_LENGTH = 8;

/**
 * Decide whether `tag` is a well-formed log tag: exactly `TAG_LENGTH`
 * characters, each of which — after upper-casing — is in the Crockford
 * Base32 alphabet.
 *
 * @param tag Candidate tag literal.
 * @returns `true` when the tag is legal, `false` otherwise.
 */
function isValidLogTag(tag: string): boolean {
  const hasExpectedLength = tag.length === TAG_LENGTH;
  return hasExpectedLength && [...tag].every((ch) => TAG_CHAR_SET.has(ch.toUpperCase()));
}
41
+
42
// Roots scanned for log-tag literals, relative to this test file.
const CLI_SRC_ROOT = join(__dirname, ".."); // packages/cli/src
const BROKER_SRC_ROOT = join(__dirname, "..", "..", "..", "broker", "src"); // packages/broker/src
const SCAN_ROOTS = [CLI_SRC_ROOT, BROKER_SRC_ROOT];
47
+
48
/**
 * Recursively list the non-test TypeScript sources under `dir`.
 *
 * - Entries named `node_modules` or `dist` are skipped entirely.
 * - Only regular files ending in `.ts` but not `.test.ts` are returned.
 * - An unreadable or missing `dir` yields an empty list rather than an error,
 *   so an absent scan root does not fail the caller.
 * - Symlinks are followed; a symlink whose target cannot be stat'ed (dangling,
 *   or removed mid-scan) is skipped instead of aborting the whole scan.
 *
 * @param dir Directory to walk.
 * @returns Full paths (joined onto `dir`) of every matching file.
 */
async function collectTsFiles(dir: string): Promise<string[]> {
  // Dirents carry the entry type, so no per-entry stat() is needed for plain
  // files and directories.
  const entries = await readdir(dir, { withFileTypes: true }).catch(() => []);
  const out: string[] = [];

  for (const entry of entries) {
    const { name } = entry;
    if (name === "node_modules" || name === "dist") {
      continue;
    }
    const full = join(dir, name);

    let isDirectory = entry.isDirectory();
    let isFile = entry.isFile();
    if (entry.isSymbolicLink()) {
      // A Dirent describes the link itself; resolve it to classify the target.
      try {
        const target = await stat(full);
        isDirectory = target.isDirectory();
        isFile = target.isFile();
      } catch {
        continue; // dangling link — nothing to scan
      }
    }

    if (isDirectory) {
      out.push(...(await collectTsFiles(full)));
    } else if (isFile && name.endsWith(".ts") && !name.endsWith(".test.ts")) {
      out.push(full);
    }
  }
  return out;
}
70
+
71
/**
 * One tag literal found by the scan:
 * - `tag`     — the captured 8-character literal,
 * - `file`    — path of the file it was found in,
 * - `context` — the full regex match surrounding the literal.
 */
type TagOccurrence = Record<"tag" | "file" | "context", string>;
76
+
77
// Two patterns feed the guard: `log("XXXXXXXX"` call sites, and
// `const PL_… = "XXXXXXXX"` / `const …TAG… = "XXXXXXXX"` constants. In both,
// group 1 is an 8-character alphanumeric literal. Collection is deliberately
// permissive; legality is decided afterwards by isValidLogTag.
const LOG_CALL_RE = /\blog\(\s*"([0-9A-Za-z]{8})"/g;
const TAG_CONST_RE =
  /\bconst\s+(?:PL_[A-Z0-9_]+|[A-Z0-9_]*TAG[A-Z0-9_]*)\s*=\s*"([0-9A-Za-z]{8})"/g;

/**
 * Scan every file returned by `collectTsFiles` for each root in `SCAN_ROOTS`
 * and gather all tag literals matched by `LOG_CALL_RE` and `TAG_CONST_RE`.
 *
 * @returns One entry per match, ordered by root, then file, then pattern
 *          (log calls before constants), then position in the file.
 */
async function collectTagOccurrences(): Promise<TagOccurrence[]> {
  const found: TagOccurrence[] = [];
  const patterns = [LOG_CALL_RE, TAG_CONST_RE];

  for (const root of SCAN_ROOTS) {
    for (const file of await collectTsFiles(root)) {
      const content = await readFile(file, "utf8");
      for (const pattern of patterns) {
        // matchAll iterates on a private copy of the regex, so the shared
        // module-level patterns keep lastIndex === 0 between files.
        for (const match of content.matchAll(pattern)) {
          found.push({ tag: match[1], file, context: match[0] });
        }
      }
    }
  }
  return found;
}
103
+
104
describe("log tag validity (#426 regression guard)", () => {
  test("collects at least the known PL_ tag constants", async () => {
    // Sanity check: if the scan finds almost nothing, the regexes or scan
    // roots are broken and the validity test below would pass vacuously.
    const { length: found } = await collectTagOccurrences();
    expect(found).toBeGreaterThanOrEqual(10);
  });

  test("every log tag literal in cli + broker is valid Crockford Base32", async () => {
    const invalid: TagOccurrence[] = [];
    for (const occurrence of await collectTagOccurrences()) {
      if (!isValidLogTag(occurrence.tag)) {
        invalid.push(occurrence);
      }
    }
    // One line per offender, shown in the assertion message on failure.
    const report = invalid
      .map(({ tag, context, file }) => ` ${tag} (${context}) in ${file}`)
      .join("\n");
    expect(invalid, `Illegal Crockford Base32 log tags found:\n${report}`).toEqual([]);
  });

  test("the specific F4FA1L7Z bug (#426) stays fixed", async () => {
    const all = await collectTagOccurrences();
    const offenders = all.filter(({ tag }) => tag === "F4FA1L7Z");
    expect(offenders).toEqual([]);
  });
});
@@ -10,19 +10,19 @@ import { _agentNameFromBinary, _printAgentMenu, cmdSetup } from "../commands/set
10
10
 
11
11
  describe("_agentNameFromBinary", () => {
12
12
  test("strips uwf- prefix", () => {
13
- expect(_agentNameFromBinary("uwf-hermes")).toBe("hermes");
13
+ expect(_agentNameFromBinary("uwf-builtin")).toBe("builtin");
14
14
  });
15
15
 
16
16
  test("strips uwf- prefix for compound names", () => {
17
- expect(_agentNameFromBinary("uwf-claude-code")).toBe("claude-code");
17
+ expect(_agentNameFromBinary("uwf-some-gateway")).toBe("some-gateway");
18
18
  });
19
19
 
20
20
  test("returns as-is when no uwf- prefix", () => {
21
- expect(_agentNameFromBinary("hermes")).toBe("hermes");
21
+ expect(_agentNameFromBinary("builtin")).toBe("builtin");
22
22
  });
23
23
 
24
- test("handles uwf-builtin", () => {
25
- expect(_agentNameFromBinary("uwf-builtin")).toBe("builtin");
24
+ test("handles uwf-mock", () => {
25
+ expect(_agentNameFromBinary("uwf-mock")).toBe("mock");
26
26
  });
27
27
  });
28
28
 
@@ -35,10 +35,10 @@ describe("_printAgentMenu", () => {
35
35
  logs.push(args.join(" "));
36
36
  });
37
37
 
38
- _printAgentMenu(["uwf-hermes", "uwf-claude-code"]);
38
+ _printAgentMenu(["uwf-builtin", "uwf-mock"]);
39
39
 
40
- expect(logs.some((l) => l.includes("Hermes"))).toBe(true);
41
- expect(logs.some((l) => l.includes("Claude Code"))).toBe(true);
40
+ expect(logs.some((l) => l.includes("Built-in"))).toBe(true);
41
+ expect(logs.some((l) => l.includes("Mock"))).toBe(true);
42
42
 
43
43
  vi.restoreAllMocks();
44
44
  });
@@ -76,24 +76,27 @@ describe("cmdSetup agent configuration (engine config is LLM-free, issue #143)",
76
76
 
77
77
  expect(result.defaultAgent).toBe("claude-code");
78
78
  const config = parse(readFileSync(join(storageRoot, "config.yaml"), "utf8"));
79
- expect(config.agents["claude-code"]).toEqual({ command: "uwf-claude-code", args: [] });
79
+ expect(config.agents["claude-code"]).toEqual({
80
+ host: "http://127.0.0.1:7900",
81
+ gateway: "claude-code",
82
+ });
80
83
  expect(config.defaultAgent).toBe("claude-code");
81
84
  });
82
85
 
83
86
  test("preserves existing agents when adding new one", async () => {
84
- await cmdSetup({ agent: "hermes", storageRoot });
87
+ await cmdSetup({ agent: "builtin", storageRoot });
85
88
  await cmdSetup({ agent: "claude-code", storageRoot });
86
89
 
87
90
  const config = parse(readFileSync(join(storageRoot, "config.yaml"), "utf8"));
88
- expect(config.agents.hermes).toBeDefined();
91
+ expect(config.agents.builtin).toBeDefined();
89
92
  expect(config.agents["claude-code"]).toBeDefined();
90
93
  expect(config.defaultAgent).toBe("claude-code");
91
94
  });
92
95
 
93
96
  test("updates defaultAgent on re-run with different agent", async () => {
94
- await cmdSetup({ agent: "hermes", storageRoot });
97
+ await cmdSetup({ agent: "mock", storageRoot });
95
98
  const config1 = parse(readFileSync(join(storageRoot, "config.yaml"), "utf8"));
96
- expect(config1.defaultAgent).toBe("hermes");
99
+ expect(config1.defaultAgent).toBe("mock");
97
100
 
98
101
  await cmdSetup({ agent: "builtin", storageRoot });
99
102
  const config2 = parse(readFileSync(join(storageRoot, "config.yaml"), "utf8"));
@@ -101,14 +104,17 @@ describe("cmdSetup agent configuration (engine config is LLM-free, issue #143)",
101
104
  });
102
105
 
103
106
  test("normalizes agent name with uwf- prefix to bare name", async () => {
104
- const result = await cmdSetup({ agent: "uwf-hermes", storageRoot });
107
+ const result = await cmdSetup({ agent: "uwf-builtin", storageRoot });
105
108
 
106
- expect(result.defaultAgent).toBe("hermes");
109
+ expect(result.defaultAgent).toBe("builtin");
107
110
  const config = parse(readFileSync(join(storageRoot, "config.yaml"), "utf8"));
108
- expect(config.agents.hermes).toEqual({ command: "uwf-hermes", args: [] });
109
- expect(config.defaultAgent).toBe("hermes");
111
+ expect(config.agents.builtin).toEqual({
112
+ host: "http://127.0.0.1:7900",
113
+ gateway: "builtin",
114
+ });
115
+ expect(config.defaultAgent).toBe("builtin");
110
116
  // Verify no duplicate uwf- prefix
111
- expect(config.agents["uwf-hermes"]).toBeUndefined();
117
+ expect(config.agents["uwf-builtin"]).toBeUndefined();
112
118
  });
113
119
 
114
120
  test("normalizes uwf-claude-code to claude-code", async () => {
@@ -116,7 +122,10 @@ describe("cmdSetup agent configuration (engine config is LLM-free, issue #143)",
116
122
 
117
123
  expect(result.defaultAgent).toBe("claude-code");
118
124
  const config = parse(readFileSync(join(storageRoot, "config.yaml"), "utf8"));
119
- expect(config.agents["claude-code"]).toEqual({ command: "uwf-claude-code", args: [] });
125
+ expect(config.agents["claude-code"]).toEqual({
126
+ host: "http://127.0.0.1:7900",
127
+ gateway: "claude-code",
128
+ });
120
129
  expect(config.defaultAgent).toBe("claude-code");
121
130
  // Verify no duplicate uwf- prefix
122
131
  expect(config.agents["uwf-claude-code"]).toBeUndefined();
@@ -128,15 +137,18 @@ describe("cmdSetup agent configuration (engine config is LLM-free, issue #143)",
128
137
  mkdirSync(storageRoot, { recursive: true });
129
138
  writeFileSync(
130
139
  join(storageRoot, "config.yaml"),
131
- "providers:\n openai: { baseUrl: x, apiKey: y }\nmodels:\n default: { provider: openai, name: gpt-4o }\ndefaultModel: default\nagents:\n hermes: { command: uwf-hermes, args: [] }\ndefaultAgent: hermes\n",
140
+ "providers:\n openai: { baseUrl: x, apiKey: y }\nmodels:\n default: { provider: openai, name: gpt-4o }\ndefaultModel: default\nagents:\n builtin: { host: 'http://127.0.0.1:7900', gateway: builtin }\ndefaultAgent: builtin\n",
132
141
  "utf8",
133
142
  );
134
- await cmdSetup({ agent: "hermes", storageRoot });
143
+ await cmdSetup({ agent: "builtin", storageRoot });
135
144
  const config = parse(readFileSync(join(storageRoot, "config.yaml"), "utf8"));
136
145
  expect(config.providers).toBeUndefined();
137
146
  expect(config.models).toBeUndefined();
138
147
  expect(config.defaultModel).toBeUndefined();
139
- expect(config.agents.hermes).toEqual({ command: "uwf-hermes", args: [] });
140
- expect(config.defaultAgent).toBe("hermes");
148
+ expect(config.agents.builtin).toEqual({
149
+ host: "http://127.0.0.1:7900",
150
+ gateway: "builtin",
151
+ });
152
+ expect(config.defaultAgent).toBe("builtin");
141
153
  });
142
154
  });
@@ -30,7 +30,7 @@ describe("cmdSetup — non-interactive, no LLM args (issue #143)", () => {
30
30
  tempDir = mkdtempSync(join(tmpdir(), "uwf-setup-"));
31
31
  writeFileSync(
32
32
  join(tempDir, "config.yaml"),
33
- "agents:\n hermes: { command: uwf-hermes, args: [] }\ndefaultAgent: hermes\nagentOverrides:\n solve-issue:\n coder: claude-code\n",
33
+ "agents:\n hermes: { host: 'http://127.0.0.1:7900', gateway: hermes }\ndefaultAgent: hermes\nagentOverrides:\n solve-issue:\n coder: claude-code\n",
34
34
  "utf8",
35
35
  );
36
36
  await cmdSetup({ agent: "hermes", storageRoot: tempDir });
@@ -50,7 +50,10 @@ describe("cmdSetup — non-interactive, no LLM args (issue #143)", () => {
50
50
  >;
51
51
  expect(cfg.defaultAgent).toBe("claude-code");
52
52
  const agents = cfg.agents as Record<string, unknown>;
53
- expect(agents["claude-code"]).toEqual({ command: "uwf-claude-code", args: [] });
53
+ expect(agents["claude-code"]).toEqual({
54
+ host: "http://127.0.0.1:7900",
55
+ gateway: "claude-code",
56
+ });
54
57
  });
55
58
  });
56
59