@united-workforce/cli 0.7.0 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111)
  1. package/README.md +32 -5
  2. package/dist/.build-fingerprint +1 -0
  3. package/dist/__tests__/broker-step-active-turns.test.d.ts +20 -0
  4. package/dist/__tests__/broker-step-active-turns.test.d.ts.map +1 -0
  5. package/dist/__tests__/broker-step-active-turns.test.js +428 -0
  6. package/dist/__tests__/broker-step-active-turns.test.js.map +1 -0
  7. package/dist/__tests__/broker-step-turn-chain-phase2.test.d.ts +13 -0
  8. package/dist/__tests__/broker-step-turn-chain-phase2.test.d.ts.map +1 -0
  9. package/dist/__tests__/broker-step-turn-chain-phase2.test.js +429 -0
  10. package/dist/__tests__/broker-step-turn-chain-phase2.test.js.map +1 -0
  11. package/dist/__tests__/e2e-broker-step-suspend.test.d.ts +18 -0
  12. package/dist/__tests__/e2e-broker-step-suspend.test.d.ts.map +1 -0
  13. package/dist/__tests__/e2e-broker-step-suspend.test.js +313 -0
  14. package/dist/__tests__/e2e-broker-step-suspend.test.js.map +1 -0
  15. package/dist/__tests__/e2e-thread-resume-timeout-suspend.test.d.ts +28 -0
  16. package/dist/__tests__/e2e-thread-resume-timeout-suspend.test.d.ts.map +1 -0
  17. package/dist/__tests__/e2e-thread-resume-timeout-suspend.test.js +322 -0
  18. package/dist/__tests__/e2e-thread-resume-timeout-suspend.test.js.map +1 -0
  19. package/dist/__tests__/log-tag-validity.test.d.ts +2 -0
  20. package/dist/__tests__/log-tag-validity.test.d.ts.map +1 -0
  21. package/dist/__tests__/log-tag-validity.test.js +110 -0
  22. package/dist/__tests__/log-tag-validity.test.js.map +1 -0
  23. package/dist/__tests__/setup-agent-discovery.test.js +23 -23
  24. package/dist/__tests__/setup-agent-discovery.test.js.map +1 -1
  25. package/dist/__tests__/step-show-json.test.js +5 -5
  26. package/dist/__tests__/step-show-json.test.js.map +1 -1
  27. package/dist/__tests__/step-show-text.test.d.ts +2 -0
  28. package/dist/__tests__/step-show-text.test.d.ts.map +1 -0
  29. package/dist/__tests__/step-show-text.test.js +192 -0
  30. package/dist/__tests__/step-show-text.test.js.map +1 -0
  31. package/dist/__tests__/step-turns-cli-subprocess.test.d.ts +21 -0
  32. package/dist/__tests__/step-turns-cli-subprocess.test.d.ts.map +1 -0
  33. package/dist/__tests__/step-turns-cli-subprocess.test.js +356 -0
  34. package/dist/__tests__/step-turns-cli-subprocess.test.js.map +1 -0
  35. package/dist/__tests__/step-turns-panorama-phase3.test.d.ts +21 -0
  36. package/dist/__tests__/step-turns-panorama-phase3.test.d.ts.map +1 -0
  37. package/dist/__tests__/step-turns-panorama-phase3.test.js +476 -0
  38. package/dist/__tests__/step-turns-panorama-phase3.test.js.map +1 -0
  39. package/dist/__tests__/step-turns.test.d.ts +24 -0
  40. package/dist/__tests__/step-turns.test.d.ts.map +1 -0
  41. package/dist/__tests__/step-turns.test.js +646 -0
  42. package/dist/__tests__/step-turns.test.js.map +1 -0
  43. package/dist/__tests__/store-turn-chain.test.d.ts +2 -0
  44. package/dist/__tests__/store-turn-chain.test.d.ts.map +1 -0
  45. package/dist/__tests__/store-turn-chain.test.js +341 -0
  46. package/dist/__tests__/store-turn-chain.test.js.map +1 -0
  47. package/dist/__tests__/thread-list-limit-offset.test.d.ts +24 -0
  48. package/dist/__tests__/thread-list-limit-offset.test.d.ts.map +1 -0
  49. package/dist/__tests__/thread-list-limit-offset.test.js +254 -0
  50. package/dist/__tests__/thread-list-limit-offset.test.js.map +1 -0
  51. package/dist/__tests__/thread-list-template-ms-date.test.js +7 -2
  52. package/dist/__tests__/thread-list-template-ms-date.test.js.map +1 -1
  53. package/dist/__tests__/thread.test.js +28 -14
  54. package/dist/__tests__/thread.test.js.map +1 -1
  55. package/dist/cli.js +910 -344
  56. package/dist/cli.js.map +1 -1
  57. package/dist/commands/broker-step.d.ts +10 -3
  58. package/dist/commands/broker-step.d.ts.map +1 -1
  59. package/dist/commands/broker-step.js +231 -27
  60. package/dist/commands/broker-step.js.map +1 -1
  61. package/dist/commands/prompt.d.ts.map +1 -1
  62. package/dist/commands/prompt.js +42 -50
  63. package/dist/commands/prompt.js.map +1 -1
  64. package/dist/commands/setup.d.ts +6 -4
  65. package/dist/commands/setup.d.ts.map +1 -1
  66. package/dist/commands/setup.js +16 -26
  67. package/dist/commands/setup.js.map +1 -1
  68. package/dist/commands/step.d.ts +48 -1
  69. package/dist/commands/step.d.ts.map +1 -1
  70. package/dist/commands/step.js +496 -3
  71. package/dist/commands/step.js.map +1 -1
  72. package/dist/output-mappers.d.ts +8 -0
  73. package/dist/output-mappers.d.ts.map +1 -1
  74. package/dist/output-mappers.js +72 -18
  75. package/dist/output-mappers.js.map +1 -1
  76. package/dist/schemas.d.ts +3 -0
  77. package/dist/schemas.d.ts.map +1 -1
  78. package/dist/schemas.js +17 -3
  79. package/dist/schemas.js.map +1 -1
  80. package/dist/store.d.ts +147 -1
  81. package/dist/store.d.ts.map +1 -1
  82. package/dist/store.js +254 -1
  83. package/dist/store.js.map +1 -1
  84. package/dist/text-renderers.d.ts.map +1 -1
  85. package/dist/text-renderers.js +27 -2
  86. package/dist/text-renderers.js.map +1 -1
  87. package/package.json +7 -6
  88. package/src/__tests__/broker-step-active-turns.test.ts +509 -0
  89. package/src/__tests__/broker-step-turn-chain-phase2.test.ts +525 -0
  90. package/src/__tests__/e2e-broker-step-suspend.test.ts +351 -0
  91. package/src/__tests__/e2e-thread-resume-timeout-suspend.test.ts +360 -0
  92. package/src/__tests__/log-tag-validity.test.ts +124 -0
  93. package/src/__tests__/setup-agent-discovery.test.ts +23 -23
  94. package/src/__tests__/step-show-json.test.ts +5 -5
  95. package/src/__tests__/step-show-text.test.ts +236 -0
  96. package/src/__tests__/step-turns-cli-subprocess.test.ts +411 -0
  97. package/src/__tests__/step-turns-panorama-phase3.test.ts +579 -0
  98. package/src/__tests__/step-turns.test.ts +734 -0
  99. package/src/__tests__/store-turn-chain.test.ts +386 -0
  100. package/src/__tests__/thread-list-limit-offset.test.ts +305 -0
  101. package/src/__tests__/thread-list-template-ms-date.test.ts +7 -2
  102. package/src/__tests__/thread.test.ts +29 -15
  103. package/src/cli.ts +1056 -483
  104. package/src/commands/broker-step.ts +315 -38
  105. package/src/commands/prompt.ts +42 -50
  106. package/src/commands/setup.ts +16 -28
  107. package/src/commands/step.ts +655 -3
  108. package/src/output-mappers.ts +99 -21
  109. package/src/schemas.ts +32 -2
  110. package/src/store.ts +297 -2
  111. package/src/text-renderers.ts +35 -2
@@ -0,0 +1,360 @@
1
+ /**
2
+ * Spec 4 (issue #435, Phase 2) — verification contract for the RFC #95 loop
3
+ * `timeout → suspend (checkpoint) → resume`.
4
+ *
5
+ * This is verification-only: NO resume code changed in Phase 2. The test proves
6
+ * the *existing* `uwf thread resume` path already satisfies the timeout-suspend
7
+ * resume contract by wiring the spec-3 producer to the resume consumer:
8
+ *
9
+ * 1. Drive a real sumeru send-timeout through `executeBrokerStep` (the SSE
10
+ * stream ends in `suspend`, exactly as Spec 3 verifies) so the thread's
11
+ * head step is a genuine `$status: "$SUSPEND"` node and the `(threadId,
12
+ * role)` broker session is mapped to the sumeru session.
13
+ * 2. Seed the thread to `suspended` (mirroring what `finalizeAgentStep` does
14
+ * after a suspended broker step) and assert `cmdThreadShow` reports
15
+ * `suspended` with the timeout reason — a valid resume precondition.
16
+ * 3. Call `cmdThreadResume`. Assert it is accepted, issues a FRESH
17
+ * `broker.send()` for the suspended role on the SAME mapped session (so the
18
+ * sumeru adapter resumes by `nativeId` rather than starting over), delivers
19
+ * the `-p` supplement as the continuation prompt, and — when that resumed
20
+ * send now completes (`kind:"completed"`) — advances the thread out of
21
+ * `suspended` (here straight to `end`).
22
+ *
23
+ * The second send is a `done` stream, so the gate opens and the thread proceeds;
24
+ * if it had timed out again it would simply re-arm `suspended` (Spec 3 path),
25
+ * never an error.
26
+ */
27
+
28
+ import { mkdtemp, rm, writeFile } from "node:fs/promises";
29
+ import { tmpdir } from "node:os";
30
+ import { join } from "node:path";
31
+ import { putSchema } from "@ocas/core";
32
+ import type { CasRef, ThreadId, WorkflowConfig, WorkflowPayload } from "@united-workforce/protocol";
33
+ import { createProcessLogger } from "@united-workforce/util";
34
+ import { getConfigPath } from "@united-workforce/util-agent";
35
+ import { afterEach, beforeEach, describe, expect, test, vi } from "vitest";
36
+ import { executeBrokerStep, openBrokerSessionStore } from "../commands/broker-step.js";
37
+ import { cmdThreadResume, cmdThreadShow } from "../commands/thread.js";
38
+ import { createUwfStore, type UwfStore } from "../store.js";
39
+ import { seedThreads } from "./thread-test-helpers.js";
40
+
41
+ type FetchCall = { url: string; method: string; body: string };
42
+
43
/**
 * Serialize a single Server-Sent Events frame.
 *
 * @param id    Value written to the frame's `id:` field.
 * @param event Value written to the frame's `event:` field.
 * @param data  Payload; JSON-encoded into the frame's `data:` field.
 * @returns The three fields joined by newlines and terminated by a blank line.
 */
function sseFrame(id: number, event: string, data: unknown): string {
  const fields = [`id: ${id}`, `event: ${event}`, `data: ${JSON.stringify(data)}`];
  // The trailing empty line is what closes the frame.
  return `${fields.join("\n")}\n\n`;
}
46
+
47
/**
 * Wrap pre-serialized SSE frames in a 200 `text/event-stream` Response.
 *
 * Every frame is UTF-8 encoded and queued up front, then the stream is closed,
 * so a reader receives all frames followed by end-of-stream.
 *
 * @param frames Already-formatted SSE frames, in delivery order.
 * @returns A Response whose body streams the frames.
 */
function buildSseResponse(frames: string[]): Response {
  const utf8 = new TextEncoder();
  const chunks = frames.map((frame) => utf8.encode(frame));
  const body = new ReadableStream<Uint8Array>({
    start(controller) {
      chunks.forEach((chunk) => {
        controller.enqueue(chunk);
      });
      controller.close();
    },
  });
  const headers = { "Content-Type": "text/event-stream; charset=utf-8" };
  return new Response(body, { status: 200, headers });
}
60
+
61
/**
 * Build a Response carrying `body` as JSON.
 *
 * @param status HTTP status code for the response.
 * @param body   Value to JSON-encode as the response body.
 * @returns A Response with `Content-Type: application/json`.
 */
function buildJsonResponse(status: number, body: unknown): Response {
  const payload = JSON.stringify(body);
  const headers = { "Content-Type": "application/json" };
  return new Response(payload, { status, headers });
}
67
+
68
// --- Stubbed sumeru endpoint and identifiers shared by every test ---
const HOST = "http://127.0.0.1:7900";
const GATEWAY = "planner-gw";
const ALIAS = "planner-agent";
const SESSION_ID = "ses_resume_e2e";
const NATIVE_ID = "ses_native_abc";

// --- Thread / workflow under test ---
const THREAD_ID = "06FCBROKERRESUMESTEP0001" as ThreadId;
const WORKFLOW_NAME = "broker-resume-e2e";
const ROLE = "planner";

// Elapsed time reported by the stubbed suspend frame: 30 minutes, in milliseconds.
const ELAPSED_MS = 30 * 60 * 1000;

// Operator supplement handed to `cmdThreadResume`.
const SUPPLEMENT = "继续上次未完成的任务";

// JSON schema registered as the planner role's frontmatter schema.
const PLANNER_OUTPUT_SCHEMA = {
  title: "planner-output",
  type: "object" as const,
  required: ["$status", "plan"],
  properties: {
    $status: { type: "string" as const, enum: ["done", "failed"] },
    plan: { type: "string" as const },
  },
  additionalProperties: false,
};

// Assistant output returned by the completed stream: a frontmatter block
// followed by a free-text body.
const PLANNER_RAW_OUTPUT = ["---", "$status: done", "plan: ship it", "---", "the plan body"].join(
  "\n",
);
95
+
96
/**
 * In-memory workflow config passed to `executeBrokerStep`: a single agent
 * (`ALIAS`) pointing at the stubbed host/gateway, used as the default agent,
 * with no overrides.
 */
function buildConfig(): WorkflowConfig {
  const plannerAgent = { host: HOST, gateway: GATEWAY };
  const config: WorkflowConfig = {
    agents: { [ALIAS]: plannerAgent },
    defaultAgent: ALIAS,
    agentOverrides: null,
  };
  return config;
}
103
+
104
/**
 * Write the on-disk `config.yaml` that `cmdThreadResume` reloads via
 * `loadWorkflowConfig`. Must use the Phase-3 `{host, gateway}` shape (the
 * normalizer rejects the legacy `{command}` form).
 *
 * The document is assembled line by line with explicit indentation so that
 * `host` and `gateway` are unambiguously nested under the agent alias, which
 * is itself nested under `agents`. With a flat indent those keys would parse
 * as siblings of the alias and the alias would map to null.
 *
 * @param storageRoot Storage root whose config path (per `getConfigPath`) is written.
 */
async function writeConfig(storageRoot: string): Promise<void> {
  const lines = [
    `defaultAgent: ${ALIAS}`,
    "agentOverrides: null",
    "agents:",
    `  ${ALIAS}:`,
    `    host: ${HOST}`,
    `    gateway: ${GATEWAY}`,
  ];
  await writeFile(getConfigPath(storageRoot), `${lines.join("\n")}\n`, "utf8");
}
113
+
114
/**
 * Store the fixture workflow and its start node in the CAS.
 *
 * Registers `PLANNER_OUTPUT_SCHEMA`, builds a one-role workflow (`planner`)
 * whose only edge is `done → $END`, stores the workflow, then stores a start
 * node referencing it.
 *
 * @param uwf Store to write into.
 * @returns The workflow payload and the hash of the stored start node.
 */
async function buildWorkflow(uwf: UwfStore): Promise<{
  workflow: WorkflowPayload;
  startHash: CasRef;
}> {
  const { cas } = uwf.store;
  const frontmatterHash = (await putSchema(uwf.store, PLANNER_OUTPUT_SCHEMA)) as CasRef;

  const workflow: WorkflowPayload = {
    version: 1,
    name: WORKFLOW_NAME,
    description: "broker step resume end-to-end",
    roles: {
      planner: {
        description: "plans things",
        goal: "produce a plan",
        capabilities: [],
        procedure: "think hard",
        output: "frontmatter+body",
        frontmatter: frontmatterHash,
      },
    },
    graph: {
      planner: {
        // The $END edge needs a non-empty prompt: the resumed `done` stream
        // goes through the post-step moderator, which rejects an empty edge
        // template.
        done: { role: "$END", prompt: "done", location: null },
      },
    },
  };

  const workflowHash = await cas.put(uwf.schemas.workflow, workflow);
  const startNode = { workflow: workflowHash, prompt: "p", cwd: "/tmp/work" };
  const startHash = (await cas.put(uwf.schemas.startNode, startNode)) as CasRef;
  return { workflow, startHash };
}
148
+
149
/**
 * SSE response for a send that times out: a user turn, an assistant draft
 * turn, then a `suspend` frame carrying the timeout reason, `NATIVE_ID` and
 * `ELAPSED_MS`.
 */
function suspendStream(): Response {
  const userTurn = {
    index: 0,
    role: "user",
    content: "edge prompt",
    timestamp: "",
    toolCalls: null,
  };
  const assistantTurn = {
    index: 1,
    role: "assistant",
    content: "draft1",
    timestamp: "",
    toolCalls: null,
  };
  const suspension = { reason: "timeout", nativeId: NATIVE_ID, elapsedMs: ELAPSED_MS };

  const frames = [
    sseFrame(1, "turn", { type: "@sumeru/turn", value: userTurn }),
    sseFrame(2, "turn", { type: "@sumeru/turn", value: assistantTurn }),
    sseFrame(3, "suspend", { type: "@sumeru/suspend", value: suspension }),
  ];
  return buildSseResponse(frames);
}
165
+
166
/**
 * SSE response for a send that finishes: one assistant turn whose content is
 * `PLANNER_RAW_OUTPUT`, then a `done` frame with the run summary.
 */
function completedStream(): Response {
  const assistantTurn = {
    index: 1,
    role: "assistant",
    content: PLANNER_RAW_OUTPUT,
    timestamp: "",
    toolCalls: null,
  };
  const summary = { turnCount: 2, tokens: { in: 9, out: 4 }, durationMs: 42 };

  const frames = [
    sseFrame(1, "turn", { type: "@sumeru/turn", value: assistantTurn }),
    sseFrame(2, "done", { type: "@sumeru/summary", value: summary }),
  ];
  return buildSseResponse(frames);
}
184
+
185
/**
 * Reduce any accepted `fetch` input to its URL string.
 *
 * @param input A URL string, a `URL` object, or a `Request`.
 * @returns The string itself, the URL's `href`, or the request's `url`.
 */
function resolveFetchUrl(input: string | URL | Request): string {
  if (typeof input !== "string") {
    return input instanceof URL ? input.href : input.url;
  }
  return input;
}
190
+
191
/**
 * Create the process logger handed to `executeBrokerStep`, rooted at `tmpDir`
 * and tagged with the fixture thread id and workflow name.
 */
function makePlog(tmpDir: string) {
  const context = { thread: THREAD_ID, workflow: WORKFLOW_NAME };
  return createProcessLogger({ storageRoot: tmpDir, context });
}
197
+
198
describe("uwf thread resume — timeout-suspended thread resumes via fresh send (issue #435)", () => {
  let storageRoot: string;
  let previousOcasHome: string | undefined;
  let recordedCalls: FetchCall[];
  // Number of message sends seen by the stub. Send #1 (the step that suspends)
  // gets the suspend stream; every later send (the resume) gets the completed
  // stream. Counting, rather than swapping the stub, means the resume really
  // re-enters the same stub.
  let sendCount: number;

  /**
   * Replacement for the global `fetch`: records every call, answers session
   * creation with `SESSION_ID`, answers message sends per `sendCount`, and
   * returns a 500 for anything else.
   */
  async function fakeFetch(
    input: string | URL | Request,
    init: RequestInit | undefined,
  ): Promise<Response> {
    const url = resolveFetchUrl(input);
    recordedCalls.push({
      url,
      method: init?.method ?? "GET",
      body: typeof init?.body === "string" ? init.body : "",
    });

    if (url.endsWith(`/gateways/${GATEWAY}/sessions`)) {
      return buildJsonResponse(201, {
        type: "@sumeru/session",
        value: { id: SESSION_ID, gateway: GATEWAY },
      });
    }
    if (url.endsWith(`/sessions/${SESSION_ID}/messages`)) {
      sendCount += 1;
      return sendCount === 1 ? suspendStream() : completedStream();
    }
    return buildJsonResponse(500, { error: "unexpected url", url });
  }

  /** Every recorded call that targeted a `/messages` endpoint, in call order. */
  function messageSends(): FetchCall[] {
    return recordedCalls.filter((call) => call.url.endsWith("/messages"));
  }

  beforeEach(async () => {
    previousOcasHome = process.env.OCAS_HOME;
    storageRoot = await mkdtemp(join(tmpdir(), "broker-resume-e2e-"));
    process.env.OCAS_HOME = join(storageRoot, "cas");
    recordedCalls = [];
    sendCount = 0;
    vi.stubGlobal("fetch", fakeFetch);
  });

  afterEach(async () => {
    vi.unstubAllGlobals();
    if (previousOcasHome !== undefined) process.env.OCAS_HOME = previousOcasHome;
    else delete process.env.OCAS_HOME;
    await rm(storageRoot, { recursive: true, force: true });
  });

  /**
   * Run `executeBrokerStep` against the stub so the first send ends in a
   * suspend frame, then seed the thread record as `suspended` at the resulting
   * step (mirroring `finalizeAgentStep` after a suspended broker step).
   *
   * @returns The hash of the suspended step and the reason from its frontmatter.
   */
  async function suspendThread(
    uwf: UwfStore,
    workflow: WorkflowPayload,
    startHash: CasRef,
  ): Promise<{ suspendHash: CasRef; reason: string }> {
    const stepResult = await executeBrokerStep({
      storageRoot,
      uwf,
      config: buildConfig(),
      workflow,
      threadId: THREAD_ID,
      role: ROLE,
      edgePrompt: "make a plan",
      effectiveCwd: "/tmp/work",
      startHash,
      prevHash: null,
      agentOverride: null,
      previousAttempts: null,
      plog: makePlog(storageRoot),
    });

    const frontmatter = stepResult.frontmatter as Record<string, unknown>;
    const reason = frontmatter.reason as string;
    await seedThreads(storageRoot, {
      [THREAD_ID]: {
        head: stepResult.stepHash,
        status: "suspended",
        suspendedRole: ROLE,
        suspendMessage: reason,
        completedAt: null,
      },
    });
    return { suspendHash: stepResult.stepHash, reason };
  }

  /**
   * Shared arrange step: create the store, the workflow and the on-disk
   * config, then suspend the thread.
   */
  async function arrangeSuspendedThread(): Promise<{ suspendHash: CasRef; reason: string }> {
    const uwf = await createUwfStore(storageRoot);
    const { workflow, startHash } = await buildWorkflow(uwf);
    await writeConfig(storageRoot);
    return suspendThread(uwf, workflow, startHash);
  }

  test("a timeout-suspended thread is shown as suspended, then resume advances it past the gate", async () => {
    const { suspendHash, reason } = await arrangeSuspendedThread();

    // Before resuming: the thread is `suspended` and carries the timeout reason.
    const before = await cmdThreadShow(storageRoot, THREAD_ID);
    expect(before.status).toBe("suspended");
    expect(before.suspendedRole).toBe(ROLE);
    expect(before.suspendMessage).toBe(reason);
    expect(before.suspendMessage as string).toContain(NATIVE_ID);

    // The resume is accepted and its send completes, so the thread moves off
    // the suspended step (here directly to `end` through the `done` edge).
    const resumed = await cmdThreadResume(storageRoot, THREAD_ID, SUPPLEMENT, null);
    expect(resumed.status).toBe("end");
    expect(resumed.done).toBe(true);
    expect(resumed.error).toBeNull();
    expect(resumed.head).not.toBe(suspendHash);

    // `thread show` reports the same: no suspension left.
    const after = await cmdThreadShow(storageRoot, THREAD_ID);
    expect(after.status).toBe("end");
    expect(after.suspendedRole).toBeNull();
    expect(after.suspendMessage).toBeNull();
  });

  test("resume issues a FRESH send reusing the SAME mapped session (no new createSession)", async () => {
    await arrangeSuspendedThread();
    await cmdThreadResume(storageRoot, THREAD_ID, SUPPLEMENT, null);

    // Only the suspend step creates a session; the resume must reuse the
    // cached (threadId, role) → sessionId mapping.
    const sessionCreations = recordedCalls.filter((call) =>
      call.url.endsWith(`/gateways/${GATEWAY}/sessions`),
    );
    expect(sessionCreations).toHaveLength(1);

    // Two sends — the suspended one and the continuation — both on the same
    // session id.
    const sends = messageSends();
    expect(sends).toHaveLength(2);
    sends.forEach((send) => {
      expect(send.url).toContain(`/sessions/${SESSION_ID}/messages`);
    });

    // The stored broker session row is unchanged, ready for another resume.
    const sessions = openBrokerSessionStore(storageRoot);
    try {
      const mapping = sessions.getSession(THREAD_ID, ROLE);
      expect(mapping?.sessionId).toBe(SESSION_ID);
      expect(mapping?.host).toBe(HOST);
      expect(mapping?.gateway).toBe(GATEWAY);
    } finally {
      sessions.close();
    }
  });

  test("the -p supplement is delivered as the continuation prompt on the resume send", async () => {
    await arrangeSuspendedThread();
    await cmdThreadResume(storageRoot, THREAD_ID, SUPPLEMENT, null);

    const [, resumeSend] = messageSends();
    expect(resumeSend).toBeDefined();
    // The continuation body must contain both the operator supplement and the
    // suspend reason (which embeds the native id).
    expect(resumeSend?.body).toContain(SUPPLEMENT);
    expect(resumeSend?.body).toContain(NATIVE_ID);
  });
});
@@ -0,0 +1,124 @@
1
+ import { readdir, readFile, stat } from "node:fs/promises";
2
+ import { join } from "node:path";
3
+ import { describe, expect, test } from "vitest";
4
+
5
+ /**
6
+ * Static regression guard for log tags (#426).
7
+ *
8
+ * Every `log()` call site uses a hand-written 8-char Crockford Base32 tag.
9
+ * Crockford Base32 excludes I, L, O, U to avoid visual ambiguity, and
10
+ * `assertValidLogTag()` (util/process-logger/log-tag.ts) throws at runtime
11
+ * when a tag contains an illegal character.
12
+ *
13
+ * The bug: `PL_FRONTMATTER_FAIL = "F4FA1L7Z"` (a leet spelling of
14
+ * "FRONTMATTER FAIL") smuggled an `L` into the tag. It only fires on the
15
+ * frontmatter-extraction-failure path, so it stayed dormant until a planner
16
+ * step failed extraction — then the failure logger itself crashed the process,
17
+ * masking the real error.
18
+ *
19
+ * This test scans the source of the cli + broker packages and asserts that
20
+ * EVERY literal tag — whether written inline as `log("XXXXXXXX", ...)` or as a
21
+ * `const PL_* = "XXXXXXXX"` constant — is a valid Crockford Base32 tag. A new
22
+ * illegal tag, in any file, fails here at build time instead of at runtime.
23
+ */
24
+
25
// Crockford Base32 alphabet: digits plus upper-case letters, with I, L, O and U
// left out (mirrors util/src/base32.ts).
const CROCKFORD_BASE32_ALPHABET = "0123456789ABCDEFGHJKMNPQRSTVWXYZ";
const TAG_CHAR_SET = new Set<string>([...CROCKFORD_BASE32_ALPHABET]);
const TAG_LENGTH = 8;

/**
 * Decide whether `tag` is an acceptable log tag.
 *
 * A tag is accepted when it is exactly `TAG_LENGTH` characters long and every
 * character, after upper-casing, belongs to the Crockford Base32 alphabet.
 *
 * @param tag Candidate tag literal.
 * @returns `true` when the tag passes both checks, otherwise `false`.
 */
function isValidLogTag(tag: string): boolean {
  const hasExpectedLength = tag.length === TAG_LENGTH;
  return hasExpectedLength && [...tag].every((ch) => TAG_CHAR_SET.has(ch.toUpperCase()));
}
41
+
42
+ // Roots scanned for log-tag literals, relative to this test file.
43
+ const SCAN_ROOTS = [
44
+ join(__dirname, ".."), // packages/cli/src
45
+ join(__dirname, "..", "..", "..", "broker", "src"), // packages/broker/src
46
+ ];
47
+
48
/**
 * Recursively list the non-test TypeScript files under `dir`.
 *
 * Entries named `node_modules` or `dist` are skipped at every level. A file is
 * included when its name ends in `.ts` but not `.test.ts`.
 *
 * The walk is best-effort: a directory that cannot be listed contributes no
 * files, and an entry that cannot be stat'ed is skipped instead of aborting
 * the scan. The second case covers dangling symlinks (`stat` follows links)
 * and entries removed between `readdir` and `stat`.
 *
 * @param dir Directory to walk.
 * @returns Full paths of the matching files.
 */
async function collectTsFiles(dir: string): Promise<string[]> {
  let names: string[];
  try {
    names = await readdir(dir);
  } catch {
    // Missing or unreadable directory: nothing to scan here.
    return [];
  }

  const out: string[] = [];
  for (const name of names) {
    if (name === "node_modules" || name === "dist") {
      continue;
    }
    const full = join(dir, name);
    // One unreadable entry must not fail the whole scan with an unrelated error.
    const info = await stat(full).catch(() => null);
    if (info === null) {
      continue;
    }
    if (info.isDirectory()) {
      out.push(...(await collectTsFiles(full)));
    } else if (info.isFile() && name.endsWith(".ts") && !name.endsWith(".test.ts")) {
      out.push(full);
    }
  }
  return out;
}
70
+
71
+ type TagOccurrence = {
72
+ tag: string;
73
+ file: string;
74
+ context: string;
75
+ };
76
+
77
+ // Matches `log("XXXXXXXX"` call sites and `... = "XXXXXXXX"` tag constants.
78
+ // The capturing group grabs an 8-char alphanumeric literal; isValidLogTag then
79
+ // decides legality. We intentionally over-collect (any 8-char string assigned
80
+ // to a PL_/TAG const or passed as log()'s first arg) and validate each.
81
+ const LOG_CALL_RE = /\blog\(\s*"([0-9A-Za-z]{8})"/g;
82
+ const TAG_CONST_RE =
83
+ /\bconst\s+(?:PL_[A-Z0-9_]+|[A-Z0-9_]*TAG[A-Z0-9_]*)\s*=\s*"([0-9A-Za-z]{8})"/g;
84
+
85
/**
 * Scan every file under `SCAN_ROOTS` for log-tag literals.
 *
 * Each file is read once and matched against `LOG_CALL_RE` and then
 * `TAG_CONST_RE`. Every match yields one occurrence: the captured tag, the
 * file it came from, and the full matched text as context. No validation
 * happens here; callers decide which tags are legal.
 *
 * @returns All occurrences, in root → file → pattern → match order.
 */
async function collectTagOccurrences(): Promise<TagOccurrence[]> {
  const found: TagOccurrence[] = [];
  for (const root of SCAN_ROOTS) {
    for (const file of await collectTsFiles(root)) {
      const source = await readFile(file, "utf8");
      for (const pattern of [LOG_CALL_RE, TAG_CONST_RE]) {
        // The patterns are shared, global regexes: always start from offset 0.
        pattern.lastIndex = 0;
        for (const match of source.matchAll(pattern)) {
          found.push({ tag: match[1], file, context: match[0] });
        }
      }
    }
  }
  return found;
}
103
+
104
describe("log tag validity (#426 regression guard)", () => {
  test("collects at least the known PL_ tag constants", async () => {
    // Guard against a vacuous pass: if the paths or patterns stop matching,
    // the scan returns too few tags and this fails first.
    const { length: tagCount } = await collectTagOccurrences();
    expect(tagCount).toBeGreaterThanOrEqual(10);
  });

  test("every log tag literal in cli + broker is valid Crockford Base32", async () => {
    const all = await collectTagOccurrences();
    const invalid = all.filter(({ tag }) => !isValidLogTag(tag));
    // One line per offender so the failure message names the tag and its file.
    const lines = invalid.map(({ tag, context, file }) => ` ${tag} (${context}) in ${file}`);
    expect(invalid, `Illegal Crockford Base32 log tags found:\n${lines.join("\n")}`).toEqual([]);
  });

  test("the specific F4FA1L7Z bug (#426) stays fixed", async () => {
    const all = await collectTagOccurrences();
    expect(all.filter(({ tag }) => tag === "F4FA1L7Z")).toEqual([]);
  });
});
@@ -10,19 +10,19 @@ import { _agentNameFromBinary, _printAgentMenu, cmdSetup } from "../commands/set
10
10
 
11
11
  describe("_agentNameFromBinary", () => {
12
12
  test("strips uwf- prefix", () => {
13
- expect(_agentNameFromBinary("uwf-hermes")).toBe("hermes");
13
+ expect(_agentNameFromBinary("uwf-builtin")).toBe("builtin");
14
14
  });
15
15
 
16
16
  test("strips uwf- prefix for compound names", () => {
17
- expect(_agentNameFromBinary("uwf-claude-code")).toBe("claude-code");
17
+ expect(_agentNameFromBinary("uwf-some-gateway")).toBe("some-gateway");
18
18
  });
19
19
 
20
20
  test("returns as-is when no uwf- prefix", () => {
21
- expect(_agentNameFromBinary("hermes")).toBe("hermes");
21
+ expect(_agentNameFromBinary("builtin")).toBe("builtin");
22
22
  });
23
23
 
24
- test("handles uwf-builtin", () => {
25
- expect(_agentNameFromBinary("uwf-builtin")).toBe("builtin");
24
+ test("handles uwf-mock", () => {
25
+ expect(_agentNameFromBinary("uwf-mock")).toBe("mock");
26
26
  });
27
27
  });
28
28
 
@@ -35,10 +35,10 @@ describe("_printAgentMenu", () => {
35
35
  logs.push(args.join(" "));
36
36
  });
37
37
 
38
- _printAgentMenu(["uwf-hermes", "uwf-claude-code"]);
38
+ _printAgentMenu(["uwf-builtin", "uwf-mock"]);
39
39
 
40
- expect(logs.some((l) => l.includes("Hermes"))).toBe(true);
41
- expect(logs.some((l) => l.includes("Claude Code"))).toBe(true);
40
+ expect(logs.some((l) => l.includes("Built-in"))).toBe(true);
41
+ expect(logs.some((l) => l.includes("Mock"))).toBe(true);
42
42
 
43
43
  vi.restoreAllMocks();
44
44
  });
@@ -84,19 +84,19 @@ describe("cmdSetup agent configuration (engine config is LLM-free, issue #143)",
84
84
  });
85
85
 
86
86
  test("preserves existing agents when adding new one", async () => {
87
- await cmdSetup({ agent: "hermes", storageRoot });
87
+ await cmdSetup({ agent: "builtin", storageRoot });
88
88
  await cmdSetup({ agent: "claude-code", storageRoot });
89
89
 
90
90
  const config = parse(readFileSync(join(storageRoot, "config.yaml"), "utf8"));
91
- expect(config.agents.hermes).toBeDefined();
91
+ expect(config.agents.builtin).toBeDefined();
92
92
  expect(config.agents["claude-code"]).toBeDefined();
93
93
  expect(config.defaultAgent).toBe("claude-code");
94
94
  });
95
95
 
96
96
  test("updates defaultAgent on re-run with different agent", async () => {
97
- await cmdSetup({ agent: "hermes", storageRoot });
97
+ await cmdSetup({ agent: "mock", storageRoot });
98
98
  const config1 = parse(readFileSync(join(storageRoot, "config.yaml"), "utf8"));
99
- expect(config1.defaultAgent).toBe("hermes");
99
+ expect(config1.defaultAgent).toBe("mock");
100
100
 
101
101
  await cmdSetup({ agent: "builtin", storageRoot });
102
102
  const config2 = parse(readFileSync(join(storageRoot, "config.yaml"), "utf8"));
@@ -104,17 +104,17 @@ describe("cmdSetup agent configuration (engine config is LLM-free, issue #143)",
104
104
  });
105
105
 
106
106
  test("normalizes agent name with uwf- prefix to bare name", async () => {
107
- const result = await cmdSetup({ agent: "uwf-hermes", storageRoot });
107
+ const result = await cmdSetup({ agent: "uwf-builtin", storageRoot });
108
108
 
109
- expect(result.defaultAgent).toBe("hermes");
109
+ expect(result.defaultAgent).toBe("builtin");
110
110
  const config = parse(readFileSync(join(storageRoot, "config.yaml"), "utf8"));
111
- expect(config.agents.hermes).toEqual({
111
+ expect(config.agents.builtin).toEqual({
112
112
  host: "http://127.0.0.1:7900",
113
- gateway: "hermes",
113
+ gateway: "builtin",
114
114
  });
115
- expect(config.defaultAgent).toBe("hermes");
115
+ expect(config.defaultAgent).toBe("builtin");
116
116
  // Verify no duplicate uwf- prefix
117
- expect(config.agents["uwf-hermes"]).toBeUndefined();
117
+ expect(config.agents["uwf-builtin"]).toBeUndefined();
118
118
  });
119
119
 
120
120
  test("normalizes uwf-claude-code to claude-code", async () => {
@@ -137,18 +137,18 @@ describe("cmdSetup agent configuration (engine config is LLM-free, issue #143)",
137
137
  mkdirSync(storageRoot, { recursive: true });
138
138
  writeFileSync(
139
139
  join(storageRoot, "config.yaml"),
140
- "providers:\n openai: { baseUrl: x, apiKey: y }\nmodels:\n default: { provider: openai, name: gpt-4o }\ndefaultModel: default\nagents:\n hermes: { host: 'http://127.0.0.1:7900', gateway: hermes }\ndefaultAgent: hermes\n",
140
+ "providers:\n openai: { baseUrl: x, apiKey: y }\nmodels:\n default: { provider: openai, name: gpt-4o }\ndefaultModel: default\nagents:\n builtin: { host: 'http://127.0.0.1:7900', gateway: builtin }\ndefaultAgent: builtin\n",
141
141
  "utf8",
142
142
  );
143
- await cmdSetup({ agent: "hermes", storageRoot });
143
+ await cmdSetup({ agent: "builtin", storageRoot });
144
144
  const config = parse(readFileSync(join(storageRoot, "config.yaml"), "utf8"));
145
145
  expect(config.providers).toBeUndefined();
146
146
  expect(config.models).toBeUndefined();
147
147
  expect(config.defaultModel).toBeUndefined();
148
- expect(config.agents.hermes).toEqual({
148
+ expect(config.agents.builtin).toEqual({
149
149
  host: "http://127.0.0.1:7900",
150
- gateway: "hermes",
150
+ gateway: "builtin",
151
151
  });
152
- expect(config.defaultAgent).toBe("hermes");
152
+ expect(config.defaultAgent).toBe("builtin");
153
153
  });
154
154
  });
@@ -169,7 +169,7 @@ describe("cmdStepShow JSON serialization", () => {
169
169
  expect(jsonOutput).toContain("\\n");
170
170
 
171
171
  const parsed = JSON.parse(jsonOutput);
172
- expect(parsed.turns[0].toolCalls[0].args).toContain("\n");
172
+ expect(parsed.detail.turns[0].toolCalls[0].args).toContain("\n");
173
173
  });
174
174
 
175
175
  test("escapes tabs in tool call args", async () => {
@@ -239,7 +239,7 @@ describe("cmdStepShow JSON serialization", () => {
239
239
 
240
240
  expect(() => JSON.parse(jsonOutput)).not.toThrow();
241
241
  const parsed = JSON.parse(jsonOutput);
242
- expect(parsed.turns).toBeDefined();
242
+ expect(parsed.detail.turns).toBeDefined();
243
243
  });
244
244
 
245
245
  test("handles Unicode control characters", async () => {
@@ -291,7 +291,7 @@ describe("cmdStepShow JSON serialization", () => {
291
291
 
292
292
  expect(() => JSON.parse(jsonOutput)).not.toThrow();
293
293
  const parsed = JSON.parse(jsonOutput);
294
- expect(parsed.turns).toHaveLength(2);
294
+ expect(parsed.detail.turns).toHaveLength(2);
295
295
  });
296
296
 
297
297
  test("YAML output format is unaffected", async () => {
@@ -333,7 +333,7 @@ describe("cmdStepShow JSON serialization", () => {
333
333
 
334
334
  expect(() => JSON.parse(jsonOutput)).not.toThrow();
335
335
  const parsed = JSON.parse(jsonOutput);
336
- expect(parsed.turns).toBeDefined();
336
+ expect(parsed.detail.turns).toBeDefined();
337
337
  });
338
338
 
339
339
  test("handles large step with multiple tool calls", async () => {
@@ -369,6 +369,6 @@ describe("cmdStepShow JSON serialization", () => {
369
369
  expect(() => JSON.parse(jsonOutput)).not.toThrow();
370
370
 
371
371
  const parsed = JSON.parse(jsonOutput);
372
- expect(parsed.turns).toHaveLength(25);
372
+ expect(parsed.detail.turns).toHaveLength(25);
373
373
  });
374
374
  });