npm - @united-workforce/cli - Versions diffs - 0.7.0 → 0.8.1 - Mend

@united-workforce/cli 0.7.0 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (111)

package/README.md +32 -5
package/dist/.build-fingerprint +1 -0
package/dist/__tests__/broker-step-active-turns.test.d.ts +20 -0
package/dist/__tests__/broker-step-active-turns.test.d.ts.map +1 -0
package/dist/__tests__/broker-step-active-turns.test.js +428 -0
package/dist/__tests__/broker-step-active-turns.test.js.map +1 -0
package/dist/__tests__/broker-step-turn-chain-phase2.test.d.ts +13 -0
package/dist/__tests__/broker-step-turn-chain-phase2.test.d.ts.map +1 -0
package/dist/__tests__/broker-step-turn-chain-phase2.test.js +429 -0
package/dist/__tests__/broker-step-turn-chain-phase2.test.js.map +1 -0
package/dist/__tests__/e2e-broker-step-suspend.test.d.ts +18 -0
package/dist/__tests__/e2e-broker-step-suspend.test.d.ts.map +1 -0
package/dist/__tests__/e2e-broker-step-suspend.test.js +313 -0
package/dist/__tests__/e2e-broker-step-suspend.test.js.map +1 -0
package/dist/__tests__/e2e-thread-resume-timeout-suspend.test.d.ts +28 -0
package/dist/__tests__/e2e-thread-resume-timeout-suspend.test.d.ts.map +1 -0
package/dist/__tests__/e2e-thread-resume-timeout-suspend.test.js +322 -0
package/dist/__tests__/e2e-thread-resume-timeout-suspend.test.js.map +1 -0
package/dist/__tests__/log-tag-validity.test.d.ts +2 -0
package/dist/__tests__/log-tag-validity.test.d.ts.map +1 -0
package/dist/__tests__/log-tag-validity.test.js +110 -0
package/dist/__tests__/log-tag-validity.test.js.map +1 -0
package/dist/__tests__/setup-agent-discovery.test.js +23 -23
package/dist/__tests__/setup-agent-discovery.test.js.map +1 -1
package/dist/__tests__/step-show-json.test.js +5 -5
package/dist/__tests__/step-show-json.test.js.map +1 -1
package/dist/__tests__/step-show-text.test.d.ts +2 -0
package/dist/__tests__/step-show-text.test.d.ts.map +1 -0
package/dist/__tests__/step-show-text.test.js +192 -0
package/dist/__tests__/step-show-text.test.js.map +1 -0
package/dist/__tests__/step-turns-cli-subprocess.test.d.ts +21 -0
package/dist/__tests__/step-turns-cli-subprocess.test.d.ts.map +1 -0
package/dist/__tests__/step-turns-cli-subprocess.test.js +356 -0
package/dist/__tests__/step-turns-cli-subprocess.test.js.map +1 -0
package/dist/__tests__/step-turns-panorama-phase3.test.d.ts +21 -0
package/dist/__tests__/step-turns-panorama-phase3.test.d.ts.map +1 -0
package/dist/__tests__/step-turns-panorama-phase3.test.js +476 -0
package/dist/__tests__/step-turns-panorama-phase3.test.js.map +1 -0
package/dist/__tests__/step-turns.test.d.ts +24 -0
package/dist/__tests__/step-turns.test.d.ts.map +1 -0
package/dist/__tests__/step-turns.test.js +646 -0
package/dist/__tests__/step-turns.test.js.map +1 -0
package/dist/__tests__/store-turn-chain.test.d.ts +2 -0
package/dist/__tests__/store-turn-chain.test.d.ts.map +1 -0
package/dist/__tests__/store-turn-chain.test.js +341 -0
package/dist/__tests__/store-turn-chain.test.js.map +1 -0
package/dist/__tests__/thread-list-limit-offset.test.d.ts +24 -0
package/dist/__tests__/thread-list-limit-offset.test.d.ts.map +1 -0
package/dist/__tests__/thread-list-limit-offset.test.js +254 -0
package/dist/__tests__/thread-list-limit-offset.test.js.map +1 -0
package/dist/__tests__/thread-list-template-ms-date.test.js +7 -2
package/dist/__tests__/thread-list-template-ms-date.test.js.map +1 -1
package/dist/__tests__/thread.test.js +28 -14
package/dist/__tests__/thread.test.js.map +1 -1
package/dist/cli.js +910 -344
package/dist/cli.js.map +1 -1
package/dist/commands/broker-step.d.ts +10 -3
package/dist/commands/broker-step.d.ts.map +1 -1
package/dist/commands/broker-step.js +231 -27
package/dist/commands/broker-step.js.map +1 -1
package/dist/commands/prompt.d.ts.map +1 -1
package/dist/commands/prompt.js +42 -50
package/dist/commands/prompt.js.map +1 -1
package/dist/commands/setup.d.ts +6 -4
package/dist/commands/setup.d.ts.map +1 -1
package/dist/commands/setup.js +16 -26
package/dist/commands/setup.js.map +1 -1
package/dist/commands/step.d.ts +48 -1
package/dist/commands/step.d.ts.map +1 -1
package/dist/commands/step.js +496 -3
package/dist/commands/step.js.map +1 -1
package/dist/output-mappers.d.ts +8 -0
package/dist/output-mappers.d.ts.map +1 -1
package/dist/output-mappers.js +72 -18
package/dist/output-mappers.js.map +1 -1
package/dist/schemas.d.ts +3 -0
package/dist/schemas.d.ts.map +1 -1
package/dist/schemas.js +17 -3
package/dist/schemas.js.map +1 -1
package/dist/store.d.ts +147 -1
package/dist/store.d.ts.map +1 -1
package/dist/store.js +254 -1
package/dist/store.js.map +1 -1
package/dist/text-renderers.d.ts.map +1 -1
package/dist/text-renderers.js +27 -2
package/dist/text-renderers.js.map +1 -1
package/package.json +7 -6
package/src/__tests__/broker-step-active-turns.test.ts +509 -0
package/src/__tests__/broker-step-turn-chain-phase2.test.ts +525 -0
package/src/__tests__/e2e-broker-step-suspend.test.ts +351 -0
package/src/__tests__/e2e-thread-resume-timeout-suspend.test.ts +360 -0
package/src/__tests__/log-tag-validity.test.ts +124 -0
package/src/__tests__/setup-agent-discovery.test.ts +23 -23
package/src/__tests__/step-show-json.test.ts +5 -5
package/src/__tests__/step-show-text.test.ts +236 -0
package/src/__tests__/step-turns-cli-subprocess.test.ts +411 -0
package/src/__tests__/step-turns-panorama-phase3.test.ts +579 -0
package/src/__tests__/step-turns.test.ts +734 -0
package/src/__tests__/store-turn-chain.test.ts +386 -0
package/src/__tests__/thread-list-limit-offset.test.ts +305 -0
package/src/__tests__/thread-list-template-ms-date.test.ts +7 -2
package/src/__tests__/thread.test.ts +29 -15
package/src/cli.ts +1056 -483
package/src/commands/broker-step.ts +315 -38
package/src/commands/prompt.ts +42 -50
package/src/commands/setup.ts +16 -28
package/src/commands/step.ts +655 -3
package/src/output-mappers.ts +99 -21
package/src/schemas.ts +32 -2
package/src/store.ts +297 -2
package/src/text-renderers.ts +35 -2

package/src/__tests__/e2e-thread-resume-timeout-suspend.test.ts ADDED Viewed

@@ -0,0 +1,360 @@
+/**
+ * Spec 4 (issue #435, Phase 2) — verification contract for the RFC #95 loop
+ * `timeout → suspend (checkpoint) → resume`.
+ *
+ * This is verification-only: NO resume code changed in Phase 2. The test proves
+ * the *existing* `uwf thread resume` path already satisfies the timeout-suspend
+ * resume contract by wiring the spec-3 producer to the resume consumer:
+ *
+ *   1. Drive a real sumeru send-timeout through `executeBrokerStep` (the SSE
+ *      stream ends in `suspend`, exactly as Spec 3 verifies) so the thread's
+ *      head step is a genuine `$status: "$SUSPEND"` node and the `(threadId,
+ *      role)` broker session is mapped to the sumeru session.
+ *   2. Seed the thread to `suspended` (mirroring what `finalizeAgentStep` does
+ *      after a suspended broker step) and assert `cmdThreadShow` reports
+ *      `suspended` with the timeout reason — a valid resume precondition.
+ *   3. Call `cmdThreadResume`. Assert it is accepted, issues a FRESH
+ *      `broker.send()` for the suspended role on the SAME mapped session (so the
+ *      sumeru adapter resumes by `nativeId` rather than starting over), delivers
+ *      the `-p` supplement as the continuation prompt, and — when that resumed
+ *      send now completes (`kind:"completed"`) — advances the thread out of
+ *      `suspended` (here straight to `end`).
+ *
+ * The second send is a `done` stream, so the gate opens and the thread proceeds;
+ * if it had timed out again it would simply re-arm `suspended` (Spec 3 path),
+ * never an error.
+ */
+import { mkdtemp, rm, writeFile } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { putSchema } from "@ocas/core";
+import type { CasRef, ThreadId, WorkflowConfig, WorkflowPayload } from "@united-workforce/protocol";
+import { createProcessLogger } from "@united-workforce/util";
+import { getConfigPath } from "@united-workforce/util-agent";
+import { afterEach, beforeEach, describe, expect, test, vi } from "vitest";
+import { executeBrokerStep, openBrokerSessionStore } from "../commands/broker-step.js";
+import { cmdThreadResume, cmdThreadShow } from "../commands/thread.js";
+import { createUwfStore, type UwfStore } from "../store.js";
+import { seedThreads } from "./thread-test-helpers.js";
+// One request recorded by the stubbed global `fetch`: the resolved URL, the
+// HTTP method ("GET" when none was given) and the request body ("" unless the
+// body was passed as a string).
+type FetchCall = { url: string; method: string; body: string };
+/**
+ * Serialize a single server-sent-event frame: `id`, `event` and the
+ * JSON-encoded `data` field, followed by the blank line that ends the frame.
+ */
+function sseFrame(id: number, event: string, data: unknown): string {
+  const payload = JSON.stringify(data);
+  const fields = [`id: ${id}`, `event: ${event}`, `data: ${payload}`];
+  // Two trailing empty entries produce the terminating "\n\n".
+  return [...fields, "", ""].join("\n");
+}
+/**
+ * Wrap pre-rendered SSE frames in a 200 `text/event-stream` Response. The
+ * body stream enqueues every frame in order and then closes.
+ */
+function buildSseResponse(frames: string[]): Response {
+  const utf8 = new TextEncoder();
+  const body = new ReadableStream<Uint8Array>({
+    start(controller) {
+      for (let i = 0; i < frames.length; i += 1) {
+        controller.enqueue(utf8.encode(frames[i]));
+      }
+      controller.close();
+    },
+  });
+  const headers = { "Content-Type": "text/event-stream; charset=utf-8" };
+  return new Response(body, { status: 200, headers });
+}
+/** Build a JSON Response with the given HTTP status and a JSON-encoded body. */
+function buildJsonResponse(status: number, body: unknown): Response {
+  const payload = JSON.stringify(body);
+  const headers = { "Content-Type": "application/json" };
+  return new Response(payload, { status, headers });
+}
+// JSON schema for the planner role's frontmatter: exactly `$status`
+// ("done" | "failed") and a string `plan`, with no extra properties allowed.
+const PLANNER_OUTPUT_SCHEMA = {
+  title: "planner-output",
+  type: "object" as const,
+  required: ["$status", "plan"],
+  properties: {
+    $status: { type: "string" as const, enum: ["done", "failed"] },
+    plan: { type: "string" as const },
+  },
+  additionalProperties: false,
+};
+// Assistant output used by the completed (resumed) stream: a frontmatter block
+// that satisfies PLANNER_OUTPUT_SCHEMA followed by a body.
+const PLANNER_RAW_OUTPUT = `---
+$status: done
+plan: ship it
+---
+the plan body`;
+// Agent endpoint fixtures shared by the in-memory config, the on-disk
+// config.yaml and the URL matching in the fetch stub.
+const HOST = "http://127.0.0.1:7900";
+const GATEWAY = "planner-gw";
+const ALIAS = "planner-agent";
+// Session id returned by the stubbed create-session call.
+const SESSION_ID = "ses_resume_e2e";
+const THREAD_ID = "06FCBROKERRESUMESTEP0001" as ThreadId;
+const ROLE = "planner";
+// Values carried in the `suspend` SSE frame (1800000 ms = 30 minutes).
+const NATIVE_ID = "ses_native_abc";
+const ELAPSED_MS = 1800000;
+const WORKFLOW_NAME = "broker-resume-e2e";
+// Operator supplement passed to `cmdThreadResume`; the tests assert it appears
+// in the body of the resume send.
+const SUPPLEMENT = "继续上次未完成的任务";
+/**
+ * In-memory workflow config handed to `executeBrokerStep`: a single agent
+ * (`ALIAS`) pointing at `HOST`/`GATEWAY`, used as the default, no overrides.
+ */
+function buildConfig(): WorkflowConfig {
+  const plannerAgent = { host: HOST, gateway: GATEWAY };
+  return { agents: { [ALIAS]: plannerAgent }, defaultAgent: ALIAS, agentOverrides: null };
+}
+/**
+ * Persist the `config.yaml` that `cmdThreadResume` re-reads through
+ * `loadWorkflowConfig`. The file uses the Phase-3 `{host, gateway}` agent
+ * shape, because the normalizer rejects the legacy `{command}` form.
+ */
+async function writeConfig(storageRoot: string): Promise<void> {
+  const lines = [
+    `defaultAgent: ${ALIAS}`,
+    "agentOverrides: null",
+    "agents:",
+    `  ${ALIAS}:`,
+    `    host: ${HOST}`,
+    `    gateway: ${GATEWAY}`,
+    "", // keeps the trailing newline
+  ];
+  const configPath = getConfigPath(storageRoot);
+  await writeFile(configPath, lines.join("\n"), "utf8");
+}
+/**
+ * Store the fixtures a broker step needs and return them.
+ *
+ * Puts PLANNER_OUTPUT_SCHEMA via `putSchema`, builds a one-role workflow
+ * (`planner`, whose `done` edge leads to `$END`), stores that workflow in the
+ * CAS and then stores a start node referencing it.
+ *
+ * @returns the workflow payload and the hash of the stored start node.
+ */
+async function buildWorkflow(uwf: UwfStore): Promise<{
+  workflow: WorkflowPayload;
+  startHash: CasRef;
+}> {
+  const frontmatterHash = (await putSchema(uwf.store, PLANNER_OUTPUT_SCHEMA)) as CasRef;
+  const workflow: WorkflowPayload = {
+    version: 1,
+    name: WORKFLOW_NAME,
+    description: "broker step resume end-to-end",
+    roles: {
+      planner: {
+        description: "plans things",
+        goal: "produce a plan",
+        capabilities: [],
+        procedure: "think hard",
+        output: "frontmatter+body",
+        // The role's frontmatter is validated against the schema stored above.
+        frontmatter: frontmatterHash,
+      },
+    },
+    graph: {
+      planner: {
+        // Non-empty $END prompt: the resumed `done` stream routes through the
+        // post-step moderator, which rejects an empty edge template.
+        done: { role: "$END", prompt: "done", location: null },
+      },
+    },
+  };
+  // The workflow is stored first; the start node embeds the value returned by that put.
+  const startHash = (await uwf.store.cas.put(uwf.schemas.startNode, {
+    workflow: await uwf.store.cas.put(uwf.schemas.workflow, workflow),
+    prompt: "p",
+    cwd: "/tmp/work",
+  })) as CasRef;
+  return { workflow, startHash };
+}
+/**
+ * SSE response for a send that times out: a user turn, a partial assistant
+ * turn, then a `suspend` event carrying the timeout reason, `NATIVE_ID` and
+ * `ELAPSED_MS`.
+ */
+function suspendStream(): Response {
+  const turn = (index: number, role: string, content: string) => ({
+    type: "@sumeru/turn",
+    value: { index, role, content, timestamp: "", toolCalls: null },
+  });
+  const suspend = {
+    type: "@sumeru/suspend",
+    value: { reason: "timeout", nativeId: NATIVE_ID, elapsedMs: ELAPSED_MS },
+  };
+  const frames = [
+    sseFrame(1, "turn", turn(0, "user", "edge prompt")),
+    sseFrame(2, "turn", turn(1, "assistant", "draft1")),
+    sseFrame(3, "suspend", suspend),
+  ];
+  return buildSseResponse(frames);
+}
+/**
+ * SSE response for a send that finishes: one assistant turn whose content is
+ * `PLANNER_RAW_OUTPUT`, followed by the `done` summary event.
+ */
+function completedStream(): Response {
+  const assistantTurn = {
+    type: "@sumeru/turn",
+    value: {
+      index: 1,
+      role: "assistant",
+      content: PLANNER_RAW_OUTPUT,
+      timestamp: "",
+      toolCalls: null,
+    },
+  };
+  const summary = {
+    type: "@sumeru/summary",
+    value: { turnCount: 2, tokens: { in: 9, out: 4 }, durationMs: 42 },
+  };
+  const frames = [sseFrame(1, "turn", assistantTurn), sseFrame(2, "done", summary)];
+  return buildSseResponse(frames);
+}
+/** Reduce any `fetch` input (string, URL or Request) to its URL string. */
+function resolveFetchUrl(input: string | URL | Request): string {
+  if (input instanceof URL) {
+    return input.href;
+  }
+  return typeof input === "string" ? input : input.url;
+}
+/** Create a process logger rooted at `tmpDir`, tagged with the test thread and workflow. */
+function makePlog(tmpDir: string) {
+  const context = { thread: THREAD_ID, workflow: WORKFLOW_NAME };
+  return createProcessLogger({ storageRoot: tmpDir, context });
+}
+describe("uwf thread resume — timeout-suspended thread resumes via fresh send (issue #435)", () => {
+  // Per-test scratch directory: used as the storage root and as the parent of OCAS_HOME.
+  let tmpDir: string;
+  // OCAS_HOME as found before the test; restored (or removed) in afterEach.
+  let savedOcasHome: string | undefined;
+  // Every request seen by the stubbed fetch, in call order.
+  let calls: FetchCall[];
+  // First send (the step that suspends) → suspend stream; every send after the
+  // first (the resume) → completed stream. A counter, not a swap, so the resume
+  // genuinely re-enters the same stub.
+  let messageCallCount: number;
+  beforeEach(async () => {
+    savedOcasHome = process.env.OCAS_HOME;
+    tmpDir = await mkdtemp(join(tmpdir(), "broker-resume-e2e-"));
+    process.env.OCAS_HOME = join(tmpDir, "cas");
+    calls = [];
+    messageCallCount = 0;
+    vi.stubGlobal(
+      "fetch",
+      async (input: string | URL | Request, init: RequestInit | undefined): Promise<Response> => {
+        const url = resolveFetchUrl(input);
+        const method = init?.method ?? "GET";
+        // Only string bodies are recorded; any other body kind is logged as "".
+        const body = typeof init?.body === "string" ? init.body : "";
+        calls.push({ url, method, body });
+        // Session creation always answers with the fixed SESSION_ID.
+        if (url.endsWith(`/gateways/${GATEWAY}/sessions`)) {
+          return buildJsonResponse(201, {
+            type: "@sumeru/session",
+            value: { id: SESSION_ID, gateway: GATEWAY },
+          });
+        }
+        if (url.endsWith(`/sessions/${SESSION_ID}/messages`)) {
+          messageCallCount += 1;
+          return messageCallCount === 1 ? suspendStream() : completedStream();
+        }
+        // Any other URL is unexpected: answer 500 and echo the URL back.
+        return buildJsonResponse(500, { error: "unexpected url", url });
+      },
+    );
+  });
+  afterEach(async () => {
+    vi.unstubAllGlobals();
+    // Remove the variable outright when it was originally unset.
+    if (savedOcasHome === undefined) delete process.env.OCAS_HOME;
+    else process.env.OCAS_HOME = savedOcasHome;
+    await rm(tmpDir, { recursive: true, force: true });
+  });
+  /**
+   * Drive a real send-timeout through `executeBrokerStep`, then seed the thread
+   * to `suspended` at the produced `$SUSPEND` step (mirroring `finalizeAgentStep`
+   * after a suspended broker step). Returns the suspend step hash and reason.
+   *
+   * The reason is read from the step result's frontmatter and reused verbatim
+   * as the seeded `suspendMessage`.
+   */
+  async function suspendThread(
+    uwf: UwfStore,
+    workflow: WorkflowPayload,
+    startHash: CasRef,
+  ): Promise<{ suspendHash: CasRef; reason: string }> {
+    const result = await executeBrokerStep({
+      storageRoot: tmpDir,
+      uwf,
+      config: buildConfig(),
+      workflow,
+      threadId: THREAD_ID,
+      role: ROLE,
+      edgePrompt: "make a plan",
+      effectiveCwd: "/tmp/work",
+      startHash,
+      prevHash: null,
+      agentOverride: null,
+      previousAttempts: null,
+      plog: makePlog(tmpDir),
+    });
+    const reason = (result.frontmatter as Record<string, unknown>).reason as string;
+    await seedThreads(tmpDir, {
+      [THREAD_ID]: {
+        head: result.stepHash,
+        status: "suspended",
+        suspendedRole: ROLE,
+        suspendMessage: reason,
+        completedAt: null,
+      },
+    });
+    return { suspendHash: result.stepHash, reason };
+  }
+  test("a timeout-suspended thread is shown as suspended, then resume advances it past the gate", async () => {
+    const uwf = await createUwfStore(tmpDir);
+    const { workflow, startHash } = await buildWorkflow(uwf);
+    await writeConfig(tmpDir);
+    const { suspendHash, reason } = await suspendThread(uwf, workflow, startHash);
+    // Precondition: the thread sits in `suspended` carrying the timeout reason.
+    const show = await cmdThreadShow(tmpDir, THREAD_ID);
+    expect(show.status).toBe("suspended");
+    expect(show.suspendedRole).toBe(ROLE);
+    expect(show.suspendMessage).toBe(reason);
+    expect(show.suspendMessage as string).toContain(NATIVE_ID);
+    // Resume is accepted and the resumed send completes, so the thread leaves
+    // `suspended` and advances (here straight to `end` via the `done` edge).
+    const resumeOut = await cmdThreadResume(tmpDir, THREAD_ID, SUPPLEMENT, null);
+    expect(resumeOut.status).toBe("end");
+    expect(resumeOut.done).toBe(true);
+    expect(resumeOut.error).toBeNull();
+    expect(resumeOut.head).not.toBe(suspendHash);
+    // And `thread show` agrees the gate is gone.
+    const showAfter = await cmdThreadShow(tmpDir, THREAD_ID);
+    expect(showAfter.status).toBe("end");
+    expect(showAfter.suspendedRole).toBeNull();
+    expect(showAfter.suspendMessage).toBeNull();
+  });
+  test("resume issues a FRESH send reusing the SAME mapped session (no new createSession)", async () => {
+    const uwf = await createUwfStore(tmpDir);
+    const { workflow, startHash } = await buildWorkflow(uwf);
+    await writeConfig(tmpDir);
+    await suspendThread(uwf, workflow, startHash);
+    await cmdThreadResume(tmpDir, THREAD_ID, SUPPLEMENT, null);
+    // Exactly ONE createSession (during the suspend) — resume reuses the cached
+    // (threadId, role) → sessionId mapping rather than spawning a new session.
+    const createCalls = calls.filter((c) => c.url.endsWith(`/gateways/${GATEWAY}/sessions`));
+    expect(createCalls).toHaveLength(1);
+    // TWO sends, both addressed to the SAME session id: the suspended send and
+    // the resume continuation. The sumeru adapter resumes by nativeId off this
+    // shared session.
+    const messageCalls = calls.filter((c) => c.url.endsWith("/messages"));
+    expect(messageCalls).toHaveLength(2);
+    for (const call of messageCalls) {
+      expect(call.url).toContain(`/sessions/${SESSION_ID}/messages`);
+    }
+    // The broker session row still points at the same session for a future resume.
+    const sessionStore = openBrokerSessionStore(tmpDir);
+    try {
+      const row = sessionStore.getSession(THREAD_ID, ROLE);
+      expect(row?.sessionId).toBe(SESSION_ID);
+      expect(row?.host).toBe(HOST);
+      expect(row?.gateway).toBe(GATEWAY);
+    } finally {
+      // Close the store even when an assertion above throws.
+      sessionStore.close();
+    }
+  });
+  test("the -p supplement is delivered as the continuation prompt on the resume send", async () => {
+    const uwf = await createUwfStore(tmpDir);
+    const { workflow, startHash } = await buildWorkflow(uwf);
+    await writeConfig(tmpDir);
+    await suspendThread(uwf, workflow, startHash);
+    await cmdThreadResume(tmpDir, THREAD_ID, SUPPLEMENT, null);
+    const messageCalls = calls.filter((c) => c.url.endsWith("/messages"));
+    // Index 1 is the second send, i.e. the one issued by the resume.
+    const resumeSend = messageCalls[1];
+    expect(resumeSend).toBeDefined();
+    // The resume prompt = suspend reason + the operator supplement; both ride
+    // the assembled prompt body of the continuation send.
+    expect(resumeSend?.body).toContain(SUPPLEMENT);
+    expect(resumeSend?.body).toContain(NATIVE_ID);
+  });
+});

package/src/__tests__/log-tag-validity.test.ts ADDED Viewed

@@ -0,0 +1,124 @@
+import { readdir, readFile, stat } from "node:fs/promises";
+import { join } from "node:path";
+import { describe, expect, test } from "vitest";
+/**
+ * Static regression guard for log tags (#426).
+ *
+ * Every `log()` call site uses a hand-written 8-char Crockford Base32 tag.
+ * Crockford Base32 excludes I, L, O, U to avoid visual ambiguity, and
+ * `assertValidLogTag()` (util/process-logger/log-tag.ts) throws at runtime
+ * when a tag contains an illegal character.
+ *
+ * The bug: `PL_FRONTMATTER_FAIL = "F4FA1L7Z"` (a leet spelling of
+ * "FRONTMATTER FAIL") smuggled an `L` into the tag. It only fires on the
+ * frontmatter-extraction-failure path, so it stayed dormant until a planner
+ * step failed extraction — then the failure logger itself crashed the process,
+ * masking the real error.
+ *
+ * This test scans the source of the cli + broker packages and asserts that
+ * EVERY literal tag — whether written inline as `log("XXXXXXXX", ...)` or as a
+ * `const PL_* = "XXXXXXXX"` constant — is a valid Crockford Base32 tag. A new
+ * illegal tag, in any file, fails here at build time instead of at runtime.
+ */
+// The 32 symbols of Crockford Base32: digits plus upper-case letters, with
+// I, L, O and U left out (mirrors util/src/base32.ts).
+const CROCKFORD_BASE32_ALPHABET = "0123456789ABCDEFGHJKMNPQRSTVWXYZ";
+// Membership set, one entry per alphabet character.
+const TAG_CHAR_SET = new Set<string>(CROCKFORD_BASE32_ALPHABET);
+// Required length of a log tag.
+const TAG_LENGTH = 8;
+/**
+ * True when `tag` is exactly TAG_LENGTH characters long and every character,
+ * after upper-casing, belongs to the Crockford Base32 alphabet.
+ */
+function isValidLogTag(tag: string): boolean {
+  const hasRightLength = tag.length === TAG_LENGTH;
+  return hasRightLength && [...tag].every((ch) => TAG_CHAR_SET.has(ch.toUpperCase()));
+}
+// Roots scanned for log-tag literals, relative to this test file.
+// A root that cannot be read contributes no files (collectTsFiles returns an
+// empty list when readdir fails), so a missing broker package does not fail
+// the scan by itself.
+const SCAN_ROOTS = [
+  join(__dirname, ".."), // packages/cli/src
+  join(__dirname, "..", "..", "..", "broker", "src"), // packages/broker/src
+];
+/**
+ * Recursively collect the non-test `.ts` files under `dir`.
+ *
+ * Entries named `node_modules` or `dist` are skipped, and so are files ending
+ * in `.test.ts`. A directory that does not exist yields an empty list (a scan
+ * root may legitimately be absent); every other `readdir` failure is rethrown
+ * so an unreadable tree cannot silently shrink the scan and let the guard pass
+ * on incomplete input.
+ *
+ * @param dir directory to walk
+ * @returns absolute-or-relative paths (as joined from `dir`) of matching files
+ * @throws the original `readdir`/`stat` error for anything but a missing path
+ */
+async function collectTsFiles(dir: string): Promise<string[]> {
+  const out: string[] = [];
+  let names: string[];
+  try {
+    names = await readdir(dir);
+  } catch (err) {
+    const code = (err as NodeJS.ErrnoException | null)?.code;
+    // Only "path is not there" is an expected, ignorable condition.
+    if (code === "ENOENT" || code === "ENOTDIR") {
+      return out;
+    }
+    throw err;
+  }
+  for (const name of names) {
+    if (name === "node_modules" || name === "dist") {
+      continue;
+    }
+    const full = join(dir, name);
+    const info = await stat(full);
+    if (info.isDirectory()) {
+      out.push(...(await collectTsFiles(full)));
+    } else if (info.isFile() && name.endsWith(".ts") && !name.endsWith(".test.ts")) {
+      out.push(full);
+    }
+  }
+  return out;
+}
+// One tag literal found by the scan.
+type TagOccurrence = {
+  // The captured 8-character literal (regex group 1).
+  tag: string;
+  // Path of the file the literal was found in.
+  file: string;
+  // The full regex match, used to show the call site or constant in reports.
+  context: string;
+};
+// Matches `log("XXXXXXXX"` call sites and `... = "XXXXXXXX"` tag constants.
+// The capturing group grabs an 8-char alphanumeric literal; isValidLogTag then
+// decides legality. We intentionally over-collect (any 8-char string assigned
+// to a PL_/TAG const or passed as log()'s first arg) and validate each.
+// NOTE: because the group is fixed at exactly 8 alphanumerics, a literal of any
+// other length (or containing other characters) is never collected, so only
+// the alphabet check in isValidLogTag can fail for tags found this way.
+// Both regexes carry the `g` flag and are therefore stateful via `lastIndex`.
+const LOG_CALL_RE = /\blog\(\s*"([0-9A-Za-z]{8})"/g;
+const TAG_CONST_RE =
+  /\bconst\s+(?:PL_[A-Z0-9_]+|[A-Z0-9_]*TAG[A-Z0-9_]*)\s*=\s*"([0-9A-Za-z]{8})"/g;
+/**
+ * Scan every file under SCAN_ROOTS and return each literal matched by
+ * LOG_CALL_RE or TAG_CONST_RE. For each file, all log-call matches are listed
+ * before the tag-constant matches.
+ */
+async function collectTagOccurrences(): Promise<TagOccurrence[]> {
+  const found: TagOccurrence[] = [];
+  for (const root of SCAN_ROOTS) {
+    for (const file of await collectTsFiles(root)) {
+      const content = await readFile(file, "utf8");
+      for (const pattern of [LOG_CALL_RE, TAG_CONST_RE]) {
+        // matchAll iterates a private copy of the regex, so the shared
+        // pattern's lastIndex is never advanced.
+        for (const hit of content.matchAll(pattern)) {
+          found.push({ tag: hit[1], file, context: hit[0] });
+        }
+      }
+    }
+  }
+  return found;
+}
+describe("log tag validity (#426 regression guard)", () => {
+  test("collects at least the known PL_ tag constants", async () => {
+    // A scan that finds (almost) nothing means the regexes or roots are broken,
+    // which would let the legality check below pass vacuously.
+    const found = await collectTagOccurrences();
+    expect(found.length).toBeGreaterThanOrEqual(10);
+  });
+  test("every log tag literal in cli + broker is valid Crockford Base32", async () => {
+    const found = await collectTagOccurrences();
+    const invalid = found.filter(({ tag }) => !isValidLogTag(tag));
+    // One line per offender so the failure message points at the exact site.
+    const report = invalid
+      .map(({ tag, context, file }) => `  ${tag}  (${context})  in ${file}`)
+      .join("\n");
+    expect(invalid, `Illegal Crockford Base32 log tags found:\n${report}`).toEqual([]);
+  });
+  test("the specific F4FA1L7Z bug (#426) stays fixed", async () => {
+    const found = await collectTagOccurrences();
+    expect(found.filter(({ tag }) => tag === "F4FA1L7Z")).toEqual([]);
+  });
+});

package/src/__tests__/setup-agent-discovery.test.ts CHANGED Viewed

@@ -10,19 +10,19 @@ import { _agentNameFromBinary, _printAgentMenu, cmdSetup } from "../commands/set
 describe("_agentNameFromBinary", () => {
   test("strips uwf- prefix", () => {
-    expect(_agentNameFromBinary("uwf-hermes")).toBe("hermes");
+    expect(_agentNameFromBinary("uwf-builtin")).toBe("builtin");
   });
   test("strips uwf- prefix for compound names", () => {
-    expect(_agentNameFromBinary("uwf-claude-code")).toBe("claude-code");
+    expect(_agentNameFromBinary("uwf-some-gateway")).toBe("some-gateway");
   });
   test("returns as-is when no uwf- prefix", () => {
-    expect(_agentNameFromBinary("hermes")).toBe("hermes");
+    expect(_agentNameFromBinary("builtin")).toBe("builtin");
   });
-  test("handles uwf-builtin", () => {
-    expect(_agentNameFromBinary("uwf-builtin")).toBe("builtin");
+  test("handles uwf-mock", () => {
+    expect(_agentNameFromBinary("uwf-mock")).toBe("mock");
   });
 });
@@ -35,10 +35,10 @@ describe("_printAgentMenu", () => {
       logs.push(args.join(" "));
     });
-    _printAgentMenu(["uwf-hermes", "uwf-claude-code"]);
+    _printAgentMenu(["uwf-builtin", "uwf-mock"]);
-    expect(logs.some((l) => l.includes("Hermes"))).toBe(true);
-    expect(logs.some((l) => l.includes("Claude Code"))).toBe(true);
+    expect(logs.some((l) => l.includes("Built-in"))).toBe(true);
+    expect(logs.some((l) => l.includes("Mock"))).toBe(true);
     vi.restoreAllMocks();
   });
@@ -84,19 +84,19 @@ describe("cmdSetup agent configuration (engine config is LLM-free, issue #143)",
   });
   test("preserves existing agents when adding new one", async () => {
-    await cmdSetup({ agent: "hermes", storageRoot });
+    await cmdSetup({ agent: "builtin", storageRoot });
     await cmdSetup({ agent: "claude-code", storageRoot });
     const config = parse(readFileSync(join(storageRoot, "config.yaml"), "utf8"));
-    expect(config.agents.hermes).toBeDefined();
+    expect(config.agents.builtin).toBeDefined();
     expect(config.agents["claude-code"]).toBeDefined();
     expect(config.defaultAgent).toBe("claude-code");
   });
   test("updates defaultAgent on re-run with different agent", async () => {
-    await cmdSetup({ agent: "hermes", storageRoot });
+    await cmdSetup({ agent: "mock", storageRoot });
     const config1 = parse(readFileSync(join(storageRoot, "config.yaml"), "utf8"));
-    expect(config1.defaultAgent).toBe("hermes");
+    expect(config1.defaultAgent).toBe("mock");
     await cmdSetup({ agent: "builtin", storageRoot });
     const config2 = parse(readFileSync(join(storageRoot, "config.yaml"), "utf8"));
@@ -104,17 +104,17 @@ describe("cmdSetup agent configuration (engine config is LLM-free, issue #143)",
   });
   test("normalizes agent name with uwf- prefix to bare name", async () => {
-    const result = await cmdSetup({ agent: "uwf-hermes", storageRoot });
+    const result = await cmdSetup({ agent: "uwf-builtin", storageRoot });
-    expect(result.defaultAgent).toBe("hermes");
+    expect(result.defaultAgent).toBe("builtin");
     const config = parse(readFileSync(join(storageRoot, "config.yaml"), "utf8"));
-    expect(config.agents.hermes).toEqual({
+    expect(config.agents.builtin).toEqual({
       host: "http://127.0.0.1:7900",
-      gateway: "hermes",
+      gateway: "builtin",
     });
-    expect(config.defaultAgent).toBe("hermes");
+    expect(config.defaultAgent).toBe("builtin");
     // Verify no duplicate uwf- prefix
-    expect(config.agents["uwf-hermes"]).toBeUndefined();
+    expect(config.agents["uwf-builtin"]).toBeUndefined();
   });
   test("normalizes uwf-claude-code to claude-code", async () => {
@@ -137,18 +137,18 @@ describe("cmdSetup agent configuration (engine config is LLM-free, issue #143)",
     mkdirSync(storageRoot, { recursive: true });
     writeFileSync(
       join(storageRoot, "config.yaml"),
-      "providers:\n  openai: { baseUrl: x, apiKey: y }\nmodels:\n  default: { provider: openai, name: gpt-4o }\ndefaultModel: default\nagents:\n  hermes: { host: 'http://127.0.0.1:7900', gateway: hermes }\ndefaultAgent: hermes\n",
+      "providers:\n  openai: { baseUrl: x, apiKey: y }\nmodels:\n  default: { provider: openai, name: gpt-4o }\ndefaultModel: default\nagents:\n  builtin: { host: 'http://127.0.0.1:7900', gateway: builtin }\ndefaultAgent: builtin\n",
       "utf8",
     );
-    await cmdSetup({ agent: "hermes", storageRoot });
+    await cmdSetup({ agent: "builtin", storageRoot });
     const config = parse(readFileSync(join(storageRoot, "config.yaml"), "utf8"));
     expect(config.providers).toBeUndefined();
     expect(config.models).toBeUndefined();
     expect(config.defaultModel).toBeUndefined();
-    expect(config.agents.hermes).toEqual({
+    expect(config.agents.builtin).toEqual({
       host: "http://127.0.0.1:7900",
-      gateway: "hermes",
+      gateway: "builtin",
     });
-    expect(config.defaultAgent).toBe("hermes");
+    expect(config.defaultAgent).toBe("builtin");
   });
 });

package/src/__tests__/step-show-json.test.ts CHANGED Viewed

@@ -169,7 +169,7 @@ describe("cmdStepShow JSON serialization", () => {
     expect(jsonOutput).toContain("\\n");
     const parsed = JSON.parse(jsonOutput);
-    expect(parsed.turns[0].toolCalls[0].args).toContain("\n");
+    expect(parsed.detail.turns[0].toolCalls[0].args).toContain("\n");
   });
   test("escapes tabs in tool call args", async () => {
@@ -239,7 +239,7 @@ describe("cmdStepShow JSON serialization", () => {
     expect(() => JSON.parse(jsonOutput)).not.toThrow();
     const parsed = JSON.parse(jsonOutput);
-    expect(parsed.turns).toBeDefined();
+    expect(parsed.detail.turns).toBeDefined();
   });
   test("handles Unicode control characters", async () => {
@@ -291,7 +291,7 @@ describe("cmdStepShow JSON serialization", () => {
     expect(() => JSON.parse(jsonOutput)).not.toThrow();
     const parsed = JSON.parse(jsonOutput);
-    expect(parsed.turns).toHaveLength(2);
+    expect(parsed.detail.turns).toHaveLength(2);
   });
   test("YAML output format is unaffected", async () => {
@@ -333,7 +333,7 @@ describe("cmdStepShow JSON serialization", () => {
     expect(() => JSON.parse(jsonOutput)).not.toThrow();
     const parsed = JSON.parse(jsonOutput);
-    expect(parsed.turns).toBeDefined();
+    expect(parsed.detail.turns).toBeDefined();
   });
   test("handles large step with multiple tool calls", async () => {
@@ -369,6 +369,6 @@ describe("cmdStepShow JSON serialization", () => {
     expect(() => JSON.parse(jsonOutput)).not.toThrow();
     const parsed = JSON.parse(jsonOutput);
-    expect(parsed.turns).toHaveLength(25);
+    expect(parsed.detail.turns).toHaveLength(25);
   });
 });