npm - @lumoai/cli - Versions diffs - 1.23.0 → 1.24.0 - Mend

@lumoai/cli 1.23.0 → 1.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/assets/skill/SKILL.md +8 -2
package/assets/skill/references/criteria.md +15 -1
package/assets/skill/references/verify.md +72 -0
package/dist/cli/src/commands/task-criteria-set.js +20 -0
package/dist/cli/src/commands/verify.js +206 -0
package/dist/cli/src/index.js +7 -0
package/dist/shared/src/acceptance-evidence.js +42 -0
package/dist/shared/src/index.js +8 -1
package/package.json +1 -1

package/assets/skill/SKILL.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
 name: lumo
-description: 'Use the Lumo CLI to load task context, manage session bindings, and run tasks / projects / milestones / sprints / docs / memory from the terminal. Activate when: the user mentions a Lumo task identifier (LUM-42 etc.), asks to load task background/context, wants to bind/check/detach a Claude Code session''s task, is about to start work on a task, or wants to create/update/list/show/comment on tasks, projects, milestones, sprints, documents, artifacts, Figma links, or memory. Triggers on: "LUM-", "task context", "load context", "session start", "session attach", "session status", "session detach", "which task", "what task am I on", "work on LUM", "session wrap", "wrap up session", "进度评论", "卡住检测", "fragment usage vote", "mark used fragments", "which fragments did I use", "--used", "上下文使用投票", "标记用过的记忆", "create task", "new task", "file a task", "list tasks", "my tasks", "show task", "view task", "comment on task", "update task", "change task status", "rename task", "reassign task", "mark task as done", "lumo next", "next task", "what should I work on", "推荐下一个任务", "list projects", "what projects", "milestone", "里程碑", "list/create/update/delete/show milestone", "set milestone", "attach/unbind milestone", "tasks in milestone", "search milestones", "find milestone", "milestone health", "at-risk", "overdue", "archive/unarchive milestone", "归档里程碑", "milestone summary", "里程碑复盘", "reorder/move milestone", "排序里程碑", "milestone add/remove", "挂任务到里程碑", "auth login", "log in", "logout", "sign out", "switch account", "whoami", "who am I", "current workspace", "登录", "切换账号", "create/update/list/show/delete doc", "write doc", "写文档", "新建文档", "修改文档", "查看文档", "bind/unbind doc", "把文档关联到任务", "doc scope", "personal/workspace doc", "tag", "add/remove tag", "标签", "share/unshare doc", "分享文档", "doc share-list", "viewer/editor/manager", "doc tree", "doc move", "move/reparent doc", "移动文档", "sprint", "冲刺", "迭代", "create/list/show/update/delete sprint", "start/close sprint", 
"开始/关闭冲刺", "add to sprint", "active sprints", "sprint summary", "冲刺总结", "把任务挂到冲刺", "sprint health", "sprint risk", "is this sprint at risk", "冲刺风险", "冲刺健康度", "sprint blockers", "冲刺阻塞", "lumo update", "upgrade lumo", "升级 lumo", "new lumo version", "lumo setup", "install lumo skill/hooks", "wire up lumo", "set up lumo", "安装 lumo", "配置 lumo", "task artifact", "artifact add/list/show/update/rm", "spec artifact", "record/attach spec", "attach plan", "记录 spec", "查看 artifact", figma, attach figma, figma link, 关联 figma, 设计稿, figma design, "memory", "记忆", "remember", "record a memory", "记一条", "promote memory", "promote to project", "沉淀", "task/project memory", "retrieval", "取全文", "拉全文", "task slack show", "看 thread", "show slack thread", "task web show", "web 正文", "task figma context", "figma metadata", "task comments list", "list comments", "看评论", "task pr show", "查看 PR", "show pr", "PR 详情", "task deps", "dependency", "dependencies", "依赖", "依赖边", "blocked by", "blocker", "confirm dependency", "dismiss dependency", "确认依赖", "忽略依赖", "import google doc", "sync google doc", "google drive", "doc import-gdoc", "doc sync", "导入/同步 google 文档", "mark blocked", "blocked tag", "标记 blocked", "stuck", "repeatedly failing", "worktree", "git worktree", "并行 worktree", "scaffold worktree", "新建 worktree", "node_modules 软链", "worktree 隔离", "lumo worktree add/rm/list", "task criteria", "criteria set", "criteria list", "acceptance criteria", "验收标准", "验收合约", "draft criteria", "草拟验收", "definition of done", "task lineage", "lineage", "causal trail", "审计", "因果链", "成本归因", "trace context", "--signal", "usage signal health", "auto usage audit", "自动使用审计", "signal-health".'
+description: 'Use the Lumo CLI to load task context, manage session bindings, and run tasks / projects / milestones / sprints / docs / memory from the terminal. Activate when: the user mentions a Lumo task identifier (LUM-42 etc.), asks to load task background/context, wants to bind/check/detach a Claude Code session''s task, is about to start work on a task, or wants to create/update/list/show/comment on tasks, projects, milestones, sprints, documents, artifacts, Figma links, or memory. Triggers on: "LUM-", "task context", "load context", "session start", "session attach", "session status", "session detach", "which task", "what task am I on", "work on LUM", "session wrap", "wrap up session", "进度评论", "卡住检测", "fragment usage vote", "mark used fragments", "which fragments did I use", "--used", "上下文使用投票", "标记用过的记忆", "create task", "new task", "file a task", "list tasks", "my tasks", "show task", "view task", "comment on task", "update task", "change task status", "rename task", "reassign task", "mark task as done", "lumo next", "next task", "what should I work on", "推荐下一个任务", "list projects", "what projects", "milestone", "里程碑", "list/create/update/delete/show milestone", "set milestone", "attach/unbind milestone", "tasks in milestone", "search milestones", "find milestone", "milestone health", "at-risk", "overdue", "archive/unarchive milestone", "归档里程碑", "milestone summary", "里程碑复盘", "reorder/move milestone", "排序里程碑", "milestone add/remove", "挂任务到里程碑", "auth login", "log in", "logout", "sign out", "switch account", "whoami", "who am I", "current workspace", "登录", "切换账号", "create/update/list/show/delete doc", "write doc", "写文档", "新建文档", "修改文档", "查看文档", "bind/unbind doc", "把文档关联到任务", "doc scope", "personal/workspace doc", "tag", "add/remove tag", "标签", "share/unshare doc", "分享文档", "doc share-list", "viewer/editor/manager", "doc tree", "doc move", "move/reparent doc", "移动文档", "sprint", "冲刺", "迭代", "create/list/show/update/delete sprint", "start/close sprint", 
"开始/关闭冲刺", "add to sprint", "active sprints", "sprint summary", "冲刺总结", "把任务挂到冲刺", "sprint health", "sprint risk", "is this sprint at risk", "冲刺风险", "冲刺健康度", "sprint blockers", "冲刺阻塞", "lumo update", "upgrade lumo", "升级 lumo", "new lumo version", "lumo setup", "install lumo skill/hooks", "wire up lumo", "set up lumo", "安装 lumo", "配置 lumo", "task artifact", "artifact add/list/show/update/rm", "spec artifact", "record/attach spec", "attach plan", "记录 spec", "查看 artifact", figma, attach figma, figma link, 关联 figma, 设计稿, figma design, "memory", "记忆", "remember", "record a memory", "记一条", "promote memory", "promote to project", "沉淀", "task/project memory", "retrieval", "取全文", "拉全文", "task slack show", "看 thread", "show slack thread", "task web show", "web 正文", "task figma context", "figma metadata", "task comments list", "list comments", "看评论", "task pr show", "查看 PR", "show pr", "PR 详情", "task deps", "dependency", "dependencies", "依赖", "依赖边", "blocked by", "blocker", "confirm dependency", "dismiss dependency", "确认依赖", "忽略依赖", "import google doc", "sync google doc", "google drive", "doc import-gdoc", "doc sync", "导入/同步 google 文档", "mark blocked", "blocked tag", "标记 blocked", "stuck", "repeatedly failing", "worktree", "git worktree", "并行 worktree", "scaffold worktree", "新建 worktree", "node_modules 软链", "worktree 隔离", "lumo worktree add/rm/list", "task criteria", "criteria set", "criteria list", "acceptance criteria", "验收标准", "验收合约", "draft criteria", "草拟验收", "definition of done", "lumo verify", "verify task", "machine verification", "verification round", "机器验收", "自验", "验收轮", "claim done", "宣称完成", "--cause", "contract drift", "合约漂移", "task lineage", "lineage", "causal trail", "审计", "因果链", "成本归因", "trace context", "--signal", "usage signal health", "auto usage audit", "自动使用审计", "signal-health".'
 ---
 ## Prerequisites
@@ -26,6 +26,7 @@ The command catalog below is a **map**: it lists every command grouped by domain
 | `task create/update/list/show/comment`, `next`                                    | [references/tasks.md](references/tasks.md)                     |
 | `task artifact*`, `task figma*`                                                   | [references/artifacts-figma.md](references/artifacts-figma.md) |
 | `task criteria set/list`, drafting the acceptance contract                        | [references/criteria.md](references/criteria.md)               |
+| `verify` — machine verification loop, claim-done flow                             | [references/verify.md](references/verify.md)                   |
 | `project list`, `milestone*`                                                      | [references/milestones.md](references/milestones.md)           |
 | `doc*`                                                                            | [references/docs.md](references/docs.md)                       |
 | `sprint*`                                                                         | [references/sprints.md](references/sprints.md)                 |
@@ -72,9 +73,13 @@ The command catalog below is a **map**: it lists every command grouped by domain
 **Acceptance criteria（验收合约）** — see [criteria.md](references/criteria.md)
-- `lumo task criteria set <task> --file <criteria.json> [--human]` — submit the whole contract: default = initial agent draft (AGENT_DRAFT, locked once submitted); `--human` = a HUMAN_EDIT revision transcribed from the conversation (desired final list; items with `id` keep/update, missing ones are deleted)
+- `lumo task criteria set <task> --file <criteria.json> [--human] [--cause <tag>]` — submit the whole contract: default = initial agent draft (AGENT_DRAFT, locked once submitted); `--human` = a HUMAN_EDIT revision transcribed from the conversation (desired final list; items with `id` keep/update, missing ones are deleted); `--cause` (with `--human`) annotates why the contract drifted: `NEW_INFO | SCOPE_CHANGE | DRAFT_BLIND_SPOT | GRANULARITY | OTHER`
 - `lumo task criteria list <task>` — print the contract (id, MACHINE/HUMAN, provenance source@round, checkpointer)
+**Verification（机器验收循环）** — see [verify.md](references/verify.md)
+- `lumo verify [task] [--timeout <seconds>]` — run every MACHINE criterion's checkpointer locally, report one structured PASS/FAIL verdict per criterion to the server, print next actions. Defaults to the session-bound task. Round cap 3: an all-pass round moves the task to IN_REVIEW (agent stops there); a round-3 fail escalates to a human (stop retrying). **Run this before claiming a task is done.**
 **Artifacts & Figma** — see [artifacts-figma.md](references/artifacts-figma.md)
 - `lumo task artifact add/update/list/show/rm` — record spec/plan products on a task
@@ -131,5 +136,6 @@ Typical flow when a user says "help me with LUM-42":
 3. Review unresolved items, PR-review todos, and the task description
 4. **If the task has no acceptance criteria** (context shows the 草拟提醒 instead of a contract): draft 3–7 outcome-level criteria and submit them with `lumo task criteria set` **before writing the first line of code** — see [criteria.md](references/criteria.md) for the drafting guide
 5. Begin working on the task
+6. **Before claiming the work is done: run `lumo verify`** — the machine half of the acceptance loop. Fix failures and re-run (round cap 3). On all-pass the task moves to IN_REVIEW and you stop; never set DONE yourself after a verify loop — that adjudication is human-only. See [verify.md](references/verify.md)
 **Git-suggest at start:** when the session is unbound, session-start may infer the task from the git branch / recent commits and print a suggestion — `检测到 LUM-N … 运行 lumo session attach LUM-N 绑定。` — **without** binding. Confirm it's the right task, then run `lumo session attach <LUM-N>` yourself (binding only happens on an explicit attach). See [sessions.md](references/sessions.md) for the full session-start behavior.

package/assets/skill/references/criteria.md CHANGED Viewed

@@ -105,6 +105,14 @@ verdict is never** — human verdicts only enter through human-initiated paths
 Only use `--human` for decisions a human actually made (in conversation, in a
 comment, in review). Never use it to work around your own lock.
+Optionally annotate **why** the contract drifted with
+`--cause <NEW_INFO|SCOPE_CHANGE|DRAFT_BLIND_SPOT|GRANULARITY|OTHER>` — pick
+the tag from the human's stated reason (new information, scope moved, the
+draft missed it, wrong granularity). The tag lands in the drift record
+(TaskActivity payload), feeding the Slice-3 drift-cause distribution. Every
+criteria add/update/delete is mirrored as a structured `CRITERION_CHANGED`
+activity automatically; `--cause` just enriches it.
 ### `lumo task criteria list <task>`
 Print the contract: `<id>  [MACHINE|HUMAN]  SOURCE@rN  [evidence]  statement`
@@ -121,4 +129,10 @@ before a `--human` revision. Empty contract prints a drafting pointer.
 - **`lumo task context`**: the `## 验收标准（合约）` section appears after the
   task description, before memory.
 - Review-time gap findings (`REVIEW_ADDED`, appended at the round they
-  surface) arrive via the verification loop (任务 ③), not via `criteria set`.
+  surface) arrive via the verification loop, not via `criteria set`.
+## After the contract: the verification loop
+The contract is judged by `lumo verify` — run it before claiming the task is
+done. See [verify.md](verify.md) for the loop (round cap 3, IN_REVIEW on
+all-pass, escalation on a round-3 fail).

package/assets/skill/references/verify.md ADDED Viewed

@@ -0,0 +1,72 @@
+# lumo verify — machine verification loop（机器验收循环）
+`lumo verify` is the machine half of the acceptance system (Acceptance v1,
+LUM-343). It executes every **MACHINE** criterion's checkpointer in the local
+repo, reports one structured PASS/FAIL verdict per criterion to the server,
+and prints what to do next. The judge lives server-side: round numbering, the
+3-round cap, escalation, and the IN_REVIEW transition all happen there
+(执行在客户端，裁判在服务端).
+## The claim-done rule
+**Before claiming a task is complete — in conversation, in a wrap-up, or by
+touching its status — run `lumo verify`.** The loop replaces "I read the code
+and it looks done" with executed evidence.
+```
+lumo verify              # session-bound task
+lumo verify LUM-42       # explicit task (overrides the session binding)
+lumo verify --timeout 900  # per-checkpointer timeout in seconds (default 600)
+```
+## What one round does
+1. Loads the task's acceptance contract and picks out MACHINE criteria.
+2. Runs each checkpointer locally (shell, cwd = current directory), one at a
+   time, echoing PASS/FAIL as it goes.
+3. POSTs the structured verdicts; the server records one VerificationRun per
+   criterion at round = previous max + 1 and mirrors each verdict as a
+   TaskActivity event.
+4. Prints the round outcome:
+   - **All PASS** → the task transitions to **IN_REVIEW** (existing state
+     machine + TASK_IN_REVIEW notification). **Stop here.** Human
+     adjudication and any HUMAN criteria take over; never set DONE yourself.
+   - **Any FAIL** → task status is untouched; the unmet criteria are printed
+     as next actions (statement, checkpointer, failure tail). Fix and re-run.
+   - **Round 3 still failing** → the loop escalates: a human is notified
+     (AGENT_VERIFY, requires action) and further `lumo verify` rounds are
+     rejected with 409. **Stop retrying**; fix only what the human directs.
+Exit code 0 = all passed (or nothing to run); 1 = failures, escalation, or
+errors.
+## Verdict semantics (what the CLI sends)
+- checkpointer exits 0 → `PASS` with evidence `cmd:<command>#exit=0`
+- non-zero exit → `FAIL`, reason = output tail, enum `CRITERION_UNMET`
+- spawn failure / timeout → `FAIL`, enum `CHECK_EXECUTION_ERROR`
+evidencePointer is **not free text** — the server only accepts
+`commit:<hash>`, `file:<path>:<line>`, or `cmd:<command>#exit=<code>`.
+Verdicts are PASS|FAIL only; the agent path cannot write HUMAN verdicts or
+`PASS_WITH_FOLLOWUP` (red line — those enter via human-initiated UI paths
+only).
+## Edge cases
+- **No contract yet** → error pointing at `lumo task criteria set`; draft the
+  contract first (criteria.md golden rule).
+- **HUMAN-only contract (zero MACHINE criteria)** → nothing to run; the CLI
+  says so and suggests handing off for human review
+  (`lumo task update <id> --status in_review`). No server write happens.
+- **A round must cover every MACHINE criterion** — the CLI always runs all of
+  them; the server rejects partial rounds.
+- Criteria added during review (`REVIEW_ADDED`) appear in the contract and
+  are picked up automatically by the next round.
+## Round discipline
+Rounds are a hard budget of 3, not a retry loop. Between rounds, actually fix
+the failures — re-running without changes burns a round and (at round 3)
+pages a human. A FAIL round never changes task status; only an all-pass round
+moves it (to IN_REVIEW, never further).

package/dist/cli/src/commands/task-criteria-set.js CHANGED Viewed

@@ -7,6 +7,13 @@ const doc_input_1 = require("../lib/doc-input");
 const path_guard_1 = require("../lib/path-guard");
 const sanitize_1 = require("../lib/sanitize");
 const task_criteria_list_1 = require("./task-criteria-list");
+/** Drift-cause tags accepted by `--cause`. The flag value is upper-cased
+ * before it is checked against this list, so the input is case-insensitive. */
+const CAUSE_TAGS = [
+    'NEW_INFO',
+    'SCOPE_CHANGE',
+    'DRAFT_BLIND_SPOT',
+    'GRANULARITY',
+    'OTHER',
+];
 /** Client-side shape gate: fail fast on obviously malformed JSON before the
  * round-trip. Full validation (statement length, MACHINE→checkpointer …)
  * stays server-side. */
@@ -69,6 +76,18 @@ async function taskCriteriaSet(identifier, options) {
         console.error(`Error: ${parsed.error}`);
         return 1;
     }
+    let causeTag;
+    if (options.cause !== undefined) {
+        if (!options.human) {
+            console.error('Error: --cause annotates contract drift and requires --human.');
+            return 1;
+        }
+        causeTag = options.cause.toUpperCase();
+        if (!CAUSE_TAGS.includes(causeTag)) {
+            console.error(`Error: invalid --cause "${options.cause}". One of: ${CAUSE_TAGS.join(' | ')}`);
+            return 1;
+        }
+    }
     const creds = (0, config_1.readCredentials)();
     if (!creds) {
         console.error('Error: not logged in. Run `lumo auth login` first.');
@@ -93,6 +112,7 @@ async function taskCriteriaSet(identifier, options) {
             body: JSON.stringify({
                 source: options.human ? 'HUMAN_EDIT' : 'AGENT_DRAFT',
                 criteria: parsed.items,
+                ...(causeTag ? { causeTag } : {}),
             }),
         });
     }

package/dist/cli/src/commands/verify.js ADDED Viewed

@@ -0,0 +1,206 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.runCheckpointer = runCheckpointer;
+exports.verify = verify;
+const child_process_1 = require("child_process");
+const config_1 = require("../lib/config");
+const api_1 = require("../lib/api");
+const sanitize_1 = require("../lib/sanitize");
+const acceptance_evidence_1 = require("../../../shared/src/acceptance-evidence");
+/** Per-checkpointer timeout, in seconds, used when `--timeout` is not given. */
+const DEFAULT_TIMEOUT_SECONDS = 600;
+/** Max characters of checkpointer output kept as a FAIL rejection reason. */
+const OUTPUT_TAIL_CHARS = 1_500;
+/** `maxBuffer` handed to spawnSync for checkpointer output (10 MiB). */
+const MAX_OUTPUT_BUFFER = 10 * 1024 * 1024;
+function tail(s, max) {
+    return s.length > max ? `…${s.slice(-max)}` : s;
+}
+/**
+ * Execute one MACHINE checkpointer in the local repo (执行在客户端 — the
+ * server can't run repo tests) and fold the result into a structured verdict.
+ * Exit 0 = PASS with a `cmd:` evidence pointer; non-zero = CRITERION_UNMET;
+ * spawn failure or timeout = CHECK_EXECUTION_ERROR.
+ */
+function runCheckpointer(criterionId, checkpointer, timeoutMs) {
+    const r = (0, child_process_1.spawnSync)(checkpointer, {
+        shell: true,
+        encoding: 'utf8',
+        timeout: timeoutMs,
+        maxBuffer: MAX_OUTPUT_BUFFER,
+        cwd: process.cwd(),
+    });
+    if (r.error) {
+        const timedOut = r.error.code === 'ETIMEDOUT' ||
+            r.signal === 'SIGTERM';
+        return {
+            criterionId,
+            verdict: 'FAIL',
+            rejectionReason: timedOut
+                ? `checkpointer timed out after ${Math.round(timeoutMs / 1000)}s: ${checkpointer}`
+                : `checkpointer failed to execute: ${r.error.message}`,
+            rejectionReasonEnum: 'CHECK_EXECUTION_ERROR',
+        };
+    }
+    const exitCode = r.status ?? 1;
+    if (exitCode === 0) {
+        return {
+            criterionId,
+            verdict: 'PASS',
+            evidencePointer: (0, acceptance_evidence_1.buildCmdEvidencePointer)(checkpointer, 0),
+        };
+    }
+    const output = `${r.stdout ?? ''}\n${r.stderr ?? ''}`.trim();
+    return {
+        criterionId,
+        verdict: 'FAIL',
+        evidencePointer: (0, acceptance_evidence_1.buildCmdEvidencePointer)(checkpointer, exitCode),
+        rejectionReason: tail(output, OUTPUT_TAIL_CHARS) || `exit code ${exitCode}`,
+        rejectionReasonEnum: 'CRITERION_UNMET',
+    };
+}
+/**
+ * `lumo verify [task]` — the machine half of the acceptance loop.
+ *
+ * Runs every MACHINE criterion's checkpointer locally, reports one structured
+ * verdict per criterion to the server (the judge: round numbering, the
+ * 3-round cap, escalation, and the IN_REVIEW transition all live there), and
+ * prints what to do next. Defaults to the session-bound task; an explicit
+ * identifier overrides.
+ */
+async function verify(identifier, options = {}) {
+    const creds = (0, config_1.readCredentials)();
+    if (!creds) {
+        console.error('Error: not logged in. Run `lumo auth login` first.');
+        return 1;
+    }
+    const base = (0, api_1.trimTrailingSlash)((0, api_1.resolveAuthedApiUrl)(creds.apiUrl));
+    const headers = {
+        Authorization: `Bearer ${creds.token}`,
+    };
+    const sessionId = process.env.CLAUDE_CODE_SESSION_ID;
+    if (sessionId)
+        headers['X-Lumo-Session-Id'] = sessionId;
+    const timeoutSeconds = options.timeout
+        ? parseInt(options.timeout, 10)
+        : DEFAULT_TIMEOUT_SECONDS;
+    if (!Number.isFinite(timeoutSeconds) || timeoutSeconds <= 0) {
+        console.error('Error: --timeout must be a positive number of seconds.');
+        return 1;
+    }
+    // ── Resolve the task: explicit identifier or the session binding ─────────
+    let taskId = identifier;
+    if (!taskId) {
+        if (!sessionId) {
+            console.error('Error: no task given and $CLAUDE_CODE_SESSION_ID is not set.\n' +
+                'Run `lumo verify <LUM-N>` or run inside a Claude Code session bound via `lumo session attach`.');
+            return 1;
+        }
+        let res;
+        try {
+            res = await fetch(`${base}/api/sessions/${encodeURIComponent(sessionId)}`, { headers });
+        }
+        catch (err) {
+            const msg = err instanceof Error ? err.message : String(err);
+            console.error(`Error: could not reach Lumo API (${msg})`);
+            return 1;
+        }
+        const data = res.ok
+            ? (await res.json())
+            : null;
+        if (!data?.taskIdentifier) {
+            console.error('Error: this session is not bound to a task. Run `lumo session attach <LUM-N>` first, or pass the task explicitly: `lumo verify <LUM-N>`.');
+            return 1;
+        }
+        taskId = data.taskIdentifier;
+    }
+    // ── Load the contract and pick out the MACHINE criteria ─────────────────
+    let criteriaRes;
+    try {
+        criteriaRes = await fetch(`${base}/api/tasks/${encodeURIComponent(taskId)}/criteria`, { headers });
+    }
+    catch (err) {
+        const msg = err instanceof Error ? err.message : String(err);
+        console.error(`Error: could not reach Lumo API (${msg})`);
+        return 1;
+    }
+    if (criteriaRes.status === 401) {
+        console.error('Error: API key invalid or revoked. Run `lumo auth login`.');
+        return 1;
+    }
+    if (criteriaRes.status === 404) {
+        console.error(`Error: task ${taskId} not found in workspace ${creds.workspaceSlug}`);
+        return 1;
+    }
+    if (!criteriaRes.ok) {
+        console.error(`Error: could not load criteria (HTTP ${criteriaRes.status})`);
+        return 1;
+    }
+    const { criteria } = (await criteriaRes.json());
+    const machine = criteria.filter(c => c.verifierType === 'MACHINE' && c.checkpointer);
+    if (criteria.length === 0) {
+        process.stdout.write(`${taskId} has no acceptance contract yet — draft one with \`lumo task criteria set\` before verifying.\n`);
+        return 1;
+    }
+    if (machine.length === 0) {
+        process.stdout.write(`${taskId} has no MACHINE criteria — nothing for the machine loop to run.\n` +
+            `The contract is HUMAN-only; finish your work and hand off for human review (lumo task update ${taskId} --status in_review).\n`);
+        return;
+    }
+    // ── Execute every checkpointer locally ───────────────────────────────────
+    process.stdout.write(`Verifying ${taskId} — ${machine.length} MACHINE criteria\n`);
+    const results = [];
+    for (const [i, c] of machine.entries()) {
+        process.stdout.write(`\n[${i + 1}/${machine.length}] ${(0, sanitize_1.sanitizeField)(c.statement)}\n` +
+            `  $ ${(0, sanitize_1.sanitizeField)(c.checkpointer)}\n`);
+        const verdict = runCheckpointer(c.id, c.checkpointer, timeoutSeconds * 1000);
+        results.push(verdict);
+        if (verdict.verdict === 'PASS') {
+            process.stdout.write('  ✓ PASS\n');
+        }
+        else {
+            process.stdout.write(`  ✗ FAIL${verdict.rejectionReason ? ` — ${(0, sanitize_1.sanitizeField)(tail(verdict.rejectionReason, 400))}` : ''}\n`);
+        }
+    }
+    // ── Report the round to the judge ─────────────────────────────────────────
+    let res;
+    try {
+        res = await fetch(`${base}/api/tasks/${encodeURIComponent(taskId)}/verify`, {
+            method: 'POST',
+            headers: { ...headers, 'Content-Type': 'application/json' },
+            body: JSON.stringify({ results }),
+        });
+    }
+    catch (err) {
+        const msg = err instanceof Error ? err.message : String(err);
+        console.error(`Error: could not report the round to Lumo (${msg})`);
+        return 1;
+    }
+    if (!res.ok) {
+        const body = (await res.json().catch(() => null));
+        const detail = body && typeof body.error === 'string' ? (0, sanitize_1.sanitizeField)(body.error) : '';
+        console.error(`Error: verification round rejected (HTTP ${res.status})${detail ? ` — ${detail}` : ''}`);
+        return 1;
+    }
+    const outcome = (await res.json());
+    process.stdout.write(`\nRound ${outcome.round}/${outcome.maxRounds} recorded.\n`);
+    if (outcome.allPassed) {
+        process.stdout.write(`✓ All MACHINE criteria passed — task is now ${outcome.taskStatus}.\n` +
+            `Stop here: human adjudication (and any HUMAN criteria) take over from this point.\n`);
+        return;
+    }
+    if (outcome.escalated) {
+        process.stdout.write(`✗ Round ${outcome.round} still has failures — the machine loop is exhausted (cap ${outcome.maxRounds}).\n` +
+            `A human has been notified to take over. STOP retrying lumo verify; fix only what they direct.\n`);
+        return 1;
+    }
+    process.stdout.write(`✗ ${outcome.nextActions.length} criteria unmet — task stays ${outcome.taskStatus}. Next actions:\n`);
+    for (const a of outcome.nextActions) {
+        process.stdout.write(`  • ${(0, sanitize_1.sanitizeField)(a.statement)}\n`);
+        if (a.checkpointer) {
+            process.stdout.write(`    check: ${(0, sanitize_1.sanitizeField)(a.checkpointer)}\n`);
+        }
+        if (a.rejectionReason) {
+            process.stdout.write(`    why: ${(0, sanitize_1.sanitizeField)(tail(a.rejectionReason, 400))}\n`);
+        }
+    }
+    process.stdout.write(`Fix the failures, then re-run \`lumo verify\` (${outcome.maxRounds - outcome.round} round${outcome.maxRounds - outcome.round === 1 ? '' : 's'} left).\n`);
+    return 1;
+}

package/dist/cli/src/index.js CHANGED Viewed

@@ -47,6 +47,7 @@ const session_detach_1 = require("./commands/session-detach");
 const session_status_1 = require("./commands/session-status");
 const session_wrap_1 = require("./commands/session-wrap");
 const next_1 = require("./commands/next");
+const verify_1 = require("./commands/verify");
 const task_context_1 = require("./commands/task-context");
 const task_create_1 = require("./commands/task-create");
 const task_update_1 = require("./commands/task-update");
@@ -196,6 +197,11 @@ program
     .option('--force', 'Overwrite existing skill files (SKILL.md + references/) when they differ from the bundled version')
     .option('--agent <token>', 'Coding agent these hooks run under (claude-code, codex, cursor, gemini-cli, github-copilot, windsurf). Baked into every hook command. Defaults to claude-code.')
     .action(wrap(options => (0, setup_1.setup)(options)));
+// `lumo verify [task]`: the optional task argument and the parsed options
+// (only `--timeout`) are forwarded unchanged to verify().
+program
+    .command('verify [task]')
+    .description('Machine verification loop (LUM-343): run every MACHINE criterion checkpointer locally, report structured verdicts to the server (round cap 3), and print next actions. All-pass moves the task to IN_REVIEW. Defaults to the session-bound task.')
+    .option('--timeout <seconds>', 'Per-checkpointer timeout in seconds (default 600)')
+    .action(wrap((task, options) => (0, verify_1.verify)(task, options)));
 program
     .command('next')
     .description('Recommend the next task(s) to work on, ranked by priority, active sprint, and due date. Prints top N (default 3); pick one and run `session attach` + `task context`.')
@@ -380,6 +386,7 @@ taskCriteria
     .description('Submit the whole acceptance contract from a JSON file. Default = initial agent draft (locked once submitted); --human records a HUMAN_EDIT revision (desired final list; items with "id" keep/update, missing ones are deleted).')
     .requiredOption('--file <path>', 'JSON array of criteria: [{"statement","verifierType":"MACHINE"|"HUMAN","checkpointer?","evidenceRequired?","id?"}]')
     .option('--human', 'Record a human contract revision (HUMAN_EDIT) transcribed from the conversation, with session 出处')
+    .option('--cause <tag>', 'Why the contract drifted (with --human): NEW_INFO | SCOPE_CHANGE | DRAFT_BLIND_SPOT | GRANULARITY | OTHER')
     .action(wrap((taskId, options) => (0, task_criteria_set_1.taskCriteriaSet)(taskId, options)));
 taskCriteria
     .command('list <task>')

package/dist/shared/src/acceptance-evidence.js ADDED Viewed

@@ -0,0 +1,42 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.EVIDENCE_POINTER_MAX = exports.EVIDENCE_POINTER_FORMAT_HINT = exports.EVIDENCE_POINTER_PATTERNS = void 0;
+exports.isValidEvidencePointer = isValidEvidencePointer;
+exports.buildCmdEvidencePointer = buildCmdEvidencePointer;
+/**
+ * Evidence-pointer grammar for acceptance verification (LUM-343).
+ *
+ * A VerificationRun's evidencePointer is NOT free text — free-text evidence
+ * from an agent-controlled path would reopen the side channel the verdict
+ * red line closed (agent-containment absorption, LUM-343 review note). The
+ * pointer must take one of the enumerated shapes below. Single source of
+ * truth shared by the CLI (which builds pointers) and the server validation
+ * layer (which rejects everything else).
+ *
+ * Shapes:
+ * - `commit:<7-40 hex>`            — a commit the evidence lives in
+ * - `file:<path>:<line>[-<line>]`  — a file location (path:line, no spaces)
+ * - `cmd:<command>#exit=<code>`    — an executed check plus its exit status
+ */
+exports.EVIDENCE_POINTER_PATTERNS = [
+    // commit:<hash> — 7 to 40 lowercase hex digits
+    /^commit:[0-9a-f]{7,40}$/,
+    // file:<path>:<line> or file:<path>:<start>-<end> — no whitespace in the path
+    /^file:[^\s]+:\d+(-\d+)?$/,
+    // cmd:<command>#exit=<code> — the command may contain any characters,
+    // newlines included; the exit code is an unsigned decimal
+    /^cmd:[\s\S]+#exit=\d+$/,
+];
+/** Human-readable summary of the accepted pointer shapes. */
+exports.EVIDENCE_POINTER_FORMAT_HINT = 'evidencePointer must be one of: commit:<hash>, file:<path>:<line>, cmd:<command>#exit=<code>';
+function isValidEvidencePointer(value) {
+    return exports.EVIDENCE_POINTER_PATTERNS.some(p => p.test(value));
+}
+/** Max stored pointer length — mirrors the column-level cap in validation. */
+exports.EVIDENCE_POINTER_MAX = 2_000;
+/**
+ * Build a `cmd:` evidence pointer for an executed checkpointer. The command
+ * is truncated so the suffix (`#exit=N`) always survives the length cap —
+ * a pointer that loses its exit marker would no longer parse as evidence.
+ */
+function buildCmdEvidencePointer(command, exitCode) {
+    const suffix = `#exit=${exitCode}`;
+    const budget = exports.EVIDENCE_POINTER_MAX - 'cmd:'.length - suffix.length;
+    const cmd = command.length > budget ? command.slice(0, budget) : command;
+    return `cmd:${cmd}${suffix}`;
+}

package/dist/shared/src/index.js CHANGED Viewed

@@ -1,7 +1,7 @@
 "use strict";
 // ── Agent Error types ────────────────────────────────────────────────────────
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.sanitizeField = exports.parseStreamJsonUsage = exports.tiptapToMarkdown = exports.markdownToTiptap = exports.AgentError = void 0;
+exports.buildCmdEvidencePointer = exports.isValidEvidencePointer = exports.EVIDENCE_POINTER_MAX = exports.EVIDENCE_POINTER_FORMAT_HINT = exports.EVIDENCE_POINTER_PATTERNS = exports.sanitizeField = exports.parseStreamJsonUsage = exports.tiptapToMarkdown = exports.markdownToTiptap = exports.AgentError = void 0;
 exports.userFriendlyError = userFriendlyError;
 class AgentError extends Error {
     code;
@@ -39,3 +39,10 @@ Object.defineProperty(exports, "parseStreamJsonUsage", { enumerable: true, get:
 // ── Untrusted free-text sanitization ─────────────────────────────────────────
 var sanitize_1 = require("./sanitize");
 Object.defineProperty(exports, "sanitizeField", { enumerable: true, get: function () { return sanitize_1.sanitizeField; } });
+// ── Acceptance verification evidence-pointer grammar ─────────────────────────
+var acceptance_evidence_1 = require("./acceptance-evidence");
+Object.defineProperty(exports, "EVIDENCE_POINTER_PATTERNS", { enumerable: true, get: function () { return acceptance_evidence_1.EVIDENCE_POINTER_PATTERNS; } });
+Object.defineProperty(exports, "EVIDENCE_POINTER_FORMAT_HINT", { enumerable: true, get: function () { return acceptance_evidence_1.EVIDENCE_POINTER_FORMAT_HINT; } });
+Object.defineProperty(exports, "EVIDENCE_POINTER_MAX", { enumerable: true, get: function () { return acceptance_evidence_1.EVIDENCE_POINTER_MAX; } });
+Object.defineProperty(exports, "isValidEvidencePointer", { enumerable: true, get: function () { return acceptance_evidence_1.isValidEvidencePointer; } });
+Object.defineProperty(exports, "buildCmdEvidencePointer", { enumerable: true, get: function () { return acceptance_evidence_1.buildCmdEvidencePointer; } });

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@lumoai/cli",
-  "version": "1.23.0",
+  "version": "1.24.0",
   "description": "Lumo CLI — manage tasks and sessions from the terminal",
   "license": "MIT",
   "author": "cli@uselumo.ai",