npm - @fitlab-ai/agent-infra - Versions diffs - 0.7.4 → 0.7.5 - Mend

@fitlab-ai/agent-infra 0.7.4 → 0.7.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (75) hide show

package/templates/.agents/scripts/validate-artifact.js CHANGED Viewed

@@ -1,8 +1,15 @@
 import fs from "node:fs";
 import path from "node:path";
 import process from "node:process";
+import { execFileSync } from "node:child_process";
 import { fileURLToPath } from "node:url";
+import {
+  extractReviewBaseline,
+  findAuthoritativeReviewCodeArtifact,
+  resolvePostReviewGlobs
+} from "./lib/post-review-commit.js";
 const EXIT_CODE = {
   pass: 0,
   fail: 1,
@@ -33,6 +40,24 @@ const AGENT_INFRA_VERSION_PATTERN = /^v\d+\.\d+\.\d+(?:-[0-9A-Za-z.-]+)?(?:\+[0-
 const ACTIVITY_LOG_PATTERN = /^- (\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}(?:[+-]\d{2}:\d{2})?) — \*\*(.+?)\*\* by (.+?) — (.+)$/;
 const BRANCH_SLUG_PATTERN = /^[a-z0-9]+(?:-[a-z0-9]+)*$/;
+// Review disagreement ledger (see .agents/rules/review-handshake.md).
+// Section names (Chinese and English) passed to getSectionContent to locate
+// the ledger inside the loaded task content.
+const LEDGER_SECTION_NAMES = ["审查分歧账本", "Review Disagreement Ledger"];
+// Every status a ledger row may legally carry; checkReviewLedger reports any
+// other value as an illegal status.
+const LEDGER_STATUSES = new Set([
+  "open",
+  "accepted",
+  "adjusted",
+  "refuted",
+  "cannot-judge",
+  "confirmed",
+  "needs-human-decision",
+  "closed",
+  "human-decided"
+]);
+// Statuses treated as resolved; a row in any other status is reported as
+// unresolved by checkReviewLedger.
+const LEDGER_TERMINAL_OK = new Set(["confirmed", "closed", "human-decided"]);
+// Round limit used when no max_handshake_rounds setting is configured.
+const DEFAULT_MAX_HANDSHAKE_ROUNDS = 3;
+// Stage value of ledger rows that checkReviewLedger skips and
+// checkPostReviewCommit reads as exemption rows.
+const POST_REVIEW_COMMIT_STAGE = "post-review-commit";
+// Abbreviated or full hexadecimal commit id: 7 to 40 hex digits, any case.
+const SHA_PATTERN = /^[0-9a-f]{7,40}$/i;
 const scriptPath = fileURLToPath(import.meta.url);
 const repoRoot = path.resolve(path.dirname(scriptPath), "..", "..");
@@ -188,6 +213,10 @@ function runCheck(type, context) {
       return checkActivityLog(context);
     case "completion-checklist":
       return checkCompletionChecklist(context);
+    case "review-ledger":
+      return checkReviewLedger(context);
+    case "post-review-commit":
+      return checkPostReviewCommit(context);
     default: {
       const adapter = PLATFORM_ADAPTERS[type];
       if (!adapter) {
@@ -332,6 +361,20 @@ function loadProjectName() {
   }
 }
+/**
+ * Read the optional `review` settings object from `<repoRoot>/.agents/.airc.json`.
+ *
+ * Best-effort by design: a missing file, a read failure, invalid JSON, or a
+ * `review` value that is not a truthy object all produce an empty object
+ * rather than an error.
+ *
+ * @returns {object} The `review` value from the config file, or `{}`.
+ */
+function loadReviewConfig() {
+  const aircPath = path.join(repoRoot, ".agents", ".airc.json");
+  if (!fs.existsSync(aircPath)) {
+    return {};
+  }
+  try {
+    // Destructuring stays inside the try so that a top-level `null` document
+    // is swallowed the same way as a parse error.
+    const { review } = JSON.parse(fs.readFileSync(aircPath, "utf8"));
+    if (review && typeof review === "object") {
+      return review;
+    }
+    return {};
+  } catch {
+    return {};
+  }
+}
 function checkArtifact({ taskDir, config, artifactFile }) {
   const resolvedArtifact = resolveArtifactPath(taskDir, config.file_pattern, artifactFile);
   if (!resolvedArtifact.ok) {
@@ -481,6 +524,203 @@ function checkCompletionChecklist({ taskDir, config }) {
   return passResult("completion-checklist", `Completion Checklist valid (${items.length} items checked)`);
 }
+/**
+ * Parse the data rows of a markdown pipe table into arrays of trimmed cells.
+ *
+ * Lines that do not start with `|`, separator rows (only whitespace, `:`, `|`
+ * and `-`), and the header row (first cell equal to "id", case-insensitive)
+ * are skipped. A pipe escaped as `\|` is treated as cell content, not as a
+ * column delimiter, and is returned unescaped.
+ *
+ * @param {string} [section] - Raw section text; any falsy value yields no rows.
+ * @returns {string[][]} One array of cell strings per data row, in source order.
+ */
+function parseLedgerRows(section) {
+  const rows = [];
+  for (const rawLine of String(section || "").split(/\r?\n/)) {
+    const line = rawLine.trim();
+    if (!line.startsWith("|")) {
+      continue;
+    }
+    if (/^\|[\s:|-]+\|?$/.test(line)) {
+      continue; // separator row
+    }
+    // Strip the outer delimiters, but leave an escaped trailing pipe alone:
+    // it belongs to the last cell.
+    const inner = line.replace(/^\|/, "").replace(/(?<!\\)\|$/, "");
+    // Split on unescaped pipes only, so `a \| b` stays one cell instead of
+    // shifting every later column by one.
+    const cells = inner
+      .split(/(?<!\\)\|/)
+      .map((cell) => cell.replace(/\\\|/g, "|").trim());
+    if ((cells[0] || "").toLowerCase() === "id") {
+      continue; // header row
+    }
+    rows.push(cells);
+  }
+  return rows;
+}
+/**
+ * Resolve one review setting by precedence: the check's own config first,
+ * then the `review` object returned by loadReviewConfig, then `fallback`.
+ *
+ * A value counts as set when it is neither `undefined` nor `null`, so falsy
+ * values such as `0` or `""` are honoured.
+ *
+ * @param {object} [config] - Per-check configuration; may be absent.
+ * @param {string} key - Setting name to look up.
+ * @param {*} fallback - Value returned when neither source sets `key`.
+ * @returns {*} The first set value found, or `fallback`.
+ */
+function resolveReviewSetting(config, key, fallback) {
+  const isSet = (value) => value !== undefined && value !== null;
+  if (config && isSet(config[key])) {
+    return config[key];
+  }
+  // The config file is only consulted when the check config has no answer.
+  const fileValue = loadReviewConfig()[key];
+  return isSet(fileValue) ? fileValue : fallback;
+}
+/**
+ * Gate check: verify that every in-scope row of the task's review
+ * disagreement ledger carries a legal, evidenced, terminal status.
+ *
+ * Rows need at least six columns, read positionally as
+ * id, stage, round, (ignored), status, evidence.
+ *
+ * @param {object} context
+ * @param {string} context.taskDir - Task directory handed to `loadTask`.
+ * @param {object} [context.config] - Check config; may carry `stage_scope`
+ *   (array of stage names to enforce) and `max_handshake_rounds`.
+ * @returns {object} A `passResult` when the ledger section is absent, has no
+ *   rows, or is clean; otherwise a `failResult` listing every problem found.
+ */
+function checkReviewLedger({ taskDir, config }) {
+  const task = loadTask(taskDir);
+  if (!task.ok) {
+    return failResult("review-ledger", task.message);
+  }
+  const section = getSectionContent(task.content, LEDGER_SECTION_NAMES);
+  if (!section.trim()) {
+    return passResult("review-ledger", "No disagreement ledger section; treated as no open disagreements");
+  }
+  const rows = parseLedgerRows(section);
+  if (rows.length === 0) {
+    return passResult("review-ledger", "Disagreement ledger has no entries");
+  }
+  // Tolerate a missing check config, as resolveReviewSetting already does.
+  const stageScope = config && Array.isArray(config.stage_scope) ? config.stage_scope : null;
+  // A non-numeric limit becomes NaN, and every comparison against NaN is
+  // false, which would silently switch the round check off. A non-positive
+  // limit is meaningless. Both fall back to the default.
+  const configuredRounds = Number(resolveReviewSetting(config, "max_handshake_rounds", DEFAULT_MAX_HANDSHAKE_ROUNDS));
+  const maxRounds =
+    Number.isFinite(configuredRounds) && configuredRounds > 0
+      ? configuredRounds
+      : DEFAULT_MAX_HANDSHAKE_ROUNDS;
+  const problems = [];
+  let inScopeCount = 0;
+  for (const cells of rows) {
+    if (cells.length < 6) {
+      problems.push(`malformed row (expected 6 columns): ${cells.join(" | ")}`);
+      continue;
+    }
+    const [id, stage, roundRaw, , status, evidence] = cells;
+    // post-review-commit exemption rows are consumed by the post-review-commit
+    // check, not enforced here.
+    if (stage === POST_REVIEW_COMMIT_STAGE) {
+      continue;
+    }
+    // With no stage_scope configured, every remaining stage is in scope.
+    if (stageScope && !stageScope.includes(stage)) {
+      continue;
+    }
+    inScopeCount += 1;
+    if (!LEDGER_STATUSES.has(status)) {
+      problems.push(`${id}: illegal status '${status}'`);
+      continue;
+    }
+    if (status !== "open" && evidence === "") {
+      problems.push(`${id}: status '${status}' requires evidence`);
+    }
+    // A round cell that does not parse as an integer skips the limit check.
+    const round = Number.parseInt(roundRaw, 10);
+    if (
+      Number.isFinite(round) &&
+      round >= maxRounds &&
+      !LEDGER_TERMINAL_OK.has(status) &&
+      status !== "needs-human-decision"
+    ) {
+      problems.push(`${id}: round ${round} reached limit ${maxRounds} without convergence; escalate to needs-human-decision`);
+    }
+    // Anything short of a terminal status blocks the gate, including
+    // needs-human-decision.
+    if (!LEDGER_TERMINAL_OK.has(status)) {
+      problems.push(`${id}: unresolved (status '${status}')`);
+    }
+  }
+  if (problems.length > 0) {
+    return failResult("review-ledger", `Unclosed/invalid disagreements: ${problems.join("; ")}`);
+  }
+  const scopeLabel = stageScope ? ` for stages [${stageScope.join(", ")}]` : "";
+  return passResult("review-ledger", `Disagreement ledger clean (${inScopeCount} in-scope entries terminal${scopeLabel})`);
+}
+/**
+ * Gate check: detect commits made after the review baseline that touch the
+ * configured paths.
+ *
+ * Outcomes, in order of evaluation:
+ *  - no review-code artifact found            -> pass (check inactive)
+ *  - git root cannot be resolved              -> blocked
+ *  - baseline cannot be resolved              -> whatever resolvePostReviewBaseline returns
+ *  - `git rev-list` fails                     -> blocked
+ *  - no matching commits since the baseline   -> pass
+ *  - matching commits plus an exemption row   -> pass
+ *  - matching commits, no exemption           -> fail
+ *
+ * @param {object} context
+ * @param {string} context.taskDir - Task directory; also used as the `git -C` starting point.
+ * @param {object} context.config - Check config, forwarded to resolvePostReviewGlobs.
+ * @returns {object} A pass, fail, or blocked result object.
+ */
+function checkPostReviewCommit({ taskDir, config }) {
+  const reviewArtifact = findAuthoritativeReviewCodeArtifact(taskDir);
+  if (!reviewArtifact.ok) {
+    return passResult("post-review-commit", "No review-code artifact; check inactive");
+  }
+  let gitRoot;
+  try {
+    gitRoot = execFileSync("git", ["-C", taskDir, "rev-parse", "--show-toplevel"], { encoding: "utf8" }).trim();
+  } catch {
+    return blockedResult("post-review-commit", "git unavailable or task directory is not inside a git repository");
+  }
+  const task = loadTask(taskDir);
+  // NOTE(review): this read is not wrapped in try/catch, so an unreadable
+  // artifact would throw rather than return a blocked result; confirm the
+  // caller handles that.
+  const content = fs.readFileSync(reviewArtifact.path, "utf8");
+  const reviewBaseline = extractReviewBaseline(content);
+  // NOTE(review): assumes the metadata value is a string when present; a
+  // non-string truthy value would make `.trim()` throw. Verify against loadTask.
+  const lastReviewedCommit = task.ok ? (task.metadata.last_reviewed_commit || "").trim() : "";
+  const baselineSource = resolvePostReviewBaseline({
+    gitRoot,
+    lastReviewedCommit,
+    reviewBaseline,
+    reviewArtifact: reviewArtifact.fileName
+  });
+  if (!baselineSource.ok) {
+    return baselineSource.result;
+  }
+  const sha = baselineSource.sha;
+  const globs = resolvePostReviewGlobs(config, loadReviewConfig());
+  let commits;
+  try {
+    // Commits reachable from HEAD but not from the baseline, limited to the
+    // resolved path globs (everything after `--` is a pathspec).
+    const out = execFileSync("git", ["-C", gitRoot, "rev-list", `${sha}..HEAD`, "--", ...globs], { encoding: "utf8" });
+    commits = out.split(/\r?\n/).filter((line) => line.trim() !== "");
+  } catch {
+    return blockedResult("post-review-commit", `git rev-list failed for baseline ${sha}; manual inspection required`);
+  }
+  if (commits.length === 0) {
+    return passResult("post-review-commit", `No post-review commits to code/rule paths since ${sha.slice(0, 8)}`);
+  }
+  // An unloadable task simply yields no ledger rows, hence no exemption.
+  const ledgerSection = task.ok ? getSectionContent(task.content, LEDGER_SECTION_NAMES) : "";
+  // Exemption: any ledger row whose stage (2nd cell) is post-review-commit and
+  // whose status (5th cell) is human-decided.
+  const exempt = parseLedgerRows(ledgerSection).some(
+    (cells) => cells[1] === POST_REVIEW_COMMIT_STAGE && cells[4] === "human-decided"
+  );
+  if (exempt) {
+    return passResult(
+      "post-review-commit",
+      `${commits.length} post-review commit(s) covered by a human-decided exemption`
+    );
+  }
+  return failResult(
+    "post-review-commit",
+    `${commits.length} commit(s) to code/rule paths after review baseline ${sha.slice(0, 8)}; re-run review-code or record a human-decided exemption`
+  );
+}
+/**
+ * Choose the commit that post-review changes are measured against.
+ *
+ * Precedence:
+ *  1. `lastReviewedCommit`, when it is non-empty, matches SHA_PATTERN, and
+ *     gitCommitExists confirms it in `gitRoot`;
+ *  2. otherwise `reviewBaseline`, when it matches SHA_PATTERN (its existence
+ *     is not checked here).
+ *
+ * @param {object} args
+ * @param {string} args.gitRoot - Repository root passed to gitCommitExists.
+ * @param {string} args.lastReviewedCommit - Commit id from task metadata; may be empty.
+ * @param {string} [args.reviewBaseline] - Baseline extracted from the review artifact.
+ * @param {string} args.reviewArtifact - Artifact file name, used in messages only.
+ * @returns {{ok: true, sha: string} | {ok: false, result: object}} The chosen
+ *   sha, or the result the caller should return as-is: a pass for a falsy
+ *   baseline (legacy artifact), blocked for a malformed one.
+ */
+function resolvePostReviewBaseline({ gitRoot, lastReviewedCommit, reviewBaseline, reviewArtifact }) {
+  const metadataShaUsable =
+    Boolean(lastReviewedCommit) &&
+    SHA_PATTERN.test(lastReviewedCommit) &&
+    gitCommitExists(gitRoot, lastReviewedCommit);
+  if (metadataShaUsable) {
+    return { ok: true, sha: lastReviewedCommit };
+  }
+
+  // No usable metadata sha: fall back to the artifact's own baseline field.
+  if (!reviewBaseline) {
+    const legacySkip = passResult(
+      "post-review-commit",
+      `${reviewArtifact} predates baseline-commit anchoring; skipped (legacy artifact)`,
+      [`${reviewArtifact} has no 审查基线提交 / Review Baseline Commit field`]
+    );
+    return { ok: false, result: legacySkip };
+  }
+
+  if (SHA_PATTERN.test(reviewBaseline)) {
+    return { ok: true, sha: reviewBaseline };
+  }
+
+  const malformed = blockedResult(
+    "post-review-commit",
+    `${reviewArtifact} has an empty or malformed 审查基线提交 SHA ('${reviewBaseline}'); manual remediation required`
+  );
+  return { ok: false, result: malformed };
+}
+/**
+ * Report whether `sha` names a commit in the repository at `gitRoot`.
+ *
+ * Runs `git cat-file -e <sha>^{commit}` and maps its exit status to a
+ * boolean. Any failure (unknown object, not a commit, git missing, not a
+ * repository) yields `false`.
+ *
+ * @param {string} gitRoot - Directory passed to `git -C`.
+ * @param {string} sha - Commit id to probe.
+ * @returns {boolean} `true` when git exits successfully, otherwise `false`.
+ */
+function gitCommitExists(gitRoot, sha) {
+  try {
+    // Only the exit status matters. Without an explicit `stdio`, the child's
+    // stderr is forwarded to this process, so every miss would print git's
+    // "fatal: Not a valid object name" into the validator's output.
+    execFileSync("git", ["-C", gitRoot, "cat-file", "-e", `${sha}^{commit}`], { stdio: "ignore" });
+    return true;
+  } catch {
+    return false;
+  }
+}
 // === File & Config Loaders ===
 function loadVerifyConfig(skillName) {

package/templates/.agents/skills/analyze-task/SKILL.en.md CHANGED Viewed

@@ -65,9 +65,49 @@ If `task.md` contains these source fields, also read the corresponding source in
 - `codescan_alert_number` - Code Scanning alert
 - `security_alert_number` - Dependabot alert
-**Round ≥ 2: respond to the prior review (only when a review artifact exists)**: if the task directory contains `review-analysis.md` / `review-analysis-r{N}.md`, read the highest-round review report; add a `## Response to Prior Review` section to this round's analysis artifact, and for each finding verify it via Read/Grep before acting (holds → accept and fix; judged hallucinated/unfounded → rebut with counter-evidence rather than defaulting to compliance); record any open disagreement under `## Open Questions`. Round 1 has no review, so skip this section.
+**Round ≥ 2: respond to the prior review (only when a review artifact exists)**: if the task directory contains `review-analysis.md` / `review-analysis-r{N}.md`, read the highest-round review report; add a `## Response to Prior Review` section to this round's analysis artifact, and for each finding verify it via Read/Grep, then dispose of it with one of the four states in `.agents/rules/review-handshake.md` (`accepted` / `adjusted` / `refuted` / `cannot-judge`) — every state needs commensurate evidence, never defaulting to compliance; write the disposition back to the matching row in the task.md disagreement ledger (stage=analysis, round +1). Record any open disagreement under `## Open Questions`. Round 1 has no review, so skip this section.
-### 4. Perform Requirements Analysis
+### 4. Requirement Sufficiency Gate
+> Questions in this step are authorized by `.agents/rules/no-mid-flow-questions.md` "Exemption 3: Entry-point requirement-sufficiency clarification": only at the analyze-task entry point, only to judge and fill requirement sufficiency, one question at a time, and **never** to solicit implementation / technical-choice preferences.
+This step runs after the Step 0 state check and Step 3: asking a question is an externally visible action, so it must come after the state-check hard gate, and both the sufficiency judgment and the state read/write require task.md to have been read first.
+**4.1 Read cross-round state**: read the `## Brainstorming` section of task.md (treat as first time when absent, `question_count=0`). Section format:
+```
+## Brainstorming
+- status: asking | done
+- question_count: <int>
+- pending_question: <text, may be empty>
+- answered:
+  - Q: … / A: …
+```
+**4.2 Receive the answer to the previous question**: if `pending_question` exists:
+- the user's current message yields an answer → write the answer back into `## Description` / `## Requirements`, append that `Q/A` to `answered`, and clear `pending_question` (`question_count` unchanged).
+- the message carries no answer → restate `pending_question` and take the Scenario B early exit below (do not increment `question_count`).
+**4.3 Sufficiency judgment** (objective checklist; any gap hit means insufficient):
+- description/requirements empty, or a single sentence with no verifiable acceptance criteria;
+- missing goal or impact scope (unclear what to change / who is affected);
+- requirement items contradict each other, or key terms are undefined and block analysis.
+**4.4 Branch**:
+- **Scenario A (sufficient / converged)** — taken when any one exit condition is met: the checklist fully passes; the user explicitly says "just analyze" or "skip"; or `question_count` has reached the cap (at most 5 questions). Set `status: done` in `## Brainstorming` and continue the normal flow from step 5; write any remaining gaps into the `## Assumptions` / `## Open Questions` sections of the analysis artifact.
+- **Scenario B (insufficient, ask and early-exit)** — close the loop within this step and STOP early:
+  1. Decide this round's question (consistent with 4.2):
+     - if a `pending_question` already exists (the previous question is still unanswered) → restate that `pending_question`, do **not** modify it and do **not** increment `question_count`;
+     - otherwise (no pending question) → pick the single highest-value question (acceptance criteria > scope > ambiguity) and write `## Brainstorming`: `status: asking`, `pending_question: <question>`, `question_count += 1`.
+  2. Update frontmatter: `current_step: requirement-analysis`, `assigned_to`, `updated_at`, `agent_infra_version` (read `.agents/rules/version-stamp.md` first).
+  3. Append to Activity Log: `- {YYYY-MM-DD HH:mm:ss±HH:MM} — **Analyze Task (Brainstorming)** by {agent} — Asked Q{question_count}, awaiting answer`.
+  4. Issue sync (when `issue_number` exists, skip on any failure): read `.agents/rules/issue-sync.md` first for upstream / permission detection; update only the **task comment** per the task.md comment sync rule; keep the `status` label at `pending-design-work`; do **not** publish an analysis artifact comment.
+  5. Verification (replaces the step 8 artifact gate): run `node .agents/scripts/validate-artifact.js check task-meta .agents/workspace/active/{task-id} --skill analyze-task --format text` (the early exit has already set `current_step: requirement-analysis`, so this check is expected to pass); also keep the output of `rg -n 'Analyze Task \(Brainstorming\)' .agents/workspace/active/{task-id}/task.md` and the task-comment sync result as evidence. Do **not** run the artifact gate, `check activity-log`, or `check platform-sync` (the latter two are bound to the analysis artifact path).
+  6. User output: show only the current **single question** plus how to answer/continue (re-trigger `analyze-task {task-ref}` with the answer), and append the `Completed at` line per `.agents/rules/next-step-output.md`.
+  7. **STOP** and wait for the answer. The next trigger returns to this step.
+### 5. Perform Requirements Analysis
 Before analysis begins: if `start_date` in the frontmatter is empty, write today's date immediately (command: `date +%F`, format `YYYY-MM-DD`); keep any existing value. Before writing, read `.agents/rules/version-stamp.md` and refresh `updated_at` / `agent_infra_version` at the same time.
@@ -80,7 +120,9 @@ Follow the `analysis` step in `.agents/workflows/feature-development.yaml`:
 - [ ] Identify potential technical risks and dependencies
 - [ ] Assess effort and complexity
-### 5. Output Analysis Document
+### 6. Output Analysis Document
+> Steps 6–9 are the **Scenario A (normal output)** path. **Scenario B (ask and early-exit)** already finished its state update, task-comment sync, and verification inside step 4 and STOPped, so it does not enter these steps.
 Create `.agents/workspace/active/{task-id}/{analysis-artifact}`.
@@ -138,7 +180,7 @@ Create `.agents/workspace/active/{task-id}/{analysis-artifact}`.
 - Risk level: {High/Medium/Low}
 ```
-### 6. Update Task Status
+### 7. Update Task Status
 Get the current time:
@@ -170,7 +212,9 @@ If task.md contains a valid `issue_number`, perform these sync actions (skip and
 - Publish the `{analysis-artifact}` comment
 - Read `.agents/rules/issue-fields.md` and follow Flow A to sync every non-empty Issue field (`priority`/`effort`/`start_date`/`target_date`) from `task.md` to the Issue (idempotent; skip without blocking when `has_push=false` or the fetch/write fails)
-### 7. Verification Gate
+### 8. Verification Gate
+> This artifact gate is for **Scenario A** only; Scenario B's verification is in step 4 (`check task-meta` + explicit evidence), not the artifact gate here.
 Run the verification gate to confirm the task artifact and sync state are valid:
@@ -185,7 +229,9 @@ Handle the result as follows:
 Keep the gate output in your reply as fresh evidence. Do not claim completion without output from this run.
-### 8. Inform User
+### 9. Inform User
+> This step is the **Scenario A** normal-completion output; Scenario B's single-question output is in step 4.
 > Execute this step only after the verification gate passes.

package/templates/.agents/skills/analyze-task/SKILL.zh-CN.md CHANGED Viewed

@@ -64,9 +64,49 @@ tail .agents/workspace/active/{task-id}/task.md
 - `codescan_alert_number` - Code Scanning 告警
 - `security_alert_number` - Dependabot 告警
-**Round ≥ 2：响应上一轮审查（仅当存在审查产物时）**：若任务目录存在 `review-analysis.md` / `review-analysis-r{N}.md`，读取最高轮次的审查报告；在本轮分析产物中新增 `## 对上一轮审查的响应` 段，对每条发现先 Read/Grep 核实再处置（成立 → 接受并修正；判定为幻觉/不成立 → 附反证反驳，不默认顺从），未决分歧写入 `## 未决问题`。Round 1 无审查，跳过本段。
+**Round ≥ 2：响应上一轮审查（仅当存在审查产物时）**：若任务目录存在 `review-analysis.md` / `review-analysis-r{N}.md`，读取最高轮次的审查报告；在本轮分析产物中新增 `## 对上一轮审查的响应` 段，对每条发现先 Read/Grep 核实，再按 `.agents/rules/review-handshake.md` 的四态（`accepted` / `adjusted` / `refuted` / `cannot-judge`）处置——每态都要附相称证据，不默认顺从；并把处置回写 task.md `## 审查分歧账本` 对应行（stage=analysis，round +1）。未决分歧写入 `## 未决问题`。Round 1 无审查，跳过本段。
-### 4. 执行需求分析
+### 4. 入口需求充分性闸门
+> 本步骤的发问受 `.agents/rules/no-mid-flow-questions.md`「例外 3：入口式需求充分性澄清」授权：仅在 analyze-task 入口、仅用于判断并补齐需求充分性，一次只问一个问题，**绝不**借此征求实现 / 技术选型偏好。
+排在第 0 步状态核对与步骤 3 之后执行（提问属对外动作，须在状态核对硬闸门之后；判定与状态读写需先读到 task.md）。
+**4.1 读取跨轮状态**：读取 task.md 的 `## Brainstorming` 段（不存在则视为首次，`question_count=0`）。段格式：
+```
+## Brainstorming
+- status: asking | done
+- question_count: <int>
+- pending_question: <文本，可空>
+- answered:
+  - Q: … / A: …
+```
+**4.2 接收上一问的答案**：若存在 `pending_question`：
+- 用户当轮消息可解析出答案 → 把答案回写 `## 描述` / `## 需求`，把该 `Q/A` 追加进 `answered`，清空 `pending_question`（`question_count` 不变）。
+- 未携带答案 → 复述 `pending_question`，按下文场景 B 提问早退（不增加 `question_count`）。
+**4.3 充分性判定**（客观清单，命中任一缺口即判为不足）：
+- 描述/需求为空，或仅一句话且无可验证的验收标准；
+- 缺少目标或受影响范围（不知道要改什么 / 影响谁）；
+- 需求条目自相矛盾，或关键名词未定义而无法分析。
+**4.4 分流**：
+- **场景 A（充分 / 已收敛）**——满足任一退出条件：充分性清单全部通过 / 用户显式「直接分析 / skip」/ `question_count` 达上限（≤5）。置 `## Brainstorming` 的 `status: done`，继续步骤 5 起的正常流程；未补齐的缺口写入分析产物 `## 假设` / `## 未决问题`。
+- **场景 B（不足，提问早退）**——在本步骤内闭环并提前 STOP：
+  1. 确定本轮要问的问题（与 4.2 保持一致）：
+     - 若已存在 `pending_question`（上一问尚未得到答案）→ 复述该 `pending_question`，**不**修改它、**不**增加 `question_count`；
+     - 否则（无待答问题）→ 选最高价值的一个问题（验收标准 > 范围 > 歧义），写入 `## Brainstorming`：`status: asking`、`pending_question: <问题>`、`question_count += 1`。
+  2. 更新 frontmatter：`current_step: requirement-analysis`、`assigned_to`、`updated_at`、`agent_infra_version`（先读 `.agents/rules/version-stamp.md`）。
+  3. 追加 Activity Log：`- {YYYY-MM-DD HH:mm:ss±HH:MM} — **Analyze Task (Brainstorming)** by {agent} — Asked Q{question_count}, awaiting answer`。
+  4. Issue 同步（存在 `issue_number` 时，任一失败跳过）：先读 `.agents/rules/issue-sync.md` 完成 upstream / 权限检测；仅按 task.md 评论同步规则更新 **task 评论**；`status` label 维持 `pending-design-work`；**不**发布分析产物评论。
+  5. 校验（替代步骤 8 的 artifact gate）：`node .agents/scripts/validate-artifact.js check task-meta .agents/workspace/active/{task-id} --skill analyze-task --format text`（早退已置 `current_step: requirement-analysis`，预期通过）；并保留 `rg -n 'Analyze Task \(Brainstorming\)' .agents/workspace/active/{task-id}/task.md` 与 task 评论同步证据。**不**跑 artifact gate，也不跑 `check activity-log` / `check platform-sync`（二者绑定分析产物路径）。
+  6. 用户输出：只展示当前**单个问题** + 如何回答/继续（再次触发 `analyze-task {task-ref}` 并附答案），并按 `.agents/rules/next-step-output.md` 在末行追加 `Completed at`。
+  7. **STOP**，等待回答。下一次触发回到本步骤。
+### 5. 执行需求分析
 开始分析前：若 frontmatter 的 `start_date` 为空，立即写入当日日期（命令 `date +%F`，格式 `YYYY-MM-DD`）；已有值则保留。写入前先读取 `.agents/rules/version-stamp.md`，并同步刷新 `updated_at` / `agent_infra_version`。
@@ -79,7 +119,9 @@ tail .agents/workspace/active/{task-id}/task.md
 - [ ] 识别潜在技术风险和依赖
 - [ ] 评估工作量和复杂度
-### 5. 输出分析文档
+### 6. 输出分析文档
+> 步骤 6–9 属**场景 A（正常产出）**路径。**场景 B（提问早退）**已在步骤 4 内完成状态更新、task 评论同步与校验并 STOP，不进入这些步骤。
 创建 `.agents/workspace/active/{task-id}/{analysis-artifact}`。
@@ -137,7 +179,7 @@ tail .agents/workspace/active/{task-id}/task.md
 - 风险等级：{高/中/低}
 ```
-### 6. 更新任务状态
+### 7. 更新任务状态
 获取当前时间：
@@ -169,7 +211,9 @@ date "+%Y-%m-%d %H:%M:%S%:z"
 - 发布 `{analysis-artifact}` 评论
 - 读取 `.agents/rules/issue-fields.md`，按流程 A 把 `task.md` 中所有非空的 Issue 字段（`priority`/`effort`/`start_date`/`target_date`）同步到 Issue（幂等；`has_push=false` 或取数/写入失败时跳过，不阻断）
-### 7. 完成校验
+### 8. 完成校验
+> 本步骤的 artifact gate 仅用于**场景 A**；场景 B 的校验见步骤 4（`check task-meta` + 显式证据），不在此跑 artifact gate。
 运行完成校验，确认任务产物和同步状态符合规范：
@@ -184,7 +228,9 @@ node .agents/scripts/validate-artifact.js gate analyze-task .agents/workspace/ac
 将校验输出保留在回复中作为当次验证输出。没有当次校验输出，不得声明完成。
-### 8. 告知用户
+### 9. 告知用户
+> 本步骤为**场景 A** 正常完成输出；场景 B 的单问输出见步骤 4。
 > 仅在校验通过后执行本步骤。

package/templates/.agents/skills/code-task/config/verify.en.json CHANGED Viewed

@@ -35,6 +35,9 @@
       "expected_action_pattern": "(Code Task|Code) \\(Round \\d+(?:, fix for review-code(?:-r\\d+)?\\.md)?\\)",
       "freshness_minutes": 30
     },
+    "review-ledger": {
+      "stage_scope": ["analysis", "plan"]
+    },
     "platform-sync": {
       "when": "issue_number_exists",
       "expected_status_label": "status: in-progress",

package/templates/.agents/skills/code-task/config/verify.zh-CN.json CHANGED Viewed

@@ -35,6 +35,9 @@
       "expected_action_pattern": "(Code Task|Code) \\(Round \\d+(?:, fix for review-code(?:-r\\d+)?\\.md)?\\)",
       "freshness_minutes": 30
     },
+    "review-ledger": {
+      "stage_scope": ["analysis", "plan"]
+    },
     "platform-sync": {
       "when": "issue_number_exists",
       "expected_status_label": "status: in-progress",

package/templates/.agents/skills/code-task/reference/fix-mode.en.md CHANGED Viewed

@@ -4,9 +4,11 @@ Read this file before changing code during fix mode.
 ## Plan the Fixes
-**Verify each finding first (mandatory before editing)**: for every finding in `{review-artifact}`, Read/Grep the cited `file:line` and the corresponding `git diff` to confirm the issue is real:
-- Holds → include it in the classification and fixes below
-- Unfounded / based on a wrong `file:line` / hallucinated → do not change code; give a counter-argument in the report's `## Per-Finding Verification` section and record it under unresolved issues
+**Verify each finding first (mandatory before editing)**: for every finding in `{review-artifact}`, Read/Grep the cited `file:line` and the corresponding `git diff` to confirm the issue is real, then dispose of it with one of the four states in `.agents/rules/review-handshake.md`, and write the disposition + commensurate evidence back to the matching row in the task.md disagreement ledger (stage=code, round +1; symmetric evidence — every state needs evidence, "accept" is not a zero-cost default):
+- `accepted` → include it in the classification and fixes below; evidence cites the fix `file:line`
+- `adjusted` → use an alternative fix, with rationale; awaits review-code confirmation
+- `refuted` → verification judged it unfounded / a wrong `file:line` / hallucinated → do not change code; give a counter-argument in the report's `## Per-Finding Verification` section; awaits review-code confirmation
+- `cannot-judge` → insufficient evidence to decide; hand to reviewer/human
 - Do not expand fixes to issues the review did not list
 Classify and prioritize work:

package/templates/.agents/skills/code-task/reference/fix-mode.zh-CN.md CHANGED Viewed

@@ -4,9 +4,11 @@
 ## 规划修复
-**先逐条核实（动手前必做）**：对 `{review-artifact}` 的每一条发现，先 Read/Grep 其引用的 `file:line` 与对应 `git diff`，确认问题真实存在：
-- 成立 → 纳入下方分类与修复
-- 不成立 / 基于错误 `file:line` / 幻觉 → 不改代码，在报告 `## 对审查发现的逐条核实` 给出反证，并记入 unresolved issues
+**先逐条核实（动手前必做）**：对 `{review-artifact}` 的每一条发现，先 Read/Grep 其引用的 `file:line` 与对应 `git diff`，确认问题真实存在，再按 `.agents/rules/review-handshake.md` 的四态处置，并把处置 + 相称证据回写 task.md `## 审查分歧账本` 对应行（stage=code，round +1；对称证据：每态都要附证据，"接受"不是零成本默认）：
+- `accepted` → 纳入下方分类与修复，证据指向修复点 `file:line`
+- `adjusted` → 采用替代修法，附理由，待 review-code 复核确认
+- `refuted` → 核实判定不成立 / 基于错误 `file:line` / 幻觉 → 不改代码，在报告 `## 对审查发现的逐条核实` 给出反证，待 review-code 复核确认
+- `cannot-judge` → 证据不足无法判断，交检视方/人工
 - 不擅自把修复扩大到审查未列出的问题
 按以下顺序分类并确定优先级：

package/templates/.agents/skills/code-task/reference/report-template.en.md CHANGED Viewed

@@ -64,11 +64,11 @@ $ {command}
 ## Per-Finding Verification
-> Fix mode only; for an initial implementation write "(initial implementation this round, no review findings)". Read/Grep-verify each finding of the previous `review-code` before acting on it.
+> Fix mode only; for an initial implementation write "(initial implementation this round, no review findings)". Read/Grep-verify each finding of the previous `review-code`, then dispose of it with one of the four states in `.agents/rules/review-handshake.md`; write the disposition and **commensurate evidence** back to the matching row in the task.md disagreement ledger (stage=code, round +1). Symmetric evidence: accepted/adjusted cite the fix `file:line`; refuted/cannot-judge cite counter-evidence `file:line` or raw command output.
-| Finding | Reproduced? | Disposition (fix / rebut) |
-|------|----------|----------------------|
-| {finding} | {yes/no, with file:line or command} | {fix note, or counter-argument + recorded under unresolved} |
+| Finding | Disposition | Commensurate evidence |
+|------|----------|----------|
+| {finding} | {accepted / adjusted / refuted / cannot-judge} | {fix file:line, or counter-evidence file:line / raw command output} |
 ## Items for Review

package/templates/.agents/skills/code-task/reference/report-template.zh-CN.md CHANGED Viewed

@@ -64,11 +64,11 @@ $ {command}
 ## 对审查发现的逐条核实
-> 仅修复模式填写；初次实现写「（本轮为初次实现，无审查发现）」。对上一轮 `review-code` 的每条发现先 Read/Grep 核实再处置。
+> 仅修复模式填写；初次实现写「（本轮为初次实现，无审查发现）」。对上一轮 `review-code` 的每条发现先 Read/Grep 核实，再按 `.agents/rules/review-handshake.md` 的四态处置；并把处置与**相称证据**回写 task.md `## 审查分歧账本` 对应行（stage=code，round +1）。对称证据：accepted/adjusted 附修复点 file:line，refuted/cannot-judge 附反证 file:line 或命令原文。
-| 发现 | 是否复现 | 处置（修复 / 反驳） |
-|------|----------|----------------------|
-| {finding} | {是/否，附 file:line 或命令} | {修复说明，或反证 + 记入 unresolved} |
+| 发现 | 处置状态 | 相称证据 |
+|------|----------|----------|
+| {finding} | {accepted / adjusted / refuted / cannot-judge} | {修复点 file:line，或反证 file:line / 命令原文} |
 ## 供审查关注的内容