npm - @fitlab-ai/agent-infra - Versions diffs - 0.7.5 → 0.7.6 - Mend

@fitlab-ai/agent-infra 0.7.5 → 0.7.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (62) hide show

package/dist/lib/sandbox/commands/create.js CHANGED Viewed

@@ -820,6 +820,12 @@ function runEngineTaskCommand(engine, cmd, args, opts = {}) {
     const command = commandForEngine(engine, cmd, args);
     return runTaskCommand(command.cmd, command.args, opts);
 }
+// Builds the `docker run` args for mounting a tool's containerMount as an in-container
+// tmpfs. containerMount is an in-container path, so it is NOT engine-converted.
+export function buildTmpfsRunArgs(containerMount, tmpfs) {
+    const size = tmpfs.size ?? '512m'; // only a null/undefined size falls back to '512m'
+    return ['--tmpfs', `${containerMount}:rw,size=${size}`]; // two argv items: the flag, then `<path>:rw,size=<size>`
+}
 export function buildImage(config, tools, dockerfilePath, imageSignature, { engine, runFn = runEngine, runSafeFn = runSafeEngine, runVerboseFn = runVerboseEngine, env = process.env } = {}) {
     const selectedEngine = engine ?? detectEngine({ engine: config.engine });
     const { uid: hostUid, gid: hostGid } = resolveBuildUid({
@@ -1076,10 +1082,8 @@ export async function create(args) {
                             // The TUI reads <toolDir>/opencode.json via OPENCODE_CONFIG pinned in tools.js.
                             ensureOpenCodeModelInheritance(opencodeEntry.dir, effectiveConfig.home);
                         }
-                        const toolVolumes = effectiveResolvedTools.flatMap(({ tool, dir }) => [
-                            '-v',
-                            volumeArg(engine, dir, tool.containerMount)
-                        ]);
+                        const toolVolumes = effectiveResolvedTools.flatMap(({ tool, dir }) => tool.tmpfs ? [] : ['-v', volumeArg(engine, dir, tool.containerMount)]);
+                        const tmpfsArgs = effectiveResolvedTools.flatMap(({ tool }) => tool.tmpfs ? buildTmpfsRunArgs(tool.containerMount, tool.tmpfs) : []);
                         const workspaceDir = path.join(effectiveConfig.repoRoot, '.agents', 'workspace');
                         hostShellConfig = prepareHostShellConfig({
                             home: effectiveConfig.home,
@@ -1091,6 +1095,22 @@ export async function create(args) {
                             '-v',
                             volumeArg(engine, hostPath, containerPath, ':ro')
                         ]);
+                        // A tmpfs containerMount starts empty, so the config seeded into the
+                        // host dir before launch would be invisible in-container. Bind only
+                        // the explicitly declared seed entries (config.toml, model-catalogs)
+                        // back over the tmpfs as nested mounts — the same proven mechanism as
+                        // hostLiveMounts/auth.json, established at `docker run` time (no
+                        // post-start `docker cp`, which can land under a freshly-mounted
+                        // tmpfs instead of inside it). The allowlist is deliberate: any
+                        // runtime files left in the host dir (e.g. a stale logs_2.sqlite or
+                        // sessions/ from a previous bind-mount era) must NOT be re-mounted,
+                        // or the high-churn writes would land on the host SSD again.
+                        const tmpfsSeedVolumes = effectiveResolvedTools.flatMap(({ tool, dir }) => (tool.tmpfs?.seed ?? []).flatMap((entry) => {
+                            const hostPath = path.join(dir, entry);
+                            return fs.existsSync(hostPath)
+                                ? ['-v', volumeArg(engine, hostPath, path.posix.join(tool.containerMount, entry))]
+                                : [];
+                        }));
                         const liveMountVolumes = effectiveResolvedTools.flatMap(({ tool }) => (tool.hostLiveMounts ?? [])
                             .filter(({ hostPath }) => fs.existsSync(hostPath))
                             .flatMap(({ hostPath, containerSubpath }) => [
@@ -1133,6 +1153,8 @@ export async function create(args) {
                             volumeArg(engine, hostJoin(effectiveConfig.home, '.ssh'), '/home/devuser/.ssh', ':ro'),
                             ...dotfilesMount,
                             ...toolVolumes,
+                            ...tmpfsArgs,
+                            ...tmpfsSeedVolumes,
                             ...liveMountVolumes,
                             ...shellConfigVolumes,
                             ...envFile.dockerArgs,

package/dist/lib/sandbox/tools.js CHANGED Viewed

@@ -41,6 +41,12 @@ function createBuiltinTools(home, project) {
             containerMount: '/home/devuser/.codex',
             versionCmd: 'codex --version',
             setupHint: 'Run codex once inside the container and choose Device Code login if needed.',
+            // codex churns ~/.codex/logs_2.sqlite heavily (upstream openai/codex#24275);
+            // a bind-mount would write-amplify onto the host SSD via virtiofs. Mount the
+            // codex home as tmpfs so those logs stay in RAM and die with the container.
+            // Only the seeded config (config.toml, model-catalogs) is bound back over
+            // the tmpfs; runtime files like logs_2.sqlite must stay in RAM.
+            tmpfs: { size: '512m', seed: ['config.toml', 'model-catalogs'] },
             hostLiveMounts: [
                 { hostPath: hostJoin(home, '.codex', 'auth.json'), containerSubpath: 'auth.json' }
             ],
@@ -218,6 +224,18 @@ function parseHostLiveMounts(value, context) {
         };
     });
 }
+function parseTmpfs(value, context) { // normalizes a custom tool's optional "tmpfs" field; `context` prefixes error messages
+    if (value === undefined) {
+        return undefined; // field omitted: nothing to normalize
+    }
+    if (!isPlainObject(value)) {
+        throw new Error(`${context}: field "tmpfs" must be an object when provided`);
+    }
+    return {
+        size: asOptionalNonEmptyString(value.size, 'tmpfs.size', context),
+        seed: asStringArray(value.seed, 'tmpfs.seed', context) // NOTE(review): seed entries are later joined onto host and container mount paths; confirm `..` or absolute entries are rejected somewhere
+    };
+}
 export function parseCustomTool(entry, index, options) {
     const context = `customTools[${index}]`;
     if (!isPlainObject(entry)) {
@@ -246,7 +264,8 @@ export function parseCustomTool(entry, index, options) {
         hostPreSeedDirs: parseHostPreSeedDirs(entry.hostPreSeedDirs, context),
         pathRewriteFiles: asStringArray(entry.pathRewriteFiles, 'pathRewriteFiles', context),
         hostLiveMounts: parseHostLiveMounts(entry.hostLiveMounts, context),
-        postSetupCmds: asStringArray(entry.postSetupCmds, 'postSetupCmds', context)
+        postSetupCmds: asStringArray(entry.postSetupCmds, 'postSetupCmds', context),
+        tmpfs: parseTmpfs(entry.tmpfs, context)
     };
     validateTool(tool);
     return tool;

package/dist/lib/task/commands/log.js CHANGED Viewed

@@ -3,12 +3,14 @@ import { formatTable } from "../../table.js";
 import { resolveTaskRef } from "../resolve-ref.js";
 const USAGE = `Usage: ai task log <N | #N | TASK-id>
-Renders a task's activity log as a chronological timeline table.
+Renders a task's activity log as a per-step status table. A step's start and
+completion are paired onto one row: STARTED holds the start time, DONE the
+completion time (or '(in progress)' while still running).
   <ref>   Bare numeric / '#N' short id, or a full TASK-YYYYMMDD-HHMMSS id.
-Columns: # (timeline position) / TIME / STEP / AGENT / NOTE
+Columns: # (row) / STEP / AGENT / STARTED / DONE / NOTE
 `;
-const TABLE_HEADERS = ['#', 'TIME', 'STEP', 'AGENT', 'NOTE'];
+const TABLE_HEADERS = ['#', 'STEP', 'AGENT', 'STARTED', 'DONE', 'NOTE'];
 // The activity-log H2 heading is language-dependent (zh template / en template).
 const HEADING_RE = /^##\s+(活动日志|Activity Log)\s*$/;
 const NEXT_H2_RE = /^##\s/;
@@ -16,6 +18,11 @@ const NEXT_H2_RE = /^##\s/;
 // (U+2014). STEP/AGENT are non-greedy so a note that itself contains ' — ' or
 // '→' is not mis-split; NOTE greedily takes the rest of the line.
 const ENTRY_RE = /^- (\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}[+-]\d{2}:\d{2}) — \*\*(.+?)\*\* by (.+?) — (.*)$/;
+// A start marker reuses the normal entry grammar and only suffixes its action
+// with ` [started]`; the matching done entry carries the identical base action
+// without the suffix. Pairing therefore keys on the base action (including any
+// `(Round N)`), so every round and every repeated execution pairs on its own.
+const STARTED_SUFFIX_RE = /\s*\[started\]\s*$/;
 function parseActivityLog(content) {
     const lines = content.split('\n');
     let i = 0;
@@ -40,6 +47,41 @@ function parseActivityLog(content) {
     parsed.sort((a, b) => a.epoch - b.epoch || a.order - b.order);
     return { sectionFound: true, entries: parsed.map((p) => p.entry) };
 }
+// Collapse a chronological entry list into per-step rows: a `[started]` marker
+// opens a row, the next matching done entry fills it in place (FIFO per base
+// action). Started-only rows stay in flight; done-only entries (legacy logs with
+// no start marker) render as standalone rows. Result order = first-seen order,
+// which is already ascending because `entries` is sorted ascending. Reads { time, step, agent, note } from each entry; returns rows of { step, agent, started, done, note }.
+function pairEntries(entries) {
+    const rows = []; // output rows, in the order each row's first entry was seen
+    const open = new Map(); // base action -> FIFO queue of started rows still waiting for a done entry
+    for (const e of entries) {
+        const isStarted = STARTED_SUFFIX_RE.test(e.step);
+        const base = e.step.replace(STARTED_SUFFIX_RE, ''); // step text with the trailing ` [started]` removed
+        if (isStarted) {
+            const row = { step: base, agent: e.agent, started: e.time, done: '', note: e.note }; // empty `done` marks the row as in flight
+            rows.push(row);
+            const queue = open.get(base);
+            if (queue)
+                queue.push(row);
+            else
+                open.set(base, [row]);
+        }
+        else {
+            const pending = open.get(base)?.shift(); // oldest unfinished start for this base, if any
+            if (pending) {
+                // Done fills the open row; the done entry carries the meaningful note.
+                pending.done = e.time;
+                pending.agent = e.agent; // the completing entry's agent replaces the one recorded at start
+                pending.note = e.note;
+            }
+            else {
+                rows.push({ step: base, agent: e.agent, started: '', done: e.time, note: e.note }); // no open start: standalone done-only row
+            }
+        }
+    }
+    return rows;
+}
 function log(args = []) {
     if (args.length === 0 || args[0] === '--help' || args[0] === '-h') {
         process.stdout.write(USAGE);
@@ -65,11 +107,19 @@ function log(args = []) {
         process.exitCode = 1;
         return;
     }
-    const rows = entries.map((e, idx) => [String(idx + 1), e.time, e.step, e.agent, e.note]);
+    const steps = pairEntries(entries);
+    const rows = steps.map((s, idx) => [
+        String(idx + 1),
+        s.step,
+        s.agent,
+        s.started,
+        s.done || (s.started ? '(in progress)' : ''),
+        s.note
+    ]);
     for (const line of formatTable(TABLE_HEADERS, rows, { zebra: Boolean(process.stdout.isTTY) })) {
         process.stdout.write(`${line}\n`);
     }
-    process.stdout.write(`Total: ${entries.length} entries\n`);
+    process.stdout.write(`Total: ${steps.length} steps\n`);
 }
-export { log, parseActivityLog };
+export { log, parseActivityLog, pairEntries };
 //# sourceMappingURL=log.js.map

package/lib/sandbox/commands/create.ts CHANGED Viewed

@@ -1084,6 +1084,13 @@ function runEngineTaskCommand(engine: string, cmd: string, args: string[], opts:
   return runTaskCommand(command.cmd, command.args, opts);
 }
+// Builds the `docker run` args for mounting a tool's containerMount as an in-container
+// tmpfs. containerMount is an in-container path, so it is NOT engine-converted.
+export function buildTmpfsRunArgs(containerMount: string, tmpfs: { size?: string }): string[] {
+  const size = tmpfs.size ?? '512m'; // only a null/undefined size falls back to '512m'
+  return ['--tmpfs', `${containerMount}:rw,size=${size}`]; // two argv items: the flag, then `<path>:rw,size=<size>`
+}
 export function buildImage(
   config: Pick<SandboxCreateConfig, 'project' | 'imageName' | 'repoRoot'> & { engine?: string | null },
   tools: SandboxTool[],
@@ -1397,10 +1404,12 @@ export async function create(args: string[]): Promise<void> {
               // The TUI reads <toolDir>/opencode.json via OPENCODE_CONFIG pinned in tools.js.
               ensureOpenCodeModelInheritance(opencodeEntry.dir, effectiveConfig.home);
             }
-            const toolVolumes = effectiveResolvedTools.flatMap(({ tool, dir }) => [
-              '-v',
-              volumeArg(engine, dir, tool.containerMount)
-            ]);
+            const toolVolumes = effectiveResolvedTools.flatMap(({ tool, dir }) =>
+              tool.tmpfs ? [] : ['-v', volumeArg(engine, dir, tool.containerMount)]
+            );
+            const tmpfsArgs = effectiveResolvedTools.flatMap(({ tool }) =>
+              tool.tmpfs ? buildTmpfsRunArgs(tool.containerMount, tool.tmpfs) : []
+            );
             const workspaceDir = path.join(effectiveConfig.repoRoot, '.agents', 'workspace');
             hostShellConfig = prepareHostShellConfig({
               home: effectiveConfig.home,
@@ -1412,6 +1421,24 @@ export async function create(args: string[]): Promise<void> {
               '-v',
               volumeArg(engine, hostPath, containerPath, ':ro')
             ]);
+            // A tmpfs containerMount starts empty, so the config seeded into the
+            // host dir before launch would be invisible in-container. Bind only
+            // the explicitly declared seed entries (config.toml, model-catalogs)
+            // back over the tmpfs as nested mounts — the same proven mechanism as
+            // hostLiveMounts/auth.json, established at `docker run` time (no
+            // post-start `docker cp`, which can land under a freshly-mounted
+            // tmpfs instead of inside it). The allowlist is deliberate: any
+            // runtime files left in the host dir (e.g. a stale logs_2.sqlite or
+            // sessions/ from a previous bind-mount era) must NOT be re-mounted,
+            // or the high-churn writes would land on the host SSD again.
+            const tmpfsSeedVolumes = effectiveResolvedTools.flatMap(({ tool, dir }) =>
+              (tool.tmpfs?.seed ?? []).flatMap((entry) => {
+                const hostPath = path.join(dir, entry);
+                return fs.existsSync(hostPath)
+                  ? ['-v', volumeArg(engine, hostPath, path.posix.join(tool.containerMount, entry))]
+                  : [];
+              })
+            );
             const liveMountVolumes = effectiveResolvedTools.flatMap(({ tool }) =>
               (tool.hostLiveMounts ?? [])
                 .filter(({ hostPath }) => fs.existsSync(hostPath))
@@ -1466,6 +1493,8 @@ export async function create(args: string[]): Promise<void> {
               volumeArg(engine, hostJoin(effectiveConfig.home, '.ssh'), '/home/devuser/.ssh', ':ro'),
               ...dotfilesMount,
               ...toolVolumes,
+              ...tmpfsArgs,
+              ...tmpfsSeedVolumes,
               ...liveMountVolumes,
               ...shellConfigVolumes,
               ...envFile.dockerArgs,

package/lib/sandbox/tools.ts CHANGED Viewed

@@ -19,6 +19,13 @@ export type SandboxTool = {
   pathRewriteFiles?: string[];
   hostLiveMounts?: Array<{ hostPath: string; containerSubpath: string }>;
   postSetupCmds?: string[];
+  // When set, containerMount is mounted as an in-container tmpfs (RAM) instead
+  // of bind-mounting the host config dir, keeping high-churn tool logs off the
+  // host disk. `seed` lists the host-dir entries (relative to the tool's config
+  // dir) to bind back over the tmpfs so seeded config stays visible — it is an
+  // explicit allowlist so runtime files (e.g. logs_2.sqlite, sessions) left in
+  // the host dir are NOT re-mounted, which would defeat the tmpfs.
+  tmpfs?: { size?: string; seed?: string[] };
 };
 type ToolsConfig = {
@@ -70,6 +77,12 @@ function createBuiltinTools(home: string, project: string): Record<string, Sandb
       containerMount: '/home/devuser/.codex',
       versionCmd: 'codex --version',
       setupHint: 'Run codex once inside the container and choose Device Code login if needed.',
+      // codex churns ~/.codex/logs_2.sqlite heavily (upstream openai/codex#24275);
+      // a bind-mount would write-amplify onto the host SSD via virtiofs. Mount the
+      // codex home as tmpfs so those logs stay in RAM and die with the container.
+      // Only the seeded config (config.toml, model-catalogs) is bound back over
+      // the tmpfs; runtime files like logs_2.sqlite must stay in RAM.
+      tmpfs: { size: '512m', seed: ['config.toml', 'model-catalogs'] },
       hostLiveMounts: [
         { hostPath: hostJoin(home, '.codex', 'auth.json'), containerSubpath: 'auth.json' }
       ],
@@ -259,6 +272,19 @@ function parseHostLiveMounts(value: unknown, context: string): SandboxTool['host
   });
 }
+function parseTmpfs(value: unknown, context: string): SandboxTool['tmpfs'] { // normalizes a custom tool's optional "tmpfs" field; `context` prefixes error messages
+  if (value === undefined) {
+    return undefined; // field omitted: nothing to normalize
+  }
+  if (!isPlainObject(value)) {
+    throw new Error(`${context}: field "tmpfs" must be an object when provided`);
+  }
+  return {
+    size: asOptionalNonEmptyString(value.size, 'tmpfs.size', context),
+    seed: asStringArray(value.seed, 'tmpfs.seed', context) // NOTE(review): seed entries are later joined onto host and container mount paths; confirm `..` or absolute entries are rejected somewhere
+  };
+}
 export function parseCustomTool(
   entry: unknown,
   index: number,
@@ -294,7 +320,8 @@ export function parseCustomTool(
     hostPreSeedDirs: parseHostPreSeedDirs(entry.hostPreSeedDirs, context),
     pathRewriteFiles: asStringArray(entry.pathRewriteFiles, 'pathRewriteFiles', context),
     hostLiveMounts: parseHostLiveMounts(entry.hostLiveMounts, context),
-    postSetupCmds: asStringArray(entry.postSetupCmds, 'postSetupCmds', context)
+    postSetupCmds: asStringArray(entry.postSetupCmds, 'postSetupCmds', context),
+    tmpfs: parseTmpfs(entry.tmpfs, context)
   };
   validateTool(tool);

package/lib/task/commands/log.ts CHANGED Viewed

@@ -4,13 +4,15 @@ import { resolveTaskRef } from '../resolve-ref.ts';
 const USAGE = `Usage: ai task log <N | #N | TASK-id>
-Renders a task's activity log as a chronological timeline table.
+Renders a task's activity log as a per-step status table. A step's start and
+completion are paired onto one row: STARTED holds the start time, DONE the
+completion time (or '(in progress)' while still running).
   <ref>   Bare numeric / '#N' short id, or a full TASK-YYYYMMDD-HHMMSS id.
-Columns: # (timeline position) / TIME / STEP / AGENT / NOTE
+Columns: # (row) / STEP / AGENT / STARTED / DONE / NOTE
 `;
-const TABLE_HEADERS = ['#', 'TIME', 'STEP', 'AGENT', 'NOTE'] as const;
+const TABLE_HEADERS = ['#', 'STEP', 'AGENT', 'STARTED', 'DONE', 'NOTE'] as const;
 // The activity-log H2 heading is language-dependent (zh template / en template).
 const HEADING_RE = /^##\s+(活动日志|Activity Log)\s*$/;
@@ -23,6 +25,17 @@ const ENTRY_RE =
 type LogEntry = { time: string; step: string; agent: string; note: string };
+// One rendered row = one step instance. `started`/`done` are timestamps; an empty
+// `done` with a non-empty `started` means the step is still in flight, while an
+// empty `started` is a historical done-only entry (no start marker was written).
+type StepRow = { step: string; agent: string; started: string; done: string; note: string };
+// A start marker reuses the normal entry grammar and only suffixes its action
+// with ` [started]`; the matching done entry carries the identical base action
+// without the suffix. Pairing therefore keys on the base action (including any
+// `(Round N)`), so every round and every repeated execution pairs on its own.
+const STARTED_SUFFIX_RE = /\s*\[started\]\s*$/;
 function parseActivityLog(content: string): { sectionFound: boolean; entries: LogEntry[] } {
   const lines = content.split('\n');
   let i = 0;
@@ -44,6 +57,38 @@ function parseActivityLog(content: string): { sectionFound: boolean; entries: Lo
   return { sectionFound: true, entries: parsed.map((p) => p.entry) };
 }
+// Collapse a chronological entry list into per-step rows: a `[started]` marker
+// opens a row, the next matching done entry fills it in place (FIFO per base
+// action). Started-only rows stay in flight; done-only entries (legacy logs with
+// no start marker) render as standalone rows. Result order = first-seen order,
+// which is already ascending because `entries` is sorted ascending. Reads { time, step, agent, note } from each entry; returns rows of { step, agent, started, done, note }.
+function pairEntries(entries: LogEntry[]): StepRow[] {
+  const rows: StepRow[] = []; // output rows, in the order each row's first entry was seen
+  const open = new Map<string, StepRow[]>(); // base action -> FIFO queue of started rows still waiting for a done entry
+  for (const e of entries) {
+    const isStarted = STARTED_SUFFIX_RE.test(e.step);
+    const base = e.step.replace(STARTED_SUFFIX_RE, ''); // step text with the trailing ` [started]` removed
+    if (isStarted) {
+      const row: StepRow = { step: base, agent: e.agent, started: e.time, done: '', note: e.note }; // empty `done` marks the row as in flight
+      rows.push(row);
+      const queue = open.get(base);
+      if (queue) queue.push(row);
+      else open.set(base, [row]);
+    } else {
+      const pending = open.get(base)?.shift(); // oldest unfinished start for this base, if any
+      if (pending) {
+        // Done fills the open row; the done entry carries the meaningful note.
+        pending.done = e.time;
+        pending.agent = e.agent; // the completing entry's agent replaces the one recorded at start
+        pending.note = e.note;
+      } else {
+        rows.push({ step: base, agent: e.agent, started: '', done: e.time, note: e.note }); // no open start: standalone done-only row
+      }
+    }
+  }
+  return rows;
+}
 function log(args: string[] = []): void {
   if (args.length === 0 || args[0] === '--help' || args[0] === '-h') {
     process.stdout.write(USAGE);
@@ -70,11 +115,19 @@ function log(args: string[] = []): void {
     process.exitCode = 1;
     return;
   }
-  const rows = entries.map((e, idx) => [String(idx + 1), e.time, e.step, e.agent, e.note]);
+  const steps = pairEntries(entries);
+  const rows = steps.map((s, idx) => [
+    String(idx + 1),
+    s.step,
+    s.agent,
+    s.started,
+    s.done || (s.started ? '(in progress)' : ''),
+    s.note
+  ]);
   for (const line of formatTable(TABLE_HEADERS, rows, { zebra: Boolean(process.stdout.isTTY) })) {
     process.stdout.write(`${line}\n`);
   }
-  process.stdout.write(`Total: ${entries.length} entries\n`);
+  process.stdout.write(`Total: ${steps.length} steps\n`);
 }
-export { log, parseActivityLog };
+export { log, parseActivityLog, pairEntries };

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@fitlab-ai/agent-infra",
-  "version": "0.7.5",
+  "version": "0.7.6",
   "description": "Bootstrap tool for AI multi-tool collaboration infrastructure — works with Claude Code, Codex, Gemini CLI, and OpenCode",
   "license": "MIT",
   "type": "module",

package/templates/.agents/rules/no-mid-flow-questions.en.md CHANGED Viewed

@@ -53,6 +53,17 @@ For every SKILL execution context not covered by any exemption above, the defaul
    - Meaning: the assumptions section records assumptions used for this run that may be revisited later; the open questions section records unresolved questions for human review
    - If the artifact template does not reserve these sections, append them as needed. If there are no assumptions or open questions, do not force empty sections.
+## Key Design Decision Marking And Ledgering
+When an open question is a key design decision that needs human judgment, the executor must mark the item with `[needs-human-decision]` and write the matching `HD-` row to task.md `## Review Disagreement Ledger` according to `.agents/rules/review-handshake.md`.
+Use these checks together:
+- **Source test**: can the conclusion be uniquely derived from the task description, existing requirements, code conventions, or an approved plan? If not, and multiple reasonable options exist, it is a choice.
+- **Impact test**: does the choice change scope, boundaries, defaults, or thresholds; is it irreversible or costly to undo; or does it set a precedent for later tasks? Any hit upgrades it to a key design decision.
+- **Small-impact exemption**: if it is only a local, reversible, low-cost execution detail, record it under `## Assumptions` instead of upgrading it to a human ruling.
+- **Fallback**: when unsure whether it is key, treat it as key; `review-*` must check whether the executor left unmarked any item that should have been upgraded to `[needs-human-decision]`.
 ## Human Review Checkpoint Semantics
 A mandatory human review checkpoint means:

package/templates/.agents/rules/no-mid-flow-questions.zh-CN.md CHANGED Viewed

@@ -53,6 +53,17 @@
    - 含义：`假设` 段记录本次按某假设推进、未来若假设不成立可推翻；`未决问题` 段记录本次未决、需要人工审查时裁定的问题
    - 产物模板未预留这两段时，按需追加；没有假设或未决问题时不必强行写空段。
+## 关键设计决策标记与落账
+当未决问题属于需要人工裁定的关键设计决策时，执行方必须在该条目前标记 `[needs-human-decision]`，并按 `.agents/rules/review-handshake.md` 在 task.md `## 审查分歧账本` 写入 `HD-` 行。
+判定时同时使用以下检查：
+- **来源测试**：结论是否能从任务描述、既有需求、代码约定或已批准方案中唯一推出？若不能，且存在多个合理选项，则它是选择题。
+- **影响测试**：该选择是否改变范围、边界、默认值、阈值，是否不可逆 / 成本较高，或是否会扩散成后续任务先例？任一命中即升级为关键设计决策。
+- **小影响豁免**：若它只是局部、可逆、低成本的执行细节，写入 `## 假设` 即可，不升级为人工裁决。
+- **兜底**：无法判断是否关键时按关键处理；`review-*` 需要复核执行方是否漏标应升级的 `[needs-human-decision]`。
 ## 人工审查检查点语义
 「强制性人工审查检查点」（mandatory human review checkpoint）的语义是：

package/templates/.agents/rules/review-handshake.en.md CHANGED Viewed

@@ -58,13 +58,27 @@ The single source of truth for disagreement state is the fixed `## 审查分歧
 | CD-1 | code | 1 | blocker | open | review-code.md#1 |
 ```
-- `id`: stage prefix + ordinal — analysis→`AN-`, plan→`PL-`, code→`CD-`.
+- `id`: stage prefix + ordinal — analysis→`AN-`, plan→`PL-`, code→`CD-`; executor-raised human-ruling rows use `HD-`.
 - `stage` ∈ `{analysis, plan, code}` (plus the reserved value `post-review-commit`, used only for post-review exemption rows).
 - `status` legal enum: `open` / `accepted` / `adjusted` / `refuted` / `cannot-judge` / `confirmed` / `needs-human-decision` / `closed` / `human-decided`.
 - **Terminal set (gate passes)**: `{confirmed, closed, human-decided}`; everything else is blocking.
 - **Write responsibility**: `review-*` raises a finding → upsert an `open` row; `*-task` responds → set four-state and fill `evidence`, `round` +1; next `review-*` → `confirmed` / back to `open` / `needs-human-decision`; an executor fix verified by the next review → `closed`; a human ruling → `human-decided`.
 - **Backward compatible**: when task.md has no such section the gate treats it as no open disagreements and passes.
+### Executor-raised human-ruling rows
+When an executor marks an item in the artifact `## Open Questions` section as `[needs-human-decision]`, it must upsert the matching `HD-` row in task.md `## Review Disagreement Ledger`:
+```markdown
+| HD-1 | plan | - | decision | needs-human-decision | plan.md#HD-1 |
+```
+- `stage` is the stage where the decision arose: `analysis` / `plan` / `code`.
+- `round` is `-` because this is not a review-finding handshake round.
+- `severity` is always `decision`.
+- `status` starts as `needs-human-decision`, so the existing gate blocks it.
+- After a human records the ruling in task.md `## Human Rulings`, flip the matching `HD-` row to `human-decided` and point `evidence` to that ruling.
 ## post-review commit gate (code stage only)
 - The highest-round `review-code` report records `Review Baseline Commit` (R, `git rev-parse HEAD`) and `Reviewed Diff Fingerprint` (F, full worktree diff fingerprint).

package/templates/.agents/rules/review-handshake.zh-CN.md CHANGED Viewed

@@ -58,13 +58,27 @@
 | CD-1 | code | 1 | blocker | open | review-code.md#1 |
 ```
-- `id`：阶段前缀 + 序号——analysis→`AN-`、plan→`PL-`、code→`CD-`。
+- `id`：阶段前缀 + 序号——analysis→`AN-`、plan→`PL-`、code→`CD-`；执行方自提的人工裁决行使用 `HD-`。
 - `stage` ∈ `{analysis, plan, code}`（外加保留值 `post-review-commit`，仅用于 post-review 豁免行）。
 - `status` 合法枚举：`open` / `accepted` / `adjusted` / `refuted` / `cannot-judge` / `confirmed` / `needs-human-decision` / `closed` / `human-decided`。
 - **终态集合（gate 放行）**：`{confirmed, closed, human-decided}`；其余为阻塞态。
 - **写入责任**：`review-*` 提 finding → upsert `open` 行；`*-task` 响应 → 改四态并填 `evidence`、`round` +1；下一轮 `review-*` → `confirmed` / 置回 `open` / `needs-human-decision`；执行方修复经下一轮 review 验证通过 → `closed`；人工裁决 → `human-decided`。
 - **向后兼容**：task.md 无此段时，gate 视为无未决分歧而放行。
+### 执行方自提人工裁决行
+当执行方在产物 `## 未决问题` 中标记 `[needs-human-decision]` 时，必须在 task.md `## 审查分歧账本` upsert 对应 `HD-` 行：
+```markdown
+| HD-1 | plan | - | decision | needs-human-decision | plan.md#HD-1 |
+```
+- `stage` 填该决策产生的阶段：`analysis` / `plan` / `code`。
+- `round` 填 `-`，因为它不是 review finding 的握手轮次。
+- `severity` 固定填 `decision`。
+- `status` 初始填 `needs-human-decision`，因此会被现有 gate 阻塞。
+- 人工在 task.md `## 人工裁决` 段记录裁定后，把对应 `HD-` 行翻为 `human-decided`，`evidence` 指向该裁定记录。
 ## post-review commit 门禁（仅 code 阶段）
 - `review-code` 在最高轮报告中记录 `审查基线提交`（R，`git rev-parse HEAD`）和 `审查差异指纹`（F，完整工作区 diff fingerprint）。

package/templates/.agents/rules/task-management.en.md CHANGED Viewed

@@ -37,3 +37,28 @@ Map user intent to the corresponding workflow command:
 - `complete-task`: update `status`, `current_step`, `completed_at`, `updated_at`, `agent_infra_version`
 - `block-task`: update `status`, `blocked_at`, `blocked_reason`, `updated_at`, `agent_infra_version`
 - `cancel-task`: update `status`, `cancelled_at`, `cancel_reason`, `updated_at`, `agent_infra_version`
+## Activity Log started / done dual-marker convention (single source of truth)
+> This section is the sole authoritative definition of the started/done dual marker. The skills, the renderer (`lib/task/commands/log.ts`), and the validator (`.agents/scripts/validate-artifact.js`) all defer to it; keep this section in sync when changing any of them.
+**Line grammar is unchanged**: both started and done use the existing entry grammar `- {YYYY-MM-DD HH:mm:ss±HH:MM} — **{action}** by {agent} — {note}`, so the parsing regexes (`log.ts:ENTRY_RE` and `validate-artifact.js:ACTIVITY_LOG_PATTERN`) need no change.
+- **started line** (written when the step begins): the action is the existing base suffixed with ` [started]`, and the note is `started`:
+  `- {time} — **{base} [started]** by {agent} — started`
+- **done line** (written when the step completes, unchanged from today): the action is the base itself:
+  `- {time} — **{base}** by {agent} — {completion summary}`
+- `{base}` is that skill's existing done action text, including `(Round {N})` (e.g. `Plan Task (Round 1)`). started and done must share the same `{base}` to pair.
+**Pairing and rendering** (`ai task log`): a started entry pairs with the next same-`{base}` done entry onto one row (repeated executions of the same base pair FIFO by ascending time). The STARTED column shows the start time, DONE the completion time; started with no done = in progress (DONE shows `(in progress)`); done with no started (legacy logs) = a standalone completed row. All three shapes are valid and never error.
+**Gate** (`checkActivityLog`): when computing the "latest action / freshness" it skips `[started]` lines (ascending-order and format checks still cover every line), so a started marker never satisfies a skill's `expected_action_pattern`.
+**Skills that write started**: every workflow skill that **appends entries to a task's `## Activity Log`** writes started, so the STARTED column stays uniformly complete across the whole `ai task log` table. Two forms, depending on whether task.md already exists:
+- **Standard form (task.md already exists)** — append the started line when that round's real work begins (after prerequisites, before the first artifact action) and the done line on completion:
+  `analyze-task`, `plan-task`, `code-task`, `review-analysis`, `review-plan`, `review-code`, `commit`, `complete-task`, `create-pr`, `watch-pr`, `block-task`, `cancel-task`, `restore-task`, `close-codescan`, `close-dependabot`.
+- **Deferred form (the skill creates task.md, so there is no file to write to at the start)** — capture `started_at` in memory before running, then when writing the Activity Log at the end, **append both lines at once** (started line uses `started_at`, done line uses the completion time):
+  `create-task`, `import-issue`, `import-codescan`, `import-dependabot`.
+**Exceptions**: read-only inspection skills that do not represent real progress (e.g. `check-task`) do not write started. A bare operation with no task.md context (e.g. a `commit` not tied to a task) likewise writes no started line.

package/templates/.agents/rules/task-management.zh-CN.md CHANGED Viewed

@@ -37,3 +37,32 @@
 - `complete-task`：更新 `status`、`current_step`、`completed_at`、`updated_at`、`agent_infra_version`
 - `block-task`：更新 `status`、`blocked_at`、`blocked_reason`、`updated_at`、`agent_infra_version`
 - `cancel-task`：更新 `status`、`cancelled_at`、`cancel_reason`、`updated_at`、`agent_infra_version`
+## Activity Log started / done 双标记约定（单一事实源）
+> 本节是 started/done 双标记的唯一权威定义。各 SKILL、渲染器（`lib/task/commands/log.ts`）、
+> 校验脚本（`.agents/scripts/validate-artifact.js`）的相关行为都以本节为准；改动任一端时同步本节。
+**行语法不变**：started 与 done 都沿用既有条目语法
+`- {YYYY-MM-DD HH:mm:ss±HH:MM} — **{action}** by {agent} — {note}`，因此解析正则
+（`log.ts:ENTRY_RE` 与 `validate-artifact.js:ACTIVITY_LOG_PATTERN`）无需改动。
+- **started 行**（步骤开始时写）：action 在既有基名末尾加后缀 ` [started]`，note 用 `started`：
+  `- {time} — **{基名} [started]** by {agent} — started`
+- **done 行**（步骤完成时写，与现状一致）：action 即基名本身：
+  `- {time} — **{基名}** by {agent} — {完成说明}`
+- `{基名}` 指该 SKILL 既有 done 条目的 action 文本，含 `(Round {N})`（如 `Plan Task (Round 1)`）。
+  started 与 done 共用同一 `{基名}` 才能配对。
+**配对与渲染**（`ai task log`）：按 `{基名}` 把 started 与其后最近的同名 done 配成一行（同基名多次执行按时间升序 FIFO 配对）。STARTED 列显示 started 时间、DONE 列显示 done 时间；只有 started 无 done = 进行中（DONE 显示 `(in progress)`）；只有 done 无 started（历史日志）= 单态完成行。三种形态都合法、不报错。
+**gate**（`checkActivityLog`）：计算「最新 action / freshness」时跳过 `[started]` 行（升序与格式校验仍覆盖全部行），故 started 标记不会污染各 SKILL 的 `expected_action_pattern`。
+**写 started 的 SKILL**：所有**会向某个任务的 `## 活动日志` 追加条目**的工作流 SKILL 都写 started，保证 `ai task log` 整张表的 STARTED 列一致完整。两种写法按技能是否已有 task.md 区分：
+- **常规写法（task.md 已存在）**——在「该轮实质工作开始时」（前置条件确认后、第一个产出动作前）追加 started 行，完成时写 done 行：
+  `analyze-task`、`plan-task`、`code-task`、`review-analysis`、`review-plan`、`review-code`、`commit`、`complete-task`、`create-pr`、`watch-pr`、`block-task`、`cancel-task`、`restore-task`、`close-codescan`、`close-dependabot`。
+- **延迟补写（本技能创建 task.md，开始时无文件可写）**——开始执行前先在内存记录 `started_at`，最后写活动日志时**一次性补两条**（started 行用 `started_at`、done 行用完成时间）：
+  `create-task`、`import-issue`、`import-codescan`、`import-dependabot`。
+**例外**：`check-task` 等只读巡检类、不代表实质工作推进的技能不写 started。无 task.md 上下文的纯操作（如无关联任务的 `commit`）同样跳过。

package/templates/.agents/scripts/validate-artifact.js CHANGED Viewed

@@ -38,6 +38,10 @@ const DEFAULT_FRESHNESS_MINUTES = 30;
 const DATE_TIME_PATTERN = /^\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}(?:[+-]\d{2}:\d{2})?$/;
 const AGENT_INFRA_VERSION_PATTERN = /^v\d+\.\d+\.\d+(?:-[0-9A-Za-z.-]+)?(?:\+[0-9A-Za-z.-]+)?$/;
 const ACTIVITY_LOG_PATTERN = /^- (\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}(?:[+-]\d{2}:\d{2})?) — \*\*(.+?)\*\* by (.+?) — (.+)$/;
+// Start markers (action suffixed with ` [started]`) are excluded from the
+// "latest action" / freshness computation so a step's in-flight marker never
+// satisfies a skill's expected_action_pattern; the matching done entry does.
+const ACTIVITY_LOG_STARTED_RE = /\s*\[started\]\s*$/;
 const BRANCH_SLUG_PATTERN = /^[a-z0-9]+(?:-[a-z0-9]+)*$/;
 // Review disagreement ledger (see .agents/rules/review-handshake.md).
@@ -462,8 +466,13 @@ function checkActivityLog({ taskDir, config }) {
     }
     previousTimestamp = timestamp;
-    latestTimestamp = timestamp;
-    latestAction = action;
+    // Ascending order is checked over every entry, but a `[started]` marker is
+    // not a terminal action: keep latestAction/latestTimestamp on the most
+    // recent done entry so expected_action_pattern and freshness see it.
+    if (!ACTIVITY_LOG_STARTED_RE.test(action)) {
+      latestTimestamp = timestamp;
+      latestAction = action;
+    }
   }
   if (config.expected_action_pattern && !new RegExp(config.expected_action_pattern).test(latestAction)) {

package/templates/.agents/skills/analyze-task/SKILL.en.md CHANGED Viewed

@@ -31,6 +31,16 @@ Before the state check is complete, do not make external-state assertions such a
 > If `{task-id}` matches `^[#]?[0-9]+$` (bare numeric or `#`-prefixed), follow the "SKILL parameter resolver" section of `.agents/rules/task-short-id.md`; treat `{task-id}` as the resolved full `TASK-YYYYMMDD-HHMMSS` form for every downstream command.
+## Step Start: Write the started Marker
+After prerequisites pass and before this round's first artifact action, append a started marker to task.md `## Activity Log` (same base action as this round's done entry plus a ` [started]` suffix, note `started`):
+```
+- {YYYY-MM-DD HH:mm:ss±HH:MM} — **Analyze Task (Round {N}) [started]** by {agent} — started
+```
+`ai task log` pairs it with the done entry written on completion (step 7) and renders both on one row (in progress → done). For the format and pairing rules, see the "Activity Log started / done dual-marker convention" in `.agents/rules/task-management.md`.
 ## Steps
 ### 1. Verify Prerequisites
@@ -172,6 +182,7 @@ Create `.agents/workspace/active/{task-id}/{analysis-artifact}`.
 ## Open Questions
 > If there are unresolved questions for human review, list them here; omit this section if there are none.
+> Mark key design decisions with `[needs-human-decision]` and write `HD-` ledger rows according to `.agents/rules/no-mid-flow-questions.md`.
 - {open question}