npm - @kontourai/flow-agents - Versions diffs - 2.0.1 → 2.1.1 - Mend

@kontourai/flow-agents 2.0.1 → 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41) hide show

package/.github/actions/trust-verify/action.yml +4 -2
package/.github/workflows/ci.yml +16 -4
package/.github/workflows/docs-pages.yml +1 -1
package/.github/workflows/kit-gates-demo.yml +2 -2
package/.github/workflows/publish-npm.yml +2 -2
package/.github/workflows/runtime-compat.yml +2 -2
package/.github/workflows/trust-reconcile.yml +1 -1
package/CHANGELOG.md +28 -0
package/README.md +3 -3
package/build/src/cli/workflow-sidecar.js +8 -2
package/context/scripts/telemetry/lib/config.sh +15 -0
package/context/scripts/telemetry/telemetry.conf +4 -0
package/context/scripts/telemetry/telemetry.sh +23 -1
package/docs/design/flowrun-eventsourcing-design.md +216 -0
package/docs/design/workflowrun-observability-design.md +431 -0
package/evals/ci/antigaming-suite.sh +1 -0
package/evals/ci/run-baseline.sh +2 -0
package/evals/integration/test_command_log_concurrency.sh +114 -0
package/evals/integration/test_gate_lockdown.sh +21 -6
package/evals/integration/test_usage_cost.sh +119 -0
package/evals/integration/test_verify_cli.sh +23 -0
package/integrations/strands/flow_agents_strands/hooks.py +126 -1
package/integrations/strands/flow_agents_strands/telemetry.py +172 -0
package/integrations/strands/tests/test_usage.py +129 -0
package/integrations/strands-ts/src/hooks.ts +135 -1
package/integrations/strands-ts/src/telemetry.ts +170 -0
package/integrations/strands-ts/test/test-usage.ts +85 -0
package/package.json +2 -2
package/scripts/ci/trust-reconcile.js +7 -23
package/scripts/hooks/evidence-capture.js +85 -50
package/scripts/hooks/stop-goal-fit.js +18 -45
package/scripts/lib/command-log-chain.js +73 -0
package/scripts/repair-command-log.js +8 -15
package/scripts/telemetry/lib/config.sh +15 -0
package/scripts/telemetry/lib/pricing.sh +42 -0
package/scripts/telemetry/lib/usage.sh +108 -0
package/scripts/telemetry/pricing.golden.json +15 -0
package/scripts/telemetry/pricing.json +31 -0
package/scripts/telemetry/telemetry.conf +4 -0
package/scripts/telemetry/telemetry.sh +23 -1
package/src/cli/workflow-sidecar.ts +8 -2

package/integrations/strands-ts/src/hooks.ts CHANGED Viewed

@@ -125,6 +125,70 @@ function readKitFlows(flowAgentsDir: string): KitFlowEntry[] {
   return results;
 }
+// ---------------------------------------------------------------------------
+// Usage extraction — map a Strands model-call event onto the documented
+// Anthropic usage object, defensively across SDK shapes.
+// ---------------------------------------------------------------------------
+/**
+ * Narrow an unknown value to a string-keyed record.
+ * Any non-null object (arrays included) is returned as-is; everything else yields undefined.
+ */
+function asRecord(value: unknown): Record<string, unknown> | undefined {
+  if (value === null || typeof value !== "object") return undefined;
+  return value as Record<string, unknown>;
+}
+/**
+ * Return the first finite number stored under any of `keys`, checked in order.
+ * A missing object, absent keys, and non-numeric or non-finite values all fall through to 0.
+ */
+function numField(obj: Record<string, unknown> | undefined, ...keys: string[]): number {
+  if (!obj) return 0;
+  for (let i = 0; i < keys.length; i += 1) {
+    const candidate = obj[keys[i]];
+    if (typeof candidate !== "number") continue;
+    if (Number.isFinite(candidate)) return candidate;
+  }
+  return 0;
+}
+/**
+ * Return the first non-empty string stored under any of `keys`, checked in order.
+ * Yields undefined when the object is missing or no key holds a non-empty string.
+ */
+function strField(obj: Record<string, unknown> | undefined, ...keys: string[]): string | undefined {
+  if (!obj) return undefined;
+  for (let i = 0; i < keys.length; i += 1) {
+    const candidate = obj[keys[i]];
+    if (typeof candidate === "string" && candidate.length > 0) return candidate;
+  }
+  return undefined;
+}
+/**
+ * Pull token counts and a model name out of a model-call event.
+ *
+ * Carriers are inspected in a fixed order: the event itself, then its `usage`,
+ * `response`, `result`, `message`, `output` and `modelResponse` members. The
+ * usage object is the first carrier's nested `usage` record, or the carrier
+ * itself when it directly exposes `input_tokens` / `inputTokens`. The model
+ * name comes from the first carrier with a string `model` or `modelId`, then
+ * from the usage object's `model`, and finally defaults to "unknown".
+ *
+ * @param event The event to inspect.
+ * @returns The extracted counts, or null when no usage object is found or all
+ *          four counters are zero.
+ */
+export function extractModelUsage(
+  event: StrandsEvent
+): { model: string; input: number; output: number; cacheCreation: number; cacheRead: number } | null {
+  // Keep only the carriers that are actual objects, preserving priority order.
+  const carriers = [
+    event,
+    event.usage,
+    event.response,
+    event.result,
+    event.message,
+    event.output,
+    event.modelResponse,
+  ]
+    .map((part) => asRecord(part))
+    .filter((part): part is Record<string, unknown> => part !== undefined);
+  // First carrier that yields a usage object wins.
+  const usage = carriers
+    .map(
+      (carrier) =>
+        asRecord(carrier.usage) ??
+        ("input_tokens" in carrier || "inputTokens" in carrier ? carrier : undefined)
+    )
+    .find((found) => found !== undefined);
+  if (usage === undefined) return null;
+  const modelCarrier = carriers.find(
+    (carrier) => typeof carrier.model === "string" || typeof carrier.modelId === "string"
+  );
+  // Both snake_case and camelCase spellings are accepted for every counter.
+  const counts = {
+    input: numField(usage, "input_tokens", "inputTokens"),
+    output: numField(usage, "output_tokens", "outputTokens"),
+    cacheCreation: numField(usage, "cache_creation_input_tokens", "cacheCreationInputTokens"),
+    cacheRead: numField(usage, "cache_read_input_tokens", "cacheReadInputTokens"),
+  };
+  // A usage object with nothing to report is treated the same as no usage.
+  if (Object.values(counts).every((count) => count === 0)) return null;
+  const model =
+    strField(modelCarrier, "model", "modelId") ?? strField(usage, "model") ?? "unknown";
+  return { model, ...counts };
+}
 function buildKitFlowsHint(flows: KitFlowEntry[]): string {
   if (flows.length === 0) return "";
   const lines = ["KIT FLOWS: the following kit flows are activated for this workspace:"];
@@ -164,6 +228,11 @@ export class FlowAgentsHooks {
   private readonly policyGate: PolicyGate;
   private readonly _workspace: string;
   private _sessionStartMs: number | null = null;
+  // Per-model token accumulator, summed across model-call events for the session.
+  private _usageByModel = new Map<
+    string,
+    { input: number; output: number; cacheCreation: number; cacheRead: number }
+  >();
   constructor(options: FlowAgentsHooksOptions = {}) {
     this._workspace = findRepoRoot(options.workspace ?? process.cwd());
@@ -248,6 +317,15 @@ export class FlowAgentsHooks {
     registry.addCallback(AfterInvocationEvent, (event) => this.onAfterInvocation(event));
     registry.addCallback(BeforeToolCallEvent, (event) => this.onBeforeToolCall(event));
     registry.addCallback(AfterToolCallEvent, (event) => this.onAfterToolCall(event));
+    // AfterModelCallEvent carries per-call token usage (the SDK's documented
+    // usage source). Optional — only registered if the installed SDK exposes it,
+    // so older SDKs still work (usage is simply not collected there).
+    // eslint-disable-next-line @typescript-eslint/no-require-imports, @typescript-eslint/no-explicit-any
+    const AfterModelCallEvent = (require("strands-agents") as any).AfterModelCallEvent as EventClass | undefined;
+    if (AfterModelCallEvent) {
+      registry.addCallback(AfterModelCallEvent, (event) => this.onAfterModelCall(event));
+    }
   }
   // --------------------------------------------------------------------------
@@ -262,13 +340,68 @@ export class FlowAgentsHooks {
     this.sink.emitUserPromptSubmit();
   }
-  /** AfterInvocationEvent → stop / session.end */
+  /** AfterInvocationEvent → emit session.usage (if any) then stop / session.end */
   onAfterInvocation(_event: StrandsEvent): void {
     const durationMs =
       this._sessionStartMs !== null ? Date.now() - this._sessionStartMs : 0;
+    if (this._usageByModel.size > 0) {
+      const byModel = Array.from(this._usageByModel.entries()).map(([model, t]) => ({
+        model,
+        inputTokens: t.input,
+        outputTokens: t.output,
+        cacheCreationInputTokens: t.cacheCreation,
+        cacheReadInputTokens: t.cacheRead,
+      }));
+      const sum = byModel.reduce(
+        (acc, m) => ({
+          input: acc.input + m.inputTokens,
+          output: acc.output + m.outputTokens,
+          cacheCreation: acc.cacheCreation + m.cacheCreationInputTokens,
+          cacheRead: acc.cacheRead + m.cacheReadInputTokens,
+        }),
+        { input: 0, output: 0, cacheCreation: 0, cacheRead: 0 }
+      );
+      this.sink.emitUsage({
+        model: byModel.length === 1 ? byModel[0].model : undefined,
+        durationS: durationMs / 1000,
+        inputTokens: sum.input,
+        outputTokens: sum.output,
+        cacheCreationInputTokens: sum.cacheCreation,
+        cacheReadInputTokens: sum.cacheRead,
+        byModel,
+      });
+      this._usageByModel.clear();
+    }
     this.sink.emitSessionEnd(durationMs);
   }
+  /**
+   * AfterModelCallEvent → fold one model call's token counts into the session totals.
+   *
+   * Delegates to extractModelUsage. When that returns null (no usage found, or
+   * every counter is zero) this is a no-op and the call is simply not counted.
+   * Otherwise the four counters are added to the running entry for the reported
+   * model, starting that entry from zero the first time the model is seen.
+   *
+   * @param event The model-call event delivered by the hook registry.
+   */
+  onAfterModelCall(event: StrandsEvent): void {
+    const usage = extractModelUsage(event);
+    if (usage === null) return;
+    const prior = this._usageByModel.get(usage.model);
+    this._usageByModel.set(usage.model, {
+      input: (prior?.input ?? 0) + usage.input,
+      output: (prior?.output ?? 0) + usage.output,
+      cacheCreation: (prior?.cacheCreation ?? 0) + usage.cacheCreation,
+      cacheRead: (prior?.cacheRead ?? 0) + usage.cacheRead,
+    });
+  }
   /**
    * BeforeToolCallEvent → preToolUse / tool.invoke + config-protection policy gate.
    *
@@ -307,6 +440,7 @@ export class FlowAgentsHooks {
   /** Call once after constructing / wiring to emit the agentSpawn event. */
   emitSessionStart(): void {
     this._sessionStartMs = Date.now();
+    this._usageByModel.clear();
     this.sink.emitSessionStart();
   }
 }

package/integrations/strands-ts/src/telemetry.ts CHANGED Viewed

@@ -12,8 +12,13 @@
 import fs from "node:fs";
 import path from "node:path";
+import { fileURLToPath } from "node:url";
 import { randomUUID } from "node:crypto";
+// ESM has no __dirname; derive it (this package is "type":"module").
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = path.dirname(__filename);
 // ---------------------------------------------------------------------------
 // Strands TS → canonical event-name mapping
 // Mirrors STRANDS_TO_CANONICAL in integrations/strands/flow_agents_strands/telemetry.py
@@ -248,4 +253,169 @@ export class TelemetrySink {
   emitUserPromptSubmit(extra?: Record<string, unknown>): TelemetryEvent {
     return this.emit("userPromptSubmit", extra);
   }
+  /**
+   * Emit a `session.usage` event carrying token counts and a derived cost estimate.
+   *
+   * Callers pass session totals and, optionally, a per-model breakdown. Every
+   * count is normalized so missing or non-finite values become 0. With a
+   * breakdown, the event cost is the rounded sum of the per-model costs;
+   * without one, the flat totals are priced under `usage.model`. The event is
+   * appended to the sink's log file as a single JSON line, and a failed write
+   * is swallowed so telemetry never blocks agent work.
+   *
+   * Mirrors the `session.usage` shape emitted by scripts/telemetry/telemetry.sh
+   * so the console aggregates both identically.
+   *
+   * @param usage Session totals plus an optional `byModel` breakdown.
+   * @returns The event that was built (returned even if the write failed).
+   */
+  emitUsage(usage: UsageInput): TelemetryEvent {
+    const event = this.buildBaseEvent("session.usage");
+    event.event_id = `${event.event_id}-usage`;
+    event.hook = { ...event.hook, event_name: "usage" };
+    // Price each model separately so mixed-model sessions are costed per rate card.
+    const breakdown = (usage.byModel ?? []).map((entry) => {
+      const counts = normalizeTokens(entry);
+      return {
+        model: entry.model,
+        input_tokens: counts.input,
+        output_tokens: counts.output,
+        cache_creation_input_tokens: counts.cacheCreation,
+        cache_read_input_tokens: counts.cacheRead,
+        estimated_cost_usd: costForModel(entry.model, counts)
+      };
+    });
+    const totals = normalizeTokens(usage);
+    const hasBreakdown = breakdown.length > 0;
+    let estimatedCost: number;
+    if (hasBreakdown) {
+      let running = 0;
+      for (const row of breakdown) running += row.estimated_cost_usd;
+      estimatedCost = round6(running);
+    } else {
+      estimatedCost = costForModel(usage.model, totals);
+    }
+    event.usage = {
+      model: usage.model ?? this.runtime,
+      duration_s: usage.durationS ?? null,
+      input_tokens: totals.input,
+      output_tokens: totals.output,
+      cache_creation_input_tokens: totals.cacheCreation,
+      cache_read_input_tokens: totals.cacheRead,
+      estimated_cost_usd: estimatedCost,
+      pricing_version: pricingVersion(),
+      by_model: hasBreakdown ? breakdown : null
+    };
+    try {
+      const line = JSON.stringify(event) + "\n";
+      fs.appendFileSync(this.logFile, line, "utf8");
+    } catch {
+      // fail-open: telemetry must never block agent work
+    }
+    return event;
+  }
+}
+// ---------------------------------------------------------------------------
+// Usage / cost — mirror of scripts/telemetry/pricing.json (per 1M tokens, USD)
+// ---------------------------------------------------------------------------
+export interface TokenCounts {
+  inputTokens?: number;
+  outputTokens?: number;
+  cacheCreationInputTokens?: number;
+  cacheReadInputTokens?: number;
+}
+export interface UsageInput extends TokenCounts {
+  model?: string;
+  durationS?: number;
+  byModel?: Array<TokenCounts & { model: string }>;
+}
+interface NormalizedTokens {
+  input: number;
+  output: number;
+  cacheCreation: number;
+  cacheRead: number;
+}
+// Pricing is read from the single-source registry (scripts/telemetry/pricing.json),
+// never hand-maintained here. Resolution: TELEMETRY_PRICING_FILE /
+// FLOW_AGENTS_PRICING_FILE env path, else the repo-relative registry, else a
+// minimal fallback. Tokens are exact regardless; the console recomputes cost
+// authoritatively, so a missing file only degrades the sink's stamped estimate.
+interface PricingVersionBlock {
+  cache_multipliers: { write_5m: number; write_1h: number; read: number };
+  models: Record<string, { input: number; output: number }>;
+  default: { input: number; output: number };
+  zero_cost_models: string[];
+}
+interface PricingRegistry {
+  current_version: string;
+  versions: Record<string, PricingVersionBlock>;
+}
+const FALLBACK_REGISTRY: PricingRegistry = {
+  current_version: "fallback",
+  versions: {
+    fallback: {
+      cache_multipliers: { write_5m: 1.25, write_1h: 2.0, read: 0.1 },
+      models: {},
+      default: { input: 5.0, output: 25.0 },
+      zero_cost_models: ["<synthetic>", "synthetic", "unknown", ""]
+    }
+  }
+};
+let cachedRegistry: PricingRegistry | null = null;
+/**
+ * Resolve the pricing registry once and cache it for the rest of the process.
+ *
+ * Candidates are tried in order: the TELEMETRY_PRICING_FILE and
+ * FLOW_AGENTS_PRICING_FILE environment paths, then two registry locations
+ * relative to this module. The first candidate that parses as JSON and passes
+ * the shape check is cached and returned; if none qualifies, FALLBACK_REGISTRY
+ * is cached instead.
+ *
+ * The shape check exists because pricing runs outside the sink's try/catch: a
+ * registry whose current block is present but lacks `zero_cost_models`,
+ * `models`, `default` or `cache_multipliers` would otherwise make cost
+ * computation throw instead of failing open.
+ *
+ * @returns The cached registry; never throws.
+ */
+function loadRegistry(): PricingRegistry {
+  if (cachedRegistry) return cachedRegistry;
+  const isObject = (value: unknown): value is Record<string, unknown> =>
+    typeof value === "object" && value !== null;
+  // Accept a registry only when pricing against its current block cannot throw.
+  const isUsable = (parsed: unknown): parsed is PricingRegistry => {
+    if (!isObject(parsed) || typeof parsed.current_version !== "string") return false;
+    if (!isObject(parsed.versions)) return false;
+    const block = parsed.versions[parsed.current_version];
+    // No block for the current version is tolerated: costForModel substitutes the fallback block.
+    if (block === undefined || block === null) return true;
+    if (!isObject(block)) return false;
+    const rate = block.default;
+    const multipliers = block.cache_multipliers;
+    return (
+      Array.isArray(block.zero_cost_models) &&
+      isObject(block.models) &&
+      isObject(rate) &&
+      typeof rate.input === "number" &&
+      typeof rate.output === "number" &&
+      isObject(multipliers) &&
+      typeof multipliers.write_5m === "number" &&
+      typeof multipliers.read === "number"
+    );
+  };
+  const candidates = [
+    process.env.TELEMETRY_PRICING_FILE,
+    process.env.FLOW_AGENTS_PRICING_FILE,
+    path.join(__dirname, "../../../scripts/telemetry/pricing.json"),
+    path.join(__dirname, "../../../../scripts/telemetry/pricing.json")
+  ].filter((p): p is string => Boolean(p));
+  for (const candidate of candidates) {
+    try {
+      const parsed: unknown = JSON.parse(fs.readFileSync(candidate, "utf8"));
+      if (isUsable(parsed)) {
+        cachedRegistry = parsed;
+        return cachedRegistry;
+      }
+    } catch {
+      // unreadable or invalid JSON — try next candidate
+    }
+  }
+  cachedRegistry = FALLBACK_REGISTRY;
+  return cachedRegistry;
+}
+/** Version label of the pricing registry currently in effect (as returned by loadRegistry). */
+function pricingVersion(): string {
+  const { current_version: version } = loadRegistry();
+  return version;
+}
+/** Coerce an optional token count to a usable number: anything that is not a finite number becomes 0. */
+function num(value: number | undefined): number {
+  if (typeof value !== "number") return 0;
+  return Number.isFinite(value) ? value : 0;
+}
+/** Round to six decimal places (micro-dollar resolution for cost estimates). */
+function round6(value: number): number {
+  const scale = 1_000_000;
+  const scaled = Math.round(value * scale);
+  return scaled / scale;
+}
+/**
+ * Map the optional camelCase token counters onto the compact internal shape,
+ * passing each one through `num` so absent or non-finite counts become 0.
+ */
+function normalizeTokens(tokens: TokenCounts): NormalizedTokens {
+  const { inputTokens, outputTokens, cacheCreationInputTokens, cacheReadInputTokens } = tokens;
+  const input = num(inputTokens);
+  const output = num(outputTokens);
+  const cacheCreation = num(cacheCreationInputTokens);
+  const cacheRead = num(cacheReadInputTokens);
+  return { input, output, cacheCreation, cacheRead };
+}
+/**
+ * Estimate the USD cost of a token bundle for one model.
+ *
+ * Rates are looked up in the registry's current version block (the fallback
+ * block is used when that version is absent). Models listed in
+ * `zero_cost_models` cost nothing; unlisted models are charged at the block's
+ * default rate. The division by 1,000,000 treats rates as per-million-token
+ * prices. Cache writes are charged at the input rate times `write_5m`, cache
+ * reads at the input rate times `read`.
+ *
+ * @param model  Model identifier; undefined is treated as the empty string.
+ * @param tokens Normalized token counts.
+ * @returns Cost rounded to six decimals.
+ */
+function costForModel(model: string | undefined, tokens: NormalizedTokens): number {
+  const registry = loadRegistry();
+  const block = registry.versions[registry.current_version] ?? FALLBACK_REGISTRY.versions.fallback;
+  const key = (model ?? "").trim();
+  if (block.zero_cost_models.includes(key)) return 0;
+  const rate = block.models[key] ?? block.default;
+  const multipliers = block.cache_multipliers;
+  // Terms are summed in a fixed left-to-right order so results stay bit-identical.
+  const inputCost = tokens.input * rate.input;
+  const outputCost = tokens.output * rate.output;
+  const cacheWriteCost = tokens.cacheCreation * rate.input * multipliers.write_5m;
+  const cacheReadCost = tokens.cacheRead * rate.input * multipliers.read;
+  const perMillionTotal = inputCost + outputCost + cacheWriteCost + cacheReadCost;
+  return round6(perMillionTotal / 1_000_000);
+}
 }

package/integrations/strands-ts/test/test-usage.ts ADDED Viewed

@@ -0,0 +1,85 @@
+import { test } from "node:test";
+import assert from "node:assert/strict";
+import fs from "node:fs";
+import path from "node:path";
+import os from "node:os";
+import { fileURLToPath } from "node:url";
+import { TelemetrySink } from "../src/telemetry.js";
+import { extractModelUsage } from "../src/hooks.js";
+const here = path.dirname(fileURLToPath(import.meta.url));
+const tmpSink = () => new TelemetrySink({ workspace: fs.mkdtempSync(path.join(os.tmpdir(), "ts-usage-")) });
+test("emitUsage writes tokens + cost + pricing_version + by_model", () => {
+  const ev = tmpSink().emitUsage({
+    model: "claude-opus-4-8",
+    inputTokens: 1000,
+    outputTokens: 2000,
+    cacheReadInputTokens: 500000,
+    byModel: [{ model: "claude-opus-4-8", inputTokens: 1000, outputTokens: 2000, cacheReadInputTokens: 500000 }]
+  } as any);
+  const u = ev.usage as any;
+  assert.equal(u.input_tokens, 1000);
+  assert.equal(u.output_tokens, 2000);
+  assert.equal(u.cache_read_input_tokens, 500000);
+  assert.equal(u.pricing_version, "2026-06-28");
+  assert.equal(u.estimated_cost_usd, 0.305); // (1000*5 + 2000*25 + 500000*5*0.1)/1e6
+  assert.equal(u.by_model[0].model, "claude-opus-4-8");
+});
+test("emitUsage multi-model sums + prices each", () => {
+  const ev = tmpSink().emitUsage({
+    outputTokens: 2000,
+    byModel: [
+      { model: "claude-opus-4-8", outputTokens: 1000 },
+      { model: "claude-haiku-4-5", outputTokens: 1000 }
+    ]
+  } as any);
+  const u = ev.usage as any;
+  const costs: Record<string, number> = Object.fromEntries(u.by_model.map((m: any) => [m.model, m.estimated_cost_usd]));
+  assert.equal(costs["claude-opus-4-8"], 0.025);
+  assert.equal(costs["claude-haiku-4-5"], 0.005);
+  assert.equal(u.estimated_cost_usd, 0.03);
+});
+test("extractModelUsage reads usage from varied event shapes", () => {
+  assert.deepEqual(
+    extractModelUsage({ model: "claude-opus-4-8", usage: { input_tokens: 10, output_tokens: 20, cache_read_input_tokens: 30 } } as any),
+    { model: "claude-opus-4-8", input: 10, output: 20, cacheCreation: 0, cacheRead: 30 }
+  );
+  // camelCase + modelId
+  const camel = extractModelUsage({ modelId: "claude-haiku-4-5", usage: { inputTokens: 5, outputTokens: 6 } } as any);
+  assert.equal(camel?.model, "claude-haiku-4-5");
+  assert.equal(camel?.input, 5);
+  // nested response carrier
+  const nested = extractModelUsage({ response: { model: "claude-fable-5", usage: { output_tokens: 100 } } } as any);
+  assert.equal(nested?.model, "claude-fable-5");
+  assert.equal(nested?.output, 100);
+  // no usage / all-zero → null
+  assert.equal(extractModelUsage({ model: "x" } as any), null);
+  assert.equal(extractModelUsage({ model: "x", usage: { input_tokens: 0, output_tokens: 0 } } as any), null);
+});
+test("cross-runtime golden vectors (TS sink prices identically)", () => {
+  const candidates = [
+    path.join(here, "../../../../scripts/telemetry/pricing.golden.json"),
+    path.join(here, "../../../scripts/telemetry/pricing.golden.json"),
+    path.join(process.cwd(), "../../scripts/telemetry/pricing.golden.json")
+  ];
+  const file = candidates.find((p) => fs.existsSync(p));
+  assert.ok(file, "pricing.golden.json not found");
+  const golden = JSON.parse(fs.readFileSync(file!, "utf8"));
+  const sink = tmpSink();
+  for (const c of golden.cases) {
+    const ev = sink.emitUsage({
+      byModel: [{
+        model: c.model,
+        inputTokens: c.tokens.input,
+        outputTokens: c.tokens.output,
+        cacheCreationInputTokens: c.tokens.cache_creation,
+        cacheReadInputTokens: c.tokens.cache_read
+      }]
+    } as any);
+    assert.equal((ev.usage as any).estimated_cost_usd, c.expected_cost_usd, `golden ${c.name} (${c.model})`);
+  }
+});

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@kontourai/flow-agents",
-  "version": "2.0.1",
+  "version": "2.1.1",
   "description": "Flow Agents — a Kontour product that applies Flow and Veritas discipline as a portable process layer inside the agent tools you already use: Claude Code, Codex, Kiro, opencode, pi, and GitHub Actions — with framework adapters (AWS Strands preview) on the same policy-engine contract.",
   "keywords": [
     "agents",
@@ -141,7 +141,7 @@
     "typescript": "^6.0.3"
   },
   "dependencies": {
-    "@kontourai/flow": "~1.3.0"
+    "@kontourai/flow": "~1.4.1"
   },
   "optionalDependencies": {
     "hachure": "^0.5.1",

package/scripts/ci/trust-reconcile.js CHANGED Viewed

@@ -61,6 +61,10 @@ const { spawnSync } = require('child_process');
 const fs = require('fs');
 const os = require('os');
 const path = require('path');
+// One normative definition shared with scripts/hooks/stop-goal-fit.js — the local
+// copy here had drifted (it was missing the trailing `/bin/true` check), which is
+// exactly why this is now imported rather than duplicated.
+const { hasLaunderingOperator } = require('../lib/command-log-chain.js');
 // ---------------------------------------------------------------------------
 // Helpers
@@ -80,29 +84,9 @@ function isPassingValue(v) {
   return v === true || v === 1 || v === 'true' || v === 'pass';
 }
-/**
- * Returns true when a command string contains an exit-code-laundering operator.
- * These operators mask real exit codes so the real sub-command may have failed silently.
- *
- * Rules (applied to claimed verification commands only):
- *   - ANY || operator — verify commands must not contain ||. This catches:
- *     || exit 0, || echo ok, || /bin/true, || true, || :, etc.
- *   - ; or newline followed by true / : / exit 0 — trailing success injection
- *
- * NOTE: Logic must stay identical to scripts/hooks/stop-goal-fit.js hasLaunderingOperator.
- * Centralize into a shared module as a follow-up (coordinate-free duplication for now).
- */
-function hasLaunderingOperator(cmd) {
-  // Flag ANY || operator — masks the exit code of the left-hand command.
-  if (/\|\|/.test(cmd)) return true;
-  // Flag ; or newline followed by true / : / exit 0
-  if (/[;\n]\s*true\b/.test(cmd)) return true;
-  if (/[;\n]\s*:\s*(?:$|\s|;|\n)/.test(cmd)) return true;
-  if (/[;\n]\s*exit\s+0\b/.test(cmd)) return true;
-  // Flag pipe to true (pipeline absorbs exit code)
-  if (/\|\s*true\b/.test(cmd)) return true;
-  return false;
-}
+// hasLaunderingOperator is imported from ../lib/command-log-chain.js (above) so this
+// CI reconciler and the stop-goal-fit verifier apply the identical exit-code-mask
+// heuristic — see that module for the rules.
 /**
  * Run a single shell command under bash, capturing exit code.

package/scripts/hooks/evidence-capture.js CHANGED Viewed

@@ -62,43 +62,16 @@ const COMMAND_TOOL_NAME = /(^|[^a-z])(bash|shell|sh|exec|run|command|terminal|cm
 // ─── Hash-chain integrity (tamper-EVIDENCE) ───────────────────────────────────
 //
-// Genesis prevHash: a fixed arbitrary sentinel used when the log is empty or
-// the last entry has no _chain field (legacy record). This is NOT the SHA256 of
-// any specific input string — it is a fixed constant chosen for the original
-// implementation. (A previous comment incorrectly claimed it was
-// sha256("flow-agents:command-log:genesis"); that is wrong.)
-//
-// Writer (this file, CHAIN_GENESIS) and verifier (stop-goal-fit.js,
-// CHAIN_GENESIS_VERIFY) MUST use the same value. Do not change one without
-// changing the other — existing chained logs depend on this constant.
-//
-// HONEST FRAMING: this makes alteration DETECTABLE, not impossible. An agent
-// that rewrites all hashes can still forge the chain. The real tamper-proof
-// boundary is the signed checkpoint (B1). We do not oversell this boundary.
-const CHAIN_GENESIS = 'a3f9e2b7d5c84f1e6a0d2c3b9f7e1a4d8c6b5f2e9a0d3c7b1f4e8a2d6c0b9f3';
-/**
- * Stable canonical JSON for the chain input: the record WITHOUT the `_chain`
- * field, keys sorted alphabetically. This ensures the hash is independent of
- * key insertion order and that `_chain` itself does not contribute to its own
- * hash (circular dependency).
- */
-function canonicalJsonForChain(record) {
-  // Strip _chain if present (should not be, but defensive).
-  const keys = Object.keys(record).filter(k => k !== '_chain').sort();
-  const obj = {};
-  for (const k of keys) obj[k] = record[k];
-  return JSON.stringify(obj);
-}
-/**
- * Compute the sha256 hex hash for this chain link.
- * hash = sha256(prevHash + canonicalJson(record))
- */
-function computeChainHash(prevHash, record) {
-  const input = prevHash + canonicalJsonForChain(record);
-  return crypto.createHash('sha256').update(input, 'utf8').digest('hex');
-}
+// CHAIN_GENESIS is a fixed arbitrary sentinel — NOT the SHA256 of any specific
+// input string (a previous comment incorrectly claimed sha256("…:genesis")). The
+// writer here and the verifier in stop-goal-fit.js MUST canonicalize and seed
+// identically, so the genesis constant and the canonicalJson/hash helpers live in
+// ONE shared module that both import — divergence is structurally impossible.
+const {
+  CHAIN_GENESIS,
+  canonicalJsonForChain,
+  computeChainHash,
+} = require('../lib/command-log-chain.js');
 /**
  * Read the last entry from command-log.jsonl that has a `_chain` block.
@@ -121,6 +94,58 @@ function readLastChainState(logFile) {
   }
   return { seq: -1, hash: CHAIN_GENESIS };
 }
+// ─── Concurrency-safe append (lockfile) ──────────────────────────────────────
+//
+// The chain link is a read-(last hash)→compute→append critical section. Without
+// mutual exclusion, two capture processes writing to the SAME command-log
+// concurrently (e.g. parallel agents in one workspace) can both read the same
+// prevHash and append entries with an identical seq/prevHash — forking the chain
+// and tripping the tamper-evidence verifier on a benign race. We serialize the
+// section with an atomic create-exclusive lockfile.
+//
+// FAIL-OPEN, like the rest of this hook: if the lock cannot be acquired we still
+// append (capture must NEVER block the agent or drop evidence), accepting the
+// small residual race rather than losing the record. A crashed holder's stale
+// lock is stolen after LOCK_STALE_MS so a dead process can't wedge capture.
+const LOCK_RETRY_MS = 5;        // backoff between attempts
+const LOCK_MAX_TRIES = 200;     // ~1s total acquisition budget
+const LOCK_STALE_MS = 10000;    // steal a lock older than this (crashed holder)
+/** Synchronous sleep without busy-spinning. Best-effort; no-ops if unavailable. */
+function sleepSync(ms) {
+  try { Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, ms); }
+  catch { /* SharedArrayBuffer/Atomics unavailable — skip the backoff */ }
+}
+/**
+ * Acquire an exclusive lock via atomic create-exclusive (O_CREAT|O_EXCL).
+ * Returns a file descriptor on success, or null on failure (caller fails open).
+ *
+ * Makes at most LOCK_MAX_TRIES attempts. Each attempt opens `lockFile` with
+ * the 'wx' flag, which fails with EEXIST when the file already exists. On
+ * success the current pid is written into the file (best-effort) and the
+ * open descriptor is returned; the caller hands it back to releaseLock.
+ *
+ * Any open error other than EEXIST returns null immediately. On EEXIST the
+ * lock file is stat'ed: if its mtime is older than LOCK_STALE_MS it is
+ * unlinked and the next attempt starts at once; if the stat or unlink itself
+ * fails the next attempt also starts at once; otherwise the loop sleeps for
+ * LOCK_RETRY_MS before retrying. Null is returned once the attempts run out.
+ *
+ * NOTE(review): the staleness check and the unlink are separate calls, so two
+ * waiters could both decide to steal the same lock — presumably acceptable
+ * under the fail-open policy; confirm.
+ */
+function acquireLock(lockFile) {
+  for (let i = 0; i < LOCK_MAX_TRIES; i++) {
+    try {
+      const fd = fs.openSync(lockFile, 'wx');
+      try { fs.writeSync(fd, String(process.pid)); } catch { /* pid is advisory only */ }
+      return fd;
+    } catch (err) {
+      if (!err || err.code !== 'EEXIST') return null; // unexpected — fail open
+      // Lock held: steal it if the holder appears dead (stale), else back off.
+      try {
+        const st = fs.statSync(lockFile);
+        if (Date.now() - st.mtimeMs > LOCK_STALE_MS) { fs.unlinkSync(lockFile); continue; }
+      } catch { continue; } // stat or unlink failed (e.g. lock vanished between open and stat) — retry immediately
+      sleepSync(LOCK_RETRY_MS);
+    }
+  }
+  return null;
+}
+/** Release a lock acquired by acquireLock. Best-effort. */
+function releaseLock(fd, lockFile) {
+  try { fs.closeSync(fd); } catch { /* already closed */ }
+  try { fs.unlinkSync(lockFile); } catch { /* already removed */ }
+}
 // ─────────────────────────────────────────────────────────────────────────────
 function parseJson(raw) {
@@ -305,20 +330,30 @@ function run(rawInput) {
     const logFile = path.join(artifactDir, 'command-log.jsonl');
     fs.mkdirSync(artifactDir, { recursive: true });
-    // Hash-chain integrity: compute _chain before appending. Fail-open: any
-    // error in chain computation falls back to the plain record (no _chain).
-    // A chain failure must NEVER block capture or corrupt the log.
-    let recordToWrite = record;
+    // Serialize the read→compute→append critical section so concurrent captures
+    // (parallel agents sharing this log) cannot fork the hash-chain. Fail-open:
+    // a null fd means we could not lock — we still append rather than drop the
+    // record. The lock is always released in finally.
+    const lockFile = logFile + '.lock';
+    const lockFd = acquireLock(lockFile);
     try {
-      const { seq: prevSeq, hash: prevHash } = readLastChainState(logFile);
-      const seq = prevSeq + 1;
-      const hash = computeChainHash(prevHash, record);
-      // Spread record fields then add _chain so the chain field is appended last
-      // (cosmetic ordering; canonicalJsonForChain excludes it during hashing).
-      recordToWrite = { ...record, _chain: { seq, prevHash, hash } };
-    } catch { /* chain computation failed — write plain record, do not block */ }
-    fs.appendFileSync(logFile, JSON.stringify(recordToWrite) + '\n');
+      // Hash-chain integrity: compute _chain before appending. Fail-open: any
+      // error in chain computation falls back to the plain record (no _chain).
+      // A chain failure must NEVER block capture or corrupt the log.
+      let recordToWrite = record;
+      try {
+        const { seq: prevSeq, hash: prevHash } = readLastChainState(logFile);
+        const seq = prevSeq + 1;
+        const hash = computeChainHash(prevHash, record);
+        // Spread record fields then add _chain so the chain field is appended last
+        // (cosmetic ordering; canonicalJsonForChain excludes it during hashing).
+        recordToWrite = { ...record, _chain: { seq, prevHash, hash } };
+      } catch { /* chain computation failed — write plain record, do not block */ }
+      fs.appendFileSync(logFile, JSON.stringify(recordToWrite) + '\n');
+    } finally {
+      if (lockFd !== null) releaseLock(lockFd, lockFile);
+    }
   } catch { /* fail-open: capture never blocks or corrupts */ }
   return rawInput;
 }