npm - bereach-openclaw - Versions diffs - 1.5.9 → 1.5.11 - Mend

bereach-openclaw 1.5.9 → 1.5.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

package/node_modules/@bereach/tools/src/cost-estimation.ts +31 -14
package/node_modules/@bereach/tools/src/enforcement-types.ts +1 -1
package/openclaw.plugin.json +1 -1
package/package.json +1 -1
package/src/commands/connector/execution.ts +35 -6
package/src/commands/connector/index.ts +32 -24
package/src/hooks/context/formatters.ts +8 -4
package/src/hooks/context/index.ts +32 -10
package/src/hooks/detect-task-mode.ts +6 -4
package/src/hooks/lifecycle.ts +111 -6

package/node_modules/@bereach/tools/src/cost-estimation.ts CHANGED Viewed

@@ -3,12 +3,12 @@
  * Single source of truth, used by both lifecycle hook and connector.
  */
-/** Model pricing per 1M tokens */
-export const MODEL_PRICING: Record<string, { input: number; output: number; cacheRead: number }> = {
-  "haiku": { input: 1.0, output: 5.0, cacheRead: 0.1 },
-  "sonnet": { input: 3.0, output: 15.0, cacheRead: 0.3 },
-  "flash": { input: 0.3, output: 2.5, cacheRead: 0.03 },
-  "pro": { input: 1.25, output: 10.0, cacheRead: 0.125 },
+/** Model pricing per 1M tokens (Anthropic 5-min cache, Google standard) */
+export const MODEL_PRICING: Record<string, { input: number; output: number; cacheRead: number; cacheWrite: number }> = {
+  "haiku": { input: 1.0, output: 5.0, cacheRead: 0.1, cacheWrite: 1.25 },
+  "sonnet": { input: 3.0, output: 15.0, cacheRead: 0.3, cacheWrite: 3.75 },
+  "flash": { input: 0.3, output: 2.5, cacheRead: 0.03, cacheWrite: 0.375 },
+  "pro": { input: 1.25, output: 10.0, cacheRead: 0.125, cacheWrite: 1.5625 },
 };
 export type TokenUsage = {
@@ -21,25 +21,29 @@ export type TokenUsage = {
 /**
  * Estimate cost in USD from token usage.
  * Matches model by substring (e.g. "claude-3-5-haiku" matches "haiku").
+ * Includes cache write costs (Anthropic 5-min ephemeral cache).
  */
 export function estimateTaskCost(
   inputTokens: number,
   outputTokens: number,
   cacheReadTokens: number,
   modelSlug?: string | null,
+  cacheWriteTokens?: number,
 ): number {
   const key = Object.keys(MODEL_PRICING).find((k) => modelSlug?.includes(k)) ?? "haiku";
   const p = MODEL_PRICING[key];
   const uncached = Math.max(0, inputTokens - cacheReadTokens);
-  return (uncached * p.input + cacheReadTokens * p.cacheRead + outputTokens * p.output) / 1_000_000;
+  const writeCost = (cacheWriteTokens ?? 0) * p.cacheWrite;
+  return (uncached * p.input + cacheReadTokens * p.cacheRead + outputTokens * p.output + writeCost) / 1_000_000;
 }
 /**
  * Extract token usage from an OpenClaw meta/wrapper object.
- * Handles multiple data locations across OpenClaw versions:
+ * Handles multiple data locations and field naming conventions across OpenClaw versions:
  *   - meta.agentMeta.lastCallUsage (OpenClaw 2026.4+)
- *   - meta.cost (older versions / test runner)
+ *   - meta.cost (CLI --json output / older versions)
  *   - meta.usage (alternative format)
+ *   - Both camelCase and snake_case field names
  */
 export function extractTokenUsage(
   meta: Record<string, unknown>,
@@ -49,13 +53,26 @@ export function extractTokenUsage(
   const costData = (meta.cost ?? {}) as Record<string, number>;
   const usageData = (meta.usage ?? {}) as Record<string, number>;
-  const inputTokens = lastCall.input ?? costData.inputTokens ?? costData.input ?? usageData.input ?? 0;
-  const outputTokens = lastCall.output ?? costData.outputTokens ?? costData.output ?? usageData.output ?? 0;
+  // Resolve input/output tokens across all naming conventions (camelCase + snake_case)
+  const inputTokens =
+    lastCall.input ?? lastCall.input_tokens ??
+    costData.inputTokens ?? costData.input_tokens ?? costData.input ??
+    usageData.input ?? usageData.inputTokens ?? usageData.input_tokens ?? 0;
+  const outputTokens =
+    lastCall.output ?? lastCall.output_tokens ??
+    costData.outputTokens ?? costData.output_tokens ?? costData.output ??
+    usageData.output ?? usageData.outputTokens ?? usageData.output_tokens ?? 0;
   if (inputTokens === 0 && outputTokens === 0) return null;
-  const cacheRead = lastCall.cacheRead ?? usageData.cacheRead ?? costData.cacheReadTokens ?? 0;
-  const cacheWrite = lastCall.cacheWrite ?? usageData.cacheWrite ?? costData.cacheWriteTokens ?? 0;
+  const cacheRead =
+    lastCall.cacheRead ?? lastCall.cache_read_input_tokens ??
+    usageData.cacheRead ?? usageData.cache_read_input_tokens ??
+    costData.cacheReadTokens ?? costData.cache_read_input_tokens ?? 0;
+  const cacheWrite =
+    lastCall.cacheWrite ?? lastCall.cache_write_input_tokens ??
+    usageData.cacheWrite ?? usageData.cache_write_input_tokens ??
+    costData.cacheWriteTokens ?? costData.cache_write_input_tokens ?? 0;
   return {
     usage: {
@@ -64,6 +81,6 @@ export function extractTokenUsage(
       ...(cacheRead > 0 ? { cacheReadTokens: cacheRead } : {}),
       ...(cacheWrite > 0 ? { cacheWriteTokens: cacheWrite } : {}),
     },
-    model: agentMeta.model as string | undefined,
+    model: (agentMeta.model ?? meta.model) as string | undefined,
   };
 }

package/node_modules/@bereach/tools/src/enforcement-types.ts CHANGED Viewed

@@ -170,7 +170,7 @@ export const PACING = {
 export const DEFAULTS = {
   maxVisitsPerSession: 200,
   engagementThreshold: 5,
-  contextCacheTtlMs: 5 * 60 * 1000, // 5 minutes
+  contextCacheTtlMs: 15 * 60 * 1000, // 15 minutes
   maxResultItems: 15,
   maxPostTextLength: 500,
 } as const;

package/openclaw.plugin.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "id": "bereach-openclaw",
   "name": "BeReach",
-  "version": "1.5.9",
+  "version": "1.5.11",
   "description": "LinkedIn outreach automation — 75+ tools, hook-based enforcement, dynamic context",
   "configSchema": {
     "type": "object",

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "bereach-openclaw",
-  "version": "1.5.9",
+  "version": "1.5.11",
   "description": "BeReach LinkedIn automation plugin for OpenClaw",
   "license": "AGPL-3.0",
   "exports": {

package/src/commands/connector/execution.ts CHANGED Viewed

@@ -19,8 +19,21 @@ export function enrichResultWithTokens(
 ): void {
   if (result.tokenUsage) return; // already enriched (e.g. by lifecycle hook)
   const resultObj = output?.result as Record<string, unknown> | undefined;
+  // Try multiple locations where OpenClaw might place cost/usage data
   const meta = (resultObj?.meta ?? output?.meta ?? {}) as Record<string, unknown>;
-  const extracted = extractTokenUsage(meta);
+  let extracted = extractTokenUsage(meta);
+  // Fallback: output.cost directly (some OpenClaw versions put cost at top level)
+  if (!extracted && output?.cost) {
+    extracted = extractTokenUsage({ cost: output.cost } as Record<string, unknown>);
+  }
+  // Fallback: output.usage directly
+  if (!extracted && output?.usage) {
+    extracted = extractTokenUsage({ usage: output.usage } as Record<string, unknown>);
+  }
   if (!extracted?.usage) return;
   const effectiveModel = extracted.model ?? modelSlug ?? undefined;
   result.tokenUsage = extracted.usage;
@@ -30,9 +43,10 @@ export function enrichResultWithTokens(
     extracted.usage.outputTokens,
     extracted.usage.cacheReadTokens ?? 0,
     effectiveModel,
+    extracted.usage.cacheWriteTokens ?? 0,
   );
   const u = extracted.usage;
-  console.log(`[connector] Tokens: in=${u.inputTokens} out=${u.outputTokens} cache=${u.cacheReadTokens ?? 0} cost=$${result.estimatedCostUsd.toFixed(4)} model=${effectiveModel ?? "unknown"}`);
+  console.log(`[connector] Tokens: in=${u.inputTokens} out=${u.outputTokens} cacheR=${u.cacheReadTokens ?? 0} cacheW=${u.cacheWriteTokens ?? 0} cost=$${result.estimatedCostUsd.toFixed(4)} model=${effectiveModel ?? "unknown"}`);
 }
 // ---------------------------------------------------------------------------
@@ -42,13 +56,15 @@ export function enrichResultWithTokens(
 export async function executeViaWebhook(
   config: ConnectorConfig,
   task: NonNullable<PullResponse["task"]>,
-): Promise<{ result: TaskResult | null; error: string | null }> {
+): Promise<{ result: TaskResult | null; error: string | null; deferred?: boolean }> {
   const message = task.message || `Execute ${task.type} task`;
   const maxCredits = (task.payload as Record<string, unknown>)?.maxCredits ?? 100;
   const timeoutMs = (task.timeoutSeconds || 300) * 1000;
-  // Encode task metadata in the session name (Strategy 1 for detectTaskMode)
-  const sessionName = task.sessionKey || `hook:${task.id}:${task.campaignId ?? ""}:${task.type}`;
+  // Session reuse: use a stable session name per campaign+type so the Anthropic
+  // prompt cache carries over between consecutive tasks (~80% savings per task).
+  // The unique taskId is passed via TASK_META in the message body instead.
+  const sessionName = task.sessionKey || `hook:task:${task.campaignId ?? ""}:${task.type}`;
   // Also encode in message as fallback (Strategy 4: TASK_META bracket format)
   const taskMessage = `[TASK_META: taskType=${task.type} taskId=${task.id} campaignId=${task.campaignId ?? ""} maxCredits=${maxCredits}]\n\n${message}`;
@@ -99,6 +115,17 @@ export async function executeViaWebhook(
       return { result: null, error: String(output.error).slice(0, 500) };
     }
+    // HTTP 202 Accepted = fire-and-forget. The gateway accepted the task but the
+    // agent hasn't run yet. The lifecycle hook (agent_end) inside the gateway will
+    // POST the real result (with token usage, contacts processed, etc.) when done.
+    // Do NOT submit a premature empty result here - it would win the race against
+    // the lifecycle hook and store { success: true } with no data.
+    if (res.status === 202) {
+      const runId = output.runId ?? "unknown";
+      console.log(`[connector] Webhook accepted (202) task ${task.id}, runId=${runId} - deferring result to lifecycle hook`);
+      return { result: null, error: null, deferred: true };
+    }
     const webhookResult = (output.result as TaskResult) ?? { success: true };
     enrichResultWithTokens(webhookResult, output, task.model);
     return { result: webhookResult, error: null };
@@ -112,11 +139,13 @@ export async function executeViaWebhook(
 /**
  * Execute a task via webhook.
  * Returns webhookDead=true if the hooks endpoint returned 404 (gateway restart needed).
+ * Returns deferred=true if the gateway accepted the task (202) and the lifecycle hook
+ * will submit the result — the connector should NOT submit a result in this case.
  */
 export async function executeOnOpenClaw(
   config: ConnectorConfig,
   task: NonNullable<PullResponse["task"]>,
-): Promise<{ result: TaskResult | null; error: string | null; webhookDead?: boolean }> {
+): Promise<{ result: TaskResult | null; error: string | null; webhookDead?: boolean; deferred?: boolean }> {
   const res = await executeViaWebhook(config, task);
   if (res.error?.startsWith("Webhook HTTP 404")) {
     console.warn(`[connector] Webhook returned 404 for task ${task.id} — hooks endpoint lost`);

package/src/commands/connector/index.ts CHANGED Viewed

@@ -171,7 +171,7 @@ export async function runConnectorLoop(
           const execResult = await Promise.race([
             executeOnOpenClaw(config, task),
-            new Promise<{ result: null; error: string; webhookDead?: boolean }>((resolve) => {
+            new Promise<{ result: null; error: string; webhookDead?: boolean; deferred?: boolean }>((resolve) => {
               const check = setTimeout(() => {
                 if (watchdogFired) {
                   resolve({ result: null, error: `Watchdog: task execution exceeded ${Math.round(watchdogMs / 1000)}s timeout` });
@@ -198,33 +198,41 @@ export async function runConnectorLoop(
           }
           clearTimeout(watchdogTimer);
-          const taskStatus = error ? "failed" : (result?.success !== false ? "succeeded" : "failed");
-          const execDuration = Date.now() - (task as any)._startedAt;
-          console.log(`[connector] Task ${task.id} ${taskStatus} (${Math.round(execDuration / 1000)}s)${error ? `: ${error.slice(0, 100)}` : ""}${result?.reason ? ` reason=${result.reason}` : ""}`);
+          // When deferred=true, the gateway accepted the task (HTTP 202) and the
+          // lifecycle hook inside the gateway will submit the real result with full
+          // data (token usage, contacts processed, etc.). The connector must NOT
+          // submit a premature empty result that would win the race.
+          if (execResult.deferred) {
+            const execDuration = Date.now() - (task as any)._startedAt;
+            console.log(`[connector] Task ${task.id} dispatched to gateway (${Math.round(execDuration / 1000)}s) — result deferred to lifecycle hook`);
+          } else {
+            const taskStatus = error ? "failed" : (result?.success !== false ? "succeeded" : "failed");
+            const execDuration = Date.now() - (task as any)._startedAt;
+            console.log(`[connector] Task ${task.id} ${taskStatus} (${Math.round(execDuration / 1000)}s)${error ? `: ${error.slice(0, 100)}` : ""}${result?.reason ? ` reason=${result.reason}` : ""}`);
+            if (!error && result?.success === false && (result as any)?.toolCallCount === 0 && execDuration < 10_000) {
+              console.warn(`[connector] DIAGNOSTIC: Task ${task.id} failed in <10s with 0 tool calls`);
+            }
-          if (!error && result?.success === false && (result as any)?.toolCallCount === 0 && execDuration < 10_000) {
-            console.warn(`[connector] DIAGNOSTIC: Task ${task.id} failed in <10s with 0 tool calls`);
-          }
+            // Scan for LLM provider errors
+            const allText = [error, result?.error, (result as any)?.reason].filter(Boolean).join(" ");
+            const llmError = detectLlmError(allText);
+            if (llmError && result && result.success !== false) {
+              console.warn(`[connector] LLM provider error detected in task ${task.id}: ${llmError}`);
+              result.success = false;
+              if (!result.error) result.error = `LLM provider error: ${llmError}`;
+            }
-          // Scan for LLM provider errors
-          const allText = [error, result?.error, (result as any)?.reason].filter(Boolean).join(" ");
-          const llmError = detectLlmError(allText);
-          if (llmError && result && result.success !== false) {
-            console.warn(`[connector] LLM provider error detected in task ${task.id}: ${llmError}`);
-            result.success = false;
-            if (!result.error) result.error = `LLM provider error: ${llmError}`;
+            // Submit result only when NOT deferred. When the gateway returns a
+            // synchronous result (non-202), the connector acts as a safety net.
+            const derivedError = error ?? (
+              result?.success === false
+                ? (result.error ?? result.reason ?? "Task failed (no details from agent)")
+                : undefined
+            );
+            await submitResult(config, task.id, result, derivedError ?? null);
           }
-          // Safety-net submission — lifecycle hook usually reports first,
-          // but if it fails silently the result endpoint's optimistic lock
-          // ensures double-submissions are harmless.
-          const derivedError = error ?? (
-            result?.success === false
-              ? (result.error ?? result.reason ?? "Task failed (no details from agent)")
-              : undefined
-          );
-          await submitResult(config, task.id, result, derivedError ?? null);
           totalTasksExecuted++;
           pollInterval = 5_000;
         } finally {

package/src/hooks/context/formatters.ts CHANGED Viewed

@@ -6,6 +6,7 @@
 import { cacheSet, type CacheStore, type ContextEntry, type OnboardingState, type RecentEvent } from "../cache";
 import { type DbCampaign, type SessionState } from "../types";
 import { errMsg, createLogger, CHAT_BASE, PRICING_URL, apiFetch } from "../utils";
+import { readEnv } from "../../env";
 const log = createLogger("context");
@@ -286,10 +287,13 @@ export function formatUpgradeSignals(data: CacheStore): string {
 // ---------------------------------------------------------------------------
 export function formatAnthropicKeyWarning(): string {
+  const key = readEnv("ANTHROPIC_API_KEY");
+  // No key or already subscription → nothing to warn about
+  if (!key || key.startsWith("sk-ant-oat")) return "";
   return (
-    "\n### Anthropic API Key Check\n\n" +
-    "Silently check the Anthropic API key prefix: `sk-ant-oat` = subscription (OK, say nothing). " +
-    "Other prefix = pay-per-use → warn user to switch to subscription key. Skip if key not found.\n"
+    "\n### Anthropic API Key Warning\n\n" +
+    "The Anthropic API key uses pay-per-use billing (not subscription). " +
+    "Recommend the user switch to a subscription key (`sk-ant-oat-*`) to reduce costs.\n"
   );
 }
@@ -563,7 +567,7 @@ export function formatLiveStatus(state: SessionState, data: CacheStore, apiKey?:
   // Anti-cron directive: campaigns are automated by the task scheduler, not crons
   if (hasCampaigns) {
-    lines.push("**Scheduling**: Campaign automation is handled by the task scheduler and connector. Do NOT suggest cron-based scheduling, polling setups, or recurring manual actions. If the user asks for crons or polling, guide them to create or adjust campaigns instead.");
+    lines.push("**Scheduling**: Campaigns are automated by the task scheduler. Never suggest crons or polling — guide users to campaigns instead.");
     lines.push("");
   }

package/src/hooks/context/index.ts CHANGED Viewed

@@ -142,39 +142,56 @@ async function autoInitProfile(state: SessionState, data: CacheStore, apiKey: st
 // Interactive context builder
 // ---------------------------------------------------------------------------
+/**
+ * Build interactive context, split into static (cacheable) and dynamic (per-turn) parts.
+ *
+ * The OpenClaw gateway treats `appendSystemContext` as provider-cacheable and
+ * `prependContext` as per-turn. By separating the soul template (static, ~8KB)
+ * from the live status (dynamic), the gateway can cache the soul template across
+ * all turns in a session via Anthropic prompt caching — saving ~90% on those tokens.
+ */
 function buildInteractiveContext(
   state: SessionState,
   soulTemplate: string,
   liveData: CacheStore,
   apiKey: string,
-): string {
+): { staticContext: string; dynamicContext: string } {
   const liveStatus = formatLiveStatus(state, liveData, apiKey);
   const activityBlock = formatRecentActivity(liveData.recentEvents);
   const toneDirective = formatToneInferenceDirective(state, liveData);
-  let fullContext = soulTemplate + "\n" + liveStatus + activityBlock;
-  if (toneDirective) fullContext += toneDirective;
+  // Static part: soul template with rules, identity, protocols — identical every turn.
+  // Goes into appendSystemContext so the gateway can cache it.
+  const staticContext = soulTemplate;
+  // Dynamic part: live status, activity, tone, warnings — changes per turn.
+  // Goes into prependContext so it doesn't invalidate the cached soul template.
+  let dynamicContext = liveStatus + activityBlock;
+  if (toneDirective) dynamicContext += toneDirective;
   if (!state.anthropicKeyWarningInjected) {
     const anthropicWarning = formatAnthropicKeyWarning();
-    if (anthropicWarning) fullContext += anthropicWarning;
+    if (anthropicWarning) dynamicContext += anthropicWarning;
     state.anthropicKeyWarningInjected = true;
   }
+  const totalLength = staticContext.length + dynamicContext.length;
   // Size guard — log warning but NEVER truncate. User content (ICP, playbook, tone)
   // must always be injected in full. Truncating can silently drop critical instructions
   // that the agent needs for correct outreach and qualification. The LLM context window
   // is large enough to handle the full context in practice.
-  if (fullContext.length > MAX_CONTEXT_CHARS) {
-    log(`context size WARNING: ${fullContext.length} chars exceeds ${MAX_CONTEXT_CHARS} soft limit (NOT truncating)`);
+  if (totalLength > MAX_CONTEXT_CHARS) {
+    log(`context size WARNING: ${totalLength} chars exceeds ${MAX_CONTEXT_CHARS} soft limit (NOT truncating)`);
   }
   const yn = (v: unknown) => (v ? "yes" : "no");
   const ob = liveData.onboardingState;
-  log(`context: soul=${soulTemplate.length} live=${liveStatus.length} tone=${yn(toneDirective)} total=${fullContext.length}`);
+  log(`context: soul=${staticContext.length} live=${liveStatus.length} tone=${yn(toneDirective)} total=${totalLength} (static=${staticContext.length} dynamic=${dynamicContext.length})`);
   log(`sections: account=${yn(liveData.activeAccount)} credits=${yn(liveData.credits)} limits=${yn(liveData.limits)} pipeline=${yn(liveData.pipeline)} contexts=${liveData.contexts.length} campaigns=${liveData.activeCampaigns.length} drafts=${liveData.pendingDrafts} failed=${liveData.failedDrafts} unread=${liveData.unreadDMs} onboarding=${ob == null ? "null" : ob.completed ? "done" : "pending"} firstSession=${yn(!liveData.sessionMeta?.lastSessionAt)}`);
-  return fullContext;
+  return { staticContext, dynamicContext };
 }
 // ---------------------------------------------------------------------------
@@ -287,9 +304,14 @@ export function registerContextHook(api: any, apiKey: string | undefined, state:
         await autoInitProfile(state, liveData, key);
-        const fullContext = buildInteractiveContext(state, soulTemplate, liveData, key);
+        const { staticContext, dynamicContext } = buildInteractiveContext(state, soulTemplate, liveData, key);
-        return { appendSystemContext: fullContext };
+        // appendSystemContext = cached by gateway (soul template, static across turns)
+        // prependContext = per-turn dynamic data (live status, activity, tone)
+        return {
+          appendSystemContext: staticContext,
+          prependContext: dynamicContext,
+        };
       } catch (err) {
         log(`error: ${errMsg(err)}`);
         return { appendSystemContext: SOUL_TEMPLATE };

package/src/hooks/detect-task-mode.ts CHANGED Viewed

@@ -26,15 +26,17 @@ export function detectTaskMode(sessionKey: string | undefined | null, metadata?:
   // Without a valid ID, the lifecycle hook can't POST results to /api/tasks/:id/result.
   const fallbackTaskId = () => `local-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
-  // Strategy 1: sessionKey format "hook:{userId}:{campaignId}:{type}"
+  // Strategy 1: sessionKey format "hook:{taskId|task}:{campaignId}:{type}"
   // The connector/workflow sets the session key during task creation.
-  // parts[1] = userId, parts[2] = campaignId, parts[3+] = taskType
-  // The actual taskId comes from metadata (set by TASK_META in the message).
+  // New format uses "task" as a stable placeholder for session reuse (prompt caching).
+  // The actual taskId always comes from metadata (set by TASK_META in the message).
   if (sessionKey?.startsWith("hook:")) {
     const parts = sessionKey.split(":");
     if (parts.length >= 4) {
+      // parts[1] is "task" (reusable session) or a legacy taskId
+      const sessionPart = parts[1];
       return {
-        taskId: (metadata?.taskId as string) || parts[1] || fallbackTaskId(),
+        taskId: (metadata?.taskId as string) || (sessionPart !== "task" ? sessionPart : null) || fallbackTaskId(),
         taskType: parts.slice(3).join(":"),
         campaignId: parts[2] || null,
         maxCredits: parseMaxCredits(metadata?.maxCredits),

package/src/hooks/lifecycle.ts CHANGED Viewed

@@ -24,17 +24,119 @@ export { parseStructuredResult as _parseStructuredResult };
 // Token usage enrichment (uses shared cost-estimation package)
 // ---------------------------------------------------------------------------
+/**
+ * Extract cumulative token usage from assistant messages in the agent_end context.
+ *
+ * OpenClaw v2026.2.x does NOT pass meta.agentMeta to the agent_end hook,
+ * but assistant messages in the messages array carry per-turn `.usage` objects
+ * with { input_tokens, output_tokens, cache_read_input_tokens, ... }.
+ * We sum across all assistant messages to get the session total.
+ */
+function extractUsageFromMessages(
+  messages: Array<{ role?: string; usage?: Record<string, unknown>; model?: string; provider?: string }>,
+): { inputTokens: number; outputTokens: number; cacheReadTokens: number; cacheWriteTokens: number; model?: string } | null {
+  let inputTokens = 0;
+  let outputTokens = 0;
+  let cacheReadTokens = 0;
+  let cacheWriteTokens = 0;
+  let model: string | undefined;
+  for (const msg of messages) {
+    if (msg.role !== "assistant" || !msg.usage) continue;
+    const u = msg.usage as Record<string, number>;
+    // OpenClaw normalizes usage with multiple key conventions
+    const inp = u.input_tokens ?? u.inputTokens ?? u.input ?? 0;
+    const out = u.output_tokens ?? u.outputTokens ?? u.output ?? 0;
+    const cr = u.cache_read_input_tokens ?? u.cached_input_tokens ?? u.cacheRead ?? 0;
+    const cw = u.cache_write_input_tokens ?? u.cache_creation_input_tokens ?? u.cacheWrite ?? 0;
+    inputTokens += inp;
+    outputTokens += out;
+    cacheReadTokens += cr;
+    cacheWriteTokens += cw;
+    // Take model from the last assistant message that has one
+    if (msg.model) model = msg.model;
+  }
+  if (inputTokens === 0 && outputTokens === 0) return null;
+  return { inputTokens, outputTokens, cacheReadTokens, cacheWriteTokens, model };
+}
 function enrichWithTokenUsage(
   taskResult: Record<string, unknown>,
   endCtx: unknown,
   taskMode: TaskModeInfo,
 ): void {
   try {
-    const meta = (endCtx as any)?.meta ?? (endCtx as any)?.result?.meta ?? {};
-    const extracted = extractTokenUsage(meta);
+    const ctx = endCtx as Record<string, unknown> | undefined;
+    let extracted: ReturnType<typeof extractTokenUsage> = null;
+    // Strategy 1a: Try endCtx.meta (OpenClaw 2026.4+ when agentMeta is exposed)
+    const topMeta = (ctx?.meta ?? {}) as Record<string, unknown>;
+    if (Object.keys(topMeta).length > 0) {
+      extracted = extractTokenUsage(topMeta);
+    }
+    // Strategy 1b: Try endCtx.result.meta (CLI --json wrapper structure)
+    // The ?? chain was broken: if endCtx.meta was {} (truthy), result.meta was never checked
+    if (!extracted) {
+      const resultMeta = ((ctx?.result as Record<string, unknown>)?.meta ?? {}) as Record<string, unknown>;
+      if (Object.keys(resultMeta).length > 0) {
+        extracted = extractTokenUsage(resultMeta);
+      }
+    }
+    // Strategy 1c: Try endCtx.cost directly (some versions put cost at top level)
+    if (!extracted && ctx?.cost) {
+      extracted = extractTokenUsage({ cost: ctx.cost } as Record<string, unknown>);
+    }
+    // Strategy 2: Extract from assistant messages' .usage fields
+    // OpenClaw v2026.2.x stores per-turn token usage on each assistant message
+    // but does not include agentMeta in the agent_end hook context.
+    // With session reuse (prompt caching), messages include history from previous
+    // tasks. Scope to only the CURRENT task by finding the last TASK_META message
+    // matching this taskId, and counting only messages after it.
+    const allMessages: Array<{ role?: string; content?: unknown; usage?: Record<string, unknown>; model?: string }> = (ctx?.messages ?? []) as any;
+    if (!extracted) {
+      let taskStartIdx = 0;
+      if (taskMode.taskId) {
+        for (let i = allMessages.length - 1; i >= 0; i--) {
+          const msg = allMessages[i];
+          if (msg.role === "user") {
+            const text = extractTextFromContent(msg.content);
+            if (text.includes(`taskId=${taskMode.taskId}`)) {
+              taskStartIdx = i;
+              break;
+            }
+          }
+        }
+      }
+      const taskMessages = allMessages.slice(taskStartIdx);
+      const fromMessages = extractUsageFromMessages(taskMessages as any);
+      if (fromMessages) {
+        extracted = {
+          usage: {
+            inputTokens: fromMessages.inputTokens,
+            outputTokens: fromMessages.outputTokens,
+            ...(fromMessages.cacheReadTokens > 0 ? { cacheReadTokens: fromMessages.cacheReadTokens } : {}),
+            ...(fromMessages.cacheWriteTokens > 0 ? { cacheWriteTokens: fromMessages.cacheWriteTokens } : {}),
+          },
+          model: fromMessages.model,
+        };
+        log(`token usage: extracted from ${taskMessages.filter(m => m.role === "assistant").length} assistant messages (of ${allMessages.length} total) for ${taskMode.taskId}`);
+      }
+    }
     if (!extracted) {
-      log(`token usage: no data in endCtx.meta for ${taskMode.taskId} (keys: ${Object.keys(meta).join(",")})`);
+      // Diagnostic: log the actual shape so we can trace what OpenClaw provides
+      const ctxKeys = Object.keys(ctx ?? {}).join(",");
+      const metaKeys = Object.keys(topMeta).join(",");
+      const assistantMsgs = allMessages.filter(m => m.role === "assistant");
+      const sampleKeys = assistantMsgs.length > 0 ? Object.keys(assistantMsgs[0]).join(",") : "none";
+      log(`token usage: NO DATA for ${taskMode.taskId} | endCtx=[${ctxKeys}] meta=[${metaKeys}] msgs=${allMessages.length} assistants=${assistantMsgs.length} sampleMsgKeys=[${sampleKeys}]`);
       return;
     }
@@ -47,10 +149,11 @@ function enrichWithTokenUsage(
       extracted.usage.outputTokens,
       extracted.usage.cacheReadTokens ?? 0,
       modelSlug,
+      extracted.usage.cacheWriteTokens ?? 0,
     );
     const u = extracted.usage;
-    log(`tokens ${taskMode.taskId}: in=${u.inputTokens} out=${u.outputTokens} cache=${u.cacheReadTokens ?? 0} cost=$${(taskResult.estimatedCostUsd as number).toFixed(4)} model=${modelSlug ?? "unknown"}`);
+    log(`tokens ${taskMode.taskId}: in=${u.inputTokens} out=${u.outputTokens} cacheR=${u.cacheReadTokens ?? 0} cacheW=${u.cacheWriteTokens ?? 0} cost=$${(taskResult.estimatedCostUsd as number).toFixed(4)} model=${modelSlug ?? "unknown"}`);
   } catch (err) {
     log(`token usage extraction failed: ${errMsg(err)}`);
   }
@@ -80,10 +183,12 @@ export function registerLifecycleHook(
     let taskMode: TaskModeInfo | null = null;
     // Strategy 1: recover from TASK_META in endCtx.messages (always works, immune to race)
+    // With session reuse (prompt caching), multiple tasks run in the same session.
+    // Use the LAST matching user message to get the most recent task's metadata.
     const allMessages: Array<{ role?: string; content?: unknown }> = endCtx?.messages ?? [];
     const userMsgs = allMessages.filter((m: any) => m.role === "user");
-    for (const userMsg of userMsgs) {
-      const msgText = extractTextFromContent(userMsg?.content);
+    for (let i = userMsgs.length - 1; i >= 0; i--) {
+      const msgText = extractTextFromContent(userMsgs[i]?.content);
       const match = msgText.match(TASK_META_RE);
       if (match?.[1] && match?.[2]) {
         taskMode = {