npm - @t2000/engine - Versions diffs - 1.3.0 → 1.5.0 - Mend

@t2000/engine 1.3.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/dist/index.js — CHANGED

@@ -61,6 +61,13 @@ var TxMutex = class {
     }
   }
 };
+function withRetryStats(context) {
+  const retryStats = { attemptCount: 1 };
+  return {
+    context: { ...context, retryStats },
+    readAttemptCount: () => retryStats.attemptCount > 1 ? retryStats.attemptCount : void 0
+  };
+}
 async function* runTools(pending, tools, context, txMutex) {
   const { reads, writes } = partitionToolCalls(pending, tools);
   if (reads.length > 0) {
@@ -68,24 +75,36 @@ async function* runTools(pending, tools, context, txMutex) {
       reads.map(async (call) => {
         const tool = findTool(tools, call.name);
         if (!tool) {
-          return { call, result: { data: { error: `Unknown tool: ${call.name}` } }, isError: true };
+          return {
+            call,
+            result: { data: { error: `Unknown tool: ${call.name}` } },
+            isError: true,
+            attemptCount: void 0
+          };
         }
-        const execResult = await executeSingleTool(tool, call, context);
+        const { context: toolCtx, readAttemptCount } = withRetryStats(context);
+        const execResult = await executeSingleTool(tool, call, toolCtx);
         if (!execResult.isError) {
           execResult.data = budgetToolResult(execResult.data, tool);
         }
-        return { call, result: execResult, isError: execResult.isError };
+        return {
+          call,
+          result: execResult,
+          isError: execResult.isError,
+          attemptCount: readAttemptCount()
+        };
       })
     );
     for (const settled of readResults) {
       if (settled.status === "fulfilled") {
-        const { call, result, isError } = settled.value;
+        const { call, result, isError, attemptCount } = settled.value;
         yield {
           type: "tool_result",
           toolName: call.name,
           toolUseId: call.id,
           result: result.data,
-          isError
+          isError,
+          ...attemptCount !== void 0 ? { attemptCount } : {}
         };
       } else {
         const idx = readResults.indexOf(settled);
@@ -114,16 +133,19 @@ async function* runTools(pending, tools, context, txMutex) {
     }
     await txMutex.acquire();
     try {
-      const result = await executeSingleTool(tool, call, context);
+      const { context: toolCtx, readAttemptCount } = withRetryStats(context);
+      const result = await executeSingleTool(tool, call, toolCtx);
       if (!result.isError) {
         result.data = budgetToolResult(result.data, tool);
       }
+      const attemptCount = readAttemptCount();
       yield {
         type: "tool_result",
         toolName: call.name,
         toolUseId: call.id,
         result: result.data,
-        isError: result.isError
+        isError: result.isError,
+        ...attemptCount !== void 0 ? { attemptCount } : {}
       };
     } catch (err) {
       yield {
@@ -726,6 +748,7 @@ async function fetchBlockVisionWithRetry(url, init, opts = {}) {
   let lastError = null;
   let lastResponse = null;
   for (let attempt = 0; attempt < BV_RETRY_MAX_ATTEMPTS; attempt++) {
+    if (opts.retryStats) opts.retryStats.attemptCount = attempt + 1;
     if (attempt > 0) {
       let waitMs = BV_RETRY_BASE_DELAY_MS * Math.pow(BV_RETRY_BACKOFF_FACTOR, attempt - 1);
       const retryAfter = lastResponse?.headers.get("retry-after");
@@ -808,7 +831,7 @@ async function safeWalletStoreGet(store, address) {
     return null;
   }
 }
-async function fetchAddressPortfolio(address, apiKey, fallbackRpcUrl) {
+async function fetchAddressPortfolio(address, apiKey, fallbackRpcUrl, opts = {}) {
   const store = getWalletCacheStore();
   const cachedEntry = await safeWalletStoreGet(store, address);
   if (cachedEntry) {
@@ -841,7 +864,7 @@ async function fetchAddressPortfolio(address, apiKey, fallbackRpcUrl) {
             }
           }
           if (apiKey && apiKey.trim().length > 0) {
-            const blockvision = await fetchPortfolioFromBlockVision(address, apiKey);
+            const blockvision = await fetchPortfolioFromBlockVision(address, apiKey, opts.retryStats);
             if (blockvision) {
               await safeWalletStoreSet(
                 store,
@@ -885,7 +908,7 @@ async function fetchAddressPortfolio(address, apiKey, fallbackRpcUrl) {
   portfolioInflight.set(address, promise);
   return promise;
 }
-async function fetchPortfolioFromBlockVision(address, apiKey) {
+async function fetchPortfolioFromBlockVision(address, apiKey, retryStats) {
   const url = `${BLOCKVISION_BASE}/account/coins?account=${encodeURIComponent(address)}`;
   const signal = AbortSignal.timeout(PORTFOLIO_TIMEOUT_MS);
   let res;
@@ -896,7 +919,7 @@ async function fetchPortfolioFromBlockVision(address, apiKey) {
         headers: { "x-api-key": apiKey, accept: "application/json" },
         signal
       },
-      { signal }
+      { signal, retryStats }
     );
   } catch (err) {
     console.warn("[blockvision-prices] portfolio fetch threw, degrading:", err);
@@ -982,7 +1005,7 @@ async function fetchPortfolioFromSuiRpc(address, apiKey, fallbackRpcUrl) {
     source: "sui-rpc-degraded"
   };
 }
-async function fetchTokenPrices(coinTypes, apiKey) {
+async function fetchTokenPrices(coinTypes, apiKey, opts = {}) {
   if (coinTypes.length === 0) return {};
   const now = Date.now();
   const cacheValid = priceMapCache !== null && now - priceMapCache.ts < CACHE_TTL_MS;
@@ -1006,7 +1029,7 @@ async function fetchTokenPrices(coinTypes, apiKey) {
   if (!apiKey || apiKey.trim().length === 0) {
     return result;
   }
-  const fetched = await fetchPricesFromBlockVision(stillMissing, apiKey);
+  const fetched = await fetchPricesFromBlockVision(stillMissing, apiKey, opts.retryStats);
   Object.assign(result, fetched);
   const cacheUpdates = {};
   for (const [original, value] of Object.entries(fetched)) {
@@ -1016,7 +1039,7 @@ async function fetchTokenPrices(coinTypes, apiKey) {
   priceMapCache = { prices: merged, ts: cacheValid ? priceMapCache.ts : now };
   return result;
 }
-async function fetchPricesFromBlockVision(coinTypes, apiKey) {
+async function fetchPricesFromBlockVision(coinTypes, apiKey, retryStats) {
   const out = {};
   const longToOriginal = /* @__PURE__ */ new Map();
   for (const original of coinTypes) {
@@ -1037,7 +1060,7 @@ async function fetchPricesFromBlockVision(coinTypes, apiKey) {
           headers: { "x-api-key": apiKey, accept: "application/json" },
           signal
         },
-        { signal }
+        { signal, retryStats }
       );
     } catch (err) {
       console.warn("[blockvision-prices] price chunk threw, skipping:", err);
@@ -1123,7 +1146,7 @@ async function safeDefiStoreGet(store, address) {
   }
 }
 var warnedMissingApiKey = false;
-async function fetchAddressDefiPortfolio(address, apiKey, priceHints = {}) {
+async function fetchAddressDefiPortfolio(address, apiKey, priceHints = {}, opts = {}) {
   if (!apiKey || apiKey.trim().length === 0) {
     if (!warnedMissingApiKey) {
       warnedMissingApiKey = true;
@@ -1164,7 +1187,7 @@ async function fetchAddressDefiPortfolio(address, apiKey, priceHints = {}) {
           const stickyBasis = recheck ?? cachedEntry;
           const fanoutAt = Date.now();
           const settled = await Promise.allSettled(
-            DEFI_PROTOCOLS.map((p) => fetchOneDefiProtocol(address, p, apiKey))
+            DEFI_PROTOCOLS.map((p) => fetchOneDefiProtocol(address, p, apiKey, opts.retryStats))
           );
           const seen = /* @__PURE__ */ new Set();
           for (const s of settled) {
@@ -1259,7 +1282,7 @@ async function fetchAddressDefiPortfolio(address, apiKey, priceHints = {}) {
   defiInflight.set(address, inflight);
   return inflight;
 }
-async function fetchOneDefiProtocol(address, protocol, apiKey) {
+async function fetchOneDefiProtocol(address, protocol, apiKey, retryStats) {
   const url = `${BLOCKVISION_BASE}/account/defiPortfolio?address=${encodeURIComponent(address)}&protocol=${protocol}`;
   const signal = AbortSignal.timeout(DEFI_PORTFOLIO_TIMEOUT_MS);
   let res;
@@ -1270,7 +1293,7 @@ async function fetchOneDefiProtocol(address, protocol, apiKey) {
         headers: { "x-api-key": apiKey, accept: "application/json" },
         signal
       },
-      { signal }
+      { signal, retryStats }
     );
   } catch (err) {
     console.warn(`[defi] ${protocol} fetch threw:`, err);
@@ -1810,12 +1833,12 @@ async function applyVsuiPriceFallback(portfolio) {
     portfolio.totalUsd = portfolio.totalUsd - previousUsd + usdValue;
   }
 }
-async function loadPortfolio(address, blockvisionApiKey, fallbackRpcUrl, cache) {
+async function loadPortfolio(address, blockvisionApiKey, fallbackRpcUrl, cache, retryStats) {
   if (cache) {
     const hit = cache.get(address);
     if (hit) return hit;
   }
-  const portfolio = await fetchAddressPortfolio(address, blockvisionApiKey, fallbackRpcUrl);
+  const portfolio = await fetchAddressPortfolio(address, blockvisionApiKey, fallbackRpcUrl, { retryStats });
   if (cache) cache.set(address, portfolio);
   return portfolio;
 }
@@ -1874,7 +1897,8 @@ var balanceCheckTool = buildTool({
           address,
           context.blockvisionApiKey,
           context.suiRpcUrl,
-          context.portfolioCache
+          context.portfolioCache,
+          context.retryStats
         ).catch((err) => {
           console.warn("[balance_check] portfolio fetch failed, returning empty:", err);
           const fallback = {
@@ -1906,7 +1930,7 @@ var balanceCheckTool = buildTool({
         // as defi.totalUsd === 0 and `source: 'degraded'`, leaving the
         // rest of balance_check unaffected. The fetcher fills its own
         // prices via fetchTokenPrices for any coin types it discovers.
-        fetchAddressDefiPortfolio(address, context.blockvisionApiKey).catch((err) => {
+        fetchAddressDefiPortfolio(address, context.blockvisionApiKey, {}, { retryStats: context.retryStats }).catch((err) => {
           console.warn("[balance_check] defi fetch failed:", err);
           const fallback = {
             totalUsd: 0,
@@ -2023,7 +2047,7 @@ var balanceCheckTool = buildTool({
     const fetchAddress = targetAddress ?? context.walletAddress;
     const [balance, defi] = await Promise.all([
       agent.balance(),
-      fetchAddressDefiPortfolio(fetchAddress, context.blockvisionApiKey).catch((err) => {
+      fetchAddressDefiPortfolio(fetchAddress, context.blockvisionApiKey, {}, { retryStats: context.retryStats }).catch((err) => {
         console.warn("[balance_check] sdk-path defi fetch failed:", err);
         const fallback = {
           totalUsd: 0,
@@ -3986,7 +4010,8 @@ var portfolioAnalysisTool = buildTool({
         const fresh = await fetchAddressPortfolio(
           address,
           context.blockvisionApiKey,
-          context.suiRpcUrl
+          context.suiRpcUrl,
+          { retryStats: context.retryStats }
         );
         context.portfolioCache?.set(address, fresh);
         return fresh;
@@ -4036,7 +4061,7 @@ var portfolioAnalysisTool = buildTool({
         perProtocol: {},
         pricedAt: Date.now(),
         source: audricSnapshot.defiSource
-      }) : fetchAddressDefiPortfolio(address, context.blockvisionApiKey).catch(
+      }) : fetchAddressDefiPortfolio(address, context.blockvisionApiKey, {}, { retryStats: context.retryStats }).catch(
         (err) => {
           console.warn("[portfolio_analysis] defi fetch failed:", err);
           return { totalUsd: 0, perProtocol: {}, pricedAt: Date.now(), source: "degraded" };
@@ -5104,6 +5129,107 @@ var resolveSuinsTool = buildTool({
     }
   }
 });
+var todoStatusSchema = z.enum(["pending", "in_progress", "completed"]);
+var todoItemSchema = z.object({
+  id: z.string().min(1, "id must be a non-empty string").max(40, "id must be \u226440 chars (use a slug, not a sentence)"),
+  label: z.string().min(1, "label must be a non-empty string").max(80, "label must be \u226480 chars (the whole point of this tool is concision)"),
+  status: todoStatusSchema
+});
+var inputSchema6 = z.object({
+  items: z.array(todoItemSchema).min(1, "items must contain at least 1 entry").max(8, "items must contain at most 8 entries (SPEC 8 ceiling)")
+});
+var updateTodoTool = buildTool({
+  name: "update_todo",
+  description: "Declare or replace your plan for the current turn as a structured todo list. Call this when the user's ask is multi-step (\u22653 tools, \u22652 reasoning hops) so the user can see what you're doing as you do it. Each call replaces the entire list \u2014 the tool is idempotent. \n\nRules: 1\u20138 items, each label \u226480 chars, exactly 1 item must be `in_progress`. Use stable `id`s across calls within the same turn so the UI can track item transitions (e.g. `id: 'check-balance'` first as `pending`, later as `completed`). \n\nDO NOT call this for single-step asks ('balance', 'rate') \u2014 it's wasted tokens. DO call it before kicking off long flows where the user benefits from seeing the plan unfold ('save my idle USDC' \u2192 check balance \u2192 check rates \u2192 compute split \u2192 propose). \n\nThis call doesn't count against your turn budget \u2014 re-narrating the plan as items move from pending \u2192 in_progress \u2192 completed is encouraged.",
+  inputSchema: inputSchema6,
+  jsonSchema: {
+    type: "object",
+    properties: {
+      items: {
+        type: "array",
+        minItems: 1,
+        maxItems: 8,
+        items: {
+          type: "object",
+          properties: {
+            id: {
+              type: "string",
+              description: 'Stable identifier across calls within the same turn (e.g. "check-balance"). \u226440 chars.'
+            },
+            label: {
+              type: "string",
+              description: 'What this step is doing, \u226480 chars. Concrete (e.g. "Check USDC rate") not abstract ("Gather data").'
+            },
+            status: {
+              type: "string",
+              enum: ["pending", "in_progress", "completed"],
+              description: "Lifecycle state. Exactly one item must be `in_progress` per call."
+            }
+          },
+          required: ["id", "label", "status"]
+        }
+      }
+    },
+    required: ["items"]
+  },
+  isReadOnly: true,
+  // No I/O, just a pass-through that emits a side-channel event. Skip the
+  // turn-read cache — every call is intentionally distinct (ids may match
+  // but statuses change).
+  cacheable: false,
+  preflight: (input) => {
+    const items = input.items ?? [];
+    if (items.length === 0) {
+      return { valid: false, error: "items must contain at least 1 entry" };
+    }
+    if (items.length > 8) {
+      return { valid: false, error: `items must contain at most 8 entries, got ${items.length}` };
+    }
+    const seenIds = /* @__PURE__ */ new Set();
+    let inProgressCount = 0;
+    for (const item of items) {
+      if (!item.id || item.id.trim().length === 0) {
+        return { valid: false, error: "every item must have a non-empty id" };
+      }
+      if (item.id.length > 40) {
+        return { valid: false, error: `item id "${item.id.slice(0, 30)}\u2026" exceeds 40 chars` };
+      }
+      if (seenIds.has(item.id)) {
+        return { valid: false, error: `duplicate item id "${item.id}" \u2014 ids must be unique within a list` };
+      }
+      seenIds.add(item.id);
+      if (!item.label || item.label.trim().length === 0) {
+        return { valid: false, error: `item "${item.id}" has empty label` };
+      }
+      if (item.label.length > 80) {
+        return { valid: false, error: `item "${item.id}" label exceeds 80 chars (got ${item.label.length})` };
+      }
+      if (item.status === "in_progress") {
+        inProgressCount++;
+      }
+    }
+    if (inProgressCount !== 1) {
+      return {
+        valid: false,
+        error: `exactly 1 item must be in_progress, got ${inProgressCount}`
+      };
+    }
+    return { valid: true };
+  },
+  async call(input) {
+    return {
+      // The `__todoUpdate` flag tells the engine's agent loop to emit a
+      // `todo_update` side-channel event (mirrors the `__canvas` magic
+      // flag pattern). The LLM still gets a normal `tool_result` keyed
+      // to its `tool_use_id` so the Anthropic protocol stays satisfied.
+      data: {
+        __todoUpdate: true,
+        items: input.items
+      },
+      displayText: `${input.items.length} step${input.items.length === 1 ? "" : "s"}: ${input.items.map((i) => `${i.status === "completed" ? "\u2713" : i.status === "in_progress" ? "\u2192" : "\xB7"} ${i.label}`).join(" / ")}`
+    };
+  }
+});
 var tokenPricesTool = buildTool({
   name: "token_prices",
   description: 'Get current USD prices for Sui tokens, with optional 24h change. Accepts full coin type strings (e.g. "0x2::sui::SUI"). Returns price per token and (when requested) 24h change percentage. Use for "what is X worth?" or "did Y move today?". For balance + portfolio rendering, prefer balance_check / portfolio_analysis instead \u2014 they bundle the same prices into the standard cards.',
@@ -5128,7 +5254,7 @@ var tokenPricesTool = buildTool({
   },
   isReadOnly: true,
   async call(input, context) {
-    const prices = await fetchTokenPrices(input.coinTypes, context.blockvisionApiKey);
+    const prices = await fetchTokenPrices(input.coinTypes, context.blockvisionApiKey, { retryStats: context.retryStats });
     const results = input.coinTypes.map((coinType) => {
       const entry = prices[coinType];
       const symbol = coinType.split("::").pop() ?? coinType;
@@ -5350,6 +5476,29 @@ var CostTracker = class {
   }
 };
+// src/thinking-budget.ts
+var EFFORT_THINKING_BUDGET_CAPS = {
+  // null = thinking force-disabled (LEAN tier — single-fact reads need
+  // zero deliberation; a thinking block here adds ~300ms TTFVP for no
+  // benefit — see SPEC 8 § "Decision 2: LEAN shape: zero thinking blocks")
+  low: null,
+  medium: 8e3,
+  high: 16e3,
+  max: 32e3
+};
+function clampThinkingForEffort(config, effort) {
+  if (!config) return config;
+  if (effort === void 0) return config;
+  const cap = EFFORT_THINKING_BUDGET_CAPS[effort];
+  if (cap === null) {
+    return { type: "disabled" };
+  }
+  if (config.type === "enabled" && config.budgetTokens > cap) {
+    return { ...config, budgetTokens: cap };
+  }
+  return config;
+}
 // src/guards.ts
 function guardVerdictToAction(verdict) {
   if (verdict === "pass" || verdict === "hint") return "allow";
@@ -6374,12 +6523,14 @@ var EarlyToolDispatcher = class {
           call,
           tool,
           promise: Promise.resolve({ data: cached.result, isError: false }),
-          deduped: true
+          deduped: true,
+          readAttemptCount: () => void 0
         });
         return true;
       }
     }
-    const childContext = { ...this.context, signal: this.abortController.signal };
+    const baseChildContext = { ...this.context, signal: this.abortController.signal };
+    const { context: childContext, readAttemptCount } = withRetryStats(baseChildContext);
     const promise = executeTool(tool, call, childContext).then((result) => {
       if (!result.isError && this.turnReadCache) {
         const cacheKey = TurnReadCache.keyFor(call.name, call.input);
@@ -6390,7 +6541,7 @@ var EarlyToolDispatcher = class {
       }
       return result;
     });
-    this.entries.push({ call, tool, promise, deduped: false });
+    this.entries.push({ call, tool, promise, deduped: false, readAttemptCount });
     return true;
   }
   /** True if any tools have been dispatched. */
@@ -6419,6 +6570,7 @@ var EarlyToolDispatcher = class {
       try {
         const result = await entry.promise;
         const budgeted = result.isError ? result.data : budgetToolResult(result.data, entry.tool);
+        const attemptCount = entry.readAttemptCount();
         yield {
           type: "tool_result",
           toolName: entry.call.name,
@@ -6426,7 +6578,8 @@ var EarlyToolDispatcher = class {
           result: budgeted,
           isError: result.isError,
           wasEarlyDispatched: true,
-          ...entry.deduped ? { resultDeduped: true } : {}
+          ...entry.deduped ? { resultDeduped: true } : {},
+          ...attemptCount !== void 0 ? { attemptCount } : {}
         };
       } catch (err) {
         yield {
@@ -6560,8 +6713,17 @@ var QueryEngine = class {
    * `pending_action` event and the stream ends — no persistent connection needed.
    * The caller should save messages + pendingAction to the session store, then
    * call `resumeWithToolResult()` after the user approves/denies and executes.
+   *
+   * [SPEC 8 v0.5.1 B3.2] Optional `options.harnessShape` + `options.harnessRationale`
+   * cause a one-shot `harness_shape` event to be yielded BEFORE the agent loop
+   * begins. The engine itself doesn't classify — the host calls
+   * `classifyEffort()` (host already does this for thinking-budget routing)
+   * and maps via `harnessShapeForEffort()` before calling `submitMessage`.
+   * Hosts that don't pass `harnessShape` won't see the event (existing
+   * pre-SPEC-8 hosts continue to work; their `TurnMetrics.harnessShape`
+   * defaults to `'legacy'`).
    */
-  async *submitMessage(prompt) {
+  async *submitMessage(prompt, options) {
     if (this.costTracker.isOverBudget()) {
       yield { type: "error", error: new Error("Session budget exceeded") };
       return;
@@ -6573,6 +6735,13 @@ var QueryEngine = class {
       role: "user",
       content: [{ type: "text", text: prompt }]
     });
+    if (options?.harnessShape) {
+      yield {
+        type: "harness_shape",
+        shape: options.harnessShape,
+        rationale: options.harnessRationale && options.harnessRationale.trim().length > 0 ? options.harnessRationale : `host-classified ${options.harnessShape}`
+      };
+    }
     this.turnPaused = false;
     try {
       yield* this.agentLoop(prompt, signal);
@@ -6699,11 +6868,19 @@ var QueryEngine = class {
               isError: true,
               data: {
                 error: `Post-write refresh: invalid input for ${tool.name}`
-              }
+              },
+              attemptCount: void 0
             };
           }
-          const result = await tool.call(parsed.data, context);
-          return { tool, id, isError: false, data: result.data };
+          const { context: toolCtx, readAttemptCount } = withRetryStats(context);
+          const result = await tool.call(parsed.data, toolCtx);
+          return {
+            tool,
+            id,
+            isError: false,
+            data: result.data,
+            attemptCount: readAttemptCount()
+          };
         } catch (err) {
           return {
             tool,
@@ -6711,7 +6888,8 @@ var QueryEngine = class {
             isError: true,
             data: {
               error: err instanceof Error ? err.message : "Post-write refresh failed"
-            }
+            },
+            attemptCount: void 0
           };
         }
       })
@@ -6743,7 +6921,8 @@ var QueryEngine = class {
         toolUseId: r.id,
         result: r.data,
         isError: r.isError,
-        wasPostWriteRefresh: true
+        wasPostWriteRefresh: true,
+        ...r.attemptCount !== void 0 ? { attemptCount: r.attemptCount } : {}
       };
     }
   }
@@ -6940,6 +7119,7 @@ ${recipeCtx}`;
             ];
           }
         }
+        const cappedThinking = clampThinkingForEffort(this.thinking, this.outputConfig?.effort);
         const stream = this.provider.chat({
           messages: this.messages,
           systemPrompt: effectivePrompt,
@@ -6948,7 +7128,7 @@ ${recipeCtx}`;
           maxTokens: this.maxTokens,
           temperature: this.temperature,
           toolChoice: effectiveToolChoice,
-          thinking: this.thinking,
+          thinking: cappedThinking,
           outputConfig: this.outputConfig,
           signal
         });
@@ -7029,6 +7209,13 @@ ${recipeCtx}`;
                   toolUseId: finalEvent.toolUseId
                 };
               }
+              if (r && r.__todoUpdate === true && Array.isArray(r.items)) {
+                yield {
+                  type: "todo_update",
+                  items: r.items,
+                  toolUseId: finalEvent.toolUseId
+                };
+              }
             }
             earlyResultBlocks.push({
               type: "tool_result",
@@ -7225,6 +7412,13 @@ ${recipeCtx}`;
                 toolUseId: finalEvent.toolUseId
               };
             }
+            if (r && r.__todoUpdate === true && Array.isArray(r.items)) {
+              yield {
+                type: "todo_update",
+                items: r.items,
+                toolUseId: finalEvent.toolUseId
+              };
+            }
             if (tool && !tool.isReadOnly && this.onAutoExecuted && this.permissionConfig && this.priceCache) {
               const operation = toolNameToOperation(toolEvent.toolName);
               if (operation && originalCall) {
@@ -7316,6 +7510,11 @@ ${recipeCtx}`;
       }
       this.messages.push({ role: "assistant", content: acc.assistantBlocks });
       this.messages.push({ role: "user", content: toolResultBlocks });
+      const toolUseBlocks = acc.assistantBlocks.filter((b) => b.type === "tool_use");
+      const allUpdateTodo = toolUseBlocks.length > 0 && toolUseBlocks.every((b) => b.name === "update_todo");
+      if (allUpdateTodo) {
+        turns--;
+      }
       if (this.costTracker.isOverBudget()) {
         yield { type: "error", error: new Error("Session budget exceeded") };
         return;
@@ -7340,7 +7539,7 @@ ${recipeCtx}`;
   *handleProviderEvent(event, acc, dispatcher) {
     switch (event.type) {
       case "thinking_delta": {
-        yield { type: "thinking_delta", text: event.text };
+        yield { type: "thinking_delta", text: event.text, blockIndex: event.blockIndex };
         break;
       }
       case "thinking_done": {
@@ -7349,7 +7548,14 @@ ${recipeCtx}`;
           thinking: event.thinking,
           signature: event.signature
         });
-        yield { type: "thinking_done", signature: event.signature };
+        yield {
+          type: "thinking_done",
+          blockIndex: event.blockIndex,
+          signature: event.signature,
+          // [SPEC 8 v0.5.1] forward HowIEvaluated structured fields when
+          // the provider parsed an <eval_summary> marker.
+          ...event.summaryMode && event.evaluationItems ? { summaryMode: true, evaluationItems: event.evaluationItems } : {}
+        };
         break;
       }
       case "redacted_thinking": {
@@ -7551,6 +7757,20 @@ function flagSuspiciousResult(toolName, result) {
   return null;
 }
+// src/types.ts
+function harnessShapeForEffort(effort) {
+  switch (effort) {
+    case "low":
+      return "lean";
+    case "medium":
+      return "standard";
+    case "high":
+      return "rich";
+    case "max":
+      return "max";
+  }
+}
 // src/streaming.ts
 function serializeSSE(event) {
   const data = JSON.stringify(event);
@@ -7785,6 +8005,54 @@ function classifyEffort(model, userMessage, matchedRecipe, sessionWriteCount) {
   return "medium";
 }
+// src/eval-summary.ts
+var MARKER_REGEX = /<eval_summary>([\s\S]*?)<\/eval_summary>/g;
+var VALID_STATUSES = /* @__PURE__ */ new Set([
+  "good",
+  "warning",
+  "critical",
+  "info"
+]);
+function parseEvalSummary(thinkingText) {
+  if (!thinkingText.includes("<eval_summary>")) return null;
+  const matches = [];
+  for (const match of thinkingText.matchAll(MARKER_REGEX)) {
+    matches.push(match[1] ?? "");
+  }
+  if (matches.length === 0) return null;
+  const firstPayload = matches[0].trim();
+  let parsed;
+  try {
+    parsed = JSON.parse(firstPayload);
+  } catch {
+    return null;
+  }
+  if (!parsed || typeof parsed !== "object") return null;
+  const items = parsed.items;
+  if (!Array.isArray(items)) return null;
+  const evaluationItems = [];
+  for (const item of items) {
+    if (!item || typeof item !== "object") continue;
+    const i = item;
+    if (typeof i.label !== "string" || i.label.trim().length === 0) continue;
+    if (typeof i.status !== "string" || !VALID_STATUSES.has(i.status)) continue;
+    const out = {
+      label: i.label,
+      status: i.status
+    };
+    if (typeof i.note === "string" && i.note.length > 0) {
+      out.note = i.note;
+    }
+    evaluationItems.push(out);
+  }
+  if (evaluationItems.length === 0) return null;
+  return {
+    summaryMode: true,
+    evaluationItems,
+    markerCount: matches.length
+  };
+}
 // src/prompt-cache.ts
 function buildCachedSystemPrompt(staticParts, dynamicPart) {
   const blocks = staticParts.map((text, i) => ({
@@ -8342,7 +8610,7 @@ var AnthropicProvider = class {
             } else if (delta.type === "thinking_delta") {
               const buf = thinkingBuffers.get(event.index);
               if (buf?.type === "thinking") buf.text += delta.thinking ?? "";
-              yield { type: "thinking_delta", text: delta.thinking ?? "" };
+              yield { type: "thinking_delta", text: delta.thinking ?? "", blockIndex: event.index };
             } else if (delta.type === "signature_delta") {
               const buf = thinkingBuffers.get(event.index);
               if (buf?.type === "thinking") buf.signature = delta.signature ?? "";
@@ -8368,7 +8636,14 @@ var AnthropicProvider = class {
             }
             const thinkBuf = thinkingBuffers.get(event.index);
             if (thinkBuf?.type === "thinking") {
-              yield { type: "thinking_done", thinking: thinkBuf.text, signature: thinkBuf.signature };
+              const summary = parseEvalSummary(thinkBuf.text);
+              yield {
+                type: "thinking_done",
+                blockIndex: event.index,
+                thinking: thinkBuf.text,
+                signature: thinkBuf.signature,
+                ...summary ? { summaryMode: true, evaluationItems: summary.evaluationItems } : {}
+              };
               thinkingBuffers.delete(event.index);
             } else if (thinkBuf?.type === "redacted_thinking") {
               yield { type: "redacted_thinking", data: thinkBuf.data };
@@ -8582,6 +8857,6 @@ function sanitizeAnthropicMessages(messages) {
   return merged;
 }
-export { AnthropicProvider, BalanceTracker, CANVAS_TEMPLATES, ContextBudget, CostTracker, DEFAULT_GUARD_CONFIG, DEFAULT_LEASE_SEC, DEFAULT_PERMISSION_CONFIG, DEFAULT_POLL_BUDGET_MS, DEFAULT_POLL_INTERVAL_MS, DEFAULT_SYSTEM_PROMPT, EarlyToolDispatcher, InMemoryDefiCacheStore, InMemoryFetchLock, InMemoryNaviCacheStore, InMemoryWalletCacheStore, InvalidAddressError, McpClientManager, McpResponseCache, MemorySessionStore, NAVI_ADDR_TTL_SEC, NAVI_MCP_CONFIG, NAVI_MCP_URL, NAVI_RATES_TTL_SEC, NAVI_SERVER_NAME, NaviTools, PERMISSION_PRESETS, QueryEngine, READ_TOOLS, RecipeRegistry, RetryTracker, SUINS_NAME_REGEX, SUI_ADDRESS_REGEX, SUI_ADDRESS_STRICT_REGEX, SuinsNotRegisteredError, SuinsRpcError, TOOL_FLAGS, TOOL_MODIFIABLE_FIELDS, TxMutex, WRITE_TOOLS, _resetNaviCircuitBreaker, activitySummaryTool, adaptAllMcpTools, adaptAllServerTools, adaptMcpTool, applyToolFlags, awaitOrFetch, balanceCheckTool, borrowTool, budgetToolResult, buildCachedSystemPrompt, buildMcpTools, buildProactivenessInstructions, buildProfileContext, buildSelfEvaluationInstruction, buildStateContext, buildTool, claimRewardsTool, classifyEffort, clearPortfolioCache, clearPortfolioCacheFor, clearPriceMapCache, compactMessages, createGuardRunnerState, engineToSSE, estimateTokens, explainTxTool, extractConversationText, extractMcpText, fetchAddressDefiPortfolio, fetchAddressPortfolio, fetchAudricHistory, fetchAudricPortfolio, fetchAvailableRewards, fetchBalance, fetchHealthFactor, fetchPositions, fetchProtocolStats, fetchRates, fetchSavings, fetchTokenPrices, fetchWalletCoins, findTool, getAudricApiBase, getDefaultTools, getDefiCacheStore, getFetchLock, getMcpManager, getModifiableFields, getNaviCacheStore, getTelemetrySink, getToolFlags, getWalletAddress, getWalletCacheStore, guardArtifactPreview, guardStaleData, hasNaviMcp, healthCheckTool, loadRecipes, looksLikeSuiNs, microcompact, mppServicesTool, naviKey, normalizeAddressInput, parseMcpJson, parseRecipe, parseSSE, payApiTool, portfolioAnalysisTool, 
protocolDeepDiveTool, ratesInfoTool, registerEngineTools, renderCanvasTool, repayDebtTool, requireAgent, resetDefiCacheStore, resetFetchLock, resetNaviCacheStore, resetTelemetrySink, resetWalletCacheStore, resolveAddressToSuinsViaRpc, resolvePermissionTier, resolveSuinsTool, resolveSuinsViaRpc, resolveUsdValue, runGuards, runTools, saveContactTool, saveDepositTool, savingsInfoTool, sendTransferTool, serializeSSE, setDefiCacheStore, setFetchLock, setNaviCacheStore, setTelemetrySink, setWalletCacheStore, spendingAnalyticsTool, swapExecuteTool, swapQuoteTool, tokenPricesTool, toolNameToOperation, toolsToDefinitions, transactionHistoryTool, transformBalance, transformHealthFactor, transformPositions, transformRates, transformRewards, transformSavings, updateGuardStateAfterToolResult, validateHistory, voloStakeTool, voloStatsTool, voloUnstakeTool, webSearchTool, withdrawTool, yieldSummaryTool };
+export { AnthropicProvider, BalanceTracker, CANVAS_TEMPLATES, ContextBudget, CostTracker, DEFAULT_GUARD_CONFIG, DEFAULT_LEASE_SEC, DEFAULT_PERMISSION_CONFIG, DEFAULT_POLL_BUDGET_MS, DEFAULT_POLL_INTERVAL_MS, DEFAULT_SYSTEM_PROMPT, EFFORT_THINKING_BUDGET_CAPS, EarlyToolDispatcher, InMemoryDefiCacheStore, InMemoryFetchLock, InMemoryNaviCacheStore, InMemoryWalletCacheStore, InvalidAddressError, McpClientManager, McpResponseCache, MemorySessionStore, NAVI_ADDR_TTL_SEC, NAVI_MCP_CONFIG, NAVI_MCP_URL, NAVI_RATES_TTL_SEC, NAVI_SERVER_NAME, NaviTools, PERMISSION_PRESETS, QueryEngine, READ_TOOLS, RecipeRegistry, RetryTracker, SUINS_NAME_REGEX, SUI_ADDRESS_REGEX, SUI_ADDRESS_STRICT_REGEX, SuinsNotRegisteredError, SuinsRpcError, TOOL_FLAGS, TOOL_MODIFIABLE_FIELDS, TxMutex, WRITE_TOOLS, _resetNaviCircuitBreaker, activitySummaryTool, adaptAllMcpTools, adaptAllServerTools, adaptMcpTool, applyToolFlags, awaitOrFetch, balanceCheckTool, borrowTool, budgetToolResult, buildCachedSystemPrompt, buildMcpTools, buildProactivenessInstructions, buildProfileContext, buildSelfEvaluationInstruction, buildStateContext, buildTool, claimRewardsTool, clampThinkingForEffort, classifyEffort, clearPortfolioCache, clearPortfolioCacheFor, clearPriceMapCache, compactMessages, createGuardRunnerState, engineToSSE, estimateTokens, explainTxTool, extractConversationText, extractMcpText, fetchAddressDefiPortfolio, fetchAddressPortfolio, fetchAudricHistory, fetchAudricPortfolio, fetchAvailableRewards, fetchBalance, fetchHealthFactor, fetchPositions, fetchProtocolStats, fetchRates, fetchSavings, fetchTokenPrices, fetchWalletCoins, findTool, getAudricApiBase, getDefaultTools, getDefiCacheStore, getFetchLock, getMcpManager, getModifiableFields, getNaviCacheStore, getTelemetrySink, getToolFlags, getWalletAddress, getWalletCacheStore, guardArtifactPreview, guardStaleData, harnessShapeForEffort, hasNaviMcp, healthCheckTool, loadRecipes, looksLikeSuiNs, microcompact, mppServicesTool, naviKey, 
normalizeAddressInput, parseEvalSummary, parseMcpJson, parseRecipe, parseSSE, payApiTool, portfolioAnalysisTool, protocolDeepDiveTool, ratesInfoTool, registerEngineTools, renderCanvasTool, repayDebtTool, requireAgent, resetDefiCacheStore, resetFetchLock, resetNaviCacheStore, resetTelemetrySink, resetWalletCacheStore, resolveAddressToSuinsViaRpc, resolvePermissionTier, resolveSuinsTool, resolveSuinsViaRpc, resolveUsdValue, runGuards, runTools, saveContactTool, saveDepositTool, savingsInfoTool, sendTransferTool, serializeSSE, setDefiCacheStore, setFetchLock, setNaviCacheStore, setTelemetrySink, setWalletCacheStore, spendingAnalyticsTool, swapExecuteTool, swapQuoteTool, tokenPricesTool, toolNameToOperation, toolsToDefinitions, transactionHistoryTool, transformBalance, transformHealthFactor, transformPositions, transformRates, transformRewards, transformSavings, updateGuardStateAfterToolResult, updateTodoTool, validateHistory, voloStakeTool, voloStatsTool, voloUnstakeTool, webSearchTool, withdrawTool, yieldSummaryTool };
 //# sourceMappingURL=index.js.map
 //# sourceMappingURL=index.js.map