@t2000/engine 0.46.6 → 0.46.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -888,6 +888,8 @@ declare class QueryEngine {
888
888
  private messages;
889
889
  private abortController;
890
890
  private guardEvents;
891
+ private readonly turnReadCache;
892
+ private turnPaused;
891
893
  constructor(config: EngineConfig);
892
894
  /**
893
895
  * Submit a user message and stream engine events.
@@ -922,6 +924,32 @@ declare class QueryEngine {
922
924
  reset(): void;
923
925
  getGuardEvents(): readonly GuardEvent[];
924
926
  loadMessages(messages: Message[]): void;
927
+ /**
928
+ * [v0.46.7] Run a read-only tool out-of-band, using the engine's tool
929
+ * registry and ToolContext. Used by hosts to deterministically pre-dispatch
930
+ * tools based on user-message intent (e.g. always call `balance_check` when
931
+ * the user says "what's my net worth?", regardless of whether the LLM would
932
+ * have otherwise re-called it).
933
+ *
934
+ * The host is responsible for:
935
+ * - Streaming the synthetic `tool_start` + `tool_result` events to the UI
936
+ * (so cards render as if the LLM had called the tool).
937
+ * - Appending matching `tool_use` + `tool_result` ContentBlocks to the
938
+ * engine's message history via `loadMessages([...getMessages(), ...synth])`
939
+ * BEFORE calling `submitMessage`, so the LLM sees the fresh data and
940
+ * doesn't re-call.
941
+ *
942
+ * Throws if the tool isn't registered, isn't read-only, or fails input
943
+ * validation. Tool execution errors are returned as `{ data, isError: true }`
944
+ * for the caller to handle (typically: skip the injection so the LLM falls
945
+ * back to its normal flow).
946
+ */
947
+ invokeReadTool(toolName: string, input: unknown, options?: {
948
+ signal?: AbortSignal;
949
+ }): Promise<{
950
+ data: unknown;
951
+ isError: boolean;
952
+ }>;
925
953
  setServerPositions(data: EngineConfig['serverPositions']): void;
926
954
  getUsage(): CostSnapshot;
927
955
  /**
@@ -1215,6 +1243,89 @@ interface MicrocompactResult extends Array<Message> {
1215
1243
  */
1216
1244
  declare function microcompact(messages: readonly Message[], tools?: readonly Tool[]): MicrocompactResult;
1217
1245
 
1246
+ /**
1247
+ * [v0.46.8] Intra-turn deduplication of read-only tool calls.
1248
+ *
1249
+ * # Problem
1250
+ * Two independent execution paths can call the same read-only tool within
1251
+ * the same user turn:
1252
+ * 1. Host pre-dispatch via `engine.invokeReadTool()` (deterministic — runs
1253
+ * before the LLM ever sees the message; injects a synthetic
1254
+ * `tool_use`+`tool_result` pair into the ledger so the card renders
1255
+ * immediately and the LLM has the data).
1256
+ * 2. The LLM itself, mid-turn, emitting a `tool_use` block for the same
1257
+ * tool (often because the prompt says "always call balance_check on
1258
+ * direct read questions" and the model doesn't trust the synthetic
1259
+ * pair).
1260
+ *
1261
+ * Both paths emit a `tool_result` SSE event, the host renders BOTH cards,
1262
+ * the user sees a duplicate. Coordinating these two paths via prompt rules
1263
+ * is probabilistic ("DO NOT re-call when you see a synthetic pair") and
1264
+ * has empirically shown a ~30% miss rate — the LLM still re-calls anyway.
1265
+ *
1266
+ * # Fix
1267
+ * Idempotent intra-turn cache. Within one user turn:
1268
+ * - Calling the same read-only tool with the same args twice returns the
1269
+ * cached result on the second call.
1270
+ * - The second call yields a `tool_result` event with `resultDeduped:true`
1271
+ * so hosts can skip rendering a duplicate card while the LLM still gets
1272
+ * the data it needs to satisfy its `tool_use` id.
1273
+ *
1274
+ * # Lifecycle
1275
+ * - Cache lives on the `QueryEngine` instance.
1276
+ * - Populated by `invokeReadTool` (host pre-dispatch) AND by the agent
1277
+ * loop's tool-execution path (LLM-driven calls).
1278
+ * - Cleared on `turn_complete` (clean slate for the next user turn).
1279
+ * - Cleared whenever a WRITE tool executes successfully (writes mutate
1280
+ * on-chain state, so any subsequent read in the same turn must re-fetch
1281
+ * for freshness).
1282
+ * - Cleared on errors / abort (defensive cleanup).
1283
+ *
1284
+ * # Why not just extend microcompact?
1285
+ * `microcompact` does CROSS-turn dedup, but explicitly excludes
1286
+ * `cacheable: false` tools (balance_check, health_check, savings_info,
1287
+ * transaction_history) so post-write refreshes always surface fresh data.
1288
+ * Within a single turn (pre-write), those same tools are perfectly
1289
+ * dedup-able — state can't change. This cache fills that exact gap.
1290
+ *
1291
+ * # Invariants
1292
+ * - Read-only tools only. Write tools never enter the cache.
1293
+ * - Errored results are NEVER cached (the next call should retry).
1294
+ * - Cache key includes the full input, stably stringified — different
1295
+ * filter args (e.g. `transaction_history({minUsd:5})` vs
1296
+ * `transaction_history({})`) hit different cache entries.
1297
+ */
1298
+ declare class TurnReadCache {
1299
+ private readonly store;
1300
+ /**
1301
+ * Build the cache key for a (toolName, input) pair. Stable across object
1302
+ * key ordering so `{a:1,b:2}` and `{b:2,a:1}` map to the same entry.
1303
+ */
1304
+ static keyFor(toolName: string, input: unknown): string;
1305
+ has(key: string): boolean;
1306
+ get(key: string): {
1307
+ result: unknown;
1308
+ sourceToolUseId: string;
1309
+ } | undefined;
1310
+ /**
1311
+ * Populate the cache. Caller is responsible for ensuring the result was
1312
+ * a successful read (no errors). Overwrites any prior entry for the same
1313
+ * key — the most recent successful read wins, which is correct under our
1314
+ * "writes invalidate the whole cache" invariant.
1315
+ */
1316
+ set(key: string, value: {
1317
+ result: unknown;
1318
+ sourceToolUseId: string;
1319
+ }): void;
1320
+ /**
1321
+ * Drop every entry. Called at turn end and after every successful write.
1322
+ * Cheap and intentional — the cache is small (a handful of entries per
1323
+ * turn at most) and clearing is the correct response to any state mutation.
1324
+ */
1325
+ clear(): void;
1326
+ size(): number;
1327
+ }
1328
+
1218
1329
  /**
1219
1330
  * EarlyToolDispatcher — dispatches read-only tools mid-stream.
1220
1331
  *
@@ -1231,11 +1342,21 @@ declare class EarlyToolDispatcher {
1231
1342
  private entries;
1232
1343
  private readonly tools;
1233
1344
  private readonly context;
1345
+ private readonly turnReadCache;
1234
1346
  private abortController;
1235
- constructor(tools: Tool[], context: ToolContext);
1347
+ constructor(tools: Tool[], context: ToolContext, turnReadCache?: TurnReadCache);
1236
1348
  /**
1237
1349
  * Attempt to dispatch a tool call. Returns true if the tool was dispatched
1238
1350
  * (read-only + concurrency-safe), false if it should be queued for later.
1351
+ *
1352
+ * [v0.46.8] Cache-aware: if a `TurnReadCache` was supplied at
1353
+ * construction and a prior call this turn already produced a result
1354
+ * for the same `(toolName, input)`, the dispatcher returns true (the
1355
+ * call IS handled here, not queued for the post-stream loop) but
1356
+ * skips the tool execution entirely — `collectResults` will surface
1357
+ * the cached value with `resultDeduped: true`. On a cache miss for
1358
+ * a successful real execution, the result is written back to the
1359
+ * cache so any later call within the same turn dedups too.
1239
1360
  */
1240
1361
  tryDispatch(call: PendingToolCall): boolean;
1241
1362
  /** True if any tools have been dispatched. */
package/dist/index.js CHANGED
@@ -1710,17 +1710,31 @@ async function fetchCatalog() {
1710
1710
  catalogCache = { data, ts: Date.now() };
1711
1711
  return data;
1712
1712
  }
1713
/**
 * Project raw catalog entries into the shape returned to the model/UI.
 *
 * Each service keeps its `id`, `name`, `description` and `categories`;
 * each endpoint is rewritten to carry an absolute gateway URL
 * (`MPP_GATEWAY2/<service id><endpoint path>`), its method and
 * description, and a `$`-prefixed price string.
 *
 * @param catalog Array of catalog services (each with an `endpoints` array).
 * @returns A new array of rendered services; the input is not mutated.
 */
function renderServices(catalog) {
  // Endpoint projection pulled out so the service mapping below stays flat.
  const toEndpointView = (serviceId, endpoint) => ({
    url: `${MPP_GATEWAY2}/${serviceId}${endpoint.path}`,
    method: endpoint.method,
    description: endpoint.description,
    price: `$${endpoint.price}`
  });
  return catalog.map((service) => ({
    id: service.id,
    name: service.name,
    description: service.description,
    categories: service.categories,
    endpoints: service.endpoints.map((endpoint) => toEndpointView(service.id, endpoint))
  }));
}
1713
1727
  function matchesQuery(service, q) {
1714
1728
  const lower = q.toLowerCase();
1715
1729
  return service.id.toLowerCase().includes(lower) || service.name.toLowerCase().includes(lower) || service.description.toLowerCase().includes(lower) || service.categories.some((c) => c.toLowerCase().includes(lower)) || service.endpoints.some((e) => e.description.toLowerCase().includes(lower));
1716
1730
  }
1717
1731
  var mppServicesTool = buildTool({
1718
1732
  name: "mpp_services",
1719
- description: 'Discover available MPP gateway services. Returns service names, descriptions, endpoints with required parameters, and pricing. Use BEFORE calling pay_api. Modes: pass `query` for keyword search, `category` to filter by category, or `mode: "full"` to fetch the ENTIRE catalog in one card (for "show me all MPP services" / "full catalog" requests \u2014 never enumerate per category in a loop). Calling with no args returns a category summary so you can narrow.',
1733
+ description: 'Discover available MPP gateway services. Returns service names, descriptions, endpoints with required parameters, and pricing. Use BEFORE calling pay_api. With no args, returns the FULL catalog as a single card (default behavior \u2014 covers "show me available MPP services", "what services exist", "show me all MPP services"). Use `query` to keyword-search a specific need ("translate", "weather", "postcard"). Use `category` to filter to one category. Use `mode: "summary"` only if you explicitly want a category-counts overview without the full list.',
1720
1734
  inputSchema: z.object({
1721
- query: z.string().optional().describe('Filter by keyword (e.g. "postcard", "translate", "weather").'),
1722
- category: z.string().optional().describe('Filter by category exactly (e.g. "weather", "image"). See category summary returned when called without filters.'),
1723
- mode: z.enum(["summary", "full"]).optional().describe('"full" returns the entire catalog in a single card \u2014 use this for "show me all MPP services" / "full catalog" requests instead of looping per category. Default is "summary" (category counts only when no filter is supplied).')
1735
+ query: z.string().optional().describe('Filter by keyword (e.g. "postcard", "translate", "weather"). Returns matching services in one card.'),
1736
+ category: z.string().optional().describe('Filter by category exactly (e.g. "weather", "image"). Use mode:"summary" first if you need to see the category list.'),
1737
+ mode: z.enum(["summary", "full"]).optional().describe('"full" (default) returns the entire catalog in one card. "summary" returns category counts only \u2014 use this only when the user explicitly asks for a category overview.')
1724
1738
  }),
1725
1739
  jsonSchema: {
1726
1740
  type: "object",
@@ -1736,7 +1750,7 @@ var mppServicesTool = buildTool({
1736
1750
  mode: {
1737
1751
  type: "string",
1738
1752
  enum: ["summary", "full"],
1739
- description: '"full" returns the entire catalog in one card. Use for "show me all" requests.'
1753
+ description: '"full" (default) returns the entire catalog in one card. "summary" returns category counts only.'
1740
1754
  }
1741
1755
  },
1742
1756
  required: []
@@ -1748,25 +1762,14 @@ var mppServicesTool = buildTool({
1748
1762
  maxResultSizeChars: 12e3,
1749
1763
  async call(input) {
1750
1764
  const catalog = await fetchCatalog();
1751
- if (input.mode === "full") {
1752
- const services2 = catalog.map((s) => ({
1753
- id: s.id,
1754
- name: s.name,
1755
- description: s.description,
1756
- categories: s.categories,
1757
- endpoints: s.endpoints.map((e) => ({
1758
- url: `${MPP_GATEWAY2}/${s.id}${e.path}`,
1759
- method: e.method,
1760
- description: e.description,
1761
- price: `$${e.price}`
1762
- }))
1763
- }));
1765
+ if (input.mode !== "summary" && !input.query && !input.category) {
1766
+ const services2 = renderServices(catalog);
1764
1767
  return {
1765
1768
  data: { services: services2, total: services2.length, mode: "full" },
1766
1769
  displayText: `Full MPP catalog: ${services2.length} services.`
1767
1770
  };
1768
1771
  }
1769
- if (!input.query && !input.category) {
1772
+ if (input.mode === "summary" && !input.query && !input.category) {
1770
1773
  const counts = /* @__PURE__ */ new Map();
1771
1774
  for (const svc of catalog) {
1772
1775
  for (const cat of svc.categories) {
@@ -1777,14 +1780,14 @@ var mppServicesTool = buildTool({
1777
1780
  return {
1778
1781
  data: {
1779
1782
  _refine: {
1780
- reason: 'MPP catalog has many services \u2014 pick a category, supply a query, or pass mode:"full" to fetch everything.',
1783
+ reason: 'Category summary (mode:"summary"). Re-call with a category or omit mode for the full catalog.',
1781
1784
  suggestedParams: { category: categories[0]?.category ?? "weather" },
1782
1785
  allModes: ["summary", "full"]
1783
1786
  },
1784
1787
  categories,
1785
1788
  totalServices: catalog.length
1786
1789
  },
1787
- displayText: `${catalog.length} services across ${categories.length} categories. Re-call with a category, query, or mode:"full".`
1790
+ displayText: `${catalog.length} services across ${categories.length} categories.`
1788
1791
  };
1789
1792
  }
1790
1793
  let filtered = catalog;
@@ -1795,18 +1798,7 @@ var mppServicesTool = buildTool({
1795
1798
  if (input.query) {
1796
1799
  filtered = filtered.filter((s) => matchesQuery(s, input.query));
1797
1800
  }
1798
- const services = filtered.map((s) => ({
1799
- id: s.id,
1800
- name: s.name,
1801
- description: s.description,
1802
- categories: s.categories,
1803
- endpoints: s.endpoints.map((e) => ({
1804
- url: `${MPP_GATEWAY2}/${s.id}${e.path}`,
1805
- method: e.method,
1806
- description: e.description,
1807
- price: `$${e.price}`
1808
- }))
1809
- }));
1801
+ const services = renderServices(filtered);
1810
1802
  const filterDesc = [
1811
1803
  input.query ? `query "${input.query}"` : null,
1812
1804
  input.category ? `category "${input.category}"` : null
@@ -4265,27 +4257,116 @@ function safeNum(v) {
4265
4257
  return isNaN(n) ? 0 : n;
4266
4258
  }
4267
4259
 
4260
+ // src/turn-read-cache.ts
4261
+ var TurnReadCache = class {
4262
+ store = /* @__PURE__ */ new Map();
4263
+ /**
4264
+ * Build the cache key for a (toolName, input) pair. Stable across object
4265
+ * key ordering so `{a:1,b:2}` and `{b:2,a:1}` map to the same entry.
4266
+ */
4267
+ static keyFor(toolName, input) {
4268
+ return `${toolName}:${stableStringify2(input)}`;
4269
+ }
4270
+ has(key) {
4271
+ return this.store.has(key);
4272
+ }
4273
+ get(key) {
4274
+ return this.store.get(key);
4275
+ }
4276
+ /**
4277
+ * Populate the cache. Caller is responsible for ensuring the result was
4278
+ * a successful read (no errors). Overwrites any prior entry for the same
4279
+ * key — the most recent successful read wins, which is correct under our
4280
+ * "writes invalidate the whole cache" invariant.
4281
+ */
4282
+ set(key, value) {
4283
+ this.store.set(key, value);
4284
+ }
4285
+ /**
4286
+ * Drop every entry. Called at turn end and after every successful write.
4287
+ * Cheap and intentional — the cache is small (a handful of entries per
4288
+ * turn at most) and clearing is the correct response to any state mutation.
4289
+ */
4290
+ clear() {
4291
+ this.store.clear();
4292
+ }
4293
+ size() {
4294
+ return this.store.size;
4295
+ }
4296
+ };
4297
+ function stableStringify2(value) {
4298
+ if (value === null || value === void 0) return "";
4299
+ if (typeof value !== "object") return JSON.stringify(value);
4300
+ if (Array.isArray(value)) return JSON.stringify(value.map(stableStringifyForObject));
4301
+ return stableStringifyForObject(value);
4302
+ }
4303
+ function stableStringifyForObject(value) {
4304
+ if (value === null || value === void 0) return JSON.stringify(value);
4305
+ if (typeof value !== "object") return JSON.stringify(value);
4306
+ if (Array.isArray(value)) {
4307
+ return `[${value.map(stableStringifyForObject).join(",")}]`;
4308
+ }
4309
+ const sorted = Object.keys(value).sort();
4310
+ const parts = sorted.map(
4311
+ (k) => `${JSON.stringify(k)}:${stableStringifyForObject(value[k])}`
4312
+ );
4313
+ return `{${parts.join(",")}}`;
4314
+ }
4315
+
4268
4316
  // src/early-dispatcher.ts
4269
4317
  var EarlyToolDispatcher = class {
4270
4318
  entries = [];
4271
4319
  tools;
4272
4320
  context;
4321
+ turnReadCache;
4273
4322
  abortController;
4274
- constructor(tools, context) {
4323
+ constructor(tools, context, turnReadCache) {
4275
4324
  this.tools = tools;
4276
4325
  this.context = context;
4326
+ this.turnReadCache = turnReadCache;
4277
4327
  this.abortController = new AbortController();
4278
4328
  }
4279
4329
  /**
4280
4330
  * Attempt to dispatch a tool call. Returns true if the tool was dispatched
4281
4331
  * (read-only + concurrency-safe), false if it should be queued for later.
4332
+ *
4333
+ * [v0.46.8] Cache-aware: if a `TurnReadCache` was supplied at
4334
+ * construction and a prior call this turn already produced a result
4335
+ * for the same `(toolName, input)`, the dispatcher returns true (the
4336
+ * call IS handled here, not queued for the post-stream loop) but
4337
+ * skips the tool execution entirely — `collectResults` will surface
4338
+ * the cached value with `resultDeduped: true`. On a cache miss for
4339
+ * a successful real execution, the result is written back to the
4340
+ * cache so any later call within the same turn dedups too.
4282
4341
  */
4283
4342
  tryDispatch(call) {
4284
4343
  const tool = findTool(this.tools, call.name);
4285
4344
  if (!tool || !tool.isReadOnly || !tool.isConcurrencySafe) return false;
4345
+ if (this.turnReadCache) {
4346
+ const cacheKey = TurnReadCache.keyFor(call.name, call.input);
4347
+ const cached = this.turnReadCache.get(cacheKey);
4348
+ if (cached) {
4349
+ this.entries.push({
4350
+ call,
4351
+ tool,
4352
+ promise: Promise.resolve({ data: cached.result, isError: false }),
4353
+ deduped: true
4354
+ });
4355
+ return true;
4356
+ }
4357
+ }
4286
4358
  const childContext = { ...this.context, signal: this.abortController.signal };
4287
- const promise = executeTool(tool, call, childContext);
4288
- this.entries.push({ call, tool, promise });
4359
+ const promise = executeTool(tool, call, childContext).then((result) => {
4360
+ if (!result.isError && this.turnReadCache) {
4361
+ const cacheKey = TurnReadCache.keyFor(call.name, call.input);
4362
+ this.turnReadCache.set(cacheKey, {
4363
+ result: result.data,
4364
+ sourceToolUseId: call.id
4365
+ });
4366
+ }
4367
+ return result;
4368
+ });
4369
+ this.entries.push({ call, tool, promise, deduped: false });
4289
4370
  return true;
4290
4371
  }
4291
4372
  /** True if any tools have been dispatched. */
@@ -4311,7 +4392,8 @@ var EarlyToolDispatcher = class {
4311
4392
  toolUseId: entry.call.id,
4312
4393
  result: budgeted,
4313
4394
  isError: result.isError,
4314
- wasEarlyDispatched: true
4395
+ wasEarlyDispatched: true,
4396
+ ...entry.deduped ? { resultDeduped: true } : {}
4315
4397
  };
4316
4398
  } catch (err) {
4317
4399
  yield {
@@ -4383,6 +4465,18 @@ var QueryEngine = class {
4383
4465
  messages = [];
4384
4466
  abortController = null;
4385
4467
  guardEvents = [];
4468
+ // [v0.46.8] Intra-turn dedup cache for read-only tool calls. See
4469
+ // `turn-read-cache.ts` for the full lifecycle. Key takeaway: the cache
4470
+ // lives across the host's pre-dispatch (`invokeReadTool`) and the
4471
+ // agent loop's LLM-driven tool execution within ONE user turn, then
4472
+ // clears on `turn_complete` or after any successful write.
4473
+ turnReadCache = new TurnReadCache();
4474
+ // [v0.46.8] Set to `true` when the agent loop yields `pending_action`
4475
+ // and returns (turn is paused awaiting user confirmation). The
4476
+ // submitMessage / resumeWithToolResult wrappers consult this flag in
4477
+ // their `finally` block so they DON'T clear the cache mid-turn — the
4478
+ // pending write may resume, and the cache should survive the pause.
4479
+ turnPaused = false;
4386
4480
  constructor(config) {
4387
4481
  this.provider = config.provider;
4388
4482
  this.agent = config.agent;
@@ -4434,7 +4528,14 @@ var QueryEngine = class {
4434
4528
  role: "user",
4435
4529
  content: [{ type: "text", text: prompt }]
4436
4530
  });
4437
- yield* this.agentLoop(prompt, signal);
4531
+ this.turnPaused = false;
4532
+ try {
4533
+ yield* this.agentLoop(prompt, signal);
4534
+ } finally {
4535
+ if (!this.turnPaused) {
4536
+ this.turnReadCache.clear();
4537
+ }
4538
+ }
4438
4539
  }
4439
4540
  /**
4440
4541
  * Resume the conversation after a pending action is resolved.
@@ -4478,10 +4579,19 @@ var QueryEngine = class {
4478
4579
  };
4479
4580
  if (!response.approved) {
4480
4581
  yield { type: "turn_complete", stopReason: "end_turn" };
4582
+ this.turnReadCache.clear();
4481
4583
  return;
4482
4584
  }
4585
+ this.turnReadCache.clear();
4483
4586
  yield* this.runPostWriteRefresh(action, response, signal);
4484
- yield* this.agentLoop(null, signal, false);
4587
+ this.turnPaused = false;
4588
+ try {
4589
+ yield* this.agentLoop(null, signal, false);
4590
+ } finally {
4591
+ if (!this.turnPaused) {
4592
+ this.turnReadCache.clear();
4593
+ }
4594
+ }
4485
4595
  }
4486
4596
  /**
4487
4597
  * [v1.5] Auto-run configured read tools after a successful write,
@@ -4560,6 +4670,12 @@ var QueryEngine = class {
4560
4670
  }));
4561
4671
  this.messages.push({ role: "user", content: refreshResults });
4562
4672
  for (const r of refreshes) {
4673
+ if (!r.isError) {
4674
+ this.turnReadCache.set(
4675
+ TurnReadCache.keyFor(r.tool.name, {}),
4676
+ { result: r.data, sourceToolUseId: r.id }
4677
+ );
4678
+ }
4563
4679
  yield {
4564
4680
  type: "tool_result",
4565
4681
  toolName: r.tool.name,
@@ -4595,6 +4711,71 @@ var QueryEngine = class {
4595
4711
  loadMessages(messages) {
4596
4712
  this.messages = [...messages];
4597
4713
  }
4714
+ /**
4715
+ * [v0.46.7] Run a read-only tool out-of-band, using the engine's tool
4716
+ * registry and ToolContext. Used by hosts to deterministically pre-dispatch
4717
+ * tools based on user-message intent (e.g. always call `balance_check` when
4718
+ * the user says "what's my net worth?", regardless of whether the LLM would
4719
+ * have otherwise re-called it).
4720
+ *
4721
+ * The host is responsible for:
4722
+ * - Streaming the synthetic `tool_start` + `tool_result` events to the UI
4723
+ * (so cards render as if the LLM had called the tool).
4724
+ * - Appending matching `tool_use` + `tool_result` ContentBlocks to the
4725
+ * engine's message history via `loadMessages([...getMessages(), ...synth])`
4726
+ * BEFORE calling `submitMessage`, so the LLM sees the fresh data and
4727
+ * doesn't re-call.
4728
+ *
4729
+ * Throws if the tool isn't registered, isn't read-only, or fails input
4730
+ * validation. Tool execution errors are returned as `{ data, isError: true }`
4731
+ * for the caller to handle (typically: skip the injection so the LLM falls
4732
+ * back to its normal flow).
4733
+ */
4734
+ async invokeReadTool(toolName, input, options = {}) {
4735
+ const tool = findTool(this.tools, toolName);
4736
+ if (!tool) throw new Error(`invokeReadTool: tool not found: ${toolName}`);
4737
+ if (!tool.isReadOnly) {
4738
+ throw new Error(`invokeReadTool: tool is not read-only: ${toolName} (write tools must go through the permission gate)`);
4739
+ }
4740
+ const parsed = tool.inputSchema.safeParse(input);
4741
+ if (!parsed.success) {
4742
+ throw new Error(
4743
+ `invokeReadTool: invalid input for ${toolName}: ${parsed.error.issues.map((i) => i.message).join(", ")}`
4744
+ );
4745
+ }
4746
+ const cacheKey = TurnReadCache.keyFor(toolName, parsed.data);
4747
+ const cached = this.turnReadCache.get(cacheKey);
4748
+ if (cached) {
4749
+ return { data: cached.result, isError: false };
4750
+ }
4751
+ const signal = options.signal ?? new AbortController().signal;
4752
+ const context = {
4753
+ agent: this.agent,
4754
+ mcpManager: this.mcpManager,
4755
+ walletAddress: this.walletAddress,
4756
+ suiRpcUrl: this.suiRpcUrl,
4757
+ serverPositions: this.serverPositions,
4758
+ positionFetcher: this.positionFetcher,
4759
+ env: this.env,
4760
+ signal,
4761
+ priceCache: this.priceCache,
4762
+ permissionConfig: this.permissionConfig,
4763
+ sessionSpendUsd: this.sessionSpendUsd
4764
+ };
4765
+ try {
4766
+ const result = await tool.call(parsed.data, context);
4767
+ this.turnReadCache.set(cacheKey, {
4768
+ result: result.data,
4769
+ sourceToolUseId: "invokeReadTool"
4770
+ });
4771
+ return { data: result.data, isError: false };
4772
+ } catch (err) {
4773
+ return {
4774
+ data: { error: err instanceof Error ? err.message : "Tool execution failed" },
4775
+ isError: true
4776
+ };
4777
+ }
4778
+ }
4598
4779
  setServerPositions(data) {
4599
4780
  this.serverPositions = data;
4600
4781
  }
@@ -4639,7 +4820,7 @@ var QueryEngine = class {
4639
4820
  assistantBlocks: [],
4640
4821
  pendingToolCalls: []
4641
4822
  };
4642
- const dispatcher = new EarlyToolDispatcher(this.tools, context);
4823
+ const dispatcher = new EarlyToolDispatcher(this.tools, context, this.turnReadCache);
4643
4824
  try {
4644
4825
  const microcompacted = microcompact(this.messages, this.tools);
4645
4826
  this.messages = microcompacted;
@@ -4811,6 +4992,27 @@ ${recipeCtx}`;
4811
4992
  let pendingWrite = null;
4812
4993
  for (const call of acc.pendingToolCalls) {
4813
4994
  const tool = findTool(this.tools, call.name);
4995
+ if (tool && tool.isReadOnly) {
4996
+ const cacheKey = TurnReadCache.keyFor(call.name, call.input);
4997
+ const cached = this.turnReadCache.get(cacheKey);
4998
+ if (cached) {
4999
+ yield {
5000
+ type: "tool_result",
5001
+ toolName: call.name,
5002
+ toolUseId: call.id,
5003
+ result: cached.result,
5004
+ isError: false,
5005
+ resultDeduped: true
5006
+ };
5007
+ toolResultBlocks.push({
5008
+ type: "tool_result",
5009
+ toolUseId: call.id,
5010
+ content: JSON.stringify(cached.result),
5011
+ isError: false
5012
+ });
5013
+ continue;
5014
+ }
5015
+ }
4814
5016
  const needsConfirmation = (() => {
4815
5017
  if (!tool || tool.isReadOnly) return false;
4816
5018
  if (tool.permissionLevel === "explicit") return true;
@@ -4924,6 +5126,18 @@ ${recipeCtx}`;
4924
5126
  }
4925
5127
  }
4926
5128
  const finalEvent = enrichedResult !== toolEvent.result ? { ...toolEvent, result: enrichedResult } : toolEvent;
5129
+ if (!finalEvent.isError && tool) {
5130
+ if (tool.isReadOnly) {
5131
+ const inputForKey = originalCall?.input ?? {};
5132
+ const cacheKey = TurnReadCache.keyFor(finalEvent.toolName, inputForKey);
5133
+ this.turnReadCache.set(cacheKey, {
5134
+ result: finalEvent.result,
5135
+ sourceToolUseId: finalEvent.toolUseId
5136
+ });
5137
+ } else {
5138
+ this.turnReadCache.clear();
5139
+ }
5140
+ }
4927
5141
  yield finalEvent;
4928
5142
  if (finalEvent.type === "tool_result" && !finalEvent.isError) {
4929
5143
  const r = finalEvent.result;
@@ -5000,6 +5214,7 @@ ${recipeCtx}`;
5000
5214
  const writeGuardInjections = pendingWrite.call._guardInjections;
5001
5215
  const modifiableFields = getModifiableFields(pendingWrite.call.name);
5002
5216
  const turnIndex = this.messages.filter((m) => m.role === "assistant").length;
5217
+ this.turnPaused = true;
5003
5218
  yield {
5004
5219
  type: "pending_action",
5005
5220
  action: {