anyclaude-sdk 0.7.4 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -386,6 +386,20 @@ npm create anyclaude-app@latest my-app # template: bolt — WebContainer + cha
386
386
 
387
387
  The `bolt` template wires `useWebContainerPreview({ wc })` (boot a dev server → live preview URL) + a browser-side `query()` + the IDE components. See [`anyclaude-react`](#react-ui-kit--anyclaude-react).
388
388
 
389
+ ## Token efficiency — deferred tools
390
+
391
+ Keep a large pool of rarely used tools **out of the per-turn payload** (big savings on weak/uncached models) while keeping them discoverable and callable. Mark them as deferred: `tool_search` indexes them, and the loop **arms** a tool (sends its schema on subsequent turns) once a search surfaces it — then it executes normally.
392
+
393
+ ```ts
394
+ query({ prompt, workspace, llm,
395
+ extraTools: [deploy, ...integrationTools], // e.g. 35 integration tools
396
+ deferredTools: ['stripe_charge', 'supabase_query', /* … the niche ones */],
397
+ })
398
+ // or per-tool: defineTool({ name, description, parameters, run, defer: true })
399
+ ```
400
+
401
+ Only the lean core and `tool_search` are sent each turn; the model searches when it needs a niche tool, the SDK arms it, and the call goes through. Register 35 tools, send ~10 per turn.
402
+
389
403
  ## Other niceties
390
404
 
391
405
  - **Live compaction marker** — `autoCompact` emits a `compact_boundary` with `status: 'start'` *before* summarizing (for a live "compacting…" shimmer) and `status: 'end'` after with `post_tokens`.
@@ -402,7 +416,7 @@ Runnable Vite projects in [`examples/`](examples/): **`browser-ide`** (WebContai
402
416
  - `prompt: string | AsyncIterable<SDKUserMessage>`
403
417
  - `workspace: FileSystem & CommandExecutor`
404
418
  - `llm: LLMClient`
405
- - `tools?`, `extraTools?`, `allowedTools?`/`disallowedTools?`, `model?`, `systemPrompt?`/`appendSystemPrompt?`, `maxTurns?` (default 50), `cwd?`, `abortController?`
419
+ - `tools?`, `extraTools?`, `allowedTools?`/`disallowedTools?`, `deferredTools?` (lazy-load), `model?`, `systemPrompt?`/`appendSystemPrompt?`, `maxTurns?` (default 50), `cwd?`, `abortController?`
406
420
  - serverless: `sessionStore?`, `resume?`, `maxDurationMs?`, `continueRun?`
407
421
  - client tools: `clientTools?`, `clientToolResults?`; interactive: `onAskUser?`
408
422
  - also: `mcpServers?`, `agents?`, `commands?`, `hooks?`, `background?`, `team?`, `memory?`, `permissionMode?`/`canUseTool?`, `messageQueue?`
package/dist/agent.d.ts CHANGED
@@ -26,6 +26,11 @@ export interface AgentOptions {
26
26
  allowedTools?: string[];
27
27
  /** Denylist of tool names, applied after allowedTools. */
28
28
  disallowedTools?: string[];
29
+ /** Tool names to DEFER out of the per-turn payload — still discoverable via
30
+ * `tool_search` and executable, but their schema isn't sent (saving tokens)
31
+ * until the model searches and the loop arms them. For large pools of
32
+ * rarely-used integration tools. (Per-tool `defer: true` works too.) */
33
+ deferredTools?: string[];
29
34
  maxTurns?: number;
30
35
  /** Wall-clock budget (ms). At a turn boundary past this, the loop pauses: it
31
36
  * persists to sessionStore and emits a `paused` system message instead of
package/dist/agent.js CHANGED
@@ -263,8 +263,17 @@ export async function* runAgent(options) {
263
263
  for (const t of tools)
264
264
  if (!t.run)
265
265
  clientTools.add(t.def.function.name);
266
- const defs = toolDefs(tools);
266
+ const defs = toolDefs(tools); // FULL set — for the search index, suppression, and call recovery
267
267
  const byName = toolByName(tools);
268
+ // Deferred tools: kept OUT of the per-turn payload (token savings) but still
269
+ // discoverable via tool_search and executable. `tool_search` surfaces them and
270
+ // arms them (adds their schema to subsequent turns). tool_search itself is
271
+ // never deferred, or discovery breaks.
272
+ const deferredSet = new Set([...(options.deferredTools ?? []), ...tools.filter((t) => t.defer).map((t) => t.def.function.name)].filter((n) => n !== 'tool_search'));
273
+ const armed = new Set();
274
+ const sentDefs = () => deferredSet.size
275
+ ? toolDefs(tools.filter((t) => !deferredSet.has(t.def.function.name) || armed.has(t.def.function.name)))
276
+ : defs;
268
277
  // Stop streaming visible deltas once tool-call / reasoning markup begins (native
269
278
  // dialects, <thinking>, or named-tag tools like <finish>); final text is cleaned.
270
279
  const streamSuppressRe = (() => {
@@ -302,6 +311,13 @@ export async function* runAgent(options) {
302
311
  memory,
303
312
  skills,
304
313
  planMode,
314
+ armTools: deferredSet.size
315
+ ? (names) => {
316
+ for (const n of names)
317
+ if (deferredSet.has(n))
318
+ armed.add(n);
319
+ }
320
+ : undefined,
305
321
  };
306
322
  const skillCommands = skillsToCommands(skills);
307
323
  const allCommands = [...(options.commands ?? []), ...skillCommands];
@@ -412,7 +428,7 @@ export async function* runAgent(options) {
412
428
  subtype: 'init',
413
429
  apiKeySource: 'none',
414
430
  cwd,
415
- tools: defs.map((d) => d.function.name),
431
+ tools: sentDefs().map((d) => d.function.name),
416
432
  mcp_servers: mcpStatuses,
417
433
  model: model ?? 'unknown',
418
434
  permissionMode,
@@ -641,7 +657,7 @@ export async function* runAgent(options) {
641
657
  let inToolMarkup = false;
642
658
  const sp = llm.streamChat(history, {
643
659
  model,
644
- tools: defs,
660
+ tools: sentDefs(),
645
661
  signal,
646
662
  onToken: (delta) => {
647
663
  streamedText += delta;
@@ -673,7 +689,7 @@ export async function* runAgent(options) {
673
689
  else {
674
690
  result = await llm.streamChat(history, {
675
691
  model,
676
- tools: defs,
692
+ tools: sentDefs(),
677
693
  signal,
678
694
  onToken: (delta) => {
679
695
  streamedText += delta;
package/dist/query.d.ts CHANGED
@@ -22,6 +22,9 @@ export interface QueryOptions {
22
22
  appendSystemPrompt?: string;
23
23
  allowedTools?: string[];
24
24
  disallowedTools?: string[];
25
+ /** Tool names to defer out of the per-turn payload — discoverable via `tool_search`
26
+ * and armed on demand. Saves tokens on large tool pools (also per-tool `defer: true`). */
27
+ deferredTools?: string[];
25
28
  maxTurns?: number;
26
29
  /** Wall-clock budget (ms): pause at a turn boundary past this + emit `paused` (survivor). */
27
30
  maxDurationMs?: number;
package/dist/query.js CHANGED
@@ -21,6 +21,7 @@ export function query(options) {
21
21
  appendSystemPrompt: options.appendSystemPrompt,
22
22
  allowedTools: options.allowedTools,
23
23
  disallowedTools: options.disallowedTools,
24
+ deferredTools: options.deferredTools,
24
25
  maxTurns: options.maxTurns,
25
26
  maxDurationMs: options.maxDurationMs,
26
27
  continueRun: options.continueRun,
@@ -15,6 +15,9 @@ export interface DefineToolSpec {
15
15
  run?: (input: Record<string, unknown>, ctx: ToolContext) => Promise<ToolResult> | ToolResult;
16
16
  /** Optional: spill threshold for large outputs (see Tool.maxResultChars). */
17
17
  maxResultChars?: number;
18
+ /** Defer out of the per-turn payload — discoverable via `tool_search`, armed on
19
+ * demand. For large pools of rarely-used tools (see Tool.defer). */
20
+ defer?: boolean;
18
21
  }
19
22
  /** Build a `Tool` from a friendly spec. */
20
23
  export declare function defineTool(spec: DefineToolSpec): Tool;
@@ -19,5 +19,7 @@ export function defineTool(spec) {
19
19
  tool.run = async (input, ctx) => spec.run(input, ctx);
20
20
  if (spec.maxResultChars !== undefined)
21
21
  tool.maxResultChars = spec.maxResultChars;
22
+ if (spec.defer)
23
+ tool.defer = true;
22
24
  return tool;
23
25
  }
@@ -41,8 +41,11 @@ export const toolSearch = {
41
41
  .slice(0, limit);
42
42
  if (!scored.length)
43
43
  return { content: `No tools matched "${q}".` };
44
+ // Arm any deferred tools we surfaced so their full schema is sent next turn
45
+ // and the model can call them directly.
46
+ ctx.armTools?.(scored.map(({ t }) => t.name));
44
47
  return {
45
- content: `Matching tools for "${q}":\n` +
48
+ content: `Matching tools for "${q}" (now available to call):\n` +
46
49
  scored.map(({ t }) => ` ${t.name} — ${t.description.split('\n')[0]}`).join('\n'),
47
50
  };
48
51
  },
@@ -72,6 +72,9 @@ export interface ToolContext {
72
72
  planMode?: {
73
73
  active: boolean;
74
74
  };
75
+ /** Arm deferred tools by name so their full schema is sent on subsequent turns.
76
+ * Provided by the loop; `tool_search` calls it for the deferred tools it surfaces. */
77
+ armTools?: (names: string[]) => void;
75
78
  }
76
79
  /** Result returned by a tool run. */
77
80
  export interface ToolResult {
@@ -92,4 +95,12 @@ export interface Tool {
92
95
  * When omitted, the loop uses its global default threshold.
93
96
  */
94
97
  maxResultChars?: number;
98
+ /**
99
+ * DEFER this tool out of the per-turn payload sent to the LLM. It stays
100
+ * discoverable via `tool_search` and executable when called, but its schema
101
+ * isn't sent (saving tokens every turn) until `tool_search` surfaces it — at
102
+ * which point the loop "arms" it for subsequent turns. Use for large pools of
103
+ * rarely-used integration tools. (Also settable via `query({ deferredTools })`.)
104
+ */
105
+ defer?: boolean;
95
106
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "anyclaude-sdk",
3
- "version": "0.7.4",
3
+ "version": "0.8.1",
4
4
  "description": "Standalone, browser-compatible SDK providing Claude Code agent capabilities (tools, tool loop, multi-turn, MCP, sub-agents, sessions) against any OpenAI/Anthropic-compatible LLM endpoint. Runs in the browser (WebContainer), Node, and Bun — no backend required.",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",