npm - anyclaude-sdk - Versions diffs - 0.8.0 → 0.9.0 - Mend

anyclaude-sdk 0.8.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

package/README.md CHANGED Viewed

@@ -386,6 +386,38 @@ npm create anyclaude-app@latest my-app   # template: bolt — WebContainer + cha
 The `bolt` template wires `useWebContainerPreview({ wc })` (boot a dev server → live preview URL) + a browser-side `query()` + the IDE components. See [`anyclaude-react`](#react-ui-kit--anyclaude-react).
+## Token efficiency — deferred tools
+Keep a large pool of rarely-used tools **out of the per-turn payload** (big savings on weak/uncached models) while keeping them discoverable and callable. Mark them deferred; `tool_search` indexes them, and the loop **arms** a tool (sends its schema on subsequent turns) once a search surfaces it — after that it executes normally.
+```ts
+query({ prompt, workspace, llm,
+  extraTools: [deploy, ...integrationTools],   // e.g. 35 integration tools
+  deferredTools: ['stripe_charge', 'supabase_query', /* … the niche ones */],
+})
+// or per-tool: defineTool({ name, description, parameters, run, defer: true })
+```
+Only the lean core + `tool_search` are sent each turn; the model searches when it needs a niche tool, the SDK arms it, and the call goes through. Register 35, send ~10.
+## Agent-loop tuning (cheap / lightweight / fast)
+Opt-in knobs for token cost and latency — especially on weak / uncached models:
+```ts
+query({
+  prompt, workspace, llm,
+  systemPromptPreset: 'lean',      // ~70% shorter built-in prompt — saved every turn on uncached models
+  keepToolResults: 6,              // context editing: stub tool_results older than the last 6 (caps transcript growth)
+  parallelToolExecution: true,     // run a turn's read-only tool calls concurrently (~2× faster on multi-read turns)
+  deferredTools: [/* niche tools */], // keep rarely-used tools out of the payload until tool_search arms them
+})
+// custom read tool opting into parallelism:
+defineTool({ name: 'get_logs', description: '…', parameters, run, parallelSafe: true })
+```
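+Mutating tools / `bash` / delegated client tools always execute serially, and a turn that contains any of them runs entirely serially (calls are run concurrently only when every call in the turn qualifies). `keepToolResults` and `parallelToolExecution` preserve correctness; they only trim cost and latency.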
 ## Other niceties
 - **Live compaction marker** — `autoCompact` emits a `compact_boundary` with `status: 'start'` *before* summarizing (for a live "compacting…" shimmer) and `status: 'end'` after with `post_tokens`.
@@ -402,7 +434,7 @@ Runnable Vite projects in [`examples/`](examples/): **`browser-ide`** (WebContai
   - `prompt: string | AsyncIterable<SDKUserMessage>`
   - `workspace: FileSystem & CommandExecutor`
   - `llm: LLMClient`
-  - `tools?`, `extraTools?`, `allowedTools?`/`disallowedTools?`, `model?`, `systemPrompt?`/`appendSystemPrompt?`, `maxTurns?` (default 50), `cwd?`, `abortController?`
+  - `tools?`, `extraTools?`, `allowedTools?`/`disallowedTools?`, `deferredTools?` (lazy-load), `model?`, `systemPrompt?`/`appendSystemPrompt?`, `maxTurns?` (default 50), `cwd?`, `abortController?`
   - serverless: `sessionStore?`, `resume?`, `maxDurationMs?`, `continueRun?`
   - client tools: `clientTools?`, `clientToolResults?`; interactive: `onAskUser?`
   - also: `mcpServers?`, `agents?`, `commands?`, `hooks?`, `background?`, `team?`, `memory?`, `permissionMode?`/`canUseTool?`, `messageQueue?`

package/dist/agent.d.ts CHANGED Viewed

@@ -18,8 +18,12 @@ export interface AgentOptions {
     /** Custom tools ADDED to the builtins (or to `tools` if given). Use `defineTool`. */
     extraTools?: Tool[];
     model?: string;
-    /** Full system prompt. If omitted, the default Claude Code prompt is used. */
+    /** Full system prompt. If omitted, the built-in prompt for `systemPromptPreset` is used. */
     systemPrompt?: string;
+    /** Which built-in system prompt to use when `systemPrompt` is omitted: `'default'`
+     *  (full Claude-Code contract) or `'lean'` (much shorter — cheaper every turn on
+     *  weak/uncached models). Default `'default'`. */
+    systemPromptPreset?: 'default' | 'lean';
     /** Text appended after the (default or custom) system prompt. */
     appendSystemPrompt?: string;
     /** Allowlist of tool names. When set, only these tools are exposed. */
@@ -31,6 +35,14 @@ export interface AgentOptions {
      *  until the model searches and the loop arms them. For large pools of
      *  rarely-used integration tools. (Per-tool `defer: true` works too.) */
     deferredTools?: string[];
+    /** Context editing: keep only the most recent N tool_result messages verbatim;
+     *  older ones are replaced with a short stub before each LLM call. Caps transcript
+     *  growth on long runs. Off when undefined. (Trades prompt-cache hits on the cleared
+     *  span for fewer tokens — a clear win on uncached endpoints.) */
+    keepToolResults?: number;
+    /** Execute a turn's tool calls concurrently when they're all read-only + server-run
+     *  (mutating tools / bash / delegated stay sequential). Latency win on multi-read turns. */
+    parallelToolExecution?: boolean;
     maxTurns?: number;
     /** Wall-clock budget (ms). At a turn boundary past this, the loop pauses: it
      *  persists to sessionStore and emits a `paused` system message instead of

package/dist/agent.js CHANGED Viewed

@@ -20,7 +20,7 @@ import { PLAN_MODE_TOOLS } from './tools/plan_mode.js';
 import { rulesToCanUseTool, ruleSetFromStrings, applyPermissionUpdate, isReadOnlyTool, } from './permissions/index.js';
 import { loadSettings, settingsToPermissionRuleSet } from './settings/index.js';
 import { loadSkillsFromFs, skillsToCommands, skill as skillTool } from './skills/index.js';
-import { defaultSystemPrompt, defaultSubagentPrompt } from './prompt.js';
+import { defaultSubagentPrompt, systemPromptFor } from './prompt.js';
 import { DEFAULT_MAX_RESULT_CHARS, maybePersistLargeResult } from './persist.js';
 import { computeCostUSD, contextWindowFor } from './util/pricing.js';
 import { estimateTokens, summarizeHistory } from './compact.js';
@@ -282,7 +282,7 @@ export async function* runAgent(options) {
         const named = names.length ? `|<(?:${names.join('|')})[\\s/>]` : '';
         return new RegExp(`<tool_call|<function\\s*=|<thinking${named}`, 'i');
     })();
-    let system = options.systemPrompt != null ? options.systemPrompt : defaultSystemPrompt(cwd);
+    let system = options.systemPrompt != null ? options.systemPrompt : systemPromptFor(cwd, options.systemPromptPreset);
     if (teamEnabled)
         system += '\n\n' + coordinatorPrompt();
     if (memory) {
@@ -293,6 +293,26 @@ export async function* runAgent(options) {
     if (options.appendSystemPrompt)
         system += '\n\n' + options.appendSystemPrompt;
     const history = [{ role: 'system', content: system }];
+    // Context editing: keep the most recent N tool_result messages verbatim; replace
+    // older ones with a short stub (idempotent) so they stop costing tokens each turn.
+    const keepToolResults = options.keepToolResults;
+    const CLEARED_STUB = '[earlier tool output cleared to save context]';
+    const pruneToolResults = () => {
+        if (keepToolResults == null || keepToolResults < 0)
+            return;
+        const toolIdx = [];
+        for (let i = 0; i < history.length; i++)
+            if (history[i].role === 'tool')
+                toolIdx.push(i);
+        const cutoff = toolIdx.length - keepToolResults;
+        for (let j = 0; j < cutoff; j++) {
+            const m = history[toolIdx[j]];
+            if (typeof m.content === 'string' && m.content !== CLEARED_STUB)
+                m.content = CLEARED_STUB;
+            else if (Array.isArray(m.content))
+                m.content = CLEARED_STUB;
+        }
+    };
     const store = { todos: [] };
     const ctx = {
         fs: workspace,
@@ -645,6 +665,9 @@ export async function* runAgent(options) {
                         await runHooks('PostCompact', { hook_event_name: 'PostCompact', trigger: 'auto' });
                 }
             }
+            // Context editing: stub out all but the most recent N tool_result messages so
+            // old tool output stops costing tokens on every subsequent turn.
+            pruneToolResults();
             let streamedText = '';
             let captured = [];
             const apiStart = Date.now();
@@ -760,6 +783,28 @@ export async function* runAgent(options) {
             const toolResultBlocks = [];
             clientRequests = [];
             const turnMedia = [];
+            // Parallel tool execution: when every call this turn is read-only + server-run,
+            // kick off the runs concurrently up front; the sequential loop below still does
+            // permission/hooks/assembly in order but awaits these prefetched results instead
+            // of running serially. Read-only ⇒ no ordering/side-effect risk. (Mutating tools,
+            // bash, and delegated client tools fall through to the normal serial path.)
+            const prefetch = new Map();
+            if (options.parallelToolExecution &&
+                calls.length > 1 &&
+                calls.every((c) => {
+                    const t = byName.get(c.function.name);
+                    if (clientTools.has(c.function.name) || !t?.run)
+                        return false;
+                    return t.parallelSafe === true || isReadOnlyTool(c.function.name, safeParse(c.function.arguments));
+                })) {
+                for (const c of calls) {
+                    const t = byName.get(c.function.name);
+                    const input = safeParse(c.function.arguments);
+                    prefetch.set(c.id, Promise.resolve()
+                        .then(() => t.run(input, ctx))
+                        .then((r) => ({ r }), (e) => ({ e })));
+                }
+            }
             for (const call of calls) {
                 if (signal?.aborted)
                     break;
@@ -872,10 +917,23 @@ export async function* runAgent(options) {
                                 abortController?.abort();
                         }
                         else {
+                            const inputChanged = !!('updatedInput' in decision && decision.updatedInput);
                             if ('updatedInput' in decision && decision.updatedInput)
                                 input = decision.updatedInput;
                             try {
-                                const r = await tool.run(input, ctx);
+                                // Use the concurrently-prefetched result when present and the input
+                                // wasn't rewritten by permission; otherwise run now.
+                                const pf = !inputChanged ? prefetch.get(call.id) : undefined;
+                                let r;
+                                if (pf) {
+                                    const out = await pf;
+                                    if (out.e !== undefined)
+                                        throw out.e;
+                                    r = out.r;
+                                }
+                                else {
+                                    r = await tool.run(input, ctx);
+                                }
                                 content = r.content;
                                 isError = !!r.isError;
                             }

package/dist/loop.d.ts CHANGED Viewed

@@ -42,6 +42,9 @@ export interface RunToolLoopOptions {
      * Default `true`. Set `false` to pass raw args straight through.
      */
     repairToolCalls?: boolean;
+    /** Run a turn's tool calls concurrently when all are read-only + server-run
+     *  (mutating/bash/delegated stay serial). Latency win on multi-read turns. */
+    parallelToolExecution?: boolean;
 }
 /**
  * Run the bare tool loop, yielding SDKMessages until the model stops or maxTurns.

package/dist/loop.js CHANGED Viewed

@@ -1,6 +1,7 @@
 import { toolByName, toolDefs } from './tools/index.js';
 import { validateToolArguments } from './llm/repair.js';
 import { parseToolCalls } from './llm/dialects.js';
+import { isReadOnlyTool } from './permissions/index.js';
 import { uuid } from './util/ids.js';
 /** Regex that matches the onset of tool-call / reasoning markup in streamed text. */
 function buildSuppressRe(toolNames) {
@@ -202,6 +203,23 @@ export async function* runToolLoop(opts) {
             break;
         const toolResultBlocks = [];
         const turnMedia = [];
+        // Parallel tool execution: when all calls this turn are read-only + server-run,
+        // run them concurrently up front; the loop assembles results in order below.
+        const prefetch = new Map();
+        if (opts.parallelToolExecution &&
+            calls.length > 1 &&
+            calls.every((c) => {
+                const t = byName.get(c.function.name);
+                if (clientTools.has(c.function.name) || !t?.run)
+                    return false;
+                return t.parallelSafe === true || isReadOnlyTool(c.function.name, safeParse(c.function.arguments));
+            })) {
+            for (const c of calls) {
+                const t = byName.get(c.function.name);
+                const input = safeParse(c.function.arguments);
+                prefetch.set(c.id, Promise.resolve().then(() => t.run(input, ctx)).then((r) => ({ r }), (e) => ({ e })));
+            }
+        }
         for (const call of calls) {
             if (signal?.aborted)
                 break;
@@ -254,10 +272,21 @@ export async function* runToolLoop(opts) {
                         isError = true;
                     }
                     else {
+                        const inputChanged = !!('updatedInput' in decision && decision.updatedInput);
                         if ('updatedInput' in decision && decision.updatedInput)
                             input = decision.updatedInput;
                         try {
-                            const r = await tool.run(input, ctx);
+                            const pf = !inputChanged ? prefetch.get(call.id) : undefined;
+                            let r;
+                            if (pf) {
+                                const out = await pf;
+                                if (out.e !== undefined)
+                                    throw out.e;
+                                r = out.r;
+                            }
+                            else {
+                                r = await tool.run(input, ctx);
+                            }
                             content = r.content;
                             isError = !!r.isError;
                         }

package/dist/prompt.d.ts CHANGED Viewed

@@ -1,4 +1,12 @@
 export declare function defaultSystemPrompt(cwd: string): string;
+/**
+ * Lean system prompt — a much shorter contract for token-sensitive / weak models
+ * (and uncached endpoints, where the prompt is paid every turn). Keeps only the
+ * load-bearing rules: read-before-edit, exact edit matching, stop when done.
+ */
+export declare function leanSystemPrompt(cwd: string): string;
+/** Pick the built-in system prompt by preset. */
+export declare function systemPromptFor(cwd: string, preset?: 'default' | 'lean'): string;
 /**
  * Default system prompt for a general-purpose sub-agent spawned via the `task`
  * tool. The sub-agent runs autonomously and returns only its final answer.

package/dist/prompt.js CHANGED Viewed

@@ -21,6 +21,19 @@ You have access to tools for reading, writing, and editing files, running shell
 When the task is complete, stop calling tools and give a short summary of what you did.`;
 }
+/**
+ * Lean system prompt — a much shorter contract for token-sensitive / weak models
+ * (and uncached endpoints, where the prompt is paid every turn). Keeps only the
+ * load-bearing rules: read-before-edit, exact edit matching, stop when done.
+ */
+export function leanSystemPrompt(cwd) {
+    return `You are a coding agent working on a real workspace (files + shell) via tools. Working directory: ${cwd}.
+Rules: read a file with read_file before edit_file; edit_file old_string must match exactly (else add context or use replace_all); prefer file tools over cat/sed; batch independent tool calls. Be concise. When done, stop calling tools and give a one-line summary.`;
+}
+/** Pick the built-in system prompt by preset. */
+export function systemPromptFor(cwd, preset) {
+    return preset === 'lean' ? leanSystemPrompt(cwd) : defaultSystemPrompt(cwd);
+}
 /**
  * Default system prompt for a general-purpose sub-agent spawned via the `task`
  * tool. The sub-agent runs autonomously and returns only its final answer.

package/dist/query.d.ts CHANGED Viewed

@@ -19,12 +19,20 @@ export interface QueryOptions {
     extraTools?: Tool[];
     model?: string;
     systemPrompt?: string;
+    /** Built-in prompt when `systemPrompt` is omitted: `'default'` or `'lean'` (shorter,
+     *  cheaper every turn on weak/uncached models). */
+    systemPromptPreset?: 'default' | 'lean';
     appendSystemPrompt?: string;
     allowedTools?: string[];
     disallowedTools?: string[];
     /** Tool names to defer out of the per-turn payload — discoverable via `tool_search`
      *  and armed on demand. Saves tokens on large tool pools (also per-tool `defer: true`). */
     deferredTools?: string[];
+    /** Context editing: keep only the most recent N tool_result messages verbatim; older
+     *  ones are stubbed before each LLM call. Caps transcript growth on long runs. */
+    keepToolResults?: number;
+    /** Run a turn's read-only tool calls concurrently (mutating/bash/delegated stay serial). */
+    parallelToolExecution?: boolean;
     maxTurns?: number;
     /** Wall-clock budget (ms): pause at a turn boundary past this + emit `paused` (survivor). */
     maxDurationMs?: number;

package/dist/query.js CHANGED Viewed

@@ -18,10 +18,13 @@ export function query(options) {
         extraTools: options.extraTools,
         model: options.model,
         systemPrompt: options.systemPrompt,
+        systemPromptPreset: options.systemPromptPreset,
         appendSystemPrompt: options.appendSystemPrompt,
         allowedTools: options.allowedTools,
         disallowedTools: options.disallowedTools,
         deferredTools: options.deferredTools,
+        keepToolResults: options.keepToolResults,
+        parallelToolExecution: options.parallelToolExecution,
         maxTurns: options.maxTurns,
         maxDurationMs: options.maxDurationMs,
         continueRun: options.continueRun,

package/dist/tools/define.d.ts CHANGED Viewed

@@ -18,6 +18,8 @@ export interface DefineToolSpec {
     /** Defer out of the per-turn payload — discoverable via `tool_search`, armed on
      *  demand. For large pools of rarely-used tools (see Tool.defer). */
     defer?: boolean;
+    /** Safe to run concurrently with other calls in a turn (see Tool.parallelSafe). */
+    parallelSafe?: boolean;
 }
 /** Build a `Tool` from a friendly spec. */
 export declare function defineTool(spec: DefineToolSpec): Tool;

package/dist/tools/define.js CHANGED Viewed

@@ -21,5 +21,7 @@ export function defineTool(spec) {
         tool.maxResultChars = spec.maxResultChars;
     if (spec.defer)
         tool.defer = true;
+    if (spec.parallelSafe)
+        tool.parallelSafe = true;
     return tool;
 }

package/dist/tools/types.d.ts CHANGED Viewed

@@ -103,4 +103,11 @@ export interface Tool {
      * rarely-used integration tools. (Also settable via `query({ deferredTools })`.)
      */
     defer?: boolean;
+    /**
+     * Mark this tool safe to execute CONCURRENTLY with other calls in the same turn
+     * (no side effects / no ordering dependency) — opts it into `parallelToolExecution`
+     * even if it isn't a recognized read-only builtin. Set on custom read tools
+     * (e.g. get_console_logs, screenshot, fetch-status).
+     */
+    parallelSafe?: boolean;
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "anyclaude-sdk",
-  "version": "0.8.0",
+  "version": "0.9.0",
   "description": "Standalone, browser-compatible SDK providing Claude Code agent capabilities (tools, tool loop, multi-turn, MCP, sub-agents, sessions) against any OpenAI/Anthropic-compatible LLM endpoint. Runs in the browser (WebContainer), Node, and Bun — no backend required.",
   "type": "module",
   "main": "./dist/index.js",