anyclaude-sdk 0.8.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -386,6 +386,38 @@ npm create anyclaude-app@latest my-app # template: bolt — WebContainer + cha
386
386
 
387
387
  The `bolt` template wires `useWebContainerPreview({ wc })` (boot a dev server → live preview URL) + a browser-side `query()` + the IDE components. See [`anyclaude-react`](#react-ui-kit--anyclaude-react).
388
388
 
389
+ ## Token efficiency — deferred tools
390
+
391
+ Keep a large pool of rarely-used tools **out of the per-turn payload** (big savings on weak/uncached models) while keeping them discoverable and callable. Mark them as deferred: `tool_search` indexes them, and the loop **arms** a tool (sends its schema on subsequent turns) once a search surfaces it — after that it executes normally.
392
+
393
+ ```ts
394
+ query({ prompt, workspace, llm,
395
+ extraTools: [deploy, ...integrationTools], // e.g. 35 integration tools
396
+ deferredTools: ['stripe_charge', 'supabase_query', /* … the niche ones */],
397
+ })
398
+ // or per-tool: defineTool({ name, description, parameters, run, defer: true })
399
+ ```
400
+
401
+ Only the lean core + `tool_search` are sent each turn; the model searches when it needs a niche tool, the SDK arms it, and the call goes through. Register 35, send ~10.
402
+
403
+ ## Agent-loop tuning (cheap / lightweight / fast)
404
+
405
+ Opt-in knobs for token cost and latency — especially on weak / uncached models:
406
+
407
+ ```ts
408
+ query({
409
+ prompt, workspace, llm,
410
+ systemPromptPreset: 'lean', // ~70% shorter built-in prompt — saved every turn on uncached models
411
+ keepToolResults: 6, // context editing: stub tool_results older than the last 6 (caps transcript growth)
412
+ parallelToolExecution: true, // run a turn's tool calls concurrently when all of them are read-only (~2× faster on multi-read turns)
413
+ deferredTools: [/* niche tools */], // keep rarely-used tools out of the payload until tool_search arms them
414
+ })
415
+ // custom read tool opting into parallelism:
416
+ defineTool({ name: 'get_logs', description: '…', parameters, run, parallelSafe: true })
417
+ ```
418
+
419
+ Mutating tools, `bash`, and delegated client tools always execute serially, and a turn that contains any of them runs entirely serially. `keepToolResults` and `parallelToolExecution` preserve correctness; they only trim cost and latency.
420
+
389
421
  ## Other niceties
390
422
 
391
423
  - **Live compaction marker** — `autoCompact` emits a `compact_boundary` with `status: 'start'` *before* summarizing (for a live "compacting…" shimmer) and `status: 'end'` after with `post_tokens`.
@@ -402,7 +434,7 @@ Runnable Vite projects in [`examples/`](examples/): **`browser-ide`** (WebContai
402
434
  - `prompt: string | AsyncIterable<SDKUserMessage>`
403
435
  - `workspace: FileSystem & CommandExecutor`
404
436
  - `llm: LLMClient`
405
- - `tools?`, `extraTools?`, `allowedTools?`/`disallowedTools?`, `model?`, `systemPrompt?`/`appendSystemPrompt?`, `maxTurns?` (default 50), `cwd?`, `abortController?`
437
+ - `tools?`, `extraTools?`, `allowedTools?`/`disallowedTools?`, `deferredTools?` (lazy-load), `model?`, `systemPrompt?`/`appendSystemPrompt?`, `maxTurns?` (default 50), `cwd?`, `abortController?`
406
438
  - serverless: `sessionStore?`, `resume?`, `maxDurationMs?`, `continueRun?`
407
439
  - client tools: `clientTools?`, `clientToolResults?`; interactive: `onAskUser?`
408
440
  - also: `mcpServers?`, `agents?`, `commands?`, `hooks?`, `background?`, `team?`, `memory?`, `permissionMode?`/`canUseTool?`, `messageQueue?`
package/dist/agent.d.ts CHANGED
@@ -18,8 +18,12 @@ export interface AgentOptions {
18
18
  /** Custom tools ADDED to the builtins (or to `tools` if given). Use `defineTool`. */
19
19
  extraTools?: Tool[];
20
20
  model?: string;
21
- /** Full system prompt. If omitted, the default Claude Code prompt is used. */
21
+ /** Full system prompt. If omitted, the built-in prompt for `systemPromptPreset` is used. */
22
22
  systemPrompt?: string;
23
+ /** Which built-in system prompt to use when `systemPrompt` is omitted: `'default'`
24
+ * (full Claude-Code contract) or `'lean'` (much shorter — cheaper every turn on
25
+ * weak/uncached models). Default `'default'`. */
26
+ systemPromptPreset?: 'default' | 'lean';
23
27
  /** Text appended after the (default or custom) system prompt. */
24
28
  appendSystemPrompt?: string;
25
29
  /** Allowlist of tool names. When set, only these tools are exposed. */
@@ -31,6 +35,14 @@ export interface AgentOptions {
31
35
  * until the model searches and the loop arms them. For large pools of
32
36
  * rarely-used integration tools. (Per-tool `defer: true` works too.) */
33
37
  deferredTools?: string[];
38
+ /** Context editing: keep only the most recent N tool_result messages verbatim;
39
+ * older ones are replaced with a short stub before each LLM call. Caps transcript
40
+ * growth on long runs. Off when undefined. (Trades prompt-cache hits on the cleared
41
+ * span for fewer tokens — a clear win on uncached endpoints.) */
42
+ keepToolResults?: number;
43
+ /** Execute a turn's tool calls concurrently when they're all read-only + server-run
44
+ * (mutating tools / bash / delegated stay sequential). Latency win on multi-read turns. */
45
+ parallelToolExecution?: boolean;
34
46
  maxTurns?: number;
35
47
  /** Wall-clock budget (ms). At a turn boundary past this, the loop pauses: it
36
48
  * persists to sessionStore and emits a `paused` system message instead of
package/dist/agent.js CHANGED
@@ -20,7 +20,7 @@ import { PLAN_MODE_TOOLS } from './tools/plan_mode.js';
20
20
  import { rulesToCanUseTool, ruleSetFromStrings, applyPermissionUpdate, isReadOnlyTool, } from './permissions/index.js';
21
21
  import { loadSettings, settingsToPermissionRuleSet } from './settings/index.js';
22
22
  import { loadSkillsFromFs, skillsToCommands, skill as skillTool } from './skills/index.js';
23
- import { defaultSystemPrompt, defaultSubagentPrompt } from './prompt.js';
23
+ import { defaultSubagentPrompt, systemPromptFor } from './prompt.js';
24
24
  import { DEFAULT_MAX_RESULT_CHARS, maybePersistLargeResult } from './persist.js';
25
25
  import { computeCostUSD, contextWindowFor } from './util/pricing.js';
26
26
  import { estimateTokens, summarizeHistory } from './compact.js';
@@ -282,7 +282,7 @@ export async function* runAgent(options) {
282
282
  const named = names.length ? `|<(?:${names.join('|')})[\\s/>]` : '';
283
283
  return new RegExp(`<tool_call|<function\\s*=|<thinking${named}`, 'i');
284
284
  })();
285
- let system = options.systemPrompt != null ? options.systemPrompt : defaultSystemPrompt(cwd);
285
+ let system = options.systemPrompt != null ? options.systemPrompt : systemPromptFor(cwd, options.systemPromptPreset);
286
286
  if (teamEnabled)
287
287
  system += '\n\n' + coordinatorPrompt();
288
288
  if (memory) {
@@ -293,6 +293,26 @@ export async function* runAgent(options) {
293
293
  if (options.appendSystemPrompt)
294
294
  system += '\n\n' + options.appendSystemPrompt;
295
295
  const history = [{ role: 'system', content: system }];
296
+ // Context editing: keep the most recent N tool_result messages verbatim; replace
297
+ // older ones with a short stub (idempotent) so they stop costing tokens each turn.
298
+ const keepToolResults = options.keepToolResults;
299
+ const CLEARED_STUB = '[earlier tool output cleared to save context]';
300
+ const pruneToolResults = () => {
301
+ if (keepToolResults == null || keepToolResults < 0)
302
+ return;
303
+ const toolIdx = [];
304
+ for (let i = 0; i < history.length; i++)
305
+ if (history[i].role === 'tool')
306
+ toolIdx.push(i);
307
+ const cutoff = toolIdx.length - keepToolResults;
308
+ for (let j = 0; j < cutoff; j++) {
309
+ const m = history[toolIdx[j]];
310
+ if (typeof m.content === 'string' && m.content !== CLEARED_STUB)
311
+ m.content = CLEARED_STUB;
312
+ else if (Array.isArray(m.content))
313
+ m.content = CLEARED_STUB;
314
+ }
315
+ };
296
316
  const store = { todos: [] };
297
317
  const ctx = {
298
318
  fs: workspace,
@@ -645,6 +665,9 @@ export async function* runAgent(options) {
645
665
  await runHooks('PostCompact', { hook_event_name: 'PostCompact', trigger: 'auto' });
646
666
  }
647
667
  }
668
+ // Context editing: stub out all but the most recent N tool_result messages so
669
+ // old tool output stops costing tokens on every subsequent turn.
670
+ pruneToolResults();
648
671
  let streamedText = '';
649
672
  let captured = [];
650
673
  const apiStart = Date.now();
@@ -760,6 +783,28 @@ export async function* runAgent(options) {
760
783
  const toolResultBlocks = [];
761
784
  clientRequests = [];
762
785
  const turnMedia = [];
786
+ // Parallel tool execution: when every call this turn is read-only + server-run,
787
+ // kick off the runs concurrently up front; the sequential loop below still does
788
+ // permission/hooks/assembly in order but awaits these prefetched results instead
789
+ // of running serially. Read-only ⇒ no ordering/side-effect risk. (Mutating tools,
790
+ // bash, and delegated client tools fall through to the normal serial path.)
791
+ const prefetch = new Map();
792
+ if (options.parallelToolExecution &&
793
+ calls.length > 1 &&
794
+ calls.every((c) => {
795
+ const t = byName.get(c.function.name);
796
+ if (clientTools.has(c.function.name) || !t?.run)
797
+ return false;
798
+ return t.parallelSafe === true || isReadOnlyTool(c.function.name, safeParse(c.function.arguments));
799
+ })) {
800
+ for (const c of calls) {
801
+ const t = byName.get(c.function.name);
802
+ const input = safeParse(c.function.arguments);
803
+ prefetch.set(c.id, Promise.resolve()
804
+ .then(() => t.run(input, ctx))
805
+ .then((r) => ({ r }), (e) => ({ e })));
806
+ }
807
+ }
763
808
  for (const call of calls) {
764
809
  if (signal?.aborted)
765
810
  break;
@@ -872,10 +917,23 @@ export async function* runAgent(options) {
872
917
  abortController?.abort();
873
918
  }
874
919
  else {
920
+ const inputChanged = !!('updatedInput' in decision && decision.updatedInput);
875
921
  if ('updatedInput' in decision && decision.updatedInput)
876
922
  input = decision.updatedInput;
877
923
  try {
878
- const r = await tool.run(input, ctx);
924
+ // Use the concurrently-prefetched result when present and the input
925
+ // wasn't rewritten by permission; otherwise run now.
926
+ const pf = !inputChanged ? prefetch.get(call.id) : undefined;
927
+ let r;
928
+ if (pf) {
929
+ const out = await pf;
930
+ if (out.e !== undefined)
931
+ throw out.e;
932
+ r = out.r;
933
+ }
934
+ else {
935
+ r = await tool.run(input, ctx);
936
+ }
879
937
  content = r.content;
880
938
  isError = !!r.isError;
881
939
  }
package/dist/loop.d.ts CHANGED
@@ -42,6 +42,9 @@ export interface RunToolLoopOptions {
42
42
  * Default `true`. Set `false` to pass raw args straight through.
43
43
  */
44
44
  repairToolCalls?: boolean;
45
+ /** Run a turn's tool calls concurrently when all are read-only + server-run
46
+ * (mutating/bash/delegated stay serial). Latency win on multi-read turns. */
47
+ parallelToolExecution?: boolean;
45
48
  }
46
49
  /**
47
50
  * Run the bare tool loop, yielding SDKMessages until the model stops or maxTurns.
package/dist/loop.js CHANGED
@@ -1,6 +1,7 @@
1
1
  import { toolByName, toolDefs } from './tools/index.js';
2
2
  import { validateToolArguments } from './llm/repair.js';
3
3
  import { parseToolCalls } from './llm/dialects.js';
4
+ import { isReadOnlyTool } from './permissions/index.js';
4
5
  import { uuid } from './util/ids.js';
5
6
  /** Regex that matches the onset of tool-call / reasoning markup in streamed text. */
6
7
  function buildSuppressRe(toolNames) {
@@ -202,6 +203,23 @@ export async function* runToolLoop(opts) {
202
203
  break;
203
204
  const toolResultBlocks = [];
204
205
  const turnMedia = [];
206
+ // Parallel tool execution: when all calls this turn are read-only + server-run,
207
+ // run them concurrently up front; the loop assembles results in order below.
208
+ const prefetch = new Map();
209
+ if (opts.parallelToolExecution &&
210
+ calls.length > 1 &&
211
+ calls.every((c) => {
212
+ const t = byName.get(c.function.name);
213
+ if (clientTools.has(c.function.name) || !t?.run)
214
+ return false;
215
+ return t.parallelSafe === true || isReadOnlyTool(c.function.name, safeParse(c.function.arguments));
216
+ })) {
217
+ for (const c of calls) {
218
+ const t = byName.get(c.function.name);
219
+ const input = safeParse(c.function.arguments);
220
+ prefetch.set(c.id, Promise.resolve().then(() => t.run(input, ctx)).then((r) => ({ r }), (e) => ({ e })));
221
+ }
222
+ }
205
223
  for (const call of calls) {
206
224
  if (signal?.aborted)
207
225
  break;
@@ -254,10 +272,21 @@ export async function* runToolLoop(opts) {
254
272
  isError = true;
255
273
  }
256
274
  else {
275
+ const inputChanged = !!('updatedInput' in decision && decision.updatedInput);
257
276
  if ('updatedInput' in decision && decision.updatedInput)
258
277
  input = decision.updatedInput;
259
278
  try {
260
- const r = await tool.run(input, ctx);
279
+ const pf = !inputChanged ? prefetch.get(call.id) : undefined;
280
+ let r;
281
+ if (pf) {
282
+ const out = await pf;
283
+ if (out.e !== undefined)
284
+ throw out.e;
285
+ r = out.r;
286
+ }
287
+ else {
288
+ r = await tool.run(input, ctx);
289
+ }
261
290
  content = r.content;
262
291
  isError = !!r.isError;
263
292
  }
package/dist/prompt.d.ts CHANGED
@@ -1,4 +1,12 @@
1
1
  export declare function defaultSystemPrompt(cwd: string): string;
2
+ /**
3
+ * Lean system prompt — a much shorter contract for token-sensitive / weak models
4
+ * (and uncached endpoints, where the prompt is paid every turn). Keeps only the
5
+ * load-bearing rules: read-before-edit, exact edit matching, stop when done.
6
+ */
7
+ export declare function leanSystemPrompt(cwd: string): string;
8
+ /** Pick the built-in system prompt by preset. */
9
+ export declare function systemPromptFor(cwd: string, preset?: 'default' | 'lean'): string;
2
10
  /**
3
11
  * Default system prompt for a general-purpose sub-agent spawned via the `task`
4
12
  * tool. The sub-agent runs autonomously and returns only its final answer.
package/dist/prompt.js CHANGED
@@ -21,6 +21,19 @@ You have access to tools for reading, writing, and editing files, running shell
21
21
 
22
22
  When the task is complete, stop calling tools and give a short summary of what you did.`;
23
23
  }
24
+ /**
25
+ * Lean system prompt — a much shorter contract for token-sensitive / weak models
26
+ * (and uncached endpoints, where the prompt is paid every turn). Keeps only the
27
+ * load-bearing rules: read-before-edit, exact edit matching, stop when done.
28
+ */
29
+ export function leanSystemPrompt(cwd) {
30
+ return `You are a coding agent working on a real workspace (files + shell) via tools. Working directory: ${cwd}.
31
+ Rules: read a file with read_file before edit_file; edit_file old_string must match exactly (else add context or use replace_all); prefer file tools over cat/sed; batch independent tool calls. Be concise. When done, stop calling tools and give a one-line summary.`;
32
+ }
33
+ /** Pick the built-in system prompt by preset. */
34
+ export function systemPromptFor(cwd, preset) {
35
+ return preset === 'lean' ? leanSystemPrompt(cwd) : defaultSystemPrompt(cwd);
36
+ }
24
37
  /**
25
38
  * Default system prompt for a general-purpose sub-agent spawned via the `task`
26
39
  * tool. The sub-agent runs autonomously and returns only its final answer.
package/dist/query.d.ts CHANGED
@@ -19,12 +19,20 @@ export interface QueryOptions {
19
19
  extraTools?: Tool[];
20
20
  model?: string;
21
21
  systemPrompt?: string;
22
+ /** Built-in prompt when `systemPrompt` is omitted: `'default'` or `'lean'` (shorter,
23
+ * cheaper every turn on weak/uncached models). */
24
+ systemPromptPreset?: 'default' | 'lean';
22
25
  appendSystemPrompt?: string;
23
26
  allowedTools?: string[];
24
27
  disallowedTools?: string[];
25
28
  /** Tool names to defer out of the per-turn payload — discoverable via `tool_search`
26
29
  * and armed on demand. Saves tokens on large tool pools (also per-tool `defer: true`). */
27
30
  deferredTools?: string[];
31
+ /** Context editing: keep only the most recent N tool_result messages verbatim; older
32
+ * ones are stubbed before each LLM call. Caps transcript growth on long runs. */
33
+ keepToolResults?: number;
34
+ /** Run a turn's tool calls concurrently when all are read-only + server-run (mutating/bash/delegated stay serial). */
35
+ parallelToolExecution?: boolean;
28
36
  maxTurns?: number;
29
37
  /** Wall-clock budget (ms): pause at a turn boundary past this + emit `paused` (survivor). */
30
38
  maxDurationMs?: number;
package/dist/query.js CHANGED
@@ -18,10 +18,13 @@ export function query(options) {
18
18
  extraTools: options.extraTools,
19
19
  model: options.model,
20
20
  systemPrompt: options.systemPrompt,
21
+ systemPromptPreset: options.systemPromptPreset,
21
22
  appendSystemPrompt: options.appendSystemPrompt,
22
23
  allowedTools: options.allowedTools,
23
24
  disallowedTools: options.disallowedTools,
24
25
  deferredTools: options.deferredTools,
26
+ keepToolResults: options.keepToolResults,
27
+ parallelToolExecution: options.parallelToolExecution,
25
28
  maxTurns: options.maxTurns,
26
29
  maxDurationMs: options.maxDurationMs,
27
30
  continueRun: options.continueRun,
@@ -18,6 +18,8 @@ export interface DefineToolSpec {
18
18
  /** Defer out of the per-turn payload — discoverable via `tool_search`, armed on
19
19
  * demand. For large pools of rarely-used tools (see Tool.defer). */
20
20
  defer?: boolean;
21
+ /** Safe to run concurrently with other calls in a turn (see Tool.parallelSafe). */
22
+ parallelSafe?: boolean;
21
23
  }
22
24
  /** Build a `Tool` from a friendly spec. */
23
25
  export declare function defineTool(spec: DefineToolSpec): Tool;
@@ -21,5 +21,7 @@ export function defineTool(spec) {
21
21
  tool.maxResultChars = spec.maxResultChars;
22
22
  if (spec.defer)
23
23
  tool.defer = true;
24
+ if (spec.parallelSafe)
25
+ tool.parallelSafe = true;
24
26
  return tool;
25
27
  }
@@ -103,4 +103,11 @@ export interface Tool {
103
103
  * rarely-used integration tools. (Also settable via `query({ deferredTools })`.)
104
104
  */
105
105
  defer?: boolean;
106
+ /**
107
+ * Mark this tool safe to execute CONCURRENTLY with other calls in the same turn
108
+ * (no side effects / no ordering dependency) — opts it into `parallelToolExecution`
109
+ * even if it isn't a recognized read-only builtin. Set on custom read tools
110
+ * (e.g. get_console_logs, screenshot, fetch-status).
111
+ */
112
+ parallelSafe?: boolean;
106
113
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "anyclaude-sdk",
3
- "version": "0.8.0",
3
+ "version": "0.9.0",
4
4
  "description": "Standalone, browser-compatible SDK providing Claude Code agent capabilities (tools, tool loop, multi-turn, MCP, sub-agents, sessions) against any OpenAI/Anthropic-compatible LLM endpoint. Runs in the browser (WebContainer), Node, and Bun — no backend required.",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",