talon-agent 1.5.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,559 +1,10 @@
1
- import { query } from "@anthropic-ai/claude-agent-sdk";
2
- import type { TalonConfig } from "../../util/config.js";
3
- import {
4
- getSession,
5
- incrementTurns,
6
- recordUsage,
7
- resetSession,
8
- setSessionId,
9
- setSessionName,
10
- } from "../../storage/sessions.js";
11
- import { getChatSettings, setChatModel } from "../../storage/chat-settings.js";
12
- import { resolve } from "node:path";
13
- import { classify } from "../../core/errors.js";
14
- import {
15
- getPluginMcpServers,
16
- getPluginPromptAdditions,
17
- } from "../../core/plugin.js";
18
- import { rebuildSystemPrompt } from "../../util/config.js";
19
- import { log, logError, logWarn } from "../../util/log.js";
20
- import { traceMessage } from "../../util/trace.js";
21
- import { formatFullDatetime } from "../../util/time.js";
22
-
23
- import type { QueryParams, QueryResult } from "../../core/types.js";
24
-
25
- // ── State ────────────────────────────────────────────────────────────────────
26
-
27
- let config: TalonConfig;
28
- let bridgePortFn: () => number = () => 19876;
29
-
30
- export function initAgent(
31
- cfg: TalonConfig,
32
- getBridgePort?: () => number,
33
- ): void {
34
- config = cfg;
35
- if (getBridgePort) bridgePortFn = getBridgePort;
36
-
37
- // The Agent SDK spawns an embedded Claude Code subprocess.
38
- // If CLAUDECODE is set (e.g. running from a Claude Code terminal),
39
- // the subprocess refuses to start with a nested-session error that
40
- // gets swallowed — causing an infinite hang on Windows.
41
- delete process.env.CLAUDECODE;
42
- }
43
-
44
- /** Update the system prompt on the live config. Used by plugin hot-reload
45
- * so the next message picks up new plugin tool descriptions. */
46
- export function updateSystemPrompt(prompt: string): void {
47
- if (config) config.systemPrompt = prompt;
48
- }
49
-
50
- // ── Shared options builder ───────────────────────────────────────────────────
51
-
52
- function buildSdkOptions(chatId: string) {
53
- const chatSettings = getChatSettings(chatId);
54
- const activeModel = chatSettings.model ?? config.model;
55
- const activeEffort = chatSettings.effort ?? "adaptive";
56
-
57
- const EFFORT_MAP: Record<
58
- string,
59
- {
60
- thinking: { type: "adaptive" | "disabled" };
61
- effort?: "low" | "medium" | "high" | "max";
62
- }
63
- > = {
64
- off: { thinking: { type: "disabled" } },
65
- low: { thinking: { type: "adaptive" }, effort: "low" },
66
- medium: { thinking: { type: "adaptive" }, effort: "medium" },
67
- high: { thinking: { type: "adaptive" }, effort: "high" },
68
- max: { thinking: { type: "adaptive" }, effort: "max" },
69
- };
70
- const thinkingConfig = EFFORT_MAP[activeEffort] ?? {
71
- thinking: { type: "adaptive" as const },
72
- };
73
-
74
- const supports1m =
75
- !activeModel.includes("haiku") && !activeModel.includes("[1m]");
76
- const sdkModel = supports1m ? `${activeModel}[1m]` : activeModel;
77
-
78
- const session = getSession(chatId);
79
-
80
- const options = {
81
- model: sdkModel,
82
- systemPrompt: config.systemPrompt,
83
- cwd: config.workspace,
84
- permissionMode: "bypassPermissions" as const,
85
- allowDangerouslySkipPermissions: true,
86
- ...(config.claudeBinary
87
- ? { pathToClaudeCodeExecutable: config.claudeBinary }
88
- : {}),
89
- disallowedTools: [
90
- "EnterPlanMode",
91
- "ExitPlanMode",
92
- "EnterWorktree",
93
- "ExitWorktree",
94
- "TodoWrite",
95
- "TodoRead",
96
- "TaskCreate",
97
- "TaskUpdate",
98
- "TaskGet",
99
- "TaskList",
100
- "TaskOutput",
101
- "TaskStop",
102
- "AskUserQuestion",
103
- "WebSearch",
104
- "WebFetch",
105
- ],
106
- ...thinkingConfig,
107
- mcpServers: {
108
- ...(() => {
109
- const allFrontends = Array.isArray(config.frontend)
110
- ? config.frontend
111
- : [config.frontend];
112
- const frontends = allFrontends.filter((f) => f !== "terminal");
113
- const bridgeUrl = `http://127.0.0.1:${bridgePortFn()}`;
114
- const servers: Record<
115
- string,
116
- { command: string; args: string[]; env: Record<string, string> }
117
- > = {};
118
- const tsxImport = resolve(
119
- import.meta.dirname ?? ".",
120
- "../../../node_modules/tsx/dist/esm/index.mjs",
121
- );
122
- const mcpServerPath = resolve(
123
- import.meta.dirname ?? ".",
124
- "../../core/tools/mcp-server.ts",
125
- );
126
-
127
- for (const frontend of frontends) {
128
- const serverName = `${frontend}-tools`;
129
- const mcpEnv = {
130
- TALON_BRIDGE_URL: bridgeUrl,
131
- TALON_CHAT_ID: chatId,
132
- TALON_FRONTEND: frontend,
133
- };
134
- servers[serverName] = {
135
- command: process.platform === "win32" ? "npx" : "node",
136
- args:
137
- process.platform === "win32"
138
- ? ["tsx", mcpServerPath]
139
- : ["--import", tsxImport, mcpServerPath],
140
- env: mcpEnv,
141
- };
142
- }
143
- return servers;
144
- })(),
145
- ...(config.braveApiKey
146
- ? {
147
- "brave-search": {
148
- command: resolve(
149
- import.meta.dirname ?? ".",
150
- "../../../node_modules/.bin/brave-search-mcp-server",
151
- ),
152
- args: [],
153
- env: { BRAVE_API_KEY: config.braveApiKey },
154
- },
155
- }
156
- : {}),
157
- ...getPluginMcpServers(`http://127.0.0.1:${bridgePortFn()}`, chatId),
158
- },
159
- ...(session.sessionId ? { resume: session.sessionId } : {}),
160
- };
161
-
162
- return { options, activeModel, session };
163
- }
164
-
165
- // ── Session warm-up ─────────────────────────────────────────────────────────
166
-
167
1
  /**
168
- * Cold-start a session by spawning an SDK subprocess in streaming input mode,
169
- * calling getContextUsage() to populate contextWindow and baseline contextTokens,
170
- * then tearing it down. Fire-and-forget does not block the caller.
2
+ * Claude SDK backend barrel re-export.
3
+ *
4
+ * All consumers import from this file; the implementation is split across
5
+ * focused modules for readability and maintainability.
171
6
  */
172
- export async function warmSession(chatId: string): Promise<void> {
173
- if (!config) return;
174
- const abort = new AbortController();
175
- try {
176
- rebuildSystemPrompt(config, getPluginPromptAdditions());
177
- const { options } = buildSdkOptions(chatId);
178
-
179
- // Streaming input mode: pass an async iterable that never yields a user message
180
- const neverYield = async function* (): AsyncGenerator<never> {
181
- await new Promise<never>((_, reject) => {
182
- abort.signal.addEventListener("abort", () =>
183
- reject(new Error("aborted")),
184
- );
185
- });
186
- };
187
-
188
- const q = query({
189
- prompt: neverYield(),
190
- options: {
191
- ...options,
192
- abortController: abort,
193
- } as Parameters<typeof query>[0]["options"],
194
- });
195
-
196
- // Drain the stream in the background so the SDK's internal message loop
197
- // doesn't stall — control responses are processed in readMessages() which
198
- // needs the inputStream consumer to not back-pressure.
199
- const drainPromise = (async () => {
200
- try {
201
- for await (const _ of q) {
202
- // discard SDK messages; we only care about the control response
203
- }
204
- } catch {
205
- // expected: abort causes the stream to end with an error
206
- }
207
- })();
208
-
209
- // Race getContextUsage against a timeout so /reset doesn't hang
210
- const timeout = new Promise<never>((_, reject) =>
211
- setTimeout(() => reject(new Error("warm-up timed out")), 15_000),
212
- );
213
- const ctx = await Promise.race([q.getContextUsage(), timeout]);
214
- const session = getSession(chatId);
215
- if (ctx.maxTokens > 0) session.usage.contextWindow = ctx.maxTokens;
216
- if (ctx.totalTokens > 0) session.usage.contextTokens = ctx.totalTokens;
217
- log(
218
- "agent",
219
- `[${chatId}] warm-up: context ${ctx.totalTokens}/${ctx.maxTokens} (${ctx.percentage.toFixed(1)}%) model=${ctx.model}`,
220
- );
221
-
222
- abort.abort();
223
- await drainPromise;
224
- } catch (err) {
225
- abort.abort();
226
- // Non-fatal — /status will just show 0 until first real message
227
- logWarn(
228
- "agent",
229
- `[${chatId}] warm-up failed: ${err instanceof Error ? err.message : err}`,
230
- );
231
- }
232
- }
233
-
234
- // ── Main handler ─────────────────────────────────────────────────────────────
235
-
236
- export async function handleMessage(
237
- params: QueryParams,
238
- _retried = false,
239
- ): Promise<QueryResult> {
240
- if (!config)
241
- throw new Error("Agent not initialized. Call initAgent() first.");
242
-
243
- const {
244
- chatId,
245
- text,
246
- senderName,
247
- isGroup,
248
- onTextBlock,
249
- onStreamDelta,
250
- onToolUse,
251
- } = params;
252
- const session = getSession(chatId);
253
- const t0 = Date.now();
254
-
255
- // Rebuild system prompt on first turn of a new/reset session so identity,
256
- // memory, and workspace listing are fresh
257
- if (session.turns === 0) {
258
- rebuildSystemPrompt(config, getPluginPromptAdditions());
259
- }
260
-
261
- const { options, activeModel } = buildSdkOptions(chatId);
262
-
263
- const msgIdHint = params.messageId ? ` [msg_id:${params.messageId}]` : "";
264
- const nowTag = `[${formatFullDatetime()}]`;
265
-
266
- const prompt = isGroup
267
- ? `${nowTag} [${senderName}]${msgIdHint}: ${text}`
268
- : `${nowTag}${msgIdHint} ${text}`;
269
- log("agent", `[${chatId}] <- (${text.length} chars)`);
270
- traceMessage(chatId, "in", text, { senderName, isGroup });
271
-
272
- // SDK types are not fully exported; cast options at the boundary
273
- const qi = query({
274
- prompt,
275
- options: options as Parameters<typeof query>[0]["options"],
276
- });
277
-
278
- let currentBlockText = "";
279
- let allResponseText = "";
280
- let newSessionId: string | undefined;
281
- let toolCalls = 0;
282
- // Populated from SDK result message
283
- let contextTokens = 0; // actual context fill from last iteration
284
- let contextWindow: number | undefined;
285
- let numApiCalls = 0;
286
- // Cumulative token counts from SDK modelUsage (aggregated across models)
287
- let sdkInputTokens = 0;
288
- let sdkOutputTokens = 0;
289
- let sdkCacheRead = 0;
290
- let sdkCacheWrite = 0;
291
-
292
- // Streaming throttle
293
- let lastStreamUpdate = 0;
294
- const STREAM_INTERVAL = 1000;
295
-
296
- try {
297
- for await (const message of qi) {
298
- const msg = message as Record<string, unknown>;
299
- const type = msg.type as string;
300
-
301
- // Session ID capture
302
- if (
303
- type === "system" &&
304
- msg.subtype === "init" &&
305
- typeof msg.session_id === "string"
306
- ) {
307
- newSessionId = msg.session_id;
308
- }
309
-
310
- // Stream text deltas and thinking deltas
311
- if (type === "stream_event" && onStreamDelta) {
312
- const event = msg.event as Record<string, unknown> | undefined;
313
- if (event?.type === "content_block_delta") {
314
- const delta = event.delta as Record<string, unknown> | undefined;
315
- if (
316
- delta?.type === "thinking_delta" &&
317
- typeof delta.thinking === "string"
318
- ) {
319
- // Thinking phase: notify but don't accumulate text
320
- const now = Date.now();
321
- if (now - lastStreamUpdate >= STREAM_INTERVAL) {
322
- lastStreamUpdate = now;
323
- onStreamDelta(currentBlockText, "thinking");
324
- }
325
- } else if (
326
- delta?.type === "text_delta" &&
327
- typeof delta.text === "string"
328
- ) {
329
- currentBlockText += delta.text;
330
- const now = Date.now();
331
- if (now - lastStreamUpdate >= STREAM_INTERVAL) {
332
- lastStreamUpdate = now;
333
- onStreamDelta(currentBlockText, "text");
334
- }
335
- }
336
- }
337
- }
338
-
339
- // Complete assistant message — may contain multiple text blocks
340
- // and tool_use blocks. Each text block before a tool_use is a
341
- // "progress message" that should be sent immediately.
342
- if (type === "assistant") {
343
- const content = (msg.message as { content?: unknown[] })?.content;
344
- if (Array.isArray(content)) {
345
- let blockText = "";
346
- for (const block of content) {
347
- const b = block as { type: string; text?: string; name?: string };
348
- if (b.type === "text" && b.text) {
349
- blockText += b.text;
350
- }
351
- if (b.type === "tool_use") {
352
- toolCalls++;
353
- const tb = block as {
354
- type: string;
355
- name?: string;
356
- input?: Record<string, unknown>;
357
- };
358
- if (onToolUse && tb.name) {
359
- try {
360
- onToolUse(tb.name, tb.input ?? {});
361
- } catch {
362
- /* non-fatal */
363
- }
364
- }
365
- // If there's text before this tool call, send it as a progress message
366
- if (blockText.trim() && onTextBlock) {
367
- try {
368
- await onTextBlock(blockText.trim());
369
- } catch {
370
- /* non-fatal — don't abort the stream loop */
371
- }
372
- allResponseText += blockText;
373
- blockText = "";
374
- currentBlockText = "";
375
- }
376
- }
377
- }
378
- // Remaining text after all tool calls (or if no tool calls)
379
- if (blockText.trim()) {
380
- currentBlockText = blockText;
381
- }
382
- }
383
- }
384
-
385
- // Final result — read all data from SDK result fields
386
- if (type === "result") {
387
- numApiCalls =
388
- ((msg as Record<string, unknown>).num_turns as number) ?? 0;
389
-
390
- // Context fill from last API iteration (only available in raw usage)
391
- const usage = msg.usage as
392
- | {
393
- iterations?: Array<{
394
- input_tokens: number;
395
- cache_read_input_tokens: number;
396
- cache_creation_input_tokens: number;
397
- }>;
398
- }
399
- | undefined;
400
- if (
401
- usage &&
402
- Array.isArray(usage.iterations) &&
403
- usage.iterations.length > 0
404
- ) {
405
- const last = usage.iterations[usage.iterations.length - 1];
406
- contextTokens =
407
- (last.input_tokens ?? 0) +
408
- (last.cache_read_input_tokens ?? 0) +
409
- (last.cache_creation_input_tokens ?? 0);
410
- }
411
-
412
- // Token counts, context window from SDK modelUsage (aggregated per model)
413
- type MU = {
414
- inputTokens?: number;
415
- outputTokens?: number;
416
- cacheReadInputTokens?: number;
417
- cacheCreationInputTokens?: number;
418
- contextWindow?: number;
419
- };
420
- const modelUsage = (msg as Record<string, unknown>).modelUsage as
421
- | Record<string, MU>
422
- | undefined;
423
- if (modelUsage) {
424
- for (const mu of Object.values(modelUsage)) {
425
- sdkInputTokens += mu.inputTokens ?? 0;
426
- sdkOutputTokens += mu.outputTokens ?? 0;
427
- sdkCacheRead += mu.cacheReadInputTokens ?? 0;
428
- sdkCacheWrite += mu.cacheCreationInputTokens ?? 0;
429
- if (
430
- mu.contextWindow &&
431
- mu.contextWindow > 0 &&
432
- contextWindow === undefined
433
- ) {
434
- contextWindow = mu.contextWindow;
435
- }
436
- }
437
- }
438
- log(
439
- "agent",
440
- `SDK result: modelUsage=${JSON.stringify(modelUsage)}, contextWindow=${contextWindow}, contextTokens=${contextTokens}, numApiCalls=${numApiCalls}`,
441
- );
442
-
443
- // If we still have unsent text and no streaming captured it
444
- if (
445
- !allResponseText &&
446
- !currentBlockText &&
447
- typeof msg.result === "string"
448
- ) {
449
- currentBlockText = msg.result;
450
- }
451
- }
452
- }
453
- } catch (err) {
454
- const classified = classify(err);
455
- if (classified.reason === "session_expired" && !_retried) {
456
- logWarn(
457
- "agent",
458
- `[${chatId}] Stale session, retrying with fresh session`,
459
- );
460
- resetSession(chatId);
461
- return handleMessage(params, true);
462
- }
463
- // Context length exceeded — reset session and retry (SDK auto-compaction should prevent
464
- // this, but handle it as a safety net for edge cases)
465
- if (classified.reason === "context_length" && !_retried) {
466
- logWarn(
467
- "agent",
468
- `[${chatId}] Context length exceeded, resetting session and retrying`,
469
- );
470
- resetSession(chatId);
471
- return handleMessage(params, true);
472
- }
473
- // Model fallback: if overloaded/timeout, retry with a faster model
474
- if (!_retried && classified.retryable) {
475
- const fallbackModel = activeModel.includes("opus")
476
- ? "claude-sonnet-4-6"
477
- : activeModel.includes("sonnet")
478
- ? "claude-haiku-4-5"
479
- : null;
480
- if (fallbackModel) {
481
- logWarn(
482
- "agent",
483
- `[${chatId}] ${classified.reason}, falling back to ${fallbackModel.replace("claude-", "")}`,
484
- );
485
- resetSession(chatId);
486
- const originalModel = getChatSettings(chatId).model;
487
- setChatModel(chatId, fallbackModel);
488
- try {
489
- return await handleMessage(params, true);
490
- } finally {
491
- setChatModel(chatId, originalModel);
492
- }
493
- }
494
- }
495
- logError("agent", `[${chatId}] SDK error: ${classified.message}`);
496
- throw classified;
497
- }
498
-
499
- // Persist session and usage
500
- const durationMs = Date.now() - t0;
501
- if (newSessionId) setSessionId(chatId, newSessionId);
502
- incrementTurns(chatId);
503
- recordUsage(chatId, {
504
- inputTokens: sdkInputTokens,
505
- outputTokens: sdkOutputTokens,
506
- cacheRead: sdkCacheRead,
507
- cacheWrite: sdkCacheWrite,
508
- durationMs,
509
- model: activeModel,
510
- contextTokens,
511
- contextWindow,
512
- numApiCalls,
513
- });
514
-
515
- // Set a descriptive session name from the first message
516
- if (session.turns === 0 && text) {
517
- // Strip metadata prefixes like [DM from ...] or [Name]:
518
- const cleanText = text
519
- .replace(/^\[.*?\]\s*/g, "")
520
- .replace(/\[msg_id:\d+\]\s*/g, "")
521
- .trim();
522
- if (cleanText) {
523
- const name =
524
- cleanText.length > 30 ? cleanText.slice(0, 30) + "..." : cleanText;
525
- setSessionName(chatId, name);
526
- }
527
- }
528
-
529
- // The remaining currentBlockText is the final response text
530
- allResponseText += currentBlockText;
531
-
532
- const totalPrompt = sdkInputTokens + sdkCacheRead + sdkCacheWrite;
533
- const cacheHitPct =
534
- totalPrompt > 0 ? Math.round((sdkCacheRead / totalPrompt) * 100) : 0;
535
-
536
- log(
537
- "agent",
538
- `[${chatId}] -> (${durationMs}ms, in=${sdkInputTokens} out=${sdkOutputTokens} cache=${cacheHitPct}%` +
539
- `${toolCalls > 0 ? ` tools=${toolCalls}` : ""})`,
540
- );
541
- traceMessage(chatId, "out", allResponseText, {
542
- durationMs,
543
- inputTokens: sdkInputTokens,
544
- outputTokens: sdkOutputTokens,
545
- cacheRead: sdkCacheRead,
546
- cacheWrite: sdkCacheWrite,
547
- toolCalls,
548
- model: activeModel,
549
- });
550
7
 
551
- return {
552
- text: allResponseText.trim(),
553
- durationMs,
554
- inputTokens: sdkInputTokens,
555
- outputTokens: sdkOutputTokens,
556
- cacheRead: sdkCacheRead,
557
- cacheWrite: sdkCacheWrite,
558
- };
559
- }
8
+ export { initAgent, updateSystemPrompt } from "./state.js";
9
+ export { warmSession } from "./warm.js";
10
+ export { handleMessage } from "./handler.js";