talon-agent 1.4.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/prompts/heartbeat.md +18 -6
- package/src/__tests__/heartbeat.test.ts +21 -0
- package/src/__tests__/reload-plugins.test.ts +199 -0
- package/src/__tests__/sessions.test.ts +155 -121
- package/src/backend/claude-sdk/index.ts +198 -62
- package/src/bootstrap.ts +3 -103
- package/src/core/gateway-actions.ts +42 -1
- package/src/core/heartbeat.ts +8 -5
- package/src/core/plugin.ts +147 -0
- package/src/core/tools/admin.ts +22 -0
- package/src/core/tools/index.ts +2 -0
- package/src/core/tools/types.ts +2 -1
- package/src/frontend/teams/index.ts +9 -10
- package/src/frontend/telegram/commands.ts +11 -10
- package/src/storage/sessions.ts +34 -40
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "talon-agent",
|
|
3
|
-
"version": "1.4.0",
|
|
3
|
+
"version": "1.5.0",
|
|
4
4
|
"description": "Multi-frontend AI agent with full tool access, streaming, cron jobs, and plugin system",
|
|
5
5
|
"author": "Dylan Neve",
|
|
6
6
|
"license": "MIT",
|
|
@@ -51,7 +51,7 @@
|
|
|
51
51
|
"format:check": "prettier --check src/ prompts/"
|
|
52
52
|
},
|
|
53
53
|
"dependencies": {
|
|
54
|
-
"@anthropic-ai/claude-agent-sdk": "^0.2.
|
|
54
|
+
"@anthropic-ai/claude-agent-sdk": "^0.2.104",
|
|
55
55
|
"@brave/brave-search-mcp-server": "^2.0.75",
|
|
56
56
|
"@clack/prompts": "^1.2.0",
|
|
57
57
|
"@grammyjs/auto-retry": "^2.0.2",
|
package/prompts/heartbeat.md
CHANGED
|
@@ -1,6 +1,14 @@
|
|
|
1
1
|
You are Talon's background heartbeat agent. You run periodically (every {{intervalMinutes}} minutes) to perform maintenance tasks defined by the user.
|
|
2
2
|
|
|
3
|
-
You have access
|
|
3
|
+
You have access to filesystem tools (Read, Write, Edit, Bash, Glob, Grep) and all loaded MCP plugins. Do NOT use Telegram messaging tools — you cannot send messages to users.
|
|
4
|
+
|
|
5
|
+
## Available MCP Tools
|
|
6
|
+
|
|
7
|
+
You have access to all registered MCP plugin tools (excluding Telegram messaging tools). The exact set depends on what plugins are enabled in the current configuration, but may include email, memory/knowledge graph, web search, Wikipedia, GitHub, media processing, browser automation, and more.
|
|
8
|
+
|
|
9
|
+
Only use tools that are actually available in your current session. Do not assume any specific tool is present — check what's exposed to you at runtime.
|
|
10
|
+
|
|
11
|
+
Use available tools when they help accomplish the user-defined tasks (e.g. checking email, querying the knowledge graph, searching the web for updates).
|
|
4
12
|
|
|
5
13
|
## Context
|
|
6
14
|
|
|
@@ -20,11 +28,15 @@ If the instructions file does not exist or is empty, perform these default tasks
|
|
|
20
28
|
1. **Review recent logs** — Check `{{logsDir}}/` for log files dated after `{{lastRunIso}}`. If `{{lastRunIso}}` is `never`, treat it as the beginning of time and review all available logs. Extract any new facts, preferences, or notable events.
|
|
21
29
|
2. **Update memory** — Merge any new information into `{{memoryFile}}`, keeping entries concise and factual.
|
|
22
30
|
3. **Update daily notes** — Write today's learnings, observations, corrections, and follow-ups to `{{dailyMemoryFile}}`. Keep entries concise — the bot reads this file on demand for context.
|
|
23
|
-
4. **
|
|
31
|
+
4. **Check email** — If email tools are available, check the inbox for new messages and note anything important.
|
|
32
|
+
5. **Workspace hygiene** — Note any issues but do not delete files unless the instructions explicitly say to.
|
|
24
33
|
|
|
25
34
|
## Rules
|
|
26
35
|
|
|
27
|
-
-
|
|
28
|
-
-
|
|
29
|
-
-
|
|
30
|
-
-
|
|
36
|
+
- Do NOT use Telegram messaging tools — they are not available in heartbeat mode.
|
|
37
|
+
- Be concise in log entries and memory updates.
|
|
38
|
+
- If a task fails, log the error and move on to the next task.
|
|
39
|
+
- Do NOT modify the instructions file — only read it.
|
|
40
|
+
- Be surgical: only make the minimal file changes needed to complete the current task.
|
|
41
|
+
- Do NOT create, modify, move, or delete files outside `{{workspace}}` unless the user-defined instructions explicitly require it.
|
|
42
|
+
- Complete all tasks within the time budget. If running low, prioritize memory updates.
|
package/src/__tests__/heartbeat.test.ts
CHANGED
|
@@ -48,6 +48,10 @@ vi.mock("@anthropic-ai/claude-agent-sdk", () => ({
|
|
|
48
48
|
query: queryMock,
|
|
49
49
|
}));
|
|
50
50
|
|
|
51
|
+
vi.mock("../core/plugin.js", () => ({
|
|
52
|
+
getPluginMcpServers: vi.fn(() => ({})),
|
|
53
|
+
}));
|
|
54
|
+
|
|
51
55
|
vi.mock("../util/paths.js", () => ({
|
|
52
56
|
files: {
|
|
53
57
|
heartbeatState: "/fake/.talon/workspace/memory/heartbeat_state.json",
|
|
@@ -184,6 +188,23 @@ describe("forceHeartbeat", () => {
|
|
|
184
188
|
expect(finalState.status).toBe("idle");
|
|
185
189
|
});
|
|
186
190
|
|
|
191
|
+
it("passes plugin MCP servers to the agent via getPluginMcpServers", async () => {
|
|
192
|
+
const { getPluginMcpServers } = await import("../core/plugin.js");
|
|
193
|
+
const mockServers = {
|
|
194
|
+
"email-tools": { command: "node", args: ["email.js"], env: {} },
|
|
195
|
+
};
|
|
196
|
+
vi.mocked(getPluginMcpServers).mockReturnValue(mockServers);
|
|
197
|
+
|
|
198
|
+
await forceHeartbeat();
|
|
199
|
+
|
|
200
|
+
expect(getPluginMcpServers).toHaveBeenCalledWith("", "heartbeat");
|
|
201
|
+
// Verify mcpServers was passed through to query()
|
|
202
|
+
const queryCall = queryMock.mock.calls[0] as unknown as [
|
|
203
|
+
{ options: { mcpServers: Record<string, unknown> } },
|
|
204
|
+
];
|
|
205
|
+
expect(queryCall[0].options.mcpServers).toEqual(mockServers);
|
|
206
|
+
});
|
|
207
|
+
|
|
187
208
|
it("preserves previous last_run on failure", async () => {
|
|
188
209
|
const previousLastRun = Date.now() - 3600_000;
|
|
189
210
|
existsSyncMock.mockReturnValue(true);
|
package/src/__tests__/reload-plugins.test.ts
ADDED
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
import { describe, it, expect, vi, beforeEach } from "vitest";
|
|
2
|
+
|
|
3
|
+
// ── Module mocks ──────────────────────────────────────────────────────────
|
|
4
|
+
|
|
5
|
+
vi.mock("../util/log.js", () => ({
|
|
6
|
+
log: vi.fn(),
|
|
7
|
+
logError: vi.fn(),
|
|
8
|
+
logWarn: vi.fn(),
|
|
9
|
+
logDebug: vi.fn(),
|
|
10
|
+
}));
|
|
11
|
+
|
|
12
|
+
vi.mock("write-file-atomic", () => ({
|
|
13
|
+
default: { sync: vi.fn() },
|
|
14
|
+
}));
|
|
15
|
+
|
|
16
|
+
// Mock cheerio (required by gateway-actions via extractText)
|
|
17
|
+
vi.mock("cheerio", () => ({
|
|
18
|
+
load: vi.fn(() => {
|
|
19
|
+
const $ = (sel: string) => ({
|
|
20
|
+
remove: vi.fn(),
|
|
21
|
+
text: () => "",
|
|
22
|
+
});
|
|
23
|
+
($ as any).root = vi.fn();
|
|
24
|
+
return $;
|
|
25
|
+
}),
|
|
26
|
+
}));
|
|
27
|
+
|
|
28
|
+
// Mock storage modules required by gateway-actions
|
|
29
|
+
vi.mock("../storage/history.js", () => ({
|
|
30
|
+
getRecentFormatted: vi.fn(() => ""),
|
|
31
|
+
searchHistory: vi.fn(() => ""),
|
|
32
|
+
getMessagesByUser: vi.fn(() => ""),
|
|
33
|
+
getKnownUsers: vi.fn(() => ""),
|
|
34
|
+
}));
|
|
35
|
+
vi.mock("../storage/media-index.js", () => ({
|
|
36
|
+
formatMediaIndex: vi.fn(() => ""),
|
|
37
|
+
}));
|
|
38
|
+
vi.mock("../storage/cron-store.js", () => ({
|
|
39
|
+
addCronJob: vi.fn(),
|
|
40
|
+
getCronJob: vi.fn(),
|
|
41
|
+
getCronJobsForChat: vi.fn(() => []),
|
|
42
|
+
updateCronJob: vi.fn(),
|
|
43
|
+
deleteCronJob: vi.fn(),
|
|
44
|
+
validateCronExpression: vi.fn(() => ({ valid: true })),
|
|
45
|
+
generateCronId: vi.fn(() => "test-id"),
|
|
46
|
+
}));
|
|
47
|
+
|
|
48
|
+
// ── Plugin mocking ──────────────────────────────────────────────────────
|
|
49
|
+
|
|
50
|
+
const DEFAULT_CONFIG = {
|
|
51
|
+
model: "claude-opus-4-6",
|
|
52
|
+
frontend: "telegram",
|
|
53
|
+
plugins: [],
|
|
54
|
+
systemPrompt: "test prompt",
|
|
55
|
+
};
|
|
56
|
+
|
|
57
|
+
const mockReloadPlugins = vi.fn(async () => ({
|
|
58
|
+
names: ["extras", "brave-search"],
|
|
59
|
+
config: { ...DEFAULT_CONFIG },
|
|
60
|
+
}));
|
|
61
|
+
const mockGetPluginPromptAdditions = vi.fn(() => "prompt additions");
|
|
62
|
+
const mockRebuildSystemPrompt = vi.fn();
|
|
63
|
+
const mockUpdateSystemPrompt = vi.fn();
|
|
64
|
+
|
|
65
|
+
vi.mock("../core/plugin.js", () => ({
|
|
66
|
+
reloadPlugins: (...args: unknown[]) =>
|
|
67
|
+
mockReloadPlugins(...(args as Parameters<typeof mockReloadPlugins>)),
|
|
68
|
+
getPluginPromptAdditions: () => mockGetPluginPromptAdditions(),
|
|
69
|
+
}));
|
|
70
|
+
|
|
71
|
+
vi.mock("../util/config.js", () => ({
|
|
72
|
+
rebuildSystemPrompt: (...args: unknown[]) =>
|
|
73
|
+
mockRebuildSystemPrompt(
|
|
74
|
+
...(args as Parameters<typeof mockRebuildSystemPrompt>),
|
|
75
|
+
),
|
|
76
|
+
}));
|
|
77
|
+
|
|
78
|
+
vi.mock("../backend/claude-sdk/index.js", () => ({
|
|
79
|
+
updateSystemPrompt: (...args: unknown[]) =>
|
|
80
|
+
mockUpdateSystemPrompt(
|
|
81
|
+
...(args as Parameters<typeof mockUpdateSystemPrompt>),
|
|
82
|
+
),
|
|
83
|
+
}));
|
|
84
|
+
|
|
85
|
+
// ── Import after mocks ────────────────────────────────────────────────────
|
|
86
|
+
|
|
87
|
+
import { handleSharedAction } from "../core/gateway-actions.js";
|
|
88
|
+
|
|
89
|
+
// ── Tests ─────────────────────────────────────────────────────────────────
|
|
90
|
+
|
|
91
|
+
describe("reload_plugins gateway action", () => {
|
|
92
|
+
beforeEach(() => {
|
|
93
|
+
vi.resetAllMocks();
|
|
94
|
+
// Re-establish default implementations after reset
|
|
95
|
+
mockReloadPlugins.mockImplementation(async () => ({
|
|
96
|
+
names: ["extras", "brave-search"],
|
|
97
|
+
config: { ...DEFAULT_CONFIG },
|
|
98
|
+
}));
|
|
99
|
+
mockGetPluginPromptAdditions.mockReturnValue("prompt additions");
|
|
100
|
+
mockRebuildSystemPrompt.mockImplementation(() => {});
|
|
101
|
+
mockUpdateSystemPrompt.mockImplementation(() => {});
|
|
102
|
+
});
|
|
103
|
+
|
|
104
|
+
it("returns loaded plugin names on success", async () => {
|
|
105
|
+
const result = await handleSharedAction(
|
|
106
|
+
{ action: "reload_plugins" },
|
|
107
|
+
12345,
|
|
108
|
+
);
|
|
109
|
+
expect(result).not.toBeNull();
|
|
110
|
+
expect(result!.ok).toBe(true);
|
|
111
|
+
expect(result!.text).toContain("Plugins reloaded successfully");
|
|
112
|
+
expect(result!.text).toContain("extras");
|
|
113
|
+
expect(result!.text).toContain("brave-search");
|
|
114
|
+
expect(result!.text).toContain("(2)");
|
|
115
|
+
});
|
|
116
|
+
|
|
117
|
+
it("calls reloadPlugins without explicit frontends (derived from config)", async () => {
|
|
118
|
+
await handleSharedAction({ action: "reload_plugins" }, 12345);
|
|
119
|
+
// Gateway no longer passes frontends — reloadPlugins derives them from config
|
|
120
|
+
expect(mockReloadPlugins).toHaveBeenCalledWith();
|
|
121
|
+
});
|
|
122
|
+
|
|
123
|
+
it("rebuilds system prompt after reloading", async () => {
|
|
124
|
+
await handleSharedAction({ action: "reload_plugins" }, 12345);
|
|
125
|
+
expect(mockRebuildSystemPrompt).toHaveBeenCalledTimes(1);
|
|
126
|
+
expect(mockGetPluginPromptAdditions).toHaveBeenCalledTimes(1);
|
|
127
|
+
});
|
|
128
|
+
|
|
129
|
+
it("updates backend system prompt after rebuild", async () => {
|
|
130
|
+
await handleSharedAction({ action: "reload_plugins" }, 12345);
|
|
131
|
+
expect(mockUpdateSystemPrompt).toHaveBeenCalledTimes(1);
|
|
132
|
+
});
|
|
133
|
+
|
|
134
|
+
it("returns error when reloadPlugins throws", async () => {
|
|
135
|
+
mockReloadPlugins.mockRejectedValueOnce(
|
|
136
|
+
new Error("Config validation failed"),
|
|
137
|
+
);
|
|
138
|
+
const result = await handleSharedAction(
|
|
139
|
+
{ action: "reload_plugins" },
|
|
140
|
+
12345,
|
|
141
|
+
);
|
|
142
|
+
expect(result).not.toBeNull();
|
|
143
|
+
expect(result!.ok).toBe(false);
|
|
144
|
+
expect(result!.error).toContain("Config validation failed");
|
|
145
|
+
});
|
|
146
|
+
|
|
147
|
+
it("returns error when config is malformed", async () => {
|
|
148
|
+
mockReloadPlugins.mockRejectedValueOnce(
|
|
149
|
+
new Error("Invalid JSON in config"),
|
|
150
|
+
);
|
|
151
|
+
const result = await handleSharedAction(
|
|
152
|
+
{ action: "reload_plugins" },
|
|
153
|
+
12345,
|
|
154
|
+
);
|
|
155
|
+
expect(result!.ok).toBe(false);
|
|
156
|
+
expect(result!.error).toContain("Invalid JSON in config");
|
|
157
|
+
});
|
|
158
|
+
|
|
159
|
+
it("reports zero plugins when none configured", async () => {
|
|
160
|
+
mockReloadPlugins.mockImplementation(async () => ({
|
|
161
|
+
names: [],
|
|
162
|
+
config: { ...DEFAULT_CONFIG },
|
|
163
|
+
}));
|
|
164
|
+
const result = await handleSharedAction(
|
|
165
|
+
{ action: "reload_plugins" },
|
|
166
|
+
12345,
|
|
167
|
+
);
|
|
168
|
+
expect(result!.ok).toBe(true);
|
|
169
|
+
expect(result!.text).toContain("(0)");
|
|
170
|
+
expect(result!.text).toContain("(none)");
|
|
171
|
+
});
|
|
172
|
+
});
|
|
173
|
+
|
|
174
|
+
// ── Admin tool description tests ──────────────────────────────────────────
|
|
175
|
+
|
|
176
|
+
describe("admin tool description", () => {
|
|
177
|
+
it("does not mention session reset or MCP subprocesses", async () => {
|
|
178
|
+
const { adminTools } = await import("../core/tools/admin.js");
|
|
179
|
+
const reloadTool = adminTools.find((t) => t.name === "reload_plugins");
|
|
180
|
+
expect(reloadTool).toBeDefined();
|
|
181
|
+
expect(reloadTool!.description).not.toContain("resets sessions");
|
|
182
|
+
expect(reloadTool!.description).not.toContain("sessions reset");
|
|
183
|
+
expect(reloadTool!.description).not.toContain("MCP subprocesses");
|
|
184
|
+
expect(reloadTool!.description).toContain("without restarting");
|
|
185
|
+
expect(reloadTool!.description).toContain("without downtime");
|
|
186
|
+
});
|
|
187
|
+
|
|
188
|
+
it("mentions env var cleanup", async () => {
|
|
189
|
+
const { adminTools } = await import("../core/tools/admin.js");
|
|
190
|
+
const reloadTool = adminTools.find((t) => t.name === "reload_plugins");
|
|
191
|
+
expect(reloadTool!.description).toContain("env vars");
|
|
192
|
+
});
|
|
193
|
+
|
|
194
|
+
it("has admin tag", async () => {
|
|
195
|
+
const { adminTools } = await import("../core/tools/admin.js");
|
|
196
|
+
const reloadTool = adminTools.find((t) => t.name === "reload_plugins");
|
|
197
|
+
expect(reloadTool!.tag).toBe("admin");
|
|
198
|
+
});
|
|
199
|
+
});
|
package/src/__tests__/sessions.test.ts
CHANGED
|
@@ -72,7 +72,6 @@ describe("sessions", () => {
|
|
|
72
72
|
expect(session.usage.totalCacheRead).toBe(0);
|
|
73
73
|
expect(session.usage.totalCacheWrite).toBe(0);
|
|
74
74
|
expect(session.usage.lastPromptTokens).toBe(0);
|
|
75
|
-
expect(session.usage.estimatedCostUsd).toBe(0);
|
|
76
75
|
expect(session.usage.totalResponseMs).toBe(0);
|
|
77
76
|
expect(session.usage.lastResponseMs).toBe(0);
|
|
78
77
|
expect(session.usage.fastestResponseMs).toBe(Infinity);
|
|
@@ -151,20 +150,6 @@ describe("sessions", () => {
|
|
|
151
150
|
expect(getSession(chatId).usage.lastPromptTokens).toBe(250);
|
|
152
151
|
});
|
|
153
152
|
|
|
154
|
-
it("calculates estimated cost", () => {
|
|
155
|
-
const chatId = "test-cost";
|
|
156
|
-
getSession(chatId);
|
|
157
|
-
|
|
158
|
-
recordUsage(chatId, {
|
|
159
|
-
inputTokens: 1_000_000,
|
|
160
|
-
outputTokens: 0,
|
|
161
|
-
cacheRead: 0,
|
|
162
|
-
cacheWrite: 0,
|
|
163
|
-
});
|
|
164
|
-
// Cost for 1M input tokens at $3/M = $3
|
|
165
|
-
expect(getSession(chatId).usage.estimatedCostUsd).toBeCloseTo(3, 1);
|
|
166
|
-
});
|
|
167
|
-
|
|
168
153
|
it("tracks response time duration", () => {
|
|
169
154
|
const chatId = "test-duration";
|
|
170
155
|
getSession(chatId);
|
|
@@ -251,98 +236,129 @@ describe("sessions", () => {
|
|
|
251
236
|
});
|
|
252
237
|
});
|
|
253
238
|
|
|
254
|
-
describe("recordUsage
|
|
255
|
-
it("
|
|
256
|
-
const chatId = "test-
|
|
239
|
+
describe("recordUsage — model tracking", () => {
|
|
240
|
+
it("tracks lastModel", () => {
|
|
241
|
+
const chatId = "test-last-model";
|
|
257
242
|
getSession(chatId);
|
|
258
243
|
|
|
259
244
|
recordUsage(chatId, {
|
|
260
|
-
inputTokens:
|
|
261
|
-
outputTokens:
|
|
245
|
+
inputTokens: 100,
|
|
246
|
+
outputTokens: 50,
|
|
262
247
|
cacheRead: 0,
|
|
263
248
|
cacheWrite: 0,
|
|
264
|
-
model: "claude-
|
|
249
|
+
model: "claude-opus-4-6",
|
|
265
250
|
});
|
|
266
|
-
|
|
267
|
-
expect(getSession(chatId).
|
|
251
|
+
|
|
252
|
+
expect(getSession(chatId).lastModel).toBe("claude-opus-4-6");
|
|
268
253
|
});
|
|
269
254
|
|
|
270
|
-
it("
|
|
271
|
-
const chatId = "test-
|
|
255
|
+
it("updates fastestResponseMs correctly across turns", () => {
|
|
256
|
+
const chatId = "test-fastest-response";
|
|
272
257
|
getSession(chatId);
|
|
273
258
|
|
|
274
259
|
recordUsage(chatId, {
|
|
275
|
-
inputTokens:
|
|
276
|
-
outputTokens:
|
|
260
|
+
inputTokens: 100,
|
|
261
|
+
outputTokens: 50,
|
|
277
262
|
cacheRead: 0,
|
|
278
263
|
cacheWrite: 0,
|
|
279
|
-
|
|
264
|
+
durationMs: 2000,
|
|
280
265
|
});
|
|
281
|
-
// Opus input: $15/M
|
|
282
|
-
expect(getSession(chatId).usage.estimatedCostUsd).toBeCloseTo(15, 1);
|
|
283
|
-
});
|
|
284
266
|
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
267
|
+
recordUsage(chatId, {
|
|
268
|
+
inputTokens: 100,
|
|
269
|
+
outputTokens: 50,
|
|
270
|
+
cacheRead: 0,
|
|
271
|
+
cacheWrite: 0,
|
|
272
|
+
durationMs: 500,
|
|
273
|
+
});
|
|
288
274
|
|
|
289
275
|
recordUsage(chatId, {
|
|
290
|
-
inputTokens:
|
|
291
|
-
outputTokens:
|
|
276
|
+
inputTokens: 100,
|
|
277
|
+
outputTokens: 50,
|
|
292
278
|
cacheRead: 0,
|
|
293
279
|
cacheWrite: 0,
|
|
280
|
+
durationMs: 1000,
|
|
281
|
+
});
|
|
282
|
+
|
|
283
|
+
const usage = getSession(chatId).usage;
|
|
284
|
+
expect(usage.fastestResponseMs).toBe(500);
|
|
285
|
+
expect(usage.lastResponseMs).toBe(1000);
|
|
286
|
+
expect(usage.totalResponseMs).toBe(3500);
|
|
287
|
+
});
|
|
288
|
+
});
|
|
289
|
+
|
|
290
|
+
describe("recordUsage — context tracking fields", () => {
|
|
291
|
+
it("stores contextTokens from SDK iteration data", () => {
|
|
292
|
+
const chatId = "test-ctx-tokens";
|
|
293
|
+
getSession(chatId);
|
|
294
|
+
|
|
295
|
+
recordUsage(chatId, {
|
|
296
|
+
inputTokens: 100,
|
|
297
|
+
outputTokens: 50,
|
|
298
|
+
cacheRead: 10,
|
|
299
|
+
cacheWrite: 5,
|
|
300
|
+
contextTokens: 85000,
|
|
294
301
|
});
|
|
295
|
-
|
|
296
|
-
expect(getSession(chatId).usage.
|
|
302
|
+
|
|
303
|
+
expect(getSession(chatId).usage.contextTokens).toBe(85000);
|
|
297
304
|
});
|
|
298
305
|
|
|
299
|
-
it("
|
|
300
|
-
const chatId = "test-
|
|
306
|
+
it("stores contextWindow from SDK modelUsage", () => {
|
|
307
|
+
const chatId = "test-ctx-window";
|
|
301
308
|
getSession(chatId);
|
|
302
309
|
|
|
303
310
|
recordUsage(chatId, {
|
|
304
|
-
inputTokens:
|
|
305
|
-
outputTokens:
|
|
311
|
+
inputTokens: 100,
|
|
312
|
+
outputTokens: 50,
|
|
306
313
|
cacheRead: 0,
|
|
307
314
|
cacheWrite: 0,
|
|
308
|
-
|
|
315
|
+
contextWindow: 1_000_000,
|
|
309
316
|
});
|
|
310
|
-
|
|
311
|
-
expect(getSession(chatId).usage.
|
|
317
|
+
|
|
318
|
+
expect(getSession(chatId).usage.contextWindow).toBe(1_000_000);
|
|
312
319
|
});
|
|
313
320
|
|
|
314
|
-
it("
|
|
315
|
-
const chatId = "test-
|
|
321
|
+
it("stores numApiCalls from SDK num_turns", () => {
|
|
322
|
+
const chatId = "test-num-api-calls";
|
|
316
323
|
getSession(chatId);
|
|
317
324
|
|
|
318
325
|
recordUsage(chatId, {
|
|
319
|
-
inputTokens:
|
|
320
|
-
outputTokens:
|
|
321
|
-
cacheRead:
|
|
326
|
+
inputTokens: 100,
|
|
327
|
+
outputTokens: 50,
|
|
328
|
+
cacheRead: 0,
|
|
322
329
|
cacheWrite: 0,
|
|
323
|
-
|
|
330
|
+
numApiCalls: 3,
|
|
324
331
|
});
|
|
325
|
-
|
|
326
|
-
expect(getSession(chatId).usage.
|
|
332
|
+
|
|
333
|
+
expect(getSession(chatId).usage.numApiCalls).toBe(3);
|
|
327
334
|
});
|
|
328
335
|
|
|
329
|
-
it("
|
|
330
|
-
const chatId = "test-
|
|
336
|
+
it("resets contextTokens to 0 when not provided", () => {
|
|
337
|
+
const chatId = "test-ctx-tokens-reset";
|
|
331
338
|
getSession(chatId);
|
|
332
339
|
|
|
340
|
+
// First turn with context data
|
|
333
341
|
recordUsage(chatId, {
|
|
334
|
-
inputTokens:
|
|
335
|
-
outputTokens:
|
|
342
|
+
inputTokens: 100,
|
|
343
|
+
outputTokens: 50,
|
|
336
344
|
cacheRead: 0,
|
|
337
|
-
cacheWrite:
|
|
338
|
-
|
|
345
|
+
cacheWrite: 0,
|
|
346
|
+
contextTokens: 50000,
|
|
339
347
|
});
|
|
340
|
-
|
|
341
|
-
|
|
348
|
+
expect(getSession(chatId).usage.contextTokens).toBe(50000);
|
|
349
|
+
|
|
350
|
+
// Second turn without context data — resets to 0
|
|
351
|
+
recordUsage(chatId, {
|
|
352
|
+
inputTokens: 200,
|
|
353
|
+
outputTokens: 100,
|
|
354
|
+
cacheRead: 0,
|
|
355
|
+
cacheWrite: 0,
|
|
356
|
+
});
|
|
357
|
+
expect(getSession(chatId).usage.contextTokens).toBe(0);
|
|
342
358
|
});
|
|
343
359
|
|
|
344
|
-
it("
|
|
345
|
-
const chatId = "test-
|
|
360
|
+
it("preserves contextWindow across turns when not reported", () => {
|
|
361
|
+
const chatId = "test-ctx-window-preserve";
|
|
346
362
|
getSession(chatId);
|
|
347
363
|
|
|
348
364
|
recordUsage(chatId, {
|
|
@@ -350,44 +366,75 @@ describe("sessions", () => {
|
|
|
350
366
|
outputTokens: 50,
|
|
351
367
|
cacheRead: 0,
|
|
352
368
|
cacheWrite: 0,
|
|
353
|
-
|
|
369
|
+
contextWindow: 1_000_000,
|
|
354
370
|
});
|
|
371
|
+
expect(getSession(chatId).usage.contextWindow).toBe(1_000_000);
|
|
355
372
|
|
|
356
|
-
|
|
373
|
+
// Turn without contextWindow — preserves previous value
|
|
374
|
+
recordUsage(chatId, {
|
|
375
|
+
inputTokens: 200,
|
|
376
|
+
outputTokens: 100,
|
|
377
|
+
cacheRead: 0,
|
|
378
|
+
cacheWrite: 0,
|
|
379
|
+
});
|
|
380
|
+
expect(getSession(chatId).usage.contextWindow).toBe(1_000_000);
|
|
357
381
|
});
|
|
358
382
|
|
|
359
|
-
it("
|
|
360
|
-
const chatId = "test-
|
|
383
|
+
it("rejects non-finite contextWindow values and keeps previous", () => {
|
|
384
|
+
const chatId = "test-ctx-window-nan";
|
|
361
385
|
getSession(chatId);
|
|
362
386
|
|
|
387
|
+
// Set a valid contextWindow first
|
|
363
388
|
recordUsage(chatId, {
|
|
364
389
|
inputTokens: 100,
|
|
365
390
|
outputTokens: 50,
|
|
366
391
|
cacheRead: 0,
|
|
367
392
|
cacheWrite: 0,
|
|
368
|
-
|
|
393
|
+
contextWindow: 1_000_000,
|
|
369
394
|
});
|
|
395
|
+
expect(getSession(chatId).usage.contextWindow).toBe(1_000_000);
|
|
370
396
|
|
|
397
|
+
// NaN should not overwrite
|
|
371
398
|
recordUsage(chatId, {
|
|
372
399
|
inputTokens: 100,
|
|
373
400
|
outputTokens: 50,
|
|
374
401
|
cacheRead: 0,
|
|
375
402
|
cacheWrite: 0,
|
|
376
|
-
|
|
403
|
+
contextWindow: NaN,
|
|
377
404
|
});
|
|
405
|
+
expect(getSession(chatId).usage.contextWindow).toBe(1_000_000);
|
|
378
406
|
|
|
407
|
+
// Infinity should not overwrite
|
|
379
408
|
recordUsage(chatId, {
|
|
380
409
|
inputTokens: 100,
|
|
381
410
|
outputTokens: 50,
|
|
382
411
|
cacheRead: 0,
|
|
383
412
|
cacheWrite: 0,
|
|
384
|
-
|
|
413
|
+
contextWindow: Infinity,
|
|
385
414
|
});
|
|
415
|
+
expect(getSession(chatId).usage.contextWindow).toBe(1_000_000);
|
|
416
|
+
});
|
|
386
417
|
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
418
|
+
it("rejects negative contextWindow values and keeps previous", () => {
|
|
419
|
+
const chatId = "test-ctx-window-neg";
|
|
420
|
+
getSession(chatId);
|
|
421
|
+
|
|
422
|
+
recordUsage(chatId, {
|
|
423
|
+
inputTokens: 100,
|
|
424
|
+
outputTokens: 50,
|
|
425
|
+
cacheRead: 0,
|
|
426
|
+
cacheWrite: 0,
|
|
427
|
+
contextWindow: 200_000,
|
|
428
|
+
});
|
|
429
|
+
|
|
430
|
+
recordUsage(chatId, {
|
|
431
|
+
inputTokens: 100,
|
|
432
|
+
outputTokens: 50,
|
|
433
|
+
cacheRead: 0,
|
|
434
|
+
cacheWrite: 0,
|
|
435
|
+
contextWindow: -100,
|
|
436
|
+
});
|
|
437
|
+
expect(getSession(chatId).usage.contextWindow).toBe(200_000);
|
|
391
438
|
});
|
|
392
439
|
});
|
|
393
440
|
|
|
@@ -484,52 +531,6 @@ describe("sessions", () => {
|
|
|
484
531
|
});
|
|
485
532
|
});
|
|
486
533
|
|
|
487
|
-
describe("cost calculation math", () => {
|
|
488
|
-
it("calculates multi-component cost correctly (input + output + cache)", () => {
|
|
489
|
-
const chatId = "test-cost-math";
|
|
490
|
-
getSession(chatId);
|
|
491
|
-
|
|
492
|
-
// Use exact token counts to verify the formula:
|
|
493
|
-
// cost = (input * pricing.input + cacheWrite * pricing.cacheWrite +
|
|
494
|
-
// cacheRead * pricing.cacheRead + output * pricing.output) / 1_000_000
|
|
495
|
-
// Sonnet: input=$3/M, output=$15/M, cacheRead=$0.3/M, cacheWrite=$3.75/M
|
|
496
|
-
recordUsage(chatId, {
|
|
497
|
-
inputTokens: 500_000, // 500k * 3 / 1M = $1.50
|
|
498
|
-
outputTokens: 100_000, // 100k * 15 / 1M = $1.50
|
|
499
|
-
cacheRead: 200_000, // 200k * 0.3 / 1M = $0.06
|
|
500
|
-
cacheWrite: 100_000, // 100k * 3.75 / 1M = $0.375
|
|
501
|
-
model: "claude-sonnet-4-6",
|
|
502
|
-
});
|
|
503
|
-
|
|
504
|
-
const usage = getSession(chatId).usage;
|
|
505
|
-
// Total: 1.50 + 1.50 + 0.06 + 0.375 = $3.435
|
|
506
|
-
expect(usage.estimatedCostUsd).toBeCloseTo(3.435, 3);
|
|
507
|
-
});
|
|
508
|
-
|
|
509
|
-
it("accumulates cost across multiple recordUsage calls", () => {
|
|
510
|
-
const chatId = "test-cost-accum";
|
|
511
|
-
getSession(chatId);
|
|
512
|
-
|
|
513
|
-
recordUsage(chatId, {
|
|
514
|
-
inputTokens: 1_000_000,
|
|
515
|
-
outputTokens: 0,
|
|
516
|
-
cacheRead: 0,
|
|
517
|
-
cacheWrite: 0,
|
|
518
|
-
});
|
|
519
|
-
// Sonnet input: $3
|
|
520
|
-
expect(getSession(chatId).usage.estimatedCostUsd).toBeCloseTo(3, 2);
|
|
521
|
-
|
|
522
|
-
recordUsage(chatId, {
|
|
523
|
-
inputTokens: 0,
|
|
524
|
-
outputTokens: 1_000_000,
|
|
525
|
-
cacheRead: 0,
|
|
526
|
-
cacheWrite: 0,
|
|
527
|
-
});
|
|
528
|
-
// + Sonnet output: $15. Total: $18
|
|
529
|
-
expect(getSession(chatId).usage.estimatedCostUsd).toBeCloseTo(18, 2);
|
|
530
|
-
});
|
|
531
|
-
});
|
|
532
|
-
|
|
533
534
|
describe("cache hit rate tracking", () => {
|
|
534
535
|
it("tracks cache read tokens across multiple turns", () => {
|
|
535
536
|
const chatId = "test-cache-track-read";
|
|
@@ -571,7 +572,6 @@ describe("sessions", () => {
|
|
|
571
572
|
const fresh = getSession(chatId);
|
|
572
573
|
expect(fresh.sessionId).toBeUndefined();
|
|
573
574
|
expect(fresh.turns).toBe(0);
|
|
574
|
-
expect(fresh.usage.estimatedCostUsd).toBe(0);
|
|
575
575
|
expect(fresh.usage.totalInputTokens).toBe(0);
|
|
576
576
|
});
|
|
577
577
|
});
|
|
@@ -642,6 +642,40 @@ describe("sessions — migration of legacy field formats", () => {
|
|
|
642
642
|
expect(session.createdAt).toBe(9999999);
|
|
643
643
|
});
|
|
644
644
|
|
|
645
|
+
it("backfills missing context tracking fields on legacy sessions", () => {
|
|
646
|
+
vi.mocked(existsSync).mockReturnValueOnce(true);
|
|
647
|
+
vi.mocked(readFileSync).mockReturnValueOnce(
|
|
648
|
+
JSON.stringify({
|
|
649
|
+
"migrate-chat-ctx": {
|
|
650
|
+
sessionId: undefined,
|
|
651
|
+
turns: 4,
|
|
652
|
+
lastActive: 2000,
|
|
653
|
+
createdAt: 2000,
|
|
654
|
+
usage: {
|
|
655
|
+
totalInputTokens: 100,
|
|
656
|
+
totalOutputTokens: 50,
|
|
657
|
+
totalCacheRead: 10,
|
|
658
|
+
totalCacheWrite: 5,
|
|
659
|
+
lastPromptTokens: 115,
|
|
660
|
+
estimatedCostUsd: 0.5,
|
|
661
|
+
totalResponseMs: 1000,
|
|
662
|
+
lastResponseMs: 500,
|
|
663
|
+
fastestResponseMs: 500,
|
|
664
|
+
// contextTokens, contextWindow, numApiCalls deliberately omitted
|
|
665
|
+
},
|
|
666
|
+
},
|
|
667
|
+
}),
|
|
668
|
+
);
|
|
669
|
+
loadSessions();
|
|
670
|
+
const session = getSession("migrate-chat-ctx");
|
|
671
|
+
expect(session.usage.contextTokens).toBe(0);
|
|
672
|
+
expect(session.usage.contextWindow).toBe(0);
|
|
673
|
+
expect(session.usage.numApiCalls).toBe(0);
|
|
674
|
+
// Existing fields should be preserved
|
|
675
|
+
expect(session.usage.totalInputTokens).toBe(100);
|
|
676
|
+
expect(session.usage.lastPromptTokens).toBe(115);
|
|
677
|
+
});
|
|
678
|
+
|
|
645
679
|
it("fixes fastestResponseMs of 0 to Infinity", () => {
|
|
646
680
|
vi.mocked(existsSync).mockReturnValueOnce(true);
|
|
647
681
|
vi.mocked(readFileSync).mockReturnValueOnce(
|