jinzd-ai-cli 0.4.54 → 0.4.55

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,7 +6,7 @@ import { platform } from "os";
6
6
  import chalk from "chalk";
7
7
 
8
8
  // src/core/constants.ts
9
- var VERSION = "0.4.54";
9
+ var VERSION = "0.4.55";
10
10
  var APP_NAME = "ai-cli";
11
11
  var CONFIG_DIR_NAME = ".aicli";
12
12
  var CONFIG_FILE_NAME = "config.json";
@@ -7,7 +7,7 @@ import {
7
7
  ProviderNotFoundError,
8
8
  RateLimitError,
9
9
  schemaToJsonSchema
10
- } from "./chunk-TAR67QTH.js";
10
+ } from "./chunk-YQEIQJ6K.js";
11
11
  import {
12
12
  APP_NAME,
13
13
  CONFIG_DIR_NAME,
@@ -20,7 +20,7 @@ import {
20
20
  MCP_TOOL_PREFIX,
21
21
  PLUGINS_DIR_NAME,
22
22
  VERSION
23
- } from "./chunk-NP5KZVP6.js";
23
+ } from "./chunk-W7QVBFIJ.js";
24
24
 
25
25
  // src/config/config-manager.ts
26
26
  import { readFileSync, writeFileSync, existsSync, mkdirSync } from "fs";
@@ -312,6 +312,7 @@ var BaseProvider = class {
312
312
  };
313
313
 
314
314
  // src/providers/claude.ts
315
// Minimum system-prompt length, in characters, at which
// ClaudeProvider.buildSystemParam stops passing the prompt as a plain string
// and wraps it in a text block tagged with `cache_control`.
var CACHE_MIN_SYSTEM_CHARS = 2000;
315
316
  var ClaudeProvider = class extends BaseProvider {
316
317
  client;
317
318
  info = {
@@ -382,6 +383,52 @@ var ClaudeProvider = class extends BaseProvider {
382
383
  }
383
384
  return blocks.length > 0 ? blocks : "";
384
385
  }
386
+ /**
387
+ * Build a cacheable system prompt payload.
388
+ * When the prompt is long enough to be worth caching, return an array with a
389
+ * single text block carrying `cache_control: { type: 'ephemeral' }`. This caches
390
+ * system + memory + context files across every request in an agentic loop.
391
+ * Short prompts pass through as a plain string (no caching overhead).
392
+ */
393
+ buildSystemParam(systemPrompt) {
394
+ if (!systemPrompt) return void 0;
395
+ if (systemPrompt.length < CACHE_MIN_SYSTEM_CHARS) return systemPrompt;
396
+ return [
397
+ {
398
+ type: "text",
399
+ text: systemPrompt,
400
+ cache_control: { type: "ephemeral" }
401
+ }
402
+ ];
403
+ }
404
+ /**
405
+ * Mark the last tool definition with `cache_control: ephemeral` so the entire
406
+ * tool block (all 24+ tools) is cached together. Anthropic caches everything
407
+ * up to and including a cache breakpoint, so one marker covers all tools.
408
+ * Returns a new array — does not mutate the input.
409
+ */
410
+ addToolsCacheControl(tools) {
411
+ if (tools.length === 0) return tools;
412
+ const last = tools[tools.length - 1];
413
+ return [
414
+ ...tools.slice(0, -1),
415
+ { ...last, cache_control: { type: "ephemeral" } }
416
+ ];
417
+ }
418
+ /** Extract usage (including cache fields) from an Anthropic response. */
419
+ extractUsage(u) {
420
+ const usage = {
421
+ inputTokens: u.input_tokens,
422
+ outputTokens: u.output_tokens
423
+ };
424
+ if (u.cache_creation_input_tokens != null && u.cache_creation_input_tokens > 0) {
425
+ usage.cacheCreationTokens = u.cache_creation_input_tokens;
426
+ }
427
+ if (u.cache_read_input_tokens != null && u.cache_read_input_tokens > 0) {
428
+ usage.cacheReadTokens = u.cache_read_input_tokens;
429
+ }
430
+ return usage;
431
+ }
385
432
  /**
386
433
  * 构建 Extended Thinking 参数。
387
434
  * - thinking 启用时 temperature 必须为 1 或不设置(Anthropic API 要求)
@@ -432,7 +479,7 @@ var ClaudeProvider = class extends BaseProvider {
432
479
  const response = await this.client.messages.create({
433
480
  model: request.model,
434
481
  messages,
435
- system: request.systemPrompt,
482
+ system: this.buildSystemParam(request.systemPrompt),
436
483
  max_tokens: request.maxTokens ?? 8192,
437
484
  temperature,
438
485
  thinking
@@ -441,10 +488,7 @@ var ClaudeProvider = class extends BaseProvider {
441
488
  return {
442
489
  content,
443
490
  model: response.model,
444
- usage: {
445
- inputTokens: response.usage.input_tokens,
446
- outputTokens: response.usage.output_tokens
447
- }
491
+ usage: this.extractUsage(response.usage)
448
492
  };
449
493
  } catch (err) {
450
494
  throw this.wrapError(err);
@@ -460,7 +504,7 @@ var ClaudeProvider = class extends BaseProvider {
460
504
  const stream = this.client.messages.stream({
461
505
  model: request.model,
462
506
  messages,
463
- system: request.systemPrompt,
507
+ system: this.buildSystemParam(request.systemPrompt),
464
508
  max_tokens: request.maxTokens ?? 8192,
465
509
  temperature,
466
510
  thinking
@@ -493,20 +537,22 @@ var ClaudeProvider = class extends BaseProvider {
493
537
  }
494
538
  async chatWithTools(request, tools) {
495
539
  try {
496
- const anthropicTools = tools.map((t) => ({
497
- name: t.name,
498
- description: t.description,
499
- input_schema: {
500
- type: "object",
501
- properties: Object.fromEntries(
502
- Object.entries(t.parameters).map(([key, schema]) => [
503
- key,
504
- schemaToJsonSchema(schema)
505
- ])
506
- ),
507
- required: Object.entries(t.parameters).filter(([, s]) => s.required).map(([k]) => k)
508
- }
509
- }));
540
+ const anthropicTools = this.addToolsCacheControl(
541
+ tools.map((t) => ({
542
+ name: t.name,
543
+ description: t.description,
544
+ input_schema: {
545
+ type: "object",
546
+ properties: Object.fromEntries(
547
+ Object.entries(t.parameters).map(([key, schema]) => [
548
+ key,
549
+ schemaToJsonSchema(schema)
550
+ ])
551
+ ),
552
+ required: Object.entries(t.parameters).filter(([, s]) => s.required).map(([k]) => k)
553
+ }
554
+ }))
555
+ );
510
556
  const baseMessages = request.messages.filter((m) => m.role !== "system").map((m) => ({ role: m.role, content: this.contentToClaudeParts(m.content) }));
511
557
  const extraMessages = request._extraMessages ?? [];
512
558
  const allMessages = [...baseMessages, ...extraMessages];
@@ -515,15 +561,12 @@ var ClaudeProvider = class extends BaseProvider {
515
561
  model: request.model,
516
562
  messages: allMessages,
517
563
  tools: anthropicTools,
518
- system: request.systemPrompt,
564
+ system: this.buildSystemParam(request.systemPrompt),
519
565
  max_tokens: request.maxTokens ?? 8192,
520
566
  temperature,
521
567
  thinking
522
568
  });
523
- const usage = {
524
- inputTokens: response.usage.input_tokens,
525
- outputTokens: response.usage.output_tokens
526
- };
569
+ const usage = this.extractUsage(response.usage);
527
570
  const toolUseBlocks = response.content.filter(
528
571
  (b) => b.type === "tool_use"
529
572
  );
@@ -547,20 +590,22 @@ var ClaudeProvider = class extends BaseProvider {
547
590
  * 同时收集原始 content blocks 供 buildToolResultMessages 使用。
548
591
  */
549
592
  async *chatWithToolsStream(request, tools) {
550
- const anthropicTools = tools.map((t) => ({
551
- name: t.name,
552
- description: t.description,
553
- input_schema: {
554
- type: "object",
555
- properties: Object.fromEntries(
556
- Object.entries(t.parameters).map(([key, schema]) => [
557
- key,
558
- schemaToJsonSchema(schema)
559
- ])
560
- ),
561
- required: Object.entries(t.parameters).filter(([, s]) => s.required).map(([k]) => k)
562
- }
563
- }));
593
+ const anthropicTools = this.addToolsCacheControl(
594
+ tools.map((t) => ({
595
+ name: t.name,
596
+ description: t.description,
597
+ input_schema: {
598
+ type: "object",
599
+ properties: Object.fromEntries(
600
+ Object.entries(t.parameters).map(([key, schema]) => [
601
+ key,
602
+ schemaToJsonSchema(schema)
603
+ ])
604
+ ),
605
+ required: Object.entries(t.parameters).filter(([, s]) => s.required).map(([k]) => k)
606
+ }
607
+ }))
608
+ );
564
609
  const baseMessages = request.messages.filter((m) => m.role !== "system").map((m) => ({ role: m.role, content: this.contentToClaudeParts(m.content) }));
565
610
  const extraMessages = request._extraMessages ?? [];
566
611
  const allMessages = [...baseMessages, ...extraMessages];
@@ -572,7 +617,7 @@ var ClaudeProvider = class extends BaseProvider {
572
617
  model: request.model,
573
618
  messages: allMessages,
574
619
  tools: anthropicTools,
575
- system: request.systemPrompt,
620
+ system: this.buildSystemParam(request.systemPrompt),
576
621
  max_tokens: request.maxTokens ?? 8192,
577
622
  temperature,
578
623
  thinking
@@ -580,7 +625,13 @@ var ClaudeProvider = class extends BaseProvider {
580
625
  let currentBlockType = null;
581
626
  let currentToolIndex = 0;
582
627
  let currentBlockData = {};
628
+ let startUsage = null;
583
629
  for await (const event of stream) {
630
+ if (event.type === "message_start") {
631
+ const msgUsage = event.message?.usage;
632
+ if (msgUsage) startUsage = msgUsage;
633
+ continue;
634
+ }
584
635
  if (event.type === "content_block_start") {
585
636
  const block = event.content_block;
586
637
  currentBlockType = block.type;
@@ -641,15 +692,17 @@ var ClaudeProvider = class extends BaseProvider {
641
692
  currentBlockType = null;
642
693
  currentBlockData = {};
643
694
  } else if (event.type === "message_delta") {
644
- const usage = event.usage;
645
- if (usage) {
695
+ const deltaUsage = event.usage;
696
+ if (deltaUsage) {
646
697
  doneEmitted = true;
647
698
  yield {
648
699
  type: "done",
649
- usage: {
650
- inputTokens: usage.input_tokens ?? 0,
651
- outputTokens: usage.output_tokens ?? 0
652
- },
700
+ usage: this.extractUsage({
701
+ input_tokens: startUsage?.input_tokens ?? deltaUsage.input_tokens ?? 0,
702
+ output_tokens: deltaUsage.output_tokens ?? 0,
703
+ cache_creation_input_tokens: startUsage?.cache_creation_input_tokens,
704
+ cache_read_input_tokens: startUsage?.cache_read_input_tokens
705
+ }),
653
706
  rawContent: rawContentBlocks
654
707
  };
655
708
  }
@@ -1003,6 +1056,16 @@ Node.js does not automatically use system proxies. Try one of the following:
1003
1056
 
1004
1057
  // src/providers/openai-compatible.ts
1005
1058
  import OpenAI from "openai";
1059
/**
 * Convert an OpenAI-style `usage` payload into the internal token-usage shape.
 *
 * Cached prompt tokens (`prompt_tokens_details.cached_tokens`) are reported
 * separately as `cacheReadTokens` and subtracted from `inputTokens`, so
 * `inputTokens` only counts the uncached part of the prompt (never below 0).
 *
 * Missing `prompt_tokens` / `completion_tokens` are treated as 0. Without that
 * default a partial usage payload would yield NaN/undefined, and because the
 * callers accumulate these numbers with `+=`, the running session totals would
 * stay NaN from then on.
 *
 * @param u Usage object from a chat completion response or stream chunk; may be
 *          null/undefined when the backend sends no usage.
 * @returns The normalized usage record, or `undefined` when `u` is absent.
 */
function toUsage(u) {
  if (!u) return void 0;
  const promptTokens = u.prompt_tokens ?? 0;
  const completionTokens = u.completion_tokens ?? 0;
  const cached = u.prompt_tokens_details?.cached_tokens ?? 0;
  const usage = {
    inputTokens: Math.max(0, promptTokens - cached),
    outputTokens: completionTokens
  };
  // Only expose the cache field when there actually was a cache hit.
  return cached > 0 ? { ...usage, cacheReadTokens: cached } : usage;
}
1006
1069
  var OpenAICompatibleProvider = class extends BaseProvider {
1007
1070
  client;
1008
1071
  defaultTimeout = 6e4;
@@ -1056,10 +1119,7 @@ var OpenAICompatibleProvider = class extends BaseProvider {
1056
1119
  return {
1057
1120
  content: firstChoice.message.content ?? "",
1058
1121
  model: response.model,
1059
- usage: response.usage ? {
1060
- inputTokens: response.usage.prompt_tokens,
1061
- outputTokens: response.usage.completion_tokens
1062
- } : void 0
1122
+ usage: toUsage(response.usage)
1063
1123
  };
1064
1124
  } catch (err) {
1065
1125
  throw this.wrapError(err);
@@ -1088,10 +1148,7 @@ var OpenAICompatibleProvider = class extends BaseProvider {
1088
1148
  yield {
1089
1149
  delta: "",
1090
1150
  done: true,
1091
- usage: {
1092
- inputTokens: chunk.usage.prompt_tokens,
1093
- outputTokens: chunk.usage.completion_tokens
1094
- }
1151
+ usage: toUsage(chunk.usage)
1095
1152
  };
1096
1153
  continue;
1097
1154
  }
@@ -1159,10 +1216,7 @@ var OpenAICompatibleProvider = class extends BaseProvider {
1159
1216
  return { content: "", usage: void 0 };
1160
1217
  }
1161
1218
  const message = firstChoice.message;
1162
- const usage = response.usage ? {
1163
- inputTokens: response.usage.prompt_tokens,
1164
- outputTokens: response.usage.completion_tokens
1165
- } : void 0;
1219
+ const usage = toUsage(response.usage);
1166
1220
  const reasoningContent = message.reasoning_content;
1167
1221
  if (message.tool_calls && message.tool_calls.length > 0) {
1168
1222
  const toolCalls = message.tool_calls.map((tc) => {
@@ -1275,10 +1329,7 @@ var OpenAICompatibleProvider = class extends BaseProvider {
1275
1329
  }
1276
1330
  yield {
1277
1331
  type: "done",
1278
- usage: {
1279
- inputTokens: chunk.usage.prompt_tokens,
1280
- outputTokens: chunk.usage.completion_tokens
1281
- }
1332
+ usage: toUsage(chunk.usage)
1282
1333
  };
1283
1334
  continue;
1284
1335
  }
@@ -2331,7 +2382,12 @@ var Session = class _Session {
2331
2382
  updated;
2332
2383
  messages = [];
2333
2384
  title;
2334
- tokenUsage = { inputTokens: 0, outputTokens: 0 };
2385
+ tokenUsage = {
2386
+ inputTokens: 0,
2387
+ outputTokens: 0,
2388
+ cacheCreationTokens: 0,
2389
+ cacheReadTokens: 0
2390
+ };
2335
2391
  checkpoints = [];
2336
2392
  constructor(id, provider, model) {
2337
2393
  this.id = id;
@@ -2359,11 +2415,18 @@ var Session = class _Session {
2359
2415
  addTokenUsage(usage) {
2360
2416
  this.tokenUsage.inputTokens += usage.inputTokens;
2361
2417
  this.tokenUsage.outputTokens += usage.outputTokens;
2418
+ this.tokenUsage.cacheCreationTokens += usage.cacheCreationTokens ?? 0;
2419
+ this.tokenUsage.cacheReadTokens += usage.cacheReadTokens ?? 0;
2362
2420
  }
2363
2421
  clear() {
2364
2422
  this.messages = [];
2365
2423
  this.title = void 0;
2366
- this.tokenUsage = { inputTokens: 0, outputTokens: 0 };
2424
+ this.tokenUsage = {
2425
+ inputTokens: 0,
2426
+ outputTokens: 0,
2427
+ cacheCreationTokens: 0,
2428
+ cacheReadTokens: 0
2429
+ };
2367
2430
  this.updated = /* @__PURE__ */ new Date();
2368
2431
  }
2369
2432
  /**
@@ -2491,7 +2554,9 @@ var Session = class _Session {
2491
2554
  if (tu && typeof tu === "object") {
2492
2555
  session.tokenUsage = {
2493
2556
  inputTokens: typeof tu.inputTokens === "number" ? tu.inputTokens : 0,
2494
- outputTokens: typeof tu.outputTokens === "number" ? tu.outputTokens : 0
2557
+ outputTokens: typeof tu.outputTokens === "number" ? tu.outputTokens : 0,
2558
+ cacheCreationTokens: typeof tu.cacheCreationTokens === "number" ? tu.cacheCreationTokens : 0,
2559
+ cacheReadTokens: typeof tu.cacheReadTokens === "number" ? tu.cacheReadTokens : 0
2495
2560
  };
2496
2561
  }
2497
2562
  if (Array.isArray(d.checkpoints)) {
@@ -3495,6 +3560,87 @@ async function setupProxy(configProxy) {
3495
3560
  }
3496
3561
  }
3497
3562
 
3563
+ // src/core/pricing.ts
3564
/**
 * Per-model price table. Keys are lowercase model ids (or id prefixes); values
 * hold `input` / `output` rates and, where known, `cacheWrite` / `cacheRead`
 * rates. computeCost divides the token-weighted sum by 1e6, i.e. every rate is
 * a price per one million tokens.
 */
var PRICING_TABLE = {
  // ── Anthropic Claude ──────────────────────────────────────────
  "claude-opus-4-6": { input: 15, output: 75, cacheWrite: 18.75, cacheRead: 1.5 },
  "claude-opus-4-5": { input: 15, output: 75, cacheWrite: 18.75, cacheRead: 1.5 },
  "claude-sonnet-4-6": { input: 3, output: 15, cacheWrite: 3.75, cacheRead: 0.3 },
  "claude-sonnet-4-5-20250929": { input: 3, output: 15, cacheWrite: 3.75, cacheRead: 0.3 },
  "claude-haiku-4-5-20251001": { input: 1, output: 5, cacheWrite: 1.25, cacheRead: 0.1 },
  "claude-haiku-4-5": { input: 1, output: 5, cacheWrite: 1.25, cacheRead: 0.1 },
  // Legacy Claude 3.x families (prefix fallback handles minor date suffixes)
  "claude-3-5-sonnet": { input: 3, output: 15, cacheWrite: 3.75, cacheRead: 0.3 },
  "claude-3-5-haiku": { input: 0.8, output: 4, cacheWrite: 1, cacheRead: 0.08 },
  "claude-3-opus": { input: 15, output: 75, cacheWrite: 18.75, cacheRead: 1.5 },
  // ── OpenAI ────────────────────────────────────────────────────
  "gpt-4o": { input: 2.5, output: 10, cacheRead: 1.25 },
  "gpt-4o-mini": { input: 0.15, output: 0.6, cacheRead: 0.075 },
  "gpt-4-turbo": { input: 10, output: 30 },
  "gpt-4": { input: 30, output: 60 },
  "gpt-4.1": { input: 2, output: 8, cacheRead: 0.5 },
  "gpt-4.1-mini": { input: 0.4, output: 1.6, cacheRead: 0.1 },
  "gpt-4.1-nano": { input: 0.1, output: 0.4, cacheRead: 0.025 },
  "o1": { input: 15, output: 60, cacheRead: 7.5 },
  "o1-mini": { input: 3, output: 12, cacheRead: 1.5 },
  "o3": { input: 10, output: 40, cacheRead: 2.5 },
  "o3-mini": { input: 1.1, output: 4.4, cacheRead: 0.55 },
  // ── Google Gemini ─────────────────────────────────────────────
  "gemini-2.5-pro": { input: 1.25, output: 10 },
  "gemini-2.5-flash": { input: 0.3, output: 2.5 },
  "gemini-2.0-flash": { input: 0.1, output: 0.4 },
  "gemini-1.5-pro": { input: 1.25, output: 5 },
  "gemini-1.5-flash": { input: 0.075, output: 0.3 },
  // ── DeepSeek ──────────────────────────────────────────────────
  "deepseek-chat": { input: 0.27, output: 1.1, cacheRead: 0.07 },
  "deepseek-reasoner": { input: 0.55, output: 2.19, cacheRead: 0.14 },
  "deepseek-v3": { input: 0.27, output: 1.1, cacheRead: 0.07 },
  // ── Moonshot Kimi ─────────────────────────────────────────────
  "moonshot-v1-8k": { input: 0.17, output: 0.17 },
  "moonshot-v1-32k": { input: 0.33, output: 0.33 },
  "moonshot-v1-128k": { input: 0.83, output: 0.83 },
  "kimi-k2": { input: 0.6, output: 2.5 },
  "kimi-latest": { input: 0.6, output: 2.5 },
  // ── Zhipu GLM ─────────────────────────────────────────────────
  "glm-4-plus": { input: 0.7, output: 0.7 },
  "glm-4": { input: 0.14, output: 0.14 },
  "glm-4-flash": { input: 0, output: 0 },
  "glm-4.5": { input: 0.29, output: 1.14 },
  "glm-4.6": { input: 0.6, output: 2.2 }
  // ── OpenRouter (pass-through — actual cost depends on underlying model) ──
  // Left empty; callers should resolve via underlying model ID.
  // ── Ollama (local, zero cost) ─────────────────────────────────
  // Handled via the FREE_PROVIDERS check in getPricing.
};
// Providers whose usage is always priced at zero, whatever the model.
var FREE_PROVIDERS = /* @__PURE__ */ new Set(["ollama"]);
// Table keys ordered longest-first so the prefix fallback picks the most
// specific entry (e.g. "gpt-4o-mini" before "gpt-4o" before "gpt-4").
// Computed once instead of re-sorting on every lookup.
var PRICING_KEYS_LONGEST_FIRST = Object.keys(PRICING_TABLE).sort((a, b) => b.length - a.length);
/**
 * Look up the price entry for a provider/model pair.
 *
 * Resolution order:
 *  1. Providers in FREE_PROVIDERS always resolve to `{ input: 0, output: 0 }`.
 *  2. Exact match of the lowercased model id against PRICING_TABLE.
 *  3. Longest table key that is a prefix of the lowercased model id (covers
 *     date-suffixed ids such as "gpt-4o-mini-2024-07-18").
 *
 * Only the table's own keys are consulted, so model ids that happen to equal
 * an inherited Object.prototype member ("constructor", "__proto__") resolve to
 * `null` instead of leaking a non-pricing value to callers.
 *
 * @param provider Provider id (case-insensitive).
 * @param model Model id (case-insensitive).
 * @returns The matching price entry, or `null` when the model is unknown.
 */
function getPricing(provider, model) {
  if (FREE_PROVIDERS.has(provider.toLowerCase())) {
    return { input: 0, output: 0 };
  }
  const key = model.toLowerCase();
  if (Object.prototype.hasOwnProperty.call(PRICING_TABLE, key)) {
    return PRICING_TABLE[key];
  }
  for (const k of PRICING_KEYS_LONGEST_FIRST) {
    if (key.startsWith(k)) return PRICING_TABLE[k];
  }
  return null;
}
3628
/**
 * Compute the cost of a token-usage record for the given provider/model.
 *
 * Each token bucket is multiplied by its rate from getPricing and the sum is
 * divided by 1e6 (rates are per million tokens). Cache-write and cache-read
 * tokens fall back to the plain input rate when the price entry has no
 * dedicated rate for them; absent cache counters count as 0.
 *
 * @param provider Provider id, forwarded to getPricing.
 * @param model Model id, forwarded to getPricing.
 * @param usage Record with `inputTokens`, `outputTokens` and optional
 *              `cacheCreationTokens` / `cacheReadTokens`.
 * @returns The cost as a number, or `null` when no pricing is known.
 */
function computeCost(provider, model, usage) {
  const rates = getPricing(provider, model);
  if (!rates) {
    return null;
  }
  const cacheWriteRate = rates.cacheWrite ?? rates.input;
  const cacheReadRate = rates.cacheRead ?? rates.input;
  const perMillionTotal =
    usage.inputTokens * rates.input +
    usage.outputTokens * rates.output +
    (usage.cacheCreationTokens ?? 0) * cacheWriteRate +
    (usage.cacheReadTokens ?? 0) * cacheReadRate;
  return perMillionTotal / 1e6;
}
3637
/**
 * Render a cost as a `$`-prefixed string whose precision depends on magnitude:
 * four decimals below 0.01 (including exactly 0), three decimals below 1, and
 * two decimals otherwise.
 *
 * @param amount Cost value to format.
 * @returns The formatted string, e.g. "$0.0012", "$0.250" or "$12.50".
 */
function formatCost(amount) {
  const decimals = amount < 0.01 ? 4 : amount < 1 ? 3 : 2;
  return "$" + amount.toFixed(decimals);
}
3643
+
3498
3644
  // src/repl/dev-state.ts
3499
3645
  import { existsSync as existsSync5, readFileSync as readFileSync4, writeFileSync as writeFileSync3, unlinkSync as unlinkSync2, mkdirSync as mkdirSync4 } from "fs";
3500
3646
  import { join as join5 } from "path";
@@ -3601,6 +3747,9 @@ export {
3601
3747
  getGitRoot,
3602
3748
  getGitContext,
3603
3749
  formatGitContextForPrompt,
3750
+ getPricing,
3751
+ computeCost,
3752
+ formatCost,
3604
3753
  parseSimpleYaml,
3605
3754
  SNAPSHOT_PROMPT,
3606
3755
  sessionHasMeaningfulContent,
@@ -8,7 +8,7 @@ import { platform } from "os";
8
8
  import chalk from "chalk";
9
9
 
10
10
  // src/core/constants.ts
11
- var VERSION = "0.4.54";
11
+ var VERSION = "0.4.55";
12
12
  var APP_NAME = "ai-cli";
13
13
  var CONFIG_DIR_NAME = ".aicli";
14
14
  var CONFIG_FILE_NAME = "config.json";
@@ -10,7 +10,7 @@ import {
10
10
  SUBAGENT_DEFAULT_MAX_ROUNDS,
11
11
  SUBAGENT_MAX_ROUNDS_LIMIT,
12
12
  runTestsTool
13
- } from "./chunk-NP5KZVP6.js";
13
+ } from "./chunk-W7QVBFIJ.js";
14
14
 
15
15
  // src/tools/builtin/bash.ts
16
16
  import { execSync } from "child_process";
@@ -385,7 +385,7 @@ ${content}`);
385
385
  }
386
386
  }
387
387
  async function runTaskMode(config, providers, configManager, topic) {
388
- const { TaskOrchestrator } = await import("./task-orchestrator-TSY7CJE6.js");
388
+ const { TaskOrchestrator } = await import("./task-orchestrator-MWO6A4KQ.js");
389
389
  const orchestrator = new TaskOrchestrator(config, providers, configManager);
390
390
  let interrupted = false;
391
391
  const onSigint = () => {
package/dist/index.js CHANGED
@@ -11,20 +11,23 @@ import {
11
11
  buildPhantomCorrectionMessage,
12
12
  buildWriteRoundReminder,
13
13
  clearDevState,
14
+ computeCost,
14
15
  detectsHallucinatedFileOp,
15
16
  extractWrittenFilePaths,
16
17
  findPhantomClaims,
18
+ formatCost,
17
19
  formatGitContextForPrompt,
18
20
  getContentText,
19
21
  getGitContext,
20
22
  getGitRoot,
23
+ getPricing,
21
24
  hadPreviousWriteToolCalls,
22
25
  loadDevState,
23
26
  parseSimpleYaml,
24
27
  saveDevState,
25
28
  sessionHasMeaningfulContent,
26
29
  setupProxy
27
- } from "./chunk-6FYFVPVE.js";
30
+ } from "./chunk-JL5NK6AR.js";
28
31
  import {
29
32
  ToolExecutor,
30
33
  ToolRegistry,
@@ -38,7 +41,7 @@ import {
38
41
  spawnAgentContext,
39
42
  theme,
40
43
  undoStack
41
- } from "./chunk-TAR67QTH.js";
44
+ } from "./chunk-YQEIQJ6K.js";
42
45
  import {
43
46
  fileCheckpoints
44
47
  } from "./chunk-4BKXL7SM.js";
@@ -63,7 +66,7 @@ import {
63
66
  SKILLS_DIR_NAME,
64
67
  VERSION,
65
68
  buildUserIdentityPrompt
66
- } from "./chunk-NP5KZVP6.js";
69
+ } from "./chunk-W7QVBFIJ.js";
67
70
 
68
71
  // src/index.ts
69
72
  import { program } from "commander";
@@ -487,8 +490,12 @@ Error${typeName}: ${lines.join("\n")}
487
490
  renderUsage(usage, sessionTotal) {
488
491
  const total = usage.inputTokens + usage.outputTokens;
489
492
  let line = theme.dim("\u{1F4CA} ") + theme.dim(`in ${usage.inputTokens.toLocaleString()}`) + theme.dim(" + ") + theme.dim(`out ${usage.outputTokens.toLocaleString()}`) + theme.dim(` = ${total.toLocaleString()} tokens`);
493
+ const cacheRead = usage.cacheReadTokens ?? 0;
494
+ if (cacheRead > 0) {
495
+ line += theme.dim(` \u2502 cache: ${cacheRead.toLocaleString()}`);
496
+ }
490
497
  if (sessionTotal) {
491
- const sessionSum = sessionTotal.inputTokens + sessionTotal.outputTokens;
498
+ const sessionSum = sessionTotal.inputTokens + sessionTotal.outputTokens + (sessionTotal.cacheCreationTokens ?? 0) + (sessionTotal.cacheReadTokens ?? 0);
492
499
  line += theme.dim(` \u2502 session total: ${sessionSum.toLocaleString()}`);
493
500
  }
494
501
  process.stdout.write(line + "\n\n");
@@ -1217,11 +1224,18 @@ function createDefaultCommands() {
1217
1224
  if (sys) {
1218
1225
  console.log(` System : ${sys.slice(0, 60)}...`);
1219
1226
  }
1220
- const totalTokens = tokenUsage.inputTokens + tokenUsage.outputTokens;
1227
+ const cacheRead = tokenUsage.cacheReadTokens ?? 0;
1228
+ const cacheCreate = tokenUsage.cacheCreationTokens ?? 0;
1229
+ const totalTokens = tokenUsage.inputTokens + tokenUsage.outputTokens + cacheRead + cacheCreate;
1221
1230
  if (totalTokens > 0) {
1231
+ const cacheSuffix = cacheRead > 0 || cacheCreate > 0 ? ` [cache: +${cacheCreate.toLocaleString()} / -${cacheRead.toLocaleString()}]` : "";
1222
1232
  console.log(
1223
- ` Tokens : in ${tokenUsage.inputTokens.toLocaleString()} + out ${tokenUsage.outputTokens.toLocaleString()} = ${totalTokens.toLocaleString()} (session total)`
1233
+ ` Tokens : in ${tokenUsage.inputTokens.toLocaleString()} + out ${tokenUsage.outputTokens.toLocaleString()} = ${totalTokens.toLocaleString()}${cacheSuffix}`
1224
1234
  );
1235
+ const cost = computeCost(ctx.getCurrentProvider(), ctx.getCurrentModel(), tokenUsage);
1236
+ if (cost != null) {
1237
+ console.log(` Cost : ${formatCost(cost)} (session total)`);
1238
+ }
1225
1239
  }
1226
1240
  const ctxWindowSize = ctx.getContextWindowSize();
1227
1241
  if (ctxWindowSize > 0) {
@@ -1823,7 +1837,7 @@ ${hint}` : "")
1823
1837
  },
1824
1838
  {
1825
1839
  name: "cost",
1826
- description: "Show session token usage summary",
1840
+ description: "Show session token usage, prompt-cache hits, and USD cost",
1827
1841
  usage: "/cost [reset]",
1828
1842
  execute(args, ctx) {
1829
1843
  const sub = args[0]?.toLowerCase();
@@ -1832,24 +1846,65 @@ ${hint}` : "")
1832
1846
  ctx.renderer.printSuccess("Session token counters reset.");
1833
1847
  return;
1834
1848
  }
1835
- const usage = ctx.getSessionTokenUsage();
1836
- const totalTokens = usage.inputTokens + usage.outputTokens;
1849
+ const session = ctx.sessions.current;
1850
+ const usage = session?.tokenUsage ?? {
1851
+ inputTokens: 0,
1852
+ outputTokens: 0,
1853
+ cacheCreationTokens: 0,
1854
+ cacheReadTokens: 0
1855
+ };
1856
+ const cacheCreate = usage.cacheCreationTokens ?? 0;
1857
+ const cacheRead = usage.cacheReadTokens ?? 0;
1858
+ const totalTokens = usage.inputTokens + usage.outputTokens + cacheCreate + cacheRead;
1837
1859
  if (totalTokens === 0) {
1838
1860
  ctx.renderer.printInfo("No token usage recorded this session.");
1839
1861
  return;
1840
1862
  }
1863
+ const provider = ctx.getCurrentProvider();
1864
+ const model = ctx.getCurrentModel();
1865
+ const cost = computeCost(provider, model, usage);
1866
+ const pricing = getPricing(provider, model);
1867
+ let savings = null;
1868
+ if (cost != null && pricing && cacheRead > 0) {
1869
+ const costWithoutCache = computeCost(provider, model, {
1870
+ inputTokens: usage.inputTokens + cacheRead,
1871
+ outputTokens: usage.outputTokens,
1872
+ cacheCreationTokens: cacheCreate,
1873
+ cacheReadTokens: 0
1874
+ });
1875
+ if (costWithoutCache != null) savings = costWithoutCache - cost;
1876
+ }
1841
1877
  console.log();
1842
- console.log(theme.heading(" \u{1F4CA} Session Token Usage"));
1843
- console.log(theme.dim(" " + "\u2500".repeat(40)));
1844
- console.log(theme.dim(" Input tokens : ") + chalk2.white(usage.inputTokens.toLocaleString()));
1845
- console.log(theme.dim(" Output tokens : ") + chalk2.white(usage.outputTokens.toLocaleString()));
1846
- console.log(theme.dim(" Total tokens : ") + chalk2.bold.white(totalTokens.toLocaleString()));
1847
- console.log(theme.dim(" " + "\u2500".repeat(40)));
1848
- const session = ctx.sessions.current;
1878
+ console.log(theme.heading(" \u{1F4B0} Session Cost & Token Usage"));
1879
+ console.log(theme.dim(" " + "\u2500".repeat(48)));
1880
+ console.log(theme.dim(" Input (uncached) : ") + chalk2.white(usage.inputTokens.toLocaleString().padStart(12)));
1881
+ console.log(theme.dim(" Output : ") + chalk2.white(usage.outputTokens.toLocaleString().padStart(12)));
1882
+ if (cacheCreate > 0) {
1883
+ console.log(theme.dim(" Cache write : ") + chalk2.yellow(cacheCreate.toLocaleString().padStart(12)));
1884
+ }
1885
+ if (cacheRead > 0) {
1886
+ const pct = Math.round(cacheRead / (cacheRead + usage.inputTokens) * 100);
1887
+ console.log(
1888
+ theme.dim(" Cache read : ") + chalk2.green(cacheRead.toLocaleString().padStart(12)) + theme.dim(` (${pct}% hit rate)`)
1889
+ );
1890
+ }
1891
+ console.log(theme.dim(" Total tokens : ") + chalk2.bold.white(totalTokens.toLocaleString().padStart(12)));
1892
+ console.log(theme.dim(" " + "\u2500".repeat(48)));
1893
+ if (cost != null) {
1894
+ console.log(theme.dim(" Cost : ") + chalk2.bold.cyan(formatCost(cost).padStart(12)));
1895
+ if (savings != null && savings > 0) {
1896
+ console.log(
1897
+ theme.dim(" Cache savings : ") + chalk2.green(`-${formatCost(savings)}`.padStart(12)) + theme.dim(` (vs no cache)`)
1898
+ );
1899
+ }
1900
+ } else {
1901
+ console.log(theme.dim(" Cost : ") + theme.dim(" \u2014 (pricing unknown)"));
1902
+ }
1903
+ console.log(theme.dim(" " + "\u2500".repeat(48)));
1849
1904
  if (session) {
1850
- console.log(theme.dim(" Provider : ") + theme.dim(ctx.getCurrentProvider()));
1851
- console.log(theme.dim(" Model : ") + theme.dim(ctx.getCurrentModel()));
1852
- console.log(theme.dim(" Messages : ") + theme.dim(String(session.messages.length)));
1905
+ console.log(theme.dim(" Provider : ") + theme.dim(provider));
1906
+ console.log(theme.dim(" Model : ") + theme.dim(model));
1907
+ console.log(theme.dim(" Messages : ") + theme.dim(String(session.messages.length)));
1853
1908
  }
1854
1909
  console.log();
1855
1910
  }
@@ -2106,7 +2161,7 @@ ${hint}` : "")
2106
2161
  usage: "/test [command|filter]",
2107
2162
  async execute(args, ctx) {
2108
2163
  try {
2109
- const { executeTests } = await import("./run-tests-P53FNUJY.js");
2164
+ const { executeTests } = await import("./run-tests-X4PCLXA2.js");
2110
2165
  const argStr = args.join(" ").trim();
2111
2166
  let testArgs = {};
2112
2167
  if (argStr) {
@@ -3344,7 +3399,14 @@ var Repl = class {
3344
3399
  /** 当前加载的层级上下文(全局/项目/子目录) */
3345
3400
  contextLayers = [];
3346
3401
  /** 本次会话累计 token 用量 */
3347
- sessionTokenUsage = { inputTokens: 0, outputTokens: 0 };
3402
+ sessionTokenUsage = { inputTokens: 0, outputTokens: 0, cacheCreationTokens: 0, cacheReadTokens: 0 };
3403
+ /** Fold a single-request TokenUsage (with optional cache fields) into sessionTokenUsage. */
3404
+ addSessionUsage(u) {
3405
+ this.sessionTokenUsage.inputTokens += u.inputTokens;
3406
+ this.sessionTokenUsage.outputTokens += u.outputTokens;
3407
+ this.sessionTokenUsage.cacheCreationTokens += u.cacheCreationTokens ?? 0;
3408
+ this.sessionTokenUsage.cacheReadTokens += u.cacheReadTokens ?? 0;
3409
+ }
3348
3410
  /** 启动时检测到的 Git 分支(无 git 仓库时为 null) */
3349
3411
  gitBranch = null;
3350
3412
  /** MCP 多服务器管理器(无 MCP 配置时为 null) */
@@ -4607,8 +4669,7 @@ Session '${this.resumeSessionId}' not found.
4607
4669
  session.addMessage({ role: "assistant", content, timestamp: /* @__PURE__ */ new Date() });
4608
4670
  this.events.emit("message.after", { content });
4609
4671
  if (usage) {
4610
- this.sessionTokenUsage.inputTokens += usage.inputTokens;
4611
- this.sessionTokenUsage.outputTokens += usage.outputTokens;
4672
+ this.addSessionUsage(usage);
4612
4673
  session.addTokenUsage(usage);
4613
4674
  if (showTokens && !tokensShown) {
4614
4675
  this.renderer.renderUsage(usage, this.sessionTokenUsage);
@@ -4637,8 +4698,7 @@ Session '${this.resumeSessionId}' not found.
4637
4698
  session.addMessage({ role: "assistant", content: response.content, timestamp: /* @__PURE__ */ new Date() });
4638
4699
  this.events.emit("message.after", { content: response.content });
4639
4700
  if (response.usage) {
4640
- this.sessionTokenUsage.inputTokens += response.usage.inputTokens;
4641
- this.sessionTokenUsage.outputTokens += response.usage.outputTokens;
4701
+ this.addSessionUsage(response.usage);
4642
4702
  session.addTokenUsage(response.usage);
4643
4703
  if (this.shouldShowTokens()) {
4644
4704
  this.renderer.renderUsage(response.usage, this.sessionTokenUsage);
@@ -4797,7 +4857,7 @@ You have a maximum of ${maxToolRounds} tool call rounds for this task. Plan effi
4797
4857
  const modelParams = this.getModelParams();
4798
4858
  const useStreaming = this.config.get("ui").streaming;
4799
4859
  const spinner = this.renderer.showSpinner("Thinking...");
4800
- const roundUsage = { inputTokens: 0, outputTokens: 0 };
4860
+ const roundUsage = { inputTokens: 0, outputTokens: 0, cacheCreationTokens: 0, cacheReadTokens: 0 };
4801
4861
  const supportsStreamingTools = useStreaming && typeof provider.chatWithToolsStream === "function";
4802
4862
  let consecutiveFreeRounds = 0;
4803
4863
  let lastToolCallSignature = "";
@@ -4904,6 +4964,8 @@ You have a maximum of ${maxToolRounds} tool call rounds for this task. Plan effi
4904
4964
  if (result.usage) {
4905
4965
  roundUsage.inputTokens += result.usage.inputTokens;
4906
4966
  roundUsage.outputTokens += result.usage.outputTokens;
4967
+ roundUsage.cacheCreationTokens += result.usage.cacheCreationTokens ?? 0;
4968
+ roundUsage.cacheReadTokens += result.usage.cacheReadTokens ?? 0;
4907
4969
  }
4908
4970
  if ("content" in result) {
4909
4971
  const hasWriteTools = toolDefs.some((t) => t.name === "write_file" || t.name === "edit_file");
@@ -4954,8 +5016,7 @@ You have a maximum of ${maxToolRounds} tool call rounds for this task. Plan effi
4954
5016
  });
4955
5017
  this.events.emit("message.after", { content: finalContent });
4956
5018
  if (roundUsage.inputTokens > 0 || roundUsage.outputTokens > 0) {
4957
- this.sessionTokenUsage.inputTokens += roundUsage.inputTokens;
4958
- this.sessionTokenUsage.outputTokens += roundUsage.outputTokens;
5019
+ this.addSessionUsage(roundUsage);
4959
5020
  session.addTokenUsage(roundUsage);
4960
5021
  if (this.shouldShowTokens()) {
4961
5022
  this.renderer.renderUsage(roundUsage, this.sessionTokenUsage);
@@ -4993,6 +5054,8 @@ You have a maximum of ${maxToolRounds} tool call rounds for this task. Plan effi
4993
5054
  if (genUsage) {
4994
5055
  roundUsage.inputTokens += genUsage.inputTokens;
4995
5056
  roundUsage.outputTokens += genUsage.outputTokens;
5057
+ roundUsage.cacheCreationTokens += genUsage.cacheCreationTokens ?? 0;
5058
+ roundUsage.cacheReadTokens += genUsage.cacheReadTokens ?? 0;
4996
5059
  }
4997
5060
  session.addMessage({ role: "assistant", content: genContent, timestamp: /* @__PURE__ */ new Date() });
4998
5061
  this.events.emit("message.after", { content: genContent });
@@ -5007,8 +5070,7 @@ You have a maximum of ${maxToolRounds} tool call rounds for this task. Plan effi
5007
5070
  const newMsgs2 = provider.buildToolResultMessages(result.toolCalls, syntheticResults, reasoningContent2);
5008
5071
  extraMessages.push(...newMsgs2);
5009
5072
  if (roundUsage.inputTokens > 0 || roundUsage.outputTokens > 0) {
5010
- this.sessionTokenUsage.inputTokens += roundUsage.inputTokens;
5011
- this.sessionTokenUsage.outputTokens += roundUsage.outputTokens;
5073
+ this.addSessionUsage(roundUsage);
5012
5074
  session.addTokenUsage(roundUsage);
5013
5075
  if (teeShowTokens && !teeTokShown) {
5014
5076
  this.renderer.renderUsage(roundUsage, this.sessionTokenUsage);
@@ -5204,6 +5266,8 @@ You have a maximum of ${maxToolRounds} tool call rounds for this task. Plan effi
5204
5266
  if (summaryResult.usage) {
5205
5267
  roundUsage.inputTokens += summaryResult.usage.inputTokens;
5206
5268
  roundUsage.outputTokens += summaryResult.usage.outputTokens;
5269
+ roundUsage.cacheCreationTokens += summaryResult.usage.cacheCreationTokens ?? 0;
5270
+ roundUsage.cacheReadTokens += summaryResult.usage.cacheReadTokens ?? 0;
5207
5271
  }
5208
5272
  } else {
5209
5273
  this.renderer.renderError(
@@ -5218,8 +5282,7 @@ Tip: You can continue the conversation by asking the AI to proceed.`
5218
5282
  );
5219
5283
  }
5220
5284
  if (roundUsage.inputTokens > 0 || roundUsage.outputTokens > 0) {
5221
- this.sessionTokenUsage.inputTokens += roundUsage.inputTokens;
5222
- this.sessionTokenUsage.outputTokens += roundUsage.outputTokens;
5285
+ this.addSessionUsage(roundUsage);
5223
5286
  session.addTokenUsage(roundUsage);
5224
5287
  if (this.shouldShowTokens()) {
5225
5288
  this.renderer.renderUsage(roundUsage, this.sessionTokenUsage);
@@ -5316,7 +5379,12 @@ Tip: You can continue the conversation by asking the AI to proceed.`
5316
5379
  },
5317
5380
  getSessionTokenUsage: () => ({ ...this.sessionTokenUsage }),
5318
5381
  resetSessionTokenUsage: () => {
5319
- this.sessionTokenUsage = { inputTokens: 0, outputTokens: 0 };
5382
+ this.sessionTokenUsage = {
5383
+ inputTokens: 0,
5384
+ outputTokens: 0,
5385
+ cacheCreationTokens: 0,
5386
+ cacheReadTokens: 0
5387
+ };
5320
5388
  },
5321
5389
  getGitBranch: () => this.gitBranch,
5322
5390
  getLastResponse: () => lastResponseStore.content,
@@ -5493,7 +5561,7 @@ program.command("web").description("Start Web UI server with browser-based chat
5493
5561
  console.error("Error: Invalid port number. Must be between 1 and 65535.");
5494
5562
  process.exit(1);
5495
5563
  }
5496
- const { startWebServer } = await import("./server-BQHIMEBH.js");
5564
+ const { startWebServer } = await import("./server-YPAZWGUE.js");
5497
5565
  await startWebServer({ port, host: options.host });
5498
5566
  });
5499
5567
  program.command("user [action] [username]").description("Manage Web UI users (list | create <name> | delete <name> | reset-password <name> | migrate <name>)").action(async (action, username) => {
@@ -5726,7 +5794,7 @@ program.command("hub [topic]").description("Start multi-agent hub (discuss / bra
5726
5794
  }),
5727
5795
  config.get("customProviders")
5728
5796
  );
5729
- const { startHub } = await import("./hub-6V54V4O3.js");
5797
+ const { startHub } = await import("./hub-AUWP4SWJ.js");
5730
5798
  await startHub(
5731
5799
  {
5732
5800
  topic: topic ?? "",
@@ -1,7 +1,7 @@
1
1
  import {
2
2
  executeTests,
3
3
  runTestsTool
4
- } from "./chunk-FOFQAEU6.js";
4
+ } from "./chunk-DJ342VFS.js";
5
5
  export {
6
6
  executeTests,
7
7
  runTestsTool
@@ -2,7 +2,7 @@
2
2
  import {
3
3
  executeTests,
4
4
  runTestsTool
5
- } from "./chunk-NP5KZVP6.js";
5
+ } from "./chunk-W7QVBFIJ.js";
6
6
  export {
7
7
  executeTests,
8
8
  runTestsTool
@@ -7,7 +7,9 @@ import {
7
7
  SessionManager,
8
8
  SkillManager,
9
9
  TOOL_CALL_REMINDER,
10
+ computeCost,
10
11
  detectsHallucinatedFileOp,
12
+ formatCost,
11
13
  formatGitContextForPrompt,
12
14
  getContentText,
13
15
  getGitContext,
@@ -15,7 +17,7 @@ import {
15
17
  hadPreviousWriteToolCalls,
16
18
  loadDevState,
17
19
  setupProxy
18
- } from "./chunk-6FYFVPVE.js";
20
+ } from "./chunk-JL5NK6AR.js";
19
21
  import {
20
22
  AuthManager
21
23
  } from "./chunk-BYNY5JPB.js";
@@ -34,7 +36,7 @@ import {
34
36
  spawnAgentContext,
35
37
  truncateOutput,
36
38
  undoStack
37
- } from "./chunk-TAR67QTH.js";
39
+ } from "./chunk-YQEIQJ6K.js";
38
40
  import "./chunk-4BKXL7SM.js";
39
41
  import {
40
42
  AGENTIC_BEHAVIOR_GUIDELINE,
@@ -54,7 +56,7 @@ import {
54
56
  SKILLS_DIR_NAME,
55
57
  VERSION,
56
58
  buildUserIdentityPrompt
57
- } from "./chunk-NP5KZVP6.js";
59
+ } from "./chunk-W7QVBFIJ.js";
58
60
 
59
61
  // src/web/server.ts
60
62
  import express from "express";
@@ -483,7 +485,17 @@ var SessionHandler = class _SessionHandler {
483
485
  currentModel;
484
486
  planMode = false;
485
487
  runtimeThinking = null;
486
- sessionTokenUsage = { inputTokens: 0, outputTokens: 0 };
488
+ sessionTokenUsage = { inputTokens: 0, outputTokens: 0, cacheCreationTokens: 0, cacheReadTokens: 0 };
489
+ /** Accumulate a TokenUsage (with optional cache fields) into sessionTokenUsage. */
490
+ addWebSessionUsage(u) {
491
+ this.sessionTokenUsage.inputTokens += u.inputTokens;
492
+ this.sessionTokenUsage.outputTokens += u.outputTokens;
493
+ this.sessionTokenUsage.cacheCreationTokens += u.cacheCreationTokens ?? 0;
494
+ this.sessionTokenUsage.cacheReadTokens += u.cacheReadTokens ?? 0;
495
+ }
496
+ resetWebSessionUsage() {
497
+ this.sessionTokenUsage = { inputTokens: 0, outputTokens: 0, cacheCreationTokens: 0, cacheReadTokens: 0 };
498
+ }
487
499
  abortController = null;
488
500
  userInterjection = null;
489
501
  processing = false;
@@ -547,6 +559,7 @@ var SessionHandler = class _SessionHandler {
547
559
  displayName: p.info.displayName,
548
560
  models: p.info.models.map((m) => ({ id: m.id, name: m.displayName ?? m.id }))
549
561
  }));
562
+ const costUsd = computeCost(this.currentProvider, this.currentModel, this.sessionTokenUsage);
550
563
  this.send({
551
564
  type: "status",
552
565
  provider: this.currentProvider,
@@ -557,6 +570,7 @@ var SessionHandler = class _SessionHandler {
557
570
  planMode: this.planMode,
558
571
  thinkingMode: this.runtimeThinking ?? false,
559
572
  tokenUsage: { ...this.sessionTokenUsage },
573
+ costUsd,
560
574
  providers: providerList
561
575
  });
562
576
  }
@@ -724,8 +738,7 @@ var SessionHandler = class _SessionHandler {
724
738
  if (chunk.done) {
725
739
  this.send({ type: "response_done", content: fullContent, usage: chunk.usage });
726
740
  if (chunk.usage) {
727
- this.sessionTokenUsage.inputTokens += chunk.usage.inputTokens;
728
- this.sessionTokenUsage.outputTokens += chunk.usage.outputTokens;
741
+ this.addWebSessionUsage(chunk.usage);
729
742
  session.addTokenUsage(chunk.usage);
730
743
  }
731
744
  break;
@@ -759,7 +772,7 @@ You have a maximum of ${maxToolRounds} tool call rounds for this task. Plan effi
759
772
  - When remaining rounds are low, focus on completing the current task and summarizing.`;
760
773
  const systemPrompt = baseSystemPrompt + roundBudgetHint;
761
774
  const modelParams = this.getModelParams();
762
- const roundUsage = { inputTokens: 0, outputTokens: 0 };
775
+ const roundUsage = { inputTokens: 0, outputTokens: 0, cacheCreationTokens: 0, cacheReadTokens: 0 };
763
776
  const supportsStreamingTools = typeof provider.chatWithToolsStream === "function";
764
777
  let consecutiveFreeRounds = 0;
765
778
  const warnNoteAt = Math.max(10, Math.floor(maxToolRounds * 0.2));
@@ -829,6 +842,8 @@ You have a maximum of ${maxToolRounds} tool call rounds for this task. Plan effi
829
842
  if (result.usage) {
830
843
  roundUsage.inputTokens += result.usage.inputTokens;
831
844
  roundUsage.outputTokens += result.usage.outputTokens;
845
+ roundUsage.cacheCreationTokens += result.usage.cacheCreationTokens ?? 0;
846
+ roundUsage.cacheReadTokens += result.usage.cacheReadTokens ?? 0;
832
847
  }
833
848
  if (result.content && !result.toolCalls) {
834
849
  const hasWriteTools = toolDefs.some((t) => t.name === "write_file" || t.name === "edit_file");
@@ -843,8 +858,7 @@ You have a maximum of ${maxToolRounds} tool call rounds for this task. Plan effi
843
858
  }
844
859
  this.send({ type: "response_done", content: result.content, usage: roundUsage });
845
860
  session.addMessage({ role: "assistant", content: result.content, timestamp: /* @__PURE__ */ new Date() });
846
- this.sessionTokenUsage.inputTokens += roundUsage.inputTokens;
847
- this.sessionTokenUsage.outputTokens += roundUsage.outputTokens;
861
+ this.addWebSessionUsage(roundUsage);
848
862
  session.addTokenUsage(roundUsage);
849
863
  return;
850
864
  }
@@ -917,8 +931,7 @@ ${summaryResult.content}`,
917
931
  message: `Reached maximum tool call rounds (${maxToolRounds}). You can continue by asking the AI to proceed.`
918
932
  });
919
933
  }
920
- this.sessionTokenUsage.inputTokens += roundUsage.inputTokens;
921
- this.sessionTokenUsage.outputTokens += roundUsage.outputTokens;
934
+ this.addWebSessionUsage(roundUsage);
922
935
  session.addTokenUsage(roundUsage);
923
936
  } catch (err) {
924
937
  if (err.name === "AbortError") {
@@ -1034,7 +1047,7 @@ ${summaryResult.content}`,
1034
1047
  case "clear":
1035
1048
  this.saveIfNeeded();
1036
1049
  this.sessions.createSession(this.currentProvider, this.currentModel);
1037
- this.sessionTokenUsage = { inputTokens: 0, outputTokens: 0 };
1050
+ this.resetWebSessionUsage();
1038
1051
  this.send({ type: "info", message: "Conversation cleared." });
1039
1052
  this.sendStatus();
1040
1053
  this.sendSessionList();
@@ -1068,12 +1081,19 @@ ${summaryResult.content}`,
1068
1081
  }
1069
1082
  case "status": {
1070
1083
  const session = this.sessions.current;
1084
+ const cacheRead = this.sessionTokenUsage.cacheReadTokens;
1085
+ const cacheCreate = this.sessionTokenUsage.cacheCreationTokens;
1086
+ const cost = computeCost(this.currentProvider, this.currentModel, this.sessionTokenUsage);
1087
+ const cacheLine = cacheRead > 0 || cacheCreate > 0 ? `
1088
+ Cache: write=${cacheCreate} read=${cacheRead}` : "";
1089
+ const costLine = cost != null ? `
1090
+ Cost: ${formatCost(cost)}` : "";
1071
1091
  this.send({
1072
1092
  type: "info",
1073
1093
  message: `Provider: ${this.currentProvider}
1074
1094
  Model: ${this.currentModel}
1075
1095
  Session: ${session?.id ?? "none"} (${session?.messages.length ?? 0} messages)
1076
- Tokens: in=${this.sessionTokenUsage.inputTokens} out=${this.sessionTokenUsage.outputTokens}`
1096
+ Tokens: in=${this.sessionTokenUsage.inputTokens} out=${this.sessionTokenUsage.outputTokens}${cacheLine}${costLine}`
1077
1097
  });
1078
1098
  break;
1079
1099
  }
@@ -1083,7 +1103,7 @@ Tokens: in=${this.sessionTokenUsage.inputTokens} out=${this.sessionTokenUsage.ou
1083
1103
  this.saveIfNeeded();
1084
1104
  const created = this.sessions.createSession(this.currentProvider, this.currentModel);
1085
1105
  this.unsavedSessions.set(created.id, created);
1086
- this.sessionTokenUsage = { inputTokens: 0, outputTokens: 0 };
1106
+ this.resetWebSessionUsage();
1087
1107
  this.send({ type: "info", message: "New session created." });
1088
1108
  this.sendStatus();
1089
1109
  this.sendSessionList();
@@ -1094,7 +1114,7 @@ Tokens: in=${this.sessionTokenUsage.inputTokens} out=${this.sessionTokenUsage.ou
1094
1114
  const cached = cachedExact ?? [...this.unsavedSessions.values()].find((s) => s.id.startsWith(targetId));
1095
1115
  if (cached) {
1096
1116
  this.sessions.setCurrent(cached);
1097
- this.sessionTokenUsage = { inputTokens: 0, outputTokens: 0 };
1117
+ this.resetWebSessionUsage();
1098
1118
  this.send({
1099
1119
  type: "info",
1100
1120
  message: `Loaded session: ${cached.id.slice(0, 8)} "${cached.title ?? ""}" (${cached.messages.length} messages)`
@@ -1108,7 +1128,7 @@ Tokens: in=${this.sessionTokenUsage.inputTokens} out=${this.sessionTokenUsage.ou
1108
1128
  const found = list.find((s) => s.id.startsWith(targetId));
1109
1129
  if (found) {
1110
1130
  this.sessions.loadSession(found.id);
1111
- this.sessionTokenUsage = { inputTokens: 0, outputTokens: 0 };
1131
+ this.resetWebSessionUsage();
1112
1132
  this.send({ type: "info", message: `Loaded session: ${found.id.slice(0, 8)} "${found.title ?? ""}" (${found.messageCount} messages)` });
1113
1133
  this.sendSessionMessages();
1114
1134
  this.sendStatus();
@@ -1116,7 +1136,7 @@ Tokens: in=${this.sessionTokenUsage.inputTokens} out=${this.sessionTokenUsage.ou
1116
1136
  } else {
1117
1137
  const recreated = this.sessions.createSession(this.currentProvider, this.currentModel);
1118
1138
  this.unsavedSessions.set(recreated.id, recreated);
1119
- this.sessionTokenUsage = { inputTokens: 0, outputTokens: 0 };
1139
+ this.resetWebSessionUsage();
1120
1140
  this.send({
1121
1141
  type: "info",
1122
1142
  message: `Previous session (${targetId.slice(0, 8)}) is no longer available \u2014 started a new one.`
@@ -1256,16 +1276,41 @@ Tokens: in=${this.sessionTokenUsage.inputTokens} out=${this.sessionTokenUsage.ou
1256
1276
  });
1257
1277
  break;
1258
1278
  case "cost": {
1259
- const total = this.sessionTokenUsage.inputTokens + this.sessionTokenUsage.outputTokens;
1260
- this.send({
1261
- type: "info",
1262
- message: `\u{1F4CA} Token Usage
1263
- Provider: ${this.currentProvider}
1264
- Model: ${this.currentModel}
1265
- Input: ${this.sessionTokenUsage.inputTokens.toLocaleString()}
1266
- Output: ${this.sessionTokenUsage.outputTokens.toLocaleString()}
1267
- Total: ${total.toLocaleString()}`
1268
- });
1279
+ const u = this.sessionTokenUsage;
1280
+ const total = u.inputTokens + u.outputTokens + u.cacheCreationTokens + u.cacheReadTokens;
1281
+ const cost = computeCost(this.currentProvider, this.currentModel, u);
1282
+ let savings = null;
1283
+ if (cost != null && u.cacheReadTokens > 0) {
1284
+ const withoutCache = computeCost(this.currentProvider, this.currentModel, {
1285
+ inputTokens: u.inputTokens + u.cacheReadTokens,
1286
+ outputTokens: u.outputTokens,
1287
+ cacheCreationTokens: u.cacheCreationTokens,
1288
+ cacheReadTokens: 0
1289
+ });
1290
+ if (withoutCache != null) savings = withoutCache - cost;
1291
+ }
1292
+ const lines = [
1293
+ "\u{1F4B0} Session Cost & Token Usage",
1294
+ ` Provider : ${this.currentProvider}`,
1295
+ ` Model : ${this.currentModel}`,
1296
+ ` Input (uncached): ${u.inputTokens.toLocaleString()}`,
1297
+ ` Output : ${u.outputTokens.toLocaleString()}`
1298
+ ];
1299
+ if (u.cacheCreationTokens > 0) lines.push(` Cache write : ${u.cacheCreationTokens.toLocaleString()}`);
1300
+ if (u.cacheReadTokens > 0) {
1301
+ const pct = Math.round(u.cacheReadTokens / (u.cacheReadTokens + u.inputTokens) * 100);
1302
+ lines.push(` Cache read : ${u.cacheReadTokens.toLocaleString()} (${pct}% hit rate)`);
1303
+ }
1304
+ lines.push(` Total tokens : ${total.toLocaleString()}`);
1305
+ if (cost != null) {
1306
+ lines.push(` Cost : ${formatCost(cost)}`);
1307
+ if (savings != null && savings > 0) {
1308
+ lines.push(` Cache savings : -${formatCost(savings)} (vs no cache)`);
1309
+ }
1310
+ } else {
1311
+ lines.push(` Cost : \u2014 (pricing unknown for this model)`);
1312
+ }
1313
+ this.send({ type: "info", message: lines.join("\n") });
1269
1314
  break;
1270
1315
  }
1271
1316
  case "tools":
@@ -1691,7 +1736,7 @@ ${undoResults.map((r) => ` \u2022 ${r}`).join("\n")}` });
1691
1736
  case "test": {
1692
1737
  this.send({ type: "info", message: "\u{1F9EA} Running tests..." });
1693
1738
  try {
1694
- const { executeTests } = await import("./run-tests-P53FNUJY.js");
1739
+ const { executeTests } = await import("./run-tests-X4PCLXA2.js");
1695
1740
  const argStr = args.join(" ").trim();
1696
1741
  let testArgs = {};
1697
1742
  if (argStr) {
@@ -4,11 +4,11 @@ import {
4
4
  getDangerLevel,
5
5
  googleSearchContext,
6
6
  truncateOutput
7
- } from "./chunk-TAR67QTH.js";
7
+ } from "./chunk-YQEIQJ6K.js";
8
8
  import "./chunk-4BKXL7SM.js";
9
9
  import {
10
10
  SUBAGENT_ALLOWED_TOOLS
11
- } from "./chunk-NP5KZVP6.js";
11
+ } from "./chunk-W7QVBFIJ.js";
12
12
 
13
13
  // src/hub/task-orchestrator.ts
14
14
  import { createInterface } from "readline";
@@ -496,7 +496,16 @@ function handleStatus(msg) {
496
496
  btnPlan.classList.toggle('btn-active-toggle', msg.planMode);
497
497
  statusSession.textContent = `📋 ${msg.sessionId?.slice(0, 8) || '—'} (${msg.messageCount} msgs)`;
498
498
  if (msg.tokenUsage) {
499
- statusTokens.textContent = `📊 in: ${msg.tokenUsage.inputTokens} out: ${msg.tokenUsage.outputTokens}`;
499
+ const u = msg.tokenUsage;
500
+ const cacheRead = u.cacheReadTokens || 0;
501
+ let line = `📊 in: ${u.inputTokens.toLocaleString()} out: ${u.outputTokens.toLocaleString()}`;
502
+ if (cacheRead > 0) line += ` cache: ${cacheRead.toLocaleString()}`;
503
+ if (msg.costUsd != null) {
504
+ const cost = msg.costUsd;
505
+ const costStr = cost === 0 ? '$0' : cost < 0.01 ? `$${cost.toFixed(4)}` : cost < 1 ? `$${cost.toFixed(3)}` : `$${cost.toFixed(2)}`;
506
+ line += ` 💰 ${costStr}`;
507
+ }
508
+ statusTokens.textContent = line;
500
509
  }
501
510
  sessionListEl.querySelectorAll('.session-item').forEach(el => {
502
511
  el.classList.toggle('active', el.dataset.sessionId === msg.sessionId);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "jinzd-ai-cli",
3
- "version": "0.4.54",
3
+ "version": "0.4.55",
4
4
  "description": "Cross-platform REPL-style AI CLI with multi-provider support",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",