jinzd-ai-cli 0.4.53 → 0.4.55

This diff shows the content of publicly released versions of the package as they appear in their public registries. It is provided for informational purposes only.
@@ -6,7 +6,7 @@ import { platform } from "os";
  import chalk from "chalk";
 
  // src/core/constants.ts
- var VERSION = "0.4.53";
+ var VERSION = "0.4.55";
  var APP_NAME = "ai-cli";
  var CONFIG_DIR_NAME = ".aicli";
  var CONFIG_FILE_NAME = "config.json";
@@ -7,7 +7,7 @@ import {
  ProviderNotFoundError,
  RateLimitError,
  schemaToJsonSchema
- } from "./chunk-IXDGWT2Z.js";
+ } from "./chunk-YQEIQJ6K.js";
  import {
  APP_NAME,
  CONFIG_DIR_NAME,
@@ -20,7 +20,7 @@ import {
  MCP_TOOL_PREFIX,
  PLUGINS_DIR_NAME,
  VERSION
- } from "./chunk-YIMTDKUW.js";
+ } from "./chunk-W7QVBFIJ.js";
 
  // src/config/config-manager.ts
  import { readFileSync, writeFileSync, existsSync, mkdirSync } from "fs";
@@ -312,6 +312,7 @@ var BaseProvider = class {
  };
 
  // src/providers/claude.ts
+ var CACHE_MIN_SYSTEM_CHARS = 2e3;
  var ClaudeProvider = class extends BaseProvider {
  client;
  info = {
@@ -382,6 +383,52 @@ var ClaudeProvider = class extends BaseProvider {
  }
  return blocks.length > 0 ? blocks : "";
  }
+ /**
+ * Build a cacheable system prompt payload.
+ * When the prompt is long enough to be worth caching, return an array with a
+ * single text block carrying `cache_control: { type: 'ephemeral' }`. This caches
+ * system + memory + context files across every request in an agentic loop.
+ * Short prompts pass through as a plain string (no caching overhead).
+ */
+ buildSystemParam(systemPrompt) {
+ if (!systemPrompt) return void 0;
+ if (systemPrompt.length < CACHE_MIN_SYSTEM_CHARS) return systemPrompt;
+ return [
+ {
+ type: "text",
+ text: systemPrompt,
+ cache_control: { type: "ephemeral" }
+ }
+ ];
+ }
+ /**
+ * Mark the last tool definition with `cache_control: ephemeral` so the entire
+ * tool block (all 24+ tools) is cached together. Anthropic caches everything
+ * up to and including a cache breakpoint, so one marker covers all tools.
+ * Returns a new array — does not mutate the input.
+ */
+ addToolsCacheControl(tools) {
+ if (tools.length === 0) return tools;
+ const last = tools[tools.length - 1];
+ return [
+ ...tools.slice(0, -1),
+ { ...last, cache_control: { type: "ephemeral" } }
+ ];
+ }
+ /** Extract usage (including cache fields) from an Anthropic response. */
+ extractUsage(u) {
+ const usage = {
+ inputTokens: u.input_tokens,
+ outputTokens: u.output_tokens
+ };
+ if (u.cache_creation_input_tokens != null && u.cache_creation_input_tokens > 0) {
+ usage.cacheCreationTokens = u.cache_creation_input_tokens;
+ }
+ if (u.cache_read_input_tokens != null && u.cache_read_input_tokens > 0) {
+ usage.cacheReadTokens = u.cache_read_input_tokens;
+ }
+ return usage;
+ }
  /**
  * Build the Extended Thinking parameters.
  * - When thinking is enabled, temperature must be 1 or unset (an Anthropic API requirement)
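Taken together, the helpers above give each Anthropic request two cache breakpoints: one on the last tool definition and one on the system block. A standalone sketch of the shapes they produce, assuming the same Messages API payloads; the sample tool and prompt are illustrative, not taken from the package:

    // Sketch of the new caching payloads (illustrative inputs).
    const CACHE_MIN_SYSTEM_CHARS = 2000; // mirrors the 2e3 constant added above

    function buildSystemParam(systemPrompt) {
      if (!systemPrompt) return undefined;
      if (systemPrompt.length < CACHE_MIN_SYSTEM_CHARS) return systemPrompt; // short prompt: plain string
      return [{ type: "text", text: systemPrompt, cache_control: { type: "ephemeral" } }];
    }

    function addToolsCacheControl(tools) {
      if (tools.length === 0) return tools;
      const last = tools[tools.length - 1];
      // Only the last tool carries the marker; Anthropic caches the whole prefix up to it.
      return [...tools.slice(0, -1), { ...last, cache_control: { type: "ephemeral" } }];
    }

    // Hypothetical tool list and long system prompt:
    const tools = addToolsCacheControl([
      { name: "read_file", description: "Read a file", input_schema: { type: "object", properties: {} } }
    ]);
    const system = buildSystemParam("You are a coding agent. ".repeat(150)); // ~3600 chars

    console.log(tools[tools.length - 1].cache_control);          // { type: "ephemeral" }
    console.log(Array.isArray(system), system[0].cache_control); // true { type: "ephemeral" }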
@@ -432,7 +479,7 @@ var ClaudeProvider = class extends BaseProvider {
  const response = await this.client.messages.create({
  model: request.model,
  messages,
- system: request.systemPrompt,
+ system: this.buildSystemParam(request.systemPrompt),
  max_tokens: request.maxTokens ?? 8192,
  temperature,
  thinking
@@ -441,10 +488,7 @@ var ClaudeProvider = class extends BaseProvider {
  return {
  content,
  model: response.model,
- usage: {
- inputTokens: response.usage.input_tokens,
- outputTokens: response.usage.output_tokens
- }
+ usage: this.extractUsage(response.usage)
  };
  } catch (err) {
  throw this.wrapError(err);
@@ -460,7 +504,7 @@ var ClaudeProvider = class extends BaseProvider {
  const stream = this.client.messages.stream({
  model: request.model,
  messages,
- system: request.systemPrompt,
+ system: this.buildSystemParam(request.systemPrompt),
  max_tokens: request.maxTokens ?? 8192,
  temperature,
  thinking
@@ -493,20 +537,22 @@ var ClaudeProvider = class extends BaseProvider {
  }
  async chatWithTools(request, tools) {
  try {
- const anthropicTools = tools.map((t) => ({
- name: t.name,
- description: t.description,
- input_schema: {
- type: "object",
- properties: Object.fromEntries(
- Object.entries(t.parameters).map(([key, schema]) => [
- key,
- schemaToJsonSchema(schema)
- ])
- ),
- required: Object.entries(t.parameters).filter(([, s]) => s.required).map(([k]) => k)
- }
- }));
+ const anthropicTools = this.addToolsCacheControl(
+ tools.map((t) => ({
+ name: t.name,
+ description: t.description,
+ input_schema: {
+ type: "object",
+ properties: Object.fromEntries(
+ Object.entries(t.parameters).map(([key, schema]) => [
+ key,
+ schemaToJsonSchema(schema)
+ ])
+ ),
+ required: Object.entries(t.parameters).filter(([, s]) => s.required).map(([k]) => k)
+ }
+ }))
+ );
  const baseMessages = request.messages.filter((m) => m.role !== "system").map((m) => ({ role: m.role, content: this.contentToClaudeParts(m.content) }));
  const extraMessages = request._extraMessages ?? [];
  const allMessages = [...baseMessages, ...extraMessages];
@@ -515,15 +561,12 @@ var ClaudeProvider = class extends BaseProvider {
  model: request.model,
  messages: allMessages,
  tools: anthropicTools,
- system: request.systemPrompt,
+ system: this.buildSystemParam(request.systemPrompt),
  max_tokens: request.maxTokens ?? 8192,
  temperature,
  thinking
  });
- const usage = {
- inputTokens: response.usage.input_tokens,
- outputTokens: response.usage.output_tokens
- };
+ const usage = this.extractUsage(response.usage);
  const toolUseBlocks = response.content.filter(
  (b) => b.type === "tool_use"
  );
@@ -547,20 +590,22 @@ var ClaudeProvider = class extends BaseProvider {
  * Also collects the raw content blocks for use by buildToolResultMessages.
  */
  async *chatWithToolsStream(request, tools) {
- const anthropicTools = tools.map((t) => ({
- name: t.name,
- description: t.description,
- input_schema: {
- type: "object",
- properties: Object.fromEntries(
- Object.entries(t.parameters).map(([key, schema]) => [
- key,
- schemaToJsonSchema(schema)
- ])
- ),
- required: Object.entries(t.parameters).filter(([, s]) => s.required).map(([k]) => k)
- }
- }));
+ const anthropicTools = this.addToolsCacheControl(
+ tools.map((t) => ({
+ name: t.name,
+ description: t.description,
+ input_schema: {
+ type: "object",
+ properties: Object.fromEntries(
+ Object.entries(t.parameters).map(([key, schema]) => [
+ key,
+ schemaToJsonSchema(schema)
+ ])
+ ),
+ required: Object.entries(t.parameters).filter(([, s]) => s.required).map(([k]) => k)
+ }
+ }))
+ );
  const baseMessages = request.messages.filter((m) => m.role !== "system").map((m) => ({ role: m.role, content: this.contentToClaudeParts(m.content) }));
  const extraMessages = request._extraMessages ?? [];
  const allMessages = [...baseMessages, ...extraMessages];
@@ -572,7 +617,7 @@ var ClaudeProvider = class extends BaseProvider {
  model: request.model,
  messages: allMessages,
  tools: anthropicTools,
- system: request.systemPrompt,
+ system: this.buildSystemParam(request.systemPrompt),
  max_tokens: request.maxTokens ?? 8192,
  temperature,
  thinking
@@ -580,7 +625,13 @@ var ClaudeProvider = class extends BaseProvider {
  let currentBlockType = null;
  let currentToolIndex = 0;
  let currentBlockData = {};
+ let startUsage = null;
  for await (const event of stream) {
+ if (event.type === "message_start") {
+ const msgUsage = event.message?.usage;
+ if (msgUsage) startUsage = msgUsage;
+ continue;
+ }
  if (event.type === "content_block_start") {
  const block = event.content_block;
  currentBlockType = block.type;
@@ -641,15 +692,17 @@ var ClaudeProvider = class extends BaseProvider {
  currentBlockType = null;
  currentBlockData = {};
  } else if (event.type === "message_delta") {
- const usage = event.usage;
- if (usage) {
+ const deltaUsage = event.usage;
+ if (deltaUsage) {
  doneEmitted = true;
  yield {
  type: "done",
- usage: {
- inputTokens: usage.input_tokens ?? 0,
- outputTokens: usage.output_tokens ?? 0
- },
+ usage: this.extractUsage({
+ input_tokens: startUsage?.input_tokens ?? deltaUsage.input_tokens ?? 0,
+ output_tokens: deltaUsage.output_tokens ?? 0,
+ cache_creation_input_tokens: startUsage?.cache_creation_input_tokens,
+ cache_read_input_tokens: startUsage?.cache_read_input_tokens
+ }),
  rawContent: rawContentBlocks
  };
  }
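The startUsage bookkeeping matters because Anthropic streams usage in two halves: message_start carries input_tokens and the cache counters, while the closing message_delta carries output_tokens. A minimal sketch of the merge, with illustrative event payloads rather than a captured stream:

    // Simulated stream events (illustrative numbers).
    const events = [
      { type: "message_start", message: { usage: { input_tokens: 42, cache_creation_input_tokens: 0, cache_read_input_tokens: 5120 } } },
      { type: "message_delta", usage: { output_tokens: 310 } }
    ];

    let startUsage = null;
    for (const event of events) {
      if (event.type === "message_start") {
        startUsage = event.message?.usage ?? null; // input + cache counters arrive here
      } else if (event.type === "message_delta" && event.usage) {
        console.log({
          input_tokens: startUsage?.input_tokens ?? event.usage.input_tokens ?? 0,
          output_tokens: event.usage.output_tokens ?? 0,
          cache_creation_input_tokens: startUsage?.cache_creation_input_tokens,
          cache_read_input_tokens: startUsage?.cache_read_input_tokens
        });
        // { input_tokens: 42, output_tokens: 310, cache_creation_input_tokens: 0, cache_read_input_tokens: 5120 }
      }
    }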
@@ -1003,6 +1056,16 @@ Node.js does not automatically use system proxies. Try one of the following:
 
  // src/providers/openai-compatible.ts
  import OpenAI from "openai";
+ function toUsage(u) {
+ if (!u) return void 0;
+ const cached = u.prompt_tokens_details?.cached_tokens ?? 0;
+ const usage = {
+ inputTokens: Math.max(0, u.prompt_tokens - cached),
+ outputTokens: u.completion_tokens
+ };
+ if (cached > 0) usage.cacheReadTokens = cached;
+ return usage;
+ }
  var OpenAICompatibleProvider = class extends BaseProvider {
  client;
  defaultTimeout = 6e4;
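Note the subtraction in toUsage: in OpenAI-style usage objects, prompt_tokens already includes any cached tokens, with the cached portion broken out under prompt_tokens_details.cached_tokens, so the uncached input is the difference. A worked example with illustrative numbers:

    // 800 of 1000 prompt tokens served from the provider's prompt cache.
    const raw = {
      prompt_tokens: 1000,
      completion_tokens: 250,
      prompt_tokens_details: { cached_tokens: 800 }
    };

    const cached = raw.prompt_tokens_details?.cached_tokens ?? 0;
    const usage = {
      inputTokens: Math.max(0, raw.prompt_tokens - cached), // 200 uncached input tokens
      outputTokens: raw.completion_tokens                   // 250
    };
    if (cached > 0) usage.cacheReadTokens = cached;         // 800

    console.log(usage); // { inputTokens: 200, outputTokens: 250, cacheReadTokens: 800 }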
@@ -1056,10 +1119,7 @@ var OpenAICompatibleProvider = class extends BaseProvider {
  return {
  content: firstChoice.message.content ?? "",
  model: response.model,
- usage: response.usage ? {
- inputTokens: response.usage.prompt_tokens,
- outputTokens: response.usage.completion_tokens
- } : void 0
+ usage: toUsage(response.usage)
  };
  } catch (err) {
  throw this.wrapError(err);
@@ -1088,10 +1148,7 @@ var OpenAICompatibleProvider = class extends BaseProvider {
  yield {
  delta: "",
  done: true,
- usage: {
- inputTokens: chunk.usage.prompt_tokens,
- outputTokens: chunk.usage.completion_tokens
- }
+ usage: toUsage(chunk.usage)
  };
  continue;
  }
@@ -1159,10 +1216,7 @@ var OpenAICompatibleProvider = class extends BaseProvider {
  return { content: "", usage: void 0 };
  }
  const message = firstChoice.message;
- const usage = response.usage ? {
- inputTokens: response.usage.prompt_tokens,
- outputTokens: response.usage.completion_tokens
- } : void 0;
+ const usage = toUsage(response.usage);
  const reasoningContent = message.reasoning_content;
  if (message.tool_calls && message.tool_calls.length > 0) {
  const toolCalls = message.tool_calls.map((tc) => {
@@ -1275,10 +1329,7 @@ var OpenAICompatibleProvider = class extends BaseProvider {
  }
  yield {
  type: "done",
- usage: {
- inputTokens: chunk.usage.prompt_tokens,
- outputTokens: chunk.usage.completion_tokens
- }
+ usage: toUsage(chunk.usage)
  };
  continue;
  }
@@ -2331,7 +2382,12 @@ var Session = class _Session {
  updated;
  messages = [];
  title;
- tokenUsage = { inputTokens: 0, outputTokens: 0 };
+ tokenUsage = {
+ inputTokens: 0,
+ outputTokens: 0,
+ cacheCreationTokens: 0,
+ cacheReadTokens: 0
+ };
  checkpoints = [];
  constructor(id, provider, model) {
  this.id = id;
@@ -2359,11 +2415,18 @@ var Session = class _Session {
  addTokenUsage(usage) {
  this.tokenUsage.inputTokens += usage.inputTokens;
  this.tokenUsage.outputTokens += usage.outputTokens;
+ this.tokenUsage.cacheCreationTokens += usage.cacheCreationTokens ?? 0;
+ this.tokenUsage.cacheReadTokens += usage.cacheReadTokens ?? 0;
  }
  clear() {
  this.messages = [];
  this.title = void 0;
- this.tokenUsage = { inputTokens: 0, outputTokens: 0 };
+ this.tokenUsage = {
+ inputTokens: 0,
+ outputTokens: 0,
+ cacheCreationTokens: 0,
+ cacheReadTokens: 0
+ };
  this.updated = /* @__PURE__ */ new Date();
  }
  /**
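The ?? 0 fallbacks keep addTokenUsage backward compatible with providers whose usage objects never include the cache fields. A small sketch with illustrative numbers:

    const tokenUsage = { inputTokens: 0, outputTokens: 0, cacheCreationTokens: 0, cacheReadTokens: 0 };

    function addTokenUsage(usage) {
      tokenUsage.inputTokens += usage.inputTokens;
      tokenUsage.outputTokens += usage.outputTokens;
      tokenUsage.cacheCreationTokens += usage.cacheCreationTokens ?? 0; // undefined -> 0
      tokenUsage.cacheReadTokens += usage.cacheReadTokens ?? 0;
    }

    addTokenUsage({ inputTokens: 200, outputTokens: 250, cacheReadTokens: 800 }); // cache-aware provider
    addTokenUsage({ inputTokens: 50, outputTokens: 20 });                         // provider without cache fields
    console.log(tokenUsage); // { inputTokens: 250, outputTokens: 270, cacheCreationTokens: 0, cacheReadTokens: 800 }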
@@ -2491,7 +2554,9 @@ var Session = class _Session {
  if (tu && typeof tu === "object") {
  session.tokenUsage = {
  inputTokens: typeof tu.inputTokens === "number" ? tu.inputTokens : 0,
- outputTokens: typeof tu.outputTokens === "number" ? tu.outputTokens : 0
+ outputTokens: typeof tu.outputTokens === "number" ? tu.outputTokens : 0,
+ cacheCreationTokens: typeof tu.cacheCreationTokens === "number" ? tu.cacheCreationTokens : 0,
+ cacheReadTokens: typeof tu.cacheReadTokens === "number" ? tu.cacheReadTokens : 0
  };
  }
  if (Array.isArray(d.checkpoints)) {
@@ -3495,6 +3560,87 @@ async function setupProxy(configProxy) {
  }
  }
 
+ // src/core/pricing.ts
+ var PRICING_TABLE = {
+ // ── Anthropic Claude ──────────────────────────────────────────
+ "claude-opus-4-6": { input: 15, output: 75, cacheWrite: 18.75, cacheRead: 1.5 },
+ "claude-opus-4-5": { input: 15, output: 75, cacheWrite: 18.75, cacheRead: 1.5 },
+ "claude-sonnet-4-6": { input: 3, output: 15, cacheWrite: 3.75, cacheRead: 0.3 },
+ "claude-sonnet-4-5-20250929": { input: 3, output: 15, cacheWrite: 3.75, cacheRead: 0.3 },
+ "claude-haiku-4-5-20251001": { input: 1, output: 5, cacheWrite: 1.25, cacheRead: 0.1 },
+ "claude-haiku-4-5": { input: 1, output: 5, cacheWrite: 1.25, cacheRead: 0.1 },
+ // Legacy Claude 3.x families (prefix fallback handles minor date suffixes)
+ "claude-3-5-sonnet": { input: 3, output: 15, cacheWrite: 3.75, cacheRead: 0.3 },
+ "claude-3-5-haiku": { input: 0.8, output: 4, cacheWrite: 1, cacheRead: 0.08 },
+ "claude-3-opus": { input: 15, output: 75, cacheWrite: 18.75, cacheRead: 1.5 },
+ // ── OpenAI ────────────────────────────────────────────────────
+ "gpt-4o": { input: 2.5, output: 10, cacheRead: 1.25 },
+ "gpt-4o-mini": { input: 0.15, output: 0.6, cacheRead: 0.075 },
+ "gpt-4-turbo": { input: 10, output: 30 },
+ "gpt-4": { input: 30, output: 60 },
+ "gpt-4.1": { input: 2, output: 8, cacheRead: 0.5 },
+ "gpt-4.1-mini": { input: 0.4, output: 1.6, cacheRead: 0.1 },
+ "gpt-4.1-nano": { input: 0.1, output: 0.4, cacheRead: 0.025 },
+ "o1": { input: 15, output: 60, cacheRead: 7.5 },
+ "o1-mini": { input: 3, output: 12, cacheRead: 1.5 },
+ "o3": { input: 10, output: 40, cacheRead: 2.5 },
+ "o3-mini": { input: 1.1, output: 4.4, cacheRead: 0.55 },
+ // ── Google Gemini ─────────────────────────────────────────────
+ "gemini-2.5-pro": { input: 1.25, output: 10 },
+ "gemini-2.5-flash": { input: 0.3, output: 2.5 },
+ "gemini-2.0-flash": { input: 0.1, output: 0.4 },
+ "gemini-1.5-pro": { input: 1.25, output: 5 },
+ "gemini-1.5-flash": { input: 0.075, output: 0.3 },
+ // ── DeepSeek ──────────────────────────────────────────────────
+ "deepseek-chat": { input: 0.27, output: 1.1, cacheRead: 0.07 },
+ "deepseek-reasoner": { input: 0.55, output: 2.19, cacheRead: 0.14 },
+ "deepseek-v3": { input: 0.27, output: 1.1, cacheRead: 0.07 },
+ // ── Moonshot Kimi ─────────────────────────────────────────────
+ "moonshot-v1-8k": { input: 0.17, output: 0.17 },
+ "moonshot-v1-32k": { input: 0.33, output: 0.33 },
+ "moonshot-v1-128k": { input: 0.83, output: 0.83 },
+ "kimi-k2": { input: 0.6, output: 2.5 },
+ "kimi-latest": { input: 0.6, output: 2.5 },
+ // ── Zhipu GLM ─────────────────────────────────────────────────
+ "glm-4-plus": { input: 0.7, output: 0.7 },
+ "glm-4": { input: 0.14, output: 0.14 },
+ "glm-4-flash": { input: 0, output: 0 },
+ "glm-4.5": { input: 0.29, output: 1.14 },
+ "glm-4.6": { input: 0.6, output: 2.2 }
+ // ── OpenRouter (pass-through — actual cost depends on underlying model) ──
+ // Left empty; callers should resolve via underlying model ID.
+ // ── Ollama (local, zero cost) ─────────────────────────────────
+ // Handled via provider check below.
+ };
+ var FREE_PROVIDERS = /* @__PURE__ */ new Set(["ollama"]);
+ function getPricing(provider, model) {
+ if (FREE_PROVIDERS.has(provider.toLowerCase())) {
+ return { input: 0, output: 0 };
+ }
+ const key = model.toLowerCase();
+ if (PRICING_TABLE[key]) return PRICING_TABLE[key];
+ const keys = Object.keys(PRICING_TABLE).sort((a, b) => b.length - a.length);
+ for (const k of keys) {
+ if (key.startsWith(k)) return PRICING_TABLE[k];
+ }
+ return null;
+ }
+ function computeCost(provider, model, usage) {
+ const p = getPricing(provider, model);
+ if (!p) return null;
+ const input = usage.inputTokens * p.input;
+ const output = usage.outputTokens * p.output;
+ const cacheWrite = (usage.cacheCreationTokens ?? 0) * (p.cacheWrite ?? p.input);
+ const cacheRead = (usage.cacheReadTokens ?? 0) * (p.cacheRead ?? p.input);
+ return (input + output + cacheWrite + cacheRead) / 1e6;
+ }
+ function formatCost(amount) {
+ if (amount === 0) return "$0.0000";
+ if (amount < 0.01) return `$${amount.toFixed(4)}`;
+ if (amount < 1) return `$${amount.toFixed(3)}`;
+ return `$${amount.toFixed(2)}`;
+ }
+
  // src/repl/dev-state.ts
  import { existsSync as existsSync5, readFileSync as readFileSync4, writeFileSync as writeFileSync3, unlinkSync as unlinkSync2, mkdirSync as mkdirSync4 } from "fs";
  import { join as join5 } from "path";
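All PRICING_TABLE figures are USD per million tokens, hence the division by 1e6 in computeCost, and getPricing falls back to the longest matching key prefix, so a dated ID such as "claude-3-5-sonnet-20241022" resolves to the "claude-3-5-sonnet" family row. A worked cost example using the claude-sonnet-4-6 row above; the usage numbers are illustrative:

    // claude-sonnet-4-6: { input: 3, output: 15, cacheWrite: 3.75, cacheRead: 0.3 } USD per 1M tokens.
    const p = { input: 3, output: 15, cacheWrite: 3.75, cacheRead: 0.3 };
    const usage = { inputTokens: 2000, outputTokens: 1000, cacheCreationTokens: 0, cacheReadTokens: 50000 };

    const cost = (
      usage.inputTokens * p.input +                                  // 2000 * 3    = 6000
      usage.outputTokens * p.output +                                // 1000 * 15   = 15000
      (usage.cacheCreationTokens ?? 0) * (p.cacheWrite ?? p.input) + // 0
      (usage.cacheReadTokens ?? 0) * (p.cacheRead ?? p.input)        // 50000 * 0.3 = 15000
    ) / 1e6; // prices are per million tokens

    console.log(cost.toFixed(4));        // "0.0360"
    console.log(`$${cost.toFixed(3)}`);  // "$0.036", matching formatCost's sub-$1 branch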
@@ -3601,6 +3747,9 @@ export {
  getGitRoot,
  getGitContext,
  formatGitContextForPrompt,
+ getPricing,
+ computeCost,
+ formatCost,
  parseSimpleYaml,
  SNAPSHOT_PROMPT,
  sessionHasMeaningfulContent,
@@ -8,7 +8,7 @@ import { platform } from "os";
  import chalk from "chalk";
 
  // src/core/constants.ts
- var VERSION = "0.4.53";
+ var VERSION = "0.4.55";
  var APP_NAME = "ai-cli";
  var CONFIG_DIR_NAME = ".aicli";
  var CONFIG_FILE_NAME = "config.json";
@@ -10,7 +10,7 @@ import {
  SUBAGENT_DEFAULT_MAX_ROUNDS,
  SUBAGENT_MAX_ROUNDS_LIMIT,
  runTestsTool
- } from "./chunk-YIMTDKUW.js";
+ } from "./chunk-W7QVBFIJ.js";
 
  // src/tools/builtin/bash.ts
  import { execSync } from "child_process";
@@ -385,7 +385,7 @@ ${content}`);
  }
  }
  async function runTaskMode(config, providers, configManager, topic) {
- const { TaskOrchestrator } = await import("./task-orchestrator-C42TNHE6.js");
+ const { TaskOrchestrator } = await import("./task-orchestrator-MWO6A4KQ.js");
  const orchestrator = new TaskOrchestrator(config, providers, configManager);
  let interrupted = false;
  const onSigint = () => {