jinzd-ai-cli 0.4.67 → 0.4.68

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,7 +8,7 @@ import { platform } from "os";
8
8
  import chalk from "chalk";
9
9
 
10
10
  // src/core/constants.ts
11
- var VERSION = "0.4.67";
11
+ var VERSION = "0.4.68";
12
12
  var APP_NAME = "ai-cli";
13
13
  var CONFIG_DIR_NAME = ".aicli";
14
14
  var CONFIG_FILE_NAME = "config.json";
@@ -8,7 +8,7 @@ import {
8
8
  RateLimitError,
9
9
  schemaToJsonSchema,
10
10
  truncateForPersist
11
- } from "./chunk-BVLQ3FRA.js";
11
+ } from "./chunk-Q5QSCO5D.js";
12
12
  import {
13
13
  APP_NAME,
14
14
  CONFIG_DIR_NAME,
@@ -21,7 +21,7 @@ import {
21
21
  MCP_TOOL_PREFIX,
22
22
  PLUGINS_DIR_NAME,
23
23
  VERSION
24
- } from "./chunk-F44OKMB2.js";
24
+ } from "./chunk-3LCVJ4AF.js";
25
25
 
26
26
  // src/config/config-manager.ts
27
27
  import { readFileSync, writeFileSync, existsSync, mkdirSync } from "fs";
@@ -191,7 +191,27 @@ var ConfigSchema = z.object({
191
191
  // 插件以完整 Node.js 权限在主进程中执行(可读写文件、访问网络、执行命令),
192
192
  // 必须确认插件来源可信后,再设为 true 启用。
193
193
  // 可通过 /config 命令或直接编辑 ~/.aicli/config.json 开启。
194
- allowPlugins: z.boolean().default(false)
194
+ allowPlugins: z.boolean().default(false),
195
+ // 智能模型路由(v0.4.68+)
196
+ // 按用户每轮输入的内容/标签/长度动态选择模型,在同一 provider 内切换,
197
+ // 例:短问题走 haiku(省钱),planning 走 opus(质量)。
198
+ // enabled=false 时永远返回当前模型。rules 按顺序匹配,首个命中的规则生效。
199
+ // 每个 rule 的 match 必须至少有一个条件(tag/contains/maxLength/minLength)。
200
+ // 详见 src/core/model-router.ts。
201
+ routing: z.object({
202
+ enabled: z.boolean().default(false),
203
+ rules: z.array(z.object({
204
+ match: z.object({
205
+ contains: z.array(z.string()).optional(),
206
+ maxLength: z.number().int().positive().optional(),
207
+ minLength: z.number().int().positive().optional(),
208
+ tag: z.string().optional()
209
+ }),
210
+ model: z.string(),
211
+ name: z.string().optional()
212
+ })).default([]),
213
+ fallback: z.string().optional()
214
+ }).default({ enabled: false, rules: [] })
195
215
  });
196
216
 
197
217
  // src/config/config-manager.ts
@@ -10,7 +10,7 @@ import {
10
10
  SUBAGENT_DEFAULT_MAX_ROUNDS,
11
11
  SUBAGENT_MAX_ROUNDS_LIMIT,
12
12
  runTestsTool
13
- } from "./chunk-F44OKMB2.js";
13
+ } from "./chunk-3LCVJ4AF.js";
14
14
 
15
15
  // src/tools/builtin/bash.ts
16
16
  import { execSync } from "child_process";
@@ -6,7 +6,7 @@ import { platform } from "os";
6
6
  import chalk from "chalk";
7
7
 
8
8
  // src/core/constants.ts
9
- var VERSION = "0.4.67";
9
+ var VERSION = "0.4.68";
10
10
  var APP_NAME = "ai-cli";
11
11
  var CONFIG_DIR_NAME = ".aicli";
12
12
  var CONFIG_FILE_NAME = "config.json";
@@ -385,7 +385,7 @@ ${content}`);
385
385
  }
386
386
  }
387
387
  async function runTaskMode(config, providers, configManager, topic) {
388
- const { TaskOrchestrator } = await import("./task-orchestrator-4PVBMQJ7.js");
388
+ const { TaskOrchestrator } = await import("./task-orchestrator-WDRXASIC.js");
389
389
  const orchestrator = new TaskOrchestrator(config, providers, configManager);
390
390
  let interrupted = false;
391
391
  const onSigint = () => {
package/dist/index.js CHANGED
@@ -31,7 +31,7 @@ import {
31
31
  saveDevState,
32
32
  sessionHasMeaningfulContent,
33
33
  setupProxy
34
- } from "./chunk-XSIVGDCN.js";
34
+ } from "./chunk-G5AISHJE.js";
35
35
  import {
36
36
  ToolExecutor,
37
37
  ToolRegistry,
@@ -47,7 +47,7 @@ import {
47
47
  spawnAgentContext,
48
48
  theme,
49
49
  undoStack
50
- } from "./chunk-BVLQ3FRA.js";
50
+ } from "./chunk-Q5QSCO5D.js";
51
51
  import {
52
52
  fileCheckpoints
53
53
  } from "./chunk-4BKXL7SM.js";
@@ -72,7 +72,7 @@ import {
72
72
  SKILLS_DIR_NAME,
73
73
  VERSION,
74
74
  buildUserIdentityPrompt
75
- } from "./chunk-F44OKMB2.js";
75
+ } from "./chunk-3LCVJ4AF.js";
76
76
 
77
77
  // src/index.ts
78
78
  import { program } from "commander";
@@ -1075,6 +1075,79 @@ function createDefaultCommands() {
1075
1075
  );
1076
1076
  }
1077
1077
  },
1078
+ {
1079
+ name: "route",
1080
+ description: "Smart model routing \u2014 enable/disable or inspect routing rules",
1081
+ usage: "/route [on|off|show|test <message>]",
1082
+ async execute(args, ctx) {
1083
+ const sub = (args[0] ?? "show").toLowerCase();
1084
+ const routing = ctx.config.get("routing");
1085
+ if (sub === "on" || sub === "enable") {
1086
+ ctx.config.setByPath("routing.enabled", "true");
1087
+ ctx.renderer.printSuccess("Smart model routing enabled.");
1088
+ if (!routing || routing.rules.length === 0) {
1089
+ ctx.renderer.printInfo(
1090
+ 'No rules configured yet. Add rules under `routing.rules` in ~/.aicli/config.json.\nExample: { match: { tag: "fast" }, model: "claude-haiku-4-5" }'
1091
+ );
1092
+ }
1093
+ return;
1094
+ }
1095
+ if (sub === "off" || sub === "disable") {
1096
+ ctx.config.setByPath("routing.enabled", "false");
1097
+ ctx.renderer.printSuccess("Smart model routing disabled.");
1098
+ return;
1099
+ }
1100
+ if (sub === "test") {
1101
+ const msg = args.slice(1).join(" ").trim();
1102
+ if (!msg) {
1103
+ console.log(theme.warning(" Usage: /route test <message>"));
1104
+ return;
1105
+ }
1106
+ const decision = ctx.computeRoutingDecision(msg);
1107
+ const marker = decision.overridden ? theme.accent("\u2192 ROUTED") : theme.dim("(unchanged)");
1108
+ console.log();
1109
+ console.log(` Input: ${theme.dim(msg)}`);
1110
+ console.log(` Current: ${theme.info(ctx.getCurrentModel())}`);
1111
+ console.log(` Decision: ${theme.info(decision.model)} ${marker}`);
1112
+ console.log(` Reason: ${theme.dim(decision.reason)}`);
1113
+ if (typeof decision.ruleIdx === "number") {
1114
+ console.log(` Rule: #${decision.ruleIdx}`);
1115
+ }
1116
+ console.log();
1117
+ return;
1118
+ }
1119
+ if (sub === "show" || sub === "status") {
1120
+ console.log();
1121
+ console.log(` ${theme.heading("Smart Model Routing")}`);
1122
+ console.log(` Status: ${routing?.enabled ? theme.success("enabled") : theme.dim("disabled")}`);
1123
+ console.log(` Provider: ${theme.info(ctx.getCurrentProvider())}`);
1124
+ console.log(` Current: ${theme.info(ctx.getCurrentModel())}`);
1125
+ if (routing?.fallback) {
1126
+ console.log(` Fallback: ${theme.info(routing.fallback)}`);
1127
+ }
1128
+ console.log();
1129
+ if (!routing || routing.rules.length === 0) {
1130
+ console.log(` ${theme.dim("(no rules configured \u2014 edit ~/.aicli/config.json `routing.rules`)")}`);
1131
+ } else {
1132
+ console.log(` ${theme.heading("Rules")} ${theme.dim(`(evaluated top-to-bottom)`)}:`);
1133
+ routing.rules.forEach((r, i) => {
1134
+ const parts = [];
1135
+ if (r.match.tag) parts.push(`tag=#${r.match.tag}`);
1136
+ if (r.match.contains && r.match.contains.length > 0) parts.push(`contains=[${r.match.contains.slice(0, 3).join(", ")}${r.match.contains.length > 3 ? ", \u2026" : ""}]`);
1137
+ if (typeof r.match.maxLength === "number") parts.push(`maxLen=${r.match.maxLength}`);
1138
+ if (typeof r.match.minLength === "number") parts.push(`minLen=${r.match.minLength}`);
1139
+ const cond = parts.length > 0 ? parts.join(" & ") : theme.warning("(empty \u2014 never matches)");
1140
+ console.log(` ${theme.dim(`#${i}`)} ${r.name ? theme.accent(r.name) + " " : ""}${cond} ${theme.dim("\u2192")} ${theme.info(r.model)}`);
1141
+ });
1142
+ }
1143
+ console.log();
1144
+ console.log(` ${theme.dim("Commands: /route on | off | test <msg> | show")}`);
1145
+ console.log();
1146
+ return;
1147
+ }
1148
+ console.log(theme.warning(` Unknown subcommand: ${sub}. Usage: /route [on|off|show|test <message>]`));
1149
+ }
1150
+ },
1078
1151
  {
1079
1152
  name: "clear",
1080
1153
  description: "Clear conversation history",
@@ -1124,18 +1197,18 @@ function createDefaultCommands() {
1124
1197
  return;
1125
1198
  }
1126
1199
  const sessions = ctx.sessions.listSessions();
1127
- const matches = sessions.filter((s) => s.id.startsWith(id));
1128
- if (matches.length === 0) {
1200
+ const matches2 = sessions.filter((s) => s.id.startsWith(id));
1201
+ if (matches2.length === 0) {
1129
1202
  ctx.renderer.renderError(`Session '${id}' not found.`);
1130
1203
  return;
1131
1204
  }
1132
- if (matches.length > 1) {
1133
- console.log(theme.warning(` \u26A0 Ambiguous prefix '${id}' matches ${matches.length} sessions \u2014 loading most recent:`));
1134
- for (const m of matches.slice(0, 5)) {
1205
+ if (matches2.length > 1) {
1206
+ console.log(theme.warning(` \u26A0 Ambiguous prefix '${id}' matches ${matches2.length} sessions \u2014 loading most recent:`));
1207
+ for (const m of matches2.slice(0, 5)) {
1135
1208
  console.log(theme.dim(` ${m.id.slice(0, 12)} ${m.title ?? "(untitled)"}`));
1136
1209
  }
1137
1210
  }
1138
- const match = matches[0];
1211
+ const match = matches2[0];
1139
1212
  ctx.sessions.loadSession(match.id);
1140
1213
  ctx.setProvider(match.provider, match.model);
1141
1214
  ctx.resetSessionTokenUsage();
@@ -1373,13 +1446,13 @@ ${text}
1373
1446
  ${theme.heading(`Found ${results.length} session(s) containing "${query}"`)}
1374
1447
  `);
1375
1448
  for (const r of results) {
1376
- const { sessionMeta, matches } = r;
1449
+ const { sessionMeta, matches: matches2 } = r;
1377
1450
  const dateStr = sessionMeta.updated.toLocaleDateString();
1378
1451
  console.log(
1379
1452
  ` ${theme.accent(sessionMeta.id.slice(0, 8))}` + theme.dim(` [${dateStr}] ${sessionMeta.provider} / ${sessionMeta.model}`) + (sessionMeta.title ? `
1380
1453
  ${theme.dim(" " + sessionMeta.title)}` : "")
1381
1454
  );
1382
- for (const m of matches) {
1455
+ for (const m of matches2) {
1383
1456
  const icon = m.role === "user" ? "\u{1F464}" : "\u{1F916}";
1384
1457
  console.log(` ${icon} ${theme.warning(m.snippet)}`);
1385
1458
  }
@@ -2194,7 +2267,7 @@ ${hint}` : "")
2194
2267
  usage: "/test [command|filter]",
2195
2268
  async execute(args, ctx) {
2196
2269
  try {
2197
- const { executeTests } = await import("./run-tests-NUF7CNM4.js");
2270
+ const { executeTests } = await import("./run-tests-WD53PYVA.js");
2198
2271
  const argStr = args.join(" ").trim();
2199
2272
  let testArgs = {};
2200
2273
  if (argStr) {
@@ -3394,6 +3467,74 @@ var CostTracker = class {
3394
3467
  }
3395
3468
  };
3396
3469
 
3470
+ // src/core/model-router.ts
3471
// A tag is `#` + a letter + up to 31 word/hyphen characters, and must sit at
// the start of the message or directly after whitespace.
var TAG_REGEX = /(?:^|\s)#([a-zA-Z][\w-]{0,31})\b/g;

/**
 * Collect every `#tag` found in a message.
 *
 * @param {string} message - Raw user input.
 * @returns {Set<string>} Lower-cased tag names without the leading `#`.
 */
function extractTags(message) {
  const found = /* @__PURE__ */ new Set();
  // The regex is global and shared, so rewind it before scanning.
  TAG_REGEX.lastIndex = 0;
  for (let hit = TAG_REGEX.exec(message); hit !== null; hit = TAG_REGEX.exec(message)) {
    found.add(hit[1].toLowerCase());
  }
  return found;
}
3481
/**
 * Decide whether a message satisfies a routing rule's matcher.
 *
 * All conditions present on the matcher must hold (logical AND). A matcher
 * with no usable condition never matches. Lengths are measured on the
 * trimmed message; tag and keyword comparisons are case-insensitive.
 *
 * @param {string} message - Raw user input.
 * @param {{tag?: string, contains?: string[], maxLength?: number, minLength?: number}} matcher
 * @returns {boolean} True only when at least one condition exists and all pass.
 */
function matches(message, matcher) {
  const text = message.trim();
  const { tag, contains, maxLength, minLength } = matcher;

  const wantsTag = Boolean(tag);
  const wantsKeywords = Boolean(contains) && contains.length > 0;
  const hasMax = typeof maxLength === "number";
  const hasMin = typeof minLength === "number";

  // An empty matcher would otherwise match everything; treat it as "never".
  if (!(wantsTag || wantsKeywords || hasMax || hasMin)) {
    return false;
  }
  if (wantsTag && !extractTags(text).has(tag.toLowerCase())) {
    return false;
  }
  if (wantsKeywords) {
    const haystack = text.toLowerCase();
    // Any single keyword is enough.
    if (!contains.some((kw) => haystack.includes(kw.toLowerCase()))) {
      return false;
    }
  }
  if (hasMax && text.length > maxLength) {
    return false;
  }
  if (hasMin && text.length < minLength) {
    return false;
  }
  return true;
}
3501
/**
 * Choose the model for one user turn from the routing configuration.
 *
 * Rules are evaluated in order; the first rule whose matcher accepts the
 * message AND whose model is available wins. A matching rule whose model is
 * not available is skipped and evaluation continues with the next rule.
 * When no rule wins, `config.fallback` is used if it is set, available and
 * different from the current model. The fallback is not consulted when there
 * are no rules at all.
 *
 * @param {string} message - Raw user input for this turn.
 * @param {string} currentModel - Model currently selected.
 * @param {{enabled: boolean, rules?: Array<{match: object, model: string, name?: string}>, fallback?: string}} config
 * @param {string[]} [availableModels=[]] - Model ids that may be chosen; an
 *   empty list means "no restriction".
 * @returns {{model: string, reason: string, overridden: boolean, ruleIdx?: number}}
 *   `overridden` is true only when the returned model differs from
 *   `currentModel`; `ruleIdx` is set when a rule decided the outcome.
 */
function pickModel(message, currentModel, config, availableModels = []) {
  const unchanged = (reason) => ({ model: currentModel, reason, overridden: false });

  if (!config.enabled) {
    return unchanged("routing disabled");
  }
  const rules = config.rules ?? [];
  if (rules.length === 0) {
    // Routing is on but has nothing to evaluate; say so instead of claiming
    // it is disabled, which would mislead `/route test` output.
    return unchanged("no rules configured");
  }

  const allowed = new Set(availableModels);
  const isAvailable = (m) => allowed.size === 0 || allowed.has(m);

  for (const [i, rule] of rules.entries()) {
    if (!matches(message, rule.match)) continue;
    if (!isAvailable(rule.model)) continue;
    const label = rule.name ?? `#${i}`;
    const sameAsCurrent = rule.model === currentModel;
    return {
      model: rule.model,
      reason: sameAsCurrent ? `rule "${label}" matched (same as current)` : `rule "${label}" matched`,
      overridden: !sameAsCurrent,
      ruleIdx: i
    };
  }

  if (config.fallback && config.fallback !== currentModel && isAvailable(config.fallback)) {
    return { model: config.fallback, reason: "fallback", overridden: true };
  }
  return unchanged("no rule matched");
}
3534
/**
 * Remove the built-in routing tags (`#fast`, `#deep`, `#default`) from a
 * message before it is sent to the model.
 *
 * Only whole tags are removed: `#fast-track` is a different tag and is left
 * alone. All other whitespace in the message is preserved, so multi-line
 * prompts and pasted code keep their newlines and indentation; only the
 * horizontal whitespace directly after a removed tag is dropped, and the
 * result is trimmed at both ends.
 *
 * @param {string} message - Raw user input.
 * @returns {string} The message without routing tags, trimmed.
 */
function stripRoutingTags(message) {
  const routingTags = /* @__PURE__ */ new Set(["fast", "deep", "default"]);
  // Same tag grammar as tag extraction (letter, then up to 31 word/hyphen
  // characters). The lookbehind keeps the preceding whitespace in place so
  // back-to-back tags ("#fast #deep") are both recognised.
  const cleaned = message.replace(
    /(?<=^|\s)#([a-zA-Z][\w-]{0,31})\b[ \t]*/g,
    (whole, tag) => (routingTags.has(tag.toLowerCase()) ? "" : whole)
  );
  return cleaned.trim();
}
3537
+
3397
3538
  // src/repl/notify.ts
3398
3539
  import { spawn } from "child_process";
3399
3540
  import { platform as platform2 } from "os";
@@ -3570,13 +3711,15 @@ var Repl = class {
3570
3711
  contextLayers = [];
3571
3712
  /** 本次会话累计 token 用量 */
3572
3713
  sessionTokenUsage = { inputTokens: 0, outputTokens: 0, cacheCreationTokens: 0, cacheReadTokens: 0 };
3573
- /** Fold a single-request TokenUsage (with optional cache fields) into sessionTokenUsage + cost tracker. */
3574
- addSessionUsage(u) {
3714
+ /** Fold a single-request TokenUsage (with optional cache fields) into sessionTokenUsage + cost tracker.
3715
+ * modelOverride lets the smart router attribute cost to the actually-used model
3716
+ * when it differs from the UI-selected currentModel. */
3717
+ addSessionUsage(u, modelOverride) {
3575
3718
  this.sessionTokenUsage.inputTokens += u.inputTokens;
3576
3719
  this.sessionTokenUsage.outputTokens += u.outputTokens;
3577
3720
  this.sessionTokenUsage.cacheCreationTokens += u.cacheCreationTokens ?? 0;
3578
3721
  this.sessionTokenUsage.cacheReadTokens += u.cacheReadTokens ?? 0;
3579
- this.costTracker.addCost(this.currentProvider, this.currentModel, u);
3722
+ this.costTracker.addCost(this.currentProvider, modelOverride ?? this.currentModel, u);
3580
3723
  }
3581
3724
  /** 启动时检测到的 Git 分支(无 git 仓库时为 null) */
3582
3725
  gitBranch = null;
@@ -4468,7 +4611,16 @@ Session '${this.resumeSessionId}' not found.
4468
4611
  `));
4469
4612
  }
4470
4613
  }
4471
- const messageContent = parts.length > 0 ? parts.length === 1 && parts[0].type === "text" ? parts[0].text : parts : userInput;
4614
+ const routingDecision = this.computeRoutingDecision(userInput);
4615
+ const cleanInput = stripRoutingTags(userInput);
4616
+ let effectiveParts = parts;
4617
+ if (cleanInput !== userInput && parts.length > 0 && parts[0].type === "text") {
4618
+ effectiveParts = [
4619
+ { type: "text", text: stripRoutingTags(parts[0].text ?? "") },
4620
+ ...parts.slice(1)
4621
+ ];
4622
+ }
4623
+ const messageContent = effectiveParts.length > 0 ? effectiveParts.length === 1 && effectiveParts[0].type === "text" ? effectiveParts[0].text : effectiveParts : cleanInput;
4472
4624
  if (hasImage) {
4473
4625
  const visionHint = this.getVisionModelHint();
4474
4626
  if (visionHint) {
@@ -4489,6 +4641,12 @@ Session '${this.resumeSessionId}' not found.
4489
4641
  timestamp: /* @__PURE__ */ new Date()
4490
4642
  });
4491
4643
  this.events.emit("message.before", { input: userInput });
4644
+ if (routingDecision.overridden) {
4645
+ process.stdout.write(
4646
+ theme.dim(` \u2192 Routed to ${routingDecision.model} (${routingDecision.reason})
4647
+ `)
4648
+ );
4649
+ }
4492
4650
  const t0 = Date.now();
4493
4651
  try {
4494
4652
  const provider = this.providers.get(this.currentProvider);
@@ -4496,10 +4654,11 @@ Session '${this.resumeSessionId}' not found.
4496
4654
  if (supportsTools) {
4497
4655
  await this.handleChatWithTools(
4498
4656
  provider,
4499
- session.messages
4657
+ session.messages,
4658
+ routingDecision.model
4500
4659
  );
4501
4660
  } else {
4502
- await this.handleChatSimple(provider, session.messages);
4661
+ await this.handleChatSimple(provider, session.messages, routingDecision.model);
4503
4662
  }
4504
4663
  if (this.config.get("session").autoSave) {
4505
4664
  if (autoTrimSessionIfNeeded(session)) {
@@ -4577,9 +4736,10 @@ Session '${this.resumeSessionId}' not found.
4577
4736
  */
4578
4737
  /** 运行时 thinking 模式覆盖:null=使用配置值,true/false=运行时覆盖 */
4579
4738
  runtimeThinking = null;
4580
- getModelParams() {
4739
+ getModelParams(modelOverride) {
4581
4740
  const allParams = this.config.get("modelParams");
4582
- const params = allParams[this.currentModel] ?? {};
4741
+ const modelId = modelOverride ?? this.currentModel;
4742
+ const params = allParams[modelId] ?? {};
4583
4743
  return {
4584
4744
  ...params,
4585
4745
  maxTokens: params.maxTokens ?? DEFAULT_MAX_TOKENS,
@@ -4587,6 +4747,25 @@ Session '${this.resumeSessionId}' not found.
4587
4747
  thinkingBudget: params.thinkingBudget
4588
4748
  };
4589
4749
  }
4750
+ /**
4751
+ * Compute smart-routing decision for this user turn.
4752
+ * Only considers models available for the current provider (rule skipped otherwise).
4753
+ * When routing is disabled or no rule matches, returns the current model unchanged.
4754
+ */
4755
+ computeRoutingDecision(userInput) {
4756
+ const routingConfig = this.config.get("routing");
4757
+ if (!routingConfig || !routingConfig.enabled) {
4758
+ return { model: this.currentModel, reason: "routing disabled", overridden: false };
4759
+ }
4760
+ let availableModels = [];
4761
+ try {
4762
+ const provider = this.providers.get(this.currentProvider);
4763
+ availableModels = provider.info.models.map((m) => m.id);
4764
+ } catch {
4765
+ availableModels = [];
4766
+ }
4767
+ return pickModel(userInput, this.currentModel, routingConfig, availableModels);
4768
+ }
4590
4769
  // ─── Context 自动管理 ───────────────────────────────────────────────────
4591
4770
  /**
4592
4771
  * 估算文本的 token 数。
@@ -4639,12 +4818,15 @@ Session '${this.resumeSessionId}' not found.
4639
4818
  return total;
4640
4819
  }
4641
4820
  /**
4642
- * 获取当前模型的 context window 大小。
4821
+ * 获取指定模型的 context window 大小(默认当前模型)。
4822
+ * 智能路由可能在 handleChatWithTools 内把 effectiveModel 暂时切到别的模型,
4823
+ * 故此处接受可选的 modelOverride 以保持计算一致性。
4643
4824
  */
4644
- getContextWindowSize() {
4825
+ getContextWindowSize(modelOverride) {
4645
4826
  try {
4646
4827
  const provider = this.providers.get(this.currentProvider);
4647
- const modelInfo = provider.info.models.find((m) => m.id === this.currentModel);
4828
+ const modelId = modelOverride ?? this.currentModel;
4829
+ const modelInfo = provider.info.models.find((m) => m.id === modelId);
4648
4830
  return modelInfo?.contextWindow ?? 0;
4649
4831
  } catch {
4650
4832
  return 0;
@@ -4881,16 +5063,17 @@ Session '${this.resumeSessionId}' not found.
4881
5063
  }
4882
5064
  });
4883
5065
  }
4884
- async handleChatSimple(provider, messages) {
5066
+ async handleChatSimple(provider, messages, modelOverride) {
4885
5067
  const session = this.sessions.current;
4886
5068
  const useStreaming = this.config.get("ui").streaming;
4887
- const modelParams = this.getModelParams();
5069
+ const effectiveModel = modelOverride ?? this.currentModel;
5070
+ const modelParams = this.getModelParams(effectiveModel);
4888
5071
  if (useStreaming) {
4889
5072
  const ac = this.setupStreamInterrupt();
4890
5073
  try {
4891
5074
  const stream = provider.chatStream({
4892
5075
  messages,
4893
- model: this.currentModel,
5076
+ model: effectiveModel,
4894
5077
  systemPrompt: this.buildCurrentSystemPrompt(),
4895
5078
  stream: true,
4896
5079
  temperature: modelParams.temperature,
@@ -4910,7 +5093,7 @@ Session '${this.resumeSessionId}' not found.
4910
5093
  session.addMessage({ role: "assistant", content, timestamp: /* @__PURE__ */ new Date() });
4911
5094
  this.events.emit("message.after", { content });
4912
5095
  if (usage) {
4913
- this.addSessionUsage(usage);
5096
+ this.addSessionUsage(usage, effectiveModel);
4914
5097
  session.addTokenUsage(usage);
4915
5098
  if (showTokens && !tokensShown) {
4916
5099
  this.renderer.renderUsage(usage, this.sessionTokenUsage);
@@ -4924,7 +5107,7 @@ Session '${this.resumeSessionId}' not found.
4924
5107
  try {
4925
5108
  const response = await provider.chat({
4926
5109
  messages,
4927
- model: this.currentModel,
5110
+ model: effectiveModel,
4928
5111
  systemPrompt: this.buildCurrentSystemPrompt(),
4929
5112
  stream: false,
4930
5113
  temperature: modelParams.temperature,
@@ -4939,7 +5122,7 @@ Session '${this.resumeSessionId}' not found.
4939
5122
  session.addMessage({ role: "assistant", content: response.content, timestamp: /* @__PURE__ */ new Date() });
4940
5123
  this.events.emit("message.after", { content: response.content });
4941
5124
  if (response.usage) {
4942
- this.addSessionUsage(response.usage);
5125
+ this.addSessionUsage(response.usage, effectiveModel);
4943
5126
  session.addTokenUsage(response.usage);
4944
5127
  if (this.shouldShowTokens()) {
4945
5128
  this.renderer.renderUsage(response.usage, this.sessionTokenUsage);
@@ -5052,8 +5235,9 @@ Session '${this.resumeSessionId}' not found.
5052
5235
  rawContent
5053
5236
  };
5054
5237
  }
5055
- async handleChatWithTools(provider, messages) {
5238
+ async handleChatWithTools(provider, messages, modelOverride) {
5056
5239
  const session = this.sessions.current;
5240
+ const effectiveModel = modelOverride ?? this.currentModel;
5057
5241
  let toolDefs;
5058
5242
  let mcpBudgetNote = null;
5059
5243
  const usedMcpToolNames = /* @__PURE__ */ new Set();
@@ -5064,7 +5248,7 @@ Session '${this.resumeSessionId}' not found.
5064
5248
  if (skillFilter) {
5065
5249
  toolDefs = this.toolRegistry.getDefinitions().filter((t) => skillFilter.has(t.name));
5066
5250
  } else {
5067
- const contextWindow = this.getContextWindowSize();
5251
+ const contextWindow = this.getContextWindowSize(effectiveModel);
5068
5252
  if (contextWindow > 0) {
5069
5253
  const toolBudget = Math.floor(contextWindow * 0.2);
5070
5254
  const { definitions, trimmedCount, systemNote } = this.toolRegistry.getDefinitionsWithBudget(toolBudget, usedMcpToolNames);
@@ -5114,7 +5298,7 @@ You have a maximum of ${maxToolRounds} tool call rounds for this task. Plan effi
5114
5298
  const systemPrompt = baseSystemPrompt + roundBudgetHint + (mcpBudgetNote ? `
5115
5299
 
5116
5300
  ${mcpBudgetNote}` : "");
5117
- const modelParams = this.getModelParams();
5301
+ const modelParams = this.getModelParams(effectiveModel);
5118
5302
  const useStreaming = this.config.get("ui").streaming;
5119
5303
  const spinner = this.renderer.showSpinner("Thinking...");
5120
5304
  const roundUsage = { inputTokens: 0, outputTokens: 0, cacheCreationTokens: 0, cacheReadTokens: 0 };
@@ -5204,7 +5388,7 @@ ${mcpBudgetNote}` : "");
5204
5388
  )
5205
5389
  );
5206
5390
  if (roundUsage.inputTokens > 0 || roundUsage.outputTokens > 0) {
5207
- this.addSessionUsage(roundUsage);
5391
+ this.addSessionUsage(roundUsage, effectiveModel);
5208
5392
  session.addTokenUsage(roundUsage);
5209
5393
  if (this.shouldShowTokens()) {
5210
5394
  this.renderer.renderUsage(roundUsage, this.sessionTokenUsage);
@@ -5232,7 +5416,7 @@ ${mcpBudgetNote}` : "");
5232
5416
  let alreadyRendered = false;
5233
5417
  const chatRequest = {
5234
5418
  messages: apiMessages,
5235
- model: this.currentModel,
5419
+ model: effectiveModel,
5236
5420
  systemPrompt,
5237
5421
  stream: false,
5238
5422
  temperature: modelParams.temperature,
@@ -5334,7 +5518,7 @@ ${mcpBudgetNote}` : "");
5334
5518
  )
5335
5519
  );
5336
5520
  if (roundUsage.inputTokens > 0 || roundUsage.outputTokens > 0) {
5337
- this.addSessionUsage(roundUsage);
5521
+ this.addSessionUsage(roundUsage, effectiveModel);
5338
5522
  session.addTokenUsage(roundUsage);
5339
5523
  if (this.shouldShowTokens()) {
5340
5524
  this.renderer.renderUsage(roundUsage, this.sessionTokenUsage);
@@ -5369,7 +5553,7 @@ ${mcpBudgetNote}` : "");
5369
5553
  });
5370
5554
  this.events.emit("message.after", { content: finalContent });
5371
5555
  if (roundUsage.inputTokens > 0 || roundUsage.outputTokens > 0) {
5372
- this.addSessionUsage(roundUsage);
5556
+ this.addSessionUsage(roundUsage, effectiveModel);
5373
5557
  session.addTokenUsage(roundUsage);
5374
5558
  if (this.shouldShowTokens()) {
5375
5559
  this.renderer.renderUsage(roundUsage, this.sessionTokenUsage);
@@ -5387,7 +5571,7 @@ ${mcpBudgetNote}` : "");
5387
5571
  try {
5388
5572
  const genStream = provider.chatStream({
5389
5573
  messages: apiMessages,
5390
- model: this.currentModel,
5574
+ model: effectiveModel,
5391
5575
  systemPrompt,
5392
5576
  stream: true,
5393
5577
  temperature: modelParams.temperature,
@@ -5423,7 +5607,7 @@ ${mcpBudgetNote}` : "");
5423
5607
  const newMsgs2 = provider.buildToolResultMessages(result.toolCalls, syntheticResults, reasoningContent2);
5424
5608
  extraMessages.push(...newMsgs2);
5425
5609
  if (roundUsage.inputTokens > 0 || roundUsage.outputTokens > 0) {
5426
- this.addSessionUsage(roundUsage);
5610
+ this.addSessionUsage(roundUsage, effectiveModel);
5427
5611
  session.addTokenUsage(roundUsage);
5428
5612
  if (teeShowTokens && !teeTokShown) {
5429
5613
  this.renderer.renderUsage(roundUsage, this.sessionTokenUsage);
@@ -5438,14 +5622,14 @@ ${mcpBudgetNote}` : "");
5438
5622
  askUserContext.rl = this.rl;
5439
5623
  googleSearchContext.configManager = this.config;
5440
5624
  streamToFileContext.provider = provider;
5441
- streamToFileContext.model = this.currentModel;
5625
+ streamToFileContext.model = effectiveModel;
5442
5626
  streamToFileContext.systemPrompt = systemPrompt;
5443
5627
  streamToFileContext.messages = apiMessages;
5444
5628
  streamToFileContext.extraMessages = extraMessages;
5445
5629
  streamToFileContext.temperature = modelParams.temperature;
5446
5630
  streamToFileContext.timeout = modelParams.timeout;
5447
5631
  spawnAgentContext.provider = provider;
5448
- spawnAgentContext.model = this.currentModel;
5632
+ spawnAgentContext.model = effectiveModel;
5449
5633
  spawnAgentContext.systemPrompt = systemPrompt;
5450
5634
  spawnAgentContext.modelParams = modelParams;
5451
5635
  spawnAgentContext.configManager = this.config;
@@ -5605,7 +5789,7 @@ ${mcpBudgetNote}` : "");
5605
5789
  const summaryResult = await provider.chatWithTools(
5606
5790
  {
5607
5791
  messages: apiMessages,
5608
- model: this.currentModel,
5792
+ model: effectiveModel,
5609
5793
  systemPrompt,
5610
5794
  stream: false,
5611
5795
  temperature: modelParams.temperature,
@@ -5643,7 +5827,7 @@ Tip: You can continue the conversation by asking the AI to proceed.`
5643
5827
  );
5644
5828
  }
5645
5829
  if (roundUsage.inputTokens > 0 || roundUsage.outputTokens > 0) {
5646
- this.addSessionUsage(roundUsage);
5830
+ this.addSessionUsage(roundUsage, effectiveModel);
5647
5831
  session.addTokenUsage(roundUsage);
5648
5832
  if (this.shouldShowTokens()) {
5649
5833
  this.renderer.renderUsage(roundUsage, this.sessionTokenUsage);
@@ -5819,6 +6003,7 @@ Tip: You can continue the conversation by asking the AI to proceed.`
5819
6003
  forkSession: (messageCount, title) => this.sessions.forkSession(messageCount, title),
5820
6004
  getToolExecutor: () => this.toolExecutor,
5821
6005
  getCostTracker: () => this.costTracker,
6006
+ computeRoutingDecision: (userInput) => this.computeRoutingDecision(userInput),
5822
6007
  exit: () => this.handleExit()
5823
6008
  };
5824
6009
  await cmd.execute(args, ctx);
@@ -5923,7 +6108,7 @@ program.command("web").description("Start Web UI server with browser-based chat
5923
6108
  console.error("Error: Invalid port number. Must be between 1 and 65535.");
5924
6109
  process.exit(1);
5925
6110
  }
5926
- const { startWebServer } = await import("./server-5HLNHRKM.js");
6111
+ const { startWebServer } = await import("./server-MDBQX5UZ.js");
5927
6112
  await startWebServer({ port, host: options.host });
5928
6113
  });
5929
6114
  program.command("user [action] [username]").description("Manage Web UI users (list | create <name> | delete <name> | reset-password <name> | migrate <name>)").action(async (action, username) => {
@@ -6156,7 +6341,7 @@ program.command("hub [topic]").description("Start multi-agent hub (discuss / bra
6156
6341
  }),
6157
6342
  config.get("customProviders")
6158
6343
  );
6159
- const { startHub } = await import("./hub-RGK325NQ.js");
6344
+ const { startHub } = await import("./hub-4VPTOMBP.js");
6160
6345
  await startHub(
6161
6346
  {
6162
6347
  topic: topic ?? "",
@@ -1,7 +1,7 @@
1
1
  import {
2
2
  executeTests,
3
3
  runTestsTool
4
- } from "./chunk-X7NVAEFI.js";
4
+ } from "./chunk-VO5IZN2C.js";
5
5
  export {
6
6
  executeTests,
7
7
  runTestsTool
@@ -2,7 +2,7 @@
2
2
  import {
3
3
  executeTests,
4
4
  runTestsTool
5
- } from "./chunk-F44OKMB2.js";
5
+ } from "./chunk-3LCVJ4AF.js";
6
6
  export {
7
7
  executeTests,
8
8
  runTestsTool
@@ -21,7 +21,7 @@ import {
21
21
  persistToolRound,
22
22
  rebuildExtraMessages,
23
23
  setupProxy
24
- } from "./chunk-XSIVGDCN.js";
24
+ } from "./chunk-G5AISHJE.js";
25
25
  import {
26
26
  AuthManager
27
27
  } from "./chunk-BYNY5JPB.js";
@@ -42,7 +42,7 @@ import {
42
42
  spawnAgentContext,
43
43
  truncateOutput,
44
44
  undoStack
45
- } from "./chunk-BVLQ3FRA.js";
45
+ } from "./chunk-Q5QSCO5D.js";
46
46
  import "./chunk-4BKXL7SM.js";
47
47
  import {
48
48
  AGENTIC_BEHAVIOR_GUIDELINE,
@@ -62,7 +62,7 @@ import {
62
62
  SKILLS_DIR_NAME,
63
63
  VERSION,
64
64
  buildUserIdentityPrompt
65
- } from "./chunk-F44OKMB2.js";
65
+ } from "./chunk-3LCVJ4AF.js";
66
66
 
67
67
  // src/web/server.ts
68
68
  import express from "express";
@@ -1946,7 +1946,7 @@ ${undoResults.map((r) => ` \u2022 ${r}`).join("\n")}` });
1946
1946
  case "test": {
1947
1947
  this.send({ type: "info", message: "\u{1F9EA} Running tests..." });
1948
1948
  try {
1949
- const { executeTests } = await import("./run-tests-NUF7CNM4.js");
1949
+ const { executeTests } = await import("./run-tests-WD53PYVA.js");
1950
1950
  const argStr = args.join(" ").trim();
1951
1951
  let testArgs = {};
1952
1952
  if (argStr) {
@@ -4,11 +4,11 @@ import {
4
4
  getDangerLevel,
5
5
  googleSearchContext,
6
6
  truncateOutput
7
- } from "./chunk-BVLQ3FRA.js";
7
+ } from "./chunk-Q5QSCO5D.js";
8
8
  import "./chunk-4BKXL7SM.js";
9
9
  import {
10
10
  SUBAGENT_ALLOWED_TOOLS
11
- } from "./chunk-F44OKMB2.js";
11
+ } from "./chunk-3LCVJ4AF.js";
12
12
 
13
13
  // src/hub/task-orchestrator.ts
14
14
  import { createInterface } from "readline";
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "jinzd-ai-cli",
3
- "version": "0.4.67",
3
+ "version": "0.4.68",
4
4
  "description": "Cross-platform REPL-style AI CLI with multi-provider support",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",