jinzd-ai-cli 0.4.54 → 0.4.55
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-FOFQAEU6.js → chunk-DJ342VFS.js} +1 -1
- package/dist/{chunk-6FYFVPVE.js → chunk-JL5NK6AR.js} +216 -67
- package/dist/{chunk-NP5KZVP6.js → chunk-W7QVBFIJ.js} +1 -1
- package/dist/{chunk-TAR67QTH.js → chunk-YQEIQJ6K.js} +1 -1
- package/dist/{hub-6V54V4O3.js → hub-AUWP4SWJ.js} +1 -1
- package/dist/index.js +103 -35
- package/dist/{run-tests-6G65OGSL.js → run-tests-I6UDHVIS.js} +1 -1
- package/dist/{run-tests-P53FNUJY.js → run-tests-X4PCLXA2.js} +1 -1
- package/dist/{server-BQHIMEBH.js → server-YPAZWGUE.js} +73 -28
- package/dist/{task-orchestrator-TSY7CJE6.js → task-orchestrator-MWO6A4KQ.js} +2 -2
- package/dist/web/client/app.js +10 -1
- package/package.json +1 -1
|
@@ -7,7 +7,7 @@ import {
|
|
|
7
7
|
ProviderNotFoundError,
|
|
8
8
|
RateLimitError,
|
|
9
9
|
schemaToJsonSchema
|
|
10
|
-
} from "./chunk-
|
|
10
|
+
} from "./chunk-YQEIQJ6K.js";
|
|
11
11
|
import {
|
|
12
12
|
APP_NAME,
|
|
13
13
|
CONFIG_DIR_NAME,
|
|
@@ -20,7 +20,7 @@ import {
|
|
|
20
20
|
MCP_TOOL_PREFIX,
|
|
21
21
|
PLUGINS_DIR_NAME,
|
|
22
22
|
VERSION
|
|
23
|
-
} from "./chunk-
|
|
23
|
+
} from "./chunk-W7QVBFIJ.js";
|
|
24
24
|
|
|
25
25
|
// src/config/config-manager.ts
|
|
26
26
|
import { readFileSync, writeFileSync, existsSync, mkdirSync } from "fs";
|
|
@@ -312,6 +312,7 @@ var BaseProvider = class {
|
|
|
312
312
|
};
|
|
313
313
|
|
|
314
314
|
// src/providers/claude.ts
|
|
315
|
+
var CACHE_MIN_SYSTEM_CHARS = 2e3;
|
|
315
316
|
var ClaudeProvider = class extends BaseProvider {
|
|
316
317
|
client;
|
|
317
318
|
info = {
|
|
@@ -382,6 +383,52 @@ var ClaudeProvider = class extends BaseProvider {
|
|
|
382
383
|
}
|
|
383
384
|
return blocks.length > 0 ? blocks : "";
|
|
384
385
|
}
|
|
386
|
+
/**
|
|
387
|
+
* Build a cacheable system prompt payload.
|
|
388
|
+
* When the prompt is long enough to be worth caching, return an array with a
|
|
389
|
+
* single text block carrying `cache_control: { type: 'ephemeral' }`. This caches
|
|
390
|
+
* system + memory + context files across every request in an agentic loop.
|
|
391
|
+
* Short prompts pass through as a plain string (no caching overhead).
|
|
392
|
+
*/
|
|
393
|
+
buildSystemParam(systemPrompt) {
|
|
394
|
+
if (!systemPrompt) return void 0;
|
|
395
|
+
if (systemPrompt.length < CACHE_MIN_SYSTEM_CHARS) return systemPrompt;
|
|
396
|
+
return [
|
|
397
|
+
{
|
|
398
|
+
type: "text",
|
|
399
|
+
text: systemPrompt,
|
|
400
|
+
cache_control: { type: "ephemeral" }
|
|
401
|
+
}
|
|
402
|
+
];
|
|
403
|
+
}
|
|
404
|
+
/**
|
|
405
|
+
* Mark the last tool definition with `cache_control: ephemeral` so the entire
|
|
406
|
+
* tool block (all 24+ tools) is cached together. Anthropic caches everything
|
|
407
|
+
* up to and including a cache breakpoint, so one marker covers all tools.
|
|
408
|
+
* Returns a new array — does not mutate the input.
|
|
409
|
+
*/
|
|
410
|
+
addToolsCacheControl(tools) {
|
|
411
|
+
if (tools.length === 0) return tools;
|
|
412
|
+
const last = tools[tools.length - 1];
|
|
413
|
+
return [
|
|
414
|
+
...tools.slice(0, -1),
|
|
415
|
+
{ ...last, cache_control: { type: "ephemeral" } }
|
|
416
|
+
];
|
|
417
|
+
}
|
|
418
|
+
/** Extract usage (including cache fields) from an Anthropic response. */
|
|
419
|
+
extractUsage(u) {
|
|
420
|
+
const usage = {
|
|
421
|
+
inputTokens: u.input_tokens,
|
|
422
|
+
outputTokens: u.output_tokens
|
|
423
|
+
};
|
|
424
|
+
if (u.cache_creation_input_tokens != null && u.cache_creation_input_tokens > 0) {
|
|
425
|
+
usage.cacheCreationTokens = u.cache_creation_input_tokens;
|
|
426
|
+
}
|
|
427
|
+
if (u.cache_read_input_tokens != null && u.cache_read_input_tokens > 0) {
|
|
428
|
+
usage.cacheReadTokens = u.cache_read_input_tokens;
|
|
429
|
+
}
|
|
430
|
+
return usage;
|
|
431
|
+
}
|
|
385
432
|
/**
|
|
386
433
|
* 构建 Extended Thinking 参数。
|
|
387
434
|
* - thinking 启用时 temperature 必须为 1 或不设置(Anthropic API 要求)
|
|
@@ -432,7 +479,7 @@ var ClaudeProvider = class extends BaseProvider {
|
|
|
432
479
|
const response = await this.client.messages.create({
|
|
433
480
|
model: request.model,
|
|
434
481
|
messages,
|
|
435
|
-
system: request.systemPrompt,
|
|
482
|
+
system: this.buildSystemParam(request.systemPrompt),
|
|
436
483
|
max_tokens: request.maxTokens ?? 8192,
|
|
437
484
|
temperature,
|
|
438
485
|
thinking
|
|
@@ -441,10 +488,7 @@ var ClaudeProvider = class extends BaseProvider {
|
|
|
441
488
|
return {
|
|
442
489
|
content,
|
|
443
490
|
model: response.model,
|
|
444
|
-
usage:
|
|
445
|
-
inputTokens: response.usage.input_tokens,
|
|
446
|
-
outputTokens: response.usage.output_tokens
|
|
447
|
-
}
|
|
491
|
+
usage: this.extractUsage(response.usage)
|
|
448
492
|
};
|
|
449
493
|
} catch (err) {
|
|
450
494
|
throw this.wrapError(err);
|
|
@@ -460,7 +504,7 @@ var ClaudeProvider = class extends BaseProvider {
|
|
|
460
504
|
const stream = this.client.messages.stream({
|
|
461
505
|
model: request.model,
|
|
462
506
|
messages,
|
|
463
|
-
system: request.systemPrompt,
|
|
507
|
+
system: this.buildSystemParam(request.systemPrompt),
|
|
464
508
|
max_tokens: request.maxTokens ?? 8192,
|
|
465
509
|
temperature,
|
|
466
510
|
thinking
|
|
@@ -493,20 +537,22 @@ var ClaudeProvider = class extends BaseProvider {
|
|
|
493
537
|
}
|
|
494
538
|
async chatWithTools(request, tools) {
|
|
495
539
|
try {
|
|
496
|
-
const anthropicTools =
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
Object.
|
|
503
|
-
key,
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
540
|
+
const anthropicTools = this.addToolsCacheControl(
|
|
541
|
+
tools.map((t) => ({
|
|
542
|
+
name: t.name,
|
|
543
|
+
description: t.description,
|
|
544
|
+
input_schema: {
|
|
545
|
+
type: "object",
|
|
546
|
+
properties: Object.fromEntries(
|
|
547
|
+
Object.entries(t.parameters).map(([key, schema]) => [
|
|
548
|
+
key,
|
|
549
|
+
schemaToJsonSchema(schema)
|
|
550
|
+
])
|
|
551
|
+
),
|
|
552
|
+
required: Object.entries(t.parameters).filter(([, s]) => s.required).map(([k]) => k)
|
|
553
|
+
}
|
|
554
|
+
}))
|
|
555
|
+
);
|
|
510
556
|
const baseMessages = request.messages.filter((m) => m.role !== "system").map((m) => ({ role: m.role, content: this.contentToClaudeParts(m.content) }));
|
|
511
557
|
const extraMessages = request._extraMessages ?? [];
|
|
512
558
|
const allMessages = [...baseMessages, ...extraMessages];
|
|
@@ -515,15 +561,12 @@ var ClaudeProvider = class extends BaseProvider {
|
|
|
515
561
|
model: request.model,
|
|
516
562
|
messages: allMessages,
|
|
517
563
|
tools: anthropicTools,
|
|
518
|
-
system: request.systemPrompt,
|
|
564
|
+
system: this.buildSystemParam(request.systemPrompt),
|
|
519
565
|
max_tokens: request.maxTokens ?? 8192,
|
|
520
566
|
temperature,
|
|
521
567
|
thinking
|
|
522
568
|
});
|
|
523
|
-
const usage =
|
|
524
|
-
inputTokens: response.usage.input_tokens,
|
|
525
|
-
outputTokens: response.usage.output_tokens
|
|
526
|
-
};
|
|
569
|
+
const usage = this.extractUsage(response.usage);
|
|
527
570
|
const toolUseBlocks = response.content.filter(
|
|
528
571
|
(b) => b.type === "tool_use"
|
|
529
572
|
);
|
|
@@ -547,20 +590,22 @@ var ClaudeProvider = class extends BaseProvider {
|
|
|
547
590
|
* 同时收集原始 content blocks 供 buildToolResultMessages 使用。
|
|
548
591
|
*/
|
|
549
592
|
async *chatWithToolsStream(request, tools) {
|
|
550
|
-
const anthropicTools =
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
Object.
|
|
557
|
-
key,
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
593
|
+
const anthropicTools = this.addToolsCacheControl(
|
|
594
|
+
tools.map((t) => ({
|
|
595
|
+
name: t.name,
|
|
596
|
+
description: t.description,
|
|
597
|
+
input_schema: {
|
|
598
|
+
type: "object",
|
|
599
|
+
properties: Object.fromEntries(
|
|
600
|
+
Object.entries(t.parameters).map(([key, schema]) => [
|
|
601
|
+
key,
|
|
602
|
+
schemaToJsonSchema(schema)
|
|
603
|
+
])
|
|
604
|
+
),
|
|
605
|
+
required: Object.entries(t.parameters).filter(([, s]) => s.required).map(([k]) => k)
|
|
606
|
+
}
|
|
607
|
+
}))
|
|
608
|
+
);
|
|
564
609
|
const baseMessages = request.messages.filter((m) => m.role !== "system").map((m) => ({ role: m.role, content: this.contentToClaudeParts(m.content) }));
|
|
565
610
|
const extraMessages = request._extraMessages ?? [];
|
|
566
611
|
const allMessages = [...baseMessages, ...extraMessages];
|
|
@@ -572,7 +617,7 @@ var ClaudeProvider = class extends BaseProvider {
|
|
|
572
617
|
model: request.model,
|
|
573
618
|
messages: allMessages,
|
|
574
619
|
tools: anthropicTools,
|
|
575
|
-
system: request.systemPrompt,
|
|
620
|
+
system: this.buildSystemParam(request.systemPrompt),
|
|
576
621
|
max_tokens: request.maxTokens ?? 8192,
|
|
577
622
|
temperature,
|
|
578
623
|
thinking
|
|
@@ -580,7 +625,13 @@ var ClaudeProvider = class extends BaseProvider {
|
|
|
580
625
|
let currentBlockType = null;
|
|
581
626
|
let currentToolIndex = 0;
|
|
582
627
|
let currentBlockData = {};
|
|
628
|
+
let startUsage = null;
|
|
583
629
|
for await (const event of stream) {
|
|
630
|
+
if (event.type === "message_start") {
|
|
631
|
+
const msgUsage = event.message?.usage;
|
|
632
|
+
if (msgUsage) startUsage = msgUsage;
|
|
633
|
+
continue;
|
|
634
|
+
}
|
|
584
635
|
if (event.type === "content_block_start") {
|
|
585
636
|
const block = event.content_block;
|
|
586
637
|
currentBlockType = block.type;
|
|
@@ -641,15 +692,17 @@ var ClaudeProvider = class extends BaseProvider {
|
|
|
641
692
|
currentBlockType = null;
|
|
642
693
|
currentBlockData = {};
|
|
643
694
|
} else if (event.type === "message_delta") {
|
|
644
|
-
const
|
|
645
|
-
if (
|
|
695
|
+
const deltaUsage = event.usage;
|
|
696
|
+
if (deltaUsage) {
|
|
646
697
|
doneEmitted = true;
|
|
647
698
|
yield {
|
|
648
699
|
type: "done",
|
|
649
|
-
usage: {
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
700
|
+
usage: this.extractUsage({
|
|
701
|
+
input_tokens: startUsage?.input_tokens ?? deltaUsage.input_tokens ?? 0,
|
|
702
|
+
output_tokens: deltaUsage.output_tokens ?? 0,
|
|
703
|
+
cache_creation_input_tokens: startUsage?.cache_creation_input_tokens,
|
|
704
|
+
cache_read_input_tokens: startUsage?.cache_read_input_tokens
|
|
705
|
+
}),
|
|
653
706
|
rawContent: rawContentBlocks
|
|
654
707
|
};
|
|
655
708
|
}
|
|
@@ -1003,6 +1056,16 @@ Node.js does not automatically use system proxies. Try one of the following:
|
|
|
1003
1056
|
|
|
1004
1057
|
// src/providers/openai-compatible.ts
|
|
1005
1058
|
import OpenAI from "openai";
|
|
1059
|
+
/**
 * Convert an OpenAI-compatible `usage` object into the internal token-usage shape.
 *
 * Cached prompt tokens are split out of the prompt total so that
 * `inputTokens` only counts uncached input and `cacheReadTokens` counts the
 * cached portion. The cached count is read from
 * `prompt_tokens_details.cached_tokens` (OpenAI format) and, failing that,
 * from the top-level `prompt_cache_hit_tokens` (DeepSeek format).
 *
 * Missing or non-numeric counts are treated as 0 so that downstream `+=`
 * accumulation never produces NaN.
 *
 * @param {object | null | undefined} u - Provider usage object, if any.
 * @returns {{ inputTokens: number, outputTokens: number, cacheReadTokens?: number } | undefined}
 *   `undefined` when the provider reported no usage at all.
 */
function toUsage(u) {
  if (!u) return undefined;

  const count = (value) => (typeof value === "number" && Number.isFinite(value) ? value : 0);

  const cached = count(u.prompt_tokens_details?.cached_tokens ?? u.prompt_cache_hit_tokens);
  const usage = {
    // Clamp at 0 in case a provider reports more cached tokens than prompt tokens.
    inputTokens: Math.max(0, count(u.prompt_tokens) - cached),
    outputTokens: count(u.completion_tokens)
  };
  if (cached > 0) usage.cacheReadTokens = cached;
  return usage;
}
|
|
1006
1069
|
var OpenAICompatibleProvider = class extends BaseProvider {
|
|
1007
1070
|
client;
|
|
1008
1071
|
defaultTimeout = 6e4;
|
|
@@ -1056,10 +1119,7 @@ var OpenAICompatibleProvider = class extends BaseProvider {
|
|
|
1056
1119
|
return {
|
|
1057
1120
|
content: firstChoice.message.content ?? "",
|
|
1058
1121
|
model: response.model,
|
|
1059
|
-
usage: response.usage
|
|
1060
|
-
inputTokens: response.usage.prompt_tokens,
|
|
1061
|
-
outputTokens: response.usage.completion_tokens
|
|
1062
|
-
} : void 0
|
|
1122
|
+
usage: toUsage(response.usage)
|
|
1063
1123
|
};
|
|
1064
1124
|
} catch (err) {
|
|
1065
1125
|
throw this.wrapError(err);
|
|
@@ -1088,10 +1148,7 @@ var OpenAICompatibleProvider = class extends BaseProvider {
|
|
|
1088
1148
|
yield {
|
|
1089
1149
|
delta: "",
|
|
1090
1150
|
done: true,
|
|
1091
|
-
usage:
|
|
1092
|
-
inputTokens: chunk.usage.prompt_tokens,
|
|
1093
|
-
outputTokens: chunk.usage.completion_tokens
|
|
1094
|
-
}
|
|
1151
|
+
usage: toUsage(chunk.usage)
|
|
1095
1152
|
};
|
|
1096
1153
|
continue;
|
|
1097
1154
|
}
|
|
@@ -1159,10 +1216,7 @@ var OpenAICompatibleProvider = class extends BaseProvider {
|
|
|
1159
1216
|
return { content: "", usage: void 0 };
|
|
1160
1217
|
}
|
|
1161
1218
|
const message = firstChoice.message;
|
|
1162
|
-
const usage = response.usage
|
|
1163
|
-
inputTokens: response.usage.prompt_tokens,
|
|
1164
|
-
outputTokens: response.usage.completion_tokens
|
|
1165
|
-
} : void 0;
|
|
1219
|
+
const usage = toUsage(response.usage);
|
|
1166
1220
|
const reasoningContent = message.reasoning_content;
|
|
1167
1221
|
if (message.tool_calls && message.tool_calls.length > 0) {
|
|
1168
1222
|
const toolCalls = message.tool_calls.map((tc) => {
|
|
@@ -1275,10 +1329,7 @@ var OpenAICompatibleProvider = class extends BaseProvider {
|
|
|
1275
1329
|
}
|
|
1276
1330
|
yield {
|
|
1277
1331
|
type: "done",
|
|
1278
|
-
usage:
|
|
1279
|
-
inputTokens: chunk.usage.prompt_tokens,
|
|
1280
|
-
outputTokens: chunk.usage.completion_tokens
|
|
1281
|
-
}
|
|
1332
|
+
usage: toUsage(chunk.usage)
|
|
1282
1333
|
};
|
|
1283
1334
|
continue;
|
|
1284
1335
|
}
|
|
@@ -2331,7 +2382,12 @@ var Session = class _Session {
|
|
|
2331
2382
|
updated;
|
|
2332
2383
|
messages = [];
|
|
2333
2384
|
title;
|
|
2334
|
-
tokenUsage = {
|
|
2385
|
+
tokenUsage = {
|
|
2386
|
+
inputTokens: 0,
|
|
2387
|
+
outputTokens: 0,
|
|
2388
|
+
cacheCreationTokens: 0,
|
|
2389
|
+
cacheReadTokens: 0
|
|
2390
|
+
};
|
|
2335
2391
|
checkpoints = [];
|
|
2336
2392
|
constructor(id, provider, model) {
|
|
2337
2393
|
this.id = id;
|
|
@@ -2359,11 +2415,18 @@ var Session = class _Session {
|
|
|
2359
2415
|
addTokenUsage(usage) {
|
|
2360
2416
|
this.tokenUsage.inputTokens += usage.inputTokens;
|
|
2361
2417
|
this.tokenUsage.outputTokens += usage.outputTokens;
|
|
2418
|
+
this.tokenUsage.cacheCreationTokens += usage.cacheCreationTokens ?? 0;
|
|
2419
|
+
this.tokenUsage.cacheReadTokens += usage.cacheReadTokens ?? 0;
|
|
2362
2420
|
}
|
|
2363
2421
|
clear() {
|
|
2364
2422
|
this.messages = [];
|
|
2365
2423
|
this.title = void 0;
|
|
2366
|
-
this.tokenUsage = {
|
|
2424
|
+
this.tokenUsage = {
|
|
2425
|
+
inputTokens: 0,
|
|
2426
|
+
outputTokens: 0,
|
|
2427
|
+
cacheCreationTokens: 0,
|
|
2428
|
+
cacheReadTokens: 0
|
|
2429
|
+
};
|
|
2367
2430
|
this.updated = /* @__PURE__ */ new Date();
|
|
2368
2431
|
}
|
|
2369
2432
|
/**
|
|
@@ -2491,7 +2554,9 @@ var Session = class _Session {
|
|
|
2491
2554
|
if (tu && typeof tu === "object") {
|
|
2492
2555
|
session.tokenUsage = {
|
|
2493
2556
|
inputTokens: typeof tu.inputTokens === "number" ? tu.inputTokens : 0,
|
|
2494
|
-
outputTokens: typeof tu.outputTokens === "number" ? tu.outputTokens : 0
|
|
2557
|
+
outputTokens: typeof tu.outputTokens === "number" ? tu.outputTokens : 0,
|
|
2558
|
+
cacheCreationTokens: typeof tu.cacheCreationTokens === "number" ? tu.cacheCreationTokens : 0,
|
|
2559
|
+
cacheReadTokens: typeof tu.cacheReadTokens === "number" ? tu.cacheReadTokens : 0
|
|
2495
2560
|
};
|
|
2496
2561
|
}
|
|
2497
2562
|
if (Array.isArray(d.checkpoints)) {
|
|
@@ -3495,6 +3560,87 @@ async function setupProxy(configProxy) {
|
|
|
3495
3560
|
}
|
|
3496
3561
|
}
|
|
3497
3562
|
|
|
3563
|
+
// src/core/pricing.ts

/**
 * Per-model price list. Every rate is in USD per one million tokens
 * (computeCost divides the weighted sum by 1e6).
 *
 * Fields: `input`, `output`, and optionally `cacheWrite` / `cacheRead`.
 * When a cache rate is absent, computeCost falls back to the `input` rate.
 * Keys are lower-case model IDs; getPricing also accepts IDs that extend a
 * key with a delimited suffix (e.g. a date stamp).
 */
var PRICING_TABLE = {
  // ── Anthropic Claude ──────────────────────────────────────────
  "claude-opus-4-6": { input: 15, output: 75, cacheWrite: 18.75, cacheRead: 1.5 },
  "claude-opus-4-5": { input: 15, output: 75, cacheWrite: 18.75, cacheRead: 1.5 },
  "claude-sonnet-4-6": { input: 3, output: 15, cacheWrite: 3.75, cacheRead: 0.3 },
  "claude-sonnet-4-5-20250929": { input: 3, output: 15, cacheWrite: 3.75, cacheRead: 0.3 },
  "claude-haiku-4-5-20251001": { input: 1, output: 5, cacheWrite: 1.25, cacheRead: 0.1 },
  "claude-haiku-4-5": { input: 1, output: 5, cacheWrite: 1.25, cacheRead: 0.1 },
  // Legacy Claude 3.x families (prefix fallback handles minor date suffixes)
  "claude-3-5-sonnet": { input: 3, output: 15, cacheWrite: 3.75, cacheRead: 0.3 },
  "claude-3-5-haiku": { input: 0.8, output: 4, cacheWrite: 1, cacheRead: 0.08 },
  "claude-3-opus": { input: 15, output: 75, cacheWrite: 18.75, cacheRead: 1.5 },
  // ── OpenAI ────────────────────────────────────────────────────
  "gpt-4o": { input: 2.5, output: 10, cacheRead: 1.25 },
  "gpt-4o-mini": { input: 0.15, output: 0.6, cacheRead: 0.075 },
  "gpt-4-turbo": { input: 10, output: 30 },
  "gpt-4": { input: 30, output: 60 },
  "gpt-4.1": { input: 2, output: 8, cacheRead: 0.5 },
  "gpt-4.1-mini": { input: 0.4, output: 1.6, cacheRead: 0.1 },
  "gpt-4.1-nano": { input: 0.1, output: 0.4, cacheRead: 0.025 },
  "o1": { input: 15, output: 60, cacheRead: 7.5 },
  "o1-mini": { input: 3, output: 12, cacheRead: 1.5 },
  "o3": { input: 10, output: 40, cacheRead: 2.5 },
  "o3-mini": { input: 1.1, output: 4.4, cacheRead: 0.55 },
  // ── Google Gemini ─────────────────────────────────────────────
  "gemini-2.5-pro": { input: 1.25, output: 10 },
  "gemini-2.5-flash": { input: 0.3, output: 2.5 },
  "gemini-2.0-flash": { input: 0.1, output: 0.4 },
  "gemini-1.5-pro": { input: 1.25, output: 5 },
  "gemini-1.5-flash": { input: 0.075, output: 0.3 },
  // ── DeepSeek ──────────────────────────────────────────────────
  "deepseek-chat": { input: 0.27, output: 1.1, cacheRead: 0.07 },
  "deepseek-reasoner": { input: 0.55, output: 2.19, cacheRead: 0.14 },
  "deepseek-v3": { input: 0.27, output: 1.1, cacheRead: 0.07 },
  // ── Moonshot Kimi ─────────────────────────────────────────────
  "moonshot-v1-8k": { input: 0.17, output: 0.17 },
  "moonshot-v1-32k": { input: 0.33, output: 0.33 },
  "moonshot-v1-128k": { input: 0.83, output: 0.83 },
  "kimi-k2": { input: 0.6, output: 2.5 },
  "kimi-latest": { input: 0.6, output: 2.5 },
  // ── Zhipu GLM ─────────────────────────────────────────────────
  "glm-4-plus": { input: 0.7, output: 0.7 },
  "glm-4": { input: 0.14, output: 0.14 },
  "glm-4-flash": { input: 0, output: 0 },
  "glm-4.5": { input: 0.29, output: 1.14 },
  "glm-4.6": { input: 0.6, output: 2.2 }
  // ── OpenRouter (pass-through — actual cost depends on underlying model) ──
  // Left empty; callers should resolve via underlying model ID.
  // ── Ollama (local, zero cost) ─────────────────────────────────
  // Handled via provider check below.
};

/** Providers whose usage is always priced at zero, regardless of model. */
var FREE_PROVIDERS = /* @__PURE__ */ new Set(["ollama"]);

/**
 * Table keys ordered longest-first so the most specific family wins during
 * prefix fallback (e.g. "gpt-4o-mini" is tried before "gpt-4o").
 * Computed once instead of re-sorting on every lookup.
 */
var PRICING_KEYS_BY_LENGTH = Object.keys(PRICING_TABLE).sort((a, b) => b.length - a.length);

/**
 * Characters allowed to follow a table key during prefix fallback.
 * Requiring a delimiter keeps "gpt-4.5-preview" from being priced as "gpt-4"
 * and "glm-4-flashx" from being priced as the free "glm-4-flash".
 */
var PRICING_SUFFIX_DELIMITERS = /* @__PURE__ */ new Set(["-", ":", "@", "_"]);

/**
 * Look up the price entry for a provider/model pair.
 *
 * Resolution order:
 *   1. Providers in FREE_PROVIDERS -> `{ input: 0, output: 0 }`.
 *   2. Exact (case-insensitive) match on the model ID.
 *   3. Longest table key that the model ID extends with a delimited suffix
 *      (e.g. "claude-3-5-sonnet-20241022" -> "claude-3-5-sonnet").
 *
 * @param {string} provider - Provider ID (case-insensitive).
 * @param {string} model - Model ID (case-insensitive).
 * @returns {{ input: number, output: number, cacheWrite?: number, cacheRead?: number } | null}
 *   The price entry, or `null` when the model is unknown or not a string.
 */
function getPricing(provider, model) {
  if (typeof provider === "string" && FREE_PROVIDERS.has(provider.toLowerCase())) {
    return { input: 0, output: 0 };
  }
  if (typeof model !== "string" || model === "") return null;

  const key = model.toLowerCase();
  // Own-property check: a plain `PRICING_TABLE[key]` would also hit inherited
  // members such as "constructor" or "toString".
  if (Object.prototype.hasOwnProperty.call(PRICING_TABLE, key)) return PRICING_TABLE[key];

  for (const candidate of PRICING_KEYS_BY_LENGTH) {
    if (key.startsWith(candidate) && PRICING_SUFFIX_DELIMITERS.has(key[candidate.length])) {
      return PRICING_TABLE[candidate];
    }
  }
  return null;
}

/**
 * Compute the USD cost of a token-usage record.
 *
 * Cache-write and cache-read tokens are billed at their dedicated rates when
 * the price entry defines them, otherwise at the plain input rate. Missing
 * usage fields count as 0 tokens.
 *
 * @param {string} provider - Provider ID.
 * @param {string} model - Model ID.
 * @param {{ inputTokens?: number, outputTokens?: number, cacheCreationTokens?: number, cacheReadTokens?: number }} usage
 * @returns {number | null} Cost in USD, or `null` when pricing is unknown.
 */
function computeCost(provider, model, usage) {
  const price = getPricing(provider, model);
  if (!price) return null;

  const inputCost = (usage.inputTokens ?? 0) * price.input;
  const outputCost = (usage.outputTokens ?? 0) * price.output;
  const cacheWriteCost = (usage.cacheCreationTokens ?? 0) * (price.cacheWrite ?? price.input);
  const cacheReadCost = (usage.cacheReadTokens ?? 0) * (price.cacheRead ?? price.input);

  // Rates are per million tokens.
  return (inputCost + outputCost + cacheWriteCost + cacheReadCost) / 1e6;
}

/**
 * Format a USD amount with precision that scales with its magnitude:
 * 4 decimals below one cent, 3 decimals below one dollar, 2 decimals otherwise.
 * Zero is rendered as "$0.0000"; negative amounts carry a leading "-".
 *
 * @param {number} amount - Amount in USD.
 * @returns {string} Formatted amount, e.g. "$0.0012", "$0.500", "$12.50".
 */
function formatCost(amount) {
  if (amount === 0) return "$0.0000";

  const magnitude = Math.abs(amount);
  const sign = amount < 0 ? "-" : "";

  let digits = 2;
  if (magnitude < 0.01) {
    digits = 4;
  } else if (magnitude < 1) {
    digits = 3;
  }
  return `${sign}$${magnitude.toFixed(digits)}`;
}
|
|
3643
|
+
|
|
3498
3644
|
// src/repl/dev-state.ts
|
|
3499
3645
|
import { existsSync as existsSync5, readFileSync as readFileSync4, writeFileSync as writeFileSync3, unlinkSync as unlinkSync2, mkdirSync as mkdirSync4 } from "fs";
|
|
3500
3646
|
import { join as join5 } from "path";
|
|
@@ -3601,6 +3747,9 @@ export {
|
|
|
3601
3747
|
getGitRoot,
|
|
3602
3748
|
getGitContext,
|
|
3603
3749
|
formatGitContextForPrompt,
|
|
3750
|
+
getPricing,
|
|
3751
|
+
computeCost,
|
|
3752
|
+
formatCost,
|
|
3604
3753
|
parseSimpleYaml,
|
|
3605
3754
|
SNAPSHOT_PROMPT,
|
|
3606
3755
|
sessionHasMeaningfulContent,
|
|
@@ -385,7 +385,7 @@ ${content}`);
|
|
|
385
385
|
}
|
|
386
386
|
}
|
|
387
387
|
async function runTaskMode(config, providers, configManager, topic) {
|
|
388
|
-
const { TaskOrchestrator } = await import("./task-orchestrator-
|
|
388
|
+
const { TaskOrchestrator } = await import("./task-orchestrator-MWO6A4KQ.js");
|
|
389
389
|
const orchestrator = new TaskOrchestrator(config, providers, configManager);
|
|
390
390
|
let interrupted = false;
|
|
391
391
|
const onSigint = () => {
|
package/dist/index.js
CHANGED
|
@@ -11,20 +11,23 @@ import {
|
|
|
11
11
|
buildPhantomCorrectionMessage,
|
|
12
12
|
buildWriteRoundReminder,
|
|
13
13
|
clearDevState,
|
|
14
|
+
computeCost,
|
|
14
15
|
detectsHallucinatedFileOp,
|
|
15
16
|
extractWrittenFilePaths,
|
|
16
17
|
findPhantomClaims,
|
|
18
|
+
formatCost,
|
|
17
19
|
formatGitContextForPrompt,
|
|
18
20
|
getContentText,
|
|
19
21
|
getGitContext,
|
|
20
22
|
getGitRoot,
|
|
23
|
+
getPricing,
|
|
21
24
|
hadPreviousWriteToolCalls,
|
|
22
25
|
loadDevState,
|
|
23
26
|
parseSimpleYaml,
|
|
24
27
|
saveDevState,
|
|
25
28
|
sessionHasMeaningfulContent,
|
|
26
29
|
setupProxy
|
|
27
|
-
} from "./chunk-
|
|
30
|
+
} from "./chunk-JL5NK6AR.js";
|
|
28
31
|
import {
|
|
29
32
|
ToolExecutor,
|
|
30
33
|
ToolRegistry,
|
|
@@ -38,7 +41,7 @@ import {
|
|
|
38
41
|
spawnAgentContext,
|
|
39
42
|
theme,
|
|
40
43
|
undoStack
|
|
41
|
-
} from "./chunk-
|
|
44
|
+
} from "./chunk-YQEIQJ6K.js";
|
|
42
45
|
import {
|
|
43
46
|
fileCheckpoints
|
|
44
47
|
} from "./chunk-4BKXL7SM.js";
|
|
@@ -63,7 +66,7 @@ import {
|
|
|
63
66
|
SKILLS_DIR_NAME,
|
|
64
67
|
VERSION,
|
|
65
68
|
buildUserIdentityPrompt
|
|
66
|
-
} from "./chunk-
|
|
69
|
+
} from "./chunk-W7QVBFIJ.js";
|
|
67
70
|
|
|
68
71
|
// src/index.ts
|
|
69
72
|
import { program } from "commander";
|
|
@@ -487,8 +490,12 @@ Error${typeName}: ${lines.join("\n")}
|
|
|
487
490
|
renderUsage(usage, sessionTotal) {
|
|
488
491
|
const total = usage.inputTokens + usage.outputTokens;
|
|
489
492
|
let line = theme.dim("\u{1F4CA} ") + theme.dim(`in ${usage.inputTokens.toLocaleString()}`) + theme.dim(" + ") + theme.dim(`out ${usage.outputTokens.toLocaleString()}`) + theme.dim(` = ${total.toLocaleString()} tokens`);
|
|
493
|
+
const cacheRead = usage.cacheReadTokens ?? 0;
|
|
494
|
+
if (cacheRead > 0) {
|
|
495
|
+
line += theme.dim(` \u2502 cache: ${cacheRead.toLocaleString()}`);
|
|
496
|
+
}
|
|
490
497
|
if (sessionTotal) {
|
|
491
|
-
const sessionSum = sessionTotal.inputTokens + sessionTotal.outputTokens;
|
|
498
|
+
const sessionSum = sessionTotal.inputTokens + sessionTotal.outputTokens + (sessionTotal.cacheCreationTokens ?? 0) + (sessionTotal.cacheReadTokens ?? 0);
|
|
492
499
|
line += theme.dim(` \u2502 session total: ${sessionSum.toLocaleString()}`);
|
|
493
500
|
}
|
|
494
501
|
process.stdout.write(line + "\n\n");
|
|
@@ -1217,11 +1224,18 @@ function createDefaultCommands() {
|
|
|
1217
1224
|
if (sys) {
|
|
1218
1225
|
console.log(` System : ${sys.slice(0, 60)}...`);
|
|
1219
1226
|
}
|
|
1220
|
-
const
|
|
1227
|
+
const cacheRead = tokenUsage.cacheReadTokens ?? 0;
|
|
1228
|
+
const cacheCreate = tokenUsage.cacheCreationTokens ?? 0;
|
|
1229
|
+
const totalTokens = tokenUsage.inputTokens + tokenUsage.outputTokens + cacheRead + cacheCreate;
|
|
1221
1230
|
if (totalTokens > 0) {
|
|
1231
|
+
const cacheSuffix = cacheRead > 0 || cacheCreate > 0 ? ` [cache: +${cacheCreate.toLocaleString()} / -${cacheRead.toLocaleString()}]` : "";
|
|
1222
1232
|
console.log(
|
|
1223
|
-
` Tokens : in ${tokenUsage.inputTokens.toLocaleString()} + out ${tokenUsage.outputTokens.toLocaleString()} = ${totalTokens.toLocaleString()}
|
|
1233
|
+
` Tokens : in ${tokenUsage.inputTokens.toLocaleString()} + out ${tokenUsage.outputTokens.toLocaleString()} = ${totalTokens.toLocaleString()}${cacheSuffix}`
|
|
1224
1234
|
);
|
|
1235
|
+
const cost = computeCost(ctx.getCurrentProvider(), ctx.getCurrentModel(), tokenUsage);
|
|
1236
|
+
if (cost != null) {
|
|
1237
|
+
console.log(` Cost : ${formatCost(cost)} (session total)`);
|
|
1238
|
+
}
|
|
1225
1239
|
}
|
|
1226
1240
|
const ctxWindowSize = ctx.getContextWindowSize();
|
|
1227
1241
|
if (ctxWindowSize > 0) {
|
|
@@ -1823,7 +1837,7 @@ ${hint}` : "")
|
|
|
1823
1837
|
},
|
|
1824
1838
|
{
|
|
1825
1839
|
name: "cost",
|
|
1826
|
-
description: "Show session token usage
|
|
1840
|
+
description: "Show session token usage, prompt-cache hits, and USD cost",
|
|
1827
1841
|
usage: "/cost [reset]",
|
|
1828
1842
|
execute(args, ctx) {
|
|
1829
1843
|
const sub = args[0]?.toLowerCase();
|
|
@@ -1832,24 +1846,65 @@ ${hint}` : "")
|
|
|
1832
1846
|
ctx.renderer.printSuccess("Session token counters reset.");
|
|
1833
1847
|
return;
|
|
1834
1848
|
}
|
|
1835
|
-
const
|
|
1836
|
-
const
|
|
1849
|
+
const session = ctx.sessions.current;
|
|
1850
|
+
const usage = session?.tokenUsage ?? {
|
|
1851
|
+
inputTokens: 0,
|
|
1852
|
+
outputTokens: 0,
|
|
1853
|
+
cacheCreationTokens: 0,
|
|
1854
|
+
cacheReadTokens: 0
|
|
1855
|
+
};
|
|
1856
|
+
const cacheCreate = usage.cacheCreationTokens ?? 0;
|
|
1857
|
+
const cacheRead = usage.cacheReadTokens ?? 0;
|
|
1858
|
+
const totalTokens = usage.inputTokens + usage.outputTokens + cacheCreate + cacheRead;
|
|
1837
1859
|
if (totalTokens === 0) {
|
|
1838
1860
|
ctx.renderer.printInfo("No token usage recorded this session.");
|
|
1839
1861
|
return;
|
|
1840
1862
|
}
|
|
1863
|
+
const provider = ctx.getCurrentProvider();
|
|
1864
|
+
const model = ctx.getCurrentModel();
|
|
1865
|
+
const cost = computeCost(provider, model, usage);
|
|
1866
|
+
const pricing = getPricing(provider, model);
|
|
1867
|
+
let savings = null;
|
|
1868
|
+
if (cost != null && pricing && cacheRead > 0) {
|
|
1869
|
+
const costWithoutCache = computeCost(provider, model, {
|
|
1870
|
+
inputTokens: usage.inputTokens + cacheRead,
|
|
1871
|
+
outputTokens: usage.outputTokens,
|
|
1872
|
+
cacheCreationTokens: cacheCreate,
|
|
1873
|
+
cacheReadTokens: 0
|
|
1874
|
+
});
|
|
1875
|
+
if (costWithoutCache != null) savings = costWithoutCache - cost;
|
|
1876
|
+
}
|
|
1841
1877
|
console.log();
|
|
1842
|
-
console.log(theme.heading(" \u{
|
|
1843
|
-
console.log(theme.dim(" " + "\u2500".repeat(
|
|
1844
|
-
console.log(theme.dim(" Input
|
|
1845
|
-
console.log(theme.dim(" Output
|
|
1846
|
-
|
|
1847
|
-
|
|
1848
|
-
|
|
1878
|
+
console.log(theme.heading(" \u{1F4B0} Session Cost & Token Usage"));
|
|
1879
|
+
console.log(theme.dim(" " + "\u2500".repeat(48)));
|
|
1880
|
+
console.log(theme.dim(" Input (uncached) : ") + chalk2.white(usage.inputTokens.toLocaleString().padStart(12)));
|
|
1881
|
+
console.log(theme.dim(" Output : ") + chalk2.white(usage.outputTokens.toLocaleString().padStart(12)));
|
|
1882
|
+
if (cacheCreate > 0) {
|
|
1883
|
+
console.log(theme.dim(" Cache write : ") + chalk2.yellow(cacheCreate.toLocaleString().padStart(12)));
|
|
1884
|
+
}
|
|
1885
|
+
if (cacheRead > 0) {
|
|
1886
|
+
const pct = Math.round(cacheRead / (cacheRead + usage.inputTokens) * 100);
|
|
1887
|
+
console.log(
|
|
1888
|
+
theme.dim(" Cache read : ") + chalk2.green(cacheRead.toLocaleString().padStart(12)) + theme.dim(` (${pct}% hit rate)`)
|
|
1889
|
+
);
|
|
1890
|
+
}
|
|
1891
|
+
console.log(theme.dim(" Total tokens : ") + chalk2.bold.white(totalTokens.toLocaleString().padStart(12)));
|
|
1892
|
+
console.log(theme.dim(" " + "\u2500".repeat(48)));
|
|
1893
|
+
if (cost != null) {
|
|
1894
|
+
console.log(theme.dim(" Cost : ") + chalk2.bold.cyan(formatCost(cost).padStart(12)));
|
|
1895
|
+
if (savings != null && savings > 0) {
|
|
1896
|
+
console.log(
|
|
1897
|
+
theme.dim(" Cache savings : ") + chalk2.green(`-${formatCost(savings)}`.padStart(12)) + theme.dim(` (vs no cache)`)
|
|
1898
|
+
);
|
|
1899
|
+
}
|
|
1900
|
+
} else {
|
|
1901
|
+
console.log(theme.dim(" Cost : ") + theme.dim(" \u2014 (pricing unknown)"));
|
|
1902
|
+
}
|
|
1903
|
+
console.log(theme.dim(" " + "\u2500".repeat(48)));
|
|
1849
1904
|
if (session) {
|
|
1850
|
-
console.log(theme.dim(" Provider
|
|
1851
|
-
console.log(theme.dim(" Model
|
|
1852
|
-
console.log(theme.dim(" Messages
|
|
1905
|
+
console.log(theme.dim(" Provider : ") + theme.dim(provider));
|
|
1906
|
+
console.log(theme.dim(" Model : ") + theme.dim(model));
|
|
1907
|
+
console.log(theme.dim(" Messages : ") + theme.dim(String(session.messages.length)));
|
|
1853
1908
|
}
|
|
1854
1909
|
console.log();
|
|
1855
1910
|
}
|
|
@@ -2106,7 +2161,7 @@ ${hint}` : "")
|
|
|
2106
2161
|
usage: "/test [command|filter]",
|
|
2107
2162
|
async execute(args, ctx) {
|
|
2108
2163
|
try {
|
|
2109
|
-
const { executeTests } = await import("./run-tests-
|
|
2164
|
+
const { executeTests } = await import("./run-tests-X4PCLXA2.js");
|
|
2110
2165
|
const argStr = args.join(" ").trim();
|
|
2111
2166
|
let testArgs = {};
|
|
2112
2167
|
if (argStr) {
|
|
@@ -3344,7 +3399,14 @@ var Repl = class {
|
|
|
3344
3399
|
/** 当前加载的层级上下文(全局/项目/子目录) */
|
|
3345
3400
|
contextLayers = [];
|
|
3346
3401
|
/** 本次会话累计 token 用量 */
|
|
3347
|
-
sessionTokenUsage = { inputTokens: 0, outputTokens: 0 };
|
|
3402
|
+
sessionTokenUsage = { inputTokens: 0, outputTokens: 0, cacheCreationTokens: 0, cacheReadTokens: 0 };
|
|
3403
|
+
/** Fold a single-request TokenUsage (with optional cache fields) into sessionTokenUsage. */
|
|
3404
|
+
addSessionUsage(u) {
|
|
3405
|
+
this.sessionTokenUsage.inputTokens += u.inputTokens;
|
|
3406
|
+
this.sessionTokenUsage.outputTokens += u.outputTokens;
|
|
3407
|
+
this.sessionTokenUsage.cacheCreationTokens += u.cacheCreationTokens ?? 0;
|
|
3408
|
+
this.sessionTokenUsage.cacheReadTokens += u.cacheReadTokens ?? 0;
|
|
3409
|
+
}
|
|
3348
3410
|
/** 启动时检测到的 Git 分支(无 git 仓库时为 null) */
|
|
3349
3411
|
gitBranch = null;
|
|
3350
3412
|
/** MCP 多服务器管理器(无 MCP 配置时为 null) */
|
|
@@ -4607,8 +4669,7 @@ Session '${this.resumeSessionId}' not found.
|
|
|
4607
4669
|
session.addMessage({ role: "assistant", content, timestamp: /* @__PURE__ */ new Date() });
|
|
4608
4670
|
this.events.emit("message.after", { content });
|
|
4609
4671
|
if (usage) {
|
|
4610
|
-
this.
|
|
4611
|
-
this.sessionTokenUsage.outputTokens += usage.outputTokens;
|
|
4672
|
+
this.addSessionUsage(usage);
|
|
4612
4673
|
session.addTokenUsage(usage);
|
|
4613
4674
|
if (showTokens && !tokensShown) {
|
|
4614
4675
|
this.renderer.renderUsage(usage, this.sessionTokenUsage);
|
|
@@ -4637,8 +4698,7 @@ Session '${this.resumeSessionId}' not found.
|
|
|
4637
4698
|
session.addMessage({ role: "assistant", content: response.content, timestamp: /* @__PURE__ */ new Date() });
|
|
4638
4699
|
this.events.emit("message.after", { content: response.content });
|
|
4639
4700
|
if (response.usage) {
|
|
4640
|
-
this.
|
|
4641
|
-
this.sessionTokenUsage.outputTokens += response.usage.outputTokens;
|
|
4701
|
+
this.addSessionUsage(response.usage);
|
|
4642
4702
|
session.addTokenUsage(response.usage);
|
|
4643
4703
|
if (this.shouldShowTokens()) {
|
|
4644
4704
|
this.renderer.renderUsage(response.usage, this.sessionTokenUsage);
|
|
@@ -4797,7 +4857,7 @@ You have a maximum of ${maxToolRounds} tool call rounds for this task. Plan effi
|
|
|
4797
4857
|
const modelParams = this.getModelParams();
|
|
4798
4858
|
const useStreaming = this.config.get("ui").streaming;
|
|
4799
4859
|
const spinner = this.renderer.showSpinner("Thinking...");
|
|
4800
|
-
const roundUsage = { inputTokens: 0, outputTokens: 0 };
|
|
4860
|
+
const roundUsage = { inputTokens: 0, outputTokens: 0, cacheCreationTokens: 0, cacheReadTokens: 0 };
|
|
4801
4861
|
const supportsStreamingTools = useStreaming && typeof provider.chatWithToolsStream === "function";
|
|
4802
4862
|
let consecutiveFreeRounds = 0;
|
|
4803
4863
|
let lastToolCallSignature = "";
|
|
@@ -4904,6 +4964,8 @@ You have a maximum of ${maxToolRounds} tool call rounds for this task. Plan effi
|
|
|
4904
4964
|
if (result.usage) {
|
|
4905
4965
|
roundUsage.inputTokens += result.usage.inputTokens;
|
|
4906
4966
|
roundUsage.outputTokens += result.usage.outputTokens;
|
|
4967
|
+
roundUsage.cacheCreationTokens += result.usage.cacheCreationTokens ?? 0;
|
|
4968
|
+
roundUsage.cacheReadTokens += result.usage.cacheReadTokens ?? 0;
|
|
4907
4969
|
}
|
|
4908
4970
|
if ("content" in result) {
|
|
4909
4971
|
const hasWriteTools = toolDefs.some((t) => t.name === "write_file" || t.name === "edit_file");
|
|
@@ -4954,8 +5016,7 @@ You have a maximum of ${maxToolRounds} tool call rounds for this task. Plan effi
|
|
|
4954
5016
|
});
|
|
4955
5017
|
this.events.emit("message.after", { content: finalContent });
|
|
4956
5018
|
if (roundUsage.inputTokens > 0 || roundUsage.outputTokens > 0) {
|
|
4957
|
-
this.
|
|
4958
|
-
this.sessionTokenUsage.outputTokens += roundUsage.outputTokens;
|
|
5019
|
+
this.addSessionUsage(roundUsage);
|
|
4959
5020
|
session.addTokenUsage(roundUsage);
|
|
4960
5021
|
if (this.shouldShowTokens()) {
|
|
4961
5022
|
this.renderer.renderUsage(roundUsage, this.sessionTokenUsage);
|
|
@@ -4993,6 +5054,8 @@ You have a maximum of ${maxToolRounds} tool call rounds for this task. Plan effi
|
|
|
4993
5054
|
if (genUsage) {
|
|
4994
5055
|
roundUsage.inputTokens += genUsage.inputTokens;
|
|
4995
5056
|
roundUsage.outputTokens += genUsage.outputTokens;
|
|
5057
|
+
roundUsage.cacheCreationTokens += genUsage.cacheCreationTokens ?? 0;
|
|
5058
|
+
roundUsage.cacheReadTokens += genUsage.cacheReadTokens ?? 0;
|
|
4996
5059
|
}
|
|
4997
5060
|
session.addMessage({ role: "assistant", content: genContent, timestamp: /* @__PURE__ */ new Date() });
|
|
4998
5061
|
this.events.emit("message.after", { content: genContent });
|
|
@@ -5007,8 +5070,7 @@ You have a maximum of ${maxToolRounds} tool call rounds for this task. Plan effi
|
|
|
5007
5070
|
const newMsgs2 = provider.buildToolResultMessages(result.toolCalls, syntheticResults, reasoningContent2);
|
|
5008
5071
|
extraMessages.push(...newMsgs2);
|
|
5009
5072
|
if (roundUsage.inputTokens > 0 || roundUsage.outputTokens > 0) {
|
|
5010
|
-
this.
|
|
5011
|
-
this.sessionTokenUsage.outputTokens += roundUsage.outputTokens;
|
|
5073
|
+
this.addSessionUsage(roundUsage);
|
|
5012
5074
|
session.addTokenUsage(roundUsage);
|
|
5013
5075
|
if (teeShowTokens && !teeTokShown) {
|
|
5014
5076
|
this.renderer.renderUsage(roundUsage, this.sessionTokenUsage);
|
|
@@ -5204,6 +5266,8 @@ You have a maximum of ${maxToolRounds} tool call rounds for this task. Plan effi
|
|
|
5204
5266
|
if (summaryResult.usage) {
|
|
5205
5267
|
roundUsage.inputTokens += summaryResult.usage.inputTokens;
|
|
5206
5268
|
roundUsage.outputTokens += summaryResult.usage.outputTokens;
|
|
5269
|
+
roundUsage.cacheCreationTokens += summaryResult.usage.cacheCreationTokens ?? 0;
|
|
5270
|
+
roundUsage.cacheReadTokens += summaryResult.usage.cacheReadTokens ?? 0;
|
|
5207
5271
|
}
|
|
5208
5272
|
} else {
|
|
5209
5273
|
this.renderer.renderError(
|
|
@@ -5218,8 +5282,7 @@ Tip: You can continue the conversation by asking the AI to proceed.`
|
|
|
5218
5282
|
);
|
|
5219
5283
|
}
|
|
5220
5284
|
if (roundUsage.inputTokens > 0 || roundUsage.outputTokens > 0) {
|
|
5221
|
-
this.
|
|
5222
|
-
this.sessionTokenUsage.outputTokens += roundUsage.outputTokens;
|
|
5285
|
+
this.addSessionUsage(roundUsage);
|
|
5223
5286
|
session.addTokenUsage(roundUsage);
|
|
5224
5287
|
if (this.shouldShowTokens()) {
|
|
5225
5288
|
this.renderer.renderUsage(roundUsage, this.sessionTokenUsage);
|
|
@@ -5316,7 +5379,12 @@ Tip: You can continue the conversation by asking the AI to proceed.`
|
|
|
5316
5379
|
},
|
|
5317
5380
|
getSessionTokenUsage: () => ({ ...this.sessionTokenUsage }),
|
|
5318
5381
|
resetSessionTokenUsage: () => {
|
|
5319
|
-
this.sessionTokenUsage = {
|
|
5382
|
+
this.sessionTokenUsage = {
|
|
5383
|
+
inputTokens: 0,
|
|
5384
|
+
outputTokens: 0,
|
|
5385
|
+
cacheCreationTokens: 0,
|
|
5386
|
+
cacheReadTokens: 0
|
|
5387
|
+
};
|
|
5320
5388
|
},
|
|
5321
5389
|
getGitBranch: () => this.gitBranch,
|
|
5322
5390
|
getLastResponse: () => lastResponseStore.content,
|
|
@@ -5493,7 +5561,7 @@ program.command("web").description("Start Web UI server with browser-based chat
|
|
|
5493
5561
|
console.error("Error: Invalid port number. Must be between 1 and 65535.");
|
|
5494
5562
|
process.exit(1);
|
|
5495
5563
|
}
|
|
5496
|
-
const { startWebServer } = await import("./server-
|
|
5564
|
+
const { startWebServer } = await import("./server-YPAZWGUE.js");
|
|
5497
5565
|
await startWebServer({ port, host: options.host });
|
|
5498
5566
|
});
|
|
5499
5567
|
program.command("user [action] [username]").description("Manage Web UI users (list | create <name> | delete <name> | reset-password <name> | migrate <name>)").action(async (action, username) => {
|
|
@@ -5726,7 +5794,7 @@ program.command("hub [topic]").description("Start multi-agent hub (discuss / bra
|
|
|
5726
5794
|
}),
|
|
5727
5795
|
config.get("customProviders")
|
|
5728
5796
|
);
|
|
5729
|
-
const { startHub } = await import("./hub-
|
|
5797
|
+
const { startHub } = await import("./hub-AUWP4SWJ.js");
|
|
5730
5798
|
await startHub(
|
|
5731
5799
|
{
|
|
5732
5800
|
topic: topic ?? "",
|
|
@@ -7,7 +7,9 @@ import {
|
|
|
7
7
|
SessionManager,
|
|
8
8
|
SkillManager,
|
|
9
9
|
TOOL_CALL_REMINDER,
|
|
10
|
+
computeCost,
|
|
10
11
|
detectsHallucinatedFileOp,
|
|
12
|
+
formatCost,
|
|
11
13
|
formatGitContextForPrompt,
|
|
12
14
|
getContentText,
|
|
13
15
|
getGitContext,
|
|
@@ -15,7 +17,7 @@ import {
|
|
|
15
17
|
hadPreviousWriteToolCalls,
|
|
16
18
|
loadDevState,
|
|
17
19
|
setupProxy
|
|
18
|
-
} from "./chunk-
|
|
20
|
+
} from "./chunk-JL5NK6AR.js";
|
|
19
21
|
import {
|
|
20
22
|
AuthManager
|
|
21
23
|
} from "./chunk-BYNY5JPB.js";
|
|
@@ -34,7 +36,7 @@ import {
|
|
|
34
36
|
spawnAgentContext,
|
|
35
37
|
truncateOutput,
|
|
36
38
|
undoStack
|
|
37
|
-
} from "./chunk-
|
|
39
|
+
} from "./chunk-YQEIQJ6K.js";
|
|
38
40
|
import "./chunk-4BKXL7SM.js";
|
|
39
41
|
import {
|
|
40
42
|
AGENTIC_BEHAVIOR_GUIDELINE,
|
|
@@ -54,7 +56,7 @@ import {
|
|
|
54
56
|
SKILLS_DIR_NAME,
|
|
55
57
|
VERSION,
|
|
56
58
|
buildUserIdentityPrompt
|
|
57
|
-
} from "./chunk-
|
|
59
|
+
} from "./chunk-W7QVBFIJ.js";
|
|
58
60
|
|
|
59
61
|
// src/web/server.ts
|
|
60
62
|
import express from "express";
|
|
@@ -483,7 +485,17 @@ var SessionHandler = class _SessionHandler {
|
|
|
483
485
|
currentModel;
|
|
484
486
|
planMode = false;
|
|
485
487
|
runtimeThinking = null;
|
|
486
|
-
sessionTokenUsage = { inputTokens: 0, outputTokens: 0 };
|
|
488
|
+
sessionTokenUsage = { inputTokens: 0, outputTokens: 0, cacheCreationTokens: 0, cacheReadTokens: 0 };
|
|
489
|
+
/** Accumulate a TokenUsage (with optional cache fields) into sessionTokenUsage. */
|
|
490
|
+
addWebSessionUsage(u) {
|
|
491
|
+
this.sessionTokenUsage.inputTokens += u.inputTokens;
|
|
492
|
+
this.sessionTokenUsage.outputTokens += u.outputTokens;
|
|
493
|
+
this.sessionTokenUsage.cacheCreationTokens += u.cacheCreationTokens ?? 0;
|
|
494
|
+
this.sessionTokenUsage.cacheReadTokens += u.cacheReadTokens ?? 0;
|
|
495
|
+
}
|
|
496
|
+
resetWebSessionUsage() {
|
|
497
|
+
this.sessionTokenUsage = { inputTokens: 0, outputTokens: 0, cacheCreationTokens: 0, cacheReadTokens: 0 };
|
|
498
|
+
}
|
|
487
499
|
abortController = null;
|
|
488
500
|
userInterjection = null;
|
|
489
501
|
processing = false;
|
|
@@ -547,6 +559,7 @@ var SessionHandler = class _SessionHandler {
|
|
|
547
559
|
displayName: p.info.displayName,
|
|
548
560
|
models: p.info.models.map((m) => ({ id: m.id, name: m.displayName ?? m.id }))
|
|
549
561
|
}));
|
|
562
|
+
const costUsd = computeCost(this.currentProvider, this.currentModel, this.sessionTokenUsage);
|
|
550
563
|
this.send({
|
|
551
564
|
type: "status",
|
|
552
565
|
provider: this.currentProvider,
|
|
@@ -557,6 +570,7 @@ var SessionHandler = class _SessionHandler {
|
|
|
557
570
|
planMode: this.planMode,
|
|
558
571
|
thinkingMode: this.runtimeThinking ?? false,
|
|
559
572
|
tokenUsage: { ...this.sessionTokenUsage },
|
|
573
|
+
costUsd,
|
|
560
574
|
providers: providerList
|
|
561
575
|
});
|
|
562
576
|
}
|
|
@@ -724,8 +738,7 @@ var SessionHandler = class _SessionHandler {
|
|
|
724
738
|
if (chunk.done) {
|
|
725
739
|
this.send({ type: "response_done", content: fullContent, usage: chunk.usage });
|
|
726
740
|
if (chunk.usage) {
|
|
727
|
-
this.
|
|
728
|
-
this.sessionTokenUsage.outputTokens += chunk.usage.outputTokens;
|
|
741
|
+
this.addWebSessionUsage(chunk.usage);
|
|
729
742
|
session.addTokenUsage(chunk.usage);
|
|
730
743
|
}
|
|
731
744
|
break;
|
|
@@ -759,7 +772,7 @@ You have a maximum of ${maxToolRounds} tool call rounds for this task. Plan effi
|
|
|
759
772
|
- When remaining rounds are low, focus on completing the current task and summarizing.`;
|
|
760
773
|
const systemPrompt = baseSystemPrompt + roundBudgetHint;
|
|
761
774
|
const modelParams = this.getModelParams();
|
|
762
|
-
const roundUsage = { inputTokens: 0, outputTokens: 0 };
|
|
775
|
+
const roundUsage = { inputTokens: 0, outputTokens: 0, cacheCreationTokens: 0, cacheReadTokens: 0 };
|
|
763
776
|
const supportsStreamingTools = typeof provider.chatWithToolsStream === "function";
|
|
764
777
|
let consecutiveFreeRounds = 0;
|
|
765
778
|
const warnNoteAt = Math.max(10, Math.floor(maxToolRounds * 0.2));
|
|
@@ -829,6 +842,8 @@ You have a maximum of ${maxToolRounds} tool call rounds for this task. Plan effi
|
|
|
829
842
|
if (result.usage) {
|
|
830
843
|
roundUsage.inputTokens += result.usage.inputTokens;
|
|
831
844
|
roundUsage.outputTokens += result.usage.outputTokens;
|
|
845
|
+
roundUsage.cacheCreationTokens += result.usage.cacheCreationTokens ?? 0;
|
|
846
|
+
roundUsage.cacheReadTokens += result.usage.cacheReadTokens ?? 0;
|
|
832
847
|
}
|
|
833
848
|
if (result.content && !result.toolCalls) {
|
|
834
849
|
const hasWriteTools = toolDefs.some((t) => t.name === "write_file" || t.name === "edit_file");
|
|
@@ -843,8 +858,7 @@ You have a maximum of ${maxToolRounds} tool call rounds for this task. Plan effi
|
|
|
843
858
|
}
|
|
844
859
|
this.send({ type: "response_done", content: result.content, usage: roundUsage });
|
|
845
860
|
session.addMessage({ role: "assistant", content: result.content, timestamp: /* @__PURE__ */ new Date() });
|
|
846
|
-
this.
|
|
847
|
-
this.sessionTokenUsage.outputTokens += roundUsage.outputTokens;
|
|
861
|
+
this.addWebSessionUsage(roundUsage);
|
|
848
862
|
session.addTokenUsage(roundUsage);
|
|
849
863
|
return;
|
|
850
864
|
}
|
|
@@ -917,8 +931,7 @@ ${summaryResult.content}`,
|
|
|
917
931
|
message: `Reached maximum tool call rounds (${maxToolRounds}). You can continue by asking the AI to proceed.`
|
|
918
932
|
});
|
|
919
933
|
}
|
|
920
|
-
this.
|
|
921
|
-
this.sessionTokenUsage.outputTokens += roundUsage.outputTokens;
|
|
934
|
+
this.addWebSessionUsage(roundUsage);
|
|
922
935
|
session.addTokenUsage(roundUsage);
|
|
923
936
|
} catch (err) {
|
|
924
937
|
if (err.name === "AbortError") {
|
|
@@ -1034,7 +1047,7 @@ ${summaryResult.content}`,
|
|
|
1034
1047
|
case "clear":
|
|
1035
1048
|
this.saveIfNeeded();
|
|
1036
1049
|
this.sessions.createSession(this.currentProvider, this.currentModel);
|
|
1037
|
-
this.
|
|
1050
|
+
this.resetWebSessionUsage();
|
|
1038
1051
|
this.send({ type: "info", message: "Conversation cleared." });
|
|
1039
1052
|
this.sendStatus();
|
|
1040
1053
|
this.sendSessionList();
|
|
@@ -1068,12 +1081,19 @@ ${summaryResult.content}`,
|
|
|
1068
1081
|
}
|
|
1069
1082
|
case "status": {
|
|
1070
1083
|
const session = this.sessions.current;
|
|
1084
|
+
const cacheRead = this.sessionTokenUsage.cacheReadTokens;
|
|
1085
|
+
const cacheCreate = this.sessionTokenUsage.cacheCreationTokens;
|
|
1086
|
+
const cost = computeCost(this.currentProvider, this.currentModel, this.sessionTokenUsage);
|
|
1087
|
+
const cacheLine = cacheRead > 0 || cacheCreate > 0 ? `
|
|
1088
|
+
Cache: write=${cacheCreate} read=${cacheRead}` : "";
|
|
1089
|
+
const costLine = cost != null ? `
|
|
1090
|
+
Cost: ${formatCost(cost)}` : "";
|
|
1071
1091
|
this.send({
|
|
1072
1092
|
type: "info",
|
|
1073
1093
|
message: `Provider: ${this.currentProvider}
|
|
1074
1094
|
Model: ${this.currentModel}
|
|
1075
1095
|
Session: ${session?.id ?? "none"} (${session?.messages.length ?? 0} messages)
|
|
1076
|
-
Tokens: in=${this.sessionTokenUsage.inputTokens} out=${this.sessionTokenUsage.outputTokens}`
|
|
1096
|
+
Tokens: in=${this.sessionTokenUsage.inputTokens} out=${this.sessionTokenUsage.outputTokens}${cacheLine}${costLine}`
|
|
1077
1097
|
});
|
|
1078
1098
|
break;
|
|
1079
1099
|
}
|
|
@@ -1083,7 +1103,7 @@ Tokens: in=${this.sessionTokenUsage.inputTokens} out=${this.sessionTokenUsage.ou
|
|
|
1083
1103
|
this.saveIfNeeded();
|
|
1084
1104
|
const created = this.sessions.createSession(this.currentProvider, this.currentModel);
|
|
1085
1105
|
this.unsavedSessions.set(created.id, created);
|
|
1086
|
-
this.
|
|
1106
|
+
this.resetWebSessionUsage();
|
|
1087
1107
|
this.send({ type: "info", message: "New session created." });
|
|
1088
1108
|
this.sendStatus();
|
|
1089
1109
|
this.sendSessionList();
|
|
@@ -1094,7 +1114,7 @@ Tokens: in=${this.sessionTokenUsage.inputTokens} out=${this.sessionTokenUsage.ou
|
|
|
1094
1114
|
const cached = cachedExact ?? [...this.unsavedSessions.values()].find((s) => s.id.startsWith(targetId));
|
|
1095
1115
|
if (cached) {
|
|
1096
1116
|
this.sessions.setCurrent(cached);
|
|
1097
|
-
this.
|
|
1117
|
+
this.resetWebSessionUsage();
|
|
1098
1118
|
this.send({
|
|
1099
1119
|
type: "info",
|
|
1100
1120
|
message: `Loaded session: ${cached.id.slice(0, 8)} "${cached.title ?? ""}" (${cached.messages.length} messages)`
|
|
@@ -1108,7 +1128,7 @@ Tokens: in=${this.sessionTokenUsage.inputTokens} out=${this.sessionTokenUsage.ou
|
|
|
1108
1128
|
const found = list.find((s) => s.id.startsWith(targetId));
|
|
1109
1129
|
if (found) {
|
|
1110
1130
|
this.sessions.loadSession(found.id);
|
|
1111
|
-
this.
|
|
1131
|
+
this.resetWebSessionUsage();
|
|
1112
1132
|
this.send({ type: "info", message: `Loaded session: ${found.id.slice(0, 8)} "${found.title ?? ""}" (${found.messageCount} messages)` });
|
|
1113
1133
|
this.sendSessionMessages();
|
|
1114
1134
|
this.sendStatus();
|
|
@@ -1116,7 +1136,7 @@ Tokens: in=${this.sessionTokenUsage.inputTokens} out=${this.sessionTokenUsage.ou
|
|
|
1116
1136
|
} else {
|
|
1117
1137
|
const recreated = this.sessions.createSession(this.currentProvider, this.currentModel);
|
|
1118
1138
|
this.unsavedSessions.set(recreated.id, recreated);
|
|
1119
|
-
this.
|
|
1139
|
+
this.resetWebSessionUsage();
|
|
1120
1140
|
this.send({
|
|
1121
1141
|
type: "info",
|
|
1122
1142
|
message: `Previous session (${targetId.slice(0, 8)}) is no longer available \u2014 started a new one.`
|
|
@@ -1256,16 +1276,41 @@ Tokens: in=${this.sessionTokenUsage.inputTokens} out=${this.sessionTokenUsage.ou
|
|
|
1256
1276
|
});
|
|
1257
1277
|
break;
|
|
1258
1278
|
case "cost": {
|
|
1259
|
-
const
|
|
1260
|
-
|
|
1261
|
-
|
|
1262
|
-
|
|
1263
|
-
|
|
1264
|
-
|
|
1265
|
-
|
|
1266
|
-
|
|
1267
|
-
|
|
1268
|
-
|
|
1279
|
+
const u = this.sessionTokenUsage;
|
|
1280
|
+
const total = u.inputTokens + u.outputTokens + u.cacheCreationTokens + u.cacheReadTokens;
|
|
1281
|
+
const cost = computeCost(this.currentProvider, this.currentModel, u);
|
|
1282
|
+
let savings = null;
|
|
1283
|
+
if (cost != null && u.cacheReadTokens > 0) {
|
|
1284
|
+
const withoutCache = computeCost(this.currentProvider, this.currentModel, {
|
|
1285
|
+
inputTokens: u.inputTokens + u.cacheReadTokens,
|
|
1286
|
+
outputTokens: u.outputTokens,
|
|
1287
|
+
cacheCreationTokens: u.cacheCreationTokens,
|
|
1288
|
+
cacheReadTokens: 0
|
|
1289
|
+
});
|
|
1290
|
+
if (withoutCache != null) savings = withoutCache - cost;
|
|
1291
|
+
}
|
|
1292
|
+
const lines = [
|
|
1293
|
+
"\u{1F4B0} Session Cost & Token Usage",
|
|
1294
|
+
` Provider : ${this.currentProvider}`,
|
|
1295
|
+
` Model : ${this.currentModel}`,
|
|
1296
|
+
` Input (uncached): ${u.inputTokens.toLocaleString()}`,
|
|
1297
|
+
` Output : ${u.outputTokens.toLocaleString()}`
|
|
1298
|
+
];
|
|
1299
|
+
if (u.cacheCreationTokens > 0) lines.push(` Cache write : ${u.cacheCreationTokens.toLocaleString()}`);
|
|
1300
|
+
if (u.cacheReadTokens > 0) {
|
|
1301
|
+
const pct = Math.round(u.cacheReadTokens / (u.cacheReadTokens + u.inputTokens) * 100);
|
|
1302
|
+
lines.push(` Cache read : ${u.cacheReadTokens.toLocaleString()} (${pct}% hit rate)`);
|
|
1303
|
+
}
|
|
1304
|
+
lines.push(` Total tokens : ${total.toLocaleString()}`);
|
|
1305
|
+
if (cost != null) {
|
|
1306
|
+
lines.push(` Cost : ${formatCost(cost)}`);
|
|
1307
|
+
if (savings != null && savings > 0) {
|
|
1308
|
+
lines.push(` Cache savings : -${formatCost(savings)} (vs no cache)`);
|
|
1309
|
+
}
|
|
1310
|
+
} else {
|
|
1311
|
+
lines.push(` Cost : \u2014 (pricing unknown for this model)`);
|
|
1312
|
+
}
|
|
1313
|
+
this.send({ type: "info", message: lines.join("\n") });
|
|
1269
1314
|
break;
|
|
1270
1315
|
}
|
|
1271
1316
|
case "tools":
|
|
@@ -1691,7 +1736,7 @@ ${undoResults.map((r) => ` \u2022 ${r}`).join("\n")}` });
|
|
|
1691
1736
|
case "test": {
|
|
1692
1737
|
this.send({ type: "info", message: "\u{1F9EA} Running tests..." });
|
|
1693
1738
|
try {
|
|
1694
|
-
const { executeTests } = await import("./run-tests-
|
|
1739
|
+
const { executeTests } = await import("./run-tests-X4PCLXA2.js");
|
|
1695
1740
|
const argStr = args.join(" ").trim();
|
|
1696
1741
|
let testArgs = {};
|
|
1697
1742
|
if (argStr) {
|
|
@@ -4,11 +4,11 @@ import {
|
|
|
4
4
|
getDangerLevel,
|
|
5
5
|
googleSearchContext,
|
|
6
6
|
truncateOutput
|
|
7
|
-
} from "./chunk-
|
|
7
|
+
} from "./chunk-YQEIQJ6K.js";
|
|
8
8
|
import "./chunk-4BKXL7SM.js";
|
|
9
9
|
import {
|
|
10
10
|
SUBAGENT_ALLOWED_TOOLS
|
|
11
|
-
} from "./chunk-
|
|
11
|
+
} from "./chunk-W7QVBFIJ.js";
|
|
12
12
|
|
|
13
13
|
// src/hub/task-orchestrator.ts
|
|
14
14
|
import { createInterface } from "readline";
|
package/dist/web/client/app.js
CHANGED
|
@@ -496,7 +496,16 @@ function handleStatus(msg) {
|
|
|
496
496
|
btnPlan.classList.toggle('btn-active-toggle', msg.planMode);
|
|
497
497
|
statusSession.textContent = `📋 ${msg.sessionId?.slice(0, 8) || '—'} (${msg.messageCount} msgs)`;
|
|
498
498
|
if (msg.tokenUsage) {
|
|
499
|
-
|
|
499
|
+
const u = msg.tokenUsage;
|
|
500
|
+
const cacheRead = u.cacheReadTokens || 0;
|
|
501
|
+
let line = `📊 in: ${u.inputTokens.toLocaleString()} out: ${u.outputTokens.toLocaleString()}`;
|
|
502
|
+
if (cacheRead > 0) line += ` cache: ${cacheRead.toLocaleString()}`;
|
|
503
|
+
if (msg.costUsd != null) {
|
|
504
|
+
const cost = msg.costUsd;
|
|
505
|
+
const costStr = cost === 0 ? '$0' : cost < 0.01 ? `$${cost.toFixed(4)}` : cost < 1 ? `$${cost.toFixed(3)}` : `$${cost.toFixed(2)}`;
|
|
506
|
+
line += ` 💰 ${costStr}`;
|
|
507
|
+
}
|
|
508
|
+
statusTokens.textContent = line;
|
|
500
509
|
}
|
|
501
510
|
sessionListEl.querySelectorAll('.session-item').forEach(el => {
|
|
502
511
|
el.classList.toggle('active', el.dataset.sessionId === msg.sessionId);
|