@link-assistant/hive-mind 1.57.3 → 1.59.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +186 -0
- package/package.json +1 -1
- package/src/anthropic-server-tool-pricing.lib.mjs +34 -0
- package/src/bidirectional-interactive.lib.mjs +392 -21
- package/src/claude.budget-stats.lib.mjs +154 -27
- package/src/claude.cost.lib.mjs +88 -0
- package/src/claude.lib.mjs +54 -58
- package/src/codex.lib.mjs +31 -0
- package/src/config.lib.mjs +39 -2
- package/src/github-cost-info.lib.mjs +4 -1
- package/src/lino.lib.mjs +3 -1
- package/src/solve.auto-merge.lib.mjs +5 -0
- package/src/solve.config.lib.mjs +39 -0
- package/src/sub-session-size.lib.mjs +239 -0
- package/src/use-with-retry.lib.mjs +91 -0
|
@@ -47,6 +47,11 @@ export const accumulateModelUsage = (modelUsageMap, entry) => {
|
|
|
47
47
|
}
|
|
48
48
|
if (usage.cache_read_input_tokens) modelUsageMap[model].cacheReadTokens += usage.cache_read_input_tokens;
|
|
49
49
|
if (usage.output_tokens) modelUsageMap[model].outputTokens += usage.output_tokens;
|
|
50
|
+
// Issue #1710: track Anthropic server-tool usage from per-request JSONL entries
|
|
51
|
+
// so the public-pricing estimate can bill them at the documented per-request rate.
|
|
52
|
+
if (usage.server_tool_use?.web_search_requests) {
|
|
53
|
+
modelUsageMap[model].webSearchRequests += usage.server_tool_use.web_search_requests;
|
|
54
|
+
}
|
|
50
55
|
};
|
|
51
56
|
|
|
52
57
|
/**
|
|
@@ -109,6 +114,11 @@ export const displayModelUsage = async (usage, log) => {
|
|
|
109
114
|
await log(` ${label}: ${formatNumber(breakdown[key].tokens)} tokens × $${breakdown[key].costPerMillion}/M = $${new Decimal(breakdown[key].cost).toFixed(6)}`);
|
|
110
115
|
}
|
|
111
116
|
}
|
|
117
|
+
// Issue #1710: itemise server-tool charges so the residual that puzzled
|
|
118
|
+
// readers in PR #1707 ($0.04 web_search) is visible in the breakdown.
|
|
119
|
+
if (breakdown.webSearch && breakdown.webSearch.requests > 0) {
|
|
120
|
+
await log(` Web search: ${breakdown.webSearch.requests} requests × $${breakdown.webSearch.costPerRequest}/req = $${new Decimal(breakdown.webSearch.cost).toFixed(6)}`);
|
|
121
|
+
}
|
|
112
122
|
await log(' ─────────────────────────────────');
|
|
113
123
|
await log(` Total: $${new Decimal(usage.costUSD).toFixed(6)}`);
|
|
114
124
|
} else if (usage.modelInfo === null) {
|
|
@@ -129,7 +139,9 @@ export const displayCostComparison = async (publicCost, anthropicCost, log) => {
|
|
|
129
139
|
const hasAnthropic = anthropicCost !== null && anthropicCost !== undefined;
|
|
130
140
|
const publicDec = hasPublic ? new Decimal(publicCost) : null;
|
|
131
141
|
const anthropicDec = hasAnthropic ? new Decimal(anthropicCost) : null;
|
|
132
|
-
|
|
142
|
+
// Issue #1703: also collapse to the short form when the rounded difference is below display precision,
|
|
143
|
+
// so reports like "Difference: $-0.000000 (-0.00%)" no longer waste two extra lines.
|
|
144
|
+
if (publicDec && anthropicDec && anthropicDec.minus(publicDec).abs().toFixed(6) === '0.000000') {
|
|
133
145
|
await log(`\n 💰 Cost: $${anthropicDec.toFixed(6)}`);
|
|
134
146
|
return;
|
|
135
147
|
}
|
|
@@ -145,6 +157,52 @@ export const displayCostComparison = async (publicCost, anthropicCost, log) => {
|
|
|
145
157
|
}
|
|
146
158
|
};
|
|
147
159
|
|
|
160
|
+
/**
 * Issue #1710: Emit a verbose, machine-friendly trace of the raw inputs that
 * feed the budget-stats renderer for a single model.
 *
 * Every line is logged with `{ verbose: true }`, so keeping the trace out of
 * the default output is the logger's responsibility. The trace shows the peak
 * per-request figure next to the cumulative counters, splits cache writes
 * (5m / 1h) from cache reads, and surfaces server-tool usage (web search)
 * together with its implied dollar cost. Web search is billed by
 * `calculateModelCost`; the implied cost is still printed here so a residual
 * stays debuggable from a saved log when the model has no pricing data.
 *
 * @param {Object} usage - Per-model usage entry from `tokenUsage.modelUsage`.
 * @param {Object} tokenUsage - Full token usage object (used only for sub-session count).
 * @param {Function} log - Async logger (must accept a `{verbose}` options arg).
 * @returns {Promise<void>} Resolves once every trace line has been logged.
 */
export const dumpBudgetTrace = async (usage, tokenUsage, log) => {
  // Per-request web_search rate behind the "$10 / 1k searches" label below.
  // Kept as a string so Decimal parses it exactly instead of via a binary float.
  const WEB_SEARCH_USD_PER_REQUEST = '0.01';

  // A fresh options object per call, exactly as the logger received before.
  const trace = line => log(line, { verbose: true });

  const modelName = usage.modelName || usage.modelInfo?.name || 'unknown';
  const limit = usage.modelInfo?.limit || {};
  const peak = usage.peakContextUsage || 0;
  const writes5m = usage.cacheCreation5mTokens || 0;
  const writes1h = usage.cacheCreation1hTokens || 0;
  const writes = usage.cacheCreationTokens || 0;
  const reads = usage.cacheReadTokens || 0;
  const inputs = usage.inputTokens || 0;
  const outputs = usage.outputTokens || 0;
  const webSearches = usage.webSearchRequests || 0;
  const subSessionCount = (tokenUsage?.subSessions || []).length;
  const source = usage._sourceResultJson ? 'jsonl + result-event' : 'jsonl';

  await trace(`\n 📊 [budget-trace] ${modelName}`);

  // Issue #1710 R5: peak request is `input + cache_creation` (cache reads
  // tracked separately on the cumulative line).
  const contextSuffix = limit.context ? ` / ${formatNumber(limit.context)} context` : '';
  await trace(` peak request: ${formatNumber(peak)}${contextSuffix} (largest single-request input + cache_creation, excludes cache_read)`);
  await trace(` cumulative: input ${formatNumber(inputs)}, cache_write ${formatNumber(writes)} (5m ${formatNumber(writes5m)} / 1h ${formatNumber(writes1h)}), cache_read ${formatNumber(reads)}, output ${formatNumber(outputs)}`);

  // Issue #1710 R1: web_search is billed in calculateModelCost. The trace
  // still surfaces the implied dollar cost so the residual remains debuggable
  // from the saved log even if a future model lacks pricing data.
  const impliedSearchCost = webSearches > 0 ? ` (= $${new Decimal(webSearches).mul(WEB_SEARCH_USD_PER_REQUEST).toFixed(6)} at $10 / 1k searches)` : '';
  await trace(` server tools: web_search ${webSearches}${impliedSearchCost}`);

  // Cost lines are optional: print each only when the value is present
  // (`!= null` matches both null and undefined; a cost of 0 is still printed).
  if (usage.costUSD != null) {
    await trace(` cost (public): $${new Decimal(usage.costUSD).toFixed(6)}`);
  }
  if (usage._resultCostUSD != null) {
    await trace(` cost (anthropic result-event): $${new Decimal(usage._resultCostUSD).toFixed(6)}`);
  }

  await trace(` sub-session count: ${subSessionCount}`);
  await trace(` data source: ${source}`);
};
|
|
205
|
+
|
|
148
206
|
/**
|
|
149
207
|
* Display token budget statistics (context window usage and ratios)
|
|
150
208
|
* @param {Object} usage - Usage data for a model
|
|
@@ -153,6 +211,10 @@ export const displayCostComparison = async (publicCost, anthropicCost, log) => {
|
|
|
153
211
|
*/
|
|
154
212
|
/**
|
|
155
213
|
* Issue #1526: Updated to use single-line context+output format.
|
|
214
|
+
* Issue #1710: After the standard rendering, emit a verbose trace of the
|
|
215
|
+
* raw inputs that fed the renderer (gated behind --verbose),
|
|
216
|
+
* so future calculation-correctness reports can be triaged
|
|
217
|
+
* without re-running the session.
|
|
156
218
|
*/
|
|
157
219
|
export const displayBudgetStats = async (usage, tokenUsage, log) => {
|
|
158
220
|
const modelInfo = usage.modelInfo;
|
|
@@ -171,14 +233,18 @@ export const displayBudgetStats = async (usage, tokenUsage, log) => {
|
|
|
171
233
|
const peakContext = usage.peakContextUsage || 0;
|
|
172
234
|
|
|
173
235
|
if (hasMultipleSubSessions) {
|
|
174
|
-
// Issue #1600: Unified format — numbered list without "Context window:" prefix
|
|
236
|
+
// Issue #1600: Unified format — numbered list without "Context window:" prefix.
|
|
237
|
+
// Issue #1710 R3/R5: Peak input is `input + cache_creation` (cache reads
|
|
238
|
+
// are tracked separately on the Total line), and the bullet is now
|
|
239
|
+
// labelled "peak request:" so a reader does not try to reconcile it with
|
|
240
|
+
// the cumulative Total figure.
|
|
175
241
|
for (let i = 0; i < subSessions.length; i++) {
|
|
176
242
|
const sub = subSessions[i];
|
|
177
243
|
const subPeak = sub.peakContextUsage || 0;
|
|
178
244
|
const parts = [];
|
|
179
245
|
if (contextLimit && subPeak > 0) {
|
|
180
246
|
const pct = ((subPeak / contextLimit) * 100).toFixed(0);
|
|
181
|
-
parts.push(
|
|
247
|
+
parts.push(`peak request: ${formatNumber(subPeak)} / ${formatNumber(contextLimit)} (${pct}%) input tokens`);
|
|
182
248
|
}
|
|
183
249
|
if (outputLimit) {
|
|
184
250
|
const outPct = ((sub.outputTokens / outputLimit) * 100).toFixed(0);
|
|
@@ -192,7 +258,7 @@ export const displayBudgetStats = async (usage, tokenUsage, log) => {
|
|
|
192
258
|
const parts = [];
|
|
193
259
|
if (contextLimit) {
|
|
194
260
|
const pct = ((peakContext / contextLimit) * 100).toFixed(0);
|
|
195
|
-
parts.push(
|
|
261
|
+
parts.push(`peak request: ${formatNumber(peakContext)} / ${formatNumber(contextLimit)} (${pct}%) input tokens`);
|
|
196
262
|
}
|
|
197
263
|
if (outputLimit) {
|
|
198
264
|
const outPct = ((usage.outputTokens / outputLimit) * 100).toFixed(0);
|
|
@@ -203,16 +269,16 @@ export const displayBudgetStats = async (usage, tokenUsage, log) => {
|
|
|
203
269
|
}
|
|
204
270
|
}
|
|
205
271
|
|
|
206
|
-
// Cumulative totals — single line
|
|
207
|
-
// Issue #1547: Parenthesized cached format and consistent output format
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
let totalLine
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
}
|
|
272
|
+
// Cumulative totals — single line.
|
|
273
|
+
// Issue #1547: Parenthesized cached format and consistent output format.
|
|
274
|
+
// Issue #1710 R4: When cache writes are present, render them as a separate
|
|
275
|
+
// category instead of folding them into the input figure.
|
|
276
|
+
let totalLine = buildCumulativeInputPhrase({
|
|
277
|
+
input: usage.inputTokens || 0,
|
|
278
|
+
cacheWrites: usage.cacheCreationTokens || 0,
|
|
279
|
+
cacheReads: usage.cacheReadTokens || 0,
|
|
280
|
+
format: formatNumber,
|
|
281
|
+
});
|
|
216
282
|
if (peakContext === 0 && outputLimit) {
|
|
217
283
|
const outPct = ((usage.outputTokens / outputLimit) * 100).toFixed(0);
|
|
218
284
|
totalLine += `, ${formatNumber(usage.outputTokens)} / ${formatNumber(outputLimit)} (${outPct}%) output tokens`;
|
|
@@ -220,6 +286,9 @@ export const displayBudgetStats = async (usage, tokenUsage, log) => {
|
|
|
220
286
|
totalLine += `, ${formatNumber(usage.outputTokens)} output tokens`;
|
|
221
287
|
}
|
|
222
288
|
await log(` Total: ${totalLine}`);
|
|
289
|
+
|
|
290
|
+
// Issue #1710: verbose-only, never affects default output.
|
|
291
|
+
await dumpBudgetTrace(usage, tokenUsage, log);
|
|
223
292
|
};
|
|
224
293
|
|
|
225
294
|
/**
|
|
@@ -292,6 +361,44 @@ const formatTokensCompact = tokens => {
|
|
|
292
361
|
return tokens.toLocaleString();
|
|
293
362
|
};
|
|
294
363
|
|
|
364
|
+
/**
 * Issue #1710: Build the cumulative input-tokens phrase for the Total / fallback
 * lines, splitting cache writes and cache reads so neither category is ever
 * silently fused with raw input tokens.
 *
 * Forms (in priority order):
 * - reads > 0 && writes > 0 → "(X new + W cache writes + Y cache reads) input tokens"
 * - reads = 0 && writes > 0 → "(X new + W cache writes) input tokens"
 * - reads > 0 && writes = 0 → "(X + Y cached) input tokens" (back-compat shape)
 * - reads = 0 && writes = 0 → "X input tokens"
 *
 * The legacy `(X + Y cached)` shape is preserved when only cache reads exist,
 * so existing output does not churn. The explicit "new / cache writes" forms
 * only appear when cache writes are non-zero (issue #1710 R4).
 *
 * Counters that are missing, negative, or not finite numbers are treated as 0,
 * so the phrase never renders "NaN". When no formatter is supplied, numbers
 * are rendered with `String`.
 *
 * @param {Object} [opts]
 * @param {number} [opts.input] - non-cached input tokens (excludes cache writes/reads)
 * @param {number} [opts.cacheWrites] - cache_creation_input_tokens (cumulative)
 * @param {number} [opts.cacheReads] - cache_read_input_tokens (cumulative)
 * @param {(n: number) => string} [opts.format=String] - formatter (compact or full)
 * @returns {string} the cumulative input phrase, e.g. "(78K new + 57.6K cache writes) input tokens"
 *   when called with a compact formatter
 */
export const buildCumulativeInputPhrase = ({ input, cacheWrites, cacheReads, format = String } = {}) => {
  // Coerce to a finite, non-negative count; anything else collapses to 0.
  const toCount = value => {
    const numeric = Number(value);
    return Number.isFinite(numeric) && numeric > 0 ? numeric : 0;
  };

  const fresh = toCount(input);
  const written = toCount(cacheWrites);
  const read = toCount(cacheReads);

  if (written > 0) {
    // Cache writes present: always use the explicit "new / cache writes" wording.
    const readPart = read > 0 ? ` + ${format(read)} cache reads` : '';
    return `(${format(fresh)} new + ${format(written)} cache writes${readPart}) input tokens`;
  }
  if (read > 0) {
    // Reads only: keep the legacy "(X + Y cached)" shape.
    return `(${format(fresh)} + ${format(read)} cached) input tokens`;
  }
  return `${format(fresh)} input tokens`;
};
|
|
401
|
+
|
|
295
402
|
/**
|
|
296
403
|
* Format sub-sessions list for budget stats display
|
|
297
404
|
* @param {Array} subSessions - Array of sub-session usage objects
|
|
@@ -315,6 +422,9 @@ const formatSubSessionsList = (subSessions, contextLimit, outputLimit) => {
|
|
|
315
422
|
|
|
316
423
|
/**
|
|
317
424
|
* Issue #1600: Build a single-line context + output tokens string (unified format, no "Context window:" prefix).
|
|
425
|
+
* Issue #1710 R3/R5: The input figure is the peak per-request `input + cache_creation`
|
|
426
|
+
* (cache reads excluded). Labelling it "peak request:" lets readers tell it apart
|
|
427
|
+
* from the cumulative Total line.
|
|
318
428
|
* @param {number} peakContext - Peak context usage (0 if unknown — context display skipped)
|
|
319
429
|
* @param {number} contextLimit - Context window limit (null if unknown)
|
|
320
430
|
* @param {number} outputTokens - Output tokens used
|
|
@@ -326,7 +436,7 @@ const formatContextOutputLine = (peakContext, contextLimit, outputTokens, output
|
|
|
326
436
|
const parts = [];
|
|
327
437
|
if (contextLimit && peakContext > 0) {
|
|
328
438
|
const pct = ((peakContext / contextLimit) * 100).toFixed(0);
|
|
329
|
-
parts.push(
|
|
439
|
+
parts.push(`peak request: ${formatTokensCompact(peakContext)} / ${formatTokensCompact(contextLimit)} (${pct}%) input tokens`);
|
|
330
440
|
}
|
|
331
441
|
if (outputLimit) {
|
|
332
442
|
const outPct = ((outputTokens / outputLimit) * 100).toFixed(0);
|
|
@@ -457,20 +567,33 @@ export const buildBudgetStatsString = (tokenUsage, subAgentCalls = null) => {
|
|
|
457
567
|
} else if (peakContext > 0) {
|
|
458
568
|
stats += formatContextOutputLine(peakContext, contextLimit, usage.outputTokens, outputLimit, '- ');
|
|
459
569
|
} else if (outputLimit && callCount <= 1) {
|
|
460
|
-
// Issue #1600:
|
|
570
|
+
// Issue #1600: Sub-agent single sessions previously showed only an output line.
|
|
571
|
+
// Issue #1710 R2: Always surface the cumulative input information too — sub-agent
|
|
572
|
+
// models (e.g. Haiku) never appear as the responding model in the parent JSONL,
|
|
573
|
+
// so peakContext stays at 0; without this fallback the rendered comment loses
|
|
574
|
+
// the sub-agent's input-token information entirely. Cache writes / reads are
|
|
575
|
+
// split via the same helper used for the Total line so the two lines stay
|
|
576
|
+
// arithmetically consistent.
|
|
577
|
+
const inputPhrase = buildCumulativeInputPhrase({
|
|
578
|
+
input: usage.inputTokens || 0,
|
|
579
|
+
cacheWrites: usage.cacheCreationTokens || 0,
|
|
580
|
+
cacheReads: usage.cacheReadTokens || 0,
|
|
581
|
+
format: formatTokensCompact,
|
|
582
|
+
});
|
|
461
583
|
const outPct = ((usage.outputTokens / outputLimit) * 100).toFixed(0);
|
|
462
|
-
stats += `\n- ${formatTokensCompact(usage.outputTokens)} / ${formatTokensCompact(outputLimit)} (${outPct}%) output tokens`;
|
|
584
|
+
stats += `\n- ${inputPhrase}, ${formatTokensCompact(usage.outputTokens)} / ${formatTokensCompact(outputLimit)} (${outPct}%) output tokens`;
|
|
463
585
|
}
|
|
464
586
|
|
|
465
|
-
// Cumulative totals per model: input tokens + cached shown separately
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
587
|
+
// Cumulative totals per model: input tokens + cached shown separately.
|
|
588
|
+
// Issue #1710 R4: Cache writes are now their own category (so the displayed
|
|
589
|
+
// "input tokens" figure never silently fuses 1.25× / 2× cache-write tokens
|
|
590
|
+
// with regular 1× input tokens — see issue #1710 root cause D).
|
|
591
|
+
let totalLine = buildCumulativeInputPhrase({
|
|
592
|
+
input: usage.inputTokens || 0,
|
|
593
|
+
cacheWrites: usage.cacheCreationTokens || 0,
|
|
594
|
+
cacheReads: usage.cacheReadTokens || 0,
|
|
595
|
+
format: formatTokensCompact,
|
|
596
|
+
});
|
|
474
597
|
|
|
475
598
|
// Issue #1600: Output tokens on Total line — skip percentage if already shown above or aggregated
|
|
476
599
|
if (callCount > 1) {
|
|
@@ -512,7 +635,11 @@ export const buildBudgetStatsString = (tokenUsage, subAgentCalls = null) => {
|
|
|
512
635
|
stats += `\n${i + 1}. ${parts.join(', ')}`;
|
|
513
636
|
}
|
|
514
637
|
} else {
|
|
515
|
-
|
|
638
|
+
// Estimated per-call breakdown when sub-agent stream tracking did not capture
|
|
639
|
+
// per-call usage. Includes everything the model actually saw:
|
|
640
|
+
// input + cache_creation (writes) + cache_read.
|
|
641
|
+
const aggregateInput = (usage.inputTokens || 0) + (usage.cacheCreationTokens || 0) + (usage.cacheReadTokens || 0);
|
|
642
|
+
const avgInput = Math.round(aggregateInput / callCount);
|
|
516
643
|
const avgOutput = Math.round(usage.outputTokens / callCount);
|
|
517
644
|
for (let i = 0; i < matchingCalls.length; i++) {
|
|
518
645
|
const parts = [];
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Issue #1710: Per-model cost calculation extracted from claude.lib.mjs to
|
|
5
|
+
* keep that file under the 1500-line repo cap. Behaviour is unchanged from
|
|
6
|
+
* the previous in-place implementation apart from the new per-request web_search charge.
|
|
7
|
+
*/
|
|
8
|
+
import Decimal from 'decimal.js-light';
|
|
9
|
+
import { SERVER_TOOL_PRICING_USD } from './anthropic-server-tool-pricing.lib.mjs';
|
|
10
|
+
|
|
11
|
+
/**
 * Compute the USD cost of one model's usage, optionally with an itemised breakdown.
 *
 * Cost components (Issue #1600 uses Decimal for precision):
 * - input      × cost.input / 1M
 * - cacheWrite × cost.cache_write / 1M
 * - cacheRead  × cost.cache_read / 1M
 * - output     × cost.output / 1M
 * - webSearch  × SERVER_TOOL_PRICING_USD.web_search.costPerRequest (Issue #1710)
 *
 * A component stays at its zero entry when either its usage counter or its
 * price is falsy. When `modelInfo` or `modelInfo.cost` is missing, the total
 * is 0 and the breakdown is `null`.
 *
 * @param {Object} usage - per-model usage entry
 * @param {Object|null} modelInfo - model-info shape (includes `cost` map)
 * @param {boolean} [includeBreakdown=false] - return `{ total, breakdown }` when true
 * @returns {number|{total: number, breakdown: Object}}
 */
export const calculateModelCost = (usage, modelInfo, includeBreakdown = false) => {
  const pricing = modelInfo?.cost;
  if (!pricing) {
    return includeBreakdown ? { total: 0, breakdown: null } : 0;
  }

  const ONE_MILLION = new Decimal(1000000);

  // [breakdown key, usage counter, price key]; order fixes the breakdown key order.
  const tokenCategories = [
    ['input', 'inputTokens', 'input'],
    ['cacheWrite', 'cacheCreationTokens', 'cache_write'],
    ['cacheRead', 'cacheReadTokens', 'cache_read'],
    ['output', 'outputTokens', 'output'],
  ];

  const breakdown = {};
  for (const [key, usageField, priceField] of tokenCategories) {
    const tokens = usage[usageField];
    const rate = pricing[priceField];
    if (tokens && rate) {
      breakdown[key] = {
        tokens,
        costPerMillion: rate,
        cost: new Decimal(tokens).div(ONE_MILLION).mul(new Decimal(rate)).toNumber(),
      };
    } else {
      breakdown[key] = { tokens: 0, costPerMillion: 0, cost: 0 };
    }
  }

  // Issue #1710: server-side tool usage (web_search) is billed per-request,
  // independent of token cost. Without this entry the public-pricing total
  // diverges from Anthropic's reported total by exactly the per-request
  // rate times the request count — the residual quoted in issue #1710.
  // The pricing table is consulted only when there are requests to bill.
  const searches = usage.webSearchRequests;
  const perRequest = searches ? SERVER_TOOL_PRICING_USD.web_search.costPerRequest : 0;
  if (searches && perRequest > 0) {
    breakdown.webSearch = {
      requests: searches,
      costPerRequest: perRequest,
      cost: new Decimal(searches).mul(new Decimal(perRequest)).toNumber(),
    };
  } else {
    breakdown.webSearch = { requests: 0, costPerRequest: 0, cost: 0 };
  }

  // Sum in a fixed order: input, cache write, cache read, output, web search.
  const total = [breakdown.cacheWrite, breakdown.cacheRead, breakdown.output, breakdown.webSearch]
    .reduce((sum, part) => sum.plus(part.cost), new Decimal(breakdown.input.cost))
    .toNumber();

  return includeBreakdown ? { total, breakdown } : total;
};
|
package/src/claude.lib.mjs
CHANGED
|
@@ -25,6 +25,7 @@ import { resolveClaudeSessionToolFlags } from './useless-tools.lib.mjs';
|
|
|
25
25
|
import { ensureClaudeQuietConfig } from './claude-quiet-config.lib.mjs';
|
|
26
26
|
import { fetchModelInfo } from './model-info.lib.mjs';
|
|
27
27
|
import { classifyRetryableError, maybeSwitchToFallbackModel } from './tool-retry.lib.mjs';
|
|
28
|
+
import { resolveSubSessionSize } from './sub-session-size.lib.mjs'; // Issue #1706
|
|
28
29
|
export { availableModels }; // Re-export for backward compatibility
|
|
29
30
|
export { fetchModelInfo };
|
|
30
31
|
const showResumeCommand = async (sessionId, tempDir, claudePath, model, log) => {
|
|
@@ -373,6 +374,9 @@ export const executeClaude = async params => {
|
|
|
373
374
|
owner,
|
|
374
375
|
repo,
|
|
375
376
|
prNumber,
|
|
377
|
+
// Issue #1708: forwarded so the bidirectional handler can poll
|
|
378
|
+
// issue title/body changes and uncommitted changes during the session.
|
|
379
|
+
issueNumber,
|
|
376
380
|
});
|
|
377
381
|
};
|
|
378
382
|
/** Check if a model supports vision (image input) using models.dev API @returns {Promise<boolean>} */
|
|
@@ -386,56 +390,10 @@ export const checkModelVisionCapability = async modelId => {
|
|
|
386
390
|
return false;
|
|
387
391
|
}
|
|
388
392
|
};
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
}
|
|
394
|
-
const cost = modelInfo.cost;
|
|
395
|
-
const million = new Decimal(1000000);
|
|
396
|
-
const breakdown = {
|
|
397
|
-
input: { tokens: 0, costPerMillion: 0, cost: 0 },
|
|
398
|
-
cacheWrite: { tokens: 0, costPerMillion: 0, cost: 0 },
|
|
399
|
-
cacheRead: { tokens: 0, costPerMillion: 0, cost: 0 },
|
|
400
|
-
output: { tokens: 0, costPerMillion: 0, cost: 0 },
|
|
401
|
-
};
|
|
402
|
-
if (usage.inputTokens && cost.input) {
|
|
403
|
-
breakdown.input = {
|
|
404
|
-
tokens: usage.inputTokens,
|
|
405
|
-
costPerMillion: cost.input,
|
|
406
|
-
cost: new Decimal(usage.inputTokens).div(million).mul(new Decimal(cost.input)).toNumber(),
|
|
407
|
-
};
|
|
408
|
-
}
|
|
409
|
-
if (usage.cacheCreationTokens && cost.cache_write) {
|
|
410
|
-
breakdown.cacheWrite = {
|
|
411
|
-
tokens: usage.cacheCreationTokens,
|
|
412
|
-
costPerMillion: cost.cache_write,
|
|
413
|
-
cost: new Decimal(usage.cacheCreationTokens).div(million).mul(new Decimal(cost.cache_write)).toNumber(),
|
|
414
|
-
};
|
|
415
|
-
}
|
|
416
|
-
if (usage.cacheReadTokens && cost.cache_read) {
|
|
417
|
-
breakdown.cacheRead = {
|
|
418
|
-
tokens: usage.cacheReadTokens,
|
|
419
|
-
costPerMillion: cost.cache_read,
|
|
420
|
-
cost: new Decimal(usage.cacheReadTokens).div(million).mul(new Decimal(cost.cache_read)).toNumber(),
|
|
421
|
-
};
|
|
422
|
-
}
|
|
423
|
-
if (usage.outputTokens && cost.output) {
|
|
424
|
-
breakdown.output = {
|
|
425
|
-
tokens: usage.outputTokens,
|
|
426
|
-
costPerMillion: cost.output,
|
|
427
|
-
cost: new Decimal(usage.outputTokens).div(million).mul(new Decimal(cost.output)).toNumber(),
|
|
428
|
-
};
|
|
429
|
-
}
|
|
430
|
-
const totalCost = new Decimal(breakdown.input.cost).plus(breakdown.cacheWrite.cost).plus(breakdown.cacheRead.cost).plus(breakdown.output.cost).toNumber();
|
|
431
|
-
if (includeBreakdown) {
|
|
432
|
-
return {
|
|
433
|
-
total: totalCost,
|
|
434
|
-
breakdown,
|
|
435
|
-
};
|
|
436
|
-
}
|
|
437
|
-
return totalCost;
|
|
438
|
-
};
|
|
393
|
+
// Issue #1710: calculateModelCost extracted to ./claude.cost.lib.mjs to keep
|
|
394
|
+
// this file under the 1500-line repo cap (see check-file-line-limits CI job).
|
|
395
|
+
import { calculateModelCost } from './claude.cost.lib.mjs';
|
|
396
|
+
export { calculateModelCost };
|
|
439
397
|
export const calculateSessionTokens = async (sessionId, tempDir, resultModelUsage = null) => {
|
|
440
398
|
const os = (await use('os')).default;
|
|
441
399
|
const homeDir = os.homedir();
|
|
@@ -497,8 +455,14 @@ export const calculateSessionTokens = async (sessionId, tempDir, resultModelUsag
|
|
|
497
455
|
}
|
|
498
456
|
accumulateModelUsage(modelUsage, entry);
|
|
499
457
|
// Issue #1501: Track peak context usage per single API request
|
|
458
|
+
// Issue #1710: Exclude cache_read_input_tokens — sub-sessions and
|
|
459
|
+
// per-request peaks should reflect *new* input the model received,
|
|
460
|
+
// not cached prompt context. Cache reads remain visible in the
|
|
461
|
+
// cumulative Total line as `(X + Y cached)`. This makes the
|
|
462
|
+
// peak-request value reconcilable with the cumulative non-cached
|
|
463
|
+
// input figure (instead of mixing semantics across the two lines).
|
|
500
464
|
const usage = entry.message.usage;
|
|
501
|
-
const requestContext = (usage.input_tokens || 0) + (usage.cache_creation_input_tokens || 0)
|
|
465
|
+
const requestContext = (usage.input_tokens || 0) + (usage.cache_creation_input_tokens || 0);
|
|
502
466
|
const model = entry.message.model;
|
|
503
467
|
if (requestContext > (peakContextByModel[model] || 0)) {
|
|
504
468
|
peakContextByModel[model] = requestContext;
|
|
@@ -633,6 +597,9 @@ export const executeClaudeCommand = async params => {
|
|
|
633
597
|
owner,
|
|
634
598
|
repo,
|
|
635
599
|
prNumber,
|
|
600
|
+
// Issue #1708: enables status streaming (CI/uncommitted/PR-metadata)
|
|
601
|
+
// and issue body/title polling in setupBidirectionalHandler.
|
|
602
|
+
issueNumber,
|
|
636
603
|
} = params;
|
|
637
604
|
// Issue #817: Apply bidirectional-mode composition and tool-support validation before running.
|
|
638
605
|
// This may enable argv.interactiveMode, argv.acceptIncommingCommentsAsInput, and
|
|
@@ -721,9 +688,11 @@ export const executeClaudeCommand = async params => {
|
|
|
721
688
|
} else if (argv.interactiveMode) {
|
|
722
689
|
await log('⚠️ Interactive mode: Disabled - missing PR info (owner/repo/prNumber)', { verbose: true });
|
|
723
690
|
}
|
|
724
|
-
// Issue #817: Set up bidirectional handler when --accept-incomming-comments-as-input
|
|
725
|
-
// (or composite --bidirectional-interactive-mode) is enabled.
|
|
726
|
-
|
|
691
|
+
// Issue #817 / #1708: Set up bidirectional handler when --accept-incomming-comments-as-input
|
|
692
|
+
// (or composite --bidirectional-interactive-mode / --auto-input-until-mergeable) is enabled.
|
|
693
|
+
// Returns null when inactive. issueNumber + tempDir are forwarded so the handler can
|
|
694
|
+
// poll issue title/body changes and uncommitted changes during the session (Issue #1708).
|
|
695
|
+
const bidirectionalHandler = await setupBidirectionalHandler({ argv, owner, repo, prNumber, issueNumber, tempDir, $, log });
|
|
727
696
|
const progressMonitor = await initProgressMonitoring(argv, { owner, repo, prNumber, $, log }); // works with or without --interactive-mode
|
|
728
697
|
let execCommand;
|
|
729
698
|
const mappedModel = mapModelToId(argv.model);
|
|
@@ -761,9 +730,10 @@ export const executeClaudeCommand = async params => {
|
|
|
761
730
|
}
|
|
762
731
|
try {
|
|
763
732
|
const { thinkingBudget: resolvedThinkingBudget, thinkLevel, isNewVersion, maxBudget } = await resolveThinkingSettings(argv, log);
|
|
764
|
-
// Issue #
|
|
765
|
-
|
|
766
|
-
|
|
733
|
+
// Issue #1706: --sub-session-size + --disable-1m-context. Resolve here, then pass into getClaudeEnv along with the rest.
|
|
734
|
+
const { parsed: parsedSubSessionSize, contextWindowTokens } = await resolveSubSessionSize({ rawValue: argv.subSessionSize, tool: 'claude', modelId: effectiveModel, fetchModelInfo, log });
|
|
735
|
+
// Issue #817: streaming mode sets exitAfterStopDelayMs=60000 so the headless Claude process stays alive between NDJSON turns.
|
|
736
|
+
const claudeEnv = getClaudeEnv({ thinkingBudget: resolvedThinkingBudget, model: effectiveModel, thinkLevel, maxBudget, planModel: resolvedPlanModel, executionModel: resolvedExecutionModel, showThinkingContent: argv.showThinkingContent, exitAfterStopDelayMs: streamingInput ? 60_000 : undefined, disable1mContext: !!argv.disable1mContext, subSessionSize: parsedSubSessionSize, contextWindowTokens });
|
|
767
737
|
if (argv.verbose) claudeEnv.ANTHROPIC_LOG = 'debug';
|
|
768
738
|
const modelMaxOutputTokens = getMaxOutputTokensForModel(effectiveModel);
|
|
769
739
|
if (argv.verbose) {
|
|
@@ -772,6 +742,9 @@ export const executeClaudeCommand = async params => {
|
|
|
772
742
|
if (resolvedThinkingBudget !== undefined) await log(`📊 MAX_THINKING_TOKENS: ${resolvedThinkingBudget}`, { verbose: true });
|
|
773
743
|
if (claudeEnv.CLAUDE_CODE_EFFORT_LEVEL) await log(`📊 CLAUDE_CODE_EFFORT_LEVEL: ${claudeEnv.CLAUDE_CODE_EFFORT_LEVEL}`, { verbose: true });
|
|
774
744
|
if (claudeEnv.CLAUDE_CODE_SHOW_THINKING) await log(`📊 CLAUDE_CODE_SHOW_THINKING: ${claudeEnv.CLAUDE_CODE_SHOW_THINKING}`, { verbose: true });
|
|
745
|
+
// Issue #1706: log applied env vars (--disable-1m-context, --sub-session-size).
|
|
746
|
+
const sub1706 = ['CLAUDE_CODE_DISABLE_1M_CONTEXT', 'CLAUDE_CODE_AUTO_COMPACT_WINDOW', 'CLAUDE_AUTOCOMPACT_PCT_OVERRIDE'].filter(k => claudeEnv[k]).map(k => `${k}=${claudeEnv[k]}`);
|
|
747
|
+
if (sub1706.length) await log(`📊 ${sub1706.join(', ')}`, { verbose: true });
|
|
775
748
|
if (!isNewVersion && thinkLevel) await log(`📊 Thinking level (via keywords): ${thinkLevel}`, { verbose: true });
|
|
776
749
|
}
|
|
777
750
|
const simpleEscapedSystem = systemPrompt.replace(/"/g, '\\"');
|
|
@@ -920,6 +893,18 @@ export const executeClaudeCommand = async params => {
|
|
|
920
893
|
}
|
|
921
894
|
if (data.type === 'message') messageCount++;
|
|
922
895
|
else if (data.type === 'tool_use') toolUseCount++;
|
|
896
|
+
// Issue #1708: signal busy/idle to the bidirectional handler so
|
|
897
|
+
// queue-comments-to-input mode can hold frames until the AI is
|
|
898
|
+
// idle. Any assistant/tool_use/tool_result event means the AI is
|
|
899
|
+
// actively processing; a result event means the turn is done
|
|
900
|
+
// and queued frames can flush.
|
|
901
|
+
if (bidirectionalHandler) {
|
|
902
|
+
if (data.type === 'assistant' || data.type === 'tool_use' || data.type === 'tool_result') {
|
|
903
|
+
if (typeof bidirectionalHandler.markAiBusy === 'function') {
|
|
904
|
+
bidirectionalHandler.markAiBusy();
|
|
905
|
+
}
|
|
906
|
+
}
|
|
907
|
+
}
|
|
923
908
|
if (progressMonitor) await progressMonitor.processStreamEvent(data).catch(e => log(`⚠️ Progress: ${e.message}`, { verbose: true }));
|
|
924
909
|
if (data.type === 'result') {
|
|
925
910
|
if (!resultEventReceived) {
|
|
@@ -927,6 +912,15 @@ export const executeClaudeCommand = async params => {
|
|
|
927
912
|
await log(`📌 Result event received, starting ${streamCloseTimeoutMs / 1000}s stream close timeout (Issue #1280)`, { verbose: true });
|
|
928
913
|
resultTimeoutId = setTimeout(forceExitOnTimeout, streamCloseTimeoutMs);
|
|
929
914
|
}
|
|
915
|
+
// Issue #1708: result event = AI is idle and waiting for next
|
|
916
|
+
// user input. Flush any frames queued by --queue-comments-to-input.
|
|
917
|
+
if (bidirectionalHandler && typeof bidirectionalHandler.markAiIdle === 'function') {
|
|
918
|
+
try {
|
|
919
|
+
await bidirectionalHandler.markAiIdle();
|
|
920
|
+
} catch (idleErr) {
|
|
921
|
+
if (argv.verbose) await log(`⚠️ Bidirectional mode: markAiIdle error: ${idleErr.message}`, { verbose: true });
|
|
922
|
+
}
|
|
923
|
+
}
|
|
930
924
|
if (data.subtype === 'success') resultSuccessReceived = true;
|
|
931
925
|
if (data.subtype === 'success' && data.total_cost_usd !== undefined && data.total_cost_usd !== null) {
|
|
932
926
|
anthropicTotalCostUSD = data.total_cost_usd;
|
|
@@ -1301,7 +1295,9 @@ export const executeClaudeCommand = async params => {
|
|
|
1301
1295
|
await log(`\n⚠️ JSONL deduplication: skipped ${tokenUsage.duplicateEntriesSkipped} duplicate entries (upstream: anthropics/claude-code#6805)`, { verbose: true });
|
|
1302
1296
|
}
|
|
1303
1297
|
if (tokenUsage.peakContextUsage > 0) {
|
|
1304
|
-
|
|
1298
|
+
// Issue #1710: rename so the metric matches the new definition (input + cache_creation,
|
|
1299
|
+
// excluding cache_read). Cache reads are still visible separately on the Total line.
|
|
1300
|
+
await log(`📊 Peak single-request input (excl. cache reads): ${formatNumber(tokenUsage.peakContextUsage)} tokens`, { verbose: true });
|
|
1305
1301
|
}
|
|
1306
1302
|
await log('\n💰 Token Usage Summary:');
|
|
1307
1303
|
// Display per-model breakdown
|
package/src/codex.lib.mjs
CHANGED
|
@@ -25,6 +25,7 @@ import { getCodexPlaywrightMcpDisableConfigArgs } from './playwright-mcp.lib.mjs
|
|
|
25
25
|
import { fetchModelInfo } from './model-info.lib.mjs';
|
|
26
26
|
import { defaultModels } from './models/index.mjs';
|
|
27
27
|
import { classifyRetryableError, getRetryDelayMs, maybeSwitchToFallbackModel, waitWithCountdown } from './tool-retry.lib.mjs';
|
|
28
|
+
import { parseSubSessionSize, buildCodexSubSessionSizeConfigArgs, buildCodexDisable1mContextConfigArgs } from './sub-session-size.lib.mjs'; // Issue #1706
|
|
28
29
|
import Decimal from 'decimal.js-light';
|
|
29
30
|
|
|
30
31
|
const CODEX_USAGE_FIELD_NAMES = ['input_tokens', 'cached_input_tokens', 'output_tokens', 'cache_write_tokens', 'cache_creation_input_tokens', 'reasoning_tokens', 'input_tokens_details.cached_tokens', 'input_tokens_details.cache_read_tokens', 'input_tokens_details.cache_write_tokens', 'input_tokens_details.cache_creation_tokens', 'input_tokens_details.cache_creation_input_tokens', 'output_tokens_details.reasoning_tokens'];
|
|
@@ -741,6 +742,36 @@ export const executeCodexCommand = async params => {
|
|
|
741
742
|
}
|
|
742
743
|
codexArgs += ` --json --skip-git-repo-check -o ${shellQuote(lastMessageFile)} -c ${shellQuote(`model_reasoning_effort=${reasoningEffort}`)} -c ${shellQuote('model_reasoning_summary=auto')} --dangerously-bypass-approvals-and-sandbox`;
|
|
743
744
|
|
|
745
|
+
// Issue #1706: Append --disable-1m-context and --sub-session-size as Codex -c overrides.
|
|
746
|
+
let parsedSubSessionSize;
|
|
747
|
+
try {
|
|
748
|
+
parsedSubSessionSize = parseSubSessionSize(argv.subSessionSize);
|
|
749
|
+
} catch (parseError) {
|
|
750
|
+
await log(`⚠️ ${parseError.message}`, { level: 'warn' });
|
|
751
|
+
parsedSubSessionSize = { kind: 'default', tokens: null, percent: null, raw: '' };
|
|
752
|
+
}
|
|
753
|
+
let codexContextWindowTokens = null;
|
|
754
|
+
if (parsedSubSessionSize.kind === 'percent') {
|
|
755
|
+
try {
|
|
756
|
+
const codexModelMeta = await fetchModelInfo(mappedModel, { preferredProviderIds: ['openai'] });
|
|
757
|
+
codexContextWindowTokens = codexModelMeta?.limit?.context || null;
|
|
758
|
+
} catch {
|
|
759
|
+
codexContextWindowTokens = null;
|
|
760
|
+
}
|
|
761
|
+
}
|
|
762
|
+
const disable1mArgs = buildCodexDisable1mContextConfigArgs(!!argv.disable1mContext);
|
|
763
|
+
for (const arg of disable1mArgs) {
|
|
764
|
+
codexArgs += ` ${shellQuote(arg)}`;
|
|
765
|
+
}
|
|
766
|
+
const subSessionSizeArgs = buildCodexSubSessionSizeConfigArgs(parsedSubSessionSize, { contextWindow: codexContextWindowTokens });
|
|
767
|
+
for (const arg of subSessionSizeArgs) {
|
|
768
|
+
codexArgs += ` ${shellQuote(arg)}`;
|
|
769
|
+
}
|
|
770
|
+
if (argv.verbose) {
|
|
771
|
+
if (disable1mArgs.length) await log(`📊 Codex --disable-1m-context: ${disable1mArgs.join(' ')}`, { verbose: true });
|
|
772
|
+
if (subSessionSizeArgs.length) await log(`📊 Codex --sub-session-size: ${subSessionSizeArgs.join(' ')}`, { verbose: true });
|
|
773
|
+
}
|
|
774
|
+
|
|
744
775
|
const fullCommand = `(cd ${shellQuote(tempDir)} && cat ${shellQuote(promptFile)} | ${codexPath} ${codexArgs})`;
|
|
745
776
|
|
|
746
777
|
await log(`\n${formatAligned('📝', 'Raw command:', '')}`);
|