@link-assistant/hive-mind 1.58.0 → 1.59.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +217 -0
- package/package.json +1 -1
- package/src/anthropic-server-tool-pricing.lib.mjs +34 -0
- package/src/bidirectional-interactive.lib.mjs +392 -21
- package/src/claude.budget-stats.lib.mjs +151 -26
- package/src/claude.cost.lib.mjs +88 -0
- package/src/claude.lib.mjs +46 -55
- package/src/config.lib.mjs +5 -1
- package/src/github-merge-repo-actions.lib.mjs +54 -0
- package/src/github-merge.lib.mjs +24 -6
- package/src/lino.lib.mjs +3 -1
- package/src/queue-config.lib.mjs +7 -2
- package/src/solve.auto-merge-helpers.lib.mjs +89 -7
- package/src/solve.auto-merge.lib.mjs +27 -2
- package/src/solve.config.lib.mjs +29 -0
- package/src/use-with-retry.lib.mjs +107 -0
package/src/claude.budget-stats.lib.mjs
CHANGED
@@ -47,6 +47,11 @@ export const accumulateModelUsage = (modelUsageMap, entry) => {
   }
   if (usage.cache_read_input_tokens) modelUsageMap[model].cacheReadTokens += usage.cache_read_input_tokens;
   if (usage.output_tokens) modelUsageMap[model].outputTokens += usage.output_tokens;
+  // Issue #1710: track Anthropic server-tool usage from per-request JSONL entries
+  // so the public-pricing estimate can bill them at the documented per-request rate.
+  if (usage.server_tool_use?.web_search_requests) {
+    modelUsageMap[model].webSearchRequests += usage.server_tool_use.web_search_requests;
+  }
 };
 
 /**
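The new `server_tool_use` check reads the same per-request JSONL `usage` payload the surrounding accumulators already consume. A minimal sketch of such an entry, with field names taken from this file and from `calculateSessionTokens` in claude.lib.mjs; the model id and counts are invented:

```js
// Illustrative JSONL stream entry (values invented); only the fields read above matter.
const entry = {
  message: {
    model: 'claude-example-model',                 // hypothetical model id
    usage: {
      input_tokens: 1200,
      cache_creation_input_tokens: 800,
      cache_read_input_tokens: 4500,
      output_tokens: 300,
      server_tool_use: { web_search_requests: 2 }, // billed per request, not per token
    },
  },
};
// accumulateModelUsage(modelUsageMap, entry) adds 2 to
// modelUsageMap['claude-example-model'].webSearchRequests.
```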
@@ -109,6 +114,11 @@ export const displayModelUsage = async (usage, log) => {
         await log(`    ${label}: ${formatNumber(breakdown[key].tokens)} tokens × $${breakdown[key].costPerMillion}/M = $${new Decimal(breakdown[key].cost).toFixed(6)}`);
       }
     }
+    // Issue #1710: itemise server-tool charges so the residual that puzzled
+    // readers in PR #1707 ($0.04 web_search) is visible in the breakdown.
+    if (breakdown.webSearch && breakdown.webSearch.requests > 0) {
+      await log(`    Web search: ${breakdown.webSearch.requests} requests × $${breakdown.webSearch.costPerRequest}/req = $${new Decimal(breakdown.webSearch.cost).toFixed(6)}`);
+    }
     await log('    ─────────────────────────────────');
     await log(`    Total: $${new Decimal(usage.costUSD).toFixed(6)}`);
   } else if (usage.modelInfo === null) {
@@ -147,6 +157,52 @@ export const displayCostComparison = async (publicCost, anthropicCost, log) => {
   }
 };
 
+/**
+ * Issue #1710: Emit a verbose, machine-friendly trace of every input that
+ * feeds the budget-stats renderer for a single model. Hidden behind
+ * `{ verbose: true }` so it never pollutes the default log, but always
+ * captured when --verbose is set. The trace is what we wished we had had
+ * available *before* filing #1710 — it shows peak vs. cumulative side by
+ * side, splits cache writes from cache reads, and surfaces server-tool
+ * usage (web search) that the public-pricing estimator currently ignores.
+ *
+ * @param {Object} usage - Per-model usage entry from `tokenUsage.modelUsage`.
+ * @param {Object} tokenUsage - Full token usage object (used only for sub-session count).
+ * @param {Function} log - Async logger (must accept a `{verbose}` options arg).
+ */
+export const dumpBudgetTrace = async (usage, tokenUsage, log) => {
+  const modelName = usage.modelName || usage.modelInfo?.name || 'unknown';
+  const limit = usage.modelInfo?.limit || {};
+  const peak = usage.peakContextUsage || 0;
+  const writes5m = usage.cacheCreation5mTokens || 0;
+  const writes1h = usage.cacheCreation1hTokens || 0;
+  const writes = usage.cacheCreationTokens || 0;
+  const reads = usage.cacheReadTokens || 0;
+  const inputs = usage.inputTokens || 0;
+  const outputs = usage.outputTokens || 0;
+  const webSearches = usage.webSearchRequests || 0;
+  const subSessionCount = (tokenUsage?.subSessions || []).length;
+  const source = usage._sourceResultJson ? 'jsonl + result-event' : 'jsonl';
+
+  await log(`\n  📊 [budget-trace] ${modelName}`, { verbose: true });
+  // Issue #1710 R5: peak request is `input + cache_creation` (cache reads
+  // tracked separately on the cumulative line).
+  await log(`    peak request: ${formatNumber(peak)}${limit.context ? ` / ${formatNumber(limit.context)} context` : ''} (largest single-request input + cache_creation, excludes cache_read)`, { verbose: true });
+  await log(`    cumulative: input ${formatNumber(inputs)}, cache_write ${formatNumber(writes)} (5m ${formatNumber(writes5m)} / 1h ${formatNumber(writes1h)}), cache_read ${formatNumber(reads)}, output ${formatNumber(outputs)}`, { verbose: true });
+  // Issue #1710 R1: web_search is now billed in calculateModelCost. The trace
+  // still surfaces the implied dollar cost so the residual remains debuggable
+  // from the saved log even if a future model lacks pricing data.
+  await log(`    server tools: web_search ${webSearches}${webSearches > 0 ? ` (= $${(webSearches * 0.01).toFixed(6)} at $10 / 1k searches)` : ''}`, { verbose: true });
+  if (usage.costUSD !== null && usage.costUSD !== undefined) {
+    await log(`    cost (public): $${new Decimal(usage.costUSD).toFixed(6)}`, { verbose: true });
+  }
+  if (usage._resultCostUSD !== null && usage._resultCostUSD !== undefined) {
+    await log(`    cost (anthropic result-event): $${new Decimal(usage._resultCostUSD).toFixed(6)}`, { verbose: true });
+  }
+  await log(`    sub-session count: ${subSessionCount}`, { verbose: true });
+  await log(`    data source: ${source}`, { verbose: true });
+};
+
 /**
  * Display token budget statistics (context window usage and ratios)
  * @param {Object} usage - Usage data for a model
@@ -155,6 +211,10 @@ export const displayCostComparison = async (publicCost, anthropicCost, log) => {
  */
 /**
  * Issue #1526: Updated to use single-line context+output format.
+ * Issue #1710: After the standard rendering, emit a verbose trace of the
+ * raw inputs that fed the renderer (gated behind --verbose),
+ * so future calculation-correctness reports can be triaged
+ * without re-running the session.
  */
 export const displayBudgetStats = async (usage, tokenUsage, log) => {
   const modelInfo = usage.modelInfo;
@@ -173,14 +233,18 @@ export const displayBudgetStats = async (usage, tokenUsage, log) => {
   const peakContext = usage.peakContextUsage || 0;
 
   if (hasMultipleSubSessions) {
-    // Issue #1600: Unified format — numbered list without "Context window:" prefix
+    // Issue #1600: Unified format — numbered list without "Context window:" prefix.
+    // Issue #1710 R3/R5: Peak input is `input + cache_creation` (cache reads
+    // are tracked separately on the Total line), and the bullet is now
+    // labelled "peak request:" so a reader does not try to reconcile it with
+    // the cumulative Total figure.
     for (let i = 0; i < subSessions.length; i++) {
       const sub = subSessions[i];
       const subPeak = sub.peakContextUsage || 0;
       const parts = [];
       if (contextLimit && subPeak > 0) {
         const pct = ((subPeak / contextLimit) * 100).toFixed(0);
-        parts.push(
+        parts.push(`peak request: ${formatNumber(subPeak)} / ${formatNumber(contextLimit)} (${pct}%) input tokens`);
       }
       if (outputLimit) {
         const outPct = ((sub.outputTokens / outputLimit) * 100).toFixed(0);
@@ -194,7 +258,7 @@ export const displayBudgetStats = async (usage, tokenUsage, log) => {
     const parts = [];
     if (contextLimit) {
       const pct = ((peakContext / contextLimit) * 100).toFixed(0);
-      parts.push(
+      parts.push(`peak request: ${formatNumber(peakContext)} / ${formatNumber(contextLimit)} (${pct}%) input tokens`);
     }
     if (outputLimit) {
       const outPct = ((usage.outputTokens / outputLimit) * 100).toFixed(0);
@@ -205,16 +269,16 @@ export const displayBudgetStats = async (usage, tokenUsage, log) => {
     }
   }
 
-  // Cumulative totals — single line
-  // Issue #1547: Parenthesized cached format and consistent output format
-
-
-  let totalLine
-
-
-
-
-  }
+  // Cumulative totals — single line.
+  // Issue #1547: Parenthesized cached format and consistent output format.
+  // Issue #1710 R4: When cache writes are present, render them as a separate
+  // category instead of folding them into the input figure.
+  let totalLine = buildCumulativeInputPhrase({
+    input: usage.inputTokens || 0,
+    cacheWrites: usage.cacheCreationTokens || 0,
+    cacheReads: usage.cacheReadTokens || 0,
+    format: formatNumber,
+  });
   if (peakContext === 0 && outputLimit) {
     const outPct = ((usage.outputTokens / outputLimit) * 100).toFixed(0);
     totalLine += `, ${formatNumber(usage.outputTokens)} / ${formatNumber(outputLimit)} (${outPct}%) output tokens`;
@@ -222,6 +286,9 @@ export const displayBudgetStats = async (usage, tokenUsage, log) => {
     totalLine += `, ${formatNumber(usage.outputTokens)} output tokens`;
   }
   await log(`  Total: ${totalLine}`);
+
+  // Issue #1710: verbose-only, never affects default output.
+  await dumpBudgetTrace(usage, tokenUsage, log);
 };
 
 /**
@@ -294,6 +361,44 @@ const formatTokensCompact = tokens => {
   return tokens.toLocaleString();
 };
 
+/**
+ * Issue #1710: Build the cumulative input-tokens phrase for the Total / fallback
+ * lines, splitting cache writes and cache reads so neither category is ever
+ * silently fused with raw input tokens.
+ *
+ * Forms (in priority order):
+ * - reads > 0 && writes > 0 → "(X new + W cache writes + Y cache reads) input tokens"
+ * - reads > 0 && writes = 0 → "(X + Y cached) input tokens" (back-compat shape)
+ * - reads = 0 && writes > 0 → "(X new + W cache writes) input tokens"
+ * - reads = 0 && writes = 0 → "X input tokens"
+ *
+ * The legacy `(X + Y cached)` shape is preserved when only cache reads exist
+ * so we don't churn output for the common Opus-only case. The new explicit
+ * forms only appear when cache writes are non-zero (issue #1710 R4).
+ *
+ * @param {Object} opts
+ * @param {number} opts.input - non-cached input tokens (excludes cache writes/reads)
+ * @param {number} opts.cacheWrites - cache_creation_input_tokens (cumulative)
+ * @param {number} opts.cacheReads - cache_read_input_tokens (cumulative)
+ * @param {(n: number) => string} opts.format - formatter (compact or full)
+ * @returns {string} the cumulative input phrase, e.g. "(78K new + 57.6K cache writes) input tokens"
+ */
+export const buildCumulativeInputPhrase = ({ input, cacheWrites, cacheReads, format }) => {
+  const w = Math.max(0, cacheWrites || 0);
+  const r = Math.max(0, cacheReads || 0);
+  const i = Math.max(0, input || 0);
+  if (w > 0 && r > 0) {
+    return `(${format(i)} new + ${format(w)} cache writes + ${format(r)} cache reads) input tokens`;
+  }
+  if (w > 0) {
+    return `(${format(i)} new + ${format(w)} cache writes) input tokens`;
+  }
+  if (r > 0) {
+    return `(${format(i)} + ${format(r)} cached) input tokens`;
+  }
+  return `${format(i)} input tokens`;
+};
+
 /**
  * Format sub-sessions list for budget stats display
  * @param {Array} subSessions - Array of sub-session usage objects
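The four phrase shapes listed in the JSDoc map onto the helper like this. A hedged sketch: the import path and the locale-aware formatter are assumptions; the expected strings follow directly from the branches shown in the hunk above.

```js
import { buildCumulativeInputPhrase } from './claude.budget-stats.lib.mjs';

// Assumes an en-US locale formatter; token counts are illustrative.
const format = n => n.toLocaleString('en-US');

buildCumulativeInputPhrase({ input: 78000, cacheWrites: 57600, cacheReads: 912000, format });
// → "(78,000 new + 57,600 cache writes + 912,000 cache reads) input tokens"
buildCumulativeInputPhrase({ input: 78000, cacheWrites: 0, cacheReads: 912000, format });
// → "(78,000 + 912,000 cached) input tokens"   (legacy back-compat shape)
buildCumulativeInputPhrase({ input: 78000, cacheWrites: 57600, cacheReads: 0, format });
// → "(78,000 new + 57,600 cache writes) input tokens"
buildCumulativeInputPhrase({ input: 78000, cacheWrites: 0, cacheReads: 0, format });
// → "78,000 input tokens"
```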
@@ -317,6 +422,9 @@ const formatSubSessionsList = (subSessions, contextLimit, outputLimit) => {
 
 /**
  * Issue #1600: Build a single-line context + output tokens string (unified format, no "Context window:" prefix).
+ * Issue #1710 R3/R5: The input figure is the peak per-request `input + cache_creation`
+ * (cache reads excluded). Labelling it "peak request:" lets readers tell it apart
+ * from the cumulative Total line.
  * @param {number} peakContext - Peak context usage (0 if unknown — context display skipped)
  * @param {number} contextLimit - Context window limit (null if unknown)
  * @param {number} outputTokens - Output tokens used
@@ -328,7 +436,7 @@ const formatContextOutputLine = (peakContext, contextLimit, outputTokens, output
   const parts = [];
   if (contextLimit && peakContext > 0) {
     const pct = ((peakContext / contextLimit) * 100).toFixed(0);
-    parts.push(
+    parts.push(`peak request: ${formatTokensCompact(peakContext)} / ${formatTokensCompact(contextLimit)} (${pct}%) input tokens`);
   }
   if (outputLimit) {
     const outPct = ((outputTokens / outputLimit) * 100).toFixed(0);
@@ -459,20 +567,33 @@ export const buildBudgetStatsString = (tokenUsage, subAgentCalls = null) => {
     } else if (peakContext > 0) {
       stats += formatContextOutputLine(peakContext, contextLimit, usage.outputTokens, outputLimit, '- ');
     } else if (outputLimit && callCount <= 1) {
-      // Issue #1600:
+      // Issue #1600: Sub-agent single sessions previously showed only an output line.
+      // Issue #1710 R2: Always surface the cumulative input information too — sub-agent
+      // models (e.g. Haiku) never appear as the responding model in the parent JSONL,
+      // so peakContext stays at 0; without this fallback the rendered comment loses
+      // the sub-agent's input-token information entirely. Cache writes / reads are
+      // split via the same helper used for the Total line so the two lines stay
+      // arithmetically consistent.
+      const inputPhrase = buildCumulativeInputPhrase({
+        input: usage.inputTokens || 0,
+        cacheWrites: usage.cacheCreationTokens || 0,
+        cacheReads: usage.cacheReadTokens || 0,
+        format: formatTokensCompact,
+      });
       const outPct = ((usage.outputTokens / outputLimit) * 100).toFixed(0);
-      stats += `\n- ${formatTokensCompact(usage.outputTokens)} / ${formatTokensCompact(outputLimit)} (${outPct}%) output tokens`;
+      stats += `\n- ${inputPhrase}, ${formatTokensCompact(usage.outputTokens)} / ${formatTokensCompact(outputLimit)} (${outPct}%) output tokens`;
     }
 
-    // Cumulative totals per model: input tokens + cached shown separately
-
-
-
-
-
-
-
-
+    // Cumulative totals per model: input tokens + cached shown separately.
+    // Issue #1710 R4: Cache writes are now their own category (so the displayed
+    // "input tokens" figure never silently fuses 1.25× / 2× cache-write tokens
+    // with regular 1× input tokens — see issue #1710 root cause D).
+    let totalLine = buildCumulativeInputPhrase({
+      input: usage.inputTokens || 0,
+      cacheWrites: usage.cacheCreationTokens || 0,
+      cacheReads: usage.cacheReadTokens || 0,
+      format: formatTokensCompact,
+    });
 
     // Issue #1600: Output tokens on Total line — skip percentage if already shown above or aggregated
     if (callCount > 1) {
@@ -514,7 +635,11 @@ export const buildBudgetStatsString = (tokenUsage, subAgentCalls = null) => {
       stats += `\n${i + 1}. ${parts.join(', ')}`;
     }
   } else {
-
+    // Estimated per-call breakdown when sub-agent stream tracking did not capture
+    // per-call usage. Includes everything the model actually saw:
+    // input + cache_creation (writes) + cache_read.
+    const aggregateInput = (usage.inputTokens || 0) + (usage.cacheCreationTokens || 0) + (usage.cacheReadTokens || 0);
+    const avgInput = Math.round(aggregateInput / callCount);
     const avgOutput = Math.round(usage.outputTokens / callCount);
     for (let i = 0; i < matchingCalls.length; i++) {
       const parts = [];
package/src/claude.cost.lib.mjs
ADDED
@@ -0,0 +1,88 @@
+#!/usr/bin/env node
+
+/**
+ * Issue #1710: Per-model cost calculation extracted from claude.lib.mjs to
+ * keep that file under the 1500-line repo cap. Behaviour is unchanged from
+ * the previous in-place implementation.
+ */
+import Decimal from 'decimal.js-light';
+import { SERVER_TOOL_PRICING_USD } from './anthropic-server-tool-pricing.lib.mjs';
+
+/**
+ * Calculate USD cost for a model's usage with optional detailed breakdown.
+ *
+ * Cost components (Issue #1600 uses Decimal for precision):
+ * - input × cost.input / 1M
+ * - cacheWrite × cost.cache_write / 1M
+ * - cacheRead × cost.cache_read / 1M
+ * - output × cost.output / 1M
+ * - webSearch × $0.01 / request (Issue #1710 — see SERVER_TOOL_PRICING_USD)
+ *
+ * @param {Object} usage - per-model usage entry
+ * @param {Object|null} modelInfo - model-info shape (includes `cost` map)
+ * @param {boolean} [includeBreakdown=false] - return `{ total, breakdown }` when true
+ * @returns {number|{total: number, breakdown: Object}}
+ */
+export const calculateModelCost = (usage, modelInfo, includeBreakdown = false) => {
+  if (!modelInfo || !modelInfo.cost) {
+    return includeBreakdown ? { total: 0, breakdown: null } : 0;
+  }
+  const cost = modelInfo.cost;
+  const million = new Decimal(1000000);
+  const breakdown = {
+    input: { tokens: 0, costPerMillion: 0, cost: 0 },
+    cacheWrite: { tokens: 0, costPerMillion: 0, cost: 0 },
+    cacheRead: { tokens: 0, costPerMillion: 0, cost: 0 },
+    output: { tokens: 0, costPerMillion: 0, cost: 0 },
+    // Issue #1710: server-side tool usage (web_search) is billed per-request,
+    // independent of token cost. Without this entry the public-pricing total
+    // diverges from Anthropic's reported total by exactly the per-request
+    // rate times the request count — the residual quoted in issue #1710.
+    webSearch: { requests: 0, costPerRequest: 0, cost: 0 },
+  };
+  if (usage.inputTokens && cost.input) {
+    breakdown.input = {
+      tokens: usage.inputTokens,
+      costPerMillion: cost.input,
+      cost: new Decimal(usage.inputTokens).div(million).mul(new Decimal(cost.input)).toNumber(),
+    };
+  }
+  if (usage.cacheCreationTokens && cost.cache_write) {
+    breakdown.cacheWrite = {
+      tokens: usage.cacheCreationTokens,
+      costPerMillion: cost.cache_write,
+      cost: new Decimal(usage.cacheCreationTokens).div(million).mul(new Decimal(cost.cache_write)).toNumber(),
+    };
+  }
+  if (usage.cacheReadTokens && cost.cache_read) {
+    breakdown.cacheRead = {
+      tokens: usage.cacheReadTokens,
+      costPerMillion: cost.cache_read,
+      cost: new Decimal(usage.cacheReadTokens).div(million).mul(new Decimal(cost.cache_read)).toNumber(),
+    };
+  }
+  if (usage.outputTokens && cost.output) {
+    breakdown.output = {
+      tokens: usage.outputTokens,
+      costPerMillion: cost.output,
+      cost: new Decimal(usage.outputTokens).div(million).mul(new Decimal(cost.output)).toNumber(),
+    };
+  }
+  // Issue #1710: bill web_search requests at the documented per-request rate.
+  if (usage.webSearchRequests && SERVER_TOOL_PRICING_USD.web_search.costPerRequest > 0) {
+    const perReq = SERVER_TOOL_PRICING_USD.web_search.costPerRequest;
+    breakdown.webSearch = {
+      requests: usage.webSearchRequests,
+      costPerRequest: perReq,
+      cost: new Decimal(usage.webSearchRequests).mul(new Decimal(perReq)).toNumber(),
+    };
+  }
+  const totalCost = new Decimal(breakdown.input.cost).plus(breakdown.cacheWrite.cost).plus(breakdown.cacheRead.cost).plus(breakdown.output.cost).plus(breakdown.webSearch.cost).toNumber();
+  if (includeBreakdown) {
+    return {
+      total: totalCost,
+      breakdown,
+    };
+  }
+  return totalCost;
+};
package/src/claude.lib.mjs
CHANGED
@@ -374,6 +374,9 @@ export const executeClaude = async params => {
     owner,
     repo,
     prNumber,
+    // Issue #1708: forwarded so the bidirectional handler can poll
+    // issue title/body changes and uncommitted changes during the session.
+    issueNumber,
   });
 };
 /** Check if a model supports vision (image input) using models.dev API @returns {Promise<boolean>} */
@@ -387,56 +390,10 @@ export const checkModelVisionCapability = async modelId => {
     return false;
   }
 };
-
-
-
-
-  }
-  const cost = modelInfo.cost;
-  const million = new Decimal(1000000);
-  const breakdown = {
-    input: { tokens: 0, costPerMillion: 0, cost: 0 },
-    cacheWrite: { tokens: 0, costPerMillion: 0, cost: 0 },
-    cacheRead: { tokens: 0, costPerMillion: 0, cost: 0 },
-    output: { tokens: 0, costPerMillion: 0, cost: 0 },
-  };
-  if (usage.inputTokens && cost.input) {
-    breakdown.input = {
-      tokens: usage.inputTokens,
-      costPerMillion: cost.input,
-      cost: new Decimal(usage.inputTokens).div(million).mul(new Decimal(cost.input)).toNumber(),
-    };
-  }
-  if (usage.cacheCreationTokens && cost.cache_write) {
-    breakdown.cacheWrite = {
-      tokens: usage.cacheCreationTokens,
-      costPerMillion: cost.cache_write,
-      cost: new Decimal(usage.cacheCreationTokens).div(million).mul(new Decimal(cost.cache_write)).toNumber(),
-    };
-  }
-  if (usage.cacheReadTokens && cost.cache_read) {
-    breakdown.cacheRead = {
-      tokens: usage.cacheReadTokens,
-      costPerMillion: cost.cache_read,
-      cost: new Decimal(usage.cacheReadTokens).div(million).mul(new Decimal(cost.cache_read)).toNumber(),
-    };
-  }
-  if (usage.outputTokens && cost.output) {
-    breakdown.output = {
-      tokens: usage.outputTokens,
-      costPerMillion: cost.output,
-      cost: new Decimal(usage.outputTokens).div(million).mul(new Decimal(cost.output)).toNumber(),
-    };
-  }
-  const totalCost = new Decimal(breakdown.input.cost).plus(breakdown.cacheWrite.cost).plus(breakdown.cacheRead.cost).plus(breakdown.output.cost).toNumber();
-  if (includeBreakdown) {
-    return {
-      total: totalCost,
-      breakdown,
-    };
-  }
-  return totalCost;
-};
+// Issue #1710: calculateModelCost extracted to ./claude.cost.lib.mjs to keep
+// this file under the 1500-line repo cap (see check-file-line-limits CI job).
+import { calculateModelCost } from './claude.cost.lib.mjs';
+export { calculateModelCost };
 export const calculateSessionTokens = async (sessionId, tempDir, resultModelUsage = null) => {
   const os = (await use('os')).default;
   const homeDir = os.homedir();
@@ -498,8 +455,14 @@ export const calculateSessionTokens = async (sessionId, tempDir, resultModelUsag
       }
       accumulateModelUsage(modelUsage, entry);
       // Issue #1501: Track peak context usage per single API request
+      // Issue #1710: Exclude cache_read_input_tokens — sub-sessions and
+      // per-request peaks should reflect *new* input the model received,
+      // not cached prompt context. Cache reads remain visible in the
+      // cumulative Total line as `(X + Y cached)`. This makes the
+      // peak-request value reconcilable with the cumulative non-cached
+      // input figure (instead of mixing semantics across the two lines).
       const usage = entry.message.usage;
-      const requestContext = (usage.input_tokens || 0) + (usage.cache_creation_input_tokens || 0)
+      const requestContext = (usage.input_tokens || 0) + (usage.cache_creation_input_tokens || 0);
       const model = entry.message.model;
       if (requestContext > (peakContextByModel[model] || 0)) {
         peakContextByModel[model] = requestContext;
@@ -634,6 +597,9 @@ export const executeClaudeCommand = async params => {
     owner,
     repo,
     prNumber,
+    // Issue #1708: enables status streaming (CI/uncommitted/PR-metadata)
+    // and issue body/title polling in setupBidirectionalHandler.
+    issueNumber,
   } = params;
   // Issue #817: Apply bidirectional-mode composition and tool-support validation before running.
   // This may enable argv.interactiveMode, argv.acceptIncommingCommentsAsInput, and
@@ -722,9 +688,11 @@ export const executeClaudeCommand = async params => {
   } else if (argv.interactiveMode) {
     await log('⚠️ Interactive mode: Disabled - missing PR info (owner/repo/prNumber)', { verbose: true });
   }
-  // Issue #817: Set up bidirectional handler when --accept-incomming-comments-as-input
-  // (or composite --bidirectional-interactive-mode) is enabled.
-
+  // Issue #817 / #1708: Set up bidirectional handler when --accept-incomming-comments-as-input
+  // (or composite --bidirectional-interactive-mode / --auto-input-until-mergeable) is enabled.
+  // Returns null when inactive. issueNumber + tempDir are forwarded so the handler can
+  // poll issue title/body changes and uncommitted changes during the session (Issue #1708).
+  const bidirectionalHandler = await setupBidirectionalHandler({ argv, owner, repo, prNumber, issueNumber, tempDir, $, log });
   const progressMonitor = await initProgressMonitoring(argv, { owner, repo, prNumber, $, log }); // works with or without --interactive-mode
   let execCommand;
   const mappedModel = mapModelToId(argv.model);
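The stream loop in the hunks that follow relies on only two methods of the returned handler: a synchronous markAiBusy() and an async markAiIdle() that may flush queued frames. A hypothetical stand-in illustrating that contract; the real implementation lives in src/bidirectional-interactive.lib.mjs (+392 lines in this release) and is not shown in this diff.

```js
// Hypothetical stand-in for illustration only; not the published handler.
const makeQueueingHandler = log => {
  const queuedFrames = [];
  return {
    // hypothetical entry point used by whatever polls for new comments
    enqueue: frame => queuedFrames.push(frame),
    // called from the stream loop on assistant / tool_use / tool_result events
    markAiBusy: () => { /* AI is mid-turn: hold incoming frames */ },
    // called from the stream loop on the result event; flush anything queued
    markAiIdle: async () => {
      while (queuedFrames.length > 0) {
        await log(`flushing queued frame: ${queuedFrames.shift()}`);
      }
    },
  };
};
```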
@@ -925,6 +893,18 @@ export const executeClaudeCommand = async params => {
           }
           if (data.type === 'message') messageCount++;
           else if (data.type === 'tool_use') toolUseCount++;
+          // Issue #1708: signal busy/idle to the bidirectional handler so
+          // queue-comments-to-input mode can hold frames until the AI is
+          // idle. Any assistant/tool_use/system event means the AI is
+          // actively processing; a result event means the turn is done
+          // and queued frames can flush.
+          if (bidirectionalHandler) {
+            if (data.type === 'assistant' || data.type === 'tool_use' || data.type === 'tool_result') {
+              if (typeof bidirectionalHandler.markAiBusy === 'function') {
+                bidirectionalHandler.markAiBusy();
+              }
+            }
+          }
           if (progressMonitor) await progressMonitor.processStreamEvent(data).catch(e => log(`⚠️ Progress: ${e.message}`, { verbose: true }));
           if (data.type === 'result') {
             if (!resultEventReceived) {
@@ -932,6 +912,15 @@ export const executeClaudeCommand = async params => {
               await log(`📌 Result event received, starting ${streamCloseTimeoutMs / 1000}s stream close timeout (Issue #1280)`, { verbose: true });
               resultTimeoutId = setTimeout(forceExitOnTimeout, streamCloseTimeoutMs);
             }
+            // Issue #1708: result event = AI is idle and waiting for next
+            // user input. Flush any frames queued by --queue-comments-to-input.
+            if (bidirectionalHandler && typeof bidirectionalHandler.markAiIdle === 'function') {
+              try {
+                await bidirectionalHandler.markAiIdle();
+              } catch (idleErr) {
+                if (argv.verbose) await log(`⚠️ Bidirectional mode: markAiIdle error: ${idleErr.message}`, { verbose: true });
+              }
+            }
             if (data.subtype === 'success') resultSuccessReceived = true;
             if (data.subtype === 'success' && data.total_cost_usd !== undefined && data.total_cost_usd !== null) {
               anthropicTotalCostUSD = data.total_cost_usd;
@@ -1306,7 +1295,9 @@ export const executeClaudeCommand = async params => {
     await log(`\n⚠️ JSONL deduplication: skipped ${tokenUsage.duplicateEntriesSkipped} duplicate entries (upstream: anthropics/claude-code#6805)`, { verbose: true });
   }
   if (tokenUsage.peakContextUsage > 0) {
-
+    // Issue #1710: rename so the metric matches the new definition (input + cache_creation,
+    // excluding cache_read). Cache reads are still visible separately on the Total line.
+    await log(`📊 Peak single-request input (excl. cache reads): ${formatNumber(tokenUsage.peakContextUsage)} tokens`, { verbose: true });
   }
   await log('\n💰 Token Usage Summary:');
   // Display per-model breakdown
package/src/config.lib.mjs
CHANGED
@@ -18,7 +18,11 @@ if (typeof globalThis.use === 'undefined') {
   }
 }
 
-
+// Issue #1710: use-m occasionally hands back a truncated/corrupt global package
+// (npm install -g flake on hosted CI). useWithRetry deletes the broken install
+// dir and re-fetches when the failure is a SyntaxError mid-import.
+const { useWithRetry } = await import('./use-with-retry.lib.mjs');
+const getenvModule = await useWithRetry(globalThis.use, 'getenv');
 // Node 24 CJS/ESM interop may return the whole module object instead of the function directly
 const getenv = typeof getenvModule === 'function' ? getenvModule : getenvModule.default || getenvModule;
 
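use-with-retry.lib.mjs itself (+107 lines in the file list) is not included in this excerpt, so the sketch below only illustrates the recovery idea the comment describes: treat a SyntaxError thrown mid-import as a corrupt global install, remove it, and fetch once more. Everything here apart from the useWithRetry(use, spec) call shape seen above is an assumption.

```js
// Illustrative sketch only; not the published use-with-retry.lib.mjs.
import { rm } from 'node:fs/promises';

export const useWithRetrySketch = async (use, spec, { brokenInstallDir } = {}) => {
  try {
    return await use(spec);
  } catch (err) {
    // A SyntaxError part-way through import is the signature of a truncated
    // `npm install -g` result rather than a bug in the package itself.
    if (err instanceof SyntaxError && brokenInstallDir) {
      await rm(brokenInstallDir, { recursive: true, force: true });
      return use(spec); // one re-fetch after clearing the corrupt install
    }
    throw err;
  }
};
```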
package/src/github-merge-repo-actions.lib.mjs
CHANGED
@@ -91,6 +91,60 @@ export async function getPRCommitShas(owner, repo, prNumber, verbose = false) {
   }
 }
 
+/**
+ * Issue #1712: Collect every active (in_progress / pending / queued / waiting / requested)
+ * workflow run on the PR branch — across ALL commits, not only the head SHA.
+ *
+ * Why this exists: when the user watches `/merge`, the GitHub Actions tab shows yellow
+ * dots for every commit that ever had a run, including older commits whose runs were
+ * automatically cancelled by GitHub's concurrency group. The verbose log used to list
+ * only the head-SHA runs, so a user comparing the log to the GitHub UI would see
+ * "1 workflow run" in the log but two yellow dots on screen — looking like a bug.
+ *
+ * Returns runs grouped by SHA, deduplicated by run.id (a single run can be associated
+ * with one SHA, but the same workflow file can produce runs on multiple SHAs).
+ *
+ * @param {string} owner - Repository owner
+ * @param {string} repo - Repository name
+ * @param {number} prNumber - Pull request number
+ * @param {string} headSha - The PR head SHA (used to mark which group is "current")
+ * @param {boolean} verbose - Whether to log verbose output
+ * @param {Function} getWorkflowRunsForSha - Function to get workflow runs for a SHA
+ * @returns {Promise<{groups: Array<{sha: string, isHead: boolean, runs: Array}>, totalActive: number, headActive: number, otherActive: number}>}
+ */
+export async function getActivePRWorkflowRuns(owner, repo, prNumber, headSha, verbose, getWorkflowRunsForSha) {
+  const shas = await getPRCommitShas(owner, repo, prNumber, false);
+  if (shas.length === 0) {
+    return { groups: [], totalActive: 0, headActive: 0, otherActive: 0 };
+  }
+
+  const ACTIVE_STATUSES = new Set(['in_progress', 'pending', 'queued', 'waiting', 'requested']);
+  const groups = [];
+  const seenRunIds = new Set();
+  let totalActive = 0;
+  let headActive = 0;
+  let otherActive = 0;
+
+  for (const sha of shas) {
+    const runs = await getWorkflowRunsForSha(owner, repo, sha, false);
+    const activeRuns = runs.filter(r => ACTIVE_STATUSES.has(r.status) && !seenRunIds.has(r.id));
+    for (const r of activeRuns) seenRunIds.add(r.id);
+    if (activeRuns.length === 0) continue;
+
+    const isHead = sha === headSha;
+    groups.push({ sha, isHead, runs: activeRuns });
+    totalActive += activeRuns.length;
+    if (isHead) headActive += activeRuns.length;
+    else otherActive += activeRuns.length;
+  }
+
+  if (verbose && totalActive > 0) {
+    console.log(`[VERBOSE] pr-commits: ${totalActive} active workflow run(s) across ${groups.length} commit(s) on PR #${prNumber} (${headActive} on HEAD, ${otherActive} on older commits)`);
+  }
+
+  return { groups, totalActive, headActive, otherActive };
+}
+
 /**
  * Check that workflow runs for ALL commits on the PR branch have completed.
  * @param {string} owner - Repository owner
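Assuming this hunk belongs to github-merge-repo-actions.lib.mjs (the only +54/-0 entry in the file list), a caller only needs to inject a fetcher with the (owner, repo, sha, verbose) signature used above that resolves to runs shaped like { id, status }. A hedged usage sketch with placeholder repository values:

```js
import { getActivePRWorkflowRuns } from './github-merge-repo-actions.lib.mjs';

// Placeholder fetcher: anything resolving to [{ id, status }, ...] per commit SHA works,
// e.g. a wrapper around GitHub's "list workflow runs" API filtered by head SHA.
const fetchRunsForSha = async (owner, repo, sha, verbose) => [];

const { groups, totalActive, headActive, otherActive } = await getActivePRWorkflowRuns(
  'example-owner', 'example-repo', 123, 'abc123def', true, fetchRunsForSha
);
// Each entry in `groups` is one commit SHA that still has active runs; the group whose
// sha matches the PR head carries isHead === true, so a caller can report
// "N on HEAD, M on older commits" exactly as the verbose line above does.
```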