@c4t4/heyamigo 0.9.16 → 0.9.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai/claude.js +3 -0
- package/dist/ai/codex.js +5 -0
- package/dist/db/schema.js +8 -0
- package/dist/estimates/image-gen.js +36 -0
- package/dist/estimates/index.js +12 -0
- package/dist/estimates/registry.js +113 -0
- package/dist/estimates/types.js +6 -0
- package/dist/gateway/commands.js +5 -2
- package/dist/gateway/incoming.js +26 -6
- package/dist/gateway/outgoing.js +11 -3
- package/dist/queue/inbound.js +1 -0
- package/dist/queue/worker.js +71 -10
- package/migrations/0007_estimates_kind.sql +2 -0
- package/migrations/meta/0007_snapshot.json +924 -0
- package/migrations/meta/_journal.json +7 -0
- package/package.json +1 -1
package/dist/ai/claude.js
CHANGED
|
@@ -202,6 +202,9 @@ export async function runClaudeTask(params) {
|
|
|
202
202
|
}
|
|
203
203
|
export const claudeProvider = {
|
|
204
204
|
name: 'claude',
|
|
205
|
+
// Claude CLI's `result` event reports per-turn usage (just the
|
|
206
|
+
// tokens consumed by this single resume invocation).
|
|
207
|
+
usageReportingMode: 'per-turn',
|
|
205
208
|
ask: askClaude,
|
|
206
209
|
runTask: runClaudeTask,
|
|
207
210
|
reloadSystemPrompt,
|
package/dist/ai/codex.js
CHANGED
|
@@ -267,6 +267,11 @@ async function askCodex(params) {
|
|
|
267
267
|
}
|
|
268
268
|
export const codexProvider = {
|
|
269
269
|
name: 'codex',
|
|
270
|
+
// Codex CLI's `turn.completed.usage` reports cumulative totals for
|
|
271
|
+
// the entire resume thread, not just this one turn. Worker uses
|
|
272
|
+
// this flag to delta-math each turn before display so the context
|
|
273
|
+
// % doesn't blow up to thousands after many resume turns.
|
|
274
|
+
usageReportingMode: 'cumulative',
|
|
270
275
|
ask: askCodex,
|
|
271
276
|
runTask: runCodexTask,
|
|
272
277
|
reloadSystemPrompt,
|
package/dist/db/schema.js
CHANGED
|
@@ -159,6 +159,12 @@ export const inbound = sqliteTable('inbound', {
|
|
|
159
159
|
mediaBytes: integer('media_bytes'),
|
|
160
160
|
pushName: text('push_name'), // sender's display name at send time
|
|
161
161
|
triggerReason: text('trigger_reason'), // 'alias'|'mention'|'reply'|'owner'|...
|
|
162
|
+
// Job-kind tag for duration estimation (see src/estimates/). Set
|
|
163
|
+
// at ingest time when a registered estimator matches the message
|
|
164
|
+
// (e.g. 'image-gen', 'browser:ig'). Null otherwise. Queried by
|
|
165
|
+
// the estimator on subsequent invocations to compute past-sample
|
|
166
|
+
// averages.
|
|
167
|
+
kind: text('kind'),
|
|
162
168
|
// Producer-built worker payload (JSON). Chat worker deserializes
|
|
163
169
|
// at claim time to reconstruct the Job. Keeps the rebuild logic
|
|
164
170
|
// out of the worker for Phase 4; later phases may move portions
|
|
@@ -179,6 +185,8 @@ export const inbound = sqliteTable('inbound', {
|
|
|
179
185
|
byStatusNext: index('inbound_by_status_next').on(t.status, t.nextAttemptAt),
|
|
180
186
|
byAddress: index('inbound_by_address').on(t.address),
|
|
181
187
|
byPerson: index('inbound_by_person').on(t.personId, t.receivedAt),
|
|
188
|
+
// Used by the duration estimator: "last N done rows of this kind".
|
|
189
|
+
byKindDone: index('inbound_by_kind_done').on(t.kind, t.status),
|
|
182
190
|
// Sparse unique on external_msg_id: enforced only when set. Same
|
|
183
191
|
// pattern as outbound's idempotency_key.
|
|
184
192
|
uniqExtId: uniqueIndex('inbound_external_msg_id_uq')
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
// Image-generation estimator. Matches when the user message looks
|
|
2
|
+
// like a request to produce an image. Tracks duration of the chat-
|
|
3
|
+
// track turn that handles it (claimedAt → updatedAt on the inbound
|
|
4
|
+
// row).
|
|
5
|
+
import { aggregateMean, registerEstimator } from './registry.js';
|
|
6
|
+
// Conservative regex. Requires a generation verb AND an image-class
|
|
7
|
+
// noun within 80 chars. Prefers false-negative to false-positive —
|
|
8
|
+
// a single mistagged sample drags the average for everyone.
|
|
9
|
+
const IMAGE_GEN_RE = /\b(generate|create|make|draw|render|design|sketch|paint|illustrate)\b[^.?!\n]{0,80}\b(image|picture|drawing|art|artwork|photo|portrait|illustration|sketch|render|painting|wallpaper|logo|icon|graphic)\b/i;
|
|
10
|
+
/**
 * Estimator for image-generation requests, registered under the kind
 * `image-gen`.
 *
 * - `matches()` tests the message text against IMAGE_GEN_RE.
 * - `estimate()` delegates to the shared `aggregateMean` helper.
 * - `format()` renders the estimate as a short "generating image, …" line.
 */
class ImageGenEstimator {
    kind = 'image-gen';
    // 30s starting point — reasonable ballpark for current
    // image-generation APIs (DALL-E 3, Imagen, Flux, etc.). The very
    // first request shows this; from sample 1 onward it averages real
    // observations.
    defaultMs = 30_000;
    /**
     * @param {{ description?: unknown }} ctx - classification context; only
     *   `description` is read here.
     * @returns {boolean} true when the text looks like an image request.
     */
    matches(ctx) {
        const description = ctx?.description;
        // RegExp#test stringifies its argument, so a missing text would be
        // tested as the literal "undefined"/"null". Reject non-strings
        // explicitly instead of relying on those words not matching.
        if (typeof description !== 'string') {
            return false;
        }
        return IMAGE_GEN_RE.test(description);
    }
    /**
     * @param {Array<{ durationMs: number }>} samples - past durations.
     * @returns {object} whatever `aggregateMean` returns for these samples,
     *   using `defaultMs` as the no-sample fallback.
     */
    estimate(samples) {
        return aggregateMean(samples, this.defaultMs);
    }
    /**
     * @param {{ pointMs: number, rangeMs?: { lowMs: number, highMs: number } }} estimate
     * @returns {string} range wording when `rangeMs` is present, otherwise
     *   the single point estimate.
     */
    format(estimate) {
        if (estimate.rangeMs) {
            return `generating image, anywhere from ~${secs(estimate.rangeMs.lowMs)} to ~${secs(estimate.rangeMs.highMs)}`;
        }
        return `generating image, ~${secs(estimate.pointMs)}`;
    }
}
|
|
30
|
+
/**
 * Render a millisecond duration as a compact label: `Ns`, `Nmin` or `Nh`.
 *
 * Each unit is rounded directly from the millisecond value. Rounding minutes
 * from already-rounded seconds would push e.g. 89.6s (1.49 min) up to "2min".
 * Durations of an hour or more are shown in hours rather than as a large
 * minute count. The smallest label is "1s".
 *
 * @param {number} ms - duration in milliseconds.
 * @returns {string} human-readable duration.
 */
function secs(ms) {
    const seconds = Math.max(1, Math.round(ms / 1000));
    if (seconds < 60) {
        return `${seconds}s`;
    }
    const minutes = Math.round(ms / 60_000);
    if (minutes < 60) {
        return `${minutes}min`;
    }
    return `${Math.round(ms / 3_600_000)}h`;
}
|
|
36
|
+
registerEstimator(new ImageGenEstimator());
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
// Estimates module entry point. Importing this side-effect-loads
|
|
2
|
+
// every built-in plugin (each plugin file calls registerEstimator()
|
|
3
|
+
// at module load). Outside callers only need:
|
|
4
|
+
//
|
|
5
|
+
// import { classify, estimate } from './estimates/index.js'
|
|
6
|
+
//
|
|
7
|
+
// Adding a new kind = drop a file alongside image-gen.ts and import
|
|
8
|
+
// it below. No other code in the codebase needs to change.
|
|
9
|
+
import './image-gen.js';
|
|
10
|
+
// future: import './browser-ig.js'
|
|
11
|
+
// future: import './voice-gen.js'
|
|
12
|
+
export { classify, estimate, formatEstimateDefault, humanDur, listEstimators, querySamplesForKind, registerEstimator, } from './registry.js';
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
// Estimator registry + the single entry points the rest of the bot
|
|
2
|
+
// uses: classify() and estimate(). Plugins self-register by importing
|
|
3
|
+
// this module and calling registerEstimator().
|
|
4
|
+
import { and, desc, eq, isNotNull } from 'drizzle-orm';
|
|
5
|
+
import { getDb } from '../db/index.js';
|
|
6
|
+
import { inbound } from '../db/schema.js';
|
|
7
|
+
const REGISTRY = [];
|
|
8
|
+
/**
 * Add an estimator to the registry, or replace the one already registered
 * under the same `kind`.
 *
 * Replacement keeps the original position in the registry, so re-registering
 * (e.g. on hot-reload during dev) neither duplicates the entry nor changes
 * match order.
 *
 * @param {{ kind: string }} e - estimator to register.
 */
export function registerEstimator(e) {
    const slot = REGISTRY.findIndex(({ kind }) => kind === e.kind);
    if (slot === -1) {
        REGISTRY.push(e);
        return;
    }
    REGISTRY[slot] = e;
}
|
|
16
|
+
/**
 * Return the registered estimators in registration order.
 *
 * NOTE: this is the registry array itself, not a copy — mutating the result
 * mutates the registry.
 *
 * @returns {Array<object>} the live registry array.
 */
export function listEstimators() {
    return REGISTRY;
}
|
|
19
|
+
// Find the first estimator whose matches() returns true. First-match
|
|
20
|
+
// wins — order matters when registering. More-specific kinds should
|
|
21
|
+
// register before broad fallbacks.
|
|
22
|
+
/**
 * Pick the estimator responsible for a message.
 *
 * Estimators are tried in registration order and the first one whose
 * `matches(ctx)` is truthy wins, so more specific kinds must be registered
 * before broad fallbacks.
 *
 * @param {object} ctx - classification context passed to each `matches()`.
 * @returns {object|null} the matching estimator, or null when none match.
 */
export function classify(ctx) {
    const winner = REGISTRY.find((candidate) => candidate.matches(ctx));
    return winner ?? null;
}
|
|
29
|
+
// Pull the last N completed inbound rows for this kind. Returns
|
|
30
|
+
// newest-first; estimators that care about recency can use that
|
|
31
|
+
// order directly, the mean-based aggregator below doesn't.
|
|
32
|
+
//
|
|
33
|
+
// Limited to N=20 by default. The mean is fast and stable past 5-10
|
|
34
|
+
// samples; older data isn't helpful and risks staleness.
|
|
35
|
+
const SAMPLE_LIMIT = 20;
|
|
36
|
+
/**
 * Load duration samples for the most recent completed inbound rows of a kind.
 *
 * Selects rows with the given `kind`, status `'done'` and a non-null
 * `claimedAt`, newest first by row id, capped at `limit`. Each row becomes
 * `{ durationMs, finishedAt }` where `durationMs` is
 * `(updatedAt - claimedAt) * 1000` — i.e. the two columns are treated as
 * second-resolution timestamps. Rows whose computed duration is not positive
 * are dropped.
 *
 * @param {string} kind - job-kind tag to look up.
 * @param {number} [limit=SAMPLE_LIMIT] - maximum number of rows to read.
 * @returns {Array<{ durationMs: number, finishedAt: number }>} samples,
 *   newest first.
 */
export function querySamplesForKind(kind, limit = SAMPLE_LIMIT) {
    const db = getDb();
    const completedOfKind = and(
        eq(inbound.kind, kind),
        eq(inbound.status, 'done'),
        isNotNull(inbound.claimedAt),
    );
    const rows = db
        .select({ claimedAt: inbound.claimedAt, updatedAt: inbound.updatedAt })
        .from(inbound)
        .where(completedOfKind)
        .orderBy(desc(inbound.id))
        .limit(limit)
        .all();
    const samples = [];
    for (const { claimedAt, updatedAt } of rows) {
        // Mirrors the SQL isNotNull() condition on the JS side.
        if (claimedAt === null) {
            continue;
        }
        const durationMs = (updatedAt - claimedAt) * 1000;
        if (durationMs > 0) {
            samples.push({ durationMs, finishedAt: updatedAt });
        }
    }
    return samples;
}
|
|
56
|
+
// Public entry point. Returns the kind + formatted text, or null
|
|
57
|
+
// when no estimator matched (i.e. this isn't a job-kind we estimate).
|
|
58
|
+
// If an estimator matches, the result is ALWAYS non-null — the
|
|
59
|
+
// estimator falls back to its defaultMs when no samples exist.
|
|
60
|
+
/**
 * Public entry point: classify a message and produce a duration estimate.
 *
 * @param {object} ctx - classification context, forwarded to `classify()`.
 * @returns {{ kind: string, result: object, text: string } | null} null when
 *   no estimator matches; otherwise the matched kind, the estimator's raw
 *   result for that kind's past samples, and its rendered text.
 */
export function estimate(ctx) {
    const estimator = classify(ctx);
    if (!estimator) {
        return null;
    }
    const samples = querySamplesForKind(estimator.kind);
    const result = estimator.estimate(samples);
    // Call format() as a method so `this` stays bound to the estimator;
    // pulling it out through `??` and invoking the bare function would leave
    // `this` undefined for any estimator whose format() reads its own fields.
    const text = estimator.format == null
        ? formatEstimateDefault(result)
        : estimator.format(result);
    return { kind: estimator.kind, result, text };
}
|
|
69
|
+
// Default UX-friendly rendering. Each estimator can override.
|
|
70
|
+
/**
 * Default rendering for an estimate result; estimators may supply their own
 * `format()` instead.
 *
 * @param {{ pointMs: number, rangeMs?: { lowMs: number, highMs: number } }} r
 * @returns {string} "anywhere from ~X to ~Y" when `rangeMs` is present,
 *   otherwise "~X" for the point estimate.
 */
export function formatEstimateDefault(r) {
    const { rangeMs } = r;
    if (!rangeMs) {
        return `~${humanDur(r.pointMs)}`;
    }
    const low = humanDur(rangeMs.lowMs);
    const high = humanDur(rangeMs.highMs);
    return `anywhere from ~${low} to ~${high}`;
}
|
|
76
|
+
/**
 * Render a millisecond duration as a compact label: `Ns`, `Nmin` or `Nh`.
 *
 * Each unit is rounded directly from the millisecond value. Rounding minutes
 * from already-rounded seconds (and hours from already-rounded minutes)
 * compounds the error at .5 boundaries — e.g. 89.6s (1.49 min) would come
 * out as "2min". The smallest label is "1s".
 *
 * @param {number} ms - duration in milliseconds.
 * @returns {string} human-readable duration.
 */
export function humanDur(ms) {
    const seconds = Math.max(1, Math.round(ms / 1000));
    if (seconds < 60) {
        return `${seconds}s`;
    }
    const minutes = Math.round(ms / 60_000);
    if (minutes < 60) {
        return `${minutes}min`;
    }
    return `${Math.round(ms / 3_600_000)}h`;
}
|
|
85
|
+
// Shared aggregator used by built-in estimators. Each estimator may
|
|
86
|
+
// implement its own estimate() but most just call this.
|
|
87
|
+
/**
 * Shared mean-based aggregator for estimators.
 *
 * Only finite, strictly positive `durationMs` values are used. A NaN or
 * non-positive sample would otherwise poison the mean (or make the
 * std/mean ratio NaN when the mean is 0) and end up in user-facing text.
 *
 * Result shape:
 * - no usable samples: `{ pointMs: defaultMs, sampleSize: 0, confidence: 'low' }`
 * - one usable sample: that sample as `pointMs`, confidence `'low'`
 * - otherwise: the mean as `pointMs`; confidence is `'high'` from 10 samples,
 *   `'medium'` from 5, else `'low'`. When the population standard deviation
 *   exceeds 50% of the mean, `rangeMs` (mean ± std, low end clamped at 0) is
 *   included because a single point would hide too much spread.
 *
 * @param {Array<{ durationMs: number }>} samples - observed durations.
 * @param {number} defaultMs - fallback point estimate when nothing is usable.
 * @returns {{ pointMs: number, sampleSize: number, confidence: 'low'|'medium'|'high',
 *   rangeMs?: { lowMs: number, highMs: number } }}
 */
export function aggregateMean(samples, defaultMs) {
    const durations = samples
        .map((s) => s.durationMs)
        .filter((d) => Number.isFinite(d) && d > 0);
    const count = durations.length;
    if (count === 0) {
        return { pointMs: defaultMs, sampleSize: 0, confidence: 'low' };
    }
    const mean = durations.reduce((a, b) => a + b, 0) / count;
    if (count === 1) {
        return { pointMs: mean, sampleSize: 1, confidence: 'low' };
    }
    const variance = durations.reduce((acc, d) => acc + (d - mean) ** 2, 0) / count;
    const std = Math.sqrt(variance);
    const confidence = count >= 10 ? 'high' : count >= 5 ? 'medium' : 'low';
    // Disclose a range when stddev is a large fraction of the mean.
    // Threshold chosen at 50% — beyond that, a single point estimate
    // hides too much. `mean` is > 0 here, so the ratio is always defined.
    return std / mean > 0.5
        ? {
            pointMs: mean,
            sampleSize: count,
            confidence,
            rangeMs: {
                lowMs: Math.max(0, mean - std),
                highMs: mean + std,
            },
        }
        : { pointMs: mean, sampleSize: count, confidence };
}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
// Job duration estimation interface. The system stays "blackbox" by
|
|
2
|
+
// design — outside callers only touch the registry's classify() /
|
|
3
|
+
// estimate() functions. Each kind plugs in via a self-contained file
|
|
4
|
+
// in src/estimates/<kind>.ts that calls registerEstimator() at module
|
|
5
|
+
// load.
|
|
6
|
+
export {};
|
package/dist/gateway/commands.js
CHANGED
|
@@ -34,8 +34,11 @@ export async function tryCommand(ctx) {
|
|
|
34
34
|
if (info.usage) {
|
|
35
35
|
const max = config.claude.contextWindow;
|
|
36
36
|
const used = info.usage.totalContextTokens;
|
|
37
|
-
|
|
38
|
-
|
|
37
|
+
// Clamp leftPct to [0, 100] so stale or inconsistent data
|
|
38
|
+
// doesn't surface a negative or >100 percentage.
|
|
39
|
+
const leftRatio = Math.max(0, Math.min(1, 1 - used / max));
|
|
40
|
+
const leftPct = (leftRatio * 100).toFixed(1);
|
|
41
|
+
lines.push(`Context: ${used.toLocaleString()} / ${max.toLocaleString()} (${leftPct}% left, last turn)`);
|
|
39
42
|
lines.push(`Turns: ${info.usage.numTurns}`);
|
|
40
43
|
}
|
|
41
44
|
await sendText(ctx.sock, ctx.jid, lines.join('\n'), ctx.quoted);
|
package/dist/gateway/incoming.js
CHANGED
|
@@ -5,6 +5,7 @@ import { getSession } from '../ai/sessions.js';
|
|
|
5
5
|
import { formatAddress, jidToAddress } from '../db/address.js';
|
|
6
6
|
import { personIdForAddress } from '../db/identity-sync.js';
|
|
7
7
|
import { config } from '../config.js';
|
|
8
|
+
import { estimate as estimateJob } from '../estimates/index.js';
|
|
8
9
|
import { logger } from '../logger.js';
|
|
9
10
|
import { buildMemoryPreamble } from '../memory/preamble.js';
|
|
10
11
|
import { enqueueInbound } from '../queue/inbound.js';
|
|
@@ -216,12 +217,30 @@ async function processMessages(messages, sock, ownerJid, isHistorySync = false)
|
|
|
216
217
|
const actorPersonId = senderAddress
|
|
217
218
|
? personIdForAddress(senderAddress)
|
|
218
219
|
: null;
|
|
219
|
-
//
|
|
220
|
-
//
|
|
221
|
-
//
|
|
222
|
-
//
|
|
223
|
-
//
|
|
224
|
-
|
|
220
|
+
// Estimator: classify this message and, when a kind matches,
|
|
221
|
+
// (a) tag the inbound row so future estimates of the same kind
|
|
222
|
+
// get a fresh sample, and (b) send the estimate text as an
|
|
223
|
+
// immediate ack so the user sees a timeline before the agent
|
|
224
|
+
// even starts.
|
|
225
|
+
const est = estimateJob({
|
|
226
|
+
description: stored.text,
|
|
227
|
+
attachments: media ? [{ kind: media.mediaType }] : undefined,
|
|
228
|
+
senderPersonId: actorPersonId ?? undefined,
|
|
229
|
+
});
|
|
230
|
+
const jobKind = est?.kind ?? null;
|
|
231
|
+
if (est) {
|
|
232
|
+
enqueueOutbound({
|
|
233
|
+
address: chatAddress,
|
|
234
|
+
kind: 'text',
|
|
235
|
+
text: est.text,
|
|
236
|
+
idempotencyKey: `estimate-${msg.key.id}`,
|
|
237
|
+
});
|
|
238
|
+
}
|
|
239
|
+
else if (media && config.reply.ackOnMedia !== false) {
|
|
240
|
+
// Fallback media-ack when no estimator matched — keeps the
|
|
241
|
+
// pre-estimator behavior so image messages still get the
|
|
242
|
+
// "looking…" hint. A future MediaIncomingEstimator can replace
|
|
243
|
+
// this with a real average.
|
|
225
244
|
enqueueOutbound({
|
|
226
245
|
address: chatAddress,
|
|
227
246
|
kind: 'text',
|
|
@@ -238,6 +257,7 @@ async function processMessages(messages, sock, ownerJid, isHistorySync = false)
|
|
|
238
257
|
text: stored.text,
|
|
239
258
|
pushName: stored.pushName ?? null,
|
|
240
259
|
triggerReason,
|
|
260
|
+
kind: jobKind,
|
|
241
261
|
receivedAt: stored.timestamp,
|
|
242
262
|
payload: job,
|
|
243
263
|
});
|
package/dist/gateway/outgoing.js
CHANGED
|
@@ -171,13 +171,21 @@ export function formatStatsFooter(stats) {
|
|
|
171
171
|
? ` (${compactTokens(stats.cacheReadTokens)} cached)`
|
|
172
172
|
: '';
|
|
173
173
|
parts.push(`${inStr}↑${cacheStr} ${outStr}↓`);
|
|
174
|
-
// Context % — only when worth calling out
|
|
174
|
+
// Context % — only when worth calling out. Skipped when pct is
|
|
175
|
+
// implausible (>120%) — usually means cumulative/per-turn token
|
|
176
|
+
// counts got crossed by a stale session. Better to show nothing
|
|
177
|
+
// than display "7018% ctx" and lose user trust.
|
|
175
178
|
if (stats.contextWindow > 0) {
|
|
176
179
|
const pct = Math.round((stats.totalContextTokens / stats.contextWindow) * 100);
|
|
177
|
-
if (pct
|
|
180
|
+
if (pct > 120) {
|
|
181
|
+
// skip — data is stale or inconsistent
|
|
182
|
+
}
|
|
183
|
+
else if (pct >= 90) {
|
|
178
184
|
parts.push(`⚠ ${pct}% ctx`);
|
|
179
|
-
|
|
185
|
+
}
|
|
186
|
+
else if (pct >= 70) {
|
|
180
187
|
parts.push(`${pct}% ctx`);
|
|
188
|
+
}
|
|
181
189
|
}
|
|
182
190
|
if (stats.fresh)
|
|
183
191
|
parts.push('fresh');
|
package/dist/queue/inbound.js
CHANGED
|
@@ -37,6 +37,7 @@ export function enqueueInbound(input) {
|
|
|
37
37
|
mediaBytes: input.mediaBytes ?? null,
|
|
38
38
|
pushName: input.pushName ?? null,
|
|
39
39
|
triggerReason: input.triggerReason ?? null,
|
|
40
|
+
kind: input.kind ?? null,
|
|
40
41
|
payload: input.payload === undefined ? null : JSON.stringify(input.payload),
|
|
41
42
|
status: 'pending',
|
|
42
43
|
attempts: 0,
|
package/dist/queue/worker.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { getProvider } from '../ai/providers.js';
|
|
2
|
-
import { clearSession, setSession, setUsage } from '../ai/sessions.js';
|
|
2
|
+
import { clearSession, getSessionInfo, setSession, setUsage, } from '../ai/sessions.js';
|
|
3
3
|
import { config } from '../config.js';
|
|
4
4
|
import { formatAddress, jidToAddress } from '../db/address.js';
|
|
5
5
|
import { logger } from '../logger.js';
|
|
@@ -18,6 +18,9 @@ async function callClaude(job) {
|
|
|
18
18
|
const startedAt = Date.now();
|
|
19
19
|
const wasFresh = !job.sessionId;
|
|
20
20
|
const provider = getProvider();
|
|
21
|
+
// Capture prior session usage BEFORE the ask call so we can compute
|
|
22
|
+
// per-turn deltas regardless of the provider's reporting mode.
|
|
23
|
+
const priorUsage = getSessionInfo(job.jid, provider.name)?.usage;
|
|
21
24
|
const { reply, sessionId, usage } = await provider.ask({
|
|
22
25
|
input: job.input,
|
|
23
26
|
sessionId: job.sessionId,
|
|
@@ -27,20 +30,76 @@ async function callClaude(job) {
|
|
|
27
30
|
if (!job.sessionId) {
|
|
28
31
|
setSession(job.jid, provider.name, sessionId);
|
|
29
32
|
}
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
33
|
+
// Reconcile per-turn vs cumulative reporting. See AiProvider
|
|
34
|
+
// .usageReportingMode for context. For cumulative providers (Codex),
|
|
35
|
+
// the reported usage = whole-thread totals; we subtract the prior
|
|
36
|
+
// cumulative to get this turn's cost. For per-turn providers
|
|
37
|
+
// (Claude), the reported usage IS this turn's cost; we sum into
|
|
38
|
+
// the running cumulative.
|
|
39
|
+
//
|
|
40
|
+
// Fallback baseline: if cumulative* fields aren't stored yet
|
|
41
|
+
// (first turn after this fix deploys), use the prior plain field
|
|
42
|
+
// values. That treats the existing buggy-cumulative storage as the
|
|
43
|
+
// baseline so the next delta is accurate.
|
|
44
|
+
const baseCumInput = priorUsage?.cumulativeInputTokens ?? priorUsage?.inputTokens ?? 0;
|
|
45
|
+
const baseCumCacheRead = priorUsage?.cumulativeCacheReadTokens ?? priorUsage?.cacheReadTokens ?? 0;
|
|
46
|
+
const baseCumCacheCreate = priorUsage?.cumulativeCacheCreationTokens ?? priorUsage?.cacheCreationTokens ?? 0;
|
|
47
|
+
const baseCumOutput = priorUsage?.cumulativeOutputTokens ?? priorUsage?.outputTokens ?? 0;
|
|
48
|
+
let turnInput;
|
|
49
|
+
let turnCacheRead;
|
|
50
|
+
let turnCacheCreate;
|
|
51
|
+
let turnOutput;
|
|
52
|
+
let newCumInput;
|
|
53
|
+
let newCumCacheRead;
|
|
54
|
+
let newCumCacheCreate;
|
|
55
|
+
let newCumOutput;
|
|
56
|
+
if (provider.usageReportingMode === 'cumulative') {
|
|
57
|
+
// Reported usage IS the cumulative total. Delta = current - prev.
|
|
58
|
+
// Math.max(0, …) protects against the rare case where the CLI's
|
|
59
|
+
// counter resets (e.g. fresh session that we still tracked) —
|
|
60
|
+
// never display negative deltas.
|
|
61
|
+
newCumInput = usage.inputTokens;
|
|
62
|
+
newCumCacheRead = usage.cacheReadTokens;
|
|
63
|
+
newCumCacheCreate = usage.cacheCreationTokens;
|
|
64
|
+
newCumOutput = usage.outputTokens;
|
|
65
|
+
turnInput = Math.max(0, newCumInput - baseCumInput);
|
|
66
|
+
turnCacheRead = Math.max(0, newCumCacheRead - baseCumCacheRead);
|
|
67
|
+
turnCacheCreate = Math.max(0, newCumCacheCreate - baseCumCacheCreate);
|
|
68
|
+
turnOutput = Math.max(0, newCumOutput - baseCumOutput);
|
|
69
|
+
}
|
|
70
|
+
else {
|
|
71
|
+
// Reported usage IS per-turn already. Accumulate into cumulative.
|
|
72
|
+
turnInput = usage.inputTokens;
|
|
73
|
+
turnCacheRead = usage.cacheReadTokens;
|
|
74
|
+
turnCacheCreate = usage.cacheCreationTokens;
|
|
75
|
+
turnOutput = usage.outputTokens;
|
|
76
|
+
newCumInput = baseCumInput + turnInput;
|
|
77
|
+
newCumCacheRead = baseCumCacheRead + turnCacheRead;
|
|
78
|
+
newCumCacheCreate = baseCumCacheCreate + turnCacheCreate;
|
|
79
|
+
newCumOutput = baseCumOutput + turnOutput;
|
|
80
|
+
}
|
|
81
|
+
// totalContextTokens is the PROMPT side (input + cache reads + cache
|
|
82
|
+
// creation). Output is response, not context. The old code included
|
|
83
|
+
// outputTokens here which was wrong.
|
|
84
|
+
const totalContextTokens = turnInput + turnCacheRead + turnCacheCreate;
|
|
34
85
|
setUsage(job.jid, provider.name, {
|
|
35
|
-
|
|
86
|
+
inputTokens: turnInput,
|
|
87
|
+
cacheReadTokens: turnCacheRead,
|
|
88
|
+
cacheCreationTokens: turnCacheCreate,
|
|
89
|
+
outputTokens: turnOutput,
|
|
36
90
|
totalContextTokens,
|
|
91
|
+
numTurns: usage.numTurns,
|
|
92
|
+
cumulativeInputTokens: newCumInput,
|
|
93
|
+
cumulativeCacheReadTokens: newCumCacheRead,
|
|
94
|
+
cumulativeCacheCreationTokens: newCumCacheCreate,
|
|
95
|
+
cumulativeOutputTokens: newCumOutput,
|
|
37
96
|
updatedAt: Math.floor(Date.now() / 1000),
|
|
38
97
|
});
|
|
39
98
|
// Per-user daily token accounting. Owner sender is exempt by check at the
|
|
40
99
|
// incoming gate, but we still bill so /usage reflects reality if added.
|
|
41
100
|
// Cache-read tokens are excluded — they don't cost real budget.
|
|
42
101
|
if (job.senderNumber) {
|
|
43
|
-
addDailyTokens(job.senderNumber,
|
|
102
|
+
addDailyTokens(job.senderNumber, turnInput + turnOutput);
|
|
44
103
|
}
|
|
45
104
|
const rawFlags = extractFlags(reply);
|
|
46
105
|
const { clean, digest, journals, journalCreates, asyncTasks, asyncBrowserTasks, sendTexts, crons, reminds, } = filterFlagsByRole(rawFlags, job.allowedTags);
|
|
@@ -179,9 +238,11 @@ async function callClaude(job) {
|
|
|
179
238
|
reply: clean,
|
|
180
239
|
stats: {
|
|
181
240
|
durationMs,
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
241
|
+
// All per-turn values now (delta-corrected for cumulative
|
|
242
|
+
// providers above). Footer shows these directly.
|
|
243
|
+
inputTokens: turnInput,
|
|
244
|
+
outputTokens: turnOutput,
|
|
245
|
+
cacheReadTokens: turnCacheRead,
|
|
185
246
|
totalContextTokens,
|
|
186
247
|
contextWindow: config.claude.contextWindow,
|
|
187
248
|
fresh: wasFresh,
|