@c4t4/heyamigo 0.9.17 → 0.9.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/ai/claude.js CHANGED
@@ -202,6 +202,9 @@ export async function runClaudeTask(params) {
202
202
  }
203
203
  export const claudeProvider = {
204
204
  name: 'claude',
205
+ // Claude CLI's `result` event reports per-turn usage (just the
206
+ // tokens consumed by this single resume invocation).
207
+ usageReportingMode: 'per-turn',
205
208
  ask: askClaude,
206
209
  runTask: runClaudeTask,
207
210
  reloadSystemPrompt,
package/dist/ai/codex.js CHANGED
@@ -267,6 +267,11 @@ async function askCodex(params) {
267
267
  }
268
268
  export const codexProvider = {
269
269
  name: 'codex',
270
+ // Codex CLI's `turn.completed.usage` reports cumulative totals for
271
+ // the entire resume thread, not just this one turn. The worker uses
272
+ // this flag to compute per-turn deltas before display so the context
273
+ // % doesn't blow up to thousands after many resume turns.
274
+ usageReportingMode: 'cumulative',
270
275
  ask: askCodex,
271
276
  runTask: runCodexTask,
272
277
  reloadSystemPrompt,
@@ -34,8 +34,11 @@ export async function tryCommand(ctx) {
34
34
  if (info.usage) {
35
35
  const max = config.claude.contextWindow;
36
36
  const used = info.usage.totalContextTokens;
37
- const leftPct = Math.max(0, 100 - (used / max) * 100).toFixed(1);
38
- lines.push(`Context: ${used.toLocaleString()} / ${max.toLocaleString()} (${leftPct}% left)`);
37
+ // Clamp leftPct to [0, 100] so stale or inconsistent data
38
+ // doesn't surface a negative or >100 percentage.
39
+ const leftRatio = Math.max(0, Math.min(1, 1 - used / max));
40
+ const leftPct = (leftRatio * 100).toFixed(1);
41
+ lines.push(`Context: ${used.toLocaleString()} / ${max.toLocaleString()} (${leftPct}% left, last turn)`);
39
42
  lines.push(`Turns: ${info.usage.numTurns}`);
40
43
  }
41
44
  await sendText(ctx.sock, ctx.jid, lines.join('\n'), ctx.quoted);
@@ -171,13 +171,21 @@ export function formatStatsFooter(stats) {
171
171
  ? ` (${compactTokens(stats.cacheReadTokens)} cached)`
172
172
  : '';
173
173
  parts.push(`${inStr}↑${cacheStr} ${outStr}↓`);
174
- // Context % — only when worth calling out
174
+ // Context % — only when worth calling out. Skipped when pct is
175
+ // implausible (>120%) — usually means cumulative/per-turn token
176
+ // counts got crossed by a stale session. Better to show nothing
177
+ // than display "7018% ctx" and lose user trust.
175
178
  if (stats.contextWindow > 0) {
176
179
  const pct = Math.round((stats.totalContextTokens / stats.contextWindow) * 100);
177
- if (pct >= 90)
180
+ if (pct > 120) {
181
+ // skip — data is stale or inconsistent
182
+ }
183
+ else if (pct >= 90) {
178
184
  parts.push(`⚠ ${pct}% ctx`);
179
- else if (pct >= 70)
185
+ }
186
+ else if (pct >= 70) {
180
187
  parts.push(`${pct}% ctx`);
188
+ }
181
189
  }
182
190
  if (stats.fresh)
183
191
  parts.push('fresh');
@@ -1,5 +1,5 @@
1
1
  import { getProvider } from '../ai/providers.js';
2
- import { clearSession, setSession, setUsage } from '../ai/sessions.js';
2
+ import { clearSession, getSessionInfo, setSession, setUsage, } from '../ai/sessions.js';
3
3
  import { config } from '../config.js';
4
4
  import { formatAddress, jidToAddress } from '../db/address.js';
5
5
  import { logger } from '../logger.js';
@@ -18,6 +18,9 @@ async function callClaude(job) {
18
18
  const startedAt = Date.now();
19
19
  const wasFresh = !job.sessionId;
20
20
  const provider = getProvider();
21
+ // Capture prior session usage BEFORE the ask call so we can compute
22
+ // per-turn deltas regardless of the provider's reporting mode.
23
+ const priorUsage = getSessionInfo(job.jid, provider.name)?.usage;
21
24
  const { reply, sessionId, usage } = await provider.ask({
22
25
  input: job.input,
23
26
  sessionId: job.sessionId,
@@ -27,20 +30,76 @@ async function callClaude(job) {
27
30
  if (!job.sessionId) {
28
31
  setSession(job.jid, provider.name, sessionId);
29
32
  }
30
- const totalContextTokens = usage.inputTokens +
31
- usage.cacheReadTokens +
32
- usage.cacheCreationTokens +
33
- usage.outputTokens;
33
+ // Reconcile per-turn vs cumulative reporting (see
34
+ // AiProvider.usageReportingMode). For cumulative providers (Codex),
35
+ // the reported usage = whole-thread totals; we subtract the prior
36
+ // cumulative to get this turn's cost. For per-turn providers
37
+ // (Claude), the reported usage IS this turn's cost; we sum into
38
+ // the running cumulative.
39
+ //
40
+ // Fallback baseline: if cumulative* fields aren't stored yet
41
+ // (first turn after this fix deploys), use the prior plain field
42
+ // values. That treats the existing buggy-cumulative storage as the
43
+ // baseline so the next delta is accurate.
44
+ const baseCumInput = priorUsage?.cumulativeInputTokens ?? priorUsage?.inputTokens ?? 0;
45
+ const baseCumCacheRead = priorUsage?.cumulativeCacheReadTokens ?? priorUsage?.cacheReadTokens ?? 0;
46
+ const baseCumCacheCreate = priorUsage?.cumulativeCacheCreationTokens ?? priorUsage?.cacheCreationTokens ?? 0;
47
+ const baseCumOutput = priorUsage?.cumulativeOutputTokens ?? priorUsage?.outputTokens ?? 0;
48
+ let turnInput;
49
+ let turnCacheRead;
50
+ let turnCacheCreate;
51
+ let turnOutput;
52
+ let newCumInput;
53
+ let newCumCacheRead;
54
+ let newCumCacheCreate;
55
+ let newCumOutput;
56
+ if (provider.usageReportingMode === 'cumulative') {
57
+ // Reported usage IS the cumulative total. Delta = current - prev.
58
+ // Math.max(0, …) protects against the rare case where the CLI's
59
+ // counter resets (e.g. fresh session that we still tracked) —
60
+ // never display negative deltas.
61
+ newCumInput = usage.inputTokens;
62
+ newCumCacheRead = usage.cacheReadTokens;
63
+ newCumCacheCreate = usage.cacheCreationTokens;
64
+ newCumOutput = usage.outputTokens;
65
+ turnInput = Math.max(0, newCumInput - baseCumInput);
66
+ turnCacheRead = Math.max(0, newCumCacheRead - baseCumCacheRead);
67
+ turnCacheCreate = Math.max(0, newCumCacheCreate - baseCumCacheCreate);
68
+ turnOutput = Math.max(0, newCumOutput - baseCumOutput);
69
+ }
70
+ else {
71
+ // Reported usage IS per-turn already. Accumulate into cumulative.
72
+ turnInput = usage.inputTokens;
73
+ turnCacheRead = usage.cacheReadTokens;
74
+ turnCacheCreate = usage.cacheCreationTokens;
75
+ turnOutput = usage.outputTokens;
76
+ newCumInput = baseCumInput + turnInput;
77
+ newCumCacheRead = baseCumCacheRead + turnCacheRead;
78
+ newCumCacheCreate = baseCumCacheCreate + turnCacheCreate;
79
+ newCumOutput = baseCumOutput + turnOutput;
80
+ }
81
+ // totalContextTokens is the PROMPT side (input + cache reads + cache
82
+ // creation). Output is response, not context. The old code included
83
+ // outputTokens here, which was wrong.
84
+ const totalContextTokens = turnInput + turnCacheRead + turnCacheCreate;
34
85
  setUsage(job.jid, provider.name, {
35
- ...usage,
86
+ inputTokens: turnInput,
87
+ cacheReadTokens: turnCacheRead,
88
+ cacheCreationTokens: turnCacheCreate,
89
+ outputTokens: turnOutput,
36
90
  totalContextTokens,
91
+ numTurns: usage.numTurns,
92
+ cumulativeInputTokens: newCumInput,
93
+ cumulativeCacheReadTokens: newCumCacheRead,
94
+ cumulativeCacheCreationTokens: newCumCacheCreate,
95
+ cumulativeOutputTokens: newCumOutput,
37
96
  updatedAt: Math.floor(Date.now() / 1000),
38
97
  });
39
98
  // Per-user daily token accounting. Owner sender is exempt by check at the
40
99
  // incoming gate, but we still bill so /usage reflects reality if added.
41
100
  // Cache-read tokens are excluded — they don't cost real budget.
42
101
  if (job.senderNumber) {
43
- addDailyTokens(job.senderNumber, usage.inputTokens + usage.outputTokens);
102
+ addDailyTokens(job.senderNumber, turnInput + turnOutput);
44
103
  }
45
104
  const rawFlags = extractFlags(reply);
46
105
  const { clean, digest, journals, journalCreates, asyncTasks, asyncBrowserTasks, sendTexts, crons, reminds, } = filterFlagsByRole(rawFlags, job.allowedTags);
@@ -179,9 +238,11 @@ async function callClaude(job) {
179
238
  reply: clean,
180
239
  stats: {
181
240
  durationMs,
182
- inputTokens: usage.inputTokens,
183
- outputTokens: usage.outputTokens,
184
- cacheReadTokens: usage.cacheReadTokens,
241
+ // All per-turn values now (delta-corrected for cumulative
242
+ // providers above). Footer shows these directly.
243
+ inputTokens: turnInput,
244
+ outputTokens: turnOutput,
245
+ cacheReadTokens: turnCacheRead,
185
246
  totalContextTokens,
186
247
  contextWindow: config.claude.contextWindow,
187
248
  fresh: wasFresh,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@c4t4/heyamigo",
3
- "version": "0.9.17",
3
+ "version": "0.9.18",
4
4
  "description": "WhatsApp AI bot powered by Claude with long-term memory, browser control, and role-based access",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",