@c4t4/heyamigo 0.9.17 → 0.9.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai/claude.js +3 -0
- package/dist/ai/codex.js +5 -0
- package/dist/ai/spawn.js +9 -3
- package/dist/gateway/commands.js +5 -2
- package/dist/gateway/outgoing.js +11 -3
- package/dist/queue/browser-queue.js +5 -2
- package/dist/queue/inbound.js +5 -4
- package/dist/queue/worker.js +71 -10
- package/package.json +1 -1
package/dist/ai/claude.js
CHANGED
|
@@ -202,6 +202,9 @@ export async function runClaudeTask(params) {
|
|
|
202
202
|
}
|
|
203
203
|
export const claudeProvider = {
|
|
204
204
|
name: 'claude',
|
|
205
|
+
// Claude CLI's `result` event reports per-turn usage (just the
|
|
206
|
+
// tokens consumed by this single resume invocation).
|
|
207
|
+
usageReportingMode: 'per-turn',
|
|
205
208
|
ask: askClaude,
|
|
206
209
|
runTask: runClaudeTask,
|
|
207
210
|
reloadSystemPrompt,
|
package/dist/ai/codex.js
CHANGED
|
@@ -267,6 +267,11 @@ async function askCodex(params) {
|
|
|
267
267
|
}
|
|
268
268
|
export const codexProvider = {
|
|
269
269
|
name: 'codex',
|
|
270
|
+
// Codex CLI's `turn.completed.usage` reports cumulative totals for
|
|
271
|
+
// the entire resume thread, not just this one turn. Worker uses
|
|
272
|
+
// this flag to delta-math each turn before display so the context
|
|
273
|
+
// % doesn't blow up to thousands after many resume turns.
|
|
274
|
+
usageReportingMode: 'cumulative',
|
|
270
275
|
ask: askCodex,
|
|
271
276
|
runTask: runCodexTask,
|
|
272
277
|
reloadSystemPrompt,
|
package/dist/ai/spawn.js
CHANGED
|
@@ -180,8 +180,14 @@ export async function runClaude(opts) {
|
|
|
180
180
|
}
|
|
181
181
|
// Per-lane defaults. Individual callers can override, but these are the
|
|
182
182
|
// shipped caps. Browser-heavy work lives in the async lane.
|
|
183
|
+
//
|
|
184
|
+
// Values picked to accommodate /goal-style long-running tasks (Claude
|
|
185
|
+
// Code / Codex CLI support multi-hour goal sessions). Matching claim
|
|
186
|
+
// TTLs in queue/inbound.ts and queue/browser-queue.ts MUST exceed
|
|
187
|
+
// these — otherwise the orchestrator reclaims live workers and the
|
|
188
|
+
// same task gets processed twice.
|
|
183
189
|
export const TIMEOUT_MS = {
|
|
184
|
-
main:
|
|
185
|
-
async:
|
|
186
|
-
background:
|
|
190
|
+
main: 30 * 60 * 1000, // 30 min — chat track, covers /goal
|
|
191
|
+
async: 60 * 60 * 1000, // 60 min — async lane, deep browser scrapes
|
|
192
|
+
background: 5 * 60 * 1000, // 5 min — digest / sweep / housekeeping
|
|
187
193
|
};
|
package/dist/gateway/commands.js
CHANGED
|
@@ -34,8 +34,11 @@ export async function tryCommand(ctx) {
|
|
|
34
34
|
if (info.usage) {
|
|
35
35
|
const max = config.claude.contextWindow;
|
|
36
36
|
const used = info.usage.totalContextTokens;
|
|
37
|
-
|
|
38
|
-
|
|
37
|
+
// Clamp leftPct to [0, 100] so stale or inconsistent data
|
|
38
|
+
// doesn't surface a negative or >100 percentage.
|
|
39
|
+
const leftRatio = Math.max(0, Math.min(1, 1 - used / max));
|
|
40
|
+
const leftPct = (leftRatio * 100).toFixed(1);
|
|
41
|
+
lines.push(`Context: ${used.toLocaleString()} / ${max.toLocaleString()} (${leftPct}% left, last turn)`);
|
|
39
42
|
lines.push(`Turns: ${info.usage.numTurns}`);
|
|
40
43
|
}
|
|
41
44
|
await sendText(ctx.sock, ctx.jid, lines.join('\n'), ctx.quoted);
|
package/dist/gateway/outgoing.js
CHANGED
|
@@ -171,13 +171,21 @@ export function formatStatsFooter(stats) {
|
|
|
171
171
|
? ` (${compactTokens(stats.cacheReadTokens)} cached)`
|
|
172
172
|
: '';
|
|
173
173
|
parts.push(`${inStr}↑${cacheStr} ${outStr}↓`);
|
|
174
|
-
// Context % — only when worth calling out
|
|
174
|
+
// Context % — only when worth calling out. Skipped when pct is
|
|
175
|
+
// implausible (>120%) — usually means cumulative/per-turn token
|
|
176
|
+
// counts got crossed by a stale session. Better to show nothing
|
|
177
|
+
// than display "7018% ctx" and lose user trust.
|
|
175
178
|
if (stats.contextWindow > 0) {
|
|
176
179
|
const pct = Math.round((stats.totalContextTokens / stats.contextWindow) * 100);
|
|
177
|
-
if (pct
|
|
180
|
+
if (pct > 120) {
|
|
181
|
+
// skip — data is stale or inconsistent
|
|
182
|
+
}
|
|
183
|
+
else if (pct >= 90) {
|
|
178
184
|
parts.push(`⚠ ${pct}% ctx`);
|
|
179
|
-
|
|
185
|
+
}
|
|
186
|
+
else if (pct >= 70) {
|
|
180
187
|
parts.push(`${pct}% ctx`);
|
|
188
|
+
}
|
|
181
189
|
}
|
|
182
190
|
if (stats.fresh)
|
|
183
191
|
parts.push('fresh');
|
package/dist/queue/browser-queue.js
CHANGED
|
@@ -121,8 +121,11 @@ export function markBrowserTaskRetryOrDlq(id, workerId, errorMessage) {
|
|
|
121
121
|
return { retried: true, deadLettered: false };
|
|
122
122
|
});
|
|
123
123
|
}
|
|
124
|
-
//
|
|
125
|
-
|
|
124
|
+
// MUST exceed TIMEOUT_MS.async (60min as of the /goal-friendly bump)
|
|
125
|
+
// so live browser workers don't get reclaimed mid-spawn. 5min headroom
|
|
126
|
+
// past the spawn cap so the orchestrator only catches truly dead
|
|
127
|
+
// workers. Browser tasks legitimately run 30-45min for deep scrapes.
|
|
128
|
+
const CLAIM_TTL_SECONDS = 65 * 60;
|
|
126
129
|
export function reclaimStuckBrowserTasks() {
|
|
127
130
|
const db = getDb();
|
|
128
131
|
const cutoff = Math.floor(Date.now() / 1000) - CLAIM_TTL_SECONDS;
|
package/dist/queue/inbound.js
CHANGED
|
@@ -170,10 +170,11 @@ export function markInboundFailed(id, workerId, errorMessage) {
|
|
|
170
170
|
.all();
|
|
171
171
|
return result.length > 0;
|
|
172
172
|
}
|
|
173
|
-
// Orchestrator helper.
|
|
174
|
-
//
|
|
175
|
-
//
|
|
176
|
-
|
|
173
|
+
// Orchestrator helper. MUST exceed TIMEOUT_MS.main (30min as of the
|
|
174
|
+
// /goal-friendly bump) so live workers don't get reclaimed mid-spawn.
|
|
175
|
+
// 5min headroom past the spawn cap so the orchestrator only catches
|
|
176
|
+
// rows whose worker actually died.
|
|
177
|
+
const CLAIM_TTL_SECONDS = 35 * 60;
|
|
177
178
|
export function reclaimStuckInbound() {
|
|
178
179
|
const db = getDb();
|
|
179
180
|
const cutoff = Math.floor(Date.now() / 1000) - CLAIM_TTL_SECONDS;
|
package/dist/queue/worker.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { getProvider } from '../ai/providers.js';
|
|
2
|
-
import { clearSession, setSession, setUsage } from '../ai/sessions.js';
|
|
2
|
+
import { clearSession, getSessionInfo, setSession, setUsage, } from '../ai/sessions.js';
|
|
3
3
|
import { config } from '../config.js';
|
|
4
4
|
import { formatAddress, jidToAddress } from '../db/address.js';
|
|
5
5
|
import { logger } from '../logger.js';
|
|
@@ -18,6 +18,9 @@ async function callClaude(job) {
|
|
|
18
18
|
const startedAt = Date.now();
|
|
19
19
|
const wasFresh = !job.sessionId;
|
|
20
20
|
const provider = getProvider();
|
|
21
|
+
// Capture prior session usage BEFORE the ask call so we can compute
|
|
22
|
+
// per-turn deltas regardless of the provider's reporting mode.
|
|
23
|
+
const priorUsage = getSessionInfo(job.jid, provider.name)?.usage;
|
|
21
24
|
const { reply, sessionId, usage } = await provider.ask({
|
|
22
25
|
input: job.input,
|
|
23
26
|
sessionId: job.sessionId,
|
|
@@ -27,20 +30,76 @@ async function callClaude(job) {
|
|
|
27
30
|
if (!job.sessionId) {
|
|
28
31
|
setSession(job.jid, provider.name, sessionId);
|
|
29
32
|
}
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
33
|
+
// Reconcile per-turn vs cumulative reporting. See AiProvider
|
|
34
|
+
// .usageReportingMode for context. For cumulative providers (Codex),
|
|
35
|
+
// the reported usage = whole-thread totals; we subtract the prior
|
|
36
|
+
// cumulative to get this turn's cost. For per-turn providers
|
|
37
|
+
// (Claude), the reported usage IS this turn's cost; we sum into
|
|
38
|
+
// the running cumulative.
|
|
39
|
+
//
|
|
40
|
+
// Fallback baseline: if cumulative* fields aren't stored yet
|
|
41
|
+
// (first turn after this fix deploys), use the prior plain field
|
|
42
|
+
// values. That treats the existing buggy-cumulative storage as the
|
|
43
|
+
// baseline so the next delta is accurate.
|
|
44
|
+
const baseCumInput = priorUsage?.cumulativeInputTokens ?? priorUsage?.inputTokens ?? 0;
|
|
45
|
+
const baseCumCacheRead = priorUsage?.cumulativeCacheReadTokens ?? priorUsage?.cacheReadTokens ?? 0;
|
|
46
|
+
const baseCumCacheCreate = priorUsage?.cumulativeCacheCreationTokens ?? priorUsage?.cacheCreationTokens ?? 0;
|
|
47
|
+
const baseCumOutput = priorUsage?.cumulativeOutputTokens ?? priorUsage?.outputTokens ?? 0;
|
|
48
|
+
let turnInput;
|
|
49
|
+
let turnCacheRead;
|
|
50
|
+
let turnCacheCreate;
|
|
51
|
+
let turnOutput;
|
|
52
|
+
let newCumInput;
|
|
53
|
+
let newCumCacheRead;
|
|
54
|
+
let newCumCacheCreate;
|
|
55
|
+
let newCumOutput;
|
|
56
|
+
if (provider.usageReportingMode === 'cumulative') {
|
|
57
|
+
// Reported usage IS the cumulative total. Delta = current - prev.
|
|
58
|
+
// Math.max(0, …) protects against the rare case where the CLI's
|
|
59
|
+
// counter resets (e.g. fresh session that we still tracked) —
|
|
60
|
+
// never display negative deltas.
|
|
61
|
+
newCumInput = usage.inputTokens;
|
|
62
|
+
newCumCacheRead = usage.cacheReadTokens;
|
|
63
|
+
newCumCacheCreate = usage.cacheCreationTokens;
|
|
64
|
+
newCumOutput = usage.outputTokens;
|
|
65
|
+
turnInput = Math.max(0, newCumInput - baseCumInput);
|
|
66
|
+
turnCacheRead = Math.max(0, newCumCacheRead - baseCumCacheRead);
|
|
67
|
+
turnCacheCreate = Math.max(0, newCumCacheCreate - baseCumCacheCreate);
|
|
68
|
+
turnOutput = Math.max(0, newCumOutput - baseCumOutput);
|
|
69
|
+
}
|
|
70
|
+
else {
|
|
71
|
+
// Reported usage IS per-turn already. Accumulate into cumulative.
|
|
72
|
+
turnInput = usage.inputTokens;
|
|
73
|
+
turnCacheRead = usage.cacheReadTokens;
|
|
74
|
+
turnCacheCreate = usage.cacheCreationTokens;
|
|
75
|
+
turnOutput = usage.outputTokens;
|
|
76
|
+
newCumInput = baseCumInput + turnInput;
|
|
77
|
+
newCumCacheRead = baseCumCacheRead + turnCacheRead;
|
|
78
|
+
newCumCacheCreate = baseCumCacheCreate + turnCacheCreate;
|
|
79
|
+
newCumOutput = baseCumOutput + turnOutput;
|
|
80
|
+
}
|
|
81
|
+
// totalContextTokens is the PROMPT side (input + cache reads + cache
|
|
82
|
+
// creation). Output is response, not context. The old code included
|
|
83
|
+
// outputTokens here which was wrong.
|
|
84
|
+
const totalContextTokens = turnInput + turnCacheRead + turnCacheCreate;
|
|
34
85
|
setUsage(job.jid, provider.name, {
|
|
35
|
-
|
|
86
|
+
inputTokens: turnInput,
|
|
87
|
+
cacheReadTokens: turnCacheRead,
|
|
88
|
+
cacheCreationTokens: turnCacheCreate,
|
|
89
|
+
outputTokens: turnOutput,
|
|
36
90
|
totalContextTokens,
|
|
91
|
+
numTurns: usage.numTurns,
|
|
92
|
+
cumulativeInputTokens: newCumInput,
|
|
93
|
+
cumulativeCacheReadTokens: newCumCacheRead,
|
|
94
|
+
cumulativeCacheCreationTokens: newCumCacheCreate,
|
|
95
|
+
cumulativeOutputTokens: newCumOutput,
|
|
37
96
|
updatedAt: Math.floor(Date.now() / 1000),
|
|
38
97
|
});
|
|
39
98
|
// Per-user daily token accounting. Owner sender is exempt by check at the
|
|
40
99
|
// incoming gate, but we still bill so /usage reflects reality if added.
|
|
41
100
|
// Cache-read tokens are excluded — they don't cost real budget.
|
|
42
101
|
if (job.senderNumber) {
|
|
43
|
-
addDailyTokens(job.senderNumber,
|
|
102
|
+
addDailyTokens(job.senderNumber, turnInput + turnOutput);
|
|
44
103
|
}
|
|
45
104
|
const rawFlags = extractFlags(reply);
|
|
46
105
|
const { clean, digest, journals, journalCreates, asyncTasks, asyncBrowserTasks, sendTexts, crons, reminds, } = filterFlagsByRole(rawFlags, job.allowedTags);
|
|
@@ -179,9 +238,11 @@ async function callClaude(job) {
|
|
|
179
238
|
reply: clean,
|
|
180
239
|
stats: {
|
|
181
240
|
durationMs,
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
241
|
+
// All per-turn values now (delta-corrected for cumulative
|
|
242
|
+
// providers above). Footer shows these directly.
|
|
243
|
+
inputTokens: turnInput,
|
|
244
|
+
outputTokens: turnOutput,
|
|
245
|
+
cacheReadTokens: turnCacheRead,
|
|
185
246
|
totalContextTokens,
|
|
186
247
|
contextWindow: config.claude.contextWindow,
|
|
187
248
|
fresh: wasFresh,
|