clawmoney 0.15.68 → 0.15.70

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,7 +6,7 @@ import * as readline from "node:readline";
6
6
  import { intro, outro, multiselect, select, spinner, isCancel, cancel, log, } from "@clack/prompts";
7
7
  import chalk from "chalk";
8
8
  import { apiPost } from "../utils/api.js";
9
- import { loadConfig, requireConfig } from "../utils/config.js";
9
+ import { loadConfig, requireConfig, saveConfig } from "../utils/config.js";
10
10
  import { setupCommand } from "./setup.js";
11
11
  import { API_PRICES, PLATFORM_FEE } from "../relay/pricing.js";
12
12
  import { hasClaudeFingerprint, bootstrapClaudeFingerprint, } from "../relay/upstream/claude-bootstrap.js";
@@ -31,9 +31,10 @@ import { hasCodexFingerprint, bootstrapCodexFingerprint, } from "../relay/upstre
31
31
  // falls through to modelsForCli(cli) which returns EVERY priced
32
32
  // model in that family.
33
33
  const RECOMMENDED_MODELS = {
34
- // Claude Code /model menu: Default(Sonnet 4.6) / Sonnet(1M) / Opus(1M) / Haiku
35
- // 3 unique model IDs (Sonnet 1M = same model + context-1m beta)
36
- claude: ["claude-sonnet-4-6", "claude-opus-4-6", "claude-haiku-4-5"],
34
+ // Claude Code /model menu (post 2026-04-16 Opus 4.7 release):
35
+ // Default(Opus 4.7 1M) / Sonnet 4.6 / Haiku 4.5
36
+ // Opus 4.7 was released 2026-04-16 and became the default model; claude-opus-4-6 is still listed below.
37
+ claude: ["claude-opus-4-7", "claude-sonnet-4-6", "claude-opus-4-6", "claude-haiku-4-5"],
37
38
  // Codex CLI /model menu for ChatGPT sign-in (post 2026-04-14 cleanup):
38
39
  // gpt-5.4 — latest frontier agentic coding (current default)
39
40
  // gpt-5.4-mini — smaller frontier agentic coding
@@ -358,37 +359,50 @@ export async function relaySetupCommand() {
358
359
  // pricing × number of providers; we can't predict that, so we don't
359
360
  // pretend to.
360
361
  const concurrency = 5;
361
- const dailyLimitChoice = await select({
362
- message: "Daily quota share per model? (applies independently to each model you register)",
362
+ const quotaShareChoice = await select({
363
+ message: "How much of your 5h session window can relay use?",
363
364
  options: [
364
365
  {
365
- value: 15,
366
- label: "~25% · Light",
366
+ value: 25,
367
+ label: "25% · Light",
367
368
  hint: "share a quarter, leaves 75% for your personal use",
368
369
  },
369
370
  {
370
- value: 30,
371
- label: "~50% · Balanced (recommended)",
372
- hint: "splits each model's quota evenly between you and the relay",
371
+ value: 50,
372
+ label: "50% · Balanced (recommended)",
373
+ hint: "splits your quota evenly between you and the relay",
373
374
  },
374
375
  {
375
- value: 45,
376
- label: "~75% · Heavy",
376
+ value: 75,
377
+ label: "75% · Heavy",
377
378
  hint: "most of your subscription goes to relay, 25% reserved for personal use",
378
379
  },
379
380
  {
380
- value: 60,
381
- label: "~100% · Full",
381
+ value: 100,
382
+ label: "100% · Full",
382
383
  hint: "dedicates your subscription to relay — best for accounts you don't use personally",
383
384
  },
384
385
  ],
385
- initialValue: 30,
386
+ initialValue: 50,
386
387
  });
387
- if (isCancel(dailyLimitChoice)) {
388
+ if (isCancel(quotaShareChoice)) {
388
389
  cancel("Setup cancelled");
389
390
  process.exit(0);
390
391
  }
391
- const dailyLimit = dailyLimitChoice;
392
+ const maxRelayUtilization = quotaShareChoice;
393
+ // daily_limit_usd is kept as a high fallback — the real cap is now
394
+ // maxRelayUtilization enforced by the daemon's rate-guard. Set it
395
+ // generously so it doesn't interfere.
396
+ const dailyLimit = 60;
397
+ // Persist max_relay_utilization into config.yaml so the daemon's
398
+ // rate-guard reads it on startup.
399
+ saveConfig({
400
+ relay: {
401
+ rate_guard: {
402
+ max_relay_utilization: maxRelayUtilization,
403
+ },
404
+ },
405
+ });
392
406
  // ── Step 5: register everything under one spinner ──
393
407
  //
394
408
  // We deliberately skip the old per-model Summary block: pricing is on
@@ -404,7 +418,7 @@ export async function relaySetupCommand() {
404
418
  // subscriptions + quota share above; Ctrl-C still aborts, and the
405
419
  // backend is idempotent so mid-way aborts are safe to re-run.
406
420
  const limitLabel = {
407
- 15: "~25%", 30: "~50%", 45: "~75%", 60: "~100%",
421
+ 25: "25%", 50: "50%", 75: "75%", 100: "100%",
408
422
  };
409
423
  const earnPct = Math.round((1 - PLATFORM_FEE) * 100);
410
424
  // Single batch POST — one round-trip, one DB session, no
@@ -454,7 +468,7 @@ export async function relaySetupCommand() {
454
468
  if (failed === 0) {
455
469
  const breakdown = cliSummary.length > 0 ? `: ${cliSummary.join(chalk.dim(" · "))}` : "";
456
470
  regSpin.stop(`${chalk.green(`✓ Registered${breakdown}`)} ` +
457
- chalk.dim(`(${limitLabel[dailyLimit] ?? `$${dailyLimit}`} quota share · you earn ~${earnPct}%)`));
471
+ chalk.dim(`(${limitLabel[maxRelayUtilization] ?? `${maxRelayUtilization}%`} of 5h window · you earn ~${earnPct}%)`));
458
472
  }
459
473
  else {
460
474
  regSpin.stop(`${chalk.yellow(`${succeeded} registered, ${failed} failed`)}`);
@@ -19,6 +19,7 @@ export const API_PRICES = {
19
19
  // ── Anthropic (Claude) ──
20
20
  // Verified against LiteLLM pricing DB. cache_read = 0.1x input,
21
21
  // cache_write = 1.25x input (Anthropic ephemeral cache).
22
+ "claude-opus-4-7": { input: 5, output: 25 }, // released 2026-04-16
22
23
  "claude-opus-4-6": { input: 5, output: 25 },
23
24
  "claude-opus-4-5": { input: 5, output: 25 },
24
25
  "claude-sonnet-4-6": { input: 3, output: 15 },
@@ -7,7 +7,7 @@ import { callClaudeApi, callClaudeApiPassthrough, preflightClaudeApi, getRateGua
7
7
  import { callCodexApi, callCodexApiPassthrough, preflightCodexApi, getRateGuardSnapshot as getCodexRateGuardSnapshot, } from "./upstream/codex-api.js";
8
8
  import { callGeminiApi, preflightGeminiApi, getGeminiRateGuardSnapshot, } from "./upstream/gemini-api.js";
9
9
  import { callAntigravityApi, preflightAntigravityApi, getAntigravityRateGuardSnapshot, } from "./upstream/antigravity-api.js";
10
- import { apiGet } from "../utils/api.js";
10
+ import { apiGet, apiPost } from "../utils/api.js";
11
11
  /**
12
12
  * Pick the rate-guard snapshot matching this request's cli_type. Fixes a
13
13
  * pre-existing bug where gemini/codex responses were piggy-backing Claude's
@@ -167,6 +167,83 @@ function extractMessageText(content) {
167
167
  function messagesToPrompt(messages) {
168
168
  return messages.map((m) => extractMessageText(m.content)).join("\n");
169
169
  }
170
+ // ── OAuth auto-pause (per-cli_type) ────────────────────────────────────
171
+ //
172
+ // When upstream keeps rejecting our OAuth token (Anthropic 403
173
+ // permission_error, ChatGPT auth failures, etc.), continuing to hammer
174
+ // it wastes buyer requests, surfaces errors the Hub has to failover
175
+ // around, and thrashes the Hub's 5xx ban / unban cycle every time the
176
+ // daemon reconnects. Track consecutive auth-broken errors per cli_type
177
+ // — after AUTH_ERROR_THRESHOLD hits in a row, stop accepting new
178
+ // requests for THAT cli_type until daemon restart. Every successful
179
+ // upstream response resets the counter.
180
+ //
181
+ // Key properties:
182
+ // - Per cli_type: a broken Claude OAuth doesn't take down Codex or
183
+ // Gemini on the same daemon, because each has its own counter and
184
+ // its own disable flag.
185
+ // - In-memory only: state resets on daemon restart. If the operator
186
+ // re-authed between restarts, the next request proves the token
187
+ // works and nothing happens; if they didn't, the counter fills
188
+ // back up within AUTH_ERROR_THRESHOLD requests and re-disables.
189
+ // - No WS lifecycle touched: the daemon stays connected to the Hub
190
+ // so other cli_types still serve. We just refuse to call upstream
191
+ // for the disabled one, returning a clean error the Hub can use
192
+ // to ban this provider row (its existing _is_auth_broken_error
193
+ // pattern catches our "OAuth authentication broken" message).
194
+ //
195
+ // Operator recovery: run `clawmoney login <cli>` (or re-auth the
196
+ // relevant CLI directly — `claude login`, `codex login`, etc.), then
197
+ // `clawmoney relay restart` to reset the counter.
198
+ const AUTH_ERROR_THRESHOLD = 3;
199
+ const consecutiveAuthErrorsByCli = new Map();
200
+ const cliAuthDisabled = new Set();
201
+ const AUTH_BROKEN_PATTERNS = [
202
+ // Anthropic 403: OAuth authentication is currently not allowed for
203
+ // this organization. This is the new production signal from the 2026-04-15 incident.
204
+ "permission_error",
205
+ "not allowed for this organization",
206
+ // Legacy Claude / Anthropic auth failures (also matched by Hub's
207
+ // _AUTH_BROKEN_PATTERNS, so the two sides agree on classification).
208
+ "token refresh failed",
209
+ "invalid_grant",
210
+ "request not allowed",
211
+ "oauth refresh",
212
+ // Generic OAuth HTTP signatures. Catches the one-off 401/403
213
+ // responses from codex / gemini / antigravity that carry the same
214
+ // meaning even when the upstream-specific message format differs.
215
+ "unauthorized",
216
+ ];
217
+ function isAuthBrokenError(errMsg) {
218
+ const lower = errMsg.toLowerCase();
219
+ return AUTH_BROKEN_PATTERNS.some((p) => lower.includes(p));
220
+ }
221
+ function noteUpstreamAuthError(cliType) {
222
+ const next = (consecutiveAuthErrorsByCli.get(cliType) ?? 0) + 1;
223
+ consecutiveAuthErrorsByCli.set(cliType, next);
224
+ if (next >= AUTH_ERROR_THRESHOLD && !cliAuthDisabled.has(cliType)) {
225
+ cliAuthDisabled.add(cliType);
226
+ logger.error("");
227
+ logger.error(` ╔══════════════════════════════════════════════════════════════`);
228
+ logger.error(` ║ OAuth broken for cli_type='${cliType}' — ${next} consecutive`);
229
+ logger.error(` ║ auth-broken responses from upstream. Pausing relay for this`);
230
+ logger.error(` ║ cli_type to stop thrashing buyer requests + Hub ban state.`);
231
+ logger.error(` ║`);
232
+ logger.error(` ║ TO RESUME: re-authenticate your ${cliType} CLI locally, then`);
233
+ logger.error(` ║ run 'clawmoney relay restart'.`);
234
+ logger.error(` ║`);
235
+ logger.error(` ║ Other cli_types on this daemon continue to serve normally.`);
236
+ logger.error(` ╚══════════════════════════════════════════════════════════════`);
237
+ logger.error("");
238
+ }
239
+ }
240
+ function noteUpstreamSuccess(cliType) {
241
+ // Successful request → reset the consecutive counter. The disabled
242
+ // flag is sticky until daemon restart on purpose — we never want to
243
+ // "heal" mid-run based on a single lucky response, which could be
244
+ // an upstream glitch rather than a real token refresh.
245
+ consecutiveAuthErrorsByCli.delete(cliType);
246
+ }
170
247
  async function executeRelayRequest(request, config, sendChunk) {
171
248
  const { request_id, max_budget_usd } = request;
172
249
  const cliType = request.cli_type ?? config.relay.cli_type;
@@ -190,6 +267,21 @@ async function executeRelayRequest(request, config, sendChunk) {
190
267
  logger.info(` │ CLI: ${cliType} / ${model} (${modeLabel})`);
191
268
  logger.info(` │ Turns: ${turns}`);
192
269
  logger.info(` │ Prompt: ${String(lastUserMsg).slice(0, 80)}`);
270
+ // Fast-fail if this cli_type was auto-paused by a run of auth-broken
271
+ // responses earlier in the session. Returning the error here instead
272
+ // of calling upstream saves the round-trip and keeps the Hub's ban
273
+ // pattern triggering (it matches "OAuth authentication" / "auth
274
+ // broken" in _is_auth_broken_error) so buyer requests go straight to
275
+ // a healthy provider.
276
+ if (cliAuthDisabled.has(cliType)) {
277
+ logger.warn(` └─ REFUSED: ${cliType} auth paused (restart relay after re-auth)`);
278
+ return {
279
+ event: "relay_response",
280
+ request_id,
281
+ content: "",
282
+ error: `OAuth authentication broken for cli_type='${cliType}'. Provider needs to re-authenticate locally and restart the daemon. (permission_error)`,
283
+ };
284
+ }
193
285
  try {
194
286
  const startMs = Date.now();
195
287
  let parsed;
@@ -300,6 +392,9 @@ async function executeRelayRequest(request, config, sendChunk) {
300
392
  if (fakeModelUsed) {
301
393
  logger.warn(` ! CLAWMONEY_FAKE_MODEL_USED=${fakeModelUsed} — reporting fake model to Hub (test mode)`);
302
394
  }
395
+ // Successful upstream round-trip — reset the auth-error counter for
396
+ // this cli_type. One good response means the token currently works.
397
+ noteUpstreamSuccess(cliType);
303
398
  return {
304
399
  event: "relay_response",
305
400
  request_id,
@@ -312,12 +407,22 @@ async function executeRelayRequest(request, config, sendChunk) {
312
407
  };
313
408
  }
314
409
  catch (err) {
315
- logger.error(` └─ ERROR: ${err instanceof Error ? err.message : err}`);
410
+ const errMsg = err instanceof Error ? err.message : String(err);
411
+ logger.error(` └─ ERROR: ${errMsg}`);
412
+ // If the upstream error looks like a persistent auth failure
413
+ // (OAuth rejected, token broken, permission_error, etc.), bump
414
+ // this cli_type's consecutive-auth-error counter. After
415
+ // AUTH_ERROR_THRESHOLD in a row, future requests for this
416
+ // cli_type short-circuit at the top of executeRelayRequest until
417
+ // daemon restart.
418
+ if (isAuthBrokenError(errMsg)) {
419
+ noteUpstreamAuthError(cliType);
420
+ }
316
421
  return {
317
422
  event: "relay_response",
318
423
  request_id,
319
424
  content: "",
320
- error: err instanceof Error ? err.message : "Unknown execution error",
425
+ error: errMsg || "Unknown execution error",
321
426
  };
322
427
  }
323
428
  }
@@ -446,6 +551,85 @@ export function runRelayProvider(cliOverride) {
446
551
  });
447
552
  }
448
553
  const activeTasks = new Set();
554
+ async function syncModelCatalog() {
555
+ try {
556
+ // Step 1: existing providers (gives us cli_types + default settings).
557
+ const myResp = await apiGet("/api/v1/relay/providers/me", config.api_key);
558
+ if (!myResp.ok || !Array.isArray(myResp.data)) {
559
+ logger.warn(`[catalog-sync] skipped: /providers/me returned ${myResp.status}`);
560
+ return;
561
+ }
562
+ const existing = myResp.data;
563
+ if (existing.length === 0) {
564
+ logger.info("[catalog-sync] no existing providers yet — skipping auto-sync");
565
+ return;
566
+ }
567
+ // Settings template per cli_type (taken from the first existing provider seen in that family).
568
+ const settingsByCli = new Map();
569
+ const knownModels = new Set();
570
+ for (const p of existing) {
571
+ knownModels.add(`${p.cli_type}/${p.model}`);
572
+ if (!settingsByCli.has(p.cli_type)) {
573
+ settingsByCli.set(p.cli_type, {
574
+ concurrency: p.concurrency,
575
+ daily_limit_usd: p.daily_limit_usd,
576
+ });
577
+ }
578
+ }
579
+ // Step 2: fetch catalog.
580
+ const catalogResp = await apiGet("/api/v1/relay/model-catalog");
581
+ if (!catalogResp.ok || !catalogResp.data?.catalog) {
582
+ logger.warn(`[catalog-sync] skipped: /model-catalog returned ${catalogResp.status}`);
583
+ return;
584
+ }
585
+ const catalog = catalogResp.data.catalog;
586
+ // Step 3: build batch for cli_types the agent has at least one provider for.
587
+ const batch = [];
588
+ const newModels = [];
589
+ for (const [cliType, settings] of settingsByCli) {
590
+ const recommended = catalog[cliType] ?? [];
591
+ for (const entry of recommended) {
592
+ if (!knownModels.has(`${cliType}/${entry.model}`)) {
593
+ newModels.push(`${cliType}/${entry.model}`);
594
+ }
595
+ batch.push({
596
+ cli_type: cliType,
597
+ model: entry.model,
598
+ mode: "chat",
599
+ concurrency: settings.concurrency,
600
+ daily_limit_usd: settings.daily_limit_usd,
601
+ price_input_per_m: entry.input,
602
+ price_output_per_m: entry.output,
603
+ });
604
+ }
605
+ }
606
+ if (batch.length === 0) {
607
+ return;
608
+ }
609
+ // Step 4: upsert via batch register (already idempotent).
610
+ const regResp = await apiPost("/api/v1/relay/providers/batch", { providers: batch }, config.api_key);
611
+ if (!regResp.ok) {
612
+ logger.warn(`[catalog-sync] batch register failed: ${regResp.status}`);
613
+ return;
614
+ }
615
+ const created = regResp.data.created?.length ?? 0;
616
+ const failed = regResp.data.failed?.length ?? 0;
617
+ if (newModels.length > 0 || created > 0) {
618
+ logger.info(`[catalog-sync] OK: ${batch.length} entries, ${created} newly created, ${failed} failed` +
619
+ (newModels.length > 0 ? ` (new: ${newModels.join(", ")})` : ""));
620
+ }
621
+ else {
622
+ logger.info(`[catalog-sync] OK: ${batch.length} entries, no changes`);
623
+ }
624
+ }
625
+ catch (err) {
626
+ logger.warn(`[catalog-sync] error: ${err.message}`);
627
+ }
628
+ }
629
+ // Initial sync, then every 30 min.
630
+ syncModelCatalog().catch((err) => logger.warn(`[catalog-sync] initial sync failed: ${err.message}`));
631
+ const catalogTimer = setInterval(() => syncModelCatalog().catch((err) => logger.warn(`[catalog-sync] periodic sync failed: ${err.message}`)), 30 * 60 * 1000);
632
+ catalogTimer.unref();
449
633
  // Create WS client
450
634
  const wsClient = new RelayWsClient(config, (event) => {
451
635
  handleEvent(event);
@@ -86,6 +86,7 @@ export interface RelayRateGuardConfig {
86
86
  min_request_gap_ms?: number;
87
87
  jitter_ms?: number;
88
88
  daily_budget_usd?: number;
89
+ max_relay_utilization?: number;
89
90
  }
90
91
  export interface RelayProviderSettings {
91
92
  cli_type: string;
@@ -507,6 +507,7 @@ export function configureAntigravityRateGuard(config) {
507
507
  minRequestGapMs: config.min_request_gap_ms,
508
508
  jitterMs: config.jitter_ms,
509
509
  dailyBudgetUsd: config.daily_budget_usd,
510
+ maxRelayUtilization: config.max_relay_utilization,
510
511
  }
511
512
  : {};
512
513
  const cleaned = Object.fromEntries(Object.entries(mapped).filter(([, v]) => v !== undefined));
@@ -726,6 +726,7 @@ export function configureRateGuard(config) {
726
726
  minRequestGapMs: config.min_request_gap_ms,
727
727
  jitterMs: config.jitter_ms,
728
728
  dailyBudgetUsd: config.daily_budget_usd,
729
+ maxRelayUtilization: config.max_relay_utilization,
729
730
  }
730
731
  : {};
731
732
  // Filter out undefined so defaults apply.
@@ -896,7 +897,19 @@ async function doCallClaudeApi(opts) {
896
897
  // account harder and extend the ban. Parse the reset headers, mark
897
898
  // cooldown, and fail this request. Subsequent requests will immediately
898
899
  // short-circuit via checkCooldown().
900
+ //
901
+ // Exception: "Extra usage is required" is NOT a rate limit — it's a
902
+ // billing/feature gate (e.g. Sonnet 1M context requires Extra usage
903
+ // credits on Claude Max). Triggering a global 5-minute cooldown for
904
+ // this would block ALL subsequent requests (including Opus, Haiku,
905
+ // non-1M Sonnet) even though they don't need Extra usage. Instead,
906
+ // fail only this request and let others through.
899
907
  if (resp.status === 429) {
908
+ const isExtraUsage = errText.toLowerCase().includes("extra usage");
909
+ if (isExtraUsage) {
910
+ logger.warn("[claude-api] 429 Extra usage required — skipping cooldown (not a rate limit)");
911
+ throw new Error(`Anthropic 429 extra-usage-required: ${errText.slice(0, 300)}`);
912
+ }
900
913
  const cooldown = extractCooldownUntilFromHeaders(resp.headers);
901
914
  if (cooldown && rateGuard) {
902
915
  rateGuard.triggerCooldown(cooldown.untilMs, cooldown.reason);
@@ -1307,6 +1320,11 @@ async function doCallClaudeApiPassthrough(opts) {
1307
1320
  }
1308
1321
  const errText = await resp.text();
1309
1322
  if (resp.status === 429) {
1323
+ const isExtraUsage = errText.toLowerCase().includes("extra usage");
1324
+ if (isExtraUsage) {
1325
+ logger.warn("[claude-api] 429 Extra usage required (passthrough) — skipping cooldown");
1326
+ throw new Error(`Anthropic 429 extra-usage-required: ${errText.slice(0, 300)}`);
1327
+ }
1310
1328
  const cooldown = extractCooldownUntilFromHeaders(resp.headers);
1311
1329
  if (cooldown && rateGuard) {
1312
1330
  rateGuard.triggerCooldown(cooldown.untilMs, cooldown.reason);
@@ -371,6 +371,7 @@ export function configureRateGuard(config) {
371
371
  minRequestGapMs: config.min_request_gap_ms,
372
372
  jitterMs: config.jitter_ms,
373
373
  dailyBudgetUsd: config.daily_budget_usd,
374
+ maxRelayUtilization: config.max_relay_utilization,
374
375
  }
375
376
  : {};
376
377
  const cleaned = Object.fromEntries(Object.entries(mapped).filter(([, v]) => v !== undefined));
@@ -243,6 +243,7 @@ export function configureGeminiRateGuard(config) {
243
243
  minRequestGapMs: config.min_request_gap_ms,
244
244
  jitterMs: config.jitter_ms,
245
245
  dailyBudgetUsd: config.daily_budget_usd,
246
+ maxRelayUtilization: config.max_relay_utilization,
246
247
  }
247
248
  : {};
248
249
  const cleaned = Object.fromEntries(Object.entries(mapped).filter(([, v]) => v !== undefined));
@@ -35,11 +35,23 @@ export interface RateGuardConfig {
35
35
  jitterMs: number;
36
36
  /** Hard daily cost cap in USD. Default 15. */
37
37
  dailyBudgetUsd: number;
38
+ /**
39
+ * Max relay utilization of the 5h session window (0-100).
40
+ * When relay's own accumulated utilization delta reaches this %,
41
+ * further relay requests are refused until the window resets.
42
+ * Provider's direct usage before relay's first observation does NOT count —
43
+ * only the increase between relay observations is tracked (concurrent direct usage included).
44
+ * Default 50 (relay can use up to 50% of the 5h window).
45
+ */
46
+ maxRelayUtilization: number;
38
47
  }
39
48
  export declare const DEFAULT_RATE_GUARD_CONFIG: RateGuardConfig;
40
49
  export declare class RateGuardBudgetExceededError extends Error {
41
50
  constructor(spent: number, limit: number);
42
51
  }
52
+ export declare class RateGuardRelayUtilizationExceededError extends Error {
53
+ constructor(used: number, limit: number, resetMins: number);
54
+ }
43
55
  /**
44
56
  * Thrown when the rate-guard is in a hard cooldown after observing a real
45
57
  * upstream 429. The `untilMs` field is an absolute UNIX ms timestamp — after
@@ -71,15 +83,19 @@ export declare class RateGuard {
71
83
  private cooldownUntilMs;
72
84
  private cooldownReason;
73
85
  private sessionWindow;
86
+ private relayWindowUsed;
87
+ private relayWindowEndMs;
88
+ private lastSeenUtilization;
74
89
  constructor(config?: Partial<RateGuardConfig>);
75
90
  /** Record an upstream-imposed cooldown. Called after parsing a real 429. */
76
91
  triggerCooldown(untilMs: number, reason: string): void;
77
- /** Update the 5h session window tracker from parsed upstream headers. */
92
+ /** Update the 5h session window tracker from parsed upstream headers.
93
+ * Also accumulates relay's own utilization delta for quota enforcement. */
78
94
  setSessionWindow(window: SessionWindow): void;
79
95
  getSessionWindow(): SessionWindow | null;
80
96
  private currentMaxConcurrency;
81
97
  private rotateDailyCounterIfNeeded;
82
- /** Check whether a new request would exceed the daily budget. */
98
+ /** Check whether a new request would exceed the daily budget or relay utilization cap. */
83
99
  checkBudget(): void;
84
100
  /** Check upstream-imposed cooldown. Throws RateGuardCooldownError if still cooling. */
85
101
  checkCooldown(): void;
@@ -93,6 +109,8 @@ export declare class RateGuard {
93
109
  cooldownUntilMs: number;
94
110
  cooldownReason: string;
95
111
  sessionWindow: SessionWindow | null;
112
+ relayWindowUsed: number;
113
+ maxRelayUtilization: number;
96
114
  };
97
115
  /**
98
116
  * Wrap an upstream call. Blocks until:
@@ -30,6 +30,7 @@ export const DEFAULT_RATE_GUARD_CONFIG = {
30
30
  minRequestGapMs: 500,
31
31
  jitterMs: 1500,
32
32
  dailyBudgetUsd: 15,
33
+ maxRelayUtilization: 50,
33
34
  };
34
35
  export class RateGuardBudgetExceededError extends Error {
35
36
  constructor(spent, limit) {
@@ -37,6 +38,12 @@ export class RateGuardBudgetExceededError extends Error {
37
38
  this.name = "RateGuardBudgetExceededError";
38
39
  }
39
40
  }
41
+ export class RateGuardRelayUtilizationExceededError extends Error {
42
+ constructor(used, limit, resetMins) {
43
+ super(`Relay utilization quota reached: ${used.toFixed(1)}% / ${limit}% of 5h window used by relay (resets in ${resetMins}min)`);
44
+ this.name = "RateGuardRelayUtilizationExceededError";
45
+ }
46
+ }
40
47
  /**
41
48
  * Thrown when the rate-guard is in a hard cooldown after observing a real
42
49
  * upstream 429. The `untilMs` field is an absolute UNIX ms timestamp — after
@@ -66,6 +73,12 @@ export class RateGuard {
66
73
  cooldownReason = "";
67
74
  // Rolling 5h session window surfaced by Anthropic headers.
68
75
  sessionWindow = null;
76
+ // Relay utilization tracking — accumulated delta of session_window
77
+ // utilization across relay requests within the current 5h window.
78
+ // Resets when the window resets (endMs changes).
79
+ relayWindowUsed = 0; // accumulated relay % (0-100)
80
+ relayWindowEndMs = 0; // which window we're tracking
81
+ lastSeenUtilization = null; // for delta computation
69
82
  constructor(config = {}) {
70
83
  this.cfg = { ...DEFAULT_RATE_GUARD_CONFIG, ...config };
71
84
  }
@@ -81,11 +94,34 @@ export class RateGuard {
81
94
  logger.warn(`[rate-guard] cooldown engaged (${reason}): ${seconds}s until reset`);
82
95
  }
83
96
  }
84
- /** Update the 5h session window tracker from parsed upstream headers. */
97
+ /** Update the 5h session window tracker from parsed upstream headers.
98
+ * Also accumulates relay's own utilization delta for quota enforcement. */
85
99
  setSessionWindow(window) {
100
+ // Detect window reset — if endMs changed, we're in a new window.
101
+ if (window.endMs !== this.relayWindowEndMs) {
102
+ if (this.relayWindowEndMs > 0 && this.relayWindowUsed > 0) {
103
+ logger.info(`[rate-guard] relay window reset (previous relay_used=${this.relayWindowUsed.toFixed(1)}%)`);
104
+ }
105
+ this.relayWindowUsed = 0;
106
+ this.relayWindowEndMs = window.endMs;
107
+ this.lastSeenUtilization = null;
108
+ }
109
+ // Compute relay delta: how much utilization increased since last observation.
110
+ // This is called AFTER each relay request, so the delta is (approximately)
111
+ // the utilization cost of that one relay request. If the provider was also
112
+ // using the account directly since the last observation, the delta includes their
113
+ // usage too — accepted trade-off (see design discussion).
114
+ if (typeof window.utilization === "number" &&
115
+ this.lastSeenUtilization !== null) {
116
+ const delta = window.utilization - this.lastSeenUtilization;
117
+ if (delta > 0) {
118
+ this.relayWindowUsed += delta;
119
+ }
120
+ }
121
+ this.lastSeenUtilization = window.utilization ?? null;
86
122
  this.sessionWindow = window;
87
123
  const mins = Math.round((window.endMs - Date.now()) / 60_000);
88
- logger.info(`[rate-guard] session window: ${window.utilization ?? "?"}% used, resets in ${mins}min (status=${window.status ?? "unknown"})`);
124
+ logger.info(`[rate-guard] session window: ${window.utilization ?? "?"}% used (relay_used=${this.relayWindowUsed.toFixed(1)}%/${this.cfg.maxRelayUtilization}%), resets in ${mins}min (status=${window.status ?? "unknown"})`);
89
125
  }
90
126
  getSessionWindow() {
91
127
  if (!this.sessionWindow)
@@ -114,12 +150,20 @@ export class RateGuard {
114
150
  this.dailySpentUsd = 0;
115
151
  }
116
152
  }
117
- /** Check whether a new request would exceed the daily budget. */
153
+ /** Check whether a new request would exceed the daily budget or relay utilization cap. */
118
154
  checkBudget() {
119
155
  this.rotateDailyCounterIfNeeded();
120
156
  if (this.dailySpentUsd >= this.cfg.dailyBudgetUsd) {
121
157
  throw new RateGuardBudgetExceededError(this.dailySpentUsd, this.cfg.dailyBudgetUsd);
122
158
  }
159
+ // Check relay utilization cap against 5h window.
160
+ // Only enforce if we've seen at least one session window update
161
+ // (otherwise we don't know the utilization yet — fail open).
162
+ if (this.relayWindowEndMs > 0 &&
163
+ this.relayWindowUsed >= this.cfg.maxRelayUtilization) {
164
+ const resetMins = Math.max(0, Math.round((this.relayWindowEndMs - Date.now()) / 60_000));
165
+ throw new RateGuardRelayUtilizationExceededError(this.relayWindowUsed, this.cfg.maxRelayUtilization, resetMins);
166
+ }
123
167
  }
124
168
  /** Check upstream-imposed cooldown. Throws RateGuardCooldownError if still cooling. */
125
169
  checkCooldown() {
@@ -147,6 +191,8 @@ export class RateGuard {
147
191
  cooldownUntilMs: this.cooldownUntilMs,
148
192
  cooldownReason: this.cooldownReason,
149
193
  sessionWindow: this.getSessionWindow(),
194
+ relayWindowUsed: this.relayWindowUsed,
195
+ maxRelayUtilization: this.cfg.maxRelayUtilization,
150
196
  };
151
197
  }
152
198
  /**
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clawmoney",
3
- "version": "0.15.68",
3
+ "version": "0.15.70",
4
4
  "description": "ClawMoney CLI -- Earn rewards with your AI agent",
5
5
  "type": "module",
6
6
  "bin": {