npm - clawmoney - Versions diffs - 0.15.68 → 0.15.70 - Mend

clawmoney 0.15.68 → 0.15.70

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

package/dist/commands/relay-setup.js +34 -20
package/dist/relay/pricing.js +1 -0
package/dist/relay/provider.js +187 -3
package/dist/relay/types.d.ts +1 -0
package/dist/relay/upstream/antigravity-api.js +1 -0
package/dist/relay/upstream/claude-api.js +18 -0
package/dist/relay/upstream/codex-api.js +1 -0
package/dist/relay/upstream/gemini-api.js +1 -0
package/dist/relay/upstream/rate-guard.d.ts +20 -2
package/dist/relay/upstream/rate-guard.js +49 -3
package/package.json +1 -1

package/dist/commands/relay-setup.js CHANGED Viewed

@@ -6,7 +6,7 @@ import * as readline from "node:readline";
 import { intro, outro, multiselect, select, spinner, isCancel, cancel, log, } from "@clack/prompts";
 import chalk from "chalk";
 import { apiPost } from "../utils/api.js";
-import { loadConfig, requireConfig } from "../utils/config.js";
+import { loadConfig, requireConfig, saveConfig } from "../utils/config.js";
 import { setupCommand } from "./setup.js";
 import { API_PRICES, PLATFORM_FEE } from "../relay/pricing.js";
 import { hasClaudeFingerprint, bootstrapClaudeFingerprint, } from "../relay/upstream/claude-bootstrap.js";
@@ -31,9 +31,10 @@ import { hasCodexFingerprint, bootstrapCodexFingerprint, } from "../relay/upstre
 // falls through to modelsForCli(cli) which returns EVERY priced
 // model in that family.
 const RECOMMENDED_MODELS = {
-    // Claude Code /model menu: Default(Sonnet 4.6) / Sonnet(1M) / Opus(1M) / Haiku
-    // → 3 unique model IDs (Sonnet 1M = same model + context-1m beta)
-    claude: ["claude-sonnet-4-6", "claude-opus-4-6", "claude-haiku-4-5"],
+    // Claude Code /model menu (post 2026-04-16 Opus 4.7 release):
+    //   Default(Opus 4.7 1M) / Sonnet 4.6 / Haiku 4.5
+    // Opus 4.7 released 2026-04-16 and became the default model.
+    claude: ["claude-opus-4-7", "claude-sonnet-4-6", "claude-opus-4-6", "claude-haiku-4-5"],
     // Codex CLI /model menu for ChatGPT sign-in (post 2026-04-14 cleanup):
     //   gpt-5.4             — latest frontier agentic coding (current default)
     //   gpt-5.4-mini        — smaller frontier agentic coding
@@ -358,37 +359,50 @@ export async function relaySetupCommand() {
     // pricing × number of providers; we can't predict that, so we don't
     // pretend to.
     const concurrency = 5;
-    const dailyLimitChoice = await select({
-        message: "Daily quota share per model? (applies independently to each model you register)",
+    const quotaShareChoice = await select({
+        message: "How much of your 5h session window can relay use?",
         options: [
             {
-                value: 15,
-                label: "~25%  ·  Light",
+                value: 25,
+                label: "25%  ·  Light",
                 hint: "share a quarter, leaves 75% for your personal use",
             },
             {
-                value: 30,
-                label: "~50%  ·  Balanced  (recommended)",
-                hint: "splits each model's quota evenly between you and the relay",
+                value: 50,
+                label: "50%  ·  Balanced  (recommended)",
+                hint: "splits your quota evenly between you and the relay",
             },
             {
-                value: 45,
-                label: "~75%  ·  Heavy",
+                value: 75,
+                label: "75%  ·  Heavy",
                 hint: "most of your subscription goes to relay, 25% reserved for personal use",
             },
             {
-                value: 60,
-                label: "~100% ·  Full",
+                value: 100,
+                label: "100% ·  Full",
                 hint: "dedicates your subscription to relay — best for accounts you don't use personally",
             },
         ],
-        initialValue: 30,
+        initialValue: 50,
     });
-    if (isCancel(dailyLimitChoice)) {
+    if (isCancel(quotaShareChoice)) {
         cancel("Setup cancelled");
         process.exit(0);
     }
-    const dailyLimit = dailyLimitChoice;
+    const maxRelayUtilization = quotaShareChoice;
+    // daily_limit_usd is kept as a high fallback — the real cap is now
+    // maxRelayUtilization enforced by the daemon's rate-guard. Set it
+    // generously so it doesn't interfere.
+    const dailyLimit = 60;
+    // Persist max_relay_utilization into config.yaml so the daemon's
+    // rate-guard reads it on startup.
+    saveConfig({
+        relay: {
+            rate_guard: {
+                max_relay_utilization: maxRelayUtilization,
+            },
+        },
+    });
     // ── Step 5: register everything under one spinner ──
     //
     // We deliberately skip the old per-model Summary block: pricing is on
@@ -404,7 +418,7 @@ export async function relaySetupCommand() {
     // subscriptions + quota share above; Ctrl-C still aborts, and the
     // backend is idempotent so mid-way aborts are safe to re-run.
     const limitLabel = {
-        15: "~25%", 30: "~50%", 45: "~75%", 60: "~100%",
+        25: "25%", 50: "50%", 75: "75%", 100: "100%",
     };
     const earnPct = Math.round((1 - PLATFORM_FEE) * 100);
     // Single batch POST — one round-trip, one DB session, no
@@ -454,7 +468,7 @@ export async function relaySetupCommand() {
     if (failed === 0) {
         const breakdown = cliSummary.length > 0 ? `: ${cliSummary.join(chalk.dim(" · "))}` : "";
         regSpin.stop(`${chalk.green(`✓ Registered${breakdown}`)}  ` +
-            chalk.dim(`(${limitLabel[dailyLimit] ?? `$${dailyLimit}`} quota share · you earn ~${earnPct}%)`));
+            chalk.dim(`(${limitLabel[maxRelayUtilization] ?? `${maxRelayUtilization}%`} of 5h window · you earn ~${earnPct}%)`));
     }
     else {
         regSpin.stop(`${chalk.yellow(`${succeeded} registered, ${failed} failed`)}`);

package/dist/relay/pricing.js CHANGED Viewed

@@ -19,6 +19,7 @@ export const API_PRICES = {
     // ── Anthropic (Claude) ──
     // Verified against LiteLLM pricing DB. cache_read = 0.1x input,
     // cache_write = 1.25x input (Anthropic ephemeral cache).
+    "claude-opus-4-7": { input: 5, output: 25 }, // released 2026-04-16
     "claude-opus-4-6": { input: 5, output: 25 },
     "claude-opus-4-5": { input: 5, output: 25 },
     "claude-sonnet-4-6": { input: 3, output: 15 },

package/dist/relay/provider.js CHANGED Viewed

@@ -7,7 +7,7 @@ import { callClaudeApi, callClaudeApiPassthrough, preflightClaudeApi, getRateGua
 import { callCodexApi, callCodexApiPassthrough, preflightCodexApi, getRateGuardSnapshot as getCodexRateGuardSnapshot, } from "./upstream/codex-api.js";
 import { callGeminiApi, preflightGeminiApi, getGeminiRateGuardSnapshot, } from "./upstream/gemini-api.js";
 import { callAntigravityApi, preflightAntigravityApi, getAntigravityRateGuardSnapshot, } from "./upstream/antigravity-api.js";
-import { apiGet } from "../utils/api.js";
+import { apiGet, apiPost } from "../utils/api.js";
 /**
  * Pick the rate-guard snapshot matching this request's cli_type. Fixes a
  * pre-existing bug where gemini/codex responses were piggy-backing Claude's
@@ -167,6 +167,83 @@ function extractMessageText(content) {
 function messagesToPrompt(messages) {
     return messages.map((m) => extractMessageText(m.content)).join("\n");
 }
+// ── OAuth auto-pause (per-cli_type) ────────────────────────────────────
+//
+// When upstream keeps rejecting our OAuth token (Anthropic 403
+// permission_error, ChatGPT auth failures, etc.), continuing to hammer
+// it wastes buyer requests, surfaces errors the Hub has to failover
+// around, and thrashes the Hub's 5xx ban / unban cycle every time the
+// daemon reconnects. Track consecutive auth-broken errors per cli_type
+// — after AUTH_ERROR_THRESHOLD hits in a row, stop accepting new
+// requests for THAT cli_type until daemon restart. Every successful
+// upstream response resets the counter.
+//
+// Key properties:
+//   - Per cli_type: a broken Claude OAuth doesn't take down Codex or
+//     Gemini on the same daemon, because each has its own counter and
+//     its own disable flag.
+//   - In-memory only: state resets on daemon restart. If the operator
+//     re-authed between restarts, the next request proves the token
+//     works and nothing happens; if they didn't, the counter fills
+//     back up within AUTH_ERROR_THRESHOLD requests and re-disables.
+//   - No WS lifecycle touched: the daemon stays connected to the Hub
+//     so other cli_types still serve. We just refuse to call upstream
+//     for the disabled one, returning a clean error the Hub can use
+//     to ban this provider row (its existing _is_auth_broken_error
+//     pattern catches our "OAuth authentication broken" message).
+//
+// Operator recovery: run `clawmoney login <cli>` (or re-auth the
+// relevant CLI directly — `claude login`, `codex login`, etc.), then
+// `clawmoney relay restart` to reset the counter.
+const AUTH_ERROR_THRESHOLD = 3;
+const consecutiveAuthErrorsByCli = new Map();
+const cliAuthDisabled = new Set();
+const AUTH_BROKEN_PATTERNS = [
+    // Anthropic 403: OAuth authentication is currently not allowed for
+    // this organization. The new prod signal from 2026-04-15 incident.
+    "permission_error",
+    "not allowed for this organization",
+    // Legacy Claude / Anthropic auth failures (also matched by Hub's
+    // _AUTH_BROKEN_PATTERNS, so the two sides agree on classification).
+    "token refresh failed",
+    "invalid_grant",
+    "request not allowed",
+    "oauth refresh",
+    // Generic OAuth HTTP signatures. Catches the one-off 401/403
+    // responses from codex / gemini / antigravity that carry the same
+    // meaning even when the upstream-specific message format differs.
+    "unauthorized",
+];
+function isAuthBrokenError(errMsg) {
+    const lower = errMsg.toLowerCase();
+    return AUTH_BROKEN_PATTERNS.some((p) => lower.includes(p));
+}
+function noteUpstreamAuthError(cliType) {
+    const next = (consecutiveAuthErrorsByCli.get(cliType) ?? 0) + 1;
+    consecutiveAuthErrorsByCli.set(cliType, next);
+    if (next >= AUTH_ERROR_THRESHOLD && !cliAuthDisabled.has(cliType)) {
+        cliAuthDisabled.add(cliType);
+        logger.error("");
+        logger.error(`  ╔══════════════════════════════════════════════════════════════`);
+        logger.error(`  ║ OAuth broken for cli_type='${cliType}' — ${next} consecutive`);
+        logger.error(`  ║ auth-broken responses from upstream. Pausing relay for this`);
+        logger.error(`  ║ cli_type to stop thrashing buyer requests + Hub ban state.`);
+        logger.error(`  ║`);
+        logger.error(`  ║ TO RESUME: re-authenticate your ${cliType} CLI locally, then`);
+        logger.error(`  ║           run 'clawmoney relay restart'.`);
+        logger.error(`  ║`);
+        logger.error(`  ║ Other cli_types on this daemon continue to serve normally.`);
+        logger.error(`  ╚══════════════════════════════════════════════════════════════`);
+        logger.error("");
+    }
+}
+function noteUpstreamSuccess(cliType) {
+    // Successful request → reset the consecutive counter. The disabled
+    // flag is sticky until daemon restart on purpose — we never want to
+    // "heal" mid-run based on a single lucky response, which could be
+    // an upstream glitch rather than a real token refresh.
+    consecutiveAuthErrorsByCli.delete(cliType);
+}
 async function executeRelayRequest(request, config, sendChunk) {
     const { request_id, max_budget_usd } = request;
     const cliType = request.cli_type ?? config.relay.cli_type;
@@ -190,6 +267,21 @@ async function executeRelayRequest(request, config, sendChunk) {
     logger.info(`  │ CLI:    ${cliType} / ${model} (${modeLabel})`);
     logger.info(`  │ Turns:  ${turns}`);
     logger.info(`  │ Prompt: ${String(lastUserMsg).slice(0, 80)}`);
+    // Fast-fail if this cli_type was auto-paused by a run of auth-broken
+    // responses earlier in the session. Returning the error here instead
+    // of calling upstream saves the round-trip and keeps the Hub's ban
+    // pattern triggering (it matches "OAuth authentication" / "auth
+    // broken" in _is_auth_broken_error) so buyer requests go straight to
+    // a healthy provider.
+    if (cliAuthDisabled.has(cliType)) {
+        logger.warn(`  └─ REFUSED: ${cliType} auth paused (restart relay after re-auth)`);
+        return {
+            event: "relay_response",
+            request_id,
+            content: "",
+            error: `OAuth authentication broken for cli_type='${cliType}'. Provider needs to re-authenticate locally and restart the daemon. (permission_error)`,
+        };
+    }
     try {
         const startMs = Date.now();
         let parsed;
@@ -300,6 +392,9 @@ async function executeRelayRequest(request, config, sendChunk) {
         if (fakeModelUsed) {
             logger.warn(`  ! CLAWMONEY_FAKE_MODEL_USED=${fakeModelUsed} — reporting fake model to Hub (test mode)`);
         }
+        // Successful upstream round-trip — reset the auth-error counter for
+        // this cli_type. One good response means the token currently works.
+        noteUpstreamSuccess(cliType);
         return {
             event: "relay_response",
             request_id,
@@ -312,12 +407,22 @@ async function executeRelayRequest(request, config, sendChunk) {
         };
     }
     catch (err) {
-        logger.error(`  └─ ERROR: ${err instanceof Error ? err.message : err}`);
+        const errMsg = err instanceof Error ? err.message : String(err);
+        logger.error(`  └─ ERROR: ${errMsg}`);
+        // If the upstream error looks like a persistent auth failure
+        // (OAuth rejected, token broken, permission_error, etc.), bump
+        // this cli_type's consecutive-auth-error counter. After
+        // AUTH_ERROR_THRESHOLD in a row, future requests for this
+        // cli_type short-circuit at the top of executeRelayRequest until
+        // daemon restart.
+        if (isAuthBrokenError(errMsg)) {
+            noteUpstreamAuthError(cliType);
+        }
         return {
             event: "relay_response",
             request_id,
             content: "",
-            error: err instanceof Error ? err.message : "Unknown execution error",
+            error: errMsg || "Unknown execution error",
         };
     }
 }
@@ -446,6 +551,85 @@ export function runRelayProvider(cliOverride) {
         });
     }
     const activeTasks = new Set();
+    async function syncModelCatalog() {
+        try {
+            // Step 1: existing providers (gives us cli_types + default settings).
+            const myResp = await apiGet("/api/v1/relay/providers/me", config.api_key);
+            if (!myResp.ok || !Array.isArray(myResp.data)) {
+                logger.warn(`[catalog-sync] skipped: /providers/me returned ${myResp.status}`);
+                return;
+            }
+            const existing = myResp.data;
+            if (existing.length === 0) {
+                logger.info("[catalog-sync] no existing providers yet — skipping auto-sync");
+                return;
+            }
+            // Settings template per cli_type (from any existing provider in that family).
+            const settingsByCli = new Map();
+            const knownModels = new Set();
+            for (const p of existing) {
+                knownModels.add(`${p.cli_type}/${p.model}`);
+                if (!settingsByCli.has(p.cli_type)) {
+                    settingsByCli.set(p.cli_type, {
+                        concurrency: p.concurrency,
+                        daily_limit_usd: p.daily_limit_usd,
+                    });
+                }
+            }
+            // Step 2: fetch catalog.
+            const catalogResp = await apiGet("/api/v1/relay/model-catalog");
+            if (!catalogResp.ok || !catalogResp.data?.catalog) {
+                logger.warn(`[catalog-sync] skipped: /model-catalog returned ${catalogResp.status}`);
+                return;
+            }
+            const catalog = catalogResp.data.catalog;
+            // Step 3: build batch for cli_types the agent has at least one provider for.
+            const batch = [];
+            const newModels = [];
+            for (const [cliType, settings] of settingsByCli) {
+                const recommended = catalog[cliType] ?? [];
+                for (const entry of recommended) {
+                    if (!knownModels.has(`${cliType}/${entry.model}`)) {
+                        newModels.push(`${cliType}/${entry.model}`);
+                    }
+                    batch.push({
+                        cli_type: cliType,
+                        model: entry.model,
+                        mode: "chat",
+                        concurrency: settings.concurrency,
+                        daily_limit_usd: settings.daily_limit_usd,
+                        price_input_per_m: entry.input,
+                        price_output_per_m: entry.output,
+                    });
+                }
+            }
+            if (batch.length === 0) {
+                return;
+            }
+            // Step 4: upsert via batch register (already idempotent).
+            const regResp = await apiPost("/api/v1/relay/providers/batch", { providers: batch }, config.api_key);
+            if (!regResp.ok) {
+                logger.warn(`[catalog-sync] batch register failed: ${regResp.status}`);
+                return;
+            }
+            const created = regResp.data.created?.length ?? 0;
+            const failed = regResp.data.failed?.length ?? 0;
+            if (newModels.length > 0 || created > 0) {
+                logger.info(`[catalog-sync] OK: ${batch.length} entries, ${created} newly created, ${failed} failed` +
+                    (newModels.length > 0 ? ` (new: ${newModels.join(", ")})` : ""));
+            }
+            else {
+                logger.info(`[catalog-sync] OK: ${batch.length} entries, no changes`);
+            }
+        }
+        catch (err) {
+            logger.warn(`[catalog-sync] error: ${err.message}`);
+        }
+    }
+    // Initial sync, then every 30 min.
+    syncModelCatalog().catch((err) => logger.warn(`[catalog-sync] initial sync failed: ${err.message}`));
+    const catalogTimer = setInterval(() => syncModelCatalog().catch((err) => logger.warn(`[catalog-sync] periodic sync failed: ${err.message}`)), 30 * 60 * 1000);
+    catalogTimer.unref();
     // Create WS client
     const wsClient = new RelayWsClient(config, (event) => {
         handleEvent(event);

package/dist/relay/types.d.ts CHANGED Viewed

@@ -86,6 +86,7 @@ export interface RelayRateGuardConfig {
     min_request_gap_ms?: number;
     jitter_ms?: number;
     daily_budget_usd?: number;
+    max_relay_utilization?: number;
 }
 export interface RelayProviderSettings {
     cli_type: string;

package/dist/relay/upstream/antigravity-api.js CHANGED Viewed

@@ -507,6 +507,7 @@ export function configureAntigravityRateGuard(config) {
             minRequestGapMs: config.min_request_gap_ms,
             jitterMs: config.jitter_ms,
             dailyBudgetUsd: config.daily_budget_usd,
+            maxRelayUtilization: config.max_relay_utilization,
         }
         : {};
     const cleaned = Object.fromEntries(Object.entries(mapped).filter(([, v]) => v !== undefined));

package/dist/relay/upstream/claude-api.js CHANGED Viewed

@@ -726,6 +726,7 @@ export function configureRateGuard(config) {
             minRequestGapMs: config.min_request_gap_ms,
             jitterMs: config.jitter_ms,
             dailyBudgetUsd: config.daily_budget_usd,
+            maxRelayUtilization: config.max_relay_utilization,
         }
         : {};
     // Filter out undefined so defaults apply.
@@ -896,7 +897,19 @@ async function doCallClaudeApi(opts) {
         // account harder and extend the ban. Parse the reset headers, mark
         // cooldown, and fail this request. Subsequent requests will immediately
         // short-circuit via checkCooldown().
+        //
+        // Exception: "Extra usage is required" is NOT a rate limit — it's a
+        // billing/feature gate (e.g. Sonnet 1M context requires Extra usage
+        // credits on Claude Max). Triggering a global 5-minute cooldown for
+        // this would block ALL subsequent requests (including Opus, Haiku,
+        // non-1M Sonnet) even though they don't need Extra usage. Instead,
+        // fail only this request and let others through.
         if (resp.status === 429) {
+            const isExtraUsage = errText.toLowerCase().includes("extra usage");
+            if (isExtraUsage) {
+                logger.warn("[claude-api] 429 Extra usage required — skipping cooldown (not a rate limit)");
+                throw new Error(`Anthropic 429 extra-usage-required: ${errText.slice(0, 300)}`);
+            }
             const cooldown = extractCooldownUntilFromHeaders(resp.headers);
             if (cooldown && rateGuard) {
                 rateGuard.triggerCooldown(cooldown.untilMs, cooldown.reason);
@@ -1307,6 +1320,11 @@ async function doCallClaudeApiPassthrough(opts) {
         }
         const errText = await resp.text();
         if (resp.status === 429) {
+            const isExtraUsage = errText.toLowerCase().includes("extra usage");
+            if (isExtraUsage) {
+                logger.warn("[claude-api] 429 Extra usage required (passthrough) — skipping cooldown");
+                throw new Error(`Anthropic 429 extra-usage-required: ${errText.slice(0, 300)}`);
+            }
             const cooldown = extractCooldownUntilFromHeaders(resp.headers);
             if (cooldown && rateGuard) {
                 rateGuard.triggerCooldown(cooldown.untilMs, cooldown.reason);

package/dist/relay/upstream/codex-api.js CHANGED Viewed

@@ -371,6 +371,7 @@ export function configureRateGuard(config) {
             minRequestGapMs: config.min_request_gap_ms,
             jitterMs: config.jitter_ms,
             dailyBudgetUsd: config.daily_budget_usd,
+            maxRelayUtilization: config.max_relay_utilization,
         }
         : {};
     const cleaned = Object.fromEntries(Object.entries(mapped).filter(([, v]) => v !== undefined));

package/dist/relay/upstream/gemini-api.js CHANGED Viewed

@@ -243,6 +243,7 @@ export function configureGeminiRateGuard(config) {
             minRequestGapMs: config.min_request_gap_ms,
             jitterMs: config.jitter_ms,
             dailyBudgetUsd: config.daily_budget_usd,
+            maxRelayUtilization: config.max_relay_utilization,
         }
         : {};
     const cleaned = Object.fromEntries(Object.entries(mapped).filter(([, v]) => v !== undefined));

package/dist/relay/upstream/rate-guard.d.ts CHANGED Viewed

@@ -35,11 +35,23 @@ export interface RateGuardConfig {
     jitterMs: number;
     /** Hard daily cost cap in USD. Default 15. */
     dailyBudgetUsd: number;
+    /**
+     * Max relay utilization of the 5h session window (0-100).
+     * When relay's own accumulated utilization delta reaches this %,
+     * further relay requests are refused until the window resets.
+     * Provider's direct usage does NOT count against this budget —
+     * only the delta observed across relay requests is tracked.
+     * Default 50 (relay can use up to 50% of the 5h window).
+     */
+    maxRelayUtilization: number;
 }
 export declare const DEFAULT_RATE_GUARD_CONFIG: RateGuardConfig;
 export declare class RateGuardBudgetExceededError extends Error {
     constructor(spent: number, limit: number);
 }
+export declare class RateGuardRelayUtilizationExceededError extends Error {
+    constructor(used: number, limit: number, resetMins: number);
+}
 /**
  * Thrown when the rate-guard is in a hard cooldown after observing a real
  * upstream 429. The `untilMs` field is an absolute UNIX ms timestamp — after
@@ -71,15 +83,19 @@ export declare class RateGuard {
     private cooldownUntilMs;
     private cooldownReason;
     private sessionWindow;
+    private relayWindowUsed;
+    private relayWindowEndMs;
+    private lastSeenUtilization;
     constructor(config?: Partial<RateGuardConfig>);
     /** Record an upstream-imposed cooldown. Called after parsing a real 429. */
     triggerCooldown(untilMs: number, reason: string): void;
-    /** Update the 5h session window tracker from parsed upstream headers. */
+    /** Update the 5h session window tracker from parsed upstream headers.
+     *  Also accumulates relay's own utilization delta for quota enforcement. */
     setSessionWindow(window: SessionWindow): void;
     getSessionWindow(): SessionWindow | null;
     private currentMaxConcurrency;
     private rotateDailyCounterIfNeeded;
-    /** Check whether a new request would exceed the daily budget. */
+    /** Check whether a new request would exceed the daily budget or relay utilization cap. */
     checkBudget(): void;
     /** Check upstream-imposed cooldown. Throws RateGuardCooldownError if still cooling. */
     checkCooldown(): void;
@@ -93,6 +109,8 @@ export declare class RateGuard {
         cooldownUntilMs: number;
         cooldownReason: string;
         sessionWindow: SessionWindow | null;
+        relayWindowUsed: number;
+        maxRelayUtilization: number;
     };
     /**
      * Wrap an upstream call. Blocks until:

package/dist/relay/upstream/rate-guard.js CHANGED Viewed

@@ -30,6 +30,7 @@ export const DEFAULT_RATE_GUARD_CONFIG = {
     minRequestGapMs: 500,
     jitterMs: 1500,
     dailyBudgetUsd: 15,
+    maxRelayUtilization: 50,
 };
 export class RateGuardBudgetExceededError extends Error {
     constructor(spent, limit) {
@@ -37,6 +38,12 @@ export class RateGuardBudgetExceededError extends Error {
         this.name = "RateGuardBudgetExceededError";
     }
 }
+export class RateGuardRelayUtilizationExceededError extends Error {
+    constructor(used, limit, resetMins) {
+        super(`Relay utilization quota reached: ${used.toFixed(1)}% / ${limit}% of 5h window used by relay (resets in ${resetMins}min)`);
+        this.name = "RateGuardRelayUtilizationExceededError";
+    }
+}
 /**
  * Thrown when the rate-guard is in a hard cooldown after observing a real
  * upstream 429. The `untilMs` field is an absolute UNIX ms timestamp — after
@@ -66,6 +73,12 @@ export class RateGuard {
     cooldownReason = "";
     // Rolling 5h session window surfaced by Anthropic headers.
     sessionWindow = null;
+    // Relay utilization tracking — accumulated delta of session_window
+    // utilization across relay requests within the current 5h window.
+    // Resets when the window resets (endMs changes).
+    relayWindowUsed = 0; // accumulated relay % (0-100)
+    relayWindowEndMs = 0; // which window we're tracking
+    lastSeenUtilization = null; // for delta computation
     constructor(config = {}) {
         this.cfg = { ...DEFAULT_RATE_GUARD_CONFIG, ...config };
     }
@@ -81,11 +94,34 @@ export class RateGuard {
             logger.warn(`[rate-guard] cooldown engaged (${reason}): ${seconds}s until reset`);
         }
     }
-    /** Update the 5h session window tracker from parsed upstream headers. */
+    /** Update the 5h session window tracker from parsed upstream headers.
+     *  Also accumulates relay's own utilization delta for quota enforcement. */
     setSessionWindow(window) {
+        // Detect window reset — if endMs changed, we're in a new window.
+        if (window.endMs !== this.relayWindowEndMs) {
+            if (this.relayWindowEndMs > 0 && this.relayWindowUsed > 0) {
+                logger.info(`[rate-guard] relay window reset (previous relay_used=${this.relayWindowUsed.toFixed(1)}%)`);
+            }
+            this.relayWindowUsed = 0;
+            this.relayWindowEndMs = window.endMs;
+            this.lastSeenUtilization = null;
+        }
+        // Compute relay delta: how much utilization increased since last observation.
+        // This is called AFTER each relay request, so the delta is (approximately)
+        // the utilization cost of that one relay request. If the provider was also
+        // using the account directly during this request, the delta includes their
+        // usage too — accepted trade-off (see design discussion).
+        if (typeof window.utilization === "number" &&
+            this.lastSeenUtilization !== null) {
+            const delta = window.utilization - this.lastSeenUtilization;
+            if (delta > 0) {
+                this.relayWindowUsed += delta;
+            }
+        }
+        this.lastSeenUtilization = window.utilization ?? null;
         this.sessionWindow = window;
         const mins = Math.round((window.endMs - Date.now()) / 60_000);
-        logger.info(`[rate-guard] session window: ${window.utilization ?? "?"}% used, resets in ${mins}min (status=${window.status ?? "unknown"})`);
+        logger.info(`[rate-guard] session window: ${window.utilization ?? "?"}% used (relay_used=${this.relayWindowUsed.toFixed(1)}%/${this.cfg.maxRelayUtilization}%), resets in ${mins}min (status=${window.status ?? "unknown"})`);
     }
     getSessionWindow() {
         if (!this.sessionWindow)
@@ -114,12 +150,20 @@ export class RateGuard {
             this.dailySpentUsd = 0;
         }
     }
-    /** Check whether a new request would exceed the daily budget. */
+    /** Check whether a new request would exceed the daily budget or relay utilization cap. */
     checkBudget() {
         this.rotateDailyCounterIfNeeded();
         if (this.dailySpentUsd >= this.cfg.dailyBudgetUsd) {
             throw new RateGuardBudgetExceededError(this.dailySpentUsd, this.cfg.dailyBudgetUsd);
         }
+        // Check relay utilization cap against 5h window.
+        // Only enforce if we've seen at least one session window update
+        // (otherwise we don't know the utilization yet — fail open).
+        if (this.relayWindowEndMs > 0 &&
+            this.relayWindowUsed >= this.cfg.maxRelayUtilization) {
+            const resetMins = Math.max(0, Math.round((this.relayWindowEndMs - Date.now()) / 60_000));
+            throw new RateGuardRelayUtilizationExceededError(this.relayWindowUsed, this.cfg.maxRelayUtilization, resetMins);
+        }
     }
     /** Check upstream-imposed cooldown. Throws RateGuardCooldownError if still cooling. */
     checkCooldown() {
@@ -147,6 +191,8 @@ export class RateGuard {
             cooldownUntilMs: this.cooldownUntilMs,
             cooldownReason: this.cooldownReason,
             sessionWindow: this.getSessionWindow(),
+            relayWindowUsed: this.relayWindowUsed,
+            maxRelayUtilization: this.cfg.maxRelayUtilization,
         };
     }
     /**

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "clawmoney",
-  "version": "0.15.68",
+  "version": "0.15.70",
   "description": "ClawMoney CLI -- Earn rewards with your AI agent",
   "type": "module",
   "bin": {