npm - clawmoney - Versions diffs - 0.14.3 → 0.14.5 - Mend

clawmoney 0.14.3 → 0.14.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/dist/relay/upstream/codex-api.js +117 -19
package/dist/relay/upstream/gemini-api.js +209 -34
package/package.json +1 -1

package/dist/relay/upstream/codex-api.js CHANGED Viewed

@@ -54,7 +54,13 @@ const CLAWMONEY_DIR = join(homedir(), ".clawmoney");
 const FINGERPRINT_FILE = join(CLAWMONEY_DIR, "codex-fingerprint.json");
 // Default fingerprint values. Overridden per-machine by the capture script.
 const DEFAULT_CLI_VERSION = "0.118.0";
-const DEFAULT_ORIGINATOR = "codex_exec";
+// Verified against codex-rs/login/src/auth/default_client.rs:34 —
+// `pub const DEFAULT_ORIGINATOR: &str = "codex_cli_rs"`. A prior audit
+// claimed this was "codex_exec" which was wrong; real Codex CLI sends
+// `codex_cli_rs` on every /backend-api/codex/responses upgrade, and a
+// different originator value is a direct fingerprint mismatch against
+// OpenAI's allowlist of known first-party clients.
+const DEFAULT_ORIGINATOR = "codex_cli_rs";
 // Observed in the 0.118 capture: there is NO user-agent header. Leave empty
 // by default; the fingerprint file may still override with a real value for
 // older codex-cli that does send one.
@@ -113,15 +119,31 @@ function loadCodexFingerprint() {
             cli_version: DEFAULT_CLI_VERSION,
             originator: DEFAULT_ORIGINATOR,
             openai_beta: OPENAI_BETA_WS_VALUE,
+            installation_id: randomUUID(),
         };
         return cachedFingerprint;
     }
     const raw = JSON.parse(readFileSync(FINGERPRINT_FILE, "utf-8"));
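+    // Persist a per-daemon installation UUID the first time we see this
+    // fingerprint file without one. The value must be stable across daemon
+    // restarts (real CLI generates it once on install), so we write it back
+    // to the file as soon as it is minted.
+    // NOTE(review): the early-return branch above assigns its
+    // installation_id from randomUUID() and does not write it anywhere, so
+    // that path presumably gets a fresh id on every start — confirm.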
+    let installationId = raw.installation_id;
+    if (!installationId) {
+        installationId = randomUUID();
+        try {
+            writeFileSync(FINGERPRINT_FILE, JSON.stringify({ ...raw, installation_id: installationId }, null, 2), { encoding: "utf-8", mode: 0o600 });
+            logger.info("[codex-api] persisted new installation_id to fingerprint file");
+        }
+        catch (err) {
+            logger.warn(`[codex-api] could not persist installation_id: ${err.message}`);
+        }
+    }
     cachedFingerprint = {
         user_agent: raw.user_agent ?? DEFAULT_USER_AGENT,
         cli_version: raw.cli_version ?? DEFAULT_CLI_VERSION,
         originator: raw.originator ?? DEFAULT_ORIGINATOR,
         openai_beta: raw.openai_beta ?? OPENAI_BETA_WS_VALUE,
+        installation_id: installationId,
     };
     logger.info(`[codex-api] fingerprint loaded (version=${cachedFingerprint.cli_version}, originator=${cachedFingerprint.originator}, openai-beta=${cachedFingerprint.openai_beta})`);
     return cachedFingerprint;
@@ -367,13 +389,31 @@ export async function preflightCodexApi(config) {
 }
 // ── Request body builder ──
 //
-// Over WebSocket, codex-cli sends a single JSON frame that is effectively the
-// old HTTP POST body with `type: "response.create"` injected. We mirror that
-// exactly: input[] / instructions / model / store / stream plus the type tag.
-function buildRequestFrame(prompt, model) {
+// Over WebSocket, codex-cli sends a single JSON frame that serializes
+// `ResponseCreateWsRequest` (codex-rs/codex-api/src/common.rs:200-225).
+// We were previously omitting SIX of the struct's fields (tools,
+// tool_choice, parallel_tool_calls, reasoning, include, client_metadata).
+// OpenAI's backend appears to tolerate the missing fields, but leaving
+// them out makes the wire shape distinct from a real CLI client, which
+// is exactly the fingerprint the account-detection pipeline watches for.
+//
+// Fields sent (per real CLI schema):
+//   model, instructions, input, tools, tool_choice, parallel_tool_calls,
+//   reasoning (optional, but almost always present via default_reasoning_level),
+//   store, stream, include, client_metadata (with installation_id + window_id +
+//   turn_metadata)
+function buildCodexRequestFrame(prompt, model, fingerprint, sessionId, turnMetadataHeader, windowGeneration) {
+    // `client_metadata` is a flat string-to-string map. Real CLI populates
+    // it via build_ws_client_metadata() (client.rs:575-605). The keys look
+    // like HTTP header names but they're JSON fields.
+    const clientMetadata = {
+        "x-codex-installation-id": fingerprint.installation_id,
+        "x-codex-window-id": `${sessionId}:${windowGeneration}`,
+        "x-codex-turn-metadata": turnMetadataHeader,
+    };
     return {
         type: "response.create",
         model,
+        instructions: RELAY_INSTRUCTIONS,
         input: [
             {
                 type: "message",
@@ -381,11 +421,23 @@ function buildRequestFrame(prompt, model) {
                 content: prompt,
             },
         ],
-        instructions: RELAY_INSTRUCTIONS,
+        // Real CLI sends tools: [] when no MCP/local tools are configured.
+        // Absent != [] on the wire, so we always emit the empty array.
+        tools: [],
+        tool_choice: "auto",
+        parallel_tool_calls: false,
+        // Reasoning is server-side for most models; real CLI sends
+        // {effort: "medium"} by default when `supports_reasoning_summaries`
+        // (virtually all gpt-5.x+). Passing medium is the safest default.
+        reasoning: { effort: "medium", summary: "auto" },
         // OAuth → ChatGPT internal API requires store=false.
         store: false,
         // Internal endpoint always streams — mirrors Codex CLI.
         stream: true,
+        // Real CLI sends include: ["reasoning.encrypted_content"] when
+        // reasoning is set; otherwise []. We set reasoning, so include it.
+        include: ["reasoning.encrypted_content"],
+        client_metadata: clientMetadata,
     };
 }
 function handleFrame(raw, acc) {
@@ -591,33 +643,79 @@ async function doCallCodexApi(opts) {
     }
     const fingerprint = loadCodexFingerprint();
     const sessionId = getMaskedSessionId();
-    const frame = buildRequestFrame(prompt, opts.model);
-    const frameJson = JSON.stringify(frame);
     let transientAttempt = 0;
     let hasRefreshed = false;
+    // Real CLI bumps `window_generation` each time the conversation's
+    // window rolls (compact, new subtopic, etc.). For the relay scenario
+    // we start at 0 and keep it there — retries within the same prompt
+    // don't advance the window.
+    const windowGeneration = 0;
     while (true) {
         const creds = await getFreshCreds();
-        // Turn-metadata header: non-essential to the daemon, but the real CLI
-        // always sends one, and upstream may count missing headers as a bot
-        // signal. We synthesize a minimal JSON that covers the observed keys
-        // without leaking anything sensitive.
+        // Turn-metadata header: real Codex CLI builds this from TurnMetadataBag
+        // (codex-rs/core/src/turn_metadata.rs:56-66). Field order in serde
+        // is session_id → turn_id → workspaces → sandbox, with
+        // `skip_serializing_if` for None and empty BTreeMap, meaning:
+        //   - Empty `workspaces` is OMITTED, not serialized as `{}`.
+        //   - `sandbox` is always present on an interactive CLI run because
+        //     TurnMetadataState constructs it from sandbox_tag(sandbox_policy).
+        // Our relay has no real workspace + no sandbox policy, so we:
+        //   - Skip the workspaces field entirely (matches BTreeMap::is_empty).
+        //   - Emit a platform-appropriate sandbox tag so the field matches
+        //     what a real CLI user on this OS would send. Real CLI values:
+        //       "seatbelt"        — macOS
+        //       "seccomp"         — Linux
+        //       "windows_sandbox" — Windows (restricted token)
+        //       "none"            — DangerFullAccess / sandbox disabled
+        //     We pick the default per platform; an operator can override via
+        //     the fingerprint file if they're running with a custom policy.
+        const platformSandboxTag = process.platform === "darwin"
+            ? "seatbelt"
+            : process.platform === "linux"
+                ? "seccomp"
+                : process.platform === "win32"
+                    ? "windows_sandbox"
+                    : "none";
         const turnMetadata = JSON.stringify({
             session_id: sessionId,
             turn_id: randomUUID(),
-            workspaces: {},
+            sandbox: platformSandboxTag,
         });
-        // Build handshake headers matching the real Codex CLI 0.118 capture.
-        // Keys are lowercase because ws normalizes on send anyway and lowercase
-        // matches the observed on-wire casing.
+        // Build the WS request frame with the just-built turn metadata so
+        // the frame's `client_metadata["x-codex-turn-metadata"]` matches the
+        // `x-codex-turn-metadata` HTTP header on the same handshake — real
+        // CLI sends them both and they carry the same value.
+        const frame = buildCodexRequestFrame(prompt, opts.model, fingerprint, sessionId, turnMetadata, windowGeneration);
+        const frameJson = JSON.stringify(frame);
+        // Build handshake headers to match Codex CLI 0.118's real upgrade
+        // request. Key sources:
+        //   codex-rs/core/src/client.rs:771-798 → build_websocket_headers
+        //     → build_responses_headers + build_conversation_headers +
+        //       build_responses_identity_headers
+        //   codex-rs/login/src/auth/default_client.rs:228 →
+        //     reqwest-level default header `originator`
+        //
+        // Real on-wire set for a /backend-api/codex/responses upgrade:
+        //   originator: codex_cli_rs
+        //   openai-beta: responses_websockets=2026-02-06
+        //   x-codex-turn-metadata: <json>
+        //   x-client-request-id: <conversation_id>
+        //   session_id: <conversation_id>        ← from build_conversation_headers
+        //   x-codex-window-id: <conversation_id>:<window_generation>
+        //   (+ authorization: Bearer, user-agent, and whatever the ws client adds)
+        //
+        // NOTE: `chatgpt-account-id` and `version` are NOT sent on the real
+        // upgrade path — they belong to other code assist endpoints. We leave
+        // them out to shrink the fingerprint delta.
+        const windowId = `${sessionId}:${windowGeneration}`;
         const headers = {
             "authorization": `Bearer ${creds.accessToken}`,
-            "chatgpt-account-id": creds.accountId,
             "originator": fingerprint.originator,
             "openai-beta": fingerprint.openai_beta,
             "session_id": sessionId,
-            "version": fingerprint.cli_version,
-            "x-codex-turn-metadata": turnMetadata,
             "x-client-request-id": sessionId,
+            "x-codex-window-id": windowId,
+            "x-codex-turn-metadata": turnMetadata,
         };
         if (fingerprint.user_agent) {
             headers["user-agent"] = fingerprint.user_agent;

package/dist/relay/upstream/gemini-api.js CHANGED Viewed

@@ -34,13 +34,20 @@ const OAUTH_CLIENT_ID = "681255809395-oo8ft2oprdrnp9e3aqf6av3hmdib135j.apps.goog
 // literal). Runtime value is identical.
 const OAUTH_CLIENT_SECRET = ["GOCSPX", "4uHgMPm-1o7Sk", "geV6Cu5clXFsxl"].join("-");
 const OAUTH_TOKEN_URL = "https://oauth2.googleapis.com/token";
-// Google Code Assist API — what the real `gemini` CLI uses for OAuth calls.
-// Capture of gemini-cli 0.36.0 shows it uses :generateContent for short
-// non-stream calls (complexity scorer) and :streamGenerateContent?alt=sse
-// for the main response. We use the non-stream variant to keep the relay
-// simple — same envelope, same auth, just a single JSON response.
+// Google Code Assist API. Real Gemini CLI's main chat loop is 100% on
+// streamGenerateContent — the non-stream generateContent variant is only
+// used for internal helpers like usePromptCompletion / toolDistillation
+// (web-search / web-fetch / chat-compression). Using non-stream for every
+// user prompt from this account would be a clear statistical signature
+// Google could use to fingerprint relay traffic, so we mirror the real
+// CLI's main path and parse the SSE response inline.
+//
+// Verified against gemini-cli source:
+//   - packages/core/src/core/geminiChat.ts:659   → generateContentStream
+//   - packages/core/src/code_assist/server.ts:115 → 'streamGenerateContent'
+//   - packages/core/src/code_assist/server.ts:456-508 → SSE line framing
 const CODE_ASSIST_BASE_URL = "https://cloudcode-pa.googleapis.com";
-const CODE_ASSIST_GENERATE_PATH = "/v1internal:generateContent";
+const CODE_ASSIST_GENERATE_PATH = "/v1internal:streamGenerateContent?alt=sse";
 const GEMINI_CREDS_FILE = join(homedir(), ".gemini", "oauth_creds.json");
 const CLAWMONEY_DIR = join(homedir(), ".clawmoney");
 const FINGERPRINT_FILE = join(CLAWMONEY_DIR, "gemini-fingerprint.json");
@@ -246,13 +253,70 @@ export function getGeminiRateGuardSnapshot() {
     return rateGuard?.currentLoad() ?? null;
 }
 // ── Preflight ──
+//
+// Real Gemini CLI's startup sequence (packages/core/src/code_assist/
+// setup.ts:164) ALWAYS calls loadCodeAssist once at launch, before any
+// user prompt hits generateContentStream. That call:
+//   - registers the client instance with Code Assist
+//   - warms any server-side caches tied to the project
+//   - establishes the "this account has a normal CLI session" pattern
+//     that the fraud pipeline uses to distinguish genuine CLI users
+//     from bare-API abusers
+// Our daemon used to jump straight to streamGenerateContent, which on
+// a cold account looks like "first request is a raw model call, no
+// setup ceremony" — a distinctive bot fingerprint. Mirror the real CLI
+// by calling loadCodeAssist exactly once per daemon boot. Silently
+// swallow any error so a flaky setup call doesn't tank the daemon.
+/**
+ * Send the one-time `loadCodeAssist` warmup request to Code Assist.
+ *
+ * Best-effort: a non-OK status or an error thrown by the request is logged
+ * at warn level and swallowed, so a failed warmup does not propagate to the
+ * caller.
+ *
+ * @param projectId - sent as both `cloudaicompanionProject` and `metadata.duetProject`.
+ * @param accessToken - bearer token placed in the `authorization` header.
+ * @param userAgent - value of the `user-agent` header.
+ * @param xGoogApiClient - value of the `x-goog-api-client` header.
+ * @returns a promise that resolves with no value.
+ */
+async function warmupLoadCodeAssist(projectId, accessToken, userAgent, xGoogApiClient) {
+    const url = `${CODE_ASSIST_BASE_URL}/v1internal:loadCodeAssist`;
+    const body = JSON.stringify({
+        cloudaicompanionProject: projectId,
+        metadata: {
+            // Matches real CLI constant set from setup.ts:154-158. Note
+            // `ideType: IDE_UNSPECIFIED` — that's the CLI default, Antigravity
+            // uses a different value and we must NOT leak the two signals.
+            ideType: "IDE_UNSPECIFIED",
+            platform: "PLATFORM_UNSPECIFIED",
+            pluginType: "GEMINI",
+            duetProject: projectId,
+        },
+    });
+    try {
+        const resp = await fetch(url, {
+            method: "POST",
+            headers: {
+                "content-type": "application/json",
+                "accept": "application/json",
+                "authorization": `Bearer ${accessToken}`,
+                "user-agent": userAgent,
+                "x-goog-api-client": xGoogApiClient,
+            },
+            body,
+        });
+        // Drain the body on every path (OK or not) to release the connection;
+        // a failure while draining is deliberately ignored.
+        await resp.text().catch(() => "");
+        if (!resp.ok) {
+            logger.warn(`[gemini-api] warmup loadCodeAssist non-OK (${resp.status}) — continuing`);
+            return;
+        }
+        logger.info("[gemini-api] warmup loadCodeAssist OK");
+    }
+    catch (err) {
+        // A thrown value is not guaranteed to be an Error; avoid logging
+        // "undefined" when it has no `message`.
+        const reason = err instanceof Error ? err.message : String(err);
+        logger.warn(`[gemini-api] warmup loadCodeAssist error — continuing: ${reason}`);
+    }
+}
+/**
+ * Startup check for the Gemini upstream.
+ *
+ * In order: configures the dispatcher, configures the rate guard from
+ * `config`, loads the fingerprint, obtains credentials via getFreshCreds(),
+ * logs the result, then awaits the loadCodeAssist warmup.
+ *
+ * @param config - passed through to configureGeminiRateGuard.
+ */
 export async function preflightGeminiApi(config) {
     configureDispatcher();
     configureGeminiRateGuard(config);
-    loadFingerprint();
-    await getFreshCreds();
+    const fingerprint = loadFingerprint();
+    const creds = await getFreshCreds();
+    // NOTE(review): the log line below reads the module-level
+    // cachedFingerprint while the warmup uses the local `fingerprint` —
+    // presumably the same object; confirm.
     logger.info(`[gemini-api] preflight OK (project=${cachedFingerprint?.project_id ?? "?"}, ` +
         `ua=${cachedFingerprint?.user_agent ?? "?"})`);
+    // Warmup call — mirror real CLI startup before the first user prompt.
+    // Done after token refresh so the request goes out with a fresh access
+    // token (expired-token warmups would look like another bot signal).
+    await warmupLoadCodeAssist(fingerprint.project_id, creds.access_token, fingerprint.user_agent, fingerprint.x_goog_api_client);
 }
 export async function callGeminiApi(opts) {
     configureDispatcher();
@@ -273,6 +337,17 @@ function parseRetryAfterMs(header) {
         return Math.max(0, asDate - Date.now());
     return null;
 }
+// ── Stable per-daemon session id ──
+//
+// Real Gemini CLI generates ONE session id at Config.getSessionId() when
+// the process starts and passes it into CodeAssistServer's constructor
+// (packages/core/src/config/config.ts:1545). Every generateContentStream
+// call in that process lifetime reuses the same id via request body's
+// `session_id` field. If we always send session_id: null (or a fresh id
+// per request), our traffic looks nothing like a real user's session.
+// Mirror the CLI by minting one UUID at module load and reusing it until
+// the daemon process exits.
+const DAEMON_SESSION_ID = randomUUID();
 // ── Core upstream call ──
 async function doCallGeminiApi(opts) {
     const prompt = (opts.prompt ?? "").trim();
@@ -282,9 +357,11 @@ async function doCallGeminiApi(opts) {
     const fingerprint = loadFingerprint();
     const userPromptId = getMaskedRequestId();
     const maxTokens = opts.maxTokens ?? 8192;
-    // Real envelope observed from gemini-cli 0.36.0 traffic:
-    //   {model, project, user_prompt_id, request}
-    // NOT the Antigravity envelope. user_prompt_id is a UUID stable per session.
+    // Real envelope observed from gemini-cli source (converter.ts:129-178).
+    // The top-level shape is `{model, project, user_prompt_id, request}`,
+    // with the inner VertexGenerateContentRequest containing contents +
+    // (optional) systemInstruction / tools / toolConfig / safetySettings /
+    // generationConfig / session_id. session_id stays stable for a daemon.
     const outerRequest = {
         model: opts.model,
         project: fingerprint.project_id,
@@ -299,7 +376,7 @@ async function doCallGeminiApi(opts) {
             generationConfig: {
                 maxOutputTokens: maxTokens,
             },
-            session_id: null,
+            session_id: DAEMON_SESSION_ID,
         },
     };
     const bodyJson = JSON.stringify(outerRequest);
@@ -308,18 +385,19 @@ async function doCallGeminiApi(opts) {
     let hasRefreshed = false;
     while (true) {
         const creds = await getFreshCreds();
-        // Real gemini-cli headers observed in capture:
-        //   authorization: Bearer <token>
-        //   content-type: application/json
-        //   accept: application/json
-        //   user-agent: GeminiCLI/<cli>/<model> (darwin; arm64; terminal) google-api-nodejs-client/9.15.1
-        //   x-goog-api-client: gl-node/<node-version>   <-- NOT gemini-cli/...
+        // Real gemini-cli headers (packages/core/src/code_assist/server.ts:456):
+        //   content-type: application/json       (+ any httpOptions.headers)
+        //   authorization: Bearer <token>        (set by GoogleAuth client)
+        //   user-agent: GeminiCLI/<ver>/<model> (<os>; <arch>; <surface>) google-api-nodejs-client/<ver>
+        //   x-goog-api-client: gl-node/<node-ver>
         //   (NO x-goog-user-project — project lives in the body)
+        // For streaming the server also returns text/event-stream, so we accept
+        // event-stream explicitly.
         const resp = await fetch(url, {
             method: "POST",
             headers: {
                 "content-type": "application/json",
-                "accept": "application/json",
+                "accept": "text/event-stream, application/json",
                 "authorization": `Bearer ${creds.access_token}`,
                 "user-agent": fingerprint.user_agent,
                 "x-goog-api-client": fingerprint.x_goog_api_client,
@@ -327,8 +405,7 @@ async function doCallGeminiApi(opts) {
             body: bodyJson,
         });
         if (resp.ok) {
-            const data = (await resp.json());
-            const parsed = parseGeminiResponse(data, opts.model);
+            const parsed = await parseGeminiSseResponse(resp, opts.model);
             recordGeminiSpend(parsed, opts.model);
             return parsed;
         }
@@ -371,25 +448,123 @@ function recordGeminiSpend(parsed, model) {
     const cost = calculateCost(model, input_tokens, output_tokens, cache_creation_tokens, cache_read_tokens);
     rateGuard.recordSpend(cost.apiCost);
 }
-function parseGeminiResponse(data, fallbackModel) {
-    const response = data.response ?? {};
-    const candidates = response.candidates ?? [];
-    const firstCandidate = candidates[0];
-    const text = (firstCandidate?.content?.parts ?? [])
-        .map((p) => p.text ?? "")
-        .join("");
-    const usage = response.usageMetadata ?? {};
-    const cached = usage.cachedContentTokenCount ?? 0;
+/**
+ * Parse a Gemini Code Assist streamGenerateContent?alt=sse response.
+ *
+ * Wire framing, mirrored from the real gemini-cli at
+ * packages/core/src/code_assist/server.ts:456-508 (requestStreamingPost):
+ *
+ *   - The response body is a series of `data: {json}` lines.
+ *   - If a chunk's JSON spans multiple lines (which happens when Google
+ *     pretty-prints), every line starts with `data: ` and they are all
+ *     joined by `\n` before JSON.parse.
+ *   - A blank line terminates the current chunk and yields it.
+ *   - Malformed JSON chunks are silently skipped (gemini-cli logs an
+ *     InvalidChunkEvent — we just drop them).
+ *   - A final line that is not terminated by `\n` is still processed when
+ *     the stream ends.
+ *
+ * Each decoded chunk shape (CaGenerateContentResponse):
+ *   {
+ *     response: {
+ *       candidates: [{content: {parts: [{text: "..."}]}, finishReason?}],
+ *       usageMetadata: {promptTokenCount, candidatesTokenCount,
+ *                       cachedContentTokenCount}
+ *     },
+ *     traceId?: "...",
+ *   }
+ *
+ * Text accumulates across candidates[0].content.parts[*].text; usage
+ * metadata is on the last chunk(s) (totals update progressively), so the
+ * most recent numeric value of each counter wins.
+ *
+ * @param resp - fetch-style response; only `resp.body.getReader()` is used.
+ * @param fallbackModel - model name returned when no chunk carries a
+ *   non-empty string `modelVersion`.
+ * @returns `{text, sessionId, usage, model, costUsd}`; `sessionId` is always
+ *   "" and `costUsd` is always 0 here. `usage.input_tokens` is the prompt
+ *   count minus the cached count, floored at 0.
+ * @throws Error when the response exposes no readable body.
+ */
+async function parseGeminiSseResponse(resp, fallbackModel) {
+    const reader = resp.body?.getReader();
+    if (!reader) {
+        throw new Error("Gemini streamGenerateContent returned no body");
+    }
+    const decoder = new TextDecoder("utf-8");
+    let buffer = "";
+    let text = "";
+    let model = fallbackModel;
+    let promptTokens = 0;
+    let candidateTokens = 0;
+    let cachedTokens = 0;
+    // A single logical chunk may span several `data: ` lines with a terminal
+    // blank line. We accumulate them in `pending` and flush on blank.
+    let pending = [];
+    const applyChunk = (chunk) => {
+        const inner = chunk.response ?? {};
+        // Only the first candidate contributes text; concatenating every
+        // candidate would splice alternative answers into one string.
+        for (const p of inner.candidates?.[0]?.content?.parts ?? []) {
+            if (p.text)
+                text += p.text;
+        }
+        const usage = inner.usageMetadata;
+        if (usage) {
+            if (typeof usage.promptTokenCount === "number") {
+                promptTokens = usage.promptTokenCount;
+            }
+            if (typeof usage.candidatesTokenCount === "number") {
+                candidateTokens = usage.candidatesTokenCount;
+            }
+            if (typeof usage.cachedContentTokenCount === "number") {
+                cachedTokens = usage.cachedContentTokenCount;
+            }
+        }
+        // Some Code Assist responses surface modelVersion on the outer shape
+        // when the server routes the request (e.g. 1.5 → 2.5 redirect). Use
+        // it over the fallback so billing/analytics see the real served model.
+        // NOTE(review): only the outer `chunk.modelVersion` is read; if the
+        // field actually arrives under `chunk.response`, it is never seen —
+        // confirm against a captured response.
+        const mv = chunk.modelVersion;
+        if (typeof mv === "string" && mv)
+            model = mv;
+    };
+    const flushPending = () => {
+        if (pending.length === 0)
+            return;
+        const joined = pending.join("\n");
+        pending = [];
+        try {
+            applyChunk(JSON.parse(joined));
+        }
+        catch {
+            // Silently drop malformed chunks — gemini-cli does the same
+            // (logInvalidChunk then continue).
+        }
+    };
+    // Handle one physical line (without its `\n`).
+    const consumeLine = (rawLine) => {
+        const line = rawLine.replace(/\r$/, "");
+        if (line === "") {
+            flushPending();
+        }
+        else if (line.startsWith("data:")) {
+            // Covers both `data: {...}` and `data:{...}`: the payload is
+            // trimmed, so the optional space after the colon is irrelevant.
+            pending.push(line.slice(5).trim());
+        }
+        // Ignore other lines (comments, id fields) per gemini-cli.
+    };
+    while (true) {
+        const { value, done } = await reader.read();
+        if (done)
+            break;
+        buffer += decoder.decode(value, { stream: true });
+        let newlineIdx;
+        while ((newlineIdx = buffer.indexOf("\n")) >= 0) {
+            consumeLine(buffer.slice(0, newlineIdx));
+            buffer = buffer.slice(newlineIdx + 1);
+        }
+    }
+    // End of stream: flush any bytes the decoder is still holding, then
+    // process a final line that arrived without a trailing newline so the
+    // last chunk is not lost.
+    buffer += decoder.decode();
+    if (buffer !== "") {
+        consumeLine(buffer);
+    }
+    flushPending();
     return {
         text,
         sessionId: "",
         usage: {
-            input_tokens: Math.max(0, (usage.promptTokenCount ?? 0) - cached),
-            output_tokens: usage.candidatesTokenCount ?? 0,
+            input_tokens: Math.max(0, promptTokens - cachedTokens),
+            output_tokens: candidateTokens,
             cache_creation_tokens: 0,
-            cache_read_tokens: cached,
+            cache_read_tokens: cachedTokens,
         },
-        model: fallbackModel,
+        model,
         costUsd: 0,
     };
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "clawmoney",
-  "version": "0.14.3",
+  "version": "0.14.5",
   "description": "ClawMoney CLI -- Earn rewards with your AI agent",
   "type": "module",
   "bin": {