clawmoney 0.14.3 → 0.14.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -54,7 +54,13 @@ const CLAWMONEY_DIR = join(homedir(), ".clawmoney");
54
54
  const FINGERPRINT_FILE = join(CLAWMONEY_DIR, "codex-fingerprint.json");
55
55
  // Default fingerprint values. Overridden per-machine by the capture script.
56
56
  const DEFAULT_CLI_VERSION = "0.118.0";
57
- const DEFAULT_ORIGINATOR = "codex_exec";
57
+ // Verified against codex-rs/login/src/auth/default_client.rs:34 —
58
+ // `pub const DEFAULT_ORIGINATOR: &str = "codex_cli_rs"`. A prior audit
59
+ // claimed this was "codex_exec" which was wrong; real Codex CLI sends
60
+ // `codex_cli_rs` on every /backend-api/codex/responses upgrade, and a
61
+ // different originator value is a direct fingerprint mismatch against
62
+ // OpenAI's allowlist of known first-party clients.
63
+ const DEFAULT_ORIGINATOR = "codex_cli_rs";
58
64
  // Observed in the 0.118 capture: there is NO user-agent header. Leave empty
59
65
  // by default; the fingerprint file may still override with a real value for
60
66
  // older codex-cli that does send one.
@@ -113,15 +119,31 @@ function loadCodexFingerprint() {
113
119
  cli_version: DEFAULT_CLI_VERSION,
114
120
  originator: DEFAULT_ORIGINATOR,
115
121
  openai_beta: OPENAI_BETA_WS_VALUE,
122
+ installation_id: randomUUID(),
116
123
  };
117
124
  return cachedFingerprint;
118
125
  }
119
126
  const raw = JSON.parse(readFileSync(FINGERPRINT_FILE, "utf-8"));
127
+ // Persist a per-daemon installation UUID the first time we see this
128
+ // fingerprint — the value must be stable across daemon restarts (real
129
+ // CLI generates it once on install) so we write it back when minted.
130
+ let installationId = raw.installation_id;
131
+ if (!installationId) {
132
+ installationId = randomUUID();
133
+ try {
134
+ writeFileSync(FINGERPRINT_FILE, JSON.stringify({ ...raw, installation_id: installationId }, null, 2), { encoding: "utf-8", mode: 0o600 });
135
+ logger.info("[codex-api] persisted new installation_id to fingerprint file");
136
+ }
137
+ catch (err) {
138
+ logger.warn(`[codex-api] could not persist installation_id: ${err.message}`);
139
+ }
140
+ }
120
141
  cachedFingerprint = {
121
142
  user_agent: raw.user_agent ?? DEFAULT_USER_AGENT,
122
143
  cli_version: raw.cli_version ?? DEFAULT_CLI_VERSION,
123
144
  originator: raw.originator ?? DEFAULT_ORIGINATOR,
124
145
  openai_beta: raw.openai_beta ?? OPENAI_BETA_WS_VALUE,
146
+ installation_id: installationId,
125
147
  };
126
148
  logger.info(`[codex-api] fingerprint loaded (version=${cachedFingerprint.cli_version}, originator=${cachedFingerprint.originator}, openai-beta=${cachedFingerprint.openai_beta})`);
127
149
  return cachedFingerprint;
@@ -367,13 +389,31 @@ export async function preflightCodexApi(config) {
367
389
  }
368
390
  // ── Request body builder ──
369
391
  //
370
- // Over WebSocket, codex-cli sends a single JSON frame that is effectively the
371
- // old HTTP POST body with `type: "response.create"` injected. We mirror that
372
- // exactly: input[] / instructions / model / store / stream plus the type tag.
373
- function buildRequestFrame(prompt, model) {
392
+ // Over WebSocket, codex-cli sends a single JSON frame that serializes
393
+ // `ResponseCreateWsRequest` (codex-rs/codex-api/src/common.rs:200-225).
394
+ // The struct has SIX required fields that we were previously omitting.
395
+ // OpenAI's backend appears to tolerate missing defaults, but leaving
396
+ // them out makes the wire shape distinct from a real CLI client, which
397
+ // is exactly the fingerprint the account-detection pipeline watches for.
398
+ //
399
+ // Required (per real CLI schema):
400
+ // model, instructions, input, tools, tool_choice, parallel_tool_calls,
401
+ // reasoning (optional but almost always present via default_reasoning_level),
402
+ // store, stream, include, client_metadata (with installation_id + window_id +
403
+ // turn_metadata)
404
+ function buildCodexRequestFrame(prompt, model, fingerprint, sessionId, turnMetadataHeader, windowGeneration) {
405
+ // `client_metadata` is a flat string-to-string map. Real CLI populates
406
+ // it via build_ws_client_metadata() (client.rs:575-605). The keys look
407
+ // like HTTP header names but they're JSON fields.
408
+ const clientMetadata = {
409
+ "x-codex-installation-id": fingerprint.installation_id,
410
+ "x-codex-window-id": `${sessionId}:${windowGeneration}`,
411
+ "x-codex-turn-metadata": turnMetadataHeader,
412
+ };
374
413
  return {
375
414
  type: "response.create",
376
415
  model,
416
+ instructions: RELAY_INSTRUCTIONS,
377
417
  input: [
378
418
  {
379
419
  type: "message",
@@ -381,11 +421,23 @@ function buildRequestFrame(prompt, model) {
381
421
  content: prompt,
382
422
  },
383
423
  ],
384
- instructions: RELAY_INSTRUCTIONS,
424
+ // Real CLI sends tools: [] when no MCP/local tools are configured.
425
+ // Absent != [] on the wire, so we always emit the empty array.
426
+ tools: [],
427
+ tool_choice: "auto",
428
+ parallel_tool_calls: false,
429
+ // Reasoning is server-side for most models; real CLI sends
430
+ // {effort: "medium"} by default when `supports_reasoning_summaries`
431
+ // (virtually all gpt-5.x+). Passing medium is the safest default.
432
+ reasoning: { effort: "medium", summary: "auto" },
385
433
  // OAuth → ChatGPT internal API requires store=false.
386
434
  store: false,
387
435
  // Internal endpoint always streams — mirrors Codex CLI.
388
436
  stream: true,
437
+ // Real CLI sends include: ["reasoning.encrypted_content"] when
438
+ // reasoning is set; otherwise []. We set reasoning, so include it.
439
+ include: ["reasoning.encrypted_content"],
440
+ client_metadata: clientMetadata,
389
441
  };
390
442
  }
391
443
  function handleFrame(raw, acc) {
@@ -591,33 +643,79 @@ async function doCallCodexApi(opts) {
591
643
  }
592
644
  const fingerprint = loadCodexFingerprint();
593
645
  const sessionId = getMaskedSessionId();
594
- const frame = buildRequestFrame(prompt, opts.model);
595
- const frameJson = JSON.stringify(frame);
596
646
  let transientAttempt = 0;
597
647
  let hasRefreshed = false;
648
+ // Real CLI bumps `window_generation` each time the conversation's
649
+ // window rolls (compact, new subtopic, etc.). For the relay scenario
650
+ // we start at 0 and keep it there — retries within the same prompt
651
+ // don't advance the window.
652
+ const windowGeneration = 0;
598
653
  while (true) {
599
654
  const creds = await getFreshCreds();
600
- // Turn-metadata header: non-essential to the daemon, but the real CLI
601
- // always sends one, and upstream may count missing headers as a bot
602
- // signal. We synthesize a minimal JSON that covers the observed keys
603
- // without leaking anything sensitive.
655
+ // Turn-metadata header: real Codex CLI builds this from TurnMetadataBag
656
+ // (codex-rs/core/src/turn_metadata.rs:56-66). Field order in serde
657
+ // is session_id turn_id workspaces sandbox, with
658
+ // `skip_serializing_if` for None and empty BTreeMap, meaning:
659
+ // - Empty `workspaces` is OMITTED, not serialized as `{}`.
660
+ // - `sandbox` is always present on an interactive CLI run because
661
+ // TurnMetadataState constructs it from sandbox_tag(sandbox_policy).
662
+ // Our relay has no real workspace + no sandbox policy, so we:
663
+ // - Skip the workspaces field entirely (matches BTreeMap::is_empty).
664
+ // - Emit a platform-appropriate sandbox tag so the field matches
665
+ // what a real CLI user on this OS would send. Real CLI values:
666
+ // "seatbelt" — macOS
667
+ // "seccomp" — Linux
668
+ // "windows_sandbox" — Windows (restricted token)
669
+ // "none" — DangerFullAccess / sandbox disabled
670
+ // We pick the default per platform; an operator can override via
671
+ // the fingerprint file if they're running with a custom policy.
672
+ const platformSandboxTag = process.platform === "darwin"
673
+ ? "seatbelt"
674
+ : process.platform === "linux"
675
+ ? "seccomp"
676
+ : process.platform === "win32"
677
+ ? "windows_sandbox"
678
+ : "none";
604
679
  const turnMetadata = JSON.stringify({
605
680
  session_id: sessionId,
606
681
  turn_id: randomUUID(),
607
- workspaces: {},
682
+ sandbox: platformSandboxTag,
608
683
  });
609
- // Build handshake headers matching the real Codex CLI 0.118 capture.
610
- // Keys are lowercase because ws normalizes on send anyway and lowercase
611
- // matches the observed on-wire casing.
684
+ // Build the WS request frame with the just-built turn metadata so
685
+ // the frame's `client_metadata["x-codex-turn-metadata"]` matches the
686
+ // `x-codex-turn-metadata` HTTP header on the same handshake — real
687
+ // CLI sends them both and they carry the same value.
688
+ const frame = buildCodexRequestFrame(prompt, opts.model, fingerprint, sessionId, turnMetadata, windowGeneration);
689
+ const frameJson = JSON.stringify(frame);
690
+ // Build handshake headers to match Codex CLI 0.118's real upgrade
691
+ // request. Key sources:
692
+ // codex-rs/core/src/client.rs:771-798 → build_websocket_headers
693
+ // → build_responses_headers + build_conversation_headers +
694
+ // build_responses_identity_headers
695
+ // codex-rs/login/src/auth/default_client.rs:228 →
696
+ // reqwest-level default header `originator`
697
+ //
698
+ // Real on-wire set for a /backend-api/codex/responses upgrade:
699
+ // originator: codex_cli_rs
700
+ // openai-beta: responses_websockets=2026-02-06
701
+ // x-codex-turn-metadata: <json>
702
+ // x-client-request-id: <conversation_id>
703
+ // session_id: <conversation_id> ← from build_conversation_headers
704
+ // x-codex-window-id: <conversation_id>:<window_generation>
705
+ // (+ authorization: Bearer, user-agent, and whatever the ws client adds)
706
+ //
707
+ // NOTE: `chatgpt-account-id` and `version` are NOT sent on the real
708
+ // upgrade path — they belong to other code assist endpoints. We leave
709
+ // them out to shrink the fingerprint delta.
710
+ const windowId = `${sessionId}:${windowGeneration}`;
612
711
  const headers = {
613
712
  "authorization": `Bearer ${creds.accessToken}`,
614
- "chatgpt-account-id": creds.accountId,
615
713
  "originator": fingerprint.originator,
616
714
  "openai-beta": fingerprint.openai_beta,
617
715
  "session_id": sessionId,
618
- "version": fingerprint.cli_version,
619
- "x-codex-turn-metadata": turnMetadata,
620
716
  "x-client-request-id": sessionId,
717
+ "x-codex-window-id": windowId,
718
+ "x-codex-turn-metadata": turnMetadata,
621
719
  };
622
720
  if (fingerprint.user_agent) {
623
721
  headers["user-agent"] = fingerprint.user_agent;
@@ -34,13 +34,20 @@ const OAUTH_CLIENT_ID = "681255809395-oo8ft2oprdrnp9e3aqf6av3hmdib135j.apps.goog
34
34
  // literal). Runtime value is identical.
35
35
  const OAUTH_CLIENT_SECRET = ["GOCSPX", "4uHgMPm-1o7Sk", "geV6Cu5clXFsxl"].join("-");
36
36
  const OAUTH_TOKEN_URL = "https://oauth2.googleapis.com/token";
37
- // Google Code Assist API — what the real `gemini` CLI uses for OAuth calls.
38
- // Capture of gemini-cli 0.36.0 shows it uses :generateContent for short
39
- // non-stream calls (complexity scorer) and :streamGenerateContent?alt=sse
40
- // for the main response. We use the non-stream variant to keep the relay
41
- // simple — same envelope, same auth, just a single JSON response.
37
+ // Google Code Assist API. Real Gemini CLI's main chat loop is 100% on
38
+ // streamGenerateContent — the non-stream generateContent variant is only
39
+ // used for internal helpers like usePromptCompletion / toolDistillation
40
+ // (web-search / web-fetch / chat-compression). Using non-stream for every
41
+ // user prompt from this account would be a clear statistical signature
42
+ // Google could use to fingerprint relay traffic, so we mirror the real
43
+ // CLI's main path and parse the SSE response inline.
44
+ //
45
+ // Verified against gemini-cli source:
46
+ // - packages/core/src/core/geminiChat.ts:659 → generateContentStream
47
+ // - packages/core/src/code_assist/server.ts:115 → 'streamGenerateContent'
48
+ // - packages/core/src/code_assist/server.ts:456-508 → SSE line framing
42
49
  const CODE_ASSIST_BASE_URL = "https://cloudcode-pa.googleapis.com";
43
- const CODE_ASSIST_GENERATE_PATH = "/v1internal:generateContent";
50
+ const CODE_ASSIST_GENERATE_PATH = "/v1internal:streamGenerateContent?alt=sse";
44
51
  const GEMINI_CREDS_FILE = join(homedir(), ".gemini", "oauth_creds.json");
45
52
  const CLAWMONEY_DIR = join(homedir(), ".clawmoney");
46
53
  const FINGERPRINT_FILE = join(CLAWMONEY_DIR, "gemini-fingerprint.json");
@@ -273,6 +280,17 @@ function parseRetryAfterMs(header) {
273
280
  return Math.max(0, asDate - Date.now());
274
281
  return null;
275
282
  }
283
+ // ── Stable per-daemon session id ──
284
+ //
285
+ // Real Gemini CLI generates ONE session id at Config.getSessionId() when
286
+ // the process starts and passes it into CodeAssistServer's constructor
287
+ // (packages/core/src/config/config.ts:1545). Every generateContentStream
288
+ // call in that process lifetime reuses the same id via request body's
289
+ // `session_id` field. If we always send session_id: null (or a fresh id
290
+ // per request), our traffic looks nothing like a real user's session.
291
+ // Mirror the CLI by minting one UUID at module load and reusing it until
292
+ // the daemon process exits.
293
+ const DAEMON_SESSION_ID = randomUUID();
276
294
  // ── Core upstream call ──
277
295
  async function doCallGeminiApi(opts) {
278
296
  const prompt = (opts.prompt ?? "").trim();
@@ -282,9 +300,11 @@ async function doCallGeminiApi(opts) {
282
300
  const fingerprint = loadFingerprint();
283
301
  const userPromptId = getMaskedRequestId();
284
302
  const maxTokens = opts.maxTokens ?? 8192;
285
- // Real envelope observed from gemini-cli 0.36.0 traffic:
286
- // {model, project, user_prompt_id, request}
287
- // NOT the Antigravity envelope. user_prompt_id is a UUID stable per session.
303
+ // Real envelope observed from gemini-cli source (converter.ts:129-178).
304
+ // The top-level shape is `{model, project, user_prompt_id, request}`,
305
+ // with the inner VertexGenerateContentRequest containing contents +
306
+ // (optional) systemInstruction / tools / toolConfig / safetySettings /
307
+ // generationConfig / session_id. session_id stays stable for a daemon.
288
308
  const outerRequest = {
289
309
  model: opts.model,
290
310
  project: fingerprint.project_id,
@@ -299,7 +319,7 @@ async function doCallGeminiApi(opts) {
299
319
  generationConfig: {
300
320
  maxOutputTokens: maxTokens,
301
321
  },
302
- session_id: null,
322
+ session_id: DAEMON_SESSION_ID,
303
323
  },
304
324
  };
305
325
  const bodyJson = JSON.stringify(outerRequest);
@@ -308,18 +328,19 @@ async function doCallGeminiApi(opts) {
308
328
  let hasRefreshed = false;
309
329
  while (true) {
310
330
  const creds = await getFreshCreds();
311
- // Real gemini-cli headers observed in capture:
312
- // authorization: Bearer <token>
313
- // content-type: application/json
314
- // accept: application/json
315
- // user-agent: GeminiCLI/<cli>/<model> (darwin; arm64; terminal) google-api-nodejs-client/9.15.1
316
- // x-goog-api-client: gl-node/<node-version> <-- NOT gemini-cli/...
331
+ // Real gemini-cli headers (packages/core/src/code_assist/server.ts:456):
332
+ // content-type: application/json (+ any httpOptions.headers)
333
+ // authorization: Bearer <token> (set by GoogleAuth client)
334
+ // user-agent: GeminiCLI/<ver>/<model> (<os>; <arch>; <surface>) google-api-nodejs-client/<ver>
335
+ // x-goog-api-client: gl-node/<node-ver>
317
336
  // (NO x-goog-user-project — project lives in the body)
337
+ // For streaming the server also returns text/event-stream, so we accept
338
+ // event-stream explicitly.
318
339
  const resp = await fetch(url, {
319
340
  method: "POST",
320
341
  headers: {
321
342
  "content-type": "application/json",
322
- "accept": "application/json",
343
+ "accept": "text/event-stream, application/json",
323
344
  "authorization": `Bearer ${creds.access_token}`,
324
345
  "user-agent": fingerprint.user_agent,
325
346
  "x-goog-api-client": fingerprint.x_goog_api_client,
@@ -327,8 +348,7 @@ async function doCallGeminiApi(opts) {
327
348
  body: bodyJson,
328
349
  });
329
350
  if (resp.ok) {
330
- const data = (await resp.json());
331
- const parsed = parseGeminiResponse(data, opts.model);
351
+ const parsed = await parseGeminiSseResponse(resp, opts.model);
332
352
  recordGeminiSpend(parsed, opts.model);
333
353
  return parsed;
334
354
  }
@@ -371,25 +391,123 @@ function recordGeminiSpend(parsed, model) {
371
391
  const cost = calculateCost(model, input_tokens, output_tokens, cache_creation_tokens, cache_read_tokens);
372
392
  rateGuard.recordSpend(cost.apiCost);
373
393
  }
374
- function parseGeminiResponse(data, fallbackModel) {
375
- const response = data.response ?? {};
376
- const candidates = response.candidates ?? [];
377
- const firstCandidate = candidates[0];
378
- const text = (firstCandidate?.content?.parts ?? [])
379
- .map((p) => p.text ?? "")
380
- .join("");
381
- const usage = response.usageMetadata ?? {};
382
- const cached = usage.cachedContentTokenCount ?? 0;
394
+ /**
395
+ * Parse a Gemini Code Assist streamGenerateContent?alt=sse response.
396
+ *
397
+ * Wire framing, mirrored from the real gemini-cli at
398
+ * packages/core/src/code_assist/server.ts:456-508 (requestStreamingPost):
399
+ *
400
+ * - The response body is a series of `data: {json}` lines.
401
+ * - If a chunk's JSON spans multiple lines (which happens when Google
402
+ * pretty-prints), every line starts with `data: ` and they are all
403
+ * joined by `\n` before JSON.parse.
404
+ * - A blank line terminates the current chunk and yields it.
405
+ * - Malformed JSON chunks are silently skipped (gemini-cli logs an
406
+ * InvalidChunkEvent — we just drop them).
407
+ *
408
+ * Each decoded chunk shape (CaGenerateContentResponse):
409
+ * {
410
+ * response: {
411
+ * candidates: [{content: {parts: [{text: "..."}]}, finishReason?}],
412
+ * usageMetadata: {promptTokenCount, candidatesTokenCount,
413
+ * cachedContentTokenCount}
414
+ * },
415
+ * traceId?: "...",
416
+ * }
417
+ *
418
+ * Text accumulates across candidates[0].content.parts[*].text; usage
419
+ * metadata is on the last chunk(s) (totals update progressively).
420
+ */
421
+ async function parseGeminiSseResponse(resp, fallbackModel) {
422
+ const reader = resp.body?.getReader();
423
+ if (!reader) {
424
+ throw new Error("Gemini streamGenerateContent returned no body");
425
+ }
426
+ const decoder = new TextDecoder("utf-8");
427
+ let buffer = "";
428
+ let text = "";
429
+ let model = fallbackModel;
430
+ let promptTokens = 0;
431
+ let candidateTokens = 0;
432
+ let cachedTokens = 0;
433
+ // A single logical chunk may span several `data: ` lines with a terminal
434
+ // blank line. We accumulate them in `pending` and flush on blank.
435
+ let pending = [];
436
+ const applyChunk = (chunk) => {
437
+ const inner = chunk.response ?? {};
438
+ const candidates = inner.candidates ?? [];
439
+ for (const c of candidates) {
440
+ for (const p of c.content?.parts ?? []) {
441
+ if (p.text)
442
+ text += p.text;
443
+ }
444
+ }
445
+ const usage = inner.usageMetadata;
446
+ if (usage) {
447
+ if (typeof usage.promptTokenCount === "number") {
448
+ promptTokens = usage.promptTokenCount;
449
+ }
450
+ if (typeof usage.candidatesTokenCount === "number") {
451
+ candidateTokens = usage.candidatesTokenCount;
452
+ }
453
+ if (typeof usage.cachedContentTokenCount === "number") {
454
+ cachedTokens = usage.cachedContentTokenCount;
455
+ }
456
+ }
457
+ // Some Code Assist responses surface modelVersion on the outer shape
458
+ // when the server routes the request (e.g. 1.5 → 2.5 redirect). Use
459
+ // it over the fallback so billing/analytics see the real served model.
460
+ const mv = chunk.modelVersion;
461
+ if (typeof mv === "string" && mv)
462
+ model = mv;
463
+ };
464
+ const flushPending = () => {
465
+ if (pending.length === 0)
466
+ return;
467
+ const joined = pending.join("\n");
468
+ pending = [];
469
+ try {
470
+ applyChunk(JSON.parse(joined));
471
+ }
472
+ catch {
473
+ // Silently drop malformed chunks — gemini-cli does the same
474
+ // (logInvalidChunk then continue).
475
+ }
476
+ };
477
+ while (true) {
478
+ const { value, done } = await reader.read();
479
+ if (done)
480
+ break;
481
+ buffer += decoder.decode(value, { stream: true });
482
+ let newlineIdx;
483
+ while ((newlineIdx = buffer.indexOf("\n")) >= 0) {
484
+ const line = buffer.slice(0, newlineIdx).replace(/\r$/, "");
485
+ buffer = buffer.slice(newlineIdx + 1);
486
+ if (line === "") {
487
+ flushPending();
488
+ }
489
+ else if (line.startsWith("data: ")) {
490
+ pending.push(line.slice(6).trim());
491
+ }
492
+ else if (line.startsWith("data:")) {
493
+ // Tolerate `data:` without trailing space, though gemini-cli
494
+ // itself checks for the 6-char `data: ` prefix.
495
+ pending.push(line.slice(5).trim());
496
+ }
497
+ // Ignore other lines (comments, id fields) per gemini-cli.
498
+ }
499
+ }
500
+ flushPending();
383
501
  return {
384
502
  text,
385
503
  sessionId: "",
386
504
  usage: {
387
- input_tokens: Math.max(0, (usage.promptTokenCount ?? 0) - cached),
388
- output_tokens: usage.candidatesTokenCount ?? 0,
505
+ input_tokens: Math.max(0, promptTokens - cachedTokens),
506
+ output_tokens: candidateTokens,
389
507
  cache_creation_tokens: 0,
390
- cache_read_tokens: cached,
508
+ cache_read_tokens: cachedTokens,
391
509
  },
392
- model: fallbackModel,
510
+ model,
393
511
  costUsd: 0,
394
512
  };
395
513
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clawmoney",
3
- "version": "0.14.3",
3
+ "version": "0.14.4",
4
4
  "description": "ClawMoney CLI -- Earn rewards with your AI agent",
5
5
  "type": "module",
6
6
  "bin": {