clawmoney 0.14.1 → 0.14.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -53,8 +53,21 @@ const DEFAULT_USER_AGENT = `claude-cli/${DEFAULT_CLI_VERSION} (external, ${DEFAU
53
53
  // same file — both projects have the identical string. This value is part
54
54
  // of Anthropic's server-side check that the request came from a real CLI.
55
55
  const CLAUDE_FINGERPRINT_SALT = "59cf53e54c78";
56
+ // Headers that real Claude Code emits on every /v1/messages call. The
57
+ // Anthropic SDK would inject these automatically; since we bypass the SDK
58
+ // and hand-roll the fetch call we have to include them verbatim.
59
+ //
60
+ // Note the deliberate omissions:
61
+ // - `anthropic-beta` is NOT static — it is per-request and derived from
62
+ // the model via pickClaudeBetasForModel(). Real Claude Code passes
63
+ // the list via the SDK's `betas: [...]` body param and the SDK then
64
+ // emits it as a comma-joined `anthropic-beta` header. We do the same
65
+ // thing by building the header inline in doCallClaudeApi so Haiku
66
+ // requests drop `claude-code-20250219` like the real CLI.
67
+ // - `accept` is set per-request to `application/json, text/event-stream`
68
+ // because we send stream:true (see doCallClaudeApi). Leaving it out of
69
+ // the static set so we can pick the right value at call time.
56
70
  const STATIC_CLAUDE_CODE_HEADERS = {
57
- "accept": "application/json",
58
71
  "x-stainless-retry-count": "0",
59
72
  "x-stainless-timeout": "600",
60
73
  "x-stainless-lang": "js",
@@ -67,10 +80,6 @@ const STATIC_CLAUDE_CODE_HEADERS = {
67
80
  "anthropic-version": "2023-06-01",
68
81
  "x-app": "cli",
69
82
  "content-type": "application/json",
70
- // Minimal beta set that Max-tier subscriptions always accept. Adding
71
- // context-1m or context-management here will get rejected as "long
72
- // context beta not available for this subscription" on non-Enterprise tiers.
73
- "anthropic-beta": "claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14",
74
83
  };
75
84
  // System prompt captured from real Claude Code ≥ 2.1.x. The first marker line
76
85
  // matches claudeCodeSystemPrompts template #2 in sub2api's validator
@@ -88,6 +97,85 @@ const MODEL_ID_OVERRIDES = {
88
97
  function normalizeModel(model) {
89
98
  return MODEL_ID_OVERRIDES[model] ?? model;
90
99
  }
100
+ // ── Per-model thinking + betas selection (mirrors real Claude Code) ──
101
+ //
102
+ // Real Claude Code ALWAYS sends a `thinking` body field for Claude 4+
103
+ // models, and the shape depends on whether the model supports adaptive
104
+ // thinking. Source: claude-code-best/src/utils/thinking.ts:
105
+ // - modelSupportsThinking() → any canonical name NOT matching "claude-3-"
106
+ // - modelSupportsAdaptiveThinking() → only canonical names containing
107
+ // "opus-4-6" or "sonnet-4-6"
108
+ //
109
+ // If we send requests to Anthropic without this field but with Claude 4+
110
+ // models, the per-account traffic pattern is "zero thinking on every
111
+ // message" which is a clear relay-farm fingerprint (real users on these
112
+ // tiers get adaptive thinking automatically and have no way to turn it
113
+ // off short of setting alwaysThinkingEnabled=false).
114
/**
 * Report whether a model is treated as thinking-capable: true for every
 * normalized model id that does not contain the substring "claude-3-".
 */
function modelSupportsThinking(model) {
  const canonical = normalizeModel(model);
  const isClaude3 = canonical.includes("claude-3-");
  return !isClaude3;
}
117
/**
 * Report whether a model is treated as supporting adaptive thinking: true
 * when the normalized model id contains "opus-4-6" or "sonnet-4-6".
 */
function modelSupportsAdaptiveThinking(model) {
  const canonical = normalizeModel(model);
  return ["opus-4-6", "sonnet-4-6"].some((tag) => canonical.includes(tag));
}
121
// Floor for thinking.budget_tokens; /v1/messages rejects anything lower.
const CLAUDE_MIN_THINKING_BUDGET = 1024;
/**
 * Choose the `thinking` body field for a model together with the
 * `max_tokens` value that must accompany it.
 *
 * - Model without thinking support: no config, caller's cap untouched.
 * - Adaptive-thinking model: `{ type: "adaptive" }`, caller's cap untouched.
 * - Otherwise: `{ type: "enabled", budget_tokens }` where the budget is one
 *   less than the (possibly raised) cap. The cap is raised to
 *   CLAUDE_MIN_THINKING_BUDGET + 1 when the caller's value is lower, so the
 *   budget can meet the floor while staying strictly below max_tokens.
 *
 * @param {string} model      caller-supplied model id.
 * @param {number} maxTokens  caller's max_tokens cap.
 * @returns {{ config: object | undefined, adjustedMaxTokens: number }}
 */
function pickClaudeThinkingConfig(model, maxTokens) {
  // Both non-budget branches hand the caller's cap back unchanged.
  const withCallerCap = (config) => ({ config, adjustedMaxTokens: maxTokens });
  if (!modelSupportsThinking(model)) {
    return withCallerCap(undefined);
  }
  if (modelSupportsAdaptiveThinking(model)) {
    return withCallerCap({ type: "adaptive" });
  }
  const smallestUsableCap = CLAUDE_MIN_THINKING_BUDGET + 1;
  const adjustedMaxTokens = Math.max(maxTokens, smallestUsableCap);
  const budgetTokens = Math.max(CLAUDE_MIN_THINKING_BUDGET, adjustedMaxTokens - 1);
  return {
    config: { type: "enabled", budget_tokens: budgetTokens },
    adjustedMaxTokens,
  };
}
147
/**
 * Build the per-request list of beta flags for a model. The caller joins the
 * returned array with commas and sends it as the `anthropic-beta` HTTP
 * header; the list is not placed in the request body.
 *
 * Flags, in order:
 *   1. `claude-code-20250219`            — unless the normalized id contains "haiku"
 *   2. `oauth-2025-04-20`                — always
 *   3. `interleaved-thinking-2025-05-14` — when modelSupportsThinking(model) is true
 *
 * @param {string} model  caller-supplied model id.
 * @returns {string[]} a new array of beta flag names.
 */
function pickClaudeBetasForModel(model) {
  const canonical = normalizeModel(model);
  return [
    ...(canonical.includes("haiku") ? [] : ["claude-code-20250219"]),
    "oauth-2025-04-20",
    ...(modelSupportsThinking(model) ? ["interleaved-thinking-2025-05-14"] : []),
  ];
}
91
179
  // ── Proxy (honor HTTPS_PROXY / http_proxy env vars) ──
92
180
  //
93
181
  // Node's native fetch does NOT read these env vars automatically, so if the
@@ -640,9 +728,15 @@ async function doCallClaudeApi(opts) {
640
728
  // message text so the cc_version.<FP3> suffix varies request-by-request,
641
729
  // matching what real Claude Code sends. See computeClaudeFingerprint().
642
730
  const attributionHeader = buildClaudeAttributionHeader(sanitizedPrompt, fingerprint.cc_version, fingerprint.cc_entrypoint);
731
+ // Per-request betas + thinking config, picked from the real CLI's
732
+ // per-model logic (see pickClaudeBetasForModel / pickClaudeThinkingConfig).
733
+ // These are two of the strongest fingerprint signals Anthropic could use
734
+ // to distinguish relay traffic from genuine CLI traffic.
735
+ const betasForRequest = pickClaudeBetasForModel(opts.model);
736
+ const { config: thinkingConfig, adjustedMaxTokens } = pickClaudeThinkingConfig(opts.model, maxTokens);
643
737
  const body = {
644
738
  model: normalizeModel(opts.model),
645
- max_tokens: maxTokens,
739
+ max_tokens: adjustedMaxTokens,
646
740
  system: [
647
741
  {
648
742
  type: "text",
@@ -672,8 +766,27 @@ async function doCallClaudeApi(opts) {
672
766
  },
673
767
  ],
674
768
  metadata: { user_id: buildMetadataUserID(fingerprint, sessionId) },
675
- stream: false,
769
+ // Real Claude Code ALWAYS sends stream:true on its main path
770
+ // (claude-code-sourcemap/src/services/api/claude.ts:1824 —
771
+ // `{ ...params, stream: true }`). The non-stream call at line 864 is
772
+ // only the fallback path triggered when the stream fails mid-response.
773
+ // Sending stream:false on every request is a statistical signal that
774
+ // Anthropic could use to identify relay clients vs real CLI — the
775
+ // entire account's traffic would be the opposite polarity of what the
776
+ // CLI ever emits. Switch to streaming to match.
777
+ stream: true,
778
+ // NOTE: `betas` is a client-side SDK-only param — the Anthropic SDK
779
+ // strips it out of the body and emits it as the `anthropic-beta`
780
+ // HTTP header. Anthropic's API rejects requests that carry `betas`
781
+ // in the wire body with `betas: Extra inputs are not permitted`.
782
+ // The header is set on the fetch call below, so don't put it here.
676
783
  };
784
+ // `thinking` is always set on Claude 4+ models by real CLI. Omitting it
785
+ // would be an account-wide zero-thinking anomaly. Adaptive for 4-6
786
+ // models, enabled+budget for 4-5 / haiku.
787
+ if (thinkingConfig) {
788
+ body.thinking = thinkingConfig;
789
+ }
677
790
  const bodyJson = JSON.stringify(body);
678
791
  let transientAttempt = 0;
679
792
  let hasRefreshed = false;
@@ -683,6 +796,15 @@ async function doCallClaudeApi(opts) {
683
796
  method: "POST",
684
797
  headers: {
685
798
  ...STATIC_CLAUDE_CODE_HEADERS,
799
+ // SSE streaming — Anthropic returns event-stream body when
800
+ // stream:true is set in the body. The SDK default sets an accept
801
+ // that includes text/event-stream; we match that exactly.
802
+ "accept": "application/json, text/event-stream",
803
+ // `anthropic-beta` carries the per-model beta list. The Anthropic SDK
804
+ // derives this header from its client-side `betas` param; the wire
805
+ // body must not contain `betas` (the API rejects it), so this header
806
+ // is the only place the list is sent.
807
+ "anthropic-beta": betasForRequest.join(","),
686
808
  "user-agent": fingerprint.user_agent,
687
809
  "authorization": `Bearer ${creds.accessToken}`,
688
810
  "x-claude-code-session-id": sessionId,
@@ -695,7 +817,10 @@ async function doCallClaudeApi(opts) {
695
817
  if (sessionWin)
696
818
  rateGuard?.setSessionWindow(sessionWin);
697
819
  if (resp.ok) {
698
- const parsed = parseResponse(await resp.json(), opts.model);
820
+ // Stream parser: the request is sent with stream:true (see body
821
+ // construction above), so the response is SSE. parseClaudeSseResponse
822
+ // aggregates text deltas + usage until the stream ends.
823
+ const parsed = await parseClaudeSseResponse(resp, opts.model);
699
824
  recordSpendFromUsage(parsed, opts.model);
700
825
  return parsed;
701
826
  }
@@ -756,22 +881,152 @@ function recordSpendFromUsage(parsed, model) {
756
881
  // subscription meter and what will actually burn the account.
757
882
  rateGuard.recordSpend(cost.apiCost);
758
883
  }
759
- function parseResponse(data, fallbackModel) {
760
- const text = (data.content ?? [])
761
- .filter((c) => c.type === "text" && typeof c.text === "string")
762
- .map((c) => c.text)
763
- .join("");
764
- const usage = data.usage ?? {};
884
/**
 * Parse an Anthropic SSE `/v1/messages` stream response into a ParsedOutput.
 *
 * Only `data:` lines are interpreted (`event:` names are informational). Each
 * data line carries a JSON event whose `type` field selects the handling:
 *   - message_start        → model id and initial usage counters
 *   - content_block_delta  → `text_delta` text is appended to the output;
 *                            other delta kinds (e.g. input_json_delta) are ignored
 *   - message_delta        → usage counters are overwritten with newer values
 *   - error                → reading stops and an Error is thrown
 *   - anything else (ping, content_block_start/stop, message_stop) → ignored
 * Payloads that are not JSON objects are skipped rather than failing the call.
 *
 * @param {{ body?: ReadableStream<Uint8Array> | null }} resp  fetch-style response.
 * @param {string} fallbackModel  model id reported when the stream names none.
 * @returns {Promise<{ text: string, sessionId: string, usage: object, model: string, costUsd: number }>}
 * @throws {Error} when the response has no body, or the stream carries an
 *   `error` event (with or without an `error` payload).
 */
async function parseClaudeSseResponse(resp, fallbackModel) {
  const reader = resp.body?.getReader();
  if (!reader) {
    throw new Error("Claude /v1/messages stream returned no body");
  }
  const decoder = new TextDecoder("utf-8");
  const usage = {
    input_tokens: 0,
    output_tokens: 0,
    cache_creation_tokens: 0,
    cache_read_tokens: 0,
  };
  let pending = "";
  let text = "";
  let model = fallbackModel;
  let streamError;

  // Overwrite each counter the event actually reports; keep the rest.
  const mergeUsage = (u) => {
    if (!u) {
      return;
    }
    if (typeof u.input_tokens === "number") {
      usage.input_tokens = u.input_tokens;
    }
    if (typeof u.output_tokens === "number") {
      usage.output_tokens = u.output_tokens;
    }
    if (typeof u.cache_creation_input_tokens === "number") {
      usage.cache_creation_tokens = u.cache_creation_input_tokens;
    }
    if (typeof u.cache_read_input_tokens === "number") {
      usage.cache_read_tokens = u.cache_read_input_tokens;
    }
  };

  const handleData = (payload) => {
    const trimmed = payload.trim();
    if (!trimmed) {
      return;
    }
    let event;
    try {
      event = JSON.parse(trimmed);
    } catch {
      // Best-effort: a non-JSON data line is skipped, not fatal.
      return;
    }
    // JSON.parse can yield null or a primitive; only objects carry a `type`.
    if (event === null || typeof event !== "object") {
      return;
    }
    switch (event.type) {
      case "message_start":
        if (event.message?.model) {
          model = event.message.model;
        }
        mergeUsage(event.message?.usage);
        break;
      case "content_block_delta":
        if (event.delta?.type === "text_delta" && typeof event.delta.text === "string") {
          text += event.delta.text;
        }
        break;
      case "message_delta":
        mergeUsage(event.usage);
        break;
      case "error":
        // An error event without a payload must still fail the call.
        streamError = event.error ?? {};
        break;
      default:
        break;
    }
  };

  let finished = false;
  try {
    while (streamError === undefined) {
      const { value, done } = await reader.read();
      if (done) {
        finished = true;
        break;
      }
      pending += decoder.decode(value, { stream: true });
      const lines = pending.split("\n");
      // The last piece has no terminating newline yet; keep it for later.
      pending = lines.pop() ?? "";
      for (const rawLine of lines) {
        const line = rawLine.endsWith("\r") ? rawLine.slice(0, -1) : rawLine;
        if (line.startsWith("data:")) {
          handleData(line.slice(5));
        }
        if (streamError !== undefined) {
          break;
        }
      }
    }
    if (finished) {
      // Flush bytes the decoder is still holding, then any unterminated line.
      pending += decoder.decode();
      if (pending.startsWith("data:")) {
        handleData(pending.slice(5));
      }
    }
  } finally {
    if (!finished) {
      // Stopped early (error event or read failure): tell the source to stop.
      // A cancel failure is ignored so the original error is the one surfaced.
      await reader.cancel().catch(() => {});
    }
    reader.releaseLock();
  }

  if (streamError !== undefined) {
    throw new Error(`Anthropic stream error: ${streamError.type ?? "unknown"} — ${streamError.message ?? ""}`);
  }
  return {
    text,
    sessionId: "",
    usage,
    model,
    costUsd: 0,
  };
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clawmoney",
3
- "version": "0.14.1",
3
+ "version": "0.14.3",
4
4
  "description": "ClawMoney CLI -- Earn rewards with your AI agent",
5
5
  "type": "module",
6
6
  "bin": {