clawmoney 0.14.1 → 0.14.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/relay/upstream/claude-api.js +274 -19
- package/package.json +1 -1
|
@@ -53,8 +53,21 @@ const DEFAULT_USER_AGENT = `claude-cli/${DEFAULT_CLI_VERSION} (external, ${DEFAU
|
|
|
53
53
|
// same file — both projects have the identical string. This value is part
|
|
54
54
|
// of Anthropic's server-side check that the request came from a real CLI.
|
|
55
55
|
const CLAUDE_FINGERPRINT_SALT = "59cf53e54c78";
|
|
56
|
+
// Headers that real Claude Code emits on every /v1/messages call. The
|
|
57
|
+
// Anthropic SDK would inject these automatically; since we bypass the SDK
|
|
58
|
+
// and hand-roll the fetch call we have to include them verbatim.
|
|
59
|
+
//
|
|
60
|
+
// Note the deliberate omissions:
|
|
61
|
+
// - `anthropic-beta` is NOT static — it is per-request and derived from
|
|
62
|
+
// the model via pickClaudeBetasForModel(). Real Claude Code passes
|
|
63
|
+
// the list via the SDK's `betas: [...]` body param and the SDK then
|
|
64
|
+
// emits it as a comma-joined `anthropic-beta` header. We do the same
|
|
65
|
+
// thing by building the header inline in doCallClaudeApi so Haiku
|
|
66
|
+
// requests drop `claude-code-20250219` like the real CLI.
|
|
67
|
+
// - `accept` is set per-request (see doCallClaudeApi), which sends
|
|
68
|
+
// `application/json, text/event-stream` alongside stream:true. It is left
|
|
69
|
+
// out of the static set so the right value can be picked at call time.
|
|
56
70
|
const STATIC_CLAUDE_CODE_HEADERS = {
|
|
57
|
-
"accept": "application/json",
|
|
58
71
|
"x-stainless-retry-count": "0",
|
|
59
72
|
"x-stainless-timeout": "600",
|
|
60
73
|
"x-stainless-lang": "js",
|
|
@@ -67,10 +80,6 @@ const STATIC_CLAUDE_CODE_HEADERS = {
|
|
|
67
80
|
"anthropic-version": "2023-06-01",
|
|
68
81
|
"x-app": "cli",
|
|
69
82
|
"content-type": "application/json",
|
|
70
|
-
// Minimal beta set that Max-tier subscriptions always accept. Adding
|
|
71
|
-
// context-1m or context-management here will get rejected as "long
|
|
72
|
-
// context beta not available for this subscription" on non-Enterprise tiers.
|
|
73
|
-
"anthropic-beta": "claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14",
|
|
74
83
|
};
|
|
75
84
|
// System prompt captured from real Claude Code ≥ 2.1.x. The first marker line
|
|
76
85
|
// matches claudeCodeSystemPrompts template #2 in sub2api's validator
|
|
@@ -88,6 +97,85 @@ const MODEL_ID_OVERRIDES = {
|
|
|
88
97
|
/**
 * Map a caller-supplied model id to the id that should be sent upstream.
 *
 * Only own entries of MODEL_ID_OVERRIDES are consulted. A bare
 * `MODEL_ID_OVERRIDES[model]` lookup would also resolve inherited keys such
 * as "constructor" or "toString" to functions, and callers that then invoke
 * string methods on the result would throw.
 *
 * @param {string} model - Model id as supplied by the caller.
 * @returns {string} The override when one is registered (and non-nullish),
 *   otherwise `model` unchanged.
 */
function normalizeModel(model) {
    if (!Object.hasOwn(MODEL_ID_OVERRIDES, model)) {
        return model;
    }
    return MODEL_ID_OVERRIDES[model] ?? model;
}
|
|
100
|
+
// ── Per-model thinking + betas selection (mirrors real Claude Code) ──
|
|
101
|
+
//
|
|
102
|
+
// Real Claude Code ALWAYS sends a `thinking` body field for Claude 4+
|
|
103
|
+
// models, and the shape depends on whether the model supports adaptive
|
|
104
|
+
// thinking. Source: claude-code-best/src/utils/thinking.ts:
|
|
105
|
+
// - modelSupportsThinking() → any canonical name NOT matching "claude-3-"
|
|
106
|
+
// - modelSupportsAdaptiveThinking() → only canonical names containing
|
|
107
|
+
// "opus-4-6" or "sonnet-4-6"
|
|
108
|
+
//
|
|
109
|
+
// If we send requests to Anthropic without this field but with Claude 4+
|
|
110
|
+
// models, the per-account traffic pattern is "zero thinking on every
|
|
111
|
+
// message" which is a clear relay-farm fingerprint (real users on these
|
|
112
|
+
// tiers get adaptive thinking automatically and have no way to turn it
|
|
113
|
+
// off short of setting alwaysThinkingEnabled=false).
|
|
114
|
+
/**
 * Report whether a model is treated as thinking-capable.
 *
 * @param {string} model - Model id; normalized via normalizeModel() first.
 * @returns {boolean} `false` when the normalized id contains "claude-3-",
 *   `true` for every other id.
 */
function modelSupportsThinking(model) {
    const canonical = normalizeModel(model);
    const isClaude3 = canonical.includes("claude-3-");
    return !isClaude3;
}
|
|
117
|
+
/**
 * Report whether a model is treated as supporting adaptive thinking.
 *
 * @param {string} model - Model id; normalized via normalizeModel() first.
 * @returns {boolean} `true` when the normalized id contains "opus-4-6" or
 *   "sonnet-4-6".
 */
function modelSupportsAdaptiveThinking(model) {
    const canonical = normalizeModel(model);
    if (canonical.includes("opus-4-6")) {
        return true;
    }
    return canonical.includes("sonnet-4-6");
}
|
|
121
|
+
// Anthropic's /v1/messages rejects thinking.enabled.budget_tokens < 1024.
|
|
122
|
+
const CLAUDE_MIN_THINKING_BUDGET = 1024;
|
|
123
|
+
/**
 * Pick the `thinking` request field for a model, together with the
 * `max_tokens` value that must accompany it.
 *
 * Three outcomes:
 *   - model fails modelSupportsThinking()      → no config, cap untouched
 *   - model passes modelSupportsAdaptiveThinking() → `{ type: "adaptive" }`,
 *     cap untouched
 *   - otherwise → `{ type: "enabled", budget_tokens }`, where the cap is
 *     raised to at least CLAUDE_MIN_THINKING_BUDGET + 1 and the budget is one
 *     less than the (possibly raised) cap, so the budget stays at or above
 *     the minimum and strictly below max_tokens.
 *
 * @param {string} model - Model id.
 * @param {number} maxTokens - The caller's requested max_tokens.
 * @returns {{config: (object|undefined), adjustedMaxTokens: number}}
 */
function pickClaudeThinkingConfig(model, maxTokens) {
    const thinkingCapable = modelSupportsThinking(model);
    if (!thinkingCapable) {
        return { config: undefined, adjustedMaxTokens: maxTokens };
    }
    if (modelSupportsAdaptiveThinking(model)) {
        // Adaptive carries no explicit budget, so the caller's cap is kept.
        const adaptive = { type: "adaptive" };
        return { config: adaptive, adjustedMaxTokens: maxTokens };
    }
    // Budgeted thinking: make room for the minimum budget plus one token.
    const smallestValidMax = CLAUDE_MIN_THINKING_BUDGET + 1;
    const adjustedMaxTokens = Math.max(maxTokens, smallestValidMax);
    const budgetTokens = Math.max(CLAUDE_MIN_THINKING_BUDGET, adjustedMaxTokens - 1);
    const config = { type: "enabled", budget_tokens: budgetTokens };
    return { config, adjustedMaxTokens };
}
|
|
147
|
+
/**
 * Build the ordered list of beta flags for one request to the given model.
 *
 * Rules, in output order:
 *   1. "claude-code-20250219"            — unless the normalized id contains "haiku"
 *   2. "oauth-2025-04-20"                — always
 *   3. "interleaved-thinking-2025-05-14" — when modelSupportsThinking(model)
 *
 * The result is an array of flag names; doCallClaudeApi comma-joins it into
 * the `anthropic-beta` request header.
 *
 * @param {string} model - Model id; normalized via normalizeModel() for the
 *   haiku check.
 * @returns {string[]} Beta flag names in the order listed above.
 */
function pickClaudeBetasForModel(model) {
    const canonical = normalizeModel(model);
    const flags = canonical.includes("haiku") ? [] : ["claude-code-20250219"];
    flags.push("oauth-2025-04-20");
    const thinkingCapable = modelSupportsThinking(model);
    if (thinkingCapable) {
        flags.push("interleaved-thinking-2025-05-14");
    }
    return flags;
}
|
|
91
179
|
// ── Proxy (honor HTTPS_PROXY / http_proxy env vars) ──
|
|
92
180
|
//
|
|
93
181
|
// Node's native fetch does NOT read these env vars automatically, so if the
|
|
@@ -640,9 +728,15 @@ async function doCallClaudeApi(opts) {
|
|
|
640
728
|
// message text so the cc_version.<FP3> suffix varies request-by-request,
|
|
641
729
|
// matching what real Claude Code sends. See computeClaudeFingerprint().
|
|
642
730
|
const attributionHeader = buildClaudeAttributionHeader(sanitizedPrompt, fingerprint.cc_version, fingerprint.cc_entrypoint);
|
|
731
|
+
// Per-request betas + thinking config, picked from the real CLI's
|
|
732
|
+
// per-model logic (see pickClaudeBetasForModel / pickClaudeThinkingConfig).
|
|
733
|
+
// These are two of the strongest fingerprint signals Anthropic could use
|
|
734
|
+
// to distinguish relay traffic from genuine CLI traffic.
|
|
735
|
+
const betasForRequest = pickClaudeBetasForModel(opts.model);
|
|
736
|
+
const { config: thinkingConfig, adjustedMaxTokens } = pickClaudeThinkingConfig(opts.model, maxTokens);
|
|
643
737
|
const body = {
|
|
644
738
|
model: normalizeModel(opts.model),
|
|
645
|
-
max_tokens:
|
|
739
|
+
max_tokens: adjustedMaxTokens,
|
|
646
740
|
system: [
|
|
647
741
|
{
|
|
648
742
|
type: "text",
|
|
@@ -672,8 +766,27 @@ async function doCallClaudeApi(opts) {
|
|
|
672
766
|
},
|
|
673
767
|
],
|
|
674
768
|
metadata: { user_id: buildMetadataUserID(fingerprint, sessionId) },
|
|
675
|
-
stream:
|
|
769
|
+
// Real Claude Code ALWAYS sends stream:true on its main path
|
|
770
|
+
// (claude-code-sourcemap/src/services/api/claude.ts:1824 —
|
|
771
|
+
// `{ ...params, stream: true }`). The non-stream call at line 864 is
|
|
772
|
+
// only the fallback path triggered when the stream fails mid-response.
|
|
773
|
+
// Sending stream:false on every request is a statistical signal that
|
|
774
|
+
// Anthropic could use to identify relay clients vs real CLI — the
|
|
775
|
+
// entire account's traffic would be the opposite polarity of what the
|
|
776
|
+
// CLI ever emits. Switch to streaming to match.
|
|
777
|
+
stream: true,
|
|
778
|
+
// NOTE: `betas` is a client-side SDK-only param — the Anthropic SDK
|
|
779
|
+
// strips it out of the body and emits it as the `anthropic-beta`
|
|
780
|
+
// HTTP header. Anthropic's API rejects requests that carry `betas`
|
|
781
|
+
// in the wire body with `betas: Extra inputs are not permitted`.
|
|
782
|
+
// The header is set on the fetch call below, so don't put it here.
|
|
676
783
|
};
|
|
784
|
+
// `thinking` is always set on Claude 4+ models by real CLI. Omitting it
|
|
785
|
+
// would be an account-wide zero-thinking anomaly. Adaptive for 4-6
|
|
786
|
+
// models, enabled+budget for 4-5 / haiku.
|
|
787
|
+
if (thinkingConfig) {
|
|
788
|
+
body.thinking = thinkingConfig;
|
|
789
|
+
}
|
|
677
790
|
const bodyJson = JSON.stringify(body);
|
|
678
791
|
let transientAttempt = 0;
|
|
679
792
|
let hasRefreshed = false;
|
|
@@ -683,6 +796,15 @@ async function doCallClaudeApi(opts) {
|
|
|
683
796
|
method: "POST",
|
|
684
797
|
headers: {
|
|
685
798
|
...STATIC_CLAUDE_CODE_HEADERS,
|
|
799
|
+
// SSE streaming — Anthropic returns event-stream body when
|
|
800
|
+
// stream:true is set in the body. The SDK default sets an accept
|
|
801
|
+
// that includes text/event-stream; we match that exactly.
|
|
802
|
+
"accept": "application/json, text/event-stream",
|
|
803
|
+
// `anthropic-beta` is the header the Anthropic SDK generates from its
|
|
804
|
+
// client-side `betas` param. The API rejects `betas` in the wire body
|
|
805
|
+
// ("Extra inputs are not permitted"), so this header is the only place
|
|
806
|
+
// the per-model beta list is sent.
|
|
807
|
+
"anthropic-beta": betasForRequest.join(","),
|
|
686
808
|
"user-agent": fingerprint.user_agent,
|
|
687
809
|
"authorization": `Bearer ${creds.accessToken}`,
|
|
688
810
|
"x-claude-code-session-id": sessionId,
|
|
@@ -695,7 +817,10 @@ async function doCallClaudeApi(opts) {
|
|
|
695
817
|
if (sessionWin)
|
|
696
818
|
rateGuard?.setSessionWindow(sessionWin);
|
|
697
819
|
if (resp.ok) {
|
|
698
|
-
|
|
820
|
+
// Stream parser — real Claude Code's main path uses stream:true; see
|
|
821
|
+
// body construction above. parseClaudeSseResponse aggregates text
|
|
822
|
+
// deltas + usage until message_stop, matching SDK semantics.
|
|
823
|
+
const parsed = await parseClaudeSseResponse(resp, opts.model);
|
|
699
824
|
recordSpendFromUsage(parsed, opts.model);
|
|
700
825
|
return parsed;
|
|
701
826
|
}
|
|
@@ -756,22 +881,152 @@ function recordSpendFromUsage(parsed, model) {
|
|
|
756
881
|
// subscription meter and what will actually burn the account.
|
|
757
882
|
rateGuard.recordSpend(cost.apiCost);
|
|
758
883
|
}
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
884
|
+
/**
 * Read an Anthropic `/v1/messages` SSE response to the end and fold it into
 * a single parsed result.
 *
 * Only `data:` lines are interpreted (`event:` lines are skipped); each one
 * carries a JSON object whose `type` field selects the handling:
 *
 *   message_start        → model id and initial usage counters
 *   content_block_delta  → `text_delta` text is appended to the output;
 *                          other delta kinds are not surfaced
 *   message_delta        → updated usage counters (e.g. final output_tokens)
 *   error                → remembered, thrown once the stream is drained
 *   anything else        → ignored (message_stop, content_block_start,
 *                          content_block_stop, ping, ...)
 *
 * `data:` payloads that are empty or not valid JSON are skipped on purpose:
 * parsing is best-effort and one malformed line must not abort the response.
 *
 * @param {{ body?: { getReader(): { read(): Promise<{ value?: Uint8Array, done: boolean }> } } | null }} resp
 *   Response whose body is a readable byte stream.
 * @param {string} fallbackModel - Model id reported when the stream never
 *   names one in `message_start`.
 * @returns {Promise<{text: string, sessionId: string, usage: {input_tokens: number, output_tokens: number, cache_creation_tokens: number, cache_read_tokens: number}, model: string, costUsd: number}>}
 * @throws {Error} When the response has no body, or when the stream carried
 *   an `error` event (with or without an `error` payload).
 */
async function parseClaudeSseResponse(resp, fallbackModel) {
    const reader = resp.body?.getReader();
    if (!reader) {
        throw new Error("Claude /v1/messages stream returned no body");
    }
    const decoder = new TextDecoder("utf-8");
    const usage = {
        input_tokens: 0,
        output_tokens: 0,
        cache_creation_tokens: 0,
        cache_read_tokens: 0,
    };
    let text = "";
    let model = fallbackModel;
    let streamError;

    // Copy whichever numeric counters are present; absent ones keep their
    // previous value so a later event can refine an earlier one.
    const applyUsage = (u) => {
        if (!u) {
            return;
        }
        if (typeof u.input_tokens === "number") {
            usage.input_tokens = u.input_tokens;
        }
        if (typeof u.output_tokens === "number") {
            usage.output_tokens = u.output_tokens;
        }
        if (typeof u.cache_creation_input_tokens === "number") {
            usage.cache_creation_tokens = u.cache_creation_input_tokens;
        }
        if (typeof u.cache_read_input_tokens === "number") {
            usage.cache_read_tokens = u.cache_read_input_tokens;
        }
    };

    // Interpret the payload of a single `data:` line.
    const handleData = (payload) => {
        const trimmed = payload.trim();
        if (!trimmed) {
            return;
        }
        let event;
        try {
            event = JSON.parse(trimmed);
        }
        catch {
            // Best-effort: skip payloads that are not JSON.
            return;
        }
        // `event?.` guards against valid-JSON non-objects such as `null`.
        switch (event?.type) {
            case "message_start": {
                if (event.message?.model) {
                    model = event.message.model;
                }
                applyUsage(event.message?.usage);
                break;
            }
            case "content_block_delta": {
                if (event.delta?.type === "text_delta" && typeof event.delta.text === "string") {
                    text += event.delta.text;
                }
                break;
            }
            case "message_delta": {
                applyUsage(event.usage);
                break;
            }
            case "error": {
                // An error event without a payload is still an error; fall
                // back to an empty object so it is reported, not dropped.
                streamError = event.error ?? {};
                break;
            }
            default:
                break;
        }
    };

    let buffer = "";
    while (true) {
        const { value, done } = await reader.read();
        if (done) {
            break;
        }
        // stream:true keeps multi-byte characters that straddle two chunks intact.
        buffer += decoder.decode(value, { stream: true });
        let newlineIdx;
        while ((newlineIdx = buffer.indexOf("\n")) >= 0) {
            const line = buffer.slice(0, newlineIdx).replace(/\r$/, "");
            buffer = buffer.slice(newlineIdx + 1);
            if (line.startsWith("data:")) {
                handleData(line.slice(5));
            }
        }
    }
    // Flush any bytes the decoder was still holding, then handle a final
    // line that was not newline-terminated.
    buffer += decoder.decode();
    if (buffer.startsWith("data:")) {
        handleData(buffer.slice(5));
    }
    if (streamError) {
        throw new Error(`Anthropic stream error: ${streamError.type ?? "unknown"} — ${streamError.message ?? ""}`);
    }
    return {
        text,
        sessionId: "",
        usage,
        model,
        costUsd: 0,
    };
}
|