clawmoney 0.14.0 → 0.14.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/relay/upstream/claude-api.js +226 -17
- package/package.json +1 -1
|
@@ -21,7 +21,7 @@ import { execFileSync } from "node:child_process";
|
|
|
21
21
|
import { readFileSync, writeFileSync, existsSync } from "node:fs";
|
|
22
22
|
import { join } from "node:path";
|
|
23
23
|
import { homedir, userInfo } from "node:os";
|
|
24
|
-
import { randomUUID } from "node:crypto";
|
|
24
|
+
import { randomUUID, createHash } from "node:crypto";
|
|
25
25
|
import { ProxyAgent, setGlobalDispatcher } from "undici";
|
|
26
26
|
import { relayLogger as logger } from "../logger.js";
|
|
27
27
|
import { RateGuard, RateGuardBudgetExceededError, RateGuardCooldownError, } from "./rate-guard.js";
|
|
@@ -39,9 +39,20 @@ const FINGERPRINT_FILE = join(CLAWMONEY_DIR, "claude-fingerprint.json");
|
|
|
39
39
|
// schema). Bootstrapping with the new capture script will replace these
|
|
40
40
|
// with the values observed on the actual Provider machine.
|
|
41
41
|
const DEFAULT_CLI_VERSION = "2.1.100";
|
|
42
|
-
|
|
42
|
+
// NOTE: DEFAULT_CC_VERSION is only used as a fallback if the fingerprint file
|
|
43
|
+
// doesn't tell us the CLI's base version. The 3-char suffix is always
|
|
44
|
+
// recomputed per-request via computeClaudeFingerprint() — storing a baked
|
|
45
|
+
// suffix here would make every request look identical to Anthropic's
|
|
46
|
+
// fingerprint matcher, which is the relay-farm signature we want to avoid.
|
|
47
|
+
const DEFAULT_CC_VERSION = DEFAULT_CLI_VERSION;
|
|
43
48
|
const DEFAULT_CC_ENTRYPOINT = "cli";
|
|
44
49
|
const DEFAULT_USER_AGENT = `claude-cli/${DEFAULT_CLI_VERSION} (external, ${DEFAULT_CC_ENTRYPOINT})`;
|
|
50
|
+
// Hardcoded salt from Claude Code's backend fingerprint validator. Lifted
|
|
51
|
+
// verbatim from `src/utils/fingerprint.ts` in the reconstructed source map
|
|
52
|
+
// (claude-code-sourcemap) and cross-checked against cc-haha's copy of the
|
|
53
|
+
// same file — both projects have the identical string. This value is part
|
|
54
|
+
// of Anthropic's server-side check that the request came from a real CLI.
|
|
55
|
+
const CLAUDE_FINGERPRINT_SALT = "59cf53e54c78";
|
|
45
56
|
const STATIC_CLAUDE_CODE_HEADERS = {
|
|
46
57
|
"accept": "application/json",
|
|
47
58
|
"x-stainless-retry-count": "0",
|
|
@@ -126,11 +137,22 @@ function loadFingerprint() {
|
|
|
126
137
|
}
|
|
127
138
|
// Older fingerprint files only have device_id + account_uuid. Fill in
|
|
128
139
|
// sensible defaults for the new fields so we stay backward-compatible.
|
|
140
|
+
//
|
|
141
|
+
// cc_version sanitization: older capture scripts recorded the full
|
|
142
|
+
// "<CLI-version>.<3char-hash>" string Anthropic sent back (e.g.
|
|
143
|
+
// "2.1.100.c68"). That trailing hash is a per-request fingerprint of
|
|
144
|
+
// the prompt content — baking it into every outbound request means all
|
|
145
|
+
// of this provider's traffic shares the same fingerprint suffix even
|
|
146
|
+
// though prompts differ, which is a strong relay-farm signal. Strip it
|
|
147
|
+
// here so the at-rest cc_version is the bare CLI version, and let
|
|
148
|
+
// computeClaudeFingerprint() recompute the suffix per request.
|
|
149
|
+
const rawCcVersion = raw.cc_version ?? DEFAULT_CC_VERSION;
|
|
150
|
+
const cleanCcVersion = rawCcVersion.replace(/\.[a-f0-9]{3}$/i, "");
|
|
129
151
|
cachedFingerprint = {
|
|
130
152
|
device_id: raw.device_id,
|
|
131
153
|
account_uuid: raw.account_uuid,
|
|
132
154
|
user_agent: raw.user_agent ?? DEFAULT_USER_AGENT,
|
|
133
|
-
cc_version:
|
|
155
|
+
cc_version: cleanCcVersion,
|
|
134
156
|
cc_entrypoint: raw.cc_entrypoint ?? DEFAULT_CC_ENTRYPOINT,
|
|
135
157
|
};
|
|
136
158
|
if (raw.user_agent || raw.cc_version || raw.cc_entrypoint) {
|
|
@@ -196,6 +218,48 @@ const IDENTITY_REPLACEMENTS = [
|
|
|
196
218
|
"You are Claude Code, Anthropic's official CLI for Claude.",
|
|
197
219
|
],
|
|
198
220
|
];
|
|
221
|
+
// ── Attribution fingerprint ──
|
|
222
|
+
//
|
|
223
|
+
// Claude Code's server-side fingerprint validator expects the outgoing
|
|
224
|
+
// /v1/messages request to contain, as the first system block, a text node
|
|
225
|
+
// of the form:
|
|
226
|
+
//
|
|
227
|
+
// x-anthropic-billing-header: cc_version=<CLI-VERSION>.<FP3>; cc_entrypoint=<EP>;
|
|
228
|
+
//
|
|
229
|
+
// where <FP3> is a per-request 3-hex-char hash that Anthropic derives from
|
|
230
|
+
// the first user message's content and the CLI version. The algorithm is
|
|
231
|
+
// verbatim from the reconstructed Claude Code source
|
|
232
|
+
// (claude-code-sourcemap/restored-src/src/utils/fingerprint.ts, cross-
|
|
233
|
+
// verified against cc-haha/src/utils/fingerprint.ts):
|
|
234
|
+
//
|
|
235
|
+
// chars = msg[4] + msg[7] + msg[20] (each char, "0" if OOB)
|
|
236
|
+
// input = SALT + chars + version
|
|
237
|
+
// hash = sha256(input).hex
|
|
238
|
+
// fp = hash[:3]
|
|
239
|
+
//
|
|
240
|
+
// If every request we send reuses the SAME baked <FP3> (e.g. the one that
|
|
241
|
+
// happened to be recorded when capture-claude-request.mjs ran), Anthropic
|
|
242
|
+
// can observe: same account_uuid, wildly different first-user-message
|
|
243
|
+
// texts, but identical cc_version suffix — a strong relay-farm signal.
|
|
244
|
+
// Computing it per request removes that signal.
|
|
245
|
+
function computeClaudeFingerprint(firstUserMessageText, cliVersion) {
|
|
246
|
+
const indices = [4, 7, 20];
|
|
247
|
+
const chars = indices.map((i) => firstUserMessageText[i] ?? "0").join("");
|
|
248
|
+
const input = `${CLAUDE_FINGERPRINT_SALT}${chars}${cliVersion}`;
|
|
249
|
+
return createHash("sha256").update(input).digest("hex").slice(0, 3);
|
|
250
|
+
}
|
|
251
|
+
function buildClaudeAttributionHeader(firstUserMessageText, cliVersion, entrypoint) {
|
|
252
|
+
const fp = computeClaudeFingerprint(firstUserMessageText, cliVersion);
|
|
253
|
+
// NOTE: real Claude Code optionally appends ` cch=00000;` when its Bun
|
|
254
|
+
// native client has NATIVE_CLIENT_ATTESTATION enabled — the Bun HTTP
|
|
255
|
+
// stack then rewrites the zeros with an attestation token in-flight.
|
|
256
|
+
// We can't replicate that (no Bun runtime, no native attester), and the
|
|
257
|
+
// server also accepts the header without it (feature() guarded in
|
|
258
|
+
// sourcemap's getAttributionHeader), so we omit cch entirely rather
|
|
259
|
+
// than sending a literal `cch=00000;` that would fail attestation on
|
|
260
|
+
// tiers where Anthropic validates it.
|
|
261
|
+
return `x-anthropic-billing-header: cc_version=${cliVersion}.${fp}; cc_entrypoint=${entrypoint};`;
|
|
262
|
+
}
|
|
199
263
|
function sanitizePrompt(prompt) {
|
|
200
264
|
if (!prompt)
|
|
201
265
|
return prompt;
|
|
@@ -572,13 +636,17 @@ async function doCallClaudeApi(opts) {
|
|
|
572
636
|
// one-shot sessions.
|
|
573
637
|
const sessionId = getMaskedSessionId();
|
|
574
638
|
const maxTokens = opts.maxTokens ?? 4096;
|
|
639
|
+
// Dynamic attribution header — computed per request from the first user
|
|
640
|
+
// message text so the cc_version.<FP3> suffix varies request-by-request,
|
|
641
|
+
// matching what real Claude Code sends. See computeClaudeFingerprint().
|
|
642
|
+
const attributionHeader = buildClaudeAttributionHeader(sanitizedPrompt, fingerprint.cc_version, fingerprint.cc_entrypoint);
|
|
575
643
|
const body = {
|
|
576
644
|
model: normalizeModel(opts.model),
|
|
577
645
|
max_tokens: maxTokens,
|
|
578
646
|
system: [
|
|
579
647
|
{
|
|
580
648
|
type: "text",
|
|
581
|
-
text:
|
|
649
|
+
text: attributionHeader,
|
|
582
650
|
},
|
|
583
651
|
{
|
|
584
652
|
type: "text",
|
|
@@ -604,7 +672,15 @@ async function doCallClaudeApi(opts) {
|
|
|
604
672
|
},
|
|
605
673
|
],
|
|
606
674
|
metadata: { user_id: buildMetadataUserID(fingerprint, sessionId) },
|
|
607
|
-
stream:
|
|
675
|
+
// Real Claude Code ALWAYS sends stream:true on its main path
|
|
676
|
+
// (claude-code-sourcemap/src/services/api/claude.ts:1824 —
|
|
677
|
+
// `{ ...params, stream: true }`). The non-stream call at line 864 is
|
|
678
|
+
// only the fallback path triggered when the stream fails mid-response.
|
|
679
|
+
// Sending stream:false on every request is a statistical signal that
|
|
680
|
+
// Anthropic could use to identify relay clients vs real CLI — the
|
|
681
|
+
// entire account's traffic would be the opposite polarity of what the
|
|
682
|
+
// CLI ever emits. Switch to streaming to match.
|
|
683
|
+
stream: true,
|
|
608
684
|
};
|
|
609
685
|
const bodyJson = JSON.stringify(body);
|
|
610
686
|
let transientAttempt = 0;
|
|
@@ -627,7 +703,10 @@ async function doCallClaudeApi(opts) {
|
|
|
627
703
|
if (sessionWin)
|
|
628
704
|
rateGuard?.setSessionWindow(sessionWin);
|
|
629
705
|
if (resp.ok) {
|
|
630
|
-
|
|
706
|
+
// Stream parser — real Claude Code's main path uses stream:true; see
|
|
707
|
+
// body construction above. parseClaudeSseResponse aggregates text
|
|
708
|
+
// deltas + usage until message_stop, matching SDK semantics.
|
|
709
|
+
const parsed = await parseClaudeSseResponse(resp, opts.model);
|
|
631
710
|
recordSpendFromUsage(parsed, opts.model);
|
|
632
711
|
return parsed;
|
|
633
712
|
}
|
|
@@ -688,22 +767,152 @@ function recordSpendFromUsage(parsed, model) {
|
|
|
688
767
|
// subscription meter and what will actually burn the account.
|
|
689
768
|
rateGuard.recordSpend(cost.apiCost);
|
|
690
769
|
}
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
770
|
+
/**
|
|
771
|
+
* Parse an Anthropic SSE `/v1/messages` stream response into a ParsedOutput.
|
|
772
|
+
*
|
|
773
|
+
* Wire format (Anthropic docs — beta.messages.create({stream: true})):
|
|
774
|
+
*
|
|
775
|
+
* event: message_start
|
|
776
|
+
* data: {"type":"message_start","message":{"id":"...","model":"...","usage":{"input_tokens":10,...}}}
|
|
777
|
+
*
|
|
778
|
+
* event: content_block_start
|
|
779
|
+
* data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}
|
|
780
|
+
*
|
|
781
|
+
* event: content_block_delta
|
|
782
|
+
* data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hello"}}
|
|
783
|
+
*
|
|
784
|
+
* ... more deltas ...
|
|
785
|
+
*
|
|
786
|
+
* event: content_block_stop
|
|
787
|
+
* data: {"type":"content_block_stop","index":0}
|
|
788
|
+
*
|
|
789
|
+
* event: message_delta
|
|
790
|
+
* data: {"type":"message_delta","delta":{"stop_reason":"end_turn"},"usage":{"output_tokens":42}}
|
|
791
|
+
*
|
|
792
|
+
* event: message_stop
|
|
793
|
+
* data: {"type":"message_stop"}
|
|
794
|
+
*
|
|
795
|
+
* event: ping (keepalive — ignore)
|
|
796
|
+
*
|
|
797
|
+
* event: error (upstream error — throw)
|
|
798
|
+
* data: {"type":"error","error":{"type":"overloaded_error","message":"..."}}
|
|
799
|
+
*/
|
|
800
|
+
async function parseClaudeSseResponse(resp, fallbackModel) {
|
|
801
|
+
const reader = resp.body?.getReader();
|
|
802
|
+
if (!reader) {
|
|
803
|
+
throw new Error("Claude streamGenerateContent returned no body");
|
|
804
|
+
}
|
|
805
|
+
const decoder = new TextDecoder("utf-8");
|
|
806
|
+
let buffer = "";
|
|
807
|
+
let text = "";
|
|
808
|
+
let model = fallbackModel;
|
|
809
|
+
let inputTokens = 0;
|
|
810
|
+
let outputTokens = 0;
|
|
811
|
+
let cacheCreation = 0;
|
|
812
|
+
let cacheRead = 0;
|
|
813
|
+
let streamError;
|
|
814
|
+
const processChunk = (jsonStr) => {
|
|
815
|
+
const trimmed = jsonStr.trim();
|
|
816
|
+
if (!trimmed)
|
|
817
|
+
return;
|
|
818
|
+
let chunk;
|
|
819
|
+
try {
|
|
820
|
+
chunk = JSON.parse(trimmed);
|
|
821
|
+
}
|
|
822
|
+
catch {
|
|
823
|
+
return;
|
|
824
|
+
}
|
|
825
|
+
switch (chunk.type) {
|
|
826
|
+
case "message_start": {
|
|
827
|
+
if (chunk.message?.model)
|
|
828
|
+
model = chunk.message.model;
|
|
829
|
+
const u = chunk.message?.usage;
|
|
830
|
+
if (u) {
|
|
831
|
+
if (typeof u.input_tokens === "number")
|
|
832
|
+
inputTokens = u.input_tokens;
|
|
833
|
+
if (typeof u.output_tokens === "number")
|
|
834
|
+
outputTokens = u.output_tokens;
|
|
835
|
+
if (typeof u.cache_creation_input_tokens === "number") {
|
|
836
|
+
cacheCreation = u.cache_creation_input_tokens;
|
|
837
|
+
}
|
|
838
|
+
if (typeof u.cache_read_input_tokens === "number") {
|
|
839
|
+
cacheRead = u.cache_read_input_tokens;
|
|
840
|
+
}
|
|
841
|
+
}
|
|
842
|
+
break;
|
|
843
|
+
}
|
|
844
|
+
case "content_block_delta": {
|
|
845
|
+
// We only accumulate text_delta. input_json_delta is for tool calls,
|
|
846
|
+
// which we don't surface from the relay path (the buyer gets the
|
|
847
|
+
// model's final text response, not in-flight tool plumbing).
|
|
848
|
+
if (chunk.delta?.type === "text_delta" && typeof chunk.delta.text === "string") {
|
|
849
|
+
text += chunk.delta.text;
|
|
850
|
+
}
|
|
851
|
+
break;
|
|
852
|
+
}
|
|
853
|
+
case "message_delta": {
|
|
854
|
+
// message_delta carries the final output_tokens count and
|
|
855
|
+
// potentially an updated usage (e.g. cache hits applied late).
|
|
856
|
+
const u = chunk.usage;
|
|
857
|
+
if (u) {
|
|
858
|
+
if (typeof u.output_tokens === "number")
|
|
859
|
+
outputTokens = u.output_tokens;
|
|
860
|
+
if (typeof u.input_tokens === "number")
|
|
861
|
+
inputTokens = u.input_tokens;
|
|
862
|
+
if (typeof u.cache_creation_input_tokens === "number") {
|
|
863
|
+
cacheCreation = u.cache_creation_input_tokens;
|
|
864
|
+
}
|
|
865
|
+
if (typeof u.cache_read_input_tokens === "number") {
|
|
866
|
+
cacheRead = u.cache_read_input_tokens;
|
|
867
|
+
}
|
|
868
|
+
}
|
|
869
|
+
break;
|
|
870
|
+
}
|
|
871
|
+
case "error": {
|
|
872
|
+
streamError = chunk.error;
|
|
873
|
+
break;
|
|
874
|
+
}
|
|
875
|
+
// message_stop / content_block_start / content_block_stop / ping —
|
|
876
|
+
// structural, nothing to accumulate.
|
|
877
|
+
default:
|
|
878
|
+
break;
|
|
879
|
+
}
|
|
880
|
+
};
|
|
881
|
+
while (true) {
|
|
882
|
+
const { value, done } = await reader.read();
|
|
883
|
+
if (done)
|
|
884
|
+
break;
|
|
885
|
+
buffer += decoder.decode(value, { stream: true });
|
|
886
|
+
let newlineIdx;
|
|
887
|
+
while ((newlineIdx = buffer.indexOf("\n")) >= 0) {
|
|
888
|
+
const line = buffer.slice(0, newlineIdx).replace(/\r$/, "");
|
|
889
|
+
buffer = buffer.slice(newlineIdx + 1);
|
|
890
|
+
if (!line)
|
|
891
|
+
continue;
|
|
892
|
+
// SSE dispatches on `data: ...` lines. `event: ...` names are
|
|
893
|
+
// informational (the chunk JSON's `type` field is authoritative).
|
|
894
|
+
if (line.startsWith("data:")) {
|
|
895
|
+
processChunk(line.slice(5));
|
|
896
|
+
}
|
|
897
|
+
}
|
|
898
|
+
}
|
|
899
|
+
// Flush trailing line (rare — most servers end with a \n\n).
|
|
900
|
+
if (buffer.startsWith("data:")) {
|
|
901
|
+
processChunk(buffer.slice(5));
|
|
902
|
+
}
|
|
903
|
+
if (streamError) {
|
|
904
|
+
throw new Error(`Anthropic stream error: ${streamError.type ?? "unknown"} — ${streamError.message ?? ""}`);
|
|
905
|
+
}
|
|
697
906
|
return {
|
|
698
907
|
text,
|
|
699
908
|
sessionId: "",
|
|
700
909
|
usage: {
|
|
701
|
-
input_tokens:
|
|
702
|
-
output_tokens:
|
|
703
|
-
cache_creation_tokens:
|
|
704
|
-
cache_read_tokens:
|
|
910
|
+
input_tokens: inputTokens,
|
|
911
|
+
output_tokens: outputTokens,
|
|
912
|
+
cache_creation_tokens: cacheCreation,
|
|
913
|
+
cache_read_tokens: cacheRead,
|
|
705
914
|
},
|
|
706
|
-
model
|
|
915
|
+
model,
|
|
707
916
|
costUsd: 0,
|
|
708
917
|
};
|
|
709
918
|
}
|