clawmoney 0.15.5 → 0.15.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/relay/provider.js
CHANGED
|
@@ -3,7 +3,7 @@ import { join } from "node:path";
|
|
|
3
3
|
import { homedir } from "node:os";
|
|
4
4
|
import YAML from "yaml";
|
|
5
5
|
import { RelayWsClient } from "./ws-client.js";
|
|
6
|
-
import { callClaudeApi, preflightClaudeApi, getRateGuardSnapshot as getClaudeRateGuardSnapshot, } from "./upstream/claude-api.js";
|
|
6
|
+
import { callClaudeApi, callClaudeApiPassthrough, preflightClaudeApi, getRateGuardSnapshot as getClaudeRateGuardSnapshot, } from "./upstream/claude-api.js";
|
|
7
7
|
import { callCodexApi, preflightCodexApi, getRateGuardSnapshot as getCodexRateGuardSnapshot, } from "./upstream/codex-api.js";
|
|
8
8
|
import { callGeminiApi, preflightGeminiApi, getGeminiRateGuardSnapshot, } from "./upstream/gemini-api.js";
|
|
9
9
|
import { callAntigravityApi, preflightAntigravityApi, getAntigravityRateGuardSnapshot, } from "./upstream/antigravity-api.js";
|
|
@@ -161,7 +161,7 @@ function extractMessageText(content) {
|
|
|
161
161
|
function messagesToPrompt(messages) {
|
|
162
162
|
return messages.map((m) => extractMessageText(m.content)).join("\n");
|
|
163
163
|
}
|
|
164
|
-
async function executeRelayRequest(request, config) {
|
|
164
|
+
async function executeRelayRequest(request, config, sendChunk) {
|
|
165
165
|
const { request_id, max_budget_usd } = request;
|
|
166
166
|
const cliType = request.cli_type ?? config.relay.cli_type;
|
|
167
167
|
const model = request.model ?? config.relay.model;
|
|
@@ -213,11 +213,38 @@ async function executeRelayRequest(request, config) {
|
|
|
213
213
|
});
|
|
214
214
|
}
|
|
215
215
|
else {
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
216
|
+
// Claude: two modes.
|
|
217
|
+
//
|
|
218
|
+
// 1. PASSTHROUGH (preferred when Hub supplies request.passthrough_body):
|
|
219
|
+
// the Hub is acting as a transparent ANTHROPIC_BASE_URL proxy for a
|
|
220
|
+
// real Claude Code (or anthropic-SDK) client. Forward the buyer's
|
|
221
|
+
// actual body — tools, multi-turn messages, thinking, system, etc.
|
|
222
|
+
// all preserved — with surgical fingerprint rewrites so Anthropic
|
|
223
|
+
// sees a stable per-OAuth-account identity instead of a rotating
|
|
224
|
+
// buyer-identity signal.
|
|
225
|
+
//
|
|
226
|
+
// 2. TEMPLATE (fallback when no passthrough_body): the legacy
|
|
227
|
+
// chat-relay path. Daemon constructs a synthetic single-user-message
|
|
228
|
+
// request body that matches the real CC wire fingerprint exactly,
|
|
229
|
+
// dropping everything the buyer sent except the concatenated prompt
|
|
230
|
+
// text. Used for OpenAI-compatible /v1/chat/completions and any
|
|
231
|
+
// other client that doesn't need real agentic support.
|
|
232
|
+
if (request.passthrough_body) {
|
|
233
|
+
parsed = await callClaudeApiPassthrough({
|
|
234
|
+
clientBody: request.passthrough_body,
|
|
235
|
+
model,
|
|
236
|
+
clientBeta: request.anthropic_beta,
|
|
237
|
+
onRawEvent: sendChunk,
|
|
238
|
+
});
|
|
239
|
+
}
|
|
240
|
+
else {
|
|
241
|
+
parsed = await callClaudeApi({
|
|
242
|
+
prompt,
|
|
243
|
+
model,
|
|
244
|
+
maxTokens: max_budget_usd ? undefined : 4096,
|
|
245
|
+
onRawEvent: sendChunk,
|
|
246
|
+
});
|
|
247
|
+
}
|
|
221
248
|
}
|
|
222
249
|
const elapsedMs = Date.now() - startMs;
|
|
223
250
|
const answer = parsed.text.replace(/\n/g, " ").slice(0, 80);
|
|
@@ -330,7 +357,20 @@ export function runRelayProvider(cliOverride) {
|
|
|
330
357
|
}
|
|
331
358
|
activeTasks.add(request.request_id);
|
|
332
359
|
logger.info(`Processing relay request=${request.request_id} (active=${activeTasks.size}/${config.relay.concurrency})`);
|
|
333
|
-
|
|
360
|
+
// Per-request SSE chunk forwarder. Each raw Anthropic SSE frame is sent
|
|
361
|
+
// to the Hub as its own WS event so the Hub can relay it straight to the
|
|
362
|
+
// buyer — drops TTFT from "whole response" to "first-token-from-upstream".
|
|
363
|
+
// WS sends are fire-and-forget here; the final relay_response still
|
|
364
|
+
// carries the fully aggregated content as a fallback for Hubs that
|
|
365
|
+
// haven't wired up chunk forwarding yet.
|
|
366
|
+
const sendChunk = (sse) => {
|
|
367
|
+
wsClient.send({
|
|
368
|
+
event: "relay_stream_chunk",
|
|
369
|
+
request_id: request.request_id,
|
|
370
|
+
sse,
|
|
371
|
+
});
|
|
372
|
+
};
|
|
373
|
+
executeRelayRequest(request, config, sendChunk)
|
|
334
374
|
.then((response) => {
|
|
335
375
|
const sent = wsClient.send(response);
|
|
336
376
|
if (sent) {
|
package/dist/relay/types.d.ts
CHANGED
|
@@ -17,6 +17,8 @@ export interface RelayRequest {
|
|
|
17
17
|
stateful?: boolean;
|
|
18
18
|
model?: string;
|
|
19
19
|
max_budget_usd?: number;
|
|
20
|
+
passthrough_body?: Record<string, unknown>;
|
|
21
|
+
anthropic_beta?: string;
|
|
20
22
|
}
|
|
21
23
|
export interface RelayConnectedEvent {
|
|
22
24
|
event: "connected";
|
|
@@ -51,7 +53,12 @@ export interface RelayResponse {
|
|
|
51
53
|
error?: string;
|
|
52
54
|
session_window?: RelayResponseSessionWindow;
|
|
53
55
|
}
|
|
54
|
-
export
|
|
56
|
+
/**
 * Outgoing WebSocket event carrying one streamed chunk of an in-flight
 * relay request.
 */
export interface RelayStreamChunkEvent {
    /** Discriminator that distinguishes a stream chunk from other outgoing events. */
    event: "relay_stream_chunk";
    /** Identifier of the relay request this chunk belongs to. */
    request_id: string;
    /** The SSE frame text being forwarded for this request. */
    sse: string;
}
/** Any event the provider may send: a final response or a stream chunk. */
export type RelayOutgoingEvent = RelayResponse | RelayStreamChunkEvent;
|
|
55
62
|
export interface ParsedOutput {
|
|
56
63
|
text: string;
|
|
57
64
|
sessionId: string;
|
|
@@ -27,5 +27,13 @@ export interface CallClaudeApiOptions {
|
|
|
27
27
|
prompt: string;
|
|
28
28
|
model: string;
|
|
29
29
|
maxTokens?: number;
|
|
30
|
+
onRawEvent?: (rawFrame: string) => void;
|
|
30
31
|
}
|
|
31
32
|
export declare function callClaudeApi(opts: CallClaudeApiOptions): Promise<ParsedOutput>;
|
|
33
|
+
/** Options accepted by {@link callClaudeApiPassthrough}. */
export interface CallClaudeApiPassthroughOptions {
    /** The client's own request body, forwarded upstream after rewriting. */
    clientBody: Record<string, unknown>;
    /** Model identifier to use for the upstream request. */
    model: string;
    /** Optional comma-separated `anthropic-beta` value supplied by the client. */
    clientBeta?: string;
    /** Optional callback invoked with each raw SSE frame as it is received. */
    onRawEvent?: (rawFrame: string) => void;
}
/**
 * Forwards a client-supplied request body upstream and resolves with the
 * aggregated parsed output.
 */
export declare function callClaudeApiPassthrough(opts: CallClaudeApiPassthroughOptions): Promise<ParsedOutput>;
|
|
@@ -696,6 +696,12 @@ export async function callClaudeApi(opts) {
|
|
|
696
696
|
configureRateGuard();
|
|
697
697
|
return rateGuard.run(() => doCallClaudeApi(opts));
|
|
698
698
|
}
|
|
699
|
+
export async function callClaudeApiPassthrough(opts) {
|
|
700
|
+
configureDispatcher();
|
|
701
|
+
if (!rateGuard)
|
|
702
|
+
configureRateGuard();
|
|
703
|
+
return rateGuard.run(() => doCallClaudeApiPassthrough(opts));
|
|
704
|
+
}
|
|
699
705
|
// Maximum number of automatic retries on transient upstream errors
|
|
700
706
|
// (429 / 5xx). Matches the Anthropic official SDK default. Does NOT count
|
|
701
707
|
// the initial attempt or the one-shot 401-refresh retry.
|
|
@@ -820,7 +826,10 @@ async function doCallClaudeApi(opts) {
|
|
|
820
826
|
// Stream parser — real Claude Code's main path uses stream:true; see
|
|
821
827
|
// body construction above. parseClaudeSseResponse aggregates text
|
|
822
828
|
// deltas + usage until message_stop, matching SDK semantics.
|
|
823
|
-
|
|
829
|
+
// When opts.onRawEvent is set, each SSE frame is also forwarded
|
|
830
|
+
// verbatim so the Hub can stream it through to the end client in
|
|
831
|
+
// real time instead of waiting for the whole response.
|
|
832
|
+
const parsed = await parseClaudeSseResponse(resp, opts.model, opts.onRawEvent);
|
|
824
833
|
recordSpendFromUsage(parsed, opts.model);
|
|
825
834
|
return parsed;
|
|
826
835
|
}
|
|
@@ -871,6 +880,230 @@ async function doCallClaudeApi(opts) {
|
|
|
871
880
|
throw new Error(`Anthropic ${resp.status}: ${errText.slice(0, 400)}`);
|
|
872
881
|
}
|
|
873
882
|
}
|
|
883
|
+
// ── Passthrough helpers ──────────────────────────────────────────────────
|
|
884
|
+
// Returns the text of the FIRST message whose role is "user".
//   - string content          → returned as-is
//   - array of content blocks → text of the first { type: "text" } block
//   - anything else           → ""
// Only the first user message is inspected: if it carries no usable text
// the result is "" even when later user messages do. Non-array input and
// non-object entries are tolerated and yield "" / are skipped.
// The caller uses the result when building the attribution header.
function extractFirstUserMessageText(messages) {
    if (!Array.isArray(messages)) {
        return "";
    }
    const firstUser = messages.find((entry) => entry && typeof entry === "object" && entry.role === "user");
    if (!firstUser) {
        return "";
    }
    const payload = firstUser.content;
    if (typeof payload === "string") {
        return payload;
    }
    if (!Array.isArray(payload)) {
        return "";
    }
    const textPart = payload.find((part) => part &&
        typeof part === "object" &&
        part.type === "text" &&
        typeof part.text === "string");
    return textPart ? textPart.text : "";
}
|
|
913
|
+
// Builds the comma-joined anthropic-beta header value.
//   required   — iterable of beta names that must always be present
//   clientBeta — optional comma-separated list supplied by the buyer
// Required entries come first, client extras are appended after them.
// Every entry is trimmed, empty entries are dropped, and duplicates are
// removed while keeping first-seen order.
function mergeBetas(required, clientBeta) {
    const clientTokens = clientBeta ? clientBeta.split(",") : [];
    // A Set iterates in insertion order, which gives dedupe + stable order.
    const merged = new Set();
    for (const raw of [...required, ...clientTokens]) {
        const token = raw.trim();
        if (token) {
            merged.add(token);
        }
    }
    return [...merged].join(",");
}
|
|
940
|
+
// Rewrite the first system block's x-anthropic-billing-header (if present)
|
|
941
|
+
// so cc_version and FP3 match OUR fingerprint and the buyer's actual
|
|
942
|
+
// first user message. Real Claude Code always emits this block; sub2api's
|
|
943
|
+
// gateway_service.go mirrors it verbatim but rewrites the version to
|
|
944
|
+
// match the account's pinned UA (syncBillingHeaderVersion, gateway_billing_header.go).
|
|
945
|
+
//
|
|
946
|
+
// Critical because Anthropic's validator expects cc_version.<FP3> where
|
|
947
|
+
// FP3 is a deterministic hash of (message_chars + cli_version). If we
|
|
948
|
+
// leave the buyer's FP3 in place but their UA was a different version
|
|
949
|
+
// from our pinned UA, the FP3 no longer matches cli_version in the header
|
|
950
|
+
// and the validator rejects the request.
|
|
951
|
+
function syncPassthroughBillingHeader(body, fingerprint) {
|
|
952
|
+
if (!Array.isArray(body.system))
|
|
953
|
+
return;
|
|
954
|
+
const system = body.system;
|
|
955
|
+
if (system.length === 0)
|
|
956
|
+
return;
|
|
957
|
+
const firstBlock = system[0];
|
|
958
|
+
if (!firstBlock ||
|
|
959
|
+
typeof firstBlock !== "object" ||
|
|
960
|
+
firstBlock.type !== "text" ||
|
|
961
|
+
typeof firstBlock.text !== "string") {
|
|
962
|
+
return;
|
|
963
|
+
}
|
|
964
|
+
const currentText = firstBlock.text;
|
|
965
|
+
if (!currentText.startsWith("x-anthropic-billing-header:")) {
|
|
966
|
+
// Non-CC client didn't include a billing header — leave system alone.
|
|
967
|
+
// If we're strict about this we could PREPEND one, but for now we
|
|
968
|
+
// only touch what exists so non-CC passthrough (e.g. anthropic SDK
|
|
969
|
+
// direct) works without extra surgery.
|
|
970
|
+
return;
|
|
971
|
+
}
|
|
972
|
+
const firstUserMsg = extractFirstUserMessageText(body.messages);
|
|
973
|
+
const newHeader = buildClaudeAttributionHeader(firstUserMsg, fingerprint.cc_version, fingerprint.cc_entrypoint);
|
|
974
|
+
firstBlock.text = newHeader;
|
|
975
|
+
}
|
|
976
|
+
// Walk system text blocks and rewrite third-party identity sentences
|
|
977
|
+
// (OpenCode, etc.) to the Claude Code banner. sub2api does the same thing
|
|
978
|
+
// in gateway_service.go:sanitizeSystemText — without it Anthropic's
|
|
979
|
+
// system-prompt dice-coefficient validator will 403 the request because
|
|
980
|
+
// the system prompt doesn't score high enough against the known real
|
|
981
|
+
// Claude Code templates.
|
|
982
|
+
function sanitizePassthroughSystemArray(body) {
|
|
983
|
+
if (!Array.isArray(body.system))
|
|
984
|
+
return;
|
|
985
|
+
for (const block of body.system) {
|
|
986
|
+
if (block &&
|
|
987
|
+
typeof block === "object" &&
|
|
988
|
+
block.type === "text" &&
|
|
989
|
+
typeof block.text === "string") {
|
|
990
|
+
block.text = sanitizePrompt(block.text);
|
|
991
|
+
}
|
|
992
|
+
}
|
|
993
|
+
}
|
|
994
|
+
// Passthrough variant of the Claude upstream call: forwards the buyer's own
// request body (opts.clientBody) to ANTHROPIC_MESSAGES_URL after rewriting
// the identity-bearing fields, then aggregates the SSE response.
//
//   opts.clientBody — buyer's request body; never mutated (deep-cloned)
//   opts.model      — model name; normalized before being written to the body
//   opts.clientBeta — optional comma-separated anthropic-beta list to merge
//   opts.onRawEvent — optional callback handed to parseClaudeSseResponse to
//                     receive each raw SSE frame
//
// Resolves with the parsed output of a successful response. Throws an Error
// on 429 (after triggering a rate-guard cooldown), on a second 401, on 5xx
// once MAX_TRANSIENT_RETRIES is exhausted, and on any other non-OK status.
async function doCallClaudeApiPassthrough(opts) {
    // The original sequence loads the fingerprint, runs the auto-bump, then
    // reads it again because the auto-bump mutates the cached fingerprint.
    // The first call is kept for its side effects only (presumably it primes
    // that cache — confirm against loadFingerprint) and its result is unused.
    loadFingerprint();
    autoBumpFingerprintUaVersion();
    const fp = loadFingerprint();
    const sessionId = getMaskedSessionId();
    // Deep clone so none of the rewrites below reach the buyer's object. A
    // shallow copy is not enough: system blocks and the thinking config are
    // nested objects that get modified, and the Hub may reuse the same dict
    // for multiple dispatch attempts.
    const body = structuredClone(opts.clientBody);
    // Normalize model to canonical long-form. Anthropic OAuth rejects the
    // short form for some versions (e.g. claude-sonnet-4-5 → must be
    // claude-sonnet-4-5-20250929).
    body.model = normalizeModel(opts.model);
    // Always request SSE: parseClaudeSseResponse needs the streaming wire
    // format regardless of what the buyer asked for.
    body.stream = true;
    // Per the original note, real Claude Code never sends these, so they are
    // dropped to keep the request shape consistent.
    delete body.temperature;
    delete body.tool_choice;
    // Replace metadata.user_id with our own identity; every other metadata
    // field the buyer sent is preserved.
    const metadata = body.metadata && typeof body.metadata === "object"
        ? body.metadata
        : {};
    body.metadata = {
        ...metadata,
        user_id: buildMetadataUserID(fp, sessionId),
    };
    // Sanitize system text first, then rebuild the billing header so it
    // matches our pinned CLI version.
    sanitizePassthroughSystemArray(body);
    syncPassthroughBillingHeader(body, fp);
    // Raise thinking.budget_tokens to the minimum so small buyer-chosen
    // budgets don't fail, and keep max_tokens strictly above the budget.
    const thinking = body.thinking;
    if (thinking &&
        typeof thinking === "object" &&
        thinking.type === "enabled" &&
        typeof thinking.budget_tokens === "number") {
        if (thinking.budget_tokens < CLAUDE_MIN_THINKING_BUDGET) {
            thinking.budget_tokens = CLAUDE_MIN_THINKING_BUDGET;
        }
        if (typeof body.max_tokens === "number" &&
            body.max_tokens <= thinking.budget_tokens) {
            body.max_tokens = thinking.budget_tokens + 1;
        }
    }
    // Always send a tools array, even when empty.
    if (!Array.isArray(body.tools)) {
        body.tools = [];
    }
    // Serialized once; the same payload is re-sent on every retry.
    const bodyJson = JSON.stringify(body);
    // Required betas first, buyer's extras appended.
    const requiredBetas = pickClaudeBetasForModel(opts.model);
    const mergedBetas = mergeBetas(requiredBetas, opts.clientBeta);
    let transientAttempt = 0;
    let hasRefreshed = false;
    while (true) {
        // Re-fetched every iteration so a retry after 401 picks up new creds.
        const creds = await getFreshCreds();
        const resp = await fetch(ANTHROPIC_MESSAGES_URL, {
            method: "POST",
            headers: {
                ...STATIC_CLAUDE_CODE_HEADERS,
                "accept": "application/json, text/event-stream",
                "anthropic-beta": mergedBetas,
                "user-agent": fp.user_agent,
                "authorization": `Bearer ${creds.accessToken}`,
                "x-claude-code-session-id": sessionId,
            },
            body: bodyJson,
        });
        // Session-window headers are recorded for every response, OK or not.
        const sessionWin = extractSessionWindowFromHeaders(resp.headers);
        if (sessionWin)
            rateGuard?.setSessionWindow(sessionWin);
        if (resp.ok) {
            const parsed = await parseClaudeSseResponse(resp, opts.model, opts.onRawEvent);
            recordSpendFromUsage(parsed, opts.model);
            return parsed;
        }
        const errText = await resp.text();
        if (resp.status === 429) {
            // Rate-limited: never retried here. Start a cooldown from the
            // response headers, or fall back to five minutes.
            const cooldown = extractCooldownUntilFromHeaders(resp.headers);
            if (cooldown && rateGuard) {
                rateGuard.triggerCooldown(cooldown.untilMs, cooldown.reason);
            }
            else if (rateGuard) {
                rateGuard.triggerCooldown(Date.now() + 5 * 60_000, "fallback 5m (no reset header)");
            }
            throw new Error(`Anthropic 429 rate-limited: ${errText.slice(0, 300)}`);
        }
        if (resp.status === 401 && !hasRefreshed) {
            // One-shot refresh: clear the cached creds and try once more.
            logger.warn("[claude-api] 401 from upstream (passthrough), refreshing token + retry");
            hasRefreshed = true;
            cachedCreds = null;
            continue;
        }
        const isTransient = resp.status >= 500 && resp.status <= 599;
        if (isTransient && transientAttempt < MAX_TRANSIENT_RETRIES) {
            // Honor retry-after when present, else exponential backoff + jitter.
            const retryAfter = parseRetryAfterMs(resp.headers.get("retry-after"));
            const backoffMs = retryAfter ?? 500 * Math.pow(2, transientAttempt) + Math.random() * 500;
            logger.warn(`[claude-api] ${resp.status} from upstream (passthrough attempt ${transientAttempt + 1}/${MAX_TRANSIENT_RETRIES + 1}), retrying in ${Math.round(backoffMs)}ms — ${errText.slice(0, 200)}`);
            await new Promise((r) => setTimeout(r, backoffMs));
            transientAttempt++;
            continue;
        }
        throw new Error(`Anthropic ${resp.status}: ${errText.slice(0, 400)}`);
    }
}
|
|
874
1107
|
function recordSpendFromUsage(parsed, model) {
|
|
875
1108
|
if (!rateGuard)
|
|
876
1109
|
return;
|
|
@@ -911,7 +1144,7 @@ function recordSpendFromUsage(parsed, model) {
|
|
|
911
1144
|
* event: error (upstream error — throw)
|
|
912
1145
|
* data: {"type":"error","error":{"type":"overloaded_error","message":"..."}}
|
|
913
1146
|
*/
|
|
914
|
-
async function parseClaudeSseResponse(resp, fallbackModel) {
|
|
1147
|
+
async function parseClaudeSseResponse(resp, fallbackModel, onRawFrame) {
|
|
915
1148
|
const reader = resp.body?.getReader();
|
|
916
1149
|
if (!reader) {
|
|
917
1150
|
throw new Error("Claude streamGenerateContent returned no body");
|
|
@@ -925,6 +1158,10 @@ async function parseClaudeSseResponse(resp, fallbackModel) {
|
|
|
925
1158
|
let cacheCreation = 0;
|
|
926
1159
|
let cacheRead = 0;
|
|
927
1160
|
let streamError;
|
|
1161
|
+
// Accumulates one SSE frame (everything between blank lines) so we can
|
|
1162
|
+
// emit the full `event: X\ndata: Y\n\n` block via onRawFrame. SSE frames
|
|
1163
|
+
// are terminated by an empty line per the spec.
|
|
1164
|
+
let frameLines = [];
|
|
928
1165
|
const processChunk = (jsonStr) => {
|
|
929
1166
|
const trimmed = jsonStr.trim();
|
|
930
1167
|
if (!trimmed)
|
|
@@ -992,6 +1229,22 @@ async function parseClaudeSseResponse(resp, fallbackModel) {
|
|
|
992
1229
|
break;
|
|
993
1230
|
}
|
|
994
1231
|
};
|
|
1232
|
+
const flushFrame = () => {
|
|
1233
|
+
if (frameLines.length === 0)
|
|
1234
|
+
return;
|
|
1235
|
+
// Forward the raw SSE frame verbatim so consumers see it exactly as
|
|
1236
|
+
// Anthropic emitted it (including the event: name line, which Claude
|
|
1237
|
+
// Code's SDK parser uses as the dispatch key).
|
|
1238
|
+
if (onRawFrame) {
|
|
1239
|
+
onRawFrame(frameLines.join("\n") + "\n\n");
|
|
1240
|
+
}
|
|
1241
|
+
for (const line of frameLines) {
|
|
1242
|
+
if (line.startsWith("data:")) {
|
|
1243
|
+
processChunk(line.slice(5));
|
|
1244
|
+
}
|
|
1245
|
+
}
|
|
1246
|
+
frameLines = [];
|
|
1247
|
+
};
|
|
995
1248
|
while (true) {
|
|
996
1249
|
const { value, done } = await reader.read();
|
|
997
1250
|
if (done)
|
|
@@ -1001,19 +1254,18 @@ async function parseClaudeSseResponse(resp, fallbackModel) {
|
|
|
1001
1254
|
while ((newlineIdx = buffer.indexOf("\n")) >= 0) {
|
|
1002
1255
|
const line = buffer.slice(0, newlineIdx).replace(/\r$/, "");
|
|
1003
1256
|
buffer = buffer.slice(newlineIdx + 1);
|
|
1004
|
-
if (
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1257
|
+
if (line === "") {
|
|
1258
|
+
// Blank line = end of SSE frame.
|
|
1259
|
+
flushFrame();
|
|
1260
|
+
}
|
|
1261
|
+
else {
|
|
1262
|
+
frameLines.push(line);
|
|
1010
1263
|
}
|
|
1011
1264
|
}
|
|
1012
1265
|
}
|
|
1013
|
-
// Flush trailing
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
}
|
|
1266
|
+
// Flush any trailing frame without a final blank line. Rare, but SSE
|
|
1267
|
+
// allows a stream to end without a terminating \n\n.
|
|
1268
|
+
flushFrame();
|
|
1017
1269
|
if (streamError) {
|
|
1018
1270
|
throw new Error(`Anthropic stream error: ${streamError.type ?? "unknown"} — ${streamError.message ?? ""}`);
|
|
1019
1271
|
}
|