clawmoney 0.15.5 → 0.15.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/relay/provider.js
CHANGED
|
@@ -161,7 +161,7 @@ function extractMessageText(content) {
|
|
|
161
161
|
function messagesToPrompt(messages) {
|
|
162
162
|
return messages.map((m) => extractMessageText(m.content)).join("\n");
|
|
163
163
|
}
|
|
164
|
-
async function executeRelayRequest(request, config) {
|
|
164
|
+
async function executeRelayRequest(request, config, sendChunk) {
|
|
165
165
|
const { request_id, max_budget_usd } = request;
|
|
166
166
|
const cliType = request.cli_type ?? config.relay.cli_type;
|
|
167
167
|
const model = request.model ?? config.relay.model;
|
|
@@ -217,6 +217,12 @@ async function executeRelayRequest(request, config) {
|
|
|
217
217
|
prompt,
|
|
218
218
|
model,
|
|
219
219
|
maxTokens: max_budget_usd ? undefined : 4096,
|
|
220
|
+
// Forward each raw Anthropic SSE frame to the Hub in real time
|
|
221
|
+
// so the end client sees tokens as they're generated (instead of
|
|
222
|
+
// waiting for the whole response to arrive). Only claude-api has
|
|
223
|
+
// true pass-through streaming today — codex/gemini/antigravity
|
|
224
|
+
// still buffer the full response upstream and emit a single frame.
|
|
225
|
+
onRawEvent: sendChunk,
|
|
220
226
|
});
|
|
221
227
|
}
|
|
222
228
|
const elapsedMs = Date.now() - startMs;
|
|
@@ -330,7 +336,20 @@ export function runRelayProvider(cliOverride) {
|
|
|
330
336
|
}
|
|
331
337
|
activeTasks.add(request.request_id);
|
|
332
338
|
logger.info(`Processing relay request=${request.request_id} (active=${activeTasks.size}/${config.relay.concurrency})`);
|
|
333
|
-
|
|
339
|
+
// Per-request SSE chunk forwarder. Each raw Anthropic SSE frame is sent
|
|
340
|
+
// to the Hub as its own WS event so the Hub can relay it straight to the
|
|
341
|
+
// buyer — drops TTFT from "whole response" to "first-token-from-upstream".
|
|
342
|
+
// WS sends are fire-and-forget here; the final relay_response still
|
|
343
|
+
// carries the fully aggregated content as a fallback for Hubs that
|
|
344
|
+
// haven't wired up chunk forwarding yet.
|
|
345
|
+
const sendChunk = (sse) => {
|
|
346
|
+
wsClient.send({
|
|
347
|
+
event: "relay_stream_chunk",
|
|
348
|
+
request_id: request.request_id,
|
|
349
|
+
sse,
|
|
350
|
+
});
|
|
351
|
+
};
|
|
352
|
+
executeRelayRequest(request, config, sendChunk)
|
|
334
353
|
.then((response) => {
|
|
335
354
|
const sent = wsClient.send(response);
|
|
336
355
|
if (sent) {
|
package/dist/relay/types.d.ts
CHANGED
|
@@ -51,7 +51,12 @@ export interface RelayResponse {
|
|
|
51
51
|
error?: string;
|
|
52
52
|
session_window?: RelayResponseSessionWindow;
|
|
53
53
|
}
|
|
54
|
-
export
|
|
54
|
+
export interface RelayStreamChunkEvent {
|
|
55
|
+
event: "relay_stream_chunk";
|
|
56
|
+
request_id: string;
|
|
57
|
+
sse: string;
|
|
58
|
+
}
|
|
59
|
+
export type RelayOutgoingEvent = RelayResponse | RelayStreamChunkEvent;
|
|
55
60
|
export interface ParsedOutput {
|
|
56
61
|
text: string;
|
|
57
62
|
sessionId: string;
|
|
@@ -820,7 +820,10 @@ async function doCallClaudeApi(opts) {
|
|
|
820
820
|
// Stream parser — real Claude Code's main path uses stream:true; see
|
|
821
821
|
// body construction above. parseClaudeSseResponse aggregates text
|
|
822
822
|
// deltas + usage until message_stop, matching SDK semantics.
|
|
823
|
-
|
|
823
|
+
// When opts.onRawEvent is set, each SSE frame is also forwarded
|
|
824
|
+
// verbatim so the Hub can stream it through to the end client in
|
|
825
|
+
// real time instead of waiting for the whole response.
|
|
826
|
+
const parsed = await parseClaudeSseResponse(resp, opts.model, opts.onRawEvent);
|
|
824
827
|
recordSpendFromUsage(parsed, opts.model);
|
|
825
828
|
return parsed;
|
|
826
829
|
}
|
|
@@ -911,7 +914,7 @@ function recordSpendFromUsage(parsed, model) {
|
|
|
911
914
|
* event: error (upstream error — throw)
|
|
912
915
|
* data: {"type":"error","error":{"type":"overloaded_error","message":"..."}}
|
|
913
916
|
*/
|
|
914
|
-
async function parseClaudeSseResponse(resp, fallbackModel) {
|
|
917
|
+
async function parseClaudeSseResponse(resp, fallbackModel, onRawFrame) {
|
|
915
918
|
const reader = resp.body?.getReader();
|
|
916
919
|
if (!reader) {
|
|
917
920
|
throw new Error("Claude streamGenerateContent returned no body");
|
|
@@ -925,6 +928,10 @@ async function parseClaudeSseResponse(resp, fallbackModel) {
|
|
|
925
928
|
let cacheCreation = 0;
|
|
926
929
|
let cacheRead = 0;
|
|
927
930
|
let streamError;
|
|
931
|
+
// Accumulates one SSE frame (everything between blank lines) so we can
|
|
932
|
+
// emit the full `event: X\ndata: Y\n\n` block via onRawFrame. SSE frames
|
|
933
|
+
// are terminated by an empty line per the spec.
|
|
934
|
+
let frameLines = [];
|
|
928
935
|
const processChunk = (jsonStr) => {
|
|
929
936
|
const trimmed = jsonStr.trim();
|
|
930
937
|
if (!trimmed)
|
|
@@ -992,6 +999,22 @@ async function parseClaudeSseResponse(resp, fallbackModel) {
|
|
|
992
999
|
break;
|
|
993
1000
|
}
|
|
994
1001
|
};
|
|
1002
|
+
const flushFrame = () => {
|
|
1003
|
+
if (frameLines.length === 0)
|
|
1004
|
+
return;
|
|
1005
|
+
// Forward the raw SSE frame verbatim so consumers see it exactly as
|
|
1006
|
+
// Anthropic emitted it (including the event: name line, which Claude
|
|
1007
|
+
// Code's SDK parser uses as the dispatch key).
|
|
1008
|
+
if (onRawFrame) {
|
|
1009
|
+
onRawFrame(frameLines.join("\n") + "\n\n");
|
|
1010
|
+
}
|
|
1011
|
+
for (const line of frameLines) {
|
|
1012
|
+
if (line.startsWith("data:")) {
|
|
1013
|
+
processChunk(line.slice(5));
|
|
1014
|
+
}
|
|
1015
|
+
}
|
|
1016
|
+
frameLines = [];
|
|
1017
|
+
};
|
|
995
1018
|
while (true) {
|
|
996
1019
|
const { value, done } = await reader.read();
|
|
997
1020
|
if (done)
|
|
@@ -1001,19 +1024,18 @@ async function parseClaudeSseResponse(resp, fallbackModel) {
|
|
|
1001
1024
|
while ((newlineIdx = buffer.indexOf("\n")) >= 0) {
|
|
1002
1025
|
const line = buffer.slice(0, newlineIdx).replace(/\r$/, "");
|
|
1003
1026
|
buffer = buffer.slice(newlineIdx + 1);
|
|
1004
|
-
if (
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1027
|
+
if (line === "") {
|
|
1028
|
+
// Blank line = end of SSE frame.
|
|
1029
|
+
flushFrame();
|
|
1030
|
+
}
|
|
1031
|
+
else {
|
|
1032
|
+
frameLines.push(line);
|
|
1010
1033
|
}
|
|
1011
1034
|
}
|
|
1012
1035
|
}
|
|
1013
|
-
// Flush trailing
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
}
|
|
1036
|
+
// Flush any trailing frame without a final blank line. Rare, but SSE
|
|
1037
|
+
// allows a stream to end without a terminating \n\n.
|
|
1038
|
+
flushFrame();
|
|
1017
1039
|
if (streamError) {
|
|
1018
1040
|
throw new Error(`Anthropic stream error: ${streamError.type ?? "unknown"} — ${streamError.message ?? ""}`);
|
|
1019
1041
|
}
|