@threaded/ai 1.0.25 → 1.0.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +15 -0
- package/.lore +65 -0
- package/dist/index.cjs +138 -17
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +11 -1
- package/dist/index.d.ts +11 -1
- package/dist/index.js +137 -17
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
{
|
|
2
|
+
"permissions": {
|
|
3
|
+
"allow": [
|
|
4
|
+
"WebSearch",
|
|
5
|
+
"WebFetch(domain:platform.openai.com)",
|
|
6
|
+
"WebFetch(domain:docs.anthropic.com)",
|
|
7
|
+
"WebFetch(domain:ai.google.dev)",
|
|
8
|
+
"WebFetch(domain:docs.x.ai)",
|
|
9
|
+
"WebFetch(domain:docs.cloud.google.com)",
|
|
10
|
+
"WebFetch(domain:latenode.com)",
|
|
11
|
+
"WebFetch(domain:github.com)",
|
|
12
|
+
"WebFetch(domain:docs.aimlapi.com)"
|
|
13
|
+
]
|
|
14
|
+
}
|
|
15
|
+
}
|
package/.lore
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
-- core pattern --
|
|
2
|
+
|
|
3
|
+
everything is built on compose/scope/model. compose chains steps, scope isolates context, model calls an LLM.
|
|
4
|
+
|
|
5
|
+
```js
|
|
6
|
+
const workflow = compose(
|
|
7
|
+
scope({ system: "...", tools: [...] }, model({ model: "openai/gpt-4o" }))
|
|
8
|
+
)
|
|
9
|
+
const ctx = await workflow("user message")
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
-- key setup --
|
|
13
|
+
|
|
14
|
+
must call setKeys() before any model() call, or set env vars (OPENAI_API_KEY, ANTHROPIC_API_KEY, GEMINI_API_KEY, XAI_API_KEY)
|
|
15
|
+
|
|
16
|
+
```js
|
|
17
|
+
import { setKeys } from "@threaded/ai"
|
|
18
|
+
setKeys({ openai: process.env.OPENAI_API_KEY })
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
-- model naming --
|
|
22
|
+
|
|
23
|
+
always "provider/model-name": openai/gpt-4o, anthropic/claude-sonnet-4-5-20250929, google/gemini-2.5-flash, xai/grok-4-1-fast-non-reasoning. no prefix defaults to huggingface.
|
|
24
|
+
|
|
25
|
+
-- structured output --
|
|
26
|
+
|
|
27
|
+
pass schema (zod or json schema) to model(). response comes back as JSON in lastResponse.content. claude wraps it in markdown code fences - strip them before JSON.parse.
|
|
28
|
+
|
|
29
|
+
```js
|
|
30
|
+
// Pull the structured (JSON) result out of the newest assistant message.
// Returns the parsed value, or null when there is no assistant message or it
// carries no text. Still throws SyntaxError when the text is not valid JSON.
function extractResult(ctx) {
  const last = ctx.history?.findLast(m => m.role === 'assistant')
  if (!last?.content) return null
  // content is either a plain string or an array of parts. join every text part
  // so a leading non-text part does not hide the JSON payload.
  let text = ''
  if (typeof last.content === 'string') {
    text = last.content
  } else if (Array.isArray(last.content)) {
    text = last.content
      .map(part => (typeof part?.text === 'string' ? part.text : ''))
      .join('')
  }
  // claude wraps the JSON in markdown code fences - keep only the fenced body
  const fenced = text.match(/```(?:json)?\s*\n?([\s\S]*?)\n?```/)
  if (fenced) text = fenced[1]
  text = text.trim()
  // nothing to parse: behave like the "no content" case instead of letting
  // JSON.parse('') throw
  if (!text) return null
  return JSON.parse(text)
}
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
-- pitfalls --
|
|
41
|
+
|
|
42
|
+
do NOT use Inherit.Nothing - it drops the user prompt entirely. use Inherit.Conversation (default) instead.
|
|
43
|
+
|
|
44
|
+
until: noToolsCalled() loops can cause runaway API calls (5-6+ per invocation). use maxCalls() on tools to limit.
|
|
45
|
+
|
|
46
|
+
silent scopes run analysis without polluting chat history but still accumulate token usage on ctx.usage.
|
|
47
|
+
|
|
48
|
+
events are local to the scope where the stream callback is set. nested silent scopes with their own stream callback get their own events.
|
|
49
|
+
|
|
50
|
+
-- rate limiting --
|
|
51
|
+
|
|
52
|
+
anthropic tier 1 has very low limits (30K input tokens/min). when using anthropic, add delays between calls and use maxCalls() aggressively. use a 90s backoff on rate limit errors.
|
|
53
|
+
|
|
54
|
+
-- token usage --
|
|
55
|
+
|
|
56
|
+
ctx.usage accumulates across all model() calls, tool loops, and nested scopes. always available after workflow completes:
|
|
57
|
+
{ promptTokens, completionTokens, totalTokens }
|
|
58
|
+
|
|
59
|
+
-- integration patterns --
|
|
60
|
+
|
|
61
|
+
SSE streaming: pass stream callback in scope config, write events to res as `data: ${JSON.stringify(event)}\n\n`
|
|
62
|
+
|
|
63
|
+
tool approval: use toolConfig.requireApproval + approvalCallback for gating dangerous tool execution in web UIs
|
|
64
|
+
|
|
65
|
+
MCP tools: createMCPTools(client) converts MCP server tools to native format. tools are prefixed with server name.
|
package/dist/index.cjs
CHANGED
|
@@ -33,6 +33,7 @@ __export(index_exports, {
|
|
|
33
33
|
IMAGE_EDIT_MODEL_SCHEMA: () => IMAGE_EDIT_MODEL_SCHEMA,
|
|
34
34
|
IMAGE_MODEL_SCHEMA: () => IMAGE_MODEL_SCHEMA,
|
|
35
35
|
Inherit: () => Inherit,
|
|
36
|
+
addUsage: () => addUsage,
|
|
36
37
|
appendToLastRequest: () => appendToLastRequest,
|
|
37
38
|
compose: () => compose,
|
|
38
39
|
convertMCPSchemaToToolSchema: () => convertMCPSchemaToToolSchema,
|
|
@@ -222,6 +223,11 @@ var maxCalls = (toolConfig, maxCalls2) => ({
|
|
|
222
223
|
...toolConfig,
|
|
223
224
|
_maxCalls: maxCalls2
|
|
224
225
|
});
|
|
226
|
+
/**
 * Return a new usage total: the counts in `existing` plus the counts of one
 * more call. `existing` is never mutated.
 *
 * Any count that is not a finite number (undefined, null, NaN, a string, ...)
 * is treated as 0, so a provider response without usage data cannot turn the
 * running total into NaN or a concatenated string.
 *
 * @param {{promptTokens?: number, completionTokens?: number, totalTokens?: number} | null | undefined} existing
 *   Usage accumulated so far; may be absent on the first call.
 * @param {number} promptTokens - Prompt/input tokens of the new call.
 * @param {number} completionTokens - Completion/output tokens of the new call.
 * @param {number} totalTokens - Total tokens of the new call.
 * @returns {{promptTokens: number, completionTokens: number, totalTokens: number}}
 */
var addUsage = (existing, promptTokens, completionTokens, totalTokens) => {
  // Number.isFinite does not coerce, so "5", null and NaN all fall back to 0.
  const count = (value) => (Number.isFinite(value) ? value : 0);
  return {
    promptTokens: count(existing?.promptTokens) + count(promptTokens),
    completionTokens: count(existing?.completionTokens) + count(completionTokens),
    totalTokens: count(existing?.totalTokens) + count(totalTokens)
  };
};
|
|
225
231
|
|
|
226
232
|
// src/embed.ts
|
|
227
233
|
var modelCache = /* @__PURE__ */ new Map();
|
|
@@ -533,7 +539,8 @@ var callOpenAI = async (config, ctx) => {
|
|
|
533
539
|
const body = {
|
|
534
540
|
model: model2,
|
|
535
541
|
messages,
|
|
536
|
-
stream: !!ctx.stream
|
|
542
|
+
stream: !!ctx.stream,
|
|
543
|
+
...ctx.stream && { stream_options: { include_usage: true } }
|
|
537
544
|
};
|
|
538
545
|
if (schema) {
|
|
539
546
|
body.response_format = {
|
|
@@ -578,7 +585,8 @@ var callOpenAI = async (config, ctx) => {
|
|
|
578
585
|
return {
|
|
579
586
|
...ctx,
|
|
580
587
|
lastResponse: msg,
|
|
581
|
-
history: [...ctx.history, msg]
|
|
588
|
+
history: [...ctx.history, msg],
|
|
589
|
+
usage: addUsage(ctx.usage, data.usage?.prompt_tokens || 0, data.usage?.completion_tokens || 0, data.usage?.total_tokens || 0)
|
|
582
590
|
};
|
|
583
591
|
};
|
|
584
592
|
var handleOpenAIStream = async (response, ctx) => {
|
|
@@ -587,6 +595,7 @@ var handleOpenAIStream = async (response, ctx) => {
|
|
|
587
595
|
let fullContent = "";
|
|
588
596
|
let toolCalls = [];
|
|
589
597
|
let buffer = "";
|
|
598
|
+
let streamUsage = null;
|
|
590
599
|
try {
|
|
591
600
|
while (true) {
|
|
592
601
|
if (ctx.abortSignal?.aborted) {
|
|
@@ -604,6 +613,9 @@ var handleOpenAIStream = async (response, ctx) => {
|
|
|
604
613
|
if (!data) continue;
|
|
605
614
|
try {
|
|
606
615
|
const parsed = JSON.parse(data);
|
|
616
|
+
if (parsed.usage) {
|
|
617
|
+
streamUsage = parsed.usage;
|
|
618
|
+
}
|
|
607
619
|
const delta = parsed.choices?.[0]?.delta;
|
|
608
620
|
if (delta?.content) {
|
|
609
621
|
fullContent += delta.content;
|
|
@@ -629,10 +641,15 @@ var handleOpenAIStream = async (response, ctx) => {
|
|
|
629
641
|
if (toolCalls.length > 0) {
|
|
630
642
|
msg.tool_calls = toolCalls;
|
|
631
643
|
}
|
|
644
|
+
const usage = addUsage(ctx.usage, streamUsage?.prompt_tokens || 0, streamUsage?.completion_tokens || 0, streamUsage?.total_tokens || 0);
|
|
645
|
+
if (ctx.stream && streamUsage) {
|
|
646
|
+
ctx.stream({ type: "usage", usage });
|
|
647
|
+
}
|
|
632
648
|
return {
|
|
633
649
|
...ctx,
|
|
634
650
|
lastResponse: msg,
|
|
635
|
-
history: [...ctx.history, msg]
|
|
651
|
+
history: [...ctx.history, msg],
|
|
652
|
+
usage
|
|
636
653
|
};
|
|
637
654
|
};
|
|
638
655
|
|
|
@@ -768,10 +785,13 @@ Return only the JSON object, no other text or formatting.`;
|
|
|
768
785
|
}
|
|
769
786
|
];
|
|
770
787
|
}
|
|
788
|
+
const inputTokens = data.usage?.input_tokens || 0;
|
|
789
|
+
const outputTokens = data.usage?.output_tokens || 0;
|
|
771
790
|
return {
|
|
772
791
|
...ctx,
|
|
773
792
|
lastResponse: msg,
|
|
774
|
-
history: [...ctx.history, msg]
|
|
793
|
+
history: [...ctx.history, msg],
|
|
794
|
+
usage: addUsage(ctx.usage, inputTokens, outputTokens, inputTokens + outputTokens)
|
|
775
795
|
};
|
|
776
796
|
};
|
|
777
797
|
var handleAnthropicStream = async (response, ctx) => {
|
|
@@ -780,6 +800,8 @@ var handleAnthropicStream = async (response, ctx) => {
|
|
|
780
800
|
let fullContent = "";
|
|
781
801
|
const toolCalls = [];
|
|
782
802
|
let buffer = "";
|
|
803
|
+
let inputTokens = 0;
|
|
804
|
+
let outputTokens = 0;
|
|
783
805
|
try {
|
|
784
806
|
while (true) {
|
|
785
807
|
if (ctx.abortSignal?.aborted) {
|
|
@@ -796,6 +818,12 @@ var handleAnthropicStream = async (response, ctx) => {
|
|
|
796
818
|
if (!data) continue;
|
|
797
819
|
try {
|
|
798
820
|
const parsed = JSON.parse(data);
|
|
821
|
+
if (parsed.type === "message_start" && parsed.message?.usage) {
|
|
822
|
+
inputTokens = parsed.message.usage.input_tokens || 0;
|
|
823
|
+
}
|
|
824
|
+
if (parsed.type === "message_delta" && parsed.usage) {
|
|
825
|
+
outputTokens = parsed.usage.output_tokens || 0;
|
|
826
|
+
}
|
|
799
827
|
if (parsed.type === "content_block_delta" && parsed.delta?.text) {
|
|
800
828
|
fullContent += parsed.delta.text;
|
|
801
829
|
if (ctx.stream) {
|
|
@@ -835,10 +863,15 @@ var handleAnthropicStream = async (response, ctx) => {
|
|
|
835
863
|
if (toolCalls.length > 0) {
|
|
836
864
|
msg.tool_calls = toolCalls.map(({ index, ...tc }) => tc);
|
|
837
865
|
}
|
|
866
|
+
const usage = addUsage(ctx.usage, inputTokens, outputTokens, inputTokens + outputTokens);
|
|
867
|
+
if (ctx.stream && (inputTokens || outputTokens)) {
|
|
868
|
+
ctx.stream({ type: "usage", usage });
|
|
869
|
+
}
|
|
838
870
|
return {
|
|
839
871
|
...ctx,
|
|
840
872
|
lastResponse: msg,
|
|
841
|
-
history: [...ctx.history, msg]
|
|
873
|
+
history: [...ctx.history, msg],
|
|
874
|
+
usage
|
|
842
875
|
};
|
|
843
876
|
};
|
|
844
877
|
|
|
@@ -991,10 +1024,12 @@ var callGoogle = async (config, ctx) => {
|
|
|
991
1024
|
if (toolCalls.length > 0) {
|
|
992
1025
|
msg.tool_calls = toolCalls;
|
|
993
1026
|
}
|
|
1027
|
+
const um = data.usageMetadata;
|
|
994
1028
|
return {
|
|
995
1029
|
...ctx,
|
|
996
1030
|
lastResponse: msg,
|
|
997
|
-
history: [...ctx.history, msg]
|
|
1031
|
+
history: [...ctx.history, msg],
|
|
1032
|
+
usage: addUsage(ctx.usage, um?.promptTokenCount || 0, um?.candidatesTokenCount || 0, um?.totalTokenCount || 0)
|
|
998
1033
|
};
|
|
999
1034
|
};
|
|
1000
1035
|
var handleGoogleStream = async (response, ctx) => {
|
|
@@ -1003,6 +1038,7 @@ var handleGoogleStream = async (response, ctx) => {
|
|
|
1003
1038
|
let fullContent = "";
|
|
1004
1039
|
const toolCalls = [];
|
|
1005
1040
|
let buffer = "";
|
|
1041
|
+
let usageMetadata = null;
|
|
1006
1042
|
try {
|
|
1007
1043
|
while (true) {
|
|
1008
1044
|
if (ctx.abortSignal?.aborted) {
|
|
@@ -1019,6 +1055,9 @@ var handleGoogleStream = async (response, ctx) => {
|
|
|
1019
1055
|
if (!data) continue;
|
|
1020
1056
|
try {
|
|
1021
1057
|
const parsed = JSON.parse(data);
|
|
1058
|
+
if (parsed.usageMetadata) {
|
|
1059
|
+
usageMetadata = parsed.usageMetadata;
|
|
1060
|
+
}
|
|
1022
1061
|
const candidate = parsed.candidates?.[0];
|
|
1023
1062
|
const parts = candidate?.content?.parts || [];
|
|
1024
1063
|
for (const part of parts) {
|
|
@@ -1058,18 +1097,75 @@ var handleGoogleStream = async (response, ctx) => {
|
|
|
1058
1097
|
if (toolCalls.length > 0) {
|
|
1059
1098
|
msg.tool_calls = toolCalls;
|
|
1060
1099
|
}
|
|
1100
|
+
const um = usageMetadata;
|
|
1101
|
+
const usage = addUsage(ctx.usage, um?.promptTokenCount || 0, um?.candidatesTokenCount || 0, um?.totalTokenCount || 0);
|
|
1102
|
+
if (ctx.stream && um) {
|
|
1103
|
+
ctx.stream({ type: "usage", usage });
|
|
1104
|
+
}
|
|
1061
1105
|
return {
|
|
1062
1106
|
...ctx,
|
|
1063
1107
|
lastResponse: msg,
|
|
1064
|
-
history: [...ctx.history, msg]
|
|
1108
|
+
history: [...ctx.history, msg],
|
|
1109
|
+
usage
|
|
1065
1110
|
};
|
|
1066
1111
|
};
|
|
1067
1112
|
|
|
1068
1113
|
// src/providers/huggingface.ts
|
|
1114
|
+
var modelCache2 = /* @__PURE__ */ new Map();
|
|
1115
|
+
/**
 * Build the chat message list passed to the text-generation pipeline.
 *
 * @param {string | undefined} instructions - Optional system prompt; when
 *   truthy it becomes the first message with role "system".
 * @param {Iterable<{role: string, content: unknown}>} history - Conversation
 *   so far. Only `role` and `content` are copied from each entry.
 * @returns {Array<{role: string, content: unknown}>} Freshly created message
 *   objects, so callers can modify them without touching `history`.
 */
var formatMessages = (instructions, history) => {
  const systemPart = instructions ? [{ role: "system", content: instructions }] : [];
  // Copy only the two fields that are forwarded; any other field is dropped.
  const turns = [...history].map(({ role, content }) => ({ role, content }));
  return [...systemPart, ...turns];
};
|
|
1069
1125
|
/**
 * Run a chat completion through a local `@huggingface/transformers`
 * text-generation pipeline and return the updated context.
 *
 * @param {{model: string, instructions?: string, schema?: {schema: unknown}}} config
 *   `model` is the pipeline model id, `instructions` the optional system
 *   prompt, `schema` an optional wrapper whose `.schema` is serialized into
 *   the system prompt to request JSON output.
 * @param {object} ctx - Conversation context; reads `history`, `stream` and
 *   `usage`.
 * @returns {Promise<object>} Copy of `ctx` with the assistant message appended
 *   to `history`, set as `lastResponse`, and `usage` passed through
 *   `addUsage` with zero counts (no token counts are recorded here).
 */
var callHuggingFace = async (config, ctx) => {
  const { model: model2, instructions, schema } = config;
  // Dynamic import: the dependency is only loaded when this provider is used.
  const { pipeline } = await import("@huggingface/transformers");

  // One pipeline per model id, created on first use and kept in modelCache2.
  if (!modelCache2.has(model2)) {
    // NOTE(review): "q4f16" is presumably a quantization setting - confirm
    // against the transformers.js documentation.
    modelCache2.set(model2, await pipeline("text-generation", model2, { dtype: "q4f16" }));
  }
  const generate = modelCache2.get(model2);

  const chat = formatMessages(instructions, ctx.history);
  if (schema) {
    const systemTurn = chat.find((turn) => turn.role === "system");
    const jsonRules = [
      "you must respond with valid JSON matching this schema:",
      JSON.stringify(schema.schema, null, 2),
      "respond ONLY with the JSON object, no other text."
    ].join("\n");
    // Extend the first system message when there is one; otherwise put a new
    // system message at the front.
    if (systemTurn) {
      systemTurn.content += "\n\n" + jsonRules;
    } else {
      chat.unshift({ role: "system", content: jsonRules });
    }
  }

  const generationOptions = { max_new_tokens: 2048, do_sample: false };
  const output = await generate(chat, generationOptions);
  // The reply is read from the last element of generated_text; anything
  // missing or empty becomes "".
  const reply = output[0].generated_text.at(-1)?.content || "";
  const assistantTurn = { role: "assistant", content: reply };

  // The whole reply is sent to the stream callback as one "content" event.
  if (ctx.stream) {
    ctx.stream({ type: "content", content: reply });
  }

  return {
    ...ctx,
    lastResponse: assistantTurn,
    history: [...ctx.history, assistantTurn],
    usage: addUsage(ctx.usage, 0, 0, 0)
  };
};
|
|
1074
1170
|
|
|
1075
1171
|
// src/providers/xai.ts
|
|
@@ -1110,7 +1206,8 @@ var callXAI = async (config, ctx) => {
|
|
|
1110
1206
|
const body = {
|
|
1111
1207
|
model: model2,
|
|
1112
1208
|
messages,
|
|
1113
|
-
stream: !!ctx.stream
|
|
1209
|
+
stream: !!ctx.stream,
|
|
1210
|
+
...ctx.stream && { stream_options: { include_usage: true } }
|
|
1114
1211
|
};
|
|
1115
1212
|
if (schema) {
|
|
1116
1213
|
body.response_format = {
|
|
@@ -1155,7 +1252,8 @@ var callXAI = async (config, ctx) => {
|
|
|
1155
1252
|
return {
|
|
1156
1253
|
...ctx,
|
|
1157
1254
|
lastResponse: msg,
|
|
1158
|
-
history: [...ctx.history, msg]
|
|
1255
|
+
history: [...ctx.history, msg],
|
|
1256
|
+
usage: addUsage(ctx.usage, data.usage?.prompt_tokens || 0, data.usage?.completion_tokens || 0, data.usage?.total_tokens || 0)
|
|
1159
1257
|
};
|
|
1160
1258
|
};
|
|
1161
1259
|
var handleXAIStream = async (response, ctx) => {
|
|
@@ -1164,6 +1262,7 @@ var handleXAIStream = async (response, ctx) => {
|
|
|
1164
1262
|
let fullContent = "";
|
|
1165
1263
|
let toolCalls = [];
|
|
1166
1264
|
let buffer = "";
|
|
1265
|
+
let streamUsage = null;
|
|
1167
1266
|
try {
|
|
1168
1267
|
while (true) {
|
|
1169
1268
|
if (ctx.abortSignal?.aborted) {
|
|
@@ -1181,6 +1280,9 @@ var handleXAIStream = async (response, ctx) => {
|
|
|
1181
1280
|
if (!data) continue;
|
|
1182
1281
|
try {
|
|
1183
1282
|
const parsed = JSON.parse(data);
|
|
1283
|
+
if (parsed.usage) {
|
|
1284
|
+
streamUsage = parsed.usage;
|
|
1285
|
+
}
|
|
1184
1286
|
const delta = parsed.choices?.[0]?.delta;
|
|
1185
1287
|
if (delta?.content) {
|
|
1186
1288
|
fullContent += delta.content;
|
|
@@ -1206,10 +1308,15 @@ var handleXAIStream = async (response, ctx) => {
|
|
|
1206
1308
|
if (toolCalls.length > 0) {
|
|
1207
1309
|
msg.tool_calls = toolCalls;
|
|
1208
1310
|
}
|
|
1311
|
+
const usage = addUsage(ctx.usage, streamUsage?.prompt_tokens || 0, streamUsage?.completion_tokens || 0, streamUsage?.total_tokens || 0);
|
|
1312
|
+
if (ctx.stream && streamUsage) {
|
|
1313
|
+
ctx.stream({ type: "usage", usage });
|
|
1314
|
+
}
|
|
1209
1315
|
return {
|
|
1210
1316
|
...ctx,
|
|
1211
1317
|
lastResponse: msg,
|
|
1212
|
-
history: [...ctx.history, msg]
|
|
1318
|
+
history: [...ctx.history, msg],
|
|
1319
|
+
usage
|
|
1213
1320
|
};
|
|
1214
1321
|
};
|
|
1215
1322
|
|
|
@@ -1242,7 +1349,8 @@ var callLocal = async (config, ctx) => {
|
|
|
1242
1349
|
const body = {
|
|
1243
1350
|
model: model2,
|
|
1244
1351
|
messages,
|
|
1245
|
-
stream: !!ctx.stream
|
|
1352
|
+
stream: !!ctx.stream,
|
|
1353
|
+
...ctx.stream && { stream_options: { include_usage: true } }
|
|
1246
1354
|
};
|
|
1247
1355
|
if (schema) {
|
|
1248
1356
|
body.response_format = {
|
|
@@ -1290,7 +1398,8 @@ var callLocal = async (config, ctx) => {
|
|
|
1290
1398
|
return {
|
|
1291
1399
|
...ctx,
|
|
1292
1400
|
lastResponse: msg,
|
|
1293
|
-
history: [...ctx.history, msg]
|
|
1401
|
+
history: [...ctx.history, msg],
|
|
1402
|
+
usage: addUsage(ctx.usage, data.usage?.prompt_tokens || 0, data.usage?.completion_tokens || 0, data.usage?.total_tokens || 0)
|
|
1294
1403
|
};
|
|
1295
1404
|
};
|
|
1296
1405
|
var handleLocalStream = async (response, ctx) => {
|
|
@@ -1299,6 +1408,7 @@ var handleLocalStream = async (response, ctx) => {
|
|
|
1299
1408
|
let fullContent = "";
|
|
1300
1409
|
let toolCalls = [];
|
|
1301
1410
|
let buffer = "";
|
|
1411
|
+
let streamUsage = null;
|
|
1302
1412
|
try {
|
|
1303
1413
|
while (true) {
|
|
1304
1414
|
if (ctx.abortSignal?.aborted) {
|
|
@@ -1316,6 +1426,9 @@ var handleLocalStream = async (response, ctx) => {
|
|
|
1316
1426
|
if (!data) continue;
|
|
1317
1427
|
try {
|
|
1318
1428
|
const parsed = JSON.parse(data);
|
|
1429
|
+
if (parsed.usage) {
|
|
1430
|
+
streamUsage = parsed.usage;
|
|
1431
|
+
}
|
|
1319
1432
|
const delta = parsed.choices?.[0]?.delta;
|
|
1320
1433
|
if (delta?.content) {
|
|
1321
1434
|
fullContent += delta.content;
|
|
@@ -1341,10 +1454,15 @@ var handleLocalStream = async (response, ctx) => {
|
|
|
1341
1454
|
if (toolCalls.length > 0) {
|
|
1342
1455
|
msg.tool_calls = toolCalls;
|
|
1343
1456
|
}
|
|
1457
|
+
const usage = addUsage(ctx.usage, streamUsage?.prompt_tokens || 0, streamUsage?.completion_tokens || 0, streamUsage?.total_tokens || 0);
|
|
1458
|
+
if (ctx.stream && streamUsage) {
|
|
1459
|
+
ctx.stream({ type: "usage", usage });
|
|
1460
|
+
}
|
|
1344
1461
|
return {
|
|
1345
1462
|
...ctx,
|
|
1346
1463
|
lastResponse: msg,
|
|
1347
|
-
history: [...ctx.history, msg]
|
|
1464
|
+
history: [...ctx.history, msg],
|
|
1465
|
+
usage
|
|
1348
1466
|
};
|
|
1349
1467
|
};
|
|
1350
1468
|
|
|
@@ -1861,6 +1979,7 @@ var scopeContext = (config, ctx) => {
|
|
|
1861
1979
|
}
|
|
1862
1980
|
scopedCtx.stream = ctx.stream;
|
|
1863
1981
|
scopedCtx.abortSignal = ctx.abortSignal;
|
|
1982
|
+
scopedCtx.usage = ctx.usage;
|
|
1864
1983
|
if (config.tools) {
|
|
1865
1984
|
const toolDefinitions = config.tools.map(toolConfigToToolDefinition);
|
|
1866
1985
|
const toolExecutors = config.tools.reduce(
|
|
@@ -1914,7 +2033,8 @@ var scope = (config, ...steps) => {
|
|
|
1914
2033
|
history: config.silent ? ctx.history : scopedCtx.history,
|
|
1915
2034
|
lastResponse: config.silent ? ctx.lastResponse : scopedCtx.lastResponse,
|
|
1916
2035
|
lastRequest: config.silent ? ctx.lastRequest : scopedCtx.lastRequest,
|
|
1917
|
-
stopReason: config.silent ? ctx.stopReason : scopedCtx.stopReason
|
|
2036
|
+
stopReason: config.silent ? ctx.stopReason : scopedCtx.stopReason,
|
|
2037
|
+
usage: scopedCtx.usage
|
|
1918
2038
|
};
|
|
1919
2039
|
};
|
|
1920
2040
|
};
|
|
@@ -1976,6 +2096,7 @@ var rateLimited = (config) => (fn) => {
|
|
|
1976
2096
|
IMAGE_EDIT_MODEL_SCHEMA,
|
|
1977
2097
|
IMAGE_MODEL_SCHEMA,
|
|
1978
2098
|
Inherit,
|
|
2099
|
+
addUsage,
|
|
1979
2100
|
appendToLastRequest,
|
|
1980
2101
|
compose,
|
|
1981
2102
|
convertMCPSchemaToToolSchema,
|