@threaded/ai 1.0.25 → 1.0.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "WebSearch",
5
+ "WebFetch(domain:platform.openai.com)",
6
+ "WebFetch(domain:docs.anthropic.com)",
7
+ "WebFetch(domain:ai.google.dev)",
8
+ "WebFetch(domain:docs.x.ai)",
9
+ "WebFetch(domain:docs.cloud.google.com)",
10
+ "WebFetch(domain:latenode.com)",
11
+ "WebFetch(domain:github.com)",
12
+ "WebFetch(domain:docs.aimlapi.com)"
13
+ ]
14
+ }
15
+ }
package/.lore ADDED
@@ -0,0 +1,65 @@
1
+ -- core pattern --
2
+
3
+ everything is built on compose/scope/model. compose chains steps, scope isolates context, model calls an LLM.
4
+
5
+ ```js
6
+ const workflow = compose(
7
+ scope({ system: "...", tools: [...] }, model({ model: "openai/gpt-4o" }))
8
+ )
9
+ const ctx = await workflow("user message")
10
+ ```
11
+
12
+ -- key setup --
13
+
14
+ must call setKeys() before any model() call, or set env vars (OPENAI_API_KEY, ANTHROPIC_API_KEY, GEMINI_API_KEY, XAI_API_KEY)
15
+
16
+ ```js
17
+ import { setKeys } from "@threaded/ai"
18
+ setKeys({ openai: process.env.OPENAI_API_KEY })
19
+ ```
20
+
21
+ -- model naming --
22
+
23
+ always "provider/model-name": openai/gpt-4o, anthropic/claude-sonnet-4-5-20250929, google/gemini-2.5-flash, xai/grok-4-1-fast-non-reasoning. no prefix defaults to huggingface.
24
+
25
+ -- structured output --
26
+
27
+ pass schema (zod or json schema) to model(). response comes back as JSON in lastResponse.content. claude wraps it in markdown code fences - strip them before JSON.parse.
28
+
29
+ ```js
30
+ function extractResult(ctx) {
31
+ const last = ctx.history?.findLast(m => m.role === 'assistant')
32
+ if (!last?.content) return null
33
+ let text = typeof last.content === 'string' ? last.content : last.content[0]?.text || ''
34
+ const fenced = text.match(/```(?:json)?\s*\n?([\s\S]*?)\n?```/)
35
+ if (fenced) text = fenced[1]
36
+ return JSON.parse(text.trim())
37
+ }
38
+ ```
39
+
40
+ -- pitfalls --
41
+
42
+ do NOT use Inherit.Nothing - it drops the user prompt entirely. use Inherit.Conversation (default) instead.
43
+
44
+ until: noToolsCalled() loops can cause runaway API calls (5-6+ per invocation). use maxCalls() on tools to limit.
45
+
46
+ silent scopes run analysis without polluting chat history but still accumulate token usage on ctx.usage.
47
+
48
+ events are local to the scope where the stream callback is set. nested silent scopes with their own stream callback get their own events.
49
+
50
+ -- rate limiting --
51
+
52
+ anthropic tier 1 has very low limits (30K input tokens/min). when using anthropic, add delays between calls and use maxCalls() aggressively. 90s backoff on rate limit errors.
53
+
54
+ -- token usage --
55
+
56
+ ctx.usage accumulates across all model() calls, tool loops, and nested scopes. always available after workflow completes:
57
+ { promptTokens, completionTokens, totalTokens }
58
+
59
+ -- integration patterns --
60
+
61
+ SSE streaming: pass stream callback in scope config, write events to res as `data: ${JSON.stringify(event)}\n\n`
62
+
63
+ tool approval: use toolConfig.requireApproval + approvalCallback for gating dangerous tool execution in web UIs
64
+
65
+ MCP tools: createMCPTools(client) converts MCP server tools to native format. tools are prefixed with server name.
package/dist/index.cjs CHANGED
@@ -33,6 +33,7 @@ __export(index_exports, {
33
33
  IMAGE_EDIT_MODEL_SCHEMA: () => IMAGE_EDIT_MODEL_SCHEMA,
34
34
  IMAGE_MODEL_SCHEMA: () => IMAGE_MODEL_SCHEMA,
35
35
  Inherit: () => Inherit,
36
+ addUsage: () => addUsage,
36
37
  appendToLastRequest: () => appendToLastRequest,
37
38
  compose: () => compose,
38
39
  convertMCPSchemaToToolSchema: () => convertMCPSchemaToToolSchema,
@@ -222,6 +223,11 @@ var maxCalls = (toolConfig, maxCalls2) => ({
222
223
  ...toolConfig,
223
224
  _maxCalls: maxCalls2
224
225
  });
226
+ var addUsage = (existing, promptTokens, completionTokens, totalTokens) => ({
227
+ promptTokens: (existing?.promptTokens || 0) + promptTokens,
228
+ completionTokens: (existing?.completionTokens || 0) + completionTokens,
229
+ totalTokens: (existing?.totalTokens || 0) + totalTokens
230
+ });
225
231
 
226
232
  // src/embed.ts
227
233
  var modelCache = /* @__PURE__ */ new Map();
@@ -533,7 +539,8 @@ var callOpenAI = async (config, ctx) => {
533
539
  const body = {
534
540
  model: model2,
535
541
  messages,
536
- stream: !!ctx.stream
542
+ stream: !!ctx.stream,
543
+ ...ctx.stream && { stream_options: { include_usage: true } }
537
544
  };
538
545
  if (schema) {
539
546
  body.response_format = {
@@ -578,7 +585,8 @@ var callOpenAI = async (config, ctx) => {
578
585
  return {
579
586
  ...ctx,
580
587
  lastResponse: msg,
581
- history: [...ctx.history, msg]
588
+ history: [...ctx.history, msg],
589
+ usage: addUsage(ctx.usage, data.usage?.prompt_tokens || 0, data.usage?.completion_tokens || 0, data.usage?.total_tokens || 0)
582
590
  };
583
591
  };
584
592
  var handleOpenAIStream = async (response, ctx) => {
@@ -587,6 +595,7 @@ var handleOpenAIStream = async (response, ctx) => {
587
595
  let fullContent = "";
588
596
  let toolCalls = [];
589
597
  let buffer = "";
598
+ let streamUsage = null;
590
599
  try {
591
600
  while (true) {
592
601
  if (ctx.abortSignal?.aborted) {
@@ -604,6 +613,9 @@ var handleOpenAIStream = async (response, ctx) => {
604
613
  if (!data) continue;
605
614
  try {
606
615
  const parsed = JSON.parse(data);
616
+ if (parsed.usage) {
617
+ streamUsage = parsed.usage;
618
+ }
607
619
  const delta = parsed.choices?.[0]?.delta;
608
620
  if (delta?.content) {
609
621
  fullContent += delta.content;
@@ -629,10 +641,15 @@ var handleOpenAIStream = async (response, ctx) => {
629
641
  if (toolCalls.length > 0) {
630
642
  msg.tool_calls = toolCalls;
631
643
  }
644
+ const usage = addUsage(ctx.usage, streamUsage?.prompt_tokens || 0, streamUsage?.completion_tokens || 0, streamUsage?.total_tokens || 0);
645
+ if (ctx.stream && streamUsage) {
646
+ ctx.stream({ type: "usage", usage });
647
+ }
632
648
  return {
633
649
  ...ctx,
634
650
  lastResponse: msg,
635
- history: [...ctx.history, msg]
651
+ history: [...ctx.history, msg],
652
+ usage
636
653
  };
637
654
  };
638
655
 
@@ -768,10 +785,13 @@ Return only the JSON object, no other text or formatting.`;
768
785
  }
769
786
  ];
770
787
  }
788
+ const inputTokens = data.usage?.input_tokens || 0;
789
+ const outputTokens = data.usage?.output_tokens || 0;
771
790
  return {
772
791
  ...ctx,
773
792
  lastResponse: msg,
774
- history: [...ctx.history, msg]
793
+ history: [...ctx.history, msg],
794
+ usage: addUsage(ctx.usage, inputTokens, outputTokens, inputTokens + outputTokens)
775
795
  };
776
796
  };
777
797
  var handleAnthropicStream = async (response, ctx) => {
@@ -780,6 +800,8 @@ var handleAnthropicStream = async (response, ctx) => {
780
800
  let fullContent = "";
781
801
  const toolCalls = [];
782
802
  let buffer = "";
803
+ let inputTokens = 0;
804
+ let outputTokens = 0;
783
805
  try {
784
806
  while (true) {
785
807
  if (ctx.abortSignal?.aborted) {
@@ -796,6 +818,12 @@ var handleAnthropicStream = async (response, ctx) => {
796
818
  if (!data) continue;
797
819
  try {
798
820
  const parsed = JSON.parse(data);
821
+ if (parsed.type === "message_start" && parsed.message?.usage) {
822
+ inputTokens = parsed.message.usage.input_tokens || 0;
823
+ }
824
+ if (parsed.type === "message_delta" && parsed.usage) {
825
+ outputTokens = parsed.usage.output_tokens || 0;
826
+ }
799
827
  if (parsed.type === "content_block_delta" && parsed.delta?.text) {
800
828
  fullContent += parsed.delta.text;
801
829
  if (ctx.stream) {
@@ -835,10 +863,15 @@ var handleAnthropicStream = async (response, ctx) => {
835
863
  if (toolCalls.length > 0) {
836
864
  msg.tool_calls = toolCalls.map(({ index, ...tc }) => tc);
837
865
  }
866
+ const usage = addUsage(ctx.usage, inputTokens, outputTokens, inputTokens + outputTokens);
867
+ if (ctx.stream && (inputTokens || outputTokens)) {
868
+ ctx.stream({ type: "usage", usage });
869
+ }
838
870
  return {
839
871
  ...ctx,
840
872
  lastResponse: msg,
841
- history: [...ctx.history, msg]
873
+ history: [...ctx.history, msg],
874
+ usage
842
875
  };
843
876
  };
844
877
 
@@ -991,10 +1024,12 @@ var callGoogle = async (config, ctx) => {
991
1024
  if (toolCalls.length > 0) {
992
1025
  msg.tool_calls = toolCalls;
993
1026
  }
1027
+ const um = data.usageMetadata;
994
1028
  return {
995
1029
  ...ctx,
996
1030
  lastResponse: msg,
997
- history: [...ctx.history, msg]
1031
+ history: [...ctx.history, msg],
1032
+ usage: addUsage(ctx.usage, um?.promptTokenCount || 0, um?.candidatesTokenCount || 0, um?.totalTokenCount || 0)
998
1033
  };
999
1034
  };
1000
1035
  var handleGoogleStream = async (response, ctx) => {
@@ -1003,6 +1038,7 @@ var handleGoogleStream = async (response, ctx) => {
1003
1038
  let fullContent = "";
1004
1039
  const toolCalls = [];
1005
1040
  let buffer = "";
1041
+ let usageMetadata = null;
1006
1042
  try {
1007
1043
  while (true) {
1008
1044
  if (ctx.abortSignal?.aborted) {
@@ -1019,6 +1055,9 @@ var handleGoogleStream = async (response, ctx) => {
1019
1055
  if (!data) continue;
1020
1056
  try {
1021
1057
  const parsed = JSON.parse(data);
1058
+ if (parsed.usageMetadata) {
1059
+ usageMetadata = parsed.usageMetadata;
1060
+ }
1022
1061
  const candidate = parsed.candidates?.[0];
1023
1062
  const parts = candidate?.content?.parts || [];
1024
1063
  for (const part of parts) {
@@ -1058,18 +1097,75 @@ var handleGoogleStream = async (response, ctx) => {
1058
1097
  if (toolCalls.length > 0) {
1059
1098
  msg.tool_calls = toolCalls;
1060
1099
  }
1100
+ const um = usageMetadata;
1101
+ const usage = addUsage(ctx.usage, um?.promptTokenCount || 0, um?.candidatesTokenCount || 0, um?.totalTokenCount || 0);
1102
+ if (ctx.stream && um) {
1103
+ ctx.stream({ type: "usage", usage });
1104
+ }
1061
1105
  return {
1062
1106
  ...ctx,
1063
1107
  lastResponse: msg,
1064
- history: [...ctx.history, msg]
1108
+ history: [...ctx.history, msg],
1109
+ usage
1065
1110
  };
1066
1111
  };
1067
1112
 
1068
1113
  // src/providers/huggingface.ts
1114
+ var modelCache2 = /* @__PURE__ */ new Map();
1115
+ var formatMessages = (instructions, history) => {
1116
+ const messages = [];
1117
+ if (instructions) {
1118
+ messages.push({ role: "system", content: instructions });
1119
+ }
1120
+ for (const msg of history) {
1121
+ messages.push({ role: msg.role, content: msg.content });
1122
+ }
1123
+ return messages;
1124
+ };
1069
1125
  var callHuggingFace = async (config, ctx) => {
1070
- throw new Error(
1071
- "Hugging Face provider not yet implemented. Use openai/, anthropic/, or google/ prefixes."
1072
- );
1126
+ const { model: model2, instructions, schema } = config;
1127
+ const { pipeline } = await import("@huggingface/transformers");
1128
+ if (!modelCache2.has(model2)) {
1129
+ const generator2 = await pipeline("text-generation", model2, {
1130
+ dtype: "q4f16"
1131
+ });
1132
+ modelCache2.set(model2, generator2);
1133
+ }
1134
+ const generator = modelCache2.get(model2);
1135
+ const messages = formatMessages(instructions, ctx.history);
1136
+ if (schema) {
1137
+ const schemaMsg = messages.find((m) => m.role === "system");
1138
+ const schemaInstructions = [
1139
+ "you must respond with valid JSON matching this schema:",
1140
+ JSON.stringify(schema.schema, null, 2),
1141
+ "respond ONLY with the JSON object, no other text."
1142
+ ].join("\n");
1143
+ if (schemaMsg) {
1144
+ schemaMsg.content += "\n\n" + schemaInstructions;
1145
+ } else {
1146
+ messages.unshift({ role: "system", content: schemaInstructions });
1147
+ }
1148
+ }
1149
+ const output = await generator(messages, {
1150
+ max_new_tokens: 2048,
1151
+ do_sample: false
1152
+ });
1153
+ const generatedMessages = output[0].generated_text;
1154
+ const lastMessage = generatedMessages.at(-1);
1155
+ const content = lastMessage?.content || "";
1156
+ const msg = {
1157
+ role: "assistant",
1158
+ content
1159
+ };
1160
+ if (ctx.stream) {
1161
+ ctx.stream({ type: "content", content });
1162
+ }
1163
+ return {
1164
+ ...ctx,
1165
+ lastResponse: msg,
1166
+ history: [...ctx.history, msg],
1167
+ usage: addUsage(ctx.usage, 0, 0, 0)
1168
+ };
1073
1169
  };
1074
1170
 
1075
1171
  // src/providers/xai.ts
@@ -1110,7 +1206,8 @@ var callXAI = async (config, ctx) => {
1110
1206
  const body = {
1111
1207
  model: model2,
1112
1208
  messages,
1113
- stream: !!ctx.stream
1209
+ stream: !!ctx.stream,
1210
+ ...ctx.stream && { stream_options: { include_usage: true } }
1114
1211
  };
1115
1212
  if (schema) {
1116
1213
  body.response_format = {
@@ -1155,7 +1252,8 @@ var callXAI = async (config, ctx) => {
1155
1252
  return {
1156
1253
  ...ctx,
1157
1254
  lastResponse: msg,
1158
- history: [...ctx.history, msg]
1255
+ history: [...ctx.history, msg],
1256
+ usage: addUsage(ctx.usage, data.usage?.prompt_tokens || 0, data.usage?.completion_tokens || 0, data.usage?.total_tokens || 0)
1159
1257
  };
1160
1258
  };
1161
1259
  var handleXAIStream = async (response, ctx) => {
@@ -1164,6 +1262,7 @@ var handleXAIStream = async (response, ctx) => {
1164
1262
  let fullContent = "";
1165
1263
  let toolCalls = [];
1166
1264
  let buffer = "";
1265
+ let streamUsage = null;
1167
1266
  try {
1168
1267
  while (true) {
1169
1268
  if (ctx.abortSignal?.aborted) {
@@ -1181,6 +1280,9 @@ var handleXAIStream = async (response, ctx) => {
1181
1280
  if (!data) continue;
1182
1281
  try {
1183
1282
  const parsed = JSON.parse(data);
1283
+ if (parsed.usage) {
1284
+ streamUsage = parsed.usage;
1285
+ }
1184
1286
  const delta = parsed.choices?.[0]?.delta;
1185
1287
  if (delta?.content) {
1186
1288
  fullContent += delta.content;
@@ -1206,10 +1308,15 @@ var handleXAIStream = async (response, ctx) => {
1206
1308
  if (toolCalls.length > 0) {
1207
1309
  msg.tool_calls = toolCalls;
1208
1310
  }
1311
+ const usage = addUsage(ctx.usage, streamUsage?.prompt_tokens || 0, streamUsage?.completion_tokens || 0, streamUsage?.total_tokens || 0);
1312
+ if (ctx.stream && streamUsage) {
1313
+ ctx.stream({ type: "usage", usage });
1314
+ }
1209
1315
  return {
1210
1316
  ...ctx,
1211
1317
  lastResponse: msg,
1212
- history: [...ctx.history, msg]
1318
+ history: [...ctx.history, msg],
1319
+ usage
1213
1320
  };
1214
1321
  };
1215
1322
 
@@ -1242,7 +1349,8 @@ var callLocal = async (config, ctx) => {
1242
1349
  const body = {
1243
1350
  model: model2,
1244
1351
  messages,
1245
- stream: !!ctx.stream
1352
+ stream: !!ctx.stream,
1353
+ ...ctx.stream && { stream_options: { include_usage: true } }
1246
1354
  };
1247
1355
  if (schema) {
1248
1356
  body.response_format = {
@@ -1290,7 +1398,8 @@ var callLocal = async (config, ctx) => {
1290
1398
  return {
1291
1399
  ...ctx,
1292
1400
  lastResponse: msg,
1293
- history: [...ctx.history, msg]
1401
+ history: [...ctx.history, msg],
1402
+ usage: addUsage(ctx.usage, data.usage?.prompt_tokens || 0, data.usage?.completion_tokens || 0, data.usage?.total_tokens || 0)
1294
1403
  };
1295
1404
  };
1296
1405
  var handleLocalStream = async (response, ctx) => {
@@ -1299,6 +1408,7 @@ var handleLocalStream = async (response, ctx) => {
1299
1408
  let fullContent = "";
1300
1409
  let toolCalls = [];
1301
1410
  let buffer = "";
1411
+ let streamUsage = null;
1302
1412
  try {
1303
1413
  while (true) {
1304
1414
  if (ctx.abortSignal?.aborted) {
@@ -1316,6 +1426,9 @@ var handleLocalStream = async (response, ctx) => {
1316
1426
  if (!data) continue;
1317
1427
  try {
1318
1428
  const parsed = JSON.parse(data);
1429
+ if (parsed.usage) {
1430
+ streamUsage = parsed.usage;
1431
+ }
1319
1432
  const delta = parsed.choices?.[0]?.delta;
1320
1433
  if (delta?.content) {
1321
1434
  fullContent += delta.content;
@@ -1341,10 +1454,15 @@ var handleLocalStream = async (response, ctx) => {
1341
1454
  if (toolCalls.length > 0) {
1342
1455
  msg.tool_calls = toolCalls;
1343
1456
  }
1457
+ const usage = addUsage(ctx.usage, streamUsage?.prompt_tokens || 0, streamUsage?.completion_tokens || 0, streamUsage?.total_tokens || 0);
1458
+ if (ctx.stream && streamUsage) {
1459
+ ctx.stream({ type: "usage", usage });
1460
+ }
1344
1461
  return {
1345
1462
  ...ctx,
1346
1463
  lastResponse: msg,
1347
- history: [...ctx.history, msg]
1464
+ history: [...ctx.history, msg],
1465
+ usage
1348
1466
  };
1349
1467
  };
1350
1468
 
@@ -1861,6 +1979,7 @@ var scopeContext = (config, ctx) => {
1861
1979
  }
1862
1980
  scopedCtx.stream = ctx.stream;
1863
1981
  scopedCtx.abortSignal = ctx.abortSignal;
1982
+ scopedCtx.usage = ctx.usage;
1864
1983
  if (config.tools) {
1865
1984
  const toolDefinitions = config.tools.map(toolConfigToToolDefinition);
1866
1985
  const toolExecutors = config.tools.reduce(
@@ -1914,7 +2033,8 @@ var scope = (config, ...steps) => {
1914
2033
  history: config.silent ? ctx.history : scopedCtx.history,
1915
2034
  lastResponse: config.silent ? ctx.lastResponse : scopedCtx.lastResponse,
1916
2035
  lastRequest: config.silent ? ctx.lastRequest : scopedCtx.lastRequest,
1917
- stopReason: config.silent ? ctx.stopReason : scopedCtx.stopReason
2036
+ stopReason: config.silent ? ctx.stopReason : scopedCtx.stopReason,
2037
+ usage: scopedCtx.usage
1918
2038
  };
1919
2039
  };
1920
2040
  };
@@ -1976,6 +2096,7 @@ var rateLimited = (config) => (fn) => {
1976
2096
  IMAGE_EDIT_MODEL_SCHEMA,
1977
2097
  IMAGE_MODEL_SCHEMA,
1978
2098
  Inherit,
2099
+ addUsage,
1979
2100
  appendToLastRequest,
1980
2101
  compose,
1981
2102
  convertMCPSchemaToToolSchema,