@yourgpt/llm-sdk 2.1.8 → 2.1.10-alpha.0

This diff shows the contents of publicly released package versions as published to their respective registries. It is provided for informational purposes only.
Files changed (51)
  1. package/dist/adapters/index.d.mts +38 -4
  2. package/dist/adapters/index.d.ts +38 -4
  3. package/dist/adapters/index.js +318 -8
  4. package/dist/adapters/index.mjs +318 -8
  5. package/dist/{base-iGi9Va6Z.d.ts → base-DN1EfKnE.d.mts} +2 -1
  6. package/dist/{base-D-U61JaB.d.mts → base-DuUNxtVg.d.ts} +2 -1
  7. package/dist/fallback/index.d.mts +4 -4
  8. package/dist/fallback/index.d.ts +4 -4
  9. package/dist/index.d.mts +7 -7
  10. package/dist/index.d.ts +7 -7
  11. package/dist/index.js +43 -23
  12. package/dist/index.mjs +43 -23
  13. package/dist/providers/anthropic/index.d.mts +3 -3
  14. package/dist/providers/anthropic/index.d.ts +3 -3
  15. package/dist/providers/anthropic/index.js +17 -0
  16. package/dist/providers/anthropic/index.mjs +17 -0
  17. package/dist/providers/azure/index.d.mts +3 -3
  18. package/dist/providers/azure/index.d.ts +3 -3
  19. package/dist/providers/fireworks/index.d.mts +1 -1
  20. package/dist/providers/fireworks/index.d.ts +1 -1
  21. package/dist/providers/google/index.d.mts +3 -3
  22. package/dist/providers/google/index.d.ts +3 -3
  23. package/dist/providers/google/index.js +311 -8
  24. package/dist/providers/google/index.mjs +311 -8
  25. package/dist/providers/ollama/index.d.mts +4 -4
  26. package/dist/providers/ollama/index.d.ts +4 -4
  27. package/dist/providers/openai/index.d.mts +3 -3
  28. package/dist/providers/openai/index.d.ts +3 -3
  29. package/dist/providers/openai/index.js +321 -8
  30. package/dist/providers/openai/index.mjs +321 -8
  31. package/dist/providers/openrouter/index.d.mts +7 -3
  32. package/dist/providers/openrouter/index.d.ts +7 -3
  33. package/dist/providers/openrouter/index.js +601 -11
  34. package/dist/providers/openrouter/index.mjs +601 -11
  35. package/dist/providers/togetherai/index.d.mts +61 -2
  36. package/dist/providers/togetherai/index.d.ts +61 -2
  37. package/dist/providers/togetherai/index.js +1030 -2
  38. package/dist/providers/togetherai/index.mjs +1029 -2
  39. package/dist/providers/xai/index.d.mts +3 -3
  40. package/dist/providers/xai/index.d.ts +3 -3
  41. package/dist/providers/xai/index.js +311 -8
  42. package/dist/providers/xai/index.mjs +311 -8
  43. package/dist/{types-D4YfrQJR.d.mts → types-BNCmlJMs.d.mts} +1 -1
  44. package/dist/{types-DRqxMIjF.d.mts → types-CMMQ8s2O.d.mts} +1 -1
  45. package/dist/{types-CR8mi9I0.d.ts → types-CMvvDo-E.d.mts} +12 -1
  46. package/dist/{types-CR8mi9I0.d.mts → types-CMvvDo-E.d.ts} +12 -1
  47. package/dist/{types-BctsnC3g.d.ts → types-DhktekQ3.d.ts} +1 -1
  48. package/dist/{types-38yolWJn.d.ts → types-Pj-vpmoT.d.ts} +1 -1
  49. package/dist/yourgpt/index.d.mts +1 -1
  50. package/dist/yourgpt/index.d.ts +1 -1
  51. package/package.json +1 -1
@@ -7,6 +7,9 @@ var DEFAULT_MODEL_CONFIG = {
   jsonMode: true,
   maxTokens: 128e3
 };
+function isOpenAIReasoningModel(modelId) {
+  return modelId.startsWith("openai/o1") || modelId.startsWith("openai/o3") || modelId.startsWith("openai/o4") || modelId.startsWith("openai/gpt-5");
+}
 function openrouter(modelId, options = {}) {
   const apiKey = options.apiKey ?? process.env.OPENROUTER_API_KEY;
   const baseURL = options.baseURL ?? "https://openrouter.ai/api/v1";
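The new predicate routes OpenRouter streams by model-id prefix. A minimal TypeScript sketch of what it matches (the function mirrors the shipped code; the example calls are illustrative):

```ts
// Mirrors the shipped isOpenAIReasoningModel: a prefix match on the OpenRouter id.
function isOpenAIReasoningModel(modelId: string): boolean {
  return (
    modelId.startsWith("openai/o1") ||
    modelId.startsWith("openai/o3") ||
    modelId.startsWith("openai/o4") ||
    modelId.startsWith("openai/gpt-5")
  );
}

isOpenAIReasoningModel("openai/o3-mini"); // true  -> routed to the Responses API path
isOpenAIReasoningModel("openai/gpt-5");   // true
isOpenAIReasoningModel("openai/gpt-4o");  // false -> stays on chat-completions
```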
@@ -38,7 +41,7 @@ function openrouter(modelId, options = {}) {
     supportsTools: modelConfig.tools,
     supportsStreaming: true,
     supportsJsonMode: modelConfig.jsonMode,
-    supportsThinking: false,
+    supportsThinking: true,
     supportsPDF: false,
     maxTokens: modelConfig.maxTokens,
     supportedImageTypes: modelConfig.vision ? ["image/png", "image/jpeg", "image/gif", "image/webp"] : []
@@ -81,6 +84,11 @@ function openrouter(modelId, options = {}) {
       };
     },
     async *doStream(params) {
+      if (!options.disableThinking && isOpenAIReasoningModel(modelId)) {
+        const client3 = await getClient();
+        yield* doStreamResponsesAPI(client3, modelId, params);
+        return;
+      }
       const client2 = await getClient();
       const messages = formatMessagesForOpenRouter(params.messages);
       const requestBody = {
@@ -88,7 +96,8 @@ function openrouter(modelId, options = {}) {
         messages,
         temperature: params.temperature,
         max_tokens: params.maxTokens,
-        stream: true
+        stream: true,
+        ...!options.disableThinking ? { reasoning: { max_tokens: 8e3 }, include_reasoning: true } : {}
       };
       if (params.tools) {
         requestBody.tools = params.tools;
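With thinking left enabled, the chat-completions body sent to OpenRouter gains OpenRouter's reasoning parameters. A sketch of the resulting request body, with an illustrative model and message (`8e3` above is 8000):

```ts
// Illustrative request body when options.disableThinking is falsy.
const requestBody = {
  model: "deepseek/deepseek-r1",               // example model id
  messages: [{ role: "user", content: "hi" }],
  stream: true,
  reasoning: { max_tokens: 8000 },             // cap on reasoning tokens
  include_reasoning: true                      // ask OpenRouter to stream reasoning deltas
};
```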
@@ -100,6 +109,7 @@ function openrouter(modelId, options = {}) {
       let currentToolCall = null;
       let totalPromptTokens = 0;
       let totalCompletionTokens = 0;
+      let orReasoningStarted = false;
       for await (const chunk of stream) {
         if (params.signal?.aborted) {
           yield { type: "error", error: new Error("Aborted") };
@@ -110,6 +120,20 @@ function openrouter(modelId, options = {}) {
         if (delta?.content) {
           yield { type: "text-delta", text: delta.content };
         }
+        const rc = delta?.reasoning_content ?? delta?.reasoning ?? null;
+        if (rc) {
+          const rcText = typeof rc === "string" ? rc : Array.isArray(rc) && rc[0]?.text ? rc[0].text : "";
+          if (rcText) {
+            if (!orReasoningStarted) {
+              yield { type: "thinking:start" };
+              orReasoningStarted = true;
+            }
+            yield { type: "thinking:delta", content: rcText };
+          }
+        } else if (orReasoningStarted && (delta?.content || choice?.finish_reason)) {
+          yield { type: "thinking:end" };
+          orReasoningStarted = false;
+        }
         if (delta?.tool_calls) {
           for (const tc of delta.tool_calls) {
             if (tc.id) {
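The delta handling above tolerates both field names and both payload shapes OpenRouter uses for reasoning. A small helper equivalent to the inline normalization (the name and types are mine, not the SDK's):

```ts
// Normalizes an OpenRouter delta's reasoning payload to plain text.
// OpenRouter may send `reasoning_content` or `reasoning`, and either
// a string or an array of { text } parts.
function extractReasoningText(delta: {
  reasoning_content?: unknown;
  reasoning?: unknown;
}): string {
  const rc = delta?.reasoning_content ?? delta?.reasoning ?? null;
  if (!rc) return "";
  if (typeof rc === "string") return rc;
  if (Array.isArray(rc) && typeof (rc[0] as { text?: string })?.text === "string") {
    return (rc[0] as { text: string }).text;
  }
  return "";
}
```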
@@ -228,6 +252,268 @@ function formatMessagesForOpenRouter(messages) {
     }
   });
 }
+function formatMessagesForResponsesAPI(messages) {
+  const out = [];
+  for (const msg of messages) {
+    if (msg.role === "system") {
+      out.push({
+        type: "message",
+        role: "system",
+        content: [
+          {
+            type: "input_text",
+            text: typeof msg.content === "string" ? msg.content : ""
+          }
+        ]
+      });
+      continue;
+    }
+    if (msg.role === "user") {
+      const parts = [];
+      if (typeof msg.content === "string") {
+        parts.push({ type: "input_text", text: msg.content });
+      } else {
+        for (const part of msg.content) {
+          if (part.type === "text") {
+            parts.push({ type: "input_text", text: part.text });
+          } else if (part.type === "image") {
+            const imageData = typeof part.image === "string" ? part.image : Buffer.from(part.image).toString("base64");
+            const url = imageData.startsWith("data:") ? imageData : `data:${part.mimeType ?? "image/png"};base64,${imageData}`;
+            parts.push({ type: "input_image", image_url: url });
+          }
+        }
+      }
+      out.push({ type: "message", role: "user", content: parts });
+      continue;
+    }
+    if (msg.role === "assistant") {
+      if (msg.toolCalls && msg.toolCalls.length > 0) {
+        for (const tc of msg.toolCalls) {
+          out.push({
+            type: "function_call",
+            call_id: tc.id,
+            name: tc.name,
+            arguments: JSON.stringify(tc.args ?? {})
+          });
+        }
+        if (typeof msg.content === "string" && msg.content.length > 0) {
+          out.push({
+            type: "message",
+            role: "assistant",
+            content: [{ type: "output_text", text: msg.content }]
+          });
+        }
+      } else {
+        const text = typeof msg.content === "string" ? msg.content : "";
+        out.push({
+          type: "message",
+          role: "assistant",
+          content: [{ type: "output_text", text }]
+        });
+      }
+      continue;
+    }
+    if (msg.role === "tool") {
+      out.push({
+        type: "function_call_output",
+        call_id: msg.toolCallId,
+        output: typeof msg.content === "string" ? msg.content : JSON.stringify(msg.content)
+      });
+      continue;
+    }
+  }
+  return out;
+}
+function formatToolsForResponsesAPI(tools) {
+  if (!tools || tools.length === 0) return void 0;
+  return tools.map((t) => {
+    if (t?.name && t?.parameters && t?.type === "function") return t;
+    const fn = t?.function ?? t;
+    return {
+      type: "function",
+      name: fn.name,
+      description: fn.description,
+      parameters: fn.parameters ?? { type: "object", properties: {} }
+    };
+  });
+}
+async function* doStreamResponsesAPI(client, modelId, params) {
+  const systemTexts = [];
+  const nonSystem = [];
+  for (const m of params.messages) {
+    if (m.role === "system" && typeof m.content === "string") {
+      systemTexts.push(m.content);
+    } else {
+      nonSystem.push(m);
+    }
+  }
+  const instructions = systemTexts.join("\n\n") || void 0;
+  const input = formatMessagesForResponsesAPI(nonSystem);
+  const requestBody = {
+    model: modelId,
+    input,
+    stream: true,
+    reasoning: { effort: "medium", summary: "auto" }
+  };
+  if (instructions) requestBody.instructions = instructions;
+  if (typeof params.maxTokens === "number")
+    requestBody.max_output_tokens = params.maxTokens;
+  if (typeof params.temperature === "number")
+    requestBody.temperature = params.temperature;
+  const tools = formatToolsForResponsesAPI(params.tools);
+  if (tools) requestBody.tools = tools;
+  let stream;
+  try {
+    stream = await client.responses.create(requestBody);
+  } catch (err) {
+    yield {
+      type: "error",
+      error: err instanceof Error ? err : new Error(String(err))
+    };
+    return;
+  }
+  const toolCalls = /* @__PURE__ */ new Map();
+  let reasoningStarted = false;
+  let textStarted = false;
+  let totalPromptTokens = 0;
+  let totalCompletionTokens = 0;
+  let finishEmitted = false;
+  for await (const evt of stream) {
+    if (params.signal?.aborted) {
+      yield { type: "error", error: new Error("Aborted") };
+      return;
+    }
+    const t = evt?.type ?? "";
+    if (t === "response.reasoning_summary_text.delta") {
+      const delta = evt.delta ?? "";
+      if (!delta) continue;
+      if (!reasoningStarted) {
+        yield { type: "thinking:start" };
+        reasoningStarted = true;
+      }
+      yield { type: "thinking:delta", content: delta };
+      continue;
+    }
+    if (t === "response.reasoning_summary_text.done" || t === "response.reasoning.done") {
+      continue;
+    }
+    if (t === "response.output_text.delta") {
+      const text = evt.delta ?? "";
+      if (!text) continue;
+      if (reasoningStarted && !textStarted) {
+        yield { type: "thinking:end" };
+        textStarted = true;
+      }
+      yield { type: "text-delta", text };
+      continue;
+    }
+    if (t === "response.output_item.added") {
+      const item = evt.item;
+      if (item?.type === "function_call") {
+        const id = item.call_id ?? item.id ?? "";
+        if (id) {
+          toolCalls.set(id, {
+            id,
+            name: item.name ?? "",
+            arguments: item.arguments ?? ""
+          });
+        }
+      }
+      continue;
+    }
+    if (t === "response.function_call_arguments.delta") {
+      const id = evt.call_id ?? evt.item_id ?? "";
+      const delta = evt.delta ?? "";
+      if (!id || !delta) continue;
+      const existing = toolCalls.get(id);
+      if (existing) {
+        existing.arguments += delta;
+      } else {
+        toolCalls.set(id, { id, name: "", arguments: delta });
+      }
+      continue;
+    }
+    if (t === "response.output_item.done") {
+      const item = evt.item;
+      if (item?.type === "function_call") {
+        const id = item.call_id ?? item.id ?? "";
+        const tc = toolCalls.get(id);
+        const name = tc?.name || item.name || "";
+        const argsStr = tc?.arguments || item.arguments || "{}";
+        let args = {};
+        try {
+          args = JSON.parse(argsStr || "{}");
+        } catch {
+          args = {};
+        }
+        if (id && name) {
+          yield {
+            type: "tool-call",
+            toolCall: { id, name, args }
+          };
+        }
+        toolCalls.delete(id);
+      }
+      continue;
+    }
+    if (t === "response.completed") {
+      const usage = evt.response?.usage;
+      if (usage) {
+        totalPromptTokens = usage.input_tokens ?? 0;
+        totalCompletionTokens = usage.output_tokens ?? 0;
+      }
+      for (const tc of toolCalls.values()) {
+        let args = {};
+        try {
+          args = JSON.parse(tc.arguments || "{}");
+        } catch {
+          args = {};
+        }
+        if (tc.id && tc.name) {
+          yield {
+            type: "tool-call",
+            toolCall: { id: tc.id, name: tc.name, args }
+          };
+        }
+      }
+      toolCalls.clear();
+      if (reasoningStarted && !textStarted) {
+        yield { type: "thinking:end" };
+      }
+      const finishReason = toolCalls.size > 0 ? "tool-calls" : "stop";
+      yield {
+        type: "finish",
+        finishReason,
+        usage: {
+          promptTokens: totalPromptTokens,
+          completionTokens: totalCompletionTokens,
+          totalTokens: totalPromptTokens + totalCompletionTokens
+        }
+      };
+      finishEmitted = true;
+      continue;
+    }
+    if (t === "response.error" || t === "error") {
+      const msg = evt.error?.message || evt.message || "Responses API error";
+      yield { type: "error", error: new Error(msg) };
+      return;
+    }
+  }
+  if (!finishEmitted) {
+    if (reasoningStarted && !textStarted) {
+      yield { type: "thinking:end" };
+    }
+    yield {
+      type: "finish",
+      finishReason: "stop",
+      usage: {
+        promptTokens: totalPromptTokens,
+        completionTokens: totalCompletionTokens,
+        totalTokens: totalPromptTokens + totalCompletionTokens
+      }
+    };
+  }
+}
 async function fetchOpenRouterModels(apiKey) {
   const headers = {
     "Content-Type": "application/json"
@@ -473,6 +759,7 @@ var OpenAIAdapter = class _OpenAIAdapter {
     if (baseUrl.includes("generativelanguage.googleapis.com")) return "google";
     if (baseUrl.includes("x.ai")) return "xai";
     if (baseUrl.includes("azure")) return "azure";
+    if (baseUrl.includes("openrouter.ai")) return "openrouter";
     return "openai";
   }
   async getClient() {
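The new branch extends the baseUrl sniffing so OpenRouter endpoints are classified as their own provider. A sketch of the detection order (the standalone function name is an assumption; the substring checks mirror the hunk):

```ts
// Assumed standalone version of the adapter's provider detection.
function detectProvider(baseUrl: string): string {
  if (baseUrl.includes("generativelanguage.googleapis.com")) return "google";
  if (baseUrl.includes("x.ai")) return "xai";
  if (baseUrl.includes("azure")) return "azure";
  if (baseUrl.includes("openrouter.ai")) return "openrouter"; // new in 2.1.10-alpha.0
  return "openai";
}

detectProvider("https://openrouter.ai/api/v1"); // "openrouter"
```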
@@ -572,6 +859,256 @@ var OpenAIAdapter = class _OpenAIAdapter {
       rawResponse: response
     };
   }
+  /**
+   * OpenAI reasoning models on OpenRouter (o1/o3/o4/gpt-5 family) hide their
+   * reasoning content on the chat-completions endpoint. To surface reasoning
+   * SUMMARIES (not raw CoT, which OpenAI never exposes) we have to use the
+   * Responses API, which streams `response.reasoning_summary_text.delta` events.
+   *
+   * Match by prefix on the OpenRouter model id. Excludes openai/gpt-4o,
+   * openai/gpt-4.1, openai/chatgpt-* — those continue on chat-completions.
+   */
+  isOpenAIReasoningModelOnOpenRouter(activeModel) {
+    if (this.provider !== "openrouter") return false;
+    return activeModel.startsWith("openai/o1") || activeModel.startsWith("openai/o3") || activeModel.startsWith("openai/o4") || activeModel.startsWith("openai/gpt-5");
+  }
+  /**
+   * Convert ActionDefinition[] (the chat-completions tool shape used by the
+   * adapter) to the Responses API tool shape.
+   */
+  buildResponsesToolsFromActions(actions) {
+    if (!actions || actions.length === 0) return void 0;
+    const formatted = formatTools(actions);
+    return formatted.map((t) => ({
+      type: "function",
+      name: t.function.name,
+      description: t.function.description,
+      parameters: t.function.parameters
+    }));
+  }
+  /**
+   * Streaming Responses API path for OpenAI reasoning models on OpenRouter.
+   *
+   * Maps Responses API SSE events back to the same StreamEvent shapes the
+   * chat-completions path emits, so downstream consumers (processChunk.ts,
+   * frontend tool handlers, plan approval, specialist delegations) see
+   * identical events regardless of which path produced them.
+   *
+   * response.reasoning_summary_text.delta → thinking:start (once) + thinking:delta
+   * response.output_text.delta → message:delta
+   * response.output_item.added (function_call) → action:start (queued buffer)
+   * response.function_call_arguments.delta → action:args (progressive)
+   * response.output_item.done (function_call) → final action:args + action:end
+   * response.completed → message:end + done(usage)
+   * response.error → error
+   */
+  async *streamWithResponsesAPI(request, activeModel, messageId) {
+    const client = await this.getClient();
+    const maxTokensValue = request.config?.maxTokens ?? this.config.maxTokens;
+    const payload = {
+      model: activeModel,
+      input: this.buildResponsesInput(request),
+      stream: true,
+      reasoning: {
+        effort: request.config?.reasoningEffort ?? "medium",
+        summary: "auto"
+      }
+    };
+    if (request.systemPrompt) payload.instructions = request.systemPrompt;
+    if (typeof maxTokensValue === "number")
+      payload.max_output_tokens = maxTokensValue;
+    const tools = this.buildResponsesToolsFromActions(request.actions);
+    if (tools && tools.length > 0) payload.tools = tools;
+    logProviderPayload(
+      "openai",
+      "responses-api request payload",
+      payload,
+      request.debug
+    );
+    let stream;
+    try {
+      stream = await client.responses.create(payload);
+    } catch (error) {
+      yield {
+        type: "error",
+        message: error instanceof Error ? error.message : "Unknown error",
+        code: "OPENAI_RESPONSES_ERROR"
+      };
+      return;
+    }
+    const toolBuffers = /* @__PURE__ */ new Map();
+    const itemIdToCallId = /* @__PURE__ */ new Map();
+    let usage;
+    let reasoningStarted = false;
+    let textStarted = false;
+    let finishEmitted = false;
+    const resolveCallId = (evt) => {
+      if (evt?.call_id) return evt.call_id;
+      if (evt?.item_id) return itemIdToCallId.get(evt.item_id) ?? evt.item_id;
+      if (evt?.item?.call_id) return evt.item.call_id;
+      if (evt?.item?.id) return evt.item.id;
+      return "";
+    };
+    try {
+      for await (const evt of stream) {
+        logProviderPayload(
+          "openai",
+          "responses-api stream chunk",
+          evt,
+          request.debug
+        );
+        if (request.signal?.aborted) break;
+        const t = evt?.type ?? "";
+        if (t === "response.reasoning_summary_text.delta") {
+          const delta = evt.delta ?? "";
+          if (!delta) continue;
+          if (!reasoningStarted) {
+            yield { type: "thinking:start" };
+            reasoningStarted = true;
+          }
+          yield { type: "thinking:delta", content: delta };
+          continue;
+        }
+        if (t === "response.reasoning_summary_text.done" || t === "response.reasoning.done") {
+          continue;
+        }
+        if (t === "response.output_text.delta") {
+          const text = evt.delta ?? "";
+          if (!text) continue;
+          if (reasoningStarted && !textStarted) {
+            yield { type: "thinking:end" };
+            textStarted = true;
+          }
+          yield { type: "message:delta", content: text };
+          continue;
+        }
+        if (t === "response.output_item.added") {
+          const item = evt.item;
+          if (item?.type === "function_call") {
+            const callId = item.call_id ?? item.id ?? "";
+            const itemId = item.id ?? callId;
+            if (callId) {
+              if (itemId && itemId !== callId) {
+                itemIdToCallId.set(itemId, callId);
+              }
+              if (!toolBuffers.has(callId)) {
+                toolBuffers.set(callId, {
+                  id: callId,
+                  name: item.name ?? "",
+                  arguments: item.arguments ?? "",
+                  emittedStart: false
+                });
+              }
+              const buf = toolBuffers.get(callId);
+              if (buf.name && !buf.emittedStart) {
+                yield { type: "action:start", id: buf.id, name: buf.name };
+                buf.emittedStart = true;
+              }
+            }
+          }
+          continue;
+        }
+        if (t === "response.function_call_arguments.delta") {
+          const callId = resolveCallId(evt);
+          const delta = evt.delta ?? "";
+          if (!callId || !delta) continue;
+          let buf = toolBuffers.get(callId);
+          if (!buf) {
+            buf = { id: callId, name: "", arguments: "", emittedStart: false };
+            toolBuffers.set(callId, buf);
+          }
+          buf.arguments += delta;
+          if (buf.emittedStart) {
+            yield {
+              type: "action:args",
+              id: buf.id,
+              args: buf.arguments
+            };
+          }
+          continue;
+        }
+        if (t === "response.output_item.done") {
+          const item = evt.item;
+          if (item?.type === "function_call") {
+            const callId = item.call_id ?? item.id ?? "";
+            const buf = toolBuffers.get(callId);
+            const name = buf?.name || item.name || "";
+            const argsStr = buf?.arguments || item.arguments || "{}";
+            if (callId && name) {
+              if (!buf?.emittedStart) {
+                yield { type: "action:start", id: callId, name };
+              }
+              yield {
+                type: "action:args",
+                id: callId,
+                args: argsStr
+              };
+              yield {
+                type: "action:end",
+                id: callId,
+                name
+              };
+            }
+            toolBuffers.delete(callId);
+          }
+          continue;
+        }
+        if (t === "response.completed") {
+          const u = evt.response?.usage;
+          if (u) {
+            usage = {
+              prompt_tokens: u.input_tokens ?? 0,
+              completion_tokens: u.output_tokens ?? 0,
+              total_tokens: u.total_tokens ?? (u.input_tokens ?? 0) + (u.output_tokens ?? 0)
+            };
+          }
+          for (const buf of toolBuffers.values()) {
+            if (!buf.id || !buf.name) continue;
+            if (!buf.emittedStart) {
+              yield { type: "action:start", id: buf.id, name: buf.name };
+            }
+            yield {
+              type: "action:args",
+              id: buf.id,
+              args: buf.arguments || "{}"
+            };
+            yield { type: "action:end", id: buf.id, name: buf.name };
+          }
+          toolBuffers.clear();
+          if (reasoningStarted && !textStarted) {
+            yield { type: "thinking:end" };
+          }
+          yield { type: "message:end" };
+          yield { type: "done", usage };
+          finishEmitted = true;
+          continue;
+        }
+        if (t === "response.error" || t === "error") {
+          const msg = evt.error?.message || evt.message || "Responses API error";
+          yield {
+            type: "error",
+            message: msg,
+            code: "OPENAI_RESPONSES_ERROR"
+          };
+          return;
+        }
+      }
+    } catch (error) {
+      yield {
+        type: "error",
+        message: error instanceof Error ? error.message : "Unknown error",
+        code: "OPENAI_RESPONSES_ERROR"
+      };
+      return;
+    }
+    if (!finishEmitted) {
+      if (reasoningStarted && !textStarted) {
+        yield { type: "thinking:end" };
+      }
+      yield { type: "message:end" };
+      yield { type: "done", usage };
+    }
+  }
   async completeWithResponses(request) {
     const client = await this.getClient();
     const openaiToolOptions = request.providerToolOptions?.openai;
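The mapping table in the doc comment means downstream code can stay transport-agnostic. A hypothetical consumer that accumulates tool-call arguments from these events (the event union is transcribed from the hunk above; everything else is illustrative):

```ts
// Event shapes transcribed from streamWithResponsesAPI above.
type AdapterEvent =
  | { type: "thinking:start" }
  | { type: "thinking:delta"; content: string }
  | { type: "thinking:end" }
  | { type: "message:delta"; content: string }
  | { type: "message:end" }
  | { type: "action:start"; id: string; name: string }
  | { type: "action:args"; id: string; args: string } // cumulative JSON string so far
  | { type: "action:end"; id: string; name: string }
  | { type: "done"; usage?: { prompt_tokens: number; completion_tokens: number; total_tokens: number } };

// Hypothetical consumer: each action:args carries the full buffer so far,
// so keeping only the latest snapshot per id is sufficient.
async function collectToolCalls(stream: AsyncIterable<AdapterEvent>) {
  const argsById = new Map<string, string>();
  for await (const evt of stream) {
    if (evt.type === "action:args") argsById.set(evt.id, evt.args);
    if (evt.type === "action:end") {
      const args = JSON.parse(argsById.get(evt.id) ?? "{}");
      console.log(`tool ${evt.name} (${evt.id})`, args);
    }
  }
}
```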
@@ -705,16 +1242,37 @@ var OpenAIAdapter = class _OpenAIAdapter {
         name: openaiToolOptions.toolChoice.name
       }
     } : openaiToolOptions?.toolChoice;
+    const isOpenRouter = this.provider === "openrouter";
+    const activeModel = request.config?.model || this.model;
+    const modelSlug = activeModel.replace("openai/", "");
+    const isOSeries = /^o[1-9]/.test(modelSlug);
+    const isOpenAIOnOpenRouter = isOpenRouter && activeModel.startsWith("openai/");
+    if (!this.config.disableThinking && this.isOpenAIReasoningModelOnOpenRouter(activeModel)) {
+      yield* this.streamWithResponsesAPI(request, activeModel, messageId);
+      return;
+    }
+    const maxTokensValue = request.config?.maxTokens ?? this.config.maxTokens;
     const payload = {
-      model: request.config?.model || this.model,
+      model: activeModel,
       messages,
       tools: tools.length > 0 ? tools : void 0,
       tool_choice: tools.length > 0 ? toolChoice : void 0,
       parallel_tool_calls: tools.length > 0 ? openaiToolOptions?.parallelToolCalls : void 0,
-      temperature: request.config?.temperature ?? this.config.temperature,
-      max_tokens: request.config?.maxTokens ?? this.config.maxTokens,
       stream: true,
-      stream_options: { include_usage: true }
+      stream_options: { include_usage: true },
+      // o-series: use max_completion_tokens + reasoning_effort, no temperature
+      // regular models: use max_tokens + temperature
+      ...isOSeries ? {
+        max_completion_tokens: maxTokensValue,
+        reasoning_effort: request.config?.reasoningEffort ?? "medium"
+      } : {
+        temperature: request.config?.temperature ?? this.config.temperature,
+        max_tokens: maxTokensValue
+      },
+      // Non-OpenAI OpenRouter models support OR's reasoning/include_reasoning params.
+      // When disableThinking=true we must explicitly send include_reasoning:false because
+      // models like Qwen3 and DeepSeek-R1 reason by default even without the reasoning param.
+      ...isOpenRouter && !isOpenAIOnOpenRouter ? this.config.disableThinking ? { include_reasoning: false } : { reasoning: { max_tokens: 8e3 }, include_reasoning: true } : {}
     };
     logProviderPayload("openai", "request payload", payload, request.debug);
     const stream = await client.chat.completions.create(payload);
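The spread above is the whole o-series story: those models reject `temperature` and take `max_completion_tokens` plus `reasoning_effort` in place of `max_tokens`. A self-contained sketch of the split (placeholder values; the regex and parameter names mirror the hunk):

```ts
// Placeholder inputs; the split mirrors the hunk above.
const activeModel = "openai/o3-mini";
const maxTokensValue = 4096;
const isOSeries = /^o[1-9]/.test(activeModel.replace("openai/", ""));

const payload = {
  model: activeModel,
  messages: [{ role: "user", content: "hi" }],
  stream: true,
  stream_options: { include_usage: true },
  ...(isOSeries
    ? { max_completion_tokens: maxTokensValue, reasoning_effort: "medium" } // no temperature
    : { temperature: 0.7, max_tokens: maxTokensValue })
};
```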
@@ -722,6 +1280,7 @@ var OpenAIAdapter = class _OpenAIAdapter {
     const collectedCitations = [];
     let citationIndex = 0;
     let usage;
+    let adapterReasoningStarted = false;
     for await (const chunk of stream) {
       logProviderPayload("openai", "stream chunk", chunk, request.debug);
       if (request.signal?.aborted) {
@@ -732,6 +1291,22 @@ var OpenAIAdapter = class _OpenAIAdapter {
       if (delta?.content) {
         yield { type: "message:delta", content: delta.content };
       }
+      if (isOpenRouter) {
+        const rc = delta?.reasoning_content ?? delta?.reasoning ?? null;
+        if (rc) {
+          const rcText = typeof rc === "string" ? rc : Array.isArray(rc) && rc[0]?.text ? rc[0].text : "";
+          if (rcText) {
+            if (!adapterReasoningStarted) {
+              yield { type: "thinking:start" };
+              adapterReasoningStarted = true;
+            }
+            yield { type: "thinking:delta", content: rcText };
+          }
+        } else if (adapterReasoningStarted && (delta?.content || choice?.finish_reason)) {
+          yield { type: "thinking:end" };
+          adapterReasoningStarted = false;
+        }
+      }
       const annotations = delta?.annotations;
       if (annotations && annotations.length > 0) {
         for (const annotation of annotations) {
@@ -779,6 +1354,11 @@ var OpenAIAdapter = class _OpenAIAdapter {
         };
       } else if (currentToolCall && toolCall.function?.arguments) {
         currentToolCall.arguments += toolCall.function.arguments;
+        yield {
+          type: "action:args",
+          id: currentToolCall.id,
+          args: currentToolCall.arguments
+        };
       }
     }
   }
@@ -854,15 +1434,24 @@ var OpenAIAdapter = class _OpenAIAdapter {
         name: openaiToolOptions.toolChoice.name
       }
     } : openaiToolOptions?.toolChoice;
+    const activeModel2 = request.config?.model || this.model;
+    const modelSlug2 = activeModel2.replace("openai/", "");
+    const isOSeries2 = /^o[1-9]/.test(modelSlug2);
+    const maxTokensValue2 = request.config?.maxTokens ?? this.config.maxTokens;
     const payload = {
-      model: request.config?.model || this.model,
+      model: activeModel2,
       messages,
       tools: tools.length > 0 ? tools : void 0,
       tool_choice: tools.length > 0 ? toolChoice : void 0,
       parallel_tool_calls: tools.length > 0 ? openaiToolOptions?.parallelToolCalls : void 0,
-      temperature: request.config?.temperature ?? this.config.temperature,
-      max_tokens: request.config?.maxTokens ?? this.config.maxTokens,
-      stream: false
+      stream: false,
+      ...isOSeries2 ? {
+        max_completion_tokens: maxTokensValue2,
+        reasoning_effort: request.config?.reasoningEffort ?? "medium"
+      } : {
+        temperature: request.config?.temperature ?? this.config.temperature,
+        max_tokens: maxTokensValue2
+      }
     };
     logProviderPayload("openai", "request payload", payload, request.debug);
     const response = await client.chat.completions.create(payload);
@@ -945,7 +1534,8 @@ function createOpenRouter(config = {}) {
   return createOpenAIAdapter({
     apiKey,
     model: modelId,
-    baseUrl
+    baseUrl,
+    disableThinking: config.disableThinking
   });
 };
 const getCapabilities = (modelId) => {
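With the flag threaded through, callers can opt out of reasoning at construction time. A hypothetical usage sketch (the import path and the shape of the returned factory are assumptions; only `disableThinking` is visible in this diff):

```ts
import { createOpenRouter } from "@yourgpt/llm-sdk"; // import path assumed

// disableThinking now reaches the adapter, which sends include_reasoning: false
// for models (e.g. Qwen3, DeepSeek-R1) that otherwise reason by default.
const openrouterProvider = createOpenRouter({
  apiKey: process.env.OPENROUTER_API_KEY, // option name assumed
  disableThinking: true
});
```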