@yourgpt/llm-sdk 2.1.9 → 2.1.10-alpha.0

Files changed (51)
  1. package/dist/adapters/index.d.mts +38 -4
  2. package/dist/adapters/index.d.ts +38 -4
  3. package/dist/adapters/index.js +318 -8
  4. package/dist/adapters/index.mjs +318 -8
  5. package/dist/{base-iGi9Va6Z.d.ts → base-DN1EfKnE.d.mts} +2 -1
  6. package/dist/{base-D-U61JaB.d.mts → base-DuUNxtVg.d.ts} +2 -1
  7. package/dist/fallback/index.d.mts +4 -4
  8. package/dist/fallback/index.d.ts +4 -4
  9. package/dist/index.d.mts +7 -7
  10. package/dist/index.d.ts +7 -7
  11. package/dist/index.js +43 -23
  12. package/dist/index.mjs +43 -23
  13. package/dist/providers/anthropic/index.d.mts +3 -3
  14. package/dist/providers/anthropic/index.d.ts +3 -3
  15. package/dist/providers/anthropic/index.js +17 -0
  16. package/dist/providers/anthropic/index.mjs +17 -0
  17. package/dist/providers/azure/index.d.mts +3 -3
  18. package/dist/providers/azure/index.d.ts +3 -3
  19. package/dist/providers/fireworks/index.d.mts +1 -1
  20. package/dist/providers/fireworks/index.d.ts +1 -1
  21. package/dist/providers/google/index.d.mts +3 -3
  22. package/dist/providers/google/index.d.ts +3 -3
  23. package/dist/providers/google/index.js +311 -8
  24. package/dist/providers/google/index.mjs +311 -8
  25. package/dist/providers/ollama/index.d.mts +4 -4
  26. package/dist/providers/ollama/index.d.ts +4 -4
  27. package/dist/providers/openai/index.d.mts +3 -3
  28. package/dist/providers/openai/index.d.ts +3 -3
  29. package/dist/providers/openai/index.js +321 -8
  30. package/dist/providers/openai/index.mjs +321 -8
  31. package/dist/providers/openrouter/index.d.mts +7 -3
  32. package/dist/providers/openrouter/index.d.ts +7 -3
  33. package/dist/providers/openrouter/index.js +601 -11
  34. package/dist/providers/openrouter/index.mjs +601 -11
  35. package/dist/providers/togetherai/index.d.mts +3 -3
  36. package/dist/providers/togetherai/index.d.ts +3 -3
  37. package/dist/providers/togetherai/index.js +311 -8
  38. package/dist/providers/togetherai/index.mjs +311 -8
  39. package/dist/providers/xai/index.d.mts +3 -3
  40. package/dist/providers/xai/index.d.ts +3 -3
  41. package/dist/providers/xai/index.js +311 -8
  42. package/dist/providers/xai/index.mjs +311 -8
  43. package/dist/{types-D4YfrQJR.d.mts → types-BNCmlJMs.d.mts} +1 -1
  44. package/dist/{types-DRqxMIjF.d.mts → types-CMMQ8s2O.d.mts} +1 -1
  45. package/dist/{types-CR8mi9I0.d.ts → types-CMvvDo-E.d.mts} +12 -1
  46. package/dist/{types-CR8mi9I0.d.mts → types-CMvvDo-E.d.ts} +12 -1
  47. package/dist/{types-BctsnC3g.d.ts → types-DhktekQ3.d.ts} +1 -1
  48. package/dist/{types-38yolWJn.d.ts → types-Pj-vpmoT.d.ts} +1 -1
  49. package/dist/yourgpt/index.d.mts +1 -1
  50. package/dist/yourgpt/index.d.ts +1 -1
  51. package/package.json +1 -1
@@ -5,6 +5,9 @@ var DEFAULT_MODEL_CONFIG = {
   jsonMode: true,
   maxTokens: 128e3
 };
+function isOpenAIReasoningModel(modelId) {
+  return modelId.startsWith("openai/o1") || modelId.startsWith("openai/o3") || modelId.startsWith("openai/o4") || modelId.startsWith("openai/gpt-5");
+}
 function openrouter(modelId, options = {}) {
   const apiKey = options.apiKey ?? process.env.OPENROUTER_API_KEY;
   const baseURL = options.baseURL ?? "https://openrouter.ai/api/v1";
@@ -36,7 +39,7 @@ function openrouter(modelId, options = {}) {
     supportsTools: modelConfig.tools,
     supportsStreaming: true,
     supportsJsonMode: modelConfig.jsonMode,
-    supportsThinking: false,
+    supportsThinking: true,
     supportsPDF: false,
     maxTokens: modelConfig.maxTokens,
     supportedImageTypes: modelConfig.vision ? ["image/png", "image/jpeg", "image/gif", "image/webp"] : []
@@ -79,6 +82,11 @@ function openrouter(modelId, options = {}) {
     };
   },
   async *doStream(params) {
+    if (!options.disableThinking && isOpenAIReasoningModel(modelId)) {
+      const client3 = await getClient();
+      yield* doStreamResponsesAPI(client3, modelId, params);
+      return;
+    }
     const client2 = await getClient();
     const messages = formatMessagesForOpenRouter(params.messages);
     const requestBody = {
@@ -86,7 +94,8 @@ function openrouter(modelId, options = {}) {
       messages,
       temperature: params.temperature,
       max_tokens: params.maxTokens,
-      stream: true
+      stream: true,
+      ...!options.disableThinking ? { reasoning: { max_tokens: 8e3 }, include_reasoning: true } : {}
     };
     if (params.tools) {
       requestBody.tools = params.tools;
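
For non-OpenAI models this spread is the whole opt-in: whenever `disableThinking` is unset, every streaming request carries OpenRouter's reasoning extensions. A sketch of the resulting wire body (the model id and message are placeholders, not values from the package):

```ts
// Hypothetical chat-completions body as built above, thinking enabled.
const requestBody = {
  model: "deepseek/deepseek-r1", // placeholder model id
  messages: [{ role: "user", content: "Why is the sky blue?" }],
  temperature: 0.7,
  max_tokens: 1024,
  stream: true,
  reasoning: { max_tokens: 8e3 }, // cap reasoning output at 8000 tokens
  include_reasoning: true         // ask OpenRouter to stream reasoning deltas
};
```
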
@@ -98,6 +107,7 @@ function openrouter(modelId, options = {}) {
     let currentToolCall = null;
     let totalPromptTokens = 0;
     let totalCompletionTokens = 0;
+    let orReasoningStarted = false;
     for await (const chunk of stream) {
       if (params.signal?.aborted) {
         yield { type: "error", error: new Error("Aborted") };
@@ -108,6 +118,20 @@ function openrouter(modelId, options = {}) {
       if (delta?.content) {
         yield { type: "text-delta", text: delta.content };
       }
+      const rc = delta?.reasoning_content ?? delta?.reasoning ?? null;
+      if (rc) {
+        const rcText = typeof rc === "string" ? rc : Array.isArray(rc) && rc[0]?.text ? rc[0].text : "";
+        if (rcText) {
+          if (!orReasoningStarted) {
+            yield { type: "thinking:start" };
+            orReasoningStarted = true;
+          }
+          yield { type: "thinking:delta", content: rcText };
+        }
+      } else if (orReasoningStarted && (delta?.content || choice?.finish_reason)) {
+        yield { type: "thinking:end" };
+        orReasoningStarted = false;
+      }
       if (delta?.tool_calls) {
         for (const tc of delta.tool_calls) {
           if (tc.id) {
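
Downstream code can branch on the new event types as they arrive. A minimal consumer sketch, assuming `model` is the object returned by `openrouter(...)` and the placeholder model id streams reasoning:

```ts
// Consumer sketch: events as yielded by the loop above.
const model = openrouter("deepseek/deepseek-r1", { apiKey: process.env.OPENROUTER_API_KEY });
const messages = [{ role: "user", content: "Why is the sky blue?" }];
for await (const event of model.doStream({ messages, maxTokens: 1024 })) {
  switch (event.type) {
    case "thinking:start": process.stdout.write("\n[thinking] "); break;
    case "thinking:delta": process.stdout.write(event.content); break;
    case "thinking:end":   process.stdout.write("\n[answer] ");  break;
    case "text-delta":     process.stdout.write(event.text);     break;
  }
}
```
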
@@ -226,6 +250,268 @@ function formatMessagesForOpenRouter(messages) {
     }
   });
 }
+function formatMessagesForResponsesAPI(messages) {
+  const out = [];
+  for (const msg of messages) {
+    if (msg.role === "system") {
+      out.push({
+        type: "message",
+        role: "system",
+        content: [
+          {
+            type: "input_text",
+            text: typeof msg.content === "string" ? msg.content : ""
+          }
+        ]
+      });
+      continue;
+    }
+    if (msg.role === "user") {
+      const parts = [];
+      if (typeof msg.content === "string") {
+        parts.push({ type: "input_text", text: msg.content });
+      } else {
+        for (const part of msg.content) {
+          if (part.type === "text") {
+            parts.push({ type: "input_text", text: part.text });
+          } else if (part.type === "image") {
+            const imageData = typeof part.image === "string" ? part.image : Buffer.from(part.image).toString("base64");
+            const url = imageData.startsWith("data:") ? imageData : `data:${part.mimeType ?? "image/png"};base64,${imageData}`;
+            parts.push({ type: "input_image", image_url: url });
+          }
+        }
+      }
+      out.push({ type: "message", role: "user", content: parts });
+      continue;
+    }
+    if (msg.role === "assistant") {
+      if (msg.toolCalls && msg.toolCalls.length > 0) {
+        for (const tc of msg.toolCalls) {
+          out.push({
+            type: "function_call",
+            call_id: tc.id,
+            name: tc.name,
+            arguments: JSON.stringify(tc.args ?? {})
+          });
+        }
+        if (typeof msg.content === "string" && msg.content.length > 0) {
+          out.push({
+            type: "message",
+            role: "assistant",
+            content: [{ type: "output_text", text: msg.content }]
+          });
+        }
+      } else {
+        const text = typeof msg.content === "string" ? msg.content : "";
+        out.push({
+          type: "message",
+          role: "assistant",
+          content: [{ type: "output_text", text }]
+        });
+      }
+      continue;
+    }
+    if (msg.role === "tool") {
+      out.push({
+        type: "function_call_output",
+        call_id: msg.toolCallId,
+        output: typeof msg.content === "string" ? msg.content : JSON.stringify(msg.content)
+      });
+      continue;
+    }
+  }
+  return out;
+}
+function formatToolsForResponsesAPI(tools) {
+  if (!tools || tools.length === 0) return void 0;
+  return tools.map((t) => {
+    if (t?.name && t?.parameters && t?.type === "function") return t;
+    const fn = t?.function ?? t;
+    return {
+      type: "function",
+      name: fn.name,
+      description: fn.description,
+      parameters: fn.parameters ?? { type: "object", properties: {} }
+    };
+  });
+}
+async function* doStreamResponsesAPI(client, modelId, params) {
+  const systemTexts = [];
+  const nonSystem = [];
+  for (const m of params.messages) {
+    if (m.role === "system" && typeof m.content === "string") {
+      systemTexts.push(m.content);
+    } else {
+      nonSystem.push(m);
+    }
+  }
+  const instructions = systemTexts.join("\n\n") || void 0;
+  const input = formatMessagesForResponsesAPI(nonSystem);
+  const requestBody = {
+    model: modelId,
+    input,
+    stream: true,
+    reasoning: { effort: "medium", summary: "auto" }
+  };
+  if (instructions) requestBody.instructions = instructions;
+  if (typeof params.maxTokens === "number")
+    requestBody.max_output_tokens = params.maxTokens;
+  if (typeof params.temperature === "number")
+    requestBody.temperature = params.temperature;
+  const tools = formatToolsForResponsesAPI(params.tools);
+  if (tools) requestBody.tools = tools;
+  let stream;
+  try {
+    stream = await client.responses.create(requestBody);
+  } catch (err) {
+    yield {
+      type: "error",
+      error: err instanceof Error ? err : new Error(String(err))
+    };
+    return;
+  }
+  const toolCalls = /* @__PURE__ */ new Map();
+  let reasoningStarted = false;
+  let textStarted = false;
+  let totalPromptTokens = 0;
+  let totalCompletionTokens = 0;
+  let finishEmitted = false;
+  for await (const evt of stream) {
+    if (params.signal?.aborted) {
+      yield { type: "error", error: new Error("Aborted") };
+      return;
+    }
+    const t = evt?.type ?? "";
+    if (t === "response.reasoning_summary_text.delta") {
+      const delta = evt.delta ?? "";
+      if (!delta) continue;
+      if (!reasoningStarted) {
+        yield { type: "thinking:start" };
+        reasoningStarted = true;
+      }
+      yield { type: "thinking:delta", content: delta };
+      continue;
+    }
+    if (t === "response.reasoning_summary_text.done" || t === "response.reasoning.done") {
+      continue;
+    }
+    if (t === "response.output_text.delta") {
+      const text = evt.delta ?? "";
+      if (!text) continue;
+      if (reasoningStarted && !textStarted) {
+        yield { type: "thinking:end" };
+        textStarted = true;
+      }
+      yield { type: "text-delta", text };
+      continue;
+    }
+    if (t === "response.output_item.added") {
+      const item = evt.item;
+      if (item?.type === "function_call") {
+        const id = item.call_id ?? item.id ?? "";
+        if (id) {
+          toolCalls.set(id, {
+            id,
+            name: item.name ?? "",
+            arguments: item.arguments ?? ""
+          });
+        }
+      }
+      continue;
+    }
+    if (t === "response.function_call_arguments.delta") {
+      const id = evt.call_id ?? evt.item_id ?? "";
+      const delta = evt.delta ?? "";
+      if (!id || !delta) continue;
+      const existing = toolCalls.get(id);
+      if (existing) {
+        existing.arguments += delta;
+      } else {
+        toolCalls.set(id, { id, name: "", arguments: delta });
+      }
+      continue;
+    }
+    if (t === "response.output_item.done") {
+      const item = evt.item;
+      if (item?.type === "function_call") {
+        const id = item.call_id ?? item.id ?? "";
+        const tc = toolCalls.get(id);
+        const name = tc?.name || item.name || "";
+        const argsStr = tc?.arguments || item.arguments || "{}";
+        let args = {};
+        try {
+          args = JSON.parse(argsStr || "{}");
+        } catch {
+          args = {};
+        }
+        if (id && name) {
+          yield {
+            type: "tool-call",
+            toolCall: { id, name, args }
+          };
+        }
+        toolCalls.delete(id);
+      }
+      continue;
+    }
+    if (t === "response.completed") {
+      const usage = evt.response?.usage;
+      if (usage) {
+        totalPromptTokens = usage.input_tokens ?? 0;
+        totalCompletionTokens = usage.output_tokens ?? 0;
+      }
+      for (const tc of toolCalls.values()) {
+        let args = {};
+        try {
+          args = JSON.parse(tc.arguments || "{}");
+        } catch {
+          args = {};
+        }
+        if (tc.id && tc.name) {
+          yield {
+            type: "tool-call",
+            toolCall: { id: tc.id, name: tc.name, args }
+          };
+        }
+      }
+      toolCalls.clear();
+      if (reasoningStarted && !textStarted) {
+        yield { type: "thinking:end" };
+      }
+      const finishReason = toolCalls.size > 0 ? "tool-calls" : "stop";
+      yield {
+        type: "finish",
+        finishReason,
+        usage: {
+          promptTokens: totalPromptTokens,
+          completionTokens: totalCompletionTokens,
+          totalTokens: totalPromptTokens + totalCompletionTokens
+        }
+      };
+      finishEmitted = true;
+      continue;
+    }
+    if (t === "response.error" || t === "error") {
+      const msg = evt.error?.message || evt.message || "Responses API error";
+      yield { type: "error", error: new Error(msg) };
+      return;
+    }
+  }
+  if (!finishEmitted) {
+    if (reasoningStarted && !textStarted) {
+      yield { type: "thinking:end" };
+    }
+    yield {
+      type: "finish",
+      finishReason: "stop",
+      usage: {
+        promptTokens: totalPromptTokens,
+        completionTokens: totalCompletionTokens,
+        totalTokens: totalPromptTokens + totalCompletionTokens
+      }
+    };
+  }
+}
 async function fetchOpenRouterModels(apiKey) {
   const headers = {
     "Content-Type": "application/json"
@@ -471,6 +757,7 @@ var OpenAIAdapter = class _OpenAIAdapter {
     if (baseUrl.includes("generativelanguage.googleapis.com")) return "google";
     if (baseUrl.includes("x.ai")) return "xai";
     if (baseUrl.includes("azure")) return "azure";
+    if (baseUrl.includes("openrouter.ai")) return "openrouter";
     return "openai";
   }
   async getClient() {
@@ -570,6 +857,256 @@ var OpenAIAdapter = class _OpenAIAdapter {
       rawResponse: response
     };
   }
+  /**
+   * OpenAI reasoning models on OpenRouter (o1/o3/o4/gpt-5 family) hide their
+   * reasoning content on the chat-completions endpoint. To surface reasoning
+   * SUMMARIES (not raw CoT, which OpenAI never exposes) we have to use the
+   * Responses API, which streams `response.reasoning_summary_text.delta` events.
+   *
+   * Match by prefix on the OpenRouter model id. Excludes openai/gpt-4o,
+   * openai/gpt-4.1, openai/chatgpt-* — those continue on chat-completions.
+   */
+  isOpenAIReasoningModelOnOpenRouter(activeModel) {
+    if (this.provider !== "openrouter") return false;
+    return activeModel.startsWith("openai/o1") || activeModel.startsWith("openai/o3") || activeModel.startsWith("openai/o4") || activeModel.startsWith("openai/gpt-5");
+  }
+  /**
+   * Convert ActionDefinition[] (the chat-completions tool shape used by the
+   * adapter) to the Responses API tool shape.
+   */
+  buildResponsesToolsFromActions(actions) {
+    if (!actions || actions.length === 0) return void 0;
+    const formatted = formatTools(actions);
+    return formatted.map((t) => ({
+      type: "function",
+      name: t.function.name,
+      description: t.function.description,
+      parameters: t.function.parameters
+    }));
+  }
+  /**
+   * Streaming Responses API path for OpenAI reasoning models on OpenRouter.
+   *
+   * Maps Responses API SSE events back to the same StreamEvent shapes the
+   * chat-completions path emits, so downstream consumers (processChunk.ts,
+   * frontend tool handlers, plan approval, specialist delegations) see
+   * identical events regardless of which path produced them.
+   *
+   * response.reasoning_summary_text.delta → thinking:start (once) + thinking:delta
+   * response.output_text.delta → message:delta
+   * response.output_item.added (function_call) → action:start (queued buffer)
+   * response.function_call_arguments.delta → action:args (progressive)
+   * response.output_item.done (function_call) → final action:args + action:end
+   * response.completed → message:end + done(usage)
+   * response.error → error
+   */
+  async *streamWithResponsesAPI(request, activeModel, messageId) {
+    const client = await this.getClient();
+    const maxTokensValue = request.config?.maxTokens ?? this.config.maxTokens;
+    const payload = {
+      model: activeModel,
+      input: this.buildResponsesInput(request),
+      stream: true,
+      reasoning: {
+        effort: request.config?.reasoningEffort ?? "medium",
+        summary: "auto"
+      }
+    };
+    if (request.systemPrompt) payload.instructions = request.systemPrompt;
+    if (typeof maxTokensValue === "number")
+      payload.max_output_tokens = maxTokensValue;
+    const tools = this.buildResponsesToolsFromActions(request.actions);
+    if (tools && tools.length > 0) payload.tools = tools;
+    logProviderPayload(
+      "openai",
+      "responses-api request payload",
+      payload,
+      request.debug
+    );
+    let stream;
+    try {
+      stream = await client.responses.create(payload);
+    } catch (error) {
+      yield {
+        type: "error",
+        message: error instanceof Error ? error.message : "Unknown error",
+        code: "OPENAI_RESPONSES_ERROR"
+      };
+      return;
+    }
+    const toolBuffers = /* @__PURE__ */ new Map();
+    const itemIdToCallId = /* @__PURE__ */ new Map();
+    let usage;
+    let reasoningStarted = false;
+    let textStarted = false;
+    let finishEmitted = false;
+    const resolveCallId = (evt) => {
+      if (evt?.call_id) return evt.call_id;
+      if (evt?.item_id) return itemIdToCallId.get(evt.item_id) ?? evt.item_id;
+      if (evt?.item?.call_id) return evt.item.call_id;
+      if (evt?.item?.id) return evt.item.id;
+      return "";
+    };
+    try {
+      for await (const evt of stream) {
+        logProviderPayload(
+          "openai",
+          "responses-api stream chunk",
+          evt,
+          request.debug
+        );
+        if (request.signal?.aborted) break;
+        const t = evt?.type ?? "";
+        if (t === "response.reasoning_summary_text.delta") {
+          const delta = evt.delta ?? "";
+          if (!delta) continue;
+          if (!reasoningStarted) {
+            yield { type: "thinking:start" };
+            reasoningStarted = true;
+          }
+          yield { type: "thinking:delta", content: delta };
+          continue;
+        }
+        if (t === "response.reasoning_summary_text.done" || t === "response.reasoning.done") {
+          continue;
+        }
+        if (t === "response.output_text.delta") {
+          const text = evt.delta ?? "";
+          if (!text) continue;
+          if (reasoningStarted && !textStarted) {
+            yield { type: "thinking:end" };
+            textStarted = true;
+          }
+          yield { type: "message:delta", content: text };
+          continue;
+        }
+        if (t === "response.output_item.added") {
+          const item = evt.item;
+          if (item?.type === "function_call") {
+            const callId = item.call_id ?? item.id ?? "";
+            const itemId = item.id ?? callId;
+            if (callId) {
+              if (itemId && itemId !== callId) {
+                itemIdToCallId.set(itemId, callId);
+              }
+              if (!toolBuffers.has(callId)) {
+                toolBuffers.set(callId, {
+                  id: callId,
+                  name: item.name ?? "",
+                  arguments: item.arguments ?? "",
+                  emittedStart: false
+                });
+              }
+              const buf = toolBuffers.get(callId);
+              if (buf.name && !buf.emittedStart) {
+                yield { type: "action:start", id: buf.id, name: buf.name };
+                buf.emittedStart = true;
+              }
+            }
+          }
+          continue;
+        }
+        if (t === "response.function_call_arguments.delta") {
+          const callId = resolveCallId(evt);
+          const delta = evt.delta ?? "";
+          if (!callId || !delta) continue;
+          let buf = toolBuffers.get(callId);
+          if (!buf) {
+            buf = { id: callId, name: "", arguments: "", emittedStart: false };
+            toolBuffers.set(callId, buf);
+          }
+          buf.arguments += delta;
+          if (buf.emittedStart) {
+            yield {
+              type: "action:args",
+              id: buf.id,
+              args: buf.arguments
+            };
+          }
+          continue;
+        }
+        if (t === "response.output_item.done") {
+          const item = evt.item;
+          if (item?.type === "function_call") {
+            const callId = item.call_id ?? item.id ?? "";
+            const buf = toolBuffers.get(callId);
+            const name = buf?.name || item.name || "";
+            const argsStr = buf?.arguments || item.arguments || "{}";
+            if (callId && name) {
+              if (!buf?.emittedStart) {
+                yield { type: "action:start", id: callId, name };
+              }
+              yield {
+                type: "action:args",
+                id: callId,
+                args: argsStr
+              };
+              yield {
+                type: "action:end",
+                id: callId,
+                name
+              };
+            }
+            toolBuffers.delete(callId);
+          }
+          continue;
+        }
+        if (t === "response.completed") {
+          const u = evt.response?.usage;
+          if (u) {
+            usage = {
+              prompt_tokens: u.input_tokens ?? 0,
+              completion_tokens: u.output_tokens ?? 0,
+              total_tokens: u.total_tokens ?? (u.input_tokens ?? 0) + (u.output_tokens ?? 0)
+            };
+          }
+          for (const buf of toolBuffers.values()) {
+            if (!buf.id || !buf.name) continue;
+            if (!buf.emittedStart) {
+              yield { type: "action:start", id: buf.id, name: buf.name };
+            }
+            yield {
+              type: "action:args",
+              id: buf.id,
+              args: buf.arguments || "{}"
+            };
+            yield { type: "action:end", id: buf.id, name: buf.name };
+          }
+          toolBuffers.clear();
+          if (reasoningStarted && !textStarted) {
+            yield { type: "thinking:end" };
+          }
+          yield { type: "message:end" };
+          yield { type: "done", usage };
+          finishEmitted = true;
+          continue;
+        }
+        if (t === "response.error" || t === "error") {
+          const msg = evt.error?.message || evt.message || "Responses API error";
+          yield {
+            type: "error",
+            message: msg,
+            code: "OPENAI_RESPONSES_ERROR"
+          };
+          return;
+        }
+      }
+    } catch (error) {
+      yield {
+        type: "error",
+        message: error instanceof Error ? error.message : "Unknown error",
+        code: "OPENAI_RESPONSES_ERROR"
+      };
+      return;
+    }
+    if (!finishEmitted) {
+      if (reasoningStarted && !textStarted) {
+        yield { type: "thinking:end" };
+      }
+      yield { type: "message:end" };
+      yield { type: "done", usage };
+    }
+  }
   async completeWithResponses(request) {
     const client = await this.getClient();
     const openaiToolOptions = request.providerToolOptions?.openai;
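
Because both transports emit identical adapter events, consumers never branch on which path produced them. The shapes below are inferred from the yields in this diff, not an exported type of the SDK:

```ts
// Inferred event union — an assumption based on the yields above.
type AdapterStreamEvent =
  | { type: "thinking:start" }
  | { type: "thinking:delta"; content: string }
  | { type: "thinking:end" }
  | { type: "message:delta"; content: string }
  | { type: "message:end" }
  | { type: "action:start"; id: string; name: string }
  | { type: "action:args"; id: string; args: string }
  | { type: "action:end"; id: string; name: string }
  | { type: "done"; usage?: { prompt_tokens: number; completion_tokens: number; total_tokens: number } }
  | { type: "error"; message: string; code?: string };
```
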
@@ -703,16 +1240,37 @@ var OpenAIAdapter = class _OpenAIAdapter {
         name: openaiToolOptions.toolChoice.name
       }
     } : openaiToolOptions?.toolChoice;
+    const isOpenRouter = this.provider === "openrouter";
+    const activeModel = request.config?.model || this.model;
+    const modelSlug = activeModel.replace("openai/", "");
+    const isOSeries = /^o[1-9]/.test(modelSlug);
+    const isOpenAIOnOpenRouter = isOpenRouter && activeModel.startsWith("openai/");
+    if (!this.config.disableThinking && this.isOpenAIReasoningModelOnOpenRouter(activeModel)) {
+      yield* this.streamWithResponsesAPI(request, activeModel, messageId);
+      return;
+    }
+    const maxTokensValue = request.config?.maxTokens ?? this.config.maxTokens;
     const payload = {
-      model: request.config?.model || this.model,
+      model: activeModel,
       messages,
       tools: tools.length > 0 ? tools : void 0,
       tool_choice: tools.length > 0 ? toolChoice : void 0,
       parallel_tool_calls: tools.length > 0 ? openaiToolOptions?.parallelToolCalls : void 0,
-      temperature: request.config?.temperature ?? this.config.temperature,
-      max_tokens: request.config?.maxTokens ?? this.config.maxTokens,
       stream: true,
-      stream_options: { include_usage: true }
+      stream_options: { include_usage: true },
+      // o-series: use max_completion_tokens + reasoning_effort, no temperature
+      // regular models: use max_tokens + temperature
+      ...isOSeries ? {
+        max_completion_tokens: maxTokensValue,
+        reasoning_effort: request.config?.reasoningEffort ?? "medium"
+      } : {
+        temperature: request.config?.temperature ?? this.config.temperature,
+        max_tokens: maxTokensValue
+      },
+      // Non-OpenAI OpenRouter models support OR's reasoning/include_reasoning params.
+      // When disableThinking=true we must explicitly send include_reasoning:false because
+      // models like Qwen3 and DeepSeek-R1 reason by default even without the reasoning param.
+      ...isOpenRouter && !isOpenAIOnOpenRouter ? this.config.disableThinking ? { include_reasoning: false } : { reasoning: { max_tokens: 8e3 }, include_reasoning: true } : {}
     };
     logProviderPayload("openai", "request payload", payload, request.debug);
     const stream = await client.chat.completions.create(payload);
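
The conditional spreads produce two mutually exclusive payload shapes. Illustrative values, not taken from the package:

```ts
const messages = [{ role: "user" as const, content: "hi" }];

// o-series ("o1", "o3", ...): reasoning knobs, no temperature
const oSeriesPayload = {
  model: "o3-mini", messages, stream: true, stream_options: { include_usage: true },
  max_completion_tokens: 4096, reasoning_effort: "medium"
};

// everything else: the classic sampling knobs
const regularPayload = {
  model: "gpt-4.1", messages, stream: true, stream_options: { include_usage: true },
  temperature: 0.7, max_tokens: 4096
};
```
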
@@ -720,6 +1278,7 @@ var OpenAIAdapter = class _OpenAIAdapter {
     const collectedCitations = [];
     let citationIndex = 0;
     let usage;
+    let adapterReasoningStarted = false;
     for await (const chunk of stream) {
       logProviderPayload("openai", "stream chunk", chunk, request.debug);
       if (request.signal?.aborted) {
@@ -730,6 +1289,22 @@ var OpenAIAdapter = class _OpenAIAdapter {
       if (delta?.content) {
         yield { type: "message:delta", content: delta.content };
       }
+      if (isOpenRouter) {
+        const rc = delta?.reasoning_content ?? delta?.reasoning ?? null;
+        if (rc) {
+          const rcText = typeof rc === "string" ? rc : Array.isArray(rc) && rc[0]?.text ? rc[0].text : "";
+          if (rcText) {
+            if (!adapterReasoningStarted) {
+              yield { type: "thinking:start" };
+              adapterReasoningStarted = true;
+            }
+            yield { type: "thinking:delta", content: rcText };
+          }
+        } else if (adapterReasoningStarted && (delta?.content || choice?.finish_reason)) {
+          yield { type: "thinking:end" };
+          adapterReasoningStarted = false;
+        }
+      }
       const annotations = delta?.annotations;
       if (annotations && annotations.length > 0) {
         for (const annotation of annotations) {
@@ -777,6 +1352,11 @@ var OpenAIAdapter = class _OpenAIAdapter {
           };
         } else if (currentToolCall && toolCall.function?.arguments) {
           currentToolCall.arguments += toolCall.function.arguments;
+          yield {
+            type: "action:args",
+            id: currentToolCall.id,
+            args: currentToolCall.arguments
+          };
         }
       }
     }
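
Emitting the accumulated string on every delta means `action:args` usually carries incomplete JSON until the final event, so renderers that want live previews need a forgiving parse. A sketch:

```ts
// Tolerate partial JSON while action:args events stream in.
function tryParseArgs(partial: string): Record<string, unknown> | undefined {
  try {
    return JSON.parse(partial);
  } catch {
    return undefined; // incomplete until the closing action:end arrives
  }
}
```
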
@@ -852,15 +1432,24 @@ var OpenAIAdapter = class _OpenAIAdapter {
         name: openaiToolOptions.toolChoice.name
       }
     } : openaiToolOptions?.toolChoice;
+    const activeModel2 = request.config?.model || this.model;
+    const modelSlug2 = activeModel2.replace("openai/", "");
+    const isOSeries2 = /^o[1-9]/.test(modelSlug2);
+    const maxTokensValue2 = request.config?.maxTokens ?? this.config.maxTokens;
     const payload = {
-      model: request.config?.model || this.model,
+      model: activeModel2,
       messages,
       tools: tools.length > 0 ? tools : void 0,
       tool_choice: tools.length > 0 ? toolChoice : void 0,
       parallel_tool_calls: tools.length > 0 ? openaiToolOptions?.parallelToolCalls : void 0,
-      temperature: request.config?.temperature ?? this.config.temperature,
-      max_tokens: request.config?.maxTokens ?? this.config.maxTokens,
-      stream: false
+      stream: false,
+      ...isOSeries2 ? {
+        max_completion_tokens: maxTokensValue2,
+        reasoning_effort: request.config?.reasoningEffort ?? "medium"
+      } : {
+        temperature: request.config?.temperature ?? this.config.temperature,
+        max_tokens: maxTokensValue2
+      }
     };
     logProviderPayload("openai", "request payload", payload, request.debug);
     const response = await client.chat.completions.create(payload);
@@ -943,7 +1532,8 @@ function createOpenRouter(config = {}) {
     return createOpenAIAdapter({
       apiKey,
       model: modelId,
-      baseUrl
+      baseUrl,
+      disableThinking: config.disableThinking
     });
   };
   const getCapabilities = (modelId) => {
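
Taken together, `createOpenRouter` now threads `disableThinking` through to the adapter, which both skips the Responses API route and sends `include_reasoning: false` to models that reason by default. A hedged end-to-end sketch (the config shape beyond `disableThinking` is assumed from the closure over `apiKey` above):

```ts
// Assumed construction: suppress reasoning entirely, including for
// Qwen3/DeepSeek-R1-style models that reason without being asked.
const provider = createOpenRouter({
  apiKey: process.env.OPENROUTER_API_KEY,
  disableThinking: true
});
```
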