@shawnstack/quickforge 1.3.18 → 1.3.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133) hide show
  1. package/README.md +10 -10
  2. package/bin/quickforge.mjs +258 -49
  3. package/dist/assets/anthropic-Bj3HAZgj.js +39 -0
  4. package/dist/assets/azure-openai-responses-IdZZrSrI.js +1 -0
  5. package/dist/assets/github-copilot-headers-CMb2BbzT.js +1 -0
  6. package/dist/assets/google-Brt_lS1J.js +1 -0
  7. package/dist/assets/{google-shared-XhYUKiGZ.js → google-shared-CLc4ziON.js} +3 -3
  8. package/dist/assets/google-vertex-B6HsoZ34.js +1 -0
  9. package/dist/assets/{index-Dm7aEWvT.js → index-D0CVLdX_.js} +525 -489
  10. package/dist/assets/index-D0W9hAl_.css +3 -0
  11. package/dist/assets/{mistral-DxhS4Wkn.js → mistral-CenXqwPz.js} +3 -3
  12. package/dist/assets/openai-codex-responses-D9ffGwbj.js +7 -0
  13. package/dist/assets/openai-completions-eWdeSGBG.js +5 -0
  14. package/dist/assets/openai-responses-Cavpmjeu.js +1 -0
  15. package/dist/assets/{openai-responses-shared-f_P3e1nz.js → openai-responses-shared-DF3ZGaUx.js} +5 -3
  16. package/dist/assets/transform-messages-CmnxG9RB.js +1 -0
  17. package/dist/index.html +2 -2
  18. package/node_modules/@anthropic-ai/sdk/CHANGELOG.md +34 -0
  19. package/node_modules/@anthropic-ai/sdk/bin/migration-config.json +185 -0
  20. package/node_modules/@anthropic-ai/sdk/package.json +1 -1
  21. package/node_modules/@anthropic-ai/sdk/resources/beta/beta.js +4 -0
  22. package/node_modules/@anthropic-ai/sdk/resources/beta/beta.mjs +4 -0
  23. package/node_modules/@anthropic-ai/sdk/resources/beta/files.js +5 -5
  24. package/node_modules/@anthropic-ai/sdk/resources/beta/files.mjs +5 -5
  25. package/node_modules/@anthropic-ai/sdk/resources/beta/index.js +11 -9
  26. package/node_modules/@anthropic-ai/sdk/resources/beta/index.mjs +1 -0
  27. package/node_modules/@anthropic-ai/sdk/resources/beta/memory-stores/index.js +11 -0
  28. package/node_modules/@anthropic-ai/sdk/resources/beta/memory-stores/index.mjs +5 -0
  29. package/node_modules/@anthropic-ai/sdk/resources/beta/memory-stores/memories.js +130 -0
  30. package/node_modules/@anthropic-ai/sdk/resources/beta/memory-stores/memories.mjs +126 -0
  31. package/node_modules/@anthropic-ai/sdk/resources/beta/memory-stores/memory-stores.js +145 -0
  32. package/node_modules/@anthropic-ai/sdk/resources/beta/memory-stores/memory-stores.mjs +140 -0
  33. package/node_modules/@anthropic-ai/sdk/resources/beta/memory-stores/memory-versions.js +81 -0
  34. package/node_modules/@anthropic-ai/sdk/resources/beta/memory-stores/memory-versions.mjs +77 -0
  35. package/node_modules/@anthropic-ai/sdk/resources/beta/memory-stores.js +6 -0
  36. package/node_modules/@anthropic-ai/sdk/resources/beta/memory-stores.mjs +3 -0
  37. package/node_modules/@anthropic-ai/sdk/tools/memory/node.js +12 -5
  38. package/node_modules/@anthropic-ai/sdk/tools/memory/node.mjs +12 -5
  39. package/node_modules/@anthropic-ai/sdk/version.js +1 -1
  40. package/node_modules/@anthropic-ai/sdk/version.mjs +1 -1
  41. package/node_modules/@aws-sdk/client-bedrock-runtime/package.json +5 -5
  42. package/node_modules/@aws-sdk/core/package.json +2 -2
  43. package/node_modules/@aws-sdk/credential-provider-env/package.json +2 -2
  44. package/node_modules/@aws-sdk/credential-provider-http/dist-cjs/fromHttp/fromHttp.js +12 -6
  45. package/node_modules/@aws-sdk/credential-provider-http/dist-es/fromHttp/fromHttp.js +12 -6
  46. package/node_modules/@aws-sdk/credential-provider-http/package.json +3 -2
  47. package/node_modules/@aws-sdk/credential-provider-ini/package.json +9 -9
  48. package/node_modules/@aws-sdk/credential-provider-login/package.json +3 -3
  49. package/node_modules/@aws-sdk/credential-provider-node/package.json +7 -7
  50. package/node_modules/@aws-sdk/credential-provider-process/package.json +2 -2
  51. package/node_modules/@aws-sdk/credential-provider-sso/package.json +4 -4
  52. package/node_modules/@aws-sdk/credential-provider-web-identity/package.json +3 -3
  53. package/node_modules/@aws-sdk/middleware-websocket/package.json +2 -2
  54. package/node_modules/@aws-sdk/nested-clients/dist-cjs/submodules/cognito-identity/index.js +1 -1
  55. package/node_modules/@aws-sdk/nested-clients/dist-cjs/submodules/signin/index.js +1 -1
  56. package/node_modules/@aws-sdk/nested-clients/dist-cjs/submodules/sso/index.js +1 -1
  57. package/node_modules/@aws-sdk/nested-clients/dist-cjs/submodules/sso-oidc/index.js +1 -1
  58. package/node_modules/@aws-sdk/nested-clients/dist-cjs/submodules/sts/index.js +1 -1
  59. package/node_modules/@aws-sdk/nested-clients/package.json +3 -3
  60. package/node_modules/@aws-sdk/signature-v4-multi-region/package.json +1 -2
  61. package/node_modules/@aws-sdk/token-providers/package.json +3 -3
  62. package/node_modules/@aws-sdk/xml-builder/package.json +2 -2
  63. package/node_modules/@mariozechner/pi-agent-core/README.md +14 -0
  64. package/node_modules/@mariozechner/pi-agent-core/dist/agent-loop.js +9 -0
  65. package/node_modules/@mariozechner/pi-agent-core/dist/agent.js +1 -1
  66. package/node_modules/@mariozechner/pi-agent-core/package.json +2 -2
  67. package/node_modules/@mariozechner/pi-ai/README.md +20 -31
  68. package/node_modules/@mariozechner/pi-ai/dist/env-api-keys.js +7 -0
  69. package/node_modules/@mariozechner/pi-ai/dist/index.js +2 -0
  70. package/node_modules/@mariozechner/pi-ai/dist/models.generated.js +2420 -1213
  71. package/node_modules/@mariozechner/pi-ai/dist/models.js +28 -20
  72. package/node_modules/@mariozechner/pi-ai/dist/providers/amazon-bedrock.js +11 -11
  73. package/node_modules/@mariozechner/pi-ai/dist/providers/anthropic.js +43 -26
  74. package/node_modules/@mariozechner/pi-ai/dist/providers/azure-openai-responses.js +12 -6
  75. package/node_modules/@mariozechner/pi-ai/dist/providers/cloudflare.js +10 -3
  76. package/node_modules/@mariozechner/pi-ai/dist/providers/google-shared.js +4 -13
  77. package/node_modules/@mariozechner/pi-ai/dist/providers/google-vertex.js +4 -3
  78. package/node_modules/@mariozechner/pi-ai/dist/providers/google.js +4 -3
  79. package/node_modules/@mariozechner/pi-ai/dist/providers/mistral.js +8 -7
  80. package/node_modules/@mariozechner/pi-ai/dist/providers/openai-codex-responses.js +296 -41
  81. package/node_modules/@mariozechner/pi-ai/dist/providers/openai-completions.js +169 -153
  82. package/node_modules/@mariozechner/pi-ai/dist/providers/openai-responses-shared.js +14 -1
  83. package/node_modules/@mariozechner/pi-ai/dist/providers/openai-responses.js +22 -8
  84. package/node_modules/@mariozechner/pi-ai/dist/providers/register-builtins.js +0 -18
  85. package/node_modules/@mariozechner/pi-ai/dist/providers/simple-options.js +1 -0
  86. package/node_modules/@mariozechner/pi-ai/dist/session-resources.js +22 -0
  87. package/node_modules/@mariozechner/pi-ai/dist/utils/diagnostics.js +25 -0
  88. package/node_modules/@mariozechner/pi-ai/dist/utils/oauth/index.js +0 -10
  89. package/node_modules/@mariozechner/pi-ai/dist/utils/oauth/openai-codex.js +25 -14
  90. package/node_modules/@mariozechner/pi-ai/dist/utils/overflow.js +14 -0
  91. package/node_modules/@mariozechner/pi-ai/package.json +2 -6
  92. package/package.json +3 -3
  93. package/server/agent-manager.mjs +279 -12
  94. package/server/auto-compaction.mjs +1 -2
  95. package/server/conversation-compaction.mjs +0 -5
  96. package/server/index.mjs +1 -0
  97. package/server/routes/static.mjs +1 -0
  98. package/server/routes/tools.mjs +3 -1
  99. package/server/session-utils.mjs +6 -1
  100. package/server/share-store.mjs +27 -4
  101. package/server/subagents.mjs +101 -0
  102. package/server/system-prompt.mjs +30 -1
  103. package/server/tools/definitions.mjs +18 -0
  104. package/server/tools/index.mjs +1013 -911
  105. package/dist/assets/anthropic-Ck2DxOfr.js +0 -39
  106. package/dist/assets/azure-openai-responses-DIoz5q4Z.js +0 -1
  107. package/dist/assets/github-copilot-headers-CrI0CIJ7.js +0 -1
  108. package/dist/assets/google-Dau-4ve_.js +0 -1
  109. package/dist/assets/google-gemini-cli-DttMmbGb.js +0 -2
  110. package/dist/assets/google-vertex-BeukMl44.js +0 -1
  111. package/dist/assets/index-DgJVElbv.css +0 -3
  112. package/dist/assets/openai-codex-responses-X3sTzNAa.js +0 -7
  113. package/dist/assets/openai-completions-CRB9Vm0w.js +0 -5
  114. package/dist/assets/openai-responses-DXluu3oi.js +0 -1
  115. package/dist/assets/transform-messages-CV4kCtBB.js +0 -1
  116. package/node_modules/@aws-sdk/credential-provider-sso/node_modules/@aws-sdk/token-providers/LICENSE +0 -201
  117. package/node_modules/@aws-sdk/credential-provider-sso/node_modules/@aws-sdk/token-providers/README.md +0 -62
  118. package/node_modules/@aws-sdk/credential-provider-sso/node_modules/@aws-sdk/token-providers/dist-cjs/index.js +0 -156
  119. package/node_modules/@aws-sdk/credential-provider-sso/node_modules/@aws-sdk/token-providers/dist-es/constants.js +0 -2
  120. package/node_modules/@aws-sdk/credential-provider-sso/node_modules/@aws-sdk/token-providers/dist-es/fromEnvSigningName.js +0 -16
  121. package/node_modules/@aws-sdk/credential-provider-sso/node_modules/@aws-sdk/token-providers/dist-es/fromSso.js +0 -80
  122. package/node_modules/@aws-sdk/credential-provider-sso/node_modules/@aws-sdk/token-providers/dist-es/fromStatic.js +0 -8
  123. package/node_modules/@aws-sdk/credential-provider-sso/node_modules/@aws-sdk/token-providers/dist-es/getNewSsoOidcToken.js +0 -11
  124. package/node_modules/@aws-sdk/credential-provider-sso/node_modules/@aws-sdk/token-providers/dist-es/getSsoOidcClient.js +0 -10
  125. package/node_modules/@aws-sdk/credential-provider-sso/node_modules/@aws-sdk/token-providers/dist-es/index.js +0 -4
  126. package/node_modules/@aws-sdk/credential-provider-sso/node_modules/@aws-sdk/token-providers/dist-es/nodeProvider.js +0 -5
  127. package/node_modules/@aws-sdk/credential-provider-sso/node_modules/@aws-sdk/token-providers/dist-es/validateTokenExpiry.js +0 -7
  128. package/node_modules/@aws-sdk/credential-provider-sso/node_modules/@aws-sdk/token-providers/dist-es/validateTokenKey.js +0 -7
  129. package/node_modules/@aws-sdk/credential-provider-sso/node_modules/@aws-sdk/token-providers/dist-es/writeSSOTokenToFile.js +0 -8
  130. package/node_modules/@aws-sdk/credential-provider-sso/node_modules/@aws-sdk/token-providers/package.json +0 -69
  131. package/node_modules/@mariozechner/pi-ai/dist/providers/google-gemini-cli.js +0 -779
  132. package/node_modules/@mariozechner/pi-ai/dist/utils/oauth/google-antigravity.js +0 -377
  133. package/node_modules/@mariozechner/pi-ai/dist/utils/oauth/google-gemini-cli.js +0 -482
@@ -1,13 +1,13 @@
1
1
  import OpenAI from "openai";
2
2
  import { getEnvApiKey } from "../env-api-keys.js";
3
- import { calculateCost, supportsXhigh } from "../models.js";
3
+ import { calculateCost, clampThinkingLevel } from "../models.js";
4
4
  import { AssistantMessageEventStream } from "../utils/event-stream.js";
5
5
  import { headersToRecord } from "../utils/headers.js";
6
6
  import { parseStreamingJson } from "../utils/json-parse.js";
7
7
  import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
8
8
  import { isCloudflareProvider, resolveCloudflareBaseUrl } from "./cloudflare.js";
9
9
  import { buildCopilotDynamicHeaders, hasCopilotVisionInput } from "./github-copilot-headers.js";
10
- import { buildBaseOptions, clampReasoning } from "./simple-options.js";
10
+ import { buildBaseOptions } from "./simple-options.js";
11
11
  import { transformMessages } from "./transform-messages.js";
12
12
  /**
13
13
  * Check if conversation messages contain tool calls or tool results.
@@ -89,46 +89,103 @@ export const streamOpenAICompletions = (model, context, options) => {
89
89
  .withResponse();
90
90
  await options?.onResponse?.({ status: response.status, headers: headersToRecord(response.headers) }, model);
91
91
  stream.push({ type: "start", partial: output });
92
- let currentBlock = null;
92
+ let textBlock = null;
93
+ let thinkingBlock = null;
94
+ const toolCallBlocksByIndex = new Map();
95
+ const toolCallBlocksById = new Map();
93
96
  const blocks = output.content;
94
- const getContentIndex = (block) => (block ? blocks.indexOf(block) : -1);
95
- const currentContentIndex = () => getContentIndex(currentBlock);
96
- const finishCurrentBlock = (block) => {
97
- if (block) {
98
- const contentIndex = getContentIndex(block);
99
- if (contentIndex === -1) {
100
- return;
101
- }
102
- if (block.type === "text") {
103
- stream.push({
104
- type: "text_end",
105
- contentIndex,
106
- content: block.text,
107
- partial: output,
108
- });
109
- }
110
- else if (block.type === "thinking") {
111
- stream.push({
112
- type: "thinking_end",
113
- contentIndex,
114
- content: block.thinking,
115
- partial: output,
116
- });
97
+ const getContentIndex = (block) => blocks.indexOf(block);
98
+ const finishBlock = (block) => {
99
+ const contentIndex = getContentIndex(block);
100
+ if (contentIndex === -1) {
101
+ return;
102
+ }
103
+ if (block.type === "text") {
104
+ stream.push({
105
+ type: "text_end",
106
+ contentIndex,
107
+ content: block.text,
108
+ partial: output,
109
+ });
110
+ }
111
+ else if (block.type === "thinking") {
112
+ stream.push({
113
+ type: "thinking_end",
114
+ contentIndex,
115
+ content: block.thinking,
116
+ partial: output,
117
+ });
118
+ }
119
+ else if (block.type === "toolCall") {
120
+ block.arguments = parseStreamingJson(block.partialArgs);
121
+ // Finalize in-place and strip the scratch buffers so replay only
122
+ // carries parsed arguments.
123
+ delete block.partialArgs;
124
+ delete block.streamIndex;
125
+ stream.push({
126
+ type: "toolcall_end",
127
+ contentIndex,
128
+ toolCall: block,
129
+ partial: output,
130
+ });
131
+ }
132
+ };
133
+ const ensureTextBlock = () => {
134
+ if (!textBlock) {
135
+ textBlock = { type: "text", text: "" };
136
+ blocks.push(textBlock);
137
+ stream.push({ type: "text_start", contentIndex: getContentIndex(textBlock), partial: output });
138
+ }
139
+ return textBlock;
140
+ };
141
+ const ensureThinkingBlock = (thinkingSignature) => {
142
+ if (!thinkingBlock) {
143
+ thinkingBlock = {
144
+ type: "thinking",
145
+ thinking: "",
146
+ thinkingSignature,
147
+ };
148
+ blocks.push(thinkingBlock);
149
+ stream.push({ type: "thinking_start", contentIndex: getContentIndex(thinkingBlock), partial: output });
150
+ }
151
+ return thinkingBlock;
152
+ };
153
+ const ensureToolCallBlock = (toolCall) => {
154
+ const streamIndex = typeof toolCall.index === "number" ? toolCall.index : undefined;
155
+ let block = streamIndex !== undefined ? toolCallBlocksByIndex.get(streamIndex) : undefined;
156
+ if (!block && toolCall.id) {
157
+ block = toolCallBlocksById.get(toolCall.id);
158
+ }
159
+ if (!block) {
160
+ block = {
161
+ type: "toolCall",
162
+ id: toolCall.id || "",
163
+ name: toolCall.function?.name || "",
164
+ arguments: {},
165
+ partialArgs: "",
166
+ streamIndex,
167
+ };
168
+ if (streamIndex !== undefined) {
169
+ toolCallBlocksByIndex.set(streamIndex, block);
117
170
  }
118
- else if (block.type === "toolCall") {
119
- block.arguments = parseStreamingJson(block.partialArgs);
120
- // Finalize in-place and strip the scratch buffers so replay only
121
- // carries parsed arguments.
122
- delete block.partialArgs;
123
- delete block.streamIndex;
124
- stream.push({
125
- type: "toolcall_end",
126
- contentIndex,
127
- toolCall: block,
128
- partial: output,
129
- });
171
+ if (toolCall.id) {
172
+ toolCallBlocksById.set(toolCall.id, block);
130
173
  }
174
+ blocks.push(block);
175
+ stream.push({
176
+ type: "toolcall_start",
177
+ contentIndex: getContentIndex(block),
178
+ partial: output,
179
+ });
131
180
  }
181
+ if (streamIndex !== undefined && block.streamIndex === undefined) {
182
+ block.streamIndex = streamIndex;
183
+ toolCallBlocksByIndex.set(streamIndex, block);
184
+ }
185
+ if (toolCall.id) {
186
+ toolCallBlocksById.set(toolCall.id, block);
187
+ }
188
+ return block;
132
189
  };
133
190
  for await (const chunk of openaiStream) {
134
191
  if (!chunk || typeof chunk !== "object")
@@ -136,6 +193,9 @@ export const streamOpenAICompletions = (model, context, options) => {
136
193
  // OpenAI documents ChatCompletionChunk.id as the unique chat completion identifier,
137
194
  // and each chunk in a streamed completion carries the same id.
138
195
  output.responseId ||= chunk.id;
196
+ if (typeof chunk.model === "string" && chunk.model.length > 0 && chunk.model !== model.id) {
197
+ output.responseModel ||= chunk.model;
198
+ }
139
199
  if (chunk.usage) {
140
200
  output.usage = parseChunkUsage(chunk.usage, model);
141
201
  }
@@ -158,55 +218,37 @@ export const streamOpenAICompletions = (model, context, options) => {
158
218
  if (choice.delta.content !== null &&
159
219
  choice.delta.content !== undefined &&
160
220
  choice.delta.content.length > 0) {
161
- if (!currentBlock || currentBlock.type !== "text") {
162
- finishCurrentBlock(currentBlock);
163
- currentBlock = { type: "text", text: "" };
164
- output.content.push(currentBlock);
165
- stream.push({ type: "text_start", contentIndex: currentContentIndex(), partial: output });
166
- }
167
- if (currentBlock.type === "text") {
168
- currentBlock.text += choice.delta.content;
169
- stream.push({
170
- type: "text_delta",
171
- contentIndex: currentContentIndex(),
172
- delta: choice.delta.content,
173
- partial: output,
174
- });
175
- }
221
+ const block = ensureTextBlock();
222
+ block.text += choice.delta.content;
223
+ stream.push({
224
+ type: "text_delta",
225
+ contentIndex: getContentIndex(block),
226
+ delta: choice.delta.content,
227
+ partial: output,
228
+ });
176
229
  }
177
230
  // Some endpoints return reasoning in reasoning_content (llama.cpp),
178
231
  // or reasoning (other openai compatible endpoints)
179
232
  // Use the first non-empty reasoning field to avoid duplication
180
233
  // (e.g., chutes.ai returns both reasoning_content and reasoning with same content)
181
234
  const reasoningFields = ["reasoning_content", "reasoning", "reasoning_text"];
235
+ const deltaFields = choice.delta;
182
236
  let foundReasoningField = null;
183
237
  for (const field of reasoningFields) {
184
- if (choice.delta[field] !== null &&
185
- choice.delta[field] !== undefined &&
186
- choice.delta[field].length > 0) {
187
- if (!foundReasoningField) {
188
- foundReasoningField = field;
189
- break;
190
- }
238
+ const value = deltaFields[field];
239
+ if (typeof value === "string" && value.length > 0) {
240
+ foundReasoningField = field;
241
+ break;
191
242
  }
192
243
  }
193
244
  if (foundReasoningField) {
194
- if (!currentBlock || currentBlock.type !== "thinking") {
195
- finishCurrentBlock(currentBlock);
196
- currentBlock = {
197
- type: "thinking",
198
- thinking: "",
199
- thinkingSignature: foundReasoningField,
200
- };
201
- output.content.push(currentBlock);
202
- stream.push({ type: "thinking_start", contentIndex: currentContentIndex(), partial: output });
203
- }
204
- if (currentBlock.type === "thinking") {
205
- const delta = choice.delta[foundReasoningField];
206
- currentBlock.thinking += delta;
245
+ const delta = deltaFields[foundReasoningField];
246
+ if (typeof delta === "string" && delta.length > 0) {
247
+ const block = ensureThinkingBlock(foundReasoningField);
248
+ block.thinking += delta;
207
249
  stream.push({
208
250
  type: "thinking_delta",
209
- contentIndex: currentContentIndex(),
251
+ contentIndex: getContentIndex(block),
210
252
  delta,
211
253
  partial: output,
212
254
  });
@@ -214,50 +256,26 @@ export const streamOpenAICompletions = (model, context, options) => {
214
256
  }
215
257
  if (choice?.delta?.tool_calls) {
216
258
  for (const toolCall of choice.delta.tool_calls) {
217
- const streamIndex = typeof toolCall.index === "number" ? toolCall.index : undefined;
218
- const sameToolCall = currentBlock?.type === "toolCall" &&
219
- ((streamIndex !== undefined && currentBlock.streamIndex === streamIndex) ||
220
- (streamIndex === undefined && toolCall.id && currentBlock.id === toolCall.id));
221
- if (!sameToolCall) {
222
- finishCurrentBlock(currentBlock);
223
- currentBlock = {
224
- type: "toolCall",
225
- id: toolCall.id || "",
226
- name: toolCall.function?.name || "",
227
- arguments: {},
228
- partialArgs: "",
229
- streamIndex,
230
- };
231
- output.content.push(currentBlock);
232
- stream.push({
233
- type: "toolcall_start",
234
- contentIndex: getContentIndex(currentBlock),
235
- partial: output,
236
- });
259
+ const block = ensureToolCallBlock(toolCall);
260
+ if (!block.id && toolCall.id) {
261
+ block.id = toolCall.id;
262
+ toolCallBlocksById.set(toolCall.id, block);
237
263
  }
238
- const currentToolCallBlock = currentBlock?.type === "toolCall" ? currentBlock : null;
239
- if (currentToolCallBlock) {
240
- if (!currentToolCallBlock.id && toolCall.id)
241
- currentToolCallBlock.id = toolCall.id;
242
- if (!currentToolCallBlock.name && toolCall.function?.name) {
243
- currentToolCallBlock.name = toolCall.function.name;
244
- }
245
- if (currentToolCallBlock.streamIndex === undefined && streamIndex !== undefined) {
246
- currentToolCallBlock.streamIndex = streamIndex;
247
- }
248
- let delta = "";
249
- if (toolCall.function?.arguments) {
250
- delta = toolCall.function.arguments;
251
- currentToolCallBlock.partialArgs += toolCall.function.arguments;
252
- currentToolCallBlock.arguments = parseStreamingJson(currentToolCallBlock.partialArgs);
253
- }
254
- stream.push({
255
- type: "toolcall_delta",
256
- contentIndex: getContentIndex(currentToolCallBlock),
257
- delta,
258
- partial: output,
259
- });
264
+ if (!block.name && toolCall.function?.name) {
265
+ block.name = toolCall.function.name;
266
+ }
267
+ let delta = "";
268
+ if (toolCall.function?.arguments) {
269
+ delta = toolCall.function.arguments;
270
+ block.partialArgs = (block.partialArgs ?? "") + toolCall.function.arguments;
271
+ block.arguments = parseStreamingJson(block.partialArgs);
260
272
  }
273
+ stream.push({
274
+ type: "toolcall_delta",
275
+ contentIndex: getContentIndex(block),
276
+ delta,
277
+ partial: output,
278
+ });
261
279
  }
262
280
  }
263
281
  const reasoningDetails = choice.delta.reasoning_details;
@@ -273,7 +291,9 @@ export const streamOpenAICompletions = (model, context, options) => {
273
291
  }
274
292
  }
275
293
  }
276
- finishCurrentBlock(currentBlock);
294
+ for (const block of blocks) {
295
+ finishBlock(block);
296
+ }
277
297
  if (options?.signal?.aborted) {
278
298
  throw new Error("Request was aborted");
279
299
  }
@@ -311,7 +331,8 @@ export const streamSimpleOpenAICompletions = (model, context, options) => {
311
331
  throw new Error(`No API key for provider: ${model.provider}`);
312
332
  }
313
333
  const base = buildBaseOptions(model, options, apiKey);
314
- const reasoningEffort = supportsXhigh(model) ? options?.reasoning : clampReasoning(options?.reasoning);
334
+ const clampedReasoning = options?.reasoning ? clampThinkingLevel(model, options.reasoning) : undefined;
335
+ const reasoningEffort = clampedReasoning === "off" ? undefined : clampedReasoning;
315
336
  const toolChoice = options?.toolChoice;
316
337
  return streamOpenAICompletions(model, context, {
317
338
  ...base,
@@ -344,11 +365,18 @@ function createClient(model, context, apiKey, optionsHeaders, sessionId, compat
344
365
  if (optionsHeaders) {
345
366
  Object.assign(headers, optionsHeaders);
346
367
  }
368
+ const defaultHeaders = model.provider === "cloudflare-ai-gateway"
369
+ ? {
370
+ ...headers,
371
+ Authorization: headers.Authorization ?? null,
372
+ "cf-aig-authorization": `Bearer ${apiKey}`,
373
+ }
374
+ : headers;
347
375
  return new OpenAI({
348
376
  apiKey,
349
377
  baseURL: isCloudflareProvider(model.provider) ? resolveCloudflareBaseUrl(model) : model.baseUrl,
350
378
  dangerouslyAllowBrowser: true,
351
- defaultHeaders: headers,
379
+ defaultHeaders,
352
380
  });
353
381
  }
354
382
  function buildParams(model, context, options, compat = getCompat(model), cacheRetention = resolveCacheRetention(options?.cacheRetention)) {
@@ -412,7 +440,8 @@ function buildParams(model, context, options, compat = getCompat(model), cacheRe
412
440
  else if (compat.thinkingFormat === "deepseek" && model.reasoning) {
413
441
  params.thinking = { type: options?.reasoningEffort ? "enabled" : "disabled" };
414
442
  if (options?.reasoningEffort) {
415
- params.reasoning_effort = mapReasoningEffort(options.reasoningEffort, compat.reasoningEffortMap);
443
+ params.reasoning_effort =
444
+ model.thinkingLevelMap?.[options.reasoningEffort] ?? options.reasoningEffort;
416
445
  }
417
446
  }
418
447
  else if (compat.thinkingFormat === "openrouter" && model.reasoning) {
@@ -420,16 +449,22 @@ function buildParams(model, context, options, compat = getCompat(model), cacheRe
420
449
  const openRouterParams = params;
421
450
  if (options?.reasoningEffort) {
422
451
  openRouterParams.reasoning = {
423
- effort: mapReasoningEffort(options.reasoningEffort, compat.reasoningEffortMap),
452
+ effort: model.thinkingLevelMap?.[options.reasoningEffort] ?? options.reasoningEffort,
424
453
  };
425
454
  }
426
- else {
427
- openRouterParams.reasoning = { effort: "none" };
455
+ else if (model.thinkingLevelMap?.off !== null) {
456
+ openRouterParams.reasoning = { effort: model.thinkingLevelMap?.off ?? "none" };
428
457
  }
429
458
  }
430
459
  else if (options?.reasoningEffort && model.reasoning && compat.supportsReasoningEffort) {
431
460
  // OpenAI-style reasoning_effort
432
- params.reasoning_effort = mapReasoningEffort(options.reasoningEffort, compat.reasoningEffortMap);
461
+ params.reasoning_effort = model.thinkingLevelMap?.[options.reasoningEffort] ?? options.reasoningEffort;
462
+ }
463
+ else if (!options?.reasoningEffort && model.reasoning && compat.supportsReasoningEffort) {
464
+ const offValue = model.thinkingLevelMap?.off;
465
+ if (typeof offValue === "string") {
466
+ params.reasoning_effort = offValue;
467
+ }
433
468
  }
434
469
  // OpenRouter provider routing preferences
435
470
  if (model.baseUrl.includes("openrouter.ai") && model.compat?.openRouterRouting) {
@@ -449,9 +484,6 @@ function buildParams(model, context, options, compat = getCompat(model), cacheRe
449
484
  }
450
485
  return params;
451
486
  }
452
- function mapReasoningEffort(effort, reasoningEffortMap) {
453
- return reasoningEffortMap[effort] ?? effort;
454
- }
455
487
  function getCompatCacheControl(compat, cacheRetention) {
456
488
  if (compat.cacheControlFormat !== "anthropic" || cacheRetention === "none") {
457
489
  return undefined;
@@ -762,7 +794,7 @@ function convertTools(tools, compat) {
762
794
  }
763
795
  function parseChunkUsage(rawUsage, model) {
764
796
  const promptTokens = rawUsage.prompt_tokens || 0;
765
- const reportedCachedTokens = rawUsage.prompt_tokens_details?.cached_tokens || 0;
797
+ const reportedCachedTokens = rawUsage.prompt_tokens_details?.cached_tokens ?? rawUsage.prompt_cache_hit_tokens ?? 0;
766
798
  const cacheWriteTokens = rawUsage.prompt_tokens_details?.cache_write_tokens || 0;
767
799
  // Normalize to pi-ai semantics:
768
800
  // - cacheRead: hits from cache created by previous requests only
@@ -816,7 +848,9 @@ function detectCompat(model) {
816
848
  const provider = model.provider;
817
849
  const baseUrl = model.baseUrl;
818
850
  const isZai = provider === "zai" || baseUrl.includes("api.z.ai");
851
+ const isMoonshot = provider === "moonshotai" || provider === "moonshotai-cn" || baseUrl.includes("api.moonshot.");
819
852
  const isCloudflareWorkersAI = provider === "cloudflare-workers-ai" || baseUrl.includes("api.cloudflare.com");
853
+ const isCloudflareAiGateway = provider === "cloudflare-ai-gateway" || baseUrl.includes("gateway.ai.cloudflare.com");
820
854
  const isNonStandard = provider === "cerebras" ||
821
855
  baseUrl.includes("cerebras.ai") ||
822
856
  provider === "xai" ||
@@ -824,36 +858,19 @@ function detectCompat(model) {
824
858
  baseUrl.includes("chutes.ai") ||
825
859
  baseUrl.includes("deepseek.com") ||
826
860
  isZai ||
861
+ isMoonshot ||
827
862
  provider === "opencode" ||
828
863
  baseUrl.includes("opencode.ai") ||
829
- isCloudflareWorkersAI;
830
- const useMaxTokens = baseUrl.includes("chutes.ai");
864
+ isCloudflareWorkersAI ||
865
+ isCloudflareAiGateway;
866
+ const useMaxTokens = baseUrl.includes("chutes.ai") || isMoonshot || isCloudflareAiGateway;
831
867
  const isGrok = provider === "xai" || baseUrl.includes("api.x.ai");
832
- const isGroq = provider === "groq" || baseUrl.includes("groq.com");
833
868
  const isDeepSeek = provider === "deepseek" || baseUrl.includes("deepseek.com");
834
869
  const cacheControlFormat = provider === "openrouter" && model.id.startsWith("anthropic/") ? "anthropic" : undefined;
835
- const reasoningEffortMap = isDeepSeek
836
- ? {
837
- minimal: "high",
838
- low: "high",
839
- medium: "high",
840
- high: "high",
841
- xhigh: "max",
842
- }
843
- : isGroq && model.id === "qwen/qwen3-32b"
844
- ? {
845
- minimal: "default",
846
- low: "default",
847
- medium: "default",
848
- high: "default",
849
- xhigh: "default",
850
- }
851
- : {};
852
870
  return {
853
871
  supportsStore: !isNonStandard,
854
872
  supportsDeveloperRole: !isNonStandard,
855
- supportsReasoningEffort: !isGrok && !isZai,
856
- reasoningEffortMap,
873
+ supportsReasoningEffort: !isGrok && !isZai && !isMoonshot && !isCloudflareAiGateway,
857
874
  supportsUsageInStreaming: true,
858
875
  maxTokensField: useMaxTokens ? "max_tokens" : "max_completion_tokens",
859
876
  requiresToolResultName: false,
@@ -870,10 +887,10 @@ function detectCompat(model) {
870
887
  openRouterRouting: {},
871
888
  vercelGatewayRouting: {},
872
889
  zaiToolStream: false,
873
- supportsStrictMode: true,
890
+ supportsStrictMode: !isMoonshot && !isCloudflareAiGateway,
874
891
  cacheControlFormat,
875
892
  sendSessionAffinityHeaders: false,
876
- supportsLongCacheRetention: true,
893
+ supportsLongCacheRetention: !(isCloudflareWorkersAI || isCloudflareAiGateway),
877
894
  };
878
895
  }
879
896
  /**
@@ -888,7 +905,6 @@ function getCompat(model) {
888
905
  supportsStore: model.compat.supportsStore ?? detected.supportsStore,
889
906
  supportsDeveloperRole: model.compat.supportsDeveloperRole ?? detected.supportsDeveloperRole,
890
907
  supportsReasoningEffort: model.compat.supportsReasoningEffort ?? detected.supportsReasoningEffort,
891
- reasoningEffortMap: model.compat.reasoningEffortMap ?? detected.reasoningEffortMap,
892
908
  supportsUsageInStreaming: model.compat.supportsUsageInStreaming ?? detected.supportsUsageInStreaming,
893
909
  maxTokensField: model.compat.maxTokensField ?? detected.maxTokensField,
894
910
  requiresToolResultName: model.compat.requiresToolResultName ?? detected.requiresToolResultName,
@@ -287,6 +287,17 @@ export async function processResponsesStream(openaiStream, output, stream, model
287
287
  }
288
288
  }
289
289
  }
290
+ else if (event.type === "response.reasoning_text.delta") {
291
+ if (currentItem?.type === "reasoning" && currentBlock?.type === "thinking") {
292
+ currentBlock.thinking += event.delta;
293
+ stream.push({
294
+ type: "thinking_delta",
295
+ contentIndex: blockIndex(),
296
+ delta: event.delta,
297
+ partial: output,
298
+ });
299
+ }
300
+ }
290
301
  else if (event.type === "response.content_part.added") {
291
302
  if (currentItem?.type === "message") {
292
303
  currentItem.content = currentItem.content || [];
@@ -365,7 +376,9 @@ export async function processResponsesStream(openaiStream, output, stream, model
365
376
  else if (event.type === "response.output_item.done") {
366
377
  const item = event.item;
367
378
  if (item.type === "reasoning" && currentBlock?.type === "thinking") {
368
- currentBlock.thinking = item.summary?.map((s) => s.text).join("\n\n") || "";
379
+ const summaryText = item.summary?.map((s) => s.text).join("\n\n") || "";
380
+ const contentText = item.content?.map((c) => c.text).join("\n\n") || "";
381
+ currentBlock.thinking = summaryText || contentText || currentBlock.thinking;
369
382
  currentBlock.thinkingSignature = JSON.stringify(item);
370
383
  stream.push({
371
384
  type: "thinking_end",
@@ -1,11 +1,12 @@
1
1
  import OpenAI from "openai";
2
2
  import { getEnvApiKey } from "../env-api-keys.js";
3
- import { supportsXhigh } from "../models.js";
3
+ import { clampThinkingLevel } from "../models.js";
4
4
  import { AssistantMessageEventStream } from "../utils/event-stream.js";
5
5
  import { headersToRecord } from "../utils/headers.js";
6
+ import { isCloudflareProvider, resolveCloudflareBaseUrl } from "./cloudflare.js";
6
7
  import { buildCopilotDynamicHeaders, hasCopilotVisionInput } from "./github-copilot-headers.js";
7
8
  import { convertResponsesMessages, convertResponsesTools, processResponsesStream } from "./openai-responses-shared.js";
8
- import { buildBaseOptions, clampReasoning } from "./simple-options.js";
9
+ import { buildBaseOptions } from "./simple-options.js";
9
10
  const OPENAI_TOOL_CALL_PROVIDERS = new Set(["openai", "openai-codex", "opencode"]);
10
11
  /**
11
12
  * Resolve cache retention preference.
@@ -105,7 +106,8 @@ export const streamSimpleOpenAIResponses = (model, context, options) => {
105
106
  throw new Error(`No API key for provider: ${model.provider}`);
106
107
  }
107
108
  const base = buildBaseOptions(model, options, apiKey);
108
- const reasoningEffort = supportsXhigh(model) ? options?.reasoning : clampReasoning(options?.reasoning);
109
+ const clampedReasoning = options?.reasoning ? clampThinkingLevel(model, options.reasoning) : undefined;
110
+ const reasoningEffort = clampedReasoning === "off" ? undefined : clampedReasoning;
109
111
  return streamOpenAIResponses(model, context, {
110
112
  ...base,
111
113
  reasoningEffort,
@@ -138,11 +140,18 @@ function createClient(model, context, apiKey, optionsHeaders, sessionId) {
138
140
  if (optionsHeaders) {
139
141
  Object.assign(headers, optionsHeaders);
140
142
  }
143
+ const defaultHeaders = model.provider === "cloudflare-ai-gateway"
144
+ ? {
145
+ ...headers,
146
+ Authorization: headers.Authorization ?? null,
147
+ "cf-aig-authorization": `Bearer ${apiKey}`,
148
+ }
149
+ : headers;
141
150
  return new OpenAI({
142
151
  apiKey,
143
- baseURL: model.baseUrl,
152
+ baseURL: isCloudflareProvider(model.provider) ? resolveCloudflareBaseUrl(model) : model.baseUrl,
144
153
  dangerouslyAllowBrowser: true,
145
- defaultHeaders: headers,
154
+ defaultHeaders,
146
155
  });
147
156
  }
148
157
  function buildParams(model, context, options) {
@@ -171,14 +180,19 @@ function buildParams(model, context, options) {
171
180
  }
172
181
  if (model.reasoning) {
173
182
  if (options?.reasoningEffort || options?.reasoningSummary) {
183
+ const effort = options?.reasoningEffort
184
+ ? (model.thinkingLevelMap?.[options.reasoningEffort] ?? options.reasoningEffort)
185
+ : "medium";
174
186
  params.reasoning = {
175
- effort: options?.reasoningEffort || "medium",
187
+ effort: effort,
176
188
  summary: options?.reasoningSummary || "auto",
177
189
  };
178
190
  params.include = ["reasoning.encrypted_content"];
179
191
  }
180
- else if (model.provider !== "github-copilot") {
181
- params.reasoning = { effort: "none" };
192
+ else if (model.provider !== "github-copilot" && model.thinkingLevelMap?.off !== null) {
193
+ params.reasoning = {
194
+ effort: (model.thinkingLevelMap?.off ?? "none"),
195
+ };
182
196
  }
183
197
  }
184
198
  return params;
@@ -4,7 +4,6 @@ const importNodeOnlyProvider = (specifier) => import(specifier);
4
4
  let anthropicProviderModulePromise;
5
5
  let azureOpenAIResponsesProviderModulePromise;
6
6
  let googleProviderModulePromise;
7
- let googleGeminiCliProviderModulePromise;
8
7
  let googleVertexProviderModulePromise;
9
8
  let mistralProviderModulePromise;
10
9
  let openAICodexResponsesProviderModulePromise;
@@ -108,16 +107,6 @@ function loadGoogleProviderModule() {
108
107
  });
109
108
  return googleProviderModulePromise;
110
109
  }
111
- function loadGoogleGeminiCliProviderModule() {
112
- googleGeminiCliProviderModulePromise ||= import("./google-gemini-cli.js").then((module) => {
113
- const provider = module;
114
- return {
115
- stream: provider.streamGoogleGeminiCli,
116
- streamSimple: provider.streamSimpleGoogleGeminiCli,
117
- };
118
- });
119
- return googleGeminiCliProviderModulePromise;
120
- }
121
110
  function loadGoogleVertexProviderModule() {
122
111
  googleVertexProviderModulePromise ||= import("./google-vertex.js").then((module) => {
123
112
  const provider = module;
@@ -187,8 +176,6 @@ export const streamAzureOpenAIResponses = createLazyStream(loadAzureOpenAIRespon
187
176
  export const streamSimpleAzureOpenAIResponses = createLazySimpleStream(loadAzureOpenAIResponsesProviderModule);
188
177
  export const streamGoogle = createLazyStream(loadGoogleProviderModule);
189
178
  export const streamSimpleGoogle = createLazySimpleStream(loadGoogleProviderModule);
190
- export const streamGoogleGeminiCli = createLazyStream(loadGoogleGeminiCliProviderModule);
191
- export const streamSimpleGoogleGeminiCli = createLazySimpleStream(loadGoogleGeminiCliProviderModule);
192
179
  export const streamGoogleVertex = createLazyStream(loadGoogleVertexProviderModule);
193
180
  export const streamSimpleGoogleVertex = createLazySimpleStream(loadGoogleVertexProviderModule);
194
181
  export const streamMistral = createLazyStream(loadMistralProviderModule);
@@ -237,11 +224,6 @@ export function registerBuiltInApiProviders() {
237
224
  stream: streamGoogle,
238
225
  streamSimple: streamSimpleGoogle,
239
226
  });
240
- registerApiProvider({
241
- api: "google-gemini-cli",
242
- stream: streamGoogleGeminiCli,
243
- streamSimple: streamSimpleGoogleGeminiCli,
244
- });
245
227
  registerApiProvider({
246
228
  api: "google-vertex",
247
229
  stream: streamGoogleVertex,
@@ -4,6 +4,7 @@ export function buildBaseOptions(model, options, apiKey) {
4
4
  maxTokens: options?.maxTokens ?? (model.maxTokens > 0 ? Math.min(model.maxTokens, 32000) : undefined),
5
5
  signal: options?.signal,
6
6
  apiKey: apiKey || options?.apiKey,
7
+ transport: options?.transport,
7
8
  cacheRetention: options?.cacheRetention,
8
9
  sessionId: options?.sessionId,
9
10
  headers: options?.headers,
@@ -0,0 +1,22 @@
1
+ const sessionResourceCleanups = new Set();
2
+ export function registerSessionResourceCleanup(cleanup) {
3
+ sessionResourceCleanups.add(cleanup);
4
+ return () => {
5
+ sessionResourceCleanups.delete(cleanup);
6
+ };
7
+ }
8
+ export function cleanupSessionResources(sessionId) {
9
+ const errors = [];
10
+ for (const cleanup of sessionResourceCleanups) {
11
+ try {
12
+ cleanup(sessionId);
13
+ }
14
+ catch (error) {
15
+ errors.push(error);
16
+ }
17
+ }
18
+ if (errors.length > 0) {
19
+ throw new AggregateError(errors, "Failed to cleanup session resources");
20
+ }
21
+ }
22
+ //# sourceMappingURL=session-resources.js.map