@eminent337/aery-ai 0.67.74 → 0.67.76

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. package/README.md +12 -5
  2. package/dist/env-api-keys.d.ts +9 -0
  3. package/dist/env-api-keys.d.ts.map +1 -1
  4. package/dist/env-api-keys.js +89 -34
  5. package/dist/env-api-keys.js.map +1 -1
  6. package/dist/index.d.ts +2 -2
  7. package/dist/index.d.ts.map +1 -1
  8. package/dist/index.js +1 -1
  9. package/dist/index.js.map +1 -1
  10. package/dist/models.d.ts +2 -1
  11. package/dist/models.d.ts.map +1 -1
  12. package/dist/models.generated.d.ts +2877 -1647
  13. package/dist/models.generated.d.ts.map +1 -1
  14. package/dist/models.generated.js +1702 -496
  15. package/dist/models.generated.js.map +1 -1
  16. package/dist/models.js +7 -2
  17. package/dist/models.js.map +1 -1
  18. package/dist/providers/amazon-bedrock.d.ts.map +1 -1
  19. package/dist/providers/amazon-bedrock.js +97 -34
  20. package/dist/providers/amazon-bedrock.js.map +1 -1
  21. package/dist/providers/anthropic.d.ts.map +1 -1
  22. package/dist/providers/anthropic.js +42 -17
  23. package/dist/providers/anthropic.js.map +1 -1
  24. package/dist/providers/azure-openai-responses.d.ts.map +1 -1
  25. package/dist/providers/azure-openai-responses.js +24 -5
  26. package/dist/providers/azure-openai-responses.js.map +1 -1
  27. package/dist/providers/cloudflare.d.ts +7 -0
  28. package/dist/providers/cloudflare.d.ts.map +1 -0
  29. package/dist/providers/cloudflare.js +19 -0
  30. package/dist/providers/cloudflare.js.map +1 -0
  31. package/dist/providers/google-gemini-cli.d.ts.map +1 -1
  32. package/dist/providers/google-gemini-cli.js +2 -2
  33. package/dist/providers/google-gemini-cli.js.map +1 -1
  34. package/dist/providers/google-vertex.d.ts.map +1 -1
  35. package/dist/providers/google-vertex.js +34 -13
  36. package/dist/providers/google-vertex.js.map +1 -1
  37. package/dist/providers/openai-codex-responses.d.ts.map +1 -1
  38. package/dist/providers/openai-codex-responses.js +11 -10
  39. package/dist/providers/openai-codex-responses.js.map +1 -1
  40. package/dist/providers/openai-completions.d.ts.map +1 -1
  41. package/dist/providers/openai-completions.js +102 -47
  42. package/dist/providers/openai-completions.js.map +1 -1
  43. package/dist/providers/openai-responses.d.ts.map +1 -1
  44. package/dist/providers/openai-responses.js +26 -23
  45. package/dist/providers/openai-responses.js.map +1 -1
  46. package/dist/providers/simple-options.d.ts.map +1 -1
  47. package/dist/providers/simple-options.js +2 -0
  48. package/dist/providers/simple-options.js.map +1 -1
  49. package/dist/providers/transform-messages.d.ts.map +1 -1
  50. package/dist/providers/transform-messages.js +2 -0
  51. package/dist/providers/transform-messages.js.map +1 -1
  52. package/dist/types.d.ts +36 -5
  53. package/dist/types.d.ts.map +1 -1
  54. package/dist/types.js.map +1 -1
  55. package/dist/utils/typebox-helpers.d.ts +1 -1
  56. package/dist/utils/typebox-helpers.d.ts.map +1 -1
  57. package/dist/utils/typebox-helpers.js +1 -1
  58. package/dist/utils/typebox-helpers.js.map +1 -1
  59. package/dist/utils/validation.d.ts.map +1 -1
  60. package/dist/utils/validation.js +242 -41
  61. package/dist/utils/validation.js.map +1 -1
  62. package/package.json +2 -4
@@ -5,6 +5,7 @@ import { AssistantMessageEventStream } from "../utils/event-stream.js";
5
5
  import { headersToRecord } from "../utils/headers.js";
6
6
  import { parseStreamingJson } from "../utils/json-parse.js";
7
7
  import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
8
+ import { isCloudflareProvider, resolveCloudflareBaseUrl } from "./cloudflare.js";
8
9
  import { buildCopilotDynamicHeaders, hasCopilotVisionInput } from "./github-copilot-headers.js";
9
10
  import { buildBaseOptions, clampReasoning } from "./simple-options.js";
10
11
  import { transformMessages } from "./transform-messages.js";
@@ -78,20 +79,30 @@ export const streamOpenAICompletions = (model, context, options) => {
78
79
  if (nextParams !== undefined) {
79
80
  params = nextParams;
80
81
  }
82
+ const requestOptions = {
83
+ ...(options?.signal ? { signal: options.signal } : {}),
84
+ ...(options?.timeoutMs !== undefined ? { timeout: options.timeoutMs } : {}),
85
+ ...(options?.maxRetries !== undefined ? { maxRetries: options.maxRetries } : {}),
86
+ };
81
87
  const { data: openaiStream, response } = await client.chat.completions
82
- .create(params, { signal: options?.signal })
88
+ .create(params, requestOptions)
83
89
  .withResponse();
84
90
  await options?.onResponse?.({ status: response.status, headers: headersToRecord(response.headers) }, model);
85
91
  stream.push({ type: "start", partial: output });
86
92
  let currentBlock = null;
87
93
  const blocks = output.content;
88
- const blockIndex = () => blocks.length - 1;
94
+ const getContentIndex = (block) => (block ? blocks.indexOf(block) : -1);
95
+ const currentContentIndex = () => getContentIndex(currentBlock);
89
96
  const finishCurrentBlock = (block) => {
90
97
  if (block) {
98
+ const contentIndex = getContentIndex(block);
99
+ if (contentIndex === -1) {
100
+ return;
101
+ }
91
102
  if (block.type === "text") {
92
103
  stream.push({
93
104
  type: "text_end",
94
- contentIndex: blockIndex(),
105
+ contentIndex,
95
106
  content: block.text,
96
107
  partial: output,
97
108
  });
@@ -99,19 +110,20 @@ export const streamOpenAICompletions = (model, context, options) => {
99
110
  else if (block.type === "thinking") {
100
111
  stream.push({
101
112
  type: "thinking_end",
102
- contentIndex: blockIndex(),
113
+ contentIndex,
103
114
  content: block.thinking,
104
115
  partial: output,
105
116
  });
106
117
  }
107
118
  else if (block.type === "toolCall") {
108
119
  block.arguments = parseStreamingJson(block.partialArgs);
109
- // Finalize in-place and strip the scratch buffer so replay only
120
+ // Finalize in-place and strip the scratch buffers so replay only
110
121
  // carries parsed arguments.
111
122
  delete block.partialArgs;
123
+ delete block.streamIndex;
112
124
  stream.push({
113
125
  type: "toolcall_end",
114
- contentIndex: blockIndex(),
126
+ contentIndex,
115
127
  toolCall: block,
116
128
  partial: output,
117
129
  });
@@ -150,13 +162,13 @@ export const streamOpenAICompletions = (model, context, options) => {
150
162
  finishCurrentBlock(currentBlock);
151
163
  currentBlock = { type: "text", text: "" };
152
164
  output.content.push(currentBlock);
153
- stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output });
165
+ stream.push({ type: "text_start", contentIndex: currentContentIndex(), partial: output });
154
166
  }
155
167
  if (currentBlock.type === "text") {
156
168
  currentBlock.text += choice.delta.content;
157
169
  stream.push({
158
170
  type: "text_delta",
159
- contentIndex: blockIndex(),
171
+ contentIndex: currentContentIndex(),
160
172
  delta: choice.delta.content,
161
173
  partial: output,
162
174
  });
@@ -187,14 +199,14 @@ export const streamOpenAICompletions = (model, context, options) => {
187
199
  thinkingSignature: foundReasoningField,
188
200
  };
189
201
  output.content.push(currentBlock);
190
- stream.push({ type: "thinking_start", contentIndex: blockIndex(), partial: output });
202
+ stream.push({ type: "thinking_start", contentIndex: currentContentIndex(), partial: output });
191
203
  }
192
204
  if (currentBlock.type === "thinking") {
193
205
  const delta = choice.delta[foundReasoningField];
194
206
  currentBlock.thinking += delta;
195
207
  stream.push({
196
208
  type: "thinking_delta",
197
- contentIndex: blockIndex(),
209
+ contentIndex: currentContentIndex(),
198
210
  delta,
199
211
  partial: output,
200
212
  });
@@ -202,9 +214,11 @@ export const streamOpenAICompletions = (model, context, options) => {
202
214
  }
203
215
  if (choice?.delta?.tool_calls) {
204
216
  for (const toolCall of choice.delta.tool_calls) {
205
- if (!currentBlock ||
206
- currentBlock.type !== "toolCall" ||
207
- (toolCall.id && currentBlock.id !== toolCall.id)) {
217
+ const streamIndex = typeof toolCall.index === "number" ? toolCall.index : undefined;
218
+ const sameToolCall = currentBlock?.type === "toolCall" &&
219
+ ((streamIndex !== undefined && currentBlock.streamIndex === streamIndex) ||
220
+ (streamIndex === undefined && toolCall.id && currentBlock.id === toolCall.id));
221
+ if (!sameToolCall) {
208
222
  finishCurrentBlock(currentBlock);
209
223
  currentBlock = {
210
224
  type: "toolCall",
@@ -212,24 +226,34 @@ export const streamOpenAICompletions = (model, context, options) => {
212
226
  name: toolCall.function?.name || "",
213
227
  arguments: {},
214
228
  partialArgs: "",
229
+ streamIndex,
215
230
  };
216
231
  output.content.push(currentBlock);
217
- stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output });
232
+ stream.push({
233
+ type: "toolcall_start",
234
+ contentIndex: getContentIndex(currentBlock),
235
+ partial: output,
236
+ });
218
237
  }
219
- if (currentBlock.type === "toolCall") {
220
- if (toolCall.id)
221
- currentBlock.id = toolCall.id;
222
- if (toolCall.function?.name)
223
- currentBlock.name = toolCall.function.name;
238
+ const currentToolCallBlock = currentBlock?.type === "toolCall" ? currentBlock : null;
239
+ if (currentToolCallBlock) {
240
+ if (!currentToolCallBlock.id && toolCall.id)
241
+ currentToolCallBlock.id = toolCall.id;
242
+ if (!currentToolCallBlock.name && toolCall.function?.name) {
243
+ currentToolCallBlock.name = toolCall.function.name;
244
+ }
245
+ if (currentToolCallBlock.streamIndex === undefined && streamIndex !== undefined) {
246
+ currentToolCallBlock.streamIndex = streamIndex;
247
+ }
224
248
  let delta = "";
225
249
  if (toolCall.function?.arguments) {
226
250
  delta = toolCall.function.arguments;
227
- currentBlock.partialArgs += toolCall.function.arguments;
228
- currentBlock.arguments = parseStreamingJson(currentBlock.partialArgs);
251
+ currentToolCallBlock.partialArgs += toolCall.function.arguments;
252
+ currentToolCallBlock.arguments = parseStreamingJson(currentToolCallBlock.partialArgs);
229
253
  }
230
254
  stream.push({
231
255
  type: "toolcall_delta",
232
- contentIndex: blockIndex(),
256
+ contentIndex: getContentIndex(currentToolCallBlock),
233
257
  delta,
234
258
  partial: output,
235
259
  });
@@ -265,8 +289,9 @@ export const streamOpenAICompletions = (model, context, options) => {
265
289
  catch (error) {
266
290
  for (const block of output.content) {
267
291
  delete block.index;
268
- // partialArgs is only a streaming scratch buffer; never persist it.
292
+ // Streaming scratch buffers are only used during parsing; never persist them.
269
293
  delete block.partialArgs;
294
+ delete block.streamIndex;
270
295
  }
271
296
  output.stopReason = options?.signal?.aborted ? "aborted" : "error";
272
297
  output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);
@@ -321,20 +346,23 @@ function createClient(model, context, apiKey, optionsHeaders, sessionId, compat
321
346
  }
322
347
  return new OpenAI({
323
348
  apiKey,
324
- baseURL: model.baseUrl,
349
+ baseURL: isCloudflareProvider(model.provider) ? resolveCloudflareBaseUrl(model) : model.baseUrl,
325
350
  dangerouslyAllowBrowser: true,
326
351
  defaultHeaders: headers,
327
352
  });
328
353
  }
329
354
  function buildParams(model, context, options, compat = getCompat(model), cacheRetention = resolveCacheRetention(options?.cacheRetention)) {
330
355
  const messages = convertMessages(model, context, compat);
331
- const cacheControl = getCompatCacheControl(model, compat, cacheRetention);
356
+ const cacheControl = getCompatCacheControl(compat, cacheRetention);
332
357
  const params = {
333
358
  model: model.id,
334
359
  messages,
335
360
  stream: true,
336
- prompt_cache_key: model.baseUrl.includes("api.openai.com") && cacheRetention !== "none" ? options?.sessionId : undefined,
337
- prompt_cache_retention: model.baseUrl.includes("api.openai.com") && cacheRetention === "long" ? "24h" : undefined,
361
+ prompt_cache_key: (model.baseUrl.includes("api.openai.com") && cacheRetention !== "none") ||
362
+ (cacheRetention === "long" && compat.supportsLongCacheRetention)
363
+ ? options?.sessionId
364
+ : undefined,
365
+ prompt_cache_retention: cacheRetention === "long" && compat.supportsLongCacheRetention ? "24h" : undefined,
338
366
  };
339
367
  if (compat.supportsUsageInStreaming !== false) {
340
368
  params.stream_options = { include_usage: true };
@@ -353,7 +381,7 @@ function buildParams(model, context, options, compat = getCompat(model), cacheRe
353
381
  if (options?.temperature !== undefined) {
354
382
  params.temperature = options.temperature;
355
383
  }
356
- if (context.tools) {
384
+ if (context.tools && context.tools.length > 0) {
357
385
  params.tools = convertTools(context.tools, compat);
358
386
  if (compat.zaiToolStream) {
359
387
  params.tool_stream = true;
@@ -381,6 +409,12 @@ function buildParams(model, context, options, compat = getCompat(model), cacheRe
381
409
  preserve_thinking: true,
382
410
  };
383
411
  }
412
+ else if (compat.thinkingFormat === "deepseek" && model.reasoning) {
413
+ params.thinking = { type: options?.reasoningEffort ? "enabled" : "disabled" };
414
+ if (options?.reasoningEffort) {
415
+ params.reasoning_effort = mapReasoningEffort(options.reasoningEffort, compat.reasoningEffortMap);
416
+ }
417
+ }
384
418
  else if (compat.thinkingFormat === "openrouter" && model.reasoning) {
385
419
  // OpenRouter normalizes reasoning across providers via a nested reasoning object.
386
420
  const openRouterParams = params;
@@ -418,11 +452,11 @@ function buildParams(model, context, options, compat = getCompat(model), cacheRe
418
452
  function mapReasoningEffort(effort, reasoningEffortMap) {
419
453
  return reasoningEffortMap[effort] ?? effort;
420
454
  }
421
- function getCompatCacheControl(model, compat, cacheRetention) {
455
+ function getCompatCacheControl(compat, cacheRetention) {
422
456
  if (compat.cacheControlFormat !== "anthropic" || cacheRetention === "none") {
423
457
  return undefined;
424
458
  }
425
- const ttl = cacheRetention === "long" && model.baseUrl.includes("api.anthropic.com") ? "1h" : undefined;
459
+ const ttl = cacheRetention === "long" && compat.supportsLongCacheRetention ? "1h" : undefined;
426
460
  return { type: "ephemeral", ...(ttl ? { ttl } : {}) };
427
461
  }
428
462
  function applyAnthropicCacheControl(messages, tools, cacheControl) {
@@ -631,6 +665,11 @@ export function convertMessages(model, context, compat) {
631
665
  assistantMsg.reasoning_details = reasoningDetails;
632
666
  }
633
667
  }
668
+ if (compat.requiresReasoningContentOnAssistantMessages &&
669
+ model.reasoning &&
670
+ assistantMsg.reasoning_content === undefined) {
671
+ assistantMsg.reasoning_content = "";
672
+ }
634
673
  // Skip assistant messages that have no content and no tool calls.
635
674
  // Some providers require "either content or tool_calls, but not none".
636
675
  // Other providers also don't accept empty assistant messages.
@@ -725,7 +764,6 @@ function parseChunkUsage(rawUsage, model) {
725
764
  const promptTokens = rawUsage.prompt_tokens || 0;
726
765
  const reportedCachedTokens = rawUsage.prompt_tokens_details?.cached_tokens || 0;
727
766
  const cacheWriteTokens = rawUsage.prompt_tokens_details?.cache_write_tokens || 0;
728
- const reasoningTokens = rawUsage.completion_tokens_details?.reasoning_tokens || 0;
729
767
  // Normalize to pi-ai semantics:
730
768
  // - cacheRead: hits from cache created by previous requests only
731
769
  // - cacheWrite: tokens written to cache in this request
@@ -733,9 +771,8 @@ function parseChunkUsage(rawUsage, model) {
733
771
  // as (previous hits + current writes). In that case, remove cacheWrite from cacheRead.
734
772
  const cacheReadTokens = cacheWriteTokens > 0 ? Math.max(0, reportedCachedTokens - cacheWriteTokens) : reportedCachedTokens;
735
773
  const input = Math.max(0, promptTokens - cacheReadTokens - cacheWriteTokens);
736
- // Compute totalTokens ourselves since we add reasoning_tokens to output
737
- // and some providers (e.g., Groq) don't include them in total_tokens
738
- const outputTokens = (rawUsage.completion_tokens || 0) + reasoningTokens;
774
+ // OpenAI completion_tokens already includes reasoning_tokens.
775
+ const outputTokens = rawUsage.completion_tokens || 0;
739
776
  const usage = {
740
777
  input,
741
778
  output: outputTokens,
@@ -779,6 +816,7 @@ function detectCompat(model) {
779
816
  const provider = model.provider;
780
817
  const baseUrl = model.baseUrl;
781
818
  const isZai = provider === "zai" || baseUrl.includes("api.z.ai");
819
+ const isCloudflareWorkersAI = provider === "cloudflare-workers-ai" || baseUrl.includes("api.cloudflare.com");
782
820
  const isNonStandard = provider === "cerebras" ||
783
821
  baseUrl.includes("cerebras.ai") ||
784
822
  provider === "xai" ||
@@ -787,20 +825,30 @@ function detectCompat(model) {
787
825
  baseUrl.includes("deepseek.com") ||
788
826
  isZai ||
789
827
  provider === "opencode" ||
790
- baseUrl.includes("opencode.ai");
828
+ baseUrl.includes("opencode.ai") ||
829
+ isCloudflareWorkersAI;
791
830
  const useMaxTokens = baseUrl.includes("chutes.ai");
792
831
  const isGrok = provider === "xai" || baseUrl.includes("api.x.ai");
793
832
  const isGroq = provider === "groq" || baseUrl.includes("groq.com");
833
+ const isDeepSeek = provider === "deepseek" || baseUrl.includes("deepseek.com");
794
834
  const cacheControlFormat = provider === "openrouter" && model.id.startsWith("anthropic/") ? "anthropic" : undefined;
795
- const reasoningEffortMap = isGroq && model.id === "qwen/qwen3-32b"
835
+ const reasoningEffortMap = isDeepSeek
796
836
  ? {
797
- minimal: "default",
798
- low: "default",
799
- medium: "default",
800
- high: "default",
801
- xhigh: "default",
837
+ minimal: "high",
838
+ low: "high",
839
+ medium: "high",
840
+ high: "high",
841
+ xhigh: "max",
802
842
  }
803
- : {};
843
+ : isGroq && model.id === "qwen/qwen3-32b"
844
+ ? {
845
+ minimal: "default",
846
+ low: "default",
847
+ medium: "default",
848
+ high: "default",
849
+ xhigh: "default",
850
+ }
851
+ : {};
804
852
  return {
805
853
  supportsStore: !isNonStandard,
806
854
  supportsDeveloperRole: !isNonStandard,
@@ -811,17 +859,21 @@ function detectCompat(model) {
811
859
  requiresToolResultName: false,
812
860
  requiresAssistantAfterToolResult: false,
813
861
  requiresThinkingAsText: false,
814
- thinkingFormat: isZai
815
- ? "zai"
816
- : provider === "openrouter" || baseUrl.includes("openrouter.ai")
817
- ? "openrouter"
818
- : "openai",
862
+ requiresReasoningContentOnAssistantMessages: isDeepSeek,
863
+ thinkingFormat: isDeepSeek
864
+ ? "deepseek"
865
+ : isZai
866
+ ? "zai"
867
+ : provider === "openrouter" || baseUrl.includes("openrouter.ai")
868
+ ? "openrouter"
869
+ : "openai",
819
870
  openRouterRouting: {},
820
871
  vercelGatewayRouting: {},
821
872
  zaiToolStream: false,
822
873
  supportsStrictMode: true,
823
874
  cacheControlFormat,
824
875
  sendSessionAffinityHeaders: false,
876
+ supportsLongCacheRetention: true,
825
877
  };
826
878
  }
827
879
  /**
@@ -842,6 +894,8 @@ function getCompat(model) {
842
894
  requiresToolResultName: model.compat.requiresToolResultName ?? detected.requiresToolResultName,
843
895
  requiresAssistantAfterToolResult: model.compat.requiresAssistantAfterToolResult ?? detected.requiresAssistantAfterToolResult,
844
896
  requiresThinkingAsText: model.compat.requiresThinkingAsText ?? detected.requiresThinkingAsText,
897
+ requiresReasoningContentOnAssistantMessages: model.compat.requiresReasoningContentOnAssistantMessages ??
898
+ detected.requiresReasoningContentOnAssistantMessages,
845
899
  thinkingFormat: model.compat.thinkingFormat ?? detected.thinkingFormat,
846
900
  openRouterRouting: model.compat.openRouterRouting ?? {},
847
901
  vercelGatewayRouting: model.compat.vercelGatewayRouting ?? detected.vercelGatewayRouting,
@@ -849,6 +903,7 @@ function getCompat(model) {
849
903
  supportsStrictMode: model.compat.supportsStrictMode ?? detected.supportsStrictMode,
850
904
  cacheControlFormat: model.compat.cacheControlFormat ?? detected.cacheControlFormat,
851
905
  sendSessionAffinityHeaders: model.compat.sendSessionAffinityHeaders ?? detected.sendSessionAffinityHeaders,
906
+ supportsLongCacheRetention: model.compat.supportsLongCacheRetention ?? detected.supportsLongCacheRetention,
852
907
  };
853
908
  }
854
909
  //# sourceMappingURL=openai-completions.js.map