@librechat/agents 3.1.74 → 3.1.75-dev.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (203)
  1. package/README.md +66 -0
  2. package/dist/cjs/agents/AgentContext.cjs +84 -37
  3. package/dist/cjs/agents/AgentContext.cjs.map +1 -1
  4. package/dist/cjs/graphs/Graph.cjs +13 -3
  5. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  6. package/dist/cjs/langchain/google-common.cjs +3 -0
  7. package/dist/cjs/langchain/google-common.cjs.map +1 -0
  8. package/dist/cjs/langchain/index.cjs +86 -0
  9. package/dist/cjs/langchain/index.cjs.map +1 -0
  10. package/dist/cjs/langchain/language_models/chat_models.cjs +3 -0
  11. package/dist/cjs/langchain/language_models/chat_models.cjs.map +1 -0
  12. package/dist/cjs/langchain/messages/tool.cjs +3 -0
  13. package/dist/cjs/langchain/messages/tool.cjs.map +1 -0
  14. package/dist/cjs/langchain/messages.cjs +51 -0
  15. package/dist/cjs/langchain/messages.cjs.map +1 -0
  16. package/dist/cjs/langchain/openai.cjs +3 -0
  17. package/dist/cjs/langchain/openai.cjs.map +1 -0
  18. package/dist/cjs/langchain/prompts.cjs +11 -0
  19. package/dist/cjs/langchain/prompts.cjs.map +1 -0
  20. package/dist/cjs/langchain/runnables.cjs +19 -0
  21. package/dist/cjs/langchain/runnables.cjs.map +1 -0
  22. package/dist/cjs/langchain/tools.cjs +23 -0
  23. package/dist/cjs/langchain/tools.cjs.map +1 -0
  24. package/dist/cjs/langchain/utils/env.cjs +11 -0
  25. package/dist/cjs/langchain/utils/env.cjs.map +1 -0
  26. package/dist/cjs/llm/anthropic/index.cjs +145 -52
  27. package/dist/cjs/llm/anthropic/index.cjs.map +1 -1
  28. package/dist/cjs/llm/anthropic/types.cjs.map +1 -1
  29. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +25 -15
  30. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
  31. package/dist/cjs/llm/anthropic/utils/message_outputs.cjs +84 -70
  32. package/dist/cjs/llm/anthropic/utils/message_outputs.cjs.map +1 -1
  33. package/dist/cjs/llm/bedrock/index.cjs +1 -1
  34. package/dist/cjs/llm/bedrock/index.cjs.map +1 -1
  35. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs +213 -3
  36. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs.map +1 -1
  37. package/dist/cjs/llm/bedrock/utils/message_outputs.cjs +2 -1
  38. package/dist/cjs/llm/bedrock/utils/message_outputs.cjs.map +1 -1
  39. package/dist/cjs/llm/google/utils/common.cjs +5 -4
  40. package/dist/cjs/llm/google/utils/common.cjs.map +1 -1
  41. package/dist/cjs/llm/openai/index.cjs +468 -647
  42. package/dist/cjs/llm/openai/index.cjs.map +1 -1
  43. package/dist/cjs/llm/openai/utils/index.cjs +1 -448
  44. package/dist/cjs/llm/openai/utils/index.cjs.map +1 -1
  45. package/dist/cjs/llm/openrouter/index.cjs +57 -175
  46. package/dist/cjs/llm/openrouter/index.cjs.map +1 -1
  47. package/dist/cjs/llm/vertexai/index.cjs +5 -3
  48. package/dist/cjs/llm/vertexai/index.cjs.map +1 -1
  49. package/dist/cjs/main.cjs +83 -3
  50. package/dist/cjs/main.cjs.map +1 -1
  51. package/dist/cjs/messages/cache.cjs +39 -4
  52. package/dist/cjs/messages/cache.cjs.map +1 -1
  53. package/dist/cjs/messages/core.cjs +7 -6
  54. package/dist/cjs/messages/core.cjs.map +1 -1
  55. package/dist/cjs/messages/format.cjs +7 -6
  56. package/dist/cjs/messages/format.cjs.map +1 -1
  57. package/dist/cjs/messages/langchain.cjs +26 -0
  58. package/dist/cjs/messages/langchain.cjs.map +1 -0
  59. package/dist/cjs/messages/prune.cjs +7 -6
  60. package/dist/cjs/messages/prune.cjs.map +1 -1
  61. package/dist/cjs/tools/ToolNode.cjs +5 -1
  62. package/dist/cjs/tools/ToolNode.cjs.map +1 -1
  63. package/dist/esm/agents/AgentContext.mjs +85 -38
  64. package/dist/esm/agents/AgentContext.mjs.map +1 -1
  65. package/dist/esm/graphs/Graph.mjs +13 -3
  66. package/dist/esm/graphs/Graph.mjs.map +1 -1
  67. package/dist/esm/langchain/google-common.mjs +2 -0
  68. package/dist/esm/langchain/google-common.mjs.map +1 -0
  69. package/dist/esm/langchain/index.mjs +5 -0
  70. package/dist/esm/langchain/index.mjs.map +1 -0
  71. package/dist/esm/langchain/language_models/chat_models.mjs +2 -0
  72. package/dist/esm/langchain/language_models/chat_models.mjs.map +1 -0
  73. package/dist/esm/langchain/messages/tool.mjs +2 -0
  74. package/dist/esm/langchain/messages/tool.mjs.map +1 -0
  75. package/dist/esm/langchain/messages.mjs +2 -0
  76. package/dist/esm/langchain/messages.mjs.map +1 -0
  77. package/dist/esm/langchain/openai.mjs +2 -0
  78. package/dist/esm/langchain/openai.mjs.map +1 -0
  79. package/dist/esm/langchain/prompts.mjs +2 -0
  80. package/dist/esm/langchain/prompts.mjs.map +1 -0
  81. package/dist/esm/langchain/runnables.mjs +2 -0
  82. package/dist/esm/langchain/runnables.mjs.map +1 -0
  83. package/dist/esm/langchain/tools.mjs +2 -0
  84. package/dist/esm/langchain/tools.mjs.map +1 -0
  85. package/dist/esm/langchain/utils/env.mjs +2 -0
  86. package/dist/esm/langchain/utils/env.mjs.map +1 -0
  87. package/dist/esm/llm/anthropic/index.mjs +146 -54
  88. package/dist/esm/llm/anthropic/index.mjs.map +1 -1
  89. package/dist/esm/llm/anthropic/types.mjs.map +1 -1
  90. package/dist/esm/llm/anthropic/utils/message_inputs.mjs +25 -15
  91. package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
  92. package/dist/esm/llm/anthropic/utils/message_outputs.mjs +84 -71
  93. package/dist/esm/llm/anthropic/utils/message_outputs.mjs.map +1 -1
  94. package/dist/esm/llm/bedrock/index.mjs +1 -1
  95. package/dist/esm/llm/bedrock/index.mjs.map +1 -1
  96. package/dist/esm/llm/bedrock/utils/message_inputs.mjs +214 -4
  97. package/dist/esm/llm/bedrock/utils/message_inputs.mjs.map +1 -1
  98. package/dist/esm/llm/bedrock/utils/message_outputs.mjs +2 -1
  99. package/dist/esm/llm/bedrock/utils/message_outputs.mjs.map +1 -1
  100. package/dist/esm/llm/google/utils/common.mjs +5 -4
  101. package/dist/esm/llm/google/utils/common.mjs.map +1 -1
  102. package/dist/esm/llm/openai/index.mjs +469 -648
  103. package/dist/esm/llm/openai/index.mjs.map +1 -1
  104. package/dist/esm/llm/openai/utils/index.mjs +4 -449
  105. package/dist/esm/llm/openai/utils/index.mjs.map +1 -1
  106. package/dist/esm/llm/openrouter/index.mjs +57 -175
  107. package/dist/esm/llm/openrouter/index.mjs.map +1 -1
  108. package/dist/esm/llm/vertexai/index.mjs +5 -3
  109. package/dist/esm/llm/vertexai/index.mjs.map +1 -1
  110. package/dist/esm/main.mjs +4 -0
  111. package/dist/esm/main.mjs.map +1 -1
  112. package/dist/esm/messages/cache.mjs +39 -4
  113. package/dist/esm/messages/cache.mjs.map +1 -1
  114. package/dist/esm/messages/core.mjs +7 -6
  115. package/dist/esm/messages/core.mjs.map +1 -1
  116. package/dist/esm/messages/format.mjs +7 -6
  117. package/dist/esm/messages/format.mjs.map +1 -1
  118. package/dist/esm/messages/langchain.mjs +23 -0
  119. package/dist/esm/messages/langchain.mjs.map +1 -0
  120. package/dist/esm/messages/prune.mjs +7 -6
  121. package/dist/esm/messages/prune.mjs.map +1 -1
  122. package/dist/esm/tools/ToolNode.mjs +5 -1
  123. package/dist/esm/tools/ToolNode.mjs.map +1 -1
  124. package/dist/types/agents/AgentContext.d.ts +14 -4
  125. package/dist/types/agents/__tests__/promptCacheLiveHelpers.d.ts +46 -0
  126. package/dist/types/index.d.ts +1 -0
  127. package/dist/types/langchain/google-common.d.ts +1 -0
  128. package/dist/types/langchain/index.d.ts +8 -0
  129. package/dist/types/langchain/language_models/chat_models.d.ts +1 -0
  130. package/dist/types/langchain/messages/tool.d.ts +1 -0
  131. package/dist/types/langchain/messages.d.ts +2 -0
  132. package/dist/types/langchain/openai.d.ts +1 -0
  133. package/dist/types/langchain/prompts.d.ts +1 -0
  134. package/dist/types/langchain/runnables.d.ts +2 -0
  135. package/dist/types/langchain/tools.d.ts +2 -0
  136. package/dist/types/langchain/utils/env.d.ts +1 -0
  137. package/dist/types/llm/anthropic/index.d.ts +22 -9
  138. package/dist/types/llm/anthropic/types.d.ts +5 -1
  139. package/dist/types/llm/anthropic/utils/message_outputs.d.ts +13 -6
  140. package/dist/types/llm/anthropic/utils/output_parsers.d.ts +1 -1
  141. package/dist/types/llm/openai/index.d.ts +21 -24
  142. package/dist/types/llm/openrouter/index.d.ts +11 -9
  143. package/dist/types/llm/vertexai/index.d.ts +1 -0
  144. package/dist/types/messages/cache.d.ts +4 -1
  145. package/dist/types/messages/langchain.d.ts +27 -0
  146. package/dist/types/types/graph.d.ts +26 -38
  147. package/dist/types/types/llm.d.ts +3 -3
  148. package/dist/types/types/run.d.ts +2 -0
  149. package/dist/types/types/stream.d.ts +1 -1
  150. package/package.json +80 -17
  151. package/src/agents/AgentContext.ts +123 -44
  152. package/src/agents/__tests__/AgentContext.anthropic.live.test.ts +116 -0
  153. package/src/agents/__tests__/AgentContext.bedrock.live.test.ts +149 -0
  154. package/src/agents/__tests__/AgentContext.test.ts +155 -2
  155. package/src/agents/__tests__/promptCacheLiveHelpers.ts +165 -0
  156. package/src/graphs/Graph.ts +24 -4
  157. package/src/graphs/__tests__/composition.smoke.test.ts +188 -0
  158. package/src/index.ts +3 -0
  159. package/src/langchain/google-common.ts +1 -0
  160. package/src/langchain/index.ts +8 -0
  161. package/src/langchain/language_models/chat_models.ts +1 -0
  162. package/src/langchain/messages/tool.ts +5 -0
  163. package/src/langchain/messages.ts +21 -0
  164. package/src/langchain/openai.ts +1 -0
  165. package/src/langchain/prompts.ts +1 -0
  166. package/src/langchain/runnables.ts +7 -0
  167. package/src/langchain/tools.ts +8 -0
  168. package/src/langchain/utils/env.ts +1 -0
  169. package/src/llm/anthropic/index.ts +252 -84
  170. package/src/llm/anthropic/llm.spec.ts +751 -102
  171. package/src/llm/anthropic/types.ts +9 -1
  172. package/src/llm/anthropic/utils/message_inputs.ts +43 -20
  173. package/src/llm/anthropic/utils/message_outputs.ts +119 -101
  174. package/src/llm/anthropic/utils/server-tool-inputs.test.ts +77 -0
  175. package/src/llm/bedrock/index.ts +2 -2
  176. package/src/llm/bedrock/llm.spec.ts +341 -0
  177. package/src/llm/bedrock/utils/message_inputs.ts +303 -4
  178. package/src/llm/bedrock/utils/message_outputs.ts +2 -1
  179. package/src/llm/custom-chat-models.smoke.test.ts +662 -0
  180. package/src/llm/google/llm.spec.ts +339 -57
  181. package/src/llm/google/utils/common.ts +53 -48
  182. package/src/llm/openai/contentBlocks.test.ts +346 -0
  183. package/src/llm/openai/index.ts +736 -837
  184. package/src/llm/openai/utils/index.ts +84 -64
  185. package/src/llm/openrouter/index.ts +124 -247
  186. package/src/llm/openrouter/reasoning.test.ts +8 -1
  187. package/src/llm/vertexai/index.ts +11 -5
  188. package/src/llm/vertexai/llm.spec.ts +28 -1
  189. package/src/messages/cache.test.ts +106 -4
  190. package/src/messages/cache.ts +57 -5
  191. package/src/messages/core.ts +16 -9
  192. package/src/messages/format.ts +9 -6
  193. package/src/messages/langchain.ts +39 -0
  194. package/src/messages/prune.ts +12 -8
  195. package/src/scripts/caching.ts +2 -3
  196. package/src/specs/anthropic.simple.test.ts +61 -0
  197. package/src/specs/summarization.test.ts +58 -61
  198. package/src/tools/ToolNode.ts +5 -1
  199. package/src/types/graph.ts +35 -88
  200. package/src/types/llm.ts +3 -3
  201. package/src/types/run.ts +2 -0
  202. package/src/types/stream.ts +1 -1
  203. package/src/utils/llmConfig.ts +1 -6
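The diff body below covers the rewritten OpenAI chat-model wrappers (file 183, package/src/llm/openai/index.ts). As a hedged usage sketch of the new constructor fields that appear in it — `_lc_stream_delay`, `includeReasoningContent`, `includeReasoningDetails`, `convertReasoningDetailsToContent` — assuming the package re-exports `ChatOpenAI` from its root; the model name and key handling are placeholders, not taken from this diff:

import { ChatOpenAI } from '@librechat/agents'; // assumed root re-export

const chat = new ChatOpenAI({
  model: 'gpt-4o-mini', // placeholder
  apiKey: process.env.OPENAI_API_KEY, // placeholder
  _lc_stream_delay: 25, // optional ms pause between streamed chunks
  includeReasoningContent: true, // forward provider reasoning fields downstream
});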
package/src/llm/openai/index.ts

@@ -1,36 +1,40 @@
  import { AzureOpenAI as AzureOpenAIClient } from 'openai';
  import { ChatXAI as OriginalChatXAI } from '@langchain/xai';
  import { ChatGenerationChunk } from '@langchain/core/outputs';
- import { AIMessage, AIMessageChunk } from '@langchain/core/messages';
+ import {
+   AIMessage,
+   AIMessageChunk,
+   isAIMessage,
+ } from '@langchain/core/messages';
  import { ToolDefinition } from '@langchain/core/language_models/base';
- import { isLangChainTool } from '@langchain/core/utils/function_calling';
+ import {
+   convertToOpenAITool,
+   isLangChainTool,
+ } from '@langchain/core/utils/function_calling';
  import { ChatDeepSeek as OriginalChatDeepSeek } from '@langchain/deepseek';
  import { CallbackManagerForLLMRun } from '@langchain/core/callbacks/manager';
  import {
    getEndpoint,
    OpenAIClient,
-   formatToOpenAITool,
+   getHeadersWithUserAgent,
    ChatOpenAI as OriginalChatOpenAI,
+   ChatOpenAIResponses as OriginalChatOpenAIResponses,
+   ChatOpenAICompletions as OriginalChatOpenAICompletions,
    AzureChatOpenAI as OriginalAzureChatOpenAI,
+   AzureChatOpenAIResponses as OriginalAzureChatOpenAIResponses,
+   AzureChatOpenAICompletions as OriginalAzureChatOpenAICompletions,
  } from '@langchain/openai';
+ import type { HeaderValue, HeadersLike } from './types';
  import type {
-   OpenAIChatCallOptions,
-   OpenAIRoleEnum,
-   HeaderValue,
-   HeadersLike,
- } from './types';
+   BaseMessage,
+   BaseMessageChunk,
+   UsageMetadata,
+ } from '@langchain/core/messages';
  import type { BindToolsInput } from '@langchain/core/language_models/chat_models';
- import type { BaseMessage, UsageMetadata } from '@langchain/core/messages';
- import type { ChatResult, ChatGeneration } from '@langchain/core/outputs';
+ import type { ChatGeneration, ChatResult } from '@langchain/core/outputs';
  import type { ChatXAIInput } from '@langchain/xai';
  import type * as t from '@langchain/openai';
- import {
-   isReasoningModel,
-   _convertMessagesToOpenAIParams,
-   _convertMessagesToOpenAIResponsesParams,
-   _convertOpenAIResponsesDeltaToBaseMessageChunk,
-   type ResponseReturnStreamEvents,
- } from './utils';
+ import { isReasoningModel, _convertMessagesToOpenAIParams } from './utils';
  import { sleep } from '@/utils';

  // eslint-disable-next-line @typescript-eslint/explicit-function-return-type
@@ -79,10 +83,192 @@ export function normalizeHeaders(
    return Object.fromEntries(output.entries());
  }

+ type OpenAICoreRequestOptions = OpenAIClient.RequestOptions;
  type OpenAICompletionParam =
    OpenAIClient.Chat.Completions.ChatCompletionMessageParam;
+ type OpenAIClientConfig = NonNullable<
+   ConstructorParameters<typeof OpenAIClient>[0]
+ >;
+ type LibreChatOpenAIFields = t.ChatOpenAIFields & {
+   _lc_stream_delay?: number;
+   includeReasoningContent?: boolean;
+   includeReasoningDetails?: boolean;
+   convertReasoningDetailsToContent?: boolean;
+ };
+ type LibreChatAzureOpenAIFields = t.AzureOpenAIInput & {
+   _lc_stream_delay?: number;
+ };
+ type ReasoningCallOptions = {
+   reasoning?: OpenAIClient.Reasoning;
+   reasoningEffort?: OpenAIClient.Reasoning['effort'];
+ };
+ type OpenAIDeltaWithLibreChatFields = Record<string, unknown> & {
+   reasoning?: unknown;
+   reasoning_details?: unknown;
+   provider_specific_fields?: unknown;
+ };
+ type OpenAIClientOwner = {
+   client?: OpenAIClient;
+   clientConfig: OpenAIClientConfig;
+   timeout?: number;
+ };
+ type AbortableOpenAIClient = CustomOpenAIClient | CustomAzureOpenAIClient;
+ type OpenAIClientDelegate = {
+   client?: AbortableOpenAIClient;
+   _getClientOptions(
+     options: OpenAICoreRequestOptions | undefined
+   ): OpenAICoreRequestOptions;
+ };
+
+ function getExposedOpenAIClient(
+   completions: OpenAIClientDelegate,
+   responses: OpenAIClientDelegate,
+   preferResponses: boolean
+ ): AbortableOpenAIClient {
+   const responsesClient = responses.client;
+   if (responsesClient?.abortHandler != null) {
+     return responsesClient;
+   }
+   const completionsClient = completions.client;
+   if (completionsClient?.abortHandler != null) {
+     return completionsClient;
+   }

- type OpenAICoreRequestOptions = OpenAIClient.RequestOptions;
+   const delegate = preferResponses ? responses : completions;
+   delegate._getClientOptions(undefined);
+   return delegate.client as AbortableOpenAIClient;
+ }
+
+ function getReasoningParams(
+   baseReasoning: OpenAIClient.Reasoning | undefined,
+   options?: ReasoningCallOptions
+ ): OpenAIClient.Reasoning | undefined {
+   let reasoning: OpenAIClient.Reasoning | undefined;
+   if (baseReasoning !== undefined) {
+     reasoning = {
+       ...reasoning,
+       ...baseReasoning,
+     };
+   }
+   if (options?.reasoning !== undefined) {
+     reasoning = {
+       ...reasoning,
+       ...options.reasoning,
+     };
+   }
+   if (
+     options?.reasoningEffort !== undefined &&
+     reasoning?.effort === undefined
+   ) {
+     reasoning = {
+       ...reasoning,
+       effort: options.reasoningEffort,
+     };
+   }
+   return reasoning;
+ }
+
+ function getGatedReasoningParams(
+   model: string,
+   baseReasoning: OpenAIClient.Reasoning | undefined,
+   options?: ReasoningCallOptions
+ ): OpenAIClient.Reasoning | undefined {
+   if (!isReasoningModel(model)) {
+     return;
+   }
+   return getReasoningParams(baseReasoning, options);
+ }
+
+ function attachLibreChatDeltaFields(
+   chunk: BaseMessageChunk,
+   delta: Record<string, unknown>
+ ): BaseMessageChunk {
+   if (!AIMessageChunk.isInstance(chunk)) {
+     return chunk;
+   }
+
+   const libreChatDelta = delta as OpenAIDeltaWithLibreChatFields;
+   if (
+     libreChatDelta.reasoning != null &&
+     chunk.additional_kwargs.reasoning_content == null
+   ) {
+     chunk.additional_kwargs.reasoning_content = libreChatDelta.reasoning;
+   }
+   if (libreChatDelta.reasoning_details != null) {
+     chunk.additional_kwargs.reasoning_details =
+       libreChatDelta.reasoning_details;
+   }
+   if (libreChatDelta.provider_specific_fields != null) {
+     chunk.additional_kwargs.provider_specific_fields =
+       libreChatDelta.provider_specific_fields;
+   }
+   return chunk;
+ }
+
+ function attachLibreChatMessageFields(
+   message: BaseMessage,
+   rawMessage: Record<string, unknown>
+ ): BaseMessage {
+   if (!isAIMessage(message)) {
+     return message;
+   }
+   if (
+     rawMessage.reasoning != null &&
+     message.additional_kwargs.reasoning_content == null
+   ) {
+     message.additional_kwargs.reasoning_content = rawMessage.reasoning;
+   }
+   if (rawMessage.reasoning_details != null) {
+     message.additional_kwargs.reasoning_details = rawMessage.reasoning_details;
+   }
+   if (rawMessage.provider_specific_fields != null) {
+     message.additional_kwargs.provider_specific_fields =
+       rawMessage.provider_specific_fields;
+   }
+   return message;
+ }
+
+ function getCustomOpenAIClientOptions(
+   owner: OpenAIClientOwner,
+   options?: OpenAICoreRequestOptions
+ ): OpenAICoreRequestOptions {
+   if (!(owner.client as OpenAIClient | undefined)) {
+     const openAIEndpointConfig: t.OpenAIEndpointConfig = {
+       baseURL: owner.clientConfig.baseURL,
+     };
+
+     const endpoint = getEndpoint(openAIEndpointConfig);
+     const params = {
+       ...owner.clientConfig,
+       baseURL: endpoint,
+       timeout: owner.timeout,
+       maxRetries: 0,
+     };
+     if (params.baseURL == null) {
+       delete params.baseURL;
+     }
+
+     params.defaultHeaders = getHeadersWithUserAgent(params.defaultHeaders);
+     owner.client = new CustomOpenAIClient(params);
+   }
+   const requestOptions = {
+     ...owner.clientConfig,
+     ...options,
+   } as OpenAICoreRequestOptions;
+   return requestOptions;
+ }
+
+ async function* delayStreamChunks<T>(
+   chunks: AsyncGenerator<T>,
+   delay?: number
+ ): AsyncGenerator<T> {
+   for await (const chunk of chunks) {
+     yield chunk;
+     if (delay != null) {
+       await sleep(delay);
+     }
+   }
+ }

  function createAbortHandler(controller: AbortController): () => void {
    return function (): void {
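A standalone mirror of the precedence that `getReasoningParams` above implements (the `Reasoning` shape is simplified here for illustration): call-time `reasoning` overrides the constructor-level base, while `reasoningEffort` only fills in `effort` when nothing else has set it.

type Reasoning = { effort?: 'low' | 'medium' | 'high'; summary?: string };
type CallOptions = {
  reasoning?: Reasoning;
  reasoningEffort?: Reasoning['effort'];
};

function mergeReasoning(
  base?: Reasoning,
  options?: CallOptions
): Reasoning | undefined {
  let reasoning: Reasoning | undefined;
  if (base !== undefined) {
    reasoning = { ...reasoning, ...base };
  }
  if (options?.reasoning !== undefined) {
    reasoning = { ...reasoning, ...options.reasoning };
  }
  if (options?.reasoningEffort !== undefined && reasoning?.effort === undefined) {
    reasoning = { ...reasoning, effort: options.reasoningEffort };
  }
  return reasoning;
}

console.log(mergeReasoning({ effort: 'low' }, { reasoning: { effort: 'high' } }));
// -> { effort: 'high' }  (call options supersede the base)
console.log(mergeReasoning(undefined, { reasoningEffort: 'medium' }));
// -> { effort: 'medium' }  (reasoningEffort is a fallback, never an override)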
@@ -113,7 +299,7 @@ export function _convertToOpenAITool(
    let toolDef: OpenAIClient.ChatCompletionTool | undefined;

    if (isLangChainTool(tool)) {
-     toolDef = formatToOpenAITool(tool);
+     toolDef = convertToOpenAITool(tool);
    } else {
      toolDef = tool as ToolDefinition;
    }
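For context on the one-line swap above: `convertToOpenAITool` from `@langchain/core/utils/function_calling` produces the same OpenAI function-tool definition that the removed `formatToOpenAITool` import did. A minimal sketch (the tool name and schema are illustrative, not from this package):

import { z } from 'zod';
import { tool } from '@langchain/core/tools';
import { convertToOpenAITool } from '@langchain/core/utils/function_calling';

const add = tool(async ({ a, b }) => String(a + b), {
  name: 'add',
  description: 'Add two numbers',
  schema: z.object({ a: z.number(), b: z.number() }),
});

console.log(convertToOpenAITool(add));
// -> { type: 'function', function: { name: 'add', description: 'Add two numbers', parameters: { ... } } }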
@@ -195,134 +381,240 @@ export class CustomAzureOpenAIClient extends AzureOpenAIClient {
    }
  }

- /** @ts-expect-error We are intentionally overriding `getReasoningParams` */
- export class ChatOpenAI extends OriginalChatOpenAI<t.ChatOpenAICallOptions> {
-   _lc_stream_delay?: number;
+ class LibreChatOpenAICompletions extends OriginalChatOpenAICompletions {
+   private includeReasoningContent?: boolean;
+   private includeReasoningDetails?: boolean;
+   private convertReasoningDetailsToContent?: boolean;

-   constructor(
-     fields?: t.ChatOpenAICallOptions & {
-       _lc_stream_delay?: number;
-     } & t.OpenAIChatInput['modelKwargs']
-   ) {
+   constructor(fields?: LibreChatOpenAIFields) {
      super(fields);
-     this._lc_stream_delay = fields?._lc_stream_delay;
+     this.includeReasoningContent = fields?.includeReasoningContent;
+     this.includeReasoningDetails = fields?.includeReasoningDetails;
+     this.convertReasoningDetailsToContent =
+       fields?.convertReasoningDetailsToContent;
    }

-   public get exposedClient(): CustomOpenAIClient {
-     return this.client;
-   }
-   static lc_name(): string {
-     return 'LibreChatOpenAI';
+   protected _getReasoningParams(
+     options?: this['ParsedCallOptions']
+   ): OpenAIClient.Reasoning | undefined {
+     return getReasoningParams(this.reasoning, options);
    }
-   protected _getClientOptions(
+
+   _getClientOptions(
      options?: OpenAICoreRequestOptions
    ): OpenAICoreRequestOptions {
-     if (!(this.client as OpenAIClient | undefined)) {
-       const openAIEndpointConfig: t.OpenAIEndpointConfig = {
-         baseURL: this.clientConfig.baseURL,
-       };
-
-       const endpoint = getEndpoint(openAIEndpointConfig);
-       const params = {
-         ...this.clientConfig,
-         baseURL: endpoint,
-         timeout: this.timeout,
-         maxRetries: 0,
-       };
-       if (params.baseURL == null) {
-         delete params.baseURL;
-       }
-
-       this.client = new CustomOpenAIClient(params);
-     }
-     const requestOptions = {
-       ...this.clientConfig,
-       ...options,
-     } as OpenAICoreRequestOptions;
-     return requestOptions;
+     return getCustomOpenAIClientOptions(this, options);
    }

-   /**
-    * Returns backwards compatible reasoning parameters from constructor params and call options
-    * @internal
-    */
-   getReasoningParams(
-     options?: this['ParsedCallOptions']
-   ): OpenAIClient.Reasoning | undefined {
-     // apply options in reverse order of importance -- newer options supersede older options
-     let reasoning: OpenAIClient.Reasoning | undefined;
-     if (this.reasoning !== undefined) {
-       reasoning = {
-         ...reasoning,
-         ...this.reasoning,
-       };
-     }
-     if (options?.reasoning !== undefined) {
-       reasoning = {
-         ...reasoning,
-         ...options.reasoning,
-       };
-     }
-
-     return reasoning;
+   protected _convertCompletionsDeltaToBaseMessageChunk(
+     delta: Record<string, unknown>,
+     rawResponse: OpenAIClient.Chat.Completions.ChatCompletionChunk,
+     defaultRole?: OpenAIClient.Chat.ChatCompletionRole
+   ): BaseMessageChunk {
+     return attachLibreChatDeltaFields(
+       super._convertCompletionsDeltaToBaseMessageChunk(
+         delta,
+         rawResponse,
+         defaultRole
+       ),
+       delta
+     );
    }

-   protected _getReasoningParams(
-     options?: this['ParsedCallOptions']
-   ): OpenAIClient.Reasoning | undefined {
-     return this.getReasoningParams(options);
+   protected _convertCompletionsMessageToBaseMessage(
+     message: OpenAIClient.ChatCompletionMessage,
+     rawResponse: OpenAIClient.ChatCompletion
+   ): BaseMessage {
+     return attachLibreChatMessageFields(
+       super._convertCompletionsMessageToBaseMessage(message, rawResponse),
+       message as unknown as Record<string, unknown>
+     );
    }

-   async *_streamResponseChunks(
+   async _generate(
      messages: BaseMessage[],
      options: this['ParsedCallOptions'],
      runManager?: CallbackManagerForLLMRun
-   ): AsyncGenerator<ChatGenerationChunk> {
-     if (!this._useResponseApi(options)) {
-       return yield* this._streamResponseChunks2(messages, options, runManager);
+   ): Promise<ChatResult> {
+     if (
+       this.includeReasoningContent !== true &&
+       this.includeReasoningDetails !== true
+     ) {
+       return super._generate(messages, options, runManager);
      }
-     const streamIterable = await this.responseApiWithRetry(
+
+     options.signal?.throwIfAborted();
+     const usageMetadata: Partial<UsageMetadata> = {};
+     const params = this.invocationParams(options);
+     const messagesMapped = _convertMessagesToOpenAIParams(
+       messages,
+       this.model,
      {
-       ...this.invocationParams<'responses'>(options, { streaming: true }),
-       input: _convertMessagesToOpenAIResponsesParams(
-         messages,
-         this.model,
-         this.zdrEnabled
-       ),
-       stream: true,
-     },
-     options
+       includeReasoningContent: this.includeReasoningContent,
+       includeReasoningDetails: this.includeReasoningDetails,
+       convertReasoningDetailsToContent: this.convertReasoningDetailsToContent,
+     }
    );

-     for await (const data of streamIterable) {
-       const chunk = _convertOpenAIResponsesDeltaToBaseMessageChunk(
-         data as ResponseReturnStreamEvents
-       );
-       if (chunk == null) continue;
-       yield chunk;
-       if (this._lc_stream_delay != null) {
-         await sleep(this._lc_stream_delay);
+     if (params.stream === true) {
+       const stream = this._streamResponseChunks(messages, options, runManager);
+       const finalChunks = new Map<number, ChatGenerationChunk>();
+       for await (const chunk of stream) {
+         chunk.message.response_metadata = {
+           ...chunk.generationInfo,
+           ...chunk.message.response_metadata,
+         };
+         const index =
+           typeof chunk.generationInfo?.completion === 'number'
+             ? chunk.generationInfo.completion
+             : 0;
+         const existingChunk = finalChunks.get(index);
+         if (existingChunk == null) {
+           finalChunks.set(index, chunk);
+         } else {
+           finalChunks.set(index, existingChunk.concat(chunk));
+         }
        }
-       await runManager?.handleLLMNewToken(
-         chunk.text || '',
-         undefined,
-         undefined,
-         undefined,
-         undefined,
-         { chunk }
+       const generations = Array.from(finalChunks.entries())
+         .sort(([aKey], [bKey]) => aKey - bKey)
+         .map(([, value]) => value);
+       const { functions, function_call } = this.invocationParams(options);
+       const promptTokenUsage = await this._getEstimatedTokenCountFromPrompt(
+         messages,
+         functions,
+         function_call
        );
+       const completionTokenUsage =
+         await this._getNumTokensFromGenerations(generations);
+       usageMetadata.input_tokens = promptTokenUsage;
+       usageMetadata.output_tokens = completionTokenUsage;
+       usageMetadata.total_tokens = promptTokenUsage + completionTokenUsage;
+       return {
+         generations,
+         llmOutput: {
+           estimatedTokenUsage: {
+             promptTokens: usageMetadata.input_tokens,
+             completionTokens: usageMetadata.output_tokens,
+             totalTokens: usageMetadata.total_tokens,
+           },
+         },
+       };
      }

-     return;
+     const data = await this.completionWithRetry(
+       {
+         ...params,
+         stream: false,
+         messages: messagesMapped,
+       },
+       {
+         signal: options.signal,
+         ...options.options,
+       }
+     );
+     const {
+       completion_tokens: completionTokens,
+       prompt_tokens: promptTokens,
+       total_tokens: totalTokens,
+       prompt_tokens_details: promptTokensDetails,
+       completion_tokens_details: completionTokensDetails,
+     } = data.usage ?? {};
+
+     if (completionTokens != null) {
+       usageMetadata.output_tokens =
+         (usageMetadata.output_tokens ?? 0) + completionTokens;
+     }
+     if (promptTokens != null) {
+       usageMetadata.input_tokens =
+         (usageMetadata.input_tokens ?? 0) + promptTokens;
+     }
+     if (totalTokens != null) {
+       usageMetadata.total_tokens =
+         (usageMetadata.total_tokens ?? 0) + totalTokens;
+     }
+     if (
+       promptTokensDetails?.audio_tokens != null ||
+       promptTokensDetails?.cached_tokens != null
+     ) {
+       usageMetadata.input_token_details = {
+         ...(promptTokensDetails.audio_tokens != null && {
+           audio: promptTokensDetails.audio_tokens,
+         }),
+         ...(promptTokensDetails.cached_tokens != null && {
+           cache_read: promptTokensDetails.cached_tokens,
+         }),
+       };
+     }
+     if (
+       completionTokensDetails?.audio_tokens != null ||
+       completionTokensDetails?.reasoning_tokens != null
+     ) {
+       usageMetadata.output_token_details = {
+         ...(completionTokensDetails.audio_tokens != null && {
+           audio: completionTokensDetails.audio_tokens,
+         }),
+         ...(completionTokensDetails.reasoning_tokens != null && {
+           reasoning: completionTokensDetails.reasoning_tokens,
+         }),
+       };
+     }
+
+     const generations: ChatGeneration[] = [];
+     for (const part of data.choices) {
+       const generation: ChatGeneration = {
+         text: part.message.content ?? '',
+         message: this._convertCompletionsMessageToBaseMessage(
+           part.message,
+           data
+         ),
+       };
+       generation.generationInfo = {
+         finish_reason: part.finish_reason,
+         ...(part.logprobs ? { logprobs: part.logprobs } : {}),
+       };
+       if (isAIMessage(generation.message)) {
+         generation.message.usage_metadata = usageMetadata as UsageMetadata;
+       }
+       generation.message = new AIMessage(
+         Object.fromEntries(
+           Object.entries(generation.message).filter(
+             ([key]) => !key.startsWith('lc_')
+           )
+         )
+       );
+       generations.push(generation);
+     }
+     return {
+       generations,
+       llmOutput: {
+         tokenUsage: {
+           promptTokens: usageMetadata.input_tokens,
+           completionTokens: usageMetadata.output_tokens,
+           totalTokens: usageMetadata.total_tokens,
+         },
+       },
+     };
    }

-   async *_streamResponseChunks2(
+   async *_streamResponseChunks(
      messages: BaseMessage[],
      options: this['ParsedCallOptions'],
      runManager?: CallbackManagerForLLMRun
    ): AsyncGenerator<ChatGenerationChunk> {
+     if (
+       this.includeReasoningContent !== true &&
+       this.includeReasoningDetails !== true
+     ) {
+       yield* super._streamResponseChunks(messages, options, runManager);
+       return;
+     }
+
      const messagesMapped: OpenAICompletionParam[] =
-       _convertMessagesToOpenAIParams(messages, this.model);
+       _convertMessagesToOpenAIParams(messages, this.model, {
+         includeReasoningContent: this.includeReasoningContent,
+         includeReasoningDetails: this.includeReasoningDetails,
+         convertReasoningDetailsToContent: this.convertReasoningDetailsToContent,
+       });

      const params = {
        ...this.invocationParams(options, {
@@ -331,43 +623,42 @@ export class ChatOpenAI extends OriginalChatOpenAI<t.ChatOpenAICallOptions> {
      messages: messagesMapped,
      stream: true as const,
    };
-   let defaultRole: OpenAIRoleEnum | undefined;
+   let defaultRole: OpenAIClient.Chat.ChatCompletionRole | undefined;

    const streamIterable = await this.completionWithRetry(params, options);
    let usage: OpenAIClient.Completions.CompletionUsage | undefined;
    for await (const data of streamIterable) {
-     const choice = data.choices[0] as
-       | Partial<OpenAIClient.Chat.Completions.ChatCompletionChunk.Choice>
-       | undefined;
-     if (data.usage) {
+     if (options.signal?.aborted === true) {
+       return;
+     }
+     type StreamChoice = Omit<
+       OpenAIClient.Chat.Completions.ChatCompletionChunk.Choice,
+       'delta'
+     > & {
+       delta?: OpenAIClient.Chat.Completions.ChatCompletionChunk.Choice['delta'];
+     };
+     const choices = data.choices as StreamChoice[] | undefined;
+     const choice = choices?.[0];
+     if (data.usage != null) {
        usage = data.usage;
      }
-     if (!choice) {
+     if (choice == null) {
        continue;
      }

      const { delta } = choice;
-     if (!delta) {
+     if (delta == null) {
        continue;
      }
-     const chunk = this._convertOpenAIDeltaToBaseMessageChunk(
-       delta,
+     const chunk = this._convertCompletionsDeltaToBaseMessageChunk(
+       delta as unknown as Record<string, unknown>,
        data,
        defaultRole
      );
-     if ('reasoning_content' in delta) {
-       chunk.additional_kwargs.reasoning_content = delta.reasoning_content;
-     } else if ('reasoning' in delta) {
-       chunk.additional_kwargs.reasoning_content = delta.reasoning;
-     }
-     if ('provider_specific_fields' in delta) {
-       chunk.additional_kwargs.provider_specific_fields =
-         delta.provider_specific_fields;
-     }
      defaultRole = delta.role ?? defaultRole;
      const newTokenIndices = {
        prompt: options.promptIndex ?? 0,
-       completion: choice.index ?? 0,
+       completion: choice.index,
      };
      if (typeof chunk.content !== 'string') {
        // eslint-disable-next-line no-console
@@ -376,17 +667,14 @@
        );
        continue;
      }
-     // eslint-disable-next-line @typescript-eslint/no-explicit-any
-     const generationInfo: Record<string, any> = { ...newTokenIndices };
+     const generationInfo: Record<string, unknown> = { ...newTokenIndices };
      if (choice.finish_reason != null) {
        generationInfo.finish_reason = choice.finish_reason;
-       // Only include system fingerprint in the last chunk for now
-       // to avoid concatenation issues
        generationInfo.system_fingerprint = data.system_fingerprint;
        generationInfo.model_name = data.model;
        generationInfo.service_tier = data.service_tier;
      }
-     if (this.logprobs == true) {
+     if (this.logprobs === true) {
        generationInfo.logprobs = choice.logprobs;
      }
      const generationChunk = new ChatGenerationChunk({
@@ -395,11 +683,8 @@
        generationInfo,
      });
      yield generationChunk;
-     if (this._lc_stream_delay != null) {
-       await sleep(this._lc_stream_delay);
-     }
      await runManager?.handleLLMNewToken(
-       generationChunk.text || '',
+       generationChunk.text,
        newTokenIndices,
        undefined,
        undefined,
@@ -427,9 +712,7 @@
      const generationChunk = new ChatGenerationChunk({
        message: new AIMessageChunk({
          content: '',
-         response_metadata: {
-           usage: { ...usage },
-         },
+         response_metadata: { usage: { ...usage } },
          usage_metadata: {
            input_tokens: usage.prompt_tokens,
            output_tokens: usage.completion_tokens,
@@ -445,9 +728,17 @@
        text: '',
      });
      yield generationChunk;
-     if (this._lc_stream_delay != null) {
-       await sleep(this._lc_stream_delay);
-     }
+     await runManager?.handleLLMNewToken(
+       generationChunk.text,
+       {
+         prompt: 0,
+         completion: 0,
+       },
+       undefined,
+       undefined,
+       undefined,
+       { chunk: generationChunk }
+     );
    }
    if (options.signal?.aborted === true) {
      throw new Error('AbortError');
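The removals in the hunks above take the per-chunk `_lc_stream_delay` sleeps out of the streaming loops themselves; later in this diff the delay is reapplied once by wrapping the whole generator in `delayStreamChunks` (defined near the top of this file). A self-contained sketch of that wrapper's behavior — `sleep` here is a local stand-in for the helper imported from `@/utils`:

const sleep = (ms: number): Promise<void> =>
  new Promise((resolve) => setTimeout(resolve, ms));

async function* delayStreamChunks<T>(
  chunks: AsyncGenerator<T>,
  delay?: number
): AsyncGenerator<T> {
  for await (const chunk of chunks) {
    yield chunk; // emit first, then pause
    if (delay != null) {
      await sleep(delay);
    }
  }
}

async function* tokens(): AsyncGenerator<string> {
  yield 'Hel';
  yield 'lo';
}

void (async () => {
  for await (const token of delayStreamChunks(tokens(), 25)) {
    process.stdout.write(token); // chunks arrive roughly 25 ms apart
  }
})();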
@@ -455,57 +746,28 @@
    }
  }

- /** @ts-expect-error We are intentionally overriding `getReasoningParams` */
- export class AzureChatOpenAI extends OriginalAzureChatOpenAI {
-   _lc_stream_delay?: number;
-
-   constructor(fields?: t.AzureOpenAIInput & { _lc_stream_delay?: number }) {
-     super(fields);
-     this._lc_stream_delay = fields?._lc_stream_delay;
-   }
-
-   public get exposedClient(): CustomOpenAIClient {
-     return this.client;
-   }
-   static lc_name(): 'LibreChatAzureOpenAI' {
-     return 'LibreChatAzureOpenAI';
-   }
-   /**
-    * Returns backwards compatible reasoning parameters from constructor params and call options
-    * @internal
-    */
-   getReasoningParams(
+ class LibreChatOpenAIResponses extends OriginalChatOpenAIResponses {
+   protected _getReasoningParams(
      options?: this['ParsedCallOptions']
    ): OpenAIClient.Reasoning | undefined {
-     if (!isReasoningModel(this.model)) {
-       return;
-     }
-
-     // apply options in reverse order of importance -- newer options supersede older options
-     let reasoning: OpenAIClient.Reasoning | undefined;
-     if (this.reasoning !== undefined) {
-       reasoning = {
-         ...reasoning,
-         ...this.reasoning,
-       };
-     }
-     if (options?.reasoning !== undefined) {
-       reasoning = {
-         ...reasoning,
-         ...options.reasoning,
-       };
-     }
+     return getReasoningParams(this.reasoning, options);
+   }

-     return reasoning;
+   _getClientOptions(
+     options?: OpenAICoreRequestOptions
+   ): OpenAICoreRequestOptions {
+     return getCustomOpenAIClientOptions(this, options);
    }
+ }

+ class LibreChatAzureOpenAICompletions extends OriginalAzureChatOpenAICompletions {
    protected _getReasoningParams(
      options?: this['ParsedCallOptions']
    ): OpenAIClient.Reasoning | undefined {
-     return this.getReasoningParams(options);
+     return getGatedReasoningParams(this.model, this.reasoning, options);
    }

-   protected _getClientOptions(
+   _getClientOptions(
      options: OpenAICoreRequestOptions | undefined
    ): OpenAICoreRequestOptions {
      if (!(this.client as unknown as AzureOpenAIClient | undefined)) {
@@ -567,162 +829,112 @@ export class AzureChatOpenAI extends OriginalAzureChatOpenAI {
      }
      return requestOptions;
    }
-   async *_streamResponseChunks(
-     messages: BaseMessage[],
-     options: this['ParsedCallOptions'],
-     runManager?: CallbackManagerForLLMRun
-   ): AsyncGenerator<ChatGenerationChunk> {
-     if (!this._useResponseApi(options)) {
-       return yield* super._streamResponseChunks(messages, options, runManager);
-     }
-     const streamIterable = await this.responseApiWithRetry(
-       {
-         ...this.invocationParams<'responses'>(options, { streaming: true }),
-         input: _convertMessagesToOpenAIResponsesParams(
-           messages,
-           this.model,
-           this.zdrEnabled
-         ),
-         stream: true,
-       },
-       options
-     );
+ }

-     for await (const data of streamIterable) {
-       const chunk = _convertOpenAIResponsesDeltaToBaseMessageChunk(
-         data as ResponseReturnStreamEvents
-       );
-       if (chunk == null) continue;
-       yield chunk;
-       if (this._lc_stream_delay != null) {
-         await sleep(this._lc_stream_delay);
-       }
-       await runManager?.handleLLMNewToken(
-         chunk.text || '',
-         undefined,
-         undefined,
-         undefined,
-         undefined,
-         { chunk }
-       );
-     }
-
-     return;
-   }
- }
- export class ChatDeepSeek extends OriginalChatDeepSeek {
-   public get exposedClient(): CustomOpenAIClient {
-     return this.client;
-   }
-   static lc_name(): 'LibreChatDeepSeek' {
-     return 'LibreChatDeepSeek';
+ class LibreChatAzureOpenAIResponses extends OriginalAzureChatOpenAIResponses {
+   protected _getReasoningParams(
+     options?: this['ParsedCallOptions']
+   ): OpenAIClient.Reasoning | undefined {
+     return getGatedReasoningParams(this.model, this.reasoning, options);
    }

-   protected _convertMessages(messages: BaseMessage[]): OpenAICompletionParam[] {
-     return _convertMessagesToOpenAIParams(messages, this.model, {
-       includeReasoningContent: true,
-     });
-   }
+   _getClientOptions(
+     options: OpenAICoreRequestOptions | undefined
+   ): OpenAICoreRequestOptions {
+     if (!(this.client as unknown as AzureOpenAIClient | undefined)) {
+       const openAIEndpointConfig: t.OpenAIEndpointConfig = {
+         azureOpenAIApiDeploymentName: this.azureOpenAIApiDeploymentName,
+         azureOpenAIApiInstanceName: this.azureOpenAIApiInstanceName,
+         azureOpenAIApiKey: this.azureOpenAIApiKey,
+         azureOpenAIBasePath: this.azureOpenAIBasePath,
+         azureADTokenProvider: this.azureADTokenProvider,
+         baseURL: this.clientConfig.baseURL,
+       };

-   async _generate(
-     messages: BaseMessage[],
-     options: this['ParsedCallOptions'] | undefined,
-     runManager?: CallbackManagerForLLMRun
-   ): Promise<ChatResult> {
-     const params = this.invocationParams(options);
+       const endpoint = getEndpoint(openAIEndpointConfig);

-     if (params.stream === true) {
-       return super._generate(messages, options ?? {}, runManager);
-     }
+       const params = {
+         ...this.clientConfig,
+         baseURL: endpoint,
+         timeout: this.timeout,
+         maxRetries: 0,
+       };

-     const messagesMapped = this._convertMessages(messages);
-     const data = await this.completionWithRetry(
-       {
-         ...params,
-         stream: false,
-         messages: messagesMapped,
-       },
-       {
-         signal: options?.signal,
-         ...options?.options,
+       if (!this.azureADTokenProvider) {
+         params.apiKey = openAIEndpointConfig.azureOpenAIApiKey;
        }
-     );

-     const { completion_tokens, prompt_tokens, total_tokens } = data.usage ?? {};
+       if (params.baseURL == null) {
+         delete params.baseURL;
+       }

-     const generations = [];
-     for (const part of data.choices ?? []) {
-       const text = part.message.content ?? '';
-       const generation: ChatGeneration = {
-         text: typeof text === 'string' ? text : '',
-         message: this._convertResponseToMessage(part, data),
-       };
-       generation.generationInfo = {
-         ...(part.finish_reason != null
-           ? { finish_reason: part.finish_reason }
-           : {}),
-         ...(part.logprobs ? { logprobs: part.logprobs } : {}),
+       const defaultHeaders = normalizeHeaders(params.defaultHeaders);
+       params.defaultHeaders = {
+         ...params.defaultHeaders,
+         'User-Agent':
+           defaultHeaders['User-Agent'] != null
+             ? `${defaultHeaders['User-Agent']}: librechat-azure-openai-v2`
+             : 'librechat-azure-openai-v2',
        };
-       generations.push(generation);
+
+       this.client = new CustomAzureOpenAIClient({
+         apiVersion: this.azureOpenAIApiVersion,
+         azureADTokenProvider: this.azureADTokenProvider,
+         ...(params as t.AzureOpenAIInput),
+       }) as unknown as CustomOpenAIClient;
      }

-     return {
-       generations,
-       llmOutput: {
-         tokenUsage: {
-           completionTokens: completion_tokens,
-           promptTokens: prompt_tokens,
-           totalTokens: total_tokens,
-         },
-       },
-     };
+     const requestOptions = {
+       ...this.clientConfig,
+       ...options,
+     } as OpenAICoreRequestOptions;
+     if (this.azureOpenAIApiKey != null) {
+       requestOptions.headers = {
+         'api-key': this.azureOpenAIApiKey,
+         ...requestOptions.headers,
+       };
+       requestOptions.query = {
+         'api-version': this.azureOpenAIApiVersion,
+         ...requestOptions.query,
+       };
+     }
+     return requestOptions;
    }
+ }

-   protected _convertResponseToMessage(
-     choice: OpenAIClient.Chat.Completions.ChatCompletion.Choice,
-     data: OpenAIClient.Chat.Completions.ChatCompletion
-   ): AIMessage {
-     const { message } = choice;
-     const rawToolCalls = message.tool_calls;
-     const toolCalls = rawToolCalls?.map((tc) => ({
-       id: tc.id,
-       name: tc.function.name,
-       args: JSON.parse(tc.function.arguments || '{}'),
-       type: 'tool_call' as const,
-     }));
-
-     const additional_kwargs: Record<string, unknown> = {};
-     if (rawToolCalls) {
-       additional_kwargs.tool_calls = rawToolCalls;
-     }
-     if (
-       'reasoning_content' in message &&
-       message.reasoning_content != null &&
-       message.reasoning_content !== ''
-     ) {
-       additional_kwargs.reasoning_content = message.reasoning_content;
-     }
+ function withLibreChatOpenAIFields(
+   fields?: LibreChatOpenAIFields
+ ): LibreChatOpenAIFields {
+   const nextFields = fields ?? {};
+   return {
+     ...nextFields,
+     completions:
+       nextFields.completions ?? new LibreChatOpenAICompletions(nextFields),
+     responses: nextFields.responses ?? new LibreChatOpenAIResponses(nextFields),
+   };
+ }

-     return new AIMessage({
-       content: message.content ?? '',
-       tool_calls: toolCalls,
-       additional_kwargs,
-       usage_metadata: data.usage
-         ? {
-             input_tokens: data.usage.prompt_tokens,
-             output_tokens: data.usage.completion_tokens,
-             total_tokens: data.usage.total_tokens,
-           }
-         : undefined,
-       response_metadata: {
-         model_name: data.model,
-         system_fingerprint: data.system_fingerprint,
-         finish_reason: choice.finish_reason,
-       },
-     });
+ export class ChatOpenAI extends OriginalChatOpenAI<t.ChatOpenAICallOptions> {
+   _lc_stream_delay?: number;
+
+   constructor(
+     fields?: LibreChatOpenAIFields & t.OpenAIChatInput['modelKwargs']
+   ) {
+     super(withLibreChatOpenAIFields(fields));
+     this._lc_stream_delay = fields?._lc_stream_delay;
    }

-   protected _getClientOptions(
+   public get exposedClient(): CustomOpenAIClient {
+     return getExposedOpenAIClient(
+       this.completions as OpenAIClientDelegate,
+       this.responses as OpenAIClientDelegate,
+       this._useResponsesApi(undefined)
+     ) as CustomOpenAIClient;
+   }
+   static lc_name(): string {
+     return 'LibreChatOpenAI';
+   }
+   _getClientOptions(
      options?: OpenAICoreRequestOptions
    ): OpenAICoreRequestOptions {
      if (!(this.client as OpenAIClient | undefined)) {
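The `exposedClient` getter introduced above routes through `getExposedOpenAIClient` (defined earlier in this diff). A standalone mirror of its selection order — prefer whichever delegate already holds a client with a live `abortHandler`, otherwise lazily initialize the preferred delegate; `init` is a hypothetical stand-in for `_getClientOptions(undefined)`, which creates the client as a side effect:

type Delegate = {
  client?: { abortHandler?: () => void };
  init(): void; // hypothetical stand-in for _getClientOptions(undefined)
};

function pickExposedClient(
  completions: Delegate,
  responses: Delegate,
  preferResponses: boolean
): NonNullable<Delegate['client']> {
  if (responses.client?.abortHandler != null) {
    return responses.client;
  }
  if (completions.client?.abortHandler != null) {
    return completions.client;
  }
  const delegate = preferResponses ? responses : completions;
  delegate.init(); // ensures delegate.client exists before returning it
  return delegate.client as NonNullable<Delegate['client']>;
}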
@@ -750,256 +962,188 @@ export class ChatDeepSeek extends OriginalChatDeepSeek {
750
962
  return requestOptions;
751
963
  }
752
964
 
965
+ /**
966
+ * Returns backwards compatible reasoning parameters from constructor params and call options
967
+ * @internal
968
+ */
969
+ getReasoningParams(
970
+ options?: this['ParsedCallOptions']
971
+ ): OpenAIClient.Reasoning | undefined {
972
+ return getReasoningParams(this.reasoning, options);
973
+ }
974
+
975
+ protected _getReasoningParams(
976
+ options?: this['ParsedCallOptions']
977
+ ): OpenAIClient.Reasoning | undefined {
978
+ return this.getReasoningParams(options);
979
+ }
980
+
753
981
  async *_streamResponseChunks(
754
982
  messages: BaseMessage[],
755
983
  options: this['ParsedCallOptions'],
756
984
  runManager?: CallbackManagerForLLMRun
757
985
  ): AsyncGenerator<ChatGenerationChunk> {
758
- const messagesMapped: OpenAICompletionParam[] =
759
- _convertMessagesToOpenAIParams(messages, this.model, {
760
- includeReasoningContent: true,
761
- });
986
+ yield* delayStreamChunks(
987
+ super._streamResponseChunks(messages, options, runManager),
988
+ this._lc_stream_delay
989
+ );
990
+ }
991
+ }
762
992
 
763
- const params = {
764
- ...this.invocationParams(options, {
765
- streaming: true,
766
- }),
767
- messages: messagesMapped,
768
- stream: true as const,
769
- };
770
- let defaultRole: OpenAIRoleEnum | undefined;
993
+ export class AzureChatOpenAI extends OriginalAzureChatOpenAI {
994
+ _lc_stream_delay?: number;
771
995
 
772
- const streamIterable = await this.completionWithRetry(params, options);
773
- let usage: OpenAIClient.Completions.CompletionUsage | undefined;
774
- for await (const data of streamIterable) {
775
- const choice = data.choices[0] as
776
- | Partial<OpenAIClient.Chat.Completions.ChatCompletionChunk.Choice>
777
- | undefined;
778
- if (data.usage) {
779
- usage = data.usage;
780
- }
781
- if (!choice) {
782
- continue;
783
- }
996
+ constructor(fields?: LibreChatAzureOpenAIFields) {
997
+ super(fields);
998
+ this.completions = new LibreChatAzureOpenAICompletions(fields);
999
+ this.responses = new LibreChatAzureOpenAIResponses(fields);
1000
+ this._lc_stream_delay = fields?._lc_stream_delay;
1001
+ }
784
1002
 
785
- const { delta } = choice;
786
- if (!delta) {
787
- continue;
788
- }
789
- const chunk = this._convertOpenAIDeltaToBaseMessageChunk(
790
- delta,
791
- data,
792
- defaultRole
793
- );
794
- if ('reasoning_content' in delta) {
795
- chunk.additional_kwargs.reasoning_content = delta.reasoning_content;
796
- }
797
- defaultRole = delta.role ?? defaultRole;
798
- const newTokenIndices = {
799
- prompt: (options as OpenAIChatCallOptions).promptIndex ?? 0,
800
- completion: choice.index ?? 0,
1003
+ public get exposedClient(): CustomOpenAIClient {
1004
+ return getExposedOpenAIClient(
1005
+ this.completions as OpenAIClientDelegate,
1006
+ this.responses as OpenAIClientDelegate,
1007
+ this._useResponsesApi(undefined)
1008
+ ) as CustomOpenAIClient;
1009
+ }
1010
+ static lc_name(): 'LibreChatAzureOpenAI' {
1011
+ return 'LibreChatAzureOpenAI';
1012
+ }
1013
+ /**
1014
+ * Returns backwards compatible reasoning parameters from constructor params and call options
1015
+ * @internal
1016
+ */
1017
+ getReasoningParams(
1018
+ options?: this['ParsedCallOptions']
1019
+ ): OpenAIClient.Reasoning | undefined {
1020
+ return getGatedReasoningParams(this.model, this.reasoning, options);
1021
+ }
1022
+
1023
+ protected _getReasoningParams(
1024
+ options?: this['ParsedCallOptions']
1025
+ ): OpenAIClient.Reasoning | undefined {
1026
+ return this.getReasoningParams(options);
1027
+ }
1028
+
1029
+ _getClientOptions(
1030
+ options: OpenAICoreRequestOptions | undefined
1031
+ ): OpenAICoreRequestOptions {
1032
+ if (!(this.client as unknown as AzureOpenAIClient | undefined)) {
1033
+ const openAIEndpointConfig: t.OpenAIEndpointConfig = {
1034
+ azureOpenAIApiDeploymentName: this.azureOpenAIApiDeploymentName,
1035
+ azureOpenAIApiInstanceName: this.azureOpenAIApiInstanceName,
1036
+ azureOpenAIApiKey: this.azureOpenAIApiKey,
1037
+ azureOpenAIBasePath: this.azureOpenAIBasePath,
1038
+ azureADTokenProvider: this.azureADTokenProvider,
1039
+ baseURL: this.clientConfig.baseURL,
801
1040
  };
802
- if (typeof chunk.content !== 'string') {
803
- // eslint-disable-next-line no-console
804
- console.log(
805
- '[WARNING]: Received non-string content from OpenAI. This is currently not supported.'
806
- );
807
- continue;
808
- }
809
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
810
- const generationInfo: Record<string, any> = { ...newTokenIndices };
811
- if (choice.finish_reason != null) {
812
- generationInfo.finish_reason = choice.finish_reason;
813
- generationInfo.system_fingerprint = data.system_fingerprint;
814
- generationInfo.model_name = data.model;
815
- generationInfo.service_tier = data.service_tier;
1041
+
1042
+ const endpoint = getEndpoint(openAIEndpointConfig);
1043
+
1044
+ const params = {
1045
+ ...this.clientConfig,
1046
+ baseURL: endpoint,
1047
+ timeout: this.timeout,
1048
+ maxRetries: 0,
1049
+ };
1050
+
1051
+ if (!this.azureADTokenProvider) {
1052
+ params.apiKey = openAIEndpointConfig.azureOpenAIApiKey;
816
1053
  }
817
- if (this.logprobs == true) {
818
- generationInfo.logprobs = choice.logprobs;
1054
+
1055
+ if (params.baseURL == null) {
1056
+ delete params.baseURL;
819
1057
  }
820
- const generationChunk = new ChatGenerationChunk({
821
- message: chunk,
822
- text: chunk.content,
823
- generationInfo,
824
- });
825
- yield generationChunk;
826
- await runManager?.handleLLMNewToken(
827
- generationChunk.text || '',
828
- newTokenIndices,
829
- undefined,
830
- undefined,
831
- undefined,
832
- { chunk: generationChunk }
833
- );
1058
+
1059
+ const defaultHeaders = normalizeHeaders(params.defaultHeaders);
1060
+ params.defaultHeaders = {
1061
+ ...params.defaultHeaders,
1062
+ 'User-Agent':
1063
+ defaultHeaders['User-Agent'] != null
1064
+ ? `${defaultHeaders['User-Agent']}: librechat-azure-openai-v2`
1065
+ : 'librechat-azure-openai-v2',
1066
+ };
1067
+
1068
+ this.client = new CustomAzureOpenAIClient({
1069
+ apiVersion: this.azureOpenAIApiVersion,
1070
+ azureADTokenProvider: this.azureADTokenProvider,
1071
+ ...(params as t.AzureOpenAIInput),
1072
+ }) as unknown as CustomOpenAIClient;
834
1073
  }
835
- if (usage) {
836
- const inputTokenDetails = {
837
- ...(usage.prompt_tokens_details?.audio_tokens != null && {
838
- audio: usage.prompt_tokens_details.audio_tokens,
839
- }),
840
- ...(usage.prompt_tokens_details?.cached_tokens != null && {
841
- cache_read: usage.prompt_tokens_details.cached_tokens,
842
- }),
1074
+
1075
+ const requestOptions = {
1076
+ ...this.clientConfig,
1077
+ ...options,
1078
+ } as OpenAICoreRequestOptions;
1079
+ if (this.azureOpenAIApiKey != null) {
1080
+ requestOptions.headers = {
1081
+ 'api-key': this.azureOpenAIApiKey,
1082
+ ...requestOptions.headers,
843
1083
  };
844
- const outputTokenDetails = {
845
- ...(usage.completion_tokens_details?.audio_tokens != null && {
846
- audio: usage.completion_tokens_details.audio_tokens,
847
- }),
848
- ...(usage.completion_tokens_details?.reasoning_tokens != null && {
849
- reasoning: usage.completion_tokens_details.reasoning_tokens,
850
- }),
1084
+ requestOptions.query = {
1085
+ 'api-version': this.azureOpenAIApiVersion,
1086
+ ...requestOptions.query,
851
1087
  };
852
- const generationChunk = new ChatGenerationChunk({
853
- message: new AIMessageChunk({
854
- content: '',
855
- response_metadata: {
856
- usage: { ...usage },
857
- },
858
- usage_metadata: {
859
- input_tokens: usage.prompt_tokens,
860
- output_tokens: usage.completion_tokens,
861
- total_tokens: usage.total_tokens,
862
- ...(Object.keys(inputTokenDetails).length > 0 && {
863
- input_token_details: inputTokenDetails,
864
- }),
865
- ...(Object.keys(outputTokenDetails).length > 0 && {
866
- output_token_details: outputTokenDetails,
867
- }),
868
- },
869
- }),
870
- text: '',
871
- });
872
- yield generationChunk;
873
1088
  }
874
- if (options.signal?.aborted === true) {
875
- throw new Error('AbortError');
876
- }
877
- }
878
- }
879
-
880
- /** xAI-specific usage metadata type */
881
- export interface XAIUsageMetadata
882
- extends OpenAIClient.Completions.CompletionUsage {
883
- prompt_tokens_details?: {
884
- audio_tokens?: number;
885
- cached_tokens?: number;
886
- text_tokens?: number;
887
- image_tokens?: number;
888
- };
889
- completion_tokens_details?: {
890
- audio_tokens?: number;
891
- reasoning_tokens?: number;
892
- accepted_prediction_tokens?: number;
893
- rejected_prediction_tokens?: number;
894
- };
895
- num_sources_used?: number;
896
- }
897
-
898
- export class ChatMoonshot extends ChatOpenAI {
899
- static lc_name(): 'LibreChatMoonshot' {
900
- return 'LibreChatMoonshot';
901
- }
902
-
903
- protected _convertMessages(messages: BaseMessage[]): OpenAICompletionParam[] {
904
- return _convertMessagesToOpenAIParams(messages, this.model, {
905
- includeReasoningContent: true,
906
- });
1089
+ return requestOptions;
907
1090
  }
908
-
909
- async _generate(
1091
+ async *_streamResponseChunks(
910
1092
  messages: BaseMessage[],
911
1093
  options: this['ParsedCallOptions'],
912
1094
  runManager?: CallbackManagerForLLMRun
913
- ): Promise<ChatResult> {
914
- const params = this.invocationParams(options);
1095
+ ): AsyncGenerator<ChatGenerationChunk> {
1096
+ yield* delayStreamChunks(
1097
+ super._streamResponseChunks(messages, options, runManager),
1098
+ this._lc_stream_delay
1099
+ );
1100
+ }
1101
+ }
1102
+export class ChatDeepSeek extends OriginalChatDeepSeek {
+  _lc_stream_delay?: number;
 
-    if (params.stream === true) {
-      return super._generate(messages, options, runManager);
+  constructor(
+    fields?: ConstructorParameters<typeof OriginalChatDeepSeek>[0] & {
+      _lc_stream_delay?: number;
     }
+  ) {
+    super(fields);
+    this._lc_stream_delay = fields?._lc_stream_delay;
+  }
 
-    const messagesMapped = this._convertMessages(messages);
-    const data = await this.completionWithRetry(
-      {
-        ...params,
-        stream: false,
-        messages: messagesMapped,
-      },
-      {
-        signal: options.signal,
-        ...options.options,
-      }
-    );
-
-    const { completion_tokens, prompt_tokens, total_tokens } = data.usage ?? {};
+  public get exposedClient(): CustomOpenAIClient {
+    return this.client;
+  }
+  static lc_name(): 'LibreChatDeepSeek' {
+    return 'LibreChatDeepSeek';
+  }
 
-    const generations = [];
-    for (const part of data.choices ?? []) {
-      const text = part.message.content ?? '';
-      const generation: ChatGeneration = {
-        text: typeof text === 'string' ? text : '',
-        message: this._convertResponseToMessage(part, data),
-      };
-      generation.generationInfo = {
-        ...(part.finish_reason ? { finish_reason: part.finish_reason } : {}),
-        ...(part.logprobs ? { logprobs: part.logprobs } : {}),
+  _getClientOptions(
+    options?: OpenAICoreRequestOptions
+  ): OpenAICoreRequestOptions {
+    if (!(this.client as OpenAIClient | undefined)) {
+      const openAIEndpointConfig: t.OpenAIEndpointConfig = {
+        baseURL: this.clientConfig.baseURL,
       };
-      generations.push(generation);
-    }
 
-    return {
-      generations,
-      llmOutput: {
-        tokenUsage: {
-          completionTokens: completion_tokens,
-          promptTokens: prompt_tokens,
-          totalTokens: total_tokens,
-        },
-      },
-    };
-  }
+      const endpoint = getEndpoint(openAIEndpointConfig);
+      const params = {
+        ...this.clientConfig,
+        baseURL: endpoint,
+        timeout: this.timeout,
+        maxRetries: 0,
+      };
+      if (params.baseURL == null) {
+        delete params.baseURL;
+      }
 
-  protected _convertResponseToMessage(
-    choice: OpenAIClient.Chat.Completions.ChatCompletion.Choice,
-    data: OpenAIClient.Chat.Completions.ChatCompletion
-  ): AIMessage {
-    const { message } = choice;
-    const rawToolCalls = message.tool_calls;
-    const toolCalls = rawToolCalls?.map((tc) => ({
-      id: tc.id,
-      name: tc.function.name,
-      args: JSON.parse(tc.function.arguments || '{}'),
-      type: 'tool_call' as const,
-    }));
-
-    const additional_kwargs: Record<string, unknown> = {};
-    if (rawToolCalls) {
-      additional_kwargs.tool_calls = rawToolCalls;
-    }
-    if (
-      'reasoning_content' in message &&
-      message.reasoning_content != null &&
-      message.reasoning_content !== ''
-    ) {
-      additional_kwargs.reasoning_content = message.reasoning_content;
+      this.client = new CustomOpenAIClient(params);
     }
-
-    return new AIMessage({
-      content: message.content ?? '',
-      tool_calls: toolCalls,
-      additional_kwargs,
-      usage_metadata: data.usage
-        ? {
-            input_tokens: data.usage.prompt_tokens,
-            output_tokens: data.usage.completion_tokens,
-            total_tokens: data.usage.total_tokens,
-          }
-        : undefined,
-      response_metadata: {
-        model_name: data.model,
-        system_fingerprint: data.system_fingerprint,
-        finish_reason: choice.finish_reason,
-      },
-    });
+    const requestOptions = {
+      ...this.clientConfig,
+      ...options,
+    } as OpenAICoreRequestOptions;
+    return requestOptions;
   }
 
   async *_streamResponseChunks(
@@ -1007,131 +1151,43 @@ export class ChatMoonshot extends ChatOpenAI {
     options: this['ParsedCallOptions'],
     runManager?: CallbackManagerForLLMRun
   ): AsyncGenerator<ChatGenerationChunk> {
-    const messagesMapped: OpenAICompletionParam[] =
-      _convertMessagesToOpenAIParams(messages, this.model, {
-        includeReasoningContent: true,
-      });
+    yield* delayStreamChunks(
+      super._streamResponseChunks(messages, options, runManager),
+      this._lc_stream_delay
+    );
+  }
+}
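The new `ChatDeepSeek` subclass above exists mainly to accept the optional `_lc_stream_delay` field and thread it into the throttled stream. A hypothetical usage sketch (the import path and model name are assumptions, not taken from this diff):

```ts
import { ChatDeepSeek } from '@librechat/agents'; // assumed export location

const llm = new ChatDeepSeek({
  model: 'deepseek-chat',               // assumed model name
  apiKey: process.env.DEEPSEEK_API_KEY,
  _lc_stream_delay: 25,                 // pause ~25 ms between streamed chunks
});

// Streaming goes through delayStreamChunks via _streamResponseChunks.
for await (const chunk of await llm.stream('Hello')) {
  process.stdout.write(String(chunk.content));
}
```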
 
-    const params = {
-      ...this.invocationParams(options, {
-        streaming: true,
-      }),
-      messages: messagesMapped,
-      stream: true as const,
-    };
-    let defaultRole: OpenAIRoleEnum | undefined;
+/** xAI-specific usage metadata type */
+export interface XAIUsageMetadata
+  extends OpenAIClient.Completions.CompletionUsage {
+  prompt_tokens_details?: {
+    audio_tokens?: number;
+    cached_tokens?: number;
+    text_tokens?: number;
+    image_tokens?: number;
+  };
+  completion_tokens_details?: {
+    audio_tokens?: number;
+    reasoning_tokens?: number;
+    accepted_prediction_tokens?: number;
+    rejected_prediction_tokens?: number;
+  };
+  num_sources_used?: number;
+}
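The relocated `XAIUsageMetadata` interface widens the standard completion-usage shape with xAI's extra token-detail fields. For orientation, this condensed sketch shows the kind of conditional-spread mapping the removed `ChatXAI` streaming code performed with those fields (illustration only, not the package's current code path):

```ts
// Condensed from the removed usage-mapping logic further below.
function toInputTokenDetails(usage: XAIUsageMetadata) {
  return {
    ...(usage.prompt_tokens_details?.cached_tokens != null && {
      cache_read: usage.prompt_tokens_details.cached_tokens,
    }),
    ...(usage.prompt_tokens_details?.text_tokens != null && {
      text: usage.prompt_tokens_details.text_tokens,
    }),
    ...(usage.prompt_tokens_details?.image_tokens != null && {
      image: usage.prompt_tokens_details.image_tokens,
    }),
  };
}
```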
 
-    const streamIterable = await this.completionWithRetry(params, options);
-    let usage: OpenAIClient.Completions.CompletionUsage | undefined;
-    for await (const data of streamIterable) {
-      const choice = data.choices[0] as
-        | Partial<OpenAIClient.Chat.Completions.ChatCompletionChunk.Choice>
-        | undefined;
-      if (data.usage) {
-        usage = data.usage;
-      }
-      if (!choice) {
-        continue;
-      }
+export class ChatMoonshot extends ChatOpenAI {
+  constructor(
+    fields?: LibreChatOpenAIFields & t.OpenAIChatInput['modelKwargs']
+  ) {
+    super({
+      ...fields,
+      includeReasoningContent: true,
+    });
+  }
 
-      const { delta } = choice;
-      if (!delta) {
-        continue;
-      }
-      const chunk = this._convertOpenAIDeltaToBaseMessageChunk(
-        delta,
-        data,
-        defaultRole
-      );
-      if ('reasoning_content' in delta) {
-        chunk.additional_kwargs.reasoning_content = delta.reasoning_content;
-      }
-      defaultRole = delta.role ?? defaultRole;
-      const newTokenIndices = {
-        prompt: (options as OpenAIChatCallOptions).promptIndex ?? 0,
-        completion: choice.index ?? 0,
-      };
-      if (typeof chunk.content !== 'string') {
-        // eslint-disable-next-line no-console
-        console.log(
-          '[WARNING]: Received non-string content from OpenAI. This is currently not supported.'
-        );
-        continue;
-      }
-      // eslint-disable-next-line @typescript-eslint/no-explicit-any
-      const generationInfo: Record<string, any> = { ...newTokenIndices };
-      if (choice.finish_reason != null) {
-        generationInfo.finish_reason = choice.finish_reason;
-        generationInfo.system_fingerprint = data.system_fingerprint;
-        generationInfo.model_name = data.model;
-        generationInfo.service_tier = data.service_tier;
-      }
-      if (this.logprobs == true) {
-        generationInfo.logprobs = choice.logprobs;
-      }
-      const generationChunk = new ChatGenerationChunk({
-        message: chunk,
-        text: chunk.content,
-        generationInfo,
-      });
-      yield generationChunk;
-      if (this._lc_stream_delay != null) {
-        await sleep(this._lc_stream_delay);
-      }
-      await runManager?.handleLLMNewToken(
-        generationChunk.text || '',
-        newTokenIndices,
-        undefined,
-        undefined,
-        undefined,
-        { chunk: generationChunk }
-      );
-    }
-    if (usage) {
-      const inputTokenDetails = {
-        ...(usage.prompt_tokens_details?.audio_tokens != null && {
-          audio: usage.prompt_tokens_details.audio_tokens,
-        }),
-        ...(usage.prompt_tokens_details?.cached_tokens != null && {
-          cache_read: usage.prompt_tokens_details.cached_tokens,
-        }),
-      };
-      const outputTokenDetails = {
-        ...(usage.completion_tokens_details?.audio_tokens != null && {
-          audio: usage.completion_tokens_details.audio_tokens,
-        }),
-        ...(usage.completion_tokens_details?.reasoning_tokens != null && {
-          reasoning: usage.completion_tokens_details.reasoning_tokens,
-        }),
-      };
-      const generationChunk = new ChatGenerationChunk({
-        message: new AIMessageChunk({
-          content: '',
-          response_metadata: {
-            usage: { ...usage },
-          },
-          usage_metadata: {
-            input_tokens: usage.prompt_tokens,
-            output_tokens: usage.completion_tokens,
-            total_tokens: usage.total_tokens,
-            ...(Object.keys(inputTokenDetails).length > 0 && {
-              input_token_details: inputTokenDetails,
-            }),
-            ...(Object.keys(outputTokenDetails).length > 0 && {
-              output_token_details: outputTokenDetails,
-            }),
-          },
-        }),
-        text: '',
-      });
-      yield generationChunk;
-      if (this._lc_stream_delay != null) {
-        await sleep(this._lc_stream_delay);
-      }
-    }
-    if (options.signal?.aborted === true) {
-      throw new Error('AbortError');
-    }
+  static lc_name(): 'LibreChatMoonshot' {
+    return 'LibreChatMoonshot';
   }
 }
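`ChatMoonshot` no longer overrides `_generate`, `_convertMessages`, or the streaming loop; it now simply forces `includeReasoningContent: true` through the base `ChatOpenAI` constructor so `reasoning_content` survives message conversion. A hypothetical usage sketch (import path and model name are assumptions):

```ts
import { ChatMoonshot } from '@librechat/agents'; // assumed export location

const moonshot = new ChatMoonshot({
  model: 'moonshot-v1-8k',              // assumed model name
  apiKey: process.env.MOONSHOT_API_KEY,
  // includeReasoningContent: true is applied by the subclass constructor
});
```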
 
@@ -1168,7 +1224,7 @@ export class ChatXAI extends OriginalChatXAI {
     return this.client;
   }
 
-  protected _getClientOptions(
+  _getClientOptions(
     options?: OpenAICoreRequestOptions
   ): OpenAICoreRequestOptions {
     if (!(this.client as OpenAIClient | undefined)) {
@@ -1201,166 +1257,9 @@ export class ChatXAI extends OriginalChatXAI {
     options: this['ParsedCallOptions'],
     runManager?: CallbackManagerForLLMRun
   ): AsyncGenerator<ChatGenerationChunk> {
-    const messagesMapped: OpenAICompletionParam[] =
-      _convertMessagesToOpenAIParams(messages, this.model);
-
-    const params = {
-      ...this.invocationParams(options, {
-        streaming: true,
-      }),
-      messages: messagesMapped,
-      stream: true as const,
-    };
-    let defaultRole: OpenAIRoleEnum | undefined;
-
-    const streamIterable = await this.completionWithRetry(params, options);
-    let usage: OpenAIClient.Completions.CompletionUsage | undefined;
-    for await (const data of streamIterable) {
-      const choice = data.choices[0] as
-        | Partial<OpenAIClient.Chat.Completions.ChatCompletionChunk.Choice>
-        | undefined;
-      if (data.usage) {
-        usage = data.usage;
-      }
-      if (!choice) {
-        continue;
-      }
-
-      const { delta } = choice;
-      if (!delta) {
-        continue;
-      }
-      const chunk = this._convertOpenAIDeltaToBaseMessageChunk(
-        delta,
-        data,
-        defaultRole
-      );
-      if (chunk.usage_metadata != null) {
-        chunk.usage_metadata = {
-          input_tokens:
-            (chunk.usage_metadata as Partial<UsageMetadata>).input_tokens ?? 0,
-          output_tokens:
-            (chunk.usage_metadata as Partial<UsageMetadata>).output_tokens ?? 0,
-          total_tokens:
-            (chunk.usage_metadata as Partial<UsageMetadata>).total_tokens ?? 0,
-        };
-      }
-      if ('reasoning_content' in delta) {
-        chunk.additional_kwargs.reasoning_content = delta.reasoning_content;
-      }
-      defaultRole = delta.role ?? defaultRole;
-      const newTokenIndices = {
-        prompt: (options as OpenAIChatCallOptions).promptIndex ?? 0,
-        completion: choice.index ?? 0,
-      };
-      if (typeof chunk.content !== 'string') {
-        // eslint-disable-next-line no-console
-        console.log(
-          '[WARNING]: Received non-string content from OpenAI. This is currently not supported.'
-        );
-        continue;
-      }
-      // eslint-disable-next-line @typescript-eslint/no-explicit-any
-      const generationInfo: Record<string, any> = { ...newTokenIndices };
-      if (choice.finish_reason != null) {
-        generationInfo.finish_reason = choice.finish_reason;
-        // Only include system fingerprint in the last chunk for now
-        // to avoid concatenation issues
-        generationInfo.system_fingerprint = data.system_fingerprint;
-        generationInfo.model_name = data.model;
-        generationInfo.service_tier = data.service_tier;
-      }
-      if (this.logprobs == true) {
-        generationInfo.logprobs = choice.logprobs;
-      }
-      const generationChunk = new ChatGenerationChunk({
-        message: chunk,
-        text: chunk.content,
-        generationInfo,
-      });
-      yield generationChunk;
-      if (this._lc_stream_delay != null) {
-        await sleep(this._lc_stream_delay);
-      }
-      await runManager?.handleLLMNewToken(
-        generationChunk.text || '',
-        newTokenIndices,
-        undefined,
-        undefined,
-        undefined,
-        { chunk: generationChunk }
-      );
-    }
-    if (usage) {
-      // Type assertion for xAI-specific usage structure
-      const xaiUsage = usage as XAIUsageMetadata;
-      const inputTokenDetails = {
-        // Standard OpenAI fields
-        ...(usage.prompt_tokens_details?.audio_tokens != null && {
-          audio: usage.prompt_tokens_details.audio_tokens,
-        }),
-        ...(usage.prompt_tokens_details?.cached_tokens != null && {
-          cache_read: usage.prompt_tokens_details.cached_tokens,
-        }),
-        // Add xAI-specific prompt token details if they exist
-        ...(xaiUsage.prompt_tokens_details?.text_tokens != null && {
-          text: xaiUsage.prompt_tokens_details.text_tokens,
-        }),
-        ...(xaiUsage.prompt_tokens_details?.image_tokens != null && {
-          image: xaiUsage.prompt_tokens_details.image_tokens,
-        }),
-      };
-      const outputTokenDetails = {
-        // Standard OpenAI fields
-        ...(usage.completion_tokens_details?.audio_tokens != null && {
-          audio: usage.completion_tokens_details.audio_tokens,
-        }),
-        ...(usage.completion_tokens_details?.reasoning_tokens != null && {
-          reasoning: usage.completion_tokens_details.reasoning_tokens,
-        }),
-        // Add xAI-specific completion token details if they exist
-        ...(xaiUsage.completion_tokens_details?.accepted_prediction_tokens !=
-          null && {
-          accepted_prediction:
-            xaiUsage.completion_tokens_details.accepted_prediction_tokens,
-        }),
-        ...(xaiUsage.completion_tokens_details?.rejected_prediction_tokens !=
-          null && {
-          rejected_prediction:
-            xaiUsage.completion_tokens_details.rejected_prediction_tokens,
-        }),
-      };
-      const generationChunk = new ChatGenerationChunk({
-        message: new AIMessageChunk({
-          content: '',
-          response_metadata: {
-            usage: { ...usage },
-            // Include xAI-specific metadata if it exists
-            ...(xaiUsage.num_sources_used != null && {
-              num_sources_used: xaiUsage.num_sources_used,
-            }),
-          },
-          usage_metadata: {
-            input_tokens: usage.prompt_tokens,
-            output_tokens: usage.completion_tokens,
-            total_tokens: usage.total_tokens,
-            ...(Object.keys(inputTokenDetails).length > 0 && {
-              input_token_details: inputTokenDetails,
-            }),
-            ...(Object.keys(outputTokenDetails).length > 0 && {
-              output_token_details: outputTokenDetails,
-            }),
-          },
-        }),
-        text: '',
-      });
-      yield generationChunk;
-      if (this._lc_stream_delay != null) {
-        await sleep(this._lc_stream_delay);
-      }
-    }
-    if (options.signal?.aborted === true) {
-      throw new Error('AbortError');
-    }
+    yield* delayStreamChunks(
+      super._streamResponseChunks(messages, options, runManager),
+      this._lc_stream_delay
+    );
   }
 }