@smythos/sre 1.5.42 → 1.5.44

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. package/dist/index.js +16 -17
  2. package/dist/index.js.map +1 -1
  3. package/dist/types/Components/GenAILLM.class.d.ts +22 -5
  4. package/dist/types/helpers/AWSLambdaCode.helper.d.ts +8 -5
  5. package/dist/types/index.d.ts +1 -0
  6. package/dist/types/subsystems/LLMManager/LLM.service/connectors/Groq.class.d.ts +7 -0
  7. package/dist/types/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ChatCompletionsApiInterface.d.ts +0 -4
  8. package/dist/types/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ResponsesApiInterface.d.ts +0 -4
  9. package/dist/types/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/utils.d.ts +6 -0
  10. package/dist/types/types/LLM.types.d.ts +8 -0
  11. package/package.json +5 -2
  12. package/src/Components/GenAILLM.class.ts +30 -6
  13. package/src/helpers/AWSLambdaCode.helper.ts +82 -22
  14. package/src/helpers/Conversation.helper.ts +8 -5
  15. package/src/index.ts +193 -192
  16. package/src/index.ts.bak +193 -192
  17. package/src/subsystems/ComputeManager/Code.service/connectors/AWSLambdaCode.class.ts +10 -8
  18. package/src/subsystems/LLMManager/LLM.service/LLMCredentials.helper.ts +3 -1
  19. package/src/subsystems/LLMManager/LLM.service/connectors/Groq.class.ts +112 -92
  20. package/src/subsystems/LLMManager/LLM.service/connectors/openai/OpenAIConnector.class.ts +2 -2
  21. package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ChatCompletionsApiInterface.ts +31 -31
  22. package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ResponsesApiInterface.ts +31 -22
  23. package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/utils.ts +11 -0
  24. package/src/subsystems/LLMManager/ModelsProvider.service/ModelsProviderConnector.ts +9 -0
  25. package/src/subsystems/MemoryManager/Cache.service/connectors/RedisCache.class.ts +0 -18
  26. package/src/types/LLM.types.ts +10 -0
package/src/subsystems/LLMManager/LLM.service/connectors/Groq.class.ts

@@ -23,6 +23,7 @@ import { SystemEvents } from '@sre/Core/SystemEvents';
 type ChatCompletionCreateParams = {
     model: string;
     messages: any;
+    max_completion_tokens?: number;
     max_tokens?: number;
     temperature?: number;
     stop?: string[];
@@ -30,8 +31,11 @@ type ChatCompletionCreateParams = {
     tools?: any;
     tool_choice?: string;
     stream?: boolean;
+    reasoning_effort?: 'none' | 'default' | 'low' | 'medium' | 'high';
 };
 
+const MODELS_WITHOUT_REASONING_EFFORT_SUPPORT = ['deepseek-r1-distill-llama-70b'];
+
 export class GroqConnector extends LLMConnector {
     public name = 'LLM:Groq';
 
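For orientation, here is what a request body conforming to the extended type might look like. A minimal sketch with hypothetical values, not code from the package:

    // Sketch: ChatCompletionCreateParams using the new optional fields.
    // Reasoning-capable models take their token cap via max_completion_tokens.
    const exampleBody: ChatCompletionCreateParams = {
        model: 'qwen/qwen3-32b',
        messages: [{ role: 'user', content: 'Summarize this changelog.' }],
        max_completion_tokens: 1024,
        temperature: 0.2,
        reasoning_effort: 'default', // only forwarded when the model supports it
    };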
@@ -44,113 +48,105 @@ export class GroqConnector extends LLMConnector {
     }
 
     protected async request({ acRequest, body, context }: ILLMRequestFuncParams): Promise<TLLMChatResponse> {
-        try {
-            const groq = await this.getClient(context);
-            const result = await groq.chat.completions.create(body);
-            const message = result?.choices?.[0]?.message;
-            const finishReason = result?.choices?.[0]?.finish_reason;
-            const toolCalls = message?.tool_calls;
-            const usage = result.usage;
-            this.reportUsage(usage, {
-                modelEntryName: context.modelEntryName,
-                keySource: context.isUserKey ? APIKeySource.User : APIKeySource.Smyth,
-                agentId: context.agentId,
-                teamId: context.teamId,
-            });
-
-            let toolsData: ToolData[] = [];
-            let useTool = false;
-
-            if (toolCalls) {
-                toolsData = toolCalls.map((tool, index) => ({
-                    index,
-                    id: tool.id,
-                    type: tool.type,
-                    name: tool.function.name,
-                    arguments: tool.function.arguments,
-                    role: TLLMMessageRole.Assistant,
-                }));
-                useTool = true;
-            }
+        const groq = await this.getClient(context);
+        const result = await groq.chat.completions.create(body);
+        const message = result?.choices?.[0]?.message;
+        const finishReason = result?.choices?.[0]?.finish_reason;
+        const toolCalls = message?.tool_calls;
+        const usage = result.usage;
+        this.reportUsage(usage, {
+            modelEntryName: context.modelEntryName,
+            keySource: context.isUserKey ? APIKeySource.User : APIKeySource.Smyth,
+            agentId: context.agentId,
+            teamId: context.teamId,
+        });
 
-            return {
-                content: message?.content ?? '',
-                finishReason,
-                useTool,
-                toolsData,
-                message,
-                usage,
-            };
-        } catch (error: any) {
-            throw error;
+        let toolsData: ToolData[] = [];
+        let useTool = false;
+
+        if (toolCalls) {
+            toolsData = toolCalls.map((tool, index) => ({
+                index,
+                id: tool.id,
+                type: tool.type,
+                name: tool.function.name,
+                arguments: tool.function.arguments,
+                role: TLLMMessageRole.Assistant,
+            }));
+            useTool = true;
         }
+
+        return {
+            content: message?.content ?? '',
+            finishReason,
+            useTool,
+            toolsData,
+            message,
+            usage,
+        };
     }
 
     protected async streamRequest({ acRequest, body, context }: ILLMRequestFuncParams): Promise<EventEmitter> {
         const emitter = new EventEmitter();
         const usage_data = [];
 
-        try {
-            const groq = await this.getClient(context);
-            const stream = await groq.chat.completions.create({ ...body, stream: true, stream_options: { include_usage: true } });
-
-            let toolsData: ToolData[] = [];
-
-            (async () => {
-                for await (const chunk of stream as any) {
-                    const delta = chunk.choices[0]?.delta;
-                    const usage = chunk['x_groq']?.usage || chunk['usage'];
-
-                    if (usage) {
-                        usage_data.push(usage);
-                    }
-                    emitter.emit('data', delta);
-
-                    if (delta?.content) {
-                        emitter.emit('content', delta.content);
-                    }
-
-                    if (delta?.tool_calls) {
-                        delta.tool_calls.forEach((toolCall, index) => {
-                            if (!toolsData[index]) {
-                                toolsData[index] = {
-                                    index,
-                                    id: toolCall.id,
-                                    type: toolCall.type,
-                                    name: toolCall.function?.name,
-                                    arguments: toolCall.function?.arguments,
-                                    role: 'assistant',
-                                };
-                            } else {
-                                toolsData[index].arguments += toolCall.function?.arguments || '';
-                            }
-                        });
-                    }
+        const groq = await this.getClient(context);
+        const stream = await groq.chat.completions.create({ ...body, stream: true, stream_options: { include_usage: true } });
+
+        let toolsData: ToolData[] = [];
+
+        (async () => {
+            for await (const chunk of stream as any) {
+                const delta = chunk.choices[0]?.delta;
+                const usage = chunk['x_groq']?.usage || chunk['usage'];
+
+                if (usage) {
+                    usage_data.push(usage);
                 }
+                emitter.emit('data', delta);
 
-                if (toolsData.length > 0) {
-                    emitter.emit(TLLMEvent.ToolInfo, toolsData);
+                if (delta?.content) {
+                    emitter.emit('content', delta.content);
                 }
 
-                usage_data.forEach((usage) => {
-                    // probably we can acc them and send them as one event
-                    this.reportUsage(usage, {
-                        modelEntryName: context.modelEntryName,
-                        keySource: context.isUserKey ? APIKeySource.User : APIKeySource.Smyth,
-                        agentId: context.agentId,
-                        teamId: context.teamId,
+                if (delta?.tool_calls) {
+                    delta.tool_calls.forEach((toolCall, index) => {
+                        if (!toolsData[index]) {
+                            toolsData[index] = {
+                                index,
+                                id: toolCall.id,
+                                type: toolCall.type,
+                                name: toolCall.function?.name,
+                                arguments: toolCall.function?.arguments,
+                                role: 'assistant',
+                            };
+                        } else {
+                            toolsData[index].arguments += toolCall.function?.arguments || '';
+                        }
                     });
+                }
+            }
+
+            if (toolsData.length > 0) {
+                emitter.emit(TLLMEvent.ToolInfo, toolsData);
+            }
+
+            usage_data.forEach((usage) => {
+                // probably we can acc them and send them as one event
+                this.reportUsage(usage, {
+                    modelEntryName: context.modelEntryName,
+                    keySource: context.isUserKey ? APIKeySource.User : APIKeySource.Smyth,
+                    agentId: context.agentId,
+                    teamId: context.teamId,
                 });
+            });
 
-            setTimeout(() => {
-                emitter.emit('end', toolsData);
-            }, 100);
-        })();
+            setTimeout(() => {
+                emitter.emit('end', toolsData);
+            }, 100);
+        })();
 
-            return emitter;
-        } catch (error: any) {
-            throw error;
-        }
+        return emitter;
     }
 
     protected async reqBodyAdapter(params: TLLMPreparedParams): Promise<ChatCompletionCreateParams> {
@@ -172,7 +168,15 @@ export class GroqConnector extends LLMConnector {
         }
         //#endregion Handle JSON response format
 
-        if (params.maxTokens !== undefined) body.max_tokens = params.maxTokens;
+        const allowReasoning = params.useReasoning && params.capabilities?.reasoning;
+
+        if (params.maxTokens !== undefined) {
+            if (allowReasoning) {
+                body.max_completion_tokens = params.maxTokens;
+            } else {
+                body.max_tokens = params.maxTokens;
+            }
+        }
         if (params.temperature !== undefined) body.temperature = params.temperature;
         if (params.topP !== undefined) body.top_p = params.topP;
         if (params.stopSequences?.length) body.stop = params.stopSequences;
@@ -180,6 +184,15 @@ export class GroqConnector extends LLMConnector {
         if (params.toolsConfig?.tools) body.tools = params.toolsConfig?.tools;
         if (params.toolsConfig?.tool_choice) body.tool_choice = params.toolsConfig?.tool_choice as any;
 
+        // Apply user-specified reasoning parameters
+        if (
+            allowReasoning &&
+            isValidGroqReasoningEffort(params?.reasoningEffort) &&
+            !MODELS_WITHOUT_REASONING_EFFORT_SUPPORT.includes(params?.modelEntryName)
+        ) {
+            if (params.reasoningEffort !== undefined) body.reasoning_effort = params.reasoningEffort;
+        }
+
         return body;
     }
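The net effect of this adapter change: when reasoning is enabled and the model advertises the reasoning capability, the token cap travels as max_completion_tokens instead of max_tokens. A standalone sketch of the same branching, with hypothetical names:

    // Sketch: how a single maxTokens setting maps onto the two wire fields.
    function applyTokenCap(body: ChatCompletionCreateParams, maxTokens: number | undefined, allowReasoning: boolean): void {
        if (maxTokens === undefined) return;
        if (allowReasoning) {
            body.max_completion_tokens = maxTokens; // reasoning models expect this field
        } else {
            body.max_tokens = maxTokens; // legacy field for non-reasoning models
        }
    }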
@@ -282,3 +295,10 @@
         });
     }
 }
+/**
+ * Type guard to check if a value is a valid OpenAI reasoning effort.
+ * Uses array includes for better maintainability when OpenAI adds new values.
+ */
+export function isValidGroqReasoningEffort(value: unknown): value is 'low' | 'medium' | 'high' | 'none' | 'default' {
+    return ['none', 'default', 'low', 'medium', 'high'].includes(value as string);
+}
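Usage of the new guard is straightforward: it narrows an unknown value to the Groq effort union. A hypothetical example:

    // Sketch: narrowing untrusted input with the exported type guard.
    import { isValidGroqReasoningEffort } from './Groq.class';

    const raw: unknown = process.env.REASONING_EFFORT; // e.g. 'default'
    if (isValidGroqReasoningEffort(raw)) {
        // here raw is typed as 'low' | 'medium' | 'high' | 'none' | 'default'
        console.log(`reasoning effort: ${raw}`);
    }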
package/src/subsystems/LLMManager/LLM.service/connectors/openai/OpenAIConnector.class.ts

@@ -173,7 +173,7 @@ export class OpenAIConnector extends LLMConnector {
         const openai = await this.getClient(context);
         const response = await openai.images.generate(body as OpenAI.Images.ImageGenerateParams);
 
-        return response;
+        return response as OpenAI.ImagesResponse;
     }
 
     protected async imageEditRequest({ acRequest, body, context }: ILLMRequestFuncParams): Promise<OpenAI.ImagesResponse> {
@@ -182,7 +182,7 @@ export class OpenAIConnector extends LLMConnector {
         const openai = await this.getClient(context);
         const response = await openai.images.edit(_body);
 
-        return response;
+        return response as OpenAI.ImagesResponse;
     }
     // #endregion
 
package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ChatCompletionsApiInterface.ts

@@ -2,17 +2,7 @@ import EventEmitter from 'events';
 import OpenAI from 'openai';
 import { BinaryInput } from '@sre/helpers/BinaryInput.helper';
 import { AccessCandidate } from '@sre/Security/AccessControl/AccessCandidate.class';
-import {
-    TLLMParams,
-    TLLMPreparedParams,
-    ILLMRequestContext,
-    ToolData,
-    TLLMMessageRole,
-    APIKeySource,
-    TLLMEvent,
-    OpenAIToolDefinition,
-    LegacyToolDefinition,
-} from '@sre/types/LLM.types';
+import { TLLMParams, TLLMPreparedParams, ILLMRequestContext, ToolData, TLLMMessageRole, APIKeySource, TLLMEvent } from '@sre/types/LLM.types';
 import { OpenAIApiInterface, ToolConfig } from './OpenAIApiInterface';
 import { HandlerDependencies } from '../types';
 import { JSON_RESPONSE_INSTRUCTION, SUPPORTED_MIME_TYPES_MAP } from '@sre/constants';
@@ -23,6 +13,8 @@ import {
     MODELS_WITHOUT_JSON_RESPONSE_SUPPORT,
 } from './constants';
 
+import { isValidOpenAIReasoningEffort } from './utils';
+
 // File size limits in bytes
 const MAX_IMAGE_SIZE = 20 * 1024 * 1024; // 20MB
 const MAX_DOCUMENT_SIZE = 25 * 1024 * 1024; // 25MB
@@ -67,9 +59,6 @@ export class ChatCompletionsApiInterface extends OpenAIApiInterface {
 
     public handleStream(stream: AsyncIterable<OpenAI.ChatCompletionChunk>, context: ILLMRequestContext): EventEmitter {
         const emitter = new EventEmitter();
-        const usage_data: OpenAI.Completions.CompletionUsage[] = [];
-        const reportedUsage: any[] = [];
-        let finishReason = 'stop';
 
         // Process stream asynchronously while returning emitter immediately
         (async () => {
@@ -77,12 +66,14 @@ export class ChatCompletionsApiInterface extends OpenAIApiInterface {
 
             try {
                 // Step 1: Process the stream
-                const streamResult = await this.processStream(stream, emitter, usage_data);
+                const streamResult = await this.processStream(stream, emitter);
                 finalToolsData = streamResult.toolsData;
-                finishReason = streamResult.finishReason;
+
+                const finishReason = streamResult.finishReason || 'stop';
+                const usageData = streamResult.usageData;
 
                 // Step 2: Report usage statistics
-                this.reportUsageStatistics(usage_data, context, reportedUsage);
+                const reportedUsage = this.reportUsageStatistics(usageData, context);
 
                 // Step 3: Emit final events
                 this.emitFinalEvents(emitter, finalToolsData, reportedUsage, finishReason);
@@ -172,6 +163,18 @@ export class ChatCompletionsApiInterface extends OpenAIApiInterface {
             body.stop = params.stopSequences;
         }
 
+        // #region GPT 5 specific fields
+        const isGPT5ReasoningModels = params.modelEntryName?.includes('gpt-5') && params?.capabilities?.reasoning;
+        if (isGPT5ReasoningModels && params?.verbosity) {
+            body.verbosity = params.verbosity;
+        }
+
+        // We need to validate the `reasoningEffort` parameter for OpenAI models, since models like `qwen/qwen3-32b` and `deepseek-r1-distill-llama-70b` (available via Groq) also support this parameter but use different values, such as `none` and `default`. These values are valid in our system but not specifically for OpenAI.
+        if (isGPT5ReasoningModels && isValidOpenAIReasoningEffort(params.reasoningEffort)) {
+            body.reasoning_effort = params.reasoningEffort;
+        }
+        // #endregion GPT 5 specific fields
+
         // Handle tools configuration
         if (params?.toolsConfig?.tools && params?.toolsConfig?.tools?.length > 0) {
             body.tools = params?.toolsConfig?.tools as OpenAI.ChatCompletionTool[];
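For the Chat Completions path, the two new settings are flat keys on the request body. A sketch of what a gpt-5 request might carry, with hypothetical values mirroring the diff above:

    // Sketch: extra fields sent only for gpt-5 models with the reasoning capability.
    const chatBody = {
        model: 'gpt-5',
        messages: [{ role: 'user' as const, content: 'Hello' }],
        verbosity: 'low',           // forwarded as-is when set
        reasoning_effort: 'medium', // must first pass isValidOpenAIReasoningEffort
    };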
@@ -181,20 +184,13 @@ export class ChatCompletionsApiInterface extends OpenAIApiInterface {
         return body;
     }
 
-    /**
-     * Type guard to check if a tool is an OpenAI tool definition
-     */
-    private isOpenAIToolDefinition(tool: OpenAIToolDefinition | LegacyToolDefinition): tool is OpenAIToolDefinition {
-        return 'parameters' in tool;
-    }
-
     /**
      * Transform OpenAI tool definitions to ChatCompletionTool format
      */
     public transformToolsConfig(config: ToolConfig): OpenAI.ChatCompletionTool[] {
         return config.toolDefinitions.map((tool) => {
             // Handle OpenAI tool definition format
-            if (this.isOpenAIToolDefinition(tool)) {
+            if ('parameters' in tool) {
                 return {
                     type: 'function',
                     function: {
@@ -259,11 +255,11 @@ export class ChatCompletionsApiInterface extends OpenAIApiInterface {
      */
     private async processStream(
         stream: AsyncIterable<OpenAI.ChatCompletionChunk>,
-        emitter: EventEmitter,
-        usage_data: OpenAI.Completions.CompletionUsage[]
-    ): Promise<{ toolsData: ToolData[]; finishReason: string }> {
+        emitter: EventEmitter
+    ): Promise<{ toolsData: ToolData[]; finishReason: string; usageData: any[] }> {
         let toolsData: ToolData[] = [];
         let finishReason = 'stop';
+        const usageData = [];
 
         for await (const part of stream) {
             const delta = part.choices[0]?.delta;
@@ -271,7 +267,7 @@ export class ChatCompletionsApiInterface extends OpenAIApiInterface {
 
             // Collect usage statistics
             if (usage) {
-                usage_data.push(usage);
+                usageData.push(usage);
             }
 
             // Emit data event for delta
@@ -315,7 +311,7 @@ export class ChatCompletionsApiInterface extends OpenAIApiInterface {
             }
         }
 
-        return { toolsData: this.extractToolCalls(toolsData), finishReason };
+        return { toolsData: this.extractToolCalls(toolsData), finishReason, usageData };
     }
 
     /**
@@ -335,12 +331,16 @@ export class ChatCompletionsApiInterface extends OpenAIApiInterface {
     /**
      * Report usage statistics
      */
-    private reportUsageStatistics(usage_data: OpenAI.Completions.CompletionUsage[], context: ILLMRequestContext, reportedUsage: any[]): void {
+    private reportUsageStatistics(usage_data: OpenAI.Completions.CompletionUsage[], context: ILLMRequestContext): any[] {
+        const reportedUsage: any[] = [];
+
         // Report normal usage
         usage_data.forEach((usage) => {
             const reported = this.deps.reportUsage(usage, this.buildUsageContext(context));
             reportedUsage.push(reported);
         });
+
+        return reportedUsage;
     }
 
     /**
package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ResponsesApiInterface.ts

@@ -22,12 +22,12 @@ import { OpenAIApiInterface, ToolConfig } from './OpenAIApiInterface';
 import { HandlerDependencies, TToolType } from '../types';
 import { SUPPORTED_MIME_TYPES_MAP } from '@sre/constants';
 import { MODELS_WITHOUT_TEMPERATURE_SUPPORT, SEARCH_TOOL_COSTS } from './constants';
+import { isValidOpenAIReasoningEffort } from './utils';
 
 // File size limits in bytes
 const MAX_IMAGE_SIZE = 20 * 1024 * 1024; // 20MB
 const MAX_DOCUMENT_SIZE = 25 * 1024 * 1024; // 25MB
 
-type TSearchContextSize = 'low' | 'medium' | 'high';
 type TSearchLocation = {
     type: 'approximate';
     city?: string;
@@ -75,9 +75,6 @@ export class ResponsesApiInterface extends OpenAIApiInterface {
 
     public handleStream(stream: Stream<OpenAI.Responses.ResponseStreamEvent>, context: ILLMRequestContext): EventEmitter {
         const emitter = new EventEmitter();
-        const usage_data: any[] = [];
-        const reportedUsage: any[] = [];
-        let finishReason = 'stop';
 
         // Process stream asynchronously while returning emitter immediately
         (async () => {
@@ -85,12 +82,14 @@ export class ResponsesApiInterface extends OpenAIApiInterface {
 
             try {
                 // Step 1: Process the stream
-                const streamResult = await this.processStream(stream, emitter, usage_data);
+                const streamResult = await this.processStream(stream, emitter);
                 finalToolsData = streamResult.toolsData;
-                finishReason = streamResult.finishReason;
+
+                const finishReason = streamResult.finishReason || 'stop';
+                const usageData = streamResult.usageData;
 
                 // Step 2: Report usage statistics
-                this.reportUsageStatistics(usage_data, context, reportedUsage);
+                const reportedUsage = this.reportUsageStatistics(usageData, context);
 
                 // Step 3: Emit final events
                 this.emitFinalEvents(emitter, finalToolsData, reportedUsage, finishReason);
@@ -107,11 +106,11 @@ export class ResponsesApiInterface extends OpenAIApiInterface {
      */
     private async processStream(
         stream: Stream<OpenAI.Responses.ResponseStreamEvent>,
-        emitter: EventEmitter,
-        usage_data: any[]
-    ): Promise<{ toolsData: ToolData[]; finishReason: string }> {
+        emitter: EventEmitter
+    ): Promise<{ toolsData: ToolData[]; finishReason: string; usageData: any[] }> {
         let toolsData: ToolData[] = [];
         let finishReason = 'stop';
+        const usageData = [];
 
         for await (const part of stream) {
             // Handle different event types from the Responses API stream
@@ -189,12 +188,12 @@ export class ResponsesApiInterface extends OpenAIApiInterface {
             }
 
             // Handle usage statistics from response object
-            if ('response' in part && (part as any).response?.usage) {
-                usage_data.push((part as any).response.usage);
+            if (part?.type === 'response.completed' && part?.response?.usage) {
+                usageData.push(part.response.usage);
             }
         }
 
-        return { toolsData: this.extractToolCalls(toolsData), finishReason };
+        return { toolsData: this.extractToolCalls(toolsData), finishReason, usageData };
     }
 
     /**
@@ -214,7 +213,9 @@ export class ResponsesApiInterface extends OpenAIApiInterface {
     /**
      * Report usage statistics
      */
-    private reportUsageStatistics(usage_data: any[], context: ILLMRequestContext, reportedUsage: any[]): void {
+    private reportUsageStatistics(usage_data: any[], context: ILLMRequestContext): any[] {
+        const reportedUsage: any[] = [];
+
         // Report normal usage
         usage_data.forEach((usage) => {
             // Convert ResponseUsage to CompletionUsage format for compatibility
@@ -234,6 +235,8 @@ export class ResponsesApiInterface extends OpenAIApiInterface {
             const reported = this.deps.reportUsage(searchUsage, this.buildUsageContext(context));
             reportedUsage.push(reported);
         }
+
+        return reportedUsage;
     }
 
     /**
@@ -310,6 +313,19 @@ export class ResponsesApiInterface extends OpenAIApiInterface {
             body.top_p = params.topP;
         }
 
+        // #region GPT 5 specific fields
+
+        const isGPT5ReasoningModels = params.modelEntryName?.includes('gpt-5') && params?.capabilities?.reasoning;
+        if (isGPT5ReasoningModels && params?.verbosity) {
+            body.text = { verbosity: params.verbosity };
+        }
+
+        // We need to validate the `reasoningEffort` parameter for OpenAI models, since models like `qwen/qwen3-32b` and `deepseek-r1-distill-llama-70b` (available via Groq) also support this parameter but use different values, such as `none` and `default`. These values are valid in our system but not specifically for OpenAI.
+        if (isGPT5ReasoningModels && isValidOpenAIReasoningEffort(params.reasoningEffort)) {
+            body.reasoning = { effort: params.reasoningEffort };
+        }
+        // #endregion GPT 5 specific fields
+
         let tools: OpenAI.Responses.Tool[] = [];
 
         if (params?.toolsConfig?.tools && params?.toolsConfig?.tools?.length > 0) {
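Note the shape difference from the Chat Completions branch above: the Responses API nests the same settings under text and reasoning. A sketch with hypothetical values:

    // Chat Completions: { verbosity: 'low', reasoning_effort: 'medium' }
    // Responses API:    { text: { verbosity: 'low' }, reasoning: { effort: 'medium' } }
    const responsesBody = {
        model: 'gpt-5',
        input: 'Hello',
        text: { verbosity: 'low' },
        reasoning: { effort: 'medium' },
    };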
@@ -333,20 +349,13 @@ export class ResponsesApiInterface extends OpenAIApiInterface {
         return body;
     }
 
-    /**
-     * Type guard to check if a tool is an OpenAI tool definition
-     */
-    private isOpenAIToolDefinition(tool: OpenAIToolDefinition | LegacyToolDefinition): tool is OpenAIToolDefinition {
-        return 'parameters' in tool;
-    }
-
     /**
      * Transform OpenAI tool definitions to Responses.Tool format
      */
     public transformToolsConfig(config: ToolConfig): OpenAI.Responses.Tool[] {
         return config.toolDefinitions.map((tool) => {
             // Handle OpenAI tool definition format
-            if (this.isOpenAIToolDefinition(tool)) {
+            if ('parameters' in tool) {
                 return {
                     type: 'function' as const,
                     name: tool.name,
package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/utils.ts (new file)

@@ -0,0 +1,11 @@
+import OpenAI from 'openai';
+
+// * We may move some OpenAI Connector–related utility functions here in the future.
+
+/**
+ * Type guard to check if a value is a valid OpenAI reasoning effort.
+ * Uses array includes for better maintainability when OpenAI adds new values.
+ */
+export function isValidOpenAIReasoningEffort(value: unknown): value is OpenAI.Responses.ResponseCreateParams['reasoning']['effort'] {
+    return ['minimal', 'low', 'medium', 'high'].includes(value as string);
+}
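The guard deliberately excludes the Groq-only values, so efforts like 'none' or 'default' never reach OpenAI. Illustrative behavior, assuming the module above:

    import { isValidOpenAIReasoningEffort } from './utils';

    isValidOpenAIReasoningEffort('minimal');  // true: accepted by OpenAI
    isValidOpenAIReasoningEffort('none');     // false: Groq/Qwen-only value
    isValidOpenAIReasoningEffort(undefined);  // false: unset stays unset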
package/src/subsystems/LLMManager/ModelsProvider.service/ModelsProviderConnector.ts

@@ -59,6 +59,15 @@ export abstract class ModelsProviderConnector extends SecureConnector {
                 return null;
             }
         }
+        //Workaround : non-blocking auto-refresh of team models
+        //this will force team models to refresh for the next request
+        //TODO: we need a more elegant cache invalidation mechanism, and only refresh the team models if the custom models have changed
+        setImmediate(async () => {
+            const _customModels = await this.getCustomModels(candidate);
+            teamModels = { ...teamModels, ..._customModels };
+        });
+
+        //immediatelly return the team models
         return teamModels;
     };
     loadTeamModels();
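This workaround is essentially stale-while-revalidate: the cached team models are returned immediately, and custom models are re-fetched off the hot path so the next request sees fresh data. A generic sketch of the pattern, with hypothetical signatures rather than the package's API:

    // Sketch: serve the cached value now, refresh it in the background.
    function getWithBackgroundRefresh<T>(cached: T, refresh: () => Promise<T>, store: (fresh: T) => void): T {
        setImmediate(async () => {
            try {
                store(await refresh()); // next caller sees the refreshed value
            } catch {
                // on failure, keep serving the stale value
            }
        });
        return cached; // no latency added to the current request
    }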
package/src/subsystems/MemoryManager/Cache.service/connectors/RedisCache.class.ts

@@ -26,27 +26,9 @@ export class RedisCache extends CacheConnector {
         let host = sentinels.length === 1 ? sentinels[0].host : null;
         let port = sentinels.length === 1 ? sentinels[0].port : null;
 
-        const redisConfig = {
-            // HEAVILY OPTIMIZED: Aggressive storm prevention parameters
-            maxRetriesPerRequest: 1, // VERY LIMITED retries (official)
-            retryDelayOnFailover: 50, // Fast failover (official)
-            connectTimeout: 3000, // SHORT timeout (official)
-            lazyConnect: false,
-            enableReadyCheck: false, // Skip ready check for speed (official)
-            commandTimeout: 2000, // VERY SHORT command timeout (official)
-            keepAlive: 10000, // Shorter keepalive - 10sec (official)
-            family: 4, // Force IPv4 (official)
-            maxLoadingTimeout: 2000, // Short loading timeout (official)
-            // Additional aggressive settings
-            enableOfflineQueue: false, // Disable offline queue (official)
-            db: 0, // Explicit DB (official)
-            stringNumbers: false, // No string conversion (official)
-        };
-
         this.redis = new IORedis({
             ...(host ? { host, port } : { sentinels, name: _settings.name || process.env.REDIS_MASTER_NAME }),
             password: _settings.password || process.env.REDIS_PASSWORD,
-            ...redisConfig,
         });
 
         this.redis.on('error', (error) => {
package/src/types/LLM.types.ts

@@ -45,6 +45,9 @@ export type ILLMConnectorCredentials = BasicCredentials | BedrockCredentials | V
 export type TOpenAIResponseToolChoice = OpenAI.Responses.ToolChoiceOptions | OpenAI.Responses.ToolChoiceTypes | OpenAI.Responses.ToolChoiceFunction;
 export type TLLMToolChoice = OpenAI.ChatCompletionToolChoiceOption;
 
+// Local alias to the upstream OpenAI reasoning effort union type
+export type OpenAIReasoningEffort = NonNullable<OpenAI.Responses.ResponseCreateParams['reasoning']>['effort'];
+
 export type TOpenAIToolsInfo = {
     webSearch: {
         enabled: boolean;
@@ -149,7 +152,14 @@ export type TLLMParams = {
     // #endregion
 
     useReasoning?: boolean;
+    /**
+     * Controls the level of effort the model will put into reasoning
+     * For GPT-OSS models (20B, 120B): "low" | "medium" | "high"
+     * For Qwen 3 32B: "none" | "default"
+     */
+    reasoningEffort?: 'none' | 'default' | OpenAIReasoningEffort;
     max_output_tokens?: number;
+    verbosity?: OpenAI.Responses.ResponseCreateParams['text']['verbosity'];
     abortSignal?: AbortSignal;
 };
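Putting the type changes together, a caller could populate the new knobs like this. A hypothetical fragment; the field names come from the diff above:

    // Sketch: the new reasoning controls on TLLMParams.
    const params: Partial<TLLMParams> = {
        useReasoning: true,
        reasoningEffort: 'medium', // 'none' | 'default' apply only to certain Groq-hosted models
        verbosity: 'low',          // forwarded only to gpt-5 reasoning models
        max_output_tokens: 2048,
    };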