@smythos/sre 1.5.42 → 1.5.44
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +16 -17
- package/dist/index.js.map +1 -1
- package/dist/types/Components/GenAILLM.class.d.ts +22 -5
- package/dist/types/helpers/AWSLambdaCode.helper.d.ts +8 -5
- package/dist/types/index.d.ts +1 -0
- package/dist/types/subsystems/LLMManager/LLM.service/connectors/Groq.class.d.ts +7 -0
- package/dist/types/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ChatCompletionsApiInterface.d.ts +0 -4
- package/dist/types/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ResponsesApiInterface.d.ts +0 -4
- package/dist/types/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/utils.d.ts +6 -0
- package/dist/types/types/LLM.types.d.ts +8 -0
- package/package.json +5 -2
- package/src/Components/GenAILLM.class.ts +30 -6
- package/src/helpers/AWSLambdaCode.helper.ts +82 -22
- package/src/helpers/Conversation.helper.ts +8 -5
- package/src/index.ts +193 -192
- package/src/index.ts.bak +193 -192
- package/src/subsystems/ComputeManager/Code.service/connectors/AWSLambdaCode.class.ts +10 -8
- package/src/subsystems/LLMManager/LLM.service/LLMCredentials.helper.ts +3 -1
- package/src/subsystems/LLMManager/LLM.service/connectors/Groq.class.ts +112 -92
- package/src/subsystems/LLMManager/LLM.service/connectors/openai/OpenAIConnector.class.ts +2 -2
- package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ChatCompletionsApiInterface.ts +31 -31
- package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ResponsesApiInterface.ts +31 -22
- package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/utils.ts +11 -0
- package/src/subsystems/LLMManager/ModelsProvider.service/ModelsProviderConnector.ts +9 -0
- package/src/subsystems/MemoryManager/Cache.service/connectors/RedisCache.class.ts +0 -18
- package/src/types/LLM.types.ts +10 -0
package/src/subsystems/LLMManager/LLM.service/connectors/Groq.class.ts
CHANGED

@@ -23,6 +23,7 @@ import { SystemEvents } from '@sre/Core/SystemEvents';
 type ChatCompletionCreateParams = {
     model: string;
     messages: any;
+    max_completion_tokens?: number;
     max_tokens?: number;
     temperature?: number;
     stop?: string[];
@@ -30,8 +31,11 @@ type ChatCompletionCreateParams = {
     tools?: any;
     tool_choice?: string;
     stream?: boolean;
+    reasoning_effort?: 'none' | 'default' | 'low' | 'medium' | 'high';
 };
 
+const MODELS_WITHOUT_REASONING_EFFORT_SUPPORT = ['deepseek-r1-distill-llama-70b'];
+
 export class GroqConnector extends LLMConnector {
     public name = 'LLM:Groq';
 
@@ -44,113 +48,105 @@ export class GroqConnector extends LLMConnector {
     }
 
     protected async request({ acRequest, body, context }: ILLMRequestFuncParams): Promise<TLLMChatResponse> {
-
-
-
-
-
-
-
-
-
-
-
-
-            });
-
-        let toolsData: ToolData[] = [];
-        let useTool = false;
-
-        if (toolCalls) {
-            toolsData = toolCalls.map((tool, index) => ({
-                index,
-                id: tool.id,
-                type: tool.type,
-                name: tool.function.name,
-                arguments: tool.function.arguments,
-                role: TLLMMessageRole.Assistant,
-            }));
-            useTool = true;
-        }
+        const groq = await this.getClient(context);
+        const result = await groq.chat.completions.create(body);
+        const message = result?.choices?.[0]?.message;
+        const finishReason = result?.choices?.[0]?.finish_reason;
+        const toolCalls = message?.tool_calls;
+        const usage = result.usage;
+        this.reportUsage(usage, {
+            modelEntryName: context.modelEntryName,
+            keySource: context.isUserKey ? APIKeySource.User : APIKeySource.Smyth,
+            agentId: context.agentId,
+            teamId: context.teamId,
+        });
 
-
-
-
-
-
-
-
-
-
-
+        let toolsData: ToolData[] = [];
+        let useTool = false;
+
+        if (toolCalls) {
+            toolsData = toolCalls.map((tool, index) => ({
+                index,
+                id: tool.id,
+                type: tool.type,
+                name: tool.function.name,
+                arguments: tool.function.arguments,
+                role: TLLMMessageRole.Assistant,
+            }));
+            useTool = true;
         }
+
+        return {
+            content: message?.content ?? '',
+            finishReason,
+            useTool,
+            toolsData,
+            message,
+            usage,
+        };
     }
 
     protected async streamRequest({ acRequest, body, context }: ILLMRequestFuncParams): Promise<EventEmitter> {
         const emitter = new EventEmitter();
         const usage_data = [];
 
-
-
-
-
-
-
-            (
-
-
-
-
-
-                    usage_data.push(usage);
-                }
-                emitter.emit('data', delta);
-
-                if (delta?.content) {
-                    emitter.emit('content', delta.content);
-                }
-
-                if (delta?.tool_calls) {
-                    delta.tool_calls.forEach((toolCall, index) => {
-                        if (!toolsData[index]) {
-                            toolsData[index] = {
-                                index,
-                                id: toolCall.id,
-                                type: toolCall.type,
-                                name: toolCall.function?.name,
-                                arguments: toolCall.function?.arguments,
-                                role: 'assistant',
-                            };
-                        } else {
-                            toolsData[index].arguments += toolCall.function?.arguments || '';
-                        }
-                    });
-                }
+        const groq = await this.getClient(context);
+        const stream = await groq.chat.completions.create({ ...body, stream: true, stream_options: { include_usage: true } });
+
+        let toolsData: ToolData[] = [];
+
+        (async () => {
+            for await (const chunk of stream as any) {
+                const delta = chunk.choices[0]?.delta;
+                const usage = chunk['x_groq']?.usage || chunk['usage'];
+
+                if (usage) {
+                    usage_data.push(usage);
                 }
+                emitter.emit('data', delta);
 
-                if (
-                    emitter.emit(
+                if (delta?.content) {
+                    emitter.emit('content', delta.content);
                 }
 
-
-
-
-
-
-
-
+                if (delta?.tool_calls) {
+                    delta.tool_calls.forEach((toolCall, index) => {
+                        if (!toolsData[index]) {
+                            toolsData[index] = {
+                                index,
+                                id: toolCall.id,
+                                type: toolCall.type,
+                                name: toolCall.function?.name,
+                                arguments: toolCall.function?.arguments,
+                                role: 'assistant',
+                            };
+                        } else {
+                            toolsData[index].arguments += toolCall.function?.arguments || '';
+                        }
                     });
+                }
+            }
+
+            if (toolsData.length > 0) {
+                emitter.emit(TLLMEvent.ToolInfo, toolsData);
+            }
+
+            usage_data.forEach((usage) => {
+                // probably we can acc them and send them as one event
+                this.reportUsage(usage, {
+                    modelEntryName: context.modelEntryName,
+                    keySource: context.isUserKey ? APIKeySource.User : APIKeySource.Smyth,
+                    agentId: context.agentId,
+                    teamId: context.teamId,
                 });
+            });
 
-
-
-
-
+            setTimeout(() => {
+                emitter.emit('end', toolsData);
+            }, 100);
+        })();
 
-
-        } catch (error: any) {
-            throw error;
-        }
+        return emitter;
     }
 
     protected async reqBodyAdapter(params: TLLMPreparedParams): Promise<ChatCompletionCreateParams> {
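For orientation, here is a minimal sketch of how a caller might consume the emitter returned by the rewritten `streamRequest`. The `'content'` and `'end'` event names come from the hunk above; `getGroqStream` is a hypothetical wrapper around the connector call and is not part of the package.

```ts
import { EventEmitter } from 'events';

// Illustrative consumer of the emitter returned by streamRequest().
// `getGroqStream` is a hypothetical helper standing in for the connector call.
async function printGroqStream(getGroqStream: () => Promise<EventEmitter>): Promise<void> {
    const emitter = await getGroqStream();

    // Token deltas arrive as plain strings on 'content'.
    emitter.on('content', (text: string) => process.stdout.write(text));

    // 'end' carries the accumulated toolsData array (possibly empty).
    emitter.on('end', (toolsData: unknown[]) => {
        console.log(`\nstream finished with ${toolsData?.length ?? 0} tool call(s)`);
    });
}
```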
@@ -172,7 +168,15 @@ export class GroqConnector extends LLMConnector {
         }
         //#endregion Handle JSON response format
 
-
+        const allowReasoning = params.useReasoning && params.capabilities?.reasoning;
+
+        if (params.maxTokens !== undefined) {
+            if (allowReasoning) {
+                body.max_completion_tokens = params.maxTokens;
+            } else {
+                body.max_tokens = params.maxTokens;
+            }
+        }
         if (params.temperature !== undefined) body.temperature = params.temperature;
         if (params.topP !== undefined) body.top_p = params.topP;
         if (params.stopSequences?.length) body.stop = params.stopSequences;
@@ -180,6 +184,15 @@ export class GroqConnector extends LLMConnector {
         if (params.toolsConfig?.tools) body.tools = params.toolsConfig?.tools;
         if (params.toolsConfig?.tool_choice) body.tool_choice = params.toolsConfig?.tool_choice as any;
 
+        // Apply user-specified reasoning parameters
+        if (
+            allowReasoning &&
+            isValidGroqReasoningEffort(params?.reasoningEffort) &&
+            !MODELS_WITHOUT_REASONING_EFFORT_SUPPORT.includes(params?.modelEntryName)
+        ) {
+            if (params.reasoningEffort !== undefined) body.reasoning_effort = params.reasoningEffort;
+        }
+
         return body;
     }
 
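To make the new token-budget branch concrete, here is a hypothetical body that `reqBodyAdapter` could produce; the model names and values are illustrative and not taken from the package.

```ts
// Hypothetical output of reqBodyAdapter for a reasoning-capable Groq model
// (model names and values are illustrative, not from the package):
const bodyWithReasoning = {
    model: 'openai/gpt-oss-120b', // assumed reasoning-capable model entry
    messages: [{ role: 'user', content: 'Summarize this changelog' }],
    max_completion_tokens: 1024,  // maxTokens lands here when reasoning is allowed
    temperature: 0.2,
    reasoning_effort: 'medium' as const, // only set when the guard and model check pass
};

// For a non-reasoning model the same maxTokens value lands on max_tokens instead:
const bodyWithoutReasoning = {
    model: 'llama-3.3-70b-versatile', // assumed non-reasoning model entry
    messages: [{ role: 'user', content: 'Summarize this changelog' }],
    max_tokens: 1024,
};
```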
@@ -282,3 +295,10 @@ export class GroqConnector extends LLMConnector {
         });
     }
 }
+/**
+ * Type guard to check if a value is a valid OpenAI reasoning effort.
+ * Uses array includes for better maintainability when OpenAI adds new values.
+ */
+export function isValidGroqReasoningEffort(value: unknown): value is 'low' | 'medium' | 'high' | 'none' | 'default' {
+    return ['none', 'default', 'low', 'medium', 'high'].includes(value as string);
+}
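A small usage sketch for the new guard: the guard body is copied from the hunk above, while `pickReasoningEffort` is a hypothetical helper added here only to show the narrowing.

```ts
type GroqReasoningEffort = 'none' | 'default' | 'low' | 'medium' | 'high';

// Copied from the hunk above.
function isValidGroqReasoningEffort(value: unknown): value is GroqReasoningEffort {
    return ['none', 'default', 'low', 'medium', 'high'].includes(value as string);
}

// Hypothetical helper: an untyped value is narrowed before being copied onto the body.
function pickReasoningEffort(raw: unknown): GroqReasoningEffort | undefined {
    // 'minimal' (OpenAI-only) or any unexpected string falls through and is dropped.
    return isValidGroqReasoningEffort(raw) ? raw : undefined;
}
```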
package/src/subsystems/LLMManager/LLM.service/connectors/openai/OpenAIConnector.class.ts
CHANGED

@@ -173,7 +173,7 @@ export class OpenAIConnector extends LLMConnector {
         const openai = await this.getClient(context);
         const response = await openai.images.generate(body as OpenAI.Images.ImageGenerateParams);
 
-        return response;
+        return response as OpenAI.ImagesResponse;
     }
 
     protected async imageEditRequest({ acRequest, body, context }: ILLMRequestFuncParams): Promise<OpenAI.ImagesResponse> {
@@ -182,7 +182,7 @@ export class OpenAIConnector extends LLMConnector {
         const openai = await this.getClient(context);
         const response = await openai.images.edit(_body);
 
-        return response;
+        return response as OpenAI.ImagesResponse;
     }
     // #endregion
 
package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ChatCompletionsApiInterface.ts
CHANGED

@@ -2,17 +2,7 @@ import EventEmitter from 'events';
 import OpenAI from 'openai';
 import { BinaryInput } from '@sre/helpers/BinaryInput.helper';
 import { AccessCandidate } from '@sre/Security/AccessControl/AccessCandidate.class';
-import {
-    TLLMParams,
-    TLLMPreparedParams,
-    ILLMRequestContext,
-    ToolData,
-    TLLMMessageRole,
-    APIKeySource,
-    TLLMEvent,
-    OpenAIToolDefinition,
-    LegacyToolDefinition,
-} from '@sre/types/LLM.types';
+import { TLLMParams, TLLMPreparedParams, ILLMRequestContext, ToolData, TLLMMessageRole, APIKeySource, TLLMEvent } from '@sre/types/LLM.types';
 import { OpenAIApiInterface, ToolConfig } from './OpenAIApiInterface';
 import { HandlerDependencies } from '../types';
 import { JSON_RESPONSE_INSTRUCTION, SUPPORTED_MIME_TYPES_MAP } from '@sre/constants';
@@ -23,6 +13,8 @@ import {
     MODELS_WITHOUT_JSON_RESPONSE_SUPPORT,
 } from './constants';
 
+import { isValidOpenAIReasoningEffort } from './utils';
+
 // File size limits in bytes
 const MAX_IMAGE_SIZE = 20 * 1024 * 1024; // 20MB
 const MAX_DOCUMENT_SIZE = 25 * 1024 * 1024; // 25MB
@@ -67,9 +59,6 @@ export class ChatCompletionsApiInterface extends OpenAIApiInterface {
 
     public handleStream(stream: AsyncIterable<OpenAI.ChatCompletionChunk>, context: ILLMRequestContext): EventEmitter {
         const emitter = new EventEmitter();
-        const usage_data: OpenAI.Completions.CompletionUsage[] = [];
-        const reportedUsage: any[] = [];
-        let finishReason = 'stop';
 
         // Process stream asynchronously while returning emitter immediately
         (async () => {
@@ -77,12 +66,14 @@ export class ChatCompletionsApiInterface extends OpenAIApiInterface {
 
             try {
                 // Step 1: Process the stream
-                const streamResult = await this.processStream(stream, emitter
+                const streamResult = await this.processStream(stream, emitter);
                 finalToolsData = streamResult.toolsData;
-
+
+                const finishReason = streamResult.finishReason || 'stop';
+                const usageData = streamResult.usageData;
 
                 // Step 2: Report usage statistics
-                this.reportUsageStatistics(
+                const reportedUsage = this.reportUsageStatistics(usageData, context);
 
                 // Step 3: Emit final events
                 this.emitFinalEvents(emitter, finalToolsData, reportedUsage, finishReason);
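The refactor above moves `finishReason` and the usage array out of method-level state and into the value returned by `processStream`. A generic sketch of the resulting three-step flow, with stand-in helper parameters rather than the package's real signatures:

```ts
import { EventEmitter } from 'events';

// Generic sketch of the refactored handleStream flow: process the stream,
// pull finishReason/usageData out of its result, report usage, emit final events.
// The three helper parameters are stand-ins, not the package's real signatures.
type StreamOutcome = { toolsData: unknown[]; finishReason: string; usageData: unknown[] };

function handleStreamSketch<TChunk>(
    chunks: AsyncIterable<TChunk>,
    processChunks: (chunks: AsyncIterable<TChunk>, emitter: EventEmitter) => Promise<StreamOutcome>,
    reportUsage: (usageData: unknown[]) => unknown[],
    emitFinal: (emitter: EventEmitter, tools: unknown[], reported: unknown[], reason: string) => void
): EventEmitter {
    const emitter = new EventEmitter();

    (async () => {
        const { toolsData, finishReason, usageData } = await processChunks(chunks, emitter);
        const reportedUsage = reportUsage(usageData);
        emitFinal(emitter, toolsData, reportedUsage, finishReason || 'stop');
    })().catch((err) => emitter.emit('error', err));

    return emitter; // returned immediately; events fire as the stream is consumed
}
```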
@@ -172,6 +163,18 @@ export class ChatCompletionsApiInterface extends OpenAIApiInterface {
             body.stop = params.stopSequences;
         }
 
+        // #region GPT 5 specific fields
+        const isGPT5ReasoningModels = params.modelEntryName?.includes('gpt-5') && params?.capabilities?.reasoning;
+        if (isGPT5ReasoningModels && params?.verbosity) {
+            body.verbosity = params.verbosity;
+        }
+
+        // We need to validate the `reasoningEffort` parameter for OpenAI models, since models like `qwen/qwen3-32b` and `deepseek-r1-distill-llama-70b` (available via Groq) also support this parameter but use different values, such as `none` and `default`. These values are valid in our system but not specifically for OpenAI.
+        if (isGPT5ReasoningModels && isValidOpenAIReasoningEffort(params.reasoningEffort)) {
+            body.reasoning_effort = params.reasoningEffort;
+        }
+        // #endregion GPT 5 specific fields
+
         // Handle tools configuration
         if (params?.toolsConfig?.tools && params?.toolsConfig?.tools?.length > 0) {
             body.tools = params?.toolsConfig?.tools as OpenAI.ChatCompletionTool[];
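A hypothetical Chat Completions body for a gpt-5 class model after this branch runs; the values are illustrative. Note that here `verbosity` and `reasoning_effort` are flat, top-level fields, whereas the Responses API hunk further down nests them.

```ts
// Hypothetical Chat Completions request body after the GPT-5 branch runs
// (model name and values are illustrative):
const chatCompletionsBody = {
    model: 'gpt-5',
    messages: [{ role: 'user' as const, content: 'Explain this change' }],
    verbosity: 'low',                  // set only for GPT-5 reasoning models
    reasoning_effort: 'high' as const, // set only when isValidOpenAIReasoningEffort passes
};
```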
@@ -181,20 +184,13 @@ export class ChatCompletionsApiInterface extends OpenAIApiInterface {
         return body;
     }
 
-    /**
-     * Type guard to check if a tool is an OpenAI tool definition
-     */
-    private isOpenAIToolDefinition(tool: OpenAIToolDefinition | LegacyToolDefinition): tool is OpenAIToolDefinition {
-        return 'parameters' in tool;
-    }
-
     /**
      * Transform OpenAI tool definitions to ChatCompletionTool format
      */
     public transformToolsConfig(config: ToolConfig): OpenAI.ChatCompletionTool[] {
         return config.toolDefinitions.map((tool) => {
             // Handle OpenAI tool definition format
-            if (
+            if ('parameters' in tool) {
                 return {
                     type: 'function',
                     function: {
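The private `isOpenAIToolDefinition` type guard is gone; the mapping now relies on an inline `'parameters' in tool` structural check. A simplified sketch of that narrowing, using stand-in shapes rather than the real `OpenAIToolDefinition` / `LegacyToolDefinition` types:

```ts
// Simplified stand-ins for the two tool definition shapes; the real types
// live in @sre/types/LLM.types and may differ.
type OpenAIStyleTool = { name: string; description?: string; parameters: Record<string, unknown> };
type LegacyStyleTool = { name: string; description?: string; properties: Record<string, unknown>; requiredFields?: string[] };

function toChatCompletionTool(tool: OpenAIStyleTool | LegacyStyleTool) {
    if ('parameters' in tool) {
        // OpenAI-style definition: parameters is already a JSON schema
        return { type: 'function' as const, function: { name: tool.name, description: tool.description, parameters: tool.parameters } };
    }
    // Legacy-style definition: assemble the schema from its pieces
    return {
        type: 'function' as const,
        function: {
            name: tool.name,
            description: tool.description,
            parameters: { type: 'object', properties: tool.properties, required: tool.requiredFields ?? [] },
        },
    };
}
```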
@@ -259,11 +255,11 @@ export class ChatCompletionsApiInterface extends OpenAIApiInterface {
      */
     private async processStream(
         stream: AsyncIterable<OpenAI.ChatCompletionChunk>,
-        emitter: EventEmitter
-
-    ): Promise<{ toolsData: ToolData[]; finishReason: string }> {
+        emitter: EventEmitter
+    ): Promise<{ toolsData: ToolData[]; finishReason: string; usageData: any[] }> {
         let toolsData: ToolData[] = [];
         let finishReason = 'stop';
+        const usageData = [];
 
         for await (const part of stream) {
             const delta = part.choices[0]?.delta;
@@ -271,7 +267,7 @@ export class ChatCompletionsApiInterface extends OpenAIApiInterface {
 
             // Collect usage statistics
             if (usage) {
-
+                usageData.push(usage);
             }
 
             // Emit data event for delta
@@ -315,7 +311,7 @@ export class ChatCompletionsApiInterface extends OpenAIApiInterface {
             }
         }
 
-        return { toolsData: this.extractToolCalls(toolsData), finishReason };
+        return { toolsData: this.extractToolCalls(toolsData), finishReason, usageData };
     }
 
     /**
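`processStream` now returns the collected usage chunks instead of writing them to shared state. With the OpenAI SDK, the usage object is typically present only on the final chunk when `stream_options: { include_usage: true }` is requested (the same pattern the Groq connector uses above), so the array usually ends up with a single entry. A minimal collector sketch:

```ts
import OpenAI from 'openai';

// Minimal sketch: collect usage from a streamed Chat Completions response.
// With stream_options.include_usage, `usage` is non-null only on the final chunk.
async function collectUsage(stream: AsyncIterable<OpenAI.ChatCompletionChunk>) {
    const usageData: OpenAI.Completions.CompletionUsage[] = [];
    for await (const part of stream) {
        if (part.usage) {
            usageData.push(part.usage);
        }
    }
    return usageData;
}
```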
@@ -335,12 +331,16 @@ export class ChatCompletionsApiInterface extends OpenAIApiInterface {
     /**
      * Report usage statistics
      */
-    private reportUsageStatistics(usage_data: OpenAI.Completions.CompletionUsage[], context: ILLMRequestContext
+    private reportUsageStatistics(usage_data: OpenAI.Completions.CompletionUsage[], context: ILLMRequestContext): any[] {
+        const reportedUsage: any[] = [];
+
         // Report normal usage
         usage_data.forEach((usage) => {
             const reported = this.deps.reportUsage(usage, this.buildUsageContext(context));
             reportedUsage.push(reported);
         });
+
+        return reportedUsage;
     }
 
     /**
package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ResponsesApiInterface.ts
CHANGED

@@ -22,12 +22,12 @@ import { OpenAIApiInterface, ToolConfig } from './OpenAIApiInterface';
 import { HandlerDependencies, TToolType } from '../types';
 import { SUPPORTED_MIME_TYPES_MAP } from '@sre/constants';
 import { MODELS_WITHOUT_TEMPERATURE_SUPPORT, SEARCH_TOOL_COSTS } from './constants';
+import { isValidOpenAIReasoningEffort } from './utils';
 
 // File size limits in bytes
 const MAX_IMAGE_SIZE = 20 * 1024 * 1024; // 20MB
 const MAX_DOCUMENT_SIZE = 25 * 1024 * 1024; // 25MB
 
-type TSearchContextSize = 'low' | 'medium' | 'high';
 type TSearchLocation = {
     type: 'approximate';
     city?: string;
@@ -75,9 +75,6 @@ export class ResponsesApiInterface extends OpenAIApiInterface {
 
     public handleStream(stream: Stream<OpenAI.Responses.ResponseStreamEvent>, context: ILLMRequestContext): EventEmitter {
         const emitter = new EventEmitter();
-        const usage_data: any[] = [];
-        const reportedUsage: any[] = [];
-        let finishReason = 'stop';
 
         // Process stream asynchronously while returning emitter immediately
         (async () => {
@@ -85,12 +82,14 @@ export class ResponsesApiInterface extends OpenAIApiInterface {
 
             try {
                 // Step 1: Process the stream
-                const streamResult = await this.processStream(stream, emitter
+                const streamResult = await this.processStream(stream, emitter);
                 finalToolsData = streamResult.toolsData;
-
+
+                const finishReason = streamResult.finishReason || 'stop';
+                const usageData = streamResult.usageData;
 
                 // Step 2: Report usage statistics
-                this.reportUsageStatistics(
+                const reportedUsage = this.reportUsageStatistics(usageData, context);
 
                 // Step 3: Emit final events
                 this.emitFinalEvents(emitter, finalToolsData, reportedUsage, finishReason);
@@ -107,11 +106,11 @@ export class ResponsesApiInterface extends OpenAIApiInterface {
      */
     private async processStream(
         stream: Stream<OpenAI.Responses.ResponseStreamEvent>,
-        emitter: EventEmitter
-
-    ): Promise<{ toolsData: ToolData[]; finishReason: string }> {
+        emitter: EventEmitter
+    ): Promise<{ toolsData: ToolData[]; finishReason: string; usageData: any[] }> {
         let toolsData: ToolData[] = [];
         let finishReason = 'stop';
+        const usageData = [];
 
         for await (const part of stream) {
             // Handle different event types from the Responses API stream
@@ -189,12 +188,12 @@ export class ResponsesApiInterface extends OpenAIApiInterface {
             }
 
             // Handle usage statistics from response object
-            if ('response'
-
+            if (part?.type === 'response.completed' && part?.response?.usage) {
+                usageData.push(part.response.usage);
             }
         }
 
-        return { toolsData: this.extractToolCalls(toolsData), finishReason };
+        return { toolsData: this.extractToolCalls(toolsData), finishReason, usageData };
     }
 
     /**
@@ -214,7 +213,9 @@ export class ResponsesApiInterface extends OpenAIApiInterface {
     /**
      * Report usage statistics
      */
-    private reportUsageStatistics(usage_data: any[], context: ILLMRequestContext
+    private reportUsageStatistics(usage_data: any[], context: ILLMRequestContext): any[] {
+        const reportedUsage: any[] = [];
+
         // Report normal usage
         usage_data.forEach((usage) => {
             // Convert ResponseUsage to CompletionUsage format for compatibility
@@ -234,6 +235,8 @@ export class ResponsesApiInterface extends OpenAIApiInterface {
             const reported = this.deps.reportUsage(searchUsage, this.buildUsageContext(context));
             reportedUsage.push(reported);
         }
+
+        return reportedUsage;
     }
 
     /**
@@ -310,6 +313,19 @@ export class ResponsesApiInterface extends OpenAIApiInterface {
             body.top_p = params.topP;
         }
 
+        // #region GPT 5 specific fields
+
+        const isGPT5ReasoningModels = params.modelEntryName?.includes('gpt-5') && params?.capabilities?.reasoning;
+        if (isGPT5ReasoningModels && params?.verbosity) {
+            body.text = { verbosity: params.verbosity };
+        }
+
+        // We need to validate the `reasoningEffort` parameter for OpenAI models, since models like `qwen/qwen3-32b` and `deepseek-r1-distill-llama-70b` (available via Groq) also support this parameter but use different values, such as `none` and `default`. These values are valid in our system but not specifically for OpenAI.
+        if (isGPT5ReasoningModels && isValidOpenAIReasoningEffort(params.reasoningEffort)) {
+            body.reasoning = { effort: params.reasoningEffort };
+        }
+        // #endregion GPT 5 specific fields
+
         let tools: OpenAI.Responses.Tool[] = [];
 
         if (params?.toolsConfig?.tools && params?.toolsConfig?.tools?.length > 0) {
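For contrast with the Chat Completions hunk above, a hypothetical Responses API body after this branch runs: the same settings are nested under `text` and `reasoning` rather than set as flat fields (values are illustrative).

```ts
// Hypothetical Responses API request body after the GPT-5 branch runs
// (model name and values are illustrative):
const responsesBody = {
    model: 'gpt-5',
    input: 'Explain this change',
    text: { verbosity: 'low' as const },    // vs. a flat `verbosity` on Chat Completions
    reasoning: { effort: 'high' as const }, // vs. a flat `reasoning_effort`
};
```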
@@ -333,20 +349,13 @@ export class ResponsesApiInterface extends OpenAIApiInterface {
         return body;
     }
 
-    /**
-     * Type guard to check if a tool is an OpenAI tool definition
-     */
-    private isOpenAIToolDefinition(tool: OpenAIToolDefinition | LegacyToolDefinition): tool is OpenAIToolDefinition {
-        return 'parameters' in tool;
-    }
-
     /**
      * Transform OpenAI tool definitions to Responses.Tool format
      */
     public transformToolsConfig(config: ToolConfig): OpenAI.Responses.Tool[] {
         return config.toolDefinitions.map((tool) => {
             // Handle OpenAI tool definition format
-            if (
+            if ('parameters' in tool) {
                 return {
                     type: 'function' as const,
                     name: tool.name,
package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/utils.ts
ADDED

@@ -0,0 +1,11 @@
+import OpenAI from 'openai';
+
+// * We may move some OpenAI Connector–related utility functions here in the future.
+
+/**
+ * Type guard to check if a value is a valid OpenAI reasoning effort.
+ * Uses array includes for better maintainability when OpenAI adds new values.
+ */
+export function isValidOpenAIReasoningEffort(value: unknown): value is OpenAI.Responses.ResponseCreateParams['reasoning']['effort'] {
+    return ['minimal', 'low', 'medium', 'high'].includes(value as string);
+}
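A quick check showing why this guard matters when parameters can originate from Groq-oriented settings: the OpenAI-side values pass, while the Groq-only `'none'` and `'default'` are rejected (guard body copied from the hunk above).

```ts
// Copied guard body from utils.ts above, plus a tiny demonstration.
function isValidOpenAIReasoningEffort(value: unknown): boolean {
    return ['minimal', 'low', 'medium', 'high'].includes(value as string);
}

['minimal', 'high', 'none', 'default'].forEach((effort) => {
    console.log(effort, isValidOpenAIReasoningEffort(effort)); // true, true, false, false
});
```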
package/src/subsystems/LLMManager/ModelsProvider.service/ModelsProviderConnector.ts
CHANGED

@@ -59,6 +59,15 @@ export abstract class ModelsProviderConnector extends SecureConnector {
                     return null;
                 }
             }
+            //Workaround : non-blocking auto-refresh of team models
+            //this will force team models to refresh for the next request
+            //TODO: we need a more elegant cache invalidation mechanism, and only refresh the team models if the custom models have changed
+            setImmediate(async () => {
+                const _customModels = await this.getCustomModels(candidate);
+                teamModels = { ...teamModels, ..._customModels };
+            });
+
+            //immediatelly return the team models
             return teamModels;
         };
         loadTeamModels();
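The workaround above returns the cached team models immediately and refreshes the custom models off the hot path, so the next request sees updated data. A generic stale-while-revalidate sketch of that pattern; all names here are hypothetical.

```ts
// Generic stale-while-revalidate sketch (hypothetical names): return the
// cached value right away and refresh it in the background for later callers.
function staleWhileRevalidate<T extends object>(
    getCached: () => T,
    refresh: () => Promise<Partial<T>>,
    setCached: (value: T) => void
): T {
    const current = getCached();

    setImmediate(async () => {
        try {
            setCached(Object.assign({}, current, await refresh()));
        } catch {
            // Background refresh failures are swallowed; the stale value stays in place.
        }
    });

    return current; // the caller never waits on the refresh
}
```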
package/src/subsystems/MemoryManager/Cache.service/connectors/RedisCache.class.ts
CHANGED

@@ -26,27 +26,9 @@ export class RedisCache extends CacheConnector {
         let host = sentinels.length === 1 ? sentinels[0].host : null;
         let port = sentinels.length === 1 ? sentinels[0].port : null;
 
-        const redisConfig = {
-            // HEAVILY OPTIMIZED: Aggressive storm prevention parameters
-            maxRetriesPerRequest: 1, // VERY LIMITED retries (official)
-            retryDelayOnFailover: 50, // Fast failover (official)
-            connectTimeout: 3000, // SHORT timeout (official)
-            lazyConnect: false,
-            enableReadyCheck: false, // Skip ready check for speed (official)
-            commandTimeout: 2000, // VERY SHORT command timeout (official)
-            keepAlive: 10000, // Shorter keepalive - 10sec (official)
-            family: 4, // Force IPv4 (official)
-            maxLoadingTimeout: 2000, // Short loading timeout (official)
-            // Additional aggressive settings
-            enableOfflineQueue: false, // Disable offline queue (official)
-            db: 0, // Explicit DB (official)
-            stringNumbers: false, // No string conversion (official)
-        };
-
         this.redis = new IORedis({
             ...(host ? { host, port } : { sentinels, name: _settings.name || process.env.REDIS_MASTER_NAME }),
             password: _settings.password || process.env.REDIS_PASSWORD,
-            ...redisConfig,
         });
 
         this.redis.on('error', (error) => {
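With the aggressive overrides removed, the connector now passes only connection details and lets ioredis fall back to its library defaults for retry, timeout, and offline-queue behaviour. A minimal sketch of the resulting construction, assuming the same env variables as in the hunk and a made-up sentinel entry:

```ts
import IORedis from 'ioredis';

// Minimal sketch of the simplified construction: only host/sentinel info and
// the password are passed; everything else uses ioredis defaults.
// The sentinel entry below is made up for illustration.
const sentinels = [{ host: '127.0.0.1', port: 26379 }];
const single = sentinels.length === 1 ? sentinels[0] : null;

const redis = new IORedis({
    ...(single ? { host: single.host, port: single.port } : { sentinels, name: process.env.REDIS_MASTER_NAME }),
    password: process.env.REDIS_PASSWORD,
});
```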
package/src/types/LLM.types.ts
CHANGED
@@ -45,6 +45,9 @@ export type ILLMConnectorCredentials = BasicCredentials | BedrockCredentials | V
 export type TOpenAIResponseToolChoice = OpenAI.Responses.ToolChoiceOptions | OpenAI.Responses.ToolChoiceTypes | OpenAI.Responses.ToolChoiceFunction;
 export type TLLMToolChoice = OpenAI.ChatCompletionToolChoiceOption;
 
+// Local alias to the upstream OpenAI reasoning effort union type
+export type OpenAIReasoningEffort = NonNullable<OpenAI.Responses.ResponseCreateParams['reasoning']>['effort'];
+
 export type TOpenAIToolsInfo = {
     webSearch: {
         enabled: boolean;
@@ -149,7 +152,14 @@ export type TLLMParams = {
     // #endregion
 
     useReasoning?: boolean;
+    /**
+     * Controls the level of effort the model will put into reasoning
+     * For GPT-OSS models (20B, 120B): "low" | "medium" | "high"
+     * For Qwen 3 32B: "none" | "default"
+     */
+    reasoningEffort?: 'none' | 'default' | OpenAIReasoningEffort;
     max_output_tokens?: number;
+    verbosity?: OpenAI.Responses.ResponseCreateParams['text']['verbosity'];
     abortSignal?: AbortSignal;
 };
 
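A hypothetical `TLLMParams` fragment showing the new fields side by side; only the relevant fields are shown and the values are illustrative.

```ts
// Hypothetical params fragments (only the new fields are shown):
const groqStyleParams = {
    useReasoning: true,
    reasoningEffort: 'none' as const,  // Qwen 3 32B on Groq accepts 'none' | 'default'
};

const gpt5StyleParams = {
    useReasoning: true,
    reasoningEffort: 'high' as const,  // OpenAI union via OpenAIReasoningEffort
    verbosity: 'low' as const,         // forwarded to text.verbosity on the Responses API
};
```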