@smythos/sre 1.5.41 → 1.5.43
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +9 -10
- package/dist/index.js.map +1 -1
- package/dist/types/Components/AgentPlugin.class.d.ts +1 -1
- package/dist/types/Components/GPTPlugin.class.d.ts +1 -1
- package/dist/types/Components/GenAILLM.class.d.ts +16 -5
- package/dist/types/Components/MCPClient.class.d.ts +1 -1
- package/dist/types/Components/OpenAPI.class.d.ts +1 -1
- package/dist/types/helpers/Conversation.helper.d.ts +2 -2
- package/dist/types/subsystems/LLMManager/LLM.service/connectors/Groq.class.d.ts +2 -0
- package/dist/types/types/LLM.types.d.ts +6 -0
- package/package.json +1 -1
- package/src/Components/GenAILLM.class.ts +23 -6
- package/src/helpers/Conversation.helper.ts +10 -7
- package/src/subsystems/LLMManager/LLM.service/LLMCredentials.helper.ts +3 -1
- package/src/subsystems/LLMManager/LLM.service/connectors/Groq.class.ts +99 -92
- package/src/subsystems/LLMManager/ModelsProvider.service/ModelsProviderConnector.ts +9 -0
- package/src/subsystems/MemoryManager/Cache.service/connectors/RedisCache.class.ts +0 -18
- package/src/types/LLM.types.ts +6 -0
|
@@ -27,11 +27,6 @@ export declare class GenAILLM extends Component {
|
|
|
27
27
|
min: number;
|
|
28
28
|
label: string;
|
|
29
29
|
};
|
|
30
|
-
maxThinkingTokens: {
|
|
31
|
-
type: string;
|
|
32
|
-
min: number;
|
|
33
|
-
label: string;
|
|
34
|
-
};
|
|
35
30
|
stopSequences: {
|
|
36
31
|
type: string;
|
|
37
32
|
max: number;
|
|
@@ -221,6 +216,22 @@ export declare class GenAILLM extends Component {
|
|
|
221
216
|
label: string;
|
|
222
217
|
allowEmpty: boolean;
|
|
223
218
|
};
|
|
219
|
+
useReasoning: {
|
|
220
|
+
type: string;
|
|
221
|
+
description: string;
|
|
222
|
+
label: string;
|
|
223
|
+
};
|
|
224
|
+
reasoningEffort: {
|
|
225
|
+
type: string;
|
|
226
|
+
valid: string[];
|
|
227
|
+
description: string;
|
|
228
|
+
label: string;
|
|
229
|
+
};
|
|
230
|
+
maxThinkingTokens: {
|
|
231
|
+
type: string;
|
|
232
|
+
min: number;
|
|
233
|
+
label: string;
|
|
234
|
+
};
|
|
224
235
|
};
|
|
225
236
|
inputs: {
|
|
226
237
|
Input: {
|
|
@@ -46,8 +46,8 @@ export declare class Conversation extends EventEmitter {
|
|
|
46
46
|
agentVersion?: string;
|
|
47
47
|
});
|
|
48
48
|
get ready(): any;
|
|
49
|
-
prompt(message?: string | any, toolHeaders?: {}, concurrentToolCalls?: number, abortSignal?: AbortSignal): Promise<
|
|
50
|
-
streamPrompt(message?: string | any, toolHeaders?: {}, concurrentToolCalls?: number, abortSignal?: AbortSignal): Promise<
|
|
49
|
+
prompt(message?: string | any, toolHeaders?: {}, concurrentToolCalls?: number, abortSignal?: AbortSignal): Promise<string>;
|
|
50
|
+
streamPrompt(message?: string | any, toolHeaders?: {}, concurrentToolCalls?: number, abortSignal?: AbortSignal): Promise<string>;
|
|
51
51
|
private resolveToolEndpoint;
|
|
52
52
|
private useTool;
|
|
53
53
|
addTool(tool: {
|
|
@@ -5,6 +5,7 @@ import { LLMConnector } from '../LLMConnector';
|
|
|
5
5
|
type ChatCompletionCreateParams = {
|
|
6
6
|
model: string;
|
|
7
7
|
messages: any;
|
|
8
|
+
max_completion_tokens?: number;
|
|
8
9
|
max_tokens?: number;
|
|
9
10
|
temperature?: number;
|
|
10
11
|
stop?: string[];
|
|
@@ -12,6 +13,7 @@ type ChatCompletionCreateParams = {
|
|
|
12
13
|
tools?: any;
|
|
13
14
|
tool_choice?: string;
|
|
14
15
|
stream?: boolean;
|
|
16
|
+
reasoning_effort?: 'none' | 'default' | 'low' | 'medium' | 'high';
|
|
15
17
|
};
|
|
16
18
|
export declare class GroqConnector extends LLMConnector {
|
|
17
19
|
name: string;
|
|
@@ -124,6 +124,12 @@ export type TLLMParams = {
|
|
|
124
124
|
fromDate?: string;
|
|
125
125
|
toDate?: string;
|
|
126
126
|
useReasoning?: boolean;
|
|
127
|
+
/**
|
|
128
|
+
* Controls the level of effort the model will put into reasoning
|
|
129
|
+
* For GPT-OSS models (20B, 120B): "low" | "medium" | "high"
|
|
130
|
+
* For Qwen 3 32B: "none" | "default"
|
|
131
|
+
*/
|
|
132
|
+
reasoningEffort?: 'none' | 'default' | 'low' | 'medium' | 'high';
|
|
127
133
|
max_output_tokens?: number;
|
|
128
134
|
abortSignal?: AbortSignal;
|
|
129
135
|
};
|
package/package.json
CHANGED
|
@@ -36,11 +36,6 @@ export class GenAILLM extends Component {
|
|
|
36
36
|
min: 1,
|
|
37
37
|
label: 'Maximum Tokens',
|
|
38
38
|
},
|
|
39
|
-
maxThinkingTokens: {
|
|
40
|
-
type: 'number',
|
|
41
|
-
min: 1,
|
|
42
|
-
label: 'Maximum Thinking Tokens',
|
|
43
|
-
},
|
|
44
39
|
stopSequences: {
|
|
45
40
|
type: 'string',
|
|
46
41
|
max: 400,
|
|
@@ -236,6 +231,25 @@ export class GenAILLM extends Component {
|
|
|
236
231
|
allowEmpty: true,
|
|
237
232
|
},
|
|
238
233
|
// #endregion
|
|
234
|
+
|
|
235
|
+
// #region Reasoning
|
|
236
|
+
useReasoning: {
|
|
237
|
+
type: 'boolean',
|
|
238
|
+
description: 'If true, the component will use reasoning capabilities for complex problem-solving',
|
|
239
|
+
label: 'Use Reasoning',
|
|
240
|
+
},
|
|
241
|
+
reasoningEffort: {
|
|
242
|
+
type: 'string',
|
|
243
|
+
valid: ['none', 'default', 'low', 'medium', 'high'],
|
|
244
|
+
description: 'Controls the level of effort the model will put into reasoning',
|
|
245
|
+
label: 'Reasoning Effort',
|
|
246
|
+
},
|
|
247
|
+
maxThinkingTokens: {
|
|
248
|
+
type: 'number',
|
|
249
|
+
min: 1,
|
|
250
|
+
label: 'Maximum Thinking Tokens',
|
|
251
|
+
},
|
|
252
|
+
// #endregion
|
|
239
253
|
},
|
|
240
254
|
inputs: {
|
|
241
255
|
Input: {
|
|
@@ -306,8 +320,11 @@ export class GenAILLM extends Component {
|
|
|
306
320
|
.label('To Date'),
|
|
307
321
|
// #endregion
|
|
308
322
|
|
|
323
|
+
// #region Reasoning
|
|
309
324
|
useReasoning: Joi.boolean().optional().label('Use Reasoning'),
|
|
310
|
-
|
|
325
|
+
reasoningEffort: Joi.string().valid('none', 'default', 'low', 'medium', 'high').optional().allow('').label('Reasoning Effort'),
|
|
326
|
+
maxThinkingTokens: Joi.number().min(1).optional().label('Maximum Thinking Tokens'),
|
|
327
|
+
// #endregion
|
|
311
328
|
});
|
|
312
329
|
constructor() {
|
|
313
330
|
super();
|
|
@@ -348,11 +348,13 @@ export class Conversation extends EventEmitter {
|
|
|
348
348
|
// console.log('Passthrough skiped content ', content);
|
|
349
349
|
// return;
|
|
350
350
|
// }
|
|
351
|
-
const lastMessage = this._context?.messages?.[this._context?.messages?.length - 1];
|
|
352
|
-
//const skip = lastMessage?.
|
|
351
|
+
//const lastMessage = this._context?.messages?.[this._context?.messages?.length - 1];
|
|
352
|
+
//const skip = lastMessage?.__smyth_data__?.internal;
|
|
353
353
|
|
|
354
354
|
//skip if the content is the last generated message after a passthrough content
|
|
355
|
-
//if (skip)
|
|
355
|
+
// if (skip) {
|
|
356
|
+
// let s = true;
|
|
357
|
+
// }
|
|
356
358
|
_content += content;
|
|
357
359
|
this.emit(TLLMEvent.Content, content);
|
|
358
360
|
});
|
|
@@ -494,8 +496,9 @@ export class Conversation extends EventEmitter {
|
|
|
494
496
|
//delete toolHeaders['x-passthrough'];
|
|
495
497
|
} else {
|
|
496
498
|
//this._context.addAssistantMessage(passThroughContent, message_id);
|
|
497
|
-
|
|
498
|
-
|
|
499
|
+
|
|
500
|
+
//llmMessage.content += '\n' + passThroughContent;
|
|
501
|
+
this._context.addToolMessage(llmMessage, processedToolsData, message_id, { passThrough: true });
|
|
499
502
|
|
|
500
503
|
//this._context.addAssistantMessage(passThroughContent, message_id, { passthrough: true });
|
|
501
504
|
//this should not be stored in the persistent conversation store
|
|
@@ -545,7 +548,7 @@ export class Conversation extends EventEmitter {
|
|
|
545
548
|
return '';
|
|
546
549
|
});
|
|
547
550
|
_content += toolsContent;
|
|
548
|
-
let content = JSONContent(_content).tryParse();
|
|
551
|
+
//let content = JSONContent(_content).tryParse();
|
|
549
552
|
|
|
550
553
|
// let streamPromise = new Promise((resolve, reject) => {
|
|
551
554
|
// eventEmitter.on('end', async () => {
|
|
@@ -573,7 +576,7 @@ export class Conversation extends EventEmitter {
|
|
|
573
576
|
//console.log('tool content', content);
|
|
574
577
|
}
|
|
575
578
|
|
|
576
|
-
return
|
|
579
|
+
return _content;
|
|
577
580
|
}
|
|
578
581
|
|
|
579
582
|
private resolveToolEndpoint(baseUrl: string, method: string, endpoint: string, params: Record<string, any>): string {
|
|
@@ -5,7 +5,9 @@ import { TBedrockSettings, TCustomLLMModel, TLLMCredentials, TLLMModel, TVertexA
|
|
|
5
5
|
export async function getLLMCredentials(candidate: AccessCandidate, modelInfo: TLLMModel | TCustomLLMModel) {
|
|
6
6
|
//create a credentials list that we can iterate over
|
|
7
7
|
//if the credentials are not provided, we will use None as a default in order to return empty credentials
|
|
8
|
-
const credentialsList: any[] = !Array.isArray(modelInfo.credentials)
|
|
8
|
+
const credentialsList: any[] = !Array.isArray(modelInfo.credentials)
|
|
9
|
+
? [modelInfo.credentials || TLLMCredentials.Internal]
|
|
10
|
+
: modelInfo.credentials || [TLLMCredentials.Internal];
|
|
9
11
|
|
|
10
12
|
for (let credentialsMode of credentialsList) {
|
|
11
13
|
if (typeof credentialsMode === 'object') {
|
|
@@ -23,6 +23,7 @@ import { SystemEvents } from '@sre/Core/SystemEvents';
|
|
|
23
23
|
type ChatCompletionCreateParams = {
|
|
24
24
|
model: string;
|
|
25
25
|
messages: any;
|
|
26
|
+
max_completion_tokens?: number;
|
|
26
27
|
max_tokens?: number;
|
|
27
28
|
temperature?: number;
|
|
28
29
|
stop?: string[];
|
|
@@ -30,6 +31,7 @@ type ChatCompletionCreateParams = {
|
|
|
30
31
|
tools?: any;
|
|
31
32
|
tool_choice?: string;
|
|
32
33
|
stream?: boolean;
|
|
34
|
+
reasoning_effort?: 'none' | 'default' | 'low' | 'medium' | 'high';
|
|
33
35
|
};
|
|
34
36
|
|
|
35
37
|
export class GroqConnector extends LLMConnector {
|
|
@@ -44,113 +46,105 @@ export class GroqConnector extends LLMConnector {
|
|
|
44
46
|
}
|
|
45
47
|
|
|
46
48
|
protected async request({ acRequest, body, context }: ILLMRequestFuncParams): Promise<TLLMChatResponse> {
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
});
|
|
60
|
-
|
|
61
|
-
let toolsData: ToolData[] = [];
|
|
62
|
-
let useTool = false;
|
|
63
|
-
|
|
64
|
-
if (toolCalls) {
|
|
65
|
-
toolsData = toolCalls.map((tool, index) => ({
|
|
66
|
-
index,
|
|
67
|
-
id: tool.id,
|
|
68
|
-
type: tool.type,
|
|
69
|
-
name: tool.function.name,
|
|
70
|
-
arguments: tool.function.arguments,
|
|
71
|
-
role: TLLMMessageRole.Assistant,
|
|
72
|
-
}));
|
|
73
|
-
useTool = true;
|
|
74
|
-
}
|
|
49
|
+
const groq = await this.getClient(context);
|
|
50
|
+
const result = await groq.chat.completions.create(body);
|
|
51
|
+
const message = result?.choices?.[0]?.message;
|
|
52
|
+
const finishReason = result?.choices?.[0]?.finish_reason;
|
|
53
|
+
const toolCalls = message?.tool_calls;
|
|
54
|
+
const usage = result.usage;
|
|
55
|
+
this.reportUsage(usage, {
|
|
56
|
+
modelEntryName: context.modelEntryName,
|
|
57
|
+
keySource: context.isUserKey ? APIKeySource.User : APIKeySource.Smyth,
|
|
58
|
+
agentId: context.agentId,
|
|
59
|
+
teamId: context.teamId,
|
|
60
|
+
});
|
|
75
61
|
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
62
|
+
let toolsData: ToolData[] = [];
|
|
63
|
+
let useTool = false;
|
|
64
|
+
|
|
65
|
+
if (toolCalls) {
|
|
66
|
+
toolsData = toolCalls.map((tool, index) => ({
|
|
67
|
+
index,
|
|
68
|
+
id: tool.id,
|
|
69
|
+
type: tool.type,
|
|
70
|
+
name: tool.function.name,
|
|
71
|
+
arguments: tool.function.arguments,
|
|
72
|
+
role: TLLMMessageRole.Assistant,
|
|
73
|
+
}));
|
|
74
|
+
useTool = true;
|
|
86
75
|
}
|
|
76
|
+
|
|
77
|
+
return {
|
|
78
|
+
content: message?.content ?? '',
|
|
79
|
+
finishReason,
|
|
80
|
+
useTool,
|
|
81
|
+
toolsData,
|
|
82
|
+
message,
|
|
83
|
+
usage,
|
|
84
|
+
};
|
|
87
85
|
}
|
|
88
86
|
|
|
89
87
|
protected async streamRequest({ acRequest, body, context }: ILLMRequestFuncParams): Promise<EventEmitter> {
|
|
90
88
|
const emitter = new EventEmitter();
|
|
91
89
|
const usage_data = [];
|
|
92
90
|
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
(
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
usage_data.push(usage);
|
|
106
|
-
}
|
|
107
|
-
emitter.emit('data', delta);
|
|
108
|
-
|
|
109
|
-
if (delta?.content) {
|
|
110
|
-
emitter.emit('content', delta.content);
|
|
111
|
-
}
|
|
112
|
-
|
|
113
|
-
if (delta?.tool_calls) {
|
|
114
|
-
delta.tool_calls.forEach((toolCall, index) => {
|
|
115
|
-
if (!toolsData[index]) {
|
|
116
|
-
toolsData[index] = {
|
|
117
|
-
index,
|
|
118
|
-
id: toolCall.id,
|
|
119
|
-
type: toolCall.type,
|
|
120
|
-
name: toolCall.function?.name,
|
|
121
|
-
arguments: toolCall.function?.arguments,
|
|
122
|
-
role: 'assistant',
|
|
123
|
-
};
|
|
124
|
-
} else {
|
|
125
|
-
toolsData[index].arguments += toolCall.function?.arguments || '';
|
|
126
|
-
}
|
|
127
|
-
});
|
|
128
|
-
}
|
|
91
|
+
const groq = await this.getClient(context);
|
|
92
|
+
const stream = await groq.chat.completions.create({ ...body, stream: true, stream_options: { include_usage: true } });
|
|
93
|
+
|
|
94
|
+
let toolsData: ToolData[] = [];
|
|
95
|
+
|
|
96
|
+
(async () => {
|
|
97
|
+
for await (const chunk of stream as any) {
|
|
98
|
+
const delta = chunk.choices[0]?.delta;
|
|
99
|
+
const usage = chunk['x_groq']?.usage || chunk['usage'];
|
|
100
|
+
|
|
101
|
+
if (usage) {
|
|
102
|
+
usage_data.push(usage);
|
|
129
103
|
}
|
|
104
|
+
emitter.emit('data', delta);
|
|
130
105
|
|
|
131
|
-
if (
|
|
132
|
-
emitter.emit(
|
|
106
|
+
if (delta?.content) {
|
|
107
|
+
emitter.emit('content', delta.content);
|
|
133
108
|
}
|
|
134
109
|
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
110
|
+
if (delta?.tool_calls) {
|
|
111
|
+
delta.tool_calls.forEach((toolCall, index) => {
|
|
112
|
+
if (!toolsData[index]) {
|
|
113
|
+
toolsData[index] = {
|
|
114
|
+
index,
|
|
115
|
+
id: toolCall.id,
|
|
116
|
+
type: toolCall.type,
|
|
117
|
+
name: toolCall.function?.name,
|
|
118
|
+
arguments: toolCall.function?.arguments,
|
|
119
|
+
role: 'assistant',
|
|
120
|
+
};
|
|
121
|
+
} else {
|
|
122
|
+
toolsData[index].arguments += toolCall.function?.arguments || '';
|
|
123
|
+
}
|
|
142
124
|
});
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
if (toolsData.length > 0) {
|
|
129
|
+
emitter.emit(TLLMEvent.ToolInfo, toolsData);
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
usage_data.forEach((usage) => {
|
|
133
|
+
// probably we can acc them and send them as one event
|
|
134
|
+
this.reportUsage(usage, {
|
|
135
|
+
modelEntryName: context.modelEntryName,
|
|
136
|
+
keySource: context.isUserKey ? APIKeySource.User : APIKeySource.Smyth,
|
|
137
|
+
agentId: context.agentId,
|
|
138
|
+
teamId: context.teamId,
|
|
143
139
|
});
|
|
140
|
+
});
|
|
144
141
|
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
142
|
+
setTimeout(() => {
|
|
143
|
+
emitter.emit('end', toolsData);
|
|
144
|
+
}, 100);
|
|
145
|
+
})();
|
|
149
146
|
|
|
150
|
-
|
|
151
|
-
} catch (error: any) {
|
|
152
|
-
throw error;
|
|
153
|
-
}
|
|
147
|
+
return emitter;
|
|
154
148
|
}
|
|
155
149
|
|
|
156
150
|
protected async reqBodyAdapter(params: TLLMPreparedParams): Promise<ChatCompletionCreateParams> {
|
|
@@ -172,7 +166,15 @@ export class GroqConnector extends LLMConnector {
|
|
|
172
166
|
}
|
|
173
167
|
//#endregion Handle JSON response format
|
|
174
168
|
|
|
175
|
-
|
|
169
|
+
const isReasoningModel = params.useReasoning && params.capabilities?.reasoning;
|
|
170
|
+
|
|
171
|
+
if (params.maxTokens !== undefined) {
|
|
172
|
+
if (isReasoningModel) {
|
|
173
|
+
body.max_completion_tokens = params.maxTokens;
|
|
174
|
+
} else {
|
|
175
|
+
body.max_tokens = params.maxTokens;
|
|
176
|
+
}
|
|
177
|
+
}
|
|
176
178
|
if (params.temperature !== undefined) body.temperature = params.temperature;
|
|
177
179
|
if (params.topP !== undefined) body.top_p = params.topP;
|
|
178
180
|
if (params.stopSequences?.length) body.stop = params.stopSequences;
|
|
@@ -180,6 +182,11 @@ export class GroqConnector extends LLMConnector {
|
|
|
180
182
|
if (params.toolsConfig?.tools) body.tools = params.toolsConfig?.tools;
|
|
181
183
|
if (params.toolsConfig?.tool_choice) body.tool_choice = params.toolsConfig?.tool_choice as any;
|
|
182
184
|
|
|
185
|
+
// Apply user-specified reasoning parameters
|
|
186
|
+
if (isReasoningModel) {
|
|
187
|
+
if (params.reasoningEffort !== undefined) body.reasoning_effort = params.reasoningEffort;
|
|
188
|
+
}
|
|
189
|
+
|
|
183
190
|
return body;
|
|
184
191
|
}
|
|
185
192
|
|
|
@@ -59,6 +59,15 @@ export abstract class ModelsProviderConnector extends SecureConnector {
|
|
|
59
59
|
return null;
|
|
60
60
|
}
|
|
61
61
|
}
|
|
62
|
+
//Workaround : non-blocking auto-refresh of team models
|
|
63
|
+
//this will force team models to refresh for the next request
|
|
64
|
+
//TODO: we need a more elegant cache invalidation mechanism, and only refresh the team models if the custom models have changed
|
|
65
|
+
setImmediate(async () => {
|
|
66
|
+
const _customModels = await this.getCustomModels(candidate);
|
|
67
|
+
teamModels = { ...teamModels, ..._customModels };
|
|
68
|
+
});
|
|
69
|
+
|
|
70
|
+
//immediatelly return the team models
|
|
62
71
|
return teamModels;
|
|
63
72
|
};
|
|
64
73
|
loadTeamModels();
|
|
@@ -26,27 +26,9 @@ export class RedisCache extends CacheConnector {
|
|
|
26
26
|
let host = sentinels.length === 1 ? sentinels[0].host : null;
|
|
27
27
|
let port = sentinels.length === 1 ? sentinels[0].port : null;
|
|
28
28
|
|
|
29
|
-
const redisConfig = {
|
|
30
|
-
// HEAVILY OPTIMIZED: Aggressive storm prevention parameters
|
|
31
|
-
maxRetriesPerRequest: 1, // VERY LIMITED retries (official)
|
|
32
|
-
retryDelayOnFailover: 50, // Fast failover (official)
|
|
33
|
-
connectTimeout: 3000, // SHORT timeout (official)
|
|
34
|
-
lazyConnect: false,
|
|
35
|
-
enableReadyCheck: false, // Skip ready check for speed (official)
|
|
36
|
-
commandTimeout: 2000, // VERY SHORT command timeout (official)
|
|
37
|
-
keepAlive: 10000, // Shorter keepalive - 10sec (official)
|
|
38
|
-
family: 4, // Force IPv4 (official)
|
|
39
|
-
maxLoadingTimeout: 2000, // Short loading timeout (official)
|
|
40
|
-
// Additional aggressive settings
|
|
41
|
-
enableOfflineQueue: false, // Disable offline queue (official)
|
|
42
|
-
db: 0, // Explicit DB (official)
|
|
43
|
-
stringNumbers: false, // No string conversion (official)
|
|
44
|
-
};
|
|
45
|
-
|
|
46
29
|
this.redis = new IORedis({
|
|
47
30
|
...(host ? { host, port } : { sentinels, name: _settings.name || process.env.REDIS_MASTER_NAME }),
|
|
48
31
|
password: _settings.password || process.env.REDIS_PASSWORD,
|
|
49
|
-
...redisConfig,
|
|
50
32
|
});
|
|
51
33
|
|
|
52
34
|
this.redis.on('error', (error) => {
|
package/src/types/LLM.types.ts
CHANGED
|
@@ -149,6 +149,12 @@ export type TLLMParams = {
|
|
|
149
149
|
// #endregion
|
|
150
150
|
|
|
151
151
|
useReasoning?: boolean;
|
|
152
|
+
/**
|
|
153
|
+
* Controls the level of effort the model will put into reasoning
|
|
154
|
+
* For GPT-OSS models (20B, 120B): "low" | "medium" | "high"
|
|
155
|
+
* For Qwen 3 32B: "none" | "default"
|
|
156
|
+
*/
|
|
157
|
+
reasoningEffort?: 'none' | 'default' | 'low' | 'medium' | 'high';
|
|
152
158
|
max_output_tokens?: number;
|
|
153
159
|
abortSignal?: AbortSignal;
|
|
154
160
|
};
|