genai-lite 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +150 -5
- package/dist/config/presets.json +121 -17
- package/dist/llm/LLMService.d.ts +39 -2
- package/dist/llm/LLMService.js +291 -78
- package/dist/llm/LLMService.prepareMessage.test.d.ts +1 -0
- package/dist/llm/LLMService.prepareMessage.test.js +303 -0
- package/dist/llm/LLMService.sendMessage.preset.test.d.ts +1 -0
- package/dist/llm/LLMService.sendMessage.preset.test.js +153 -0
- package/dist/llm/LLMService.test.js +83 -0
- package/dist/llm/clients/AnthropicClientAdapter.js +64 -10
- package/dist/llm/clients/AnthropicClientAdapter.test.js +7 -1
- package/dist/llm/clients/GeminiClientAdapter.js +70 -11
- package/dist/llm/clients/GeminiClientAdapter.test.js +121 -1
- package/dist/llm/clients/MockClientAdapter.test.js +7 -1
- package/dist/llm/clients/OpenAIClientAdapter.js +26 -10
- package/dist/llm/clients/OpenAIClientAdapter.test.js +7 -1
- package/dist/llm/config.js +112 -2
- package/dist/llm/config.test.js +17 -0
- package/dist/llm/types.d.ts +106 -0
- package/package.json +3 -2
- package/src/config/presets.json +122 -17
package/dist/llm/clients/GeminiClientAdapter.js
CHANGED

@@ -133,6 +133,42 @@ class GeminiClientAdapter {
                 stopSequences: request.settings.stopSequences,
             }),
         };
+        // Handle reasoning/thinking configuration
+        if (request.settings.reasoning && !request.settings.reasoning.exclude) {
+            const reasoning = request.settings.reasoning;
+            let thinkingBudget;
+            // Convert reasoning settings to Gemini's thinkingConfig
+            if (reasoning.maxTokens !== undefined) {
+                thinkingBudget = reasoning.maxTokens;
+            }
+            else if (reasoning.effort) {
+                // Convert effort levels to token budgets
+                // Get model info to determine max budget
+                const modelId = request.modelId;
+                const maxBudget = modelId.includes('flash') ? 24576 : 65536; // Default max budgets
+                switch (reasoning.effort) {
+                    case 'high':
+                        thinkingBudget = Math.floor(maxBudget * 0.8);
+                        break;
+                    case 'medium':
+                        thinkingBudget = Math.floor(maxBudget * 0.5);
+                        break;
+                    case 'low':
+                        thinkingBudget = Math.floor(maxBudget * 0.2);
+                        break;
+                }
+            }
+            else if (reasoning.enabled !== false) {
+                // Use model default or dynamic budget (-1)
+                thinkingBudget = -1; // Let model decide
+            }
+            if (thinkingBudget !== undefined) {
+                generationConfig.thinkingConfig = {
+                    thinkingBudget: thinkingBudget,
+                    includeThoughts: true // Request thought summaries in response
+                };
+            }
+        }
         // Map safety settings from Athanor format to Gemini SDK format
         const safetySettings = request.settings.geminiSafetySettings?.map((setting) => ({
             category: setting.category,
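For orientation, the effort mapping above is deterministic given the model id. A minimal sketch of the resulting budgets, assuming the `LLMReasoningSettings` type (defined in `types.d.ts` below) is importable from the package root:

```typescript
// A sketch, not the library's documented API surface: the import path is an assumption.
import type { LLMReasoningSettings } from 'genai-lite';

const reasoning: LLMReasoningSettings = { enabled: true, effort: 'high' };

// For a modelId that does not contain 'flash' (e.g. 'gemini-2.5-pro'),
// maxBudget defaults to 65536, so the adapter computes:
//   high   -> Math.floor(65536 * 0.8) = 52428
//   medium -> Math.floor(65536 * 0.5) = 32768
//   low    -> Math.floor(65536 * 0.2) = 13107
// and sends generationConfig.thinkingConfig = { thinkingBudget: 52428, includeThoughts: true }.
```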
@@ -155,25 +191,48 @@ class GeminiClientAdapter {
     createSuccessResponse(response, request) {
         // Extract content from the response object
         const candidate = response.candidates?.[0];
-
+        let content = "";
+        let reasoning;
+        // Process all parts to extract content and thought summaries
+        if (candidate?.content?.parts) {
+            const thoughtParts = [];
+            const contentParts = [];
+            for (const part of candidate.content.parts) {
+                if (part.thought) {
+                    // This is a thought summary
+                    thoughtParts.push(part.text || "");
+                }
+                else if (part.text) {
+                    // Regular content
+                    contentParts.push(part.text);
+                }
+            }
+            content = contentParts.join("");
+            if (thoughtParts.length > 0) {
+                reasoning = thoughtParts.join("\n\n");
+            }
+        }
         // Extract usage data if available
         const usageMetadata = response.usageMetadata || {};
         const finishReason = this.mapGeminiFinishReason(candidate?.finishReason || null);
+        const choice = {
+            message: {
+                role: "assistant",
+                content: content,
+            },
+            finish_reason: finishReason,
+            index: 0,
+        };
+        // Include reasoning if available and not excluded
+        if (reasoning && request.settings.reasoning && !request.settings.reasoning.exclude) {
+            choice.reasoning = reasoning;
+        }
         return {
             id: this.generateResponseId(),
             provider: request.providerId,
             model: response.modelUsed || request.modelId,
             created: Math.floor(Date.now() / 1000),
-            choices: [
-                {
-                    message: {
-                        role: "assistant",
-                        content: content,
-                    },
-                    finish_reason: finishReason,
-                    index: 0,
-                },
-            ],
+            choices: [choice],
             usage: usageMetadata
                 ? {
                     prompt_tokens: usageMetadata.promptTokenCount || 0,
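With this change, thought summaries surface as `choices[0].reasoning`. A hedged consumption sketch; the `llmService` instance and the provider/model ids are illustrative, while `sendMessage` and the choice shape come from this package:

```typescript
// Sketch only: assumes an LLMService instance named llmService.
const response = await llmService.sendMessage({
  providerId: 'gemini',        // illustrative provider id
  modelId: 'gemini-2.5-flash',
  messages: [{ role: 'user', content: 'Why is the sky blue?' }],
  settings: { reasoning: { enabled: true } },
});

const [choice] = response.choices;
if (choice.reasoning) {
  // Thought summaries, joined with blank lines by the adapter above
  console.log('reasoning:', choice.reasoning);
}
console.log('answer:', choice.message.content);
```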
package/dist/llm/clients/GeminiClientAdapter.test.js
CHANGED

@@ -37,7 +37,13 @@ describe('GeminiClientAdapter', () => {
                stopSequences: [],
                user: 'test-user',
                geminiSafetySettings: [],
-                supportsSystemMessage: true
+                supportsSystemMessage: true,
+                reasoning: {
+                    enabled: false,
+                    effort: undefined,
+                    maxTokens: undefined,
+                    exclude: false
+                }
            }
        };
    });
@@ -195,6 +201,120 @@ describe('GeminiClientAdapter', () => {
            expect(successResponse.choices[0].finish_reason).toBe(expected);
        }
    });
+    describe('reasoning/thinking configuration', () => {
+        it('should add thinking config when reasoning is enabled with maxTokens', async () => {
+            const requestWithReasoning = {
+                ...basicRequest,
+                settings: {
+                    ...basicRequest.settings,
+                    reasoning: {
+                        enabled: true,
+                        maxTokens: 5000,
+                        effort: undefined,
+                        exclude: false
+                    }
+                }
+            };
+            mockGenerateContent.mockResolvedValueOnce({
+                text: () => 'Response with thinking',
+                candidates: [{
+                    finishReason: 'STOP',
+                    content: {
+                        parts: [{ text: 'Response with thinking' }]
+                    }
+                }],
+                usageMetadata: {}
+            });
+            await adapter.sendMessage(requestWithReasoning, 'test-api-key');
+            const callArgs = mockGenerateContent.mock.calls[0][0];
+            expect(callArgs.config.thinkingConfig).toEqual({
+                thinkingBudget: 5000,
+                includeThoughts: true
+            });
+        });
+        it('should convert effort levels to thinking budget', async () => {
+            const requestWithEffort = {
+                ...basicRequest,
+                settings: {
+                    ...basicRequest.settings,
+                    reasoning: {
+                        enabled: true,
+                        effort: 'high',
+                        maxTokens: undefined,
+                        exclude: false
+                    }
+                }
+            };
+            mockGenerateContent.mockResolvedValueOnce({
+                text: () => 'Response',
+                candidates: [{
+                    finishReason: 'STOP',
+                    content: {
+                        parts: [{ text: 'Response' }]
+                    }
+                }],
+                usageMetadata: {}
+            });
+            await adapter.sendMessage(requestWithEffort, 'test-api-key');
+            const callArgs = mockGenerateContent.mock.calls[0][0];
+            // For gemini-2.5-pro (not flash), max budget is 65536, high effort = 80%
+            expect(callArgs.config.thinkingConfig?.thinkingBudget).toBe(Math.floor(65536 * 0.8));
+        });
+        it('should use dynamic budget (-1) when reasoning enabled without specific settings', async () => {
+            const requestWithBasicReasoning = {
+                ...basicRequest,
+                settings: {
+                    ...basicRequest.settings,
+                    reasoning: {
+                        enabled: true,
+                        effort: undefined,
+                        maxTokens: undefined,
+                        exclude: false
+                    }
+                }
+            };
+            mockGenerateContent.mockResolvedValueOnce({
+                text: () => 'Response',
+                candidates: [{
+                    finishReason: 'STOP',
+                    content: {
+                        parts: [{ text: 'Response' }]
+                    }
+                }],
+                usageMetadata: {}
+            });
+            await adapter.sendMessage(requestWithBasicReasoning, 'test-api-key');
+            const callArgs = mockGenerateContent.mock.calls[0][0];
+            expect(callArgs.config.thinkingConfig?.thinkingBudget).toBe(-1);
+        });
+        it('should exclude thinking config when reasoning.exclude is true', async () => {
+            const requestWithExclude = {
+                ...basicRequest,
+                settings: {
+                    ...basicRequest.settings,
+                    reasoning: {
+                        enabled: true,
+                        maxTokens: 5000,
+                        effort: undefined,
+                        exclude: true
+                    }
+                }
+            };
+            mockGenerateContent.mockResolvedValueOnce({
+                text: () => 'Response',
+                candidates: [{
+                    finishReason: 'STOP',
+                    content: {
+                        parts: [{ text: 'Response' }]
+                    }
+                }],
+                usageMetadata: {}
+            });
+            await adapter.sendMessage(requestWithExclude, 'test-api-key');
+            const callArgs = mockGenerateContent.mock.calls[0][0];
+            expect(callArgs.config.thinkingConfig).toBeUndefined();
+        });
+    });
    describe('error handling', () => {
        it('should handle API key errors', async () => {
            const apiError = new Error('API key not valid');
package/dist/llm/clients/MockClientAdapter.test.js
CHANGED

@@ -20,7 +20,13 @@ describe('MockClientAdapter', () => {
                stopSequences: [],
                user: 'test-user',
                geminiSafetySettings: [],
-                supportsSystemMessage: true
+                supportsSystemMessage: true,
+                reasoning: {
+                    enabled: false,
+                    effort: undefined,
+                    maxTokens: undefined,
+                    exclude: false
+                }
            }
        };
    });
package/dist/llm/clients/OpenAIClientAdapter.js
CHANGED

@@ -64,6 +64,18 @@ class OpenAIClientAdapter {
                user: request.settings.user,
            }),
        };
+        // Handle reasoning configuration for OpenAI models (o-series)
+        if (request.settings.reasoning && !request.settings.reasoning.exclude) {
+            const reasoning = request.settings.reasoning;
+            // OpenAI uses reasoning_effort for o-series models
+            if (reasoning.effort) {
+                completionParams.reasoning_effort = reasoning.effort;
+            }
+            else if (reasoning.enabled !== false) {
+                // Default to medium effort if reasoning is enabled
+                completionParams.reasoning_effort = 'medium';
+            }
+        }
        console.log(`OpenAI API parameters:`, {
            model: completionParams.model,
            temperature: completionParams.temperature,
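The net effect on the outgoing payload, sketched with an illustrative o-series model id (`reasoning_effort` itself is a real Chat Completions parameter):

```typescript
// Sketch of the request body produced by the branch above when
// settings.reasoning = { enabled: true } with no explicit effort.
const completionParams = {
  model: 'o4-mini',           // illustrative o-series model id
  messages: [{ role: 'user', content: 'Summarize this.' }],
  reasoning_effort: 'medium', // defaulted by the adapter
};
```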
@@ -163,21 +175,25 @@ class OpenAIClientAdapter {
        if (!choice || !choice.message) {
            throw new Error("Invalid completion structure from OpenAI API");
        }
+        const responseChoice = {
+            message: {
+                role: choice.message.role,
+                content: choice.message.content || "",
+            },
+            finish_reason: choice.finish_reason,
+            index: choice.index,
+        };
+        // Check for reasoning content if OpenAI starts returning it
+        // (Currently o-series models don't return reasoning tokens)
+        if (choice.reasoning && request.settings.reasoning && !request.settings.reasoning.exclude) {
+            responseChoice.reasoning = choice.reasoning;
+        }
        return {
            id: completion.id,
            provider: request.providerId,
            model: completion.model || request.modelId,
            created: completion.created,
-            choices: [
-                {
-                    message: {
-                        role: choice.message.role,
-                        content: choice.message.content || "",
-                    },
-                    finish_reason: choice.finish_reason,
-                    index: choice.index,
-                },
-            ],
+            choices: [responseChoice],
            usage: completion.usage
                ? {
                    prompt_tokens: completion.usage.prompt_tokens,
package/dist/llm/clients/OpenAIClientAdapter.test.js
CHANGED

@@ -38,7 +38,13 @@ describe('OpenAIClientAdapter', () => {
                stopSequences: [],
                user: 'test-user',
                geminiSafetySettings: [],
-                supportsSystemMessage: true
+                supportsSystemMessage: true,
+                reasoning: {
+                    enabled: false,
+                    effort: undefined,
+                    maxTokens: undefined,
+                    exclude: false
+                }
            }
        };
    });
package/dist/llm/config.js
CHANGED

@@ -57,6 +57,12 @@ exports.DEFAULT_LLM_SETTINGS = {
        { category: "HARM_CATEGORY_DANGEROUS_CONTENT", threshold: "BLOCK_NONE" },
        { category: "HARM_CATEGORY_HARASSMENT", threshold: "BLOCK_NONE" },
    ],
+    reasoning: {
+        enabled: false,
+        effort: undefined,
+        maxTokens: undefined,
+        exclude: false,
+    },
 };
 /**
  * Per-provider default setting overrides
@@ -126,6 +132,16 @@ exports.SUPPORTED_MODELS = [
        supportsPromptCache: true,
        cacheWritesPrice: 3.75,
        cacheReadsPrice: 0.3,
+        reasoning: {
+            supported: true,
+            enabledByDefault: false,
+            canDisable: true,
+            minBudget: 1024,
+            maxBudget: 32000,
+            defaultBudget: 10000,
+            outputType: 'summary',
+            requiresStreamingAbove: 21333,
+        },
    },
    {
        id: "claude-opus-4-20250514",
@@ -140,6 +156,16 @@ exports.SUPPORTED_MODELS = [
        supportsPromptCache: true,
        cacheWritesPrice: 18.75,
        cacheReadsPrice: 1.5,
+        reasoning: {
+            supported: true,
+            enabledByDefault: false,
+            canDisable: true,
+            minBudget: 1024,
+            maxBudget: 32000,
+            defaultBudget: 10000,
+            outputType: 'summary',
+            requiresStreamingAbove: 21333,
+        },
    },
    {
        id: "claude-3-7-sonnet-20250219",
@@ -154,6 +180,16 @@ exports.SUPPORTED_MODELS = [
        supportsPromptCache: true,
        cacheWritesPrice: 3.75,
        cacheReadsPrice: 0.3,
+        reasoning: {
+            supported: true,
+            enabledByDefault: false,
+            canDisable: true,
+            minBudget: 1024,
+            maxBudget: 32000,
+            defaultBudget: 10000,
+            outputType: 'full',
+            requiresStreamingAbove: 21333,
+        },
    },
    {
        id: "claude-3-5-sonnet-20241022",
@@ -196,6 +232,19 @@ exports.SUPPORTED_MODELS = [
        supportsImages: true,
        supportsPromptCache: true,
        cacheReadsPrice: 0.31,
+        reasoning: {
+            supported: true,
+            enabledByDefault: true,
+            canDisable: false,
+            minBudget: 1024,
+            maxBudget: 65536,
+            defaultBudget: -1,
+            dynamicBudget: {
+                value: -1,
+                description: "Let model decide based on query complexity",
+            },
+            outputType: 'summary',
+        },
    },
    {
        id: "gemini-2.5-flash",
@@ -208,9 +257,18 @@ exports.SUPPORTED_MODELS = [
        maxTokens: 65536,
        supportsImages: true,
        supportsPromptCache: true,
-
+        reasoning: {
+            supported: true,
+            enabledByDefault: true,
+            canDisable: true,
+            minBudget: 1024,
            maxBudget: 24576,
-
+            defaultBudget: -1,
+            dynamicBudget: {
+                value: -1,
+                description: "Let model decide based on query complexity",
+            },
+            outputType: 'summary',
        },
    },
    {
@@ -224,6 +282,19 @@ exports.SUPPORTED_MODELS = [
        maxTokens: 64000,
        supportsImages: true,
        supportsPromptCache: true,
+        reasoning: {
+            supported: true,
+            enabledByDefault: false,
+            canDisable: true,
+            minBudget: 512,
+            maxBudget: 24576,
+            defaultBudget: -1,
+            dynamicBudget: {
+                value: -1,
+                description: "Let model decide based on query complexity",
+            },
+            outputType: 'summary',
+        },
    },
    {
        id: "gemini-2.0-flash",
@@ -265,6 +336,12 @@ exports.SUPPORTED_MODELS = [
        supportsPromptCache: true,
        cacheReadsPrice: 0.275,
        unsupportedParameters: ["topP"],
+        reasoning: {
+            supported: true,
+            enabledByDefault: true,
+            canDisable: false,
+            outputType: 'none',
+        },
    },
    {
        id: "gpt-4.1",
@@ -401,6 +478,16 @@ function getDefaultSettingsForModel(modelId, providerId) {
    if (modelInfo && modelInfo.maxTokens !== undefined) {
        mergedSettings.maxTokens = modelInfo.maxTokens;
    }
+    // Handle reasoning settings based on model capabilities
+    if (modelInfo?.reasoning?.supported) {
+        // If the model has reasoning enabled by default, update the settings
+        if (modelInfo.reasoning.enabledByDefault) {
+            mergedSettings.reasoning = {
+                ...mergedSettings.reasoning,
+                enabled: true,
+            };
+        }
+    }
    // Filter out undefined values and ensure required fields
    return Object.fromEntries(Object.entries(mergedSettings).filter(([_, value]) => value !== undefined));
 }
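A hedged sketch of what this merge produces; the function name is from this file, while the provider id and export path are assumptions:

```typescript
import { getDefaultSettingsForModel } from 'genai-lite'; // assumed export

// gemini-2.5-flash declares reasoning: { supported: true, enabledByDefault: true, ... },
// so the merged defaults come back with reasoning switched on:
const defaults = getDefaultSettingsForModel('gemini-2.5-flash', 'gemini');
// defaults.reasoning -> { enabled: true, effort: undefined, maxTokens: undefined, exclude: false }
```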
@@ -504,5 +591,28 @@ function validateLLMSettings(settings) {
            }
        }
    }
+    if (settings.reasoning !== undefined) {
+        if (typeof settings.reasoning !== "object" || settings.reasoning === null) {
+            errors.push("reasoning must be an object");
+        }
+        else {
+            if (settings.reasoning.enabled !== undefined && typeof settings.reasoning.enabled !== "boolean") {
+                errors.push("reasoning.enabled must be a boolean");
+            }
+            if (settings.reasoning.effort !== undefined) {
+                if (!["high", "medium", "low"].includes(settings.reasoning.effort)) {
+                    errors.push("reasoning.effort must be 'high', 'medium', or 'low'");
+                }
+            }
+            if (settings.reasoning.maxTokens !== undefined) {
+                if (!Number.isInteger(settings.reasoning.maxTokens) || settings.reasoning.maxTokens < 0) {
+                    errors.push("reasoning.maxTokens must be a non-negative integer");
+                }
+            }
+            if (settings.reasoning.exclude !== undefined && typeof settings.reasoning.exclude !== "boolean") {
+                errors.push("reasoning.exclude must be a boolean");
+            }
+        }
+    }
    return errors;
 }
package/dist/llm/config.test.js
CHANGED

@@ -143,6 +143,23 @@ describe('LLM Config', () => {
        };
        expect((0, config_1.validateLLMSettings)(validGeminiSettings)).toEqual([]);
    });
+    it('should validate reasoning settings', () => {
+        // Invalid reasoning object
+        expect((0, config_1.validateLLMSettings)({ reasoning: 'invalid' })).toContain('reasoning must be an object');
+        // Invalid enabled value
+        expect((0, config_1.validateLLMSettings)({ reasoning: { enabled: 'yes' } })).toContain('reasoning.enabled must be a boolean');
+        // Invalid effort value
+        expect((0, config_1.validateLLMSettings)({ reasoning: { effort: 'maximum' } })).toContain("reasoning.effort must be 'high', 'medium', or 'low'");
+        expect((0, config_1.validateLLMSettings)({ reasoning: { effort: 'high' } })).toEqual([]);
+        // Invalid maxTokens value
+        expect((0, config_1.validateLLMSettings)({ reasoning: { maxTokens: -100 } })).toContain('reasoning.maxTokens must be a non-negative integer');
+        expect((0, config_1.validateLLMSettings)({ reasoning: { maxTokens: 1.5 } })).toContain('reasoning.maxTokens must be a non-negative integer');
+        expect((0, config_1.validateLLMSettings)({ reasoning: { maxTokens: 5000 } })).toEqual([]);
+        // Invalid exclude value
+        expect((0, config_1.validateLLMSettings)({ reasoning: { exclude: 'yes' } })).toContain('reasoning.exclude must be a boolean');
+        // Valid reasoning settings
+        expect((0, config_1.validateLLMSettings)({ reasoning: { enabled: true, effort: 'medium', maxTokens: 10000, exclude: false } })).toEqual([]);
+    });
    it('should return multiple errors for multiple invalid fields', () => {
        const invalidSettings = {
            temperature: -1,
package/dist/llm/types.d.ts
CHANGED

@@ -29,6 +29,19 @@ export interface GeminiSafetySetting {
     category: GeminiHarmCategory;
     threshold: GeminiHarmBlockThreshold;
 }
+/**
+ * Reasoning/thinking configuration for LLM requests
+ */
+export interface LLMReasoningSettings {
+    /** Enable reasoning/thinking mode */
+    enabled?: boolean;
+    /** Effort-based control (OpenAI style) */
+    effort?: 'high' | 'medium' | 'low';
+    /** Token-based control (Anthropic/Gemini style) */
+    maxTokens?: number;
+    /** Exclude reasoning from response (keep internal only) */
+    exclude?: boolean;
+}
 /**
  * Configurable settings for LLM requests
  */
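The single interface covers both provider styles; illustrative values:

```typescript
// Token-budget control (Anthropic/Gemini style)
const budgeted: LLMReasoningSettings = { enabled: true, maxTokens: 8000 };
// Effort-level control (OpenAI o-series style)
const effortBased: LLMReasoningSettings = { enabled: true, effort: 'low' };
// Think internally but omit the reasoning text from the response
const hidden: LLMReasoningSettings = { enabled: true, exclude: true };
```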
@@ -51,6 +64,8 @@ export interface LLMSettings {
     supportsSystemMessage?: boolean;
     /** Gemini-specific safety settings for content filtering */
     geminiSafetySettings?: GeminiSafetySetting[];
+    /** Universal reasoning/thinking configuration */
+    reasoning?: LLMReasoningSettings;
 }
 /**
  * Request structure for chat completion
@@ -62,6 +77,17 @@ export interface LLMChatRequest {
     systemMessage?: string;
     settings?: LLMSettings;
 }
+/**
+ * Extended request structure that supports preset IDs
+ */
+export interface LLMChatRequestWithPreset extends Omit<LLMChatRequest, 'providerId' | 'modelId'> {
+    /** Provider ID (required if not using presetId) */
+    providerId?: ApiProviderId;
+    /** Model ID (required if not using presetId) */
+    modelId?: string;
+    /** Preset ID (alternative to providerId/modelId) */
+    presetId?: string;
+}
 /**
  * Individual choice in an LLM response
  */
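A sketch of the same chat expressed two ways; the preset id string is illustrative, with actual ids defined in `config/presets.json`:

```typescript
const byPreset: LLMChatRequestWithPreset = {
  presetId: 'gemini-2.5-flash-reasoning', // illustrative preset id
  messages: [{ role: 'user', content: 'Hello' }],
};

const byExplicitIds: LLMChatRequestWithPreset = {
  providerId: 'gemini', // illustrative provider id
  modelId: 'gemini-2.5-flash',
  messages: [{ role: 'user', content: 'Hello' }],
};
```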
@@ -69,6 +95,10 @@ export interface LLMChoice {
     message: LLMMessage;
     finish_reason: string | null;
     index?: number;
+    /** Reasoning/thinking content (if available and not excluded) */
+    reasoning?: string;
+    /** Provider-specific reasoning details that need to be preserved */
+    reasoning_details?: any;
 }
 /**
  * Token usage information from LLM APIs
@@ -117,6 +147,34 @@ export interface ProviderInfo {
     name: string;
     unsupportedParameters?: (keyof LLMSettings)[];
 }
+/**
+ * Reasoning/thinking capabilities for a model
+ */
+export interface ModelReasoningCapabilities {
+    /** Does this model support reasoning/thinking? */
+    supported: boolean;
+    /** Is reasoning enabled by default? */
+    enabledByDefault?: boolean;
+    /** Can reasoning be disabled? (e.g., Gemini Pro can't) */
+    canDisable?: boolean;
+    /** Minimum token budget for reasoning */
+    minBudget?: number;
+    /** Maximum token budget for reasoning */
+    maxBudget?: number;
+    /** Default token budget if not specified */
+    defaultBudget?: number;
+    /** Special budget values (e.g., -1 for Gemini's dynamic) */
+    dynamicBudget?: {
+        value: number;
+        description: string;
+    };
+    /** Price per 1M reasoning tokens (optional - if not set, uses regular outputPrice) */
+    outputPrice?: number;
+    /** What type of reasoning output is returned */
+    outputType?: 'full' | 'summary' | 'none';
+    /** Token count above which streaming is required */
+    requiresStreamingAbove?: number;
+}
 /**
  * Information about a supported LLM model
  */
@@ -132,10 +190,13 @@ export interface ModelInfo {
     maxTokens?: number;
     supportsImages?: boolean;
     supportsPromptCache: boolean;
+    /** @deprecated Use reasoning instead */
     thinkingConfig?: {
         maxBudget?: number;
         outputPrice?: number;
     };
+    /** Reasoning/thinking capabilities */
+    reasoning?: ModelReasoningCapabilities;
     cacheWritesPrice?: number;
     cacheReadsPrice?: number;
     unsupportedParameters?: (keyof LLMSettings)[];
@@ -153,3 +214,48 @@ export declare const LLM_IPC_CHANNELS: {
  * Type for LLM IPC channel names
  */
 export type LLMIPCChannelName = (typeof LLM_IPC_CHANNELS)[keyof typeof LLM_IPC_CHANNELS];
+/**
+ * Options for preparing messages with model context
+ */
+export interface PrepareMessageOptions {
+    /** Template string to render with variables and model context */
+    template?: string;
+    /** Variables to inject into the template */
+    variables?: Record<string, any>;
+    /** Pre-built messages (alternative to template) */
+    messages?: LLMMessage[];
+    /** Model selection - use preset ID */
+    presetId?: string;
+    /** Model selection - use provider ID (requires modelId) */
+    providerId?: ApiProviderId;
+    /** Model selection - use model ID (requires providerId) */
+    modelId?: string;
+    /** Optional settings override */
+    settings?: LLMSettings;
+}
+/**
+ * Model context variables injected into templates
+ */
+export interface ModelContext {
+    /** Whether reasoning/thinking is enabled for this request */
+    thinking_enabled: boolean;
+    /** Whether the model supports reasoning/thinking */
+    thinking_available: boolean;
+    /** The resolved model ID */
+    model_id: string;
+    /** The resolved provider ID */
+    provider_id: string;
+    /** Reasoning effort level if specified */
+    reasoning_effort?: string;
+    /** Reasoning max tokens if specified */
+    reasoning_max_tokens?: number;
+}
+/**
+ * Result of preparing messages with model context
+ */
+export interface PrepareMessageResult {
+    /** The prepared messages ready to send */
+    messages: LLMMessage[];
+    /** Model context that was injected into the template */
+    modelContext: ModelContext;
+}
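Taken together, these three interfaces describe a prepare-then-send flow. A loose end-to-end sketch; the `prepareMessage` method name mirrors the new `LLMService.prepareMessage` tests, but the template placeholder syntax and preset id here are assumptions:

```typescript
// Hedged sketch of the flow these types describe.
const prepared: PrepareMessageResult = await llmService.prepareMessage({
  template: 'Explain {{topic}} briefly.',   // placeholder syntax assumed
  variables: { topic: 'closures' },
  presetId: 'gemini-2.5-flash-reasoning',   // illustrative preset id
});

// ModelContext reports reasoning state for the resolved model:
if (prepared.modelContext.thinking_enabled) {
  console.log('reasoning is on for', prepared.modelContext.model_id);
}

const response = await llmService.sendMessage({
  presetId: 'gemini-2.5-flash-reasoning',
  messages: prepared.messages,
});
```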