mcp-rubber-duck 1.2.4 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +1 -0
- package/.github/workflows/security.yml +4 -2
- package/.github/workflows/semantic-release.yml +4 -2
- package/CHANGELOG.md +20 -0
- package/README.md +116 -2
- package/audit-ci.json +3 -1
- package/dist/config/types.d.ts +78 -0
- package/dist/config/types.d.ts.map +1 -1
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +150 -0
- package/dist/server.js.map +1 -1
- package/dist/services/consensus.d.ts +28 -0
- package/dist/services/consensus.d.ts.map +1 -0
- package/dist/services/consensus.js +257 -0
- package/dist/services/consensus.js.map +1 -0
- package/dist/services/mcp-client-manager.d.ts.map +1 -1
- package/dist/services/mcp-client-manager.js +1 -3
- package/dist/services/mcp-client-manager.js.map +1 -1
- package/dist/tools/duck-debate.d.ts +16 -0
- package/dist/tools/duck-debate.d.ts.map +1 -0
- package/dist/tools/duck-debate.js +272 -0
- package/dist/tools/duck-debate.js.map +1 -0
- package/dist/tools/duck-iterate.d.ts +14 -0
- package/dist/tools/duck-iterate.d.ts.map +1 -0
- package/dist/tools/duck-iterate.js +195 -0
- package/dist/tools/duck-iterate.js.map +1 -0
- package/dist/tools/duck-judge.d.ts +15 -0
- package/dist/tools/duck-judge.d.ts.map +1 -0
- package/dist/tools/duck-judge.js +208 -0
- package/dist/tools/duck-judge.js.map +1 -0
- package/dist/tools/duck-vote.d.ts +14 -0
- package/dist/tools/duck-vote.d.ts.map +1 -0
- package/dist/tools/duck-vote.js +46 -0
- package/dist/tools/duck-vote.js.map +1 -0
- package/package.json +1 -1
- package/src/config/types.ts +92 -0
- package/src/server.ts +154 -0
- package/src/services/consensus.ts +324 -0
- package/src/services/mcp-client-manager.ts +1 -3
- package/src/tools/duck-debate.ts +383 -0
- package/src/tools/duck-iterate.ts +253 -0
- package/src/tools/duck-judge.ts +301 -0
- package/src/tools/duck-vote.ts +87 -0
- package/tests/consensus.test.ts +282 -0
- package/tests/duck-debate.test.ts +286 -0
- package/tests/duck-iterate.test.ts +249 -0
- package/tests/duck-judge.test.ts +296 -0
- package/tests/duck-vote.test.ts +250 -0
|
@@ -0,0 +1,296 @@
|
|
|
1
|
+
import { describe, it, expect, jest, beforeEach } from '@jest/globals';
|
|
2
|
+
|
|
3
|
+
// The OpenAI SDK must be stubbed BEFORE the provider code is imported.
// Every stubbed client exposes the same `mockCreate` function as
// `chat.completions.create`, so tests can queue completions on it.
const mockCreate = jest.fn();
jest.mock('openai', () => ({
  __esModule: true,
  default: jest.fn().mockImplementation(() => ({
    chat: { completions: { create: mockCreate } },
  })),
}));

// Auto-mock the config manager and the logger modules.
jest.mock('../src/config/config');
jest.mock('../src/utils/logger');
|
|
22
|
+
|
|
23
|
+
import { duckJudgeTool } from '../src/tools/duck-judge';
|
|
24
|
+
import { ProviderManager } from '../src/providers/manager';
|
|
25
|
+
import { ConfigManager } from '../src/config/config';
|
|
26
|
+
import { DuckResponse } from '../src/config/types';
|
|
27
|
+
|
|
28
|
+
describe('duckJudgeTool', () => {
|
|
29
|
+
// Rebuilt for every test in beforeEach.
let mockProviderManager: ProviderManager;
let mockConfigManager: jest.Mocked<ConfigManager>;

// Canned duck answers that the tests hand to the judge tool.
const gpt4Answer: DuckResponse = {
  provider: 'openai',
  nickname: 'GPT-4',
  model: 'gpt-4',
  content: 'Response from GPT-4 about error handling using try-catch blocks.',
  latency: 1000,
  cached: false,
};
const geminiAnswer: DuckResponse = {
  provider: 'gemini',
  nickname: 'Gemini',
  model: 'gemini-pro',
  content: 'Response from Gemini about error handling using Result types.',
  latency: 1500,
  cached: false,
};
const mockResponses: DuckResponse[] = [gpt4Answer, geminiAnswer];
|
|
50
|
+
|
|
51
|
+
beforeEach(() => {
  // clearAllMocks() only wipes recorded calls/results. Also reset mockCreate so
  // that queued mockResolvedValueOnce/mockRejectedValueOnce values which an
  // earlier test did not consume cannot leak into the next test.
  jest.clearAllMocks();
  mockCreate.mockReset();

  // Two-provider configuration returned by the stubbed ConfigManager.
  mockConfigManager = {
    getConfig: jest.fn().mockReturnValue({
      providers: {
        openai: {
          api_key: 'key1',
          base_url: 'https://api.openai.com/v1',
          default_model: 'gpt-4',
          nickname: 'GPT-4',
          models: ['gpt-4'],
        },
        gemini: {
          api_key: 'key2',
          base_url: 'https://api.gemini.com/v1',
          default_model: 'gemini-pro',
          nickname: 'Gemini',
          models: ['gemini-pro'],
        },
      },
      default_provider: 'openai',
      cache_ttl: 300,
      enable_failover: true,
      default_temperature: 0.7,
    }),
  } as any;

  mockProviderManager = new ProviderManager(mockConfigManager);

  // Override the client method on all providers so each one calls mockCreate.
  const provider1 = mockProviderManager.getProvider('openai');
  const provider2 = mockProviderManager.getProvider('gemini');
  provider1['client'].chat.completions.create = mockCreate;
  provider2['client'].chat.completions.create = mockCreate;
});
|
|
87
|
+
|
|
88
|
+
it('should throw error when responses are missing', async () => {
  // Arguments carry no `responses` key at all.
  const attempt = duckJudgeTool(mockProviderManager, {});

  await expect(attempt).rejects.toThrow('At least one response is required');
});
|
|
93
|
+
|
|
94
|
+
it('should throw error when responses is empty array', async () => {
  // `responses` is present but contains nothing to judge.
  const attempt = duckJudgeTool(mockProviderManager, { responses: [] });

  await expect(attempt).rejects.toThrow('At least one response is required');
});
|
|
99
|
+
|
|
100
|
+
it('should throw error when only one response provided', async () => {
  // Only the first canned response is supplied.
  const loneResponse = [mockResponses[0]];
  const attempt = duckJudgeTool(mockProviderManager, { responses: loneResponse });

  await expect(attempt).rejects.toThrow('At least two responses are required');
});
|
|
105
|
+
|
|
106
|
+
it('should evaluate responses and return rankings', async () => {
  // Verdict the mocked judge returns: gemini ranked ahead of openai.
  const verdict = {
    rankings: [
      { provider: 'gemini', score: 85, justification: 'Better type safety explanation' },
      { provider: 'openai', score: 75, justification: 'Good but less comprehensive' },
    ],
    criteria_scores: {
      gemini: { accuracy: 85, completeness: 90, clarity: 80 },
      openai: { accuracy: 75, completeness: 70, clarity: 80 },
    },
    summary: 'Gemini provided a more comprehensive response with better type safety coverage.',
  };
  const completion = {
    choices: [{
      message: { content: JSON.stringify(verdict) },
      finish_reason: 'stop',
    }],
    usage: { prompt_tokens: 100, completion_tokens: 50, total_tokens: 150 },
    model: 'gpt-4',
  };
  mockCreate.mockResolvedValueOnce(completion);

  const outcome = await duckJudgeTool(mockProviderManager, { responses: mockResponses });

  // Exactly one text block is returned.
  expect(outcome.content).toHaveLength(1);
  expect(outcome.content[0].type).toBe('text');

  // The rendered text shows the header, both rank markers and the top score.
  const rendered = outcome.content[0].text;
  for (const fragment of ['Judge Evaluation', '#1', '#2', 'gemini', '85/100']) {
    expect(rendered).toContain(fragment);
  }
});
|
|
142
|
+
|
|
143
|
+
it('should use specified judge provider', async () => {
  // Verdict returned by the mocked completion.
  const verdict = {
    rankings: [
      { provider: 'openai', score: 80, justification: 'Good response' },
      { provider: 'gemini', score: 70, justification: 'Okay response' },
    ],
    summary: 'OpenAI wins.',
  };
  const completion = {
    choices: [{
      message: { content: JSON.stringify(verdict) },
      finish_reason: 'stop',
    }],
    usage: { prompt_tokens: 100, completion_tokens: 50, total_tokens: 150 },
    model: 'gemini-pro',
  };
  mockCreate.mockResolvedValueOnce(completion);

  // Explicitly request gemini as the judge.
  const outcome = await duckJudgeTool(mockProviderManager, {
    responses: mockResponses,
    judge: 'gemini',
  });

  const rendered = outcome.content[0].text;
  expect(rendered).toContain('Gemini');
});
|
|
169
|
+
|
|
170
|
+
it('should use custom criteria', async () => {
  // Verdict returned by the mocked completion.
  const verdict = {
    rankings: [
      { provider: 'openai', score: 90, justification: 'Most secure' },
      { provider: 'gemini', score: 85, justification: 'Good security' },
    ],
    summary: 'Security focused evaluation.',
  };
  const completion = {
    choices: [{
      message: { content: JSON.stringify(verdict) },
      finish_reason: 'stop',
    }],
    usage: { prompt_tokens: 100, completion_tokens: 50, total_tokens: 150 },
    model: 'gpt-4',
  };
  mockCreate.mockResolvedValueOnce(completion);

  const outcome = await duckJudgeTool(mockProviderManager, {
    responses: mockResponses,
    criteria: ['security', 'performance', 'maintainability'],
  });

  // Each requested criterion must appear in the rendered output.
  const rendered = outcome.content[0].text;
  for (const criterion of ['security', 'performance', 'maintainability']) {
    expect(rendered).toContain(criterion);
  }
});
|
|
198
|
+
|
|
199
|
+
it('should handle persona parameter', async () => {
  // Verdict returned by the mocked completion.
  const verdict = {
    rankings: [
      { provider: 'openai', score: 85, justification: 'Senior approved' },
      { provider: 'gemini', score: 80, justification: 'Good for juniors' },
    ],
    summary: 'From a senior perspective.',
  };
  const completion = {
    choices: [{
      message: { content: JSON.stringify(verdict) },
      finish_reason: 'stop',
    }],
    usage: { prompt_tokens: 100, completion_tokens: 50, total_tokens: 150 },
    model: 'gpt-4',
  };
  mockCreate.mockResolvedValueOnce(completion);

  const outcome = await duckJudgeTool(mockProviderManager, {
    responses: mockResponses,
    persona: 'senior engineer',
  });

  // An evaluation is produced from a single completion call.
  const rendered = outcome.content[0].text;
  expect(rendered).toContain('Judge Evaluation');
  expect(mockCreate).toHaveBeenCalledTimes(1);
});
|
|
225
|
+
|
|
226
|
+
it('should handle invalid JSON gracefully with fallback', async () => {
  // The mocked judge answers with plain prose instead of JSON.
  const completion = {
    choices: [{
      message: { content: 'This is not valid JSON at all, just some random text.' },
      finish_reason: 'stop',
    }],
    usage: { prompt_tokens: 100, completion_tokens: 50, total_tokens: 150 },
    model: 'gpt-4',
  };
  mockCreate.mockResolvedValueOnce(completion);

  const outcome = await duckJudgeTool(mockProviderManager, { responses: mockResponses });

  // The tool still renders an evaluation and reports the parse problem.
  const rendered = outcome.content[0].text;
  for (const fragment of ['Judge Evaluation', 'Unable to parse']) {
    expect(rendered).toContain(fragment);
  }
});
|
|
244
|
+
|
|
245
|
+
it('should handle JSON with extra text around it', async () => {
  // The JSON verdict is wrapped in a leading and a trailing line of prose.
  const wrappedVerdict = [
    'Here is my evaluation:',
    '{"rankings": [{"provider": "openai", "score": 90, "justification": "Best"}], "summary": "Done"}',
    'Hope this helps!',
  ].join('\n');
  const completion = {
    choices: [{
      message: { content: wrappedVerdict },
      finish_reason: 'stop',
    }],
    usage: { prompt_tokens: 100, completion_tokens: 50, total_tokens: 150 },
    model: 'gpt-4',
  };
  mockCreate.mockResolvedValueOnce(completion);

  const outcome = await duckJudgeTool(mockProviderManager, { responses: mockResponses });

  // The embedded ranking must still show up in the rendered output.
  const rendered = outcome.content[0].text;
  expect(rendered).toContain('90/100');
  expect(rendered).toContain('openai');
});
|
|
267
|
+
|
|
268
|
+
it('should include missing providers in rankings', async () => {
  // The judge only ranks one of the two providers it was given.
  const verdict = {
    rankings: [
      { provider: 'openai', score: 85, justification: 'Good' },
    ],
    summary: 'Only evaluated one.',
  };
  const completion = {
    choices: [{
      message: { content: JSON.stringify(verdict) },
      finish_reason: 'stop',
    }],
    usage: { prompt_tokens: 100, completion_tokens: 50, total_tokens: 150 },
    model: 'gpt-4',
  };
  mockCreate.mockResolvedValueOnce(completion);

  const outcome = await duckJudgeTool(mockProviderManager, { responses: mockResponses });

  // Both providers should be listed even though only one was ranked.
  const rendered = outcome.content[0].text;
  for (const fragment of ['openai', 'gemini', 'Not evaluated']) {
    expect(rendered).toContain(fragment);
  }
});
|
|
296
|
+
});
|
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
import { describe, it, expect, jest, beforeEach } from '@jest/globals';
|
|
2
|
+
|
|
3
|
+
// The OpenAI SDK must be stubbed BEFORE the provider code is imported.
// Every stubbed client exposes the same `mockCreate` function as
// `chat.completions.create`, so tests can queue completions on it.
const mockCreate = jest.fn();
jest.mock('openai', () => ({
  __esModule: true,
  default: jest.fn().mockImplementation(() => ({
    chat: { completions: { create: mockCreate } },
  })),
}));

// Auto-mock the config manager and the logger modules.
jest.mock('../src/config/config');
jest.mock('../src/utils/logger');
|
|
22
|
+
|
|
23
|
+
import { duckVoteTool } from '../src/tools/duck-vote';
|
|
24
|
+
import { ProviderManager } from '../src/providers/manager';
|
|
25
|
+
import { ConfigManager } from '../src/config/config';
|
|
26
|
+
|
|
27
|
+
describe('duckVoteTool', () => {
|
|
28
|
+
// Rebuilt for every test in beforeEach.
let mockProviderManager: ProviderManager;
let mockConfigManager: jest.Mocked<ConfigManager>;

beforeEach(() => {
  // clearAllMocks() only wipes recorded calls/results. Also reset mockCreate so
  // that queued mockResolvedValueOnce/mockRejectedValueOnce values which an
  // earlier test did not consume cannot leak into the next test.
  jest.clearAllMocks();
  mockCreate.mockReset();

  // Two-provider configuration returned by the stubbed ConfigManager.
  mockConfigManager = {
    getConfig: jest.fn().mockReturnValue({
      providers: {
        openai: {
          api_key: 'key1',
          base_url: 'https://api.openai.com/v1',
          default_model: 'gpt-4',
          nickname: 'GPT-4',
          models: ['gpt-4'],
        },
        gemini: {
          api_key: 'key2',
          base_url: 'https://api.gemini.com/v1',
          default_model: 'gemini-pro',
          nickname: 'Gemini',
          models: ['gemini-pro'],
        },
      },
      default_provider: 'openai',
      cache_ttl: 300,
      enable_failover: true,
      default_temperature: 0.7,
    }),
  } as any;

  mockProviderManager = new ProviderManager(mockConfigManager);

  // Override the client method on all providers so each one calls mockCreate.
  const provider1 = mockProviderManager.getProvider('openai');
  const provider2 = mockProviderManager.getProvider('gemini');
  provider1['client'].chat.completions.create = mockCreate;
  provider2['client'].chat.completions.create = mockCreate;
});
|
|
67
|
+
|
|
68
|
+
it('should throw error when question is missing', async () => {
  // Options are supplied, but there is no `question`.
  const attempt = duckVoteTool(mockProviderManager, { options: ['A', 'B'] });

  await expect(attempt).rejects.toThrow('Question is required');
});
|
|
73
|
+
|
|
74
|
+
it('should throw error when options are missing', async () => {
  // A question is supplied, but there is no `options` list.
  const attempt = duckVoteTool(mockProviderManager, { question: 'Test?' });

  await expect(attempt).rejects.toThrow('At least 2 options are required');
});
|
|
79
|
+
|
|
80
|
+
it('should throw error when less than 2 options', async () => {
  // A single option is below the required minimum.
  const args = { question: 'Test?', options: ['A'] };
  const attempt = duckVoteTool(mockProviderManager, args);

  await expect(attempt).rejects.toThrow('At least 2 options are required');
});
|
|
85
|
+
|
|
86
|
+
it('should throw error when more than 10 options', async () => {
  // Build eleven options: "Option 1" through "Option 11".
  const options: string[] = [];
  for (let n = 1; n <= 11; n++) {
    options.push(`Option ${n}`);
  }

  const attempt = duckVoteTool(mockProviderManager, { question: 'Test?', options });

  await expect(attempt).rejects.toThrow('Maximum 10 options allowed');
});
|
|
92
|
+
|
|
93
|
+
it('should conduct vote with all providers', async () => {
  // Two valid JSON votes are queued, both for "Option A".
  const ballots = [
    {
      content: '{"choice": "Option A", "confidence": 85, "reasoning": "Best for performance"}',
      model: 'gpt-4',
    },
    {
      content: '{"choice": "Option A", "confidence": 75, "reasoning": "Scalable solution"}',
      model: 'gemini-pro',
    },
  ];
  for (const { content, model } of ballots) {
    mockCreate.mockResolvedValueOnce({
      choices: [{
        message: { content },
        finish_reason: 'stop',
      }],
      usage: { prompt_tokens: 10, completion_tokens: 20, total_tokens: 30 },
      model,
    });
  }

  const outcome = await duckVoteTool(mockProviderManager, {
    question: 'Best approach?',
    options: ['Option A', 'Option B'],
  });

  // Exactly one text block is returned.
  expect(outcome.content).toHaveLength(1);
  expect(outcome.content[0].type).toBe('text');

  // The rendered text names the question, the winner and the unanimous result.
  const rendered = outcome.content[0].text;
  for (const fragment of ['Vote Results', 'Best approach?', 'Option A', 'Winner', 'unanimous']) {
    expect(rendered).toContain(fragment);
  }
});
|
|
128
|
+
|
|
129
|
+
it('should handle split votes', async () => {
  // One vote per option; the "Option B" vote carries the higher confidence.
  const ballots = [
    {
      content: '{"choice": "Option A", "confidence": 60, "reasoning": "Good choice"}',
      model: 'gpt-4',
    },
    {
      content: '{"choice": "Option B", "confidence": 90, "reasoning": "Better choice"}',
      model: 'gemini-pro',
    },
  ];
  for (const { content, model } of ballots) {
    mockCreate.mockResolvedValueOnce({
      choices: [{
        message: { content },
        finish_reason: 'stop',
      }],
      usage: { prompt_tokens: 10, completion_tokens: 20, total_tokens: 30 },
      model,
    });
  }

  const outcome = await duckVoteTool(mockProviderManager, {
    question: 'Which option?',
    options: ['Option A', 'Option B'],
  });

  const rendered = outcome.content[0].text;
  expect(rendered).toContain('split');
  expect(rendered).toContain('tie-breaker');
  // Higher confidence wins
  expect(rendered).toContain('Option B');
});
|
|
158
|
+
|
|
159
|
+
it('should use specific voters when provided', async () => {
  // Only one completion is queued because only one voter is requested.
  const completion = {
    choices: [{
      message: { content: '{"choice": "Option A", "confidence": 80, "reasoning": "Only choice"}' },
      finish_reason: 'stop',
    }],
    usage: { prompt_tokens: 10, completion_tokens: 20, total_tokens: 30 },
    model: 'gpt-4',
  };
  mockCreate.mockResolvedValueOnce(completion);

  const outcome = await duckVoteTool(mockProviderManager, {
    question: 'Test?',
    options: ['Option A', 'Option B'],
    voters: ['openai'],
  });

  // A single completion call, reported as one valid vote out of one.
  expect(mockCreate).toHaveBeenCalledTimes(1);
  const rendered = outcome.content[0].text;
  expect(rendered).toContain('1/1 valid votes');
});
|
|
178
|
+
|
|
179
|
+
it('should handle invalid JSON responses gracefully', async () => {
  // First vote is free-form prose, second is JSON without a reasoning field.
  const ballots = [
    {
      content: 'I think Option A is clearly the best because of its simplicity.',
      model: 'gpt-4',
    },
    {
      content: '{"choice": "Option B", "confidence": 70}',
      model: 'gemini-pro',
    },
  ];
  for (const { content, model } of ballots) {
    mockCreate.mockResolvedValueOnce({
      choices: [{
        message: { content },
        finish_reason: 'stop',
      }],
      usage: { prompt_tokens: 10, completion_tokens: 20, total_tokens: 30 },
      model,
    });
  }

  const outcome = await duckVoteTool(mockProviderManager, {
    question: 'Test?',
    options: ['Option A', 'Option B'],
  });

  // Should still work - fallback parsing should find "Option A"
  const rendered = outcome.content[0].text;
  expect(rendered).toContain('2/2 valid votes');
});
|
|
207
|
+
|
|
208
|
+
it('should handle provider errors gracefully', async () => {
  // First call yields a valid vote; the second call rejects.
  const validVote = {
    choices: [{
      message: { content: '{"choice": "Option A", "confidence": 85}' },
      finish_reason: 'stop',
    }],
    usage: { prompt_tokens: 10, completion_tokens: 20, total_tokens: 30 },
    model: 'gpt-4',
  };
  mockCreate.mockResolvedValueOnce(validVote);
  mockCreate.mockRejectedValueOnce(new Error('API Error'));

  const outcome = await duckVoteTool(mockProviderManager, {
    question: 'Test?',
    options: ['Option A', 'Option B'],
  });

  // One valid vote, one error
  const rendered = outcome.content[0].text;
  expect(rendered).toContain('Option A');
  // Error response should be marked invalid
  expect(rendered).toContain('Invalid vote');
});
|
|
230
|
+
|
|
231
|
+
it('should work without reasoning requirement', async () => {
  // The queued vote has no reasoning field.
  const completion = {
    choices: [{
      message: { content: '{"choice": "Option A", "confidence": 80}' },
      finish_reason: 'stop',
    }],
    usage: { prompt_tokens: 10, completion_tokens: 20, total_tokens: 30 },
    model: 'gpt-4',
  };
  mockCreate.mockResolvedValueOnce(completion);

  const outcome = await duckVoteTool(mockProviderManager, {
    question: 'Test?',
    options: ['Option A', 'Option B'],
    voters: ['openai'],
    require_reasoning: false,
  });

  const rendered = outcome.content[0].text;
  expect(rendered).toContain('Option A');
});
|
|
250
|
+
});
|