mcp-rubber-duck 1.2.4 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +1 -0
- package/.github/workflows/security.yml +4 -2
- package/.github/workflows/semantic-release.yml +4 -2
- package/CHANGELOG.md +20 -0
- package/README.md +116 -2
- package/audit-ci.json +3 -1
- package/dist/config/types.d.ts +78 -0
- package/dist/config/types.d.ts.map +1 -1
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +150 -0
- package/dist/server.js.map +1 -1
- package/dist/services/consensus.d.ts +28 -0
- package/dist/services/consensus.d.ts.map +1 -0
- package/dist/services/consensus.js +257 -0
- package/dist/services/consensus.js.map +1 -0
- package/dist/services/mcp-client-manager.d.ts.map +1 -1
- package/dist/services/mcp-client-manager.js +1 -3
- package/dist/services/mcp-client-manager.js.map +1 -1
- package/dist/tools/duck-debate.d.ts +16 -0
- package/dist/tools/duck-debate.d.ts.map +1 -0
- package/dist/tools/duck-debate.js +272 -0
- package/dist/tools/duck-debate.js.map +1 -0
- package/dist/tools/duck-iterate.d.ts +14 -0
- package/dist/tools/duck-iterate.d.ts.map +1 -0
- package/dist/tools/duck-iterate.js +195 -0
- package/dist/tools/duck-iterate.js.map +1 -0
- package/dist/tools/duck-judge.d.ts +15 -0
- package/dist/tools/duck-judge.d.ts.map +1 -0
- package/dist/tools/duck-judge.js +208 -0
- package/dist/tools/duck-judge.js.map +1 -0
- package/dist/tools/duck-vote.d.ts +14 -0
- package/dist/tools/duck-vote.d.ts.map +1 -0
- package/dist/tools/duck-vote.js +46 -0
- package/dist/tools/duck-vote.js.map +1 -0
- package/package.json +1 -1
- package/src/config/types.ts +92 -0
- package/src/server.ts +154 -0
- package/src/services/consensus.ts +324 -0
- package/src/services/mcp-client-manager.ts +1 -3
- package/src/tools/duck-debate.ts +383 -0
- package/src/tools/duck-iterate.ts +253 -0
- package/src/tools/duck-judge.ts +301 -0
- package/src/tools/duck-vote.ts +87 -0
- package/tests/consensus.test.ts +282 -0
- package/tests/duck-debate.test.ts +286 -0
- package/tests/duck-iterate.test.ts +249 -0
- package/tests/duck-judge.test.ts +296 -0
- package/tests/duck-vote.test.ts +250 -0
|
@@ -0,0 +1,301 @@
|
|
|
1
|
+
import { ProviderManager } from '../providers/manager.js';
|
|
2
|
+
import { DuckResponse, JudgeEvaluation } from '../config/types.js';
|
|
3
|
+
import { logger } from '../utils/logger.js';
|
|
4
|
+
|
|
5
|
+
/**
 * Arguments read by `duckJudgeTool`.
 */
export interface DuckJudgeArgs {
  /** Responses to compare; the tool rejects fewer than two. */
  responses: Array<DuckResponse>;
  /** Provider that acts as judge. When omitted, the first provider name reported by the provider manager is used. */
  judge?: string;
  /** Criteria names listed in the judge prompt. When omitted, `DEFAULT_CRITERIA` is used. */
  criteria?: Array<string>;
  /** Optional role description; when set, the prompt is prefixed with "You are a <persona> evaluating these responses." */
  persona?: string;
}
|
|
11
|
+
|
|
12
|
+
/**
 * Shape the judge is asked to return as JSON (see the format spelled out in the judge prompt).
 * The value comes from `JSON.parse` on model output, so fields are checked again at the point of use.
 */
interface ParsedJudgment {
  /** One entry per judged response, in the order the judge listed them. */
  rankings: {
    provider: string;
    score: number;
    justification: string;
  }[];
  /** Optional per-provider map of criterion name to score. */
  criteria_scores?: Record<string, Record<string, number>>;
  /** Judge's overall assessment. */
  summary: string;
}
|
|
21
|
+
|
|
22
|
+
/** Evaluation criteria used when the caller does not pass any. */
const DEFAULT_CRITERIA: string[] = ['accuracy', 'completeness', 'clarity'];
|
|
23
|
+
|
|
24
|
+
/**
 * Tool handler that asks one provider (the "judge") to rank a set of duck responses.
 *
 * Steps: validate `args`, choose the judge, build the judging prompt, send it through
 * `providerManager.askDuck`, parse the reply into an evaluation and format it as text.
 *
 * @param providerManager - Supplies provider names and the `askDuck` call used to query the judge.
 * @param args - Raw tool arguments, read as {@link DuckJudgeArgs}.
 * @returns A tool result holding a single text item with the formatted evaluation.
 * @throws Error when fewer than two responses are supplied, when a response lacks a string
 *   `provider` or `nickname`, or when no judge provider is available.
 */
export async function duckJudgeTool(
  providerManager: ProviderManager,
  args: Record<string, unknown>
) {
  const { responses, judge, criteria, persona } = args as unknown as DuckJudgeArgs;

  // Validate inputs
  if (!responses || !Array.isArray(responses) || responses.length === 0) {
    throw new Error('At least one response is required to judge');
  }

  if (responses.length === 1) {
    throw new Error('At least two responses are required for comparison');
  }

  // Name matching lower-cases provider and nickname, so reject malformed entries here
  // with a clear message instead of failing later with an opaque TypeError.
  responses.forEach((candidate, position) => {
    if (
      !candidate ||
      typeof candidate.provider !== 'string' ||
      typeof candidate.nickname !== 'string'
    ) {
      throw new Error(`Response ${position + 1} must include string provider and nickname fields`);
    }
  });

  // A destructuring default only covers `undefined`; null, non-array and empty criteria
  // must fall back to the defaults as well, otherwise the prompt has nothing to judge by.
  const requestedCriteria = Array.isArray(criteria)
    ? criteria.filter((c): c is string => typeof c === 'string' && c.trim() !== '')
    : [];
  const effectiveCriteria = requestedCriteria.length > 0 ? requestedCriteria : DEFAULT_CRITERIA;

  // Determine judge provider
  const judgeProvider = judge || providerManager.getProviderNames()[0];
  if (!judgeProvider) {
    throw new Error('No judge provider available');
  }

  logger.info(`Starting judgment with ${judgeProvider} on ${responses.length} responses`);

  // Build the judgment prompt
  const prompt = buildJudgePrompt(responses, effectiveCriteria, persona);

  // Get judgment from the judge duck
  const judgeResponse = await providerManager.askDuck(judgeProvider, prompt);

  // Parse the judgment
  const evaluation = parseJudgment(
    judgeResponse.content,
    judgeResponse.provider,
    judgeResponse.nickname,
    responses,
    effectiveCriteria
  );

  // Format output
  const formattedOutput = formatJudgeResult(evaluation);

  logger.info(
    `Judgment completed by ${judgeProvider}: #1 is ${evaluation.rankings[0]?.provider || 'unknown'}`
  );

  return {
    content: [
      {
        type: 'text',
        text: formattedOutput,
      },
    ],
  };
}
|
|
83
|
+
|
|
84
|
+
/**
 * Builds the prompt sent to the judge.
 *
 * The prompt lists every response (labelled with its position, nickname and provider),
 * the numbered criteria, and the exact JSON format the judge must answer in.
 *
 * @param responses - Responses to be evaluated.
 * @param criteria - Criterion names; shown as a numbered list and as keys in the JSON example.
 * @param persona - Optional role; when set, the prompt starts with a sentence assigning it.
 * @returns The complete prompt text.
 */
function buildJudgePrompt(
  responses: DuckResponse[],
  criteria: string[],
  persona?: string
): string {
  const criteriaList = criteria.map((c, i) => `${i + 1}. ${c}`).join('\n');

  const responsesText = responses
    .map((r, i) => `--- Response ${i + 1} (${r.nickname} / ${r.provider}) ---\n${r.content}\n`)
    .join('\n');

  const personaText = persona ? `You are a ${persona} evaluating these responses.\n\n` : '';

  // JSON.stringify quotes and escapes each criterion name, so a name containing `"` or `\`
  // still gives a well-formed key in the example. For plain names the output is unchanged.
  const criteriaTemplate = criteria.map(c => `${JSON.stringify(c)}: <0-100>`).join(', ');

  return `${personaText}You are a judge evaluating ${responses.length} responses to the same prompt.

RESPONSES TO EVALUATE:
${responsesText}

EVALUATION CRITERIA:
${criteriaList}

INSTRUCTIONS:
1. Evaluate each response against ALL criteria
2. Assign a score from 0-100 for each response
3. Rank responses from best to worst
4. Provide a brief justification for each ranking
5. Give a final summary

Respond with ONLY a JSON object in this exact format:
{
"rankings": [
{"provider": "<provider name>", "score": <0-100>, "justification": "<brief explanation>"},
{"provider": "<provider name>", "score": <0-100>, "justification": "<brief explanation>"}
],
"criteria_scores": {
"<provider>": {${criteriaTemplate}}
},
"summary": "<overall assessment and recommendation>"
}

IMPORTANT:
- Rankings must be ordered from highest score to lowest
- Use the exact provider names from the responses
- Do NOT include any text before or after the JSON
- Do NOT use markdown code blocks`;
}
|
|
132
|
+
|
|
133
|
+
/**
 * Resolves the name a judge wrote in its answer to one of the original responses.
 *
 * Matching is case-insensitive and goes from most to least specific:
 *  1. exact provider name,
 *  2. exact nickname,
 *  3. the judge's text contains a provider name or nickname; the longest contained
 *     name wins, so "openai-mini (fast)" resolves to `openai-mini` rather than `openai`.
 *
 * Exact matches are tried before containment so that a nickname such as "Duck Pro" is
 * not captured by another response whose nickname is merely "Duck". Empty provider or
 * nickname values are ignored during containment because every string contains "".
 *
 * @param judgeProviderName - Name taken from the judge's JSON; a non-string value yields no match.
 * @param originalResponses - Responses that were handed to the judge.
 * @returns The matching response, or `undefined` when nothing matches.
 */
function matchProvider(
  judgeProviderName: string,
  originalResponses: DuckResponse[]
): DuckResponse | undefined {
  // The value comes from parsed model output, so it may not be a string at runtime.
  if (typeof judgeProviderName !== 'string') return undefined;

  const nameLower = judgeProviderName.trim().toLowerCase();
  if (nameLower === '') return undefined;

  const exactProvider = originalResponses.find(r => r.provider.toLowerCase() === nameLower);
  if (exactProvider) return exactProvider;

  const exactNickname = originalResponses.find(r => r.nickname.toLowerCase() === nameLower);
  if (exactNickname) return exactNickname;

  // Containment fallback: keep the candidate whose contained name is longest.
  // Starting at length 0 with a strict comparison skips empty names, and on equal
  // length the earlier response is kept.
  let best: DuckResponse | undefined;
  let bestLength = 0;
  for (const candidate of originalResponses) {
    for (const key of [candidate.provider, candidate.nickname]) {
      const keyLower = key.toLowerCase();
      if (keyLower.length > bestLength && nameLower.includes(keyLower)) {
        best = candidate;
        bestLength = keyLower.length;
      }
    }
  }

  return best;
}
|
|
158
|
+
|
|
159
|
+
/**
 * Turns the judge's raw reply into a `JudgeEvaluation`.
 *
 * The first `{` through the last `}` of the reply is parsed as JSON. Ranking entries are
 * mapped back to the original responses via `matchProvider`; entries that are malformed,
 * unmatched or that repeat an already-ranked provider are skipped. Ranks are assigned
 * consecutively in the judge's order, and any original response the judge did not rank
 * is appended with score 0. When no JSON is found or it fails to parse, the result of
 * `createFallbackEvaluation` is returned instead.
 *
 * @param response - Raw text returned by the judge.
 * @param judgeProvider - Provider name of the judge.
 * @param judgeNickname - Nickname of the judge.
 * @param originalResponses - Responses that were handed to the judge.
 * @param criteria - Criteria the judge was asked to apply.
 * @returns The parsed evaluation, or the fallback evaluation.
 */
function parseJudgment(
  response: string,
  judgeProvider: string,
  judgeNickname: string,
  originalResponses: DuckResponse[],
  criteria: string[]
): JudgeEvaluation {
  const evaluation: JudgeEvaluation = {
    judge: judgeProvider,
    judgeNickname: judgeNickname,
    prompt: '', // Will be filled by caller if needed
    criteria,
    rankings: [],
    criteriaScores: {},
    summary: '',
    rawResponse: response,
  };

  try {
    // Try to extract JSON from the response
    const jsonMatch = response.match(/\{[\s\S]*\}/);
    if (!jsonMatch) {
      logger.warn(`No JSON found in judge response from ${judgeProvider}`);
      return createFallbackEvaluation(evaluation, originalResponses, response);
    }

    const parsed = JSON.parse(jsonMatch[0]) as ParsedJudgment;
    const matchedProviders = new Set<string>();

    // Parse rankings
    if (Array.isArray(parsed.rankings)) {
      for (const entry of parsed.rankings) {
        // Skip malformed entries rather than throwing, which would discard the whole judgment.
        if (!entry || typeof entry !== 'object' || typeof entry.provider !== 'string') {
          continue;
        }

        const matched = matchProvider(entry.provider, originalResponses);
        if (!matched || matchedProviders.has(matched.provider)) {
          continue;
        }
        matchedProviders.add(matched.provider);

        // Accept numeric strings such as "85"; anything non-finite counts as 0.
        const rawScore: unknown = entry.score;
        const numericScore =
          typeof rawScore === 'number'
            ? rawScore
            : typeof rawScore === 'string'
              ? Number.parseFloat(rawScore)
              : Number.NaN;
        const score = Number.isFinite(numericScore)
          ? Math.max(0, Math.min(100, numericScore))
          : 0;

        evaluation.rankings.push({
          provider: matched.provider,
          nickname: matched.nickname,
          // Rank by position among accepted entries: skipped entries leave no gaps, and the
          // ranks cannot collide with those given to unevaluated responses below.
          rank: evaluation.rankings.length + 1,
          score,
          justification: entry.justification?.toString() || '',
        });
      }
    }

    // Parse criteria scores, keeping only per-provider values that are plain objects
    // so the formatter can iterate them safely.
    if (
      parsed.criteria_scores &&
      typeof parsed.criteria_scores === 'object' &&
      !Array.isArray(parsed.criteria_scores)
    ) {
      const sanitized: Record<string, Record<string, number>> = {};
      for (const [provider, scores] of Object.entries(parsed.criteria_scores)) {
        if (scores && typeof scores === 'object' && !Array.isArray(scores)) {
          sanitized[provider] = scores;
        }
      }
      evaluation.criteriaScores = sanitized;
    }

    // Parse summary
    if (parsed.summary) {
      evaluation.summary = parsed.summary.toString();
    }
  } catch (error) {
    logger.warn(`Failed to parse JSON judgment from ${judgeProvider}:`, error);
    return createFallbackEvaluation(evaluation, originalResponses, response);
  }

  // Ensure all original responses are represented
  const rankedProviders = new Set(evaluation.rankings.map(r => r.provider));
  for (const resp of originalResponses) {
    if (!rankedProviders.has(resp.provider)) {
      evaluation.rankings.push({
        provider: resp.provider,
        nickname: resp.nickname,
        rank: evaluation.rankings.length + 1,
        score: 0,
        justification: 'Not evaluated by judge',
      });
    }
  }

  return evaluation;
}
|
|
236
|
+
|
|
237
|
+
/**
 * Fills `evaluation` with neutral placeholder data for the case where the judge's
 * reply could not be parsed.
 *
 * Every original response gets score 50 and a rank equal to its input position.
 * The passed-in `evaluation` object is modified and returned.
 *
 * @param evaluation - Evaluation to overwrite (rankings, summary and rawResponse).
 * @param originalResponses - Responses that were handed to the judge.
 * @param rawResponse - Unparsed judge reply, stored for later review.
 * @returns The same `evaluation` object.
 */
function createFallbackEvaluation(
  evaluation: JudgeEvaluation,
  originalResponses: DuckResponse[],
  rawResponse: string
): JudgeEvaluation {
  const placeholders: JudgeEvaluation['rankings'] = [];

  for (const [position, original] of originalResponses.entries()) {
    placeholders.push({
      provider: original.provider,
      nickname: original.nickname,
      rank: position + 1,
      score: 50,
      justification: 'Unable to parse judge response',
    });
  }

  evaluation.rankings = placeholders;
  evaluation.summary = 'Judge evaluation parsing failed. Raw response available for review.';
  evaluation.rawResponse = rawResponse;

  return evaluation;
}
|
|
254
|
+
|
|
255
|
+
/**
 * Renders a `JudgeEvaluation` as the text block returned by the judge tool.
 *
 * Sections, in order: header with judge and criteria, one entry per ranking (medal for
 * ranks 1-3, a ten-segment score bar, the justification), an optional per-provider
 * criteria breakdown, an optional summary, and a footer with the number of responses.
 *
 * The score bar is clamped to 0-10 segments so that out-of-range or non-finite scores
 * cannot make `String.prototype.repeat` throw; the numeric score is printed unchanged.
 *
 * @param evaluation - Evaluation to render.
 * @returns The formatted text.
 */
function formatJudgeResult(evaluation: JudgeEvaluation): string {
  const heavyRule = `═══════════════════════════════════════\n`;
  const lightRule = `─────────────────────────────────────\n`;
  const medals = new Map<number, string>([
    [1, '🥇'],
    [2, '🥈'],
    [3, '🥉'],
  ]);

  let output = `⚖️ **Judge Evaluation**\n`;
  output += heavyRule + `\n`;
  output += `**Judge:** ${evaluation.judgeNickname} (${evaluation.judge})\n`;
  output += `**Criteria:** ${evaluation.criteria.join(', ')}\n\n`;

  // Rankings
  output += `**Rankings:**\n`;
  output += lightRule;

  for (const ranking of evaluation.rankings) {
    const medal = medals.get(ranking.rank) ?? ' ';

    // repeat() throws RangeError on a negative count, so clamp the filled-segment count.
    const finiteScore = Number.isFinite(ranking.score) ? ranking.score : 0;
    const filled = Math.max(0, Math.min(10, Math.floor(finiteScore / 10)));
    const bar = '█'.repeat(filled);
    const emptyBar = '░'.repeat(10 - filled);

    output += `${medal} **#${ranking.rank} ${ranking.nickname}** (${ranking.provider})\n`;
    output += ` Score: ${bar}${emptyBar} ${ranking.score}/100\n`;
    output += ` 💭 "${ranking.justification}"\n\n`;
  }

  // Criteria breakdown if available
  const criteriaScores = evaluation.criteriaScores ?? {};
  if (Object.keys(criteriaScores).length > 0) {
    output += `**Criteria Breakdown:**\n`;
    output += lightRule;

    for (const [provider, scores] of Object.entries(criteriaScores)) {
      output += `📊 **${provider}:**\n`;
      // The map originates from model output; a null or non-object value prints no rows.
      const rows = scores && typeof scores === 'object' ? Object.entries(scores) : [];
      for (const [criterion, score] of rows) {
        const criterionScore = typeof score === 'number' ? score : 0;
        output += ` • ${criterion}: ${criterionScore}/100\n`;
      }
      output += `\n`;
    }
  }

  // Summary
  if (evaluation.summary) {
    output += `**Summary:**\n`;
    output += lightRule;
    output += `${evaluation.summary}\n\n`;
  }

  output += heavyRule;
  output += `📋 Evaluated ${evaluation.rankings.length} responses\n`;

  return output;
}
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
import { ProviderManager } from '../providers/manager.js';
|
|
2
|
+
import { ConsensusService } from '../services/consensus.js';
|
|
3
|
+
import { VoteResult } from '../config/types.js';
|
|
4
|
+
import { logger } from '../utils/logger.js';
|
|
5
|
+
|
|
6
|
+
/**
 * Arguments read by `duckVoteTool`.
 */
export interface DuckVoteArgs {
  /** Question put to the voters; must be a non-empty string. */
  question: string;
  /** Options to choose from; the tool accepts between 2 and 10. */
  options: Array<string>;
  /** Provider names that vote. When omitted or empty, every provider known to the provider manager votes. */
  voters?: Array<string>;
  /** Passed to the vote-prompt builder; defaults to `true` when omitted. */
  require_reasoning?: boolean;
}
|
|
12
|
+
|
|
13
|
+
/**
 * Tool handler that lets several providers vote on a question and reports the outcome.
 *
 * Steps: validate `args`, choose the voters, build one vote prompt with `ConsensusService`,
 * send it to all voters through `providerManager.compareDucks`, parse each reply into a
 * vote, aggregate the votes and format the result as text.
 *
 * @param providerManager - Supplies provider names and the `compareDucks` call.
 * @param args - Raw tool arguments, read as {@link DuckVoteArgs}.
 * @returns A tool result holding a single text item with the formatted vote result.
 * @throws Error when the question is missing, when there are fewer than 2 or more than 10
 *   options, when an option is not a non-empty string, when `voters` is given but is not
 *   an array of strings, or when no voters are available.
 */
export async function duckVoteTool(
  providerManager: ProviderManager,
  args: Record<string, unknown>
) {
  const {
    question,
    options,
    voters,
    require_reasoning = true,
  } = args as unknown as DuckVoteArgs;

  // Validate inputs
  if (!question || typeof question !== 'string') {
    throw new Error('Question is required');
  }

  if (!options || !Array.isArray(options) || options.length < 2) {
    throw new Error('At least 2 options are required');
  }

  if (options.length > 10) {
    throw new Error('Maximum 10 options allowed');
  }

  // Options are listed in the prompt and compared with vote text, so each must be usable text.
  if (!options.every(option => typeof option === 'string' && option.trim() !== '')) {
    throw new Error('Options must be non-empty strings');
  }

  // A bare string also has a positive `.length`; without this check it would be forwarded
  // to compareDucks as if it were a list of provider names.
  if (
    voters !== undefined &&
    voters !== null &&
    (!Array.isArray(voters) || !voters.every(voter => typeof voter === 'string'))
  ) {
    throw new Error('Voters must be an array of provider names');
  }

  // Get voters (all providers if not specified)
  const voterNames = voters && voters.length > 0
    ? voters
    : providerManager.getProviderNames();

  if (voterNames.length === 0) {
    throw new Error('No voters available');
  }

  logger.info(`Starting vote with ${voterNames.length} voters on: "${question}"`);

  const consensusService = new ConsensusService();
  const votePrompt = consensusService.buildVotePrompt(
    question,
    options,
    require_reasoning
  );

  // Get votes from all ducks in parallel
  const responses = await providerManager.compareDucks(votePrompt, voterNames);

  // Parse votes
  const votes: VoteResult[] = responses.map(response =>
    consensusService.parseVote(
      response.content,
      response.provider,
      response.nickname,
      options
    )
  );

  // Aggregate results
  const aggregatedResult = consensusService.aggregateVotes(question, options, votes);

  // Format output
  const formattedOutput = consensusService.formatVoteResult(aggregatedResult);

  logger.info(
    `Vote completed: ${aggregatedResult.consensusLevel} consensus, ` +
    `winner: ${aggregatedResult.winner || 'none'}`
  );

  return {
    content: [
      {
        type: 'text',
        text: formattedOutput,
      },
    ],
  };
}
|
|
@@ -0,0 +1,282 @@
|
|
|
1
|
+
import { describe, it, expect, beforeEach } from '@jest/globals';
|
|
2
|
+
import { ConsensusService } from '../src/services/consensus';
|
|
3
|
+
import { VoteResult } from '../src/config/types';
|
|
4
|
+
|
|
5
|
+
describe('ConsensusService', () => {
|
|
6
|
+
let service: ConsensusService;
|
|
7
|
+
|
|
8
|
+
beforeEach(() => {
|
|
9
|
+
service = new ConsensusService();
|
|
10
|
+
});
|
|
11
|
+
|
|
12
|
+
// buildVotePrompt: the prompt must contain the question and the numbered options,
// and mention "reasoning" only when reasoning is requested.
describe('buildVotePrompt', () => {
  it('should build a vote prompt with reasoning', () => {
    const question = 'Best programming language?';
    const choices = ['Python', 'JavaScript', 'Rust'];

    const builtPrompt = service.buildVotePrompt(question, choices, true);

    const expectedFragments = [question, '1. Python', '2. JavaScript', '3. Rust', '"reasoning"'];
    for (const fragment of expectedFragments) {
      expect(builtPrompt).toContain(fragment);
    }
  });

  it('should build a vote prompt without reasoning', () => {
    const question = 'Best color?';

    const builtPrompt = service.buildVotePrompt(question, ['Red', 'Blue'], false);

    for (const fragment of [question, '1. Red', '2. Blue']) {
      expect(builtPrompt).toContain(fragment);
    }
    expect(builtPrompt).not.toContain('"reasoning"');
  });
});
|
|
40
|
+
|
|
41
|
+
// parseVote: JSON replies, replies with surrounding text, and plain-text fallbacks.
describe('parseVote', () => {
  const options = ['Option A', 'Option B', 'Option C'];

  // Parses `raw` against the shared options; provider and nickname default to the
  // values most cases use.
  const castVote = (raw: string, provider = 'provider1', nickname = 'Duck 1') =>
    service.parseVote(raw, provider, nickname, options);

  it('should parse valid JSON vote with all fields', () => {
    const raw = JSON.stringify({
      choice: 'Option A',
      confidence: 85,
      reasoning: 'It is the best option because...',
    });

    const vote = castVote(raw, 'test-provider', 'Test Duck');

    expect(vote.choice).toBe('Option A');
    expect(vote.confidence).toBe(85);
    expect(vote.reasoning).toBe('It is the best option because...');
    expect(vote.voter).toBe('test-provider');
    expect(vote.nickname).toBe('Test Duck');
  });

  it('should parse vote with extra text around JSON', () => {
    const raw = [
      'Here is my vote:',
      '{"choice": "Option B", "confidence": 70, "reasoning": "Good choice"}',
      'Thank you!',
    ].join('\n');

    const vote = castVote(raw);

    expect(vote.choice).toBe('Option B');
    expect(vote.confidence).toBe(70);
  });

  it('should clamp confidence to 0-100 range', () => {
    const tooHigh = castVote(JSON.stringify({ choice: 'Option A', confidence: 150 }));
    expect(tooHigh.confidence).toBe(100);

    const tooLow = castVote(JSON.stringify({ choice: 'Option A', confidence: -20 }));
    expect(tooLow.confidence).toBe(0);
  });

  it('should handle case-insensitive choice matching', () => {
    const vote = castVote(JSON.stringify({ choice: 'option a', confidence: 80 }));

    expect(vote.choice).toBe('Option A');
  });

  it('should handle partial/fuzzy choice matching', () => {
    const vote = castVote(JSON.stringify({ choice: 'A', confidence: 80 }));

    expect(vote.choice).toBe('Option A');
  });

  it('should fallback parse when JSON is invalid', () => {
    const vote = castVote('I think Option C is the best because it has great features.');

    expect(vote.choice).toBe('Option C');
    expect(vote.confidence).toBe(50); // Default fallback confidence
  });

  it('should return empty choice when no match found', () => {
    const vote = castVote('I cannot decide on any of these options.');

    expect(vote.choice).toBe('');
  });

  it('should parse string confidence values', () => {
    const vote = castVote(JSON.stringify({ choice: 'Option A', confidence: '75' }));

    expect(vote.confidence).toBe(75);
  });
});
|
|
134
|
+
|
|
135
|
+
// aggregateVotes: tallying, winner selection, tie handling and consensus levels.
describe('aggregateVotes', () => {
  const question = 'Best framework?';
  const options = ['React', 'Vue', 'Angular'];

  // Builds the vote of voter `p<n>` / nickname `D<n>` with an empty raw response.
  const ballot = (n: number, choice: string, confidence: number, reasoning = ''): VoteResult => ({
    voter: `p${n}`,
    nickname: `D${n}`,
    choice,
    confidence,
    reasoning,
    rawResponse: '',
  });

  it('should aggregate votes and determine winner', () => {
    const votes: VoteResult[] = [
      ballot(1, 'React', 80, 'R1'),
      ballot(2, 'React', 90, 'R2'),
      ballot(3, 'Vue', 70, 'R3'),
    ];

    const result = service.aggregateVotes(question, options, votes);

    expect(result.winner).toBe('React');
    expect(result.tally['React']).toBe(2);
    expect(result.tally['Vue']).toBe(1);
    expect(result.tally['Angular']).toBe(0);
    expect(result.isTie).toBe(false);
    expect(result.consensusLevel).toBe('majority');
    expect(result.validVotes).toBe(3);
  });

  it('should detect unanimous consensus', () => {
    const votes: VoteResult[] = [
      ballot(1, 'React', 80),
      ballot(2, 'React', 90),
      ballot(3, 'React', 85),
    ];

    const result = service.aggregateVotes(question, options, votes);

    expect(result.winner).toBe('React');
    expect(result.consensusLevel).toBe('unanimous');
  });

  it('should break tie by confidence', () => {
    const votes: VoteResult[] = [ballot(1, 'React', 60), ballot(2, 'Vue', 90)];

    const result = service.aggregateVotes(question, options, votes);

    expect(result.isTie).toBe(true);
    expect(result.winner).toBe('Vue'); // Higher confidence
    expect(result.consensusLevel).toBe('split');
  });

  it('should handle no valid votes', () => {
    const votes: VoteResult[] = [ballot(1, '', 0), ballot(2, 'InvalidOption', 50)];

    const result = service.aggregateVotes(question, options, votes);

    expect(result.winner).toBeNull();
    expect(result.validVotes).toBe(0);
    expect(result.consensusLevel).toBe('none');
  });

  it('should calculate average confidence per option', () => {
    const votes: VoteResult[] = [
      ballot(1, 'React', 80),
      ballot(2, 'React', 60),
      ballot(3, 'Vue', 90),
    ];

    const result = service.aggregateVotes(question, options, votes);

    expect(result.confidenceByOption['React']).toBe(70); // (80+60)/2
    expect(result.confidenceByOption['Vue']).toBe(90);
    expect(result.confidenceByOption['Angular']).toBe(0);
  });

  // This 2-2-1 scenario used to be built and aggregated inside the plurality test
  // without any assertion; it now has its own checks.
  it('should tally votes when the top options are tied', () => {
    const votes: VoteResult[] = [
      ballot(1, 'React', 80),
      ballot(2, 'React', 70),
      ballot(3, 'Vue', 90),
      ballot(4, 'Angular', 60),
      ballot(5, 'Angular', 50),
    ];

    const result = service.aggregateVotes(question, options, votes);

    // React has 2, Angular has 2, Vue has 1 - it's a tie
    expect(result.tally['React']).toBe(2);
    expect(result.tally['Angular']).toBe(2);
    expect(result.tally['Vue']).toBe(1);
    expect(result.validVotes).toBe(5);
    expect(result.isTie).toBe(true);
  });

  it('should detect plurality consensus', () => {
    const votes: VoteResult[] = [
      ballot(1, 'React', 80),
      ballot(2, 'React', 70),
      ballot(3, 'Vue', 90),
      ballot(4, 'Angular', 60),
    ];

    const result = service.aggregateVotes(question, options, votes);

    expect(result.winner).toBe('React');
    expect(result.consensusLevel).toBe('plurality'); // 2/4 = 50%, not majority
  });
});
|
|
235
|
+
|
|
236
|
+
// formatVoteResult: checks the text rendered from an aggregated vote.
describe('formatVoteResult', () => {
  // Aggregates `votes` for the given question/options and returns the formatted text.
  const render = (question: string, options: string[], votes: VoteResult[]): string =>
    service.formatVoteResult(service.aggregateVotes(question, options, votes));

  it('should format vote results with winner', () => {
    const votes: VoteResult[] = [
      {
        voter: 'p1',
        nickname: 'Duck 1',
        choice: 'Option A',
        confidence: 85,
        reasoning: 'Best choice',
        rawResponse: '',
      },
      {
        voter: 'p2',
        nickname: 'Duck 2',
        choice: 'Option A',
        confidence: 75,
        reasoning: 'Agreed',
        rawResponse: '',
      },
    ];

    const text = render('Test question?', ['Option A', 'Option B'], votes);

    const expectedFragments = [
      'Vote Results',
      'Test question?',
      'Winner',
      'Option A',
      'unanimous',
      'Duck 1',
      'Duck 2',
      'Best choice',
      '2/2 valid votes',
    ];
    for (const fragment of expectedFragments) {
      expect(text).toContain(fragment);
    }
  });

  it('should format results with invalid votes', () => {
    const votes: VoteResult[] = [
      { voter: 'p1', nickname: 'Duck 1', choice: '', confidence: 0, reasoning: '', rawResponse: '' },
    ];

    const text = render('Test?', ['A', 'B'], votes);

    for (const fragment of ['No valid votes', 'Invalid vote', '0/1 valid votes']) {
      expect(text).toContain(fragment);
    }
  });

  it('should indicate tie-breaker when applicable', () => {
    const votes: VoteResult[] = [
      { voter: 'p1', nickname: 'D1', choice: 'A', confidence: 60, reasoning: '', rawResponse: '' },
      { voter: 'p2', nickname: 'D2', choice: 'B', confidence: 90, reasoning: '', rawResponse: '' },
    ];

    const text = render('Test?', ['A', 'B'], votes);

    expect(text).toContain('tie-breaker');
  });
});
|
|
282
|
+
});
|