mcp-rubber-duck 1.2.5 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.desktop.example +1 -1
- package/.env.pi.example +1 -1
- package/.env.template +1 -1
- package/.eslintrc.json +1 -0
- package/CHANGELOG.md +19 -0
- package/README.md +238 -44
- package/assets/mcp-rubber-duck.png +0 -0
- package/audit-ci.json +2 -1
- package/config/config.example.json +4 -4
- package/dist/config/config.js +4 -4
- package/dist/config/config.js.map +1 -1
- package/dist/config/types.d.ts +78 -0
- package/dist/config/types.d.ts.map +1 -1
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +150 -0
- package/dist/server.js.map +1 -1
- package/dist/services/consensus.d.ts +28 -0
- package/dist/services/consensus.d.ts.map +1 -0
- package/dist/services/consensus.js +257 -0
- package/dist/services/consensus.js.map +1 -0
- package/dist/tools/duck-debate.d.ts +16 -0
- package/dist/tools/duck-debate.d.ts.map +1 -0
- package/dist/tools/duck-debate.js +272 -0
- package/dist/tools/duck-debate.js.map +1 -0
- package/dist/tools/duck-iterate.d.ts +14 -0
- package/dist/tools/duck-iterate.d.ts.map +1 -0
- package/dist/tools/duck-iterate.js +195 -0
- package/dist/tools/duck-iterate.js.map +1 -0
- package/dist/tools/duck-judge.d.ts +15 -0
- package/dist/tools/duck-judge.d.ts.map +1 -0
- package/dist/tools/duck-judge.js +208 -0
- package/dist/tools/duck-judge.js.map +1 -0
- package/dist/tools/duck-vote.d.ts +14 -0
- package/dist/tools/duck-vote.d.ts.map +1 -0
- package/dist/tools/duck-vote.js +46 -0
- package/dist/tools/duck-vote.js.map +1 -0
- package/docker-compose.yml +1 -1
- package/package.json +1 -1
- package/src/config/config.ts +4 -4
- package/src/config/types.ts +92 -0
- package/src/server.ts +154 -0
- package/src/services/consensus.ts +324 -0
- package/src/tools/duck-debate.ts +383 -0
- package/src/tools/duck-iterate.ts +253 -0
- package/src/tools/duck-judge.ts +301 -0
- package/src/tools/duck-vote.ts +87 -0
- package/tests/consensus.test.ts +282 -0
- package/tests/duck-debate.test.ts +286 -0
- package/tests/duck-iterate.test.ts +249 -0
- package/tests/duck-judge.test.ts +296 -0
- package/tests/duck-vote.test.ts +250 -0
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
import { logger } from '../utils/logger.js';
|
|
2
|
+
// Criteria used by duckJudgeTool when the caller's args do not include `criteria`.
const DEFAULT_CRITERIA = ['accuracy', 'completeness', 'clarity'];
|
|
3
|
+
/**
 * Ask one "judge" duck to compare and rank a set of duck responses.
 *
 * @param {object} providerManager - Must provide `getProviderNames()` and
 *   `askDuck(providerName, prompt)`.
 * @param {object} args - Tool arguments: `responses` (array with at least two
 *   entries), optional `judge` (provider name; the first configured provider
 *   is used when falsy), optional `criteria` (DEFAULT_CRITERIA when
 *   undefined) and optional `persona`.
 * @returns {Promise<{content: Array<{type: string, text: string}>}>} A single
 *   text item holding the formatted evaluation.
 * @throws {Error} When `responses` is missing/empty, has only one entry, or no
 *   judge provider can be determined.
 */
export async function duckJudgeTool(providerManager, args) {
    const { responses, judge, criteria: requestedCriteria, persona } = args;
    const criteria = requestedCriteria === undefined ? DEFAULT_CRITERIA : requestedCriteria;

    // A comparison needs at least two responses; the two failure modes keep
    // their own distinct messages.
    const responseCount = Array.isArray(responses) ? responses.length : 0;
    if (responseCount === 0) {
        throw new Error('At least one response is required to judge');
    }
    if (responseCount === 1) {
        throw new Error('At least two responses are required for comparison');
    }

    // An explicitly named judge wins; otherwise take the first known provider.
    let judgeProvider = judge;
    if (!judgeProvider) {
        const providerNames = providerManager.getProviderNames();
        judgeProvider = providerNames[0];
    }
    if (!judgeProvider) {
        throw new Error('No judge provider available');
    }

    logger.info(`Starting judgment with ${judgeProvider} on ${responseCount} responses`);

    const judgePrompt = buildJudgePrompt(responses, criteria, persona);
    const verdict = await providerManager.askDuck(judgeProvider, judgePrompt);
    const evaluation = parseJudgment(
        verdict.content,
        verdict.provider,
        verdict.nickname,
        responses,
        criteria,
    );
    const text = formatJudgeResult(evaluation);

    const topProvider = evaluation.rankings[0]?.provider || 'unknown';
    logger.info(`Judgment completed by ${judgeProvider}: #1 is ${topProvider}`);

    return {
        content: [{ type: 'text', text }],
    };
}
|
|
36
|
+
/**
 * Build the prompt sent to the judge duck.
 *
 * The prompt lists every response (labelled with nickname and provider), the
 * numbered evaluation criteria, and the exact JSON shape the judge must
 * answer with.
 *
 * @param {Array<{provider: string, nickname: string, content: string}>} responses
 *   Responses to be evaluated.
 * @param {string[]} criteria - Criterion names, in display order.
 * @param {string} [persona] - Optional persona sentence prefix; omitted when falsy.
 * @returns {string} The complete judge prompt.
 */
function buildJudgePrompt(responses, criteria, persona) {
    const criteriaList = criteria.map((c, i) => `${i + 1}. ${c}`).join('\n');
    const responsesText = responses
        .map((r, i) => `--- Response ${i + 1} (${r.nickname} / ${r.provider}) ---\n${r.content}\n`)
        .join('\n');
    const personaText = persona
        ? `You are a ${persona} evaluating these responses.\n\n`
        : '';

    // JSON.stringify escapes quotes/backslashes in a criterion name so the
    // example object shown to the judge stays well-formed. For ordinary names
    // the output is the same `"name"` as plain interpolation.
    const criteriaTemplate = criteria
        .map((c) => `${JSON.stringify(String(c))}: <0-100>`)
        .join(', ');

    const lines = [
        `${personaText}You are a judge evaluating ${responses.length} responses to the same prompt.`,
        '',
        'RESPONSES TO EVALUATE:',
        responsesText,
        '',
        'EVALUATION CRITERIA:',
        criteriaList,
        '',
        'INSTRUCTIONS:',
        '1. Evaluate each response against ALL criteria',
        '2. Assign a score from 0-100 for each response',
        '3. Rank responses from best to worst',
        '4. Provide a brief justification for each ranking',
        '5. Give a final summary',
        '',
        'Respond with ONLY a JSON object in this exact format:',
        '{',
        '  "rankings": [',
        '    {"provider": "<provider name>", "score": <0-100>, "justification": "<brief explanation>"},',
        '    {"provider": "<provider name>", "score": <0-100>, "justification": "<brief explanation>"}',
        '  ],',
        '  "criteria_scores": {',
        `    "<provider>": {${criteriaTemplate}}`,
        '  },',
        '  "summary": "<overall assessment and recommendation>"',
        '}',
        '',
        'IMPORTANT:',
        '- Rankings must be ordered from highest score to lowest',
        '- Use the exact provider names from the responses',
        '- Do NOT include any text before or after the JSON',
        '- Do NOT use markdown code blocks',
    ];
    return lines.join('\n');
}
|
|
75
|
+
/**
 * Map the provider name written by the judge back to one of the original
 * responses.
 *
 * Matching is case-insensitive and ignores surrounding whitespace. Exact
 * matches (provider first, then nickname) take precedence over substring
 * matches, so a judge answer that is exactly one duck's nickname can never be
 * captured by another duck whose provider name merely appears inside it.
 *
 * @param {unknown} judgeProviderName - Name taken from the judge's JSON; may
 *   be missing or not a string when the judge output is malformed.
 * @param {Array<{provider: string, nickname: string}>} originalResponses
 *   Responses that were sent to the judge.
 * @returns {object|undefined} The matching response, or undefined when
 *   nothing matches.
 */
function matchProvider(judgeProviderName, originalResponses) {
    // Judge output is untrusted: a missing or non-string name matches nothing
    // instead of throwing.
    if (typeof judgeProviderName !== 'string') {
        return undefined;
    }
    const wanted = judgeProviderName.trim().toLowerCase();
    if (wanted === '') {
        return undefined;
    }

    const normalize = (value) => (typeof value === 'string' ? value.trim().toLowerCase() : '');
    const candidates = originalResponses.map((response) => ({
        response,
        provider: normalize(response?.provider),
        nickname: normalize(response?.nickname),
    }));

    const exactProvider = candidates.find((c) => c.provider === wanted);
    if (exactProvider) {
        return exactProvider.response;
    }

    const exactNickname = candidates.find((c) => c.nickname === wanted);
    if (exactNickname) {
        return exactNickname.response;
    }

    // Substring fallback for answers such as "GPT Duck (openai)". Empty
    // identifiers are skipped because every string "contains" ''.
    const contained = candidates.find((c) => (c.provider !== '' && wanted.includes(c.provider)) ||
        (c.nickname !== '' && wanted.includes(c.nickname)));
    return contained?.response;
}
|
|
92
|
+
/**
 * Turn the judge's raw text answer into a structured evaluation.
 *
 * The first `{ ... }` span in the answer is parsed as JSON. Ranking entries
 * are matched back to the original responses; entries that are malformed,
 * unmatched or duplicated are skipped individually. Ranks are assigned in the
 * order entries are accepted, so they are always contiguous (1, 2, 3, ...)
 * and never collide with the ranks of responses appended afterwards as
 * "Not evaluated by judge".
 *
 * @param {string} response - Raw text returned by the judge.
 * @param {string} judgeProvider - Provider name of the judge.
 * @param {string} judgeNickname - Display name of the judge.
 * @param {Array<{provider: string, nickname: string}>} originalResponses
 *   Responses that were judged.
 * @param {string[]} criteria - Criteria the judge was asked to apply.
 * @returns {object} Evaluation with `rankings`, `criteriaScores`, `summary`
 *   and `rawResponse`; the fallback evaluation when no JSON can be parsed.
 */
function parseJudgment(response, judgeProvider, judgeNickname, originalResponses, criteria) {
    const evaluation = {
        judge: judgeProvider,
        judgeNickname: judgeNickname,
        prompt: '', // Will be filled by caller if needed
        criteria,
        rankings: [],
        criteriaScores: {},
        summary: '',
        rawResponse: response,
    };

    const isRecord = (value) => value !== null && typeof value === 'object' && !Array.isArray(value);

    // Accept numbers and numeric strings (judges sometimes quote scores),
    // clamp to 0-100, and use 0 for anything else.
    const toScore = (value) => {
        const numeric = typeof value === 'string' && value.trim() !== '' ? Number(value) : value;
        if (typeof numeric !== 'number' || !Number.isFinite(numeric)) {
            return 0;
        }
        return Math.max(0, Math.min(100, numeric));
    };

    try {
        // Try to extract JSON from the response
        const jsonMatch = response.match(/\{[\s\S]*\}/);
        if (!jsonMatch) {
            logger.warn(`No JSON found in judge response from ${judgeProvider}`);
            return createFallbackEvaluation(evaluation, originalResponses, response);
        }
        const parsed = JSON.parse(jsonMatch[0]);
        const matchedProviders = new Set();

        if (Array.isArray(parsed.rankings)) {
            for (const entry of parsed.rankings) {
                // One bad entry must not discard the judge's other rankings.
                if (!isRecord(entry) || typeof entry.provider !== 'string') {
                    continue;
                }
                const matched = matchProvider(entry.provider, originalResponses);
                if (!matched || matchedProviders.has(matched.provider)) {
                    continue;
                }
                matchedProviders.add(matched.provider);
                evaluation.rankings.push({
                    provider: matched.provider,
                    nickname: matched.nickname,
                    // Position among accepted entries, not the judge's array index.
                    rank: evaluation.rankings.length + 1,
                    score: toScore(entry.score),
                    justification: entry.justification?.toString() || '',
                });
            }
        }

        // Keep only per-provider score maps that are real objects; the
        // formatter iterates them with Object.entries, which throws on null.
        if (isRecord(parsed.criteria_scores)) {
            evaluation.criteriaScores = Object.fromEntries(
                Object.entries(parsed.criteria_scores).filter(([, scores]) => isRecord(scores)),
            );
        }

        if (parsed.summary) {
            evaluation.summary = parsed.summary.toString();
        }
    }
    catch (error) {
        logger.warn(`Failed to parse JSON judgment from ${judgeProvider}:`, error);
        return createFallbackEvaluation(evaluation, originalResponses, response);
    }

    // Ensure all original responses are represented
    const rankedProviders = new Set(evaluation.rankings.map((r) => r.provider));
    for (const resp of originalResponses) {
        if (!rankedProviders.has(resp.provider)) {
            evaluation.rankings.push({
                provider: resp.provider,
                nickname: resp.nickname,
                rank: evaluation.rankings.length + 1,
                score: 0,
                justification: 'Not evaluated by judge',
            });
        }
    }
    return evaluation;
}
|
|
156
|
+
/**
 * Fill an evaluation with placeholder results for when the judge's answer
 * could not be parsed.
 *
 * Every original response receives a neutral score of 50, ranked in its
 * original order. The passed-in evaluation object is updated in place and
 * returned.
 *
 * @param {object} evaluation - Evaluation object to overwrite.
 * @param {Array<{provider: string, nickname: string}>} originalResponses
 *   Responses that were sent to the judge.
 * @param {string} rawResponse - Unparsed judge output, kept for review.
 * @returns {object} The same `evaluation` object.
 */
function createFallbackEvaluation(evaluation, originalResponses, rawResponse) {
    const toPlaceholder = (entry, position) => {
        const { provider, nickname } = entry;
        return {
            provider,
            nickname,
            rank: position + 1,
            score: 50,
            justification: 'Unable to parse judge response',
        };
    };
    return Object.assign(evaluation, {
        rankings: originalResponses.map(toPlaceholder),
        summary: `Judge evaluation parsing failed. Raw response available for review.`,
        rawResponse,
    });
}
|
|
169
|
+
/**
 * Render a judge evaluation as human-readable text.
 *
 * Sections, in order: header (judge and criteria), one entry per ranking with
 * a ten-segment score bar, an optional per-provider criteria breakdown, an
 * optional summary, and a footer with the number of rankings.
 *
 * The score bar is clamped to 0-10 filled segments, so out-of-range or
 * non-finite scores never make `String.prototype.repeat` throw. A
 * per-provider criteria entry that is not an object is rendered with no rows
 * instead of crashing `Object.entries`.
 *
 * @param {object} evaluation - Evaluation with `judge`, `judgeNickname`,
 *   `criteria`, `rankings`, `criteriaScores` and `summary`.
 * @returns {string} The formatted report.
 */
function formatJudgeResult(evaluation) {
    const heavyRule = `═══════════════════════════════════════\n`;
    const lightRule = `─────────────────────────────────────\n`;
    const barWidth = 10;
    const medals = new Map([[1, '🥇'], [2, '🥈'], [3, '🥉']]);

    const parts = [
        `⚖️ **Judge Evaluation**\n`,
        heavyRule,
        `\n`,
        `**Judge:** ${evaluation.judgeNickname} (${evaluation.judge})\n`,
        `**Criteria:** ${evaluation.criteria.join(', ')}\n\n`,
        // Rankings
        `**Rankings:**\n`,
        lightRule,
    ];

    for (const ranking of evaluation.rankings) {
        const medal = medals.get(ranking.rank) ?? ' ';
        const rawFilled = Math.floor(ranking.score / 10);
        const filled = Number.isFinite(rawFilled)
            ? Math.min(barWidth, Math.max(0, rawFilled))
            : 0;
        const bar = '█'.repeat(filled);
        const emptyBar = '░'.repeat(barWidth - filled);
        parts.push(
            `${medal} **#${ranking.rank} ${ranking.nickname}** (${ranking.provider})\n`,
            ` Score: ${bar}${emptyBar} ${ranking.score}/100\n`,
            ` 💭 "${ranking.justification}"\n\n`,
        );
    }

    // Criteria breakdown if available
    const criteriaScores = evaluation.criteriaScores ?? {};
    if (Object.keys(criteriaScores).length > 0) {
        parts.push(`**Criteria Breakdown:**\n`, lightRule);
        for (const [provider, scores] of Object.entries(criteriaScores)) {
            parts.push(`📊 **${provider}:**\n`);
            const rows = scores !== null && typeof scores === 'object' ? Object.entries(scores) : [];
            for (const [criterion, score] of rows) {
                const criterionScore = typeof score === 'number' ? score : 0;
                parts.push(` • ${criterion}: ${criterionScore}/100\n`);
            }
            parts.push(`\n`);
        }
    }

    // Summary
    if (evaluation.summary) {
        parts.push(`**Summary:**\n`, lightRule, `${evaluation.summary}\n\n`);
    }

    parts.push(heavyRule, `📋 Evaluated ${evaluation.rankings.length} responses\n`);
    return parts.join('');
}
|
|
208
|
+
//# sourceMappingURL=duck-judge.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"duck-judge.js","sourceRoot":"","sources":["../../src/tools/duck-judge.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAmB5C,MAAM,gBAAgB,GAAG,CAAC,UAAU,EAAE,cAAc,EAAE,SAAS,CAAC,CAAC;AAEjE,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,eAAgC,EAChC,IAA6B;IAE7B,MAAM,EACJ,SAAS,EACT,KAAK,EACL,QAAQ,GAAG,gBAAgB,EAC3B,OAAO,GACR,GAAG,IAAgC,CAAC;IAErC,kBAAkB;IAClB,IAAI,CAAC,SAAS,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtE,MAAM,IAAI,KAAK,CAAC,4CAA4C,CAAC,CAAC;IAChE,CAAC;IAED,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC3B,MAAM,IAAI,KAAK,CAAC,oDAAoD,CAAC,CAAC;IACxE,CAAC;IAED,2BAA2B;IAC3B,MAAM,aAAa,GAAG,KAAK,IAAI,eAAe,CAAC,gBAAgB,EAAE,CAAC,CAAC,CAAC,CAAC;IACrE,IAAI,CAAC,aAAa,EAAE,CAAC;QACnB,MAAM,IAAI,KAAK,CAAC,6BAA6B,CAAC,CAAC;IACjD,CAAC;IAED,MAAM,CAAC,IAAI,CAAC,0BAA0B,aAAa,OAAO,SAAS,CAAC,MAAM,YAAY,CAAC,CAAC;IAExF,4BAA4B;IAC5B,MAAM,MAAM,GAAG,gBAAgB,CAAC,SAAS,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC;IAE9D,mCAAmC;IACnC,MAAM,aAAa,GAAG,MAAM,eAAe,CAAC,OAAO,CAAC,aAAa,EAAE,MAAM,CAAC,CAAC;IAE3E,qBAAqB;IACrB,MAAM,UAAU,GAAG,aAAa,CAC9B,aAAa,CAAC,OAAO,EACrB,aAAa,CAAC,QAAQ,EACtB,aAAa,CAAC,QAAQ,EACtB,SAAS,EACT,QAAQ,CACT,CAAC;IAEF,gBAAgB;IAChB,MAAM,eAAe,GAAG,iBAAiB,CAAC,UAAU,CAAC,CAAC;IAEtD,MAAM,CAAC,IAAI,CACT,yBAAyB,aAAa,WAAW,UAAU,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,QAAQ,IAAI,SAAS,EAAE,CACjG,CAAC;IAEF,OAAO;QACL,OAAO,EAAE;YACP;gBACE,IAAI,EAAE,MAAM;gBACZ,IAAI,EAAE,eAAe;aACtB;SACF;KACF,CAAC;AACJ,CAAC;AAED,SAAS,gBAAgB,CACvB,SAAyB,EACzB,QAAkB,EAClB,OAAgB;IAEhB,MAAM,YAAY,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAEzE,MAAM,aAAa,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAC3C,gBAAgB,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,QAAQ,MAAM,CAAC,CAAC,QAAQ,UAAU,CAAC,CAAC,OAAO,IAAI,CAC5E,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAEb,MAAM,WAAW,GAAG,OAAO;QACzB,CAAC,CAAC,aAAa,OAAO,kCAAkC;QACxD,CAAC,CAAC,EAAE,CAAC;IAEP,OAAO,GAAG,WAAW,8BAA8B,SAAS,CAAC,MAAM;;;EAGnE,aAAa;;;EAGb,YAAY;;;;;;;
;;;;;;;;;qBAgBO,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC;;;;;;;;;kCASlC,CAAC;AACnC,CAAC;AAED,SAAS,aAAa,CACpB,iBAAyB,EACzB,iBAAiC;IAEjC,MAAM,SAAS,GAAG,iBAAiB,CAAC,WAAW,EAAE,CAAC;IAElD,wBAAwB;IACxB,MAAM,UAAU,GAAG,iBAAiB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,WAAW,EAAE,KAAK,SAAS,CAAC,CAAC;IACvF,IAAI,UAAU;QAAE,OAAO,UAAU,CAAC;IAElC,8DAA8D;IAC9D,MAAM,aAAa,GAAG,iBAAiB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAC/C,SAAS,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC;QAC5C,SAAS,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC,CAC7C,CAAC;IACF,IAAI,aAAa;QAAE,OAAO,aAAa,CAAC;IAExC,2BAA2B;IAC3B,MAAM,aAAa,GAAG,iBAAiB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAC/C,CAAC,CAAC,QAAQ,CAAC,WAAW,EAAE,KAAK,SAAS,CACvC,CAAC;IACF,IAAI,aAAa;QAAE,OAAO,aAAa,CAAC;IAExC,OAAO,SAAS,CAAC;AACnB,CAAC;AAED,SAAS,aAAa,CACpB,QAAgB,EAChB,aAAqB,EACrB,aAAqB,EACrB,iBAAiC,EACjC,QAAkB;IAElB,MAAM,UAAU,GAAoB;QAClC,KAAK,EAAE,aAAa;QACpB,aAAa,EAAE,aAAa;QAC5B,MAAM,EAAE,EAAE,EAAE,qCAAqC;QACjD,QAAQ;QACR,QAAQ,EAAE,EAAE;QACZ,cAAc,EAAE,EAAE;QAClB,OAAO,EAAE,EAAE;QACX,WAAW,EAAE,QAAQ;KACtB,CAAC;IAEF,IAAI,CAAC;QACH,wCAAwC;QACxC,MAAM,SAAS,GAAG,QAAQ,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;QAChD,IAAI,CAAC,SAAS,EAAE,CAAC;YACf,MAAM,CAAC,IAAI,CAAC,wCAAwC,aAAa,EAAE,CAAC,CAAC;YACrE,OAAO,wBAAwB,CAAC,UAAU,EAAE,iBAAiB,EAAE,QAAQ,CAAC,CAAC;QAC3E,CAAC;QAED,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,CAAmB,CAAC;QAC1D,MAAM,gBAAgB,GAAG,IAAI,GAAG,EAAU,CAAC;QAE3C,iBAAiB;QACjB,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC;YACnC,KAAK,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC,IAAI,MAAM,CAAC,QAAQ,CAAC,OAAO,EAAE,EAAE,CAAC;gBACnD,MAAM,OAAO,GAAG,aAAa,CAAC,CAAC,CAAC,QAAQ,EAAE,iBAAiB,CAAC,CAAC;gBAC7D,IAAI,OAAO,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,EAAE,CAAC;oBACvD,gBAAgB,CAAC,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;oBACvC,UAAU,CAAC,QAAQ,CAAC,IAAI,CAAC;wBACvB,QAAQ,EAAE,OAAO,CAAC,QAAQ;wBAC1B,QAAQ,EAAE,OAAO,CAAC,QAAQ;wBAC1B,IAAI,EAAE,KAAK,GAAG,CAAC;wBACf,KAAK,EAAE,OAAO,CAAC,CAAC,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,
IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;wBAC5E,aAAa,EAAE,CAAC,CAAC,aAAa,EAAE,QAAQ,EAAE,IAAI,EAAE;qBACjD,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;QACH,CAAC;QAED,wBAAwB;QACxB,IAAI,MAAM,CAAC,eAAe,IAAI,OAAO,MAAM,CAAC,eAAe,KAAK,QAAQ,EAAE,CAAC;YACzE,UAAU,CAAC,cAAc,GAAG,MAAM,CAAC,eAAe,CAAC;QACrD,CAAC;QAED,gBAAgB;QAChB,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;YACnB,UAAU,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO,CAAC,QAAQ,EAAE,CAAC;QACjD,CAAC;IAEH,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,CAAC,IAAI,CAAC,sCAAsC,aAAa,GAAG,EAAE,KAAK,CAAC,CAAC;QAC3E,OAAO,wBAAwB,CAAC,UAAU,EAAE,iBAAiB,EAAE,QAAQ,CAAC,CAAC;IAC3E,CAAC;IAED,gDAAgD;IAChD,MAAM,eAAe,GAAG,IAAI,GAAG,CAAC,UAAU,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC;IAC1E,KAAK,MAAM,IAAI,IAAI,iBAAiB,EAAE,CAAC;QACrC,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC,IAAI,CAAC,QAAQ,CAAC,EAAE,CAAC;YACxC,UAAU,CAAC,QAAQ,CAAC,IAAI,CAAC;gBACvB,QAAQ,EAAE,IAAI,CAAC,QAAQ;gBACvB,QAAQ,EAAE,IAAI,CAAC,QAAQ;gBACvB,IAAI,EAAE,UAAU,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC;gBACpC,KAAK,EAAE,CAAC;gBACR,aAAa,EAAE,wBAAwB;aACxC,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO,UAAU,CAAC;AACpB,CAAC;AAED,SAAS,wBAAwB,CAC/B,UAA2B,EAC3B,iBAAiC,EACjC,WAAmB;IAEnB,+CAA+C;IAC/C,UAAU,CAAC,QAAQ,GAAG,iBAAiB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,EAAE,CAAC,CAAC;QACzD,QAAQ,EAAE,CAAC,CAAC,QAAQ;QACpB,QAAQ,EAAE,CAAC,CAAC,QAAQ;QACpB,IAAI,EAAE,KAAK,GAAG,CAAC;QACf,KAAK,EAAE,EAAE;QACT,aAAa,EAAE,gCAAgC;KAChD,CAAC,CAAC,CAAC;IACJ,UAAU,CAAC,OAAO,GAAG,qEAAqE,CAAC;IAC3F,UAAU,CAAC,WAAW,GAAG,WAAW,CAAC;IACrC,OAAO,UAAU,CAAC;AACpB,CAAC;AAED,SAAS,iBAAiB,CAAC,UAA2B;IACpD,IAAI,MAAM,GAAG,2BAA2B,CAAC;IACzC,MAAM,IAAI,6CAA6C,CAAC;IACxD,MAAM,IAAI,cAAc,UAAU,CAAC,aAAa,KAAK,UAAU,CAAC,KAAK,KAAK,CAAC;IAC3E,MAAM,IAAI,iBAAiB,UAAU,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC;IAEhE,WAAW;IACX,MAAM,IAAI,iBAAiB,CAAC;IAC5B,MAAM,IAAI,yCAAyC,CAAC;IAEpD,KAAK,MAAM,OAAO,IAAI,UAAU,CAAC,QAAQ,EAAE,CAAC;QAC1C,MAAM,KAAK,GAAG,OAAO,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,KAAK
,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC;QACvG,MAAM,GAAG,GAAG,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,GAAG,EAAE,CAAC,CAAC,CAAC;QACvD,MAAM,QAAQ,GAAG,GAAG,CAAC,MAAM,CAAC,EAAE,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,GAAG,EAAE,CAAC,CAAC,CAAC;QAEjE,MAAM,IAAI,GAAG,KAAK,OAAO,OAAO,CAAC,IAAI,IAAI,OAAO,CAAC,QAAQ,OAAO,OAAO,CAAC,QAAQ,KAAK,CAAC;QACtF,MAAM,IAAI,aAAa,GAAG,GAAG,QAAQ,IAAI,OAAO,CAAC,KAAK,QAAQ,CAAC;QAC/D,MAAM,IAAI,UAAU,OAAO,CAAC,aAAa,OAAO,CAAC;IACnD,CAAC;IAED,kCAAkC;IAClC,IAAI,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,cAAc,CAAC,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACtD,MAAM,IAAI,2BAA2B,CAAC;QACtC,MAAM,IAAI,yCAAyC,CAAC;QAEpD,KAAK,MAAM,CAAC,QAAQ,EAAE,MAAM,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,UAAU,CAAC,cAAc,CAAC,EAAE,CAAC;YAC3E,MAAM,IAAI,QAAQ,QAAQ,OAAO,CAAC;YAClC,KAAK,MAAM,CAAC,SAAS,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;gBACxD,MAAM,cAAc,GAAG,OAAO,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;gBAC7D,MAAM,IAAI,QAAQ,SAAS,KAAK,cAAc,QAAQ,CAAC;YACzD,CAAC;YACD,MAAM,IAAI,IAAI,CAAC;QACjB,CAAC;IACH,CAAC;IAED,UAAU;IACV,IAAI,UAAU,CAAC,OAAO,EAAE,CAAC;QACvB,MAAM,IAAI,gBAAgB,CAAC;QAC3B,MAAM,IAAI,yCAAyC,CAAC;QACpD,MAAM,IAAI,GAAG,UAAU,CAAC,OAAO,MAAM,CAAC;IACxC,CAAC;IAED,MAAM,IAAI,2CAA2C,CAAC;IACtD,MAAM,IAAI,gBAAgB,UAAU,CAAC,QAAQ,CAAC,MAAM,cAAc,CAAC;IAEnE,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import { ProviderManager } from '../providers/manager.js';
|
|
2
|
+
/** Arguments accepted by the duck_vote tool. */
export interface DuckVoteArgs {
    /** The question the ducks vote on; must be a non-empty string. */
    question: string;
    /** Candidate answers; the tool requires between 2 and 10 of them. */
    options: Array<string>;
    /** Provider names that should vote; all providers vote when omitted or empty. */
    voters?: Array<string>;
    /** Passed to the vote-prompt builder; the tool treats an omitted value as true. */
    require_reasoning?: boolean;
}
|
|
8
|
+
/**
 * Runs a vote across ducks and resolves with the formatted result.
 *
 * @param providerManager Source of the voting providers.
 * @param args Raw tool arguments (see DuckVoteArgs for the expected fields).
 * @returns A result whose `content` holds text items.
 */
export declare function duckVoteTool(providerManager: ProviderManager, args: Record<string, unknown>): Promise<{
    content: Array<{
        type: string;
        text: string;
    }>;
}>;
|
|
14
|
+
//# sourceMappingURL=duck-vote.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"duck-vote.d.ts","sourceRoot":"","sources":["../../src/tools/duck-vote.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,yBAAyB,CAAC;AAK1D,MAAM,WAAW,YAAY;IAC3B,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;IAClB,iBAAiB,CAAC,EAAE,OAAO,CAAC;CAC7B;AAED,wBAAsB,YAAY,CAChC,eAAe,EAAE,eAAe,EAChC,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC;;;;;GAwE9B"}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import { ConsensusService } from '../services/consensus.js';
|
|
2
|
+
import { logger } from '../utils/logger.js';
|
|
3
|
+
/**
 * Let several ducks vote on a set of options and report the aggregated result.
 *
 * @param {object} providerManager - Must provide `getProviderNames()` and
 *   `compareDucks(prompt, providerNames)`.
 * @param {object} args - Tool arguments: `question` (non-empty string),
 *   `options` (array of 2-10 entries), optional `voters` (provider names; all
 *   providers when missing or empty) and optional `require_reasoning`
 *   (true when undefined).
 * @returns {Promise<{content: Array<{type: string, text: string}>}>} A single
 *   text item holding the formatted vote result.
 * @throws {Error} When the question or options are invalid or no voters exist.
 */
export async function duckVoteTool(providerManager, args) {
    const { question, options, voters, require_reasoning: reasoningFlag } = args;
    const requireReasoning = reasoningFlag === undefined ? true : reasoningFlag;

    // Input validation, in the same order the errors are reported.
    if (typeof question !== 'string' || question.length === 0) {
        throw new Error('Question is required');
    }
    if (!Array.isArray(options) || options.length < 2) {
        throw new Error('At least 2 options are required');
    }
    if (options.length > 10) {
        throw new Error('Maximum 10 options allowed');
    }

    // Explicit voters win; otherwise every configured provider votes.
    const hasExplicitVoters = Boolean(voters) && voters.length > 0;
    const voterNames = hasExplicitVoters ? voters : providerManager.getProviderNames();
    if (voterNames.length === 0) {
        throw new Error('No voters available');
    }

    logger.info(`Starting vote with ${voterNames.length} voters on: "${question}"`);

    const consensus = new ConsensusService();
    const ballotPrompt = consensus.buildVotePrompt(question, options, requireReasoning);

    // All voters receive the same prompt in a single compareDucks call.
    const replies = await providerManager.compareDucks(ballotPrompt, voterNames);

    const votes = [];
    for (const reply of replies) {
        votes.push(consensus.parseVote(reply.content, reply.provider, reply.nickname, options));
    }

    const tallied = consensus.aggregateVotes(question, options, votes);
    const text = consensus.formatVoteResult(tallied);

    logger.info(`Vote completed: ${tallied.consensusLevel} consensus, winner: ${tallied.winner || 'none'}`);

    return {
        content: [{ type: 'text', text }],
    };
}
|
|
46
|
+
//# sourceMappingURL=duck-vote.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"duck-vote.js","sourceRoot":"","sources":["../../src/tools/duck-vote.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,gBAAgB,EAAE,MAAM,0BAA0B,CAAC;AAE5D,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAS5C,MAAM,CAAC,KAAK,UAAU,YAAY,CAChC,eAAgC,EAChC,IAA6B;IAE7B,MAAM,EACJ,QAAQ,EACR,OAAO,EACP,MAAM,EACN,iBAAiB,GAAG,IAAI,GACzB,GAAG,IAA+B,CAAC;IAEpC,kBAAkB;IAClB,IAAI,CAAC,QAAQ,IAAI,OAAO,QAAQ,KAAK,QAAQ,EAAE,CAAC;QAC9C,MAAM,IAAI,KAAK,CAAC,sBAAsB,CAAC,CAAC;IAC1C,CAAC;IAED,IAAI,CAAC,OAAO,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC9D,MAAM,IAAI,KAAK,CAAC,iCAAiC,CAAC,CAAC;IACrD,CAAC;IAED,IAAI,OAAO,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;QACxB,MAAM,IAAI,KAAK,CAAC,4BAA4B,CAAC,CAAC;IAChD,CAAC;IAED,8CAA8C;IAC9C,MAAM,UAAU,GAAG,MAAM,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC;QAC5C,CAAC,CAAC,MAAM;QACR,CAAC,CAAC,eAAe,CAAC,gBAAgB,EAAE,CAAC;IAEvC,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC5B,MAAM,IAAI,KAAK,CAAC,qBAAqB,CAAC,CAAC;IACzC,CAAC;IAED,MAAM,CAAC,IAAI,CAAC,sBAAsB,UAAU,CAAC,MAAM,gBAAgB,QAAQ,GAAG,CAAC,CAAC;IAEhF,MAAM,gBAAgB,GAAG,IAAI,gBAAgB,EAAE,CAAC;IAChD,MAAM,UAAU,GAAG,gBAAgB,CAAC,eAAe,CACjD,QAAQ,EACR,OAAO,EACP,iBAAiB,CAClB,CAAC;IAEF,uCAAuC;IACvC,MAAM,SAAS,GAAG,MAAM,eAAe,CAAC,YAAY,CAAC,UAAU,EAAE,UAAU,CAAC,CAAC;IAE7E,cAAc;IACd,MAAM,KAAK,GAAiB,SAAS,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE;QACnD,OAAO,gBAAgB,CAAC,SAAS,CAC/B,QAAQ,CAAC,OAAO,EAChB,QAAQ,CAAC,QAAQ,EACjB,QAAQ,CAAC,QAAQ,EACjB,OAAO,CACR,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,oBAAoB;IACpB,MAAM,gBAAgB,GAAG,gBAAgB,CAAC,cAAc,CAAC,QAAQ,EAAE,OAAO,EAAE,KAAK,CAAC,CAAC;IAEnF,gBAAgB;IAChB,MAAM,eAAe,GAAG,gBAAgB,CAAC,gBAAgB,CAAC,gBAAgB,CAAC,CAAC;IAE5E,MAAM,CAAC,IAAI,CACT,mBAAmB,gBAAgB,CAAC,cAAc,cAAc;QAChE,WAAW,gBAAgB,CAAC,MAAM,IAAI,MAAM,EAAE,CAC/C,CAAC;IAEF,OAAO;QACL,OAAO,EAAE;YACP;gBACE,IAAI,EAAE,MAAM;gBACZ,IAAI,EAAE,eAAe;aACtB;SACF;KACF,CAAC;AACJ,CAAC"}
|
package/docker-compose.yml
CHANGED
|
@@ -48,7 +48,7 @@ services:
|
|
|
48
48
|
|
|
49
49
|
# OpenAI Provider
|
|
50
50
|
- OPENAI_API_KEY=${OPENAI_API_KEY}
|
|
51
|
-
- OPENAI_DEFAULT_MODEL=${OPENAI_DEFAULT_MODEL:-gpt-
|
|
51
|
+
- OPENAI_DEFAULT_MODEL=${OPENAI_DEFAULT_MODEL:-gpt-5.1}
|
|
52
52
|
|
|
53
53
|
# Google Gemini Provider
|
|
54
54
|
- GEMINI_API_KEY=${GEMINI_API_KEY}
|
package/package.json
CHANGED
package/src/config/config.ts
CHANGED
|
@@ -109,8 +109,8 @@ export class ConfigManager {
|
|
|
109
109
|
providers.openai = {
|
|
110
110
|
api_key: process.env.OPENAI_API_KEY,
|
|
111
111
|
base_url: 'https://api.openai.com/v1',
|
|
112
|
-
models: ['gpt-
|
|
113
|
-
default_model: process.env.OPENAI_DEFAULT_MODEL || 'gpt-
|
|
112
|
+
models: ['gpt-5.1', 'gpt-4.1', 'gpt-4o'],
|
|
113
|
+
default_model: process.env.OPENAI_DEFAULT_MODEL || 'gpt-5.1',
|
|
114
114
|
nickname: process.env.OPENAI_NICKNAME || 'GPT Duck',
|
|
115
115
|
};
|
|
116
116
|
}
|
|
@@ -120,7 +120,7 @@ export class ConfigManager {
|
|
|
120
120
|
providers.gemini = {
|
|
121
121
|
api_key: process.env.GEMINI_API_KEY,
|
|
122
122
|
base_url: 'https://generativelanguage.googleapis.com/v1beta/openai/',
|
|
123
|
-
models: ['gemini-2.5-
|
|
123
|
+
models: ['gemini-3-pro-preview', 'gemini-2.5-pro', 'gemini-2.5-flash'],
|
|
124
124
|
default_model: process.env.GEMINI_DEFAULT_MODEL || 'gemini-2.5-flash',
|
|
125
125
|
nickname: process.env.GEMINI_NICKNAME || 'Gemini Duck',
|
|
126
126
|
};
|
|
@@ -131,7 +131,7 @@ export class ConfigManager {
|
|
|
131
131
|
providers.groq = {
|
|
132
132
|
api_key: process.env.GROQ_API_KEY,
|
|
133
133
|
base_url: 'https://api.groq.com/openai/v1',
|
|
134
|
-
models: ['llama-
|
|
134
|
+
models: ['meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct', 'llama-3.3-70b-versatile'],
|
|
135
135
|
default_model: process.env.GROQ_DEFAULT_MODEL || 'llama-3.3-70b-versatile',
|
|
136
136
|
nickname: process.env.GROQ_NICKNAME || 'Groq Duck',
|
|
137
137
|
};
|
package/src/config/types.ts
CHANGED
|
@@ -86,4 +86,96 @@ export interface DuckResponse {
|
|
|
86
86
|
};
|
|
87
87
|
latency: number;
|
|
88
88
|
cached: boolean;
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
// Consensus & Voting Types
|
|
92
|
+
/**
 * A single voter's ballot.
 * NOTE(review): the per-field notes are inferred from field names and from the
 * parseVote(content, provider, nickname, options) call in the vote tool —
 * confirm against ConsensusService.
 */
export interface VoteResult {
  /** Presumably the provider name of the voting duck. */
  voter: string;
  /** Presumably the voter's display nickname. */
  nickname: string;
  /** The option chosen by this voter. */
  choice: string;
  /** Confidence reported for the choice; scale not visible here — TODO confirm. */
  confidence: number;
  /** The voter's explanation for its choice. */
  reasoning: string;
  /** Unparsed text of the voter's reply. */
  rawResponse: string;
}
|
|
100
|
+
|
|
101
|
+
/** Combined outcome of a vote across all voters. */
export interface AggregatedVote {
  /** The question that was voted on. */
  question: string;
  /** The options that were offered. */
  options: Array<string>;
  /** Winning option, or null when there is none. */
  winner: string | null;
  /** Whether the vote ended in a tie. */
  isTie: boolean;
  /** Presumably the vote count per option — TODO confirm. */
  tally: Record<string, number>;
  /** Presumably a confidence figure per option; aggregation method not visible here. */
  confidenceByOption: Record<string, number>;
  /** The individual ballots. */
  votes: Array<VoteResult>;
  /** Number of voters that took part. */
  totalVoters: number;
  /** Number of ballots counted as valid. */
  validVotes: number;
  /** Coarse label for how strongly the voters agreed. */
  consensusLevel: 'unanimous' | 'majority' | 'plurality' | 'split' | 'none';
}
|
|
113
|
+
|
|
114
|
+
// Judge Evaluation Types
|
|
115
|
+
/** One ranked response inside a judge evaluation. */
export interface JudgeRanking {
  /** Provider name of the ranked response. */
  provider: string;
  /** Display nickname of the ranked response. */
  nickname: string;
  /** 1-based position in the ranking. */
  rank: number;
  /** Score for the response; the judge is asked for a 0-100 value. */
  score: number;
  /** The judge's explanation for this ranking. */
  justification: string;
}
|
|
122
|
+
|
|
123
|
+
/** Structured result of one judge duck evaluating several responses. */
export interface JudgeEvaluation {
  /** Provider name of the judge. */
  judge: string;
  /** Display nickname of the judge. */
  judgeNickname: string;
  /** Prompt associated with the evaluation; the judge tool leaves it empty for the caller to fill. */
  prompt: string;
  /** Criteria the judge was asked to apply. */
  criteria: Array<string>;
  /** Ranked responses. */
  rankings: Array<JudgeRanking>;
  /** Scores keyed by provider, then by criterion name. */
  criteriaScores: Record<string, Record<string, number>>;
  /** The judge's overall assessment. */
  summary: string;
  /** Unparsed text of the judge's reply. */
  rawResponse: string;
}
|
|
133
|
+
|
|
134
|
+
// Iteration Types
|
|
135
|
+
/**
 * One step of an iteration session.
 * NOTE(review): the iterate tool is not visible from here; field notes follow
 * the declared names and types only.
 */
export interface IterationRound {
  /** Round number; whether it starts at 0 or 1 is not visible here — TODO confirm. */
  round: number;
  /** Provider name of the duck acting in this round. */
  provider: string;
  /** Display nickname of that duck. */
  nickname: string;
  /** The part the duck played in this round. */
  role: 'generator' | 'critic' | 'refiner';
  /** Text produced in this round. */
  content: string;
  /** Time associated with the round. */
  timestamp: Date;
}
|
|
143
|
+
|
|
144
|
+
/**
 * Outcome of an iteration session between two providers.
 * NOTE(review): the iterate tool is not visible from here; field notes follow
 * the declared names and types only.
 */
export interface IterationResult {
  /** The prompt the session started from. */
  prompt: string;
  /** Iteration strategy that was used. */
  mode: 'refine' | 'critique-improve';
  /** Exactly two provider names. */
  providers: [string, string];
  /** The recorded rounds. */
  rounds: Array<IterationRound>;
  /** The final response text. */
  finalResponse: string;
  /** Number of iterations performed. */
  totalIterations: number;
  /** Whether the session was considered converged; the criterion is not visible here. */
  converged: boolean;
}
|
|
153
|
+
|
|
154
|
+
// Debate Types
|
|
155
|
+
/** Allowed values for the `format` field of DebateResult. */
export type DebateFormat = 'oxford' | 'socratic' | 'adversarial';
|
|
156
|
+
/** Allowed values for the `position` field of DebateParticipant and DebateArgument. */
export type DebatePosition = 'pro' | 'con' | 'neutral';
|
|
157
|
+
|
|
158
|
+
/** A duck taking part in a debate, together with the side it takes. */
export interface DebateParticipant {
  /** Provider name of the participant. */
  provider: string;
  /** Display nickname of the participant. */
  nickname: string;
  /** Side assigned to the participant. */
  position: DebatePosition;
}
|
|
163
|
+
|
|
164
|
+
/** A single contribution made by one participant during a debate round. */
export interface DebateArgument {
  /** Round number; whether it starts at 0 or 1 is not visible here — TODO confirm. */
  round: number;
  /** Provider name of the speaker. */
  provider: string;
  /** Display nickname of the speaker. */
  nickname: string;
  /** Side the speaker argues for. */
  position: DebatePosition;
  /** Text of the argument. */
  content: string;
  /** Time associated with the argument. */
  timestamp: Date;
}
|
|
172
|
+
|
|
173
|
+
/**
 * Complete record of a debate.
 * NOTE(review): the debate tool is not visible from here; field notes follow
 * the declared names and types only.
 */
export interface DebateResult {
  /** The topic that was debated. */
  topic: string;
  /** Debate format that was used. */
  format: DebateFormat;
  /** The participating ducks and their positions. */
  participants: Array<DebateParticipant>;
  /** Arguments grouped per round: one inner array for each round. */
  rounds: Array<Array<DebateArgument>>;
  /** Closing synthesis text. */
  synthesis: string;
  /** Presumably the provider that wrote the synthesis — TODO confirm. */
  synthesizer: string;
  /** Number of rounds in the debate. */
  totalRounds: number;
}
|
package/src/server.ts
CHANGED
|
@@ -27,6 +27,10 @@ import { listDucksTool } from './tools/list-ducks.js';
|
|
|
27
27
|
import { listModelsTool } from './tools/list-models.js';
|
|
28
28
|
import { compareDucksTool } from './tools/compare-ducks.js';
|
|
29
29
|
import { duckCouncilTool } from './tools/duck-council.js';
|
|
30
|
+
import { duckVoteTool } from './tools/duck-vote.js';
|
|
31
|
+
import { duckJudgeTool } from './tools/duck-judge.js';
|
|
32
|
+
import { duckIterateTool } from './tools/duck-iterate.js';
|
|
33
|
+
import { duckDebateTool } from './tools/duck-debate.js';
|
|
30
34
|
|
|
31
35
|
// Import MCP tools
|
|
32
36
|
import { getPendingApprovalsTool } from './tools/get-pending-approvals.js';
|
|
@@ -162,6 +166,18 @@ export class RubberDuckServer {
|
|
|
162
166
|
}
|
|
163
167
|
return await duckCouncilTool(this.providerManager, args || {});
|
|
164
168
|
|
|
169
|
+
case 'duck_vote':
|
|
170
|
+
return await duckVoteTool(this.providerManager, args || {});
|
|
171
|
+
|
|
172
|
+
case 'duck_judge':
|
|
173
|
+
return await duckJudgeTool(this.providerManager, args || {});
|
|
174
|
+
|
|
175
|
+
case 'duck_iterate':
|
|
176
|
+
return await duckIterateTool(this.providerManager, args || {});
|
|
177
|
+
|
|
178
|
+
case 'duck_debate':
|
|
179
|
+
return await duckDebateTool(this.providerManager, args || {});
|
|
180
|
+
|
|
165
181
|
// MCP-specific tools
|
|
166
182
|
case 'get_pending_approvals':
|
|
167
183
|
if (!this.approvalService) {
|
|
@@ -487,6 +503,144 @@ export class RubberDuckServer {
|
|
|
487
503
|
required: ['prompt'],
|
|
488
504
|
},
|
|
489
505
|
},
|
|
506
|
+
{
|
|
507
|
+
name: 'duck_vote',
|
|
508
|
+
description: 'Have multiple ducks vote on options with reasoning. Returns vote tally, confidence scores, and consensus level.',
|
|
509
|
+
inputSchema: {
|
|
510
|
+
type: 'object',
|
|
511
|
+
properties: {
|
|
512
|
+
question: {
|
|
513
|
+
type: 'string',
|
|
514
|
+
description: 'The question to vote on (e.g., "Best approach for error handling?")',
|
|
515
|
+
},
|
|
516
|
+
options: {
|
|
517
|
+
type: 'array',
|
|
518
|
+
items: { type: 'string' },
|
|
519
|
+
minItems: 2,
|
|
520
|
+
maxItems: 10,
|
|
521
|
+
description: 'The options to vote on (2-10 options)',
|
|
522
|
+
},
|
|
523
|
+
voters: {
|
|
524
|
+
type: 'array',
|
|
525
|
+
items: { type: 'string' },
|
|
526
|
+
description: 'List of provider names to vote (optional, uses all if not specified)',
|
|
527
|
+
},
|
|
528
|
+
require_reasoning: {
|
|
529
|
+
type: 'boolean',
|
|
530
|
+
default: true,
|
|
531
|
+
description: 'Require ducks to explain their vote (default: true)',
|
|
532
|
+
},
|
|
533
|
+
},
|
|
534
|
+
required: ['question', 'options'],
|
|
535
|
+
},
|
|
536
|
+
},
|
|
537
|
+
{
|
|
538
|
+
name: 'duck_judge',
|
|
539
|
+
description: 'Have one duck evaluate and rank other ducks\' responses. Use after duck_council to get a comparative evaluation.',
|
|
540
|
+
inputSchema: {
|
|
541
|
+
type: 'object',
|
|
542
|
+
properties: {
|
|
543
|
+
responses: {
|
|
544
|
+
type: 'array',
|
|
545
|
+
items: {
|
|
546
|
+
type: 'object',
|
|
547
|
+
properties: {
|
|
548
|
+
provider: { type: 'string' },
|
|
549
|
+
nickname: { type: 'string' },
|
|
550
|
+
model: { type: 'string' },
|
|
551
|
+
content: { type: 'string' },
|
|
552
|
+
},
|
|
553
|
+
required: ['provider', 'nickname', 'content'],
|
|
554
|
+
},
|
|
555
|
+
minItems: 2,
|
|
556
|
+
description: 'Array of duck responses to evaluate (from duck_council output)',
|
|
557
|
+
},
|
|
558
|
+
judge: {
|
|
559
|
+
type: 'string',
|
|
560
|
+
description: 'Provider name of the judge duck (optional, uses first available)',
|
|
561
|
+
},
|
|
562
|
+
criteria: {
|
|
563
|
+
type: 'array',
|
|
564
|
+
items: { type: 'string' },
|
|
565
|
+
description: 'Evaluation criteria (default: ["accuracy", "completeness", "clarity"])',
|
|
566
|
+
},
|
|
567
|
+
persona: {
|
|
568
|
+
type: 'string',
|
|
569
|
+
description: 'Judge persona (e.g., "senior engineer", "security expert")',
|
|
570
|
+
},
|
|
571
|
+
},
|
|
572
|
+
required: ['responses'],
|
|
573
|
+
},
|
|
574
|
+
},
|
|
575
|
+
{
|
|
576
|
+
name: 'duck_iterate',
|
|
577
|
+
description: 'Iteratively refine a response between two ducks. One generates, the other critiques/improves, alternating for multiple rounds.',
|
|
578
|
+
inputSchema: {
|
|
579
|
+
type: 'object',
|
|
580
|
+
properties: {
|
|
581
|
+
prompt: {
|
|
582
|
+
type: 'string',
|
|
583
|
+
description: 'The initial prompt/task to iterate on',
|
|
584
|
+
},
|
|
585
|
+
iterations: {
|
|
586
|
+
type: 'number',
|
|
587
|
+
minimum: 1,
|
|
588
|
+
maximum: 10,
|
|
589
|
+
default: 3,
|
|
590
|
+
description: 'Number of iteration rounds (default: 3, max: 10)',
|
|
591
|
+
},
|
|
592
|
+
providers: {
|
|
593
|
+
type: 'array',
|
|
594
|
+
items: { type: 'string' },
|
|
595
|
+
minItems: 2,
|
|
596
|
+
maxItems: 2,
|
|
597
|
+
description: 'Exactly 2 provider names for the ping-pong iteration',
|
|
598
|
+
},
|
|
599
|
+
mode: {
|
|
600
|
+
type: 'string',
|
|
601
|
+
enum: ['refine', 'critique-improve'],
|
|
602
|
+
description: 'refine: each duck improves the previous response. critique-improve: alternates between critiquing and improving.',
|
|
603
|
+
},
|
|
604
|
+
},
|
|
605
|
+
required: ['prompt', 'providers', 'mode'],
|
|
606
|
+
},
|
|
607
|
+
},
|
|
608
|
+
{
|
|
609
|
+
name: 'duck_debate',
|
|
610
|
+
description: 'Structured multi-round debate between ducks. Supports oxford (pro/con), socratic (questioning), and adversarial (attack/defend) formats.',
|
|
611
|
+
inputSchema: {
|
|
612
|
+
type: 'object',
|
|
613
|
+
properties: {
|
|
614
|
+
prompt: {
|
|
615
|
+
type: 'string',
|
|
616
|
+
description: 'The debate topic or proposition',
|
|
617
|
+
},
|
|
618
|
+
rounds: {
|
|
619
|
+
type: 'number',
|
|
620
|
+
minimum: 1,
|
|
621
|
+
maximum: 10,
|
|
622
|
+
default: 3,
|
|
623
|
+
description: 'Number of debate rounds (default: 3)',
|
|
624
|
+
},
|
|
625
|
+
providers: {
|
|
626
|
+
type: 'array',
|
|
627
|
+
items: { type: 'string' },
|
|
628
|
+
minItems: 2,
|
|
629
|
+
description: 'Provider names to participate (min 2, uses all if not specified)',
|
|
630
|
+
},
|
|
631
|
+
format: {
|
|
632
|
+
type: 'string',
|
|
633
|
+
enum: ['oxford', 'socratic', 'adversarial'],
|
|
634
|
+
description: 'Debate format: oxford (pro/con), socratic (questioning), adversarial (attack/defend)',
|
|
635
|
+
},
|
|
636
|
+
synthesizer: {
|
|
637
|
+
type: 'string',
|
|
638
|
+
description: 'Provider to synthesize the debate (optional, uses first provider)',
|
|
639
|
+
},
|
|
640
|
+
},
|
|
641
|
+
required: ['prompt', 'format'],
|
|
642
|
+
},
|
|
643
|
+
},
|
|
490
644
|
];
|
|
491
645
|
|
|
492
646
|
// Add MCP-specific tools if enabled
|