dialectic 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.cursor/commands/setup-test.mdc +175 -0
- package/.cursor/rules/basic-code-cleanup.mdc +1110 -0
- package/.cursor/rules/riper5.mdc +96 -0
- package/.env.example +6 -0
- package/AGENTS.md +1052 -0
- package/LICENSE +21 -0
- package/README.md +93 -0
- package/WARP.md +113 -0
- package/dialectic-1.0.0.tgz +0 -0
- package/dialectic.js +10 -0
- package/docs/commands.md +375 -0
- package/docs/configuration.md +882 -0
- package/docs/context_summarization.md +1023 -0
- package/docs/debate_flow.md +1127 -0
- package/docs/eval_flow.md +795 -0
- package/docs/evaluator.md +141 -0
- package/examples/debate-config-openrouter.json +48 -0
- package/examples/debate_config1.json +48 -0
- package/examples/eval/eval1/eval_config1.json +13 -0
- package/examples/eval/eval1/result1.json +62 -0
- package/examples/eval/eval1/result2.json +97 -0
- package/examples/eval_summary_format.md +11 -0
- package/examples/example3/debate-config.json +64 -0
- package/examples/example3/eval_config2.json +25 -0
- package/examples/example3/problem.md +17 -0
- package/examples/example3/rounds_test/eval_run.sh +16 -0
- package/examples/example3/rounds_test/run_test.sh +16 -0
- package/examples/kata1/architect-only-solution_2-rounds.json +121 -0
- package/examples/kata1/architect-perf-solution_2-rounds.json +234 -0
- package/examples/kata1/debate-config-kata1.json +54 -0
- package/examples/kata1/eval_architect-only_2-rounds.json +97 -0
- package/examples/kata1/eval_architect-perf_2-rounds.json +97 -0
- package/examples/kata1/kata1-report.md +12224 -0
- package/examples/kata1/kata1-report_temps-01_01_01_07.md +2451 -0
- package/examples/kata1/kata1.md +5 -0
- package/examples/kata1/meta.txt +1 -0
- package/examples/kata2/debate-config.json +54 -0
- package/examples/kata2/eval_config1.json +21 -0
- package/examples/kata2/eval_config2.json +25 -0
- package/examples/kata2/kata2.md +5 -0
- package/examples/kata2/only_architect/debate-config.json +45 -0
- package/examples/kata2/only_architect/eval_run.sh +11 -0
- package/examples/kata2/only_architect/run_test.sh +5 -0
- package/examples/kata2/rounds_test/eval_run.sh +11 -0
- package/examples/kata2/rounds_test/run_test.sh +5 -0
- package/examples/kata2/summary_length_test/eval_run.sh +11 -0
- package/examples/kata2/summary_length_test/eval_run_w_clarify.sh +7 -0
- package/examples/kata2/summary_length_test/run_test.sh +5 -0
- package/examples/task-queue/debate-config.json +76 -0
- package/examples/task-queue/debate_report.md +566 -0
- package/examples/task-queue/task-queue-system.md +25 -0
- package/jest.config.ts +13 -0
- package/multi_agent_debate_spec.md +2980 -0
- package/package.json +38 -0
- package/sanity-check-problem.txt +9 -0
- package/src/agents/prompts/architect-prompts.ts +203 -0
- package/src/agents/prompts/generalist-prompts.ts +157 -0
- package/src/agents/prompts/index.ts +41 -0
- package/src/agents/prompts/judge-prompts.ts +19 -0
- package/src/agents/prompts/kiss-prompts.ts +230 -0
- package/src/agents/prompts/performance-prompts.ts +142 -0
- package/src/agents/prompts/prompt-types.ts +68 -0
- package/src/agents/prompts/security-prompts.ts +149 -0
- package/src/agents/prompts/shared.ts +144 -0
- package/src/agents/prompts/testing-prompts.ts +149 -0
- package/src/agents/role-based-agent.ts +386 -0
- package/src/cli/commands/debate.ts +761 -0
- package/src/cli/commands/eval.ts +475 -0
- package/src/cli/commands/report.ts +265 -0
- package/src/cli/index.ts +79 -0
- package/src/core/agent.ts +198 -0
- package/src/core/clarifications.ts +34 -0
- package/src/core/judge.ts +257 -0
- package/src/core/orchestrator.ts +432 -0
- package/src/core/state-manager.ts +322 -0
- package/src/eval/evaluator-agent.ts +130 -0
- package/src/eval/prompts/system.md +41 -0
- package/src/eval/prompts/user.md +64 -0
- package/src/providers/llm-provider.ts +25 -0
- package/src/providers/openai-provider.ts +84 -0
- package/src/providers/openrouter-provider.ts +122 -0
- package/src/providers/provider-factory.ts +64 -0
- package/src/types/agent.types.ts +141 -0
- package/src/types/config.types.ts +47 -0
- package/src/types/debate.types.ts +237 -0
- package/src/types/eval.types.ts +85 -0
- package/src/utils/common.ts +104 -0
- package/src/utils/context-formatter.ts +102 -0
- package/src/utils/context-summarizer.ts +143 -0
- package/src/utils/env-loader.ts +46 -0
- package/src/utils/exit-codes.ts +5 -0
- package/src/utils/id.ts +11 -0
- package/src/utils/logger.ts +48 -0
- package/src/utils/paths.ts +10 -0
- package/src/utils/progress-ui.ts +313 -0
- package/src/utils/prompt-loader.ts +79 -0
- package/src/utils/report-generator.ts +301 -0
- package/tests/clarifications.spec.ts +128 -0
- package/tests/cli.debate.spec.ts +144 -0
- package/tests/config-loading.spec.ts +206 -0
- package/tests/context-summarizer.spec.ts +131 -0
- package/tests/debate-config-custom.json +38 -0
- package/tests/env-loader.spec.ts +149 -0
- package/tests/eval.command.spec.ts +1191 -0
- package/tests/logger.spec.ts +19 -0
- package/tests/openai-provider.spec.ts +26 -0
- package/tests/openrouter-provider.spec.ts +279 -0
- package/tests/orchestrator-summary.spec.ts +386 -0
- package/tests/orchestrator.spec.ts +207 -0
- package/tests/prompt-loader.spec.ts +52 -0
- package/tests/prompts/architect.md +16 -0
- package/tests/provider-factory.spec.ts +150 -0
- package/tests/report.command.spec.ts +546 -0
- package/tests/role-based-agent-summary.spec.ts +476 -0
- package/tests/security-agent.spec.ts +221 -0
- package/tests/shared-prompts.spec.ts +318 -0
- package/tests/state-manager.spec.ts +251 -0
- package/tests/summary-prompts.spec.ts +153 -0
- package/tsconfig.json +49 -0
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
import { AgentConfig, PromptSource } from '../types/agent.types';
|
|
2
|
+
import { DebateContext, DebateRound, Solution, DebateSummary, ContextPreparationResult, SummarizationConfig, CONTRIBUTION_TYPES } from '../types/debate.types';
|
|
3
|
+
import { LLMProvider } from '../providers/llm-provider';
|
|
4
|
+
import { ContextSummarizer, LengthBasedSummarizer } from '../utils/context-summarizer';
|
|
5
|
+
import { DEFAULT_JUDGE_SUMMARY_PROMPT } from '../agents/prompts/judge-prompts';
|
|
6
|
+
import { writeStderr } from '../cli/index';
|
|
7
|
+
|
|
8
|
+
/**
 * Built-in system prompt for the judge.
 *
 * Returned verbatim by `JudgeAgent.defaultSystemPrompt()`. The text spans two lines
 * inside a template literal, so the embedded newline is part of the prompt.
 */
const DEFAULT_JUDGE_SYSTEM_PROMPT = `You are an expert technical judge responsible for synthesizing the best solution from multiple agent proposals and debates.
Be objective and evidence-based; combine complementary ideas; address concerns; provide recommendations and a confidence score.`;

/**
 * Sampling temperature the judge uses when `config.temperature` is null or undefined.
 * Applied to both synthesis calls and the judge's summarizer.
 */
const DEFAULT_JUDGE_TEMPERATURE = 0.3;

/**
 * Fixed confidence value attached to every synthesized Solution.
 * It is a placeholder: the judge does not derive a score from the LLM response.
 */
const DEFAULT_CONFIDENCE_SCORE = 75;
|
|
23
|
+
|
|
24
|
+
/**
 * JudgeAgent synthesizes a single final Solution from the debate history.
 *
 * It reads the proposals, critiques and refinements recorded across rounds and asks the
 * configured LLM provider to combine the strongest ideas. When summarization is enabled
 * and the final round is large enough, only the final round's proposals and refinements
 * are sent to the model instead of the complete history.
 */
export class JudgeAgent {
  private readonly resolvedSystemPrompt: string;
  public readonly promptSource?: PromptSource;

  // Summarization-related fields
  private readonly summarizer?: ContextSummarizer;
  private readonly summaryConfig: SummarizationConfig;
  public readonly summaryPromptSource?: PromptSource;

  /**
   * @param config - Judge agent configuration (id, name, role, model, provider, temperature).
   * @param provider - LLM provider used for synthesis and, when enabled, summarization.
   * @param resolvedSystemPrompt - System prompt text already resolved by the caller.
   * @param promptSource - Provenance of the system prompt; stored only when defined.
   * @param summaryConfig - Summarization settings (enabled flag, threshold, maxLength).
   * @param summaryPromptSource - Provenance of the summary prompt; stored only when defined.
   */
  constructor(
    private config: AgentConfig,
    private provider: LLMProvider,
    resolvedSystemPrompt: string,
    promptSource: PromptSource | undefined,
    summaryConfig: SummarizationConfig,
    summaryPromptSource?: PromptSource
  ) {
    this.resolvedSystemPrompt = resolvedSystemPrompt;
    this.summaryConfig = summaryConfig;

    // Optional fields are assigned only when a value exists, so they are never set to an explicit `undefined`.
    if (promptSource !== undefined) {
      this.promptSource = promptSource;
    }

    if (summaryPromptSource !== undefined) {
      this.summaryPromptSource = summaryPromptSource;
    }

    // Initialize summarizer if summarization is enabled
    if (summaryConfig.enabled) {
      this.summarizer = new LengthBasedSummarizer(provider, {
        model: this.config.model,
        temperature: this.config.temperature ?? DEFAULT_JUDGE_TEMPERATURE,
        provider: this.config.provider,
      });
    }
  }

  /**
   * Synthesizes a final Solution for the given problem from the debate rounds.
   *
   * `tradeoffs` and `recommendations` are returned empty and `confidence` is the fixed
   * `DEFAULT_CONFIDENCE_SCORE`; the model's free-text answer is returned as `description`.
   *
   * @param problem - The problem statement under debate.
   * @param rounds - The debate rounds containing proposals and critiques.
   * @param _context - Additional debate context (unused for now).
   * @returns A synthesized Solution that includes a description and basic metadata.
   */
  async synthesize(problem: string, rounds: DebateRound[], _context: DebateContext): Promise<Solution> {
    const prompt = this.buildSynthesisPrompt(problem, rounds);
    const systemPrompt = this.resolvedSystemPrompt;
    const temperature = this.config.temperature ?? DEFAULT_JUDGE_TEMPERATURE;

    const res = await this.provider.complete({
      model: this.config.model,
      temperature,
      systemPrompt,
      userPrompt: prompt,
    });

    return {
      description: res.text,
      tradeoffs: [],
      recommendations: [],
      confidence: DEFAULT_CONFIDENCE_SCORE,
      synthesizedBy: this.config.id,
    };
  }

  /**
   * Expose the default system prompt text for the judge.
   */
  static defaultSystemPrompt(): string { return DEFAULT_JUDGE_SYSTEM_PROMPT; }

  /**
   * Returns the default summary prompt for the judge.
   *
   * @param content - The content to summarize.
   * @param maxLength - Maximum length for the summary.
   * @returns The default summary prompt text for the judge.
   */
  static defaultSummaryPrompt(content: string, maxLength: number): string {
    return DEFAULT_JUDGE_SUMMARY_PROMPT(content, maxLength);
  }

  /**
   * Determines if context summarization should occur based on configuration and final round content size.
   *
   * Summarization is triggered when:
   * 1. Summarization is enabled in configuration
   * 2. Debate rounds exist
   * 3. Character count of the final round's proposals and refinements meets or exceeds the threshold
   *
   * @param rounds - The debate rounds to evaluate.
   * @returns True if summarization should occur, false otherwise.
   */
  shouldSummarize(rounds: DebateRound[]): boolean {
    if (!this.summaryConfig.enabled) {
      return false;
    }

    if (!rounds || rounds.length === 0) {
      return false;
    }

    const finalRoundContent = this.getFinalRoundRelevantContent(rounds);

    // Inclusive comparison: content exactly at the threshold is summarized.
    return finalRoundContent.length >= this.summaryConfig.threshold;
  }

  /**
   * Extracts proposals and refinements from the final round for summarization.
   * Critiques and any other contribution types are left out.
   *
   * @param rounds - The debate rounds.
   * @returns Concatenated text of final round's proposals and refinements, or '' when there is no final round.
   */
  private getFinalRoundRelevantContent(rounds: DebateRound[]): string {
    if (!rounds || rounds.length === 0) {
      return '';
    }

    const finalRound = rounds[rounds.length - 1];
    if (!finalRound) {
      return '';
    }

    const relevantContributions: string[] = [];

    for (const contribution of finalRound.contributions) {
      if (contribution.type === CONTRIBUTION_TYPES.PROPOSAL ||
          contribution.type === CONTRIBUTION_TYPES.REFINEMENT) {
        relevantContributions.push(`[${contribution.agentRole}] ${contribution.type}:\n${contribution.content}`);
      }
    }

    return relevantContributions.join('\n\n');
  }

  /**
   * Converts an arbitrary thrown value into printable text.
   *
   * Anything can be thrown in JavaScript, including `null` and plain strings, so the
   * value is inspected before `message` is read.
   *
   * @param error - The value caught by a `catch` clause.
   * @returns The error's `message` when it has one, otherwise its string form.
   */
  private static describeError(error: unknown): string {
    if (error instanceof Error) {
      return error.message;
    }
    if (typeof error === 'object' && error !== null && 'message' in error) {
      return String((error as { message: unknown }).message);
    }
    return String(error);
  }

  /**
   * Prepares the debate context for synthesis, generating a summary when needed.
   *
   * The returned context always carries an empty `problem` and the unmodified `rounds`
   * as `history`. When `shouldSummarize()` is true and a summarizer exists, the final
   * round's proposals and refinements are summarized and the result is returned in the
   * sibling `summary` field.
   *
   * If no summarizer is available, or summarization throws, a warning is written to
   * stderr and the result is returned without a `summary`; this method does not rethrow.
   *
   * @param rounds - The debate rounds to prepare.
   * @returns The context preparation result.
   */
  async prepareContext(rounds: DebateRound[]): Promise<ContextPreparationResult> {
    if (!this.shouldSummarize(rounds)) {
      return { context: { problem: '', history: rounds } };
    }

    if (!this.summarizer) {
      writeStderr(`Warning: Judge ${this.config.name}: Summarization enabled but no summarizer available. Using final round content.\n`);
      return { context: { problem: '', history: rounds } };
    }

    try {
      const contentToSummarize = this.getFinalRoundRelevantContent(rounds);
      const summaryPrompt = DEFAULT_JUDGE_SUMMARY_PROMPT(contentToSummarize, this.summaryConfig.maxLength);

      const result = await this.summarizer.summarize(
        contentToSummarize,
        this.config.role,
        this.summaryConfig,
        this.resolvedSystemPrompt,
        summaryPrompt
      );

      const summary: DebateSummary = {
        agentId: this.config.id,
        agentRole: this.config.role,
        summary: result.summary,
        metadata: result.metadata,
      };

      return { context: { problem: '', history: rounds }, summary };
    } catch (error: unknown) {
      // Log error to stderr and fall back to a result without a summary.
      // describeError keeps this path from throwing when the thrown value is null/undefined.
      writeStderr(
        `Warning: Judge ${this.config.name}: Summarization failed with error: ${JudgeAgent.describeError(error)}. Falling back to final round content.\n`
      );
      return { context: { problem: '', history: rounds } };
    }
  }

  /**
   * Builds the synthesis prompt by stitching the problem and the debate history
   * into a single, structured instruction for the LLM.
   *
   * When `shouldSummarize()` is true only the final round's proposals and refinements
   * are included; otherwise every contribution of every round is listed.
   *
   * @param problem - The problem statement.
   * @param rounds - The debate rounds to present to the judge.
   * @returns A complete user prompt string for the judge to synthesize a solution.
   */
  private buildSynthesisPrompt(problem: string, rounds: DebateRound[]): string {
    let text = `Problem: ${problem}\n\n`;

    // Check if we should use summarization
    if (this.shouldSummarize(rounds)) {
      // Use only final round's proposals and refinements
      const finalRoundContent = this.getFinalRoundRelevantContent(rounds);
      if (finalRoundContent) {
        text += `Final Round Key Contributions:\n${finalRoundContent}\n\n`;
      }
    } else {
      // Use full history
      rounds.forEach((round, idx) => {
        text += `Round ${idx + 1}\n`;
        for (const c of round.contributions) {
          text += `[${c.agentRole}] ${c.type}:\n${c.content}\n\n`;
        }
      });
    }

    text += `\nSynthesize the best solution incorporating strongest ideas, addressing concerns, with clear recommendations and a confidence score.`;
    return text;
  }
}
|
|
@@ -0,0 +1,432 @@
|
|
|
1
|
+
import { Agent } from './agent';
|
|
2
|
+
import { JudgeAgent } from './judge';
|
|
3
|
+
import { StateManager } from './state-manager';
|
|
4
|
+
import { DebateConfig, DebateContext, DebateResult, DebateState, DebateRound, Contribution, Solution, CONTRIBUTION_TYPES, ContributionType, AgentClarifications } from '../types/debate.types';
|
|
5
|
+
import { writeStderr } from '../cli/index';
|
|
6
|
+
import { AgentRole, Critique } from '../types/agent.types';
|
|
7
|
+
|
|
8
|
+
// Constants for agent activity descriptions used in progress tracking.
// These labels are passed as the `activity` argument of the agent start/complete hooks.
const ACTIVITY_PROPOSING = 'proposing';
const ACTIVITY_CRITIQUING = 'critiquing';
const ACTIVITY_REFINING = 'refining';
|
|
12
|
+
|
|
13
|
+
/**
 * Narrows a `Promise.allSettled` entry to its fulfilled variant.
 *
 * @param result - One settled entry as produced by `Promise.allSettled`.
 * @returns True when the entry's status is 'fulfilled'; false otherwise.
 */
function isFulfilled<T>(result: PromiseSettledResult<T>): result is PromiseFulfilledResult<T> {
  const { status } = result;
  return status === 'fulfilled';
}
|
|
21
|
+
|
|
22
|
+
/**
 * Optional observer callbacks fired by the orchestrator while a debate runs.
 *
 * Intended for progress UIs, logging, or any other listener that wants fine-grained
 * visibility into rounds, phases, per-agent activity, summarization and synthesis.
 * Every member is optional; supply only the ones you need. Members are listed roughly
 * in the order they fire during a round.
 */
interface OrchestratorHooks {
  /**
   * Fired when a debate round begins.
   * @param roundNumber - The round that is starting (1-indexed).
   * @param totalRounds - How many rounds the debate will run in total.
   */
  onRoundStart?: (roundNumber: number, totalRounds: number) => void;

  /**
   * Fired when an agent starts preparing/summarizing its context.
   * @param agentName - Name of the agent that is summarizing.
   */
  onSummarizationStart?: (agentName: string) => void;

  /**
   * Fired when an agent's summarization produced a summary.
   * @param agentName - Name of the agent that finished summarizing.
   * @param beforeChars - Character count before summarization.
   * @param afterChars - Character count after summarization.
   */
  onSummarizationComplete?: (agentName: string, beforeChars: number, afterChars: number) => void;

  /**
   * Fired when an agent's summarization step ends without a summary being produced,
   * so a UI can clear any pending activity indicator.
   * @param agentName - Name of the agent whose summarization step ended.
   */
  onSummarizationEnd?: (agentName: string) => void;

  /**
   * Fired when a phase begins inside a round.
   * @param roundNumber - The current round (1-indexed).
   * @param phase - Which phase is starting.
   * @param expectedTaskCount - Number of agent tasks expected during this phase.
   */
  onPhaseStart?: (roundNumber: number, phase: ContributionType, expectedTaskCount: number) => void;

  /**
   * Fired when an agent begins an activity such as proposing, critiquing or refining.
   * @param agentName - Name of the agent that is starting.
   * @param activity - Label for the activity (e.g., "proposing").
   */
  onAgentStart?: (agentName: string, activity: string) => void;

  /**
   * Fired when an agent finishes an activity.
   * @param agentName - Name of the agent that finished.
   * @param activity - Label for the activity (e.g., "proposing").
   */
  onAgentComplete?: (agentName: string, activity: string) => void;

  /**
   * Fired when a phase (proposal, critique, or refinement) finishes inside a round.
   * @param roundNumber - The current round (1-indexed).
   * @param phase - Which phase just completed.
   */
  onPhaseComplete?: (roundNumber: number, phase: ContributionType) => void;

  /**
   * Fired right before the judge starts synthesizing the final solution.
   */
  onSynthesisStart?: () => void;

  /**
   * Fired once the judge has finished synthesizing the final solution.
   */
  onSynthesisComplete?: () => void;
}
|
|
95
|
+
|
|
96
|
+
/**
|
|
97
|
+
* DebateOrchestrator coordinates multi-round debates between agents and a judge.
|
|
98
|
+
*
|
|
99
|
+
* Rounds and phases:
|
|
100
|
+
* - Executes N complete rounds as specified in DebateConfig.rounds
|
|
101
|
+
* - Each round runs all phases in order: proposal → critique → refinement
|
|
102
|
+
* - Proposals are fresh each round; agents may incorporate full history when includeFullHistory is true
|
|
103
|
+
*
|
|
104
|
+
* The orchestrator records contributions and metadata via the StateManager.
|
|
105
|
+
*
|
|
106
|
+
* Hooks:
|
|
107
|
+
* - Optionally accepts an onPhaseComplete callback to signal CLI after each phase
|
|
108
|
+
*
|
|
109
|
+
* @param agents - Participating agents.
|
|
110
|
+
* @param judge - The judge responsible for synthesis.
|
|
111
|
+
* @param stateManager - Persistence layer for debate state.
|
|
112
|
+
* @param config - Debate configuration and thresholds.
|
|
113
|
+
* @param hooks - Optional hooks for phase completion notifications.
|
|
114
|
+
*/
|
|
115
|
+
export class DebateOrchestrator {
|
|
116
|
+
// All collaborators are stored as private parameter properties; the constructor body is empty.
// `hooks` is optional, so every hook invocation elsewhere uses optional chaining.
constructor(
  private agents: Agent[],
  private judge: JudgeAgent,
  private stateManager: StateManager,
  private config: DebateConfig,
  private hooks?: OrchestratorHooks
) {}
|
|
123
|
+
|
|
124
|
+
/**
 * Runs the full debate workflow (proposal → critique → refinement → synthesis).
 * Executes the configured number of rounds; each round first runs the summarization
 * phase and then performs all three debate phases in order.
 *
 * At least one round always runs: a round count that is not a finite number
 * (NaN, undefined, ±Infinity) is treated as 1, and finite values below 1 are raised to 1.
 *
 * After each phase completes, the optional onPhaseComplete hook is invoked. The
 * onSynthesisStart / onSynthesisComplete hooks bracket the judge's synthesis.
 *
 * @param problem - The problem statement to debate.
 * @param context - Optional additional context for agents and judge.
 * @param clarifications - Optional per-agent clarifications; persisted on the debate state when non-empty.
 * @returns The DebateResult including final solution and metadata.
 */
async runDebate(problem: string, context?: string, clarifications?: AgentClarifications[]): Promise<DebateResult> {
  const state = await this.stateManager.createDebate(problem, context);
  if (clarifications && clarifications.length > 0) {
    await this.stateManager.setClarifications(state.id, clarifications);
  }

  // Math.max propagates NaN, which would make `r <= total` false on the first check and
  // silently skip every round; Infinity would never terminate. Fall back to one round.
  const configuredRounds = Number.isFinite(this.config.rounds) ? this.config.rounds : 1;
  const total = Math.max(1, configuredRounds);

  // Execute N complete rounds: summarization -> proposal -> critique -> refinement
  for (let r = 1; r <= total; r++) {
    this.hooks?.onRoundStart?.(r, total);
    await this.stateManager.beginRound(state.id);

    // Summarization phase: prepare contexts for all agents
    const preparedContexts = await this.summarizationPhase(state, r);

    await this.proposalPhase(state, r, preparedContexts);
    this.hooks?.onPhaseComplete?.(r, CONTRIBUTION_TYPES.PROPOSAL);

    await this.critiquePhase(state, r, preparedContexts);
    this.hooks?.onPhaseComplete?.(r, CONTRIBUTION_TYPES.CRITIQUE);

    await this.refinementPhase(state, r, preparedContexts);
    this.hooks?.onPhaseComplete?.(r, CONTRIBUTION_TYPES.REFINEMENT);
  }

  this.hooks?.onSynthesisStart?.();
  const solution = await this.synthesisPhase(state);
  this.hooks?.onSynthesisComplete?.();
  await this.stateManager.completeDebate(state.id, solution);

  return {
    debateId: state.id,
    solution,
    rounds: state.rounds,
    metadata: {
      totalRounds: state.rounds.length,
      // Wall-clock duration measured from the state's creation timestamp.
      durationMs: Date.now() - state.createdAt.getTime(),
    },
  };
}
|
|
175
|
+
|
|
176
|
+
/**
 * Assembles the DebateContext handed to agents and the judge.
 *
 * `problem` and `includeFullHistory` are always set. `context` is copied only when the
 * state defines it, `history` only when config.includeFullHistory is truthy, and
 * `clarifications` only when the state carries them.
 *
 * @param state - The current debate state.
 * @returns Context object passed to agents and judge.
 */
private buildContext(state: DebateState): DebateContext {
  const wantsHistory = this.config.includeFullHistory;

  // Conditional spreads keep absent optional keys off the object entirely
  // while preserving the original key insertion order.
  const assembled: any = {
    problem: state.problem,
    ...(state.context !== undefined ? { context: state.context } : {}),
    ...(wantsHistory ? { history: state.rounds } : {}),
    includeFullHistory: wantsHistory,
    ...(state.clarifications ? { clarifications: state.clarifications } : {}),
  };

  return assembled as DebateContext;
}
|
|
195
|
+
|
|
196
|
+
/**
 * Summarization phase: lets every agent prepare (and possibly summarize) its context.
 *
 * Runs ahead of the proposal phase in each round. Agents are processed one at a time,
 * in the order they were supplied. For each agent the start hook fires, the agent
 * prepares its context, and then either the summary is persisted and the completion
 * hook fires, or the end hook fires when no summary was produced.
 *
 * @param state - Current debate state.
 * @param roundNumber - Current round number for tracking.
 * @returns A map of agent ID to prepared context for use in debate phases.
 */
private async summarizationPhase(
  state: DebateState,
  roundNumber: number
): Promise<Map<string, DebateContext>> {
  const sharedContext = this.buildContext(state);
  const contextsByAgent = new Map<string, DebateContext>();

  for (const agent of this.agents) {
    const { id: agentId, name: agentName } = agent.config;
    this.hooks?.onSummarizationStart?.(agentName);

    const prepared = await agent.prepareContext(sharedContext, roundNumber);
    const producedSummary = prepared.summary;

    if (!producedSummary) {
      // No summary: still notify so the UI can clear the pending activity
      this.hooks?.onSummarizationEnd?.(agentName);
    } else {
      // Persist the summary first, then report before/after sizes
      await this.stateManager.addSummary(state.id, producedSummary);
      this.hooks?.onSummarizationComplete?.(
        agentName,
        producedSummary.metadata.beforeChars,
        producedSummary.metadata.afterChars
      );
    }

    // Remember the prepared context for the later phases of this round
    contextsByAgent.set(agentId, prepared.context);
  }

  return contextsByAgent;
}
|
|
234
|
+
|
|
235
|
+
/**
 * Wraps an agent response in a normalized Contribution record.
 *
 * The response metadata is copied, `latencyMs` is filled in from the wall clock when the
 * response did not report it, and `model` is always overwritten with the agent's
 * configured model.
 *
 * @param agent - Source agent.
 * @param type - Contribution type.
 * @param content - Contribution content.
 * @param existingMetadata - Metadata from the agent response.
 * @param startedAtMs - Timestamp captured before calling the agent (used for latency fallback).
 * @param targetAgentId - Optional target agent id (used for critiques).
 * @returns Contribution ready to persist.
 * @final
 */
private buildContribution(
  agent: Agent,
  type: Contribution['type'],
  content: string,
  existingMetadata: Contribution['metadata'],
  startedAtMs: number,
  targetAgentId?: string
): Contribution {
  const agentRole: AgentRole = agent.config.role;

  const record: Contribution = {
    agentId: agent.config.id,
    agentRole,
    type,
    content,
    metadata: {
      ...existingMetadata,
      // Wall-clock fallback keeps timing metrics comparable across providers
      latencyMs: existingMetadata.latencyMs ?? (Date.now() - startedAtMs),
      // The configured model is the source of truth for the run
      model: agent.config.model,
    },
  };

  // Only critiques carry a target; leave the key off otherwise
  if (targetAgentId) {
    record.targetAgentId = targetAgentId;
  }

  return record;
}
|
|
270
|
+
|
|
271
|
+
/**
 * Asks an agent's LLM for a proposal and wraps the answer as a Contribution.
 *
 * The agent's prepared context is used when the map holds one; otherwise a context is
 * built from the debate state. The agent's `propose` result is then normalized through
 * `buildContribution` with the PROPOSAL type.
 *
 * @param agent - The agent instance generating the proposal.
 * @param state - The current debate state, including the problem statement.
 * @param preparedContexts - A map of agent IDs to their prepared context objects (possibly summarized).
 * @param startedAtMs - The wall-clock timestamp (in ms) when the agent's proposal process started,
 *                      used as a fallback for latency computation.
 * @returns A Promise resolving to the constructed proposal Contribution, ready to be added to debate state.
 */
private async buildProposalContributionFromLLM(
  agent: Agent,
  state: DebateState,
  preparedContexts: Map<string, DebateContext>,
  startedAtMs: number
): Promise<Contribution> {
  const prepared = preparedContexts.get(agent.config.id);
  const effectiveContext = prepared ? prepared : this.buildContext(state);

  const { content, metadata } = await agent.propose(state.problem, effectiveContext);

  return this.buildContribution(agent, CONTRIBUTION_TYPES.PROPOSAL, content, metadata, startedAtMs);
}
|
|
291
|
+
|
|
292
|
+
/**
 * Runs the proposal phase of a round for all agents concurrently.
 *
 * In round 1 every agent's LLM is asked for a fresh proposal. In later rounds an agent's
 * refinement from the prior round (the second-to-last entry of `state.rounds`) is re-recorded
 * as its proposal with zeroed token/latency metadata; when no such refinement is found, a warning
 * is written to stderr and the LLM is asked instead. Each resulting contribution is stored
 * through the state manager, and the agent start/complete hooks are fired around the work.
 *
 * @param state - Current debate state.
 * @param roundNumber - Current round number; reported to hooks and used to pick the strategy.
 * @param preparedContexts - Map of agent id to prepared (potentially summarized) context.
 */
private async proposalPhase(state: DebateState, roundNumber: number, preparedContexts: Map<string, DebateContext>) {
  this.hooks?.onPhaseStart?.(roundNumber, CONTRIBUTION_TYPES.PROPOSAL, this.agents.length);

  // beginRound() has already appended the current round, so the prior round (if any)
  // is the second-to-last entry in the persisted order.
  const priorIndex = state.rounds.length - 2;
  const priorRound: DebateRound | undefined = priorIndex < 0 ? undefined : state.rounds[priorIndex];

  const proposeFor = async (agent: Agent): Promise<void> => {
    this.hooks?.onAgentStart?.(agent.config.name, ACTIVITY_PROPOSING);
    const startedAt = Date.now();

    // Only rounds after the first look for a refinement to carry over.
    const carried = roundNumber === 1
      ? undefined
      : (priorRound?.contributions || []).find(
          (entry) => entry.type === CONTRIBUTION_TYPES.REFINEMENT && entry.agentId === agent.config.id
        );

    let contribution: Contribution;
    if (carried) {
      // No LLM call is made for a carried-over refinement, hence zero tokens and zero latency.
      const zeroCost = { tokensUsed: 0, latencyMs: 0 } as Contribution['metadata'];
      contribution = this.buildContribution(agent, CONTRIBUTION_TYPES.PROPOSAL, carried.content, zeroCost, startedAt);
    } else {
      if (roundNumber !== 1) {
        writeStderr(`Warning: [Round ${roundNumber}] Missing previous refinement for ${agent.config.name}; falling back to LLM proposal.\n`);
      }
      contribution = await this.buildProposalContributionFromLLM(agent, state, preparedContexts, startedAt);
    }

    await this.stateManager.addContribution(state.id, contribution);
    this.hooks?.onAgentComplete?.(agent.config.name, ACTIVITY_PROPOSING);
  };

  await Promise.all(this.agents.map((agent) => proposeFor(agent)));
}
|
|
332
|
+
|
|
333
|
+
/**
 * Has each agent critique every proposal in the most recent round that another agent authored.
 *
 * Proposals are read from the last entry of `state.rounds`. One critique call is made per
 * (critic, foreign proposal) pair and all calls run concurrently via `Promise.allSettled`.
 * Critiques that succeed are persisted through the state manager before this method resolves;
 * critique calls that reject are not recorded.
 *
 * @param state - Current debate state.
 * @param roundNumber - Current round number for progress tracking.
 * @param preparedContexts - Map of agent ID to prepared (potentially summarized) context.
 * @returns A Promise that resolves once every successful critique has been stored; it rejects
 *          if storing a critique fails.
 */
private async critiquePhase(state: DebateState, roundNumber: number, preparedContexts: Map<string, DebateContext>) {
  // Proposals come from the last round in the state.
  const lastRound: DebateRound | undefined = state.rounds[state.rounds.length - 1];
  const proposals = (lastRound?.contributions || []).filter((c) => c.type === CONTRIBUTION_TYPES.PROPOSAL);

  // Build one deferred task per (critic, proposal) pair; an agent never critiques its own proposal.
  const tasks: Array<() => Promise<Contribution>> = [];

  for (const agent of this.agents) {
    const others = proposals.filter((p) => p.agentId !== agent.config.id);
    for (const prop of others) {
      tasks.push(async () => {
        const activity = `${ACTIVITY_CRITIQUING} ${prop.agentRole}`;
        this.hooks?.onAgentStart?.(agent.config.name, activity);
        try {
          const started = Date.now();
          const ctx = preparedContexts.get(agent.config.id) || this.buildContext(state);
          const critique = await agent.critique({ content: prop.content, metadata: prop.metadata }, ctx);
          // The proposal author's id is passed along so the critique records its target.
          return this.buildContribution(agent, CONTRIBUTION_TYPES.CRITIQUE, critique.content, critique.metadata, started, prop.agentId);
        } finally {
          // Fire the completion hook whether the critique succeeded or threw.
          this.hooks?.onAgentComplete?.(agent.config.name, activity);
        }
      });
    }
  }

  // The number of tasks is the total number of critiques expected in this phase.
  this.hooks?.onPhaseStart?.(roundNumber, CONTRIBUTION_TYPES.CRITIQUE, tasks.length);

  // Execute all tasks concurrently; allSettled keeps one failing critique from aborting the rest.
  const results = await Promise.allSettled(tasks.map((task) => task()));
  const successfulContributions: Contribution[] = results.filter(isFulfilled).map((result) => result.value);

  // Await every write. Array.prototype.forEach ignores the promise returned by an async callback,
  // so using it here would let the phase finish before critiques are stored and would turn a
  // failed write into an unhandled rejection. Writing one at a time also keeps the stored order.
  for (const contribution of successfulContributions) {
    await this.stateManager.addContribution(state.id, contribution);
  }
}
|
|
379
|
+
|
|
380
|
+
/**
 * Lets every agent refine its own proposal in light of the critiques aimed at it.
 *
 * Both the proposal and the critiques are looked up in the last entry of `state.rounds`.
 * Critiques are reduced to their content and metadata before being handed to `agent.refine`.
 * If the agent has no proposal in that round, an empty content string and empty metadata are
 * passed instead. All agents are processed concurrently and each refinement is stored through
 * the state manager.
 *
 * @param state - Current debate state.
 * @param roundNumber - Current round number for progress tracking.
 * @param preparedContexts - Map of agent ID to prepared (potentially summarized) context.
 */
private async refinementPhase(state: DebateState, roundNumber: number, preparedContexts: Map<string, DebateContext>) {
  const latestRound: DebateRound | undefined = state.rounds[state.rounds.length - 1];

  this.hooks?.onPhaseStart?.(roundNumber, CONTRIBUTION_TYPES.REFINEMENT, this.agents.length);

  const refineFor = async (agent: Agent): Promise<void> => {
    this.hooks?.onAgentStart?.(agent.config.name, ACTIVITY_REFINING);
    const ownId = agent.config.id;

    // The agent's own proposal from the latest round, if it made one.
    const ownProposal = latestRound?.contributions.find(
      (entry) => entry.type === CONTRIBUTION_TYPES.PROPOSAL && entry.agentId === ownId
    );

    // Collect the critiques that target this agent, keeping only content and metadata.
    const critiques: Critique[] = [];
    for (const entry of latestRound?.contributions || []) {
      if (entry.type === CONTRIBUTION_TYPES.CRITIQUE && entry.targetAgentId === ownId) {
        critiques.push({ content: entry.content, metadata: entry.metadata });
      }
    }

    const startedAt = Date.now();
    // Prefer the context prepared for this agent; otherwise build one from the state.
    const context = preparedContexts.get(agent.config.id) || this.buildContext(state);
    const proposalInput = {
      content: ownProposal?.content || '',
      metadata: ownProposal?.metadata || {},
    };
    const refined = await agent.refine(proposalInput, critiques, context);

    const contribution = this.buildContribution(agent, CONTRIBUTION_TYPES.REFINEMENT, refined.content, refined.metadata, startedAt);
    await this.stateManager.addContribution(state.id, contribution);
    this.hooks?.onAgentComplete?.(agent.config.name, ACTIVITY_REFINING);
  };

  await Promise.all(this.agents.map((agent) => refineFor(agent)));
}
|
|
413
|
+
|
|
414
|
+
/**
 * Asks the judge to synthesize a final solution from all debate rounds.
 *
 * The judge first prepares its context from the rounds; if that step yields a summary,
 * the summary is stored through the state manager. The judge is then asked to synthesize
 * using the problem, the rounds, and a context built from the debate state.
 *
 * @param state - Current debate state.
 * @returns The synthesized Solution.
 */
private async synthesisPhase(state: DebateState): Promise<Solution> {
  const { summary } = await this.judge.prepareContext(state.rounds);

  // Persist the judge's summary only when one was produced.
  if (summary) {
    await this.stateManager.addJudgeSummary(state.id, summary);
  }

  const judgeContext = this.buildContext(state);
  return await this.judge.synthesize(state.problem, state.rounds, judgeContext);
}
|
|
432
|
+
}
|