dialectic 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119)
  1. package/.cursor/commands/setup-test.mdc +175 -0
  2. package/.cursor/rules/basic-code-cleanup.mdc +1110 -0
  3. package/.cursor/rules/riper5.mdc +96 -0
  4. package/.env.example +6 -0
  5. package/AGENTS.md +1052 -0
  6. package/LICENSE +21 -0
  7. package/README.md +93 -0
  8. package/WARP.md +113 -0
  9. package/dialectic-1.0.0.tgz +0 -0
  10. package/dialectic.js +10 -0
  11. package/docs/commands.md +375 -0
  12. package/docs/configuration.md +882 -0
  13. package/docs/context_summarization.md +1023 -0
  14. package/docs/debate_flow.md +1127 -0
  15. package/docs/eval_flow.md +795 -0
  16. package/docs/evaluator.md +141 -0
  17. package/examples/debate-config-openrouter.json +48 -0
  18. package/examples/debate_config1.json +48 -0
  19. package/examples/eval/eval1/eval_config1.json +13 -0
  20. package/examples/eval/eval1/result1.json +62 -0
  21. package/examples/eval/eval1/result2.json +97 -0
  22. package/examples/eval_summary_format.md +11 -0
  23. package/examples/example3/debate-config.json +64 -0
  24. package/examples/example3/eval_config2.json +25 -0
  25. package/examples/example3/problem.md +17 -0
  26. package/examples/example3/rounds_test/eval_run.sh +16 -0
  27. package/examples/example3/rounds_test/run_test.sh +16 -0
  28. package/examples/kata1/architect-only-solution_2-rounds.json +121 -0
  29. package/examples/kata1/architect-perf-solution_2-rounds.json +234 -0
  30. package/examples/kata1/debate-config-kata1.json +54 -0
  31. package/examples/kata1/eval_architect-only_2-rounds.json +97 -0
  32. package/examples/kata1/eval_architect-perf_2-rounds.json +97 -0
  33. package/examples/kata1/kata1-report.md +12224 -0
  34. package/examples/kata1/kata1-report_temps-01_01_01_07.md +2451 -0
  35. package/examples/kata1/kata1.md +5 -0
  36. package/examples/kata1/meta.txt +1 -0
  37. package/examples/kata2/debate-config.json +54 -0
  38. package/examples/kata2/eval_config1.json +21 -0
  39. package/examples/kata2/eval_config2.json +25 -0
  40. package/examples/kata2/kata2.md +5 -0
  41. package/examples/kata2/only_architect/debate-config.json +45 -0
  42. package/examples/kata2/only_architect/eval_run.sh +11 -0
  43. package/examples/kata2/only_architect/run_test.sh +5 -0
  44. package/examples/kata2/rounds_test/eval_run.sh +11 -0
  45. package/examples/kata2/rounds_test/run_test.sh +5 -0
  46. package/examples/kata2/summary_length_test/eval_run.sh +11 -0
  47. package/examples/kata2/summary_length_test/eval_run_w_clarify.sh +7 -0
  48. package/examples/kata2/summary_length_test/run_test.sh +5 -0
  49. package/examples/task-queue/debate-config.json +76 -0
  50. package/examples/task-queue/debate_report.md +566 -0
  51. package/examples/task-queue/task-queue-system.md +25 -0
  52. package/jest.config.ts +13 -0
  53. package/multi_agent_debate_spec.md +2980 -0
  54. package/package.json +38 -0
  55. package/sanity-check-problem.txt +9 -0
  56. package/src/agents/prompts/architect-prompts.ts +203 -0
  57. package/src/agents/prompts/generalist-prompts.ts +157 -0
  58. package/src/agents/prompts/index.ts +41 -0
  59. package/src/agents/prompts/judge-prompts.ts +19 -0
  60. package/src/agents/prompts/kiss-prompts.ts +230 -0
  61. package/src/agents/prompts/performance-prompts.ts +142 -0
  62. package/src/agents/prompts/prompt-types.ts +68 -0
  63. package/src/agents/prompts/security-prompts.ts +149 -0
  64. package/src/agents/prompts/shared.ts +144 -0
  65. package/src/agents/prompts/testing-prompts.ts +149 -0
  66. package/src/agents/role-based-agent.ts +386 -0
  67. package/src/cli/commands/debate.ts +761 -0
  68. package/src/cli/commands/eval.ts +475 -0
  69. package/src/cli/commands/report.ts +265 -0
  70. package/src/cli/index.ts +79 -0
  71. package/src/core/agent.ts +198 -0
  72. package/src/core/clarifications.ts +34 -0
  73. package/src/core/judge.ts +257 -0
  74. package/src/core/orchestrator.ts +432 -0
  75. package/src/core/state-manager.ts +322 -0
  76. package/src/eval/evaluator-agent.ts +130 -0
  77. package/src/eval/prompts/system.md +41 -0
  78. package/src/eval/prompts/user.md +64 -0
  79. package/src/providers/llm-provider.ts +25 -0
  80. package/src/providers/openai-provider.ts +84 -0
  81. package/src/providers/openrouter-provider.ts +122 -0
  82. package/src/providers/provider-factory.ts +64 -0
  83. package/src/types/agent.types.ts +141 -0
  84. package/src/types/config.types.ts +47 -0
  85. package/src/types/debate.types.ts +237 -0
  86. package/src/types/eval.types.ts +85 -0
  87. package/src/utils/common.ts +104 -0
  88. package/src/utils/context-formatter.ts +102 -0
  89. package/src/utils/context-summarizer.ts +143 -0
  90. package/src/utils/env-loader.ts +46 -0
  91. package/src/utils/exit-codes.ts +5 -0
  92. package/src/utils/id.ts +11 -0
  93. package/src/utils/logger.ts +48 -0
  94. package/src/utils/paths.ts +10 -0
  95. package/src/utils/progress-ui.ts +313 -0
  96. package/src/utils/prompt-loader.ts +79 -0
  97. package/src/utils/report-generator.ts +301 -0
  98. package/tests/clarifications.spec.ts +128 -0
  99. package/tests/cli.debate.spec.ts +144 -0
  100. package/tests/config-loading.spec.ts +206 -0
  101. package/tests/context-summarizer.spec.ts +131 -0
  102. package/tests/debate-config-custom.json +38 -0
  103. package/tests/env-loader.spec.ts +149 -0
  104. package/tests/eval.command.spec.ts +1191 -0
  105. package/tests/logger.spec.ts +19 -0
  106. package/tests/openai-provider.spec.ts +26 -0
  107. package/tests/openrouter-provider.spec.ts +279 -0
  108. package/tests/orchestrator-summary.spec.ts +386 -0
  109. package/tests/orchestrator.spec.ts +207 -0
  110. package/tests/prompt-loader.spec.ts +52 -0
  111. package/tests/prompts/architect.md +16 -0
  112. package/tests/provider-factory.spec.ts +150 -0
  113. package/tests/report.command.spec.ts +546 -0
  114. package/tests/role-based-agent-summary.spec.ts +476 -0
  115. package/tests/security-agent.spec.ts +221 -0
  116. package/tests/shared-prompts.spec.ts +318 -0
  117. package/tests/state-manager.spec.ts +251 -0
  118. package/tests/summary-prompts.spec.ts +153 -0
  119. package/tsconfig.json +49 -0
package/tests/logger.spec.ts
@@ -0,0 +1,19 @@
+ import { Logger } from '../src/utils/logger';
+
+ describe('Logger', () => {
+   it('prints minimal messages in non-verbose mode', () => {
+     const logger = new Logger(false);
+     const logSpy = jest.spyOn(console, 'log').mockImplementation((() => {}) as any);
+     logger.info('hello');
+     expect(logSpy).toHaveBeenCalled();
+     logSpy.mockRestore();
+   });
+
+   it('prints additional details in verbose mode', () => {
+     const logger = new Logger(true);
+     const logSpy = jest.spyOn(console, 'log').mockImplementation((() => {}) as any);
+     logger.debug('details');
+     expect(logSpy).toHaveBeenCalled();
+     logSpy.mockRestore();
+   });
+ });
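
For orientation, a minimal usage sketch of the Logger API these tests pin down: a boolean verbose flag on the constructor plus info() and debug() methods. The import path is an assumption based on the file list above, and the tests do not confirm whether debug() is suppressed in non-verbose mode.

import { Logger } from './src/utils/logger'; // assumed path, per the file list above

const quiet = new Logger(false);   // non-verbose mode
quiet.info('debate started');      // the tests confirm info() logs here

const verbose = new Logger(true);  // verbose mode
verbose.debug('round details');    // the tests confirm debug() logs here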
package/tests/openai-provider.spec.ts
@@ -0,0 +1,26 @@
+ import { OpenAIProvider } from '../src/providers/openai-provider';
+ import { LLMProvider } from '../src/providers/llm-provider';
+
+ // Mock the OpenAI SDK to avoid network calls and force fallback path
+ jest.mock('openai', () => {
+   return {
+     __esModule: true,
+     default: class OpenAIMock {
+       // No responses API -> forces fallback
+       public chat = {
+         completions: {
+           create: async (_: any) => ({ choices: [{ message: { content: 'ok' } }] }),
+         },
+       };
+       constructor(_opts: any) {}
+     },
+   };
+ });
+
+ describe('OpenAIProvider', () => {
+   it('falls back to chat completions when Responses API is unavailable', async () => {
+     const provider: LLMProvider = new OpenAIProvider('fake');
+     const res = await provider.complete({ model: 'gpt-4', systemPrompt: 'sys', userPrompt: 'hello', temperature: 0.5 });
+     expect(res.text).toBe('ok');
+   });
+ });
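
Read from the caller's side, the test above implies a provider that hides the Responses-vs-chat-completions choice behind a single complete() call. A hedged sketch under that assumption; the import path and async entry point are illustrative, not part of the package's documented API:

import { OpenAIProvider } from './src/providers/openai-provider'; // assumed path

async function main() {
  const provider = new OpenAIProvider(process.env.OPENAI_API_KEY ?? '');
  // The provider tries the Responses API first and falls back to chat
  // completions (the path the mock above forces); callers see one result shape.
  const res = await provider.complete({
    model: 'gpt-4',
    systemPrompt: 'sys',
    userPrompt: 'hello',
    temperature: 0.5,
  });
  console.log(res.text);
}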
package/tests/openrouter-provider.spec.ts
@@ -0,0 +1,279 @@
+ import { OpenRouterProvider } from '../src/providers/openrouter-provider';
+ import { CompletionRequest } from '../src/providers/llm-provider';
+
+ // Mock OpenAI SDK
+ const mockResponsesCreate = jest.fn();
+ const mockChatCompletionsCreate = jest.fn();
+
+ jest.mock('openai', () => {
+   return {
+     __esModule: true,
+     default: jest.fn().mockImplementation(() => ({
+       responses: {
+         create: mockResponsesCreate,
+       },
+       chat: {
+         completions: {
+           create: mockChatCompletionsCreate,
+         },
+       },
+     })),
+   };
+ });
+
+ describe('OpenRouterProvider', () => {
+   let provider: OpenRouterProvider;
+
+   beforeEach(() => {
+     provider = new OpenRouterProvider('test-api-key');
+
+     // Reset all mocks
+     jest.clearAllMocks();
+   });
+
+   afterEach(() => {
+     jest.clearAllMocks();
+   });
+
+   describe('constructor', () => {
+     it('should create OpenAI client with OpenRouter configuration', () => {
+       // Create a new provider to test constructor
+       new OpenRouterProvider('test-api-key');
+       const OpenAI = require('openai').default;
+       expect(OpenAI).toHaveBeenCalledWith({
+         apiKey: 'test-api-key',
+         baseURL: 'https://openrouter.ai/api/v1',
+         defaultHeaders: {
+           'HTTP-Referer': 'dialectic',
+           'X-Title': 'Dialectic - Multi-Agent Debate',
+         },
+       });
+     });
+   });
+
+   describe('complete', () => {
+     const mockRequest: CompletionRequest = {
+       model: 'openai/gpt-4',
+       systemPrompt: 'You are a helpful assistant.',
+       userPrompt: 'Hello, world!',
+       temperature: 0.7,
+       maxTokens: 100,
+     };
+
+     it('should use Responses API successfully', async () => {
+       const mockResponse = {
+         output_text: 'Hello! How can I help you?',
+         usage: {
+           input_tokens: 10,
+           output_tokens: 8,
+           total_tokens: 18,
+         },
+       };
+
+       mockResponsesCreate.mockResolvedValue(mockResponse);
+
+       const result = await provider.complete(mockRequest);
+
+       expect(mockResponsesCreate).toHaveBeenCalledWith({
+         model: 'openai/gpt-4',
+         temperature: 0.7,
+         input: [
+           { role: 'system', content: 'You are a helpful assistant.' },
+           { role: 'user', content: 'Hello, world!' },
+         ],
+         max_output_tokens: 100,
+       });
+
+       expect(result).toEqual({
+         text: 'Hello! How can I help you?',
+         usage: {
+           inputTokens: 10,
+           outputTokens: 8,
+           totalTokens: 18,
+         },
+       });
+     });
+
+     it('should handle Responses API with output array format', async () => {
+       const mockResponse = {
+         output: [
+           {
+             content: [
+               {
+                 text: 'Hello! How can I help you?',
+               },
+             ],
+           },
+         ],
+         usage: {
+           input_tokens: 10,
+           output_tokens: 8,
+           total_tokens: 18,
+         },
+       };
+
+       mockResponsesCreate.mockResolvedValue(mockResponse);
+
+       const result = await provider.complete(mockRequest);
+
+       expect(result).toEqual({
+         text: 'Hello! How can I help you?',
+         usage: {
+           inputTokens: 10,
+           outputTokens: 8,
+           totalTokens: 18,
+         },
+       });
+     });
+
+     it('should fallback to Chat Completions API when Responses API fails', async () => {
+       const mockChatResponse = {
+         choices: [
+           {
+             message: {
+               content: 'Hello! How can I help you?',
+             },
+           },
+         ],
+         usage: {
+           prompt_tokens: 10,
+           completion_tokens: 8,
+           total_tokens: 18,
+         },
+       };
+
+       mockResponsesCreate.mockRejectedValue(new Error('Responses API not available'));
+       mockChatCompletionsCreate.mockResolvedValue(mockChatResponse);
+
+       const result = await provider.complete(mockRequest);
+
+       expect(mockChatCompletionsCreate).toHaveBeenCalledWith({
+         model: 'openai/gpt-4',
+         messages: [
+           { role: 'system', content: 'You are a helpful assistant.' },
+           { role: 'user', content: 'Hello, world!' },
+         ],
+         temperature: 0.7,
+         max_tokens: 100,
+       });
+
+       expect(result).toEqual({
+         text: 'Hello! How can I help you?',
+         usage: {
+           inputTokens: 10,
+           outputTokens: 8,
+           totalTokens: 18,
+         },
+       });
+     });
+
+     it('should handle requests without maxTokens', async () => {
+       const requestWithoutMaxTokens: CompletionRequest = {
+         model: mockRequest.model,
+         systemPrompt: mockRequest.systemPrompt,
+         userPrompt: mockRequest.userPrompt,
+         temperature: mockRequest.temperature,
+         // maxTokens intentionally omitted
+       };
+
+       const mockResponse = {
+         output_text: 'Hello! How can I help you?',
+         usage: {
+           input_tokens: 10,
+           output_tokens: 8,
+           total_tokens: 18,
+         },
+       };
+
+       mockResponsesCreate.mockResolvedValue(mockResponse);
+
+       await provider.complete(requestWithoutMaxTokens);
+
+       expect(mockResponsesCreate).toHaveBeenCalledWith({
+         model: 'openai/gpt-4',
+         temperature: 0.7,
+         input: [
+           { role: 'system', content: 'You are a helpful assistant.' },
+           { role: 'user', content: 'Hello, world!' },
+         ],
+       });
+     });
+
+     it('should handle requests with stopSequences', async () => {
+       const requestWithStopSequences = {
+         ...mockRequest,
+         stopSequences: ['\n\n', 'Human:'],
+       };
+
+       const mockResponse = {
+         output_text: 'Hello! How can I help you?',
+         usage: {
+           input_tokens: 10,
+           output_tokens: 8,
+           total_tokens: 18,
+         },
+       };
+
+       mockResponsesCreate.mockResolvedValue(mockResponse);
+
+       await provider.complete(requestWithStopSequences);
+
+       expect(mockResponsesCreate).toHaveBeenCalledWith({
+         model: 'openai/gpt-4',
+         temperature: 0.7,
+         input: [
+           { role: 'system', content: 'You are a helpful assistant.' },
+           { role: 'user', content: 'Hello, world!' },
+         ],
+         max_output_tokens: 100,
+         stop: ['\n\n', 'Human:'],
+       });
+     });
+
+     it('should handle responses without usage information', async () => {
+       const mockResponse = {
+         output_text: 'Hello! How can I help you?',
+       };
+
+       mockResponsesCreate.mockResolvedValue(mockResponse);
+
+       const result = await provider.complete(mockRequest);
+
+       expect(result).toEqual({
+         text: 'Hello! How can I help you?',
+       });
+     });
+
+     it('should handle different model names including prefixed models', async () => {
+       const anthropicRequest = {
+         ...mockRequest,
+         model: 'anthropic/claude-3-sonnet',
+       };
+
+       const mockResponse = {
+         output_text: 'Hello from Claude!',
+         usage: {
+           input_tokens: 10,
+           output_tokens: 8,
+           total_tokens: 18,
+         },
+       };
+
+       mockResponsesCreate.mockResolvedValue(mockResponse);
+
+       const result = await provider.complete(anthropicRequest);
+
+       expect(mockResponsesCreate).toHaveBeenCalledWith({
+         model: 'anthropic/claude-3-sonnet',
+         temperature: 0.7,
+         input: [
+           { role: 'system', content: 'You are a helpful assistant.' },
+           { role: 'user', content: 'Hello, world!' },
+         ],
+         max_output_tokens: 100,
+       });
+
+       expect(result.text).toBe('Hello from Claude!');
+     });
+   });
+ });
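
Taken together, these tests document the provider's request/response mapping: maxTokens becomes max_output_tokens (Responses API) or max_tokens (chat completions), stopSequences becomes stop, and snake_case token usage is normalized to camelCase. A sketch of a call from application code, assuming the import paths from the file list:

import { OpenRouterProvider } from './src/providers/openrouter-provider'; // assumed path
import { CompletionRequest } from './src/providers/llm-provider';

async function main() {
  const provider = new OpenRouterProvider(process.env.OPENROUTER_API_KEY ?? '');
  const request: CompletionRequest = {
    model: 'anthropic/claude-3-sonnet', // OpenRouter provider/model ids pass through unchanged
    systemPrompt: 'You are a helpful assistant.',
    userPrompt: 'Hello, world!',
    temperature: 0.7,
    maxTokens: 100,          // mapped to max_output_tokens or max_tokens per the tests
    stopSequences: ['\n\n'], // mapped to stop
  };
  const { text, usage } = await provider.complete(request);
  // usage is normalized to inputTokens/outputTokens/totalTokens, and may be absent
  console.log(text, usage?.totalTokens);
}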
package/tests/orchestrator-summary.spec.ts
@@ -0,0 +1,386 @@
+ import { DebateOrchestrator } from '../src/core/orchestrator';
+ import { Agent } from '../src/core/agent';
+ import { StateManager } from '../src/core/state-manager';
+ import { AgentConfig, Proposal, Critique, AGENT_ROLES, LLM_PROVIDERS } from '../src/types/agent.types';
+ import { DebateContext, DebateConfig, DebateSummary, ContextPreparationResult, TERMINATION_TYPES, SYNTHESIS_METHODS, SUMMARIZATION_METHODS } from '../src/types/debate.types';
+ import fs from 'fs';
+ import path from 'path';
+ import os from 'os';
+
+ // Mock Agent that supports summarization
+ class MockAgent extends Agent {
+   private summaryToReturn?: DebateSummary;
+
+   constructor(config: AgentConfig, summaryToReturn?: DebateSummary) {
+     super(config, {} as any);
+     if (summaryToReturn) {
+       this.summaryToReturn = summaryToReturn;
+     }
+   }
+
+   setPreparedSummary(summary: DebateSummary) {
+     this.summaryToReturn = summary;
+   }
+
+   async propose(_problem: string, _context: DebateContext): Promise<Proposal> {
+     return { content: 'Mock proposal', metadata: { latencyMs: 100 } };
+   }
+
+   async critique(_proposal: Proposal, _context: DebateContext): Promise<Critique> {
+     return { content: 'Mock critique', metadata: { latencyMs: 100 } };
+   }
+
+   async refine(_originalProposal: Proposal, _critiques: Critique[], _context: DebateContext): Promise<Proposal> {
+     return { content: 'Mock refinement', metadata: { latencyMs: 100 } };
+   }
+
+   shouldSummarize(_context: DebateContext): boolean {
+     return this.summaryToReturn !== undefined;
+   }
+
+   async prepareContext(
+     context: DebateContext,
+     _roundNumber: number
+   ): Promise<ContextPreparationResult> {
+     if (this.summaryToReturn) {
+       return {
+         context,
+         summary: this.summaryToReturn
+       };
+     }
+     return { context };
+   }
+
+   // New abstract method requirement: return no clarifications by default
+   async askClarifyingQuestions(_problem: string, _context: DebateContext): Promise<{ questions: { id?: string; text: string }[] }> {
+     return { questions: [] };
+   }
+ }
+
+ // Mock Judge
+ class MockJudge extends Agent {
+   constructor() {
+     const config: AgentConfig = {
+       id: 'judge',
+       name: 'Judge',
+       role: AGENT_ROLES.GENERALIST,
+       model: 'gpt-4',
+       provider: LLM_PROVIDERS.OPENAI,
+       temperature: 0.3
+     };
+     super(config, {} as any);
+   }
+
+   async propose(_problem: string, _context: DebateContext): Promise<Proposal> {
+     return { content: 'Judge proposal', metadata: {} };
+   }
+
+   async critique(_proposal: Proposal, _context: DebateContext): Promise<Critique> {
+     return { content: 'Judge critique', metadata: {} };
+   }
+
+   async refine(_originalProposal: Proposal, _critiques: Critique[], _context: DebateContext): Promise<Proposal> {
+     return { content: 'Judge refinement', metadata: {} };
+   }
+
+   shouldSummarize(_context: DebateContext): boolean {
+     return false;
+   }
+
+   async prepareContext(context: DebateContext, _roundNumber: number): Promise<{ context: DebateContext }> {
+     return { context };
+   }
+
+   async synthesize(_context: DebateContext): Promise<any> {
+     return {
+       description: 'Final solution',
+       implementation: 'Implementation',
+       tradeoffs: [],
+       recommendations: [],
+       confidence: 90,
+       synthesizedBy: 'judge'
+     };
+   }
+
+   // New abstract method requirement: judge does not ask questions
+   async askClarifyingQuestions(_problem: string, _context: DebateContext): Promise<{ questions: { id?: string; text: string }[] }> {
+     return { questions: [] };
+   }
+ }
+
+ describe('DebateOrchestrator - summarizationPhase()', () => {
+   let tmpDir: string;
+
+   beforeEach(() => {
+     tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'debate-orch-'));
+   });
+
+   afterEach(() => {
+     try { fs.rmSync(tmpDir, { recursive: true, force: true }); } catch {}
+   });
+
+   const config: DebateConfig = {
+     rounds: 1,
+     terminationCondition: { type: TERMINATION_TYPES.FIXED },
+     synthesisMethod: SYNTHESIS_METHODS.JUDGE,
+     includeFullHistory: true,
+     timeoutPerRound: 300000,
+   };
+
+   it('should call prepareContext for each agent', async () => {
+     const agent1Config: AgentConfig = {
+       id: 'agent-1',
+       name: 'Agent 1',
+       role: AGENT_ROLES.ARCHITECT,
+       model: 'gpt-4',
+       provider: LLM_PROVIDERS.OPENAI,
+       temperature: 0.5
+     };
+
+     const agent2Config: AgentConfig = {
+       id: 'agent-2',
+       name: 'Agent 2',
+       role: AGENT_ROLES.PERFORMANCE,
+       model: 'gpt-4',
+       provider: LLM_PROVIDERS.OPENAI,
+       temperature: 0.5
+     };
+
+     const agent1 = new MockAgent(agent1Config);
+     const agent2 = new MockAgent(agent2Config);
+     const judge = new MockJudge();
+     const stateManager = new StateManager(tmpDir);
+
+     const prepareContextSpy1 = jest.spyOn(agent1, 'prepareContext');
+     const prepareContextSpy2 = jest.spyOn(agent2, 'prepareContext');
+
+     const orchestrator = new DebateOrchestrator([agent1, agent2], judge as any, stateManager, config);
+
+     const state = await stateManager.createDebate('Test problem');
+
+     // Access private method via type assertion for testing
+     const result = await (orchestrator as any).summarizationPhase(state, 1);
+
+     expect(prepareContextSpy1).toHaveBeenCalledTimes(1);
+     expect(prepareContextSpy2).toHaveBeenCalledTimes(1);
+     expect(result).toBeInstanceOf(Map);
+     expect(result.size).toBe(2);
+   });
+
+   it('should invoke summarization hooks correctly', async () => {
+     const agentConfig: AgentConfig = {
+       id: 'agent-1',
+       name: 'Agent 1',
+       role: AGENT_ROLES.ARCHITECT,
+       model: 'gpt-4',
+       provider: LLM_PROVIDERS.OPENAI,
+       temperature: 0.5
+     };
+
+     const summary: DebateSummary = {
+       agentId: 'agent-1',
+       agentRole: AGENT_ROLES.ARCHITECT,
+       summary: 'Test summary',
+       metadata: {
+         beforeChars: 1000,
+         afterChars: 500,
+         method: SUMMARIZATION_METHODS.LENGTH_BASED,
+         timestamp: new Date(),
+         latencyMs: 200,
+         tokensUsed: 50
+       }
+     };
+
+     const agent = new MockAgent(agentConfig, summary);
+     const judge = new MockJudge();
+     const stateManager = new StateManager(tmpDir);
+
+     const onSummarizationStart = jest.fn();
+     const onSummarizationComplete = jest.fn();
+
+     const hooks = {
+       onSummarizationStart,
+       onSummarizationComplete
+     };
+
+     const orchestrator = new DebateOrchestrator([agent], judge as any, stateManager, config, hooks);
+
+     const state = await stateManager.createDebate('Test problem');
+     await stateManager.beginRound(state.id);
+
+     await (orchestrator as any).summarizationPhase(state, 1);
+
+     expect(onSummarizationStart).toHaveBeenCalledWith('Agent 1');
+     expect(onSummarizationComplete).toHaveBeenCalledWith('Agent 1', 1000, 500);
+   });
+
+   it('should store summaries via state manager', async () => {
+     const agentConfig: AgentConfig = {
+       id: 'agent-1',
+       name: 'Agent 1',
+       role: AGENT_ROLES.ARCHITECT,
+       model: 'gpt-4',
+       provider: LLM_PROVIDERS.OPENAI,
+       temperature: 0.5
+     };
+
+     const summary: DebateSummary = {
+       agentId: 'agent-1',
+       agentRole: AGENT_ROLES.ARCHITECT,
+       summary: 'Test summary',
+       metadata: {
+         beforeChars: 1000,
+         afterChars: 500,
+         method: SUMMARIZATION_METHODS.LENGTH_BASED,
+         timestamp: new Date()
+       }
+     };
+
+     const agent = new MockAgent(agentConfig, summary);
+     const judge = new MockJudge();
+     const stateManager = new StateManager(tmpDir);
+
+     const addSummarySpy = jest.spyOn(stateManager, 'addSummary');
+
+     const orchestrator = new DebateOrchestrator([agent], judge as any, stateManager, config);
+
+     const state = await stateManager.createDebate('Test problem');
+     await stateManager.beginRound(state.id);
+
+     await (orchestrator as any).summarizationPhase(state, 1);
+
+     expect(addSummarySpy).toHaveBeenCalledWith(state.id, summary);
+   });
+
+   it('should return prepared contexts map', async () => {
+     const agent1Config: AgentConfig = {
+       id: 'agent-1',
+       name: 'Agent 1',
+       role: AGENT_ROLES.ARCHITECT,
+       model: 'gpt-4',
+       provider: LLM_PROVIDERS.OPENAI,
+       temperature: 0.5
+     };
+
+     const agent2Config: AgentConfig = {
+       id: 'agent-2',
+       name: 'Agent 2',
+       role: AGENT_ROLES.PERFORMANCE,
+       model: 'gpt-4',
+       provider: LLM_PROVIDERS.OPENAI,
+       temperature: 0.5
+     };
+
+     const agent1 = new MockAgent(agent1Config);
+     const agent2 = new MockAgent(agent2Config);
+     const judge = new MockJudge();
+     const stateManager = new StateManager(tmpDir);
+
+     const orchestrator = new DebateOrchestrator([agent1, agent2], judge as any, stateManager, config);
+
+     const state = await stateManager.createDebate('Test problem');
+
+     const result = await (orchestrator as any).summarizationPhase(state, 1);
+
+     expect(result.get('agent-1')).toBeDefined();
+     expect(result.get('agent-2')).toBeDefined();
+     expect(result.get('agent-1').problem).toBe('Test problem');
+   });
+ });
+
+ describe('DebateOrchestrator - runDebate integration with summarization', () => {
+   let tmpDir: string;
+
+   beforeEach(() => {
+     tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'debate-orch-int-'));
+   });
+
+   afterEach(() => {
+     try { fs.rmSync(tmpDir, { recursive: true, force: true }); } catch {}
+   });
+
+   it('should call summarization phase before proposal phase', async () => {
+     const agentConfig: AgentConfig = {
+       id: 'agent-1',
+       name: 'Agent 1',
+       role: AGENT_ROLES.ARCHITECT,
+       model: 'gpt-4',
+       provider: LLM_PROVIDERS.OPENAI,
+       temperature: 0.5
+     };
+
+     const agent = new MockAgent(agentConfig);
+     const judge = new MockJudge();
+     const stateManager = new StateManager(tmpDir);
+
+     const config: DebateConfig = {
+       rounds: 1,
+       terminationCondition: { type: TERMINATION_TYPES.FIXED },
+       synthesisMethod: SYNTHESIS_METHODS.JUDGE,
+       includeFullHistory: true,
+       timeoutPerRound: 300000,
+     };
+
+     const prepareContextSpy = jest.spyOn(agent, 'prepareContext');
+
+     const orchestrator = new DebateOrchestrator([agent], judge as any, stateManager, config);
+
+     await orchestrator.runDebate('Test problem');
+
+     // Should be called once per round
+     expect(prepareContextSpy).toHaveBeenCalled();
+   });
+
+   it('should persist summaries in debate state', async () => {
+     const agentId = 'agent-1';
+     const agentConfig: AgentConfig = {
+       id: agentId,
+       name: 'Agent 1',
+       role: AGENT_ROLES.ARCHITECT,
+       model: 'gpt-4',
+       provider: LLM_PROVIDERS.OPENAI,
+       temperature: 0.5
+     };
+
+     const summary: DebateSummary = {
+       agentId,
+       agentRole: AGENT_ROLES.ARCHITECT,
+       summary: 'Persisted summary',
+       metadata: {
+         beforeChars: 2000,
+         afterChars: 1000,
+         method: SUMMARIZATION_METHODS.LENGTH_BASED,
+         timestamp: new Date()
+       }
+     };
+
+     const agent = new MockAgent(agentConfig, summary);
+     const judge = new MockJudge();
+     const stateManager = new StateManager(tmpDir);
+
+     const config: DebateConfig = {
+       rounds: 1,
+       terminationCondition: { type: TERMINATION_TYPES.FIXED },
+       synthesisMethod: SYNTHESIS_METHODS.JUDGE,
+       includeFullHistory: true,
+       timeoutPerRound: 300000,
+     };
+
+     const orchestrator = new DebateOrchestrator([agent], judge as any, stateManager, config);
+
+     const result = await orchestrator.runDebate('Test problem');
+
+     const debate = await stateManager.getDebate(result.debateId);
+     expect(debate).toBeDefined();
+     const round = debate!.rounds[0];
+     expect(round).toBeDefined();
+     expect(round!.summaries).toBeDefined();
+     expect(Object.keys(round!.summaries!).length).toBe(1);
+     const summaries = round!.summaries;
+     if (summaries) {
+       expect(summaries[agentId]).toBeDefined();
+       expect(summaries[agentId]!.summary).toBe('Persisted summary');
+     }
+   });
+ });
+
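
Pieced together from these tests, the orchestration entry points look roughly like the sketch below. The declare const placeholders stand in for concrete agent and judge construction, which these tests only exercise through mocks; the import paths and debates directory are assumptions, not documented API.

import { DebateOrchestrator } from './src/core/orchestrator'; // assumed paths, per the file list
import { Agent } from './src/core/agent';
import { StateManager } from './src/core/state-manager';
import { DebateConfig, TERMINATION_TYPES, SYNTHESIS_METHODS } from './src/types/debate.types';

declare const agents: Agent[]; // concrete agents (see role-based-agent.ts); mocked in the tests above
declare const judge: Agent;    // judge agent with a synthesize() method

async function main() {
  const stateManager = new StateManager('./debates'); // any writable directory; the tests use a temp dir
  const config: DebateConfig = {
    rounds: 1,
    terminationCondition: { type: TERMINATION_TYPES.FIXED },
    synthesisMethod: SYNTHESIS_METHODS.JUDGE,
    includeFullHistory: true,
    timeoutPerRound: 300000,
  };
  const orchestrator = new DebateOrchestrator(agents, judge as any, stateManager, config);
  const result = await orchestrator.runDebate('Test problem');
  // Per-round summaries are persisted on the debate state, as the last test verifies:
  const debate = await stateManager.getDebate(result.debateId);
  console.log(debate?.rounds[0]?.summaries);
}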