dialectic 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.cursor/commands/setup-test.mdc +175 -0
- package/.cursor/rules/basic-code-cleanup.mdc +1110 -0
- package/.cursor/rules/riper5.mdc +96 -0
- package/.env.example +6 -0
- package/AGENTS.md +1052 -0
- package/LICENSE +21 -0
- package/README.md +93 -0
- package/WARP.md +113 -0
- package/dialectic-1.0.0.tgz +0 -0
- package/dialectic.js +10 -0
- package/docs/commands.md +375 -0
- package/docs/configuration.md +882 -0
- package/docs/context_summarization.md +1023 -0
- package/docs/debate_flow.md +1127 -0
- package/docs/eval_flow.md +795 -0
- package/docs/evaluator.md +141 -0
- package/examples/debate-config-openrouter.json +48 -0
- package/examples/debate_config1.json +48 -0
- package/examples/eval/eval1/eval_config1.json +13 -0
- package/examples/eval/eval1/result1.json +62 -0
- package/examples/eval/eval1/result2.json +97 -0
- package/examples/eval_summary_format.md +11 -0
- package/examples/example3/debate-config.json +64 -0
- package/examples/example3/eval_config2.json +25 -0
- package/examples/example3/problem.md +17 -0
- package/examples/example3/rounds_test/eval_run.sh +16 -0
- package/examples/example3/rounds_test/run_test.sh +16 -0
- package/examples/kata1/architect-only-solution_2-rounds.json +121 -0
- package/examples/kata1/architect-perf-solution_2-rounds.json +234 -0
- package/examples/kata1/debate-config-kata1.json +54 -0
- package/examples/kata1/eval_architect-only_2-rounds.json +97 -0
- package/examples/kata1/eval_architect-perf_2-rounds.json +97 -0
- package/examples/kata1/kata1-report.md +12224 -0
- package/examples/kata1/kata1-report_temps-01_01_01_07.md +2451 -0
- package/examples/kata1/kata1.md +5 -0
- package/examples/kata1/meta.txt +1 -0
- package/examples/kata2/debate-config.json +54 -0
- package/examples/kata2/eval_config1.json +21 -0
- package/examples/kata2/eval_config2.json +25 -0
- package/examples/kata2/kata2.md +5 -0
- package/examples/kata2/only_architect/debate-config.json +45 -0
- package/examples/kata2/only_architect/eval_run.sh +11 -0
- package/examples/kata2/only_architect/run_test.sh +5 -0
- package/examples/kata2/rounds_test/eval_run.sh +11 -0
- package/examples/kata2/rounds_test/run_test.sh +5 -0
- package/examples/kata2/summary_length_test/eval_run.sh +11 -0
- package/examples/kata2/summary_length_test/eval_run_w_clarify.sh +7 -0
- package/examples/kata2/summary_length_test/run_test.sh +5 -0
- package/examples/task-queue/debate-config.json +76 -0
- package/examples/task-queue/debate_report.md +566 -0
- package/examples/task-queue/task-queue-system.md +25 -0
- package/jest.config.ts +13 -0
- package/multi_agent_debate_spec.md +2980 -0
- package/package.json +38 -0
- package/sanity-check-problem.txt +9 -0
- package/src/agents/prompts/architect-prompts.ts +203 -0
- package/src/agents/prompts/generalist-prompts.ts +157 -0
- package/src/agents/prompts/index.ts +41 -0
- package/src/agents/prompts/judge-prompts.ts +19 -0
- package/src/agents/prompts/kiss-prompts.ts +230 -0
- package/src/agents/prompts/performance-prompts.ts +142 -0
- package/src/agents/prompts/prompt-types.ts +68 -0
- package/src/agents/prompts/security-prompts.ts +149 -0
- package/src/agents/prompts/shared.ts +144 -0
- package/src/agents/prompts/testing-prompts.ts +149 -0
- package/src/agents/role-based-agent.ts +386 -0
- package/src/cli/commands/debate.ts +761 -0
- package/src/cli/commands/eval.ts +475 -0
- package/src/cli/commands/report.ts +265 -0
- package/src/cli/index.ts +79 -0
- package/src/core/agent.ts +198 -0
- package/src/core/clarifications.ts +34 -0
- package/src/core/judge.ts +257 -0
- package/src/core/orchestrator.ts +432 -0
- package/src/core/state-manager.ts +322 -0
- package/src/eval/evaluator-agent.ts +130 -0
- package/src/eval/prompts/system.md +41 -0
- package/src/eval/prompts/user.md +64 -0
- package/src/providers/llm-provider.ts +25 -0
- package/src/providers/openai-provider.ts +84 -0
- package/src/providers/openrouter-provider.ts +122 -0
- package/src/providers/provider-factory.ts +64 -0
- package/src/types/agent.types.ts +141 -0
- package/src/types/config.types.ts +47 -0
- package/src/types/debate.types.ts +237 -0
- package/src/types/eval.types.ts +85 -0
- package/src/utils/common.ts +104 -0
- package/src/utils/context-formatter.ts +102 -0
- package/src/utils/context-summarizer.ts +143 -0
- package/src/utils/env-loader.ts +46 -0
- package/src/utils/exit-codes.ts +5 -0
- package/src/utils/id.ts +11 -0
- package/src/utils/logger.ts +48 -0
- package/src/utils/paths.ts +10 -0
- package/src/utils/progress-ui.ts +313 -0
- package/src/utils/prompt-loader.ts +79 -0
- package/src/utils/report-generator.ts +301 -0
- package/tests/clarifications.spec.ts +128 -0
- package/tests/cli.debate.spec.ts +144 -0
- package/tests/config-loading.spec.ts +206 -0
- package/tests/context-summarizer.spec.ts +131 -0
- package/tests/debate-config-custom.json +38 -0
- package/tests/env-loader.spec.ts +149 -0
- package/tests/eval.command.spec.ts +1191 -0
- package/tests/logger.spec.ts +19 -0
- package/tests/openai-provider.spec.ts +26 -0
- package/tests/openrouter-provider.spec.ts +279 -0
- package/tests/orchestrator-summary.spec.ts +386 -0
- package/tests/orchestrator.spec.ts +207 -0
- package/tests/prompt-loader.spec.ts +52 -0
- package/tests/prompts/architect.md +16 -0
- package/tests/provider-factory.spec.ts +150 -0
- package/tests/report.command.spec.ts +546 -0
- package/tests/role-based-agent-summary.spec.ts +476 -0
- package/tests/security-agent.spec.ts +221 -0
- package/tests/shared-prompts.spec.ts +318 -0
- package/tests/state-manager.spec.ts +251 -0
- package/tests/summary-prompts.spec.ts +153 -0
- package/tsconfig.json +49 -0
|
@@ -0,0 +1,476 @@
|
|
|
1
|
+
import { RoleBasedAgent } from '../src/agents/role-based-agent';
|
|
2
|
+
import { LLMProvider } from '../src/providers/llm-provider';
|
|
3
|
+
import { AgentConfig, AGENT_ROLES, LLM_PROVIDERS } from '../src/types/agent.types';
|
|
4
|
+
import { DebateContext, SummarizationConfig, CONTRIBUTION_TYPES, SUMMARIZATION_METHODS } from '../src/types/debate.types';
|
|
5
|
+
|
|
6
|
+
// Mock LLM Provider
|
|
7
|
+
/**
 * Test double for `LLMProvider` that replays canned responses instead of
 * calling a real model.
 *
 * Responses are served in order and wrap around to the first one once the list
 * is exhausted. Every call reports a fixed usage of 50 total tokens.
 */
class MockLLMProvider implements LLMProvider {
  /** Response served when no usable canned responses are supplied. */
  private static readonly FALLBACK_RESPONSE = 'Mock summary';

  /** Canned responses; guaranteed non-empty by the constructor. */
  private responses: string[];

  /** Number of `complete()` calls so far; specs read it to check the provider was invoked. */
  private currentIndex = 0;

  /**
   * @param responses Texts returned by successive `complete()` calls. An empty
   *   array falls back to a single default response, because cycling over zero
   *   entries would compute `index % 0` (NaN) and yield `undefined` text.
   */
  constructor(responses: string[] = [MockLLMProvider.FALLBACK_RESPONSE]) {
    this.responses = responses.length > 0 ? responses : [MockLLMProvider.FALLBACK_RESPONSE];
  }

  /**
   * Returns the next canned response; the request itself is ignored.
   *
   * @param _request Unused.
   * @returns An object with the response `text` and a fixed `usage.totalTokens` of 50.
   */
  async complete(_request: any): Promise<any> {
    const text = this.responses[this.currentIndex % this.responses.length];
    this.currentIndex += 1;
    return {
      text,
      usage: { totalTokens: 50 },
    };
  }
}
|
|
24
|
+
|
|
25
|
+
/**
 * Specs for RoleBasedAgent.shouldSummarize().
 *
 * The fixtures use a 100-character threshold so that short strings are enough
 * to land on either side of it.
 */
describe('RoleBasedAgent - shouldSummarize()', () => {
  const agentConfig: AgentConfig = {
    id: 'test-agent',
    name: 'Test Agent',
    role: AGENT_ROLES.ARCHITECT,
    model: 'gpt-4',
    provider: LLM_PROVIDERS.OPENAI,
    temperature: 0.5,
  };

  const summaryConfig: SummarizationConfig = {
    enabled: true,
    threshold: 100, // Low threshold for testing
    maxLength: 50,
    method: SUMMARIZATION_METHODS.LENGTH_BASED,
  };

  /** Creates the agent under test with a fresh mock provider and the given summarization settings. */
  function makeAgent(summarization: SummarizationConfig = summaryConfig) {
    return RoleBasedAgent.create(
      agentConfig,
      new MockLLMProvider(),
      'System prompt',
      undefined,
      summarization,
      undefined
    );
  }

  it('should return false when summarization is disabled', () => {
    const agent = makeAgent({ ...summaryConfig, enabled: false });
    const emptyContext: DebateContext = { problem: 'Test problem', history: [] };

    expect(agent.shouldSummarize(emptyContext)).toBe(false);
  });

  it('should return false when history is empty', () => {
    const agent = makeAgent();
    const emptyContext: DebateContext = { problem: 'Test problem', history: [] };

    expect(agent.shouldSummarize(emptyContext)).toBe(false);
  });

  it('should return false when below threshold', () => {
    const agent = makeAgent();

    const shortContext: DebateContext = {
      problem: 'Test problem',
      history: [
        {
          roundNumber: 1,
          timestamp: new Date(),
          contributions: [
            {
              agentId: 'test-agent',
              agentRole: AGENT_ROLES.ARCHITECT,
              type: CONTRIBUTION_TYPES.PROPOSAL,
              content: 'Short proposal', // well under the 100-character threshold
              metadata: {},
            },
          ],
        },
      ],
    };

    expect(agent.shouldSummarize(shortContext)).toBe(false);
  });

  it('should return true when above threshold', () => {
    const agent = makeAgent();

    const longContext: DebateContext = {
      problem: 'Test problem',
      history: [
        {
          roundNumber: 1,
          timestamp: new Date(),
          contributions: [
            {
              agentId: 'test-agent',
              agentRole: AGENT_ROLES.ARCHITECT,
              type: CONTRIBUTION_TYPES.PROPOSAL,
              content: 'a'.repeat(150), // exceeds the 100-character threshold
              metadata: {},
            },
          ],
        },
      ],
    };

    expect(agent.shouldSummarize(longContext)).toBe(true);
  });

  it('should correctly calculate character count from agent perspective', () => {
    const agent = makeAgent();

    const mixedContext: DebateContext = {
      problem: 'Test problem',
      history: [
        {
          roundNumber: 1,
          timestamp: new Date(),
          contributions: [
            // Own proposal: expected to count.
            {
              agentId: 'test-agent',
              agentRole: AGENT_ROLES.ARCHITECT,
              type: CONTRIBUTION_TYPES.PROPOSAL,
              content: 'a'.repeat(60),
              metadata: {},
            },
            // Critique aimed at this agent: expected NOT to count (critiques are excluded).
            {
              agentId: 'other-agent',
              agentRole: AGENT_ROLES.PERFORMANCE,
              type: CONTRIBUTION_TYPES.CRITIQUE,
              content: 'b'.repeat(40),
              targetAgentId: 'test-agent',
              metadata: {},
            },
            // Own refinement: expected to count.
            {
              agentId: 'test-agent',
              agentRole: AGENT_ROLES.ARCHITECT,
              type: CONTRIBUTION_TYPES.REFINEMENT,
              content: 'c'.repeat(60),
              metadata: {},
            },
            // Critique aimed at someone else: large, but expected NOT to count.
            {
              agentId: 'other-agent',
              agentRole: AGENT_ROLES.PERFORMANCE,
              type: CONTRIBUTION_TYPES.CRITIQUE,
              content: 'd'.repeat(200),
              targetAgentId: 'different-agent',
              metadata: {},
            },
          ],
        },
      ],
    };

    // 60 (proposal) + 60 (refinement) = 120, which is above the threshold of 100.
    expect(agent.shouldSummarize(mixedContext)).toBe(true);
  });
});
|
|
204
|
+
|
|
205
|
+
/**
 * Specs for RoleBasedAgent.prepareContext().
 *
 * Checks that the context is passed through untouched when no summary is
 * produced, that a summary object with the expected fields is returned when the
 * threshold is exceeded, and that a provider failure falls back to the original
 * context while logging a warning to stderr.
 */
describe('RoleBasedAgent - prepareContext()', () => {
  const agentConfig: AgentConfig = {
    id: 'test-agent',
    name: 'Test Agent',
    role: AGENT_ROLES.ARCHITECT,
    model: 'gpt-4',
    provider: LLM_PROVIDERS.OPENAI,
    temperature: 0.5,
  };

  const summaryConfig: SummarizationConfig = {
    enabled: true,
    threshold: 100,
    maxLength: 200,
    method: SUMMARIZATION_METHODS.LENGTH_BASED,
  };

  /** Creates the agent under test around the given provider and summarization settings. */
  function makeAgent(provider: LLMProvider, summarization: SummarizationConfig = summaryConfig) {
    return RoleBasedAgent.create(
      agentConfig,
      provider,
      'System prompt',
      undefined,
      summarization,
      undefined
    );
  }

  it('should return original context when summarization disabled', async () => {
    const agent = makeAgent(new MockLLMProvider(), { ...summaryConfig, enabled: false });

    const context: DebateContext = {
      problem: 'Test problem',
      history: [],
    };

    const result = await agent.prepareContext(context, 1);

    expect(result.context).toEqual(context);
    expect(result.summary).toBeUndefined();
  });

  it('should return original context when below threshold', async () => {
    const agent = makeAgent(new MockLLMProvider());

    const context: DebateContext = {
      problem: 'Test problem',
      history: [
        {
          roundNumber: 1,
          timestamp: new Date(),
          contributions: [
            {
              agentId: 'test-agent',
              agentRole: AGENT_ROLES.ARCHITECT,
              type: CONTRIBUTION_TYPES.PROPOSAL,
              content: 'Short',
              metadata: {},
            },
          ],
        },
      ],
    };

    const result = await agent.prepareContext(context, 1);

    expect(result.context).toEqual(context);
    expect(result.summary).toBeUndefined();
  });

  it('should return context with summary when above threshold', async () => {
    const agent = makeAgent(new MockLLMProvider(['Generated summary text']));

    const context: DebateContext = {
      problem: 'Test problem',
      history: [
        {
          roundNumber: 1,
          timestamp: new Date(),
          contributions: [
            {
              agentId: 'test-agent',
              agentRole: AGENT_ROLES.ARCHITECT,
              type: CONTRIBUTION_TYPES.PROPOSAL,
              content: 'a'.repeat(150),
              metadata: {},
            },
          ],
        },
      ],
    };

    const result = await agent.prepareContext(context, 1);

    // Summary is no longer stored in context, but returned separately
    expect(result.context).toBeDefined();
    expect(result.summary).toBeDefined();
    expect(result.summary?.agentId).toBe('test-agent');
    expect(result.summary?.agentRole).toBe(AGENT_ROLES.ARCHITECT);
    expect(result.summary?.summary).toBe('Generated summary text');
    expect(result.summary?.metadata.model).toBe(agentConfig.model);
    expect(result.summary?.metadata.temperature).toBe(agentConfig.temperature);
    expect(result.summary?.metadata.provider).toBe(agentConfig.provider);
  });

  it('should filter history to agent perspective', async () => {
    const provider = new MockLLMProvider(['Summary']);
    const agent = makeAgent(provider);

    const context: DebateContext = {
      problem: 'Test problem',
      history: [
        {
          roundNumber: 1,
          timestamp: new Date(),
          contributions: [
            {
              agentId: 'test-agent',
              agentRole: AGENT_ROLES.ARCHITECT,
              type: CONTRIBUTION_TYPES.PROPOSAL,
              content: 'a'.repeat(60),
              metadata: {},
            },
            {
              agentId: 'test-agent',
              agentRole: AGENT_ROLES.ARCHITECT,
              type: CONTRIBUTION_TYPES.REFINEMENT,
              content: 'b'.repeat(60),
              metadata: {},
            },
            {
              agentId: 'other-agent',
              agentRole: AGENT_ROLES.PERFORMANCE,
              type: CONTRIBUTION_TYPES.CRITIQUE,
              content: 'Should not be included'.repeat(10),
              targetAgentId: 'different-agent',
              metadata: {},
            },
          ],
        },
      ],
    };

    await agent.prepareContext(context, 1);

    // Verify the LLM was called (which means filtering occurred and threshold was met)
    expect(provider['currentIndex']).toBe(1);
  });

  it('should create correct DebateSummary object', async () => {
    const agent = makeAgent(new MockLLMProvider(['Summary text']));

    const context: DebateContext = {
      problem: 'Test problem',
      history: [
        {
          roundNumber: 1,
          timestamp: new Date(),
          contributions: [
            {
              agentId: 'test-agent',
              agentRole: AGENT_ROLES.ARCHITECT,
              type: CONTRIBUTION_TYPES.PROPOSAL,
              content: 'x'.repeat(150),
              metadata: {},
            },
          ],
        },
      ],
    };

    const result = await agent.prepareContext(context, 1);

    expect(result.summary).toBeDefined();
    expect(result.summary?.agentId).toBe('test-agent');
    expect(result.summary?.agentRole).toBe(AGENT_ROLES.ARCHITECT);
    expect(result.summary?.summary).toBe('Summary text');
    expect(result.summary?.metadata.beforeChars).toBeGreaterThan(0);
    expect(result.summary?.metadata.afterChars).toBe('Summary text'.length);
    expect(result.summary?.metadata.method).toBe(SUMMARIZATION_METHODS.LENGTH_BASED);
    expect(result.summary?.metadata.timestamp).toBeInstanceOf(Date);
    expect(result.summary?.metadata.model).toBe(agentConfig.model);
    expect(result.summary?.metadata.temperature).toBe(agentConfig.temperature);
    expect(result.summary?.metadata.provider).toBe(agentConfig.provider);
  });

  it('should fallback to full history on error with warning', async () => {
    /** Provider whose every completion rejects, to exercise the failure path. */
    class FailingProvider implements LLMProvider {
      async complete(_request: any): Promise<any> {
        throw new Error('LLM failure');
      }
    }

    const agent = makeAgent(new FailingProvider());

    const context: DebateContext = {
      problem: 'Test problem',
      history: [
        {
          roundNumber: 1,
          timestamp: new Date(),
          contributions: [
            {
              agentId: 'test-agent',
              agentRole: AGENT_ROLES.ARCHITECT,
              type: CONTRIBUTION_TYPES.PROPOSAL,
              content: 'z'.repeat(150),
              metadata: {},
            },
          ],
        },
      ],
    };

    // Mock stderr.write to verify warning is logged (changed from console.warn)
    const stderrSpy = jest.spyOn(process.stderr, 'write').mockImplementation(() => true);

    try {
      const result = await agent.prepareContext(context, 1);

      expect(result.context).toEqual(context);
      expect(result.summary).toBeUndefined();
      expect(stderrSpy).toHaveBeenCalledWith(
        expect.stringContaining('Summarization failed')
      );
    } finally {
      // Restore even when an expectation throws, so stderr is not left mocked
      // for whatever runs after this test.
      stderrSpy.mockRestore();
    }
  });
});
|
|
455
|
+
|
|
456
|
+
/**
 * Specs for RoleBasedAgent.defaultSummaryPrompt(): for each listed role the
 * prompt must be a non-empty string that embeds both the content to summarize
 * and the maximum length.
 */
describe('RoleBasedAgent - defaultSummaryPrompt()', () => {
  it('should return summary prompt for each role', () => {
    const rolesUnderTest = [
      AGENT_ROLES.ARCHITECT,
      AGENT_ROLES.PERFORMANCE,
      AGENT_ROLES.SECURITY,
      AGENT_ROLES.TESTING,
      AGENT_ROLES.GENERALIST,
    ];

    for (const role of rolesUnderTest) {
      const prompt = RoleBasedAgent.defaultSummaryPrompt(role, 'test content', 1000);

      expect(prompt).toBeDefined();
      expect(typeof prompt).toBe('string');
      expect(prompt.length).toBeGreaterThan(0);
      expect(prompt).toContain('test content');
      expect(prompt).toContain('1000');
    }
  });
});
|
|
476
|
+
|
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
// Mock OpenAI SDK to avoid network calls during tests
|
|
2
|
+
/**
 * Replaces the `openai` package with an in-memory stub so the specs never hit
 * the network. The stub's `chat.completions.create` always resolves to a single
 * choice containing 'Security solution text' plus fixed token usage numbers.
 */
jest.mock('openai', () => {
  class OpenAIMock {
    public chat = {
      completions: {
        // The request payload is ignored; every call yields the same canned completion.
        create: async (_: any) => {
          return {
            choices: [{ message: { content: 'Security solution text' } }],
            usage: { total_tokens: 100, prompt_tokens: 50, completion_tokens: 50 },
          };
        },
      },
    };

    // Accepts and ignores the client options passed by the caller.
    constructor(_opts: any) {}
  }

  return { __esModule: true, default: OpenAIMock };
});
|
|
18
|
+
|
|
19
|
+
import { RoleBasedAgent } from '../src/agents/role-based-agent';
|
|
20
|
+
import { createProvider } from '../src/providers/provider-factory';
|
|
21
|
+
import { AGENT_ROLES, LLM_PROVIDERS } from '../src/types/agent.types';
|
|
22
|
+
import { DEFAULT_SUMMARIZATION_ENABLED, DEFAULT_SUMMARIZATION_THRESHOLD, DEFAULT_SUMMARIZATION_MAX_LENGTH, DEFAULT_SUMMARIZATION_METHOD } from '../src/types/config.types';
|
|
23
|
+
|
|
24
|
+
/**
 * Specs for RoleBasedAgent configured with the security role: construction,
 * the default system prompt, the prompts forwarded to the propose/critique/refine
 * implementations, and prompt-source metadata handling.
 */
describe('RoleBasedAgent (Security Role)', () => {
  // Only the value of OPENAI_API_KEY is saved. Keeping a reference to
  // process.env itself would alias the live object, so "restoring" it would
  // leave the test key in place for later suites.
  let savedApiKey: string | undefined;
  let mockProvider: any;

  beforeAll(() => {
    savedApiKey = process.env.OPENAI_API_KEY;
    // The key is set before the provider is created.
    process.env.OPENAI_API_KEY = 'test-key';
    mockProvider = createProvider('openai');
  });

  afterAll(() => {
    if (savedApiKey === undefined) {
      // Assigning undefined would store the string "undefined", so delete instead.
      delete process.env.OPENAI_API_KEY;
    } else {
      process.env.OPENAI_API_KEY = savedApiKey;
    }
  });

  const mockConfig = {
    id: 'test-security-agent',
    name: 'Test Security Agent',
    role: AGENT_ROLES.SECURITY,
    model: 'gpt-4',
    provider: LLM_PROVIDERS.OPENAI,
    temperature: 0.5,
    enabled: true,
  };

  const mockContext = {
    debateId: 'test-debate',
    problem: 'Test problem',
    currentRound: 1,
    history: [],
  };

  const defaultSummaryConfig = {
    enabled: DEFAULT_SUMMARIZATION_ENABLED,
    threshold: DEFAULT_SUMMARIZATION_THRESHOLD,
    maxLength: DEFAULT_SUMMARIZATION_MAX_LENGTH,
    method: DEFAULT_SUMMARIZATION_METHOD,
  };

  describe('RoleBasedAgent.create()', () => {
    it('should create a RoleBasedAgent instance', () => {
      const agent = RoleBasedAgent.create(mockConfig, mockProvider, 'Test prompt', undefined, defaultSummaryConfig, undefined);

      expect(agent).toBeInstanceOf(RoleBasedAgent);
      expect(agent.config).toBe(mockConfig);
    });

    it('should create a RoleBasedAgent instance with prompt source metadata', () => {
      const promptSource = { source: 'built-in' as const };
      const agent = RoleBasedAgent.create(mockConfig, mockProvider, 'Test prompt', promptSource, defaultSummaryConfig, undefined);

      expect(agent).toBeInstanceOf(RoleBasedAgent);
      expect(agent.promptSource).toBe(promptSource);
    });
  });

  describe('defaultSystemPrompt()', () => {
    it('should return expected security-focused system prompt content', () => {
      const prompt = RoleBasedAgent.defaultSystemPrompt(AGENT_ROLES.SECURITY);

      const expectedFragments = [
        'security architect and engineer',
        'Threat modeling',
        'risk vectors',
        'architectural security',
        'Authentication',
        'authorization',
        'data protection',
        'compliance',
        'security controls',
        'defense in depth',
        'zero trust',
      ];
      for (const fragment of expectedFragments) {
        expect(prompt).toContain(fragment);
      }
    });
  });

  describe('propose()', () => {
    it('should call proposeImpl with security-focused prompts', async () => {
      const agent = RoleBasedAgent.create(mockConfig, mockProvider, 'Test security prompt', undefined, defaultSummaryConfig, undefined);
      const proposeImplSpy = jest.spyOn(agent, 'proposeImpl' as any);

      const result = await agent.propose('Test problem', mockContext);

      // Each fragment must appear in the prompt argument handed to proposeImpl.
      for (const fragment of ['security specialist', 'Threat Model', 'Security Objectives']) {
        expect(proposeImplSpy).toHaveBeenCalledWith(
          mockContext,
          'Test security prompt',
          expect.stringContaining(fragment)
        );
      }
      expect(result).toBeDefined();
      expect(result.content).toBe('Security solution text');
    });
  });

  describe('critique()', () => {
    it('should call critiqueImpl with security-focused prompts', async () => {
      const agent = RoleBasedAgent.create(mockConfig, mockProvider, 'Test security prompt', undefined, defaultSummaryConfig, undefined);
      const critiqueImplSpy = jest.spyOn(agent, 'critiqueImpl' as any);
      const mockProposal = {
        content: 'Test proposal content',
        metadata: { latencyMs: 100, model: 'gpt-4' },
      };

      const result = await agent.critique(mockProposal, mockContext);

      // Each fragment must appear in the prompt argument handed to critiqueImpl.
      for (const fragment of ['security engineering perspective', 'vulnerabilities', 'missing controls']) {
        expect(critiqueImplSpy).toHaveBeenCalledWith(
          mockProposal,
          mockContext,
          'Test security prompt',
          expect.stringContaining(fragment)
        );
      }
      expect(result).toBeDefined();
      expect(result.content).toBe('Security solution text');
    });
  });

  describe('refine()', () => {
    it('should call refineImpl with security-focused prompts', async () => {
      const agent = RoleBasedAgent.create(mockConfig, mockProvider, 'Test security prompt', undefined, defaultSummaryConfig, undefined);
      const refineImplSpy = jest.spyOn(agent, 'refineImpl' as any);
      const mockProposal = {
        content: 'Original proposal content',
        metadata: { latencyMs: 100, model: 'gpt-4' },
      };
      const mockCritiques = [
        { content: 'First critique', metadata: { latencyMs: 50, model: 'gpt-4' } },
        { content: 'Second critique', metadata: { latencyMs: 60, model: 'gpt-4' } },
      ];

      const result = await agent.refine(mockProposal, mockCritiques, mockContext);

      // Each fragment must appear in the prompt argument handed to refineImpl.
      for (const fragment of ['security concerns', 'strengthen the protection', 'Revised Security Architecture']) {
        expect(refineImplSpy).toHaveBeenCalledWith(
          mockProposal,
          mockCritiques,
          mockContext,
          'Test security prompt',
          expect.stringContaining(fragment)
        );
      }
      expect(result).toBeDefined();
      expect(result.content).toBe('Security solution text');
    });
  });

  describe('prompt source metadata handling', () => {
    it('should handle built-in prompt source metadata', () => {
      const promptSource = { source: 'built-in' as const };
      const agent = RoleBasedAgent.create(mockConfig, mockProvider, 'Test prompt', promptSource, defaultSummaryConfig, undefined);

      expect(agent.promptSource).toEqual(promptSource);
    });

    it('should handle file prompt source metadata', () => {
      const promptSource = { source: 'file' as const, absPath: '/path/to/prompt.md' };
      const agent = RoleBasedAgent.create(mockConfig, mockProvider, 'Test prompt', promptSource, defaultSummaryConfig, undefined);

      expect(agent.promptSource).toEqual(promptSource);
    });

    it('should handle undefined prompt source metadata', () => {
      const agent = RoleBasedAgent.create(mockConfig, mockProvider, 'Test prompt', undefined, defaultSummaryConfig, undefined);

      expect(agent.promptSource).toBeUndefined();
    });
  });
});
|
|
218
|
+
|
|
219
|
+
// Note: Integration test with buildAgents function would require importing and setting up
|
|
220
|
+
// the entire buildAgents function with mocks, which is complex. The core functionality
|
|
221
|
+
// is already tested above through the RoleBasedAgent class methods.
|