dialectic 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119) hide show
  1. package/.cursor/commands/setup-test.mdc +175 -0
  2. package/.cursor/rules/basic-code-cleanup.mdc +1110 -0
  3. package/.cursor/rules/riper5.mdc +96 -0
  4. package/.env.example +6 -0
  5. package/AGENTS.md +1052 -0
  6. package/LICENSE +21 -0
  7. package/README.md +93 -0
  8. package/WARP.md +113 -0
  9. package/dialectic-1.0.0.tgz +0 -0
  10. package/dialectic.js +10 -0
  11. package/docs/commands.md +375 -0
  12. package/docs/configuration.md +882 -0
  13. package/docs/context_summarization.md +1023 -0
  14. package/docs/debate_flow.md +1127 -0
  15. package/docs/eval_flow.md +795 -0
  16. package/docs/evaluator.md +141 -0
  17. package/examples/debate-config-openrouter.json +48 -0
  18. package/examples/debate_config1.json +48 -0
  19. package/examples/eval/eval1/eval_config1.json +13 -0
  20. package/examples/eval/eval1/result1.json +62 -0
  21. package/examples/eval/eval1/result2.json +97 -0
  22. package/examples/eval_summary_format.md +11 -0
  23. package/examples/example3/debate-config.json +64 -0
  24. package/examples/example3/eval_config2.json +25 -0
  25. package/examples/example3/problem.md +17 -0
  26. package/examples/example3/rounds_test/eval_run.sh +16 -0
  27. package/examples/example3/rounds_test/run_test.sh +16 -0
  28. package/examples/kata1/architect-only-solution_2-rounds.json +121 -0
  29. package/examples/kata1/architect-perf-solution_2-rounds.json +234 -0
  30. package/examples/kata1/debate-config-kata1.json +54 -0
  31. package/examples/kata1/eval_architect-only_2-rounds.json +97 -0
  32. package/examples/kata1/eval_architect-perf_2-rounds.json +97 -0
  33. package/examples/kata1/kata1-report.md +12224 -0
  34. package/examples/kata1/kata1-report_temps-01_01_01_07.md +2451 -0
  35. package/examples/kata1/kata1.md +5 -0
  36. package/examples/kata1/meta.txt +1 -0
  37. package/examples/kata2/debate-config.json +54 -0
  38. package/examples/kata2/eval_config1.json +21 -0
  39. package/examples/kata2/eval_config2.json +25 -0
  40. package/examples/kata2/kata2.md +5 -0
  41. package/examples/kata2/only_architect/debate-config.json +45 -0
  42. package/examples/kata2/only_architect/eval_run.sh +11 -0
  43. package/examples/kata2/only_architect/run_test.sh +5 -0
  44. package/examples/kata2/rounds_test/eval_run.sh +11 -0
  45. package/examples/kata2/rounds_test/run_test.sh +5 -0
  46. package/examples/kata2/summary_length_test/eval_run.sh +11 -0
  47. package/examples/kata2/summary_length_test/eval_run_w_clarify.sh +7 -0
  48. package/examples/kata2/summary_length_test/run_test.sh +5 -0
  49. package/examples/task-queue/debate-config.json +76 -0
  50. package/examples/task-queue/debate_report.md +566 -0
  51. package/examples/task-queue/task-queue-system.md +25 -0
  52. package/jest.config.ts +13 -0
  53. package/multi_agent_debate_spec.md +2980 -0
  54. package/package.json +38 -0
  55. package/sanity-check-problem.txt +9 -0
  56. package/src/agents/prompts/architect-prompts.ts +203 -0
  57. package/src/agents/prompts/generalist-prompts.ts +157 -0
  58. package/src/agents/prompts/index.ts +41 -0
  59. package/src/agents/prompts/judge-prompts.ts +19 -0
  60. package/src/agents/prompts/kiss-prompts.ts +230 -0
  61. package/src/agents/prompts/performance-prompts.ts +142 -0
  62. package/src/agents/prompts/prompt-types.ts +68 -0
  63. package/src/agents/prompts/security-prompts.ts +149 -0
  64. package/src/agents/prompts/shared.ts +144 -0
  65. package/src/agents/prompts/testing-prompts.ts +149 -0
  66. package/src/agents/role-based-agent.ts +386 -0
  67. package/src/cli/commands/debate.ts +761 -0
  68. package/src/cli/commands/eval.ts +475 -0
  69. package/src/cli/commands/report.ts +265 -0
  70. package/src/cli/index.ts +79 -0
  71. package/src/core/agent.ts +198 -0
  72. package/src/core/clarifications.ts +34 -0
  73. package/src/core/judge.ts +257 -0
  74. package/src/core/orchestrator.ts +432 -0
  75. package/src/core/state-manager.ts +322 -0
  76. package/src/eval/evaluator-agent.ts +130 -0
  77. package/src/eval/prompts/system.md +41 -0
  78. package/src/eval/prompts/user.md +64 -0
  79. package/src/providers/llm-provider.ts +25 -0
  80. package/src/providers/openai-provider.ts +84 -0
  81. package/src/providers/openrouter-provider.ts +122 -0
  82. package/src/providers/provider-factory.ts +64 -0
  83. package/src/types/agent.types.ts +141 -0
  84. package/src/types/config.types.ts +47 -0
  85. package/src/types/debate.types.ts +237 -0
  86. package/src/types/eval.types.ts +85 -0
  87. package/src/utils/common.ts +104 -0
  88. package/src/utils/context-formatter.ts +102 -0
  89. package/src/utils/context-summarizer.ts +143 -0
  90. package/src/utils/env-loader.ts +46 -0
  91. package/src/utils/exit-codes.ts +5 -0
  92. package/src/utils/id.ts +11 -0
  93. package/src/utils/logger.ts +48 -0
  94. package/src/utils/paths.ts +10 -0
  95. package/src/utils/progress-ui.ts +313 -0
  96. package/src/utils/prompt-loader.ts +79 -0
  97. package/src/utils/report-generator.ts +301 -0
  98. package/tests/clarifications.spec.ts +128 -0
  99. package/tests/cli.debate.spec.ts +144 -0
  100. package/tests/config-loading.spec.ts +206 -0
  101. package/tests/context-summarizer.spec.ts +131 -0
  102. package/tests/debate-config-custom.json +38 -0
  103. package/tests/env-loader.spec.ts +149 -0
  104. package/tests/eval.command.spec.ts +1191 -0
  105. package/tests/logger.spec.ts +19 -0
  106. package/tests/openai-provider.spec.ts +26 -0
  107. package/tests/openrouter-provider.spec.ts +279 -0
  108. package/tests/orchestrator-summary.spec.ts +386 -0
  109. package/tests/orchestrator.spec.ts +207 -0
  110. package/tests/prompt-loader.spec.ts +52 -0
  111. package/tests/prompts/architect.md +16 -0
  112. package/tests/provider-factory.spec.ts +150 -0
  113. package/tests/report.command.spec.ts +546 -0
  114. package/tests/role-based-agent-summary.spec.ts +476 -0
  115. package/tests/security-agent.spec.ts +221 -0
  116. package/tests/shared-prompts.spec.ts +318 -0
  117. package/tests/state-manager.spec.ts +251 -0
  118. package/tests/summary-prompts.spec.ts +153 -0
  119. package/tsconfig.json +49 -0
@@ -0,0 +1,476 @@
1
+ import { RoleBasedAgent } from '../src/agents/role-based-agent';
2
+ import { LLMProvider } from '../src/providers/llm-provider';
3
+ import { AgentConfig, AGENT_ROLES, LLM_PROVIDERS } from '../src/types/agent.types';
4
+ import { DebateContext, SummarizationConfig, CONTRIBUTION_TYPES, SUMMARIZATION_METHODS } from '../src/types/debate.types';
5
+
6
/**
 * Test double for `LLMProvider` that replays canned responses instead of calling a model.
 *
 * Every `complete()` call returns the next entry of `responses`, wrapping around to the
 * first entry once the list is exhausted, together with a fixed usage of 50 total tokens.
 */
class MockLLMProvider implements LLMProvider {
  private readonly responses: string[];
  // Number of complete() calls made so far; also selects the next canned response.
  private currentIndex = 0;

  /**
   * @param responses Canned texts to replay in order. Defaults to a single 'Mock summary'.
   * @throws Error when `responses` is empty: the modulo in `complete()` would otherwise
   *         produce a NaN index and silently hand back `text: undefined`.
   */
  constructor(responses: string[] = ['Mock summary']) {
    if (responses.length === 0) {
      throw new Error('MockLLMProvider requires at least one canned response');
    }
    this.responses = responses;
  }

  /**
   * Returns the next canned response; the request itself is ignored.
   *
   * @param _request Ignored completion request.
   * @returns An object with the canned `text` and `usage.totalTokens` fixed at 50.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- kept loose so the mock satisfies whatever response type LLMProvider declares
  async complete(_request: unknown): Promise<any> {
    const response = this.responses[this.currentIndex % this.responses.length];
    this.currentIndex++;
    return {
      text: response,
      usage: { totalTokens: 50 },
    };
  }
}
24
+
25
/**
 * Tests for RoleBasedAgent.shouldSummarize().
 *
 * All cases use a 100-character threshold so that small fixtures can sit on either
 * side of it.
 */
describe('RoleBasedAgent - shouldSummarize()', () => {
  const agentConfig: AgentConfig = {
    id: 'test-agent',
    name: 'Test Agent',
    role: AGENT_ROLES.ARCHITECT,
    model: 'gpt-4',
    provider: LLM_PROVIDERS.OPENAI,
    temperature: 0.5,
  };

  // Low threshold for testing.
  const summaryConfig: SummarizationConfig = {
    enabled: true,
    threshold: 100,
    maxLength: 50,
    method: SUMMARIZATION_METHODS.LENGTH_BASED,
  };

  // Creates the agent under test with a fresh mock provider and the given summarization settings.
  const buildAgent = (summarization: SummarizationConfig) =>
    RoleBasedAgent.create(
      agentConfig,
      new MockLLMProvider(),
      'System prompt',
      undefined,
      summarization,
      undefined
    );

  // Single-round context whose only contribution is the test agent's own proposal.
  const contextWithOwnProposal = (content: string): DebateContext => ({
    problem: 'Test problem',
    history: [{
      roundNumber: 1,
      timestamp: new Date(),
      contributions: [
        {
          agentId: 'test-agent',
          agentRole: AGENT_ROLES.ARCHITECT,
          type: CONTRIBUTION_TYPES.PROPOSAL,
          content,
          metadata: {}
        }
      ]
    }]
  });

  it('should return false when summarization is disabled', () => {
    const agent = buildAgent({ ...summaryConfig, enabled: false });
    const emptyContext: DebateContext = { problem: 'Test problem', history: [] };

    expect(agent.shouldSummarize(emptyContext)).toBe(false);
  });

  it('should return false when history is empty', () => {
    const agent = buildAgent(summaryConfig);
    const emptyContext: DebateContext = { problem: 'Test problem', history: [] };

    expect(agent.shouldSummarize(emptyContext)).toBe(false);
  });

  it('should return false when below threshold', () => {
    const agent = buildAgent(summaryConfig);

    // 'Short proposal' is well under the 100-character threshold.
    const shortContext = contextWithOwnProposal('Short proposal');

    expect(agent.shouldSummarize(shortContext)).toBe(false);
  });

  it('should return true when above threshold', () => {
    const agent = buildAgent(summaryConfig);

    // 150 characters, above the 100-character threshold.
    const longContext = contextWithOwnProposal('a'.repeat(150));

    expect(agent.shouldSummarize(longContext)).toBe(true);
  });

  it('should correctly calculate character count from agent perspective', () => {
    const agent = buildAgent(summaryConfig);

    const mixedContext: DebateContext = {
      problem: 'Test problem',
      history: [{
        roundNumber: 1,
        timestamp: new Date(),
        contributions: [
          // The agent's own proposal: expected to count.
          {
            agentId: 'test-agent',
            agentRole: AGENT_ROLES.ARCHITECT,
            type: CONTRIBUTION_TYPES.PROPOSAL,
            content: 'a'.repeat(60),
            metadata: {}
          },
          // Critique aimed at the agent: expected NOT to count (critiques excluded).
          {
            agentId: 'other-agent',
            agentRole: AGENT_ROLES.PERFORMANCE,
            type: CONTRIBUTION_TYPES.CRITIQUE,
            content: 'b'.repeat(40),
            targetAgentId: 'test-agent',
            metadata: {}
          },
          // The agent's own refinement: expected to count.
          {
            agentId: 'test-agent',
            agentRole: AGENT_ROLES.ARCHITECT,
            type: CONTRIBUTION_TYPES.REFINEMENT,
            content: 'c'.repeat(60),
            metadata: {}
          },
          // Critique aimed at someone else: large, but expected NOT to count.
          {
            agentId: 'other-agent',
            agentRole: AGENT_ROLES.PERFORMANCE,
            type: CONTRIBUTION_TYPES.CRITIQUE,
            content: 'd'.repeat(200),
            targetAgentId: 'different-agent',
            metadata: {}
          }
        ]
      }]
    };

    // Expected total: 60 + 60 = 120 (proposal + refinement only), above the threshold of 100.
    expect(agent.shouldSummarize(mixedContext)).toBe(true);
  });
});
204
+
205
/**
 * Tests for RoleBasedAgent.prepareContext().
 *
 * Covers the pass-through cases (summarization disabled, history below threshold),
 * the summary returned once the threshold is crossed, and the fallback when the
 * provider throws.
 */
describe('RoleBasedAgent - prepareContext()', () => {
  const agentConfig: AgentConfig = {
    id: 'test-agent',
    name: 'Test Agent',
    role: AGENT_ROLES.ARCHITECT,
    model: 'gpt-4',
    provider: LLM_PROVIDERS.OPENAI,
    temperature: 0.5,
  };

  const summaryConfig: SummarizationConfig = {
    enabled: true,
    threshold: 100,
    maxLength: 200,
    method: SUMMARIZATION_METHODS.LENGTH_BASED,
  };

  // Creates the agent under test around the given provider and summarization settings.
  const buildAgent = (provider: LLMProvider, summarization: SummarizationConfig = summaryConfig) =>
    RoleBasedAgent.create(
      agentConfig,
      provider,
      'System prompt',
      undefined,
      summarization,
      undefined
    );

  // Single-round context whose only contribution is the test agent's own proposal.
  const contextWithOwnProposal = (content: string): DebateContext => ({
    problem: 'Test problem',
    history: [{
      roundNumber: 1,
      timestamp: new Date(),
      contributions: [{
        agentId: 'test-agent',
        agentRole: AGENT_ROLES.ARCHITECT,
        type: CONTRIBUTION_TYPES.PROPOSAL,
        content,
        metadata: {}
      }]
    }]
  });

  it('should return original context when summarization disabled', async () => {
    const agent = buildAgent(new MockLLMProvider(), { ...summaryConfig, enabled: false });
    const context: DebateContext = {
      problem: 'Test problem',
      history: []
    };

    const result = await agent.prepareContext(context, 1);

    expect(result.context).toEqual(context);
    expect(result.summary).toBeUndefined();
  });

  it('should return original context when below threshold', async () => {
    const agent = buildAgent(new MockLLMProvider());
    const context = contextWithOwnProposal('Short');

    const result = await agent.prepareContext(context, 1);

    expect(result.context).toEqual(context);
    expect(result.summary).toBeUndefined();
  });

  it('should return context with summary when above threshold', async () => {
    const agent = buildAgent(new MockLLMProvider(['Generated summary text']));
    const context = contextWithOwnProposal('a'.repeat(150));

    const result = await agent.prepareContext(context, 1);

    // Summary is no longer stored in context, but returned separately
    expect(result.context).toBeDefined();
    expect(result.summary).toBeDefined();
    expect(result.summary?.agentId).toBe('test-agent');
    expect(result.summary?.agentRole).toBe(AGENT_ROLES.ARCHITECT);
    expect(result.summary?.summary).toBe('Generated summary text');
    expect(result.summary?.metadata.model).toBe(agentConfig.model);
    expect(result.summary?.metadata.temperature).toBe(agentConfig.temperature);
    expect(result.summary?.metadata.provider).toBe(agentConfig.provider);
  });

  it('should filter history to agent perspective', async () => {
    const provider = new MockLLMProvider(['Summary']);
    // Observe calls through the public API rather than reading the mock's private counter.
    const completeSpy = jest.spyOn(provider, 'complete');
    const agent = buildAgent(provider);

    const context: DebateContext = {
      problem: 'Test problem',
      history: [{
        roundNumber: 1,
        timestamp: new Date(),
        contributions: [
          {
            agentId: 'test-agent',
            agentRole: AGENT_ROLES.ARCHITECT,
            type: CONTRIBUTION_TYPES.PROPOSAL,
            content: 'a'.repeat(60),
            metadata: {}
          },
          {
            agentId: 'test-agent',
            agentRole: AGENT_ROLES.ARCHITECT,
            type: CONTRIBUTION_TYPES.REFINEMENT,
            content: 'b'.repeat(60),
            metadata: {}
          },
          {
            agentId: 'other-agent',
            agentRole: AGENT_ROLES.PERFORMANCE,
            type: CONTRIBUTION_TYPES.CRITIQUE,
            content: 'Should not be included'.repeat(10),
            targetAgentId: 'different-agent',
            metadata: {}
          }
        ]
      }]
    };

    await agent.prepareContext(context, 1);

    // Verify the LLM was called (which means filtering occurred and threshold was met)
    expect(completeSpy).toHaveBeenCalledTimes(1);
  });

  it('should create correct DebateSummary object', async () => {
    const agent = buildAgent(new MockLLMProvider(['Summary text']));
    const context = contextWithOwnProposal('x'.repeat(150));

    const result = await agent.prepareContext(context, 1);

    expect(result.summary).toBeDefined();
    expect(result.summary?.agentId).toBe('test-agent');
    expect(result.summary?.agentRole).toBe(AGENT_ROLES.ARCHITECT);
    expect(result.summary?.summary).toBe('Summary text');
    expect(result.summary?.metadata.beforeChars).toBeGreaterThan(0);
    expect(result.summary?.metadata.afterChars).toBe('Summary text'.length);
    expect(result.summary?.metadata.method).toBe(SUMMARIZATION_METHODS.LENGTH_BASED);
    expect(result.summary?.metadata.timestamp).toBeInstanceOf(Date);
    expect(result.summary?.metadata.model).toBe(agentConfig.model);
    expect(result.summary?.metadata.temperature).toBe(agentConfig.temperature);
    expect(result.summary?.metadata.provider).toBe(agentConfig.provider);
  });

  it('should fallback to full history on error with warning', async () => {
    // Provider whose every completion rejects, to drive the error path.
    class FailingProvider implements LLMProvider {
      // eslint-disable-next-line @typescript-eslint/no-explicit-any -- never resolves; return type only needs to satisfy LLMProvider
      async complete(_request: unknown): Promise<any> {
        throw new Error('LLM failure');
      }
    }

    const agent = buildAgent(new FailingProvider());
    const context = contextWithOwnProposal('z'.repeat(150));

    // Mock stderr.write to verify warning is logged (changed from console.warn)
    const stderrSpy = jest.spyOn(process.stderr, 'write').mockImplementation(() => true);

    try {
      const result = await agent.prepareContext(context, 1);

      expect(result.context).toEqual(context);
      expect(result.summary).toBeUndefined();
      expect(stderrSpy).toHaveBeenCalledWith(
        expect.stringContaining('Summarization failed')
      );
    } finally {
      // Restore even when an assertion throws, so stderr is not left muted for later tests.
      stderrSpy.mockRestore();
    }
  });
});
455
+
456
/**
 * Tests for the static RoleBasedAgent.defaultSummaryPrompt(): for each listed role the
 * prompt must be a non-empty string that embeds both the content and the length limit.
 */
describe('RoleBasedAgent - defaultSummaryPrompt()', () => {
  it('should return summary prompt for each role', () => {
    const rolesUnderTest = [
      AGENT_ROLES.ARCHITECT,
      AGENT_ROLES.PERFORMANCE,
      AGENT_ROLES.SECURITY,
      AGENT_ROLES.TESTING,
      AGENT_ROLES.GENERALIST
    ];

    for (const role of rolesUnderTest) {
      const summaryPrompt = RoleBasedAgent.defaultSummaryPrompt(role, 'test content', 1000);

      expect(summaryPrompt).toBeDefined();
      expect(typeof summaryPrompt).toBe('string');
      expect(summaryPrompt.length).toBeGreaterThan(0);
      // Both the content to summarize and the numeric limit must appear in the prompt.
      expect(summaryPrompt).toContain('test content');
      expect(summaryPrompt).toContain('1000');
    }
  });
});
476
+
@@ -0,0 +1,221 @@
1
// Swap the OpenAI SDK for an in-memory stub so the tests never make network calls.
// Everything the stub needs is declared inside the factory.
jest.mock('openai', () => {
  // Minimal stand-in for the SDK client: only chat.completions.create is provided.
  class OpenAIMock {
    public chat = {
      completions: {
        // Resolves with a fresh canned chat-completion payload on every call.
        create: async (_params: any) => {
          return {
            choices: [{ message: { content: 'Security solution text' } }],
            usage: { total_tokens: 100, prompt_tokens: 50, completion_tokens: 50 }
          };
        },
      },
    };

    // Constructor options are accepted and ignored.
    constructor(_options: any) {}
  }

  return {
    __esModule: true,
    default: OpenAIMock,
  };
});
18
+
19
+ import { RoleBasedAgent } from '../src/agents/role-based-agent';
20
+ import { createProvider } from '../src/providers/provider-factory';
21
+ import { AGENT_ROLES, LLM_PROVIDERS } from '../src/types/agent.types';
22
+ import { DEFAULT_SUMMARIZATION_ENABLED, DEFAULT_SUMMARIZATION_THRESHOLD, DEFAULT_SUMMARIZATION_MAX_LENGTH, DEFAULT_SUMMARIZATION_METHOD } from '../src/types/config.types';
23
+
24
/**
 * Tests for RoleBasedAgent configured with the security role, using a provider built by
 * the provider factory on top of the mocked OpenAI SDK.
 */
describe('RoleBasedAgent (Security Role)', () => {
  // Value of OPENAI_API_KEY before this suite ran (undefined when it was not set).
  let savedApiKey: string | undefined;
  let mockProvider: ReturnType<typeof createProvider>;

  beforeAll(() => {
    // The provider factory reads the key from the environment.
    savedApiKey = process.env.OPENAI_API_KEY;
    process.env.OPENAI_API_KEY = 'test-key';
    mockProvider = createProvider('openai');
  });

  afterAll(() => {
    // Put the variable back exactly as it was. Holding a reference to process.env and
    // reassigning it later restores nothing, because both names point at the same object.
    if (savedApiKey === undefined) {
      delete process.env.OPENAI_API_KEY;
    } else {
      process.env.OPENAI_API_KEY = savedApiKey;
    }
  });

  const mockConfig = {
    id: 'test-security-agent',
    name: 'Test Security Agent',
    role: AGENT_ROLES.SECURITY,
    model: 'gpt-4',
    provider: LLM_PROVIDERS.OPENAI,
    temperature: 0.5,
    enabled: true
  };

  const mockContext = {
    debateId: 'test-debate',
    problem: 'Test problem',
    currentRound: 1,
    history: []
  };

  const defaultSummaryConfig = {
    enabled: DEFAULT_SUMMARIZATION_ENABLED,
    threshold: DEFAULT_SUMMARIZATION_THRESHOLD,
    maxLength: DEFAULT_SUMMARIZATION_MAX_LENGTH,
    method: DEFAULT_SUMMARIZATION_METHOD,
  };

  // Builds an agent with the given system prompt and no prompt-source metadata.
  // Must be called from inside a test so that beforeAll has already set mockProvider.
  const createAgent = (systemPrompt: string) =>
    RoleBasedAgent.create(mockConfig, mockProvider, systemPrompt, undefined, defaultSummaryConfig, undefined);

  describe('RoleBasedAgent.create()', () => {
    it('should create a RoleBasedAgent instance', () => {
      const agent = createAgent('Test prompt');

      expect(agent).toBeInstanceOf(RoleBasedAgent);
      expect(agent.config).toBe(mockConfig);
    });

    it('should create a RoleBasedAgent instance with prompt source metadata', () => {
      const promptSource = { source: 'built-in' as const };
      const agent = RoleBasedAgent.create(mockConfig, mockProvider, 'Test prompt', promptSource, defaultSummaryConfig, undefined);

      expect(agent).toBeInstanceOf(RoleBasedAgent);
      expect(agent.promptSource).toBe(promptSource);
    });
  });

  describe('defaultSystemPrompt()', () => {
    it('should return expected security-focused system prompt content', () => {
      const prompt = RoleBasedAgent.defaultSystemPrompt(AGENT_ROLES.SECURITY);

      const expectedFragments = [
        'security architect and engineer',
        'Threat modeling',
        'risk vectors',
        'architectural security',
        'Authentication',
        'authorization',
        'data protection',
        'compliance',
        'security controls',
        'defense in depth',
        'zero trust',
      ];
      for (const fragment of expectedFragments) {
        expect(prompt).toContain(fragment);
      }
    });
  });

  describe('propose()', () => {
    it('should call proposeImpl with security-focused prompts', async () => {
      const agent = createAgent('Test security prompt');
      const proposeImplSpy = jest.spyOn(agent, 'proposeImpl' as any);

      const result = await agent.propose('Test problem', mockContext);

      // The prompt handed to proposeImpl must carry each security-specific fragment.
      for (const fragment of ['security specialist', 'Threat Model', 'Security Objectives']) {
        expect(proposeImplSpy).toHaveBeenCalledWith(
          mockContext,
          'Test security prompt',
          expect.stringContaining(fragment)
        );
      }
      expect(result).toBeDefined();
      expect(result.content).toBe('Security solution text');
    });
  });

  describe('critique()', () => {
    it('should call critiqueImpl with security-focused prompts', async () => {
      const agent = createAgent('Test security prompt');
      const critiqueImplSpy = jest.spyOn(agent, 'critiqueImpl' as any);
      const mockProposal = {
        content: 'Test proposal content',
        metadata: { latencyMs: 100, model: 'gpt-4' }
      };

      const result = await agent.critique(mockProposal, mockContext);

      for (const fragment of ['security engineering perspective', 'vulnerabilities', 'missing controls']) {
        expect(critiqueImplSpy).toHaveBeenCalledWith(
          mockProposal,
          mockContext,
          'Test security prompt',
          expect.stringContaining(fragment)
        );
      }
      expect(result).toBeDefined();
      expect(result.content).toBe('Security solution text');
    });
  });

  describe('refine()', () => {
    it('should call refineImpl with security-focused prompts', async () => {
      const agent = createAgent('Test security prompt');
      const refineImplSpy = jest.spyOn(agent, 'refineImpl' as any);
      const mockProposal = {
        content: 'Original proposal content',
        metadata: { latencyMs: 100, model: 'gpt-4' }
      };
      const mockCritiques = [
        { content: 'First critique', metadata: { latencyMs: 50, model: 'gpt-4' } },
        { content: 'Second critique', metadata: { latencyMs: 60, model: 'gpt-4' } }
      ];

      const result = await agent.refine(mockProposal, mockCritiques, mockContext);

      for (const fragment of ['security concerns', 'strengthen the protection', 'Revised Security Architecture']) {
        expect(refineImplSpy).toHaveBeenCalledWith(
          mockProposal,
          mockCritiques,
          mockContext,
          'Test security prompt',
          expect.stringContaining(fragment)
        );
      }
      expect(result).toBeDefined();
      expect(result.content).toBe('Security solution text');
    });
  });

  describe('prompt source metadata handling', () => {
    it('should handle built-in prompt source metadata', () => {
      const promptSource = { source: 'built-in' as const };
      const agent = RoleBasedAgent.create(mockConfig, mockProvider, 'Test prompt', promptSource, defaultSummaryConfig, undefined);

      expect(agent.promptSource).toEqual(promptSource);
    });

    it('should handle file prompt source metadata', () => {
      const promptSource = { source: 'file' as const, absPath: '/path/to/prompt.md' };
      const agent = RoleBasedAgent.create(mockConfig, mockProvider, 'Test prompt', promptSource, defaultSummaryConfig, undefined);

      expect(agent.promptSource).toEqual(promptSource);
    });

    it('should handle undefined prompt source metadata', () => {
      const agent = createAgent('Test prompt');

      expect(agent.promptSource).toBeUndefined();
    });
  });
});
218
+
219
+ // Note: Integration test with buildAgents function would require importing and setting up
220
+ // the entire buildAgents function with mocks, which is complex. The core functionality
221
+ // is already tested above through the RoleBasedAgent class methods.