dialectic 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.cursor/commands/setup-test.mdc +175 -0
- package/.cursor/rules/basic-code-cleanup.mdc +1110 -0
- package/.cursor/rules/riper5.mdc +96 -0
- package/.env.example +6 -0
- package/AGENTS.md +1052 -0
- package/LICENSE +21 -0
- package/README.md +93 -0
- package/WARP.md +113 -0
- package/dialectic-1.0.0.tgz +0 -0
- package/dialectic.js +10 -0
- package/docs/commands.md +375 -0
- package/docs/configuration.md +882 -0
- package/docs/context_summarization.md +1023 -0
- package/docs/debate_flow.md +1127 -0
- package/docs/eval_flow.md +795 -0
- package/docs/evaluator.md +141 -0
- package/examples/debate-config-openrouter.json +48 -0
- package/examples/debate_config1.json +48 -0
- package/examples/eval/eval1/eval_config1.json +13 -0
- package/examples/eval/eval1/result1.json +62 -0
- package/examples/eval/eval1/result2.json +97 -0
- package/examples/eval_summary_format.md +11 -0
- package/examples/example3/debate-config.json +64 -0
- package/examples/example3/eval_config2.json +25 -0
- package/examples/example3/problem.md +17 -0
- package/examples/example3/rounds_test/eval_run.sh +16 -0
- package/examples/example3/rounds_test/run_test.sh +16 -0
- package/examples/kata1/architect-only-solution_2-rounds.json +121 -0
- package/examples/kata1/architect-perf-solution_2-rounds.json +234 -0
- package/examples/kata1/debate-config-kata1.json +54 -0
- package/examples/kata1/eval_architect-only_2-rounds.json +97 -0
- package/examples/kata1/eval_architect-perf_2-rounds.json +97 -0
- package/examples/kata1/kata1-report.md +12224 -0
- package/examples/kata1/kata1-report_temps-01_01_01_07.md +2451 -0
- package/examples/kata1/kata1.md +5 -0
- package/examples/kata1/meta.txt +1 -0
- package/examples/kata2/debate-config.json +54 -0
- package/examples/kata2/eval_config1.json +21 -0
- package/examples/kata2/eval_config2.json +25 -0
- package/examples/kata2/kata2.md +5 -0
- package/examples/kata2/only_architect/debate-config.json +45 -0
- package/examples/kata2/only_architect/eval_run.sh +11 -0
- package/examples/kata2/only_architect/run_test.sh +5 -0
- package/examples/kata2/rounds_test/eval_run.sh +11 -0
- package/examples/kata2/rounds_test/run_test.sh +5 -0
- package/examples/kata2/summary_length_test/eval_run.sh +11 -0
- package/examples/kata2/summary_length_test/eval_run_w_clarify.sh +7 -0
- package/examples/kata2/summary_length_test/run_test.sh +5 -0
- package/examples/task-queue/debate-config.json +76 -0
- package/examples/task-queue/debate_report.md +566 -0
- package/examples/task-queue/task-queue-system.md +25 -0
- package/jest.config.ts +13 -0
- package/multi_agent_debate_spec.md +2980 -0
- package/package.json +38 -0
- package/sanity-check-problem.txt +9 -0
- package/src/agents/prompts/architect-prompts.ts +203 -0
- package/src/agents/prompts/generalist-prompts.ts +157 -0
- package/src/agents/prompts/index.ts +41 -0
- package/src/agents/prompts/judge-prompts.ts +19 -0
- package/src/agents/prompts/kiss-prompts.ts +230 -0
- package/src/agents/prompts/performance-prompts.ts +142 -0
- package/src/agents/prompts/prompt-types.ts +68 -0
- package/src/agents/prompts/security-prompts.ts +149 -0
- package/src/agents/prompts/shared.ts +144 -0
- package/src/agents/prompts/testing-prompts.ts +149 -0
- package/src/agents/role-based-agent.ts +386 -0
- package/src/cli/commands/debate.ts +761 -0
- package/src/cli/commands/eval.ts +475 -0
- package/src/cli/commands/report.ts +265 -0
- package/src/cli/index.ts +79 -0
- package/src/core/agent.ts +198 -0
- package/src/core/clarifications.ts +34 -0
- package/src/core/judge.ts +257 -0
- package/src/core/orchestrator.ts +432 -0
- package/src/core/state-manager.ts +322 -0
- package/src/eval/evaluator-agent.ts +130 -0
- package/src/eval/prompts/system.md +41 -0
- package/src/eval/prompts/user.md +64 -0
- package/src/providers/llm-provider.ts +25 -0
- package/src/providers/openai-provider.ts +84 -0
- package/src/providers/openrouter-provider.ts +122 -0
- package/src/providers/provider-factory.ts +64 -0
- package/src/types/agent.types.ts +141 -0
- package/src/types/config.types.ts +47 -0
- package/src/types/debate.types.ts +237 -0
- package/src/types/eval.types.ts +85 -0
- package/src/utils/common.ts +104 -0
- package/src/utils/context-formatter.ts +102 -0
- package/src/utils/context-summarizer.ts +143 -0
- package/src/utils/env-loader.ts +46 -0
- package/src/utils/exit-codes.ts +5 -0
- package/src/utils/id.ts +11 -0
- package/src/utils/logger.ts +48 -0
- package/src/utils/paths.ts +10 -0
- package/src/utils/progress-ui.ts +313 -0
- package/src/utils/prompt-loader.ts +79 -0
- package/src/utils/report-generator.ts +301 -0
- package/tests/clarifications.spec.ts +128 -0
- package/tests/cli.debate.spec.ts +144 -0
- package/tests/config-loading.spec.ts +206 -0
- package/tests/context-summarizer.spec.ts +131 -0
- package/tests/debate-config-custom.json +38 -0
- package/tests/env-loader.spec.ts +149 -0
- package/tests/eval.command.spec.ts +1191 -0
- package/tests/logger.spec.ts +19 -0
- package/tests/openai-provider.spec.ts +26 -0
- package/tests/openrouter-provider.spec.ts +279 -0
- package/tests/orchestrator-summary.spec.ts +386 -0
- package/tests/orchestrator.spec.ts +207 -0
- package/tests/prompt-loader.spec.ts +52 -0
- package/tests/prompts/architect.md +16 -0
- package/tests/provider-factory.spec.ts +150 -0
- package/tests/report.command.spec.ts +546 -0
- package/tests/role-based-agent-summary.spec.ts +476 -0
- package/tests/security-agent.spec.ts +221 -0
- package/tests/shared-prompts.spec.ts +318 -0
- package/tests/state-manager.spec.ts +251 -0
- package/tests/summary-prompts.spec.ts +153 -0
- package/tsconfig.json +49 -0
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
// Stub out the OpenAI SDK so the CLI tests never reach the network.
jest.mock('openai', () => {
  // Stand-in client: every chat completion resolves to the same canned answer.
  class OpenAIMock {
    public chat = {
      completions: {
        create: async (_: any) => ({ choices: [{ message: { content: 'Solution text' } }] }),
      },
    };

    constructor(_opts: any) {}
  }

  return { __esModule: true, default: OpenAIMock };
});
|
|
15
|
+
|
|
16
|
+
// Replace the env-loader module with a stub whose loadEnvironmentFile is a bare jest.fn().
jest.mock('../src/utils/env-loader', () => {
  return { loadEnvironmentFile: jest.fn() };
});
|
|
20
|
+
|
|
21
|
+
// Mock the readline module with a scripted question/answer queue.
jest.mock('readline', () => {
  let scriptedAnswers: string[] = [];
  let nextIndex = 0;

  // Minimal interface stand-in: question() replies from the queue, close() does nothing.
  const createInterface = () => ({
    question: (_: any, cb: (ans: string) => void) => {
      // Once the queue is exhausted every further question gets an empty answer.
      let reply: string | undefined = '';
      if (nextIndex < scriptedAnswers.length) {
        reply = scriptedAnswers[nextIndex];
        nextIndex += 1;
      }
      // Use setImmediate to make it async like real readline
      setImmediate(() => cb(String(reply)));
    },
    close: () => {},
  });

  return {
    __esModule: true,
    default: { createInterface },
    // Test-only hook: swap in a new answer queue and rewind to its start.
    __setMockAnswers: (answers: string[]) => {
      scriptedAnswers = answers;
      nextIndex = 0;
    },
  };
});
|
|
45
|
+
|
|
46
|
+
import os from 'os';
|
|
47
|
+
import path from 'path';
|
|
48
|
+
import fs from 'fs';
|
|
49
|
+
import { runCli } from '../src/cli/index';
|
|
50
|
+
import { loadEnvironmentFile } from '../src/utils/env-loader';
|
|
51
|
+
import { RoleBasedAgent } from '../src/agents/role-based-agent';
|
|
52
|
+
|
|
53
|
+
// env-loader is replaced by jest.mock at the top of this file; the cast only exposes the jest.fn() API to TypeScript.
const mockedLoadEnvironmentFile = loadEnvironmentFile as jest.MockedFunction<typeof loadEnvironmentFile>;

/**
 * Tests for the `debate --clarify` flow, driven through runCli with the
 * OpenAI SDK, env-loader and readline modules all mocked.
 */
describe('CLI clarifications phase', () => {
  let stderrSpy: jest.SpyInstance;
  let stdoutSpy: jest.SpyInstance;
  const originalEnv = process.env;

  beforeEach(() => {
    process.env = { ...originalEnv, OPENAI_API_KEY: 'test' };
    // Silence the CLI while still recording everything it writes.
    stderrSpy = jest.spyOn(process.stderr, 'write').mockImplementation(() => true);
    stdoutSpy = jest.spyOn(process.stdout, 'write').mockImplementation(() => true);
    mockedLoadEnvironmentFile.mockClear();
    // Start every test with an empty answer queue so answers scripted by a
    // previous test can never be consumed by the next one.
    mockReadlineWithAnswers([]);
  });

  afterEach(() => {
    process.env = originalEnv;
    stderrSpy.mockRestore();
    stdoutSpy.mockRestore();
    // Also restores the RoleBasedAgent.prototype spies installed by individual tests.
    jest.restoreAllMocks();
    // Deliberately no jest.resetModules(): runCli is imported once at the top
    // of this file, so resetting the registry cannot reload it. It would only
    // make require('readline') below return a fresh mock instance, which the
    // CLI never sees if it bound readline at import time (not visible from
    // this file), silently dropping the scripted answers.
  });

  /**
   * Scripts the answers the mocked readline will hand out, in order.
   *
   * @param answers replies returned by successive question() calls; once
   *   exhausted the mock answers with an empty string.
   * @throws Error if the readline mock does not expose `__setMockAnswers`,
   *   so a broken mock fails loudly instead of running with no answers.
   */
  function mockReadlineWithAnswers(answers: string[]) {
    const readlineMock = require('readline');
    if (typeof readlineMock.__setMockAnswers !== 'function') {
      throw new Error('readline mock does not expose __setMockAnswers; check the jest.mock("readline") factory');
    }
    readlineMock.__setMockAnswers(answers);
  }

  it('runs clarifications when --clarify and collects answers (including NA)', async () => {
    // Two questions total across agents; provide one answer and one empty -> NA
    mockReadlineWithAnswers(['My answer', '']);

    const spy = jest.spyOn(RoleBasedAgent.prototype as any, 'askClarifyingQuestions')
      .mockResolvedValueOnce({ questions: [{ id: 'q1', text: 'What is the SLA?' }] })
      .mockResolvedValueOnce({ questions: [{ id: 'q1', text: 'Any data retention rules?' }] });

    // A private temp directory gives a collision-free report path and makes
    // cleanup a single recursive delete.
    const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'clarify-report-'));
    const tmpReport = path.join(tmpDir, 'report.md');

    try {
      await runCli(['debate', 'Design Y', '--clarify', '--report', tmpReport]);

      expect(spy).toHaveBeenCalled();
      const content = fs.readFileSync(tmpReport, 'utf-8');
      expect(content).toContain('## Clarifications');
      expect(content).toContain('Question (q1):');
      // Should include the explicit answer
      expect(content).toContain('My answer');
      // And NA for the unanswered one
      expect(content).toContain('\n```text\nNA\n```');
    } finally {
      // Remove the report even when an assertion above fails.
      fs.rmSync(tmpDir, { recursive: true, force: true });
    }
  });

  it('does not run clarifications without --clarify (default off)', async () => {
    const spy = jest.spyOn(RoleBasedAgent.prototype as any, 'askClarifyingQuestions')
      .mockResolvedValue({ questions: [] });

    await runCli(['debate', 'Design Z']);
    expect(spy).not.toHaveBeenCalled();
  });

  it('truncates questions per agent and warns', async () => {
    // Return 7 questions to trigger truncation to default 5
    const many = Array.from({ length: 7 }, (_, i) => ({ id: `q${i + 1}`, text: `Q${i + 1}` }));
    const spy = jest.spyOn(RoleBasedAgent.prototype as any, 'askClarifyingQuestions')
      .mockResolvedValue({ questions: many });

    mockReadlineWithAnswers(new Array(10).fill('A'));

    await runCli(['debate', 'Design W', '--clarify']);
    expect(spy).toHaveBeenCalled();
    // Concatenate every chunk written to stderr before matching the warning.
    const stderr = stderrSpy.mock.calls.map(args => String(args[0])).join('');
    expect(stderr).toMatch(/limited to 5/);
  });
});
|
|
127
|
+
|
|
128
|
+
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
// Keep the CLI tests offline by swapping the OpenAI SDK for a canned-response client.
jest.mock('openai', () => {
  // Fake client whose chat.completions.create always resolves to 'Solution text'.
  class OpenAIMock {
    public chat = {
      completions: {
        create: async (_: any) => ({ choices: [{ message: { content: 'Solution text' } }] }),
      },
    };

    constructor(_opts: any) {}
  }

  return { __esModule: true, default: OpenAIMock };
});
|
|
15
|
+
|
|
16
|
+
// Stub env-loader: loadEnvironmentFile becomes a jest.fn() whose calls the tests below inspect.
jest.mock('../src/utils/env-loader', () => {
  return { loadEnvironmentFile: jest.fn() };
});
|
|
20
|
+
|
|
21
|
+
import { runCli } from '../src/cli/index';
|
|
22
|
+
import { EXIT_CONFIG_ERROR, EXIT_INVALID_ARGS } from '../src/utils/exit-codes';
|
|
23
|
+
import { loadEnvironmentFile } from '../src/utils/env-loader';
|
|
24
|
+
|
|
25
|
+
// env-loader is replaced by jest.mock at the top of this file; the cast only exposes the jest.fn() API to TypeScript.
const mockedLoadEnvironmentFile = loadEnvironmentFile as jest.MockedFunction<typeof loadEnvironmentFile>;

/**
 * Tests for the `debate` CLI command, driven through runCli with the OpenAI
 * SDK and env-loader mocked.
 */
describe('CLI debate command', () => {
  const originalEnv = process.env;
  let stderrSpy: jest.SpyInstance;
  let stdoutSpy: jest.SpyInstance;

  /** Joins every chunk passed to a spied stream's write() into one string. */
  const writtenTo = (spy: jest.SpyInstance): string =>
    spy.mock.calls.map(args => String(args[0])).join('');

  beforeEach(() => {
    process.env = { ...originalEnv };
    // Silence the CLI while still recording everything it writes.
    stderrSpy = jest.spyOn(process.stderr, 'write').mockImplementation(() => true);
    stdoutSpy = jest.spyOn(process.stdout, 'write').mockImplementation(() => true);
    // mockReset rather than mockClear: it also drops any implementation a
    // previous test installed, so a throwing stub cannot leak across tests.
    mockedLoadEnvironmentFile.mockReset();
  });

  afterEach(() => {
    process.env = originalEnv;
    stderrSpy.mockRestore();
    stdoutSpy.mockRestore();
  });

  it('exits with config error when OPENAI_API_KEY is missing', async () => {
    delete process.env.OPENAI_API_KEY;
    await expect(runCli(['debate', 'Design a system'])).rejects.toHaveProperty('code', EXIT_CONFIG_ERROR);
    expect(stderrSpy).toHaveBeenCalled();
  });

  it('prints only minimal solution to stdout (non-verbose)', async () => {
    process.env.OPENAI_API_KEY = 'test';
    await runCli(['debate', 'Design a rate limiting system']);
    expect(stdoutSpy).toHaveBeenCalled();
  });

  it('prints verbose header and summary with metadata when --verbose', async () => {
    process.env.OPENAI_API_KEY = 'test';

    await runCli(['debate', 'Design X', '--rounds', '2', '--verbose']);

    // The spies installed in beforeEach already record every write, and
    // afterEach restores them even when an assertion below fails.
    const stdout = writtenTo(stdoutSpy);
    const stderr = writtenTo(stderrSpy);

    // Main solution should be on stdout
    expect(stdout).toContain('Solution text');

    // Verbose diagnostics should be on stderr
    expect(stderr).toContain('Running debate (verbose)');
    expect(stderr).toContain('Summary (verbose)');
    expect(stderr).toMatch(/Round\s+1/);
    // Progress UI provides real-time updates, verbose summary shows final details
    expect(stderr).toMatch(/latency=.+, tokens=/);
  });

  it('should error when neither problem string nor --problemDescription are provided', async () => {
    process.env.OPENAI_API_KEY = 'test';

    await expect(runCli(['debate']))
      .rejects.toHaveProperty('code', EXIT_INVALID_ARGS);
    expect(stderrSpy).toHaveBeenCalledWith(
      expect.stringContaining('Invalid arguments: problem is required (provide <problem> or --problemDescription)')
    );
  });

  describe('environment file loading', () => {
    it('should call loadEnvironmentFile with default parameters', async () => {
      process.env.OPENAI_API_KEY = 'test';

      await runCli(['debate', 'Design a system']);

      expect(mockedLoadEnvironmentFile).toHaveBeenCalledWith(undefined, undefined);
    });

    it('should call loadEnvironmentFile with custom env file path', async () => {
      process.env.OPENAI_API_KEY = 'test';

      await runCli(['debate', 'Design a system', '--env-file', 'custom.env']);

      expect(mockedLoadEnvironmentFile).toHaveBeenCalledWith('custom.env', undefined);
    });

    it('should call loadEnvironmentFile with verbose flag', async () => {
      process.env.OPENAI_API_KEY = 'test';

      await runCli(['debate', 'Design a system', '--verbose']);

      expect(mockedLoadEnvironmentFile).toHaveBeenCalledWith(undefined, true);
    });

    it('should call loadEnvironmentFile with both custom env file and verbose flag', async () => {
      process.env.OPENAI_API_KEY = 'test';

      await runCli(['debate', 'Design a system', '--env-file', 'production.env', '--verbose']);

      expect(mockedLoadEnvironmentFile).toHaveBeenCalledWith('production.env', true);
    });

    it('should handle env loading errors gracefully', async () => {
      process.env.OPENAI_API_KEY = 'test';
      // Once-only: the failure is scoped to this single runCli invocation.
      mockedLoadEnvironmentFile.mockImplementationOnce(() => {
        throw new Error('Environment file not found: /path/to/missing.env');
      });

      await expect(runCli(['debate', 'Design a system', '--env-file', 'missing.env']))
        .rejects.toThrow('Environment file not found: /path/to/missing.env');

      expect(mockedLoadEnvironmentFile).toHaveBeenCalledWith('missing.env', undefined);
    });
  });
});
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
import { loadConfig } from '../src/cli/commands/debate';
|
|
2
|
+
import { DEFAULT_SUMMARIZATION_ENABLED, DEFAULT_SUMMARIZATION_THRESHOLD, DEFAULT_SUMMARIZATION_MAX_LENGTH, DEFAULT_SUMMARIZATION_METHOD } from '../src/types/config.types';
|
|
3
|
+
import fs from 'fs';
|
|
4
|
+
import path from 'path';
|
|
5
|
+
import os from 'os';
|
|
6
|
+
|
|
7
|
+
// Behavior tests for the config loader (`loadConfig`, imported from ../src/cli/commands/debate).
|
|
8
|
+
|
|
9
|
+
/**
 * Checks that loadConfig falls back to built-in defaults, and says so on
 * stderr, when no ./debate-config.json exists in the working directory.
 */
describe('Configuration loading', () => {
  it('uses built-in defaults when ./debate-config.json is missing and emits a stderr notice', async () => {
    const defaultConfigPath = path.resolve(process.cwd(), 'debate-config.json');

    // Temporarily remove config file if it exists. `undefined` means there
    // was no file; an empty string is a real (empty) file that must still be
    // put back afterwards.
    const configBackup = fs.existsSync(defaultConfigPath)
      ? fs.readFileSync(defaultConfigPath, 'utf-8')
      : undefined;
    if (configBackup !== undefined) {
      fs.unlinkSync(defaultConfigPath);
    }

    const stderrSpy = jest.spyOn(process.stderr, 'write').mockImplementation(() => true);
    try {
      const cfg = await loadConfig(undefined);
      expect(cfg).toBeDefined();
      expect(stderrSpy).toHaveBeenCalled();
    } finally {
      // Runs even when an assertion fails, so stderr is never left mocked
      // and the developer's config file is never lost.
      stderrSpy.mockRestore();
      if (configBackup !== undefined) {
        fs.writeFileSync(defaultConfigPath, configBackup, 'utf-8');
      }
    }
  });
});
|
|
35
|
+
|
|
36
|
+
/**
 * Checks how loadConfig surfaces summarization settings: the defaults, a
 * debate-level block, a per-agent override, a custom summary prompt path and
 * a partially specified block.
 */
describe('Summarization configuration loading', () => {
  let tmpDir: string;

  // Fields shared by every fixture; individual tests add extras on top.
  const baseAgent = {
    id: 'test-agent',
    name: 'Test Agent',
    role: 'architect',
    model: 'gpt-4',
    provider: 'openai',
    temperature: 0.5
  };
  const baseDebate = {
    rounds: 3,
    terminationCondition: { type: 'fixed' },
    synthesisMethod: 'judge',
    includeFullHistory: true,
    timeoutPerRound: 300000
  };

  /**
   * Writes a single-agent config file into the per-test temp directory.
   *
   * @param agentExtras extra fields appended to the agent entry
   * @param debateExtras extra fields appended to the debate section
   * @returns absolute path of the written JSON file
   */
  function writeConfig(
    agentExtras: Record<string, unknown> = {},
    debateExtras: Record<string, unknown> = {}
  ): string {
    const configPath = path.join(tmpDir, 'test-config.json');
    const configContent = {
      agents: [{ ...baseAgent, ...agentExtras }],
      debate: { ...baseDebate, ...debateExtras }
    };
    fs.writeFileSync(configPath, JSON.stringify(configContent, null, 2));
    return configPath;
  }

  beforeEach(() => {
    tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'config-test-'));
  });

  afterEach(() => {
    // Best-effort cleanup: a leftover temp directory must not fail the test.
    try { fs.rmSync(tmpDir, { recursive: true, force: true }); } catch {}
  });

  it('should load default summarization config when not specified', async () => {
    const stderrSpy = jest.spyOn(process.stderr, 'write').mockImplementation(() => true);
    try {
      const cfg = await loadConfig(undefined);

      expect(cfg.debate?.summarization).toBeDefined();
      expect(cfg.debate?.summarization?.enabled).toBe(DEFAULT_SUMMARIZATION_ENABLED);
      expect(cfg.debate?.summarization?.threshold).toBe(DEFAULT_SUMMARIZATION_THRESHOLD);
      expect(cfg.debate?.summarization?.maxLength).toBe(DEFAULT_SUMMARIZATION_MAX_LENGTH);
      expect(cfg.debate?.summarization?.method).toBe(DEFAULT_SUMMARIZATION_METHOD);
    } finally {
      // Restore stderr even when an assertion above fails.
      stderrSpy.mockRestore();
    }
  });

  it('should load custom summarization config from file', async () => {
    const configPath = writeConfig({}, {
      summarization: {
        enabled: false,
        threshold: 3000,
        maxLength: 1500,
        method: 'length-based'
      }
    });

    const cfg = await loadConfig(configPath);

    expect(cfg.debate?.summarization).toBeDefined();
    expect(cfg.debate?.summarization?.enabled).toBe(false);
    expect(cfg.debate?.summarization?.threshold).toBe(3000);
    expect(cfg.debate?.summarization?.maxLength).toBe(1500);
    expect(cfg.debate?.summarization?.method).toBe('length-based');
  });

  it('should support per-agent summarization override', async () => {
    const configPath = writeConfig({
      summarization: {
        enabled: true,
        threshold: 2000,
        maxLength: 1000,
        method: 'length-based'
      }
    });

    const cfg = await loadConfig(configPath);

    const agent = cfg.agents[0];
    expect(agent).toBeDefined();
    expect(agent?.summarization).toBeDefined();
    expect(agent?.summarization?.enabled).toBe(true);
    expect(agent?.summarization?.threshold).toBe(2000);
    expect(agent?.summarization?.maxLength).toBe(1000);
    expect(agent?.summarization?.method).toBe('length-based');
  });

  it('should support summaryPromptPath in agent config', async () => {
    const configPath = writeConfig({ summaryPromptPath: './prompts/custom-summary.md' });

    const cfg = await loadConfig(configPath);

    const agent = cfg.agents[0];
    expect(agent).toBeDefined();
    expect(agent?.summaryPromptPath).toBe('./prompts/custom-summary.md');
  });

  it('should support partial summarization config', async () => {
    // Only `threshold` is given; other fields should use defaults
    const configPath = writeConfig({}, { summarization: { threshold: 10000 } });

    const cfg = await loadConfig(configPath);

    expect(cfg.debate?.summarization).toBeDefined();
    expect(cfg.debate?.summarization?.threshold).toBe(10000);
    // Partial config should work with merging at runtime
  });
});
|
|
206
|
+
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
import { LengthBasedSummarizer } from '../src/utils/context-summarizer';
|
|
2
|
+
import { LLMProvider } from '../src/providers/llm-provider';
|
|
3
|
+
import { SummarizationConfig, SUMMARIZATION_METHODS } from '../src/types/debate.types';
|
|
4
|
+
import { AGENT_ROLES } from '../src/types/agent.types';
|
|
5
|
+
|
|
6
|
+
/**
 * In-memory LLMProvider used by the summarizer tests.
 *
 * `complete` ignores its request and either resolves with the configured
 * text plus a fixed usage of 100 total tokens, or rejects with
 * 'Mock LLM failure' when constructed with `shouldFail = true`.
 */
class MockLLMProvider implements LLMProvider {
  /**
   * @param mockResponse text returned by every successful `complete` call
   * @param shouldFail when true, every `complete` call rejects
   */
  constructor(
    private readonly mockResponse: string = 'This is a test summary.',
    private readonly shouldFail: boolean = false
  ) {}

  /**
   * @param _request ignored; accepted only to satisfy the provider interface
   * @returns an object with `text` and `usage.totalTokens`
   * @throws Error('Mock LLM failure') when the provider was built to fail
   */
  async complete(_request: unknown): Promise<any> {
    if (this.shouldFail) {
      throw new Error('Mock LLM failure');
    }
    return {
      text: this.mockResponse,
      usage: { totalTokens: 100 }
    };
  }
}
|
|
26
|
+
|
|
27
|
+
/**
 * Unit tests for LengthBasedSummarizer, run against in-memory providers so
 * no real LLM call is made.
 */
describe('LengthBasedSummarizer', () => {
  const config: SummarizationConfig = {
    enabled: true,
    threshold: 5000,
    maxLength: 2500,
    method: SUMMARIZATION_METHODS.LENGTH_BASED,
  };

  // Builds a summarizer with default options around `llm` and runs it on the
  // fixed placeholder inputs that most tests below share.
  const summarizeWith = (llm: LLMProvider) =>
    new LengthBasedSummarizer(llm).summarize('content', AGENT_ROLES.ARCHITECT, config, 'sys', 'sum');

  it('should return summary with correct metadata (configured values)', async () => {
    const expectedSummary = 'Test summary content';
    const history = 'This is the debate history to summarize.';
    const summarizer = new LengthBasedSummarizer(
      new MockLLMProvider(expectedSummary),
      { model: 'gpt-4o', temperature: 0.55, provider: 'openai' as any }
    );

    const { summary, metadata } = await summarizer.summarize(
      history,
      AGENT_ROLES.ARCHITECT,
      config,
      'You are an architect.',
      'Summarize this content.'
    );

    expect(summary).toBe('Test summary content');
    expect(metadata.beforeChars).toBe(history.length);
    expect(metadata.afterChars).toBe('Test summary content'.length);
    expect(metadata.method).toBe(SUMMARIZATION_METHODS.LENGTH_BASED);
    expect(metadata.timestamp).toBeInstanceOf(Date);
    expect(metadata.latencyMs).toBeGreaterThanOrEqual(0);
    expect(metadata.tokensUsed).toBe(100);
    expect(metadata.model).toBe('gpt-4o');
    expect(metadata.temperature).toBe(0.55);
    expect(metadata.provider).toBe('openai');
  });

  it('should call LLM provider with correct prompts (defaults as fallbacks)', async () => {
    const llm = new MockLLMProvider();
    const completeSpy = jest.spyOn(llm, 'complete');
    const summarizer = new LengthBasedSummarizer(llm);

    await summarizer.summarize(
      'Content to summarize',
      AGENT_ROLES.ARCHITECT,
      config,
      'System prompt',
      'Summary prompt'
    );

    // No options were passed to the constructor, so model and temperature
    // here are the summarizer's own fallbacks.
    expect(completeSpy).toHaveBeenCalledWith({
      model: 'gpt-4',
      temperature: 0.3,
      systemPrompt: 'System prompt',
      userPrompt: 'Summary prompt',
    });
  });

  it('should truncate summary to maxLength if needed', async () => {
    // 3000 characters from the provider against a maxLength of 2500.
    const oversized = 'a'.repeat(3000);

    const { summary } = await summarizeWith(new MockLLMProvider(oversized));

    expect(summary.length).toBe(config.maxLength);
    expect(summary).toBe('a'.repeat(config.maxLength));
  });

  it('should measure latency correctly', async () => {
    const { metadata } = await summarizeWith(new MockLLMProvider());

    expect(metadata.latencyMs).toBeDefined();
    expect(metadata.latencyMs).toBeGreaterThanOrEqual(0);
  });

  it('should handle LLM errors gracefully', async () => {
    const failing = new MockLLMProvider('', true);

    await expect(summarizeWith(failing)).rejects.toThrow('Mock LLM failure');
  });

  it('should include token usage when provided by LLM', async () => {
    const { metadata } = await summarizeWith(new MockLLMProvider('Summary'));

    expect(metadata.tokensUsed).toBe(100);
  });

  it('should handle missing token usage from LLM', async () => {
    // Provider whose response carries no `usage` field at all.
    class NoTokenProvider implements LLMProvider {
      async complete(_request: any): Promise<any> {
        return { text: 'Summary' };
      }
    }

    const { metadata } = await summarizeWith(new NoTokenProvider());

    expect(metadata.tokensUsed).toBeUndefined();
  });
});
|
|
131
|
+
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
{
|
|
2
|
+
"agents": [
|
|
3
|
+
{
|
|
4
|
+
"id": "agent-architect",
|
|
5
|
+
"name": "System Architect (Custom)",
|
|
6
|
+
"role": "architect",
|
|
7
|
+
"model": "gpt-4",
|
|
8
|
+
"provider": "openai",
|
|
9
|
+
"temperature": 0.5,
|
|
10
|
+
"systemPromptPath": "./prompts/architect.md",
|
|
11
|
+
"enabled": true
|
|
12
|
+
},
|
|
13
|
+
{
|
|
14
|
+
"id": "agent-performance",
|
|
15
|
+
"name": "Performance Engineer",
|
|
16
|
+
"role": "performance",
|
|
17
|
+
"model": "gpt-4",
|
|
18
|
+
"provider": "openai",
|
|
19
|
+
"temperature": 0.5,
|
|
20
|
+
"enabled": true
|
|
21
|
+
}
|
|
22
|
+
],
|
|
23
|
+
"judge": {
|
|
24
|
+
"id": "judge-main",
|
|
25
|
+
"name": "Technical Judge",
|
|
26
|
+
"role": "generalist",
|
|
27
|
+
"model": "gpt-4",
|
|
28
|
+
"provider": "openai",
|
|
29
|
+
"temperature": 0.3
|
|
30
|
+
},
|
|
31
|
+
"debate": {
|
|
32
|
+
"rounds": 2,
|
|
33
|
+
"terminationCondition": { "type": "fixed" },
|
|
34
|
+
"synthesisMethod": "judge",
|
|
35
|
+
"includeFullHistory": true,
|
|
36
|
+
"timeoutPerRound": 300000
|
|
37
|
+
}
|
|
38
|
+
}
|