dialectic 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119) hide show
  1. package/.cursor/commands/setup-test.mdc +175 -0
  2. package/.cursor/rules/basic-code-cleanup.mdc +1110 -0
  3. package/.cursor/rules/riper5.mdc +96 -0
  4. package/.env.example +6 -0
  5. package/AGENTS.md +1052 -0
  6. package/LICENSE +21 -0
  7. package/README.md +93 -0
  8. package/WARP.md +113 -0
  9. package/dialectic-1.0.0.tgz +0 -0
  10. package/dialectic.js +10 -0
  11. package/docs/commands.md +375 -0
  12. package/docs/configuration.md +882 -0
  13. package/docs/context_summarization.md +1023 -0
  14. package/docs/debate_flow.md +1127 -0
  15. package/docs/eval_flow.md +795 -0
  16. package/docs/evaluator.md +141 -0
  17. package/examples/debate-config-openrouter.json +48 -0
  18. package/examples/debate_config1.json +48 -0
  19. package/examples/eval/eval1/eval_config1.json +13 -0
  20. package/examples/eval/eval1/result1.json +62 -0
  21. package/examples/eval/eval1/result2.json +97 -0
  22. package/examples/eval_summary_format.md +11 -0
  23. package/examples/example3/debate-config.json +64 -0
  24. package/examples/example3/eval_config2.json +25 -0
  25. package/examples/example3/problem.md +17 -0
  26. package/examples/example3/rounds_test/eval_run.sh +16 -0
  27. package/examples/example3/rounds_test/run_test.sh +16 -0
  28. package/examples/kata1/architect-only-solution_2-rounds.json +121 -0
  29. package/examples/kata1/architect-perf-solution_2-rounds.json +234 -0
  30. package/examples/kata1/debate-config-kata1.json +54 -0
  31. package/examples/kata1/eval_architect-only_2-rounds.json +97 -0
  32. package/examples/kata1/eval_architect-perf_2-rounds.json +97 -0
  33. package/examples/kata1/kata1-report.md +12224 -0
  34. package/examples/kata1/kata1-report_temps-01_01_01_07.md +2451 -0
  35. package/examples/kata1/kata1.md +5 -0
  36. package/examples/kata1/meta.txt +1 -0
  37. package/examples/kata2/debate-config.json +54 -0
  38. package/examples/kata2/eval_config1.json +21 -0
  39. package/examples/kata2/eval_config2.json +25 -0
  40. package/examples/kata2/kata2.md +5 -0
  41. package/examples/kata2/only_architect/debate-config.json +45 -0
  42. package/examples/kata2/only_architect/eval_run.sh +11 -0
  43. package/examples/kata2/only_architect/run_test.sh +5 -0
  44. package/examples/kata2/rounds_test/eval_run.sh +11 -0
  45. package/examples/kata2/rounds_test/run_test.sh +5 -0
  46. package/examples/kata2/summary_length_test/eval_run.sh +11 -0
  47. package/examples/kata2/summary_length_test/eval_run_w_clarify.sh +7 -0
  48. package/examples/kata2/summary_length_test/run_test.sh +5 -0
  49. package/examples/task-queue/debate-config.json +76 -0
  50. package/examples/task-queue/debate_report.md +566 -0
  51. package/examples/task-queue/task-queue-system.md +25 -0
  52. package/jest.config.ts +13 -0
  53. package/multi_agent_debate_spec.md +2980 -0
  54. package/package.json +38 -0
  55. package/sanity-check-problem.txt +9 -0
  56. package/src/agents/prompts/architect-prompts.ts +203 -0
  57. package/src/agents/prompts/generalist-prompts.ts +157 -0
  58. package/src/agents/prompts/index.ts +41 -0
  59. package/src/agents/prompts/judge-prompts.ts +19 -0
  60. package/src/agents/prompts/kiss-prompts.ts +230 -0
  61. package/src/agents/prompts/performance-prompts.ts +142 -0
  62. package/src/agents/prompts/prompt-types.ts +68 -0
  63. package/src/agents/prompts/security-prompts.ts +149 -0
  64. package/src/agents/prompts/shared.ts +144 -0
  65. package/src/agents/prompts/testing-prompts.ts +149 -0
  66. package/src/agents/role-based-agent.ts +386 -0
  67. package/src/cli/commands/debate.ts +761 -0
  68. package/src/cli/commands/eval.ts +475 -0
  69. package/src/cli/commands/report.ts +265 -0
  70. package/src/cli/index.ts +79 -0
  71. package/src/core/agent.ts +198 -0
  72. package/src/core/clarifications.ts +34 -0
  73. package/src/core/judge.ts +257 -0
  74. package/src/core/orchestrator.ts +432 -0
  75. package/src/core/state-manager.ts +322 -0
  76. package/src/eval/evaluator-agent.ts +130 -0
  77. package/src/eval/prompts/system.md +41 -0
  78. package/src/eval/prompts/user.md +64 -0
  79. package/src/providers/llm-provider.ts +25 -0
  80. package/src/providers/openai-provider.ts +84 -0
  81. package/src/providers/openrouter-provider.ts +122 -0
  82. package/src/providers/provider-factory.ts +64 -0
  83. package/src/types/agent.types.ts +141 -0
  84. package/src/types/config.types.ts +47 -0
  85. package/src/types/debate.types.ts +237 -0
  86. package/src/types/eval.types.ts +85 -0
  87. package/src/utils/common.ts +104 -0
  88. package/src/utils/context-formatter.ts +102 -0
  89. package/src/utils/context-summarizer.ts +143 -0
  90. package/src/utils/env-loader.ts +46 -0
  91. package/src/utils/exit-codes.ts +5 -0
  92. package/src/utils/id.ts +11 -0
  93. package/src/utils/logger.ts +48 -0
  94. package/src/utils/paths.ts +10 -0
  95. package/src/utils/progress-ui.ts +313 -0
  96. package/src/utils/prompt-loader.ts +79 -0
  97. package/src/utils/report-generator.ts +301 -0
  98. package/tests/clarifications.spec.ts +128 -0
  99. package/tests/cli.debate.spec.ts +144 -0
  100. package/tests/config-loading.spec.ts +206 -0
  101. package/tests/context-summarizer.spec.ts +131 -0
  102. package/tests/debate-config-custom.json +38 -0
  103. package/tests/env-loader.spec.ts +149 -0
  104. package/tests/eval.command.spec.ts +1191 -0
  105. package/tests/logger.spec.ts +19 -0
  106. package/tests/openai-provider.spec.ts +26 -0
  107. package/tests/openrouter-provider.spec.ts +279 -0
  108. package/tests/orchestrator-summary.spec.ts +386 -0
  109. package/tests/orchestrator.spec.ts +207 -0
  110. package/tests/prompt-loader.spec.ts +52 -0
  111. package/tests/prompts/architect.md +16 -0
  112. package/tests/provider-factory.spec.ts +150 -0
  113. package/tests/report.command.spec.ts +546 -0
  114. package/tests/role-based-agent-summary.spec.ts +476 -0
  115. package/tests/security-agent.spec.ts +221 -0
  116. package/tests/shared-prompts.spec.ts +318 -0
  117. package/tests/state-manager.spec.ts +251 -0
  118. package/tests/summary-prompts.spec.ts +153 -0
  119. package/tsconfig.json +49 -0
@@ -0,0 +1,882 @@
1
+ # Configuration Guide
2
+
3
+ This document describes all configuration options for the multi-agent debate system.
4
+
5
+ ## Overview
6
+
7
+ The debate system can be configured through three mechanisms:
8
+
9
+ 1. **Configuration File**: JSON file (default: `./debate-config.json`) defining agents, judge, and debate settings
10
+ 2. **Environment Variables**: Required API keys and optional settings
11
+ 3. **Command Line Options**: Runtime overrides for debate execution
12
+
13
+ If no configuration file is provided, the system uses built-in defaults.
14
+
15
+ ## Configuration File
16
+
17
+ The configuration file is a JSON document with the following structure:
18
+
19
+ ```json
20
+ {
21
+ "agents": [...],
22
+ "judge": {...},
23
+ "debate": {...}
24
+ }
25
+ ```
26
+
27
+ ### File Location
28
+
29
+ - **Default Path**: `./debate-config.json` (in the current working directory)
30
+ - **Custom Path**: Specify via `--config <path>` command line option
31
+
32
+ ### Root Configuration Schema
33
+
34
+ The root configuration object must conform to the `SystemConfig` interface:
35
+
36
+ | Field | Type | Required | Description |
37
+ |-------|------|----------|-------------|
38
+ | `agents` | `AgentConfig[]` | No | Array of agent configurations. If missing or empty, built-in defaults are used. |
39
+ | `judge` | `AgentConfig` | No | Configuration for the judge agent. If missing, a default judge is used. |
40
+ | `debate` | `DebateConfig` | No | Debate execution settings. If missing, default debate configuration is used. |
41
+
42
+ ## Agent Configuration
43
+
44
+ Each agent (including the judge) is configured using the `AgentConfig` schema:
45
+
46
+ ### Schema
47
+
48
+ | Field | Type | Required | Description |
49
+ |-------|------|----------|-------------|
50
+ | `id` | `string` | Yes | Unique identifier for the agent. Must be unique across all agents and the judge. |
51
+ | `name` | `string` | Yes | Human-readable name for the agent. Used in output and logging. |
52
+ | `role` | `AgentRole` | Yes | The functional role of the agent. |
53
+ | `model` | `string` | Yes | The LLM model name to use for this agent. |
54
+ | `provider` | `string` | Yes | The LLM provider. Supports `"openai"` or `"openrouter"`. |
55
+ | `temperature` | `number` | Yes | Sampling temperature for the LLM. |
56
+ | `systemPromptPath` | `string` | No | Path to a markdown/text file containing the system prompt. If omitted, a built-in prompt for the role is used. |
57
+ | `enabled` | `boolean` | No | Whether the agent is enabled. Defaults to `true` if omitted. |
58
+ | `clarificationPromptPath` | `string` | No | Path to a markdown/text file containing the clarifications prompt for this agent. If omitted, a built-in role-specific prompt is used. |
59
+
60
+ ### Field Details
61
+
62
+ #### `id`
63
+ - **Type**: String
64
+ - **Accepted Values**: Any non-empty string
65
+ - **Semantics**: Uniquely identifies the agent within the system. Used for tracking contributions and referencing agents in debate logs.
66
+ - **Example**: `"agent-architect"`, `"agent-perf-001"`
67
+
68
+ #### `name`
69
+ - **Type**: String
70
+ - **Accepted Values**: Any non-empty string
71
+ - **Semantics**: Human-readable display name for the agent. Used in console output, logs, and verbose mode.
72
+ - **Example**: `"System Architect"`, `"Performance Engineer"`
73
+
74
+ #### `role`
75
+ - **Type**: String (enum)
76
+ - **Accepted Values**:
77
+ - `"architect"` - System architecture and design perspective
78
+ - `"security"` - Security and privacy concerns
79
+ - `"performance"` - Performance optimization and efficiency
80
+ - `"testing"` - Testing strategy and quality assurance
81
+ - `"kiss"` - Simplicity-focused perspective, challenges complexity
82
+ - `"generalist"` - General-purpose role (typically used for judge)
83
+ - **Semantics**: Defines the agent's functional perspective in the debate. Agents with unknown roles default to architect behavior with a warning.
84
+ - **Example**: `"architect"`
85
+
86
+ #### `model`
87
+ - **Type**: String
88
+ - **Accepted Values**:
89
+ - For OpenAI provider: Any valid OpenAI model name (e.g., `"gpt-4"`, `"gpt-4-turbo"`, `"gpt-3.5-turbo"`)
90
+ - For OpenRouter provider: Fully qualified model names (e.g., `"openai/gpt-4"`, `"anthropic/claude-3-sonnet"`)
91
+ - **Semantics**: Specifies which LLM model the agent uses. More capable models generally produce better reasoning but cost more.
92
+ - **Example**: `"gpt-4"` (OpenAI) or `"openai/gpt-4"` (OpenRouter)
93
+
94
+ #### `provider`
95
+ - **Type**: String (literal)
96
+ - **Accepted Values**: `"openai"` or `"openrouter"`
97
+ - **Semantics**: Specifies the LLM provider. Each provider requires its own API key and supports different model naming conventions.
98
+ - **Example**: `"openai"` or `"openrouter"`
99
+
100
+ #### `temperature`
101
+ - **Type**: Number
102
+ - **Accepted Values**: `0.0` to `1.0` (inclusive)
103
+ - **Semantics**: Controls randomness in model output. Lower values (0.0-0.3) produce more deterministic and focused responses. Higher values (0.7-1.0) produce more creative and varied responses. Recommended ranges:
104
+ - Judge: 0.2-0.3 (more deterministic)
105
+ - Agents: 0.4-0.7 (balanced creativity)
106
+ - **Example**: `0.5`
107
+
108
+ #### `systemPromptPath`
109
+ - **Type**: String (optional)
110
+ - **Accepted Values**: File path (absolute or relative to the configuration file directory)
111
+ - **Semantics**: Filesystem path to a markdown/text file containing custom instructions that prime the agent's behavior. If omitted or invalid, the system uses a built-in prompt appropriate for the role.
112
+ - **Resolution**: Relative paths are resolved against the configuration file directory. No environment variable expansion is performed.
113
+ - **Reading**: File is read as UTF-8; the entire file content is used as the system prompt. Empty/whitespace-only files are considered invalid.
114
+ - **Fallback**: If the path is missing/unreadable/invalid, a warning is printed to stderr and the built-in prompt is used.
115
+ - **Example**: `"./prompts/architect.md"`
116
+
117
+ #### `enabled`
118
+ - **Type**: Boolean (optional)
119
+ - **Accepted Values**: `true` or `false`
120
+ - **Default**: `true`
121
+ - **Semantics**: Whether the agent participates in debates. Disabled agents are filtered out before debate execution. Useful for temporarily removing agents without deleting their configuration.
122
+ - **Example**: `true`
123
+
124
+ ### Example Agent Configuration
125
+
126
+ #### Single Provider (OpenAI)
127
+ ```json
128
+ {
129
+ "id": "agent-architect",
130
+ "name": "System Architect",
131
+ "role": "architect",
132
+ "model": "gpt-4",
133
+ "provider": "openai",
134
+ "temperature": 0.5,
135
+ "systemPromptPath": "./prompts/architect.md",
136
+ "enabled": true
137
+ }
138
+ ```
139
+
140
+ #### Single Provider (OpenRouter)
141
+ ```json
142
+ {
143
+ "id": "agent-architect",
144
+ "name": "System Architect",
145
+ "role": "architect",
146
+ "model": "openai/gpt-4",
147
+ "provider": "openrouter",
148
+ "temperature": 0.5,
149
+ "systemPromptPath": "./prompts/architect.md",
150
+ "enabled": true
151
+ }
152
+ ```
153
+
154
+ #### Mixed Provider Configuration
155
+ ```json
156
+ {
157
+ "id": "agent-architect",
158
+ "name": "System Architect",
159
+ "role": "architect",
160
+ "model": "openai/gpt-4",
161
+ "provider": "openrouter",
162
+ "temperature": 0.5,
163
+ "enabled": true
164
+ },
165
+ {
166
+ "id": "agent-security",
167
+ "name": "Security Specialist",
168
+ "role": "security",
169
+ "model": "gpt-4",
170
+ "provider": "openai",
171
+ "temperature": 0.4,
172
+ "enabled": true
173
+ }
174
+ ```
175
+
176
+ ### Default Agent Values
177
+
178
+ - System Architect (role: `architect`, model: `gpt-4`, temperature: `0.5`)
179
+ - Performance Engineer (role: `performance`, model: `gpt-4`, temperature: `0.5`)
180
+ - Simplicity Advocate (role: `kiss`, model: `gpt-4`, temperature: `0.5`)
181
+
182
+ ## Judge Configuration
183
+
184
+ The judge is a special agent that synthesizes the final solution after all debate rounds complete. It uses the same `AgentConfig` schema as regular agents.
185
+
186
+ ### Judge-Specific Considerations
187
+
188
+ - Supports the same `systemPromptPath` behavior as agents (path resolved relative to the configuration file directory; invalid/empty files trigger a warning and the judge falls back to the built-in prompt).
189
+
190
+ - **Role**: Typically set to `"generalist"` to maintain objectivity
191
+ - **Temperature**: Recommended range is 0.2-0.3 for more consistent synthesis
192
+ - **Model**: Should be the same or more capable than agent models
193
+
194
+ ### Default Judge Configuration
195
+
196
+ If no judge is specified in the configuration file, the system uses:
197
+
198
+ ```json
199
+ {
200
+ "id": "judge-main",
201
+ "name": "Technical Judge",
202
+ "role": "generalist",
203
+ "model": "gpt-4",
204
+ "provider": "openai",
205
+ "temperature": 0.3
206
+ }
207
+ ```
208
+
209
+ ## Debate Configuration
210
+
211
+ The `DebateConfig` schema controls how debates execute:
212
+
213
+ ### Schema
214
+
215
+ | Field | Type | Required | Description |
216
+ |-------|------|----------|-------------|
217
+ | `rounds` | `number` | Yes | Number of debate rounds to execute. |
218
+ | `terminationCondition` | `TerminationCondition` | Yes | Conditions for early termination. |
219
+ | `synthesisMethod` | `string` | Yes | Method for synthesizing the final solution. |
220
+ | `includeFullHistory` | `boolean` | Yes | Whether to include full debate history in agent context. |
221
+ | `timeoutPerRound` | `number` | Yes | Maximum time allowed per round in milliseconds. |
222
+ | `interactiveClarifications` | `boolean` | No | Run a one-time pre-debate clarifications phase (default: false). |
223
+ | `clarificationsMaxPerAgent` | `number` | No | Max questions per agent in clarifications phase (default: 5; excess truncated with a warning). |
224
+
225
+ ### Field Details
226
+
227
+ #### `rounds`
228
+ - **Type**: Number (integer)
229
+ - **Accepted Values**: Positive integers >= 1
230
+ - **Semantics**: Number of complete rounds to execute. Each round consists of all phases in order: proposal → critique → refinement. After the final round completes, the judge synthesizes the final solution. Proposals are fresh each round; agents may incorporate prior history when `includeFullHistory` is true.
231
+ - **Default**: 3
232
+ - **Example**: `3`
233
+
234
+ #### `terminationCondition`
235
+ - **Type**: Object
236
+ - **Schema**:
237
+ - `type`: `"fixed"` | `"convergence"` | `"quality"`
238
+ - `threshold`: `number` (optional, depends on type)
239
+ - **Accepted Values**:
240
+ - `{ "type": "fixed" }` - Run exactly the specified number of rounds (currently the only supported type)
241
+ - `{ "type": "convergence", "threshold": 0.9 }` - Stop when solutions converge (planned)
242
+ - `{ "type": "quality", "threshold": 85 }` - Stop when quality threshold reached (planned)
243
+ - **Semantics**: Determines when the debate terminates. Currently, only `"fixed"` type is implemented.
244
+ - **Default**: `{ "type": "fixed" }`
245
+ - **Example**: `{ "type": "fixed" }`
246
+
247
+ #### `synthesisMethod`
248
+ - **Type**: String (enum)
249
+ - **Accepted Values**: `"judge"` | `"voting"` | `"merge"`
250
+ - **Currently Supported**: `"judge"` only
251
+ - **Semantics**: How the final solution is produced:
252
+ - `"judge"` - Judge agent synthesizes the solution based on all contributions
253
+ - `"voting"` - Agents vote on proposals (planned)
254
+ - `"merge"` - Automatic merging of proposals (planned)
255
+ - **Default**: `"judge"`
256
+ - **Example**: `"judge"`
257
+
258
+ #### `includeFullHistory`
259
+ - **Type**: Boolean
260
+ - **Accepted Values**: `true` or `false`
261
+ - **Semantics**: Whether agents receive the complete debate history or only recent context. Setting to `true` provides more context but uses more tokens.
262
+ - **Default**: `true`
263
+ - **Example**: `true`
264
+
265
+ #### `timeoutPerRound`
266
+ - **Type**: Number (integer)
267
+ - **Accepted Values**: Positive integers (milliseconds)
268
+ - **Semantics**: Maximum time allowed for a single round to complete. If exceeded, the debate may fail or proceed with partial results (behavior depends on implementation).
269
+ - **Default**: `300000` (5 minutes)
270
+ - **Example**: `300000`
271
+
272
+ ### Default Debate Configuration
273
+
274
+ If no debate configuration is specified, the system uses:
275
+
276
+ ```json
277
+ {
278
+ "rounds": 3,
279
+ "terminationCondition": { "type": "fixed" },
280
+ "synthesisMethod": "judge",
281
+ "includeFullHistory": true,
282
+ "timeoutPerRound": 300000
283
+ }
284
+ ```
285
+
286
+ ### Example Debate Configuration
287
+
288
+ ```json
289
+ {
290
+ "rounds": 5,
291
+ "terminationCondition": { "type": "fixed" },
292
+ "synthesisMethod": "judge",
293
+ "includeFullHistory": true,
294
+ "timeoutPerRound": 600000
295
+ }
296
+ ```
297
+
298
+ ## Complete Configuration Example
299
+
300
+ ```json
301
+ {
302
+ "agents": [
303
+ {
304
+ "id": "agent-architect",
305
+ "name": "System Architect",
306
+ "role": "architect",
307
+ "model": "gpt-4",
308
+ "provider": "openai",
309
+ "temperature": 0.5,
310
+ "enabled": true
311
+ },
312
+ {
313
+ "id": "agent-performance",
314
+ "name": "Performance Engineer",
315
+ "role": "performance",
316
+ "model": "gpt-4",
317
+ "provider": "openai",
318
+ "temperature": 0.5,
319
+ "enabled": true
320
+ },
321
+ {
322
+ "id": "agent-security",
323
+ "name": "Security Specialist",
324
+ "role": "security",
325
+ "model": "gpt-4",
326
+ "provider": "openai",
327
+ "temperature": 0.4,
328
+ "enabled": false
329
+ }
330
+ ],
331
+ "judge": {
332
+ "id": "judge-main",
333
+ "name": "Technical Judge",
334
+ "role": "generalist",
335
+ "model": "gpt-4",
336
+ "provider": "openai",
337
+ "temperature": 0.3
338
+ },
339
+ "debate": {
340
+ "rounds": 3,
341
+ "terminationCondition": { "type": "fixed" },
342
+ "synthesisMethod": "judge",
343
+ "includeFullHistory": true,
344
+ "timeoutPerRound": 300000
345
+ }
346
+ }
347
+ ```
348
+
349
+ ## Context Summarization Configuration
350
+
351
+ The debate system includes automatic context summarization to manage debate history length and avoid context window limitations. Each agent independently summarizes their perspective-based history when it exceeds configured thresholds. The judge agent also supports summarization for synthesis when the final round's content becomes too large.
352
+
353
+ ### Overview
354
+
355
+ Context summarization is configured at two levels:
356
+ 1. **System-Wide**: Default summarization settings in `debate.summarization`
357
+ 2. **Per-Agent**: Agent-specific overrides in `AgentConfig.summarization`
358
+
359
+ Agent-level settings override system-wide settings, allowing fine-grained control over which agents summarize and how. The judge agent uses the same system-wide summarization configuration for its synthesis process.
360
+
361
+ ### System-Wide Configuration
362
+
363
+ Add a `summarization` field to the `debate` configuration:
364
+
365
+ ```json
366
+ {
367
+ "debate": {
368
+ "rounds": 3,
369
+ "terminationCondition": { "type": "fixed" },
370
+ "synthesisMethod": "judge",
371
+ "includeFullHistory": true,
372
+ "timeoutPerRound": 300000,
373
+ "summarization": {
374
+ "enabled": true,
375
+ "threshold": 5000,
376
+ "maxLength": 2500,
377
+ "method": "length-based"
378
+ }
379
+ }
380
+ }
381
+ ```
382
+
383
+ ### Summarization Schema
384
+
385
+ | Field | Type | Required | Description |
386
+ |-------|------|----------|-------------|
387
+ | `enabled` | `boolean` | Yes | Whether summarization is enabled. |
388
+ | `threshold` | `number` | Yes | Character count threshold for triggering summarization. |
389
+ | `maxLength` | `number` | Yes | Maximum length of generated summary in characters. |
390
+ | `method` | `string` | Yes | Summarization method to use. Currently only `"length-based"` is supported. |
391
+ | `promptPath` | `string` | No | Optional path to custom summarization prompt file. |
392
+
393
+ ### Field Details
394
+
395
+ #### `enabled`
396
+ - **Type**: Boolean
397
+ - **Default**: `true`
398
+ - **Description**: Controls whether agents perform context summarization. When `false`, agents always receive full history (subject to `includeFullHistory` setting).
399
+ - **Example**: `true`
400
+
401
+ #### `threshold`
402
+ - **Type**: Number (integer)
403
+ - **Default**: `5000`
404
+ - **Description**: Character count threshold for triggering summarization. When an agent's perspective-based history (their proposals, received critiques, and refinements) reaches or exceeds this threshold, summarization is triggered.
405
+ - **Minimum**: 100 (practical minimum)
406
+ - **Example**: `5000`
407
+
408
+ #### `maxLength`
409
+ - **Type**: Number (integer)
410
+ - **Default**: `2500`
411
+ - **Description**: Maximum length of the generated summary in characters. Summaries exceeding this length are truncated.
412
+ - **Recommendation**: Set to approximately 40-50% of threshold for effective compression
413
+ - **Example**: `2500`
414
+
415
+ #### `method`
416
+ - **Type**: String (enum)
417
+ - **Accepted Values**: `"length-based"` (only supported value currently)
418
+ - **Default**: `"length-based"`
419
+ - **Description**: Summarization strategy to use. Future versions may support additional methods like `"semantic"` or `"hierarchical"`.
420
+ - **Example**: `"length-based"`
421
+
422
+ #### `promptPath`
423
+ - **Type**: String (optional)
424
+ - **Description**: Path to a custom summarization prompt file, resolved relative to the configuration file directory.
425
+ - **Fallback**: If omitted or invalid, uses built-in role-specific summary prompts
426
+ - **Example**: `"./prompts/custom-summary.md"`
427
+
428
+ ### Per-Agent Configuration
429
+
430
+ Agents can override system-wide summarization settings:
431
+
432
+ ```json
433
+ {
434
+ "agents": [
435
+ {
436
+ "id": "agent-architect",
437
+ "name": "System Architect",
438
+ "role": "architect",
439
+ "model": "gpt-4",
440
+ "provider": "openai",
441
+ "temperature": 0.5,
442
+ "summaryPromptPath": "./prompts/architect-summary.md",
443
+ "summarization": {
444
+ "enabled": true,
445
+ "threshold": 3000,
446
+ "maxLength": 1500,
447
+ "method": "length-based"
448
+ }
449
+ }
450
+ ]
451
+ }
452
+ ```
453
+
454
+ ### Agent-Specific Fields
455
+
456
+ In addition to the `summarization` object, agents support:
457
+
458
+ #### `summaryPromptPath`
459
+ - **Type**: String (optional)
460
+ - **Description**: Path to a custom summary prompt for this specific agent, following the same resolution rules as `systemPromptPath`
461
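+ - **Fallback**: If omitted, or if the file is missing or invalid, uses the built-in role-specific summary prompt (a warning is logged to stderr when a specified file cannot be used)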
462
+ - **Resolution**: Relative paths resolved against configuration file directory
463
+ - **Example**: `"./prompts/architect-summary.md"`
464
+
465
+ ### Default Configuration
466
+
467
+ If no summarization configuration is provided, the system uses:
468
+
469
+ ```json
470
+ {
471
+ "enabled": true,
472
+ "threshold": 5000,
473
+ "maxLength": 2500,
474
+ "method": "length-based"
475
+ }
476
+ ```
477
+
478
+ ### Behavior Details
479
+
480
+ #### When Summarization Occurs
481
+
482
+ Summarization happens at the beginning of each round, before the proposal phase:
483
+
484
+ 1. **Decision**: Each agent evaluates whether their history has reached the threshold
485
+ 2. **Filtering**: Agent filters history to their perspective:
486
+ - Their own proposals
487
+ - Critiques they received (not critiques of other agents)
488
+ - Their own refinements
489
+ 3. **Calculation**: Total character count is calculated from filtered history
490
+ 4. **Trigger**: If count >= threshold, summarization is performed
491
+ 5. **LLM Call**: Agent uses configured model, temperature, and provider to generate summary (falls back to defaults if not provided: model `gpt-4`, temperature `0.3`)
492
+ 6. **Storage**: Summary and metadata are persisted as `round.summaries[agentId] = summary` (keyed by agent ID)
493
+
494
+ **Judge Summarization**: The judge also performs summarization during the synthesis phase if the final round's proposals and refinements exceed the threshold. The judge's summary is stored separately in `DebateState.judgeSummary`.
495
+
496
+ #### What Gets Summarized
497
+
498
+ Each agent summarizes **only their perspective** of the debate:
499
+ - **Proposals**: All proposals made by this agent across all rounds
500
+ - **Critiques Received**: Only critiques targeting this agent's proposals
501
+ - **Refinements**: All refinements made by this agent
502
+
503
+ Critiques of other agents are **excluded** from each agent's summary.
504
+
505
+ **Judge Summarization**: The judge summarizes only the final round's proposals and refinements (not critiques) when the content exceeds the threshold. This provides a focused view of the most recent solution attempts for synthesis.
506
+
507
+ #### Context Usage
508
+
509
+ The system uses summaries dynamically when formatting prompts:
510
+
511
+ 1. **Storage**: Summaries are stored in `round.summaries[agentId]` (Record keyed by agent ID)
512
+ 2. **Retrieval**: When generating a prompt, the formatter:
513
+ - Searches backwards through `context.history` rounds
514
+ - Looks for `round.summaries[agentId]`
515
+ - Uses the **most recent summary** if found
516
+ - Falls back to full history if no summary exists
517
+ 3. **Data Isolation**: Each agent only sees their own summary
518
+ 4. **Fresh Summaries**: Summary is **recalculated fresh each round** (not incremental)
519
+ 5. **Original Context**: The `DebateContext` object is never modified - summaries are retrieved dynamically
520
+
521
+ **Precedence**: Agent's most recent summary > Full history > No context
522
+
523
+ #### Verbose Output
524
+
525
+ When `--verbose` is enabled, summarization information is displayed:
526
+
527
+ ```
528
+ Summarization:
529
+ - Enabled: true
530
+ - Threshold: 5000 characters
531
+ - Max summary length: 2500 characters
532
+ - Method: length-based
533
+
534
+ Round 1
535
+ summaries:
536
+ [architect] 6234 → 2345 chars
537
+ (latency=1234ms, tokens=456, method=length-based)
538
+ ```
539
+
540
+ ### Summary Prompts
541
+
542
+ The system provides built-in role-specific summary prompts that instruct agents to:
543
+ - Summarize from their role's perspective
544
+ - Preserve critical insights and decisions
545
+ - Focus on information useful for future rounds
546
+ - Stay within the maximum length
547
+
548
+ Custom summary prompts can override these using `summaryPromptPath` (per-agent) or `debate.summarization.promptPath` (system-wide).
549
+
550
+ #### Custom Summary Prompt Format
551
+
552
+ Custom prompts should include:
553
+ - Instructions to summarize from the role's perspective
554
+ - Guidance on what to preserve (key decisions, open questions, critical points)
555
+ - Maximum length constraint
556
+ - Content placeholder marking where the history to summarize will be provided (the example below uses `{content}`)
557
+
558
+ **Example custom summary prompt:**
559
+ ```markdown
560
+ You are summarizing the debate history from an architectural perspective.
561
+
562
+ Focus on:
563
+ - Key architectural decisions and their rationale
564
+ - Component designs and interfaces
565
+ - Scalability concerns discussed
566
+ - Open architectural questions
567
+
568
+ Create a concise summary (maximum 2500 characters) that preserves the most important architectural insights and decisions for use in future rounds.
569
+
570
+ History to summarize:
571
+ {content}
572
+ ```
573
+
574
+ ### Fallback Behavior
575
+
576
+ #### Missing Summary Prompt
577
+
578
+ If `summaryPromptPath` is specified but the file is missing or invalid:
579
+ - System uses built-in role-specific summary prompt
580
+ - Warning is logged to stderr
581
+ - Debate continues normally
582
+
583
+ #### Summarization Failure
584
+
585
+ If summarization fails due to LLM errors:
586
+ - Agent falls back to using full history
587
+ - Warning is logged to stderr with error details
588
+ - Debate continues normally
589
+
590
+ ### Configuration Examples
591
+
592
+ #### Disable Summarization Globally
593
+
594
+ ```json
595
+ {
596
+ "debate": {
597
+ "summarization": {
598
+ "enabled": false
599
+ }
600
+ }
601
+ }
602
+ ```
603
+
604
+ #### Aggressive Summarization (Low Threshold)
605
+
606
+ ```json
607
+ {
608
+ "debate": {
609
+ "summarization": {
610
+ "enabled": true,
611
+ "threshold": 2000,
612
+ "maxLength": 1000,
613
+ "method": "length-based"
614
+ }
615
+ }
616
+ }
617
+ ```
618
+
619
+ #### Per-Agent Customization
620
+
621
+ ```json
622
+ {
623
+ "agents": [
624
+ {
625
+ "id": "agent-architect",
626
+ "name": "System Architect",
627
+ "role": "architect",
628
+ "model": "gpt-4",
629
+ "provider": "openai",
630
+ "temperature": 0.5,
631
+ "summarization": {
632
+ "enabled": true,
633
+ "threshold": 3000,
634
+ "maxLength": 1500,
635
+ "method": "length-based"
636
+ }
637
+ },
638
+ {
639
+ "id": "agent-security",
640
+ "name": "Security Specialist",
641
+ "role": "security",
642
+ "model": "gpt-4",
643
+ "provider": "openai",
644
+ "temperature": 0.4,
645
+ "summarization": {
646
+ "enabled": false
647
+ }
648
+ }
649
+ ]
650
+ }
651
+ ```
652
+
653
+ ### Best Practices
654
+
655
+ 1. **Default Settings**: The default threshold (5000 characters) and max length (2500 characters) work well for most debates. Start with these and adjust if needed.
656
+
657
+ 2. **Threshold Selection**: Set threshold based on your models' context windows and typical debate verbosity. Consider that proposals, critiques, and refinements add up quickly.
658
+
659
+ 3. **Max Length**: Set maxLength to approximately 40-50% of threshold for effective compression while preserving key information.
660
+
661
+ 4. **Per-Agent Tuning**: Different roles may need different thresholds:
662
+ - Architect agents often produce longer, more detailed proposals → higher threshold
663
+ - Security agents may be more concise → lower threshold acceptable
664
+
665
+ 5. **Monitor Verbose Output**: Use `--verbose` to see when summarization triggers and verify summaries are appropriately sized.
666
+
667
+ 6. **Custom Prompts**: For specialized use cases, provide custom summary prompts that emphasize domain-specific information to preserve.
668
+
669
+ 7. **Balance**: Summarization reduces context size but may lose detail. If debates are producing poor results after summarization, increase the threshold or disable it for critical agents.
670
+
671
+ ## Environment Variables
672
+
673
+ ### `OPENAI_API_KEY`
674
+ - **Type**: String
675
+ - **Required**: Yes (when using OpenAI provider)
676
+ - **Description**: Your OpenAI API key for authenticating with the OpenAI API.
677
+ - **How to Set**:
678
+ - Windows PowerShell: `$Env:OPENAI_API_KEY = "sk-..."`
679
+ - macOS/Linux bash/zsh: `export OPENAI_API_KEY="sk-..."`
680
+ - **Security**: Never commit this value to version control. Use environment variables or secret management systems.
681
+
682
+ ### `OPENROUTER_API_KEY`
683
+ - **Type**: String
684
+ - **Required**: Yes (when using OpenRouter provider)
685
+ - **Description**: Your OpenRouter API key for authenticating with the OpenRouter API.
686
+ - **How to Set**:
687
+ - Windows PowerShell: `$Env:OPENROUTER_API_KEY = "sk-or-..."`
688
+ - macOS/Linux bash/zsh: `export OPENROUTER_API_KEY="sk-or-..."`
689
+ - **Security**: Never commit this value to version control. Use environment variables or secret management systems.
690
+
691
+ ## Command Line Options
692
+
693
+ The CLI accepts the following options that can override configuration file settings:
694
+ ### `--clarify`
695
+ - **Type**: Boolean flag
696
+ - **Description**: Forces a one-time pre-debate clarifications phase regardless of configuration.
697
+ - **Precedence**: Takes precedence over `debate.interactiveClarifications` in the configuration file.
698
+
699
+
700
+ ### `debate <problem>`
701
+ - **Description**: Main command to run a debate
702
+ - **Arguments**:
703
+ - `<problem>` (required): The problem statement to debate. Must be a non-empty string.
704
+
705
+ ### `-a, --agents <roles>`
706
+ - **Type**: String (comma-separated list)
707
+ - **Accepted Values**: Comma-separated list of role names: `architect`, `security`, `performance`, `testing`, `kiss`, `generalist`
708
+ - **Description**: Filter which agents participate in the debate. Only agents with matching roles will be included.
709
+ - **Default**: All enabled agents from configuration
710
+ - **Example**: `--agents architect,performance`
711
+ - **Behavior**: If no agents match the filter, the system falls back to default agents (architect, performance, and kiss).
712
+ - **Important**: This option **filters** agents from the configuration file; it does not replace or override agent configurations. The configuration file defines the agent pool (including models, temperatures, custom prompts), while this option selects which configured agents participate in the debate. For example, if your config defines a security agent with a custom prompt and `gpt-4` model, using `--agents security` will use that configured security agent, not a default one.
713
+
714
+ ### `-r, --rounds <number>`
715
+ - **Type**: Integer
716
+ - **Accepted Values**: Integers >= 1
717
+ - **Description**: Override the number of debate rounds.
718
+ - **Default**: Value from configuration file, or 3 if not specified
719
+ - **Example**: `--rounds 5`
720
+ - **Behavior**: Must be at least 1. Invalid values result in an error with exit code 2.
721
+
722
+ ### `-c, --config <path>`
723
+ - **Type**: String (file path)
724
+ - **Accepted Values**: Path to a valid JSON configuration file
725
+ - **Description**: Path to the configuration file to load.
726
+ - **Default**: `./debate-config.json`
727
+ - **Example**: `--config ./custom-config.json`
728
+ - **Behavior**: If the file does not exist, the system uses built-in defaults and prints a warning to stderr.
729
+
730
+ ### `-o, --output <path>`
731
+ - **Type**: String (file path)
732
+ - **Accepted Values**: Any valid file path
733
+ - **Description**: Output file for debate results.
734
+ - **Behavior**:
735
+ - If path ends with `.json`: Full debate state (JSON) is written
736
+ - Otherwise: Only the final solution text is written
737
+ - If omitted: Solution is written to stdout
738
+ - **Example**: `--output result.json` or `--output solution.txt`
739
+
740
+ ### `-v, --verbose`
741
+ - **Type**: Boolean flag
742
+ - **Description**: Enable verbose output showing round-by-round details, agent information, and metadata.
743
+ - **Default**: `false`
744
+ - **Example**: `--verbose`
745
+ - **Behavior**:
746
+ - When enabled and no output file is specified, detailed round information is written to stdout after the solution.
747
+ - Additionally, for each agent (and the judge), a one-line note shows which system prompt was used: either "built-in default" or the resolved absolute file path.
748
+
749
+ ## Built-In Defaults
750
+
751
+ If the configuration file is missing or incomplete, the system uses these built-in defaults:
752
+
753
+ ### Default Agents
754
+ ```json
755
+ [
756
+ {
757
+ "id": "agent-architect",
758
+ "name": "System Architect",
759
+ "role": "architect",
760
+ "model": "gpt-4",
761
+ "provider": "openai",
762
+ "temperature": 0.5,
763
+ "enabled": true
764
+ },
765
+ {
766
+ "id": "agent-performance",
767
+ "name": "Performance Engineer",
768
+ "role": "performance",
769
+ "model": "gpt-4",
770
+ "provider": "openai",
771
+ "temperature": 0.5,
772
+ "enabled": true
773
+ },
774
+ {
775
+ "id": "agent-kiss",
776
+ "name": "Simplicity Advocate",
777
+ "role": "kiss",
778
+ "model": "gpt-4",
779
+ "provider": "openai",
780
+ "temperature": 0.5,
781
+ "enabled": true
782
+ }
783
+ ]
784
+ ```
785
+
786
+ ### Default Judge
787
+ ```json
788
+ {
789
+ "id": "judge-main",
790
+ "name": "Technical Judge",
791
+ "role": "generalist",
792
+ "model": "gpt-4",
793
+ "provider": "openai",
794
+ "temperature": 0.3
795
+ }
796
+ ```
797
+
798
+ ### Default Debate Settings
799
+ ```json
800
+ {
801
+ "rounds": 3,
802
+ "terminationCondition": { "type": "fixed" },
803
+ "synthesisMethod": "judge",
804
+ "includeFullHistory": true,
805
+ "timeoutPerRound": 300000
806
+ }
807
+ ```
808
+
809
+ ## Configuration Loading Behavior
810
+
811
+ 1. **No Config File**: If the configuration file does not exist, all built-in defaults are used, and a warning is printed to stderr.
812
+
813
+ 2. **Missing Agents**: If the configuration file exists but has no agents or an empty agents array, all built-in defaults are used, and a warning is printed.
814
+
815
+ 3. **Missing Judge**: If the judge field is absent, the default judge is used, and a warning is printed.
816
+
817
+ 4. **Missing Debate Settings**: If the debate field is absent, default debate settings are used (no warning).
818
+
819
+ 5. **Invalid Agent Roles**: If an agent has an unrecognized role, it defaults to architect behavior, and a warning is printed.
820
+
821
+ 6. **Disabled Agents**: Agents with `enabled: false` are excluded from debate execution.
822
+
823
+ 7. **Agent Filtering**: The `--agents` CLI option filters enabled agents by role. If no agents match, defaults are used, and a warning is printed. The filtering process is: (1) load all agents from config, (2) filter out disabled agents, (3) apply role filter from `--agents` if provided, (4) fall back to defaults if result is empty.
824
+
825
+ 8. **System Prompt Path Resolution**: If `systemPromptPath` is provided for an agent or judge, the CLI resolves it relative to the configuration file directory and attempts to read the full file as UTF-8. Missing/unreadable/empty files result in a warning to stderr and fallback to a built-in prompt.
826
+
827
+ ## Exit Codes
828
+
829
+ The CLI uses specific exit codes to indicate different error conditions:
830
+
831
+ | Code | Meaning | Description |
832
+ |------|---------|-------------|
833
+ | 0 | Success | Debate completed successfully |
834
+ | 1 | General Error | Unexpected error during execution |
835
+ | 2 | Invalid Arguments | Invalid CLI arguments (e.g., missing problem, rounds < 1) |
836
+ | 3 | Provider Error | Reserved for future LLM provider errors |
837
+ | 4 | Configuration Error | Configuration issue (e.g., missing OPENAI_API_KEY) |
838
+
839
+ ## Validation Rules
840
+
841
+ ### Agent Configuration Validation
842
+ - `id` must be non-empty and unique across all agents and judge
843
+ - `name` must be non-empty
844
+ - `role` must be one of the accepted role values
845
+ - `provider` must be a supported provider: `"openai"` or `"openrouter"`
846
+ - `temperature` must be between 0.0 and 1.0 (inclusive)
847
+ - `model` must be a valid model identifier for the selected provider
848
+
849
+ ### Debate Configuration Validation
850
+ - `rounds` must be >= 1 (validated at runtime)
851
+ - `terminationCondition.type` must be `"fixed"` (other types not yet implemented)
852
+ - `synthesisMethod` must be `"judge"` (other methods not yet implemented)
853
+ - `timeoutPerRound` must be a positive integer
854
+
855
+ ### CLI Validation
856
+ - Problem statement must be non-empty
857
+ - The API key environment variable for each provider in use must be set (`OPENAI_API_KEY` for OpenAI, `OPENROUTER_API_KEY` for OpenRouter)
858
+ - Rounds (if specified) must be >= 1
859
+ - Configuration file (if specified) must be valid JSON
860
+
861
+ ## Tips and Best Practices
862
+
863
+ 1. **Start with Defaults**: Use the built-in defaults initially, then customize as needed.
864
+
865
+ 2. **Temperature Settings**:
866
+ - Use lower temperatures (0.2-0.3) for judges to ensure consistent synthesis
867
+ - Use moderate temperatures (0.4-0.6) for agents to balance creativity and focus
868
+
869
+ 3. **Agent Selection**: For complex problems, include multiple perspectives (architect, performance, security).
870
+
871
+ 4. **Rounds**: Start with 3 rounds for most problems. Increase for complex issues requiring deeper exploration.
872
+
873
+ 5. **Verbose Mode**: Use `--verbose` during development to understand agent behavior and debug issues.
874
+
875
+ 6. **Output Files**: Save debates as JSON for later analysis and reproducibility.
876
+
877
+ 7. **Environment Variables**: Use a `.env` file (with appropriate tooling) to manage API keys securely.
878
+
879
+ 8. **Disabled Agents**: Use `enabled: false` to keep agent configurations without removing them entirely.
880
+
881
+ 9. **Configuration vs CLI Filtering**: Use the configuration file to define your agent pool with all settings (models, temperatures, custom prompts), and use `--agents` for quick runtime selection. For example, configure architect, security, and performance agents in your file, then use `--agents architect,security` for security-focused debates and `--agents architect,performance` for optimization debates—all while preserving each agent's custom configuration.
882
+