dialectic 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119) hide show
  1. package/.cursor/commands/setup-test.mdc +175 -0
  2. package/.cursor/rules/basic-code-cleanup.mdc +1110 -0
  3. package/.cursor/rules/riper5.mdc +96 -0
  4. package/.env.example +6 -0
  5. package/AGENTS.md +1052 -0
  6. package/LICENSE +21 -0
  7. package/README.md +93 -0
  8. package/WARP.md +113 -0
  9. package/dialectic-1.0.0.tgz +0 -0
  10. package/dialectic.js +10 -0
  11. package/docs/commands.md +375 -0
  12. package/docs/configuration.md +882 -0
  13. package/docs/context_summarization.md +1023 -0
  14. package/docs/debate_flow.md +1127 -0
  15. package/docs/eval_flow.md +795 -0
  16. package/docs/evaluator.md +141 -0
  17. package/examples/debate-config-openrouter.json +48 -0
  18. package/examples/debate_config1.json +48 -0
  19. package/examples/eval/eval1/eval_config1.json +13 -0
  20. package/examples/eval/eval1/result1.json +62 -0
  21. package/examples/eval/eval1/result2.json +97 -0
  22. package/examples/eval_summary_format.md +11 -0
  23. package/examples/example3/debate-config.json +64 -0
  24. package/examples/example3/eval_config2.json +25 -0
  25. package/examples/example3/problem.md +17 -0
  26. package/examples/example3/rounds_test/eval_run.sh +16 -0
  27. package/examples/example3/rounds_test/run_test.sh +16 -0
  28. package/examples/kata1/architect-only-solution_2-rounds.json +121 -0
  29. package/examples/kata1/architect-perf-solution_2-rounds.json +234 -0
  30. package/examples/kata1/debate-config-kata1.json +54 -0
  31. package/examples/kata1/eval_architect-only_2-rounds.json +97 -0
  32. package/examples/kata1/eval_architect-perf_2-rounds.json +97 -0
  33. package/examples/kata1/kata1-report.md +12224 -0
  34. package/examples/kata1/kata1-report_temps-01_01_01_07.md +2451 -0
  35. package/examples/kata1/kata1.md +5 -0
  36. package/examples/kata1/meta.txt +1 -0
  37. package/examples/kata2/debate-config.json +54 -0
  38. package/examples/kata2/eval_config1.json +21 -0
  39. package/examples/kata2/eval_config2.json +25 -0
  40. package/examples/kata2/kata2.md +5 -0
  41. package/examples/kata2/only_architect/debate-config.json +45 -0
  42. package/examples/kata2/only_architect/eval_run.sh +11 -0
  43. package/examples/kata2/only_architect/run_test.sh +5 -0
  44. package/examples/kata2/rounds_test/eval_run.sh +11 -0
  45. package/examples/kata2/rounds_test/run_test.sh +5 -0
  46. package/examples/kata2/summary_length_test/eval_run.sh +11 -0
  47. package/examples/kata2/summary_length_test/eval_run_w_clarify.sh +7 -0
  48. package/examples/kata2/summary_length_test/run_test.sh +5 -0
  49. package/examples/task-queue/debate-config.json +76 -0
  50. package/examples/task-queue/debate_report.md +566 -0
  51. package/examples/task-queue/task-queue-system.md +25 -0
  52. package/jest.config.ts +13 -0
  53. package/multi_agent_debate_spec.md +2980 -0
  54. package/package.json +38 -0
  55. package/sanity-check-problem.txt +9 -0
  56. package/src/agents/prompts/architect-prompts.ts +203 -0
  57. package/src/agents/prompts/generalist-prompts.ts +157 -0
  58. package/src/agents/prompts/index.ts +41 -0
  59. package/src/agents/prompts/judge-prompts.ts +19 -0
  60. package/src/agents/prompts/kiss-prompts.ts +230 -0
  61. package/src/agents/prompts/performance-prompts.ts +142 -0
  62. package/src/agents/prompts/prompt-types.ts +68 -0
  63. package/src/agents/prompts/security-prompts.ts +149 -0
  64. package/src/agents/prompts/shared.ts +144 -0
  65. package/src/agents/prompts/testing-prompts.ts +149 -0
  66. package/src/agents/role-based-agent.ts +386 -0
  67. package/src/cli/commands/debate.ts +761 -0
  68. package/src/cli/commands/eval.ts +475 -0
  69. package/src/cli/commands/report.ts +265 -0
  70. package/src/cli/index.ts +79 -0
  71. package/src/core/agent.ts +198 -0
  72. package/src/core/clarifications.ts +34 -0
  73. package/src/core/judge.ts +257 -0
  74. package/src/core/orchestrator.ts +432 -0
  75. package/src/core/state-manager.ts +322 -0
  76. package/src/eval/evaluator-agent.ts +130 -0
  77. package/src/eval/prompts/system.md +41 -0
  78. package/src/eval/prompts/user.md +64 -0
  79. package/src/providers/llm-provider.ts +25 -0
  80. package/src/providers/openai-provider.ts +84 -0
  81. package/src/providers/openrouter-provider.ts +122 -0
  82. package/src/providers/provider-factory.ts +64 -0
  83. package/src/types/agent.types.ts +141 -0
  84. package/src/types/config.types.ts +47 -0
  85. package/src/types/debate.types.ts +237 -0
  86. package/src/types/eval.types.ts +85 -0
  87. package/src/utils/common.ts +104 -0
  88. package/src/utils/context-formatter.ts +102 -0
  89. package/src/utils/context-summarizer.ts +143 -0
  90. package/src/utils/env-loader.ts +46 -0
  91. package/src/utils/exit-codes.ts +5 -0
  92. package/src/utils/id.ts +11 -0
  93. package/src/utils/logger.ts +48 -0
  94. package/src/utils/paths.ts +10 -0
  95. package/src/utils/progress-ui.ts +313 -0
  96. package/src/utils/prompt-loader.ts +79 -0
  97. package/src/utils/report-generator.ts +301 -0
  98. package/tests/clarifications.spec.ts +128 -0
  99. package/tests/cli.debate.spec.ts +144 -0
  100. package/tests/config-loading.spec.ts +206 -0
  101. package/tests/context-summarizer.spec.ts +131 -0
  102. package/tests/debate-config-custom.json +38 -0
  103. package/tests/env-loader.spec.ts +149 -0
  104. package/tests/eval.command.spec.ts +1191 -0
  105. package/tests/logger.spec.ts +19 -0
  106. package/tests/openai-provider.spec.ts +26 -0
  107. package/tests/openrouter-provider.spec.ts +279 -0
  108. package/tests/orchestrator-summary.spec.ts +386 -0
  109. package/tests/orchestrator.spec.ts +207 -0
  110. package/tests/prompt-loader.spec.ts +52 -0
  111. package/tests/prompts/architect.md +16 -0
  112. package/tests/provider-factory.spec.ts +150 -0
  113. package/tests/report.command.spec.ts +546 -0
  114. package/tests/role-based-agent-summary.spec.ts +476 -0
  115. package/tests/security-agent.spec.ts +221 -0
  116. package/tests/shared-prompts.spec.ts +318 -0
  117. package/tests/state-manager.spec.ts +251 -0
  118. package/tests/summary-prompts.spec.ts +153 -0
  119. package/tsconfig.json +49 -0
package/AGENTS.md ADDED
@@ -0,0 +1,1052 @@
1
+ # AGENTS.md
2
+
3
+ ## Project Overview
4
+
5
+ Dialectic is a CLI tool that orchestrates multi-agent debates to solve software design problems. The system uses multiple AI agents with different perspectives (architecture, performance, security, testing, simplicity) to debate a problem through structured rounds of proposals, critiques, and refinements, culminating in a synthesized solution from a judge agent.
6
+
7
+ **Key Technologies:**
8
+ - **Language**: TypeScript (ES2022)
9
+ - **Runtime**: Node.js >= 18
10
+ - **Testing Framework**: Jest with ts-jest
11
+ - **Build Tool**: TypeScript Compiler (tsc)
12
+ - **LLM Providers**: OpenAI API and OpenRouter API
13
+ - **CLI Framework**: Commander.js
14
+
15
+ **Main Components:**
16
+ - **Core Orchestrator**: Manages debate rounds and phases (proposal, critique, refinement)
17
+ - **Agents**: Role-based agents (architect, performance, security, testing, kiss, generalist)
18
+ - **Judge Agent**: Synthesizes final solutions from debate history
19
+ - **State Manager**: Persists debate state to JSON files
20
+ - **LLM Providers**: Abstracted provider interface supporting OpenAI and OpenRouter
21
+ - **CLI**: Command-line interface for running debates and evaluations
22
+
23
+ **Key Features:**
24
+ - Multi-round debate orchestration with configurable rounds
25
+ - Role-based agent system with customizable prompts
26
+ - Context summarization to manage debate history length
27
+ - Interactive clarifications phase for problem refinement
28
+ - Debate state persistence and report generation
29
+ - Evaluator command for assessing debate outcomes
30
+
31
+ ## Command-Line Usage
32
+
33
+ Dialectic is invoked from the command line using the `dialectic` command. This section provides comprehensive examples for running debates and evaluations in a bash shell environment.
34
+
35
+ ### Basic Command Structure
36
+
37
+ **Debate Command:**
38
+ ```bash
39
+ dialectic debate [problem] [options]
40
+ ```
41
+
42
+ **Evaluator Command:**
43
+ ```bash
44
+ dialectic eval [options]
45
+ ```
46
+
47
+ ### Problem Input
48
+
49
+ You can provide the problem statement in two ways:
50
+
51
+ **1. Inline string:**
52
+ ```bash
53
+ dialectic debate "Design a rate limiting system"
54
+ dialectic debate "Build a secure authentication API with JWT tokens"
55
+ ```
56
+
57
+ **2. Problem description file:**
58
+ ```bash
59
+ dialectic debate --problemDescription problem.txt
60
+ dialectic debate --problemDescription ./problems/rate-limiting.md
61
+ dialectic debate --problemDescription ../design-problems/cache-system.md
62
+ ```
63
+
64
+ **Problem File Requirements:**
65
+ - **Encoding**: UTF-8
66
+ - **Format**: Any text format (plain text, markdown, etc.)
67
+ - **Content**: Must be non-empty (whitespace-only files are rejected)
68
+ - **Path**: Relative paths resolved from current working directory
69
+ - **Mutual exclusivity**: You cannot provide both an inline problem string and a `--problemDescription` file
70
+
71
+ ### Configuration File
72
+
73
+ **Default configuration:**
74
+ ```bash
75
+ dialectic debate "Design a caching system"
76
+ # Uses ./debate-config.json if it exists, otherwise uses built-in defaults
77
+ ```
78
+
79
+ **Custom configuration file:**
80
+ ```bash
81
+ dialectic debate "Design a caching system" --config ./configs/production.json
82
+ dialectic debate "Design a caching system" --config /path/to/custom-config.json
83
+ ```
84
+
85
+ **Configuration file location:**
86
+ - Default: `./debate-config.json` (relative to current working directory)
87
+ - Custom: Specify with `--config <path>`
88
+ - If the file doesn't exist: the system falls back to built-in defaults and prints a warning to stderr
89
+
90
+ ### Agent Selection
91
+
92
+ **Default agents (architect, performance, and kiss):**
93
+ ```bash
94
+ dialectic debate "Design a database system"
95
+ ```
96
+
97
+ **Select specific agent roles:**
98
+ ```bash
99
+ dialectic debate "Design a secure API" --agents architect,security
100
+ dialectic debate "Build a high-performance system" --agents architect,performance,security
101
+ dialectic debate "Design a testable system" --agents architect,testing
102
+ dialectic debate "Design a simple API" --agents architect,kiss
103
+ ```
104
+
105
+ **Available agent roles:**
106
+ - `architect` - System design and architecture perspective
107
+ - `performance` - Performance optimization and efficiency
108
+ - `security` - Security and threat modeling
109
+ - `testing` - Testing strategy and quality assurance
110
+ - `kiss` - Simplicity-focused perspective, challenges complexity
111
+ - `generalist` - General-purpose role (typically used for judge)
112
+
113
+ **Note:** The `--agents` option filters agents from your configuration file by role. If no agents match, the system falls back to default agents (architect, performance, and kiss).
114
+
115
+ ### Debate Rounds
116
+
117
+ **Default rounds (3):**
118
+ ```bash
119
+ dialectic debate "Design a messaging system"
120
+ ```
121
+
122
+ **Custom number of rounds:**
123
+ ```bash
124
+ dialectic debate "Design a messaging system" --rounds 1
125
+ dialectic debate "Design a messaging system" --rounds 5
126
+ dialectic debate "Design a messaging system" --rounds 10
127
+ ```
128
+
129
+ **Constraints:**
130
+ - Minimum: 1 round
131
+ - Default: 3 rounds
132
+ - Invalid values (e.g., 0, negative) result in exit code 2
133
+
134
+ ### Output Options
135
+
136
+ **Default output (stdout):**
137
+ ```bash
138
+ dialectic debate "Design a rate limiting system"
139
+ # Final solution text written to stdout
140
+ # Full debate state saved to ./debates/deb-YYYYMMDD-HHMMSS-RAND.json
141
+ ```
142
+
143
+ **Save solution text to file:**
144
+ ```bash
145
+ dialectic debate "Design a rate limiting system" --output solution.txt
146
+ dialectic debate "Design a rate limiting system" --output ./results/solution.txt
147
+ ```
148
+
149
+ **Save full debate state (JSON):**
150
+ ```bash
151
+ dialectic debate "Design a rate limiting system" --output debate-result.json
152
+ dialectic debate "Design a rate limiting system" --output ./results/debate-result.json
153
+ ```
154
+
155
+ **Output behavior:**
156
+ - If path ends with `.json`: Full debate state (JSON) written to file
157
+ - Otherwise: Only final solution text written to file
158
+ - If omitted: Solution written to stdout, state saved to `./debates/` directory
159
+
160
+ **Redirecting output:**
161
+ ```bash
162
+ # Save solution to file
163
+ dialectic debate "Design a system" --output solution.txt
164
+
165
+ # Pipe solution to another command
166
+ dialectic debate "Design a system" | grep "recommendation"
167
+
168
+ # Suppress solution output (save to file instead)
169
+ dialectic debate "Design a system" --output solution.txt > /dev/null
170
+ ```
171
+
172
+ ### Verbose Mode
173
+
174
+ **Enable detailed logging:**
175
+ ```bash
176
+ dialectic debate "Design a system" --verbose
177
+ ```
178
+
179
+ **Verbose output includes:**
180
+ - Round-by-round breakdown
181
+ - Individual contributions with metadata (tokens, latency)
182
+ - Total statistics (rounds, duration, token counts)
183
+ - System prompt sources (built-in vs file path)
184
+ - All verbose output is written to stderr, so it doesn't interfere with stdout piping
185
+
186
+ **Example with verbose:**
187
+ ```bash
188
+ dialectic debate "Design a system" --verbose --output solution.txt
189
+ # Solution goes to solution.txt
190
+ # Verbose diagnostics go to stderr
191
+ ```
192
+
193
+ ### Markdown Report Generation
194
+
195
+ **Generate debate report:**
196
+ ```bash
197
+ dialectic debate "Design a system" --report debate-report.md
198
+ dialectic debate "Design a system" --report ./reports/debate-report
199
+ ```
200
+
201
+ **Report features:**
202
+ - The `.md` extension is appended automatically if it is missing
203
+ - Parent directories created automatically
204
+ - Non-fatal on failure (debate succeeds even if report generation fails)
205
+ - Includes full debate transcript, metadata, clarifications, and synthesis
206
+
207
+ **Report with verbose metadata:**
208
+ ```bash
209
+ dialectic debate "Design a system" --verbose --report ./reports/debate-report.md
210
+ # Report includes latency and token counts in contribution titles
211
+ ```
212
+
213
+ **Report contents:**
214
+ - Problem Description
215
+ - Agents table and Judge table
216
+ - Clarifications (if `--clarify` was used)
217
+ - Rounds with proposals, critiques, and refinements
218
+ - Final Synthesis
219
+
220
+ ### Interactive Clarifications
221
+
222
+ **Enable clarifications phase:**
223
+ ```bash
224
+ dialectic debate "Design a distributed cache system" --clarify
225
+ ```
226
+
227
+ **Clarifications workflow:**
228
+ 1. Each agent generates up to 5 clarifying questions (configurable)
229
+ 2. CLI presents questions grouped by agent in interactive session
230
+ 3. Answer each question or press Enter to skip (recorded as "NA")
231
+ 4. Questions and answers included in debate context and final report
232
+
233
+ **Example interaction:**
234
+ ```bash
235
+ dialectic debate "Design a distributed cache system" --clarify
236
+ # [Architect] Q1: What are the expected read/write ratios?
237
+ # > 80% reads, 20% writes
238
+ # [Performance] Q2: What's the target latency requirement?
239
+ # > < 10ms for 95th percentile
240
+ # [Security] Q3: What data sensitivity level?
241
+ # > (press Enter to skip)
242
+ # Q3: NA
243
+ ```
244
+
245
+ **Clarifications with other options:**
246
+ ```bash
247
+ dialectic debate --problemDescription problem.md --clarify --agents architect,security
248
+ dialectic debate "Design a system" --clarify --rounds 5 --verbose
249
+ ```
250
+
251
+ ### Environment File
252
+
253
+ **Default environment file (`.env`):**
254
+ ```bash
255
+ dialectic debate "Design a system"
256
+ # Automatically loads .env from current directory if it exists
257
+ ```
258
+
259
+ **Custom environment file:**
260
+ ```bash
261
+ dialectic debate "Design a system" --env-file ./config/.env.production
262
+ dialectic debate "Design a system" --env-file /path/to/.env
263
+ ```
264
+
265
+ **Environment variables required:**
266
+ - `OPENAI_API_KEY` - Required for OpenAI provider
267
+ - `OPENROUTER_API_KEY` - Required for OpenRouter provider
268
+
269
+ ### Complete Examples
270
+
271
+ **Simple debate:**
272
+ ```bash
273
+ dialectic debate "Design a rate limiting system"
274
+ ```
275
+
276
+ **Complex debate with all options:**
277
+ ```bash
278
+ dialectic debate \
279
+ --problemDescription ./problems/rate-limiting.md \
280
+ --config ./configs/production.json \
281
+ --agents architect,performance,security \
282
+ --rounds 5 \
283
+ --output ./results/rate-limiting-solution.json \
284
+ --report ./reports/rate-limiting-report.md \
285
+ --verbose \
286
+ --clarify \
287
+ --env-file .env.production
288
+ ```
289
+
290
+ **Quick security-focused debate:**
291
+ ```bash
292
+ dialectic debate "Design a secure authentication system" \
293
+ --agents architect,security \
294
+ --rounds 3 \
295
+ --output auth-solution.txt \
296
+ --verbose
297
+ ```
298
+
299
+ **Save debate state for later evaluation:**
300
+ ```bash
301
+ dialectic debate "Design a system" \
302
+ --output ./debates/my-debate.json \
303
+ --rounds 3
304
+ ```
305
+
306
+ ### Report Command
307
+
308
+ **Generate report from saved debate state:**
309
+ ```bash
310
+ dialectic report --debate ./debates/deb-20250101-010203-ABC.json
311
+ ```
312
+
313
+ **Generate report without config file (creates minimal configs from debate state):**
314
+ ```bash
315
+ dialectic report --debate ./debates/deb-20250101-010203-ABC.json
316
+ # Creates minimal agent/judge configs from debate state, no validation
317
+ ```
318
+
319
+ **Generate report with config file (matches agent/judge configs):**
320
+ ```bash
321
+ dialectic report --debate ./debates/deb-20250101-010203-ABC.json --config ./debate-config.json
322
+ ```
323
+
324
+ **Save report to file:**
325
+ ```bash
326
+ dialectic report --debate ./debates/debate.json --output ./reports/report.md
327
+ ```
328
+
329
+ **Report with verbose metadata:**
330
+ ```bash
331
+ dialectic report --debate ./debates/debate.json --verbose --output report.md
332
+ ```
333
+
334
+ **Report behavior:**
335
+ - If `--config` is provided: loads configuration file and matches agent/judge configs with agent IDs found in debate state
336
+ - If `--config` is not provided: creates minimal agent/judge configs from debate state (extracts agent IDs and roles from contributions), no validation of IDs
337
+ - Generates markdown report identical to `--report` option in debate command
338
+ - Writes to stdout by default, or to specified file if `--output` provided
339
+
340
+ **Report options:**
341
+ - `--debate <path>`: Path to debate JSON file (DebateState format) (required)
342
+ - `--config <path>`: Optional path to configuration file. If not provided, creates minimal configs from debate state.
343
+ - `-o, --output <path>`: Optional path to output markdown file (default: stdout)
344
+ - `-v, --verbose`: Optional verbose mode for report generation
345
+
346
+ ### Evaluator Command
347
+
348
+ **Basic evaluation:**
349
+ ```bash
350
+ dialectic eval --config ./eval-config.json --debate ./debates/deb-20250101-010203-ABC.json
351
+ ```
352
+
353
+ **Evaluator with JSON output:**
354
+ ```bash
355
+ dialectic eval \
356
+ --config ./eval-config.json \
357
+ --debate ./debates/deb-20250101-010203-ABC.json \
358
+ --output ./results/evaluation.json
359
+ ```
360
+
361
+ **Evaluator with verbose logs:**
362
+ ```bash
363
+ dialectic eval \
364
+ --config ./eval-config.json \
365
+ --debate ./debates/deb-20250101-010203-ABC.json \
366
+ --verbose \
367
+ --env-file .env
368
+ ```
369
+
370
+ **Evaluator options:**
371
+ - `-c, --config <path>`: Evaluator configuration JSON (required)
372
+ - `-d, --debate <path>`: Debate state JSON to evaluate (required)
373
+ - `--env-file <path>`: Optional .env file path
374
+ - `-v, --verbose`: Verbose diagnostic logs to stderr
375
+ - `-o, --output <path>`: Output destination
376
+ - If ends with `.json`: writes aggregated JSON output
377
+ - Otherwise: writes a Markdown table to the given path (or to stdout if `--output` is omitted)
378
+
379
+ ### Exit Codes
380
+
381
+ | Code | Description |
382
+ |------|-------------|
383
+ | `0` | Success |
384
+ | `1` | General error |
385
+ | `2` | Invalid CLI arguments (e.g., missing problem, invalid rounds) |
386
+ | `3` | Provider error (reserved for future use) |
387
+ | `4` | Configuration error (e.g., missing `OPENAI_API_KEY`) |
388
+
389
+ **Checking exit codes:**
390
+ ```bash
391
+ dialectic debate "Design a system" && echo "Success!"
392
+ dialectic debate "Design a system" || echo "Failed with code: $?"
393
+ ```
394
+
395
+ ### Command-Line Option Summary
396
+
397
+ **Debate Command Options:**
398
+ - `[problem]` - Problem statement as inline string (mutually exclusive with `--problemDescription`)
399
+ - `--problemDescription <path>` - Path to problem description file
400
+ - `--agents <list>` - Comma-separated agent roles (default: `architect,performance,kiss`)
401
+ - `--rounds <n>` - Number of debate rounds (default: `3`, minimum: `1`)
402
+ - `--config <path>` - Path to configuration file (default: `./debate-config.json`)
403
+ - `--env-file <path>` - Path to environment file (default: `.env`)
404
+ - `--output <path>` - Output file path (JSON or text based on extension)
405
+ - `--verbose` - Enable detailed logging to stderr
406
+ - `--report <path>` - Generate Markdown report (extension auto-appended)
407
+ - `--clarify` - Enable interactive clarifications phase
408
+
409
+ **Evaluator Command Options:**
410
+ - `-c, --config <path>` - Evaluator configuration JSON (required)
411
+ - `-d, --debate <path>` - Debate state JSON to evaluate (required)
412
+ - `--env-file <path>` - Optional .env file path
413
+ - `-v, --verbose` - Verbose diagnostic logs to stderr
414
+ - `-o, --output <path>` - Output destination (JSON or Markdown based on extension)
415
+
416
+ **Report Command Options:**
417
+ - `--debate <path>` - Path to debate JSON file (DebateState format) (required)
418
+ - `--config <path>` - Optional path to configuration file. If not provided, creates minimal configs from debate state.
419
+ - `-o, --output <path>` - Optional path to output markdown file (default: stdout)
420
+ - `-v, --verbose` - Optional verbose mode for report generation
421
+
422
+ ## Build and Test Commands
423
+
424
+ ### Setup Commands
425
+
426
+ **Install dependencies:**
427
+ ```bash
428
+ npm install
429
+ ```
430
+
431
+ **Build the project:**
432
+ ```bash
433
+ npm run build
434
+ ```
435
+
436
+ This compiles TypeScript source files from `src/` to `dist/` with source maps and type declarations.
437
+
438
+ **Development mode:**
439
+ ```bash
440
+ npm run dev
441
+ ```
442
+
443
+ Runs the CLI using `ts-node` for development without building.
444
+
445
+ ### Test Commands
446
+
447
+ **Run all tests:**
448
+ ```bash
449
+ npm test
450
+ ```
451
+
452
+ Runs all test files in the `tests/` directory using Jest.
453
+
454
+ **Run tests in watch mode:**
455
+ ```bash
456
+ npm run test:watch
457
+ ```
458
+
459
+ Runs tests in watch mode, re-running tests when files change.
460
+
461
+ **Run tests with coverage measurement:**
462
+ ```bash
463
+ npm run test:coverage
464
+ ```
465
+
466
+ Generates coverage reports in multiple formats:
467
+ - **HTML Report**: `coverage/lcov-report/index.html` (view in browser)
468
+ - **LCOV**: `coverage/lcov.info` (for CI/CD integration)
469
+ - **JSON**: `coverage/coverage-final.json` (for programmatic access)
470
+
471
+ Coverage reports include:
472
+ - Line coverage percentage
473
+ - Branch coverage percentage
474
+ - Function coverage percentage
475
+ - Statement coverage percentage
476
+ - Uncovered lines and branches
477
+
478
+ **View Coverage Reports:**
479
+ - Open `coverage/lcov-report/index.html` in a web browser for interactive HTML report
480
+ - Use `coverage/lcov.info` for CI/CD integration with tools like Codecov or Coveralls
481
+
482
+ ## Code Style Guidelines
483
+
484
+ This project uses TypeScript with strict type checking enabled. Please adhere to the following guidelines:
485
+
486
+ ### TypeScript Configuration
487
+
488
+ The project uses strict TypeScript settings defined in `tsconfig.json`:
489
+
490
+ - **Strict Mode**: All strict type checking options are enabled
491
+ - **Target**: ES2022
492
+ - **Module**: CommonJS
493
+ - **File Layout**: Source files in `src/`, compiled output in `dist/`
494
+
495
+ ### Code Style Rules
496
+
497
+ **Type Safety:**
498
+ - Use explicit types for function parameters and return values
499
+ - Avoid `any` type; use `unknown` when necessary, then narrow with type guards
500
+ - Enable `noImplicitAny`, `strictNullChecks`, and `noUncheckedIndexedAccess`
501
+ - Use `exactOptionalPropertyTypes` for precise optional property handling
502
+
503
+ **Naming Conventions:**
504
+ - Use camelCase for variables, functions, and methods
505
+ - Use PascalCase for classes, interfaces, and types
506
+ - Use UPPER_SNAKE_CASE for constants
507
+ - Use descriptive names that indicate purpose and scope
508
+
509
+ **File Organization:**
510
+ - One main class/interface per file
511
+ - Group related functionality in the same directory
512
+ - Use barrel exports (`index.ts`) for public APIs
513
+
514
+ **Imports:**
515
+ - Use ES6 import/export syntax
516
+ - Group imports: external packages, then internal modules
517
+ - Use absolute imports from `src/` when appropriate
518
+
519
+ **Error Handling:**
520
+ - Use custom error classes with exit codes (see `src/utils/exit-codes.ts`)
521
+ - Include error codes in error objects: `err.code = EXIT_CONFIG_ERROR`
522
+ - Provide clear, actionable error messages
523
+
524
+ **Async/Await:**
525
+ - Prefer `async/await` over Promises with `.then()`
526
+ - Handle errors with try/catch blocks
527
+ - Use `Promise.all()` for parallel operations when appropriate
528
+
529
+ **Comments:**
530
+ - Use JSDoc comments for public functions and classes
531
+ - Include parameter descriptions and return types in JSDoc
532
+ - Document complex logic and non-obvious behavior
533
+ - Remove commented-out code before committing
534
+
535
+ ### Example Code Style
536
+
537
+ ```typescript
538
+ /**
539
+ * Creates a debate orchestrator instance.
540
+ *
541
+ * @param agents - Array of agent instances to participate
542
+ * @param judge - Judge agent for synthesis
543
+ * @param config - Debate configuration
544
+ * @returns Orchestrator instance
545
+ * @throws {Error} If agents array is empty
546
+ */
547
+ export function createOrchestrator(
548
+ agents: Agent[],
549
+ judge: JudgeAgent,
550
+ config: DebateConfig
551
+ ): DebateOrchestrator {
552
+ if (agents.length === 0) {
553
+ const err: any = new Error('At least one agent is required');
554
+ err.code = EXIT_INVALID_ARGS;
555
+ throw err;
556
+ }
557
+
558
+ return new DebateOrchestrator(agents, judge, config);
559
+ }
560
+ ```
561
+
562
+ ### Linting and Formatting
563
+
564
+ While the project doesn't currently include ESLint or Prettier configuration files, maintain consistency with:
565
+ - 2-space indentation (as seen in `tsconfig.json`)
566
+ - Semicolons at end of statements
567
+ - Trailing commas in multi-line objects and arrays
568
+ - Single quotes for strings (preferred, but double quotes are acceptable)
569
+
570
+ ## Code Cleanup Guidelines
571
+
572
+ This section provides guidelines for maintaining clean, maintainable code. Follow these principles to improve code quality and reduce technical debt.
573
+
574
+ ### 1. Template Method Pattern
575
+
576
+ **When to use:** When multiple classes have similar methods that duplicate the same logic and differ only in the values they use (such as prompts).
577
+
578
+ **Principle:** Extract common logic into a template method in the base class. Subclasses provide only unique values and delegate execution.
579
+
580
+ **Example:**
581
+
582
+ ```typescript
583
+ // ❌ BAD: Duplicate logic in each agent
584
+ class ArchitectAgent {
585
+ async propose(problem: string): Promise<Proposal> {
586
+ const system = this.config.systemPrompt || ARCHITECT_SYSTEM_PROMPT;
587
+ const user = `Problem: ${problem}\n\nProvide architectural solution...`;
588
+ const { text, usage, latencyMs } = await this.callLLM(system, user);
589
+ const metadata: ContributionMetadata = { latencyMs, model: this.config.model };
590
+ if (usage?.totalTokens != null) metadata.tokensUsed = usage.totalTokens;
591
+ return { content: text, metadata };
592
+ }
593
+ }
594
+
595
+ // ✅ GOOD: Template method in base class
596
+ // Base Agent class
597
+ protected async proposeImpl(
598
+ _context: DebateContext,
599
+ systemPrompt: string,
600
+ userPrompt: string
601
+ ): Promise<Proposal> {
602
+ const { text, usage, latencyMs } = await this.callLLM(systemPrompt, userPrompt);
603
+ const metadata: ContributionMetadata = { latencyMs, model: this.config.model };
604
+ if (usage?.totalTokens != null) metadata.tokensUsed = usage.totalTokens;
605
+ return { content: text, metadata };
606
+ }
607
+
608
+ // Subclass only provides prompts
609
+ class ArchitectAgent {
610
+ async propose(problem: string, context: DebateContext): Promise<Proposal> {
611
+ const system = this.config.systemPrompt || ARCHITECT_SYSTEM_PROMPT;
612
+ const user = `Problem: ${problem}\n\nProvide architectural solution...`;
613
+ return this.proposeImpl(context, system, user);
614
+ }
615
+ }
616
+ ```
617
+
618
+ ### 2. Remove Magic Numbers and Hardcoded Strings
619
+
620
+ **Principle:** Replace literal values with named constants that explain their purpose.
621
+
622
+ **Example:**
623
+
624
+ ```typescript
625
+ // ❌ BAD: Magic numbers
626
+ const debateConfig = {
627
+ rounds: options.rounds || config.rounds || 3, // What is 3?
628
+ timeout: 300000 // What is 300000?
629
+ };
630
+
631
+ // ✅ GOOD: Named constants
632
+ const DEFAULT_ROUNDS = 3;
633
+ const DEFAULT_TIMEOUT_MS = 300000;
634
+
635
+ const debateConfig = {
636
+ rounds: options.rounds || config.rounds || DEFAULT_ROUNDS,
637
+ timeout: DEFAULT_TIMEOUT_MS
638
+ };
639
+ ```
640
+
641
+ **Where to define constants:**
642
+ - **File-level constants**: Use when only one file needs them
643
+ - **Exported constants**: Use when multiple files need them (in `types/` or `utils/` files)
644
+
645
+ ### 3. Proper Documentation Standards
646
+
647
+ **JSDoc for public APIs:**
648
+ - Document all public functions, classes, and complex methods
649
+ - Include parameter descriptions (`@param`) and return types (`@returns`)
650
+ - Document errors thrown (`@throws`)
651
+ - Use `@final` tag for template methods that shouldn't be overridden
652
+
653
+ **Example:**
654
+
655
+ ```typescript
656
+ /**
657
+ * Creates a DebateConfig from the system configuration and command-line options.
658
+ * Validates that the number of rounds is at least 1.
659
+ *
660
+ * @param sysConfig - The system configuration.
661
+ * @param options - Command-line options containing optional rounds override.
662
+ * @returns The debate configuration.
663
+ * @throws {Error} If rounds is less than 1.
664
+ */
665
+ function debateConfigFromSysConfig(sysConfig: SystemConfig, options: any): DebateConfig {
666
+ // implementation
667
+ }
668
+ ```
669
+
670
+ **Inline comments:** Explain **why**, not **what**. Document non-obvious technical choices.
671
+
672
+ ### 4. Common Code Smells to Avoid
673
+
674
+ #### Unnecessary Exports
675
+
676
+ **Problem:** Exporting internal constants that callers should only reach through an accessor method.
677
+
678
+ ```typescript
679
+ // ❌ BAD: Exports internal constant
680
+ export const DEFAULT_PERFORMANCE_SYSTEM_PROMPT = `...`;
681
+
682
+ // ✅ GOOD: Keep constant private, expose through method
683
+ const DEFAULT_PERFORMANCE_SYSTEM_PROMPT = `...`;
684
+ export class PerformanceAgent {
685
+ static defaultSystemPrompt(): string {
686
+ return DEFAULT_PERFORMANCE_SYSTEM_PROMPT;
687
+ }
688
+ }
689
+ ```
690
+
691
+ #### Inline Type Definitions (DRY Violation)
692
+
693
+ **Problem:** Repeating the same inline type definition multiple times.
694
+
695
+ ```typescript
696
+ // ❌ BAD: Repeated inline type
697
+ function createAgent(promptSource?: { source: 'built-in' | 'file'; absPath?: string }) { }
698
+
699
+ // ✅ GOOD: Define once, reuse
700
+ export interface PromptSource {
701
+ source: 'built-in' | 'file';
702
+ absPath?: string;
703
+ }
704
+ function createAgent(promptSource?: PromptSource) { }
705
+ ```
706
+
707
+ #### Repeated Code Patterns
708
+
709
+ **Problem:** The same logic pattern repeated 3+ times.
710
+
711
+ **Solution:** Extract to a helper function that can be reused.
712
+
713
+ #### Improper stdout/stderr Usage
714
+
715
+ **Principle:** stdout = data results, stderr = diagnostics/errors.
716
+
717
+ ```typescript
718
+ // ❌ BAD: Diagnostic output on stdout
719
+ process.stdout.write(result.solution.description);
720
+ process.stdout.write('Debug info...'); // Should be stderr
721
+
722
+ // ✅ GOOD: Proper separation
723
+ process.stdout.write(result.solution.description); // Main result
724
+ process.stderr.write('Debug info...'); // Diagnostics
725
+ ```
726
+
727
+ #### Redundant Function Calls
728
+
729
+ **Problem:** Calling the same function multiple times with the same result.
730
+
731
+ ```typescript
732
+ // ❌ BAD: Multiple calls
733
+ if (!fs.existsSync(finalPath)) {
734
+ return builtInDefaults(); // Call 1
735
+ }
736
+ if (!parsed.judge) {
737
+ parsed.judge = builtInDefaults().judge; // Call 2
738
+ }
739
+
740
+ // ✅ GOOD: Call once, reuse
741
+ const defaults = builtInDefaults(); // Call once
742
+ if (!fs.existsSync(finalPath)) {
743
+ return defaults;
744
+ }
745
+ if (!parsed.judge) {
746
+ parsed.judge = defaults.judge; // Reuse
747
+ }
748
+ ```
749
+
750
+ #### Complex Nested Logic
751
+
752
+ **Problem:** Deeply nested loops and conditionals that are hard to read.
753
+
754
+ **Solution:** Extract to focused helper functions with single responsibilities.
755
+
756
+ ### 5. Type Assertions Guidelines
757
+
758
+ **Use type assertions sparingly** and only when you have information TypeScript cannot infer.
759
+
760
+ **✅ Valid use cases:**
761
+ - Working with third-party libraries that return `any`
762
+ - Type narrowing after runtime validation
763
+ - Complex type transformations TypeScript struggles with
764
+
765
+ **❌ Avoid:**
766
+ - Hiding legitimate type errors
767
+ - Bypassing strict null checks
768
+ - Forcing incompatible types
769
+
770
+ **Before using `as Type`, consider:**
771
+ - Type guards: `if (typeof x === 'string')`
772
+ - Discriminated unions
773
+ - Conditional spreads: `...(value !== undefined && { value })`
774
+ - Proper typing: Fix the type definitions rather than casting
775
+
776
+ ### 6. Code Cleanup Checklist
777
+
778
+ When reviewing or refactoring code, check:
779
+
780
+ - [ ] **No duplicate logic** - Extract common patterns to base classes or utilities
781
+ - [ ] **No magic numbers** - All literal numbers replaced with named constants
782
+ - [ ] **No hardcoded strings** - Especially for types, roles, statuses - use constants
783
+ - [ ] **All public APIs documented** - JSDoc for functions, classes, complex methods
784
+ - [ ] **Non-obvious choices explained** - Inline comments for "why" not "what"
785
+ - [ ] **Constants properly scoped** - File-level for local, exported for shared
786
+ - [ ] **Template methods marked @final** - When they shouldn't be overridden
787
+ - [ ] **Function extraction** - Large functions broken into smaller, focused ones
788
+ - [ ] **Separation of concerns** - Each function has a single, clear purpose
789
+ - [ ] **Proper stdout/stderr usage** - Results to stdout, diagnostics to stderr
790
+ - [ ] **No redundant calls** - Functions called once, results reused
791
+ - [ ] **No unnecessary exports** - Internal constants accessed through methods
792
+ - [ ] **No inline type repetition** - Types defined once, reused everywhere
793
+
794
+ ### 7. Quick Reference
795
+
796
+ | Problem | Solution | Example |
797
+ |---------|----------|---------|
798
+ | Duplicate method logic | Template method pattern | `proposeImpl()` in base class |
799
+ | Magic number `3` | Named constant | `DEFAULT_ROUNDS = 3` |
800
+ | String `"architect"` | Constant object | `AGENT_ROLES.ARCHITECT` |
801
+ | Long complex function | Extract helper functions | `debateConfigFromSysConfig()` |
802
+ | Unclear technical choice | Inline comment | `// Use stderr.write for unbuffered output` |
803
+ | Public function | JSDoc with @param/@returns | See examples above |
804
+ | Repeated pattern 3+ times | Extract to helper function | `createAgentWithPromptResolution()` |
805
+
806
+ ### Key Principles
807
+
808
+ Good code is:
809
+ - **DRY**: Don't Repeat Yourself - extract common patterns
810
+ - **Type-safe**: Let the compiler help you - avoid unnecessary `as` casts
811
+ - **Clear**: Easy to read and understand - self-documenting with good names
812
+ - **Focused**: Each function does one thing well - single responsibility
813
+ - **Consistent**: Follows established patterns - use the same approach throughout
814
+
815
+ **Remember:** If you copy-paste code, you're doing it wrong. Extract it to a reusable function or class.
816
+
817
+ ## Testing Instructions
818
+
819
+ ### Unit Testing
820
+
821
+ **Test Framework:** Jest with ts-jest preset
822
+
823
+ **Test File Location:** Tests are located in the `tests/` directory
824
+
825
+ **Test File Naming:** Test files should be named with `.spec.ts` suffix (e.g., `orchestrator.spec.ts`)
826
+
827
+ **Writing Tests:**
828
+
829
+ 1. **Test Structure:**
830
+ ```typescript
831
+ import { Component } from '../src/component';
832
+
833
+ describe('Component', () => {
834
+ it('should do something specific', async () => {
835
+ // Arrange
836
+ const component = new Component();
837
+
838
+ // Act
839
+ const result = await component.method();
840
+
841
+ // Assert
842
+ expect(result).toBeDefined();
843
+ });
844
+ });
845
+ ```
846
+
847
+ 2. **Mocking:**
848
+ - Mock external dependencies (LLM providers, file system)
849
+ - Use Jest mocks for async operations
850
+ - Create mock factories for complex objects (see `tests/orchestrator.spec.ts`)
851
+
852
+ 3. **Test Coverage:**
853
+ - Write tests for all public functions and methods
854
+ - Test error cases and edge conditions
855
+ - Test async operations with proper await handling
856
+ - Test both success and failure paths
857
+
858
+ 4. **Best Practices:**
859
+ - Use descriptive test names that explain what is being tested
860
+ - One assertion per test when possible
861
+ - Clean up resources (mocks, temporary files) after tests
862
+ - Use `beforeEach` and `afterEach` for test setup/teardown
863
+
864
+ **Example Test:**
865
+ ```typescript
866
+ describe('DebateOrchestrator', () => {
867
+ it('runs the correct phases for rounds=3 and calls judge synthesis', async () => {
868
+ const agents = [createMockAgent('a1', 'architect')];
869
+ const sm = createMockStateManager();
870
+ const cfg: DebateConfig = {
871
+ rounds: 3,
872
+ terminationCondition: { type: 'fixed' },
873
+ synthesisMethod: 'judge',
874
+ includeFullHistory: true,
875
+ timeoutPerRound: 300000,
876
+ };
877
+
878
+ const orchestrator = new DebateOrchestrator(agents, mockJudge, sm, cfg);
879
+ const result = await orchestrator.runDebate('Design a caching system');
880
+
881
+ expect(result).toBeDefined();
882
+ expect(result.solution).toBeDefined();
883
+ });
884
+ });
885
+ ```
886
+
887
+ ### Coverage Measurement
888
+
889
+ **Running Coverage:**
890
+ ```bash
891
+ npm run test:coverage
892
+ ```
893
+
894
+ **Coverage Goals:**
895
+ - Aim for a minimum of 80% code coverage across all metrics (statements, branches, functions, and lines)
896
+ - Focus on critical paths: orchestrator, agents, state management
897
+ - Ensure all error handling paths are tested
898
+
899
+ **Viewing Coverage:**
900
+ 1. **HTML Report:** Open `coverage/lcov-report/index.html` in a browser
901
+ - Navigate by file to see line-by-line coverage
902
+ - Red lines indicate uncovered code
903
+ - Yellow lines indicate partially covered branches
904
+
905
+ 2. **Terminal Output:** Coverage summary is printed to console:
906
+ ```
907
+ File | % Stmts | % Branch | % Funcs | % Lines | Uncovered Line #s
908
+ ----------|---------|----------|---------|---------|-------------------
909
+ All files | 85.42 | 78.57 | 82.14 | 85.42 |
910
+ ```
911
+
912
+ 3. **LCOV Format:** Use `coverage/lcov.info` for CI/CD integration
913
+
914
+ **Interpreting Coverage:**
915
+ - **Statements**: Percentage of executable statements covered
916
+ - **Branches**: Percentage of conditional branches covered (if/else, ternary)
917
+ - **Functions**: Percentage of functions called
918
+ - **Lines**: Percentage of lines executed
919
+
920
+ **Improving Coverage:**
921
+ - Identify uncovered files in the HTML report
922
+ - Add tests for missing branches and error cases
923
+ - Test edge conditions and boundary values
924
+ - Ensure all public API methods have tests
925
+
926
+ ### Integration Testing
927
+
928
+ For integration tests that require actual API calls:
929
+ - Use test API keys (never commit real keys)
930
+ - Keep LLM providers mocked in unit tests; only integration tests should make real API calls
931
+ - Use environment variables for test configuration
932
+ - Clean up test artifacts (debate files) after tests
933
+
934
+ ## Security Considerations
935
+
936
+ ### API Key Management
937
+
938
+ **Never commit API keys to version control:**
939
+ - API keys are stored in environment variables only
940
+ - `.env` file is gitignored (see `.gitignore`)
941
+ - Use `.env` files for local development only, and never commit them
942
+
943
+ **Environment Variables:**
944
+ - `OPENAI_API_KEY`: Required for OpenAI provider
945
+ - `OPENROUTER_API_KEY`: Required for OpenRouter provider
946
+
947
+ **Setting Environment Variables:**
948
+ - **Windows PowerShell:** `$Env:OPENAI_API_KEY = "sk-..."`
949
+ - **macOS/Linux:** `export OPENAI_API_KEY="sk-..."`
950
+ - **Using .env file:** Use `dotenv` package (already configured) to load from `.env`
951
+
952
+ **API Key Validation:**
953
+ - The system validates API keys are set before use
954
+ - Missing keys result in configuration errors (exit code 4)
955
+ - Keys are never logged or printed to console
956
+
957
+ ### Input Validation
958
+
959
+ **Problem Descriptions:**
960
+ - Validate problem descriptions are non-empty
961
+ - Sanitize file paths to prevent directory traversal
962
+ - Validate file encoding (UTF-8 only)
963
+
964
+ **Configuration Files:**
965
+ - Validate JSON structure before parsing
966
+ - Validate configuration schema (agent IDs, roles, temperatures)
967
+ - Reject invalid configuration values with clear error messages
968
+
969
+ **File Paths:**
970
+ - Resolve relative paths safely
971
+ - Validate file existence before reading
972
+ - Prevent directory traversal attacks
973
+ - Use absolute paths for sensitive operations
974
+
975
+ ### Data Security
976
+
977
+ **Debate State Files:**
978
+ - Debate state files may contain sensitive problem descriptions
979
+ - Store debate files in `debates/` directory (gitignored)
980
+ - Users should review debate files before sharing
981
+ - Consider encryption for sensitive debates in production
982
+
983
+ **Logging:**
984
+ - Never log API keys or sensitive credentials
985
+ - Avoid logging full problem descriptions in production
986
+ - Use structured logging with appropriate log levels
987
+
988
+ ### Dependency Security
989
+
990
+ **Regular Updates:**
991
+ - Keep dependencies updated to patch vulnerabilities
992
+ - Use `npm audit` to check for known vulnerabilities
993
+ - Review security advisories for dependencies
994
+
995
+ **Checking for Vulnerabilities:**
996
+ ```bash
997
+ npm audit
998
+ ```
999
+
1000
+ **Fixing Vulnerabilities:**
1001
+ ```bash
1002
+ npm audit fix
1003
+ ```
1004
+
1005
+ For vulnerabilities that require manual intervention, review the advisory and update dependencies accordingly.
1006
+
1007
+ ### Secure Coding Practices
1008
+
1009
+ **Type Safety:**
1010
+ - Use TypeScript's type system to prevent runtime errors
1011
+ - Validate external inputs (API responses, file contents)
1012
+ - Use type guards for runtime type checking
1013
+
1014
+ **Error Handling:**
1015
+ - Don't expose internal implementation details in error messages
1016
+ - Log errors with appropriate detail levels
1017
+ - Use exit codes for different error conditions
1018
+
1019
+ **File System:**
1020
+ - Validate file paths before operations
1021
+ - Use safe path resolution (avoid `../` traversal)
1022
+ - Handle file system errors gracefully
1023
+
1024
+ **Network Security:**
1025
+ - Use HTTPS for all API calls (enforced by OpenAI/OpenRouter SDKs)
1026
+ - Validate API responses before processing
1027
+ - Handle network errors and timeouts appropriately
1028
+
1029
+ ### Recommendations
1030
+
1031
+ 1. **Development:**
1032
+ - Use separate API keys for development and production
1033
+ - Rotate API keys regularly
1034
+ - Use environment variable management tools
1035
+
1036
+ 2. **CI/CD:**
1037
+ - Store API keys in secure secrets management (GitHub Secrets, etc.)
1038
+ - Never hardcode API keys in CI/CD scripts
1039
+ - Use test API keys for automated tests
1040
+
1041
+ 3. **Production:**
1042
+ - Use secret management services (AWS Secrets Manager, HashiCorp Vault)
1043
+ - Implement API key rotation policies
1044
+ - Monitor API usage for anomalies
1045
+
1046
+ 4. **Code Review:**
1047
+ - Review all code for hardcoded secrets
1048
+ - Verify environment variable usage
1049
+ - Check for accidental credential logging
1050
+
1051
+ For more detailed security information, refer to the configuration documentation in `./docs/configuration.md`.
1052
+