dialectic 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119) hide show
  1. package/.cursor/commands/setup-test.mdc +175 -0
  2. package/.cursor/rules/basic-code-cleanup.mdc +1110 -0
  3. package/.cursor/rules/riper5.mdc +96 -0
  4. package/.env.example +6 -0
  5. package/AGENTS.md +1052 -0
  6. package/LICENSE +21 -0
  7. package/README.md +93 -0
  8. package/WARP.md +113 -0
  9. package/dialectic-1.0.0.tgz +0 -0
  10. package/dialectic.js +10 -0
  11. package/docs/commands.md +375 -0
  12. package/docs/configuration.md +882 -0
  13. package/docs/context_summarization.md +1023 -0
  14. package/docs/debate_flow.md +1127 -0
  15. package/docs/eval_flow.md +795 -0
  16. package/docs/evaluator.md +141 -0
  17. package/examples/debate-config-openrouter.json +48 -0
  18. package/examples/debate_config1.json +48 -0
  19. package/examples/eval/eval1/eval_config1.json +13 -0
  20. package/examples/eval/eval1/result1.json +62 -0
  21. package/examples/eval/eval1/result2.json +97 -0
  22. package/examples/eval_summary_format.md +11 -0
  23. package/examples/example3/debate-config.json +64 -0
  24. package/examples/example3/eval_config2.json +25 -0
  25. package/examples/example3/problem.md +17 -0
  26. package/examples/example3/rounds_test/eval_run.sh +16 -0
  27. package/examples/example3/rounds_test/run_test.sh +16 -0
  28. package/examples/kata1/architect-only-solution_2-rounds.json +121 -0
  29. package/examples/kata1/architect-perf-solution_2-rounds.json +234 -0
  30. package/examples/kata1/debate-config-kata1.json +54 -0
  31. package/examples/kata1/eval_architect-only_2-rounds.json +97 -0
  32. package/examples/kata1/eval_architect-perf_2-rounds.json +97 -0
  33. package/examples/kata1/kata1-report.md +12224 -0
  34. package/examples/kata1/kata1-report_temps-01_01_01_07.md +2451 -0
  35. package/examples/kata1/kata1.md +5 -0
  36. package/examples/kata1/meta.txt +1 -0
  37. package/examples/kata2/debate-config.json +54 -0
  38. package/examples/kata2/eval_config1.json +21 -0
  39. package/examples/kata2/eval_config2.json +25 -0
  40. package/examples/kata2/kata2.md +5 -0
  41. package/examples/kata2/only_architect/debate-config.json +45 -0
  42. package/examples/kata2/only_architect/eval_run.sh +11 -0
  43. package/examples/kata2/only_architect/run_test.sh +5 -0
  44. package/examples/kata2/rounds_test/eval_run.sh +11 -0
  45. package/examples/kata2/rounds_test/run_test.sh +5 -0
  46. package/examples/kata2/summary_length_test/eval_run.sh +11 -0
  47. package/examples/kata2/summary_length_test/eval_run_w_clarify.sh +7 -0
  48. package/examples/kata2/summary_length_test/run_test.sh +5 -0
  49. package/examples/task-queue/debate-config.json +76 -0
  50. package/examples/task-queue/debate_report.md +566 -0
  51. package/examples/task-queue/task-queue-system.md +25 -0
  52. package/jest.config.ts +13 -0
  53. package/multi_agent_debate_spec.md +2980 -0
  54. package/package.json +38 -0
  55. package/sanity-check-problem.txt +9 -0
  56. package/src/agents/prompts/architect-prompts.ts +203 -0
  57. package/src/agents/prompts/generalist-prompts.ts +157 -0
  58. package/src/agents/prompts/index.ts +41 -0
  59. package/src/agents/prompts/judge-prompts.ts +19 -0
  60. package/src/agents/prompts/kiss-prompts.ts +230 -0
  61. package/src/agents/prompts/performance-prompts.ts +142 -0
  62. package/src/agents/prompts/prompt-types.ts +68 -0
  63. package/src/agents/prompts/security-prompts.ts +149 -0
  64. package/src/agents/prompts/shared.ts +144 -0
  65. package/src/agents/prompts/testing-prompts.ts +149 -0
  66. package/src/agents/role-based-agent.ts +386 -0
  67. package/src/cli/commands/debate.ts +761 -0
  68. package/src/cli/commands/eval.ts +475 -0
  69. package/src/cli/commands/report.ts +265 -0
  70. package/src/cli/index.ts +79 -0
  71. package/src/core/agent.ts +198 -0
  72. package/src/core/clarifications.ts +34 -0
  73. package/src/core/judge.ts +257 -0
  74. package/src/core/orchestrator.ts +432 -0
  75. package/src/core/state-manager.ts +322 -0
  76. package/src/eval/evaluator-agent.ts +130 -0
  77. package/src/eval/prompts/system.md +41 -0
  78. package/src/eval/prompts/user.md +64 -0
  79. package/src/providers/llm-provider.ts +25 -0
  80. package/src/providers/openai-provider.ts +84 -0
  81. package/src/providers/openrouter-provider.ts +122 -0
  82. package/src/providers/provider-factory.ts +64 -0
  83. package/src/types/agent.types.ts +141 -0
  84. package/src/types/config.types.ts +47 -0
  85. package/src/types/debate.types.ts +237 -0
  86. package/src/types/eval.types.ts +85 -0
  87. package/src/utils/common.ts +104 -0
  88. package/src/utils/context-formatter.ts +102 -0
  89. package/src/utils/context-summarizer.ts +143 -0
  90. package/src/utils/env-loader.ts +46 -0
  91. package/src/utils/exit-codes.ts +5 -0
  92. package/src/utils/id.ts +11 -0
  93. package/src/utils/logger.ts +48 -0
  94. package/src/utils/paths.ts +10 -0
  95. package/src/utils/progress-ui.ts +313 -0
  96. package/src/utils/prompt-loader.ts +79 -0
  97. package/src/utils/report-generator.ts +301 -0
  98. package/tests/clarifications.spec.ts +128 -0
  99. package/tests/cli.debate.spec.ts +144 -0
  100. package/tests/config-loading.spec.ts +206 -0
  101. package/tests/context-summarizer.spec.ts +131 -0
  102. package/tests/debate-config-custom.json +38 -0
  103. package/tests/env-loader.spec.ts +149 -0
  104. package/tests/eval.command.spec.ts +1191 -0
  105. package/tests/logger.spec.ts +19 -0
  106. package/tests/openai-provider.spec.ts +26 -0
  107. package/tests/openrouter-provider.spec.ts +279 -0
  108. package/tests/orchestrator-summary.spec.ts +386 -0
  109. package/tests/orchestrator.spec.ts +207 -0
  110. package/tests/prompt-loader.spec.ts +52 -0
  111. package/tests/prompts/architect.md +16 -0
  112. package/tests/provider-factory.spec.ts +150 -0
  113. package/tests/report.command.spec.ts +546 -0
  114. package/tests/role-based-agent-summary.spec.ts +476 -0
  115. package/tests/security-agent.spec.ts +221 -0
  116. package/tests/shared-prompts.spec.ts +318 -0
  117. package/tests/state-manager.spec.ts +251 -0
  118. package/tests/summary-prompts.spec.ts +153 -0
  119. package/tsconfig.json +49 -0
@@ -0,0 +1,175 @@
1
+ ---
2
+ description: Generate a complete test setup for dialectic debate testing with debate config, evaluation config, and test scripts
3
+ ---
4
+
5
+ # Setup Test Infrastructure
6
+
7
+ Generate a complete test setup for dialectic debate testing. This command creates all necessary configuration files and scripts to run debates with different parameters and evaluate the results.
8
+
9
+ ## Input Parameters
10
+
11
+ The command accepts the following parameters (all optional with sensible defaults):
12
+
13
+ - **output_directory** (default: `/mnt/c/tmp/dialectic/{example_directory}/{test_case_name}`)
14
+ - The directory where debate outputs and evaluation results will be saved
15
+ - Should be a WSL-compatible path (e.g., `/mnt/c/...` for Windows drives)
16
+ - The directory will be created automatically if it doesn't exist
17
+
18
+ - **example_directory** (default: `./examples/example3`)
19
+ - The base directory containing the example (must contain `problem.md`)
20
+ - Relative to the project root
21
+ - This directory will contain the generated config files
22
+
23
+ - **test_case_name** (default: `rounds_test`)
24
+ - Name of the test case (e.g., `rounds_test`, `agents_test`, `summary_test`)
25
+ - Used to create a subdirectory under `example_directory` for test scripts
26
+
27
+ - **model** (default: `google/gemini-2.5-flash-lite`)
28
+ - The LLM model identifier to use for all agents and evaluators
29
+ - Format depends on provider (e.g., `google/gemini-2.5-flash-lite` for OpenRouter, `gpt-4` for OpenAI)
30
+
31
+ - **provider** (default: `openrouter`)
32
+ - The LLM provider to use (`openai` or `openrouter`)
33
+ - All agents and evaluators will use this provider
34
+
35
+ ## Generated Files
36
+
37
+ The command will create the following files:
38
+
39
+ ### 1. `{example_directory}/debate-config.json`
40
+ Debate configuration file with:
41
+ - Three agents: architect, performance, and security (all enabled)
42
+ - Judge agent with generalist role
43
+ - All agents configured with the specified model and provider
44
+ - Summarization enabled with threshold 10000 and maxLength 5000
45
+ - Default debate settings (3 rounds, fixed termination, judge synthesis)
46
+
47
+ ### 2. `{example_directory}/eval_config2.json`
48
+ Evaluation configuration file with:
49
+ - Two evaluator agents (eval-1 and eval-2)
50
+ - Both configured with the specified model and provider
51
+ - Custom prompt paths (relative to example directory parent)
52
+ - Timeout set to 30000ms
53
+
54
+ ### 3. `{example_directory}/{test_case_name}/run_test.sh`
55
+ Test execution script that:
56
+ - Uses variables for BASE_DIR and OUTPUT_DIR
57
+ - Creates the output directory if it doesn't exist
58
+ - Runs 5 debate commands with rounds 1-5
59
+ - Outputs results to the specified output directory
60
+ - Uses WSL-compatible paths (converts Windows paths to `/mnt/c/...` format)
61
+ - Includes shebang (`#!/bin/bash`) and is executable
62
+
63
+ ### 4. `{example_directory}/{test_case_name}/eval_run.sh`
64
+ Evaluation script that:
65
+ - Uses the same path variables as run_test.sh
66
+ - Evaluates all 5 debate outputs from the test run
67
+ - Writes evaluation results to the output directory
68
+ - Includes shebang and is executable
69
+
70
+ ## Implementation Instructions
71
+
72
+ When executing this command, follow these steps precisely:
73
+
74
+ 1. **Validate Inputs**
75
+ - If `example_directory` doesn't exist, create it
76
+ - Verify `{example_directory}/problem.md` exists (create placeholder if needed)
77
+ - Convert Windows-style output paths (`C:/...`) to WSL format (`/mnt/c/...`)
78
+
79
+ 2. **Create Debate Config**
80
+ - Create `{example_directory}/debate-config.json`
81
+ - Configure three agents: architect, performance, security (all enabled, temperature 0.5)
82
+ - Configure judge: generalist role, temperature 0.5
83
+ - Set all agents and judge to use the specified model and provider
84
+ - Enable summarization: threshold 10000, maxLength 5000, method "length-based"
85
+ - Set default debate settings: rounds 3, fixed termination, judge synthesis, includeFullHistory true, timeoutPerRound 300000
86
+
87
+ 3. **Create Evaluation Config**
88
+    - Create `{example_directory}/eval_config2.json`
89
+ - Configure two evaluator agents (eval-1 and eval-2)
90
+ - Set both to use the specified model and provider
91
+ - Set timeout to 30000ms
92
+ - Set systemPromptPath to `../eval_system.md` and userPromptPath to `../eval_user.md`
93
+ - Both agents enabled by default
94
+
95
+ 4. **Create Test Directory**
96
+ - Create directory `{example_directory}/{test_case_name}/`
97
+ - Ensure parent directories exist
98
+
99
+ 5. **Create run_test.sh Script**
100
+ - Set BASE_DIR variable to `{example_directory}` (relative path)
101
+ - Set OUTPUT_DIR variable to the output directory (WSL-compatible path)
102
+ - Add `mkdir -p "$OUTPUT_DIR"` to ensure directory exists
103
+ - Generate 5 dialectic debate commands for rounds 1-5:
104
+ - Format: `dialectic debate -r {round} -c "$BASE_DIR/debate-config.json" -o "$OUTPUT_DIR/all_agents_{round}R_no_clarify.json" -p "$BASE_DIR/problem.md" -v`
105
+ - Add executable permissions (shebang and chmod notation in comments)
106
+
107
+ 6. **Create eval_run.sh Script**
108
+ - Use the same BASE_DIR and OUTPUT_DIR variables
109
+ - Generate 5 dialectic eval commands:
110
+      - Format: `dialectic eval -c "$BASE_DIR/eval_config2.json" -d "$OUTPUT_DIR/all_agents_{round}R_no_clarify.json" -v -o "$OUTPUT_DIR/eval2_all_agents_{round}R_no_clarify.json"`
111
+ - Add executable permissions
112
+
113
+ 7. **Path Handling**
114
+ - All paths in scripts should use forward slashes
115
+ - Convert Windows paths to WSL format (C:/ → /mnt/c/)
116
+ - Use variables consistently throughout scripts
117
+ - Ensure paths are properly quoted in bash scripts
118
+
119
+ 8. **File Permissions**
120
+ - Add `#!/bin/bash` shebang to both scripts
121
+ - Note in comments that scripts should be made executable with `chmod +x`
122
+ - Scripts should work in both WSL and Git Bash environments
123
+
124
+ ## Default Values
125
+
126
+ If parameters are not provided, use these defaults:
127
+ - `output_directory`: `/mnt/c/tmp/dialectic/{example_directory}/{test_case_name}`
128
+ - `example_directory`: `./examples/example3`
129
+ - `test_case_name`: `rounds_test`
130
+ - `model`: `google/gemini-2.5-flash-lite`
131
+ - `provider`: `openrouter`
132
+
133
+ ## Path Conversion Rules
134
+
135
+ - If output_directory starts with `C:/`, convert to `/mnt/c/`
136
+ - If output_directory starts with `D:/`, convert to `/mnt/d/`
137
+ - Apply similar conversion for other Windows drive letters
138
+ - Preserve forward slashes in all paths
139
+ - Ensure output directory path uses forward slashes throughout
140
+
141
+ ## Verification
142
+
143
+ After generation:
144
+ - All files should be created in the correct locations
145
+ - JSON files should be valid JSON (verify with a JSON parser)
146
+ - Scripts should have proper shebang and variable usage
147
+ - Paths should be WSL-compatible
148
+ - The user should be able to run `./{test_case_name}/run_test.sh` immediately
149
+
150
+ ## Example Usage
151
+
152
+ Default usage (all defaults):
153
+ ```
154
+ Generate test setup for example3 with rounds_test
155
+ ```
156
+
157
+ Custom parameters:
158
+ ```
159
+ Generate test setup with:
160
+ - example_directory: ./examples/my_example
161
+ - test_case_name: agents_test
162
+ - output_directory: /mnt/c/data/tests/my_example
163
+ - model: gpt-4
164
+ - provider: openai
165
+ ```
166
+
167
+ ## Output
168
+
169
+ After successful execution, the user should be able to:
170
+ 1. Navigate to `{example_directory}/{test_case_name}/`
171
+ 2. Run `./run_test.sh` to execute all test debates
172
+ 3. Run `./eval_run.sh` to evaluate all debate outputs
173
+ 4. Find all outputs in the specified output directory
174
+
175
+ All scripts should be ready to execute without modification.