dialectic 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.cursor/commands/setup-test.mdc +175 -0
- package/.cursor/rules/basic-code-cleanup.mdc +1110 -0
- package/.cursor/rules/riper5.mdc +96 -0
- package/.env.example +6 -0
- package/AGENTS.md +1052 -0
- package/LICENSE +21 -0
- package/README.md +93 -0
- package/WARP.md +113 -0
- package/dialectic-1.0.0.tgz +0 -0
- package/dialectic.js +10 -0
- package/docs/commands.md +375 -0
- package/docs/configuration.md +882 -0
- package/docs/context_summarization.md +1023 -0
- package/docs/debate_flow.md +1127 -0
- package/docs/eval_flow.md +795 -0
- package/docs/evaluator.md +141 -0
- package/examples/debate-config-openrouter.json +48 -0
- package/examples/debate_config1.json +48 -0
- package/examples/eval/eval1/eval_config1.json +13 -0
- package/examples/eval/eval1/result1.json +62 -0
- package/examples/eval/eval1/result2.json +97 -0
- package/examples/eval_summary_format.md +11 -0
- package/examples/example3/debate-config.json +64 -0
- package/examples/example3/eval_config2.json +25 -0
- package/examples/example3/problem.md +17 -0
- package/examples/example3/rounds_test/eval_run.sh +16 -0
- package/examples/example3/rounds_test/run_test.sh +16 -0
- package/examples/kata1/architect-only-solution_2-rounds.json +121 -0
- package/examples/kata1/architect-perf-solution_2-rounds.json +234 -0
- package/examples/kata1/debate-config-kata1.json +54 -0
- package/examples/kata1/eval_architect-only_2-rounds.json +97 -0
- package/examples/kata1/eval_architect-perf_2-rounds.json +97 -0
- package/examples/kata1/kata1-report.md +12224 -0
- package/examples/kata1/kata1-report_temps-01_01_01_07.md +2451 -0
- package/examples/kata1/kata1.md +5 -0
- package/examples/kata1/meta.txt +1 -0
- package/examples/kata2/debate-config.json +54 -0
- package/examples/kata2/eval_config1.json +21 -0
- package/examples/kata2/eval_config2.json +25 -0
- package/examples/kata2/kata2.md +5 -0
- package/examples/kata2/only_architect/debate-config.json +45 -0
- package/examples/kata2/only_architect/eval_run.sh +11 -0
- package/examples/kata2/only_architect/run_test.sh +5 -0
- package/examples/kata2/rounds_test/eval_run.sh +11 -0
- package/examples/kata2/rounds_test/run_test.sh +5 -0
- package/examples/kata2/summary_length_test/eval_run.sh +11 -0
- package/examples/kata2/summary_length_test/eval_run_w_clarify.sh +7 -0
- package/examples/kata2/summary_length_test/run_test.sh +5 -0
- package/examples/task-queue/debate-config.json +76 -0
- package/examples/task-queue/debate_report.md +566 -0
- package/examples/task-queue/task-queue-system.md +25 -0
- package/jest.config.ts +13 -0
- package/multi_agent_debate_spec.md +2980 -0
- package/package.json +38 -0
- package/sanity-check-problem.txt +9 -0
- package/src/agents/prompts/architect-prompts.ts +203 -0
- package/src/agents/prompts/generalist-prompts.ts +157 -0
- package/src/agents/prompts/index.ts +41 -0
- package/src/agents/prompts/judge-prompts.ts +19 -0
- package/src/agents/prompts/kiss-prompts.ts +230 -0
- package/src/agents/prompts/performance-prompts.ts +142 -0
- package/src/agents/prompts/prompt-types.ts +68 -0
- package/src/agents/prompts/security-prompts.ts +149 -0
- package/src/agents/prompts/shared.ts +144 -0
- package/src/agents/prompts/testing-prompts.ts +149 -0
- package/src/agents/role-based-agent.ts +386 -0
- package/src/cli/commands/debate.ts +761 -0
- package/src/cli/commands/eval.ts +475 -0
- package/src/cli/commands/report.ts +265 -0
- package/src/cli/index.ts +79 -0
- package/src/core/agent.ts +198 -0
- package/src/core/clarifications.ts +34 -0
- package/src/core/judge.ts +257 -0
- package/src/core/orchestrator.ts +432 -0
- package/src/core/state-manager.ts +322 -0
- package/src/eval/evaluator-agent.ts +130 -0
- package/src/eval/prompts/system.md +41 -0
- package/src/eval/prompts/user.md +64 -0
- package/src/providers/llm-provider.ts +25 -0
- package/src/providers/openai-provider.ts +84 -0
- package/src/providers/openrouter-provider.ts +122 -0
- package/src/providers/provider-factory.ts +64 -0
- package/src/types/agent.types.ts +141 -0
- package/src/types/config.types.ts +47 -0
- package/src/types/debate.types.ts +237 -0
- package/src/types/eval.types.ts +85 -0
- package/src/utils/common.ts +104 -0
- package/src/utils/context-formatter.ts +102 -0
- package/src/utils/context-summarizer.ts +143 -0
- package/src/utils/env-loader.ts +46 -0
- package/src/utils/exit-codes.ts +5 -0
- package/src/utils/id.ts +11 -0
- package/src/utils/logger.ts +48 -0
- package/src/utils/paths.ts +10 -0
- package/src/utils/progress-ui.ts +313 -0
- package/src/utils/prompt-loader.ts +79 -0
- package/src/utils/report-generator.ts +301 -0
- package/tests/clarifications.spec.ts +128 -0
- package/tests/cli.debate.spec.ts +144 -0
- package/tests/config-loading.spec.ts +206 -0
- package/tests/context-summarizer.spec.ts +131 -0
- package/tests/debate-config-custom.json +38 -0
- package/tests/env-loader.spec.ts +149 -0
- package/tests/eval.command.spec.ts +1191 -0
- package/tests/logger.spec.ts +19 -0
- package/tests/openai-provider.spec.ts +26 -0
- package/tests/openrouter-provider.spec.ts +279 -0
- package/tests/orchestrator-summary.spec.ts +386 -0
- package/tests/orchestrator.spec.ts +207 -0
- package/tests/prompt-loader.spec.ts +52 -0
- package/tests/prompts/architect.md +16 -0
- package/tests/provider-factory.spec.ts +150 -0
- package/tests/report.command.spec.ts +546 -0
- package/tests/role-based-agent-summary.spec.ts +476 -0
- package/tests/security-agent.spec.ts +221 -0
- package/tests/shared-prompts.spec.ts +318 -0
- package/tests/state-manager.spec.ts +251 -0
- package/tests/summary-prompts.spec.ts +153 -0
- package/tsconfig.json +49 -0
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
A university has greatly expanded its CS course and wants to be able to automate the grading of simple programming assignments.
|
|
2
|
+
|
|
3
|
+
Users: 300+ students per year, plus staff and admin.
|
|
4
|
+
|
|
5
|
+
Requirements: Students must be able to upload their source code, which will be run and graded. Grades and runs must be persistent and auditable. There must be a plagiarism detection system that compares each submission with other submissions and also submits it to a web-based service (TurnItIn). There must be some level of integration with the University's learning management system (LMS).
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
source: https://www.architecturalkatas.com/kata.html?kata=CheckYourWork.json
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
{
|
|
2
|
+
"agents": [
|
|
3
|
+
{
|
|
4
|
+
"id": "agent-architect",
|
|
5
|
+
"name": "System Architect",
|
|
6
|
+
"role": "architect",
|
|
7
|
+
"model": "claude-sonnet-4",
|
|
8
|
+
"provider": "openai",
|
|
9
|
+
"temperature": 0.5,
|
|
10
|
+
"enabled": true
|
|
11
|
+
},
|
|
12
|
+
{
|
|
13
|
+
"id": "agent-performance",
|
|
14
|
+
"name": "Performance Engineer",
|
|
15
|
+
"role": "performance",
|
|
16
|
+
"model": "claude-sonnet-4",
|
|
17
|
+
"provider": "openai",
|
|
18
|
+
"temperature": 0.5,
|
|
19
|
+
"enabled": true
|
|
20
|
+
},
|
|
21
|
+
{
|
|
22
|
+
"id": "agent-security",
|
|
23
|
+
"name": "Security Specialist",
|
|
24
|
+
"role": "security",
|
|
25
|
+
"model": "claude-sonnet-4",
|
|
26
|
+
"provider": "openai",
|
|
27
|
+
"temperature": 0.5,
|
|
28
|
+
"enabled": true
|
|
29
|
+
}
|
|
30
|
+
],
|
|
31
|
+
"judge": {
|
|
32
|
+
"id": "judge-main",
|
|
33
|
+
"name": "Technical Judge",
|
|
34
|
+
"role": "generalist",
|
|
35
|
+
"model": "claude-sonnet-4",
|
|
36
|
+
"provider": "openai",
|
|
37
|
+
"temperature": 0.5
|
|
38
|
+
},
|
|
39
|
+
"debate": {
|
|
40
|
+
"rounds": 3,
|
|
41
|
+
"terminationCondition": {
|
|
42
|
+
"type": "fixed"
|
|
43
|
+
},
|
|
44
|
+
"synthesisMethod": "judge",
|
|
45
|
+
"includeFullHistory": true,
|
|
46
|
+
"timeoutPerRound": 300000,
|
|
47
|
+
"summarization": {
|
|
48
|
+
"enabled": false,
|
|
49
|
+
"threshold": 2500,
|
|
50
|
+
"maxLength": 1250,
|
|
51
|
+
"method": "length-based"
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
{
|
|
2
|
+
"agents": [
|
|
3
|
+
{
|
|
4
|
+
"id": "eval-1",
|
|
5
|
+
"name": "Sonnet 4 Evaluator 1",
|
|
6
|
+
"model": "claude-sonnet-4",
|
|
7
|
+
"provider": "openai",
|
|
8
|
+
"timeout": 30000,
|
|
9
|
+
"enabled": true
|
|
10
|
+
},
|
|
11
|
+
{
|
|
12
|
+
"id": "eval-2",
|
|
13
|
+
"name": "Sonnet 4 Evaluator 2",
|
|
14
|
+
"model": "claude-sonnet-4",
|
|
15
|
+
"provider": "openai",
|
|
16
|
+
"timeout": 30000,
|
|
17
|
+
"enabled": true
|
|
18
|
+
}
|
|
19
|
+
]
|
|
20
|
+
}
|
|
21
|
+
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
{
|
|
2
|
+
"agents": [
|
|
3
|
+
{
|
|
4
|
+
"id": "eval-1",
|
|
5
|
+
"name": "Sonnet 4 Evaluator 1",
|
|
6
|
+
"model": "claude-sonnet-4",
|
|
7
|
+
"provider": "openai",
|
|
8
|
+
"timeout": 30000,
|
|
9
|
+
"enabled": true,
|
|
10
|
+
"systemPromptPath": "../eval_system.md",
|
|
11
|
+
"userPromptPath":"../eval_user.md"
|
|
12
|
+
},
|
|
13
|
+
{
|
|
14
|
+
"id": "eval-2",
|
|
15
|
+
"name": "Sonnet 4 Evaluator 2",
|
|
16
|
+
"model": "claude-sonnet-4",
|
|
17
|
+
"provider": "openai",
|
|
18
|
+
"timeout": 30000,
|
|
19
|
+
"enabled": true,
|
|
20
|
+
"systemPromptPath": "../eval_system.md",
|
|
21
|
+
"userPromptPath": "../eval_user.md"
|
|
22
|
+
}
|
|
23
|
+
]
|
|
24
|
+
}
|
|
25
|
+
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
An organization running "Mock UN" events wants to take its events online, permitting students to participate online.
|
|
2
|
+
|
|
3
|
+
Requirements: student-diplomats must be able to video-chat with one another; student-diplomats must be able to "give speeches" to the "assembly" (video-chat to the entire group); (mocked) world events (created by moderators) distributed via (mock) "news sites"; moderators must be able to monitor any video chat for appropriateness
|
|
4
|
+
|
|
5
|
+
Users: 500 or so "diplomats" per "mock UN" gathering; dozens of moderators per "mock UN"; many "mock UN"s simultaneously; no new hardware requirements on students
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
{
|
|
2
|
+
"agents": [
|
|
3
|
+
{
|
|
4
|
+
"id": "agent-architect1",
|
|
5
|
+
"name": "System Architect 1",
|
|
6
|
+
"role": "architect",
|
|
7
|
+
"model": "claude-sonnet-4",
|
|
8
|
+
"provider": "openai",
|
|
9
|
+
"temperature": 0.5,
|
|
10
|
+
"enabled": true
|
|
11
|
+
},
|
|
12
|
+
{
|
|
13
|
+
"id": "agent-architect2",
|
|
14
|
+
"name": "System Architect 2",
|
|
15
|
+
"role": "architect",
|
|
16
|
+
"model": "claude-sonnet-4",
|
|
17
|
+
"provider": "openai",
|
|
18
|
+
"temperature": 0.5,
|
|
19
|
+
"enabled": true
|
|
20
|
+
}
|
|
21
|
+
],
|
|
22
|
+
"judge": {
|
|
23
|
+
"id": "judge-main",
|
|
24
|
+
"name": "Technical Judge",
|
|
25
|
+
"role": "generalist",
|
|
26
|
+
"model": "claude-sonnet-4",
|
|
27
|
+
"provider": "openai",
|
|
28
|
+
"temperature": 0.5
|
|
29
|
+
},
|
|
30
|
+
"debate": {
|
|
31
|
+
"rounds": 3,
|
|
32
|
+
"terminationCondition": {
|
|
33
|
+
"type": "fixed"
|
|
34
|
+
},
|
|
35
|
+
"synthesisMethod": "judge",
|
|
36
|
+
"includeFullHistory": true,
|
|
37
|
+
"timeoutPerRound": 300000,
|
|
38
|
+
"summarization": {
|
|
39
|
+
"enabled": true,
|
|
40
|
+
"threshold": 10000,
|
|
41
|
+
"maxLength": 5000,
|
|
42
|
+
"method": "length-based"
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# dialectic eval -c ./examples/kata2/eval_config1.json -d ./examples/kata2/only_architect/all_agents_1R_no_clarify.json -v -o examples/kata2/only_architect/eval_all_agents_1R_no_clarify.json -v
|
|
2
|
+
# dialectic eval -c ./examples/kata2/eval_config1.json -d ./examples/kata2/only_architect/all_agents_2R_no_clarify.json -v -o examples/kata2/only_architect/eval_all_agents_2R_no_clarify.json -v
|
|
3
|
+
# dialectic eval -c ./examples/kata2/eval_config1.json -d ./examples/kata2/only_architect/all_agents_3R_no_clarify.json -v -o examples/kata2/only_architect/eval_all_agents_3R_no_clarify.json -v
|
|
4
|
+
# dialectic eval -c ./examples/kata2/eval_config1.json -d ./examples/kata2/only_architect/all_agents_4R_no_clarify.json -v -o examples/kata2/only_architect/eval_all_agents_4R_no_clarify.json -v
|
|
5
|
+
# dialectic eval -c ./examples/kata2/eval_config1.json -d ./examples/kata2/only_architect/all_agents_5R_no_clarify.json -v -o examples/kata2/only_architect/eval_all_agents_5R_no_clarify.json -v
|
|
6
|
+
|
|
7
|
+
dialectic eval -c ./examples/kata2/eval_config2.json -d ./examples/kata2/only_architect/all_agents_1R_no_clarify.json -v -o examples/kata2/only_architect/eval2_all_agents_1R_no_clarify.json -v
|
|
8
|
+
dialectic eval -c ./examples/kata2/eval_config2.json -d ./examples/kata2/only_architect/all_agents_2R_no_clarify.json -v -o examples/kata2/only_architect/eval2_all_agents_2R_no_clarify.json -v
|
|
9
|
+
dialectic eval -c ./examples/kata2/eval_config2.json -d ./examples/kata2/only_architect/all_agents_3R_no_clarify.json -v -o examples/kata2/only_architect/eval2_all_agents_3R_no_clarify.json -v
|
|
10
|
+
dialectic eval -c ./examples/kata2/eval_config2.json -d ./examples/kata2/only_architect/all_agents_4R_no_clarify.json -v -o examples/kata2/only_architect/eval2_all_agents_4R_no_clarify.json -v
|
|
11
|
+
dialectic eval -c ./examples/kata2/eval_config2.json -d ./examples/kata2/only_architect/all_agents_5R_no_clarify.json -v -o examples/kata2/only_architect/eval2_all_agents_5R_no_clarify.json -v
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
dialectic debate -r 1 -c examples/kata2/only_architect/debate-config.json -o examples/kata2/only_architect/all_agents_1R_no_clarify.json -p examples/kata2/kata2.md -v
|
|
2
|
+
dialectic debate -r 2 -c examples/kata2/only_architect/debate-config.json -o examples/kata2/only_architect/all_agents_2R_no_clarify.json -p examples/kata2/kata2.md -v
|
|
3
|
+
dialectic debate -r 3 -c examples/kata2/only_architect/debate-config.json -o examples/kata2/only_architect/all_agents_3R_no_clarify.json -p examples/kata2/kata2.md -v
|
|
4
|
+
dialectic debate -r 4 -c examples/kata2/only_architect/debate-config.json -o examples/kata2/only_architect/all_agents_4R_no_clarify.json -p examples/kata2/kata2.md -v
|
|
5
|
+
dialectic debate -r 5 -c examples/kata2/only_architect/debate-config.json -o examples/kata2/only_architect/all_agents_5R_no_clarify.json -p examples/kata2/kata2.md -v
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# dialectic eval -c ./examples/kata2/eval_config1.json -d ./examples/kata2/rounds_test/all_agents_1R_no_clarify.json -v -o examples/kata2/rounds_test/eval_all_agents_1R_no_clarify.json -v
|
|
2
|
+
# dialectic eval -c ./examples/kata2/eval_config1.json -d ./examples/kata2/rounds_test/all_agents_2R_no_clarify.json -v -o examples/kata2/rounds_test/eval_all_agents_2R_no_clarify.json -v
|
|
3
|
+
# dialectic eval -c ./examples/kata2/eval_config1.json -d ./examples/kata2/rounds_test/all_agents_3R_no_clarify.json -v -o examples/kata2/rounds_test/eval_all_agents_3R_no_clarify.json -v
|
|
4
|
+
# dialectic eval -c ./examples/kata2/eval_config1.json -d ./examples/kata2/rounds_test/all_agents_4R_no_clarify.json -v -o examples/kata2/rounds_test/eval_all_agents_4R_no_clarify.json -v
|
|
5
|
+
# dialectic eval -c ./examples/kata2/eval_config1.json -d ./examples/kata2/rounds_test/all_agents_5R_no_clarify.json -v -o examples/kata2/rounds_test/eval_all_agents_5R_no_clarify.json -v
|
|
6
|
+
|
|
7
|
+
dialectic eval -c ./examples/kata2/eval_config2.json -d ./examples/kata2/rounds_test/all_agents_1R_no_clarify.json -v -o examples/kata2/rounds_test/eval2_all_agents_1R_no_clarify.json -v
|
|
8
|
+
dialectic eval -c ./examples/kata2/eval_config2.json -d ./examples/kata2/rounds_test/all_agents_2R_no_clarify.json -v -o examples/kata2/rounds_test/eval2_all_agents_2R_no_clarify.json -v
|
|
9
|
+
dialectic eval -c ./examples/kata2/eval_config2.json -d ./examples/kata2/rounds_test/all_agents_3R_no_clarify.json -v -o examples/kata2/rounds_test/eval2_all_agents_3R_no_clarify.json -v
|
|
10
|
+
dialectic eval -c ./examples/kata2/eval_config2.json -d ./examples/kata2/rounds_test/all_agents_4R_no_clarify.json -v -o examples/kata2/rounds_test/eval2_all_agents_4R_no_clarify.json -v
|
|
11
|
+
dialectic eval -c ./examples/kata2/eval_config2.json -d ./examples/kata2/rounds_test/all_agents_5R_no_clarify.json -v -o examples/kata2/rounds_test/eval2_all_agents_5R_no_clarify.json -v
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
dialectic debate -r 1 -c examples/kata2/debate-config.json -o examples/kata2/rounds_test/all_agents_1R_no_clarify.json -p examples/kata2/kata2.md -v
|
|
2
|
+
dialectic debate -r 2 -c examples/kata2/debate-config.json -o examples/kata2/rounds_test/all_agents_2R_no_clarify.json -p examples/kata2/kata2.md -v
|
|
3
|
+
dialectic debate -r 3 -c examples/kata2/debate-config.json -o examples/kata2/rounds_test/all_agents_3R_no_clarify.json -p examples/kata2/kata2.md -v
|
|
4
|
+
dialectic debate -r 4 -c examples/kata2/debate-config.json -o examples/kata2/rounds_test/all_agents_4R_no_clarify.json -p examples/kata2/kata2.md -v
|
|
5
|
+
dialectic debate -r 5 -c examples/kata2/debate-config.json -o examples/kata2/rounds_test/all_agents_5R_no_clarify.json -p examples/kata2/kata2.md -v
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# dialectic eval -c ./examples/kata2/eval_config1.json -d ./examples/kata2/summary_length_test/all_sum-length-1250.json -v -o examples/kata2/summary_length_test/eval_all_sum-length-1250.json -v
|
|
2
|
+
# dialectic eval -c ./examples/kata2/eval_config1.json -d ./examples/kata2/summary_length_test/all_sum-length-2500.json -v -o examples/kata2/summary_length_test/eval_all_sum-length-2500.json -v
|
|
3
|
+
# dialectic eval -c ./examples/kata2/eval_config1.json -d ./examples/kata2/summary_length_test/all_sum-length-5000.json -v -o examples/kata2/summary_length_test/eval_all_sum-length-5000.json -v
|
|
4
|
+
# dialectic eval -c ./examples/kata2/eval_config1.json -d ./examples/kata2/summary_length_test/all_sum-length-10000.json -v -o examples/kata2/summary_length_test/eval_all_sum-length-10000.json -v
|
|
5
|
+
# dialectic eval -c ./examples/kata2/eval_config1.json -d ./examples/kata2/summary_length_test/all_sum-length-15000.json -v -o examples/kata2/summary_length_test/eval_all_sum-length-15000.json -v
|
|
6
|
+
|
|
7
|
+
dialectic eval -c ./examples/kata2/eval_config2.json -d ./examples/kata2/summary_length_test/all_sum-length-1250.json -v -o examples/kata2/summary_length_test/eval2_all_sum-length-1250.json -v
|
|
8
|
+
dialectic eval -c ./examples/kata2/eval_config2.json -d ./examples/kata2/summary_length_test/all_sum-length-2500.json -v -o examples/kata2/summary_length_test/eval2_all_sum-length-2500.json -v
|
|
9
|
+
dialectic eval -c ./examples/kata2/eval_config2.json -d ./examples/kata2/summary_length_test/all_sum-length-5000.json -v -o examples/kata2/summary_length_test/eval2_all_sum-length-5000.json -v
|
|
10
|
+
dialectic eval -c ./examples/kata2/eval_config2.json -d ./examples/kata2/summary_length_test/all_sum-length-10000.json -v -o examples/kata2/summary_length_test/eval2_all_sum-length-10000.json -v
|
|
11
|
+
dialectic eval -c ./examples/kata2/eval_config2.json -d ./examples/kata2/summary_length_test/all_sum-length-15000.json -v -o examples/kata2/summary_length_test/eval2_all_sum-length-15000.json -v
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
# dialectic eval -c ./examples/kata2/eval_config1.json -d ./examples/kata2/summary_length_test/all_sum-length-1250_clarify.json -v -o examples/kata2/summary_length_test/eval_all_sum-length-1250_clarify.json -v
|
|
2
|
+
# dialectic eval -c ./examples/kata2/eval_config1.json -d ./examples/kata2/summary_length_test/all_sum-length-5000_clarify.json -v -o examples/kata2/summary_length_test/eval_all_sum-length-5000_clarify.json -v
|
|
3
|
+
# dialectic eval -c ./examples/kata2/eval_config1.json -d ./examples/kata2/summary_length_test/all_sum-length-10000_clarify.json -v -o examples/kata2/summary_length_test/eval_all_sum-length-10000_clarify.json -v
|
|
4
|
+
|
|
5
|
+
dialectic eval -c ./examples/kata2/eval_config2.json -d ./examples/kata2/summary_length_test/all_sum-length-1250_clarify.json -v -o examples/kata2/summary_length_test/eval2_all_sum-length-1250_clarify.json -v
|
|
6
|
+
dialectic eval -c ./examples/kata2/eval_config2.json -d ./examples/kata2/summary_length_test/all_sum-length-5000_clarify.json -v -o examples/kata2/summary_length_test/eval2_all_sum-length-5000_clarify.json -v
|
|
7
|
+
dialectic eval -c ./examples/kata2/eval_config2.json -d ./examples/kata2/summary_length_test/all_sum-length-10000_clarify.json -v -o examples/kata2/summary_length_test/eval2_all_sum-length-10000_clarify.json -v
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
dialectic debate -r 3 -c examples/kata2/summary_length_test/debate-config-sum-t-1250.json -o examples/kata2/summary_length_test/all_sum-length-1250.json -p examples/kata2/kata2.md -v
|
|
2
|
+
dialectic debate -r 3 -c examples/kata2/summary_length_test/debate-config-sum-t-2500.json -o examples/kata2/summary_length_test/all_sum-length-2500.json -p examples/kata2/kata2.md -v
|
|
3
|
+
dialectic debate -r 3 -c examples/kata2/summary_length_test/debate-config-sum-t-5000.json -o examples/kata2/summary_length_test/all_sum-length-5000.json -p examples/kata2/kata2.md -v
|
|
4
|
+
dialectic debate -r 3 -c examples/kata2/summary_length_test/debate-config-sum-t-10000.json -o examples/kata2/summary_length_test/all_sum-length-10000.json -p examples/kata2/kata2.md -v
|
|
5
|
+
dialectic debate -r 3 -c examples/kata2/summary_length_test/debate-config-sum-t-15000.json -o examples/kata2/summary_length_test/all_sum-length-15000.json -p examples/kata2/kata2.md -v
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
{
|
|
2
|
+
"agents": [
|
|
3
|
+
{
|
|
4
|
+
"id": "agent-architect",
|
|
5
|
+
"name": "System Architect",
|
|
6
|
+
"role": "architect",
|
|
7
|
+
"model": "gpt-4",
|
|
8
|
+
"provider": "openai",
|
|
9
|
+
"temperature": 0.5,
|
|
10
|
+
"enabled": true,
|
|
11
|
+
"summarization": {
|
|
12
|
+
"enabled": true,
|
|
13
|
+
"threshold": 4000,
|
|
14
|
+
"maxLength": 2000,
|
|
15
|
+
"method": "length-based"
|
|
16
|
+
}
|
|
17
|
+
},
|
|
18
|
+
{
|
|
19
|
+
"id": "agent-performance",
|
|
20
|
+
"name": "Performance Engineer",
|
|
21
|
+
"role": "performance",
|
|
22
|
+
"model": "gpt-4",
|
|
23
|
+
"provider": "openai",
|
|
24
|
+
"temperature": 0.5,
|
|
25
|
+
"enabled": true,
|
|
26
|
+
"summarization": {
|
|
27
|
+
"enabled": true,
|
|
28
|
+
"threshold": 3500,
|
|
29
|
+
"maxLength": 1750,
|
|
30
|
+
"method": "length-based"
|
|
31
|
+
}
|
|
32
|
+
},
|
|
33
|
+
{
|
|
34
|
+
"id": "agent-security",
|
|
35
|
+
"name": "Security Specialist",
|
|
36
|
+
"role": "security",
|
|
37
|
+
"model": "gpt-4",
|
|
38
|
+
"provider": "openai",
|
|
39
|
+
"temperature": 0.4,
|
|
40
|
+
"enabled": true,
|
|
41
|
+
"summarization": {
|
|
42
|
+
"enabled": true,
|
|
43
|
+
"threshold": 3000,
|
|
44
|
+
"maxLength": 1500,
|
|
45
|
+
"method": "length-based"
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
],
|
|
49
|
+
"judge": {
|
|
50
|
+
"id": "judge-main",
|
|
51
|
+
"name": "Technical Judge",
|
|
52
|
+
"role": "generalist",
|
|
53
|
+
"model": "gpt-4",
|
|
54
|
+
"provider": "openai",
|
|
55
|
+
"temperature": 0.3,
|
|
56
|
+
"summarization": {
|
|
57
|
+
"enabled": true,
|
|
58
|
+
"threshold": 6000,
|
|
59
|
+
"maxLength": 3000,
|
|
60
|
+
"method": "length-based"
|
|
61
|
+
}
|
|
62
|
+
},
|
|
63
|
+
"debate": {
|
|
64
|
+
"rounds": 4,
|
|
65
|
+
"terminationCondition": { "type": "fixed" },
|
|
66
|
+
"synthesisMethod": "judge",
|
|
67
|
+
"includeFullHistory": true,
|
|
68
|
+
"timeoutPerRound": 600000,
|
|
69
|
+
"summarization": {
|
|
70
|
+
"enabled": true,
|
|
71
|
+
"threshold": 5000,
|
|
72
|
+
"maxLength": 2500,
|
|
73
|
+
"method": "length-based"
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
}
|