dialectic 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.cursor/commands/setup-test.mdc +175 -0
- package/.cursor/rules/basic-code-cleanup.mdc +1110 -0
- package/.cursor/rules/riper5.mdc +96 -0
- package/.env.example +6 -0
- package/AGENTS.md +1052 -0
- package/LICENSE +21 -0
- package/README.md +93 -0
- package/WARP.md +113 -0
- package/dialectic-1.0.0.tgz +0 -0
- package/dialectic.js +10 -0
- package/docs/commands.md +375 -0
- package/docs/configuration.md +882 -0
- package/docs/context_summarization.md +1023 -0
- package/docs/debate_flow.md +1127 -0
- package/docs/eval_flow.md +795 -0
- package/docs/evaluator.md +141 -0
- package/examples/debate-config-openrouter.json +48 -0
- package/examples/debate_config1.json +48 -0
- package/examples/eval/eval1/eval_config1.json +13 -0
- package/examples/eval/eval1/result1.json +62 -0
- package/examples/eval/eval1/result2.json +97 -0
- package/examples/eval_summary_format.md +11 -0
- package/examples/example3/debate-config.json +64 -0
- package/examples/example3/eval_config2.json +25 -0
- package/examples/example3/problem.md +17 -0
- package/examples/example3/rounds_test/eval_run.sh +16 -0
- package/examples/example3/rounds_test/run_test.sh +16 -0
- package/examples/kata1/architect-only-solution_2-rounds.json +121 -0
- package/examples/kata1/architect-perf-solution_2-rounds.json +234 -0
- package/examples/kata1/debate-config-kata1.json +54 -0
- package/examples/kata1/eval_architect-only_2-rounds.json +97 -0
- package/examples/kata1/eval_architect-perf_2-rounds.json +97 -0
- package/examples/kata1/kata1-report.md +12224 -0
- package/examples/kata1/kata1-report_temps-01_01_01_07.md +2451 -0
- package/examples/kata1/kata1.md +5 -0
- package/examples/kata1/meta.txt +1 -0
- package/examples/kata2/debate-config.json +54 -0
- package/examples/kata2/eval_config1.json +21 -0
- package/examples/kata2/eval_config2.json +25 -0
- package/examples/kata2/kata2.md +5 -0
- package/examples/kata2/only_architect/debate-config.json +45 -0
- package/examples/kata2/only_architect/eval_run.sh +11 -0
- package/examples/kata2/only_architect/run_test.sh +5 -0
- package/examples/kata2/rounds_test/eval_run.sh +11 -0
- package/examples/kata2/rounds_test/run_test.sh +5 -0
- package/examples/kata2/summary_length_test/eval_run.sh +11 -0
- package/examples/kata2/summary_length_test/eval_run_w_clarify.sh +7 -0
- package/examples/kata2/summary_length_test/run_test.sh +5 -0
- package/examples/task-queue/debate-config.json +76 -0
- package/examples/task-queue/debate_report.md +566 -0
- package/examples/task-queue/task-queue-system.md +25 -0
- package/jest.config.ts +13 -0
- package/multi_agent_debate_spec.md +2980 -0
- package/package.json +38 -0
- package/sanity-check-problem.txt +9 -0
- package/src/agents/prompts/architect-prompts.ts +203 -0
- package/src/agents/prompts/generalist-prompts.ts +157 -0
- package/src/agents/prompts/index.ts +41 -0
- package/src/agents/prompts/judge-prompts.ts +19 -0
- package/src/agents/prompts/kiss-prompts.ts +230 -0
- package/src/agents/prompts/performance-prompts.ts +142 -0
- package/src/agents/prompts/prompt-types.ts +68 -0
- package/src/agents/prompts/security-prompts.ts +149 -0
- package/src/agents/prompts/shared.ts +144 -0
- package/src/agents/prompts/testing-prompts.ts +149 -0
- package/src/agents/role-based-agent.ts +386 -0
- package/src/cli/commands/debate.ts +761 -0
- package/src/cli/commands/eval.ts +475 -0
- package/src/cli/commands/report.ts +265 -0
- package/src/cli/index.ts +79 -0
- package/src/core/agent.ts +198 -0
- package/src/core/clarifications.ts +34 -0
- package/src/core/judge.ts +257 -0
- package/src/core/orchestrator.ts +432 -0
- package/src/core/state-manager.ts +322 -0
- package/src/eval/evaluator-agent.ts +130 -0
- package/src/eval/prompts/system.md +41 -0
- package/src/eval/prompts/user.md +64 -0
- package/src/providers/llm-provider.ts +25 -0
- package/src/providers/openai-provider.ts +84 -0
- package/src/providers/openrouter-provider.ts +122 -0
- package/src/providers/provider-factory.ts +64 -0
- package/src/types/agent.types.ts +141 -0
- package/src/types/config.types.ts +47 -0
- package/src/types/debate.types.ts +237 -0
- package/src/types/eval.types.ts +85 -0
- package/src/utils/common.ts +104 -0
- package/src/utils/context-formatter.ts +102 -0
- package/src/utils/context-summarizer.ts +143 -0
- package/src/utils/env-loader.ts +46 -0
- package/src/utils/exit-codes.ts +5 -0
- package/src/utils/id.ts +11 -0
- package/src/utils/logger.ts +48 -0
- package/src/utils/paths.ts +10 -0
- package/src/utils/progress-ui.ts +313 -0
- package/src/utils/prompt-loader.ts +79 -0
- package/src/utils/report-generator.ts +301 -0
- package/tests/clarifications.spec.ts +128 -0
- package/tests/cli.debate.spec.ts +144 -0
- package/tests/config-loading.spec.ts +206 -0
- package/tests/context-summarizer.spec.ts +131 -0
- package/tests/debate-config-custom.json +38 -0
- package/tests/env-loader.spec.ts +149 -0
- package/tests/eval.command.spec.ts +1191 -0
- package/tests/logger.spec.ts +19 -0
- package/tests/openai-provider.spec.ts +26 -0
- package/tests/openrouter-provider.spec.ts +279 -0
- package/tests/orchestrator-summary.spec.ts +386 -0
- package/tests/orchestrator.spec.ts +207 -0
- package/tests/prompt-loader.spec.ts +52 -0
- package/tests/prompts/architect.md +16 -0
- package/tests/provider-factory.spec.ts +150 -0
- package/tests/report.command.spec.ts +546 -0
- package/tests/role-based-agent-summary.spec.ts +476 -0
- package/tests/security-agent.spec.ts +221 -0
- package/tests/shared-prompts.spec.ts +318 -0
- package/tests/state-manager.spec.ts +251 -0
- package/tests/summary-prompts.spec.ts +153 -0
- package/tsconfig.json +49 -0
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared prompt instructions for all agent roles.
|
|
3
|
+
*
|
|
4
|
+
* This module provides consistent, well-formatted instructions that are appended
|
|
5
|
+
* to role-specific prompts to ensure uniform behavior across all agents.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
/**
 * Named identifiers for each kind of shared instruction block.
 *
 * Callers reference these constants instead of repeating raw string literals.
 */
export const INSTRUCTION_TYPES = {
  SYSTEM: 'system',
  PROPOSAL: 'proposal',
  CRITIQUE: 'critique',
  REFINEMENT: 'refinement',
  SUMMARIZATION: 'summarization',
  CLARIFICATION: 'clarification',
} as const;

/** Union of every string value declared in `INSTRUCTION_TYPES`. */
export type InstructionType = (typeof INSTRUCTION_TYPES)[keyof typeof INSTRUCTION_TYPES];
|
|
19
|
+
|
|
20
|
+
/**
 * Returns the shared system-level guidelines for all agents.
 *
 * The text starts with a blank-line separator, matching the other shared
 * instruction blocks in this module. Without it, concatenating this block
 * onto a prompt would place the `## General Guidelines` heading directly
 * after the prompt's final characters instead of on its own line.
 *
 * @returns Formatted string containing shared system instructions
 */
export function getSharedSystemInstructions(): string {
  return `\n\n## General Guidelines

- Avoid code snippets unless essential to illustrate a complex technical point
- Prioritize conceptual clarity over implementation details
- Use clear, direct, and simple language
- Be concise but complete — cover reasoning without unnecessary exposition
`;
}
|
|
35
|
+
|
|
36
|
+
/**
 * Builds the shared guideline block for the proposal phase.
 *
 * The result is a blank-line separator, a `## Response Guidelines` heading,
 * and one bullet per guideline, terminated by a newline.
 *
 * @returns Formatted string containing shared proposal instructions
 */
export function getSharedProposalInstructions(): string {
  const guidelines = [
    'Avoid code unless critical for explaining a subtle technical aspect',
    'Focus on main components, data flows, and key decisions',
    'Clearly justify architectural choices and trade-offs',
    'Organize content under clear section headers (Overview, Components, Flow, Trade-offs)',
    'Keep explanations structured and readable',
  ];
  const bullets = guidelines.map((guideline) => `- ${guideline}`).join('\n');
  return `\n\n## Response Guidelines\n\n${bullets}\n`;
}
|
|
52
|
+
|
|
53
|
+
/**
 * Builds the shared guideline block for the critique phase.
 *
 * @returns Formatted string containing shared critique instructions
 */
export function getSharedCritiqueInstructions(): string {
  // Each entry is one output line; the two leading and one trailing empty
  // entries produce the blank-line separator and the final newline.
  const lines = [
    '',
    '',
    '## Critique Guidelines',
    '',
    '- Critique from your specialized perspective',
    '- Avoid code unless absolutely necessary for clarification',
    '- Focus on key architectural reasoning, not implementation details',
    '- Identify strengths, weaknesses, and improvement opportunities',
    '- Provide actionable, evidence-based feedback with clear reasoning',
    '',
  ];
  return lines.join('\n');
}
|
|
69
|
+
|
|
70
|
+
/**
 * Builds the shared guideline block for the refinement phase.
 *
 * @returns Formatted string containing shared refinement instructions
 */
export function getSharedRefinementInstructions(): string {
  const heading = '\n\n## Refinement Guidelines\n\n';
  const body =
    '- Avoid code snippets\n' +
    '- Address key concerns raised in critiques directly\n' +
    '- Strengthen the solution based on valid feedback\n' +
    '- Preserve your specialized focus while improving coherence and clarity\n' +
    '- Explicitly explain how each major concern was resolved\n';
  return heading + body;
}
|
|
86
|
+
|
|
87
|
+
/**
 * Builds the shared guideline block for the summarization phase.
 *
 * @returns Formatted string containing shared summarization instructions
 */
export function getSharedSummarizationInstructions(): string {
  const guidelines = [
    'Preserve key architectural decisions, rationale, and recurring insights',
    'Focus on your specialized perspective and major component interactions',
    'Highlight patterns or trade-offs that appeared multiple times',
    'Keep summaries concise but include all critical reasoning threads',
  ];

  let text = '\n\n## Summary Guidelines\n\n';
  for (const guideline of guidelines) {
    text += `- ${guideline}\n`;
  }
  return text;
}
|
|
102
|
+
|
|
103
|
+
/**
 * Builds the shared guideline block for the clarification phase.
 *
 * The text instructs the model to answer with JSON only, in the shape
 * `{"questions":[{"text":"..."}]}`, or `{"questions":[]}` when there is
 * nothing to ask.
 *
 * @returns Formatted string containing shared clarification instructions
 */
export function getSharedClarificationInstructions(): string {
  const lines = [
    '',
    '',
    '## Clarification Guidelines',
    '',
    'Respond with ONLY JSON using this exact schema (no prose):',
    '{"questions":[{"text":"..."}]}',
    '',
    'If none are needed, return {"questions":[]}.',
    // The leading space on this line is kept verbatim from the emitted prompt text.
    ' Prioritize questions that are most likely to improve the overall solution quality',
  ];
  return lines.join('\n');
}
|
|
113
|
+
|
|
114
|
+
/**
 * Appends the shared instruction block for the given type to a prompt.
 *
 * The shared text is concatenated directly after `prompt`; no separator is
 * inserted here.
 *
 * @param prompt - The base prompt to append instructions to
 * @param instructionType - The type of shared instructions to append
 * @returns The prompt with shared instructions appended
 */
export function appendSharedInstructions(prompt: string, instructionType: InstructionType): string {
  return `${prompt}${getSharedInstructionsByType(instructionType)}`;
}
|
|
125
|
+
|
|
126
|
+
/**
 * Internal helper that resolves an instruction type to its shared text.
 *
 * @param type - The instruction type
 * @returns The appropriate shared instructions string
 * @throws Error when `type` is not one of the known instruction types
 */
function getSharedInstructionsByType(type: InstructionType): string {
  // Dispatch table: one builder function per instruction type.
  const builders = new Map<InstructionType, () => string>([
    [INSTRUCTION_TYPES.SYSTEM, getSharedSystemInstructions],
    [INSTRUCTION_TYPES.PROPOSAL, getSharedProposalInstructions],
    [INSTRUCTION_TYPES.CRITIQUE, getSharedCritiqueInstructions],
    [INSTRUCTION_TYPES.REFINEMENT, getSharedRefinementInstructions],
    [INSTRUCTION_TYPES.SUMMARIZATION, getSharedSummarizationInstructions],
    [INSTRUCTION_TYPES.CLARIFICATION, getSharedClarificationInstructions],
  ]);

  const build = builders.get(type);
  if (build === undefined) {
    // Only reachable when a caller bypasses the static types.
    throw new Error(`Unknown instruction type: ${type}`);
  }
  return build();
}
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
import { RolePrompts } from './prompt-types';
|
|
2
|
+
import { prependContext } from '../../utils/context-formatter';
|
|
3
|
+
import { appendSharedInstructions, INSTRUCTION_TYPES } from './shared';
|
|
4
|
+
import type { DebateContext } from '../../types/debate.types';
|
|
5
|
+
|
|
6
|
+
/**
 * Applies the decoration shared by the context-aware testing prompts:
 * the debate context is prepended first, then the shared instructions for
 * the given phase are appended.
 */
function decorateTestingPrompt(
  body: string,
  instructionType: Parameters<typeof appendSharedInstructions>[1],
  context?: DebateContext,
  agentId?: string,
  includeFullHistory?: boolean,
): string {
  const withContext = prependContext(body, context, agentId, includeFullHistory);
  return appendSharedInstructions(withContext, instructionType);
}

/**
 * Prompt set for the Testing role (quality assurance and testing strategy).
 *
 * Contains the system prompt plus builders for the propose, critique, refine,
 * summarize and clarify phases. Every prompt has the matching shared
 * instruction block appended. All except the system and summarize prompts
 * also have the debate context prepended.
 */
export const testingPrompts: RolePrompts = {
  systemPrompt: appendSharedInstructions(
    `You are an expert **software testing architect and quality engineer** specializing in designing verification strategies for complex distributed systems.

Your focus areas:
- System testability and observability
- Functional and non-functional validation (performance, security, usability)
- Test strategy and architecture (unit, integration, E2E, contract testing, chaos testing)
- CI/CD integration and automated quality gates
- Defect prevention through design clarity and boundary visibility

When proposing solutions:
- Define how the design can be verified and instrumented
- Identify key components and interactions that require dedicated testing strategies
- Consider how test feedback loops influence system reliability and maintainability

When critiquing:
- Identify weaknesses in testability, validation coverage, or observability
- Assess whether the design supports automation and effective regression control
- Recommend ways to improve testing clarity, traceability, and fault isolation`,
    INSTRUCTION_TYPES.SYSTEM,
  ),

  proposePrompt: (problem: string, context?: DebateContext, agentId?: string, includeFullHistory?: boolean) =>
    decorateTestingPrompt(
      `Problem to solve:
${problem}

As a testing expert, propose a comprehensive verification and testing strategy for this system.

Use this structure:
## Testability Overview
Summarize how the architecture supports verification and observability.

## Testing Strategy
Outline test levels (unit, integration, system, end-to-end) and how they apply.

## Automation Approach
Describe automation coverage, CI/CD integration, and quality gates.

## Observability & Monitoring
Explain how metrics, logging, and tracing will support defect detection and validation.

## Non-functional Testing
Address load, resilience, security, and compliance testing approaches.

## Risks & Limitations
Identify areas that are hard to test or likely to fail silently.`,
      INSTRUCTION_TYPES.PROPOSAL,
      context,
      agentId,
      includeFullHistory,
    ),

  critiquePrompt: (proposalContent: string, context?: DebateContext, agentId?: string, includeFullHistory?: boolean) =>
    decorateTestingPrompt(
      `Review this proposal from a testing and quality engineering perspective.

Proposal:
${proposalContent}

Structure your response as follows:
## Strengths
Identify well-defined testing strategies, observability mechanisms, and automation strengths.

## Weaknesses
Highlight unclear validation points, weak coverage, or poor observability.

## Suggested Improvements
Recommend strategies or architectural changes that improve testability and coverage.

## Critical Gaps
List major untested assumptions, missing validation flows, or areas with insufficient instrumentation.`,
      INSTRUCTION_TYPES.CRITIQUE,
      context,
      agentId,
      includeFullHistory,
    ),

  refinePrompt: (originalContent: string, critiquesText: string, context?: DebateContext, agentId?: string, includeFullHistory?: boolean) =>
    decorateTestingPrompt(
      `Original proposal:
${originalContent}

Critiques:
${critiquesText}

Refine your proposal to improve system testability, observability, and automation alignment.

Use this structure:
## Revised Testing Strategy
Summarize the main changes to your test architecture or approach.

## Changes Made
List what was improved and why.

## Expected Impact
Explain how these changes improve verification coverage and system reliability.

## Remaining Gaps
Mention areas that remain difficult to validate or monitor.

`,
      INSTRUCTION_TYPES.REFINEMENT,
      context,
      agentId,
      includeFullHistory,
    ),

  // The summarize prompt receives no debate context; only shared instructions are appended.
  summarizePrompt: (content: string, maxLength: number) =>
    appendSharedInstructions(
      `You are summarizing the debate history from a testing and quality perspective.

Debate history to summarize:
${content}

Summarize the discussion focusing on testability, observability, automation, and quality assurance design.

Format:
## Testing Insights
Highlight main testing strategies, coverage areas, and observability improvements.

## Major Decisions
List key agreements or approaches on how testing will be implemented.

## Remaining Gaps
Note unresolved issues or testing challenges that remain open.

Limit the summary to a maximum of ${maxLength} characters.`,
      INSTRUCTION_TYPES.SUMMARIZATION,
    ),

  clarifyPrompt: (problem: string, context?: DebateContext, agentId?: string, includeFullHistory?: boolean) =>
    decorateTestingPrompt(
      `You are preparing clarifying questions from a testing and verification perspective.

Problem to clarify:
${problem}

Ask zero or more concise, high-value questions focused on:
- Testability and validation coverage
- Data and environment dependencies
- Interfaces and integration points
- Automation feasibility
- Observability and metrics
- Edge cases and error handling
`,
      INSTRUCTION_TYPES.CLARIFICATION,
      context,
      agentId,
      includeFullHistory,
    ),
};
|
|
149
|
+
|
|
@@ -0,0 +1,386 @@
|
|
|
1
|
+
import { Agent } from '../core/agent';
|
|
2
|
+
import { AgentConfig, Proposal, Critique, PromptSource, AgentRole } from '../types/agent.types';
|
|
3
|
+
import { DebateContext, DebateSummary, ContextPreparationResult, CONTRIBUTION_TYPES, ClarificationQuestionsResponse } from '../types/debate.types';
|
|
4
|
+
import { LLMProvider } from '../providers/llm-provider';
|
|
5
|
+
import { getPromptsForRole, RolePrompts } from './prompts';
|
|
6
|
+
import { ContextSummarizer, LengthBasedSummarizer } from '../utils/context-summarizer';
|
|
7
|
+
import { writeStderr } from '../cli/index';
|
|
8
|
+
import type { SummarizationConfig } from '../types/config.types';
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
/**
|
|
12
|
+
* RoleBasedAgent is a unified AI agent implementation that supports multiple roles
|
|
13
|
+
* through a prompt-based configuration system.
|
|
14
|
+
*
|
|
15
|
+
* Unlike the previous implementation with separate classes per role (ArchitectAgent,
|
|
16
|
+
* PerformanceAgent, SecurityAgent), this class uses a registry of role-specific prompts
|
|
17
|
+
* to guide behavior, eliminating code duplication while maintaining role-specific expertise.
|
|
18
|
+
*
|
|
19
|
+
* Responsibilities:
|
|
20
|
+
* - Proposes solutions tailored to the agent's role (architect, performance, security, etc.)
|
|
21
|
+
* - Critiques proposals from other agents using role-specific perspectives
|
|
22
|
+
* - Refines its own proposals by incorporating feedback from other agents
|
|
23
|
+
* - Manages context summarization to handle large debate histories
|
|
24
|
+
*
|
|
25
|
+
* The agent leverages an LLM provider to generate outputs, with prompts dynamically
|
|
26
|
+
* selected based on the agent's configured role.
|
|
27
|
+
*
|
|
28
|
+
* Note: This class cannot be extended. Use the static `create` factory method to instantiate.
|
|
29
|
+
*/
|
|
30
|
+
export class RoleBasedAgent extends Agent {
|
|
31
|
+
private readonly resolvedSystemPrompt: string;
|
|
32
|
+
private readonly rolePrompts: RolePrompts;
|
|
33
|
+
public readonly promptSource?: PromptSource;
|
|
34
|
+
|
|
35
|
+
// Summarization-related fields
|
|
36
|
+
private readonly summarizer?: ContextSummarizer;
|
|
37
|
+
private readonly summaryConfig: SummarizationConfig;
|
|
38
|
+
public readonly summaryPromptSource?: PromptSource;
|
|
39
|
+
|
|
40
|
+
private readonly resolvedClarificationPromptText?: string;
|
|
41
|
+
|
|
42
|
+
/**
 * Private constructor; instances are obtained through the static `create` method.
 *
 * Stores the resolved prompts and summarization settings, looks up the
 * role-specific prompt set via `getPromptsForRole(config.role)`, and creates a
 * `LengthBasedSummarizer` when `summaryConfig.enabled` is truthy.
 *
 * @param config - Agent configuration, including role and model.
 * @param provider - LLMProvider instance for LLM interactions.
 * @param resolvedSystemPrompt - The final system prompt text this agent will use.
 * @param promptSource - Optional provenance metadata for verbose/persistence.
 * @param summaryConfig - Summarization configuration for this agent.
 * @param summaryPromptSource - Optional provenance metadata for summary prompt.
 * @param resolvedClarificationPromptText - Optional resolved clarification prompt text.
 */
private constructor(
  config: AgentConfig,
  provider: LLMProvider,
  resolvedSystemPrompt: string,
  promptSource: PromptSource | undefined,
  summaryConfig: SummarizationConfig,
  summaryPromptSource?: PromptSource,
  resolvedClarificationPromptText?: string,
) {
  super(config, provider);

  this.resolvedSystemPrompt = resolvedSystemPrompt;
  this.summaryConfig = summaryConfig;
  this.rolePrompts = getPromptsForRole(config.role);

  // Optional fields are assigned only when a value was actually supplied.
  if (promptSource !== undefined) {
    this.promptSource = promptSource;
  }
  if (summaryPromptSource !== undefined) {
    this.summaryPromptSource = summaryPromptSource;
  }
  if (resolvedClarificationPromptText !== undefined) {
    this.resolvedClarificationPromptText = resolvedClarificationPromptText;
  }

  // No summarizer is created when summarization is disabled.
  if (!summaryConfig.enabled) {
    return;
  }
  const { model, temperature, provider: providerName } = config;
  this.summarizer = new LengthBasedSummarizer(provider, {
    model,
    temperature,
    provider: providerName,
  });
}
|
|
83
|
+
|
|
84
|
+
/**
 * Factory for RoleBasedAgent; forwards every argument unchanged to the
 * private constructor.
 *
 * @param config - Agent configuration, including role and model.
 * @param provider - LLMProvider instance for LLM interactions.
 * @param resolvedSystemPrompt - The final system prompt text this agent will use.
 * @param promptSource - Optional provenance metadata for verbose/persistence.
 * @param summaryConfig - Summarization configuration for this agent.
 * @param summaryPromptSource - Optional provenance metadata for summary prompt.
 * @param resolvedClarificationPromptText - Optional resolved clarification prompt text.
 * @returns A new RoleBasedAgent instance configured for the specified role.
 */
static create(
  config: AgentConfig,
  provider: LLMProvider,
  resolvedSystemPrompt: string,
  promptSource: PromptSource | undefined,
  summaryConfig: SummarizationConfig,
  summaryPromptSource?: PromptSource,
  resolvedClarificationPromptText?: string,
): RoleBasedAgent {
  const agent = new RoleBasedAgent(
    config,
    provider,
    resolvedSystemPrompt,
    promptSource,
    summaryConfig,
    summaryPromptSource,
    resolvedClarificationPromptText,
  );
  return agent;
}
|
|
105
|
+
|
|
106
|
+
/**
 * Looks up the built-in system prompt for a role without creating an agent.
 *
 * @param role - The agent role to get the default prompt for.
 * @returns The `systemPrompt` of the prompt set returned by `getPromptsForRole(role)`.
 */
static defaultSystemPrompt(role: AgentRole): string {
  const { systemPrompt } = getPromptsForRole(role);
  return systemPrompt;
}
|
|
120
|
+
|
|
121
|
+
/**
 * Builds the built-in summary prompt for a role without creating an agent.
 *
 * @param role - The agent role to get the default summary prompt for.
 * @param content - The content to summarize.
 * @param maxLength - Maximum length for the summary.
 * @returns The text produced by the role's `summarizePrompt(content, maxLength)`.
 */
static defaultSummaryPrompt(role: AgentRole, content: string, maxLength: number): string {
  return getPromptsForRole(role).summarizePrompt(content, maxLength);
}
|
|
137
|
+
|
|
138
|
+
/**
 * Produces a proposal for the given problem using the role's propose prompt.
 *
 * The user prompt is built from the problem, the debate context, this agent's
 * id and the context's `includeFullHistory` flag, then handed to `proposeImpl`
 * together with the resolved system prompt.
 *
 * @param problem - The software design problem to solve.
 * @param context - Debate context containing history and state.
 * @returns A Proposal object containing the agent's solution and metadata.
 */
async propose(problem: string, context: DebateContext): Promise<Proposal> {
  const userPrompt = this.rolePrompts.proposePrompt(
    problem,
    context,
    this.config.id,
    context.includeFullHistory,
  );
  return this.proposeImpl(context, this.resolvedSystemPrompt, userPrompt);
}
|
|
153
|
+
|
|
154
|
+
/**
 * Reviews another proposal using the role's critique prompt.
 *
 * The user prompt is built from the proposal's content and the debate context,
 * then passed to `critiqueImpl` with the resolved system prompt.
 *
 * @param proposal - The proposal to critique.
 * @param context - Debate context.
 * @returns A Critique object containing the agent's review and metadata.
 */
async critique(proposal: Proposal, context: DebateContext): Promise<Critique> {
  const userPrompt = this.rolePrompts.critiquePrompt(
    proposal.content,
    context,
    this.config.id,
    context.includeFullHistory,
  );
  return this.critiqueImpl(proposal, context, this.resolvedSystemPrompt, userPrompt);
}
|
|
169
|
+
|
|
170
|
+
/**
 * Revises the original proposal in light of the supplied critiques.
 *
 * Critiques are rendered as numbered sections ("Critique 1:", "Critique 2:", ...)
 * separated by blank lines, fed into the role's refine prompt, and the result
 * is passed to `refineImpl` with the resolved system prompt.
 *
 * @param original - The original proposal to refine.
 * @param critiques - Array of critiques to address.
 * @param context - Debate context.
 * @returns A new Proposal object with the refined solution and metadata.
 */
async refine(original: Proposal, critiques: Critique[], context: DebateContext): Promise<Proposal> {
  const sections: string[] = [];
  critiques.forEach((entry, position) => {
    sections.push(`Critique ${position + 1}:\n${entry.content}`);
  });
  const critiquesText = sections.join('\n\n');

  const userPrompt = this.rolePrompts.refinePrompt(
    original.content,
    critiquesText,
    context,
    this.config.id,
    context.includeFullHistory,
  );
  return this.refineImpl(original, critiques, context, this.resolvedSystemPrompt, userPrompt);
}
|
|
187
|
+
|
|
188
|
+
/**
 * Asks the LLM for role-specific clarifying questions and returns ONLY the structured questions.
 *
 * The user prompt comes from `rolePrompts.clarifyPrompt`; when
 * `resolvedClarificationPromptText` contains non-whitespace text it is appended
 * after a blank line. The reply is expected to contain a JSON object with a
 * `questions` array. Entries without a non-blank string `text` are dropped, and
 * entries without a truthy `id` get a positional fallback id (`q1`, `q2`, ...),
 * numbered after filtering.
 *
 * A reply that cannot be parsed is reported on stderr and yields an empty list.
 * Errors thrown by the LLM call itself are not caught here.
 *
 * @param problem - The problem statement passed to the clarify prompt.
 * @param context - Debate context.
 * @returns The normalized questions; an empty list when none could be parsed.
 */
async askClarifyingQuestions(problem: string, context: DebateContext): Promise<ClarificationQuestionsResponse> {
  const system = this.resolvedSystemPrompt;
  let user = this.rolePrompts.clarifyPrompt(problem, context, this.config.id, context.includeFullHistory);
  if (this.resolvedClarificationPromptText && this.resolvedClarificationPromptText.trim().length > 0) {
    user = `${user}\n\n${this.resolvedClarificationPromptText}`;
  }
  const { text } = await this.callLLM(system, user);

  // Returns the first balanced `{...}` span in `raw`, or undefined when there is none.
  // A greedy /\{[\s\S]*\}/ would instead run to the LAST `}` in the reply, so a stray
  // brace in trailing commentary would make an otherwise valid object unparseable.
  // Braces inside JSON string literals are ignored while scanning.
  const firstJsonObject = (raw: string): string | undefined => {
    const start = raw.indexOf('{');
    if (start === -1) {
      return undefined;
    }
    let depth = 0;
    let inString = false;
    let escaped = false;
    for (let i = start; i < raw.length; i++) {
      const ch = raw[i];
      if (inString) {
        if (escaped) {
          escaped = false;
        } else if (ch === '\\') {
          escaped = true;
        } else if (ch === '"') {
          inString = false;
        }
        continue;
      }
      if (ch === '"') {
        inString = true;
      } else if (ch === '{') {
        depth++;
      } else if (ch === '}') {
        depth--;
        if (depth === 0) {
          return raw.slice(start, i + 1);
        }
      }
    }
    return undefined;
  };

  try {
    // Fall back to the whole reply when no balanced object is found, so the parse
    // error (if any) is reported by the catch below.
    const json = firstJsonObject(text) ?? text;
    const parsed = JSON.parse(json);
    const list = Array.isArray(parsed?.questions) ? parsed.questions : [];
    // The payload is untrusted LLM output, hence the loose typing plus runtime checks.
    const normalized = list
      .filter((q: any) => typeof q?.text === 'string' && q.text.trim().length > 0)
      .map((q: any, idx: number) => ({ id: q.id || `q${idx + 1}`, text: q.text }));
    return { questions: normalized };
  } catch (err: unknown) {
    // Anything can be thrown in JavaScript; only read `.message` from real Error objects.
    const reason = err instanceof Error ? err.message : String(err);
    writeStderr(`Warning: Agent ${this.config.name}: Invalid clarifications JSON. Error: ${reason}\n`);
    return { questions: [] };
  }
}
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
/**
 * Folds over the contributions in the debate history that belong to this agent.
 *
 * A contribution is relevant when its `agentId` equals this agent's id AND its
 * type is a proposal or a refinement. Each relevant contribution is mapped
 * through `callback` and the mapped value is merged into the running result by
 * `reducer`, in history order (round by round, contribution by contribution).
 *
 * @template T The type accumulated and returned by the reduction.
 * @param context DebateContext containing the full history of rounds and contributions.
 * @param callback Function to apply to each relevant contribution. Takes (contribution, roundNumber) and returns T.
 * @param initialValue The starting accumulator; returned as-is when the history is missing or empty.
 * @param reducer Function combining accumulator and current callback result into new accumulator value.
 * @returns The final reduction value from processing all relevant contributions.
 */
private processRelevantContributions<T>(
  context: DebateContext,
  callback: (contribution: any, roundNumber: number) => T,
  initialValue: T,
  reducer: (accumulator: T, current: T) => T
): T {
  const rounds = context.history;
  if (!rounds || rounds.length === 0) {
    return initialValue;
  }

  const ownId = this.config.id;
  let accumulated = initialValue;

  for (const round of rounds) {
    for (const entry of round.contributions) {
      // Skip anything authored by another agent.
      if (entry.agentId !== ownId) {
        continue;
      }
      // Of this agent's own contributions, only proposals and refinements count.
      if (entry.type !== CONTRIBUTION_TYPES.PROPOSAL && entry.type !== CONTRIBUTION_TYPES.REFINEMENT) {
        continue;
      }
      accumulated = reducer(accumulated, callback(entry, round.roundNumber));

      // Disabled: critiques received by this agent are currently not included.
      // if (
      //   entry.type === CONTRIBUTION_TYPES.CRITIQUE &&
      //   entry.targetAgentId === ownId
      // ) {
      //   accumulated = reducer(accumulated, callback(entry, round.roundNumber));
      // }
    }
  }

  return accumulated;
}
|
|
265
|
+
|
|
266
|
+
/**
 * Decides whether the debate context should be summarized for this agent.
 *
 * Returns true only when all of the following hold:
 * 1. `summaryConfig.enabled` is truthy
 * 2. The context has a non-empty history
 * 3. The combined character count of the agent's relevant contributions is
 *    greater than or equal to `summaryConfig.threshold`
 *
 * The character count is the sum of `content.length` over the contributions
 * selected by `processRelevantContributions` (this agent's own proposals and
 * refinements).
 *
 * @param context - The debate context to evaluate.
 * @returns True if summarization should occur, false otherwise.
 */
shouldSummarize(context: DebateContext): boolean {
  // Disabled summarization or a missing/empty history both rule it out.
  if (!this.summaryConfig.enabled || !context.history || context.history.length === 0) {
    return false;
  }

  const contentLength = (contribution: any): number => contribution.content.length;
  const add = (runningTotal: number, length: number): number => runningTotal + length;

  // Total size, in characters, of the agent's relevant history.
  const relevantChars = this.processRelevantContributions(context, contentLength, 0, add);

  // The threshold is inclusive.
  return relevantChars >= this.summaryConfig.threshold;
}
|
|
302
|
+
|
|
303
|
+
/**
 * Prepares the debate context for this agent, producing a summary when one is needed.
 *
 * `shouldSummarize()` decides whether a summary is required. When it is not, the
 * original context is returned on its own. When it is, the agent's relevant
 * contributions are rendered as text, passed to the summarizer, and the result is
 * returned as a separate `summary` next to the unchanged context. The context
 * object itself is never modified or replaced by this method.
 *
 * If no summarizer is available, or anything fails while summarizing, a warning
 * is written to stderr and the original context is returned without a summary.
 *
 * @param context - The current debate context.
 * @param _roundNumber - The current round number; not used by this implementation.
 * @returns The original context, plus a `summary` when one was generated.
 */
async prepareContext(context: DebateContext, _roundNumber: number): Promise<ContextPreparationResult> {
  // Nothing to do unless summarization is both enabled and warranted.
  if (!this.shouldSummarize(context)) {
    return { context };
  }

  try {
    if (!context.history) {
      return { context };
    }

    // Check for the summarizer first so no summary input is built when it cannot be used.
    if (!this.summarizer) {
      // Summarization is enabled but no summarizer (shouldn't happen, but handle gracefully).
      // This is an internal error that should be logged to stderr.
      writeStderr(`Warning: Agent ${this.config.name}: Summarization enabled but no summarizer available. Using full history.\n`);
      return { context };
    }

    // Render each relevant contribution as a labelled text section.
    const sections = this.processRelevantContributions(
      context,
      (contribution, roundNumber) => {
        // The critique label only applies if the helper is changed to pass critiques through.
        const label =
          contribution.type === CONTRIBUTION_TYPES.CRITIQUE
            ? `Critique from ${contribution.agentRole}`
            : `${contribution.type}`;
        return [`Round ${roundNumber} - ${label}:\n${contribution.content}`];
      },
      [] as string[],
      // Append in place: the accumulator starts as the fresh array above, so mutating it
      // is safe and avoids re-copying every collected section on each step.
      (collected, next) => {
        collected.push(...next);
        return collected;
      }
    );

    const contentToSummarize = sections.join('\n\n---\n\n');

    // Construct the summary prompt with the content.
    const summaryPrompt = this.rolePrompts.summarizePrompt(contentToSummarize, this.summaryConfig.maxLength);

    const result = await this.summarizer.summarize(
      contentToSummarize,
      this.config.role,
      this.summaryConfig,
      this.resolvedSystemPrompt,
      summaryPrompt
    );

    const summary: DebateSummary = {
      agentId: this.config.id,
      agentRole: this.config.role,
      summary: result.summary,
      metadata: result.metadata,
    };

    // Return original context and summary for persistence.
    // Summary will be looked up from rounds when formatting prompts.
    return { context, summary };
  } catch (error: unknown) {
    // Anything can be thrown; only read `.message` from real Error objects.
    const reason = error instanceof Error ? error.message : String(error);
    writeStderr(
      `Warning: Agent ${this.config.name}: Summarization failed with error: ${reason}. Falling back to full history.\n`
    );
    return { context };
  }
}
|
|
385
|
+
}
|
|
386
|
+
|