mark-improving-agent 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +335 -0
- package/VERSION +1 -0
- package/bin/cli.js +12 -0
- package/dist/agent/context.js +78 -0
- package/dist/agent/index.js +6 -0
- package/dist/agent/runtime.js +195 -0
- package/dist/agent/task-graph.js +209 -0
- package/dist/agent/types.js +1 -0
- package/dist/cli/index.js +206 -0
- package/dist/core/cognition/active-inference.js +296 -0
- package/dist/core/cognition/cognitive-architecture.js +263 -0
- package/dist/core/cognition/dual-process.js +102 -0
- package/dist/core/cognition/index.js +13 -0
- package/dist/core/cognition/learning-from-failure.js +184 -0
- package/dist/core/cognition/meta-agent.js +407 -0
- package/dist/core/cognition/metacognition.js +322 -0
- package/dist/core/cognition/react.js +177 -0
- package/dist/core/cognition/retrieval-anchor.js +99 -0
- package/dist/core/cognition/self-evolution.js +294 -0
- package/dist/core/cognition/self-verification.js +190 -0
- package/dist/core/cognition/thought-graph.js +495 -0
- package/dist/core/cognition/tool-augmented-llm.js +188 -0
- package/dist/core/cognition/tool-execution-verifier.js +204 -0
- package/dist/core/collaboration/agentic-loop.js +165 -0
- package/dist/core/collaboration/index.js +3 -0
- package/dist/core/collaboration/multi-agent-system.js +186 -0
- package/dist/core/collaboration/multi-agent.js +110 -0
- package/dist/core/consciousness/emotion-engine.js +101 -0
- package/dist/core/consciousness/flow-machine.js +121 -0
- package/dist/core/consciousness/index.js +4 -0
- package/dist/core/consciousness/personality.js +103 -0
- package/dist/core/consciousness/types.js +1 -0
- package/dist/core/emotional-protocol.js +54 -0
- package/dist/core/evolution/engine.js +194 -0
- package/dist/core/evolution/goal-engine.js +153 -0
- package/dist/core/evolution/index.js +6 -0
- package/dist/core/evolution/meta-learning.js +172 -0
- package/dist/core/evolution/reflection.js +158 -0
- package/dist/core/evolution/self-healer.js +139 -0
- package/dist/core/evolution/types.js +1 -0
- package/dist/core/healing-rl.js +266 -0
- package/dist/core/heartbeat.js +408 -0
- package/dist/core/identity/index.js +3 -0
- package/dist/core/identity/reflexion.js +165 -0
- package/dist/core/identity/self-model.js +274 -0
- package/dist/core/identity/self-verifier.js +158 -0
- package/dist/core/identity/types.js +12 -0
- package/dist/core/lesson-bank.js +301 -0
- package/dist/core/memory/adaptive-rag.js +440 -0
- package/dist/core/memory/archive-store.js +187 -0
- package/dist/core/memory/dream-consolidation.js +366 -0
- package/dist/core/memory/embedder.js +130 -0
- package/dist/core/memory/hopfield-network.js +128 -0
- package/dist/core/memory/index.js +9 -0
- package/dist/core/memory/knowledge-graph.js +151 -0
- package/dist/core/memory/spaced-repetition.js +113 -0
- package/dist/core/memory/store.js +404 -0
- package/dist/core/memory/types.js +1 -0
- package/dist/core/psychology/analysis.js +456 -0
- package/dist/core/psychology/index.js +1 -0
- package/dist/core/rollback-manager.js +191 -0
- package/dist/core/security/index.js +1 -0
- package/dist/core/security/privacy.js +132 -0
- package/dist/core/truth-teller.js +253 -0
- package/dist/core/truthfulness.js +99 -0
- package/dist/core/types.js +2 -0
- package/dist/event/bus.js +47 -0
- package/dist/index.js +8 -0
- package/dist/skills/dag.js +181 -0
- package/dist/skills/index.js +5 -0
- package/dist/skills/registry.js +40 -0
- package/dist/skills/types.js +1 -0
- package/dist/storage/archive.js +77 -0
- package/dist/storage/checkpoint.js +119 -0
- package/dist/storage/types.js +1 -0
- package/dist/utils/config.js +81 -0
- package/dist/utils/logger.js +49 -0
- package/dist/version.js +1 -0
- package/package.json +37 -0
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tool Execution Verification - Pre/Post Execution Checks
|
|
3
|
+
*
|
|
4
|
+
* Paper: "AgentEscapeBench: Tool-Grounded Reasoning Failures"
|
|
5
|
+
* Key insight: 82.3% of errors are predictable and preventable
|
|
6
|
+
*
|
|
7
|
+
* Error patterns:
|
|
8
|
+
* - Tool selection errors (36.5%)
|
|
9
|
+
* - Parameter errors (30.1%)
|
|
10
|
+
* - Verification errors (15.6%)
|
|
11
|
+
* - Context length errors (10.2%)
|
|
12
|
+
* - Semantic errors (7.6%)
|
|
13
|
+
*/
|
|
14
|
+
/**
 * Patterns used to classify tool output into error categories.
 *
 * Each key is the category reported as a check's `type`; each entry pairs a
 * case-insensitive regular expression with the severity assigned on a match.
 * None of the patterns use the `g`/`y` flags, so `.test()` is stateless.
 */
const ERROR_PATTERNS = {
    selection: [
        { pattern: /tool.*not.*found|unknown.*tool/i, severity: 'critical' },
        { pattern: /multiple.*tools.*similar|ambiguous.*tool/i, severity: 'high' },
    ],
    parameter: [
        { pattern: /missing.*required|required.*param/i, severity: 'critical' },
        { pattern: /invalid.*type|type.*mismatch/i, severity: 'high' },
        { pattern: /empty.*param|missing.*value/i, severity: 'medium' },
    ],
    context: [
        { pattern: /context.*exceed|max.*length/i, severity: 'high' },
        { pattern: /truncat|overflow/i, severity: 'medium' },
    ],
    semantic: [
        { pattern: /wrong.*tool|incorrect.*selection/i, severity: 'high' },
        { pattern: /misunderstand.*query|wrong.*intent/i, severity: 'medium' },
    ],
};

/**
 * Create a tool execution verifier.
 *
 * The returned object keeps private counters (executions, failures,
 * self-corrections) that are updated by `verifyExecution` and reported by
 * `getStats`.
 *
 * @returns {{
 *   preExecutionCheck: (toolId: string, params: object) => object[],
 *   postExecutionCheck: (output: string, expected?: string) => object[],
 *   verifyExecution: (plan: object, result: string) => object,
 *   predictFailure: (toolId: string, params: object) => {willFail: boolean, reason?: string},
 *   getStats: () => object,
 * }}
 */
export function createToolExecutionVerifier() {
    let totalExecutions = 0;
    let failures = 0;
    let selfCorrections = 0;

    /**
     * Run the checks that can be made before a tool is invoked.
     *
     * Always returns exactly three checks, in order: `selection`, `parameter`,
     * `context`.
     *
     * @param {string} toolId - Identifier of the tool about to be called.
     * @param {object} params - Parameters for the call; a missing or
     *   non-object value is treated as "no parameters".
     * @returns {object[]} Check records (`type`, `passed`, `severity`, and
     *   `issue`/`suggestion` on failure).
     */
    function preExecutionCheck(toolId, params) {
        const checks = [];

        // Check 1: Tool selection validity. A non-string ID has no meaningful
        // length, so it is rejected instead of slipping past the length test.
        if (typeof toolId !== 'string' || toolId.length < 3) {
            checks.push({
                type: 'selection',
                passed: false,
                issue: 'Invalid tool ID format',
                suggestion: 'Ensure tool ID is a valid non-empty string',
                severity: 'critical',
            });
        }
        else {
            checks.push({ type: 'selection', passed: true, severity: 'low' });
        }

        // Check 2: Parameter completeness. `in` and Object.keys throw on
        // null/undefined/primitives, so fall back to an empty object.
        const safeParams = params !== null && typeof params === 'object' ? params : {};
        const requiredParams = ['query', 'content', 'action'];
        const hasRequired = requiredParams.some(p => p in safeParams);
        if (!hasRequired && Object.keys(safeParams).length === 0) {
            checks.push({
                type: 'parameter',
                passed: false,
                issue: 'No parameters provided',
                suggestion: 'Add at least one parameter (query, content, or action)',
                severity: 'high',
            });
        }
        else {
            checks.push({ type: 'parameter', passed: true, severity: 'low' });
        }

        // Check 3: Context length. JSON.stringify throws on circular
        // structures and BigInt values; report that as a failed check rather
        // than letting the pre-check itself crash.
        let serialized;
        try {
            serialized = JSON.stringify(safeParams);
        }
        catch {
            serialized = undefined;
        }
        if (typeof serialized !== 'string') {
            checks.push({
                type: 'context',
                passed: false,
                issue: 'Parameters could not be serialized',
                suggestion: 'Remove circular references or non-JSON values from parameters',
                severity: 'high',
            });
        }
        else if (serialized.length > 10000) {
            checks.push({
                type: 'context',
                passed: false,
                issue: 'Parameters exceed safe length',
                suggestion: 'Reduce parameter size or split into multiple calls',
                severity: 'high',
            });
        }
        else {
            checks.push({ type: 'context', passed: true, severity: 'low' });
        }
        return checks;
    }

    /**
     * Inspect a tool's output after execution.
     *
     * @param {string} output - Raw tool output. Empty/missing output produces
     *   a failed `verification` check.
     * @param {string} [expected] - Optional expected output; when given and
     *   different from `output`, word-level similarity below 0.5 adds a failed
     *   `semantic` check.
     * @returns {object[]} Check records; the first is always `verification`.
     */
    function postExecutionCheck(output, expected) {
        const checks = [];

        // Check 1: Output validity
        if (!output || output.length === 0) {
            checks.push({
                type: 'verification',
                passed: false,
                issue: 'Empty output received',
                suggestion: 'Retry execution or check tool functionality',
                severity: 'high',
            });
        }
        else {
            checks.push({ type: 'verification', passed: true, severity: 'low' });
        }

        // Missing output was already reported above; use an empty string for
        // the remaining text-based checks so they cannot throw.
        const text = typeof output === 'string' ? output : String(output ?? '');

        // Check 2: Error pattern detection (patterns are case-insensitive).
        for (const [type, patterns] of Object.entries(ERROR_PATTERNS)) {
            for (const { pattern, severity } of patterns) {
                if (pattern.test(text)) {
                    checks.push({
                        type: type,
                        passed: false,
                        issue: `Error pattern detected: ${type}`,
                        severity,
                    });
                }
            }
        }

        // Check 3: Semantic match (if expected provided)
        if (expected && output !== expected) {
            const similarity = computeStringSimilarity(text, String(expected));
            if (similarity < 0.5) {
                checks.push({
                    type: 'semantic',
                    passed: false,
                    issue: 'Output significantly differs from expectation',
                    suggestion: 'Verify tool execution was correct',
                    severity: 'medium',
                });
            }
        }
        return checks;
    }

    /**
     * Jaccard similarity over the lower-cased, whitespace-separated word sets
     * of two strings.
     *
     * @param {string} a
     * @param {string} b
     * @returns {number} Value in [0, 1]; 1.0 when the union is empty.
     */
    function computeStringSimilarity(a, b) {
        const aWords = new Set(a.toLowerCase().split(/\s+/));
        const bWords = new Set(b.toLowerCase().split(/\s+/));
        let intersection = 0;
        for (const word of aWords) {
            if (bWords.has(word)) {
                intersection++;
            }
        }
        const union = aWords.size + bWords.size - intersection;
        return union > 0 ? intersection / union : 1.0;
    }

    /**
     * Verify an execution result and update the running statistics.
     *
     * @param {object} plan - Execution plan; only `estimatedDuration` is read.
     * @param {string} result - Tool output passed to `postExecutionCheck`.
     * @returns {{success: boolean, output: *, checks: object[], error: (string|undefined), duration: *, selfCorrected: boolean}}
     */
    function verifyExecution(plan, result) {
        totalExecutions++;
        const postChecks = postExecutionCheck(result);
        const failedChecks = postChecks.filter(c => !c.passed);
        const criticalFailed = failedChecks.some(c => c.severity === 'critical');
        if (criticalFailed || failedChecks.length > 2) {
            failures++;
        }
        // NOTE(review): no corrective action is performed here; the flag only
        // records that between one and two checks failed.
        let selfCorrected = false;
        if (failedChecks.length > 0 && failedChecks.length <= 2) {
            selfCorrections++;
            selfCorrected = true;
        }
        return {
            success: failedChecks.length === 0,
            output: result,
            checks: postChecks,
            error: criticalFailed ? 'Critical failure detected' : undefined,
            // Tolerate a missing plan instead of throwing after the counters
            // have already been updated.
            duration: plan?.estimatedDuration,
            selfCorrected,
        };
    }

    /**
     * Predict whether a call will fail, based on the pre-execution checks.
     *
     * @param {string} toolId
     * @param {object} params
     * @returns {{willFail: boolean, reason?: string}} `willFail` is true only
     *   when a critical pre-execution check failed.
     */
    function predictFailure(toolId, params) {
        const checks = preExecutionCheck(toolId, params);
        const criticalFailed = checks.find(c => !c.passed && c.severity === 'critical');
        if (criticalFailed) {
            return {
                willFail: true,
                reason: criticalFailed.issue,
            };
        }
        return { willFail: false };
    }

    /**
     * Snapshot of the counters maintained by `verifyExecution`.
     *
     * @returns {{totalExecutions: number, failures: number, selfCorrections: number, failureRate: number, selfCorrectionRate: number}}
     */
    function getStats() {
        return {
            totalExecutions,
            failures,
            selfCorrections,
            failureRate: totalExecutions > 0 ? failures / totalExecutions : 0,
            selfCorrectionRate: totalExecutions > 0 ? selfCorrections / totalExecutions : 0,
        };
    }

    return {
        preExecutionCheck,
        postExecutionCheck,
        verifyExecution,
        predictFailure,
        getStats,
    };
}
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Agentic Loop - Think, Act, Observe Cycle
|
|
3
|
+
*
|
|
4
|
+
* Paper: "Critique Routing in Multi-Agent Collaboration" (2605.08631)
|
|
5
|
+
* Key insight: Even advanced models fail on simple tasks
|
|
6
|
+
* Critical: The Agentic Loop (Think → Act → Observe) is essential
|
|
7
|
+
*
|
|
8
|
+
* Key mechanisms:
|
|
9
|
+
* - Think: Analyze situation, plan action
|
|
10
|
+
* - Act: Execute the planned action
|
|
11
|
+
* - Observe: Check results, verify success
|
|
12
|
+
* - Loop until goal achieved or max iterations
|
|
13
|
+
*/
|
|
14
|
+
/**
 * Create an agentic loop that repeats Think -> Act -> Observe until the
 * observation signals completion or the iteration limit is reached.
 *
 * The returned object keeps private run counters reported by `getStats`.
 * `act` is a simulation: it only echoes the chosen action.
 *
 * @returns {{
 *   run: (goal: string, config?: object) => Promise<object>,
 *   think: (context: string) => string,
 *   act: (action: string) => Promise<string>,
 *   observe: (result: string) => {success: boolean, feedback: string},
 *   shouldContinue: (result: string, iterations: number, maxIterations?: number) => boolean,
 *   getStats: () => object,
 * }}
 */
export function createAgenticLoop() {
    let totalRuns = 0;
    let successfulRuns = 0;
    let totalIterations = 0;
    const DEFAULT_CONFIG = {
        maxIterations: 10,
        enableSelfCorrection: true,
        verifyEachStep: true,
    };

    // Coerce arbitrary input to a string so keyword matching cannot throw on
    // null/undefined or non-string results.
    const toText = (value) => (typeof value === 'string' ? value : String(value ?? ''));

    // True when the lower-cased text contains any of the given keywords.
    const mentionsAny = (value, keywords) => {
        const lowered = toText(value).toLowerCase();
        return keywords.some(keyword => lowered.includes(keyword));
    };

    /**
     * Run the loop for a goal.
     *
     * @param {string} goal - Goal text; also the initial context.
     * @param {object} [config] - Overrides for `maxIterations`,
     *   `enableSelfCorrection` and `verifyEachStep` (the last is not read by
     *   this implementation).
     * @returns {Promise<{success: boolean, steps: object[], iterations: number, finalState: string, confidence: number, totalDuration: number}>}
     */
    async function run(goal, config) {
        totalRuns++;
        const cfg = { ...DEFAULT_CONFIG, ...config };
        const steps = [];
        let currentState = 'think';
        let iterations = 0;
        let totalDuration = 0;
        let context = toText(goal);
        while (iterations < cfg.maxIterations) {
            const stepStart = Date.now();
            iterations++;

            // STEP 1: THINK
            const thought = think(context);

            // STEP 2: ACT
            const action = selectAction(context, steps);
            currentState = 'act';
            const actionResult = await act(action);
            // Duration covers think + act only; observe is not included.
            totalDuration += Date.now() - stepStart;

            // STEP 3: OBSERVE
            const { feedback: observation, success } = observe(actionResult);
            currentState = 'observe';
            steps.push({
                state: currentState,
                thought,
                action,
                observation,
                timestamp: Date.now(),
                duration: Date.now() - stepStart,
            });

            // Self-correction if needed. The adjusted context wraps the
            // previous context, so it grows with every failed iteration.
            if (!success && cfg.enableSelfCorrection) {
                context = adjustContext(context, observation);
                continue;
            }

            // Check if complete
            if (success && checkComplete(observation)) {
                currentState = 'complete';
                successfulRuns++;
                break;
            }

            // Update context for next iteration
            context = updateContext(context, action, observation);
        }
        if (currentState !== 'complete') {
            currentState = 'failed';
        }
        totalIterations += iterations;
        const confidence = calculateConfidence(steps);
        return {
            success: currentState === 'complete',
            steps,
            iterations,
            finalState: currentState,
            confidence,
            totalDuration,
        };
    }

    /**
     * Build the planning prompt for the current context.
     *
     * @param {string} context
     * @returns {string}
     */
    function think(context) {
        return [
            `[Think] Current goal: ${context}`,
            '- What is the current state?',
            '- What actions are available?',
            '- What is the most promising next step?',
            '- How will I know if I succeeded?',
        ].join('\n');
    }

    /**
     * Execute an action (simulated): returns a string echoing the action.
     *
     * @param {string} action
     * @returns {Promise<string>}
     */
    async function act(action) {
        // In real implementation, this would execute the actual action
        return `[Act] Executed: ${action}`;
    }

    /**
     * Classify a result by keyword: success requires at least one success
     * indicator and no failure indicator (case-insensitive).
     *
     * @param {string} result
     * @returns {{success: boolean, feedback: string}}
     */
    function observe(result) {
        const successIndicators = ['success', 'completed', 'achieved', 'found', 'verified', 'done'];
        const failIndicators = ['error', 'failed', 'not found', 'cannot', 'unable'];
        const feedback = toText(result);
        const isSuccess = mentionsAny(feedback, successIndicators);
        const isFailed = mentionsAny(feedback, failIndicators);
        return {
            success: isSuccess && !isFailed,
            feedback,
        };
    }

    /**
     * Pick an action name from keywords in the context; the first matching
     * keyword wins.
     *
     * @param {string} context
     * @param {object[]} steps - Previous steps (currently not consulted).
     * @returns {string}
     */
    function selectAction(context, steps) {
        const keywordActions = [
            ['search', 'search_memory'],
            ['verify', 'check_consistency'],
            ['learn', 'store_lesson'],
            ['analyze', 'analyze_situation'],
        ];
        const match = keywordActions.find(([keyword]) => context.includes(keyword));
        // Default action
        return match ? match[1] : 'process_and_respond';
    }

    /**
     * Decide whether another iteration is warranted.
     *
     * @param {string} result - Latest result text.
     * @param {number} iterations - Iterations performed so far.
     * @param {number} [maxIterations] - Iteration limit; defaults to the
     *   loop's default `maxIterations` (10).
     * @returns {boolean} True when the result has no success keyword and the
     *   limit has not been reached.
     */
    function shouldContinue(result, iterations, maxIterations = DEFAULT_CONFIG.maxIterations) {
        const successIndicators = ['success', 'completed', 'achieved'];
        return !mentionsAny(result, successIndicators) && iterations < maxIterations;
    }

    /**
     * True when the observation contains a completion keyword.
     *
     * @param {string} observation
     * @returns {boolean}
     */
    function checkComplete(observation) {
        const successIndicators = ['success', 'completed', 'achieved', 'done', 'verified'];
        return mentionsAny(observation, successIndicators);
    }

    // Context for the next iteration after a non-terminal step.
    function updateContext(context, action, observation) {
        return `Previous: ${action} -> ${observation}. Continuing with: ${context}`;
    }

    // Self-correction: fold the feedback into the context.
    function adjustContext(context, feedback) {
        return `Self-correcting. Feedback: ${feedback}. Original goal: ${context}`;
    }

    /**
     * Fraction of recorded steps whose observation mentions "success".
     * Matching is case-insensitive, consistent with `observe`.
     *
     * @param {object[]} steps
     * @returns {number} 0 when there are no steps.
     */
    function calculateConfidence(steps) {
        if (steps.length === 0) {
            return 0;
        }
        const successSteps = steps.filter(s => s.state === 'observe' && mentionsAny(s.observation, ['success']));
        return successSteps.length / steps.length;
    }

    /**
     * Aggregate statistics across all `run` calls on this instance.
     *
     * @returns {{totalRuns: number, successRate: number, avgIterations: number}}
     */
    function getStats() {
        return {
            totalRuns,
            successRate: totalRuns > 0 ? successfulRuns / totalRuns : 0,
            avgIterations: totalRuns > 0 ? totalIterations / totalRuns : 0,
        };
    }

    return {
        run,
        think,
        act,
        observe,
        shouldContinue,
        getStats,
    };
}
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OpenHuman Integration - Agent System Patterns
|
|
3
|
+
*
|
|
4
|
+
* Based on tinyhumansai/openhuman multi-agent architecture
|
|
5
|
+
*
|
|
6
|
+
* Key concepts:
|
|
7
|
+
* - Architectobot: Project planning and task decomposition
|
|
8
|
+
* - Codecrusher: Implementation specialist
|
|
9
|
+
* - QualityQueen: Testing and verification
|
|
10
|
+
* - MemoryKeeper: Project memory maintenance
|
|
11
|
+
*
|
|
12
|
+
* Agent workflow:
|
|
13
|
+
* architectobot (plan) → user approval → codecrusher (implement) → architectobot (verify)
|
|
14
|
+
*/
|
|
15
|
+
import { randomUUID } from 'crypto';
|
|
16
|
+
// Default agents matching OpenHuman's agent system
|
|
17
|
+
/**
 * Built-in agent definitions. These objects are templates: each orchestrator
 * copies them so per-instance statistics never write back into this array.
 */
const DEFAULT_OPENHUMAN_AGENTS = [
    {
        id: 'architectobot',
        name: 'ArchitectoBot',
        role: 'architect',
        description: 'Project Architect & Task Breakdown Specialist',
        color: 'blue',
        model: 'claude-opus-4-6',
        skills: ['codebase_analysis', 'documentation', 'task_decomposition', 'architecture_design'],
        stats: { tasksCompleted: 0, successRate: 0.95, avgResponseTime: 5000 },
    },
    {
        id: 'codecrusher',
        name: 'CodeCrusher',
        role: 'implementer',
        description: 'Implementation specialist',
        color: 'green',
        model: 'claude-sonnet-4-6',
        skills: ['typescript', 'rust', 'react', 'tauri', 'testing'],
        stats: { tasksCompleted: 0, successRate: 0.92, avgResponseTime: 8000 },
    },
    {
        id: 'qualityqueen',
        name: 'QualityQueen',
        role: 'reviewer',
        description: 'Code quality guardian',
        color: 'purple',
        model: 'claude-sonnet-4-6',
        skills: ['code_review', 'testing', 'coverage_analysis', 'linting'],
        stats: { tasksCompleted: 0, successRate: 0.98, avgResponseTime: 3000 },
    },
    {
        id: 'testmaster',
        name: 'TestMaster',
        role: 'tester',
        description: 'Testing specialist',
        color: 'orange',
        model: 'claude-haiku-4-5',
        skills: ['vitest', 'wdio', 'rust_tests', 'coverage'],
        stats: { tasksCompleted: 0, successRate: 0.94, avgResponseTime: 4000 },
    },
    {
        id: 'memorykeeper',
        name: 'MemoryKeeper',
        role: 'memory keeper',
        description: 'Project memory specialist',
        color: 'yellow',
        model: 'claude-haiku-4-5',
        skills: ['documentation', 'memory_management', 'context_keeping'],
        stats: { tasksCompleted: 0, successRate: 0.99, avgResponseTime: 2000 },
    },
];

/**
 * Create a multi-agent orchestrator pre-populated with the default agents.
 *
 * Each orchestrator owns its own agents, workflows and message log.
 *
 * @returns {object} API with `registerAgent`, `getAgent`, `getAgentsByRole`,
 *   `listAgents`, `createWorkflow`, `advanceWorkflow`, `getWorkflowStatus`,
 *   `sendMessage`, `getMessages`, `executeWorkflow` and `getAgentStats`.
 */
export function createMultiAgentOrchestrator() {
    // Copy the templates (including nested skills/stats) so that
    // `tasksCompleted++` on one orchestrator is not visible to another.
    const cloneAgent = (agent) => ({
        ...agent,
        skills: [...agent.skills],
        stats: { ...agent.stats },
    });
    const state = {
        agents: new Map(DEFAULT_OPENHUMAN_AGENTS.map(a => [a.id, cloneAgent(a)])),
        workflows: new Map(),
        messages: [],
    };

    /** Add an agent, replacing any existing agent with the same `id`. */
    function registerAgent(agent) {
        state.agents.set(agent.id, agent);
    }

    /** @returns {object|undefined} The agent with the given id, if any. */
    function getAgent(id) {
        return state.agents.get(id);
    }

    /** @returns {object[]} All agents whose `role` equals `role` exactly. */
    function getAgentsByRole(role) {
        return Array.from(state.agents.values()).filter(a => a.role === role);
    }

    /** @returns {object[]} All registered agents, in registration order. */
    function listAgents() {
        return Array.from(state.agents.values());
    }

    /**
     * Create and store a four-stage workflow
     * (architect -> implementer -> reviewer -> tester).
     *
     * @param {string} name
     * @param {*} task - Stored on the workflow as given.
     * @returns {object} The new workflow, with a random UUID as `id`.
     */
    function createWorkflow(name, task) {
        const workflow = {
            id: randomUUID(),
            name,
            task,
            stages: [
                { agent: 'architect', action: 'Analyze requirements and create implementation plan', status: 'pending' },
                { agent: 'implementer', action: 'Implement code following the plan', status: 'pending' },
                { agent: 'reviewer', action: 'Review code quality and architecture', status: 'pending' },
                { agent: 'tester', action: 'Run tests and verify coverage', status: 'pending' },
            ],
            currentStage: 0,
            createdAt: Date.now(),
        };
        state.workflows.set(workflow.id, workflow);
        return workflow;
    }

    /**
     * Mark the current stage done with `result` and move to the next stage.
     * Does nothing for an unknown workflow or one with no current stage left.
     *
     * @param {string} workflowId
     * @param {*} result
     */
    function advanceWorkflow(workflowId, result) {
        const workflow = state.workflows.get(workflowId);
        if (!workflow) {
            return;
        }
        const currentStage = workflow.stages[workflow.currentStage];
        // After the last stage, currentStage points past the end of the
        // array; advancing again must be a no-op rather than a TypeError.
        if (!currentStage) {
            return;
        }
        currentStage.status = 'done';
        currentStage.result = result;
        workflow.currentStage++;
        if (workflow.currentStage >= workflow.stages.length) {
            workflow.completedAt = Date.now();
        }
    }

    /** @returns {object|undefined} The stored workflow object, if any. */
    function getWorkflowStatus(workflowId) {
        return state.workflows.get(workflowId);
    }

    /**
     * Append a message to the log, stamping it with a fresh id and timestamp.
     *
     * @param {object} message
     * @returns {object} The stored message.
     */
    function sendMessage(message) {
        const fullMessage = {
            ...message,
            id: randomUUID(),
            timestamp: Date.now(),
        };
        state.messages.push(fullMessage);
        return fullMessage;
    }

    /** @returns {object[]} Messages sent to or from the given agent id. */
    function getMessages(agentId) {
        return state.messages.filter(m => m.to === agentId || m.from === agentId);
    }

    /**
     * Run every stage of a workflow in order (simulated: the first agent with
     * the stage's role is credited and a canned result is recorded).
     *
     * A stage whose role has no registered agent is marked `failed`; the
     * remaining stages still run, but the overall result reports
     * `success: false` and the workflow is not marked completed.
     *
     * @param {string} workflowId
     * @returns {Promise<{success: boolean, result: string}>}
     */
    async function executeWorkflow(workflowId) {
        const workflow = state.workflows.get(workflowId);
        if (!workflow) {
            return { success: false, result: 'Workflow not found' };
        }
        const results = [];
        let allStagesDone = true;
        for (const [index, stage] of workflow.stages.entries()) {
            stage.status = 'active';
            workflow.currentStage = index;
            const [agent] = getAgentsByRole(stage.agent);
            if (!agent) {
                // Previously the stage stayed 'active' forever and the
                // workflow was still reported as successful.
                stage.status = 'failed';
                stage.result = `No agent registered for role: ${stage.agent}`;
                results.push(stage.result);
                allStagesDone = false;
                continue;
            }
            if (agent.stats) {
                agent.stats.tasksCompleted++;
            }
            stage.result = `[${agent.name}] completed: ${stage.action}`;
            stage.status = 'done';
            results.push(stage.result);
        }
        if (allStagesDone) {
            workflow.completedAt = Date.now();
        }
        return {
            success: allStagesDone,
            result: results.join('\n'),
        };
    }

    /**
     * @returns {{tasksCompleted: number, successRate: number, avgResponseTime: number}}
     *   A copy of the agent's stats, or zeros for an unknown agent.
     */
    function getAgentStats(agentId) {
        const agent = state.agents.get(agentId);
        return agent ? { ...agent.stats } : { tasksCompleted: 0, successRate: 0, avgResponseTime: 0 };
    }

    return {
        registerAgent,
        getAgent,
        getAgentsByRole,
        listAgents,
        createWorkflow,
        advanceWorkflow,
        getWorkflowStatus,
        sendMessage,
        getMessages,
        executeWorkflow,
        getAgentStats,
    };
}
|
|
174
|
+
/**
 * Render a one-line summary of an agent: name, role, completed task count
 * and success rate as a whole-number percentage.
 *
 * @param {{name: string, role: string, stats: {tasksCompleted: number, successRate: number}}} agent
 * @returns {string}
 */
export function formatAgentStatus(agent) {
    const { name, role, stats } = agent;
    const tasks = stats.tasksCompleted;
    const successPercent = (stats.successRate * 100).toFixed(0);
    return `${name}: [${role}] | Tasks: ${tasks} | Success: ${successPercent}%`;
}
|
|
177
|
+
/**
 * Render a multi-line status summary for a workflow.
 *
 * `currentStage` can point one past the last stage once every stage has been
 * advanced; in that case progress is capped at the stage count and the
 * current stage is shown as "none" instead of "undefined - undefined".
 *
 * @param {{name: string, stages: {agent: string, action: string}[], currentStage: number, completedAt?: number}} workflow
 * @returns {string} Four lines: Workflow, Progress, Current, Status.
 */
export function formatWorkflowStatus(workflow) {
    const totalStages = workflow.stages.length;
    const stage = workflow.stages[workflow.currentStage];
    // Cap at the total so a finished workflow reads "4/4", not "5/4".
    const position = Math.min(workflow.currentStage + 1, totalStages);
    const current = stage ? `${stage.agent} - ${stage.action}` : 'none';
    return [
        `Workflow: ${workflow.name}`,
        `Progress: ${position}/${totalStages}`,
        `Current: ${current}`,
        `Status: ${workflow.completedAt ? '✅ Completed' : '🔄 In Progress'}`,
    ].join('\n');
}
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Multi-Agent Collaboration Protocol
|
|
3
|
+
*
|
|
4
|
+
* Paper: "ARMOR: Agentic Tool Reasoning" (2605.07103)
|
|
5
|
+
* Concept: Multi-agent collaboration with shared tool use
|
|
6
|
+
*
|
|
7
|
+
* Key mechanisms:
|
|
8
|
+
* - Agent communication protocol
|
|
9
|
+
* - Shared tool registry
|
|
10
|
+
* - Task decomposition and delegation
|
|
11
|
+
* - Result aggregation
|
|
12
|
+
*/
|
|
13
|
+
/**
 * Create a multi-agent collaboration system.
 *
 * The system starts with three registered agents (`planner`, `executor`,
 * `aggregator`) and keeps a global message log, per-agent inboxes and a list
 * of delegated tasks.
 *
 * @returns {{
 *   send: (message: object) => void,
 *   receive: (from: string) => object[],
 *   delegate: (task: object) => string,
 *   aggregate: (results: object[]) => string,
 *   broadcast: (content: *, exclude?: string[]) => void,
 *   getStats: () => {messages: number, tasks: number, agents: number},
 * }}
 */
export function createMultiAgentProtocol() {
    const agents = new Map();
    const messages = [];
    const tasks = [];

    // Register (or replace) an agent with an empty inbox and task list.
    function registerAgent(id, role, tools = []) {
        const agent = {
            id,
            role,
            tools,
            messages: [],
            tasks: [],
            isActive: true,
        };
        agents.set(id, agent);
        return agent;
    }

    /**
     * Record a message in the global log and, when the recipient is a
     * registered agent, in that agent's inbox.
     *
     * @param {{to: string}} message
     */
    function send(message) {
        messages.push(message);
        // Add to recipient's inbox
        const recipient = agents.get(message.to);
        if (recipient) {
            recipient.messages.push(message);
        }
    }

    /**
     * Return the messages addressed to an agent.
     *
     * @param {string} from - Id of the agent whose inbox is read.
     * @returns {object[]} A new array; empty for an unknown agent. Messages
     *   are not removed or marked as read.
     */
    function receive(from) {
        const agent = agents.get(from);
        if (!agent) {
            return [];
        }
        return agent.messages.filter(m => m.to === from);
    }

    /**
     * Store a task as pending and, if it has an `assignedTo`, send that agent
     * a `request` message whose content is the JSON-serialized task.
     *
     * @param {object} task
     * @returns {string} Generated task id (`task-<timestamp>-<random>`).
     */
    function delegate(task) {
        const taskId = `task-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
        const delegatedTask = {
            ...task,
            id: taskId,
            status: 'pending',
        };
        tasks.push(delegatedTask);
        // Notify assigned agent
        if (task.assignedTo) {
            send({
                from: 'system',
                to: task.assignedTo,
                type: 'request',
                content: JSON.stringify(delegatedTask),
                timestamp: Date.now(),
            });
        }
        return taskId;
    }

    /**
     * Concatenate the results of completed tasks, one `[type] result` line
     * per task.
     *
     * @param {object[]} results - Tasks to aggregate; a non-array is treated
     *   as empty.
     * @returns {string} The joined lines, or 'No results to aggregate' when
     *   no completed task carries a result.
     */
    function aggregate(results) {
        const candidates = Array.isArray(results) ? results : [];
        // Filter on both conditions before the emptiness check so completed
        // tasks without a result yield the message, not an empty string.
        const withResults = candidates.filter(t => t.status === 'completed' && t.result);
        if (withResults.length === 0) {
            return 'No results to aggregate';
        }
        // Simple aggregation: concatenate results
        return withResults
            .map(t => `[${t.type}] ${t.result}`)
            .join('\n');
    }

    /**
     * Send a `broadcast` message from `system` to every registered agent not
     * listed in `exclude`.
     *
     * @param {*} content
     * @param {string[]} [exclude] - Agent ids to skip.
     */
    function broadcast(content, exclude = []) {
        for (const agent of agents.values()) {
            if (!exclude.includes(agent.id)) {
                send({
                    from: 'system',
                    to: agent.id,
                    type: 'broadcast',
                    content,
                    timestamp: Date.now(),
                });
            }
        }
    }

    /**
     * @returns {{messages: number, tasks: number, agents: number}} Sizes of
     *   the global message log, the task list and the agent registry.
     */
    function getStats() {
        return {
            messages: messages.length,
            tasks: tasks.length,
            agents: agents.size,
        };
    }

    // Register default agents
    registerAgent('planner', 'Task decomposition', ['analyze', 'decompose', 'plan']);
    registerAgent('executor', 'Tool execution', ['execute', 'verify', 'correct']);
    registerAgent('aggregator', 'Result synthesis', ['aggregate', 'summarize', 'validate']);

    return {
        send,
        receive,
        delegate,
        aggregate,
        broadcast,
        getStats,
    };
}
|