create-merlin-brain 3.6.3 → 3.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/server/index.js +20 -8
- package/dist/server/index.js.map +1 -1
- package/dist/server/server.d.ts.map +1 -1
- package/dist/server/server.js +823 -783
- package/dist/server/server.js.map +1 -1
- package/files/agents/merlin.md +0 -1
- package/files/loop/lib/blend-handoff.sh +284 -0
- package/files/loop/lib/blend-learn.sh +337 -0
- package/files/loop/lib/blend-parallel.sh +217 -0
- package/files/loop/lib/blend-verify.sh +305 -0
- package/files/loop/lib/blend.sh +62 -3
- package/files/loop/merlin-loop.sh +5 -0
- package/files/loop/merlin-session.sh +13 -0
- package/files/merlin/VERSION +1 -1
- package/package.json +1 -1
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
#
|
|
3
|
+
# ╔═══════════════════════════════════════════════════════════════════════════╗
|
|
4
|
+
# ║ BLEND ENGINE — Stage 2: Parallel Agent Execution ║
|
|
5
|
+
# ║ Spawn multiple blended agents simultaneously for independent subtasks ║
|
|
6
|
+
# ╚═══════════════════════════════════════════════════════════════════════════╝
|
|
7
|
+
#
|
|
8
|
+
# When a task has clearly independent subtasks, spawn multiple blended agents
|
|
9
|
+
# in parallel using bash background processes. Wait for all, merge results.
|
|
10
|
+
#
|
|
11
|
+
# This gives ~Nx speed improvement for N independent subtasks.
|
|
12
|
+
#
|
|
13
|
+
# Requires: blend.sh loaded first (uses blend_for_task, blend_and_spawn, etc.)
|
|
14
|
+
|
|
15
|
+
# Colors — keep whatever the sourcing script already set; otherwise fall back
# to the ANSI sequence. The values are stored as literal backslash text and
# are only turned into escape bytes later by `echo -e`.
: "${RESET:=\033[0m}"
: "${BOLD:=\033[1m}"
: "${DIM:=\033[2m}"
: "${GREEN:=\033[32m}"
: "${YELLOW:=\033[33m}"
: "${RED:=\033[31m}"
: "${CYAN:=\033[36m}"
: "${MAGENTA:=\033[35m}"

# Max parallel agents (don't overwhelm the system).
# Overridable from the environment. Anything that is not an integer >= 1
# (empty, text, 0) falls back to 4 so the numeric comparisons that consume
# this value cannot error out or cap execution at zero agents.
BLEND_MAX_PARALLEL="${BLEND_MAX_PARALLEL:-4}"
[ "$BLEND_MAX_PARALLEL" -ge 1 ] 2>/dev/null || BLEND_MAX_PARALLEL=4
|
|
27
|
+
|
|
28
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
29
|
+
# Subtask Splitting
|
|
30
|
+
# Detect if a task has independent subtasks that can run in parallel
|
|
31
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
32
|
+
|
|
33
|
+
# Split a task description into independent subtasks.
#
# Arguments: $1 - task description (may span several lines)
# Outputs:   one subtask per line on stdout; the original task, unchanged,
#            when no split pattern applies
# Returns:   0 when a split pattern matched, 1 when the task was not split
blend_split_subtasks() {
  local task="$1"

  # Pattern 1 (not handled on purpose): "a and b" / numbered items,
  # e.g. "add auth to the API and write tests for it".
  # These are often DEPENDENT, not independent — skip.

  # Pattern 2: explicit parallel marker followed by a comma-separated list,
  # e.g. "in parallel: refactor auth, add logging, update docs"
  if grep -qiE '^(in parallel|simultaneously|at the same time|concurrently):' <<< "$task"; then
    local items
    # Drop everything up to the first colon, put each comma-separated item on
    # its own line, trim surrounding whitespace and discard empty items.
    items=$(sed 's/^[^:]*: *//' <<< "$task" \
      | tr ',' '\n' \
      | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' -e '/^$/d')
    if [ -n "$items" ]; then
      printf '%s\n' "$items"
      return 0
    fi
  fi

  # Pattern 3 (not implemented): several independent files mentioned,
  # e.g. "update package.json, README.md, and CHANGELOG.md with new version".

  # Pattern 4: explicit subtask markers from plan execution — lines starting
  # with "SUBTASK", "---" or "[PARALLEL]". Each marker starts a new subtask;
  # the lines that follow are joined with single spaces.
  if grep -qE '^(SUBTASK|---|\[PARALLEL\])' <<< "$task"; then
    # Text that shares a line with the marker ("SUBTASK: do x") is kept as the
    # start of the new subtask instead of being thrown away with the marker.
    awk '
      /^(SUBTASK|---|\[PARALLEL\])/ {
        if (buf != "") print buf
        buf = $0
        sub(/^(SUBTASK|---+|\[PARALLEL\])[: \t]*/, "", buf)
        next
      }
      { buf = (buf != "") ? buf " " $0 : $0 }
      END { if (buf != "") print buf }
    ' <<< "$task"
    return 0
  fi

  # Default: not splittable — hand back the original task.
  printf '%s\n' "$task"
  return 1 # Return 1 to indicate "not split"
}
|
|
70
|
+
|
|
71
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
72
|
+
# Parallel Execution Engine
|
|
73
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
74
|
+
|
|
75
|
+
# Execute multiple subtasks in parallel, each with its own blended agent.
#
# Globals:   BLEND_MAX_PARALLEL (read) - maximum agents in flight at once
#            DIM, YELLOW, MAGENTA, BOLD, CYAN, GREEN, RED, RESET (read)
# Arguments: $1 - task description
#            $2 - extra context handed to every agent (optional)
#            $3 - session directory (optional,
#                 default /tmp/merlin-blend-parallel-<pid>)
# Outputs:   merged markdown report on stdout; progress lines on stderr
# Returns:   0 when every subtask succeeded, 1 when at least one failed.
#            For a task that cannot be split: the status of blend_and_spawn.
blend_parallel() {
  local task="$1"
  local extra_context="${2:-}"
  local session_dir="${3:-/tmp/merlin-blend-parallel-$$}"

  mkdir -p "$session_dir"

  # Split into subtasks
  local subtasks split_status=0
  subtasks=$(blend_split_subtasks "$task") || split_status=$?

  # Queue only the non-empty lines so the reported count equals the number
  # of agents that will actually be launched.
  local -a queue=()
  local line
  while IFS= read -r line; do
    if [ -n "$line" ]; then
      queue+=("$line")
    fi
  done <<< "$subtasks"
  local subtask_count=${#queue[@]}

  # If only one subtask (not splittable), fall back to normal blend
  if [ "$subtask_count" -le 1 ] || [ "$split_status" -ne 0 ]; then
    echo -e "${DIM} ○ Single task — using standard blend${RESET}" >&2
    blend_and_spawn "$task" "$extra_context" "$session_dir"
    return $?
  fi

  # Concurrency cap. A value that is not an integer >= 1 falls back to 4.
  local max_parallel="${BLEND_MAX_PARALLEL:-4}"
  [ "$max_parallel" -ge 1 ] 2>/dev/null || max_parallel=4
  if [ "$subtask_count" -gt "$max_parallel" ]; then
    echo -e "${YELLOW} ⚠ Capped to $max_parallel parallel agents${RESET}" >&2
  fi

  echo -e "${MAGENTA}${BOLD} ⚡ Parallel Execution: $subtask_count subtasks${RESET}" >&2

  local -a pids=() output_files=()
  local completed=0 failed=0 idx=0
  local subtask output_file i batch_base sub_idx

  for subtask in "${queue[@]}"; do
    idx=$((idx + 1))
    output_file="${session_dir}/parallel-${idx}.md"
    output_files+=("$output_file")

    # printf %s keeps backslashes in the task text from being interpreted.
    printf ' %b[%s/%s]%b Spawning: %s...\n' \
      "$CYAN" "$idx" "$subtask_count" "$RESET" "${subtask:0:60}" >&2

    # Spawn in background. The subshell exits with the agent's own status so
    # that `wait` below can tell success from failure; without the explicit
    # exit, the status would be that of the file write and always look fine.
    (
      sub_output=$(blend_and_spawn "$subtask" "$extra_context" "${session_dir}/sub-${idx}")
      sub_status=$?
      # An agent that printed nothing leaves an empty file, which the merge
      # step reports as "Failed or no output".
      { [ -z "$sub_output" ] || printf '%s\n' "$sub_output"; } > "$output_file" || exit 1
      exit "$sub_status"
    ) &
    pids+=("$!")

    # Drain the batch once the cap is reached or the queue is exhausted, so
    # subtasks beyond the cap run in a later batch instead of being dropped.
    if [ "${#pids[@]}" -ge "$max_parallel" ] || [ "$idx" -eq "$subtask_count" ]; then
      batch_base=$((idx - ${#pids[@]}))
      for i in "${!pids[@]}"; do
        sub_idx=$((batch_base + i + 1))
        if wait "${pids[$i]}" 2>/dev/null; then
          completed=$((completed + 1))
          echo -e " ${GREEN}✓${RESET} [${sub_idx}] Complete" >&2
        else
          failed=$((failed + 1))
          echo -e " ${RED}✗${RESET} [${sub_idx}] Failed" >&2
        fi
      done
      pids=()
    fi
  done

  echo -e "${MAGENTA} Results: ${completed} complete, ${failed} failed${RESET}" >&2

  # Merge outputs. Agent text is emitted through printf %s so backslash
  # sequences inside it (\n, \t, \c ...) reach the caller untouched.
  printf '# Parallel Execution Results\n\n'
  printf '**Subtasks:** %s | **Completed:** %s | **Failed:** %s\n\n---\n\n' \
    "$subtask_count" "$completed" "$failed"

  for i in "${!output_files[@]}"; do
    printf '## Subtask %s\n\n' "$((i + 1))"
    if [ -f "${output_files[$i]}" ] && [ -s "${output_files[$i]}" ]; then
      printf '%s\n\n---\n\n' "$(cat "${output_files[$i]}")"
    else
      printf '*Failed or no output*\n\n---\n\n'
    fi
  done

  if [ "$failed" -gt 0 ]; then
    return 1
  fi
  return 0
}
|
|
179
|
+
|
|
180
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
181
|
+
# Convenience: Parallel with verification
|
|
182
|
+
# Combines Stage 1 (verify) and Stage 2 (parallel) for high-stakes parallel
|
|
183
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
184
|
+
|
|
185
|
+
# Run a task through blend_parallel and, for high-stakes task types, follow
# up with the independent-verification flow.
#
# Arguments: $1 - task description
#            $2 - extra context (optional)
#            $3 - session directory (optional, default /tmp/merlin-blend-pv-<pid>)
# Outputs:   the parallel report, or whatever blend_and_verify prints when
#            verification runs; progress on stderr
# Returns:   blend_parallel's status when it fails; blend_and_verify's status
#            when verification runs; 0 otherwise
blend_parallel_verified() {
  local task="$1"
  local extra_context="${2:-}"
  local session_dir="${3:-/tmp/merlin-blend-pv-$$}"

  # Run parallel first
  local parallel_output parallel_exit=0
  parallel_output=$(blend_parallel "$task" "$extra_context" "$session_dir") || parallel_exit=$?

  if [ "$parallel_exit" -ne 0 ]; then
    printf '%s\n' "$parallel_output"
    return "$parallel_exit"
  fi

  # Verification needs all three helpers. If any of them is not loaded, return
  # the parallel result as-is rather than failing with "command not found"
  # after the work has already succeeded.
  local helper
  for helper in _verify_needs_independent blend_get_top_agents blend_and_verify; do
    if ! type "$helper" &>/dev/null; then
      printf '%s\n' "$parallel_output"
      return 0
    fi
  done

  # The primary agent key is field 2 of the first line of the ranking.
  local top_agents primary_key
  top_agents=$(blend_get_top_agents "$task" 4)
  primary_key=$(printf '%s\n' "$top_agents" | head -1 | cut -d: -f2)

  if _verify_needs_independent "$primary_key" 2>/dev/null; then
    echo -e "${MAGENTA}${BOLD} 🔍 Verifying parallel results${RESET}" >&2
    # Save parallel output and run verification flow
    mkdir -p "$session_dir"
    printf '%s\n' "$parallel_output" > "$session_dir/parallel-merged.md"
    # NOTE(review): blend_and_verify is given only the task, not the merged
    # report saved above, and its first step is to run the task again through
    # blend_and_spawn. As written, the verifier therefore reviews that second
    # run rather than the parallel results — confirm this is intended.
    blend_and_verify "$task" "$extra_context" "$session_dir"
    return $?
  fi

  printf '%s\n' "$parallel_output"
  return 0
}
|
|
@@ -0,0 +1,305 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
#
|
|
3
|
+
# ╔═══════════════════════════════════════════════════════════════════════════╗
|
|
4
|
+
# ║ BLEND ENGINE — Stage 1: True Independent Verification ║
|
|
5
|
+
# ║ Spawn SEPARATE verifier agent for high-stakes tasks ║
|
|
6
|
+
# ╚═══════════════════════════════════════════════════════════════════════════╝
|
|
7
|
+
#
|
|
8
|
+
# Instead of CoVe instructions in a single prompt, this spawns a real
|
|
9
|
+
# SEPARATE verification agent: different process, different 200K context,
|
|
10
|
+
# different system prompt. The verifier never sees the original task framing.
|
|
11
|
+
#
|
|
12
|
+
# Architecture:
|
|
13
|
+
# Spawn 1: Blended Specialist → does the work
|
|
14
|
+
# Spawn 2: Blended Verifier → reviews output cold
|
|
15
|
+
# Spawn 3: (if conflicts) → Resolver makes final call
|
|
16
|
+
#
|
|
17
|
+
# Requires: blend.sh loaded first (uses blend_for_task, _blend_needs_cove, etc.)
|
|
18
|
+
|
|
19
|
+
# Colors (inherit from blend.sh or set defaults). The values are literal
# backslash text; `echo -e` turns them into escape bytes at print time.
: "${RESET:=\033[0m}"
: "${BOLD:=\033[1m}"
: "${DIM:=\033[2m}"
: "${GREEN:=\033[32m}"
: "${YELLOW:=\033[33m}"
: "${RED:=\033[31m}"
: "${CYAN:=\033[36m}"
: "${MAGENTA:=\033[35m}"

# Task types that get independent verification (more aggressive than CoVe).
# Space-separated list of primary agent keys. A value already present in the
# environment is kept, so callers can widen or narrow the list.
VERIFY_TASK_TYPES="${VERIFY_TASK_TYPES:-security architect debug secaudit migrate}"
|
|
31
|
+
|
|
32
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
33
|
+
# Detection: Should this task get independent verification?
|
|
34
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
35
|
+
|
|
36
|
+
# Decide whether a task type gets independent verification.
#
# Globals:   VERIFY_TASK_TYPES (read) - space-separated list of agent keys
# Arguments: $1 - primary agent key of the task
# Returns:   0 when the key is in VERIFY_TASK_TYPES, 1 otherwise
_verify_needs_independent() {
  local primary_key="$1"
  # Declared local so the loop variable does not leak into the caller's scope.
  local vtype

  # Unquoted on purpose: the list is split on whitespace into single keys.
  for vtype in $VERIFY_TASK_TYPES; do
    if [ "$primary_key" = "$vtype" ]; then
      return 0
    fi
  done
  return 1
}
|
|
45
|
+
|
|
46
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
47
|
+
# Verifier Agent Builder
|
|
48
|
+
# Creates a cold-review agent that only sees the output, not the task framing
|
|
49
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
50
|
+
|
|
51
|
+
# Write the agent definition file for the cold-review verifier.
#
# Arguments: $1 - primary agent key; selects the domain-specific focus text
#            $2 - session directory the file is written into (created if missing)
# Outputs:   path of the generated agent file on stdout
# Returns:   0 on success, 1 when the directory or file cannot be written
_verify_build_agent() {
  local primary_key="$1"
  local session_dir="$2"

  mkdir -p "$session_dir" || return 1

  # The name is based on the epoch second. A numeric suffix is appended when
  # that name is already taken, so two calls within the same second do not
  # overwrite each other's file.
  local stamp suffix=0
  stamp=$(date +%s)
  local verify_path="${session_dir}/merlin-verifier-${stamp}.md"
  while [ -e "$verify_path" ]; do
    suffix=$((suffix + 1))
    verify_path="${session_dir}/merlin-verifier-${stamp}-${suffix}.md"
  done

  # Domain-specific verification focus
  local verify_focus=""
  case "$primary_key" in
    security | secaudit)
      verify_focus="Focus on: SQL injection, XSS, CSRF, auth bypass, secrets exposure, input validation, rate limiting, privilege escalation. Check every user input path."
      ;;
    architect)
      verify_focus="Focus on: circular dependencies, single points of failure, scalability bottlenecks, data consistency, API contract clarity, separation of concerns."
      ;;
    debug)
      verify_focus="Focus on: root cause correctness (not just symptom fix), regression risk, edge cases, race conditions, resource leaks, error handling completeness."
      ;;
    migrate)
      verify_focus="Focus on: data loss risk, rollback safety, backward compatibility, index coverage, constraint violations, zero-downtime feasibility."
      ;;
    *)
      verify_focus="Focus on: correctness, edge cases, error handling, performance implications, and code quality."
      ;;
  esac

  # Unquoted heredoc: ${verify_focus} is expanded, and the backticks are
  # escaped so they are written literally instead of starting a command
  # substitution.
  cat > "$verify_path" << VAGENT_EOF || return 1
---
name: merlin-independent-verifier
description: Cold-review verifier — sees output without original task context
model: sonnet
tools: Read, Grep, Glob, Bash
permissionMode: bypassPermissions
maxTurns: 50
---

# Independent Verifier — Cold Review

You are a VERIFICATION specialist. You are reviewing code/output produced by another agent. You did NOT see the original task request. You only see the output.

Your job is to find problems. Be skeptical. Assume nothing.

${verify_focus}

## Verification Protocol

1. **Read the output carefully** — what was changed/created?
2. **Check each change independently** — read the actual files, not just the diff
3. **List all issues found** — severity: CRITICAL / WARNING / SUGGESTION
4. **Verdict**: PASS (ship it), PASS_WITH_WARNINGS (ship with notes), or FAIL (needs rework)

## Output Format

\`\`\`
## Verification Result

**Verdict:** [PASS | PASS_WITH_WARNINGS | FAIL]

### Issues Found
- [CRITICAL] ...
- [WARNING] ...
- [SUGGESTION] ...

### Files Verified
- path/to/file — OK | ISSUE

### Summary
One paragraph: is this safe to ship?
\`\`\`

Be thorough. Missing a critical issue is worse than a false positive.
VAGENT_EOF

  printf '%s\n' "$verify_path"
}
|
|
120
|
+
|
|
121
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
122
|
+
# Resolver Agent Builder
|
|
123
|
+
# When specialist and verifier disagree, resolver makes the final call
|
|
124
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
125
|
+
|
|
126
|
+
# Write the agent definition file for the conflict resolver, the agent that
# makes the final call when specialist and verifier disagree.
#
# Arguments: $1 - session directory the file is written into (created if missing)
# Outputs:   path of the generated agent file on stdout
# Returns:   0 on success, 1 when the directory or file cannot be written
_verify_build_resolver() {
  local session_dir="$1"

  mkdir -p "$session_dir" || return 1

  # The name is based on the epoch second. A numeric suffix is appended when
  # that name is already taken, so two calls within the same second do not
  # overwrite each other's file.
  local stamp suffix=0
  stamp=$(date +%s)
  local resolver_path="${session_dir}/merlin-resolver-${stamp}.md"
  while [ -e "$resolver_path" ]; do
    suffix=$((suffix + 1))
    resolver_path="${session_dir}/merlin-resolver-${stamp}-${suffix}.md"
  done

  # Unquoted heredoc: the backticks are escaped so they are written literally
  # instead of starting a command substitution.
  cat > "$resolver_path" << RAGENT_EOF || return 1
---
name: merlin-conflict-resolver
description: Resolves conflicts between specialist output and verifier findings
model: sonnet
tools: Read, Write, Edit, Grep, Glob, Bash
permissionMode: bypassPermissions
maxTurns: 100
---

# Conflict Resolver

Two agents disagree. You see both perspectives and make the final call.

## Your Process

1. Read the **Specialist's output** — what they built and why
2. Read the **Verifier's findings** — what issues they found
3. For each CRITICAL issue:
- Is the verifier right? → Fix it
- Is it a false positive? → Document why and keep specialist's version
4. For each WARNING:
- Quick fix possible? → Fix it
- Trade-off? → Document the decision
5. Apply all fixes directly to the codebase
6. Output a resolution summary

## Output Format

\`\`\`
## Resolution Summary

### Critical Issues Resolved
- [issue]: [action taken]

### Warnings Addressed
- [issue]: [action taken or reason to skip]

### Final Verdict
[Ship / Rework needed]
\`\`\`

Be decisive. Don't deliberate endlessly — pick the safer option and move on.
RAGENT_EOF

  printf '%s\n' "$resolver_path"
}
|
|
177
|
+
|
|
178
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
179
|
+
# Main Entry: Blend, Execute, and Verify
|
|
180
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
181
|
+
|
|
182
|
+
# Execute a task with independent verification.
#
# Flow: specialist does the work -> (for task types listed in
# VERIFY_TASK_TYPES) a separate verifier reviews the output without seeing the
# task -> on any verdict other than PASS / PASS_WITH_WARNINGS a resolver is
# spawned with both outputs plus the original task.
#
# Arguments: $1 - task description
#            $2 - extra context for the specialist (optional)
#            $3 - session directory (optional, default /tmp/merlin-blend-<pid>)
# Outputs:   stdout carries ONLY the final result (specialist output on PASS,
#            specialist output plus verifier notes on PASS_WITH_WARNINGS,
#            resolver output otherwise). All progress lines go to stderr so
#            callers that capture stdout get clean text.
#            Files written in the session directory: specialist-output.md,
#            verifier-output.md, resolver-output.md.
# Returns:   the specialist's status when it fails; the verifier's status when
#            the verifier process fails without producing a verdict; the
#            resolver's status when a resolver runs; 0 otherwise
blend_and_verify() {
  local task="$1"
  local extra_context="${2:-}"
  local session_dir="${3:-/tmp/merlin-blend-$$}"

  mkdir -p "$session_dir"

  # Get primary agent key to decide verification level: field 2 of the first
  # line of the ranking.
  local top_agents primary_key
  top_agents=$(blend_get_top_agents "$task" 4)
  primary_key=$(printf '%s\n' "$top_agents" | head -1 | cut -d: -f2)

  # ── Spawn 1: Specialist does the work ──
  echo -e "${MAGENTA}${BOLD} ⚡ Spawn 1: Specialist${RESET}" >&2
  local specialist_output specialist_exit=0
  specialist_output=$(blend_and_spawn "$task" "$extra_context" "$session_dir") || specialist_exit=$?

  if [ "$specialist_exit" -ne 0 ]; then
    echo -e "${RED} ✗ Specialist failed (exit $specialist_exit)${RESET}" >&2
    printf '%s\n' "$specialist_output"
    return "$specialist_exit"
  fi

  # Save specialist output for verification
  printf '%s\n' "$specialist_output" > "$session_dir/specialist-output.md"
  echo -e "${GREEN} ✓ Specialist complete${RESET}" >&2

  # ── Check if verification is needed ──
  if ! _verify_needs_independent "$primary_key" 2>/dev/null; then
    echo -e "${DIM} ○ Low-stakes task — skipping verification${RESET}" >&2
    printf '%s\n' "$specialist_output"
    return 0
  fi

  # ── Spawn 2: Verifier reviews cold ──
  echo -e "${MAGENTA}${BOLD} 🔍 Spawn 2: Independent Verifier${RESET}" >&2
  local verifier_agent
  verifier_agent=$(_verify_build_agent "$primary_key" "$session_dir")

  # Build verifier prompt — only sees the OUTPUT, not the original task
  local verify_prompt="Review the following output from another agent. You do NOT know what they were asked to do. Judge the output purely on its own merits.

## Agent Output to Verify

${specialist_output}"

  local verifier_output verifier_exit=0
  verifier_output=$(claude --agent "$verifier_agent" --output-format text -p "$verify_prompt" 2>&1) || verifier_exit=$?

  printf '%s\n' "$verifier_output" > "$session_dir/verifier-output.md"

  # ── Parse verdict ──
  # PASS_WITH_WARNINGS is tested first because it also contains "PASS".
  local verdict="UNKNOWN"
  if grep -qi "Verdict.*PASS_WITH_WARNINGS" <<< "$verifier_output"; then
    verdict="PASS_WITH_WARNINGS"
  elif grep -qi "Verdict.*FAIL" <<< "$verifier_output"; then
    verdict="FAIL"
  elif grep -qi "Verdict.*PASS" <<< "$verifier_output"; then
    verdict="PASS"
  fi

  echo -e "${CYAN} Verdict: ${verdict}${RESET}" >&2

  # ── Verifier itself broke: nothing to resolve ──
  # A non-zero exit with no verdict means the text captured above is an error
  # message, not a review. Do not hand that to the resolver (which edits
  # files); return the unverified specialist output with the verifier's status.
  if [ "$verdict" = "UNKNOWN" ] && [ "$verifier_exit" -ne 0 ]; then
    echo -e "${RED} ✗ Verifier did not complete (exit $verifier_exit) — output is unverified${RESET}" >&2
    printf '%s\n' "$specialist_output"
    return "$verifier_exit"
  fi

  # ── PASS: ship it ──
  if [ "$verdict" = "PASS" ]; then
    echo -e "${GREEN} ✓ Verification passed — shipping${RESET}" >&2
    printf '%s\n' "$specialist_output"
    return 0
  fi

  # ── PASS_WITH_WARNINGS: ship with notes ──
  if [ "$verdict" = "PASS_WITH_WARNINGS" ]; then
    echo -e "${YELLOW} ⚠ Passed with warnings${RESET}" >&2
    printf '%s\n\n---\n## Verification Notes\n%s\n' "$specialist_output" "$verifier_output"
    return 0
  fi

  # ── FAIL (or a verdict that could not be parsed): spawn resolver ──
  echo -e "${RED} ✗ Verification failed — spawning resolver${RESET}" >&2
  echo -e "${MAGENTA}${BOLD} 🔧 Spawn 3: Conflict Resolver${RESET}" >&2

  local resolver_agent
  resolver_agent=$(_verify_build_resolver "$session_dir")

  local resolve_prompt="Two agents disagree. Resolve the conflict and apply fixes.

## Specialist Output
${specialist_output}

## Verifier Findings
${verifier_output}

## Original Task
${task}

Fix all CRITICAL issues. Address WARNINGs where practical. Apply changes directly to files."

  local resolver_output resolver_exit=0
  resolver_output=$(claude --agent "$resolver_agent" --output-format text -p "$resolve_prompt" 2>&1) || resolver_exit=$?

  printf '%s\n' "$resolver_output" > "$session_dir/resolver-output.md"

  # Report the resolver's real outcome instead of always claiming success.
  if [ "$resolver_exit" -ne 0 ]; then
    echo -e "${RED} ✗ Resolver failed (exit $resolver_exit)${RESET}" >&2
    printf '%s\n' "$resolver_output"
    return "$resolver_exit"
  fi

  echo -e "${GREEN} ✓ Resolution complete${RESET}" >&2
  printf '%s\n' "$resolver_output"
  return 0
}
|
|
296
|
+
|
|
297
|
+
# Log verification result for blend learning.
#
# Appends one JSON object per call to <session_dir>/verification-log.jsonl
# with the keys "timestamp" (UTC, ISO-8601), "agent" and "verdict".
#
# Arguments: $1 - primary agent key
#            $2 - verdict string
#            $3 - session directory (created if missing)
# Returns:   0 on success, non-zero when the log cannot be written
_verify_log_result() {
  local primary_key="$1"
  local verdict="$2"
  local session_dir="$3"
  local log_file="${session_dir}/verification-log.jsonl"

  # Escape backslashes first, then double quotes, so a value containing either
  # character still yields a well-formed JSON string.
  local json_agent json_verdict
  json_agent=${primary_key//\\/\\\\}
  json_agent=${json_agent//\"/\\\"}
  json_verdict=${verdict//\\/\\\\}
  json_verdict=${json_verdict//\"/\\\"}

  mkdir -p "$session_dir" || return 1

  printf '{"timestamp":"%s","agent":"%s","verdict":"%s"}\n' \
    "$(date -u +%Y-%m-%dT%H:%M:%SZ)" "$json_agent" "$json_verdict" >> "$log_file"
}
|
package/files/loop/lib/blend.sh
CHANGED
|
@@ -240,6 +240,11 @@ blend_get_top_agents() {
|
|
|
240
240
|
local all_scored
|
|
241
241
|
all_scored=$(blend_score_all "$task")
|
|
242
242
|
|
|
243
|
+
# Apply learning weights if available (Stage 4)
|
|
244
|
+
if type learn_apply_weights &>/dev/null; then
|
|
245
|
+
all_scored=$(echo "$all_scored" | learn_apply_weights)
|
|
246
|
+
fi
|
|
247
|
+
|
|
243
248
|
local count=0
|
|
244
249
|
local result=""
|
|
245
250
|
|
|
@@ -675,6 +680,24 @@ blend_show_decision() {
|
|
|
675
680
|
if _blend_needs_cove "$primary_key" 2>/dev/null; then
|
|
676
681
|
echo -e " ${YELLOW}[CoVe]${RESET} Chain-of-Verification enabled (high-stakes task)"
|
|
677
682
|
fi
|
|
683
|
+
|
|
684
|
+
# Show verification status (Stage 1)
|
|
685
|
+
if type _verify_needs_independent &>/dev/null && _verify_needs_independent "$primary_key" 2>/dev/null; then
|
|
686
|
+
echo -e " ${MAGENTA}[verify]${RESET} Independent verification will run"
|
|
687
|
+
fi
|
|
688
|
+
|
|
689
|
+
# Show learning status (Stage 4)
|
|
690
|
+
if type learn_get_boost &>/dev/null; then
|
|
691
|
+
local boost
|
|
692
|
+
boost=$(learn_get_boost "$primary_key" 2>/dev/null || echo "0")
|
|
693
|
+
if [ "$boost" != "0" ]; then
|
|
694
|
+
if [ "$boost" -gt 0 ]; then
|
|
695
|
+
echo -e " ${GREEN}[learn]${RESET} +${boost} boost from historical success"
|
|
696
|
+
else
|
|
697
|
+
echo -e " ${RED}[learn]${RESET} ${boost} penalty from historical failures"
|
|
698
|
+
fi
|
|
699
|
+
fi
|
|
700
|
+
fi
|
|
678
701
|
}
|
|
679
702
|
|
|
680
703
|
# Show a one-line summary of the blend
|
|
@@ -712,20 +735,56 @@ blend_and_spawn() {
|
|
|
712
735
|
local agent_path
|
|
713
736
|
agent_path=$(blend_for_task "$task" "$session_dir")
|
|
714
737
|
|
|
738
|
+
# Extract agent info for learning (before spawn)
|
|
739
|
+
local _spawn_primary="" _spawn_secondaries=""
|
|
740
|
+
if type learn_record &>/dev/null; then
|
|
741
|
+
local _top_agents
|
|
742
|
+
_top_agents=$(blend_get_top_agents "$task" 4)
|
|
743
|
+
_spawn_primary=$(echo "$_top_agents" | head -1 | cut -d: -f2)
|
|
744
|
+
_spawn_secondaries=$(echo "$_top_agents" | tail -n +2 | cut -d: -f2 | paste -sd, -)
|
|
745
|
+
fi
|
|
746
|
+
|
|
715
747
|
# Build the prompt (task + any extra context)
|
|
716
748
|
local prompt="$task"
|
|
717
749
|
if [ -n "$extra_context" ]; then
|
|
718
750
|
prompt="${prompt}\n\n## Additional Context\n\n${extra_context}"
|
|
719
751
|
fi
|
|
720
752
|
|
|
721
|
-
# Spawn
|
|
753
|
+
# Spawn (with timing)
|
|
754
|
+
local _spawn_start
|
|
755
|
+
_spawn_start=$(date +%s)
|
|
756
|
+
|
|
722
757
|
local output
|
|
723
758
|
output=$(claude --agent "$agent_path" --output-format text -p "$prompt" 2>&1)
|
|
724
759
|
local exit_code=$?
|
|
725
760
|
|
|
726
|
-
#
|
|
727
|
-
|
|
761
|
+
# Record outcome for learning (Stage 4)
|
|
762
|
+
if type learn_record &>/dev/null && [ -n "$_spawn_primary" ]; then
|
|
763
|
+
local _spawn_end _spawn_duration
|
|
764
|
+
_spawn_end=$(date +%s)
|
|
765
|
+
_spawn_duration=$((_spawn_end - _spawn_start))
|
|
766
|
+
learn_record "$_spawn_primary" "$_spawn_secondaries" "$exit_code" "$_spawn_duration" "" 2>/dev/null || true
|
|
767
|
+
fi
|
|
728
768
|
|
|
729
769
|
echo "$output"
|
|
730
770
|
return $exit_code
|
|
731
771
|
}
|
|
772
|
+
|
|
773
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
774
|
+
# Advanced Modules (Stage 1-4 Upgrades)
|
|
775
|
+
# Source after core blend engine is loaded
|
|
776
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
777
|
+
|
|
778
|
+
_BLEND_LIB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
779
|
+
|
|
780
|
+
# Stage 4: Blend Learning — track success rates, adjust scoring
|
|
781
|
+
source "${_BLEND_LIB_DIR}/blend-learn.sh" 2>/dev/null || true
|
|
782
|
+
|
|
783
|
+
# Stage 1: True Independent Verification — separate verifier agent
|
|
784
|
+
source "${_BLEND_LIB_DIR}/blend-verify.sh" 2>/dev/null || true
|
|
785
|
+
|
|
786
|
+
# Stage 2: Parallel Agent Execution — concurrent subtasks
|
|
787
|
+
source "${_BLEND_LIB_DIR}/blend-parallel.sh" 2>/dev/null || true
|
|
788
|
+
|
|
789
|
+
# Stage 3: Structured Handoffs — clean JSON pipeline context
|
|
790
|
+
source "${_BLEND_LIB_DIR}/blend-handoff.sh" 2>/dev/null || true
|
|
@@ -42,6 +42,11 @@ source "$SCRIPT_DIR/lib/sights.sh" 2>/dev/null || true # Sights integratio
|
|
|
42
42
|
source "$SCRIPT_DIR/lib/agents.sh" 2>/dev/null || true # Agent profiles and routing
|
|
43
43
|
source "$SCRIPT_DIR/lib/blend.sh" 2>/dev/null || true # Dynamic agent blending engine
|
|
44
44
|
source "$SCRIPT_DIR/lib/teams.sh" 2>/dev/null || true # Agent Teams integration
|
|
45
|
+
# Blend Engine Stages (loaded by blend.sh, listed here for visibility)
|
|
46
|
+
# Stage 1: blend-verify.sh — True Independent Verification
|
|
47
|
+
# Stage 2: blend-parallel.sh — Parallel Agent Execution
|
|
48
|
+
# Stage 3: blend-handoff.sh — Structured Handoffs
|
|
49
|
+
# Stage 4: blend-learn.sh — Blend Learning
|
|
45
50
|
source "$SCRIPT_DIR/lib/boot.sh" 2>/dev/null || true # Boot sequence
|
|
46
51
|
source "$SCRIPT_DIR/lib/session-end.sh" 2>/dev/null || true # Session end protocol
|
|
47
52
|
source "$SCRIPT_DIR/lib/tui.sh" 2>/dev/null || true # Interactive TUI
|
|
@@ -45,6 +45,11 @@ source "$SCRIPT_DIR/lib/agents.sh" 2>/dev/null || true
|
|
|
45
45
|
source "$SCRIPT_DIR/lib/sights.sh" 2>/dev/null || true
|
|
46
46
|
source "$SCRIPT_DIR/lib/boot.sh" 2>/dev/null || true
|
|
47
47
|
source "$SCRIPT_DIR/lib/blend.sh" 2>/dev/null || true
|
|
48
|
+
# Blend Engine Stages (loaded by blend.sh, listed here for visibility)
|
|
49
|
+
# Stage 1: blend-verify.sh — True Independent Verification
|
|
50
|
+
# Stage 2: blend-parallel.sh — Parallel Agent Execution
|
|
51
|
+
# Stage 3: blend-handoff.sh — Structured Handoffs
|
|
52
|
+
# Stage 4: blend-learn.sh — Blend Learning
|
|
48
53
|
|
|
49
54
|
# Colors
|
|
50
55
|
: "${RESET:=\033[0m}"
|
|
@@ -471,6 +476,14 @@ See PROJECT.md in the repo root."
|
|
|
471
476
|
# Log to history
|
|
472
477
|
echo "{\"agent\":\"blend:${blend_label}\",\"task\":\"$(echo "$task" | head -c 100)\",\"duration\":$duration,\"exit_code\":$exit_code,\"timestamp\":$(date +%s)}" >> "$HISTORY_FILE"
|
|
473
478
|
|
|
479
|
+
# Record for blend learning (Stage 4)
|
|
480
|
+
if type learn_record &>/dev/null; then
|
|
481
|
+
local _primary _secondaries
|
|
482
|
+
_primary=$(blend_get_top_agents "$task" 4 2>/dev/null | head -1 | cut -d: -f2)
|
|
483
|
+
_secondaries=$(blend_get_top_agents "$task" 4 2>/dev/null | tail -n +2 | cut -d: -f2 | paste -sd, -)
|
|
484
|
+
learn_record "$_primary" "$_secondaries" "$exit_code" "$duration" "" 2>/dev/null || true
|
|
485
|
+
fi
|
|
486
|
+
|
|
474
487
|
# Show result
|
|
475
488
|
echo ""
|
|
476
489
|
echo -e "${DIM}────────────────────────────────────────────────────────${RESET}"
|
package/files/merlin/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
3.6.
|
|
1
|
+
3.6.4
|
package/package.json
CHANGED