@evo-hq/pi-evo 0.5.0-alpha.8 → 0.5.0-alpha.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@evo-hq/pi-evo",
3
- "version": "0.5.0-alpha.8",
3
+ "version": "0.5.0-alpha.9",
4
4
  "description": "Evo plugin for pi-coding-agent: optimize/discover/subagent skills + mid-run inject extension.",
5
5
  "publishConfig": {
6
6
  "access": "public"
@@ -2,7 +2,7 @@
2
2
  name: discover
3
3
  description: Initialize evo for the current repository by exploring the codebase, proposing unexplored optimization dimensions, constructing the benchmark inside a baseline worktree, and running the first experiment. Use when the user invokes /evo:discover, mentions setting up evo, wants to instrument a codebase for autonomous optimization, or asks to start a new evo run on a project.
4
4
  argument-hint: <optional context about what to optimize>
5
- evo_version: 0.5.0-alpha.8
5
+ evo_version: 0.5.0-alpha.9
6
6
  ---
7
7
 
8
8
  # Discover
@@ -116,20 +116,20 @@ evo --version
116
116
  The output must be exactly:
117
117
 
118
118
  ```
119
- evo-hq-cli 0.5.0-alpha.8
119
+ evo-hq-cli 0.5.0-alpha.9
120
120
  ```
121
121
 
122
122
  Three outcomes:
123
123
 
124
124
  1. **Matches exactly** — continue to step 1.
125
125
  2. **Reports a different version** (`evo-hq-cli 0.4.2`, etc.) — the host refetched a newer/older skill bundle than the CLI on PATH. Drift breaks skills silently. Stop and tell the user:
126
- > Your installed evo CLI is on a different version than this skill (`0.5.0-alpha.8`). Run:
126
+ > Your installed evo CLI is on a different version than this skill (`0.5.0-alpha.9`). Run:
127
127
  > ```
128
- > uv tool install --force evo-hq-cli==0.5.0-alpha.8
128
+ > uv tool install --force evo-hq-cli==0.5.0-alpha.9
129
129
  > ```
130
130
  > Then re-invoke this skill.
131
131
  3. **`command not found`, or reports a different package** (commonly `evo 1.x` — the unrelated SLAM tool) — the CLI isn't installed. Tell the user:
132
- > `evo-hq-cli` isn't on your PATH. Install it: `uv tool install evo-hq-cli==0.5.0-alpha.8` (or `pipx install evo-hq-cli==0.5.0-alpha.8`). Then re-invoke this skill.
132
+ > `evo-hq-cli` isn't on your PATH. Install it: `uv tool install evo-hq-cli==0.5.0-alpha.9` (or `pipx install evo-hq-cli==0.5.0-alpha.9`). Then re-invoke this skill.
133
133
 
134
134
  Do not try to auto-install. Host sandbox + network policy may block it; leaving the install as a user action keeps failure modes clear.
135
135
 
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  name: infra-setup
3
3
  description: Non-user-invocable provider/setup reference for evo backend switching, prerequisite checks, and auth/install guidance.
4
- evo_version: 0.5.0-alpha.8
4
+ evo_version: 0.5.0-alpha.9
5
5
  ---
6
6
 
7
7
  # Infra Setup
@@ -2,7 +2,7 @@
2
2
  name: optimize
3
3
  description: Drive structured autoresearch iteration after evo:discover and the baseline commit -- scan-subagent cross-cutting analysis between rounds, frontier-based parent selection, ideator dispatch on stall, verifier pre/post hooks, annotation discipline. Width is set via subagents=N (1 for serial workloads, larger for parallel); the loop's structural value applies at any width.
4
4
  argument-hint: "[subagents=N] [budget=N] [stall=N]"
5
- evo_version: 0.5.0-alpha.8
5
+ evo_version: 0.5.0-alpha.9
6
6
  ---
7
7
 
8
8
  Run the `evo` optimization loop. Each round, the orchestrator writes structured briefs and spawns subagents that execute within them. Each subagent is semi-autonomous: it reads the pointer traces, forms the concrete edit, runs experiments, and can iterate within its branch. Runs until interrupted or the stall limit is reached.
@@ -5,7 +5,9 @@
5
5
  * opt-in, Claude-Code-only driver; the prose skill remains the canonical, host-agnostic
6
6
  * default. The workflow encodes the loop CONTROL: while/stall, mandatory scan + cross-history
7
7
  * axis check, research escalation (ideators on stall / every ~5 commits), brief + diversity,
8
- * fan-out + verify, collect + frontier-select. All domain work goes through the `evo` CLI inside
8
+ * fan-out + verify, collect + frontier-select. A concurrent ANALYST thread (Opus, self-paced,
9
+ * read-only) runs alongside the round loop via Promise.all — host + cross-history checks during
10
+ * rounds, feeding hints into the next brief. All domain work goes through the `evo` CLI inside
9
11
  * agents — the script itself never touches the filesystem/shell.
10
12
  *
11
13
  * Treat this as a TEMPLATE: launch it as-is for the standard loop, or adapt the prompts /
@@ -34,6 +36,7 @@ export const meta = {
34
36
  { title: 'Optimize', detail: 'parallel optimization subagents (evo new/run)' },
35
37
  { title: 'Verify', detail: 'validity audit + benchmark-noise confirm' },
36
38
  { title: 'Collect', detail: 'prune dead lineages, record cross-cutting notes' },
39
+ { title: 'Analyst', detail: 'concurrent independent observer (Opus) — host + cross-history checks during rounds' },
37
40
  ],
38
41
  }
39
42
 
@@ -175,6 +178,16 @@ const PREVERDICT = {
175
178
  },
176
179
  }
177
180
 
181
+ // Analyst tick output: work-quality hints (fed into the next brief) + runtime/host alerts (surfaced).
182
+ const ANALYST_FINDINGS = {
183
+ type: 'object',
184
+ required: ['briefHints', 'alerts'],
185
+ properties: {
186
+ briefHints: { type: 'array', items: { type: 'string' } },
187
+ alerts: { type: 'array', items: { type: 'string' } },
188
+ },
189
+ }
190
+
178
191
  // ---------------------------------------------------------------------------
179
192
  // Helpers (pure JS — control-plane only)
180
193
  // ---------------------------------------------------------------------------
@@ -193,6 +206,18 @@ const LIMIT = Number(A.stall) || 5
193
206
  const IDEATE_STALL = Math.max(1, Math.min(3, LIMIT - 1))
194
207
  const IDEATE_EVERY_COMMITS = 5 // periodic research cadence (matches prose step 6b)
195
208
  const PREVERIFY_MAX = 3 // pre-run verify <-> revise attempts before discarding a rigged edit
209
+ // Concurrent analyst thread (runs alongside the round loop, NOT per-round).
210
+ const ANALYST_ENABLED = true
211
+ const ANALYST_MODEL = 'opus' // the analyst always reasons with Opus (judgment-heavy)
212
+ const ANALYST_INTERVAL_S = 300 // self-pace: observe ~every 5 min, during rounds
213
+ const ANALYST_HOP_S = 15 // the wait is INTERRUPTIBLE in hops of this size: when the optimize loop
214
+ // ends mid-wait it drops a sentinel the analyst polls, so the in-flight
215
+ // tick exits within ~ANALYST_HOP_S instead of stalling the run for the
216
+ // full interval (the script can't interrupt an agent's `sleep` directly).
217
+ const DONE_SENTINEL = '.evo/.wf_optimize_done' // optimize -> analyst "loop is over" signal (a file,
218
+ // since the in-memory `done` flag isn't visible to the agent's process)
219
+ const ANALYST_MAX_FAILS = 3 // consecutive failed ticks before the advisory analyst self-disables
220
+ // (guards against a hot-spin when ticks fail instantly, e.g. a bad schema)
196
221
  // Experiments per scan agent. Heuristic for the prose "small enough to read in one pass" rule —
197
222
  // the workflow can't recursively self-partition like the prose loop, so this is fixed up front.
198
223
  // Lower it for heavy traces (many tasks / long messages); raise it for tiny traces.
@@ -305,7 +330,7 @@ function aggregatePrompt(ids) {
305
330
  ].join(' ')
306
331
  }
307
332
 
308
- function briefPrompt(state, findings, patterns, parents, ideated) {
333
+ function briefPrompt(state, findings, patterns, parents, ideated, analystHints) {
309
334
  return [
310
335
  'You are the evo orchestrator\'s brief writer.',
311
336
  'State summary:', state.summary || '',
@@ -316,6 +341,9 @@ function briefPrompt(state, findings, patterns, parents, ideated) {
316
341
  ? '\nFRESH IDEATOR PROPOSALS may be available — read `.evo/run_*/ideator/proposals.jsonl` and reconcile BEFORE writing: skip any whose technique was already tried (`evo discards --like "<keyword>"`); score the rest by expected_score_uplift x confidence (frontier_extrapolation > failure_analysis > literature, all else equal); let the top 1-2 become brief objectives, citing the proposal\'s hypothesis/technique. Proposals are advisory — if none beat the in-graph scan findings, ignore them.'
317
342
  : '',
318
343
  '\nIf the patterns include an "axis-warning", the current axis is saturated — target the ORTHOGONAL axis it names rather than iterating the plateaued one.',
344
+ (analystHints && analystHints.length)
345
+ ? '\nLIVE ANALYST SIGNALS (from the concurrent observer — fold relevant ones into objectives/boundaries, e.g. switch off a saturated axis, avoid a flagged dead direction): ' + JSON.stringify(analystHints)
346
+ : '',
319
347
  `\nWrite up to ${WIDTH} briefs (use the full round width of ${WIDTH} whenever you can find that many genuinely DISTINCT objectives — multiple briefs MAY branch from the SAME parent when fewer than ${WIDTH} frontier parents exist, as long as each attacks a different surface; do not pad with redundant briefs). One per subagent, each with four fields:`,
320
348
  '1. objective -- one sentence naming WHERE in system behavior the gain hides, with evidence; NO file/function/edit names.',
321
349
  '2. parent -- which experiment id to branch from (choose from the selected parents).',
@@ -420,6 +448,28 @@ function collectPrompt(results, round) {
420
448
  ].join(' ')
421
449
  }
422
450
 
451
+ // One analyst tick (a FRESH Opus agent each call — no memory across ticks, so `reported` carries
452
+ // the dedup state in the loop's closure). Read-only: observes host + cross-history signals DURING
453
+ // rounds, returns work-quality briefHints (folded into the next brief) + runtime alerts (surfaced).
454
+ function analystPrompt(ctx, intervalS, reported) {
455
+ return [
456
+ 'You are the evo ANALYST — an independent observer running CONCURRENTLY with the optimize loop.',
457
+ 'Read-only: do NOT edit code, run experiments, or mutate evo state.',
458
+ `FIRST pace yourself with an INTERRUPTIBLE wait, so you stop promptly when the optimize loop ends. Run this single Bash command with a tool timeout of at least ${(intervalS + 30) * 1000} ms:`,
459
+ ` \`if [ -f ${DONE_SENTINEL} ]; then echo OPTIMIZE_DONE; else for i in $(seq 1 ${Math.ceil(intervalS / ANALYST_HOP_S)}); do sleep ${ANALYST_HOP_S}; [ -f ${DONE_SENTINEL} ] && { echo OPTIMIZE_DONE; break; }; done; fi\``,
460
+ `If that prints OPTIMIZE_DONE, the optimize loop has finished — return {"briefHints":[],"alerts":[]} immediately WITHOUT gathering any signals. Otherwise the full interval elapsed: now gather signals and report.`,
461
+ `Current loop state: round=${ctx.round}, stall=${ctx.stall}/${LIMIT}, best=${ctx.bestScore}.`,
462
+ `Already reported (do NOT repeat — only emit findings NEW since these): ${JSON.stringify(reported || [])}.`,
463
+ 'Walk these checks (skip any whose inputs are unavailable; cite evidence; nothing speculative):',
464
+ '- Zombie GPU: `nvidia-smi --query-compute-apps=pid,used_memory,process_name --format=csv,noheader` + `ps` — a PID holding >=4GB not tied to an active `evo run`. ALERT with a verify clause (do NOT kill).',
465
+ '- Buried stderr warning: tail recent experiment stderr under `.evo/run_*/experiments/*/attempts/*/` for tokenizer / EOS / chat_template / parity-mismatch lines not already annotated. ALERT.',
466
+ '- Stuck experiment / time-budget overrun: from `evo status`/`evo show`, an experiment active far longer than its peers, or a round overrunning the others. ALERT.',
467
+ '- Stuck axis: from `evo tree`, 3+ structurally-distinct committed hypotheses plateaued at ~the same score → name the saturated axis + one orthogonal axis. BRIEF HINT.',
468
+ '- Dead direction / ignored mechanism: annotations repeatedly naming a mechanism the recent work ignores, or a direction that keeps regressing. BRIEF HINT.',
469
+ 'Return {briefHints:[...], alerts:[...]}. briefHints feed the NEXT round\'s briefs (work-quality redirections); alerts surface to the user (runtime/host issues). Empty arrays are fine — most ticks should be quiet.',
470
+ ].join('\n')
471
+ }
472
+
423
473
  // Per-brief lane: implement -> pre-verify <-> revise loop -> run -> post-audit, repeated up to the
424
474
  // iteration budget (deepening the branch each time a committed improver lands). The independent
425
475
  // evo:verifier gates EACH run for design-time cheating BEFORE the experiment is evaluated; its
@@ -487,82 +537,136 @@ let stall = 0
487
537
  let round = 0
488
538
  let lastIdeatedCommit = 0 // committedCount at the last ideator dispatch (periodic cadence)
489
539
  let ideatedThisStall = false // fire ideators once per stall episode, not every stalled round
540
+ let lastBestScore = null // latest best score, surfaced to the concurrent analyst thread
541
+ let done = false // set when the optimize loop ends -> stops the analyst thread
542
+ const analystSignals = [] // briefHints the analyst pushes; drained into the next round's brief
543
+
544
+ log(`evo-optimize start: subagents=${WIDTH} budget=${ITER} stall=${LIMIT} analyst=${ANALYST_ENABLED ? ANALYST_MODEL : 'off'} | argsType=${typeof args} A.subagents=${A.subagents} A.budget=${A.budget} A.stall=${A.stall}`)
545
+
546
+ // The optimize round loop (runs concurrently with analystLoop via Promise.all).
547
+ async function optimizeLoop() {
548
+ while (stall < LIMIT) {
549
+ round += 1
550
+
551
+ phase('Orient')
552
+ const state = await agent(statePrompt(), { schema: STATE, agentType: 'Explore', model: 'sonnet', phase: 'Orient', label: `state:r${round}` })
553
+ lastBestScore = state.bestScore
554
+ if (state.bestScore === state.ceiling) { log(`ceiling reached (best=${state.bestScore}) — stopping`); break }
555
+ const parents = (state.frontier || []).slice(0, WIDTH)
556
+ if (parents.length === 0) { log('no explorable frontier nodes — stopping'); break }
557
+
558
+ // N1 + N1.5 — mandatory parallel scan + structural aggregation (barrier). Scan runs EVERY round
559
+ // (hard rule); when there are no evaluated-undecided nodes yet (round 1) it falls back to the
560
+ // committed frontier so at least one scan agent still runs before briefs.
561
+ phase('Scan')
562
+ const evaluatedIds = state.evaluatedIds || []
563
+ const frontierIds = (state.frontier || []).map((f) => f.id).filter(Boolean)
564
+ const scanTargets = evaluatedIds.length ? evaluatedIds : frontierIds
565
+ const batches = chunk(scanTargets, SCAN_BATCH)
566
+ const scanThunks = batches.map((b) => () => agent(scanBrief(b), { schema: FINDINGS, agentType: 'Explore', phase: 'Scan', label: `scan ${b.length}: ${batchLabel(b)}` }))
567
+ const aggregateIds = [...new Set([...evaluatedIds, ...frontierIds])]
568
+ const aggThunk = aggregateIds.length
569
+ ? [() => agent(aggregatePrompt(aggregateIds), { schema: PATTERNS, agentType: 'Explore', phase: 'Scan', label: 'aggregate' })]
570
+ : []
571
+ const scanResults = (await parallel([...scanThunks, ...aggThunk])).filter(Boolean)
572
+ const findings = scanResults.flatMap((r) => (r && r.findings) ? r.findings : [])
573
+ const patterns = scanResults.flatMap((r) => (r && r.patterns) ? r.patterns : [])
574
+
575
+ // N1.7 — research escalation (6b): on stall (before the hard limit) or every ~5 commits, fire the
576
+ // three ideators in parallel. parallel() blocks until all return (proposals land before briefing).
577
+ const commits = Number(state.committedCount) || 0
578
+ const stalledTrigger = stall >= IDEATE_STALL && !ideatedThisStall
579
+ const periodicTrigger = commits - lastIdeatedCommit >= IDEATE_EVERY_COMMITS
580
+ let ideated = false
581
+ if (stalledTrigger || periodicTrigger) {
582
+ phase('Ideate')
583
+ await parallel(['frontier_extrapolation', 'failure_analysis', 'literature'].map((b) => () =>
584
+ agent(ideatorPrompt(b), { agentType: 'evo:ideator', phase: 'Ideate', label: `ideate:${b}` })))
585
+ lastIdeatedCommit = commits
586
+ if (stalledTrigger) ideatedThisStall = true
587
+ ideated = true
588
+ log(`ideators fired (trigger: ${stalledTrigger ? 'stall' : 'periodic'}, stall=${stall}, commits=${commits})`)
589
+ }
490
590
 
491
- log(`evo-optimize start: subagents=${WIDTH} budget=${ITER} stall=${LIMIT} | argsType=${typeof args} A.subagents=${A.subagents} A.budget=${A.budget} A.stall=${A.stall}`)
492
-
493
- while (stall < LIMIT) {
494
- round += 1
495
-
496
- phase('Orient')
497
- const state = await agent(statePrompt(), { schema: STATE, agentType: 'Explore', model: 'haiku', phase: 'Orient', label: `state:r${round}` })
498
- if (state.bestScore === state.ceiling) { log(`ceiling reached (best=${state.bestScore}) — stopping`); break }
499
- const parents = (state.frontier || []).slice(0, WIDTH)
500
- if (parents.length === 0) { log('no explorable frontier nodes — stopping'); break }
501
-
502
- // N1 + N1.5 — mandatory parallel scan + structural aggregation (barrier).
503
- // The scan runs EVERY round (hard rule). When there are no evaluated-undecided nodes yet
504
- // (e.g. round 1, right after the baseline), fall back to scanning the committed frontier nodes
505
- // so at least one scan agent still runs before briefs.
506
- phase('Scan')
507
- const evaluatedIds = state.evaluatedIds || []
508
- const frontierIds = (state.frontier || []).map((f) => f.id).filter(Boolean)
509
- const scanTargets = evaluatedIds.length ? evaluatedIds : frontierIds
510
- const batches = chunk(scanTargets, SCAN_BATCH)
511
- const scanThunks = batches.map((b) => () => agent(scanBrief(b), { schema: FINDINGS, agentType: 'Explore', phase: 'Scan', label: `scan ${b.length}: ${batchLabel(b)}` }))
512
- // Aggregate sees BOTH evaluated-undecided nodes (for failure intersections) AND committed
513
- // frontier nodes (so the improver enumeration has committed experiments to draw from).
514
- const aggregateIds = [...new Set([...evaluatedIds, ...frontierIds])]
515
- const aggThunk = aggregateIds.length
516
- ? [() => agent(aggregatePrompt(aggregateIds), { schema: PATTERNS, agentType: 'Explore', phase: 'Scan', label: 'aggregate' })]
517
- : []
518
- const scanResults = (await parallel([...scanThunks, ...aggThunk])).filter(Boolean)
519
- const findings = scanResults.flatMap((r) => (r && r.findings) ? r.findings : [])
520
- const patterns = scanResults.flatMap((r) => (r && r.patterns) ? r.patterns : [])
521
-
522
- // N1.7 — research escalation (6b): on stall (before the hard limit) or every ~5 commits, fire
523
- // the three ideators in parallel. They append proposals to .evo/run_*/ideator/proposals.jsonl;
524
- // parallel() blocks until all return (the "block until proposals land" the prose does via evo wait).
525
- const commits = Number(state.committedCount) || 0
526
- const stalledTrigger = stall >= IDEATE_STALL && !ideatedThisStall
527
- const periodicTrigger = commits - lastIdeatedCommit >= IDEATE_EVERY_COMMITS
528
- let ideated = false
529
- if (stalledTrigger || periodicTrigger) {
530
- phase('Ideate')
531
- await parallel(['frontier_extrapolation', 'failure_analysis', 'literature'].map((b) => () =>
532
- agent(ideatorPrompt(b), { agentType: 'evo:ideator', phase: 'Ideate', label: `ideate:${b}` })))
533
- lastIdeatedCommit = commits
534
- if (stalledTrigger) ideatedThisStall = true
535
- ideated = true
536
- log(`ideators fired (trigger: ${stalledTrigger ? 'stall' : 'periodic'}, stall=${stall}, commits=${commits})`)
591
+ // N2 — brief writer: reconciles ideator proposals (6c), acts on axis-warning, and folds in any
592
+ // live analyst hints accumulated since the last round; JS diversity dedupe afterwards.
593
+ phase('Brief')
594
+ const analystHints = analystSignals.splice(0)
595
+ const briefOut = await agent(briefPrompt(state, findings, patterns, parents, ideated, analystHints), { schema: BRIEFS, phase: 'Brief', label: `briefs:r${round}` })
596
+ const briefs = dedupeBriefs((briefOut && briefOut.briefs) || [])
597
+ if (briefs.length === 0) { log('no briefs produced — stopping'); break }
598
+
599
+ // N3..N4 fan out one lane per brief; each lane: implement -> pre-verify<->revise -> run -> post-audit.
600
+ const results = (await parallel(briefs.map((b) => () => runBrief(b, state)))).filter(Boolean)
601
+
602
+ // N5 — collect: prune dead lineages, record notes.
603
+ phase('Collect')
604
+ await agent(collectPrompt(results, round), { phase: 'Collect', label: `collect:r${round}` })
605
+
606
+ // Loop control: stall resets only when this round produced a VERIFIED committed score that beats
607
+ // the PRIOR BEST in the metric direction (a beat-its-own-parent commit is branch progress, not a
608
+ // new best, and does NOT reset stall). No budget in the condition.
609
+ const dir = state.direction || 'max'
610
+ const gains = results
611
+ .filter((r) => r.committedImprover && r.valid !== false && typeof r.bestScore === 'number')
612
+ .map((r) => r.bestScore)
613
+ const roundBest = gains.length ? (dir === 'min' ? Math.min(...gains) : Math.max(...gains)) : null
614
+ const improved = roundBest !== null && (dir === 'min' ? roundBest < state.bestScore : roundBest > state.bestScore)
615
+ stall = improved ? 0 : stall + 1
616
+ if (improved) ideatedThisStall = false
617
+ log(`round ${round}: improved=${improved} roundBest=${roundBest} prevBest=${state.bestScore} stall=${stall}/${LIMIT} spent=${budget.spent()}`)
537
618
  }
619
+ done = true
620
+ // Wake any in-flight analyst tick now (its `sleep` can't see the in-memory `done`): the sentinel
621
+ // makes the tick's interruptible wait exit within ~ANALYST_HOP_S instead of running the full interval.
622
+ if (ANALYST_ENABLED) await agent(`mkdir -p .evo && : > ${DONE_SENTINEL} && echo signalled`, { phase: 'Collect', label: 'signal:optimize-done' })
623
+ log(`optimize loop finished after ${round} round(s), final stall=${stall}/${LIMIT}`)
624
+ return { rounds: round, finalStall: stall }
625
+ }
538
626
 
539
- // N2 — brief writer (judgment): reconciles ideator proposals (6c) + acts on axis-warning; JS diversity dedupe.
540
- phase('Brief')
541
- const briefOut = await agent(briefPrompt(state, findings, patterns, parents, ideated), { schema: BRIEFS, phase: 'Brief', label: `briefs:r${round}` })
542
- const briefs = dedupeBriefs((briefOut && briefOut.briefs) || [])
543
- if (briefs.length === 0) { log('no briefs produced — stopping'); break }
544
-
545
- // N3..N4 fan out one lane per brief; each lane: implement -> pre-verify<->revise -> run -> post-audit.
546
- const results = (await parallel(briefs.map((b) => () => runBrief(b, state)))).filter(Boolean)
547
-
548
- // N5 — collect: prune dead lineages, record notes.
549
- phase('Collect')
550
- await agent(collectPrompt(results, round), { phase: 'Collect', label: `collect:r${round}` })
551
-
552
- // Loop control: stall resets only when this round produced a VERIFIED committed score that
553
- // beats the PRIOR BEST in the metric direction. A committed improver that beat its own parent
554
- // but not the global best does NOT reset stall (it's progress on a branch, not a new best).
555
- // No budget in the condition.
556
- const dir = state.direction || 'max'
557
- const gains = results
558
- .filter((r) => r.committedImprover && r.valid !== false && typeof r.bestScore === 'number')
559
- .map((r) => r.bestScore)
560
- const roundBest = gains.length ? (dir === 'min' ? Math.min(...gains) : Math.max(...gains)) : null
561
- const improved = roundBest !== null && (dir === 'min' ? roundBest < state.bestScore : roundBest > state.bestScore)
562
- stall = improved ? 0 : stall + 1
563
- if (improved) ideatedThisStall = false // new best — a fresh stall episode may re-trigger ideators later
564
- log(`round ${round}: improved=${improved} roundBest=${roundBest} prevBest=${state.bestScore} stall=${stall}/${LIMIT} spent=${budget.spent()}`)
627
+ // Concurrent analyst thread (P1-sliver/P2-P5/P7): an independent, self-paced Opus observer that runs
628
+ // DURING rounds (not per-round). Each tick is a FRESH agent (no cross-tick memory), so `reported`
629
+ // holds the dedup state in this closure. Work-quality findings -> analystSignals (next brief);
630
+ // runtime/host alerts -> the run log. Stops when optimizeLoop sets `done`.
631
+ async function analystLoop() {
632
+ if (!ANALYST_ENABLED) return
633
+ const reported = [] // closure memory across the stateless ticks (caps re-alerting)
634
+ let t = 0
635
+ let fails = 0 // consecutive tick failures; trips the self-disable below
636
+ while (!done) {
637
+ t += 1
638
+ // The analyst is purely advisory and read-only: a failed tick must NEVER reject this loop and
639
+ // abort the optimizer. Swallow any tick error, log it, and continue (or exit if `done` flipped).
640
+ let tick = null
641
+ try {
642
+ tick = await agent(analystPrompt({ round, stall, bestScore: lastBestScore }, ANALYST_INTERVAL_S, reported.slice(-30)), {
643
+ agentType: 'Explore', model: ANALYST_MODEL, schema: ANALYST_FINDINGS, phase: 'Analyst', label: `analyst#${t}`,
644
+ })
645
+ } catch (e) {
646
+ log(`ANALYST tick #${t} errored (ignored, optimize unaffected): ${(e && e.message) || e}`)
647
+ }
648
+ if (tick) {
649
+ fails = 0 // a real tick resets the failure streak
650
+ for (const h of (tick.briefHints || [])) { analystSignals.push(h); reported.push(h) }
651
+ for (const a of (tick.alerts || [])) { log(`ANALYST ALERT: ${a}`); reported.push(a) }
652
+ } else if (++fails >= ANALYST_MAX_FAILS) {
653
+ // The pacing wait lives INSIDE the agent, so a tick that fails before sleeping (e.g. a schema
654
+ // reject) leaves nothing to pace the retry — left unchecked the loop hot-spins agents. The
655
+ // analyst is optional, so after a short streak of failures, disable it for the rest of the run.
656
+ log(`ANALYST disabled after ${fails} consecutive failed ticks — optimize continues without it.`)
657
+ return
658
+ }
659
+ }
565
660
  }
566
661
 
567
- log(`optimize workflow finished after ${round} round(s), final stall=${stall}/${LIMIT}`)
568
- return { rounds: round, finalStall: stall }
662
+ // Clear any stale sentinel from a prior run BEFORE the threads start, else the analyst's first wait
663
+ // would see it and exit instantly. The script can't touch the filesystem itself, so an agent does it.
664
+ if (ANALYST_ENABLED) await agent(`rm -f ${DONE_SENTINEL}; echo cleared`, { phase: 'Orient', label: 'init:clear-sentinel' })
665
+
666
+ // optimizeLoop is the run's result; analystLoop is advisory. The `.catch` is the definitive guard that
667
+ // the observer thread can NEVER reject the combined promise and fail an otherwise-good optimize run.
668
+ const [optimizeResult] = await Promise.all([
669
+ optimizeLoop(),
670
+ analystLoop().catch((e) => log(`ANALYST thread exited abnormally (ignored): ${(e && e.message) || e}`)),
671
+ ])
672
+ return optimizeResult
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  name: report
3
3
  description: Print the dashboard's dot chart (score over experiment order, status colors, best-path stair) inline in the terminal for every run in the workspace. Use when the user invokes /evo:report, asks for a quick score chart without opening the dashboard, or wants the scatter plot in chat output.
4
- evo_version: 0.5.0-alpha.8
4
+ evo_version: 0.5.0-alpha.9
5
5
  ---
6
6
 
7
7
  # Report
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  name: subagent
3
3
  description: Protocol that evo optimization subagents follow when dispatched from /optimize. Auto-loaded by spawned subagents via their host's skill loader. The orchestrator may also invoke this skill to understand the brief shape its dispatched subagents expect + what they're required to emit -- useful when writing briefs or debugging a subagent's behavior.
4
- evo_version: 0.5.0-alpha.8
4
+ evo_version: 0.5.0-alpha.9
5
5
  ---
6
6
 
7
7
  # Evo Subagent Protocol