helixevo 0.2.6 → 0.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -4
- package/dashboard/app/api/skills/route.ts +1 -1
- package/dashboard/app/guide/page.tsx +46 -22
- package/dashboard/lib/data.ts +5 -3
- package/dist/cli.js +26 -23
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -12,7 +12,7 @@ HelixEvo builds on ideas from [EvoSkill](https://arxiv.org/abs/2603.02766) and [
|
|
|
12
12
|
|
|
13
13
|
Every proposed change goes through:
|
|
14
14
|
1. **3 independent LLM judges** (Task Completion, Correction Alignment, Side-Effect Check)
|
|
15
|
-
2. **Regression testing** against
|
|
15
|
+
2. **Regression testing** against skill tests
|
|
16
16
|
3. **3-day canary deployment** with auto-rollback
|
|
17
17
|
|
|
18
18
|
## Prerequisites
|
|
@@ -57,7 +57,7 @@ npm link
|
|
|
57
57
|
## Quick Start
|
|
58
58
|
|
|
59
59
|
```bash
|
|
60
|
-
# 1. Initialize — imports existing skills + generates
|
|
60
|
+
# 1. Initialize — imports existing skills + generates skill tests
|
|
61
61
|
helixevo init
|
|
62
62
|
|
|
63
63
|
# 2. Capture failures from a session
|
|
@@ -80,7 +80,7 @@ helixevo dashboard
|
|
|
80
80
|
| `helixevo watch` | Always-on learning: auto-capture + auto-evolve |
|
|
81
81
|
| `helixevo metrics` | Correction rates, skill trends, evolution impact |
|
|
82
82
|
| `helixevo health` | Network health: cohesion, coverage, balance, transfer |
|
|
83
|
-
| `helixevo init` | Import existing skills + generate
|
|
83
|
+
| `helixevo init` | Import existing skills + generate skill tests |
|
|
84
84
|
| `helixevo capture <session>` | Extract failures from a session file |
|
|
85
85
|
| `helixevo evolve` | Evolve skills from captured failures |
|
|
86
86
|
| `helixevo generalize` | Promote cross-project patterns ↑ |
|
|
@@ -126,7 +126,7 @@ All data is stored in `~/.helix/`:
|
|
|
126
126
|
├── failures.jsonl # Captured failures
|
|
127
127
|
├── frontier.json # Pareto frontier (top-k configurations)
|
|
128
128
|
├── evolution-history.json # All evolution runs + proposals
|
|
129
|
-
├──
|
|
129
|
+
├── skill-tests.jsonl # Regression test cases
|
|
130
130
|
├── skill-graph.json # Cached network (nodes + edges)
|
|
131
131
|
├── canary-registry.json # Active canary deployments
|
|
132
132
|
├── knowledge-buffer.json # Research discoveries + drafts
|
package/dashboard/app/api/skills/route.ts
CHANGED
|
@@ -122,7 +122,7 @@ function analyzeNetworkAdaptation(
|
|
|
122
122
|
})
|
|
123
123
|
result.suggestions.push({
|
|
124
124
|
type: 'rewire',
|
|
125
|
-
description: `Review
|
|
125
|
+
description: `Review skill tests for [${partners.join(', ')}] after this change`
|
|
126
126
|
})
|
|
127
127
|
}
|
|
128
128
|
}
|
package/dashboard/app/guide/page.tsx
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
'use client'
|
|
2
2
|
|
|
3
|
-
import { useState } from 'react'
|
|
3
|
+
import { useState, useEffect, useRef } from 'react'
|
|
4
4
|
|
|
5
5
|
// ─── Table of Contents ──────────────────────────────────────────
|
|
6
6
|
const TOC = [
|
|
@@ -148,7 +148,7 @@ function ArchitectureDiagram() {
|
|
|
148
148
|
<div className="guide-diagram-box guide-diagram-check" style={{ direction: 'ltr' }}>
|
|
149
149
|
<div className="guide-diagram-box-label">Validate</div>
|
|
150
150
|
<div className="guide-diagram-box-title">Regression Tests</div>
|
|
151
|
-
<div className="guide-diagram-box-desc">
|
|
151
|
+
<div className="guide-diagram-box-desc">Skill tests + cross-skill</div>
|
|
152
152
|
</div>
|
|
153
153
|
<div className="guide-diagram-arrow" style={{ direction: 'ltr' }}>←</div>
|
|
154
154
|
<div className="guide-diagram-box guide-diagram-judge" style={{ direction: 'ltr' }}>
|
|
@@ -195,6 +195,30 @@ function HierarchyDiagram() {
|
|
|
195
195
|
// ─── Main Guide Page ────────────────────────────────────────────
|
|
196
196
|
export default function GuidePage() {
|
|
197
197
|
const [activeSection, setActiveSection] = useState('overview')
|
|
198
|
+
const contentRef = useRef<HTMLDivElement>(null)
|
|
199
|
+
|
|
200
|
+
useEffect(() => {
|
|
201
|
+
const sectionIds = TOC.map(t => t.id)
|
|
202
|
+
const observer = new IntersectionObserver(
|
|
203
|
+
(entries) => {
|
|
204
|
+
// Find the most visible section
|
|
205
|
+
const visible = entries
|
|
206
|
+
.filter(e => e.isIntersecting)
|
|
207
|
+
.sort((a, b) => b.intersectionRatio - a.intersectionRatio)
|
|
208
|
+
if (visible.length > 0) {
|
|
209
|
+
setActiveSection(visible[0].target.id)
|
|
210
|
+
}
|
|
211
|
+
},
|
|
212
|
+
{ rootMargin: '-80px 0px -60% 0px', threshold: [0, 0.25, 0.5] }
|
|
213
|
+
)
|
|
214
|
+
|
|
215
|
+
for (const id of sectionIds) {
|
|
216
|
+
const el = document.getElementById(id)
|
|
217
|
+
if (el) observer.observe(el)
|
|
218
|
+
}
|
|
219
|
+
|
|
220
|
+
return () => observer.disconnect()
|
|
221
|
+
}, [])
|
|
198
222
|
|
|
199
223
|
return (
|
|
200
224
|
<div className="guide-layout">
|
|
@@ -202,7 +226,7 @@ export default function GuidePage() {
|
|
|
202
226
|
<nav className="guide-toc">
|
|
203
227
|
<div className="guide-toc-header">
|
|
204
228
|
<div className="guide-toc-title">Documentation</div>
|
|
205
|
-
<div className="guide-toc-version">v0.2.
|
|
229
|
+
<div className="guide-toc-version">v0.2.6</div>
|
|
206
230
|
</div>
|
|
207
231
|
{TOC.map(item => (
|
|
208
232
|
<a
|
|
@@ -222,7 +246,7 @@ export default function GuidePage() {
|
|
|
222
246
|
{/* Hero */}
|
|
223
247
|
<div className="guide-hero">
|
|
224
248
|
<div className="guide-hero-badge">Documentation</div>
|
|
225
|
-
<h1 className="guide-hero-title">
|
|
249
|
+
<h1 className="guide-hero-title">HelixEvo Guide</h1>
|
|
226
250
|
<p className="guide-hero-desc">
|
|
227
251
|
A comprehensive guide to the self-evolving skill ecosystem for AI agents.
|
|
228
252
|
Capture failures, evolve skills through multi-judge evaluation, and maintain
|
|
@@ -293,7 +317,7 @@ cd helixevo && npm install && npm run build && npm link`}</Code>
|
|
|
293
317
|
<Code title="Terminal">{`helixevo init`}</Code>
|
|
294
318
|
<p className="guide-text-sm">
|
|
295
319
|
This scans your existing SKILL.md files (from <code>~/.agents/skills/</code>), imports them into HelixEvo,
|
|
296
|
-
and generates
|
|
320
|
+
and generates skill tests for each skill. It also creates the data directory at <code>~/.helix/</code>.
|
|
297
321
|
</p>
|
|
298
322
|
</Step>
|
|
299
323
|
|
|
@@ -346,7 +370,7 @@ helixevo status`}</Code>
|
|
|
346
370
|
},
|
|
347
371
|
{
|
|
348
372
|
cmd: 'helixevo init',
|
|
349
|
-
desc: 'Import existing skills and generate
|
|
373
|
+
desc: 'Import existing skills and generate skill tests. Scans ~/.agents/skills/ and creates the HelixEvo data directory.',
|
|
350
374
|
flags: ['--verbose'],
|
|
351
375
|
},
|
|
352
376
|
{
|
|
@@ -521,7 +545,7 @@ Project B: "Use FlashList not FlatList" (React Native perf)
|
|
|
521
545
|
→ Abstract skill created: "react-native-performance" (domain layer)
|
|
522
546
|
→ Project A skill inherits from it
|
|
523
547
|
→ Project B skill inherits from it
|
|
524
|
-
→ Domain skill tested against all
|
|
548
|
+
→ Domain skill tested against all skill tests
|
|
525
549
|
→ Deployed if regression passes`}</Code>
|
|
526
550
|
<Callout type="tip">
|
|
527
551
|
Auto-generalization is the key to the <strong>double helix</strong> metaphor: as projects evolve, skills
|
|
@@ -577,7 +601,7 @@ Project B: "Use FlashList not FlatList" (React Native perf)
|
|
|
577
601
|
<div className="guide-pipeline-connector" />
|
|
578
602
|
<PipelineStep
|
|
579
603
|
icon="5" title="Regression Testing" color="var(--red)"
|
|
580
|
-
desc="The modified skill is tested against all
|
|
604
|
+
desc="The modified skill is tested against all skill tests for that skill AND co-evolved partner skills. Must maintain ≥95% pass rate."
|
|
581
605
|
/>
|
|
582
606
|
<div className="guide-pipeline-connector" />
|
|
583
607
|
<PipelineStep
|
|
@@ -669,29 +693,29 @@ Project B: "Use FlashList not FlatList" (React Native perf)
|
|
|
669
693
|
</Section>
|
|
670
694
|
|
|
671
695
|
{/* ─── Regression Testing ─── */}
|
|
672
|
-
<Section id="regression" title="Regression Testing" subtitle="
|
|
673
|
-
<h3 className="guide-h3">
|
|
696
|
+
<Section id="regression" title="Regression Testing" subtitle="Skill tests and cross-skill validation ensure quality.">
|
|
697
|
+
<h3 className="guide-h3">Skill Tests</h3>
|
|
674
698
|
<p className="guide-text">
|
|
675
|
-
|
|
699
|
+
Skill tests are regression test scenarios tied to specific skills. They're created when:
|
|
676
700
|
</p>
|
|
677
701
|
<ul className="guide-list">
|
|
678
702
|
<li><strong>Init:</strong> Automatically generated from existing SKILL.md files during <code>helixevo init</code></li>
|
|
679
|
-
<li><strong>Evolution:</strong> When a failure is resolved, the scenario is promoted to a
|
|
703
|
+
<li><strong>Evolution:</strong> When a failure is resolved, the scenario is promoted to a skill test</li>
|
|
680
704
|
</ul>
|
|
681
705
|
<p className="guide-text">
|
|
682
|
-
Each
|
|
706
|
+
Each skill test stores the input, context, and expected behavior. During regression testing,
|
|
683
707
|
an LLM judge evaluates whether the modified skill would still handle each scenario correctly.
|
|
684
708
|
</p>
|
|
685
709
|
|
|
686
710
|
<h3 className="guide-h3">Cross-Skill Regression</h3>
|
|
687
711
|
<p className="guide-text">
|
|
688
|
-
When skill A is modified, HelixEvo also tests
|
|
712
|
+
When skill A is modified, HelixEvo also tests skill tests from co-evolved, dependent, and enhancing
|
|
689
713
|
partner skills. This catches silent incompatibilities where changing one skill breaks a related skill's behavior.
|
|
690
714
|
</p>
|
|
691
715
|
<Code title="How it works">{`Skill A evolves
|
|
692
716
|
→ Load skill graph edges
|
|
693
717
|
→ Find partners (co-evolves, depends, enhances)
|
|
694
|
-
→ Test partner
|
|
718
|
+
→ Test partner skill tests against Skill A's changes
|
|
695
719
|
→ Block if partner pass rate < 95%`}</Code>
|
|
696
720
|
</Section>
|
|
697
721
|
|
|
@@ -772,10 +796,10 @@ generation: 3
|
|
|
772
796
|
<div className="guide-params">
|
|
773
797
|
<Param name="quality.judgePassScore" type="number" desc="Minimum judge score to pass (1-10)." def="7" />
|
|
774
798
|
<Param name="quality.judgeConsensusMin" type="number" desc="Minimum judges that must pass." def="2" />
|
|
775
|
-
<Param name="quality.regressionPassRate" type="number" desc="Minimum
|
|
799
|
+
<Param name="quality.regressionPassRate" type="number" desc="Minimum skill test pass rate (0-1)." def="0.95" />
|
|
776
800
|
<Param name="quality.canaryDurationDays" type="number" desc="Days to monitor canary deployments." def="3" />
|
|
777
801
|
<Param name="quality.autoRollbackThreshold" type="number" desc="Failure rate multiplier triggering rollback." def="1.5" />
|
|
778
|
-
<Param name="quality.
|
|
802
|
+
<Param name="quality.maxSkillTests" type="number" desc="Maximum skill tests per skill." def="50" />
|
|
779
803
|
</div>
|
|
780
804
|
|
|
781
805
|
<Code title="~/.helix/config.json">{`{
|
|
@@ -795,7 +819,7 @@ generation: 3
|
|
|
795
819
|
"regressionPassRate": 0.95,
|
|
796
820
|
"canaryDurationDays": 3,
|
|
797
821
|
"autoRollbackThreshold": 1.5,
|
|
798
|
-
"
|
|
822
|
+
"maxSkillTests": 50
|
|
799
823
|
}
|
|
800
824
|
}`}</Code>
|
|
801
825
|
</Section>
|
|
@@ -807,7 +831,7 @@ generation: 3
|
|
|
807
831
|
├── failures.jsonl # Captured failure records (append-only)
|
|
808
832
|
├── frontier.json # Pareto frontier (top-K programs)
|
|
809
833
|
├── evolution-history.json # All evolution iterations + proposals
|
|
810
|
-
├──
|
|
834
|
+
├── skill-tests.jsonl # Regression test cases (append-only)
|
|
811
835
|
├── skill-graph.json # Cached network (nodes + edges)
|
|
812
836
|
├── canary-registry.json # Active canary deployments
|
|
813
837
|
├── knowledge-buffer.json # Research discoveries + drafts
|
|
@@ -834,7 +858,7 @@ generation: 3
|
|
|
834
858
|
}`}</Code>
|
|
835
859
|
</div>
|
|
836
860
|
<div className="guide-data-card">
|
|
837
|
-
<div className="guide-data-title">
|
|
861
|
+
<div className="guide-data-title">Skill Test</div>
|
|
838
862
|
<Code>{`{
|
|
839
863
|
"id": "gc_react_42",
|
|
840
864
|
"skill": "react-patterns",
|
|
@@ -925,7 +949,7 @@ generation: 3
|
|
|
925
949
|
</FAQItem>
|
|
926
950
|
<FAQItem q="How does cross-skill regression work?">
|
|
927
951
|
When Skill A evolves, HelixEvo checks the skill graph for co-evolved, dependent, and enhancing
|
|
928
|
-
partners. It tests their
|
|
952
|
+
partners. It tests their skill tests against Skill A's changes. If partner pass rate drops below 95%,
|
|
929
953
|
the proposal is rejected.
|
|
930
954
|
</FAQItem>
|
|
931
955
|
<FAQItem q="How does the knowledge buffer work?">
|
|
@@ -958,7 +982,7 @@ generation: 3
|
|
|
958
982
|
{/* Footer */}
|
|
959
983
|
<div className="guide-footer">
|
|
960
984
|
<div className="guide-footer-content">
|
|
961
|
-
<div style={{ fontSize: 13, fontWeight: 600 }}>HelixEvo v0.2.
|
|
985
|
+
<div style={{ fontSize: 13, fontWeight: 600 }}>HelixEvo v0.2.6</div>
|
|
962
986
|
<div style={{ fontSize: 12, color: 'var(--text-dim)', marginTop: 4 }}>
|
|
963
987
|
Self-evolving skill ecosystem for AI agents · MIT License
|
|
964
988
|
</div>
|
package/dashboard/lib/data.ts
CHANGED
|
@@ -89,7 +89,9 @@ export function loadHistory(): { iterations: Iteration[] } {
|
|
|
89
89
|
return readJson<{ iterations: Iteration[] }>('evolution-history.json', { iterations: [] })
|
|
90
90
|
}
|
|
91
91
|
|
|
92
|
-
export function
|
|
92
|
+
export function loadSkillTests(): { id: string; skill: string; input: string }[] {
|
|
93
|
+
const newFile = readJsonl('skill-tests.jsonl')
|
|
94
|
+
if (newFile.length > 0) return newFile
|
|
93
95
|
return readJsonl('golden-cases.jsonl')
|
|
94
96
|
}
|
|
95
97
|
|
|
@@ -126,7 +128,7 @@ export function getDashboardSummary() {
|
|
|
126
128
|
const history = loadHistory()
|
|
127
129
|
const buffer = loadBuffer()
|
|
128
130
|
const canaries = loadCanaries()
|
|
129
|
-
const
|
|
131
|
+
const skillTests = loadSkillTests()
|
|
130
132
|
|
|
131
133
|
const evolved = graph.nodes.filter(n => n.generation > 0)
|
|
132
134
|
const totalProposals = history.iterations.flatMap(i => i.proposals)
|
|
@@ -141,6 +143,6 @@ export function getDashboardSummary() {
|
|
|
141
143
|
evolution: { runs: history.iterations.length, accepted: accepted.length, rejected: rejected.length },
|
|
142
144
|
buffer: { discoveries: buffer.discoveries.length, drafts: buffer.drafts.length },
|
|
143
145
|
canaries: canaries.entries.length,
|
|
144
|
-
|
|
146
|
+
skillTests: skillTests.length,
|
|
145
147
|
}
|
|
146
148
|
}
|
package/dist/cli.js
CHANGED
|
@@ -2129,7 +2129,7 @@ var init_config = __esm(() => {
|
|
|
2129
2129
|
regressionPassRate: 0.95,
|
|
2130
2130
|
canaryDurationDays: 3,
|
|
2131
2131
|
autoRollbackThreshold: 1.5,
|
|
2132
|
-
|
|
2132
|
+
maxSkillTests: 50
|
|
2133
2133
|
},
|
|
2134
2134
|
reporting: {
|
|
2135
2135
|
schedule: "0 8 * * *",
|
|
@@ -9226,11 +9226,14 @@ function loadHistory() {
|
|
|
9226
9226
|
function saveHistory(history) {
|
|
9227
9227
|
writeJson("evolution-history.json", history);
|
|
9228
9228
|
}
|
|
9229
|
-
function
|
|
9229
|
+
function loadSkillTests() {
|
|
9230
|
+
const newFile = readJsonl("skill-tests.jsonl");
|
|
9231
|
+
if (newFile.length > 0)
|
|
9232
|
+
return newFile;
|
|
9230
9233
|
return readJsonl("golden-cases.jsonl");
|
|
9231
9234
|
}
|
|
9232
|
-
function
|
|
9233
|
-
appendJsonl("
|
|
9235
|
+
function appendSkillTest(gc) {
|
|
9236
|
+
appendJsonl("skill-tests.jsonl", gc);
|
|
9234
9237
|
}
|
|
9235
9238
|
function loadSkillGraph() {
|
|
9236
9239
|
return readJson("skill-graph.json", {
|
|
@@ -9577,8 +9580,8 @@ import { join as join3 } from "node:path";
|
|
|
9577
9580
|
import { homedir as homedir2 } from "node:os";
|
|
9578
9581
|
import { existsSync as existsSync4, cpSync } from "node:fs";
|
|
9579
9582
|
|
|
9580
|
-
// src/prompts/
|
|
9581
|
-
function
|
|
9583
|
+
// src/prompts/test-gen.ts
|
|
9584
|
+
function buildTestGenPrompt(skill) {
|
|
9582
9585
|
return `Read this skill and generate 3 typical usage scenarios where the skill should guide correct behavior.
|
|
9583
9586
|
|
|
9584
9587
|
## Skill: ${skill.meta.name}
|
|
@@ -9650,13 +9653,13 @@ async function initCommand(options) {
|
|
|
9650
9653
|
console.log(`
|
|
9651
9654
|
Imported ${imported} new skills
|
|
9652
9655
|
`);
|
|
9653
|
-
if (!options.
|
|
9656
|
+
if (!options.skipTests) {
|
|
9654
9657
|
const generalSkills = loadAllGeneralSkills();
|
|
9655
|
-
console.log(` Generating
|
|
9658
|
+
console.log(` Generating skill tests...
|
|
9656
9659
|
`);
|
|
9657
9660
|
for (const skill of generalSkills) {
|
|
9658
9661
|
try {
|
|
9659
|
-
const prompt =
|
|
9662
|
+
const prompt = buildTestGenPrompt(skill);
|
|
9660
9663
|
const output = await chatJson({ prompt });
|
|
9661
9664
|
for (const c of output.cases) {
|
|
9662
9665
|
const gc = {
|
|
@@ -9671,11 +9674,11 @@ async function initCommand(options) {
|
|
|
9671
9674
|
lastResult: "pass",
|
|
9672
9675
|
consecutivePasses: 1
|
|
9673
9676
|
};
|
|
9674
|
-
|
|
9677
|
+
appendSkillTest(gc);
|
|
9675
9678
|
}
|
|
9676
|
-
console.log(` ✓ ${skill.slug}: ${output.cases.length}
|
|
9679
|
+
console.log(` ✓ ${skill.slug}: ${output.cases.length} skill tests`);
|
|
9677
9680
|
} catch (err) {
|
|
9678
|
-
console.log(` ✗ ${skill.slug}: failed to generate
|
|
9681
|
+
console.log(` ✗ ${skill.slug}: failed to generate skill tests (${err})`);
|
|
9679
9682
|
}
|
|
9680
9683
|
}
|
|
9681
9684
|
}
|
|
@@ -9989,11 +9992,11 @@ init_config();
|
|
|
9989
9992
|
init_llm();
|
|
9990
9993
|
async function runRegression(skillSlug, newSkillContent, verbose = false) {
|
|
9991
9994
|
const config = loadConfig();
|
|
9992
|
-
const allCases =
|
|
9995
|
+
const allCases = loadSkillTests();
|
|
9993
9996
|
const cases = allCases.filter((gc) => gc.skill === skillSlug);
|
|
9994
9997
|
if (cases.length === 0) {
|
|
9995
9998
|
if (verbose)
|
|
9996
|
-
console.log(` No
|
|
9999
|
+
console.log(` No skill tests for ${skillSlug}, skipping regression`);
|
|
9997
10000
|
return { total: 0, passed: 0, passRate: 1, failures: [] };
|
|
9998
10001
|
}
|
|
9999
10002
|
const failures = [];
|
|
@@ -10020,7 +10023,7 @@ async function runRegression(skillSlug, newSkillContent, verbose = false) {
|
|
|
10020
10023
|
failures
|
|
10021
10024
|
};
|
|
10022
10025
|
}
|
|
10023
|
-
function
|
|
10026
|
+
function promoteToSkillTest(failure, skillSlug, replayResult) {
|
|
10024
10027
|
const gc = {
|
|
10025
10028
|
id: `gc_${skillSlug}_${Date.now() % 1e5}`,
|
|
10026
10029
|
addedAt: new Date().toISOString(),
|
|
@@ -10033,7 +10036,7 @@ function promoteToGoldenCase(failure, skillSlug, replayResult) {
|
|
|
10033
10036
|
lastResult: "pass",
|
|
10034
10037
|
consecutivePasses: 1
|
|
10035
10038
|
};
|
|
10036
|
-
|
|
10039
|
+
appendSkillTest(gc);
|
|
10037
10040
|
}
|
|
10038
10041
|
function buildRegressionJudgePrompt(gc, skillContent) {
|
|
10039
10042
|
return `You are a regression test judge. Determine if a modified skill can still handle this scenario correctly.
|
|
@@ -10072,7 +10075,7 @@ async function runCrossSkillRegression(skillSlug, newSkillContent, verbose = fal
|
|
|
10072
10075
|
return { total: 0, passed: 0, passRate: 1, testedSkills: [] };
|
|
10073
10076
|
}
|
|
10074
10077
|
const config = loadConfig();
|
|
10075
|
-
const allCases =
|
|
10078
|
+
const allCases = loadSkillTests();
|
|
10076
10079
|
const partnerCases = allCases.filter((gc) => partners.includes(gc.skill));
|
|
10077
10080
|
if (partnerCases.length === 0) {
|
|
10078
10081
|
return { total: 0, passed: 0, passRate: 1, testedSkills: partners };
|
|
@@ -10496,7 +10499,7 @@ async function evolveCommand(options) {
|
|
|
10496
10499
|
const skillFailureCount = allFailures.filter((f) => f.skillsActive.includes(skillSlug2)).length;
|
|
10497
10500
|
deployCanary(skillSlug2, `v${generation}`, backupPath, config.quality.canaryDurationDays, skillFailureCount);
|
|
10498
10501
|
console.log(` \uD83D\uDC25 Canary deployed: ${config.quality.canaryDurationDays} day monitoring period`);
|
|
10499
|
-
|
|
10502
|
+
promoteToSkillTest(testFailure, skillSlug2, replayResult);
|
|
10500
10503
|
const program2 = {
|
|
10501
10504
|
id: `gen${generation}-${skillSlug2}`,
|
|
10502
10505
|
generation,
|
|
@@ -10703,7 +10706,7 @@ async function statusCommand() {
|
|
|
10703
10706
|
const unresolved = failures.filter((f) => !f.resolved);
|
|
10704
10707
|
const frontier = loadFrontier();
|
|
10705
10708
|
const skills = loadAllGeneralSkills();
|
|
10706
|
-
const
|
|
10709
|
+
const skillTests = loadSkillTests();
|
|
10707
10710
|
const stagnation = getStagnationCount();
|
|
10708
10711
|
const recentIter = getRecentIterations(7);
|
|
10709
10712
|
console.log(`\uD83E\uDDEC HelixEvo Status
|
|
@@ -10722,7 +10725,7 @@ async function statusCommand() {
|
|
|
10722
10725
|
}
|
|
10723
10726
|
console.log(`
|
|
10724
10727
|
Failures: ${unresolved.length} unresolved / ${failures.length} total`);
|
|
10725
|
-
console.log(`
|
|
10728
|
+
console.log(` Skill tests: ${skillTests.length}`);
|
|
10726
10729
|
const buffer = getBufferStats();
|
|
10727
10730
|
console.log(`
|
|
10728
10731
|
Knowledge Buffer:`);
|
|
@@ -12924,12 +12927,12 @@ async function metricsCommand(options) {
|
|
|
12924
12927
|
|
|
12925
12928
|
// src/cli.ts
|
|
12926
12929
|
var program2 = new Command;
|
|
12927
|
-
program2.name("helixevo").description("Self-evolving skill ecosystem for AI agents").version("0.2.
|
|
12930
|
+
program2.name("helixevo").description("Self-evolving skill ecosystem for AI agents").version("0.2.8").addHelpText("after", `
|
|
12928
12931
|
Examples:
|
|
12929
12932
|
$ helixevo watch Always-on learning (auto-capture + auto-evolve)
|
|
12930
12933
|
$ helixevo watch --project myapp Watch with project context
|
|
12931
12934
|
$ helixevo metrics Show correction rates and evolution impact
|
|
12932
|
-
$ helixevo init Import skills + generate
|
|
12935
|
+
$ helixevo init Import skills + generate skill tests
|
|
12933
12936
|
$ helixevo status Show system health
|
|
12934
12937
|
$ helixevo evolve --verbose Evolve skills from failures
|
|
12935
12938
|
$ helixevo evolve --dry-run Preview proposals without applying
|
|
@@ -12943,7 +12946,7 @@ Examples:
|
|
|
12943
12946
|
$ helixevo graph --optimize Detect merge/split/conflicts
|
|
12944
12947
|
$ helixevo report --days 7 Weekly evolution report
|
|
12945
12948
|
$ helixevo capture <session.json> Extract failures from session`);
|
|
12946
|
-
program2.command("init").description("Import existing skills + generate
|
|
12949
|
+
program2.command("init").description("Import existing skills + generate skill tests").option("--skills-paths <paths...>", "Paths to scan for existing skills").option("--skip-tests", "Skip skill test generation").action(initCommand);
|
|
12947
12950
|
program2.command("capture").description("Capture failures from a Craft Agent session").argument("<sessionPath>", "Path to session conversation file").option("--project <name>", "Project name override").action(captureCommand);
|
|
12948
12951
|
program2.command("evolve").description("Evolve skills from failures [--dry-run] [--verbose] [--max-proposals <n>]").option("--dry-run", "Show proposals without applying").option("--verbose", "Show detailed LLM interactions").option("--max-proposals <n>", "Max proposals per run", "5").action(evolveCommand);
|
|
12949
12952
|
program2.command("generalize").description("Promote cross-skill patterns to higher layer ↑ [--dry-run] [--verbose]").option("--dry-run", "Show candidates without applying").option("--verbose", "Show detailed analysis").action(generalizeCommand);
|
package/package.json
CHANGED