synergyspec-selfevolving 2.1.5 → 2.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/learn.js +80 -24
- package/dist/commands/self-evolution-dream.d.ts +15 -1
- package/dist/commands/self-evolution-dream.js +111 -6
- package/dist/commands/self-evolution-episode.d.ts +3 -0
- package/dist/commands/self-evolution-episode.js +157 -108
- package/dist/commands/workflow/status.js +4 -0
- package/dist/core/archive.js +17 -9
- package/dist/core/change-readiness.d.ts +16 -1
- package/dist/core/change-readiness.js +441 -15
- package/dist/core/fitness/loss.d.ts +3 -5
- package/dist/core/fitness/loss.js +2 -2
- package/dist/core/fitness/test-metrics.d.ts +1 -0
- package/dist/core/fitness/test-metrics.js +49 -0
- package/dist/core/learn.js +129 -11
- package/dist/core/migration.d.ts +6 -14
- package/dist/core/migration.js +63 -21
- package/dist/core/runner-evidence.d.ts +53 -0
- package/dist/core/runner-evidence.js +613 -0
- package/dist/core/self-evolution/candidates.js +0 -2
- package/dist/core/self-evolution/dream.d.ts +57 -3
- package/dist/core/self-evolution/dream.js +480 -9
- package/dist/core/self-evolution/episode-orchestrator.d.ts +2 -0
- package/dist/core/self-evolution/episode-orchestrator.js +17 -5
- package/dist/core/self-evolution/episode-store.d.ts +5 -0
- package/dist/core/self-evolution/episode-store.js +6 -2
- package/dist/core/self-evolution/evolving-agent.js +8 -0
- package/dist/core/self-evolution/host-harness.d.ts +35 -12
- package/dist/core/self-evolution/host-harness.js +188 -49
- package/dist/core/self-evolution/reward-aggregator.js +2 -2
- package/dist/core/templates/workflows/archive-change.js +18 -18
- package/dist/core/templates/workflows/dream.js +57 -47
- package/dist/core/templates/workflows/learn.js +7 -5
- package/dist/core/templates/workflows/run-tests.js +48 -29
- package/dist/core/templates/workflows/self-evolving.js +11 -8
- package/dist/core/trajectory/facts.d.ts +1 -1
- package/dist/core/trajectory/registry.js +39 -8
- package/package.json +1 -1
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { resolveAgentTimeoutMs, runHeadlessAgent, } from './host-harness.js';
|
|
2
2
|
import { enrichGradientWithDeepRead, DEFAULT_DEEP_READ_CONFIG, } from './reward-deepread.js';
|
|
3
3
|
import { loadRewardScoringContext, scoreOnce, deriveSingleSampleVerdict, buildAnchors, computeJudgeVerifierDivergence, formatJudgeVerifierDivergenceFlag, JUDGE_VERIFIER_DIVERGENCE_FLAG_PREFIX, } from './reward-agent.js';
|
|
4
4
|
import { writeDiagnosis, advanceEpisodeStage, } from './episode-store.js';
|
|
@@ -102,7 +102,7 @@ export async function runRewardAgentEnsemble(opts) {
|
|
|
102
102
|
cwd: repoRoot,
|
|
103
103
|
spawn: opts.spawn,
|
|
104
104
|
binaryOverride: opts.binary,
|
|
105
|
-
timeoutMs: opts.timeoutMs ??
|
|
105
|
+
timeoutMs: opts.timeoutMs ?? resolveAgentTimeoutMs(opts.harness),
|
|
106
106
|
harness: opts.harness,
|
|
107
107
|
});
|
|
108
108
|
if (r.exitCode === 0 && r.stdout.length > 0)
|
|
@@ -121,12 +121,12 @@ export function getArchiveChangeSkillTemplate() {
|
|
|
121
121
|
If \`evolution.status\` is \`error\`, surface the defect from status and still warn that the durable report is missing.
|
|
122
122
|
If \`evolution.status\` is \`not-run\`, learn has not run or left evidence.
|
|
123
123
|
|
|
124
|
-
If either is missing,
|
|
125
|
-
- Missing verification evidence: suggest \`/synspec:verify <name>\`
|
|
126
|
-
- Missing learn evidence: suggest \`/synspec:learn <name>\`
|
|
127
|
-
-
|
|
128
|
-
|
|
129
|
-
4d. **Final workspace/package identity check**
|
|
124
|
+
If either is missing, stop before archiving:
|
|
125
|
+
- Missing verification evidence: suggest \`/synspec:verify <name>\`
|
|
126
|
+
- Missing learn evidence: suggest \`/synspec:learn <name>\`
|
|
127
|
+
- Do not archive until the missing evidence exists and readiness passes
|
|
128
|
+
|
|
129
|
+
4d. **Final workspace/package identity check**
|
|
130
130
|
|
|
131
131
|
Before archiving, validate that verification evidence still describes the
|
|
132
132
|
current workspace:
|
|
@@ -169,7 +169,7 @@ export function getArchiveChangeSkillTemplate() {
|
|
|
169
169
|
- Whether specs were synced (if applicable)
|
|
170
170
|
- Blast radius triage results (e.g. "2 specs synced, 1 marked for review" or "No blast radius")
|
|
171
171
|
- Verify/learn evidence status
|
|
172
|
-
- Note about any warnings (incomplete artifacts/tasks
|
|
172
|
+
- Note about any warnings (incomplete artifacts/tasks) and any hard evidence blockers found
|
|
173
173
|
|
|
174
174
|
**Output On Success**
|
|
175
175
|
|
|
@@ -188,12 +188,12 @@ All artifacts complete. All tasks complete.
|
|
|
188
188
|
**Guardrails**
|
|
189
189
|
- Always prompt for change selection if not provided
|
|
190
190
|
- Use artifact graph (synergyspec-selfevolving status --json) for completion checking
|
|
191
|
-
-
|
|
191
|
+
- Do not block archive on force-bypassable warnings, such as incomplete artifacts/tasks when the user explicitly forces them; do block on missing verification evidence, missing learn evidence, invalid workspace identity, or incomplete evolution.
|
|
192
192
|
- Preserve .synergyspec-selfevolving.yaml when moving to archive (it moves with the directory)
|
|
193
193
|
- Show clear summary of what happened
|
|
194
194
|
- If sync is requested, use synergyspec-selfevolving-sync-specs approach (agent-driven)
|
|
195
195
|
- If delta specs exist, always run the sync assessment and show the combined summary before prompting
|
|
196
|
-
- If verification or learn evidence is missing,
|
|
196
|
+
- If verification or learn evidence is missing, stop before archiving; do not silently skip those workflow stages
|
|
197
197
|
- If spec-blast-radius.md does not exist, skip step 4b silently (no warning needed)`,
|
|
198
198
|
license: 'MIT',
|
|
199
199
|
compatibility: 'Requires synergyspec-selfevolving CLI.',
|
|
@@ -325,12 +325,12 @@ export function getOpsxArchiveCommandTemplate() {
|
|
|
325
325
|
If \`evolution.status\` is \`error\`, surface the defect from status and still warn that the durable report is missing.
|
|
326
326
|
If \`evolution.status\` is \`not-run\`, learn has not run or left evidence.
|
|
327
327
|
|
|
328
|
-
If either is missing,
|
|
329
|
-
- Missing verification evidence: suggest \`/synspec:verify <name>\`
|
|
330
|
-
- Missing learn evidence: suggest \`/synspec:learn <name>\`
|
|
331
|
-
-
|
|
332
|
-
|
|
333
|
-
5. **Perform the archive**
|
|
328
|
+
If either is missing, stop before archiving:
|
|
329
|
+
- Missing verification evidence: suggest \`/synspec:verify <name>\`
|
|
330
|
+
- Missing learn evidence: suggest \`/synspec:learn <name>\`
|
|
331
|
+
- Do not archive until the missing evidence exists and readiness passes
|
|
332
|
+
|
|
333
|
+
5. **Perform the archive**
|
|
334
334
|
|
|
335
335
|
Create the archive directory if it doesn't exist:
|
|
336
336
|
\`\`\`bash
|
|
@@ -356,7 +356,7 @@ export function getOpsxArchiveCommandTemplate() {
|
|
|
356
356
|
- Spec sync status (synced / sync skipped / no delta specs)
|
|
357
357
|
- Blast radius triage results (e.g. "2 specs synced, 1 marked for review" or "No blast radius")
|
|
358
358
|
- Verify/learn evidence status
|
|
359
|
-
- Note about any warnings (incomplete artifacts/tasks
|
|
359
|
+
- Note about any warnings (incomplete artifacts/tasks) and any hard evidence blockers found
|
|
360
360
|
|
|
361
361
|
**Output On Success**
|
|
362
362
|
|
|
@@ -424,12 +424,12 @@ Target archive directory already exists.
|
|
|
424
424
|
**Guardrails**
|
|
425
425
|
- Always prompt for change selection if not provided
|
|
426
426
|
- Use artifact graph (synergyspec-selfevolving status --json) for completion checking
|
|
427
|
-
-
|
|
427
|
+
- Do not block archive on force-bypassable warnings, such as incomplete artifacts/tasks when the user explicitly forces them; do block on missing verification evidence, missing learn evidence, invalid workspace identity, or incomplete evolution.
|
|
428
428
|
- Preserve .synergyspec-selfevolving.yaml when moving to archive (it moves with the directory)
|
|
429
429
|
- Show clear summary of what happened
|
|
430
430
|
- If sync is requested, use the Skill tool to invoke \`synergyspec-selfevolving-sync-specs\` (agent-driven)
|
|
431
431
|
- If delta specs exist, always run the sync assessment and show the combined summary before prompting
|
|
432
|
-
- If verification or learn evidence is missing,
|
|
432
|
+
- If verification or learn evidence is missing, stop before archiving; do not silently skip those workflow stages
|
|
433
433
|
- If spec-blast-radius.md does not exist, skip step 4b silently (no warning needed)`
|
|
434
434
|
};
|
|
435
435
|
}
|
|
@@ -3,32 +3,35 @@ const INSTRUCTIONS_BODY = `**Input**: Optionally specify a Dream mode and flags
|
|
|
3
3
|
Accepted forms:
|
|
4
4
|
|
|
5
5
|
\`\`\`text
|
|
6
|
-
/synspec:dream
|
|
7
|
-
/synspec:dream preview [--target <id>] [--limit <n>] [--json]
|
|
8
|
-
/synspec:dream run [--target <id>] [--limit <n>] [--json]
|
|
9
|
-
/synspec:dream show [runId] [--json]
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
6
|
+
/synspec:dream
|
|
7
|
+
/synspec:dream preview [--target <id>] [--limit <n>] [--json]
|
|
8
|
+
/synspec:dream run [--target <id>] [--limit <n>] [--apply --yes] [--json]
|
|
9
|
+
/synspec:dream show [runId] [--json]
|
|
10
|
+
/synspec:dream policy-update <candidateId> --accepted-by <name> --yes [--json]
|
|
11
|
+
\`\`\`
|
|
12
|
+
|
|
13
|
+
Bare \`/synspec:dream\` means \`preview\`. Preview is read-only. Plain \`run\` writes only Dream artifacts. \`run --apply --yes\` and \`policy-update ... --yes\` are explicit policy-update entrances for already accepted Dream candidates.
|
|
13
14
|
|
|
14
15
|
**Purpose**
|
|
15
16
|
|
|
16
|
-
This is the SS agent-harness entrance for offline Supervised Learning Dream. The user should trigger Dream from the code-agent chat, not by opening a separate terminal. Your job is to call the existing CLI engine, parse the JSON result, and relay a short Dream Verdict.
|
|
17
|
-
|
|
18
|
-
Dream is not the loop-v2 episode runner. It batch-reads completed evidence and proposes optimizer briefs for existing skill/workflow/template targets. It never creates new skills, never edits POLICY directly,
|
|
17
|
+
This is the SS agent-harness entrance for offline Supervised Learning Dream. The user should trigger Dream from the code-agent chat, not by opening a separate terminal. Your job is to call the existing CLI engine, parse the JSON result, and relay a short Dream Verdict.
|
|
18
|
+
|
|
19
|
+
Dream is not the loop-v2 episode runner. It batch-reads completed evidence and proposes optimizer briefs for existing skill/workflow/template targets. It never creates new skills, never edits POLICY directly, and never runs the episode/reward/evolving agents. By default Dream is proposal-only; policy changes require an explicit accepted-candidate update with \`--yes\`, synthesize bounded edits into the candidate package, pass the static gate, and promote through the existing rollback/ledger path.
|
|
19
20
|
|
|
20
21
|
**Mode parsing**
|
|
21
22
|
|
|
22
23
|
1. If the first argument is missing, use \`preview\`.
|
|
23
|
-
2. If the first argument is one of \`preview\`, \`run\`, or \`
|
|
24
|
-
3. If the first argument starts with \`--\`, treat it as a \`preview\` flag.
|
|
25
|
-
4. If the mode is unknown, stop and show the accepted forms above.
|
|
26
|
-
|
|
27
|
-
Pass only these user options through:
|
|
28
|
-
- \`--target <id>\`
|
|
29
|
-
- \`--limit <n>\`
|
|
30
|
-
- \`--
|
|
31
|
-
- \`
|
|
24
|
+
2. If the first argument is one of \`preview\`, \`run\`, \`show\`, or \`policy-update\`, use that mode.
|
|
25
|
+
3. If the first argument starts with \`--\`, treat it as a \`preview\` flag.
|
|
26
|
+
4. If the mode is unknown, stop and show the accepted forms above.
|
|
27
|
+
|
|
28
|
+
Pass only these user options through:
|
|
29
|
+
- \`--target <id>\`
|
|
30
|
+
- \`--limit <n>\`
|
|
31
|
+
- \`--apply\` and \`--yes\` for \`run\`
|
|
32
|
+
- \`candidateId\`, \`--accepted-by <name>\`, and \`--yes\` for \`policy-update\`
|
|
33
|
+
- \`--json\`
|
|
34
|
+
- \`runId\` for \`show\`
|
|
32
35
|
|
|
33
36
|
Always add \`--json\` to the CLI command you run so the result is machine-readable. If the user explicitly asked for \`--json\`, include the compact raw JSON after the Dream Verdict; otherwise provide the human summary only.
|
|
34
37
|
|
|
@@ -46,49 +49,56 @@ Always add \`--json\` to the CLI command you run so the result is machine-readab
|
|
|
46
49
|
synergyspec-selfevolving self-evolution dream run --json
|
|
47
50
|
\`\`\`
|
|
48
51
|
|
|
49
|
-
For show:
|
|
50
|
-
\`\`\`bash
|
|
51
|
-
synergyspec-selfevolving self-evolution dream show --json
|
|
52
|
-
\`\`\`
|
|
53
|
-
|
|
54
|
-
|
|
52
|
+
For show:
|
|
53
|
+
\`\`\`bash
|
|
54
|
+
synergyspec-selfevolving self-evolution dream show --json
|
|
55
|
+
\`\`\`
|
|
56
|
+
|
|
57
|
+
For accepted candidate policy update:
|
|
58
|
+
\`\`\`bash
|
|
59
|
+
synergyspec-selfevolving self-evolution dream policy-update <candidateId> --accepted-by <name> --yes --json
|
|
60
|
+
\`\`\`
|
|
61
|
+
|
|
62
|
+
Append \`--target <id>\`, \`--limit <n>\`, \`--apply\`, \`--yes\`, \`--accepted-by <name>\`, or \`runId\` only when the user supplied them. Never add \`--yes\` on the user's behalf.
|
|
55
63
|
|
|
56
64
|
2. **Interpret the result without re-judging it**
|
|
57
65
|
|
|
58
|
-
Read candidate ids, target ids, evidence summary, run id, and write paths from the CLI JSON when present. Do not invent candidate ids or claim a policy change.
|
|
66
|
+
Read candidate ids, target ids, evidence summary, run id, update outcome, gate result, promoted files, policy version, and write paths from the CLI JSON when present. Do not invent candidate ids or claim a policy change.
|
|
59
67
|
|
|
60
68
|
3. **Classify writes**
|
|
61
69
|
|
|
62
|
-
- \`preview\`: Writes are \`none\`.
|
|
63
|
-
- \`run\`: Writes are \`dream-run + draft candidates\`.
|
|
64
|
-
- \`
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
70
|
+
- \`preview\`: Writes are \`none\`.
|
|
71
|
+
- plain \`run\`: Writes are \`dream-run + draft candidates\`.
|
|
72
|
+
- \`run --apply --yes\`: Writes are \`dream-run + candidates + gated policy update\` when the update is promoted; otherwise report the refusal outcome.
|
|
73
|
+
- \`show\`: Writes are \`none\`.
|
|
74
|
+
- \`policy-update --yes\`: Writes are \`gated policy update\` when promoted; otherwise report the refusal outcome.
|
|
75
|
+
|
|
76
|
+
4. **Report the next step**
|
|
77
|
+
|
|
78
|
+
Plain Dream candidates are proposal-only optimizer briefs. To turn an accepted candidate into policy, use \`/synspec:dream policy-update <candidateId> --accepted-by <name> --yes\`. The update path must author bounded edits, pass the static gate, and promote through the existing rollback/ledger channel; if any gate refuses, report the refusal and leave the policy unchanged.
|
|
69
79
|
|
|
70
80
|
**Output Format**
|
|
71
81
|
|
|
72
82
|
End with this block:
|
|
73
83
|
|
|
74
84
|
\`\`\`text
|
|
75
|
-
## Dream Verdict
|
|
76
|
-
- Mode: preview | run | show
|
|
77
|
-
- Run id: <id or none>
|
|
78
|
-
- Candidates: <ids or none>
|
|
79
|
-
- Targets: <target ids or all eligible>
|
|
80
|
-
- Evidence read: <short summary>
|
|
81
|
-
- Writes: none | dream-run + draft candidates
|
|
82
|
-
- Policy changed: no
|
|
83
|
-
- New skills created: no
|
|
84
|
-
- Next step: review candidate(s),
|
|
85
|
-
\`\`\`
|
|
85
|
+
## Dream Verdict
|
|
86
|
+
- Mode: preview | run | show | policy-update
|
|
87
|
+
- Run id: <id or none>
|
|
88
|
+
- Candidates: <ids or none>
|
|
89
|
+
- Targets: <target ids or all eligible>
|
|
90
|
+
- Evidence read: <short summary>
|
|
91
|
+
- Writes: none | dream-run + draft candidates | dream-run + candidates + gated policy update | gated policy update
|
|
92
|
+
- Policy changed: yes | no
|
|
93
|
+
- New skills created: no
|
|
94
|
+
- Next step: review candidate(s), run accepted policy-update, or inspect gate refusal
|
|
95
|
+
\`\`\`
|
|
86
96
|
|
|
87
97
|
If the CLI command fails, still end with \`## Dream Verdict\` and set fields to \`none\` where unknown. Put the command failure under \`Evidence read\` or \`Next step\`; do not retry with a different self-evolution command.`;
|
|
88
98
|
export function getDreamSkillTemplate() {
|
|
89
99
|
return {
|
|
90
100
|
name: 'synergyspec-selfevolving-dream',
|
|
91
|
-
description: 'SS Dream entrance: preview, run, or
|
|
101
|
+
description: 'SS Dream entrance: preview, run, inspect, or apply accepted offline Supervised Learning Dream updates from the code-agent chat.',
|
|
92
102
|
instructions: `Run the SS offline Supervised Learning Dream lane from the code-agent harness.
|
|
93
103
|
|
|
94
104
|
${INSTRUCTIONS_BODY}`,
|
|
@@ -100,12 +110,12 @@ ${INSTRUCTIONS_BODY}`,
|
|
|
100
110
|
export function getOpsxDreamCommandTemplate() {
|
|
101
111
|
return {
|
|
102
112
|
name: 'SS: Dream',
|
|
103
|
-
description: 'Preview, run, or
|
|
113
|
+
description: 'Preview, run, inspect, or apply accepted offline Supervised Learning Dream updates from the code-agent chat',
|
|
104
114
|
category: 'Workflow',
|
|
105
115
|
tags: ['workflow', 'dream', 'self-evolution', 'offline-learning'],
|
|
106
116
|
content: `Run the SS offline Supervised Learning Dream lane from the code-agent harness.
|
|
107
117
|
|
|
108
|
-
**Input**: Optionally specify a mode after \`/synspec:dream\` (for example \`/synspec:dream preview\`, \`/synspec:dream run --limit 5\`, or \`/synspec:dream
|
|
118
|
+
**Input**: Optionally specify a mode after \`/synspec:dream\` (for example \`/synspec:dream preview\`, \`/synspec:dream run --limit 5\`, \`/synspec:dream show\`, or \`/synspec:dream policy-update <candidateId> --accepted-by <name> --yes\`). Bare \`/synspec:dream\` means read-only \`preview\`.
|
|
109
119
|
|
|
110
120
|
${INSTRUCTIONS_BODY}`,
|
|
111
121
|
};
|
|
@@ -19,17 +19,19 @@ This is the review-and-learn step after \`/synspec:apply\` and \`/synspec:verify
|
|
|
19
19
|
The runner starts with NO conversation context, so collect every handle it needs:
|
|
20
20
|
- **Project root**: the absolute path of the current working directory.
|
|
21
21
|
- **Change name**: from step 1.
|
|
22
|
-
- **Harness**:
|
|
23
|
-
- **Mode**: always \`apply\` — the episode runs the full loop (score, decide, and the 演进智能体's ONE bounded edit) autonomously, with no confirmation prompt. There is NO read-only episode and NO \`--preview\` flag. If the user wants a read-only look (no rollback, no evolution), do NOT run an episode: use the read-only view \`synergyspec-selfevolving self-evolution policy show\` (or a plain \`synergyspec-selfevolving learn <name>\` without \`--apply\`) instead.
|
|
24
|
-
- **
|
|
22
|
+
- **Harness**: resolve the CURRENT host runtime, not the change metadata. If this skill is running in Codex, use \`codex\`; in Claude Code, use \`claude\`; in OpenCode, use \`opencode\`. Use \`unknown\` only when the host is genuinely unidentified after checking the active session/tooling. Do NOT read \`harness:\` from the per-change YAML for this field: that metadata is historical provenance, not the runtime that will spawn the loop-v2 agents.
|
|
23
|
+
- **Mode**: always \`apply\` — the episode runs the full loop (score, decide, and the 演进智能体's ONE bounded edit) autonomously, with no confirmation prompt. There is NO read-only episode and NO \`--preview\` flag. If the user wants a read-only look (no rollback, no evolution), do NOT run an episode: use the read-only view \`synergyspec-selfevolving self-evolution policy show\` (or a plain \`synergyspec-selfevolving learn <name>\` without \`--apply\`) instead.
|
|
24
|
+
- **Force-new episode**: \`yes\` only when the user explicitly asked to rerun / force a fresh episode; otherwise \`no\`. A normal learn run must not invent a rerun.
|
|
25
|
+
- **Isolation**: \`fresh-context subagent\` for the spawned runner.
|
|
26
|
+
- **Session handle (optional)**: if your harness exposes this session's id or transcript path, capture it; otherwise omit it (the 主智能体 MAIN AGENT arm's trajectory discovery then uses the change window).
|
|
25
27
|
|
|
26
28
|
3. **Spawn the runner**
|
|
27
29
|
|
|
28
|
-
Use the host's available general-purpose Task/subagent runner (for example \`general-purpose\` on Claude or \`general\` on hosts that expose that type), prompt: "Use Skill tool to invoke synergyspec-selfevolving-self-evolving for change '<name>'. Project root: <root>. Harness: <harness>. Mode: apply. Session-id: <id>. Transcript: <path>. Trigger the loop-v2 self-evolution episode autonomously, do not ask the user questions, and end with the '## Episode Verdict' block."
|
|
30
|
+
Use the host's available general-purpose Task/subagent runner (for example \`general-purpose\` on Claude or \`general\` on hosts that expose that type), prompt: "Use Skill tool to invoke synergyspec-selfevolving-self-evolving for change '<name>'. Project root: <root>. Harness: <harness>. Mode: apply. Force-new: <yes|no>. Isolation: fresh-context subagent. Session-id: <id>. Transcript: <path>. Trigger the loop-v2 self-evolution episode autonomously, do not ask the user questions, and end with the '## Episode Verdict' block."
|
|
29
31
|
|
|
30
32
|
Include the \`Session-id: <id>.\` / \`Transcript: <path>.\` segment only when the session handle from step 2 is known — omit it entirely when unknown.
|
|
31
33
|
|
|
32
|
-
The runner triggers exactly one CLI command — \`synergyspec-selfevolving self-evolution episode --change "<name>" --session-id <id
|
|
34
|
+
The runner triggers exactly one CLI command — \`synergyspec-selfevolving self-evolution episode --change "<name>" --harness <harness> --session-id <id> --rerun\` when force-new is \`yes\`; omit \`--rerun\` when force-new is \`no\`; omit \`--harness\` when it is \`unknown\` — and the orchestrator CODE-SPAWNS the 奖励智能体 REWARD AGENT + 演进智能体 EVOLVING AGENT (+ optional CRITIC AGENT(基线智能体)). Neither you nor the runner grades or edits canonical files.
|
|
33
35
|
|
|
34
36
|
Guardrails:
|
|
35
37
|
- Do NOT trigger the episode yourself in this session — it must run from a fresh context.
|
|
@@ -38,35 +38,53 @@ const INSTRUCTIONS_BODY = `**Input**: Optionally specify a change name. If omitt
|
|
|
38
38
|
"startedAt": "<ISO timestamp>",
|
|
39
39
|
"finishedAt": "<ISO timestamp>",
|
|
40
40
|
"exitCode": 0,
|
|
41
|
-
"signal": null,
|
|
42
|
-
"stdoutLog": "synergyspec-selfevolving/changes/<name>/test-evidence/<timestamp>/runner.stdout.log",
|
|
43
|
-
"stderrLog": "synergyspec-selfevolving/changes/<name>/test-evidence/<timestamp>/runner.stderr.log",
|
|
44
|
-
"
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
},
|
|
59
|
-
"junitXml": null,
|
|
60
|
-
"coverageSummary": null,
|
|
61
|
-
"coverageLcov": null,
|
|
62
|
-
"coverageHtml": null
|
|
41
|
+
"signal": null,
|
|
42
|
+
"stdoutLog": "synergyspec-selfevolving/changes/<name>/test-evidence/<timestamp>/runner.stdout.log",
|
|
43
|
+
"stderrLog": "synergyspec-selfevolving/changes/<name>/test-evidence/<timestamp>/runner.stderr.log",
|
|
44
|
+
"stdoutLogSha256": "<sha256 of runner.stdout.log>",
|
|
45
|
+
"stderrLogSha256": "<sha256 of runner.stderr.log>",
|
|
46
|
+
"workspaceIdentity": {
|
|
47
|
+
"changeName": "<name>",
|
|
48
|
+
"taskId": "<benchmark task id, if any>",
|
|
49
|
+
"cwd": "<absolute working directory>",
|
|
50
|
+
"pyproject": null,
|
|
51
|
+
"packageJson": null
|
|
52
|
+
},
|
|
53
|
+
"testMetrics": {
|
|
54
|
+
"total": 29,
|
|
55
|
+
"passed": 29,
|
|
56
|
+
"failed": 0,
|
|
57
|
+
"passRate": 1
|
|
58
|
+
},
|
|
59
|
+
"junitXml": null,
|
|
60
|
+
"coverageSummary": null,
|
|
61
|
+
"coverageLcov": null,
|
|
62
|
+
"coverageHtml": null
|
|
63
63
|
}
|
|
64
64
|
\`\`\`
|
|
65
65
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
66
|
+
Set each workspace identity file entry to an object ONLY when that file
|
|
67
|
+
exists at the project root. If \`pyproject.toml\` or \`package.json\` is absent,
|
|
68
|
+
leave that field \`null\` (or omit it); do not emit a \`path\` for an absent file.
|
|
69
|
+
Object shape for a present file:
|
|
70
|
+
|
|
71
|
+
\`\`\`json
|
|
72
|
+
{
|
|
73
|
+
"path": "pyproject.toml",
|
|
74
|
+
"name": "<project/package name, or null>",
|
|
75
|
+
"sha256": "<sha256 of the file>"
|
|
76
|
+
}
|
|
77
|
+
\`\`\`
|
|
78
|
+
|
|
79
|
+
If the runner summary exposes pass/fail counts, record them in
|
|
80
|
+
\`testMetrics\`; otherwise set \`testMetrics\` to \`null\` and preserve the raw
|
|
81
|
+
stdout/stderr logs. The \`stdoutLogSha256\` and \`stderrLogSha256\` fields MUST
|
|
82
|
+
be the SHA-256 hashes of the exact saved log files, computed after writing the
|
|
83
|
+
files and before writing \`runner-exit.json\`; do not hand-edit logs after
|
|
84
|
+
hashing. If the runner produces JUnit XML or coverage artifacts,
|
|
85
|
+
record their paths in \`runner-exit.json\`. If it does not, keep those fields
|
|
86
|
+
\`null\`. The markdown report may summarize results, but the raw logs and exit
|
|
87
|
+
JSON are the durable evidence that later verification must inspect.
|
|
70
88
|
|
|
71
89
|
3b. **Promote PBT counterexamples to regression tests**
|
|
72
90
|
|
|
@@ -134,9 +152,10 @@ const INSTRUCTIONS_BODY = `**Input**: Optionally specify a change name. If omitt
|
|
|
134
152
|
| UC1-E4a1 | Error when no grid space | ❌ failed | \`gridSize=0, widgetCount=1\` | \`test/pbt-regression-uc1-e4a1-1.test.ts\` |
|
|
135
153
|
...
|
|
136
154
|
|
|
137
|
-
### Test Run Results
|
|
138
|
-
<
|
|
139
|
-
|
|
155
|
+
### Test Run Results
|
|
156
|
+
Summary: <N collected>, <N passed>, <N failed>, <N skipped>, <N collection errors>
|
|
157
|
+
<raw summary from test runner output: passed/failed/skipped counts>
|
|
158
|
+
If failures: list failing test names and errors.
|
|
140
159
|
|
|
141
160
|
### Runner Evidence
|
|
142
161
|
| Evidence | Path / Value |
|
|
@@ -12,9 +12,11 @@ You are the RUNNER for a completed SynergySpec-SelfEvolving change. In loop v2 (
|
|
|
12
12
|
|
|
13
13
|
Parse these handles from the spawning prompt:
|
|
14
14
|
- **Change name** (required). If the change name is missing or does not resolve via \`synergyspec-selfevolving list --json\`, stop and report the error — do NOT prompt the user (you may have no user channel).
|
|
15
|
-
- **Absolute project root.** Run every CLI command from it.
|
|
16
|
-
- **Harness**: \`claude\` | \`codex\` | \`opencode\` | \`unknown\`. If a harness was provided
|
|
17
|
-
- **
|
|
15
|
+
- **Absolute project root.** Run every CLI command from it.
|
|
16
|
+
- **Harness**: \`claude\` | \`codex\` | \`opencode\` | \`unknown\`. If a concrete harness was provided, pass \`--harness <harness>\` to the CLI invocation below. If the prompt says \`unknown\` but this runner is clearly executing inside Codex, Claude Code, or OpenCode, recover the current host and pass that concrete harness. Omit \`--harness\` only when both the prompt and the current runner host are genuinely unidentified; never set \`SYNERGYSPEC_SELFEVOLVING_HOST_HARNESS=unknown\`.
|
|
17
|
+
- **Force-new**: \`yes\` | \`no\` (optional; default \`no\`). If \`yes\`, append \`--rerun\` so a closed matching episode is not reused.
|
|
18
|
+
- **Isolation**: \`fresh-context subagent\` | \`inline fallback (degraded)\` (optional). If supplied, copy it verbatim into the verdict; otherwise infer from whether this skill is running in a spawned subagent or inline fallback.
|
|
19
|
+
- **Session-id / transcript path** (optional). When the spawning prompt supplied a session-id or transcript path, pass \`--session-id <id>\` / \`--transcript <path>\` to the \`episode\` command so the 主智能体 MAIN AGENT arm's trajectory discovery does not depend on the change-window fallback.
|
|
18
20
|
|
|
19
21
|
**Recursion guard**
|
|
20
22
|
|
|
@@ -49,10 +51,11 @@ Everything in steps 1–6 is CODE. You do not perform any of it. You issue the c
|
|
|
49
51
|
|
|
50
52
|
Run exactly ONE command — the loop-v2 orchestrator. It CODE-SPAWNS the 奖励智能体 REWARD AGENT + 演进智能体 EVOLVING AGENT (+ optional CRITIC AGENT(基线智能体)); you spawn nothing:
|
|
51
53
|
\`\`\`bash
|
|
52
|
-
synergyspec-selfevolving self-evolution episode --change "<change>" --json
|
|
53
|
-
\`\`\`
|
|
54
|
-
- Append \`--session-id <id>\` and/or \`--transcript <path>\` ONLY when the spawning prompt supplied them.
|
|
55
|
-
-
|
|
54
|
+
synergyspec-selfevolving self-evolution episode --change "<change>" --json
|
|
55
|
+
\`\`\`
|
|
56
|
+
- Append \`--session-id <id>\` and/or \`--transcript <path>\` ONLY when the spawning prompt supplied them.
|
|
57
|
+
- Append \`--harness <harness>\` when the spawning prompt supplied \`claude\`, \`codex\`, or \`opencode\`, or when the prompt supplied \`unknown\` but this runner can identify the current host as Codex, Claude Code, or OpenCode. Never append \`--harness unknown\`.
|
|
58
|
+
- Append \`--rerun\` ONLY when the spawning prompt supplied \`Force-new: yes\`.
|
|
56
59
|
|
|
57
60
|
Do NOT grade, score, or author any edit yourself, and do NOT run \`evolve-from-edits\`, \`auto-evolve\`, or \`--agent\` / \`claude -p\` — those are not part of loop v2's host-facing path. The episode command IS the loop.
|
|
58
61
|
|
|
@@ -116,7 +119,7 @@ The session's final message MUST end with exactly this block shape:
|
|
|
116
119
|
- Use \`busy-in-flight\` when the episode command returned the clean concurrency deferral (another in-flight episode holds the same 策略 POLICY target): advantage is null, episode id is none, 策略 POLICY version is unchanged. It is TRANSIENT and self-healing (retry after the lock clears / the 60-min stale window) — it is NOT a DEFECT, do not list it under Defects to surface, and never advise deleting \`in-flight.json\`.
|
|
117
120
|
- When the episode did NOT start (Episode id is none — any not-run / busy-in-flight / error-* outcome), write \`none\` for Evolved target and Canonical file(s) changed, report Decision/Advantage as none/null, and leave 策略 POLICY version unchanged. The change's CONFIGURED target id is context only — do NOT copy it into the Evolved target field on a non-run verdict.
|
|
118
121
|
- A \`kept\` / \`abstained\` outcome on a verified-green run is the CORRECT no-op, not a missed evolution — say so plainly rather than hedging.
|
|
119
|
-
-
|
|
122
|
+
- Copy the supplied \`Isolation:\` value verbatim when present. If it was not supplied, report \`Isolation: fresh-context subagent\` when you were spawned as a subagent, or \`Isolation: inline fallback (degraded)\` when this skill is running inline in the spawning session.`;
|
|
120
123
|
export function getSelfEvolvingSkillTemplate() {
|
|
121
124
|
return {
|
|
122
125
|
name: 'synergyspec-selfevolving-self-evolving',
|
|
@@ -3,7 +3,7 @@ import type { HarnessName, NormalizedTrajectory } from './model.js';
|
|
|
3
3
|
/** One failing test observed in the graded runner result's output. */
|
|
4
4
|
export type ObservedTestFailure = ParsedTestFailure;
|
|
5
5
|
export interface TrajectoryFacts {
|
|
6
|
-
harness: HarnessName;
|
|
6
|
+
harness: HarnessName | 'runner-evidence';
|
|
7
7
|
changeName: string;
|
|
8
8
|
/**
|
|
9
9
|
* A recognizable test-runner invocation (vitest/pytest/go test/…) produced a
|
|
@@ -5,12 +5,11 @@
|
|
|
5
5
|
* Selection order:
|
|
6
6
|
* 1. If the change metadata stamps a `harness` (the strongest signal — see
|
|
7
7
|
* `ChangeMetadata.harness`), use that adapter.
|
|
8
|
-
* 2. Otherwise read the
|
|
9
|
-
*
|
|
10
|
-
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
13
|
-
* hermetic tests stay on the Claude adapter unchanged.
|
|
8
|
+
* 2. Otherwise read the trusted repo/host harness signal
|
|
9
|
+
* (`resolveHostHarnessDetailsForRepo()`): explicit override, persisted
|
|
10
|
+
* sidecar, explicit session id, or the historical Claude default. An
|
|
11
|
+
* env-only Codex/OpenCode signal on an unstamped change is intentionally
|
|
12
|
+
* too weak to scan the host's whole session store.
|
|
14
13
|
* 3. `trajsz` is OPT-IN (env `SYNERGYSPEC_SELFEVOLVING_TRAJSZ`): when enabled
|
|
15
14
|
* and a fresh archive is present it is tried FIRST, since it already
|
|
16
15
|
* normalizes all three harnesses; absent/stale, we fall back to native.
|
|
@@ -98,8 +97,9 @@ export async function resolveTrajectorySource(projectRoot, changeName, options =
|
|
|
98
97
|
return s;
|
|
99
98
|
}
|
|
100
99
|
catch {
|
|
101
|
-
// fall through to
|
|
100
|
+
// fall through to the fail-closed return below
|
|
102
101
|
}
|
|
102
|
+
return null;
|
|
103
103
|
}
|
|
104
104
|
}
|
|
105
105
|
// 2. No stamp: use a trusted host recovery signal, not a blind global scan.
|
|
@@ -143,7 +143,7 @@ export async function getTrajectoryForChange(projectRoot, changeName, options =
|
|
|
143
143
|
export async function getTrajectoryResultForChange(projectRoot, changeName, options = {}) {
|
|
144
144
|
const source = await resolveTrajectorySource(projectRoot, changeName, options);
|
|
145
145
|
if (!source)
|
|
146
|
-
return
|
|
146
|
+
return explainTrajectorySourceMiss(projectRoot, changeName, options);
|
|
147
147
|
try {
|
|
148
148
|
const result = source.getTrajectoryResult
|
|
149
149
|
? await source.getTrajectoryResult(changeName)
|
|
@@ -162,4 +162,35 @@ export async function getTrajectoryResultForChange(projectRoot, changeName, opti
|
|
|
162
162
|
};
|
|
163
163
|
}
|
|
164
164
|
}
|
|
165
|
+
async function explainTrajectorySourceMiss(projectRoot, changeName, options) {
|
|
166
|
+
const changeProvenance = await readChangeTrajectoryProvenance(projectRoot, changeName);
|
|
167
|
+
const sessionIds = uniqueNonBlank([
|
|
168
|
+
...(options.sessionIds ?? []),
|
|
169
|
+
...changeProvenance.sessionIds,
|
|
170
|
+
process.env.SYNERGYSPEC_SELFEVOLVING_SESSION_ID,
|
|
171
|
+
]);
|
|
172
|
+
if (changeProvenance.harness) {
|
|
173
|
+
return {
|
|
174
|
+
trajectory: null,
|
|
175
|
+
sourceHarness: changeProvenance.harness,
|
|
176
|
+
reason: `stamped-${changeProvenance.harness}-source-unavailable`,
|
|
177
|
+
};
|
|
178
|
+
}
|
|
179
|
+
const hostResolution = await resolveHostHarnessDetailsForRepo(projectRoot);
|
|
180
|
+
const envOnlyNativeHost = hostResolution.source === 'env' &&
|
|
181
|
+
hostResolution.harness !== 'claude' &&
|
|
182
|
+
sessionIds.length === 0;
|
|
183
|
+
if (envOnlyNativeHost) {
|
|
184
|
+
return {
|
|
185
|
+
trajectory: null,
|
|
186
|
+
sourceHarness: hostResolution.harness,
|
|
187
|
+
reason: `env-only-${hostResolution.harness}-requires-stamped-harness-or-session-id`,
|
|
188
|
+
};
|
|
189
|
+
}
|
|
190
|
+
return {
|
|
191
|
+
trajectory: null,
|
|
192
|
+
sourceHarness: hostResolution.source === 'default' ? null : hostResolution.harness,
|
|
193
|
+
reason: `${hostResolution.source}-${hostResolution.harness}-source-unavailable`,
|
|
194
|
+
};
|
|
195
|
+
}
|
|
165
196
|
//# sourceMappingURL=registry.js.map
|