synergyspec-selfevolving 1.3.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +50 -19
- package/dist/commands/learn.d.ts +12 -1
- package/dist/commands/learn.js +373 -31
- package/dist/commands/self-evolution-episode.d.ts +177 -0
- package/dist/commands/self-evolution-episode.js +423 -0
- package/dist/commands/self-evolution.d.ts +12 -190
- package/dist/commands/self-evolution.js +179 -786
- package/dist/commands/workflow/status.js +3 -1
- package/dist/core/archive.d.ts +0 -1
- package/dist/core/archive.js +0 -58
- package/dist/core/artifact-graph/instruction-loader.d.ts +2 -4
- package/dist/core/artifact-graph/instruction-loader.js +3 -31
- package/dist/core/config-prompts.js +4 -0
- package/dist/core/fitness/health/health-metrics.d.ts +26 -56
- package/dist/core/fitness/health/health-metrics.js +19 -58
- package/dist/core/fitness/health/index.d.ts +15 -2
- package/dist/core/fitness/health/index.js +25 -1
- package/dist/core/fitness/health/local-source.d.ts +43 -4
- package/dist/core/fitness/health/local-source.js +181 -25
- package/dist/core/fitness/health/metric-source.d.ts +48 -19
- package/dist/core/fitness/health/metric-source.js +8 -18
- package/dist/core/fitness/health/resolve-source.js +4 -1
- package/dist/core/fitness/loss.d.ts +7 -7
- package/dist/core/fitness/loss.js +6 -6
- package/dist/core/fitness/sample.d.ts +10 -0
- package/dist/core/fitness/test-failures.d.ts +30 -0
- package/dist/core/fitness/test-failures.js +123 -0
- package/dist/core/learn/credit-path.d.ts +36 -0
- package/dist/core/learn/credit-path.js +198 -0
- package/dist/core/learn/trajectory-discovery.d.ts +39 -0
- package/dist/core/learn/trajectory-discovery.js +140 -0
- package/dist/core/learn.d.ts +39 -5
- package/dist/core/learn.js +131 -14
- package/dist/core/project-config.d.ts +4 -0
- package/dist/core/project-config.js +52 -1
- package/dist/core/self-evolution/candidate-fitness.d.ts +23 -1
- package/dist/core/self-evolution/candidate-fitness.js +31 -5
- package/dist/core/self-evolution/candidates.d.ts +0 -9
- package/dist/core/self-evolution/canonical-targets.d.ts +8 -4
- package/dist/core/self-evolution/canonical-targets.js +8 -4
- package/dist/core/self-evolution/critic-agent.d.ts +150 -0
- package/dist/core/self-evolution/critic-agent.js +487 -0
- package/dist/core/self-evolution/edits-contract.d.ts +53 -0
- package/dist/core/self-evolution/edits-contract.js +89 -0
- package/dist/core/self-evolution/episode-orchestrator.d.ts +197 -0
- package/dist/core/self-evolution/episode-orchestrator.js +534 -0
- package/dist/core/self-evolution/episode-store.d.ts +266 -0
- package/dist/core/self-evolution/episode-store.js +573 -0
- package/dist/core/self-evolution/evolution-switches.d.ts +1 -1
- package/dist/core/self-evolution/evolution-switches.js +5 -10
- package/dist/core/self-evolution/evolving-agent.d.ts +162 -0
- package/dist/core/self-evolution/evolving-agent.js +449 -0
- package/dist/core/self-evolution/health-baseline.d.ts +25 -6
- package/dist/core/self-evolution/health-baseline.js +30 -6
- package/dist/core/self-evolution/host-harness.d.ts +1 -2
- package/dist/core/self-evolution/host-harness.js +1 -2
- package/dist/core/self-evolution/index.d.ts +10 -6
- package/dist/core/self-evolution/index.js +19 -6
- package/dist/core/self-evolution/learn-hints.d.ts +31 -0
- package/dist/core/self-evolution/learn-hints.js +16 -0
- package/dist/core/self-evolution/learn-observation-adapter.d.ts +35 -0
- package/dist/core/self-evolution/learn-observation-adapter.js +285 -10
- package/dist/core/self-evolution/line-diff.d.ts +60 -0
- package/dist/core/self-evolution/line-diff.js +130 -0
- package/dist/core/self-evolution/policy/fs-safe.d.ts +19 -0
- package/dist/core/self-evolution/policy/fs-safe.js +89 -0
- package/dist/core/self-evolution/policy/index.d.ts +13 -0
- package/dist/core/self-evolution/policy/index.js +13 -0
- package/dist/core/self-evolution/policy/policy-store.d.ts +217 -0
- package/dist/core/self-evolution/policy/policy-store.js +774 -0
- package/dist/core/self-evolution/policy/reject-buffer.d.ts +48 -0
- package/dist/core/self-evolution/policy/reject-buffer.js +168 -0
- package/dist/core/self-evolution/promote.d.ts +1 -1
- package/dist/core/self-evolution/promote.js +6 -33
- package/dist/core/self-evolution/promotion.js +1 -2
- package/dist/core/self-evolution/proposer-agent.d.ts +41 -0
- package/dist/core/self-evolution/proposer-agent.js +94 -13
- package/dist/core/self-evolution/proposer-slice.d.ts +26 -0
- package/dist/core/self-evolution/proposer-slice.js +54 -0
- package/dist/core/self-evolution/reward-agent.d.ts +234 -0
- package/dist/core/self-evolution/reward-agent.js +564 -0
- package/dist/core/self-evolution/scope-gate.d.ts +66 -0
- package/dist/core/self-evolution/scope-gate.js +107 -0
- package/dist/core/self-evolution/success-channel.d.ts +79 -0
- package/dist/core/self-evolution/success-channel.js +361 -0
- package/dist/core/self-evolution/target-evolution.d.ts +11 -0
- package/dist/core/self-evolution/target-evolution.js +2 -0
- package/dist/core/self-evolution/tool-evolution.js +2 -13
- package/dist/core/self-evolution/verdict.d.ts +8 -5
- package/dist/core/self-evolution/verdict.js +4 -7
- package/dist/core/templates/skill-templates.d.ts +1 -0
- package/dist/core/templates/skill-templates.js +1 -0
- package/dist/core/templates/workflow-manifest.js +2 -0
- package/dist/core/templates/workflows/learn.d.ts +4 -2
- package/dist/core/templates/workflows/learn.js +25 -166
- package/dist/core/templates/workflows/self-evolving.d.ts +13 -0
- package/dist/core/templates/workflows/self-evolving.js +127 -0
- package/dist/core/trajectory/facts.d.ts +16 -0
- package/dist/core/trajectory/facts.js +12 -4
- package/dist/core/trajectory/skeleton.d.ts +43 -0
- package/dist/core/trajectory/skeleton.js +239 -0
- package/dist/dashboard/data.d.ts +25 -51
- package/dist/dashboard/data.js +68 -180
- package/dist/dashboard/react-client.js +458 -503
- package/dist/dashboard/react-styles.js +3 -3
- package/dist/dashboard/server.js +23 -17
- package/dist/ui/ascii-patterns.d.ts +7 -15
- package/dist/ui/ascii-patterns.js +123 -54
- package/dist/ui/welcome-screen.d.ts +0 -14
- package/dist/ui/welcome-screen.js +16 -35
- package/package.json +3 -1
- package/scripts/code-health.py +1066 -638
- package/scripts/slop_rules.yaml +2151 -0
|
@@ -22,4 +22,5 @@ export { getCiSkillTemplate, getOpsxCiCommandTemplate } from './workflows/ci.js'
|
|
|
22
22
|
export { getOpsxProposeSkillTemplate, getOpsxProposeCommandTemplate } from './workflows/propose.js';
|
|
23
23
|
export { getFeedbackSkillTemplate } from './workflows/feedback.js';
|
|
24
24
|
export { getCompareImagesSkillTemplate } from './workflows/compare-images.js';
|
|
25
|
+
export { getSelfEvolvingSkillTemplate } from './workflows/self-evolving.js';
|
|
25
26
|
//# sourceMappingURL=skill-templates.js.map
|
|
@@ -18,6 +18,7 @@ import { getNewChangeSkillTemplate, getOpsxNewCommandTemplate, } from './workflo
|
|
|
18
18
|
import { getOnboardSkillTemplate, getOpsxOnboardCommandTemplate, } from './workflows/onboard.js';
|
|
19
19
|
import { getOpsxProposeCommandTemplate, getOpsxProposeSkillTemplate, } from './workflows/propose.js';
|
|
20
20
|
import { getOpsxRunTestsCommandTemplate, getRunTestsSkillTemplate, } from './workflows/run-tests.js';
|
|
21
|
+
import { getSelfEvolvingSkillTemplate } from './workflows/self-evolving.js';
|
|
21
22
|
import { getOpsxSyncCommandTemplate, getSyncSpecsSkillTemplate, } from './workflows/sync-specs.js';
|
|
22
23
|
import { getOpsxTddCommandTemplate, getTddSkillTemplate, } from './workflows/tdd.js';
|
|
23
24
|
import { getOpsxVerifyCommandTemplate, getVerifyChangeSkillTemplate, } from './workflows/verify-change.js';
|
|
@@ -67,6 +68,7 @@ const WORKFLOW_MANIFEST = [
|
|
|
67
68
|
workflow('run-tests', 'synergyspec-selfevolving-run-tests', getRunTestsSkillTemplate, getOpsxRunTestsCommandTemplate, ['workflow', 'test']),
|
|
68
69
|
workflow('ci', 'synergyspec-selfevolving-ci', getCiSkillTemplate, getOpsxCiCommandTemplate, ['workflow', 'test', 'ci']),
|
|
69
70
|
utility('synergyspec-selfevolving-compare-images', getCompareImagesSkillTemplate, ['utility', 'test']),
|
|
71
|
+
utility('synergyspec-selfevolving-self-evolving', getSelfEvolvingSkillTemplate, ['utility', 'learn', 'critic']),
|
|
70
72
|
];
|
|
71
73
|
export function getWorkflowManifest() {
|
|
72
74
|
return WORKFLOW_MANIFEST;
|
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Skill Template Workflow Modules
|
|
3
3
|
*
|
|
4
|
-
* Learn workflow:
|
|
5
|
-
*
|
|
4
|
+
* Learn workflow: thin entrance to self-evolution — selects the change,
|
|
5
|
+
* spawns the fresh-context runner skill
|
|
6
|
+
* (synergyspec-selfevolving-self-evolving), which triggers ONE loop-v2
|
|
7
|
+
* self-evolution episode, and relays its verdict.
|
|
6
8
|
*/
|
|
7
9
|
import type { SkillTemplate, CommandTemplate } from '../types.js';
|
|
8
10
|
export declare function getLearnSkillTemplate(): SkillTemplate;
|
|
@@ -2,20 +2,7 @@ const INSTRUCTIONS_BODY = `**Input**: Optionally specify a change name. If omitt
|
|
|
2
2
|
|
|
3
3
|
**Purpose**
|
|
4
4
|
|
|
5
|
-
This is the review-and-learn step after \`/synspec:apply\` and \`/synspec:verify\`, and it is the ENTRANCE to
|
|
6
|
-
|
|
7
|
-
Preview-only is the bare-CLI default and the explicit opt-out: run \`synergyspec-selfevolving learn <change> --preview\` (or simply omit \`--apply\`) to analyze without evolving.
|
|
8
|
-
|
|
9
|
-
**Default Mode: Autonomous self-evolution**
|
|
10
|
-
|
|
11
|
-
- After reviewing the change, you DO the evolution. Do not stop at a report, and do not ask permission.
|
|
12
|
-
- Safety is AUTOMATED, not human-gated: a canonical file is promoted ONLY when the change's test evidence is OBSERVED green (the CLI verifies the ACTUAL test run from the session trajectory, not just the authored \`test-report.md\`), the static gate passes, the target is evolvable under the per-target switch, and a rollback snapshot is taken.
|
|
13
|
-
- When nothing canonical is written, CLASSIFY why before moving on — do not blanket-archive every no-op as "safety working":
|
|
14
|
-
- **(a) SAFE refusal (expected, not a bug):** evidence is missing or red, the target is frozen, or the static gate failed on real grounds. The floor refused to evolve on unverified / failing / out-of-scope edits. State the reason in the Evolution Result and move on.
|
|
15
|
-
- **(b) DEFECT (a tool bug to SURFACE, not archive over):** the evolution target STILL could not be BOUND after you named one concrete same-kind \`--evolve-target\` (a preview target with \`targetId: null\` / \`needsDisambiguation: true\`, or an \`evolution-target-unresolved\` observation, that PERSISTS post-naming), or promotion failed for a reason that is NOT about evidence / freezing / scope. A FIRST-pass \`evolution-target-unresolved\` observation (severity \`action\`) on an autonomous \`--apply\` run is NOT this — it only means the kind-only hint is AMBIGUOUS; pick ONE concrete id from its candidates, pass it with \`--evolve-target\`, and re-run (do NOT hand-edit a canonical file to work around it). For a genuine DEFECT, nothing was written because the CLI COULD NOT act — not because it correctly declined: surface it as an unresolved issue (keep an \`incident\` memory entry), name the target id that would not bind, and flag it for a fix. \`synergyspec-selfevolving status\` prints the machine-written \`Evolution:\` outcome — do not contradict it in free text.
|
|
16
|
-
- Frozen gate-defining / oracle files (the gen-test/run-test oracle, schema contracts you were not asked to evolve) are NEVER touched — the CLI rejects any such edit.
|
|
17
|
-
|
|
18
|
-
This run also produces neutral \`observations\` in the JSON output (reflection signals). During autonomous evolution learn persists derived evolution hints to \`.synergyspec-selfevolving/learn-handoffs/<change>/<timestamp>/hints.json\`; you then author the edit and promote it via the \`self-evolution evolve-from-edits\` command in the evolve step. The \`--agent\` flag (a headless \`claude -p\` proposer) is a cron/CI fallback ONLY — never use it when you are the running agent, and never assume \`claude\` exists on a non-Claude host.
|
|
5
|
+
This is the review-and-learn step after \`/synspec:apply\` and \`/synspec:verify\`, and it is the ENTRANCE to one self-evolution EPISODE (loop v2 — self-evolution as in-context RL) — but the episode itself runs through a FRESH-CONTEXT runner subagent (\`synergyspec-selfevolving-self-evolving\`). The rollout actor does not grade its own work: end-of-cycle sessions are context-heavy, and the system grades the on-disk trajectory, not the actor's intentions. The runner does not grade or edit either — it triggers the CLI episode orchestrator, which CODE-SPAWNS the 奖励智能体 REWARD AGENT (scoring) and 演进智能体 EVOLVING AGENT (the ONE bounded edit onto the 策略 POLICY), then relays the result. Your job here is to select the change, hand the runner explicit handles, and relay its verdict.
|
|
19
6
|
|
|
20
7
|
**Steps**
|
|
21
8
|
|
|
@@ -27,169 +14,41 @@ This run also produces neutral \`observations\` in the JSON output (reflection s
|
|
|
27
14
|
|
|
28
15
|
Prefer changes that have completed apply/verify evidence. If apply or verify appears incomplete, continue in preview mode and clearly mark the missing evidence.
|
|
29
16
|
|
|
30
|
-
2. **
|
|
31
|
-
|
|
32
|
-
Run:
|
|
33
|
-
\`\`\`bash
|
|
34
|
-
synergyspec-selfevolving status --change "<name>" --json
|
|
35
|
-
synergyspec-selfevolving instructions apply --change "<name>" --json
|
|
36
|
-
\`\`\`
|
|
37
|
-
|
|
38
|
-
Read every file in \`contextFiles\`. Also check for these optional evidence files under \`synergyspec-selfevolving/changes/<name>/\`:
|
|
39
|
-
- \`spec-tests.md\`
|
|
40
|
-
- \`test-report.md\`
|
|
41
|
-
- \`test-plan.md\`
|
|
42
|
-
- \`verification.md\`
|
|
43
|
-
- \`verification-report.md\`
|
|
44
|
-
- \`spec-blast-radius.md\`
|
|
45
|
-
|
|
46
|
-
3. **Review implementation and verification evidence**
|
|
47
|
-
|
|
48
|
-
Summarize:
|
|
49
|
-
- Artifact shape: proposal/use cases/specs/design/tasks, schema, and missing pieces
|
|
50
|
-
- Task completion: completed, incomplete, reopened, or ambiguous tasks
|
|
51
|
-
- Test evidence: mapped tests, uncovered use-case steps, failing tests, PBT regressions, manual test plans
|
|
52
|
-
- Verification evidence: critical/warning/suggestion findings, blast radius, and unresolved recommendations
|
|
53
|
-
- Implementation evidence from git diff if useful, but do not require git history to produce the preview
|
|
54
|
-
|
|
55
|
-
4. **Extract reusable experience**
|
|
56
|
-
|
|
57
|
-
Identify lessons that are specific enough to help future SynergySpec-SelfEvolving work:
|
|
58
|
-
- Reusable patterns: artifact structure, traceability conventions, implementation sequencing, test strategy, verification heuristics
|
|
59
|
-
- Problems to avoid: gaps, rework causes, missing tests, brittle assumptions, confusing instructions, unresolved warnings
|
|
60
|
-
- Local memory candidates: concise lessons that should help a future agent or teammate
|
|
61
|
-
- Template observations: repeated workflow-template opportunities that may justify a future change, but should not be edited now
|
|
62
|
-
- Optimization signals: which workflow prompt, artifact template, skill instruction, schema, verifier, or evaluator target could be improved, how it would be improved, and the exact evidence for that mapping
|
|
63
|
-
|
|
64
|
-
Be transparent about what is known versus not yet materialized. For each optimization signal, name the CONCRETE canonical target (e.g. \`artifact-template:design\`, \`workflow-prompt:design\`) and the exact edit — you author and promote it in the evolve step.
|
|
65
|
-
|
|
66
|
-
5. **Generate the preview**
|
|
67
|
-
|
|
68
|
-
Use this structure:
|
|
69
|
-
|
|
70
|
-
\`\`\`markdown
|
|
71
|
-
## Review and Learn: <change-name>
|
|
72
|
-
|
|
73
|
-
### Evidence Read
|
|
74
|
-
| Evidence | Status |
|
|
75
|
-
|----------|--------|
|
|
76
|
-
| tasks.md | loaded / missing |
|
|
77
|
-
| spec-tests.md | loaded / missing |
|
|
78
|
-
| test-report.md | loaded / missing |
|
|
79
|
-
| verification.md | loaded / missing |
|
|
80
|
-
|
|
81
|
-
### Reusable Experience
|
|
82
|
-
- <lesson with source artifact or file reference>
|
|
83
|
-
|
|
84
|
-
### Problems To Avoid
|
|
85
|
-
- <issue, why it mattered, and how to prevent it>
|
|
86
|
-
|
|
87
|
-
### Suggested Local Memory Entries
|
|
88
|
-
| Candidate ID | Type | Title | Disposition | Why Keep It | Evidence |
|
|
89
|
-
|--------------|------|-------|-------------|-------------|----------|
|
|
90
|
-
| <id> | workflow | <title> | keep / report-only / reject | <reason> | <artifact/test/verification source> |
|
|
91
|
-
|
|
92
|
-
### Memory Consolidation
|
|
93
|
-
- Status: preview only / applied
|
|
94
|
-
- Consolidated: <exact memory entries written, or exact keep candidates that would be written>
|
|
95
|
-
- Evidence: <retrieval checks, source artifacts, or why no candidate passed the keep gate>
|
|
96
|
-
|
|
97
|
-
### Template Observations
|
|
98
|
-
- <observation that may justify a future template/spec change>
|
|
99
|
-
|
|
100
|
-
### Skill/Template Optimization Preview
|
|
101
|
-
| Target | Kind | How It Would Be Optimized | Evidence | Status |
|
|
102
|
-
|--------|------|---------------------------|----------|--------|
|
|
103
|
-
| <canonical target id or none> | workflow-prompt / artifact-template / skill-instruction | clarify-instruction / tighten-output-contract / change-template-field / add-verification-step | <hint source artifact or observation> | preview only / hints written / no target |
|
|
104
|
-
|
|
105
|
-
### Next Step
|
|
106
|
-
- <write memory / persist hints / propose-canonical / auto-evolve / archive>
|
|
107
|
-
|
|
108
|
-
### Evolution Result
|
|
109
|
-
<which canonical file was evolved + the rollback command, OR why evolution was skipped: --preview, evidence not observed-green, or gate refused>
|
|
110
|
-
\`\`\`
|
|
111
|
-
|
|
112
|
-
6. **Consolidate memory and hints (autonomous — no confirmation)**
|
|
113
|
-
|
|
114
|
-
Unless \`--preview\` was requested, apply the learn writes directly — do not ask which to apply:
|
|
115
|
-
- write the learn report (\`synergyspec-selfevolving/changes/<name>/learn-report.md\`);
|
|
116
|
-
- the approved keep memory entries are written FOR you by this skill's \`learn --apply --yes\` run — it stamps them with the learn-candidate tags + \`synergyspec-selfevolving-learn\` provenance. Do NOT also hand-write them with a bare \`synergyspec-selfevolving memory add\` (that loses the provenance/tag set and double-writes); reserve \`memory add\` for ad-hoc notes that are deliberately NOT learn candidates (report-only / reject candidates stay out of memory either way);
|
|
117
|
-
- persist evolution hints: \`synergyspec-selfevolving learn "<name>" --persist-hints\` (this writes the hints.json you use in the evolve step).
|
|
118
|
-
|
|
119
|
-
\`--preview\` is the only mode that skips these writes.
|
|
120
|
-
|
|
121
|
-
7. **Apply only approved writes**
|
|
122
|
-
|
|
123
|
-
If applying a report, write \`synergyspec-selfevolving/changes/<name>/learn-report.md\` with the preview content plus an "Applied Writes" section.
|
|
124
|
-
|
|
125
|
-
If applying memory entries MANUALLY (only when you did NOT run \`learn --apply\`, which already writes them with these tags), use \`synergyspec-selfevolving memory add\` with:
|
|
126
|
-
- \`--type workflow\` for reusable workflow lessons
|
|
127
|
-
- \`--type incident\` for problems to avoid
|
|
128
|
-
- \`--tag synspec-learn\`
|
|
129
|
-
- \`--tag change:<name>\`
|
|
130
|
-
- \`--agent-id synergyspec-selfevolving-learn\`
|
|
131
|
-
- \`--promotion-candidate\` only for template observations that deserve later human review
|
|
132
|
-
|
|
133
|
-
Canonical workflow prompts/templates are evolved through the gated \`self-evolution evolve-from-edits\` flow in the next step — not by hand-editing the file (which would bypass the oracle + scope checks).
|
|
134
|
-
|
|
135
|
-
8. **Evolve the tool from these lessons (autonomous — DO THIS, do not ask)**
|
|
136
|
-
|
|
137
|
-
This is the close-the-loop step: you author a concrete improvement to a canonical prompt/template and promote it onto the LOCAL installed file — no rebuild, no republish, no confirmation, no \`claude -p\`.
|
|
138
|
-
|
|
139
|
-
a. **Pick the concrete target + its local file.** From the "Skill/Template Optimization Preview" take the canonical target id and its resolved LOCAL file path (e.g. \`artifact-template:design\` → \`synergyspec-selfevolving/schemas/spec-driven/templates/design.md\`). If the preview marks a target unbindable / needs-disambiguation (\`targetId: null\`, \`needsDisambiguation: true\`, formerly shown as \`:unspecified\`), choose ONE concrete id from its \`candidateTargetIds\` and pass it explicitly with \`--evolve-target\` (e.g. \`--evolve-target artifact-template:design\`); then re-run the preview to confirm it now binds. If it STILL will not bind after you name a single concrete \`--evolve-target\`, that is the case-(b) DEFECT above (\`evolution-target-unresolved\`) — surface it and stop; do NOT hand-edit the file to work around it.
|
|
140
|
-
|
|
141
|
-
a.5. **Consult the optimization trajectory first.** Before authoring, run \`synergyspec-selfevolving self-evolution trajectory <id> --json\` to see the scored history of prior candidates for this target — their loss, verdict, and one-line approach — plus the \`BASELINE TO BEAT\`. Aim for an edit that would score a LOWER loss than the best prior entry, and do NOT repeat the approach of entries marked \`rejected\`, \`rolled-back\`, or \`outcompeted\`. If it reports no prior candidates yet, just proceed.
|
|
142
|
-
|
|
143
|
-
b. **Author the edit yourself.** Reason about the exact prompt/template gap that caused the missed evidence (e.g. the design step missed a stdlib/API-shape compatibility check), then READ the LOCAL file the preview's \`localFiles\` resolves to and write its FULL improved contents. Keep the change minimal and targeted; never touch frozen oracle files.
|
|
144
|
-
|
|
145
|
-
Author against the path the preview gives you in \`localFiles\` (project-local). For an artifact-template / schema target on the FIRST evolution that project-local base may not exist on disk yet — the preview resolves the path read-only, and \`evolve-from-edits\` MATERIALIZES the canonical default into it (project-local override → user override → packaged default) when you promote. So if reading \`localFiles\` returns "not found", author your full new file against the canonical default content (the same base the CLI will materialize), not against a global copy. Do NOT go hunting in the GLOBAL npm install for the base (e.g. \`npm root -g\` → \`…/AppData/Roaming/npm/node_modules/synergyspec-selfevolving/schemas/…\`), and never edit anything under the global install — the materialize + promote writes land project-local under the repo (the promote write is guarded by an explicit within-repo assertion). If \`localFiles\` is empty, that target has no user-editable local surface here; treat it as the case-(b) DEFECT, not a reason to reach outside the repo.
|
|
146
|
-
|
|
147
|
-
c. **Promote it in one non-interactive command** (validates → gates → observed-verified → promotes onto the local file):
|
|
148
|
-
\`\`\`bash
|
|
149
|
-
synergyspec-selfevolving self-evolution evolve-from-edits --from-learn "<hints.json>" --evolve-target <id> --from-edits edits.json --yes
|
|
150
|
-
\`\`\`
|
|
151
|
-
where \`edits.json\` = \`{ "targetId": "<id>", "rationale": "<why this fixes the gap>", "edits": [{ "relPath": "<the local file>", "content": "<full new file>" }] }\` (pass \`-\` to read it from stdin).
|
|
17
|
+
2. **Gather explicit handles**
|
|
152
18
|
|
|
153
|
-
|
|
19
|
+
The runner starts with NO conversation context, so collect every handle it needs:
|
|
20
|
+
- **Project root**: the absolute path of the current working directory.
|
|
21
|
+
- **Change name**: from step 1.
|
|
22
|
+
- **Harness**: read the \`harness:\` key from \`synergyspec-selfevolving/changes/<name>/.synergyspec-selfevolving.yaml\`; if absent, use \`unknown\`.
|
|
23
|
+
- **Mode**: \`preview\` if the user asked for a preview or step 1 found the apply/verify evidence incomplete (the runner passes \`--preview\` so the orchestrator scores and diagnoses WITHOUT committing a rollback or evolution); otherwise \`apply\` (the orchestrator runs the full episode — score, decide, and the 演进智能体 EVOLVING AGENT's ONE bounded edit — autonomously, no confirmation prompt).
|
|
24
|
+
- **Session handle (optional)**: if your harness exposes this session's id or transcript path, capture it; otherwise omit it (the 主智能体 MAIN AGENT arm's trajectory discovery then uses the change window).
|
|
154
25
|
|
|
155
|
-
|
|
26
|
+
3. **Spawn the runner**
|
|
156
27
|
|
|
157
|
-
|
|
28
|
+
Use Task tool (subagent_type: "general-purpose", prompt: "Use Skill tool to invoke synergyspec-selfevolving-self-evolving for change '<name>'. Project root: <root>. Harness: <harness>. Mode: <apply|preview>. Session-id: <id>. Transcript: <path>. Trigger the loop-v2 self-evolution episode autonomously, do not ask the user questions, and end with the '## Episode Verdict' block.")
|
|
158
29
|
|
|
159
|
-
|
|
30
|
+
Include the \`Session-id: <id>.\` / \`Transcript: <path>.\` segment only when the session handle from step 2 is known — omit it entirely when unknown.
|
|
160
31
|
|
|
161
|
-
|
|
162
|
-
\`\`\`bash
|
|
163
|
-
synergyspec-selfevolving learn handoff "<change>"
|
|
164
|
-
\`\`\`
|
|
165
|
-
This assembles the trajectory, writes \`extraction-brief.json\` and a sibling \`README.md\` to \`.synergyspec-selfevolving/learn-handoffs/<change>/<timestamp>/\`, and returns immediately (no polling), printing the workdir and the response path. If it reports \`no-transcript\` (no discoverable session for the change window) or \`no-status\` (no verification/test evidence to derive success/failure), stop here — there is nothing to fulfill.
|
|
32
|
+
The runner triggers exactly one CLI command — \`synergyspec-selfevolving self-evolution episode --change "<name>"\` (add \`--session-id <id>\` when the session handle is known, and \`--preview\` in preview Mode) — and the orchestrator CODE-SPAWNS the 奖励智能体 REWARD AGENT + 演进智能体 EVOLVING AGENT (+ optional CRITIC AGENT (基线智能体 baseline agent)). Neither you nor the runner grades or edits canonical files.
|
|
166
33
|
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
## SubtaskStep <proposal | usecases | spec | design | tasks | global>
|
|
172
|
-
## Title <short identifier>
|
|
173
|
-
## Description <when to / when NOT to use this memory>
|
|
174
|
-
## Content <1-3 sentences distilling the insight>
|
|
175
|
-
\`\`\`
|
|
176
|
-
If no useful items can be extracted, write a comment-only response (e.g. \`<!-- no useful items -->\`). The parser yields zero candidates without raising an error.
|
|
34
|
+
Guardrails:
|
|
35
|
+
- Do NOT trigger the episode yourself in this session — it must run from a fresh context.
|
|
36
|
+
- **Fallback (no Task tool):** if this host has no Task tool (or spawning fails), invoke the synergyspec-selfevolving-self-evolving skill INLINE instead — use the Skill tool, or read \`<skillsDir>/skills/synergyspec-selfevolving-self-evolving/SKILL.md\` and follow it in this session — and note \`Isolation: inline fallback (degraded)\` in the final output.
|
|
37
|
+
- **Last resort (runner skill not installed):** run \`synergyspec-selfevolving self-evolution episode --change "<name>" --preview\`, report that the full episode could not run via the runner, and suggest re-running \`synergyspec-selfevolving init\`.
|
|
177
38
|
|
|
178
|
-
|
|
179
|
-
\`\`\`bash
|
|
180
|
-
synergyspec-selfevolving learn ingest-handoff "<workdir>" --apply --yes
|
|
181
|
-
\`\`\`
|
|
182
|
-
(Omit \`--apply --yes\` to preview only.) Ingested items pass through the same \`classifyCandidate\` quality gate as heuristic candidates, so only well-formed items (Trigger / Action / Evidence, bounded scope) are written.
|
|
39
|
+
4. **Verify and relay the verdict**
|
|
183
40
|
|
|
184
|
-
|
|
41
|
+
Read the runner's \`## Episode Verdict\` block from its final message, then:
|
|
42
|
+
- Cross-check it against \`synergyspec-selfevolving status --change "<name>" --json\` and the episode's \`episode.json\` / \`diagnosis.json\`. NEVER contradict the machine-written outcome.
|
|
43
|
+
- Relay the outcome, the decision (rolled-back / kept / abstained), the evolution kind, the new 策略 POLICY version, the evolved target, and the rollback command verbatim.
|
|
44
|
+
- Classify the outcome before moving on: a \`kept\` / \`abstained\` no-op on a verified-green or no-nameable-gap run is the CORRECT outcome (产物即弃), not a missed evolution; a \`rolled-back\` decision is the loop working (the 否决缓冲 reject-buffer recorded the lost direction). A SAFE refusal (missing/red evidence, frozen target, gate refused on real grounds) is expected, not a bug; a DEFECT the runner flagged (an unbindable target, an orchestrator failure that is NOT about evidence / freezing / scope) must be surfaced to the user, not archived over.
|
|
185
45
|
|
|
186
46
|
**Output Format**
|
|
187
47
|
|
|
188
|
-
- Lead with the
|
|
189
|
-
-
|
|
190
|
-
-
|
|
191
|
-
-
|
|
192
|
-
- If no useful lessons are found, say so and recommend preview-only.
|
|
48
|
+
- Lead with the runner's verdict, not the spawn mechanics.
|
|
49
|
+
- Relay the \`## Episode Verdict\` fields verbatim: outcome, decision, evolution kind, advantage, new 策略 POLICY version, evolved target, canonical file(s) changed, and the rollback command.
|
|
50
|
+
- State clearly whether the 策略 POLICY changed (evolved / rolled-back / unchanged) and the isolation mode (fresh-context subagent, or inline fallback (degraded)).
|
|
51
|
+
- Separate safe no-ops and refusals from DEFECTs to surface.
|
|
193
52
|
- End with the normal next step: \`/synspec:archive\` once the user is satisfied with the review.`;
|
|
194
53
|
export function getLearnSkillTemplate() {
|
|
195
54
|
return {
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Skill Template Workflow Modules
|
|
3
|
+
*
|
|
4
|
+
* Self-evolving runner: utility skill (no command counterpart) — always
|
|
5
|
+
* installed regardless of profile. In loop v2 (self-evolution as in-context
|
|
6
|
+
* RL) the 奖励智能体 REWARD AGENT (scoring) and 演进智能体 EVOLVING AGENT
|
|
7
|
+
* (editing) are CODE-SPAWNED by the episode orchestrator — so this host-facing
|
|
8
|
+
* skill is a THIN RUNNER: it triggers the CLI episode and relays the result.
|
|
9
|
+
* It NEVER grades and NEVER edits canonical files itself.
|
|
10
|
+
*/
|
|
11
|
+
import type { SkillTemplate } from '../types.js';
|
|
12
|
+
export declare function getSelfEvolvingSkillTemplate(): SkillTemplate;
|
|
13
|
+
//# sourceMappingURL=self-evolving.d.ts.map
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
const INSTRUCTIONS_BODY = `**Role**
|
|
2
|
+
|
|
3
|
+
You are the RUNNER for a completed SynergySpec-SelfEvolving change. In loop v2 (self-evolution as in-context RL) you do NOT grade and you do NOT edit canonical files — the orchestrator CODE-SPAWNS the 奖励智能体 REWARD AGENT (judge: 算分 reward(主臂)&reward(基线臂), advantage = reward(主臂) − reward(基线臂), 文本梯度 textual gradient — it never edits and 弃权 abstains when there is no nameable gap) and the 演进智能体 EVOLVING AGENT (optimizer.step: ONE bounded edit ≤L onto the 策略 POLICY — it never scores), plus an optional CRITIC AGENT(基线智能体 baseline agent)that reruns the last episode's policy vN on the SAME change. Your only job is to TRIGGER the episode via the CLI and RELAY the machine-written result. Read ONLY the on-disk evidence (episode.json, diagnosis.json, the episode JSON output) — never an actor's in-conversation self-report, and never re-judge what the agents decided.
|
|
4
|
+
|
|
5
|
+
**The boundary (read this first)**
|
|
6
|
+
|
|
7
|
+
- The skill itself NEVER grades. Scoring — reward(主臂), reward(基线臂), advantage, the 文本梯度 textual gradient — is computed by the CODE-SPAWNED 奖励智能体 REWARD AGENT, never by you.
|
|
8
|
+
- The skill itself NEVER edits canonical files. The ONE bounded edit (≤L) onto the 策略 POLICY (the design template — the 主智能体 MAIN AGENT's "weights") is authored by the CODE-SPAWNED 演进智能体 EVOLVING AGENT, never by you. Do NOT hand-edit any schema/template/prompt file from this skill.
|
|
9
|
+
- You trigger ONE CLI command (the episode orchestrator), then READ and RELAY its result. That is the whole job.
|
|
10
|
+
|
|
11
|
+
**Input contract**
|
|
12
|
+
|
|
13
|
+
Parse these handles from the spawning prompt:
|
|
14
|
+
- **Change name** (required). If the change name is missing or does not resolve via \`synergyspec-selfevolving list --json\`, stop and report the error — do NOT prompt the user (you may have no user channel).
|
|
15
|
+
- **Absolute project root.** Run every CLI command from it.
|
|
16
|
+
- **Harness**: \`claude\` | \`codex\` | \`opencode\` | \`unknown\`. If a harness was provided and differs from the ambient host, set \`SYNERGYSPEC_SELFEVOLVING_HOST_HARNESS=<harness>\` for the CLI invocation below.
|
|
17
|
+
- **Session-id / transcript path** (optional). When the spawning prompt supplied a session-id or transcript path, pass \`--session-id <id>\` / \`--transcript <path>\` to the \`episode\` command so the 主智能体 MAIN AGENT arm's trajectory discovery does not depend on the change-window fallback.
|
|
18
|
+
|
|
19
|
+
**Recursion guard**
|
|
20
|
+
|
|
21
|
+
Execute every step inline in THIS session. NEVER use the Task tool from this skill, and NEVER invoke synergyspec-selfevolving-learn or synergyspec-selfevolving-self-evolving — you ARE the runner. The 奖励智能体 + 演进智能体 (+ optional 基线智能体) are spawned by the CLI orchestrator in their own contexts; do not spawn them yourself.
|
|
22
|
+
|
|
23
|
+
**Purpose**
|
|
24
|
+
|
|
25
|
+
This is the review-and-learn step after \`/synspec:apply\` and \`/synspec:verify\`, and it is the ENTRANCE to one self-evolution EPISODE. You trigger the loop-v2 orchestrator with a single CLI command. The orchestrator runs ONE episode in a strict, durably-persisted order:
|
|
26
|
+
|
|
27
|
+
1. Records the 主智能体 MAIN AGENT (frozen actor, policy vN+1) arm for this change.
|
|
28
|
+
2. Optionally runs the CRITIC AGENT(基线智能体 baseline agent)— reruns the LAST episode's policy vN on the SAME change (skipped when the 单一血统 single lineage has < 2 versions or the last action was refused).
|
|
29
|
+
3. Runs the 奖励智能体 REWARD AGENT — computes reward(主臂)&reward(基线臂), advantage = reward(主臂) − reward(基线臂), and the 文本梯度 textual gradient; writes diagnosis.json.
|
|
30
|
+
4. DECIDES on the main arm's edits: 弃权 abstained (no nameable gap) ⇒ skip; bad advantage (< threshold) ⇒ ROLLBACK the 策略 POLICY to the prior good version and append a 否决缓冲 reject-buffer entry; otherwise KEEP.
|
|
31
|
+
5. Runs the 演进智能体 EVOLVING AGENT (optimizer.step) — ONE bounded edit (≤L) onto the 策略 POLICY, or refuses, reading the reject-buffer fresh from disk.
|
|
32
|
+
6. Advances the 版本账本 ledger to the new 策略 POLICY version.
|
|
33
|
+
|
|
34
|
+
Everything in steps 1–6 is CODE. You do not perform any of it. You issue the command and relay what it wrote.
|
|
35
|
+
|
|
36
|
+
**The episode commits.** The \`episode\` command always runs the full loop — the orchestrator may roll back / keep / evolve as above; it has no read-only mode. If a read-only look (no rollback, no evolution) is wanted, that is NOT this skill's job: the caller should use plain \`learn <change>\` (no \`--apply\`) or the read-only \`self-evolution policy show\` view instead. Do NOT invent a preview flag — there is none.
|
|
37
|
+
|
|
38
|
+
**Steps**
|
|
39
|
+
|
|
40
|
+
1. **Confirm the change resolves**
|
|
41
|
+
|
|
42
|
+
Run:
|
|
43
|
+
\`\`\`bash
|
|
44
|
+
synergyspec-selfevolving status --change "<name>" --json
|
|
45
|
+
\`\`\`
|
|
46
|
+
If the change does not resolve, stop and report the error (do NOT prompt — you may have no user channel). Note from the status output whether apply/verify evidence is present; if it is incomplete, flag the missing evidence in your verdict — the orchestrator's 奖励智能体 REWARD AGENT will 弃权 abstain rather than score on absent evidence.
|
|
47
|
+
|
|
48
|
+
2. **Trigger the episode (the orchestrator does the work)**
|
|
49
|
+
|
|
50
|
+
Run exactly ONE command — the loop-v2 orchestrator. It CODE-SPAWNS the 奖励智能体 REWARD AGENT + 演进智能体 EVOLVING AGENT (+ optional CRITIC AGENT(基线智能体)); you spawn nothing:
|
|
51
|
+
\`\`\`bash
|
|
52
|
+
synergyspec-selfevolving self-evolution episode --change "<change>" [--session-id <id>] [--transcript <path>]
|
|
53
|
+
\`\`\`
|
|
54
|
+
- Append \`--session-id <id>\` and/or \`--transcript <path>\` ONLY when the spawning prompt supplied them.
|
|
55
|
+
- If the harness differs from the ambient host, set \`SYNERGYSPEC_SELFEVOLVING_HOST_HARNESS=<harness>\` first.
|
|
56
|
+
|
|
57
|
+
Do NOT grade, score, or author any edit yourself, and do NOT run \`evolve-from-edits\`, \`auto-evolve\`, or \`--agent\` / \`claude -p\` — those are not part of loop v2's host-facing path. The episode command IS the loop.
|
|
58
|
+
|
|
59
|
+
3. **Read the machine-written result**
|
|
60
|
+
|
|
61
|
+
The \`episode\` command prints the episode result (and persists it). Read it from the JSON output, and cross-check the on-disk record:
|
|
62
|
+
- \`.synergyspec-selfevolving/self-evolution/episodes/<episodeId>/episode.json\` — the episode stage and policy versions.
|
|
63
|
+
- \`.synergyspec-selfevolving/self-evolution/episodes/<episodeId>/diagnosis.json\` — the 奖励智能体's reward(主臂), reward(基线臂), advantage, 文本梯度, and any abstain reason.
|
|
64
|
+
|
|
65
|
+
Take these fields straight from the result — NEVER recompute them:
|
|
66
|
+
- **advantage** = reward(主臂) − reward(基线臂) (null when the baseline arm was skipped or the reward agent 弃权 abstained).
|
|
67
|
+
- **decision**: \`rolled-back\` | \`kept\` | \`abstained\`.
|
|
68
|
+
- **evolution kind**: the 演进智能体 outcome — \`evolved\` | \`refused\` | \`not-spawned\` (null when evolution was skipped, e.g. on 弃权).
|
|
69
|
+
- **new 策略 POLICY version**: the 版本账本 ledger head AFTER the episode (post-rollback / post-evolve).
|
|
70
|
+
|
|
71
|
+
4. **Consult the 版本账本 ledger for context (read-only, optional)**
|
|
72
|
+
|
|
73
|
+
To explain the result against prior episodes, run the READ-ONLY view:
|
|
74
|
+
\`\`\`bash
|
|
75
|
+
synergyspec-selfevolving self-evolution policy show --target <targetId> --json
|
|
76
|
+
\`\`\`
|
|
77
|
+
This shows the 版本账本 ledger (prior 策略 POLICY versions for the target, with the current head) and the 否决缓冲 reject-buffer (rolled-back directions to avoid). Use it only to contextualize the verdict — it changes nothing.
|
|
78
|
+
|
|
79
|
+
5. **Classify the outcome (do not re-judge it)**
|
|
80
|
+
|
|
81
|
+
Map the machine result to a verdict, classifying any no-op honestly:
|
|
82
|
+
- **evolved** — the 演进智能体 wrote ONE bounded edit onto the 策略 POLICY; report the new version and the rollback command.
|
|
83
|
+
- **kept (no evolution) / abstained** — a verified-green or no-nameable-gap run where nothing was promoted is the CORRECT outcome (产物即弃), not a missed evolution. State the reason from diagnosis.json.
|
|
84
|
+
- **rolled-back** — the edit's advantage fell below threshold; the 策略 POLICY was restored to the prior good version and a 否决缓冲 reject-buffer entry recorded the lost direction. This is the loop working, not a failure.
|
|
85
|
+
- **SAFE refusal** (evidence missing/red, target frozen, gate refused on real grounds) is expected; state the reason and move on.
|
|
86
|
+
- **DEFECT** (the orchestrator COULD NOT act for a reason that is NOT about evidence / freezing / scope — e.g. an unbindable target that persists) — surface it as an unresolved issue; do NOT hand-edit a canonical file to work around it. \`synergyspec-selfevolving status\` prints the machine-written \`Evolution:\` outcome — do not contradict it in free text.
|
|
87
|
+
|
|
88
|
+
6. **Emit the Episode Verdict (always — the final step)**
|
|
89
|
+
|
|
90
|
+
Your session's final message MUST end with the \`## Episode Verdict\` block defined in the Output Format below. Copy every field from the machine-written result (the \`episode\` JSON output / episode.json + diagnosis.json) — never re-judge it. Use \`not-run\` when the episode command was never invoked (change did not resolve); state the reason on the verdict lines.
|
|
91
|
+
|
|
92
|
+
**Output Format**
|
|
93
|
+
|
|
94
|
+
The session's final message MUST end with exactly this block shape:
|
|
95
|
+
|
|
96
|
+
\`\`\`
|
|
97
|
+
## Episode Verdict: <change-name>
|
|
98
|
+
- Outcome: evolved | kept | rolled-back | abstained | not-run | refused-static-gate | refused-unverified-evidence | refused-target-frozen | error-<...>
|
|
99
|
+
- Episode id: <episodeId, or none>
|
|
100
|
+
- Decision: rolled-back | kept | abstained
|
|
101
|
+
- Evolution: evolved | refused | not-spawned | none
|
|
102
|
+
- Advantage: <reward(主臂) − reward(基线臂), or null (baseline skipped / 弃权 abstained)>
|
|
103
|
+
- 策略 POLICY version: <new ledger head version, or unchanged>
|
|
104
|
+
- Evolved target: <canonical target id, or none>
|
|
105
|
+
- Canonical file(s) changed: <paths, or none>
|
|
106
|
+
- Rollback: synergyspec-selfevolving self-evolution promote <candidateId> --rollback
|
|
107
|
+
- Loss vs baseline: <loss / baseline, or unmeasured>
|
|
108
|
+
- Defects to surface: <DEFECT items from step 5, or none>
|
|
109
|
+
- Key lessons: <up to 3 one-line bullets from diagnosis.json>
|
|
110
|
+
- Isolation: fresh-context subagent | inline fallback (degraded)
|
|
111
|
+
\`\`\`
|
|
112
|
+
|
|
113
|
+
- EVERY field MUST be copied from the machine-written result (the \`episode\` JSON output / episode.json + diagnosis.json) — never re-judged. The skill neither grades nor edits; it only relays.
|
|
114
|
+
- Use \`not-run\` when the episode command was never invoked (the change did not resolve); state the reason on the verdict lines.
|
|
115
|
+
- A \`kept\` / \`abstained\` outcome on a verified-green run is the CORRECT no-op, not a missed evolution — say so plainly rather than hedging.
|
|
116
|
+
- Report \`Isolation: fresh-context subagent\` when you were spawned as a subagent; report \`Isolation: inline fallback (degraded)\` when this skill is running inline in the spawning session.`;
|
|
117
|
+
export function getSelfEvolvingSkillTemplate() {
|
|
118
|
+
return {
|
|
119
|
+
name: 'synergyspec-selfevolving-self-evolving',
|
|
120
|
+
description: 'Thin runner for a completed SynergySpec-SelfEvolving change: triggers the loop-v2 self-evolution episode (the orchestrator code-spawns the 奖励智能体 REWARD AGENT and 演进智能体 EVOLVING AGENT) and relays the machine-written result. Never grades and never edits canonical files. Normally spawned by synergyspec-selfevolving-learn rather than invoked directly.',
|
|
121
|
+
instructions: INSTRUCTIONS_BODY,
|
|
122
|
+
license: 'MIT',
|
|
123
|
+
compatibility: 'Requires synergyspec-selfevolving CLI.',
|
|
124
|
+
metadata: { author: 'synergyspec-selfevolving', version: '1.0' },
|
|
125
|
+
};
|
|
126
|
+
}
|
|
127
|
+
//# sourceMappingURL=self-evolving.js.map
|
|
@@ -1,4 +1,7 @@
|
|
|
1
|
+
import { type ParsedTestFailure } from '../fitness/test-failures.js';
|
|
1
2
|
import type { HarnessName, NormalizedTrajectory } from './model.js';
|
|
3
|
+
/** One failing test observed in the graded runner result's output. */
|
|
4
|
+
export type ObservedTestFailure = ParsedTestFailure;
|
|
2
5
|
export interface TrajectoryFacts {
|
|
3
6
|
harness: HarnessName;
|
|
4
7
|
changeName: string;
|
|
@@ -23,6 +26,12 @@ export interface TrajectoryFacts {
|
|
|
23
26
|
observedStatus: 'success' | 'failure' | null;
|
|
24
27
|
/** True only when a runner was actually observed (observedStatus !== null). */
|
|
25
28
|
verified: boolean;
|
|
29
|
+
/**
|
|
30
|
+
* Failing test ids + assertion lines parsed from the GRADED runner result's
|
|
31
|
+
* output (same result that decides `observedStatus`). Present only when at
|
|
32
|
+
* least one failure was recognized — the critic's observed failure evidence.
|
|
33
|
+
*/
|
|
34
|
+
observedFailures?: ObservedTestFailure[];
|
|
26
35
|
/** Count of tool_call parts across main + subagents (effort/activity signal). */
|
|
27
36
|
toolCallCount: number;
|
|
28
37
|
/** Distinct subagent sessions merged into the trajectory (coverage signal). */
|
|
@@ -30,6 +39,13 @@ export interface TrajectoryFacts {
|
|
|
30
39
|
/** Absolute raw transcript paths that produced these facts (audit trail). */
|
|
31
40
|
sourcePaths: string[];
|
|
32
41
|
}
|
|
42
|
+
/** Exported for the action-skeleton projection (skeleton.ts) — single source. */
|
|
43
|
+
export declare function isExecTool(tool: string | undefined): boolean;
|
|
44
|
+
/** Exported for the action-skeleton projection (skeleton.ts) — single source. */
|
|
45
|
+
export declare function inputLooksLikeRunner(input: Record<string, unknown> | undefined): boolean;
|
|
46
|
+
/** Best-effort command text from a tool_call input, for debug display.
|
|
47
|
+
* Exported for the action-skeleton projection (skeleton.ts) — single source. */
|
|
48
|
+
export declare function commandText(input: Record<string, unknown> | undefined): string | undefined;
|
|
33
49
|
/**
|
|
34
50
|
* Compute the {@link TrajectoryFacts} for a change. Returns `null` when there is
|
|
35
51
|
* no trajectory at all, so callers can cleanly fall back to the authored
|
|
@@ -13,6 +13,7 @@
|
|
|
13
13
|
* Pure + no throw.
|
|
14
14
|
*/
|
|
15
15
|
import { parseTestMetrics } from '../fitness/test-metrics.js';
|
|
16
|
+
import { parseTestFailures } from '../fitness/test-failures.js';
|
|
16
17
|
/**
|
|
17
18
|
* Matches the NAME of a shell/command-executing tool across harnesses — Claude
|
|
18
19
|
* `Bash`; opencode `bash`; Codex `shell`/`local_shell`/`shell_command`/`exec`;
|
|
@@ -29,7 +30,8 @@ import { parseTestMetrics } from '../fitness/test-metrics.js';
|
|
|
29
30
|
* never match.
|
|
30
31
|
*/
|
|
31
32
|
const EXEC_TOOL_RE = /(?:^|[._-])(?:bash|sh|zsh|fish|pwsh|powershell|shell|cmd|exec|command|terminal|run)(?:[._-]|$)/i;
|
|
32
|
-
|
|
33
|
+
/** Exported for the action-skeleton projection (skeleton.ts) — single source. */
|
|
34
|
+
export function isExecTool(tool) {
|
|
33
35
|
return tool !== undefined && EXEC_TOOL_RE.test(tool);
|
|
34
36
|
}
|
|
35
37
|
/**
|
|
@@ -56,7 +58,8 @@ function matchesRunner(value) {
|
|
|
56
58
|
}
|
|
57
59
|
return false;
|
|
58
60
|
}
|
|
59
|
-
|
|
61
|
+
/** Exported for the action-skeleton projection (skeleton.ts) — single source. */
|
|
62
|
+
export function inputLooksLikeRunner(input) {
|
|
60
63
|
if (!input)
|
|
61
64
|
return false;
|
|
62
65
|
const preferred = [];
|
|
@@ -70,8 +73,9 @@ function inputLooksLikeRunner(input) {
|
|
|
70
73
|
const values = preferred.length > 0 ? preferred : Object.values(input);
|
|
71
74
|
return values.some(matchesRunner);
|
|
72
75
|
}
|
|
73
|
-
/** Best-effort command text from a tool_call input, for debug display.
|
|
74
|
-
|
|
76
|
+
/** Best-effort command text from a tool_call input, for debug display.
|
|
77
|
+
* Exported for the action-skeleton projection (skeleton.ts) — single source. */
|
|
78
|
+
export function commandText(input) {
|
|
75
79
|
if (!input)
|
|
76
80
|
return undefined;
|
|
77
81
|
for (const f of COMMAND_FIELDS) {
|
|
@@ -173,6 +177,9 @@ export function toTrajectoryFacts(trajectory, changeName) {
|
|
|
173
177
|
const passed = runnerExitCode === 0 || (observedPassRate !== null && observedPassRate >= 1);
|
|
174
178
|
observedStatus = failed ? 'failure' : passed ? 'success' : null;
|
|
175
179
|
}
|
|
180
|
+
// Failure CONTENT from the same graded result that decides the verdict —
|
|
181
|
+
// omitted entirely when nothing was recognized (keeps JSON baselines stable).
|
|
182
|
+
const observedFailures = last?.output ? parseTestFailures(last.output) : [];
|
|
176
183
|
return {
|
|
177
184
|
harness: trajectory.harness,
|
|
178
185
|
changeName,
|
|
@@ -181,6 +188,7 @@ export function toTrajectoryFacts(trajectory, changeName) {
|
|
|
181
188
|
observedPassRate,
|
|
182
189
|
observedStatus,
|
|
183
190
|
verified: observedStatus !== null,
|
|
191
|
+
...(observedFailures.length > 0 ? { observedFailures } : {}),
|
|
184
192
|
toolCallCount,
|
|
185
193
|
subagentCount: trajectory.subagentSessionIds.length,
|
|
186
194
|
sourcePaths: trajectory.sourcePaths,
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import type { HarnessName, NormalizedTrajectory } from './model.js';
|
|
2
|
+
export interface SkeletonEvent {
|
|
3
|
+
kind: 'file-edit' | 'test-run' | 'command';
|
|
4
|
+
/** 0-based position in the (post-rollup, pre-truncation) event sequence. */
|
|
5
|
+
ordinal: number;
|
|
6
|
+
/** Tool name as the harness reports it. */
|
|
7
|
+
tool: string;
|
|
8
|
+
/** POSIX-normalized file path (file-edit only). */
|
|
9
|
+
file?: string;
|
|
10
|
+
/** How many consecutive edits to `file` this event rolls up (file-edit only). */
|
|
11
|
+
editCount?: number;
|
|
12
|
+
/** Executed command line, capped (test-run / command only). */
|
|
13
|
+
command?: string;
|
|
14
|
+
/** Exit code of the paired result, when structured (test-run / command). */
|
|
15
|
+
exitCode?: number | null;
|
|
16
|
+
/** Pass rate parsed from the paired runner output (test-run only). */
|
|
17
|
+
passRate?: number | null;
|
|
18
|
+
/** Failed-test count parsed from the paired runner output (test-run only). */
|
|
19
|
+
failedCount?: number | null;
|
|
20
|
+
/** Session that produced the event (main or subagent). */
|
|
21
|
+
sessionId?: string;
|
|
22
|
+
}
|
|
23
|
+
export interface ActionSkeleton {
|
|
24
|
+
harness: HarnessName;
|
|
25
|
+
/** Ordered, rolled-up, bounded events (≤ {@link MAX_SKELETON_EVENTS}). */
|
|
26
|
+
events: SkeletonEvent[];
|
|
27
|
+
/** Total tool_call parts seen (pre-projection activity signal). */
|
|
28
|
+
totalToolCalls: number;
|
|
29
|
+
/** True when events were middle-out truncated to the cap. */
|
|
30
|
+
truncated: boolean;
|
|
31
|
+
}
|
|
32
|
+
/**
|
|
33
|
+
* Project the bounded action skeleton from a normalized trajectory. One walk,
|
|
34
|
+
* the same call→result pairing approach as facts.ts's collectRunnerResults
|
|
35
|
+
* (callId map with a positional fallback).
|
|
36
|
+
*/
|
|
37
|
+
export declare function toActionSkeleton(trajectory: NormalizedTrajectory | null): ActionSkeleton | null;
|
|
38
|
+
/**
|
|
39
|
+
* Human-readable one-line(ish) play-by-play. Deterministic; events are dropped
|
|
40
|
+
* middle-out (with an elision marker) until the string fits `maxChars`.
|
|
41
|
+
*/
|
|
42
|
+
export declare function renderActionSkeleton(skeleton: ActionSkeleton, maxChars?: number): string;
|
|
43
|
+
//# sourceMappingURL=skeleton.d.ts.map
|