npm - synergyspec-selfevolving - Versions diffs - 2.1.5 → 2.1.7 - Mend

synergyspec-selfevolving 2.1.5 → 2.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38)

package/dist/commands/learn.js +80 -24
package/dist/commands/self-evolution-dream.d.ts +15 -1
package/dist/commands/self-evolution-dream.js +111 -6
package/dist/commands/self-evolution-episode.d.ts +3 -0
package/dist/commands/self-evolution-episode.js +157 -108
package/dist/commands/workflow/status.js +4 -0
package/dist/core/archive.js +17 -9
package/dist/core/change-readiness.d.ts +16 -1
package/dist/core/change-readiness.js +441 -15
package/dist/core/fitness/loss.d.ts +3 -5
package/dist/core/fitness/loss.js +2 -2
package/dist/core/fitness/test-metrics.d.ts +1 -0
package/dist/core/fitness/test-metrics.js +49 -0
package/dist/core/learn.js +129 -11
package/dist/core/migration.d.ts +6 -14
package/dist/core/migration.js +63 -21
package/dist/core/runner-evidence.d.ts +53 -0
package/dist/core/runner-evidence.js +613 -0
package/dist/core/self-evolution/candidates.js +0 -2
package/dist/core/self-evolution/dream.d.ts +57 -3
package/dist/core/self-evolution/dream.js +480 -9
package/dist/core/self-evolution/episode-orchestrator.d.ts +2 -0
package/dist/core/self-evolution/episode-orchestrator.js +17 -5
package/dist/core/self-evolution/episode-store.d.ts +5 -0
package/dist/core/self-evolution/episode-store.js +6 -2
package/dist/core/self-evolution/evolving-agent.d.ts +33 -4
package/dist/core/self-evolution/evolving-agent.js +138 -11
package/dist/core/self-evolution/host-harness.d.ts +35 -12
package/dist/core/self-evolution/host-harness.js +188 -49
package/dist/core/self-evolution/reward-aggregator.js +2 -2
package/dist/core/templates/workflows/archive-change.js +18 -18
package/dist/core/templates/workflows/dream.js +57 -47
package/dist/core/templates/workflows/learn.js +7 -5
package/dist/core/templates/workflows/run-tests.js +48 -29
package/dist/core/templates/workflows/self-evolving.js +11 -8
package/dist/core/trajectory/facts.d.ts +1 -1
package/dist/core/trajectory/registry.js +39 -8
package/package.json +1 -1

package/dist/core/templates/workflows/learn.js CHANGED Viewed

@@ -19,17 +19,19 @@ This is the review-and-learn step after \`/synspec:apply\` and \`/synspec:verify
    The runner starts with NO conversation context, so collect every handle it needs:
    - **Project root**: the absolute path of the current working directory.
    - **Change name**: from step 1.
-   - **Harness**: read the \`harness:\` key from \`synergyspec-selfevolving/changes/<name>/.synergyspec-selfevolving.yaml\`; if absent, use \`unknown\`.
-   - **Mode**: always \`apply\` — the episode runs the full loop (score, decide, and the 演进智能体's ONE bounded edit) autonomously, with no confirmation prompt. There is NO read-only episode and NO \`--preview\` flag. If the user wants a read-only look (no rollback, no evolution), do NOT run an episode: use the read-only view \`synergyspec-selfevolving self-evolution policy show\` (or a plain \`synergyspec-selfevolving learn <name>\` without \`--apply\`) instead.
-   - **Session handle (optional)**: if your harness exposes this session's id or transcript path, capture it; otherwise omit it (the 主智能体 MAIN AGENT arm's trajectory discovery then uses the change window).
+   - **Harness**: resolve the CURRENT host runtime, not the change metadata. If this skill is running in Codex, use \`codex\`; in Claude Code, use \`claude\`; in OpenCode, use \`opencode\`. Use \`unknown\` only when the host is genuinely unidentified after checking the active session/tooling. Do NOT read \`harness:\` from the per-change YAML for this field: that metadata is historical provenance, not the runtime that will spawn the loop-v2 agents.
+   - **Mode**: always \`apply\` — the episode runs the full loop (score, decide, and the 演进智能体's ONE bounded edit) autonomously, with no confirmation prompt. There is NO read-only episode and NO \`--preview\` flag. If the user wants a read-only look (no rollback, no evolution), do NOT run an episode: use the read-only view \`synergyspec-selfevolving self-evolution policy show\` (or a plain \`synergyspec-selfevolving learn <name>\` without \`--apply\`) instead.
+   - **Force-new episode**: \`yes\` only when the user explicitly asked to rerun / force a fresh episode; otherwise \`no\`. A normal learn run must not invent a rerun.
+   - **Isolation**: \`fresh-context subagent\` for the spawned runner.
+   - **Session handle (optional)**: if your harness exposes this session's id or transcript path, capture it; otherwise omit it (the 主智能体 MAIN AGENT arm's trajectory discovery then uses the change window).
 3. **Spawn the runner**
-   Use the host's available general-purpose Task/subagent runner (for example \`general-purpose\` on Claude or \`general\` on hosts that expose that type), prompt: "Use Skill tool to invoke synergyspec-selfevolving-self-evolving for change '<name>'. Project root: <root>. Harness: <harness>. Mode: apply. Session-id: <id>. Transcript: <path>. Trigger the loop-v2 self-evolution episode autonomously, do not ask the user questions, and end with the '## Episode Verdict' block."
+   Use the host's available general-purpose Task/subagent runner (for example \`general-purpose\` on Claude or \`general\` on hosts that expose that type), prompt: "Use Skill tool to invoke synergyspec-selfevolving-self-evolving for change '<name>'. Project root: <root>. Harness: <harness>. Mode: apply. Force-new: <yes|no>. Isolation: fresh-context subagent. Session-id: <id>. Transcript: <path>. Trigger the loop-v2 self-evolution episode autonomously, do not ask the user questions, and end with the '## Episode Verdict' block."
    Include the \`Session-id: <id>.\` / \`Transcript: <path>.\` segment only when the session handle from step 2 is known — omit it entirely when unknown.
-   The runner triggers exactly one CLI command — \`synergyspec-selfevolving self-evolution episode --change "<name>" --session-id <id>\` — and the orchestrator CODE-SPAWNS the 奖励智能体 REWARD AGENT + 演进智能体 EVOLVING AGENT (+ optional CRITIC AGENT（基线智能体）). Neither you nor the runner grades or edits canonical files.
+   The runner triggers exactly one CLI command — \`synergyspec-selfevolving self-evolution episode --change "<name>" --harness <harness> --session-id <id> --rerun\` when force-new is \`yes\`; omit \`--rerun\` when force-new is \`no\`; omit \`--harness\` when it is \`unknown\` — and the orchestrator CODE-SPAWNS the 奖励智能体 REWARD AGENT + 演进智能体 EVOLVING AGENT (+ optional CRITIC AGENT（基线智能体）). Neither you nor the runner grades or edits canonical files.
    Guardrails:
    - Do NOT trigger the episode yourself in this session — it must run from a fresh context.

package/dist/core/templates/workflows/run-tests.js CHANGED Viewed

@@ -38,35 +38,53 @@ const INSTRUCTIONS_BODY = `**Input**: Optionally specify a change name. If omitt
      "startedAt": "<ISO timestamp>",
      "finishedAt": "<ISO timestamp>",
      "exitCode": 0,
-     "signal": null,
-     "stdoutLog": "synergyspec-selfevolving/changes/<name>/test-evidence/<timestamp>/runner.stdout.log",
-     "stderrLog": "synergyspec-selfevolving/changes/<name>/test-evidence/<timestamp>/runner.stderr.log",
-     "workspaceIdentity": {
-       "changeName": "<name>",
-       "taskId": "<benchmark task id, if any>",
-       "cwd": "<absolute working directory>",
-       "pyproject": {
-         "path": "pyproject.toml",
-         "name": "<[project].name, or null>",
-         "sha256": "<sha256 of pyproject.toml, or null>"
-       },
-       "packageJson": {
-         "path": "package.json",
-         "name": "<package.json name, or null>",
-         "sha256": "<sha256 of package.json, or null>"
-       }
-     },
-     "junitXml": null,
-     "coverageSummary": null,
-     "coverageLcov": null,
-     "coverageHtml": null
+     "signal": null,
+     "stdoutLog": "synergyspec-selfevolving/changes/<name>/test-evidence/<timestamp>/runner.stdout.log",
+     "stderrLog": "synergyspec-selfevolving/changes/<name>/test-evidence/<timestamp>/runner.stderr.log",
+     "stdoutLogSha256": "<sha256 of runner.stdout.log>",
+     "stderrLogSha256": "<sha256 of runner.stderr.log>",
+      "workspaceIdentity": {
+        "changeName": "<name>",
+        "taskId": "<benchmark task id, if any>",
+        "cwd": "<absolute working directory>",
+        "pyproject": null,
+        "packageJson": null
+      },
+     "testMetrics": {
+       "total": 29,
+       "passed": 29,
+       "failed": 0,
+       "passRate": 1
+     },
+     "junitXml": null,
+     "coverageSummary": null,
+     "coverageLcov": null,
+     "coverageHtml": null
    }
    \`\`\`
-   If the runner produces JUnit XML or coverage artifacts, record their paths in
-   \`runner-exit.json\`. If it does not, keep those fields \`null\`. The markdown
-   report may summarize results, but the raw logs and exit JSON are the durable
-   evidence that later verification must inspect.
+   Set each workspace identity file entry to an object ONLY when that file
+   exists at the project root. If \`pyproject.toml\` or \`package.json\` is absent,
+   leave that field \`null\` (or omit it); do not emit a \`path\` for an absent file.
+   Object shape for a present file:
+   \`\`\`json
+   {
+     "path": "pyproject.toml",
+     "name": "<project/package name, or null>",
+     "sha256": "<sha256 of the file>"
+   }
+   \`\`\`
+   If the runner summary exposes pass/fail counts, record them in
+   \`testMetrics\`; otherwise set \`testMetrics\` to \`null\` and preserve the raw
+   stdout/stderr logs. The \`stdoutLogSha256\` and \`stderrLogSha256\` fields MUST
+   be the SHA-256 hashes of the exact saved log files, computed after writing the
+   files and before writing \`runner-exit.json\`; do not hand-edit logs after
+   hashing. If the runner produces JUnit XML or coverage artifacts,
+   record their paths in \`runner-exit.json\`. If it does not, keep those fields
+   \`null\`. The markdown report may summarize results, but the raw logs and exit
+   JSON are the durable evidence that later verification must inspect.
 3b. **Promote PBT counterexamples to regression tests**
@@ -134,9 +152,10 @@ const INSTRUCTIONS_BODY = `**Input**: Optionally specify a change name. If omitt
    | UC1-E4a1 | Error when no grid space | ❌ failed | \`gridSize=0, widgetCount=1\` | \`test/pbt-regression-uc1-e4a1-1.test.ts\` |
    ...
-   ### Test Run Results
-   <summary from test runner output: passed/failed/skipped counts>
-   If failures: list failing test names and errors.
+   ### Test Run Results
+   Summary: <N collected>, <N passed>, <N failed>, <N skipped>, <N collection errors>
+   <raw summary from test runner output: passed/failed/skipped counts>
+   If failures: list failing test names and errors.
    ### Runner Evidence
    | Evidence | Path / Value |

package/dist/core/templates/workflows/self-evolving.js CHANGED Viewed

@@ -12,9 +12,11 @@ You are the RUNNER for a completed SynergySpec-SelfEvolving change. In loop v2 (
 Parse these handles from the spawning prompt:
 - **Change name** (required). If the change name is missing or does not resolve via \`synergyspec-selfevolving list --json\`, stop and report the error — do NOT prompt the user (you may have no user channel).
-- **Absolute project root.** Run every CLI command from it.
-- **Harness**: \`claude\` | \`codex\` | \`opencode\` | \`unknown\`. If a harness was provided and differs from the ambient host, set \`SYNERGYSPEC_SELFEVOLVING_HOST_HARNESS=<harness>\` for the CLI invocation below.
-- **Session-id / transcript path** (optional). When the spawning prompt supplied a session-id or transcript path, pass \`--session-id <id>\` / \`--transcript <path>\` to the \`episode\` command so the 主智能体 MAIN AGENT arm's trajectory discovery does not depend on the change-window fallback.
+- **Absolute project root.** Run every CLI command from it.
+- **Harness**: \`claude\` | \`codex\` | \`opencode\` | \`unknown\`. If a concrete harness was provided, pass \`--harness <harness>\` to the CLI invocation below. If the prompt says \`unknown\` but this runner is clearly executing inside Codex, Claude Code, or OpenCode, recover the current host and pass that concrete harness. Omit \`--harness\` only when both the prompt and the current runner host are genuinely unidentified; never set \`SYNERGYSPEC_SELFEVOLVING_HOST_HARNESS=unknown\`.
+- **Force-new**: \`yes\` | \`no\` (optional; default \`no\`). If \`yes\`, append \`--rerun\` so a closed matching episode is not reused.
+- **Isolation**: \`fresh-context subagent\` | \`inline fallback (degraded)\` (optional). If supplied, copy it verbatim into the verdict; otherwise infer from whether this skill is running in a spawned subagent or inline fallback.
+- **Session-id / transcript path** (optional). When the spawning prompt supplied a session-id or transcript path, pass \`--session-id <id>\` / \`--transcript <path>\` to the \`episode\` command so the 主智能体 MAIN AGENT arm's trajectory discovery does not depend on the change-window fallback.
 **Recursion guard**
@@ -49,10 +51,11 @@ Everything in steps 1–6 is CODE. You do not perform any of it. You issue the c
    Run exactly ONE command — the loop-v2 orchestrator. It CODE-SPAWNS the 奖励智能体 REWARD AGENT + 演进智能体 EVOLVING AGENT (+ optional CRITIC AGENT（基线智能体）); you spawn nothing:
    \`\`\`bash
-   synergyspec-selfevolving self-evolution episode --change "<change>" --json
-   \`\`\`
-   - Append \`--session-id <id>\` and/or \`--transcript <path>\` ONLY when the spawning prompt supplied them.
-   - If the harness differs from the ambient host, set \`SYNERGYSPEC_SELFEVOLVING_HOST_HARNESS=<harness>\` first.
+   synergyspec-selfevolving self-evolution episode --change "<change>" --json
+   \`\`\`
+   - Append \`--session-id <id>\` and/or \`--transcript <path>\` ONLY when the spawning prompt supplied them.
+   - Append \`--harness <harness>\` when the spawning prompt supplied \`claude\`, \`codex\`, or \`opencode\`, or when the prompt supplied \`unknown\` but this runner can identify the current host as Codex, Claude Code, or OpenCode. Never append \`--harness unknown\`.
+   - Append \`--rerun\` ONLY when the spawning prompt supplied \`Force-new: yes\`.
    Do NOT grade, score, or author any edit yourself, and do NOT run \`evolve-from-edits\`, \`auto-evolve\`, or \`--agent\` / \`claude -p\` — those are not part of loop v2's host-facing path. The episode command IS the loop.
@@ -116,7 +119,7 @@ The session's final message MUST end with exactly this block shape:
 - Use \`busy-in-flight\` when the episode command returned the clean concurrency deferral (another in-flight episode holds the same 策略 POLICY target): advantage is null, episode id is none, 策略 POLICY version is unchanged. It is TRANSIENT and self-healing (retry after the lock clears / the 60-min stale window) — it is NOT a DEFECT, do not list it under Defects to surface, and never advise deleting \`in-flight.json\`.
 - When the episode did NOT start (Episode id is none — any not-run / busy-in-flight / error-* outcome), write \`none\` for Evolved target and Canonical file(s) changed, report Decision/Advantage as none/null, and leave 策略 POLICY version unchanged. The change's CONFIGURED target id is context only — do NOT copy it into the Evolved target field on a non-run verdict.
 - A \`kept\` / \`abstained\` outcome on a verified-green run is the CORRECT no-op, not a missed evolution — say so plainly rather than hedging.
-- Report \`Isolation: fresh-context subagent\` when you were spawned as a subagent; report \`Isolation: inline fallback (degraded)\` when this skill is running inline in the spawning session.`;
+- Copy the supplied \`Isolation:\` value verbatim when present. If it was not supplied, report \`Isolation: fresh-context subagent\` when you were spawned as a subagent, or \`Isolation: inline fallback (degraded)\` when this skill is running inline in the spawning session.`;
 export function getSelfEvolvingSkillTemplate() {
     return {
         name: 'synergyspec-selfevolving-self-evolving',

package/dist/core/trajectory/facts.d.ts CHANGED Viewed

@@ -3,7 +3,7 @@ import type { HarnessName, NormalizedTrajectory } from './model.js';
 /** One failing test observed in the graded runner result's output. */
 export type ObservedTestFailure = ParsedTestFailure;
 export interface TrajectoryFacts {
-    harness: HarnessName;
+    harness: HarnessName | 'runner-evidence';
     changeName: string;
     /**
      * A recognizable test-runner invocation (vitest/pytest/go test/…) produced a

package/dist/core/trajectory/registry.js CHANGED Viewed

@@ -5,12 +5,11 @@
  * Selection order:
  *   1. If the change metadata stamps a `harness` (the strongest signal — see
  *      `ChangeMetadata.harness`), use that adapter.
- *   2. Otherwise read the adapter for the harness that is ACTUALLY RUNNING this
- *      process (`resolveHostHarness()`: the CODEX_ / OPENCODE_ env heuristic, or
- *      the `SYNERGYSPEC_SELFEVOLVING_HOST_HARNESS` override). This makes the
- *      observed trajectory readable on a non-Claude host. With no harness env
- *      present it resolves to 'claude' — the historical default — so existing
- *      hermetic tests stay on the Claude adapter unchanged.
+ *   2. Otherwise read the trusted repo/host harness signal
+ *      (`resolveHostHarnessDetailsForRepo()`): explicit override, persisted
+ *      sidecar, explicit session id, or the historical Claude default. An
+ *      env-only Codex/OpenCode signal on an unstamped change is intentionally
+ *      too weak to scan the host's whole session store.
  *   3. `trajsz` is OPT-IN (env `SYNERGYSPEC_SELFEVOLVING_TRAJSZ`): when enabled
  *      and a fresh archive is present it is tried FIRST, since it already
  *      normalizes all three harnesses; absent/stale, we fall back to native.
@@ -98,8 +97,9 @@ export async function resolveTrajectorySource(projectRoot, changeName, options =
                     return s;
             }
             catch {
-                // fall through to probing
+                // fall through to the fail-closed return below
             }
+            return null;
         }
     }
     // 2. No stamp: use a trusted host recovery signal, not a blind global scan.
@@ -143,7 +143,7 @@ export async function getTrajectoryForChange(projectRoot, changeName, options =
 export async function getTrajectoryResultForChange(projectRoot, changeName, options = {}) {
     const source = await resolveTrajectorySource(projectRoot, changeName, options);
     if (!source)
-        return { trajectory: null, sourceHarness: null, reason: 'no-trajectory-source' };
+        return explainTrajectorySourceMiss(projectRoot, changeName, options);
     try {
         const result = source.getTrajectoryResult
             ? await source.getTrajectoryResult(changeName)
@@ -162,4 +162,35 @@ export async function getTrajectoryResultForChange(projectRoot, changeName, opti
         };
     }
 }
+async function explainTrajectorySourceMiss(projectRoot, changeName, options) {
+    const changeProvenance = await readChangeTrajectoryProvenance(projectRoot, changeName);
+    const sessionIds = uniqueNonBlank([
+        ...(options.sessionIds ?? []),
+        ...changeProvenance.sessionIds,
+        process.env.SYNERGYSPEC_SELFEVOLVING_SESSION_ID,
+    ]);
+    if (changeProvenance.harness) {
+        return {
+            trajectory: null,
+            sourceHarness: changeProvenance.harness,
+            reason: `stamped-${changeProvenance.harness}-source-unavailable`,
+        };
+    }
+    const hostResolution = await resolveHostHarnessDetailsForRepo(projectRoot);
+    const envOnlyNativeHost = hostResolution.source === 'env' &&
+        hostResolution.harness !== 'claude' &&
+        sessionIds.length === 0;
+    if (envOnlyNativeHost) {
+        return {
+            trajectory: null,
+            sourceHarness: hostResolution.harness,
+            reason: `env-only-${hostResolution.harness}-requires-stamped-harness-or-session-id`,
+        };
+    }
+    return {
+        trajectory: null,
+        sourceHarness: hostResolution.source === 'default' ? null : hostResolution.harness,
+        reason: `${hostResolution.source}-${hostResolution.harness}-source-unavailable`,
+    };
+}
 //# sourceMappingURL=registry.js.map

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "synergyspec-selfevolving",
-  "version": "2.1.5",
+  "version": "2.1.7",
   "description": "AI-native system for spec-driven development",
   "keywords": [
     "synergyspec-selfevolving",