synergyspec-selfevolving 1.4.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +31 -18
- package/dist/commands/learn.d.ts +12 -1
- package/dist/commands/learn.js +158 -11
- package/dist/commands/self-evolution-episode.d.ts +177 -0
- package/dist/commands/self-evolution-episode.js +431 -0
- package/dist/commands/self-evolution.d.ts +12 -190
- package/dist/commands/self-evolution.js +114 -866
- package/dist/core/archive.d.ts +0 -1
- package/dist/core/archive.js +0 -58
- package/dist/core/artifact-graph/instruction-loader.d.ts +2 -4
- package/dist/core/artifact-graph/instruction-loader.js +3 -31
- package/dist/core/fitness/loss.d.ts +5 -5
- package/dist/core/fitness/loss.js +4 -4
- package/dist/core/fitness/test-failures.js +10 -2
- package/dist/core/project-config.d.ts +19 -0
- package/dist/core/project-config.js +96 -0
- package/dist/core/self-evolution/candidate-fitness.d.ts +23 -1
- package/dist/core/self-evolution/candidate-fitness.js +31 -5
- package/dist/core/self-evolution/candidates.d.ts +0 -9
- package/dist/core/self-evolution/critic-agent.d.ts +192 -0
- package/dist/core/self-evolution/critic-agent.js +568 -0
- package/dist/core/self-evolution/edits-contract.d.ts +53 -0
- package/dist/core/self-evolution/edits-contract.js +89 -0
- package/dist/core/self-evolution/episode-orchestrator.d.ts +234 -0
- package/dist/core/self-evolution/episode-orchestrator.js +681 -0
- package/dist/core/self-evolution/episode-store.d.ts +266 -0
- package/dist/core/self-evolution/episode-store.js +573 -0
- package/dist/core/self-evolution/evolution-switches.d.ts +1 -1
- package/dist/core/self-evolution/evolution-switches.js +5 -10
- package/dist/core/self-evolution/evolving-agent.d.ts +208 -0
- package/dist/core/self-evolution/evolving-agent.js +535 -0
- package/dist/core/self-evolution/host-harness.d.ts +14 -15
- package/dist/core/self-evolution/host-harness.js +48 -23
- package/dist/core/self-evolution/index.d.ts +11 -6
- package/dist/core/self-evolution/index.js +20 -6
- package/dist/core/self-evolution/line-diff.d.ts +60 -0
- package/dist/core/self-evolution/line-diff.js +130 -0
- package/dist/core/self-evolution/policy/fs-safe.d.ts +19 -0
- package/dist/core/self-evolution/policy/fs-safe.js +89 -0
- package/dist/core/self-evolution/policy/index.d.ts +13 -0
- package/dist/core/self-evolution/policy/index.js +13 -0
- package/dist/core/self-evolution/policy/policy-store.d.ts +217 -0
- package/dist/core/self-evolution/policy/policy-store.js +774 -0
- package/dist/core/self-evolution/policy/prediction-reconcile.d.ts +54 -0
- package/dist/core/self-evolution/policy/prediction-reconcile.js +191 -0
- package/dist/core/self-evolution/policy/reject-buffer.d.ts +55 -0
- package/dist/core/self-evolution/policy/reject-buffer.js +170 -0
- package/dist/core/self-evolution/promote.d.ts +1 -1
- package/dist/core/self-evolution/promote.js +6 -33
- package/dist/core/self-evolution/promotion.js +1 -2
- package/dist/core/self-evolution/reward-agent.d.ts +379 -0
- package/dist/core/self-evolution/reward-agent.js +940 -0
- package/dist/core/self-evolution/reward-aggregator.d.ts +59 -0
- package/dist/core/self-evolution/reward-aggregator.js +262 -0
- package/dist/core/self-evolution/scope-gate.d.ts +66 -0
- package/dist/core/self-evolution/scope-gate.js +107 -0
- package/dist/core/self-evolution/success-channel.js +2 -2
- package/dist/core/self-evolution/tamper-check.d.ts +24 -0
- package/dist/core/self-evolution/tamper-check.js +236 -0
- package/dist/core/self-evolution/tool-evolution.js +2 -13
- package/dist/core/self-evolution/verdict.d.ts +8 -5
- package/dist/core/self-evolution/verdict.js +4 -7
- package/dist/core/templates/workflows/gen-tests.js +1 -1
- package/dist/core/templates/workflows/learn.d.ts +3 -2
- package/dist/core/templates/workflows/learn.js +21 -18
- package/dist/core/templates/workflows/self-evolving.d.ts +6 -4
- package/dist/core/templates/workflows/self-evolving.js +62 -172
- package/dist/core/trajectory/scrub.d.ts +27 -0
- package/dist/core/trajectory/scrub.js +79 -0
- package/dist/core/trajectory/skeleton.d.ts +27 -1
- package/dist/core/trajectory/skeleton.js +152 -8
- package/dist/dashboard/data.d.ts +25 -51
- package/dist/dashboard/data.js +68 -180
- package/dist/dashboard/react-client.js +458 -503
- package/dist/dashboard/react-styles.js +3 -3
- package/dist/dashboard/server.js +23 -17
- package/dist/ui/ascii-patterns.d.ts +7 -15
- package/dist/ui/ascii-patterns.js +123 -54
- package/dist/ui/welcome-screen.d.ts +0 -14
- package/dist/ui/welcome-screen.js +16 -35
- package/package.json +1 -1
- package/dist/core/self-evolution/ga-selection.d.ts +0 -94
- package/dist/core/self-evolution/ga-selection.js +0 -153
- package/dist/core/self-evolution/proposer-agent.d.ts +0 -182
- package/dist/core/self-evolution/proposer-agent.js +0 -326
- package/dist/core/self-evolution/replay-runner.d.ts +0 -100
- package/dist/core/self-evolution/replay-runner.js +0 -170
- package/dist/core/self-evolution/replay.d.ts +0 -45
- package/dist/core/self-evolution/replay.js +0 -56
- package/dist/core/self-evolution/template-variants.d.ts +0 -62
- package/dist/core/self-evolution/template-variants.js +0 -171
- package/dist/core/self-evolution/trajectory.d.ts +0 -65
- package/dist/core/self-evolution/trajectory.js +0 -185
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared candidate-EDITS contract.
|
|
3
|
+
*
|
|
4
|
+
* The single place that (a) defines the no-op / invalid-output error classes the
|
|
5
|
+
* manual host-authored channel raises, (b) validates already-structured
|
|
6
|
+
* candidate edits against the target-scoped file set + the frozen gate-defining
|
|
7
|
+
* files, and (c) renders a whole-file-replacement unified diff. Both surviving
|
|
8
|
+
* edit channels share it byte-for-byte:
|
|
9
|
+
* - the manual `--from-edits` / `--from-learn` host-authored path
|
|
10
|
+
* (`commands/self-evolution.ts` → `packageHostEdits`, `promote.ts`), and
|
|
11
|
+
* - the loop-v2 演进智能体 EVOLVING AGENT (`evolving-agent.ts`).
|
|
12
|
+
*
|
|
13
|
+
* Pure (no I/O, no spawn): this module never applies, promotes, or mutates any
|
|
14
|
+
* canonical file.
|
|
15
|
+
*/
|
|
16
|
+
import { GATE_DEFINING_FILES } from './candidate-gates.js';
|
|
17
|
+
/**
 * Error raised when candidate edits break the edit contract (malformed entry,
 * frozen file, or out-of-scope path — see `validateCandidateEdits`).
 */
export class CanonicalProposerOutputInvalid extends Error {
    /**
     * @param message Detail appended after the fixed
     *   "canonical proposer output invalid: " prefix.
     */
    constructor(message) {
        const text = `canonical proposer output invalid: ${message}`;
        super(text);
        // Explicit name so logs and `err.name` checks identify this class.
        this.name = 'CanonicalProposerOutputInvalid';
    }
}
|
|
23
|
+
/**
 * Signals that the model chose to edit nothing (the edits list was empty).
 * Modeled as an Error subclass, but it denotes a no-op rather than a failure.
 */
export class CanonicalProposerNoOp extends Error {
    constructor() {
        const text = 'canonical proposer returned no edits';
        super(text);
        // Explicit name so logs and `err.name` checks identify this class.
        this.name = 'CanonicalProposerNoOp';
    }
}
|
|
30
|
+
/**
 * Raised when the headless agent invocation itself failed (it crashed or
 * produced empty output).
 */
export class CanonicalProposerInvocationError extends Error {
    /**
     * @param stderr Failure detail appended after the fixed
     *   "canonical proposer invocation failed: " prefix.
     */
    constructor(stderr) {
        const text = `canonical proposer invocation failed: ${stderr}`;
        super(text);
        // Explicit name so logs and `err.name` checks identify this class.
        this.name = 'CanonicalProposerInvocationError';
    }
}
|
|
37
|
+
/**
 * Validate already-structured candidate edits against the allowed
 * (target-scoped) file set and the frozen gate-defining files.
 *
 * Author-agnostic: this is the single place that enforces, at propose time,
 * that every edit
 *   (a) is a well-formed `{relPath, content}` object,
 *   (b) does not touch a `GATE_DEFINING_FILES` entry (the frozen oracle/gate
 *       files), and
 *   (c) stays inside `allowedFiles`.
 * Both the manual host-authored (`--from-edits`) path and the loop-v2 evolving
 * agent call this, so their safety contract is identical.
 *
 * Path traversal and absolute paths are rejected transitively: they can never
 * be a member of `allowedFiles`, so they fail the scope check.
 *
 * @param rawEdits Candidate edits; each entry is expected to be
 *   `{relPath: string, content: string}`.
 * @param allowedFiles Relative paths the target is allowed to edit.
 * @returns The validated edits, in input order, with each `relPath` normalized
 *   to POSIX separators.
 * @throws {CanonicalProposerNoOp} When `rawEdits` is an empty array.
 * @throws {CanonicalProposerOutputInvalid} For any shape / frozen / scope
 *   violation, including a `rawEdits` value that is not an array.
 */
export function validateCandidateEdits(rawEdits, allowedFiles) {
    // A non-array payload is a shape violation; report it through the contract
    // error instead of letting `.length` / `for…of` surface a raw TypeError.
    if (!Array.isArray(rawEdits)) {
        throw new CanonicalProposerOutputInvalid('edits must be an array');
    }
    if (rawEdits.length === 0) {
        throw new CanonicalProposerNoOp();
    }
    // Compare on POSIX separators so Windows-style paths match the same entries.
    const toPosix = (p) => p.replace(/\\/g, '/');
    const allowed = new Set(allowedFiles.map(toPosix));
    const frozen = new Set(GATE_DEFINING_FILES.map(toPosix));
    const validated = [];
    for (const e of rawEdits) {
        if (!e || typeof e !== 'object') {
            throw new CanonicalProposerOutputInvalid('edit entry must be an object');
        }
        const relPath = e.relPath;
        const content = e.content;
        if (typeof relPath !== 'string' || typeof content !== 'string') {
            throw new CanonicalProposerOutputInvalid('edit must have string relPath and string content');
        }
        const norm = toPosix(relPath);
        // Frozen check runs before the scope check so a frozen file is refused
        // with the frozen-file message even if it also appears in `allowedFiles`.
        if (frozen.has(norm)) {
            throw new CanonicalProposerOutputInvalid(`edit relPath "${relPath}" is a gate-defining/frozen file and may never be proposed`);
        }
        if (!allowed.has(norm)) {
            throw new CanonicalProposerOutputInvalid(`edit relPath "${relPath}" is outside the target's declared files`);
        }
        validated.push({ relPath: norm, content });
    }
    return validated;
}
|
|
79
|
+
/**
 * Render a whole-file-replacement unified diff (human-readable; intended to be
 * git-apply friendly).
 *
 * Every old line is emitted as a `-` line and every new line as a `+` line in
 * a single hunk; no attempt is made to find common lines. One trailing newline
 * is stripped from each side before splitting so a normally-terminated file
 * does not produce a phantom empty last line.
 *
 * An empty string on either side means "zero lines": the corresponding hunk
 * range is `0,0` and no lines are emitted for that side. (Previously only the
 * old side was handled this way, so emptying a file produced a `+1,1` range
 * and a spurious bare `+` line.)
 *
 * @param relPath Path used in the `--- a/…` / `+++ b/…` header lines.
 * @param oldContent Full previous file content (`''` for a new file).
 * @param newContent Full replacement file content (`''` for an emptied file).
 * @returns The diff text: two header lines, one hunk header, then the body.
 */
export function renderUnifiedDiff(relPath, oldContent, newContent) {
    // '' is zero lines; otherwise drop one trailing '\n' and split.
    const toLines = (text) => (text.length === 0 ? [] : text.replace(/\n$/, '').split('\n'));
    const oldLines = toLines(oldContent);
    const newLines = toLines(newContent);
    // Unified-diff convention: an empty range starts at 0, a non-empty one at 1.
    const oldStart = oldLines.length === 0 ? 0 : 1;
    const newStart = newLines.length === 0 ? 0 : 1;
    const header = `--- a/${relPath}\n+++ b/${relPath}\n` +
        `@@ -${oldStart},${oldLines.length} +${newStart},${newLines.length} @@`;
    const body = [...oldLines.map((l) => `-${l}`), ...newLines.map((l) => `+${l}`)].join('\n');
    return `${header}\n${body}`;
}
|
|
89
|
+
//# sourceMappingURL=edits-contract.js.map
|
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Episode orchestrator — the rollback-before-evolution heart of loop v2
|
|
3
|
+
* (self-evolution as in-context RL).
|
|
4
|
+
*
|
|
5
|
+
* One `runEpisode` runs ONE synergyspec-selfevolving change through the full
|
|
6
|
+
* loop, in a STRICT, durably-persisted order — correctness of ordering is the
|
|
7
|
+
* design contract:
|
|
8
|
+
*
|
|
9
|
+
* a. acquireInFlight — one in-flight episode per target.
|
|
10
|
+
* b. ensure 单一血统 single lineage — init v0 from resolved files when new;
|
|
11
|
+
* policyVersionMain = lineage head.
|
|
12
|
+
* c. createEpisode + writeArmCapture — record the 主智能体 MAIN AGENT (frozen
|
|
13
|
+
* actor, policy vN+1) arm; advance
|
|
14
|
+
* 'main-arm-captured'.
|
|
15
|
+
* d. CRITIC AGENT(基线智能体 baseline — runCriticAgent reruns LAST episode's
|
|
16
|
+
* agent) policy vN on the SAME change, OR skip
|
|
17
|
+
* ('baseline-skipped') when the lineage
|
|
18
|
+
* has < 2 versions / last action refused.
|
|
19
|
+
* e. 奖励智能体 REWARD AGENT — runRewardAgent CALCULATES 算分
|
|
20
|
+
* reward(主臂)&reward(基线臂), advantage
|
|
21
|
+
* = reward(主臂) − reward(基线臂), the
|
|
22
|
+
* 文本梯度 textual gradient; writes
|
|
23
|
+
* diagnosis.json; advances 'scored'.
|
|
24
|
+
* f. DECISION on the main arm's edits:
|
|
25
|
+
* - 弃权 abstained / no gaps → advance 'abstained'; SKIP evolution.
|
|
26
|
+
* - bad advantage (< threshold) → ROLLBACK the 策略 POLICY to the prior
|
|
27
|
+
* good version, THEN append the 否决缓冲
|
|
28
|
+
* reject-buffer entry — BOTH durably on
|
|
29
|
+
* disk — THEN advance 'rolled-back'.
|
|
30
|
+
* - otherwise → advance 'kept'.
|
|
31
|
+
* g. 演进智能体 EVOLVING AGENT — ONLY after (f) persisted: runEvolvingAgent
|
|
32
|
+
* (optimizer.step) reads the reject-buffer FRESH from disk
|
|
33
|
+
* (so THIS episode's just-written entry is
|
|
34
|
+
* in its prompt) and either not-spawned /
|
|
35
|
+
* refused / evolved.
|
|
36
|
+
* h. advance 'closed' + releaseInFlight — ALWAYS, even on error.
|
|
37
|
+
*
|
|
38
|
+
* ORDERING GUARANTEE: the rollback + reject-buffer write are SEQUENTIAL awaits
|
|
39
|
+
* that BOTH complete (and the stage reads 'rolled-back'/'kept') before
|
|
40
|
+
* {@link runEvolvingAgent} is even called. (f) and (g) are never parallelized
|
|
41
|
+
* and never share a Promise.all.
|
|
42
|
+
*
|
|
43
|
+
* This module orchestrates; it never spawns an agent itself — the three agents
|
|
44
|
+
* own their own {@link runHeadlessAgent} spawns (the `spawn` seam threads to all
|
|
45
|
+
* three). The only state it owns is the ordering + the rollback/reject decision.
|
|
46
|
+
*/
|
|
47
|
+
import { spawn as nodeSpawn } from 'node:child_process';
|
|
48
|
+
import type { LearnReport } from '../learn.js';
|
|
49
|
+
import type { TrajectorySource } from '../trajectory/source.js';
|
|
50
|
+
import { type PolicyResolveFiles, type PolicyLedgerEntry } from './policy/policy-store.js';
|
|
51
|
+
import { type EpisodeStage } from './episode-store.js';
|
|
52
|
+
import { type ArmObjective, type CriticBaselineMode } from './critic-agent.js';
|
|
53
|
+
import { type RewardConfig } from './reward-aggregator.js';
|
|
54
|
+
import { type RunEvolvingAgentResult } from './evolving-agent.js';
|
|
55
|
+
/** Capture of the main agent's arm (policy vN+1) as recorded by the orchestrator. */
export interface MainArmCapture {
    /** Raw session transcript text, if supplied; stored as `transcript.jsonl`. */
    transcript?: string;
    /** Bounded action skeleton of the observed run, if one could be discovered. */
    skeleton?: object;
    /**
     * Objective for this arm. It has exactly the critic agent's
     * {@link ArmObjective} shape so the reward agent can read both arms the
     * same way.
     */
    objective: ArmObjective;
}
|
|
67
|
+
/** Inputs accepted by {@link captureMainArm}. */
export interface CaptureMainArmOptions {
    repoRoot: string;
    changeName: string;
    /**
     * A learn report that has ALREADY been computed (by `generateLearnReport`).
     * Its `fitnessSample` holds the graded pass rate, health and
     * observed-trajectory facts; the orchestrator reuses that grading instead
     * of running it again.
     */
    report: LearnReport;
    /**
     * Optional trajectory handles, for when the learn report carries no
     * `observedRun` skeleton (older capture paths). The caller can pass a
     * skeleton / raw transcript directly; otherwise the orchestrator
     * re-discovers the skeleton through {@link getTrajectoryForChange}. A
     * `trajectorySource` override takes precedence (tests / explicit harness
     * selection).
     */
    trajectoryHandles?: {
        /** Session transcript text, persisted verbatim as `transcript.jsonl`. */
        transcript?: string;
        /** Ready-made action skeleton; when absent it is derived from the discovered trajectory. */
        skeleton?: object;
        /** Override for the trajectory source (tests); when absent the registry auto-detects. */
        trajectorySource?: TrajectorySource;
    };
}
|
|
92
|
+
/**
 * Assemble the main agent's arm `{transcript?, skeleton?, objective}` from the
 * {@link FitnessSample} of an already-computed learn report plus the
 * discovered trajectory.
 *
 * The learn/fitness grading is reused as-is; nothing is re-graded:
 *   - `objective.passRate`: the OBSERVED pass rate
 *     (`fitnessSample.trajectoryFacts.observedPassRate`, present when a runner
 *     ran) is preferred; next the authored `testMetrics.passRate`; otherwise
 *     `null` — a value is never invented.
 *   - `objective.healthPenalty`: `fitnessSample.healthSignal`, i.e. the raw
 *     reading where "no signal" is `null`. This is not the same as
 *     `loss.healthPenalty`, which is defaulted with `?? 0`.
 *   - `objective.loss`: the blended `fitnessSample.loss.loss`, or `null`.
 *   - `verified` / `observedStatus`: taken from `trajectoryFacts`.
 *
 * The resulting objective matches the {@link ArmObjective} shape exactly, so
 * both arms can be read uniformly.
 *
 * @param opts Repo location, change name, learn report and optional trajectory handles.
 * @returns The captured main arm.
 */
export declare function captureMainArm(opts: CaptureMainArmOptions): Promise<MainArmCapture>;
|
|
109
|
+
/** Outcome of the orchestrator's decision about the main arm's edits. */
export type EpisodeDecision =
    | 'rolled-back'
    | 'kept'
    | 'abstained';
|
|
111
|
+
/**
 * Count how many rolled-back episodes sit consecutively at the tail of the
 * version ledger.
 *
 * During a bad streak the ledger tail looks like
 * `…, evolve, rollback, evolve, rollback`. The evolving agent appends exactly
 * one 'evolve' after each decision, so every counted rollback is reached by
 * stepping over the single 'evolve' that follows it. A 'kept' episode leaves a
 * bare 'evolve' with no rollback after it, which ends the streak; 'init' and
 * 'refused' entries end it as well.
 *
 * Pure.
 *
 * @param ledger Policy ledger entries; the last element is the head.
 * @returns The streak length, or 0 when the head is not a rollback (the last
 *   episode was kept).
 */
export declare function consecutiveRollbacks(ledger: readonly PolicyLedgerEntry[]): number;
|
|
122
|
+
/**
 * Step-size schedule for the evolving agent's edit budget L.
 *
 * This is a backtracking-line-search / trust-region style move (compare
 * SkillOpt's decaying edit budget): once an edit has lost ground and been
 * rolled back, the next edit should be SMALLER. A smaller blast radius is
 * cheaper to undo, makes the cause easier to see, and stops a struggling
 * lineage from drifting through repeated full-size swings.
 *
 * The base budget is HALVED once for each consecutive rolled-back episode, but
 * never drops below `minBudget`. `minBudget` is itself clamped to `base`, so a
 * base that the caller already shrank is never RAISED. A healthy lineage (no
 * trailing rollback) keeps `base` unchanged.
 *
 * Pure.
 *
 * @param ledger Policy ledger used to measure the trailing rollback streak.
 * @param base Edit budget to use when there is no trailing rollback.
 * @param minBudget Optional floor for the halved budget (its default is not
 *   visible in this declaration).
 * @returns The edit budget to hand to the evolving agent.
 */
export declare function scheduledEditBudget(ledger: readonly PolicyLedgerEntry[], base: number, minBudget?: number): number;
|
|
135
|
+
/** Inputs accepted by {@link runEpisode}. */
export interface RunEpisodeOptions {
    repoRoot: string;
    targetId: string;
    changeName: string;
    /** Absolute path to the change directory. It is recorded in episode.json and never copied. */
    changeDirPath: string;
    /** The main agent's arm, as produced by {@link captureMainArm}. */
    mainArm: MainArmCapture;
    /**
     * Threshold on advantage = reward(main arm) − reward(baseline arm). When
     * the advantage falls below it, the policy is rolled back to the prior
     * version BEFORE the evolving agent runs. Default 0 (a non-positive
     * advantage triggers a rollback).
     *
     * NOTE(review): the module header describes the rollback condition as
     * "< threshold", which would not roll back an advantage of exactly 0 —
     * confirm which comparison the implementation uses.
     */
    advantageRollbackThreshold?: number;
    /** Edit budget L given to the evolving agent. Default 40. */
    editBudget?: number;
    /**
     * Judge-quality settings for the reward agent (from `selfEvolution.reward`).
     * When omitted: single sample, flag-only tamper check (the historical
     * behavior, with zero extra spawns).
     */
    reward?: RewardConfig;
    /**
     * How the critic (baseline) agent constructs its baseline (from
     * `selfEvolution.critic`). When omitted, the critic uses its default
     * 're-do' mode (regenerate the change under vN).
     */
    critic?: {
        baselineMode?: CriticBaselineMode;
    };
    /** Injectable spawn seam, passed through to ALL THREE agents. Defaults to node's spawn. */
    spawn?: typeof nodeSpawn;
    /** Injectable clock used for the lock and the episode id; defaults to `new Date()`. */
    now?: Date;
    /**
     * TEST seam: the file resolver that {@link initPolicyLineage} uses to
     * snapshot v0 for a new lineage. Defaults to the real
     * `resolveTargetLocalFiles`.
     */
    resolveFiles?: PolicyResolveFiles;
}
|
|
173
|
+
/** Result of a {@link runEpisode} call that actually ran an episode. */
export interface RunEpisodeResult {
    episodeId: string;
    /** True if the critic (baseline) agent's arm was skipped. */
    baselineSkipped: boolean;
    /**
     * advantage = reward(main arm) − reward(baseline arm). `null` when the
     * baseline was skipped or the episode abstained.
     */
    advantage: number | null;
    decision: EpisodeDecision;
    /** Outcome of the evolving agent, or `null` if it was never spawned. */
    evolution: RunEvolvingAgentResult | null;
    /** Lineage head version AFTER the episode (i.e. post-rollback / post-evolve). */
    newPolicyVersion: number | null;
}
|
|
185
|
+
/**
 * Value {@link runEpisode} resolves with — rather than throwing — when the
 * target already holds a non-stale in-flight lock.
 */
export interface RunEpisodeBusy {
    episodeId: null;
    busy: true;
    reason: string;
}
|
|
191
|
+
/**
 * Run ONE episode through the loop, following the strict, durably-persisted
 * step order documented at the top of this module.
 *
 * The in-flight lock is released from a `finally` guard, so an exception in
 * the middle of an episode never leaves the target's slot wedged.
 *
 * @param opts Episode inputs; see {@link RunEpisodeOptions}.
 * @returns A {@link RunEpisodeResult}, or a {@link RunEpisodeBusy} when the
 *   target already has a non-stale in-flight lock.
 */
export declare function runEpisode(opts: RunEpisodeOptions): Promise<RunEpisodeResult | RunEpisodeBusy>;
|
|
199
|
+
/** Inputs accepted by {@link resumeEpisode}. */
export interface ResumeEpisodeOptions {
    repoRoot: string;
    episodeId: string;
    /** Injectable spawn seam, passed through to the agents that still have to run. */
    spawn?: typeof nodeSpawn;
    /** Advantage rollback threshold (default 0); relevant when the episode is resumed before the decision step. */
    advantageRollbackThreshold?: number;
    /** Edit budget L (default 40). */
    editBudget?: number;
}
|
|
209
|
+
/** Result of a {@link resumeEpisode} call. */
export interface ResumeEpisodeResult {
    episodeId: string;
    /** Stage the episode was at when resume was invoked. */
    resumedFrom: EpisodeStage;
    /** Stage the episode reached once resume had re-run the remaining steps. */
    stage: EpisodeStage;
    evolution: RunEvolvingAgentResult | null;
}
|
|
217
|
+
/**
 * Re-enter a partially-run episode at its recorded stage and idempotently run
 * whatever steps REMAIN. Best-effort; this backs the CLI `episode resume`.
 *
 * Because the episode stage machine is monotonic, resume starts at the first
 * step that has not been done yet and never re-advances a stage that was
 * already entered:
 *
 *   - 'scored'                                    → run the decision (f), then the evolving agent (g).
 *   - 'rolled-back' / 'kept'                      → run the evolving agent (g), then close.
 *   - 'evolved' / 'evolution-refused' / 'abstained' → close.
 *   - any earlier stage                           → not auto-resumable here (the arms and the
 *                                                   reward agent need their own re-entry); the
 *                                                   episode is reported as-is.
 *
 * NOTE: resume does NOT re-acquire the in-flight lock. The original
 * {@link runEpisode} has already released it, and a resume is an
 * operator-driven recovery rather than a concurrent run.
 *
 * @param opts Episode to resume plus optional overrides; see {@link ResumeEpisodeOptions}.
 * @returns The stage resumed from, the stage reached, and the evolving agent's outcome (if any).
 */
export declare function resumeEpisode(opts: ResumeEpisodeOptions): Promise<ResumeEpisodeResult>;
|
|
234
|
+
//# sourceMappingURL=episode-orchestrator.d.ts.map
|