synergyspec-selfevolving 1.4.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +31 -18
- package/dist/commands/learn.d.ts +12 -1
- package/dist/commands/learn.js +158 -11
- package/dist/commands/self-evolution-episode.d.ts +177 -0
- package/dist/commands/self-evolution-episode.js +431 -0
- package/dist/commands/self-evolution.d.ts +12 -190
- package/dist/commands/self-evolution.js +114 -866
- package/dist/core/archive.d.ts +0 -1
- package/dist/core/archive.js +0 -58
- package/dist/core/artifact-graph/instruction-loader.d.ts +2 -4
- package/dist/core/artifact-graph/instruction-loader.js +3 -31
- package/dist/core/fitness/loss.d.ts +5 -5
- package/dist/core/fitness/loss.js +4 -4
- package/dist/core/fitness/test-failures.js +10 -2
- package/dist/core/project-config.d.ts +19 -0
- package/dist/core/project-config.js +96 -0
- package/dist/core/self-evolution/candidate-fitness.d.ts +23 -1
- package/dist/core/self-evolution/candidate-fitness.js +31 -5
- package/dist/core/self-evolution/candidates.d.ts +0 -9
- package/dist/core/self-evolution/critic-agent.d.ts +192 -0
- package/dist/core/self-evolution/critic-agent.js +568 -0
- package/dist/core/self-evolution/edits-contract.d.ts +53 -0
- package/dist/core/self-evolution/edits-contract.js +89 -0
- package/dist/core/self-evolution/episode-orchestrator.d.ts +234 -0
- package/dist/core/self-evolution/episode-orchestrator.js +681 -0
- package/dist/core/self-evolution/episode-store.d.ts +266 -0
- package/dist/core/self-evolution/episode-store.js +573 -0
- package/dist/core/self-evolution/evolution-switches.d.ts +1 -1
- package/dist/core/self-evolution/evolution-switches.js +5 -10
- package/dist/core/self-evolution/evolving-agent.d.ts +208 -0
- package/dist/core/self-evolution/evolving-agent.js +535 -0
- package/dist/core/self-evolution/host-harness.d.ts +14 -15
- package/dist/core/self-evolution/host-harness.js +48 -23
- package/dist/core/self-evolution/index.d.ts +11 -6
- package/dist/core/self-evolution/index.js +20 -6
- package/dist/core/self-evolution/line-diff.d.ts +60 -0
- package/dist/core/self-evolution/line-diff.js +130 -0
- package/dist/core/self-evolution/policy/fs-safe.d.ts +19 -0
- package/dist/core/self-evolution/policy/fs-safe.js +89 -0
- package/dist/core/self-evolution/policy/index.d.ts +13 -0
- package/dist/core/self-evolution/policy/index.js +13 -0
- package/dist/core/self-evolution/policy/policy-store.d.ts +217 -0
- package/dist/core/self-evolution/policy/policy-store.js +774 -0
- package/dist/core/self-evolution/policy/prediction-reconcile.d.ts +54 -0
- package/dist/core/self-evolution/policy/prediction-reconcile.js +191 -0
- package/dist/core/self-evolution/policy/reject-buffer.d.ts +55 -0
- package/dist/core/self-evolution/policy/reject-buffer.js +170 -0
- package/dist/core/self-evolution/promote.d.ts +1 -1
- package/dist/core/self-evolution/promote.js +6 -33
- package/dist/core/self-evolution/promotion.js +1 -2
- package/dist/core/self-evolution/reward-agent.d.ts +379 -0
- package/dist/core/self-evolution/reward-agent.js +940 -0
- package/dist/core/self-evolution/reward-aggregator.d.ts +59 -0
- package/dist/core/self-evolution/reward-aggregator.js +262 -0
- package/dist/core/self-evolution/scope-gate.d.ts +66 -0
- package/dist/core/self-evolution/scope-gate.js +107 -0
- package/dist/core/self-evolution/success-channel.js +2 -2
- package/dist/core/self-evolution/tamper-check.d.ts +24 -0
- package/dist/core/self-evolution/tamper-check.js +236 -0
- package/dist/core/self-evolution/tool-evolution.js +2 -13
- package/dist/core/self-evolution/verdict.d.ts +8 -5
- package/dist/core/self-evolution/verdict.js +4 -7
- package/dist/core/templates/workflows/gen-tests.js +1 -1
- package/dist/core/templates/workflows/learn.d.ts +3 -2
- package/dist/core/templates/workflows/learn.js +21 -18
- package/dist/core/templates/workflows/self-evolving.d.ts +6 -4
- package/dist/core/templates/workflows/self-evolving.js +62 -172
- package/dist/core/trajectory/scrub.d.ts +27 -0
- package/dist/core/trajectory/scrub.js +79 -0
- package/dist/core/trajectory/skeleton.d.ts +27 -1
- package/dist/core/trajectory/skeleton.js +152 -8
- package/dist/dashboard/data.d.ts +25 -51
- package/dist/dashboard/data.js +68 -180
- package/dist/dashboard/react-client.js +458 -503
- package/dist/dashboard/react-styles.js +3 -3
- package/dist/dashboard/server.js +23 -17
- package/dist/ui/ascii-patterns.d.ts +7 -15
- package/dist/ui/ascii-patterns.js +123 -54
- package/dist/ui/welcome-screen.d.ts +0 -14
- package/dist/ui/welcome-screen.js +16 -35
- package/package.json +1 -1
- package/dist/core/self-evolution/ga-selection.d.ts +0 -94
- package/dist/core/self-evolution/ga-selection.js +0 -153
- package/dist/core/self-evolution/proposer-agent.d.ts +0 -182
- package/dist/core/self-evolution/proposer-agent.js +0 -326
- package/dist/core/self-evolution/replay-runner.d.ts +0 -100
- package/dist/core/self-evolution/replay-runner.js +0 -170
- package/dist/core/self-evolution/replay.d.ts +0 -45
- package/dist/core/self-evolution/replay.js +0 -56
- package/dist/core/self-evolution/template-variants.d.ts +0 -62
- package/dist/core/self-evolution/template-variants.js +0 -171
- package/dist/core/self-evolution/trajectory.d.ts +0 -65
- package/dist/core/self-evolution/trajectory.js +0 -185
|
@@ -0,0 +1,379 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 奖励智能体 REWARD AGENT — loop v2 (self-evolution as in-context RL).
|
|
3
|
+
*
|
|
4
|
+
* LLM as judge. Reads ONE episode's two arms from the {@link import('./episode-store.js')}
|
|
5
|
+
* episode store and CALCULATES 算分 reward(主臂) and reward(基线臂), each in
|
|
6
|
+
* [0,1], anchored on the objective evidence on disk (tests · health · 轨迹度量).
|
|
7
|
+
* advantage = reward(主臂) − reward(基线臂). It finds errors / names gaps with a
|
|
8
|
+
* suggested direction = 文本梯度 textual gradient; it NEVER edits any file; when
|
|
9
|
+
* there is no nameable gap it 弃权 abstains.
|
|
10
|
+
*
|
|
11
|
+
* It is a SIBLING of the 演进智能体 EVOLVING AGENT (optimizer.step; ONE bounded
|
|
12
|
+
* edit ≤L; never scores) — never parent-child. The reward agent runs FIRST (it
|
|
13
|
+
* scores), the 演进智能体 runs AFTER (it edits); each is code-spawned in a fresh
|
|
14
|
+
* context via {@link runHeadlessAgent} from `./host-harness.js` — NOT a skill,
|
|
15
|
+
* the same headless-spawn seam the EVOLVING AGENT uses.
|
|
16
|
+
*
|
|
17
|
+
* Write boundary: this module's ONLY write path is the episode dir — it calls
|
|
18
|
+
* {@link writeDiagnosis} and {@link advanceEpisodeStage} (to `scored`). It never
|
|
19
|
+
* touches a canonical target file, a 策略 POLICY snapshot, or the change dir
|
|
20
|
+
* (which it reads, never copies). The loss/health numbers from
|
|
21
|
+
* `src/core/fitness/loss.ts` stay on disk as ANCHORS — the reward score itself
|
|
22
|
+
* is the JUDGE'S OWN, anchored on but not equal to the loss.
|
|
23
|
+
*
|
|
24
|
+
* Uses the same fenced-block agent idiom as the EVOLVING AGENT: one fenced-block
|
|
25
|
+
* output discipline, a bounded repair loop on parse/validation failure,
|
|
26
|
+
* fail-closed plain (Zod-free) validation, and atomic episode-store writes.
|
|
27
|
+
*/
|
|
28
|
+
import { spawn as nodeSpawn } from 'node:child_process';
|
|
29
|
+
import { type EpisodeRecord } from './episode-store.js';
|
|
30
|
+
/**
 * Error raised when the judge's output is rejected. Per the documentation of
 * {@link parseRewardAgentResponse}, it is thrown on any violation of the
 * one-fenced-block / well-formed-JSON / shape-and-range contract, and the
 * repair loop re-prompts with the concrete message appended.
 */
export declare class RewardAgentOutputInvalid extends Error {
    /** @param message Concrete description of the violation. */
    constructor(message: string);
}
|
|
33
|
+
/**
 * Error for a failed agent invocation (agent crash). Per the documentation of
 * {@link runRewardAgent}, invocation errors are NOT repaired; they propagate to
 * the caller as this type.
 */
export declare class RewardAgentInvocationError extends Error {
    /**
     * @param stderr Presumably the captured stderr of the failed agent process —
     *   TODO confirm against the implementation.
     */
    constructor(stderr: string);
}
|
|
36
|
+
/**
 * Read-side view of the `objective.json` record each arm persists, as the
 * reward agent consumes it.
 *
 * This is a null-safe superset of the canonical on-disk
 * {@link import('./critic-agent.js').ArmObjective} (the flat shape both arms
 * write). The main arm may instead carry the nested-learn fallback fields
 * (`testMetrics`, `healthSignal`, and a nested
 * {@link import('../fitness/loss.js').PerChangeLoss} under `loss`) when an older
 * capture path wrote a raw {@link import('../fitness/sample.js').FitnessSample}.
 * Only the fields that map to anchors are read, and either layout is tolerated,
 * so a divergence in the exact layout cannot crash the judge.
 *
 * The name intentionally differs from the canonical `ArmObjective` (which the
 * barrel re-exports from critic-agent) so there is no ambiguous duplicate export.
 */
export interface RewardArmObjectiveInput {
    /** Pass rate in [0,1] — flat (critic-agent) layout. */
    passRate?: number | null;
    testsTotal?: number | null;
    testsFailed?: number | null;
    /** Normalized health penalty in [0,1] — flat (critic-agent) layout. */
    healthPenalty?: number | null;
    /**
     * Either the flat blended loss in [0,1] (critic-agent layout) or a nested
     * PerChangeLoss object (main-arm layout).
     */
    loss?: number | null | {
        functionalLoss?: number | null;
        healthPenalty?: number | null;
        loss?: number | null;
    };
    verified?: boolean;
    observedStatus?: string | null;
    measuredAt?: string | null;
    /** True when a real test-runner invocation was observed (P2 confidence calibration). */
    testRunObserved?: boolean;
    /** Failing test ids and assertion lines parsed from the observed runner output (P1 contrast). */
    observedFailures?: Array<{
        testId: string;
        file?: string;
        assertion?: string;
    }>;
    /** Main-arm (FitnessSample) layout: functional metrics are nested under `testMetrics`. */
    testMetrics?: {
        passRate?: number | null;
    } | null;
    /** Main-arm (FitnessSample) layout: the raw health signal, distinct from `loss.healthPenalty`. */
    healthSignal?: number | null;
    /** Additive / forward-compatible fields are carried through without being read. */
    [key: string]: unknown;
}
|
|
82
|
+
/**
 * Anchor values mapped from the objectives of both arms. A field is `null`
 * where the corresponding value was skipped.
 */
export interface DiagnosisAnchors {
    mainLoss: number | null;
    baselineLoss: number | null;
    mainPassRate: number | null;
    baselinePassRate: number | null;
    mainHealthPenalty: number | null;
    baselineHealthPenalty: number | null;
    /**
     * Verbosity component of the code-health signal for the main arm, exposed
     * on its own so the judge can apply a compression term (① composite
     * reward). `null` when no verbosity sub-signal was captured, in which case
     * the rubric judges verbosity from the artifacts alone. Additive and
     * optional: absent on schemaVersion-1 captures.
     */
    mainVerbosity?: number | null;
    /** Baseline-arm counterpart of `mainVerbosity`. */
    baselineVerbosity?: number | null;
}
|
|
99
|
+
/**
 * A single named error found by the judge, tied to a quoted span in a real file.
 */
export interface DiagnosisError {
    /** Which arm the error was found in. */
    arm: 'main' | 'baseline';
    description: string;
    /** The file and the quoted span that back the error. */
    evidence: {
        file: string;
        quote: string;
    };
}
|
|
108
|
+
/**
 * Failure mode of a gap (⑥ weakness-class); it lets the evolving agent aim its
 * bounded edit.
 *
 * - `forgetting` — a capability the baseline had and the main arm lost
 * - `boundary` — an edge / limit case
 * - `rare` — a low-frequency scenario
 * - `logic` — an outright wrong behavior
 * - `verbosity` — bloat / redundancy to prune
 * - `other` — anything else
 */
export type WeaknessClass =
    | 'forgetting'
    | 'boundary'
    | 'rare'
    | 'logic'
    | 'verbosity'
    | 'other';
|
|
116
|
+
/** Severity label attached to a gap (see `DiagnosisGap.severity`). */
export type GapSeverity =
    | 'high'
    | 'medium'
    | 'low';
|
|
117
|
+
/**
 * A single nameable gap, addressed to a heading (`section`) inside a target file.
 */
export interface DiagnosisGap {
    file: string;
    /** Heading in the target file; the `'*'` wildcard is permitted. */
    section: string;
    description: string;
    /** ⑥ Optional weakness-class: the failure mode this gap represents. */
    weaknessClass?: WeaknessClass;
    /** ⑥ Optional severity: high-severity gaps are addressed first. */
    severity?: GapSeverity;
}
|
|
128
|
+
/**
 * Overall verdict of the judge (⑤ ternary).
 *
 * - `main-better` / `baseline-better` / `tie` — scored outcomes.
 * - `no-gap` — the existing abstain: nothing to improve.
 * - `insufficient-signal` — the new honest abstain: the judge cannot tell. It is
 *   set by the statistical layer when the advantage is within the A/A noise
 *   floor, or by the tamper check in block mode.
 */
export type RewardVerdict =
    | 'main-better'
    | 'baseline-better'
    | 'tie'
    | 'insufficient-signal'
    | 'no-gap';
|
|
136
|
+
/**
 * ② Statistical summary filled in by the reward aggregator; absent for a
 * single-sample run.
 */
export interface DiagnosisStats {
    samples: number;
    advantageMean: number | null;
    advantageStdev: number | null;
    /**
     * Judge jitter measured by an A/A pair. When `|advantageMean| < noiseFloor`
     * the outcome is insufficient-signal.
     */
    noiseFloor: number | null;
    sequentialDecision: 'accept' | 'reject' | 'continue' | 'single';
}
|
|
145
|
+
/** ④ Integrity / anti-hacking signals attached to a diagnosis. */
export interface DiagnosisIntegrity {
    /** A test-tamper signal was detected on the main arm (Batch 4 wires the detector). */
    testTamperSuspected: boolean;
    /**
     * Signed disagreement between the judge's advantage and the loss-implied
     * advantage (baselineLoss − mainLoss), i.e. the judge favoured an arm the
     * verifier dislikes. `null` when either loss anchor is missing.
     */
    judgeVerifierDivergence: number | null;
    /** Human-readable integrity flags; empty when clean. */
    flags: string[];
}
|
|
158
|
+
/**
 * Contents of the `diagnosis.json` written by the reward agent.
 *
 * schemaVersion 2 adds the OPTIONAL `verdict` / `confidence` / `stats` /
 * `integrity` fields: a schemaVersion-1 reader ignores them and a
 * schemaVersion-2 reader tolerates their absence.
 *
 * advantage = reward(main arm) − reward(baseline arm). It is `null` when the
 * baseline arm was skipped (no comparison possible) or when the judge abstained.
 */
export interface RewardDiagnosis {
    schemaVersion: 1 | 2;
    episodeId: string;
    changeName: string;
    targetId: string;
    policyVersions: {
        main: number | null;
        baseline: number | null;
    };
    rewardMain: number;
    /** `null` when the baseline arm was skipped. */
    rewardBaseline: number | null;
    /** reward(main arm) − reward(baseline arm); `null` when the baseline was skipped or the judge abstained. */
    advantage: number | null;
    anchors: DiagnosisAnchors;
    errors: DiagnosisError[];
    gaps: DiagnosisGap[];
    /** Textual gradient; `null` only when the judge abstained. */
    textualGradient: string | null;
    abstained: boolean;
    abstainReason?: string;
    /** ⑤ Overall verdict (optional): derived for a single sample, otherwise set by the aggregator. */
    verdict?: RewardVerdict;
    /** ⑤/② Confidence in [0,1] (optional); set by the statistical layer. */
    confidence?: number | null;
    /** ② Statistical summary (optional); absent for a single sample. */
    stats?: DiagnosisStats;
    /** ④ Integrity / anti-hacking signals (optional). */
    integrity?: DiagnosisIntegrity;
}
|
|
195
|
+
/** Everything {@link assembleRewardAgentPrompt} needs to build the judge prompt. */
export interface RewardAgentPromptInput {
    changeName: string;
    targetId: string;
    policyVersions: {
        main: number | null;
        baseline: number | null;
    };
    /** Capture of the main agent (policy vN+1). */
    mainArm: {
        skeleton: object | null;
        /** Raw transcript text (jsonl); bounded by {@link assembleRewardAgentPrompt}. */
        transcript: string | null;
        objective: RewardArmObjectiveInput;
    };
    /**
     * Capture of the critic agent (the baseline agent, policy vN). `null` when
     * the baseline arm was SKIPPED: the BASELINE ARM block is then omitted
     * entirely and the prompt states that no comparison is possible (null
     * rewardBaseline / advantage).
     */
    baselineArm: {
        skeleton: object | null;
        transcript: string | null;
        objective: RewardArmObjectiveInput;
    } | null;
    /** Bounded excerpts of the 5 artifacts plus test-report.md, read from the change dir. */
    artifacts: Array<{
        file: string;
        content: string;
    }>;
    /** Anchors (loss / health / passRate) already mapped from both arms. */
    anchors: DiagnosisAnchors;
    /**
     * ③ Presentation order of the two arms. Swapping the order across samples
     * cancels the LLM's position bias. Defaults to `main-first`, the historical
     * order, so single-sample callers keep byte-identical prompts.
     */
    armOrder?: 'main-first' | 'baseline-first';
    /**
     * ④ Optional tamper signal computed BEFORE scoring (Batch 4). When present
     * and `suspected`, the judge is told not to reward passing tests that were
     * weakened. When omitted the prompt is unchanged.
     */
    integrityHint?: {
        suspected: boolean;
        flags: string[];
    } | null;
}
|
|
243
|
+
/**
 * Build the reward-agent prompt. Pure (no I/O); exported for golden tests.
 *
 * Blocks, in order:
 *   1. PRELUDE — the judge contract.
 *   2. MAIN ARM (main agent, policy vN+1) — skeleton plus a bounded transcript
 *      excerpt.
 *   3. BASELINE ARM (critic agent / baseline agent, policy vN) — same content;
 *      OMITTED ENTIRELY when the baseline arm was skipped, in which case a
 *      one-line note replaces it and demands null rewardBaseline / advantage.
 *   4. ARTIFACTS — the 5 artifacts plus test-report.md, as bounded excerpts.
 *   5. OBJECTIVE EVIDENCE — both arms' anchors verbatim (the on-disk
 *      loss / health numbers that anchor the score).
 *
 * @param input The assembled prompt input.
 * @returns The prompt text.
 */
export declare function assembleRewardAgentPrompt(
    input: RewardAgentPromptInput,
): string;
|
|
256
|
+
/**
 * Module-private shape of a parsed judge response; it is the return type of
 * {@link parseRewardAgentResponse}.
 */
interface ParsedDiagnosis {
    rewardMain: number;
    rewardBaseline: number | null;
    advantage: number | null;
    errors: DiagnosisError[];
    gaps: DiagnosisGap[];
    textualGradient: string | null;
    abstained: boolean;
    abstainReason?: string;
    /** ⑤ Optional verdict emitted by the judge; validated against the enum when present. */
    verdict?: RewardVerdict;
    /** ⑤ Optional confidence in [0,1] emitted by the judge. */
    confidence?: number | null;
}
|
|
270
|
+
/**
 * Parse the `json:diagnosis` block emitted by the judge under a strict
 * one-block discipline: exactly one fenced block, well-formed JSON, then
 * fail-closed shape and range validation.
 *
 * @param text Raw response text from the judge.
 * @returns The validated diagnosis fields.
 * @throws {RewardAgentOutputInvalid} On any violation; the repair loop
 *   re-prompts with the concrete message appended.
 */
export declare function parseRewardAgentResponse(
    text: string,
): ParsedDiagnosis;
|
|
279
|
+
/**
 * Map a single arm's objective to its anchor values.
 *
 * @param objective The arm's objective record; `null` / `undefined` is accepted.
 * @returns The arm's `loss`, `passRate`, `healthPenalty` and `verbosity`
 *   anchors, each nullable.
 */
export declare function mapArmAnchors(
    objective: RewardArmObjectiveInput | null | undefined,
): {
    loss: number | null;
    passRate: number | null;
    healthPenalty: number | null;
    verbosity: number | null;
};
|
|
286
|
+
/**
 * Produce the {@link DiagnosisAnchors} block from the objectives of both arms.
 *
 * @param mainObjective Objective of the main arm.
 * @param baselineObjective Objective of the baseline arm, or `null`.
 * @returns The combined anchors block.
 */
export declare function buildAnchors(
    mainObjective: RewardArmObjectiveInput,
    baselineObjective: RewardArmObjectiveInput | null,
): DiagnosisAnchors;
|
|
288
|
+
/** Options accepted by {@link runRewardAgent}. */
export interface RunRewardAgentOptions {
    repoRoot: string;
    episodeId: string;
    /** Injection point for tests; node's `spawn` is the default. */
    spawn?: typeof nodeSpawn;
    /** Overrides the agent binary; the host harness's default is used otherwise. */
    binary?: string;
    /** Bounded number of re-prompts on parse/validation failure (default 2 ⇒ at most 3 spawns). */
    maxRepairAttempts?: number;
}
|
|
298
|
+
/** Result returned by {@link runRewardAgent}. */
export interface RunRewardAgentResult {
    diagnosis: RewardDiagnosis;
    /** Absolute path of the `diagnosis.json` that was written. */
    diagnosisPath: string;
    /** Episode record after it has been advanced to `scored`. */
    episode: EpisodeRecord;
}
|
|
305
|
+
/** Options accepted by {@link scoreOnce}. */
export interface ScoreOnceOptions {
    /** Fully assembled prompt input (carries arms, anchors, armOrder, integrityHint). */
    promptInput: RewardAgentPromptInput;
    /** Whether the baseline arm was skipped; drives the null-baseline contract. */
    baselineSkipped: boolean;
    /** Working directory for the headless spawn (the repo / change root). */
    repoRoot: string;
    spawn?: typeof nodeSpawn;
    binary?: string;
    /** Bounded number of re-prompts on parse/validation failure (default 2 ⇒ at most 3 spawns). */
    maxRepairAttempts?: number;
}
|
|
317
|
+
/** Result returned by {@link scoreOnce}. */
export interface ScoreOnceResult {
    parsed: ParsedDiagnosis;
    /** ④ Integrity signals computed from this duel's parse, anchors and tamper hint. */
    integrity: DiagnosisIntegrity;
}
|
|
322
|
+
/**
 * Score ONE judged duel.
 *
 * Steps: spawn the judge in a fresh context; parse with the bounded repair loop
 * (re-prompting with the concrete error appended); recompute and validate the
 * advantage (including ① gate-not-blend); compute the ④ integrity signals.
 *
 * This function does NOT write `diagnosis.json`. The caller owns the write:
 * either the single-sample {@link runRewardAgent} or the statistical
 * aggregator, which calls this unit k times for the A/A noise floor and SPRT.
 *
 * @param opts Prompt input, baseline-skipped flag and spawn settings.
 * @returns The parsed diagnosis together with its integrity signals.
 */
export declare function scoreOnce(
    opts: ScoreOnceOptions,
): Promise<ScoreOnceResult>;
|
|
331
|
+
/**
 * Read-side scoring context for one episode: the arms, the anchors and the
 * assembled prompt input.
 */
export interface RewardScoringContext {
    episode: EpisodeRecord;
    baselineSkipped: boolean;
    /** Prompt input whose `armOrder` defaults to `main-first`, with no integrity hint set. */
    promptInput: RewardAgentPromptInput;
}
|
|
338
|
+
/**
 * Load everything the judge needs to score one episode, WITHOUT spawning: read
 * the episode and both arms (the baseline is omitted when skipped), map the
 * anchors, and read the change artifacts.
 *
 * Shared by {@link runRewardAgent} (single sample) and by the statistical
 * aggregator, which calls {@link scoreOnce} k times over the same context with
 * a swapped `armOrder`.
 *
 * @param repoRoot Repository root.
 * @param episodeId Identifier of the episode to load.
 * @returns The scoring context for that episode.
 */
export declare function loadRewardScoringContext(
    repoRoot: string,
    episodeId: string,
): Promise<RewardScoringContext>;
|
|
346
|
+
/**
 * Run the reward agent end-to-end for one episode (single sample):
 *   1. read the episode and both arms (the baseline is omitted when skipped),
 *   2. map the anchors and assemble the prompt,
 *   3. call {@link scoreOnce}: spawn the judge, parse with the bounded repair
 *      loop, recompute/validate the advantage (including ① gate-not-blend) and
 *      compute ④ integrity,
 *   4. derive the ⑤ single-sample verdict, write `diagnosis.json` (schema 2)
 *      via {@link writeDiagnosis}, then advance the episode stage to `scored`.
 *
 * Behaviour is byte-compatible with the historical single-call path: there are
 * no extra spawns, `armOrder` defaults to `main-first`, and the new schema-2
 * fields are OPTIONAL (a reader that ignores them sees the same diagnosis). The
 * statistical layer (Batch 3) wraps {@link scoreOnce} rather than calling this
 * function directly.
 *
 * The ONLY write path is the episode dir.
 *
 * @param opts Repository root, episode id and spawn settings.
 * @returns The written diagnosis, its path, and the advanced episode record.
 * @throws {RewardAgentInvocationError} When the agent invocation fails (agent
 *   crash); such errors are NOT repaired.
 */
export declare function runRewardAgent(
    opts: RunRewardAgentOptions,
): Promise<RunRewardAgentResult>;
|
|
365
|
+
/**
 * Derive the ⑤ single-sample verdict.
 *
 * A verdict emitted by the judge wins. Otherwise the verdict is read off the
 * sign of the advantage: `no-gap` when the judge abstained, and `undefined`
 * when the baseline was skipped and only the main arm was scored. The
 * statistical layer overrides the result with `insufficient-signal` when the
 * advantage is within the A/A noise floor.
 *
 * @param parsed The parsed judge response.
 * @returns The derived verdict, or `undefined` when none can be derived.
 */
export declare function deriveSingleSampleVerdict(
    parsed: ParsedDiagnosis,
): RewardVerdict | undefined;
|
|
373
|
+
/**
 * ④ Compute the integrity signals for one duel.
 *
 * @param parsed The parsed judge response for the duel.
 * @param anchors The anchors mapped from both arms.
 * @param integrityHint The tamper hint, or `null` when there is none.
 * @returns The integrity signals.
 */
export declare function computeIntegrity(
    parsed: ParsedDiagnosis,
    anchors: DiagnosisAnchors,
    integrityHint: {
        suspected: boolean;
        flags: string[];
    } | null,
): DiagnosisIntegrity;
|
|
378
|
+
export {};
|
|
379
|
+
//# sourceMappingURL=reward-agent.d.ts.map
|