synergyspec-selfevolving 1.4.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. package/README.md +31 -18
  2. package/dist/commands/learn.d.ts +12 -1
  3. package/dist/commands/learn.js +158 -11
  4. package/dist/commands/self-evolution-episode.d.ts +177 -0
  5. package/dist/commands/self-evolution-episode.js +431 -0
  6. package/dist/commands/self-evolution.d.ts +12 -190
  7. package/dist/commands/self-evolution.js +114 -866
  8. package/dist/core/archive.d.ts +0 -1
  9. package/dist/core/archive.js +0 -58
  10. package/dist/core/artifact-graph/instruction-loader.d.ts +2 -4
  11. package/dist/core/artifact-graph/instruction-loader.js +3 -31
  12. package/dist/core/fitness/loss.d.ts +5 -5
  13. package/dist/core/fitness/loss.js +4 -4
  14. package/dist/core/fitness/test-failures.js +10 -2
  15. package/dist/core/project-config.d.ts +19 -0
  16. package/dist/core/project-config.js +96 -0
  17. package/dist/core/self-evolution/candidate-fitness.d.ts +23 -1
  18. package/dist/core/self-evolution/candidate-fitness.js +31 -5
  19. package/dist/core/self-evolution/candidates.d.ts +0 -9
  20. package/dist/core/self-evolution/critic-agent.d.ts +192 -0
  21. package/dist/core/self-evolution/critic-agent.js +568 -0
  22. package/dist/core/self-evolution/edits-contract.d.ts +53 -0
  23. package/dist/core/self-evolution/edits-contract.js +89 -0
  24. package/dist/core/self-evolution/episode-orchestrator.d.ts +234 -0
  25. package/dist/core/self-evolution/episode-orchestrator.js +681 -0
  26. package/dist/core/self-evolution/episode-store.d.ts +266 -0
  27. package/dist/core/self-evolution/episode-store.js +573 -0
  28. package/dist/core/self-evolution/evolution-switches.d.ts +1 -1
  29. package/dist/core/self-evolution/evolution-switches.js +5 -10
  30. package/dist/core/self-evolution/evolving-agent.d.ts +208 -0
  31. package/dist/core/self-evolution/evolving-agent.js +535 -0
  32. package/dist/core/self-evolution/host-harness.d.ts +14 -15
  33. package/dist/core/self-evolution/host-harness.js +48 -23
  34. package/dist/core/self-evolution/index.d.ts +11 -6
  35. package/dist/core/self-evolution/index.js +20 -6
  36. package/dist/core/self-evolution/line-diff.d.ts +60 -0
  37. package/dist/core/self-evolution/line-diff.js +130 -0
  38. package/dist/core/self-evolution/policy/fs-safe.d.ts +19 -0
  39. package/dist/core/self-evolution/policy/fs-safe.js +89 -0
  40. package/dist/core/self-evolution/policy/index.d.ts +13 -0
  41. package/dist/core/self-evolution/policy/index.js +13 -0
  42. package/dist/core/self-evolution/policy/policy-store.d.ts +217 -0
  43. package/dist/core/self-evolution/policy/policy-store.js +774 -0
  44. package/dist/core/self-evolution/policy/prediction-reconcile.d.ts +54 -0
  45. package/dist/core/self-evolution/policy/prediction-reconcile.js +191 -0
  46. package/dist/core/self-evolution/policy/reject-buffer.d.ts +55 -0
  47. package/dist/core/self-evolution/policy/reject-buffer.js +170 -0
  48. package/dist/core/self-evolution/promote.d.ts +1 -1
  49. package/dist/core/self-evolution/promote.js +6 -33
  50. package/dist/core/self-evolution/promotion.js +1 -2
  51. package/dist/core/self-evolution/reward-agent.d.ts +379 -0
  52. package/dist/core/self-evolution/reward-agent.js +940 -0
  53. package/dist/core/self-evolution/reward-aggregator.d.ts +59 -0
  54. package/dist/core/self-evolution/reward-aggregator.js +262 -0
  55. package/dist/core/self-evolution/scope-gate.d.ts +66 -0
  56. package/dist/core/self-evolution/scope-gate.js +107 -0
  57. package/dist/core/self-evolution/success-channel.js +2 -2
  58. package/dist/core/self-evolution/tamper-check.d.ts +24 -0
  59. package/dist/core/self-evolution/tamper-check.js +236 -0
  60. package/dist/core/self-evolution/tool-evolution.js +2 -13
  61. package/dist/core/self-evolution/verdict.d.ts +8 -5
  62. package/dist/core/self-evolution/verdict.js +4 -7
  63. package/dist/core/templates/workflows/gen-tests.js +1 -1
  64. package/dist/core/templates/workflows/learn.d.ts +3 -2
  65. package/dist/core/templates/workflows/learn.js +21 -18
  66. package/dist/core/templates/workflows/self-evolving.d.ts +6 -4
  67. package/dist/core/templates/workflows/self-evolving.js +62 -172
  68. package/dist/core/trajectory/scrub.d.ts +27 -0
  69. package/dist/core/trajectory/scrub.js +79 -0
  70. package/dist/core/trajectory/skeleton.d.ts +27 -1
  71. package/dist/core/trajectory/skeleton.js +152 -8
  72. package/dist/dashboard/data.d.ts +25 -51
  73. package/dist/dashboard/data.js +68 -180
  74. package/dist/dashboard/react-client.js +458 -503
  75. package/dist/dashboard/react-styles.js +3 -3
  76. package/dist/dashboard/server.js +23 -17
  77. package/dist/ui/ascii-patterns.d.ts +7 -15
  78. package/dist/ui/ascii-patterns.js +123 -54
  79. package/dist/ui/welcome-screen.d.ts +0 -14
  80. package/dist/ui/welcome-screen.js +16 -35
  81. package/package.json +1 -1
  82. package/dist/core/self-evolution/ga-selection.d.ts +0 -94
  83. package/dist/core/self-evolution/ga-selection.js +0 -153
  84. package/dist/core/self-evolution/proposer-agent.d.ts +0 -182
  85. package/dist/core/self-evolution/proposer-agent.js +0 -326
  86. package/dist/core/self-evolution/replay-runner.d.ts +0 -100
  87. package/dist/core/self-evolution/replay-runner.js +0 -170
  88. package/dist/core/self-evolution/replay.d.ts +0 -45
  89. package/dist/core/self-evolution/replay.js +0 -56
  90. package/dist/core/self-evolution/template-variants.d.ts +0 -62
  91. package/dist/core/self-evolution/template-variants.js +0 -171
  92. package/dist/core/self-evolution/trajectory.d.ts +0 -65
  93. package/dist/core/self-evolution/trajectory.js +0 -185
package/dist/core/self-evolution/reward-agent.d.ts +379 -0
@@ -0,0 +1,379 @@
1
+ /**
2
+ * 奖励智能体 REWARD AGENT — loop v2 (self-evolution as in-context RL).
3
+ *
4
+ * LLM as judge. Reads ONE episode's two arms from the {@link import('./episode-store.js')}
5
+ * episode store and CALCULATES 算分 reward(主臂) and reward(基线臂), each in
6
+ * [0,1], anchored on the objective evidence on disk (tests · health · 轨迹度量).
7
+ * advantage = reward(主臂) − reward(基线臂). It finds errors / names gaps with a
8
+ * suggested direction = 文本梯度 textual gradient; it NEVER edits any file; when
9
+ * there is no nameable gap it 弃权 abstains.
10
+ *
11
+ * It is a SIBLING of the 演进智能体 EVOLVING AGENT (optimizer.step; ONE bounded
12
+ * edit ≤L; never scores) — never parent-child. The reward agent runs FIRST (it
13
+ * scores), the 演进智能体 runs AFTER (it edits); each is code-spawned in a fresh
14
+ * context via {@link runHeadlessAgent} from `./host-harness.js` — NOT a skill,
15
+ * the same headless-spawn seam the canonical proposer uses.
16
+ *
17
+ * Write boundary: this module's ONLY write path is the episode dir — it calls
18
+ * {@link writeDiagnosis} and {@link advanceEpisodeStage} (to `scored`). It never
19
+ * touches a canonical target file, a 策略 POLICY snapshot, or the change dir
20
+ * (which it reads, never copies). The loss/health numbers from
21
+ * `src/core/fitness/loss.ts` stay on disk as ANCHORS — the reward score itself
22
+ * is the JUDGE'S OWN, anchored on but not equal to the loss.
23
+ *
24
+ * Uses the same fenced-block agent idiom as the EVOLVING AGENT: one fenced-block
25
+ * output discipline, a bounded repair loop on parse/validation failure,
26
+ * fail-closed plain (Zod-free) validation, and atomic episode-store writes.
27
+ */
28
+ import { spawn as nodeSpawn } from 'node:child_process';
29
+ import { type EpisodeRecord } from './episode-store.js';
30
+ export declare class RewardAgentOutputInvalid extends Error {
31
+ constructor(message: string);
32
+ }
33
+ export declare class RewardAgentInvocationError extends Error {
34
+ constructor(stderr: string);
35
+ }
36
+ /**
37
+ * The objective record each arm persists as `objective.json`, as the 奖励智能体
38
+ * REWARD AGENT READS it. A null-safe SUPERSET of the canonical on-disk
39
+ * {@link import('./critic-agent.js').ArmObjective} (which is the flat shape both
40
+ * arms WRITE): the MAIN ARM may instead carry the nested-learn fallback fields
41
+ * (`testMetrics`, `healthSignal`, a nested {@link import('../fitness/loss.js').PerChangeLoss}
42
+ * `loss`) when an older capture path wrote a raw {@link import('../fitness/sample.js').FitnessSample}.
43
+ * This module reads ONLY the fields it maps to anchors and is null-safe to
44
+ * either shape, so a divergence in the exact layout cannot crash the judge.
45
+ *
46
+ * Named distinctly from the canonical `ArmObjective` (the barrel re-exports the
47
+ * critic-agent declaration) so there is no ambiguous duplicate export.
48
+ */
49
+ export interface RewardArmObjectiveInput {
50
+ /** Flat pass rate in [0,1] (CRITIC AGENT shape). */
51
+ passRate?: number | null;
52
+ testsTotal?: number | null;
53
+ testsFailed?: number | null;
54
+ /** Flat normalized health penalty in [0,1] (CRITIC AGENT shape). */
55
+ healthPenalty?: number | null;
56
+ /** Flat blended loss in [0,1] (CRITIC AGENT shape) OR nested PerChangeLoss (MAIN ARM shape). */
57
+ loss?: number | null | {
58
+ functionalLoss?: number | null;
59
+ healthPenalty?: number | null;
60
+ loss?: number | null;
61
+ };
62
+ verified?: boolean;
63
+ observedStatus?: string | null;
64
+ measuredAt?: string | null;
65
+ /** Whether a real test-runner invocation was OBSERVED (P2 confidence calibration). */
66
+ testRunObserved?: boolean;
67
+ /** Failing test ids + assertion lines parsed from the observed runner output (P1 contrast). */
68
+ observedFailures?: {
69
+ testId: string;
70
+ file?: string;
71
+ assertion?: string;
72
+ }[];
73
+ /** MAIN ARM shape (FitnessSample): functional metrics live under testMetrics. */
74
+ testMetrics?: {
75
+ passRate?: number | null;
76
+ } | null;
77
+ /** MAIN ARM shape (FitnessSample): the raw health signal, distinct from loss.healthPenalty. */
78
+ healthSignal?: number | null;
79
+ /** Additive/forward-compatible fields pass through unread. */
80
+ [key: string]: unknown;
81
+ }
82
+ /** The anchors block mapped from both arms' objectives (nulls where skipped). */
83
+ export interface DiagnosisAnchors {
84
+ mainLoss: number | null;
85
+ baselineLoss: number | null;
86
+ mainPassRate: number | null;
87
+ baselinePassRate: number | null;
88
+ mainHealthPenalty: number | null;
89
+ baselineHealthPenalty: number | null;
90
+ /**
91
+ * The VERBOSITY component of the code-health signal, surfaced separately so
92
+ * the judge can apply a compression term (① composite reward). `null` when no
93
+ * verbosity sub-signal was captured — the rubric then judges verbosity from
94
+ * the artifacts alone. Additive/optional: absent on schemaVersion-1 captures.
95
+ */
96
+ mainVerbosity?: number | null;
97
+ baselineVerbosity?: number | null;
98
+ }
99
+ /** One named error the judge found, addressed to a quoted span in a real file. */
100
+ export interface DiagnosisError {
101
+ arm: 'main' | 'baseline';
102
+ description: string;
103
+ evidence: {
104
+ file: string;
105
+ quote: string;
106
+ };
107
+ }
108
+ /**
109
+ * The failure mode of a gap (⑥ weakness-class), enabling the 演进智能体 EVOLVING
110
+ * AGENT to aim its bounded edit: `forgetting` (a capability the baseline had and
111
+ * the main arm lost), `boundary` (an edge/limit case), `rare` (a low-frequency
112
+ * scenario), `logic` (an outright wrong behavior), `verbosity` (bloat/redundancy
113
+ * to prune), or `other`.
114
+ */
115
+ export type WeaknessClass = 'forgetting' | 'boundary' | 'rare' | 'logic' | 'verbosity' | 'other';
116
+ export type GapSeverity = 'high' | 'medium' | 'low';
117
+ /** One nameable gap, addressed to a heading (`section`) in a target file. */
118
+ export interface DiagnosisGap {
119
+ file: string;
120
+ /** A heading in the target file; the `'*'` wildcard is allowed. */
121
+ section: string;
122
+ description: string;
123
+ /** ⑥ Optional weakness-class — the failure mode this gap represents. */
124
+ weaknessClass?: WeaknessClass;
125
+ /** ⑥ Optional severity — high-severity gaps are addressed first. */
126
+ severity?: GapSeverity;
127
+ }
128
+ /**
129
+ * The judge's overall verdict (⑤ ternary). `no-gap` is the existing abstain
130
+ * (nothing to improve); `insufficient-signal` is the NEW honest abstain (the
131
+ * judge cannot tell — set by the statistical layer when the advantage is within
132
+ * the A/A noise floor, or by the tamper check in block mode). `main-better` /
133
+ * `baseline-better` / `tie` are scored outcomes.
134
+ */
135
+ export type RewardVerdict = 'main-better' | 'baseline-better' | 'tie' | 'insufficient-signal' | 'no-gap';
136
+ /** ② Statistical summary, populated by the reward aggregator (absent for single-sample). */
137
+ export interface DiagnosisStats {
138
+ samples: number;
139
+ advantageMean: number | null;
140
+ advantageStdev: number | null;
141
+ /** Judge jitter measured by an A/A pair; `|advantageMean| < noiseFloor` ⇒ insufficient-signal. */
142
+ noiseFloor: number | null;
143
+ sequentialDecision: 'accept' | 'reject' | 'continue' | 'single';
144
+ }
145
+ /** ④ Integrity / anti-hacking signals. */
146
+ export interface DiagnosisIntegrity {
147
+ /** A test-tamper signal was detected on the main arm (Batch 4 wires the detector). */
148
+ testTamperSuspected: boolean;
149
+ /**
150
+ * Signed disagreement between the judge's advantage and the loss-implied
151
+ * advantage (baselineLoss − mainLoss): the judge loved an arm the verifier
152
+ * dislikes. `null` when either loss anchor is missing.
153
+ */
154
+ judgeVerifierDivergence: number | null;
155
+ /** Human-readable integrity flags (empty when clean). */
156
+ flags: string[];
157
+ }
158
+ /**
159
+ * The `diagnosis.json` the 奖励智能体 REWARD AGENT writes. schemaVersion 2 adds
160
+ * the OPTIONAL `verdict` / `confidence` / `stats` / `integrity` fields (a
161
+ * schemaVersion-1 reader ignores them; a schemaVersion-2 reader tolerates their
162
+ * absence). advantage = reward(主臂) − reward(基线臂); `null` when the baseline
163
+ * arm was skipped (no comparison possible) OR when the judge 弃权 abstained.
164
+ */
165
+ export interface RewardDiagnosis {
166
+ schemaVersion: 1 | 2;
167
+ episodeId: string;
168
+ changeName: string;
169
+ targetId: string;
170
+ policyVersions: {
171
+ main: number | null;
172
+ baseline: number | null;
173
+ };
174
+ rewardMain: number;
175
+ /** null when the baseline arm was skipped. */
176
+ rewardBaseline: number | null;
177
+ /** reward(主臂) − reward(基线臂); null when baseline skipped or abstained. */
178
+ advantage: number | null;
179
+ anchors: DiagnosisAnchors;
180
+ errors: DiagnosisError[];
181
+ gaps: DiagnosisGap[];
182
+ /** 文本梯度 textual gradient; null only when abstained. */
183
+ textualGradient: string | null;
184
+ abstained: boolean;
185
+ abstainReason?: string;
186
+ /** ⑤ Overall verdict (optional; derived single-sample, set by the aggregator otherwise). */
187
+ verdict?: RewardVerdict;
188
+ /** ⑤/② Confidence in [0,1] (optional; set by the statistical layer). */
189
+ confidence?: number | null;
190
+ /** ② Statistical summary (optional; absent for single-sample). */
191
+ stats?: DiagnosisStats;
192
+ /** ④ Integrity / anti-hacking signals (optional). */
193
+ integrity?: DiagnosisIntegrity;
194
+ }
195
+ /** Assembled input for {@link assembleRewardAgentPrompt}. */
196
+ export interface RewardAgentPromptInput {
197
+ changeName: string;
198
+ targetId: string;
199
+ policyVersions: {
200
+ main: number | null;
201
+ baseline: number | null;
202
+ };
203
+ /** 主智能体 MAIN AGENT (policy vN+1) capture. */
204
+ mainArm: {
205
+ skeleton: object | null;
206
+ /** Raw transcript text (jsonl) — bounded by {@link assembleRewardAgentPrompt}. */
207
+ transcript: string | null;
208
+ objective: RewardArmObjectiveInput;
209
+ };
210
+ /**
211
+ * CRITIC AGENT(基线智能体 baseline agent, policy vN)capture. `null` when the
212
+ * baseline arm was SKIPPED — the BASELINE ARM block is OMITTED entirely and
213
+ * the prompt states no comparison is possible (null rewardBaseline/advantage).
214
+ */
215
+ baselineArm: {
216
+ skeleton: object | null;
217
+ transcript: string | null;
218
+ objective: RewardArmObjectiveInput;
219
+ } | null;
220
+ /** Bounded excerpts of the 5 artifacts + test-report.md read from the change dir. */
221
+ artifacts: {
222
+ file: string;
223
+ content: string;
224
+ }[];
225
+ /** Pre-mapped anchors (loss/health/passRate) from both arms. */
226
+ anchors: DiagnosisAnchors;
227
+ /**
228
+ * ③ Order in which the two arms are presented to the judge. Swapping the order
229
+ * across samples cancels the LLM's position bias. Defaults to `main-first`
230
+ * (the historical order; single-sample callers keep byte-identical prompts).
231
+ */
232
+ armOrder?: 'main-first' | 'baseline-first';
233
+ /**
234
+ * ④ An optional tamper signal computed BEFORE scoring (Batch 4). When present
235
+ * and `suspected`, the judge is told not to reward passing tests that were
236
+ * weakened. Omitted ⇒ the prompt is unchanged.
237
+ */
238
+ integrityHint?: {
239
+ suspected: boolean;
240
+ flags: string[];
241
+ } | null;
242
+ }
243
+ /**
244
+ * Assemble the 奖励智能体 REWARD AGENT prompt. Pure (no I/O); exported for golden
245
+ * tests. Ordered blocks:
246
+ * 1. PRELUDE — the judge contract.
247
+ * 2. MAIN ARM (主智能体, policy vN+1) — skeleton + bounded transcript excerpt.
248
+ * 3. BASELINE ARM (CRITIC AGENT(基线智能体), policy vN) — same; OMITTED
249
+ * ENTIRELY when the baseline arm was skipped (a one-line note replaces it,
250
+ * demanding null rewardBaseline/advantage).
251
+ * 4. ARTIFACTS — the 5 artifacts + test-report.md, bounded excerpts.
252
+ * 5. OBJECTIVE EVIDENCE — both arms' anchors verbatim (the on-disk loss/health
253
+ * numbers that anchor the score).
254
+ */
255
+ export declare function assembleRewardAgentPrompt(input: RewardAgentPromptInput): string;
256
+ interface ParsedDiagnosis {
257
+ rewardMain: number;
258
+ rewardBaseline: number | null;
259
+ advantage: number | null;
260
+ errors: DiagnosisError[];
261
+ gaps: DiagnosisGap[];
262
+ textualGradient: string | null;
263
+ abstained: boolean;
264
+ abstainReason?: string;
265
+ /** ⑤ Optional judge-emitted verdict (validated against the enum when present). */
266
+ verdict?: RewardVerdict;
267
+ /** ⑤ Optional judge-emitted confidence in [0,1]. */
268
+ confidence?: number | null;
269
+ }
270
+ /**
271
+ * Parse the judge's `json:diagnosis` block with a strict one-block discipline:
272
+ * exactly one fenced block, well-formed JSON, then fail-closed shape + range
273
+ * validation.
274
+ *
275
+ * Throws {@link RewardAgentOutputInvalid} on any violation (the repair loop
276
+ * re-prompts with the concrete message appended).
277
+ */
278
+ export declare function parseRewardAgentResponse(text: string): ParsedDiagnosis;
279
+ /** Map an arm's objective to its (loss, passRate, healthPenalty, verbosity) anchors. */
280
+ export declare function mapArmAnchors(objective: RewardArmObjectiveInput | null | undefined): {
281
+ loss: number | null;
282
+ passRate: number | null;
283
+ healthPenalty: number | null;
284
+ verbosity: number | null;
285
+ };
286
+ /** Build the {@link DiagnosisAnchors} block from both arms' objectives. */
287
+ export declare function buildAnchors(mainObjective: RewardArmObjectiveInput, baselineObjective: RewardArmObjectiveInput | null): DiagnosisAnchors;
288
+ export interface RunRewardAgentOptions {
289
+ repoRoot: string;
290
+ episodeId: string;
291
+ /** Injected for tests; defaults to node's spawn. */
292
+ spawn?: typeof nodeSpawn;
293
+ /** Override the agent binary; defaults to the host harness's default. */
294
+ binary?: string;
295
+ /** Bounded re-prompts on parse/validation failure (default 2 ⇒ at most 3 spawns). */
296
+ maxRepairAttempts?: number;
297
+ }
298
+ export interface RunRewardAgentResult {
299
+ diagnosis: RewardDiagnosis;
300
+ /** Absolute path of the written `diagnosis.json`. */
301
+ diagnosisPath: string;
302
+ /** The episode record after advancing to `scored`. */
303
+ episode: EpisodeRecord;
304
+ }
305
+ export interface ScoreOnceOptions {
306
+ /** The fully-assembled prompt input (carries arms, anchors, armOrder, integrityHint). */
307
+ promptInput: RewardAgentPromptInput;
308
+ /** Whether the baseline arm was skipped (drives the null-baseline contract). */
309
+ baselineSkipped: boolean;
310
+ /** cwd for the headless spawn (the repo/change root). */
311
+ repoRoot: string;
312
+ spawn?: typeof nodeSpawn;
313
+ binary?: string;
314
+ /** Bounded re-prompts on parse/validation failure (default 2 ⇒ at most 3 spawns). */
315
+ maxRepairAttempts?: number;
316
+ }
317
+ export interface ScoreOnceResult {
318
+ parsed: ParsedDiagnosis;
319
+ /** ④ Integrity signals computed from this duel's parse + anchors + tamper hint. */
320
+ integrity: DiagnosisIntegrity;
321
+ }
322
+ /**
323
+ * Score ONE judged duel: spawn the judge (fresh context), parse with the bounded
324
+ * repair loop (re-prompting with the concrete error appended), recompute and
325
+ * validate the advantage (incl. ① gate-not-blend), and compute ④ integrity
326
+ * signals. Does NOT write `diagnosis.json` — the caller (single-sample
327
+ * {@link runRewardAgent}, or the statistical aggregator) owns the write. This is
328
+ * the unit the aggregator calls k times for the A/A noise floor + SPRT.
329
+ */
330
+ export declare function scoreOnce(opts: ScoreOnceOptions): Promise<ScoreOnceResult>;
331
+ /** The read-side scoring context for one episode: arms, anchors, assembled prompt input. */
332
+ export interface RewardScoringContext {
333
+ episode: EpisodeRecord;
334
+ baselineSkipped: boolean;
335
+ /** Prompt input with `armOrder` defaulting to `main-first` and no integrity hint set. */
336
+ promptInput: RewardAgentPromptInput;
337
+ }
338
+ /**
339
+ * Load everything the judge needs to score one episode WITHOUT spawning: read
340
+ * the episode + both arms (baseline omitted when skipped), map anchors, and read
341
+ * the change artifacts. Shared by {@link runRewardAgent} (single sample) and the
342
+ * statistical aggregator (which calls {@link scoreOnce} k times over the same
343
+ * context with swapped `armOrder`).
344
+ */
345
+ export declare function loadRewardScoringContext(repoRoot: string, episodeId: string): Promise<RewardScoringContext>;
346
+ /**
347
+ * Run the 奖励智能体 REWARD AGENT end-to-end for one episode (single sample):
348
+ * 1. read the episode + both arms (baseline omitted when skipped),
349
+ * 2. map anchors and assemble the prompt,
350
+ * 3. {@link scoreOnce} — spawn the judge, parse with the bounded repair loop,
351
+ * recompute/validate the advantage (incl. ① gate-not-blend), compute ④
352
+ * integrity,
353
+ * 4. derive the ⑤ single-sample verdict and write `diagnosis.json` (schema 2)
354
+ * via {@link writeDiagnosis}, then advance the episode stage to `scored`.
355
+ *
356
+ * Behaviour is byte-compatible with the historical single-call path: no extra
357
+ * spawns, `armOrder` defaults to `main-first`, and the new schema-2 fields are
358
+ * OPTIONAL (a reader that ignores them sees the same diagnosis). The statistical
359
+ * layer (Batch 3) wraps {@link scoreOnce} instead of calling this directly.
360
+ *
361
+ * The ONLY write path is the episode dir. Invocation errors (agent crash) are
362
+ * NOT repaired — they propagate as {@link RewardAgentInvocationError}.
363
+ */
364
+ export declare function runRewardAgent(opts: RunRewardAgentOptions): Promise<RunRewardAgentResult>;
365
+ /**
366
+ * Derive the ⑤ single-sample verdict. A judge-emitted `verdict` wins; otherwise
367
+ * it is read off the advantage sign (no-gap when abstained; undefined when the
368
+ * baseline was skipped and only the main arm was scored). The statistical layer
369
+ * overrides this with `insufficient-signal` when the advantage is within the
370
+ * A/A noise floor.
371
+ */
372
+ export declare function deriveSingleSampleVerdict(parsed: ParsedDiagnosis): RewardVerdict | undefined;
373
+ /** ④ Compute integrity signals from a parsed duel, its anchors, and the tamper hint. */
374
+ export declare function computeIntegrity(parsed: ParsedDiagnosis, anchors: DiagnosisAnchors, integrityHint: {
375
+ suspected: boolean;
376
+ flags: string[];
377
+ } | null): DiagnosisIntegrity;
378
+ export {};
379
+ //# sourceMappingURL=reward-agent.d.ts.map