@oh-my-pi/pi-coding-agent 13.14.0 → 13.15.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. package/CHANGELOG.md +140 -0
  2. package/package.json +10 -8
  3. package/src/autoresearch/command-initialize.md +34 -0
  4. package/src/autoresearch/command-resume.md +17 -0
  5. package/src/autoresearch/contract.ts +332 -0
  6. package/src/autoresearch/dashboard.ts +447 -0
  7. package/src/autoresearch/git.ts +243 -0
  8. package/src/autoresearch/helpers.ts +458 -0
  9. package/src/autoresearch/index.ts +693 -0
  10. package/src/autoresearch/prompt.md +227 -0
  11. package/src/autoresearch/resume-message.md +16 -0
  12. package/src/autoresearch/state.ts +386 -0
  13. package/src/autoresearch/tools/init-experiment.ts +310 -0
  14. package/src/autoresearch/tools/log-experiment.ts +833 -0
  15. package/src/autoresearch/tools/run-experiment.ts +640 -0
  16. package/src/autoresearch/types.ts +218 -0
  17. package/src/cli/args.ts +8 -2
  18. package/src/cli/initial-message.ts +58 -0
  19. package/src/config/keybindings.ts +417 -212
  20. package/src/config/model-registry.ts +1 -0
  21. package/src/config/model-resolver.ts +57 -9
  22. package/src/config/settings-schema.ts +38 -10
  23. package/src/config/settings.ts +1 -4
  24. package/src/exec/bash-executor.ts +7 -5
  25. package/src/export/html/template.css +43 -13
  26. package/src/export/html/template.generated.ts +1 -1
  27. package/src/export/html/template.html +1 -0
  28. package/src/export/html/template.js +107 -0
  29. package/src/extensibility/extensions/types.ts +31 -8
  30. package/src/internal-urls/docs-index.generated.ts +1 -1
  31. package/src/lsp/index.ts +1 -1
  32. package/src/main.ts +44 -44
  33. package/src/mcp/oauth-discovery.ts +1 -1
  34. package/src/modes/acp/acp-agent.ts +957 -0
  35. package/src/modes/acp/acp-event-mapper.ts +531 -0
  36. package/src/modes/acp/acp-mode.ts +13 -0
  37. package/src/modes/acp/index.ts +2 -0
  38. package/src/modes/components/agent-dashboard.ts +5 -4
  39. package/src/modes/components/bash-execution.ts +40 -11
  40. package/src/modes/components/custom-editor.ts +47 -47
  41. package/src/modes/components/extensions/extension-dashboard.ts +2 -1
  42. package/src/modes/components/history-search.ts +2 -1
  43. package/src/modes/components/hook-editor.ts +2 -1
  44. package/src/modes/components/hook-input.ts +8 -7
  45. package/src/modes/components/hook-selector.ts +15 -10
  46. package/src/modes/components/keybinding-hints.ts +9 -9
  47. package/src/modes/components/login-dialog.ts +3 -3
  48. package/src/modes/components/mcp-add-wizard.ts +2 -1
  49. package/src/modes/components/model-selector.ts +14 -3
  50. package/src/modes/components/oauth-selector.ts +2 -1
  51. package/src/modes/components/python-execution.ts +2 -3
  52. package/src/modes/components/session-selector.ts +2 -1
  53. package/src/modes/components/settings-selector.ts +2 -1
  54. package/src/modes/components/status-line-segment-editor.ts +2 -1
  55. package/src/modes/components/tool-execution.ts +4 -5
  56. package/src/modes/components/tree-selector.ts +3 -2
  57. package/src/modes/components/user-message-selector.ts +3 -8
  58. package/src/modes/components/user-message.ts +16 -0
  59. package/src/modes/controllers/command-controller.ts +0 -2
  60. package/src/modes/controllers/extension-ui-controller.ts +89 -4
  61. package/src/modes/controllers/input-controller.ts +29 -23
  62. package/src/modes/controllers/mcp-command-controller.ts +1 -1
  63. package/src/modes/index.ts +1 -0
  64. package/src/modes/interactive-mode.ts +17 -5
  65. package/src/modes/print-mode.ts +1 -1
  66. package/src/modes/prompt-action-autocomplete.ts +7 -7
  67. package/src/modes/rpc/rpc-mode.ts +7 -2
  68. package/src/modes/rpc/rpc-types.ts +1 -0
  69. package/src/modes/theme/theme.ts +53 -44
  70. package/src/modes/types.ts +9 -2
  71. package/src/modes/utils/hotkeys-markdown.ts +19 -19
  72. package/src/modes/utils/keybinding-matchers.ts +21 -0
  73. package/src/modes/utils/ui-helpers.ts +1 -1
  74. package/src/patch/hashline.ts +139 -127
  75. package/src/patch/index.ts +77 -59
  76. package/src/patch/shared.ts +19 -11
  77. package/src/prompts/tools/hashline.md +43 -116
  78. package/src/sdk.ts +34 -17
  79. package/src/session/agent-session.ts +123 -30
  80. package/src/session/session-manager.ts +32 -31
  81. package/src/session/streaming-output.ts +87 -37
  82. package/src/tools/ask.ts +56 -30
  83. package/src/tools/bash-interactive.ts +2 -6
  84. package/src/tools/bash-interceptor.ts +1 -39
  85. package/src/tools/bash-skill-urls.ts +1 -1
  86. package/src/tools/browser.ts +1 -1
  87. package/src/tools/gemini-image.ts +1 -1
  88. package/src/tools/python.ts +2 -2
  89. package/src/tools/resolve.ts +1 -1
  90. package/src/utils/child-process.ts +88 -0
@@ -0,0 +1,227 @@
1
+ {{{base_system_prompt}}}
2
+
3
+ ## Autoresearch Mode
4
+
5
+ Autoresearch mode is active.
6
+
7
+ {{#if has_goal}}
8
+ Primary goal:
9
+ {{goal}}
10
+ {{else}}
11
+ Primary goal is documented in `autoresearch.md` for this session.
12
+ {{/if}}
13
+
14
+ Working directory:
15
+ `{{working_dir}}`
16
+
17
+ You are running an autonomous experiment loop. Keep iterating until the user interrupts you or the configured maximum iteration count is reached.
18
+ {{#if has_program}}
19
+
20
+ ### Local Playbook
21
+
22
+ `autoresearch.program.md` exists at `{{program_path}}`.
23
+
24
+ Use it as a repo-local strategy overlay for this session. `autoresearch.md` remains the source of truth for benchmark, scope, and constraints.
25
+ {{/if}}
26
+ {{#if has_recent_results}}
27
+
28
+ ### Current Segment Snapshot
29
+
30
+ - segment: `{{current_segment}}`
31
+ - runs in current segment: `{{current_segment_run_count}}`
32
+ {{#if has_baseline_metric}}
33
+ - baseline `{{metric_name}}`: `{{baseline_metric_display}}`
34
+ {{/if}}
35
+ {{#if has_best_result}}
36
+ - best kept `{{metric_name}}`: `{{best_metric_display}}`{{#if best_run_number}} from run `#{{best_run_number}}`{{/if}}
37
+ {{/if}}
38
+
39
+ Recent runs:
40
+ {{#each recent_results}}
41
+ - run `#{{run_number}}`: `{{status}}` `{{metric_display}}` — {{description}}
42
+ {{#if has_asi_summary}}
43
+ ASI: {{asi_summary}}
44
+ {{/if}}
45
+ {{/each}}
46
+ {{/if}}
47
+ {{#if has_pending_run}}
48
+
49
+ ### Pending Run
50
+
51
+ An unlogged run artifact exists at `{{pending_run_directory}}`.
52
+
53
+ - run: `#{{pending_run_number}}`
54
+ - command: `{{pending_run_command}}`
55
+ {{#if has_pending_run_metric}}
56
+ - parsed `{{metric_name}}`: `{{pending_run_metric_display}}`
57
+ {{/if}}
58
+ - result status: {{#if pending_run_passed}}passed{{else}}failed{{/if}}
59
+ - finish the `log_experiment` step before starting another benchmark
60
+ {{/if}}
61
+
62
+ ### Available tools
63
+
64
+ - `init_experiment` — initialize or reset the experiment session for the current optimization target.
65
+ - `run_experiment` — run a benchmark or experiment command with timing, output capture, structured metric parsing, and optional backpressure checks.
66
+ - `log_experiment` — record the result, update the dashboard, persist JSONL history, auto-commit kept experiments, and auto-revert discarded or failed experiments.
67
+
68
+ ### Operating protocol
69
+
70
+ 1. Understand the target before touching code.
71
+ - Read the relevant source files.
72
+ - Identify the true bottleneck or quality constraint.
73
+ - Check existing scripts, benchmark harnesses, and config files.
74
+ - Verify prerequisites, one-time setup, and benchmark inputs before the first run of a segment.
75
+ 2. Keep your notes in `autoresearch.md`.
76
+ - Record the goal, the benchmark command, the primary metric, important secondary metrics, the files in scope, hard constraints, preflight requirements, and the benchmark comparability invariant.
77
+ - Update the notes whenever the strategy changes.
78
+ - Keep durable conclusions in `autoresearch.md`.
79
+ - Use `autoresearch.ideas.md` for deferred experiment ideas that are promising but not active yet.
80
+ 3. Use `autoresearch.sh` as the canonical benchmark entrypoint.
81
+ - If it does not exist yet, create it.
82
+ - Make it print structured metric lines in the form `METRIC name=value`.
83
+ - Use the same workload every run unless you intentionally re-initialize with a new segment.
84
+ - Keep the measurement harness, evaluator, and fixed benchmark inputs stable unless you intentionally start a new segment and document the change.
85
+ 4. Initialize the loop with `init_experiment` before the first logged run of a segment.
86
+ 5. Run a baseline first.
87
+ - Establish the baseline metric before attempting optimizations.
88
+ - Track secondary metrics only when they matter to correctness, quality, or obvious regressions.
89
+ 6. Iterate.
90
+ - Make one coherent experiment at a time.
91
+ - Run `run_experiment`.
92
+ - Interpret the result honestly.
93
+ - Call `log_experiment` after every run.
94
+ 7. Keep the primary metric as the decision maker.
95
+ - `keep` when the primary metric improves.
96
+ - `discard` when it regresses or stays flat, unless an equal or near-equal result materially simplifies the implementation (see step 9).
97
+ - `crash` when the run fails.
98
+ - `checks_failed` when the benchmark passes but backpressure checks fail.
99
+ 8. Record ASI on every `log_experiment` call.
100
+ - At minimum include `hypothesis`.
101
+ - On `discard`, `crash`, or `checks_failed`, also include `rollback_reason` and `next_action_hint`.
102
+ - Use ASI to capture what you learned, not just what you changed.
103
+ 9. Prefer simpler wins.
104
+ - Remove dead ends.
105
+ - Keep equal or near-equal results when they materially simplify the implementation.
106
+ - Do not keep ugly complexity for tiny gains unless the payoff is clearly worth it.
107
+ - Do not thrash between unrelated ideas without writing down the conclusion.
108
+ 10. When confidence is low, confirm.
109
+ - The dashboard confidence score compares the best observed improvement against the observed noise floor.
110
+ - Below `1.0x` usually means the improvement is within noise.
111
+ - Re-run promising changes when needed before keeping them.
112
+
113
+ ### Benchmark harness guidance
114
+
115
+ Your benchmark script SHOULD:
116
+
117
+ - live at `autoresearch.sh`
118
+ - run from `{{working_dir}}`
119
+ - fail with a non-zero exit status on invalid runs
120
+ - print the primary metric as `METRIC {{default_metric_name}}=<number>` or another explicit metric name chosen during initialization
121
+ - print secondary metrics as additional `METRIC name=value` lines
122
+ - avoid extra randomness when possible
123
+ - use repeated samples and median-style summaries for fast benchmarks
124
+ - preserve the comparability invariant for the current segment
125
+ - keep the ground-truth evaluator and fixed benchmark inputs unchanged unless the segment is explicitly re-initialized
126
+
127
+ ### Notes file template
128
+
129
+ Keep `autoresearch.md` concise and current.
130
+
131
+ Suggested structure:
132
+
133
+ ```md
134
+ # Autoresearch
135
+
136
+ ## Goal
137
+ {{#if has_goal}}
138
+ - {{goal}}
139
+ {{else}}
140
+ - document the active target here before the first benchmark
141
+ {{/if}}
142
+
143
+ ## Benchmark
144
+ - command:
145
+ - primary metric:
146
+ - metric unit:
147
+ - direction:
148
+ - secondary metrics: memory_mb, rss_mb
149
+
150
+ ## Files in Scope
151
+ - path:
152
+
153
+ ## Off Limits
154
+ - path:
155
+
156
+ ## Constraints
157
+ - rule:
158
+
159
+ ## Baseline
160
+ - metric:
161
+ - notes:
162
+
163
+ ## Current best
164
+ - metric:
165
+ - why it won:
166
+
167
+ ## What's Been Tried
168
+ - experiment:
169
+ - lesson:
170
+ ```
171
+
172
+ ### Guardrails
173
+
174
+ - Do not game the benchmark.
175
+ - Do not overfit to synthetic inputs if the real workload is broader.
176
+ - Preserve correctness.
177
+ - Only modify files that are explicitly in scope for the current session.
178
+ - Do not use the general shell tool for file mutations during autoresearch. Use `write`, `edit`, or `ast_edit` for scoped code changes and `run_experiment` for benchmark execution.
179
+ - If you create `autoresearch.checks.sh`, treat it as a hard gate for `keep`.
180
+ - If the user sends another message while a run is in progress, finish the current run and logging cycle first, then address the new input in the next iteration.
181
+
182
+ {{#if has_autoresearch_md}}
183
+ ### Resume mode
184
+
185
+ `autoresearch.md` already exists at `{{autoresearch_md_path}}`.
186
+
187
+ Resume from the existing notes:
188
+
189
+ - read `autoresearch.md`
190
+ - inspect recent git history
191
+ - inspect `autoresearch.jsonl`
192
+ - continue from the most promising unfinished direction on the current protected branch
193
+
194
+ {{else}}
195
+ ### Initial setup
196
+
197
+ `autoresearch.md` does not exist yet.
198
+
199
+ Create the experiment workspace before the first benchmark:
200
+
201
+ - write `autoresearch.md`
202
+ - write `autoresearch.sh`
203
+ - optionally write `autoresearch.checks.sh`
204
+ - run `init_experiment`
205
+ - run and log the baseline
206
+
207
+ {{/if}}
208
+ {{#if has_checks}}
209
+ ### Backpressure checks
210
+
211
+ `autoresearch.checks.sh` exists at `{{checks_path}}` and runs automatically after passing benchmark runs.
212
+
213
+ Treat failing checks as a failed experiment:
214
+
215
+ - do not `keep` a run when checks fail
216
+ - log it as `checks_failed`
217
+ - diagnose the regression before continuing
218
+
219
+ {{/if}}
220
+ {{#if has_ideas}}
221
+ ### Ideas backlog
222
+
223
+ `autoresearch.ideas.md` exists at `{{ideas_path}}`.
224
+
225
+ Use it to keep promising but deferred experiments. `autoresearch.md` should hold durable conclusions; `autoresearch.ideas.md` is the scratch backlog. Prune stale ideas when they are disproven or superseded.
226
+
227
+ {{/if}}
@@ -0,0 +1,16 @@
1
+ Continue the autoresearch loop now.
2
+
3
+ @{{autoresearch_md_path}}
4
+
5
+ - Read `autoresearch.md` and `autoresearch.jsonl`.
6
+ - Treat `autoresearch.md` as the source of truth for the current direction, scope, and constraints.
7
+ - Inspect recent git history for context.
8
+ {{#if has_pending_run}}
9
+ - Inspect the latest unlogged `run.json` under `.autoresearch/runs/` and finish the pending `log_experiment` step before starting a new benchmark.
10
+ {{/if}}
11
+ - Continue from the most promising unfinished direction.
12
+ {{#if has_ideas}}
13
+ - Review `autoresearch.ideas.md` for deferred next steps and prune stale items.
14
+ {{/if}}
15
+ - Keep iterating until interrupted or until the configured iteration cap is reached.
16
+ - Preserve correctness and do not game the benchmark.
@@ -0,0 +1,386 @@
1
+ import * as fs from "node:fs";
2
+ import * as path from "node:path";
3
+ import type { SessionEntry } from "../session/session-manager";
4
+ import { normalizeAutoresearchList, normalizeContractPathSpec } from "./contract";
5
+ import { inferMetricUnitFromName, isBetter } from "./helpers";
6
+ import type {
7
+ AutoresearchControlEntryData,
8
+ AutoresearchJsonConfigEntry,
9
+ AutoresearchJsonRunEntry,
10
+ AutoresearchRuntime,
11
+ ExperimentResult,
12
+ ExperimentState,
13
+ MetricDef,
14
+ MetricDirection,
15
+ NumericMetricMap,
16
+ ReconstructedControlState,
17
+ ReconstructedExperimentData,
18
+ RuntimeStore,
19
+ } from "./types";
20
+
21
/**
 * Builds a blank experiment state: no results, no best metric, a
 * "lower is better" direction, the placeholder metric name "metric",
 * segment 0, and empty scope / off-limits / constraint lists.
 *
 * @returns A freshly allocated state object; arrays are not shared between calls.
 */
export function createExperimentState(): ExperimentState {
	const blank: ExperimentState = {
		results: [],
		bestMetric: null,
		bestDirection: "lower",
		metricName: "metric",
		metricUnit: "",
		secondaryMetrics: [],
		name: null,
		currentSegment: 0,
		maxExperiments: null,
		confidence: null,
		benchmarkCommand: null,
		scopePaths: [],
		offLimits: [],
		constraints: [],
		segmentFingerprint: null,
	};
	return blank;
}
40
+
41
/**
 * Builds the initial per-session runtime: autoresearch mode off, auto-resume
 * disarmed, dashboard collapsed, every "last run" field cleared, no running
 * experiment, no goal, and a blank experiment state.
 *
 * @returns A new runtime object with its own experiment state.
 */
export function createSessionRuntime(): AutoresearchRuntime {
	const runtime: AutoresearchRuntime = {
		autoresearchMode: false,
		autoResumeArmed: false,
		dashboardExpanded: false,
		lastAutoResumePendingRunNumber: null,
		lastRunChecks: null,
		lastRunDuration: null,
		lastRunAsi: null,
		lastRunArtifactDir: null,
		lastRunNumber: null,
		lastRunSummary: null,
		runningExperiment: null,
		state: createExperimentState(),
		goal: null,
	};
	return runtime;
}
58
+
59
/**
 * Copies an experiment state so the copy can be mutated independently.
 *
 * Each result gets a fresh object with a shallow copy of `metrics` and a
 * `structuredClone` of `asi` (or `undefined` when `asi` is falsy). Secondary
 * metric definitions are shallow-copied and the three string lists are
 * duplicated. All other fields are carried over by spread.
 *
 * @param state State to copy.
 * @returns The copied state.
 */
export function cloneExperimentState(state: ExperimentState): ExperimentState {
	const copyResult = (entry: ExperimentResult): ExperimentResult => {
		const asi = entry.asi ? structuredClone(entry.asi) : undefined;
		return { ...entry, metrics: { ...entry.metrics }, asi };
	};
	const results = state.results.map(copyResult);
	const secondaryMetrics = state.secondaryMetrics.map(definition => ({ ...definition }));
	return {
		...state,
		results,
		secondaryMetrics,
		scopePaths: Array.from(state.scopePaths),
		offLimits: Array.from(state.offLimits),
		constraints: Array.from(state.constraints),
	};
}
73
+
74
/**
 * Selects the results recorded for one segment, preserving their order.
 *
 * @param results All recorded results.
 * @param segment Segment number to match against `result.segment`.
 * @returns A new array holding only the matching results.
 */
export function currentResults(results: ExperimentResult[], segment: number): ExperimentResult[] {
	const matching: ExperimentResult[] = [];
	for (const entry of results) {
		if (entry.segment === segment) matching.push(entry);
	}
	return matching;
}
77
+
78
/**
 * Finds the baseline of a segment, defined here as the first result in that
 * segment whose status is "keep".
 *
 * @param results All recorded results.
 * @param segment Segment to search.
 * @returns The baseline result, or `null` when the segment has no kept run.
 */
export function findBaselineResult(results: ExperimentResult[], segment: number): ExperimentResult | null {
	for (const candidate of currentResults(results, segment)) {
		if (candidate.status === "keep") return candidate;
	}
	return null;
}
81
+
82
/**
 * Returns the primary metric of the segment's baseline result.
 *
 * @param results All recorded results.
 * @param segment Segment to search.
 * @returns The baseline's `metric`, or `null` when there is no baseline.
 */
export function findBaselineMetric(results: ExperimentResult[], segment: number): number | null {
	const baseline = findBaselineResult(results, segment);
	if (baseline === null) return null;
	return baseline.metric;
}
86
+
87
/**
 * Scans the kept results of a segment and returns the best primary metric,
 * using `isBetter` with the given direction to compare candidates.
 *
 * @param results All recorded results.
 * @param segment Segment to search.
 * @param direction Direction passed through to `isBetter`.
 * @returns The best kept metric, or `null` when the segment has no kept run.
 */
export function findBestKeptMetric(
	results: ExperimentResult[],
	segment: number,
	direction: MetricDirection,
): number | null {
	const kept = currentResults(results, segment).filter(entry => entry.status === "keep");
	let champion: number | null = null;
	for (const { metric } of kept) {
		// The first kept run always becomes the champion; later ones must beat it.
		if (champion !== null && !isBetter(metric, champion, direction)) continue;
		champion = metric;
	}
	return champion;
}
101
+
102
/**
 * Resolves the run number of the segment's baseline result.
 *
 * Uses the result's own `runNumber` when it is not `null`; otherwise falls
 * back to the baseline's 1-based position within the full `results` array.
 *
 * @param results All recorded results.
 * @param segment Segment to search.
 * @returns The run number, or `null` when no baseline exists or it cannot be located.
 */
export function findBaselineRunNumber(results: ExperimentResult[], segment: number): number | null {
	const baseline = findBaselineResult(results, segment);
	if (baseline === null) return null;
	if (baseline.runNumber !== null) return baseline.runNumber;

	const position = results.indexOf(baseline);
	if (position < 0) return null;
	return position + 1;
}
109
+
110
/**
 * Collects baseline values for secondary metrics in a segment.
 *
 * Starts from a copy of the baseline result's `metrics` (empty when there is
 * no baseline). For every known metric still missing a value, the first
 * result in the segment that reports that metric supplies it.
 *
 * @param results All recorded results.
 * @param segment Segment to search.
 * @param knownMetrics Metric definitions whose names should be filled in.
 * @returns A new map of metric name to baseline value.
 */
export function findBaselineSecondary(
	results: ExperimentResult[],
	segment: number,
	knownMetrics: MetricDef[],
): NumericMetricMap {
	const baseline = findBaselineResult(results, segment);
	const collected: NumericMetricMap = baseline ? { ...baseline.metrics } : {};
	const segmentResults = currentResults(results, segment);

	for (const { name } of knownMetrics) {
		if (collected[name] !== undefined) continue;
		for (const entry of segmentResults) {
			const reported = entry.metrics[name];
			if (reported === undefined) continue;
			collected[name] = reported;
			break;
		}
	}
	return collected;
}
129
+
130
/**
 * Computes the median of a list of numbers without mutating the input.
 *
 * @param values Numbers in any order.
 * @returns `0` for an empty list, the middle element for odd lengths, or the
 *   mean of the two middle elements for even lengths.
 */
export function sortedMedian(values: number[]): number {
	const count = values.length;
	if (count === 0) return 0;

	const ordered = Array.from(values);
	ordered.sort((a, b) => a - b);

	const upper = Math.floor(count / 2);
	const isOdd = count % 2 !== 0;
	return isOdd ? ordered[upper] : (ordered[upper - 1] + ordered[upper]) / 2;
}
139
+
140
/**
 * Computes a confidence ratio for the current segment: the distance between
 * the best kept metric and the baseline metric, divided by the median
 * absolute deviation (MAD) of the segment's metrics.
 *
 * Only results with a metric greater than zero take part in the MAD and in
 * the best-kept search. The baseline comes from `findBaselineMetric` on the
 * unfiltered results.
 *
 * @param results All recorded results.
 * @param segment Segment to evaluate.
 * @param direction Direction passed through to `isBetter`.
 * @returns The ratio, or `null` when fewer than three positive-metric results
 *   exist, the MAD is zero, there is no baseline, no kept result qualifies,
 *   or the best kept metric equals the baseline.
 */
export function computeConfidence(
	results: ExperimentResult[],
	segment: number,
	direction: MetricDirection,
): number | null {
	const positive = currentResults(results, segment).filter(entry => entry.metric > 0);
	if (positive.length < 3) return null;

	const metrics = positive.map(entry => entry.metric);
	const center = sortedMedian(metrics);
	const deviations = metrics.map(metric => Math.abs(metric - center));
	const noiseFloor = sortedMedian(deviations);
	if (noiseFloor === 0) return null;

	const baseline = findBaselineMetric(results, segment);
	if (baseline === null) return null;

	// Every entry in `positive` already has metric > 0, so only the status matters here.
	let bestKept: number | null = null;
	for (const entry of positive) {
		if (entry.status !== "keep") continue;
		if (bestKept !== null && !isBetter(entry.metric, bestKept, direction)) continue;
		bestKept = entry.metric;
	}
	if (bestKept === null || bestKept === baseline) return null;

	return Math.abs(bestKept - baseline) / noiseFloor;
}
167
+
168
/**
 * Rebuilds experiment state by replaying `autoresearch.jsonl` in `workDir`.
 *
 * Blank lines and lines that fail `JSON.parse` are skipped. A config entry
 * opens a new segment (unless it is the first config and no results have been
 * seen yet) and overwrites the segment-level settings. Any other line accepted
 * by `isRunEntry` is appended as a result; missing or invalid fields fall back
 * to defaults (`null` run number, empty commit, metric `0`, status "keep",
 * empty description, timestamp `0`, `null` confidence).
 *
 * After the replay, `bestMetric` is set to the current segment's baseline
 * metric and `confidence` is recomputed.
 *
 * @param workDir Directory expected to contain `autoresearch.jsonl`.
 * @returns `hasLog: false` with a blank state when the file does not exist,
 *   otherwise `hasLog: true` with the reconstructed state.
 */
export function reconstructStateFromJsonl(workDir: string): ReconstructedExperimentData {
	const state = createExperimentState();
	const logPath = path.join(workDir, "autoresearch.jsonl");
	if (!fs.existsSync(logPath)) return { hasLog: false, state };

	// Returns `candidate` when it is a finite number, otherwise the supplied fallback.
	const finiteOr = <T>(candidate: unknown, fallback: T): number | T =>
		typeof candidate === "number" && Number.isFinite(candidate) ? candidate : fallback;

	let segment = 0;
	let sawConfig = false;
	for (const rawLine of fs.readFileSync(logPath, "utf8").split("\n")) {
		const line = rawLine.trim();
		if (line.length === 0) continue;

		let parsed: unknown;
		try {
			parsed = JSON.parse(line) as unknown;
		} catch {
			// Unparseable lines are ignored rather than aborting the replay.
			continue;
		}

		const config = parseConfigEntry(parsed);
		if (config !== null) {
			// Every config after the first one (or after any run) starts a new segment.
			if (sawConfig || state.results.length > 0) segment += 1;
			sawConfig = true;
			state.currentSegment = segment;

			if (config.name) state.name = config.name;
			if (config.metricName) state.metricName = config.metricName;
			if (config.metricUnit !== undefined) state.metricUnit = config.metricUnit;
			if (config.bestDirection) state.bestDirection = config.bestDirection;
			if (config.benchmarkCommand !== undefined) state.benchmarkCommand = config.benchmarkCommand;

			state.scopePaths = cloneStringArray(config.scopePaths);
			state.offLimits = cloneStringArray(config.offLimits);
			state.constraints = cloneStringArray(config.constraints);
			state.segmentFingerprint =
				typeof config.segmentFingerprint === "string" ? config.segmentFingerprint : null;
			state.secondaryMetrics = hydrateMetricDefs(config.secondaryMetrics);
			continue;
		}

		if (!isRunEntry(parsed)) continue;

		const entry: ExperimentResult = {
			runNumber: finiteOr(parsed.run, null),
			commit: typeof parsed.commit === "string" ? parsed.commit : "",
			metric: finiteOr(parsed.metric, 0),
			metrics: cloneNumericMetrics(parsed.metrics),
			status: isExperimentStatus(parsed.status) ? parsed.status : "keep",
			description: typeof parsed.description === "string" ? parsed.description : "",
			timestamp: finiteOr(parsed.timestamp, 0),
			segment,
			confidence: finiteOr(parsed.confidence, null),
			asi: cloneAsi(parsed.asi),
		};
		state.results.push(entry);
		if (segment === state.currentSegment) {
			registerSecondaryMetrics(state.secondaryMetrics, entry.metrics);
		}
	}

	state.bestMetric = findBaselineMetric(state.results, state.currentSegment);
	state.confidence = computeConfidence(state.results, state.currentSegment, state.bestDirection);
	return { hasLog: true, state };
}
235
+
236
/**
 * Replays the session's "autoresearch-control" custom entries to recover the
 * current mode and goal.
 *
 * Each valid control entry sets `lastMode`, turns `autoresearchMode` on only
 * for mode "on", and replaces the goal when the entry carries one. A "clear"
 * entry always resets the goal to `null`.
 *
 * @param entries Session entries in chronological order.
 * @returns The reconstructed control state.
 */
export function reconstructControlState(entries: SessionEntry[]): ReconstructedControlState {
	const snapshot: ReconstructedControlState = { autoresearchMode: false, goal: null, lastMode: null };
	for (const entry of entries) {
		if (entry.type === "custom" && entry.customType === "autoresearch-control") {
			const control = parseControlEntry(entry.data);
			if (control === null) continue;

			snapshot.lastMode = control.mode;
			snapshot.autoresearchMode = control.mode === "on";
			snapshot.goal = control.mode === "clear" ? null : (control.goal ?? snapshot.goal);
		}
	}
	return snapshot;
}
253
+
254
/**
 * Creates an in-memory store of autoresearch runtimes keyed by session key.
 *
 * `ensure` returns the runtime already stored for a key, creating and storing
 * a new one on first use. `clear` removes the runtime for a key.
 *
 * @returns The store; its map is private to the returned object.
 */
export function createRuntimeStore(): RuntimeStore {
	const bySession = new Map<string, AutoresearchRuntime>();

	const clear = (sessionKey: string): void => {
		bySession.delete(sessionKey);
	};

	const ensure = (sessionKey: string): AutoresearchRuntime => {
		let runtime = bySession.get(sessionKey);
		if (!runtime) {
			runtime = createSessionRuntime();
			bySession.set(sessionKey, runtime);
		}
		return runtime;
	};

	return { clear, ensure };
}
269
+
270
/**
 * Appends a metric definition to `metrics` for every key in `values` that is
 * not already present by name. The unit of each new definition comes from
 * `inferMetricUnitFromName`.
 *
 * @param metrics Definition list, mutated in place.
 * @param values Metric values whose keys should be registered.
 */
function registerSecondaryMetrics(metrics: MetricDef[], values: NumericMetricMap): void {
	const known = new Set(metrics.map(definition => definition.name));
	for (const name of Object.keys(values)) {
		if (known.has(name)) continue;
		known.add(name);
		metrics.push({ name, unit: inferMetricUnitFromName(name) });
	}
}
279
+
280
/**
 * Type guard: true when `value` is a non-null object whose `type` property is
 * exactly "config". No other fields are validated.
 */
function isConfigEntry(value: unknown): value is AutoresearchJsonConfigEntry {
	return typeof value === "object" && value !== null && (value as { type?: unknown }).type === "config";
}
285
+
286
/**
 * Validates a parsed JSONL value as a config entry and returns a sanitized copy.
 *
 * Only well-typed fields are copied: `name`, `metricName`, `benchmarkCommand`
 * and `segmentFingerprint` must be non-blank strings, `metricUnit` any string,
 * and `bestDirection` either "lower" or "higher". List fields keep only their
 * string items and go through `normalizeAutoresearchList`; path lists are
 * additionally mapped through `normalizeContractPathSpec`.
 *
 * @param value Result of `JSON.parse` on one log line.
 * @returns The sanitized entry, or `null` when `value` is not a config entry.
 */
function parseConfigEntry(value: unknown): AutoresearchJsonConfigEntry | null {
	if (!isConfigEntry(value)) return null;

	const isNonBlank = (field: unknown): field is string => typeof field === "string" && field.trim().length > 0;
	const stringsOf = (items: unknown[]): string[] => items.filter((item): item is string => typeof item === "string");

	const raw = value as AutoresearchJsonConfigEntry;
	const config: AutoresearchJsonConfigEntry = { type: "config" };

	if (isNonBlank(raw.name)) config.name = raw.name;
	if (isNonBlank(raw.metricName)) config.metricName = raw.metricName;
	// An empty unit string is meaningful, so only the type is checked here.
	if (typeof raw.metricUnit === "string") config.metricUnit = raw.metricUnit;
	if (raw.bestDirection === "lower" || raw.bestDirection === "higher") {
		config.bestDirection = raw.bestDirection;
	}
	if (isNonBlank(raw.benchmarkCommand)) config.benchmarkCommand = raw.benchmarkCommand;

	if (Array.isArray(raw.secondaryMetrics)) {
		config.secondaryMetrics = normalizeAutoresearchList(stringsOf(raw.secondaryMetrics));
	}
	if (Array.isArray(raw.scopePaths)) {
		config.scopePaths = normalizeAutoresearchList(stringsOf(raw.scopePaths).map(normalizeContractPathSpec));
	}
	if (Array.isArray(raw.offLimits)) {
		config.offLimits = normalizeAutoresearchList(stringsOf(raw.offLimits).map(normalizeContractPathSpec));
	}
	if (Array.isArray(raw.constraints)) {
		config.constraints = normalizeAutoresearchList(stringsOf(raw.constraints));
	}

	if (isNonBlank(raw.segmentFingerprint)) config.segmentFingerprint = raw.segmentFingerprint;
	return config;
}
330
+
331
/**
 * Type guard for run entries in the JSONL log.
 *
 * A run entry is a plain (non-array) object whose `type` is either absent or
 * exactly "run". Arrays are rejected explicitly: they are `typeof "object"`
 * and have no `type` property, so without this check a stray JSON array line
 * would be accepted and recorded as a run made entirely of default values.
 *
 * @param value Result of `JSON.parse` on one log line.
 * @returns Whether `value` should be treated as a run entry.
 */
function isRunEntry(value: unknown): value is AutoresearchJsonRunEntry {
	if (typeof value !== "object" || value === null || Array.isArray(value)) return false;
	const { type } = value as { type?: unknown };
	return type === undefined || type === "run";
}
336
+
337
/**
 * Type guard: true when `value` is one of the four recognized status strings
 * ("keep", "discard", "crash", "checks_failed").
 */
function isExperimentStatus(value: unknown): value is ExperimentResult["status"] {
	switch (value) {
		case "keep":
		case "discard":
		case "crash":
		case "checks_failed":
			return true;
		default:
			return false;
	}
}
340
+
341
/**
 * Copies the finite numeric entries of an untrusted metrics object.
 *
 * Non-objects, `null`, and arrays yield an empty map. Arrays are rejected
 * explicitly because their indices would otherwise be copied as metrics named
 * "0", "1", …. Keys that could tamper with the prototype chain
 * (`__proto__`, `constructor`, `prototype`) are skipped, as are values that
 * are not finite numbers.
 *
 * @param value Untrusted value, typically the `metrics` field of a parsed log line.
 * @returns A new map containing only the accepted entries.
 */
function cloneNumericMetrics(value: unknown): NumericMetricMap {
	const clone: NumericMetricMap = {};
	if (typeof value !== "object" || value === null || Array.isArray(value)) return clone;

	for (const [key, entryValue] of Object.entries(value as { [key: string]: unknown })) {
		if (key === "__proto__" || key === "constructor" || key === "prototype") continue;
		if (typeof entryValue === "number" && Number.isFinite(entryValue)) {
			clone[key] = entryValue;
		}
	}
	return clone;
}
353
+
354
/**
 * Returns a new array containing only the string items of `value`, or an
 * empty array when `value` is not an array.
 */
function cloneStringArray(value: unknown): string[] {
	const strings: string[] = [];
	if (Array.isArray(value)) {
		for (const item of value) {
			if (typeof item === "string") strings.push(item);
		}
	}
	return strings;
}
358
+
359
/**
 * Turns a list of metric names into metric definitions, deriving each unit
 * with `inferMetricUnitFromName`.
 *
 * @param metricNames Names to convert; a missing list yields an empty result.
 * @returns One definition per name, in the same order.
 */
function hydrateMetricDefs(metricNames: string[] | undefined): MetricDef[] {
	const definitions: MetricDef[] = [];
	if (!metricNames) return definitions;
	for (const name of metricNames) {
		definitions.push({ name, unit: inferMetricUnitFromName(name) });
	}
	return definitions;
}
366
+
367
/**
 * Deep-copies an untrusted ASI payload.
 *
 * Non-objects, `null`, and arrays yield `undefined`. Arrays are rejected
 * explicitly because they would otherwise be converted into an object keyed
 * by index. Keys that could tamper with the prototype chain (`__proto__`,
 * `constructor`, `prototype`) are skipped; every other value is copied with
 * `structuredClone`.
 *
 * @param value Untrusted value, typically the `asi` field of a parsed log line.
 * @returns A new object with the cloned entries, or `undefined` when `value` is not a plain object.
 */
function cloneAsi(value: unknown): ExperimentResult["asi"] {
	if (typeof value !== "object" || value === null || Array.isArray(value)) return undefined;

	const clone: { [key: string]: unknown } = {};
	for (const [key, entryValue] of Object.entries(value)) {
		if (key === "__proto__" || key === "constructor" || key === "prototype") continue;
		clone[key] = structuredClone(entryValue);
	}
	return clone as ExperimentResult["asi"];
}
376
+
377
/**
 * Validates the data payload of an "autoresearch-control" session entry.
 *
 * The payload must be a non-null object whose `mode` is "on", "off", or
 * "clear". A `goal` is carried over only when it is a non-blank string; it is
 * stored as given, without trimming.
 *
 * @param value Untrusted entry data.
 * @returns The validated control data, or `null` when the payload is invalid.
 */
function parseControlEntry(value: unknown): AutoresearchControlEntryData | null {
	if (value === null || typeof value !== "object") return null;

	const { mode, goal } = value as { goal?: unknown; mode?: unknown };
	if (mode !== "on" && mode !== "off" && mode !== "clear") return null;

	const data: AutoresearchControlEntryData = { mode };
	const hasGoal = typeof goal === "string" && goal.trim().length > 0;
	if (hasGoal) {
		data.goal = goal;
	}
	return data;
}