@oh-my-pi/pi-coding-agent 13.14.0 → 13.15.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. package/CHANGELOG.md +140 -0
  2. package/package.json +10 -8
  3. package/src/autoresearch/command-initialize.md +34 -0
  4. package/src/autoresearch/command-resume.md +17 -0
  5. package/src/autoresearch/contract.ts +332 -0
  6. package/src/autoresearch/dashboard.ts +447 -0
  7. package/src/autoresearch/git.ts +243 -0
  8. package/src/autoresearch/helpers.ts +458 -0
  9. package/src/autoresearch/index.ts +693 -0
  10. package/src/autoresearch/prompt.md +227 -0
  11. package/src/autoresearch/resume-message.md +16 -0
  12. package/src/autoresearch/state.ts +386 -0
  13. package/src/autoresearch/tools/init-experiment.ts +310 -0
  14. package/src/autoresearch/tools/log-experiment.ts +833 -0
  15. package/src/autoresearch/tools/run-experiment.ts +640 -0
  16. package/src/autoresearch/types.ts +218 -0
  17. package/src/cli/args.ts +8 -2
  18. package/src/cli/initial-message.ts +58 -0
  19. package/src/config/keybindings.ts +417 -212
  20. package/src/config/model-registry.ts +1 -0
  21. package/src/config/model-resolver.ts +57 -9
  22. package/src/config/settings-schema.ts +38 -10
  23. package/src/config/settings.ts +1 -4
  24. package/src/exec/bash-executor.ts +7 -5
  25. package/src/export/html/template.css +43 -13
  26. package/src/export/html/template.generated.ts +1 -1
  27. package/src/export/html/template.html +1 -0
  28. package/src/export/html/template.js +107 -0
  29. package/src/extensibility/extensions/types.ts +31 -8
  30. package/src/internal-urls/docs-index.generated.ts +1 -1
  31. package/src/lsp/index.ts +1 -1
  32. package/src/main.ts +44 -44
  33. package/src/mcp/oauth-discovery.ts +1 -1
  34. package/src/modes/acp/acp-agent.ts +957 -0
  35. package/src/modes/acp/acp-event-mapper.ts +531 -0
  36. package/src/modes/acp/acp-mode.ts +13 -0
  37. package/src/modes/acp/index.ts +2 -0
  38. package/src/modes/components/agent-dashboard.ts +5 -4
  39. package/src/modes/components/bash-execution.ts +40 -11
  40. package/src/modes/components/custom-editor.ts +47 -47
  41. package/src/modes/components/extensions/extension-dashboard.ts +2 -1
  42. package/src/modes/components/history-search.ts +2 -1
  43. package/src/modes/components/hook-editor.ts +2 -1
  44. package/src/modes/components/hook-input.ts +8 -7
  45. package/src/modes/components/hook-selector.ts +15 -10
  46. package/src/modes/components/keybinding-hints.ts +9 -9
  47. package/src/modes/components/login-dialog.ts +3 -3
  48. package/src/modes/components/mcp-add-wizard.ts +2 -1
  49. package/src/modes/components/model-selector.ts +14 -3
  50. package/src/modes/components/oauth-selector.ts +2 -1
  51. package/src/modes/components/python-execution.ts +2 -3
  52. package/src/modes/components/session-selector.ts +2 -1
  53. package/src/modes/components/settings-selector.ts +2 -1
  54. package/src/modes/components/status-line-segment-editor.ts +2 -1
  55. package/src/modes/components/tool-execution.ts +4 -5
  56. package/src/modes/components/tree-selector.ts +3 -2
  57. package/src/modes/components/user-message-selector.ts +3 -8
  58. package/src/modes/components/user-message.ts +16 -0
  59. package/src/modes/controllers/command-controller.ts +0 -2
  60. package/src/modes/controllers/extension-ui-controller.ts +89 -4
  61. package/src/modes/controllers/input-controller.ts +29 -23
  62. package/src/modes/controllers/mcp-command-controller.ts +1 -1
  63. package/src/modes/index.ts +1 -0
  64. package/src/modes/interactive-mode.ts +17 -5
  65. package/src/modes/print-mode.ts +1 -1
  66. package/src/modes/prompt-action-autocomplete.ts +7 -7
  67. package/src/modes/rpc/rpc-mode.ts +7 -2
  68. package/src/modes/rpc/rpc-types.ts +1 -0
  69. package/src/modes/theme/theme.ts +53 -44
  70. package/src/modes/types.ts +9 -2
  71. package/src/modes/utils/hotkeys-markdown.ts +19 -19
  72. package/src/modes/utils/keybinding-matchers.ts +21 -0
  73. package/src/modes/utils/ui-helpers.ts +1 -1
  74. package/src/patch/hashline.ts +139 -127
  75. package/src/patch/index.ts +77 -59
  76. package/src/patch/shared.ts +19 -11
  77. package/src/prompts/tools/hashline.md +43 -116
  78. package/src/sdk.ts +34 -17
  79. package/src/session/agent-session.ts +123 -30
  80. package/src/session/session-manager.ts +32 -31
  81. package/src/session/streaming-output.ts +87 -37
  82. package/src/tools/ask.ts +56 -30
  83. package/src/tools/bash-interactive.ts +2 -6
  84. package/src/tools/bash-interceptor.ts +1 -39
  85. package/src/tools/bash-skill-urls.ts +1 -1
  86. package/src/tools/browser.ts +1 -1
  87. package/src/tools/gemini-image.ts +1 -1
  88. package/src/tools/python.ts +2 -2
  89. package/src/tools/resolve.ts +1 -1
  90. package/src/utils/child-process.ts +88 -0
package/CHANGELOG.md CHANGED
@@ -2,6 +2,146 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [13.15.0] - 2026-03-23
6
+ ### Breaking Changes
7
+
8
+ - Changed hashline edit schema from flat `op`/`pos`/`end`/`lines` fields to structured `loc`/`content` format with location-specific objects
9
+ - Renamed hashline edit operations: `replace_line` → `{ line: anchor }`, `replace_range` → `{ block: { pos, end } }`, `append_at` → `{ append: anchor }`, `prepend_at` → `{ prepend: anchor }`, `append_file` → `"append"`, `prepend_file` → `"prepend"`
10
+ - Changed `lines` parameter to `content` in hashline edit entries
11
+ - Renamed hashline edit operation types: `append` → `append_at`, `prepend` → `prepend_at`, `append_eof` → `append_file`, `prepend_bof` → `prepend_file`
12
+ - Changed hashline edit operation types from `replace` (with optional `end`) to explicit `replace_line` and `replace_range` operations
13
+ - Added required `append_eof` and `prepend_bof` operations for file-level edits; `append` and `prepend` now require an anchor position
14
+ - Made `pos` parameter required for `replace_line`, `append`, and `prepend` operations; `append_eof` and `prepend_bof` no longer accept anchors
15
+
16
+ ### Added
17
+
18
+ - Added prompt for tradeoff metrics during autoresearch setup to collect secondary metrics alongside primary metric
19
+ - Added validation of contract path specifications to reject absolute paths and parent directory references
20
+ - Added stricter benchmark command validation in `isAutoresearchShCommand()` to reject chained commands, pipes, and redirects
21
+ - Added protection against prototype pollution in ASI data and metric cloning by filtering `__proto__`, `constructor`, and `prototype` keys
22
+ - Added `autoResumeArmed` flag to track when autoresearch should automatically resume pending runs
23
+ - Added `lastAutoResumePendingRunNumber` to prevent duplicate auto-resume prompts for the same pending run
24
+ - Added `git clean -X` invocation during failed experiment rollback to remove ignored build artifacts
25
+ - Added validation to reject `init_experiment` when a previous run is still pending and unlogged
26
+ - Added autoresearch contract system for validating benchmark commands, metrics, scope paths, off-limits paths, and constraints with fingerprint tracking to detect configuration drift
27
+ - Added `autoresearch.program.md` support for repo-local playbook overlays that guide session strategy while preserving `autoresearch.md` as source of truth
28
+ - Added pending run artifact tracking and recovery to resume incomplete experiments from `.autoresearch/runs/` directory with run numbers and benchmark logs
29
+ - Added run directory organization with numbered run artifacts, benchmark logs, and optional checks logs for experiment traceability
30
+ - Added segment fingerprinting to detect when benchmark configuration changes between runs and warn about potential incomparability
31
+ - Added support for secondary metrics tracking alongside primary metric with configurable direction (lower/higher is better)
32
+ - Added `getCurrentAutoresearchBranch()` helper to detect and validate existing autoresearch branches for session resumption
33
+ - Added `PendingRunSummary` type to track unlogged run state including parsed metrics, ASI data, and pass/fail status
34
+ - Added hidden next-turn message delivery via `deliverAs: 'nextTurn'` with optional `triggerTurn` to queue context for next LLM call without exposing in editable queue
35
+ - Added `#queueHiddenNextTurnMessage()` and `#promptQueuedHiddenNextTurnMessages()` to AgentSession for autonomous tool reactions
36
+ - Added resume context support in `command-resume.md` template for user-provided guidance when resuming sessions
37
+ - Added current segment snapshot display in autoresearch prompt showing recent runs, baseline metrics, and best results
38
+ - Added pending run indicator in autoresearch prompt to guide users to complete unlogged experiments before starting new benchmarks
39
+ - Added local playbook section in autoresearch prompt when `autoresearch.program.md` exists
40
+ - Added tab replacement in dashboard and tool output rendering to prevent display corruption from shell commands with tabs
41
+ - Added boundary duplication warning when replace_range or replace_line operations include a last inserted line that matches the next surviving line, helping detect off-by-one range errors
42
+ - Added git branch isolation for autoresearch sessions via `ensureAutoresearchBranch()` to safely revert failed experiments
43
+ - Added branch status line to autoresearch initialization and resume prompts showing created or reused branch name
44
+ - Added `Files in Scope`, `Off Limits`, and `Constraints` sections to autoresearch.md template for explicit scope definition
45
+ - Added validation of ASI metadata requirements in `log_experiment` tool, requiring hypothesis for all runs and rollback context for failed runs
46
+ - Added keybinding matcher utilities `matchesAppInterrupt()` and `matchesSelectCancel()` for consistent escape key handling across components
47
+ - Added support for customizable `app.interrupt` and `tui.select.cancel` keybindings in interactive components
48
+ - Added `defaultInactive` property to `ToolDefinition` to allow tools to be registered but excluded from the initial active set, with extension responsibility for activation/deactivation
49
+ - Added dynamic tool activation/deactivation in autoresearch mode via `setActiveTools()` API
50
+ - Added separate initialization and resume workflows for autoresearch with `command-initialize.md` and `command-resume.md` prompts
51
+ - Added intent dialog to prompt users for autoresearch optimization goals when starting fresh
52
+ - Added automatic detection of existing `autoresearch.md` to resume from previous sessions without re-prompting for intent
53
+ - Added autoresearch extension with autonomous experiment loop capabilities
54
+ - Added `init_experiment` tool to initialize and reset autoresearch sessions with configurable metrics
55
+ - Added `log_experiment` tool to record experiment results with metric parsing and confidence tracking
56
+ - Added `run_experiment` tool to execute commands and capture metrics with timeout and crash detection
57
+ - Added autoresearch dashboard controller for displaying experiment results and optimization progress
58
+ - Added support for secondary metrics tracking alongside primary metric
59
+ - Added `ExtensionWidgetContent` and `ExtensionUiComponentFactory` types for flexible widget configuration
60
+ - Added `ExtensionWidgetOptions` interface with `placement` parameter to position widgets above or below editor
61
+ - Added `WidgetPlacement` type supporting 'aboveEditor' and 'belowEditor' placement options
62
+ - Added `hookWidgetContainerAbove` and `hookWidgetContainerBelow` containers to InteractiveMode for separate widget management
63
+ - Added autoresearch mode for autonomous experiment loops with init_experiment, log_experiment, and run_experiment tools
64
+ - Added autoresearch dashboard widget displaying experiment results, metrics, and optimization progress
65
+ - Added support for metric tracking with configurable direction (lower/higher is better) and secondary metrics
66
+ - Added widget placement options to position extensions above or below the editor via `placement` parameter
67
+ - Added `ExtensionWidgetContent` and `ExtensionWidgetOptions` types for flexible widget configuration
68
+ - Added ACP (Agent Client Protocol) mode for headless agent operation via `--mode acp`
69
+ - Added support for Agent Client Protocol SDK integration with session management, MCP server configuration, and streaming communication
70
+ - Added `ensureOnDisk()` method to SessionManager to persist sessions immediately for ACP discovery
71
+
72
+ ### Changed
73
+
74
+ - Changed `isAutoresearchShCommand()` to use proper command-line argument parsing instead of regex, improving accuracy for complex shell invocations
75
+ - Changed autoresearch initialization prompt to display collected tradeoff metrics in the setup summary
76
+ - Changed `command-initialize.md` template to include guidance on preflight requirements, comparability invariants, and marking measurement-critical files as off-limits
77
+ - Changed `command-initialize.md` to instruct users to write or update `autoresearch.program.md` with durable heuristics and repo-specific strategy
78
+ - Changed autoresearch resume guidance to emphasize continuing on the current protected branch rather than switching branches
79
+ - Changed autoresearch prompt to clarify that `autoresearch.md` holds durable conclusions while `autoresearch.ideas.md` is the scratch backlog
80
+ - Changed autoresearch prompt guidance to require stable measurement harness and fixed benchmark inputs unless intentionally starting a new segment
81
+ - Changed autoresearch prompt to recommend keeping equal or near-equal results when they materially simplify implementation
82
+ - Changed `init_experiment` to reset pending run state (checks, duration, ASI, artifact directory) when initializing a new segment
83
+ - Changed `log_experiment` to set `autoResumeArmed` flag after successfully logging a run to enable auto-resume on next agent turn
84
+ - Changed `run_experiment` to set `autoResumeArmed` flag and update dashboard after completing a run
85
+ - Changed auto-resume logic to only prompt when a new pending run exists or when `autoResumeArmed` is explicitly set, preventing duplicate prompts
86
+ - Changed path normalization in contract validation to use `path.posix.normalize()` for consistent path handling
87
+ - Changed autoresearch initialization to collect and validate benchmark command, metric definition, scope paths, off-limits list, and constraints before `init_experiment`
88
+ - Changed `init_experiment` to require exact benchmark command, metric definition, scope, off-limits, and constraints matching collected contract
89
+ - Changed `log_experiment` to record run number, benchmark command, scope paths, off-limits list, constraints, and segment fingerprint with each result
90
+ - Changed `run_experiment` to organize output in numbered run directories with separate benchmark and checks logs for artifact preservation
91
+ - Changed autoresearch dashboard to show pending run indicator when unlogged experiment exists
92
+ - Changed autoresearch resume workflow to detect and offer recovery of pending run artifacts before continuing experiment loop
93
+ - Changed `ExperimentResult` to include `runNumber`, `benchmarkCommand`, `scopePaths`, `offLimits`, `constraints`, and `segmentFingerprint` fields
94
+ - Changed `RunningExperiment` to track `runDirectory` and `runNumber` for artifact organization
95
+ - Changed `AutoresearchRuntime` to include `lastRunArtifactDir`, `lastRunNumber`, `lastRunSummary`, `benchmarkCommand`, `secondaryMetrics`, `scopePaths`, `offLimits`, `constraints`, and `segmentFingerprint`
96
+ - Changed autoresearch prompts to emphasize `autoresearch.md` as source of truth for benchmark, scope, and constraints
97
+ - Changed `command-initialize.md` to display collected setup (benchmark command, metric, direction, scope, off-limits, constraints) before initialization
98
+ - Changed `resume-message.md` to reference pending run artifacts and guide completion of unlogged experiments
99
+ - Changed `sendMessage()` API documentation to clarify `deliverAs: 'nextTurn'` behavior for hidden context delivery
100
+ - Changed `SendMessageHandler` type documentation to explain hidden next-turn message queuing during prompt teardown
101
+ - Changed autoresearch startup to create or reuse a dedicated `autoresearch/...` git branch before enabling the experiment loop
102
+ - Changed autoresearch to refuse startup when unrelated worktree changes would make auto-reverts unsafe
103
+ - Changed autoresearch prompts to emphasize scope and constraints as source of truth for session direction
104
+ - Changed component escape key handling to use keybinding manager for `app.interrupt` and `tui.select.cancel` with fallback to raw Escape matching
105
+ - Updated autoresearch prompt guidance to require explicit files in scope, off-limits paths, and session constraints
106
+ - Changed autoresearch command to use intent-based initialization instead of goal parameter, with user input dialog for new sessions
107
+ - Changed autoresearch startup to create or reuse a dedicated `autoresearch/...` git branch before enabling the experiment loop, and to refuse startup when unrelated worktree changes would make auto-reverts unsafe
108
+ - Changed autoresearch startup to activate experiment tools (`init_experiment`, `run_experiment`, `log_experiment`) only when autoresearch mode is enabled
109
+ - Changed autoresearch shutdown to deactivate experiment tools when mode is disabled or cleared
110
+ - Changed autoresearch session rehydration to dynamically manage experiment tool activation based on session state
111
+ - Changed autoresearch prompts and notes guidance to require explicit files in scope, off-limits paths, and session constraints
112
+ - Refactored hashline edit validation to enforce stricter anchor requirements per operation type
113
+ - Updated edit application logic to handle explicit file-level operations (`append_eof`, `prepend_bof`) separately from anchor-based operations
114
+ - Changed `setWidget` API to accept `ExtensionWidgetOptions` parameter for placement control
115
+ - Changed widget placement logic to manage widgets above and below editor separately
116
+ - Changed hashline edit application to preserve duplicated boundary lines exactly as provided instead of auto-correcting them
117
+ - Updated RPC mode to support widget placement option in `setWidget` requests
118
+ - Changed hashline edit application to preserve duplicated boundary lines exactly as provided instead of auto-correcting them
119
+ - Changed widget API to support placement options and component factories in addition to string arrays
120
+ - Updated extension UI controller to manage widgets above and below the editor separately
121
+ - Updated ask tool rendering to support markdown formatting in questions and option labels
122
+ - Refactored hook input and selector components to render titles as markdown for richer text formatting
123
+ - Changed session collection to include sessions with zero messages, enabling ACP mode to create discoverable sessions immediately
124
+ - Changed session persistence logic to use atomic file rewrite when flushing unflushed sessions to prevent duplication
125
+ - Removed hashline edit autocorrection for duplicated boundary lines; escaped-tab autocorrection remains available for leading `\\t` sequences
126
+
127
+ ### Removed
128
+
129
+ - Removed `command-start.md` prompt template in favor of separate initialize and resume workflows
130
+ - Removed auto-correction of off-by-one range edits that duplicated closing braces or boundary lines
131
+ - Removed `shouldAutocorrect` function and related boundary line deduplication logic from hashline editor
132
+ - Removed auto-correction of off-by-one range edits that duplicated closing braces or boundary lines
133
+
134
+ ### Fixed
135
+
136
+ - Fixed boundary duplication warnings to always display when replacement lines match the next surviving line, even when auto-correction is disabled
137
+ - Fixed secondary metrics validation to properly reject missing configured metrics and new metrics without force flag
138
+ - Fixed ASI data cloning to prevent prototype pollution attacks by filtering reserved property names
139
+ - Fixed autoresearch resume to detect and recover pending run artifacts that were left unlogged from previous sessions
140
+ - Fixed dashboard overlay to display when running experiment even with zero completed results
141
+ - Fixed tab character rendering in dashboard command display and tool output summaries
142
+ - Fixed autoresearch logging to require durable ASI metadata (hypothesis, rollback_reason, next_action_hint) for every run including rollback context for discarded, crashed, and checks-failed experiments
143
+ - Fixed autoresearch logging to require durable ASI metadata for every run, including rollback context for discarded, crashed, and checks-failed experiments
144
+
5
145
  ## [13.14.0] - 2026-03-20
6
146
 
7
147
  ### Added
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@oh-my-pi/pi-coding-agent",
4
- "version": "13.14.0",
4
+ "version": "13.15.2",
5
5
  "description": "Coding agent CLI with read, bash, edit, write tools and session management",
6
6
  "homepage": "https://github.com/can1357/oh-my-pi",
7
7
  "author": "Can Boluk",
@@ -40,13 +40,14 @@
40
40
  "test": "bun test"
41
41
  },
42
42
  "dependencies": {
43
+ "@agentclientprotocol/sdk": "0.16.1",
43
44
  "@mozilla/readability": "^0.6",
44
- "@oh-my-pi/omp-stats": "13.14.0",
45
- "@oh-my-pi/pi-agent-core": "13.14.0",
46
- "@oh-my-pi/pi-ai": "13.14.0",
47
- "@oh-my-pi/pi-natives": "13.14.0",
48
- "@oh-my-pi/pi-tui": "13.14.0",
49
- "@oh-my-pi/pi-utils": "13.14.0",
45
+ "@oh-my-pi/omp-stats": "13.15.2",
46
+ "@oh-my-pi/pi-agent-core": "13.15.2",
47
+ "@oh-my-pi/pi-ai": "13.15.2",
48
+ "@oh-my-pi/pi-natives": "13.15.2",
49
+ "@oh-my-pi/pi-tui": "13.15.2",
50
+ "@oh-my-pi/pi-utils": "13.15.2",
50
51
  "@sinclair/typebox": "^0.34",
51
52
  "@xterm/headless": "^6.0",
52
53
  "ajv": "^8.18",
@@ -54,7 +55,8 @@
54
55
  "diff": "^8.0",
55
56
  "handlebars": "^4.7",
56
57
  "linkedom": "^0.18",
57
- "puppeteer": "^24.37"
58
+ "puppeteer": "^24.37",
59
+ "zod": "4.3.6"
58
60
  },
59
61
  "devDependencies": {
60
62
  "@types/bun": "^1.3"
package/src/autoresearch/command-initialize.md ADDED
@@ -0,0 +1,34 @@
1
+ Set up autoresearch for this intent:
2
+
3
+ {{intent}}
4
+
5
+ {{branch_status_line}}
6
+
7
+ Collected setup:
8
+
9
+ - benchmark command: `{{benchmark_command}}`
10
+ - primary metric: `{{metric_name}}`
11
+ - metric unit: `{{metric_unit}}`
12
+ - direction: `{{direction}}`
13
+ - tradeoff metrics:
14
+ {{{secondary_metrics_block}}}
15
+ - files in scope:
16
+ {{{scope_paths_block}}}
17
+ - off limits:
18
+ {{{off_limits_block}}}
19
+ - constraints:
20
+ {{{constraints_block}}}
21
+
22
+ Explain briefly what autoresearch will do in this repository, then initialize the workspace.
23
+
24
+ Your first actions:
25
+ - write `autoresearch.md`
26
+ - record the collected benchmark command, primary metric, metric unit, direction, tradeoff metrics, scope, off-limits list, and constraints in `autoresearch.md`
27
+ - add a short preflight section in `autoresearch.md` covering prerequisites, one-time setup, and the comparability invariant that must stay fixed across runs
28
+ - explicitly mark the ground-truth evaluator, fixed datasets, and other measurement-critical files as off-limits or hard constraints when they define the benchmark contract
29
+ - write or update `autoresearch.program.md` when you learn durable heuristics, failure patterns, or repo-specific strategy that future resume turns should inherit
30
+ - define the benchmark entrypoint in `autoresearch.sh`
31
+ - optionally add `autoresearch.checks.sh` if correctness or quality needs a hard gate
32
+ - run `init_experiment` with the exact collected benchmark command, metric definition, scope paths, off-limits list, and constraints
33
+ - run and log the baseline
34
+ - keep iterating until interrupted or until the configured iteration cap is reached
package/src/autoresearch/command-resume.md ADDED
@@ -0,0 +1,17 @@
1
+ Resume autoresearch from the attached notes.
2
+
3
+ @{{autoresearch_md_path}}
4
+
5
+ {{branch_status_line}}
6
+ {{#if has_resume_context}}
7
+
8
+ Additional context from the user:
9
+
10
+ {{resume_context}}
11
+ {{/if}}
12
+
13
+ Use the notes as the source of truth for the current direction, scope, and constraints.
14
+ - inspect recent git history for context
15
+ - inspect `autoresearch.jsonl` if it exists
16
+ - continue the most promising unfinished direction on the current protected branch
17
+ - keep iterating until interrupted or until the configured iteration cap is reached
package/src/autoresearch/contract.ts ADDED
@@ -0,0 +1,332 @@
1
+ import * as crypto from "node:crypto";
2
+ import * as fs from "node:fs";
3
+ import * as path from "node:path";
4
+ import type { AutoresearchBenchmarkContract, AutoresearchContract, MetricDirection } from "./types";
5
+
6
+ export interface AutoresearchContractLoadResult {
7
+ contract: AutoresearchContract;
8
+ errors: string[];
9
+ path: string;
10
+ }
11
+
12
+ export interface AutoresearchScriptSnapshot {
13
+ benchmarkScript: string;
14
+ benchmarkScriptPath: string;
15
+ checksScript: string | null;
16
+ checksScriptPath: string;
17
+ errors: string[];
18
+ }
19
+
20
+ const HEADING_REGEX = /^##\s+(.+?)\s*$/;
21
+ const LIST_ITEM_REGEX = /^\s*[-*]\s+(.*)$/;
22
+ const KEY_VALUE_REGEX = /^\s*[-*]\s+([^:]+):\s*(.*)$/;
23
+
24
+ export function readAutoresearchContract(workDir: string): AutoresearchContractLoadResult {
25
+ const contractPath = path.join(workDir, "autoresearch.md");
26
+ let content = "";
27
+ try {
28
+ content = fs.readFileSync(contractPath, "utf8");
29
+ } catch {
30
+ return {
31
+ contract: createEmptyAutoresearchContract(),
32
+ errors: [`${contractPath} does not exist. Create it before initializing autoresearch.`],
33
+ path: contractPath,
34
+ };
35
+ }
36
+
37
+ const contract = parseAutoresearchContract(content);
38
+ const errors = validateAutoresearchContract(contract);
39
+ return { contract, errors, path: contractPath };
40
+ }
41
+
42
+ export function parseAutoresearchContract(markdown: string): AutoresearchContract {
43
+ const sections = extractSections(markdown);
44
+ return {
45
+ benchmark: parseBenchmarkSection(sections.get("benchmark") ?? ""),
46
+ scopePaths: parseListSection(sections.get("files in scope") ?? "", normalizeContractPathSpec),
47
+ offLimits: parseListSection(sections.get("off limits") ?? "", normalizeContractPathSpec),
48
+ constraints: parseListSection(sections.get("constraints") ?? ""),
49
+ };
50
+ }
51
+
52
+ export function validateAutoresearchContract(contract: AutoresearchContract): string[] {
53
+ const errors: string[] = [];
54
+ if (!contract.benchmark.command) {
55
+ errors.push("Benchmark.command is required in autoresearch.md.");
56
+ }
57
+ if (!contract.benchmark.primaryMetric) {
58
+ errors.push("Benchmark.primary metric is required in autoresearch.md.");
59
+ }
60
+ if (!contract.benchmark.direction) {
61
+ errors.push("Benchmark.direction must be `lower` or `higher` in autoresearch.md.");
62
+ }
63
+ if (contract.scopePaths.length === 0) {
64
+ errors.push("Files in Scope must contain at least one path in autoresearch.md.");
65
+ }
66
+ for (const scopePath of contract.scopePaths) {
67
+ if (isUnsafeContractPathSpec(scopePath)) {
68
+ errors.push(`Files in Scope contains an invalid path: ${scopePath}`);
69
+ }
70
+ }
71
+ for (const offLimitsPath of contract.offLimits) {
72
+ if (isUnsafeContractPathSpec(offLimitsPath)) {
73
+ errors.push(`Off Limits contains an invalid path: ${offLimitsPath}`);
74
+ }
75
+ }
76
+ return errors;
77
+ }
78
+
79
+ export function buildAutoresearchSegmentFingerprint(
80
+ contract: AutoresearchContract,
81
+ scripts: {
82
+ benchmarkScript: string;
83
+ checksScript: string | null;
84
+ },
85
+ ): string {
86
+ const payload = {
87
+ benchmark: contract.benchmark,
88
+ scopePaths: contract.scopePaths,
89
+ offLimits: contract.offLimits,
90
+ constraints: contract.constraints,
91
+ scripts,
92
+ };
93
+ return crypto.createHash("sha256").update(JSON.stringify(payload)).digest("hex");
94
+ }
95
+
96
+ export function getAutoresearchFingerprintMismatchError(
97
+ stateFingerprint: string | null,
98
+ workDir: string,
99
+ ): string | null {
100
+ if (!stateFingerprint) {
101
+ return "The current segment has no fingerprint metadata. Re-run init_experiment before continuing.";
102
+ }
103
+
104
+ const contractResult = readAutoresearchContract(workDir);
105
+ const scriptSnapshot = loadAutoresearchScriptSnapshot(workDir);
106
+ const errors = [...contractResult.errors, ...scriptSnapshot.errors];
107
+ if (errors.length > 0) {
108
+ return `${errors.join(" ")} Re-run init_experiment after fixing the workspace contract.`;
109
+ }
110
+
111
+ const currentFingerprint = buildAutoresearchSegmentFingerprint(contractResult.contract, {
112
+ benchmarkScript: scriptSnapshot.benchmarkScript,
113
+ checksScript: scriptSnapshot.checksScript,
114
+ });
115
+ if (currentFingerprint === stateFingerprint) {
116
+ return null;
117
+ }
118
+
119
+ return "autoresearch.md, autoresearch.sh, or autoresearch.checks.sh changed since the current segment was initialized. Re-run init_experiment before continuing.";
120
+ }
121
+
122
+ export function loadAutoresearchScriptSnapshot(workDir: string): AutoresearchScriptSnapshot {
123
+ const benchmarkScriptPath = path.join(workDir, "autoresearch.sh");
124
+ const checksScriptPath = path.join(workDir, "autoresearch.checks.sh");
125
+ const errors: string[] = [];
126
+
127
+ let benchmarkScript = "";
128
+ try {
129
+ benchmarkScript = fs.readFileSync(benchmarkScriptPath, "utf8");
130
+ } catch {
131
+ errors.push(`${benchmarkScriptPath} does not exist. Create it before initializing autoresearch.`);
132
+ }
133
+
134
+ let checksScript: string | null = null;
135
+ try {
136
+ checksScript = fs.readFileSync(checksScriptPath, "utf8");
137
+ } catch {
138
+ checksScript = null;
139
+ }
140
+
141
+ return {
142
+ benchmarkScript,
143
+ benchmarkScriptPath,
144
+ checksScript,
145
+ checksScriptPath,
146
+ errors,
147
+ };
148
+ }
149
+
150
+ export function normalizeAutoresearchList(values: readonly string[]): string[] {
151
+ const normalized: string[] = [];
152
+ const seen = new Set<string>();
153
+ for (const value of values) {
154
+ const trimmed = value.trim();
155
+ if (trimmed.length === 0) continue;
156
+ if (seen.has(trimmed)) continue;
157
+ seen.add(trimmed);
158
+ normalized.push(trimmed);
159
+ }
160
+ return normalized;
161
+ }
162
+
163
+ export function normalizeContractPathSpec(value: string): string {
164
+ const normalized = path.posix.normalize(value.trim().replaceAll("\\", "/"));
165
+ if (normalized === "." || normalized === "./") return ".";
166
+ return normalized.replace(/^\.\/+/, "").replace(/\/+$/, "");
167
+ }
168
+
169
+ export function pathMatchesContractPath(pathValue: string, specValue: string): boolean {
170
+ const normalizedPath = normalizeContractPathSpec(pathValue);
171
+ const normalizedSpec = normalizeContractPathSpec(specValue);
172
+ if (normalizedSpec === ".") return true;
173
+ return normalizedPath === normalizedSpec || normalizedPath.startsWith(`${normalizedSpec}/`);
174
+ }
175
+
176
/**
 * Compares two contract lists after cleaning each with
 * `normalizeAutoresearchList`.
 *
 * The comparison is positional: the cleaned lists must have the same length
 * and the same entry at every index.
 *
 * @param left - First list.
 * @param right - Second list.
 * @returns `true` when the cleaned lists are element-wise identical.
 */
export function contractListsEqual(left: readonly string[], right: readonly string[]): boolean {
  const a = normalizeAutoresearchList(left);
  const b = normalizeAutoresearchList(right);
  return a.length === b.length && a.every((entry, position) => entry === b[position]);
}
182
+
183
/**
 * Compares two lists of contract path specs after passing each through
 * `normalizeContractPathList`.
 *
 * The processed lists must have the same length and the same entry at every
 * index.
 *
 * @param left - First list of path specs.
 * @param right - Second list of path specs.
 * @returns `true` when the processed lists are element-wise identical.
 */
export function contractPathListsEqual(left: readonly string[], right: readonly string[]): boolean {
  const a = normalizeContractPathList(left);
  const b = normalizeContractPathList(right);
  return a.length === b.length && a.every((spec, position) => spec === b[position]);
}
189
+
190
/**
 * Builds a blank contract: every nullable benchmark field is `null`, the
 * metric unit is an empty string, and every list is empty.
 *
 * A fresh object (with fresh arrays) is created on each call.
 *
 * @returns A new, empty contract.
 */
function createEmptyAutoresearchContract(): AutoresearchContract {
  const benchmark: AutoresearchContract["benchmark"] = {
    command: null,
    primaryMetric: null,
    metricUnit: "",
    direction: null,
    secondaryMetrics: [],
  };
  return { benchmark, scopePaths: [], offLimits: [], constraints: [] };
}
204
+
205
/**
 * Produces a canonical form of a list of path specs, suitable for
 * order-insensitive comparison.
 *
 * Each spec is normalized with `normalizeContractPathSpec`, the result is
 * cleaned with `normalizeAutoresearchList`, and the surviving specs are sorted
 * with `String.prototype.localeCompare`.
 *
 * @param values - Raw path specs.
 * @returns A new, sorted array of normalized specs.
 */
function normalizeContractPathList(values: readonly string[]): string[] {
  const specs = values.map(value => normalizeContractPathSpec(value));
  const unique = normalizeAutoresearchList(specs);
  // `unique` is a fresh array, so sorting it in place does not touch the input.
  unique.sort((a, b) => a.localeCompare(b));
  return unique;
}
210
+
211
/**
 * Splits a markdown document into sections keyed by heading.
 *
 * A line is treated as a heading when it matches `HEADING_REGEX`; the first
 * capture group, trimmed and lower-cased, becomes the section key. The lines
 * that follow, up to the next heading, are joined with newlines and trimmed to
 * form the section body.
 *
 * Lines that appear before the first heading are ignored, as are lines under a
 * heading whose captured text is empty. When the same key occurs more than
 * once, the later section replaces the earlier one.
 *
 * @param markdown - Markdown text.
 * @returns Map from lower-cased heading text to trimmed section body.
 */
function extractSections(markdown: string): Map<string, string> {
  const sections = new Map<string, string>();
  let heading: string | null = null;
  let body: string[] = [];

  // Stores the section collected so far, if there is one.
  const flush = (): void => {
    if (heading) sections.set(heading, body.join("\n").trim());
  };

  for (const line of markdown.split("\n")) {
    const headingMatch = line.match(HEADING_REGEX);
    if (!headingMatch) {
      if (heading) body.push(line);
      continue;
    }
    flush();
    heading = headingMatch[1]?.trim().toLowerCase() ?? null;
    body = [];
  }

  flush();
  return sections;
}
237
+
238
/**
 * Parses the body of the benchmark section into a benchmark contract.
 *
 * Every line that matches `KEY_VALUE_REGEX` contributes one entry: capture
 * group 1 is the key (reduced with `normalizeKey`) and capture group 2 is the
 * trimmed value. A key that appears more than once keeps its last value.
 *
 * The `secondarymetrics` key may additionally be followed by a nested bullet
 * list (lines indented by at least two whitespace characters and starting with
 * `-` or `*`). Those bullets are appended to the inline value as a
 * comma-separated list. Blank lines inside the bullet list are tolerated; the
 * list ends at the next key/value line or at any other non-blank line.
 *
 * @param section - Text of the benchmark section.
 * @returns The parsed benchmark contract; fields that are absent come back as
 *   `null`, `""` or `[]` depending on the field.
 */
function parseBenchmarkSection(section: string): AutoresearchBenchmarkContract {
  const rows = section.split("\n");
  const fields = new Map<string, string>();

  let cursor = 0;
  while (cursor < rows.length) {
    const current = rows[cursor] ?? "";
    cursor += 1;

    const pair = current.match(KEY_VALUE_REGEX);
    if (!pair) continue;

    const key = normalizeKey(pair[1] ?? "");
    let value = (pair[2] ?? "").trim();

    if (key === "secondarymetrics") {
      const bullets: string[] = [];
      for (let probe = cursor; probe < rows.length; probe += 1) {
        const candidate = rows[probe] ?? "";
        if (candidate.match(KEY_VALUE_REGEX)) break;
        const bullet = candidate.match(/^\s{2,}[-*]\s+(.*)$/);
        if (bullet) {
          bullets.push((bullet[1] ?? "").trim());
          // Resume the outer scan after the last bullet that was consumed.
          cursor = probe + 1;
        } else if (candidate.trim().length > 0) {
          break;
        }
      }
      if (bullets.length > 0) {
        value = [value, ...bullets].filter(Boolean).join(", ");
      }
    }

    fields.set(key, value);
  }

  return {
    command: readNullableEntry(fields.get("command")),
    primaryMetric: readNullableEntry(fields.get("primarymetric")),
    metricUnit: fields.get("metricunit")?.trim() ?? "",
    direction: parseDirection(fields.get("direction")),
    secondaryMetrics: parseSecondaryMetrics(fields.get("secondarymetrics")),
  };
}
276
+
277
/**
 * Parses the body of a list-style section into its items.
 *
 * - Blank lines are skipped.
 * - A line matching `LIST_ITEM_REGEX` starts a new item; capture group 1,
 *   trimmed, is the item text.
 * - A following line indented by at least two whitespace characters is a
 *   continuation and is appended to the current item after a single space.
 * - Any other non-blank line ends the current item and is kept as an item of
 *   its own.
 *
 * The collected items are cleaned with `normalizeAutoresearchList`. When
 * `normalizeItem` is given it is applied to each cleaned item, and exact
 * duplicates are removed again afterwards, because distinct raw items can
 * normalize to the same value.
 *
 * @param section - Text of the section.
 * @param normalizeItem - Optional per-item normalizer.
 * @returns The items in their original order, without duplicates.
 */
function parseListSection(section: string, normalizeItem?: (value: string) => string): string[] {
  const items: string[] = [];
  let activeItem: string | null = null;
  for (const rawLine of section.split("\n")) {
    const text = rawLine.trim();
    if (text.length === 0) continue;
    const match = rawLine.match(LIST_ITEM_REGEX);
    if (match) {
      if (activeItem) items.push(activeItem);
      activeItem = (match[1] ?? "").trim();
      continue;
    }
    if (activeItem && /^\s{2,}\S/.test(rawLine)) {
      activeItem = `${activeItem} ${text}`;
      continue;
    }
    if (activeItem) {
      items.push(activeItem);
      activeItem = null;
    }
    items.push(text);
  }
  if (activeItem) {
    items.push(activeItem);
  }

  const cleaned = normalizeAutoresearchList(items);
  if (!normalizeItem) return cleaned;
  // Call through an arrow so `map` does not hand index and array to the callback,
  // then drop values that only became equal after normalization.
  return [...new Set(cleaned.map(item => normalizeItem(item)))];
}
305
+
306
/**
 * Reduces a key to a comparison form: lower-cased, with every character other
 * than `a`-`z` and `0`-`9` removed.
 *
 * @param value - Raw key text.
 * @returns The reduced key; may be empty.
 */
function normalizeKey(value: string): string {
  let key = "";
  for (const ch of value.toLowerCase()) {
    const isLetter = ch >= "a" && ch <= "z";
    const isDigit = ch >= "0" && ch <= "9";
    if (isLetter || isDigit) key += ch;
  }
  return key;
}
309
+
310
/**
 * Interprets a direction value.
 *
 * Only the exact strings `"lower"` and `"higher"` are accepted; the comparison
 * is case-sensitive and no trimming is performed here.
 *
 * @param value - Raw direction text, or `undefined` when absent.
 * @returns The direction, or `null` when the value is absent or not recognised.
 */
function parseDirection(value: string | undefined): MetricDirection | null {
  switch (value) {
    case "lower":
    case "higher":
      return value;
    default:
      return null;
  }
}
314
+
315
/**
 * Reads an optional text entry, treating blank text as absent.
 *
 * @param value - Raw entry, or `undefined` when absent.
 * @returns The trimmed text, or `null` when the entry is absent or contains
 *   only whitespace.
 */
function readNullableEntry(value: string | undefined): string | null {
  if (value === undefined) return null;
  const text = value.trim();
  return text === "" ? null : text;
}
319
+
320
/**
 * Splits a comma-separated list of secondary metric names.
 *
 * Each name is trimmed and empty names are discarded before the list is
 * cleaned with `normalizeAutoresearchList`.
 *
 * @param value - Comma-separated names, or `undefined` when absent.
 * @returns The cleaned list of names; an empty array when the value is absent
 *   or an empty string.
 */
function parseSecondaryMetrics(value: string | undefined): string[] {
  if (value === undefined || value === "") return [];
  const names: string[] = [];
  for (const part of value.split(",")) {
    const name = part.trim();
    if (name) names.push(name);
  }
  return normalizeAutoresearchList(names);
}
329
+
330
/**
 * Flags a path spec as unsafe when it is an absolute POSIX path, is exactly
 * `..`, or starts with `../`.
 *
 * The check is purely textual: `..` segments that appear later in the value
 * are not detected here.
 *
 * @param value - Path spec to check.
 * @returns `true` when the spec is absolute or starts by climbing to a parent.
 */
function isUnsafeContractPathSpec(value: string): boolean {
  if (path.posix.isAbsolute(value)) return true;
  return value === ".." || value.startsWith("../");
}