@oh-my-pi/pi-coding-agent 13.14.2 → 13.15.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. package/CHANGELOG.md +150 -0
  2. package/package.json +10 -8
  3. package/src/autoresearch/command-initialize.md +34 -0
  4. package/src/autoresearch/command-resume.md +17 -0
  5. package/src/autoresearch/contract.ts +332 -0
  6. package/src/autoresearch/dashboard.ts +447 -0
  7. package/src/autoresearch/git.ts +243 -0
  8. package/src/autoresearch/helpers.ts +458 -0
  9. package/src/autoresearch/index.ts +693 -0
  10. package/src/autoresearch/prompt.md +227 -0
  11. package/src/autoresearch/resume-message.md +16 -0
  12. package/src/autoresearch/state.ts +386 -0
  13. package/src/autoresearch/tools/init-experiment.ts +310 -0
  14. package/src/autoresearch/tools/log-experiment.ts +833 -0
  15. package/src/autoresearch/tools/run-experiment.ts +640 -0
  16. package/src/autoresearch/types.ts +218 -0
  17. package/src/cli/args.ts +8 -2
  18. package/src/cli/initial-message.ts +58 -0
  19. package/src/config/keybindings.ts +423 -212
  20. package/src/config/model-registry.ts +1 -0
  21. package/src/config/model-resolver.ts +57 -9
  22. package/src/config/settings-schema.ts +38 -10
  23. package/src/config/settings.ts +1 -4
  24. package/src/export/html/template.css +43 -13
  25. package/src/export/html/template.generated.ts +1 -1
  26. package/src/export/html/template.html +1 -0
  27. package/src/export/html/template.js +107 -0
  28. package/src/extensibility/extensions/types.ts +31 -8
  29. package/src/internal-urls/docs-index.generated.ts +1 -1
  30. package/src/lsp/index.ts +1 -1
  31. package/src/main.ts +44 -44
  32. package/src/mcp/oauth-discovery.ts +1 -1
  33. package/src/modes/acp/acp-agent.ts +957 -0
  34. package/src/modes/acp/acp-event-mapper.ts +531 -0
  35. package/src/modes/acp/acp-mode.ts +13 -0
  36. package/src/modes/acp/index.ts +2 -0
  37. package/src/modes/components/agent-dashboard.ts +5 -4
  38. package/src/modes/components/custom-editor.ts +53 -51
  39. package/src/modes/components/extensions/extension-dashboard.ts +2 -1
  40. package/src/modes/components/history-search.ts +2 -1
  41. package/src/modes/components/hook-editor.ts +2 -1
  42. package/src/modes/components/hook-input.ts +8 -7
  43. package/src/modes/components/hook-selector.ts +15 -10
  44. package/src/modes/components/keybinding-hints.ts +9 -9
  45. package/src/modes/components/login-dialog.ts +3 -3
  46. package/src/modes/components/mcp-add-wizard.ts +2 -1
  47. package/src/modes/components/model-selector.ts +14 -3
  48. package/src/modes/components/oauth-selector.ts +2 -1
  49. package/src/modes/components/session-selector.ts +2 -1
  50. package/src/modes/components/settings-selector.ts +2 -1
  51. package/src/modes/components/status-line-segment-editor.ts +2 -1
  52. package/src/modes/components/tree-selector.ts +3 -2
  53. package/src/modes/components/user-message-selector.ts +3 -8
  54. package/src/modes/components/user-message.ts +16 -0
  55. package/src/modes/controllers/extension-ui-controller.ts +89 -4
  56. package/src/modes/controllers/input-controller.ts +48 -29
  57. package/src/modes/controllers/mcp-command-controller.ts +1 -1
  58. package/src/modes/index.ts +1 -0
  59. package/src/modes/interactive-mode.ts +17 -5
  60. package/src/modes/print-mode.ts +1 -1
  61. package/src/modes/prompt-action-autocomplete.ts +7 -7
  62. package/src/modes/rpc/rpc-mode.ts +7 -2
  63. package/src/modes/rpc/rpc-types.ts +1 -0
  64. package/src/modes/theme/theme.ts +53 -44
  65. package/src/modes/types.ts +9 -2
  66. package/src/modes/utils/hotkeys-markdown.ts +20 -20
  67. package/src/modes/utils/keybinding-matchers.ts +21 -0
  68. package/src/modes/utils/ui-helpers.ts +1 -1
  69. package/src/patch/hashline.ts +139 -127
  70. package/src/patch/index.ts +77 -59
  71. package/src/patch/shared.ts +19 -11
  72. package/src/prompts/tools/hashline.md +43 -116
  73. package/src/sdk.ts +34 -17
  74. package/src/session/agent-session.ts +436 -86
  75. package/src/session/messages.ts +23 -0
  76. package/src/session/session-manager.ts +97 -31
  77. package/src/tools/ask.ts +56 -30
  78. package/src/tools/bash-interceptor.ts +1 -39
  79. package/src/tools/bash-skill-urls.ts +1 -1
  80. package/src/tools/browser.ts +1 -1
  81. package/src/tools/gemini-image.ts +1 -1
  82. package/src/tools/resolve.ts +1 -1
  83. package/src/utils/child-process.ts +88 -0
  84. package/src/utils/image-input.ts +11 -1
  85. package/src/web/search/providers/codex.ts +10 -3
package/CHANGELOG.md CHANGED
@@ -2,6 +2,156 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [13.15.3] - 2026-03-26
6
+
7
+ ### Added
8
+
9
+ - Added configurable `app.model.selectTemporary` keybinding for temporary model selection.
10
+
11
+ ## [13.15.0] - 2026-03-23
12
+ ### Breaking Changes
13
+
14
+ - Changed hashline edit schema from flat `op`/`pos`/`end`/`lines` fields to structured `loc`/`content` format with location-specific objects
15
+ - Renamed hashline edit operations: `replace_line` → `{ line: anchor }`, `replace_range` → `{ block: { pos, end } }`, `append_at` → `{ append: anchor }`, `prepend_at` → `{ prepend: anchor }`, `append_file` → `"append"`, `prepend_file` → `"prepend"`
16
+ - Changed `lines` parameter to `content` in hashline edit entries
17
+ - Renamed hashline edit operation types: `append` → `append_at`, `prepend` → `prepend_at`, `append_eof` → `append_file`, `prepend_bof` → `prepend_file`
18
+ - Changed hashline edit operation types from `replace` (with optional `end`) to explicit `replace_line` and `replace_range` operations
19
+ - Added required `append_eof` and `prepend_bof` operations for file-level edits; `append` and `prepend` now require an anchor position
20
+ - Made `pos` parameter required for `replace_line`, `append`, and `prepend` operations; `append_eof` and `prepend_bof` no longer accept anchors
21
+
22
+ ### Added
23
+
24
+ - Added prompt for tradeoff metrics during autoresearch setup to collect secondary metrics alongside primary metric
25
+ - Added validation of contract path specifications to reject absolute paths and parent directory references
26
+ - Added stricter benchmark command validation in `isAutoresearchShCommand()` to reject chained commands, pipes, and redirects
27
+ - Added protection against prototype pollution in ASI data and metric cloning by filtering `__proto__`, `constructor`, and `prototype` keys
28
+ - Added `autoResumeArmed` flag to track when autoresearch should automatically resume pending runs
29
+ - Added `lastAutoResumePendingRunNumber` to prevent duplicate auto-resume prompts for the same pending run
30
+ - Added `git clean -X` invocation during failed experiment rollback to remove ignored build artifacts
31
+ - Added validation to reject `init_experiment` when a previous run is still pending and unlogged
32
+ - Added autoresearch contract system for validating benchmark commands, metrics, scope paths, off-limits paths, and constraints with fingerprint tracking to detect configuration drift
33
+ - Added `autoresearch.program.md` support for repo-local playbook overlays that guide session strategy while preserving `autoresearch.md` as source of truth
34
+ - Added pending run artifact tracking and recovery to resume incomplete experiments from `.autoresearch/runs/` directory with run numbers and benchmark logs
35
+ - Added run directory organization with numbered run artifacts, benchmark logs, and optional checks logs for experiment traceability
36
+ - Added segment fingerprinting to detect when benchmark configuration changes between runs and warn about potential incomparability
37
+ - Added support for secondary metrics tracking alongside primary metric with configurable direction (lower/higher is better)
38
+ - Added `getCurrentAutoresearchBranch()` helper to detect and validate existing autoresearch branches for session resumption
39
+ - Added `PendingRunSummary` type to track unlogged run state including parsed metrics, ASI data, and pass/fail status
40
+ - Added hidden next-turn message delivery via `deliverAs: 'nextTurn'` with optional `triggerTurn` to queue context for the next LLM call without exposing it in the editable queue
41
+ - Added `#queueHiddenNextTurnMessage()` and `#promptQueuedHiddenNextTurnMessages()` to AgentSession for autonomous tool reactions
42
+ - Added resume context support in `command-resume.md` template for user-provided guidance when resuming sessions
43
+ - Added current segment snapshot display in autoresearch prompt showing recent runs, baseline metrics, and best results
44
+ - Added pending run indicator in autoresearch prompt to guide users to complete unlogged experiments before starting new benchmarks
45
+ - Added local playbook section in autoresearch prompt when `autoresearch.program.md` exists
46
+ - Added tab replacement in dashboard and tool output rendering to prevent display corruption from shell commands with tabs
47
+ - Added boundary duplication warning when the last inserted line of a `replace_range` or `replace_line` operation matches the next surviving line, helping detect off-by-one range errors
48
+ - Added git branch isolation for autoresearch sessions via `ensureAutoresearchBranch()` to safely revert failed experiments
49
+ - Added branch status line to autoresearch initialization and resume prompts showing created or reused branch name
50
+ - Added `Files in Scope`, `Off Limits`, and `Constraints` sections to autoresearch.md template for explicit scope definition
51
+ - Added validation of ASI metadata requirements in `log_experiment` tool, requiring hypothesis for all runs and rollback context for failed runs
52
+ - Added keybinding matcher utilities `matchesAppInterrupt()` and `matchesSelectCancel()` for consistent escape key handling across components
53
+ - Added support for customizable `app.interrupt` and `tui.select.cancel` keybindings in interactive components
54
+ - Added `defaultInactive` property to `ToolDefinition` to allow tools to be registered but excluded from the initial active set, with extension responsibility for activation/deactivation
55
+ - Added dynamic tool activation/deactivation in autoresearch mode via `setActiveTools()` API
56
+ - Added separate initialization and resume workflows for autoresearch with `command-initialize.md` and `command-resume.md` prompts
57
+ - Added intent dialog to prompt users for autoresearch optimization goals when starting fresh
58
+ - Added automatic detection of existing `autoresearch.md` to resume from previous sessions without re-prompting for intent
59
+ - Added autoresearch extension with autonomous experiment loop capabilities
60
+ - Added `init_experiment` tool to initialize and reset autoresearch sessions with configurable metrics
61
+ - Added `log_experiment` tool to record experiment results with metric parsing and confidence tracking
62
+ - Added `run_experiment` tool to execute commands and capture metrics with timeout and crash detection
63
+ - Added autoresearch dashboard controller for displaying experiment results and optimization progress
64
+ - Added support for secondary metrics tracking alongside primary metric
65
+ - Added `ExtensionWidgetContent` and `ExtensionUiComponentFactory` types for flexible widget configuration
66
+ - Added `ExtensionWidgetOptions` interface with `placement` parameter to position widgets above or below editor
67
+ - Added `WidgetPlacement` type supporting 'aboveEditor' and 'belowEditor' placement options
68
+ - Added `hookWidgetContainerAbove` and `hookWidgetContainerBelow` containers to InteractiveMode for separate widget management
69
+ - Added autoresearch mode for autonomous experiment loops with init_experiment, log_experiment, and run_experiment tools
70
+ - Added autoresearch dashboard widget displaying experiment results, metrics, and optimization progress
71
+ - Added support for metric tracking with configurable direction (lower/higher is better) and secondary metrics
72
+ - Added widget placement options to position extensions above or below the editor via `placement` parameter
73
+ - Added `ExtensionWidgetContent` and `ExtensionWidgetOptions` types for flexible widget configuration
74
+ - Added ACP (Agent Client Protocol) mode for headless agent operation via `--mode acp`
75
+ - Added support for Agent Client Protocol SDK integration with session management, MCP server configuration, and streaming communication
76
+ - Added `ensureOnDisk()` method to SessionManager to persist sessions immediately for ACP discovery
77
+
78
+ ### Changed
79
+
80
+ - Changed `isAutoresearchShCommand()` to use proper command-line argument parsing instead of regex, improving accuracy for complex shell invocations
81
+ - Changed autoresearch initialization prompt to display collected tradeoff metrics in the setup summary
82
+ - Changed `command-initialize.md` template to include guidance on preflight requirements, comparability invariants, and marking measurement-critical files as off-limits
83
+ - Changed `command-initialize.md` to instruct users to write or update `autoresearch.program.md` with durable heuristics and repo-specific strategy
84
+ - Changed autoresearch resume guidance to emphasize continuing on the current protected branch rather than switching branches
85
+ - Changed autoresearch prompt to clarify that `autoresearch.md` holds durable conclusions while `autoresearch.ideas.md` is the scratch backlog
86
+ - Changed autoresearch prompt guidance to require stable measurement harness and fixed benchmark inputs unless intentionally starting a new segment
87
+ - Changed autoresearch prompt to recommend keeping equal or near-equal results when they materially simplify implementation
88
+ - Changed `init_experiment` to reset pending run state (checks, duration, ASI, artifact directory) when initializing a new segment
89
+ - Changed `log_experiment` to set `autoResumeArmed` flag after successfully logging a run to enable auto-resume on next agent turn
90
+ - Changed `run_experiment` to set `autoResumeArmed` flag and update dashboard after completing a run
91
+ - Changed auto-resume logic to only prompt when a new pending run exists or when `autoResumeArmed` is explicitly set, preventing duplicate prompts
92
+ - Changed path normalization in contract validation to use `path.posix.normalize()` for consistent path handling
93
+ - Changed autoresearch initialization to collect and validate benchmark command, metric definition, scope paths, off-limits list, and constraints before `init_experiment`
94
+ - Changed `init_experiment` to require exact benchmark command, metric definition, scope, off-limits, and constraints matching collected contract
95
+ - Changed `log_experiment` to record run number, benchmark command, scope paths, off-limits list, constraints, and segment fingerprint with each result
96
+ - Changed `run_experiment` to organize output in numbered run directories with separate benchmark and checks logs for artifact preservation
97
+ - Changed autoresearch dashboard to show pending run indicator when unlogged experiment exists
98
+ - Changed autoresearch resume workflow to detect and offer recovery of pending run artifacts before continuing experiment loop
99
+ - Changed `ExperimentResult` to include `runNumber`, `benchmarkCommand`, `scopePaths`, `offLimits`, `constraints`, and `segmentFingerprint` fields
100
+ - Changed `RunningExperiment` to track `runDirectory` and `runNumber` for artifact organization
101
+ - Changed `AutoresearchRuntime` to include `lastRunArtifactDir`, `lastRunNumber`, `lastRunSummary`, `benchmarkCommand`, `secondaryMetrics`, `scopePaths`, `offLimits`, `constraints`, and `segmentFingerprint`
102
+ - Changed autoresearch prompts to emphasize `autoresearch.md` as source of truth for benchmark, scope, and constraints
103
+ - Changed `command-initialize.md` to display collected setup (benchmark command, metric, direction, scope, off-limits, constraints) before initialization
104
+ - Changed `resume-message.md` to reference pending run artifacts and guide completion of unlogged experiments
105
+ - Changed `sendMessage()` API documentation to clarify `deliverAs: 'nextTurn'` behavior for hidden context delivery
106
+ - Changed `SendMessageHandler` type documentation to explain hidden next-turn message queuing during prompt teardown
107
+ - Changed autoresearch startup to create or reuse a dedicated `autoresearch/...` git branch before enabling the experiment loop
108
+ - Changed autoresearch to refuse startup when unrelated worktree changes would make auto-reverts unsafe
109
+ - Changed autoresearch prompts to emphasize scope and constraints as source of truth for session direction
110
+ - Changed component escape key handling to use keybinding manager for `app.interrupt` and `tui.select.cancel` with fallback to raw Escape matching
111
+ - Updated autoresearch prompt guidance to require explicit files in scope, off-limits paths, and session constraints
112
+ - Changed autoresearch command to use intent-based initialization instead of goal parameter, with user input dialog for new sessions
113
+ - Changed autoresearch startup to create or reuse a dedicated `autoresearch/...` git branch before enabling the experiment loop, and to refuse startup when unrelated worktree changes would make auto-reverts unsafe
114
+ - Changed autoresearch startup to activate experiment tools (`init_experiment`, `run_experiment`, `log_experiment`) only when autoresearch mode is enabled
115
+ - Changed autoresearch shutdown to deactivate experiment tools when mode is disabled or cleared
116
+ - Changed autoresearch session rehydration to dynamically manage experiment tool activation based on session state
117
+ - Changed autoresearch prompts and notes guidance to require explicit files in scope, off-limits paths, and session constraints
118
+ - Refactored hashline edit validation to enforce stricter anchor requirements per operation type
119
+ - Updated edit application logic to handle explicit file-level operations (`append_eof`, `prepend_bof`) separately from anchor-based operations
120
+ - Changed `setWidget` API to accept `ExtensionWidgetOptions` parameter for placement control
121
+ - Changed widget placement logic to manage widgets above and below editor separately
122
+ - Changed hashline edit application to preserve duplicated boundary lines exactly as provided instead of auto-correcting them
123
+ - Updated RPC mode to support widget placement option in `setWidget` requests
124
+ - Changed hashline edit application to preserve duplicated boundary lines exactly as provided instead of auto-correcting them
125
+ - Changed widget API to support placement options and component factories in addition to string arrays
126
+ - Updated extension UI controller to manage widgets above and below the editor separately
127
+ - Updated ask tool rendering to support markdown formatting in questions and option labels
128
+ - Refactored hook input and selector components to render titles as markdown for richer text formatting
129
+ - Changed session collection to include sessions with zero messages, enabling ACP mode to create discoverable sessions immediately
130
+ - Changed session persistence logic to use atomic file rewrite when flushing unflushed sessions to prevent duplication
131
+ - Removed hashline edit autocorrection for duplicated boundary lines; escaped-tab autocorrection remains available for leading `\\t` sequences
132
+
133
+ ### Removed
134
+
135
+ - Removed `command-start.md` prompt template in favor of separate initialize and resume workflows
136
+ - Removed auto-correction of off-by-one range edits that duplicated closing braces or boundary lines
137
+ - Removed `shouldAutocorrect` function and related boundary line deduplication logic from hashline editor
138
+ - Removed auto-correction of off-by-one range edits that duplicated closing braces or boundary lines
139
+
140
+ ### Fixed
141
+
142
+ - Fixed boundary duplication warnings to always display when replacement lines match the next surviving line, even when auto-correction is disabled
143
+ - Fixed secondary metrics validation to properly reject missing configured metrics and new metrics without the force flag
144
+ - Fixed ASI data cloning to prevent prototype pollution attacks by filtering reserved property names
145
+ - Fixed autoresearch resume to detect and recover pending run artifacts that were left unlogged from previous sessions
146
+ - Fixed dashboard overlay to display when running experiment even with zero completed results
147
+ - Fixed tab character rendering in dashboard command display and tool output summaries
148
+ - Fixed autoresearch logging to require durable ASI metadata (hypothesis, rollback_reason, next_action_hint) for every run, including rollback context for discarded, crashed, and checks-failed experiments
149
+ - Fixed autoresearch logging to require durable ASI metadata for every run, including rollback context for discarded, crashed, and checks-failed experiments
150
+
151
+
152
+ ### Fixed
153
+
154
+ - Fixed resumed and session-switched GitHub Copilot/OpenAI Responses conversations replaying stale assistant native history from older saved sessions by sanitizing persisted assistant replay metadata on rehydration and resetting provider session state across live session boundaries ([#505](https://github.com/can1357/oh-my-pi/issues/505))
5
155
  ## [13.14.0] - 2026-03-20
6
156
 
7
157
  ### Added
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@oh-my-pi/pi-coding-agent",
4
- "version": "13.14.2",
4
+ "version": "13.15.3",
5
5
  "description": "Coding agent CLI with read, bash, edit, write tools and session management",
6
6
  "homepage": "https://github.com/can1357/oh-my-pi",
7
7
  "author": "Can Boluk",
@@ -40,13 +40,14 @@
40
40
  "test": "bun test"
41
41
  },
42
42
  "dependencies": {
43
+ "@agentclientprotocol/sdk": "0.16.1",
43
44
  "@mozilla/readability": "^0.6",
44
- "@oh-my-pi/omp-stats": "13.14.2",
45
- "@oh-my-pi/pi-agent-core": "13.14.2",
46
- "@oh-my-pi/pi-ai": "13.14.2",
47
- "@oh-my-pi/pi-natives": "13.14.2",
48
- "@oh-my-pi/pi-tui": "13.14.2",
49
- "@oh-my-pi/pi-utils": "13.14.2",
45
+ "@oh-my-pi/omp-stats": "13.15.3",
46
+ "@oh-my-pi/pi-agent-core": "13.15.3",
47
+ "@oh-my-pi/pi-ai": "13.15.3",
48
+ "@oh-my-pi/pi-natives": "13.15.3",
49
+ "@oh-my-pi/pi-tui": "13.15.3",
50
+ "@oh-my-pi/pi-utils": "13.15.3",
50
51
  "@sinclair/typebox": "^0.34",
51
52
  "@xterm/headless": "^6.0",
52
53
  "ajv": "^8.18",
@@ -54,7 +55,8 @@
54
55
  "diff": "^8.0",
55
56
  "handlebars": "^4.7",
56
57
  "linkedom": "^0.18",
57
- "puppeteer": "^24.37"
58
+ "puppeteer": "^24.37",
59
+ "zod": "4.3.6"
58
60
  },
59
61
  "devDependencies": {
60
62
  "@types/bun": "^1.3"
@@ -0,0 +1,34 @@
1
+ Set up autoresearch for this intent:
2
+
3
+ {{intent}}
4
+
5
+ {{branch_status_line}}
6
+
7
+ Collected setup:
8
+
9
+ - benchmark command: `{{benchmark_command}}`
10
+ - primary metric: `{{metric_name}}`
11
+ - metric unit: `{{metric_unit}}`
12
+ - direction: `{{direction}}`
13
+ - tradeoff metrics:
14
+ {{{secondary_metrics_block}}}
15
+ - files in scope:
16
+ {{{scope_paths_block}}}
17
+ - off limits:
18
+ {{{off_limits_block}}}
19
+ - constraints:
20
+ {{{constraints_block}}}
21
+
22
+ Explain briefly what autoresearch will do in this repository, then initialize the workspace.
23
+
24
+ Your first actions:
25
+ - write `autoresearch.md`
26
+ - record the collected benchmark command, primary metric, metric unit, direction, tradeoff metrics, scope, off-limits list, and constraints in `autoresearch.md`
27
+ - add a short preflight section in `autoresearch.md` covering prerequisites, one-time setup, and the comparability invariant that must stay fixed across runs
28
+ - explicitly mark the ground-truth evaluator, fixed datasets, and other measurement-critical files as off-limits or hard constraints when they define the benchmark contract
29
+ - write or update `autoresearch.program.md` when you learn durable heuristics, failure patterns, or repo-specific strategy that future resume turns should inherit
30
+ - define the benchmark entrypoint in `autoresearch.sh`
31
+ - optionally add `autoresearch.checks.sh` if correctness or quality needs a hard gate
32
+ - run `init_experiment` with the exact collected benchmark command, metric definition, scope paths, off-limits list, and constraints
33
+ - run and log the baseline
34
+ - keep iterating until interrupted or until the configured iteration cap is reached
@@ -0,0 +1,17 @@
1
+ Resume autoresearch from the attached notes.
2
+
3
+ @{{autoresearch_md_path}}
4
+
5
+ {{branch_status_line}}
6
+ {{#if has_resume_context}}
7
+
8
+ Additional context from the user:
9
+
10
+ {{resume_context}}
11
+ {{/if}}
12
+
13
+ Use the notes as the source of truth for the current direction, scope, and constraints.
14
+ - inspect recent git history for context
15
+ - inspect `autoresearch.jsonl` if it exists
16
+ - continue the most promising unfinished direction on the current protected branch
17
+ - keep iterating until interrupted or until the configured iteration cap is reached
@@ -0,0 +1,332 @@
1
+ import * as crypto from "node:crypto";
2
+ import * as fs from "node:fs";
3
+ import * as path from "node:path";
4
+ import type { AutoresearchBenchmarkContract, AutoresearchContract, MetricDirection } from "./types";
5
+
6
+ export interface AutoresearchContractLoadResult {
7
+ contract: AutoresearchContract;
8
+ errors: string[];
9
+ path: string;
10
+ }
11
+
12
+ export interface AutoresearchScriptSnapshot {
13
+ benchmarkScript: string;
14
+ benchmarkScriptPath: string;
15
+ checksScript: string | null;
16
+ checksScriptPath: string;
17
+ errors: string[];
18
+ }
19
+
20
+ const HEADING_REGEX = /^##\s+(.+?)\s*$/;
21
+ const LIST_ITEM_REGEX = /^\s*[-*]\s+(.*)$/;
22
+ const KEY_VALUE_REGEX = /^\s*[-*]\s+([^:]+):\s*(.*)$/;
23
+
24
+ export function readAutoresearchContract(workDir: string): AutoresearchContractLoadResult {
25
+ const contractPath = path.join(workDir, "autoresearch.md");
26
+ let content = "";
27
+ try {
28
+ content = fs.readFileSync(contractPath, "utf8");
29
+ } catch {
30
+ return {
31
+ contract: createEmptyAutoresearchContract(),
32
+ errors: [`${contractPath} does not exist. Create it before initializing autoresearch.`],
33
+ path: contractPath,
34
+ };
35
+ }
36
+
37
+ const contract = parseAutoresearchContract(content);
38
+ const errors = validateAutoresearchContract(contract);
39
+ return { contract, errors, path: contractPath };
40
+ }
41
+
42
+ export function parseAutoresearchContract(markdown: string): AutoresearchContract {
43
+ const sections = extractSections(markdown);
44
+ return {
45
+ benchmark: parseBenchmarkSection(sections.get("benchmark") ?? ""),
46
+ scopePaths: parseListSection(sections.get("files in scope") ?? "", normalizeContractPathSpec),
47
+ offLimits: parseListSection(sections.get("off limits") ?? "", normalizeContractPathSpec),
48
+ constraints: parseListSection(sections.get("constraints") ?? ""),
49
+ };
50
+ }
51
+
52
+ export function validateAutoresearchContract(contract: AutoresearchContract): string[] {
53
+ const errors: string[] = [];
54
+ if (!contract.benchmark.command) {
55
+ errors.push("Benchmark.command is required in autoresearch.md.");
56
+ }
57
+ if (!contract.benchmark.primaryMetric) {
58
+ errors.push("Benchmark.primary metric is required in autoresearch.md.");
59
+ }
60
+ if (!contract.benchmark.direction) {
61
+ errors.push("Benchmark.direction must be `lower` or `higher` in autoresearch.md.");
62
+ }
63
+ if (contract.scopePaths.length === 0) {
64
+ errors.push("Files in Scope must contain at least one path in autoresearch.md.");
65
+ }
66
+ for (const scopePath of contract.scopePaths) {
67
+ if (isUnsafeContractPathSpec(scopePath)) {
68
+ errors.push(`Files in Scope contains an invalid path: ${scopePath}`);
69
+ }
70
+ }
71
+ for (const offLimitsPath of contract.offLimits) {
72
+ if (isUnsafeContractPathSpec(offLimitsPath)) {
73
+ errors.push(`Off Limits contains an invalid path: ${offLimitsPath}`);
74
+ }
75
+ }
76
+ return errors;
77
+ }
78
+
79
+ export function buildAutoresearchSegmentFingerprint(
80
+ contract: AutoresearchContract,
81
+ scripts: {
82
+ benchmarkScript: string;
83
+ checksScript: string | null;
84
+ },
85
+ ): string {
86
+ const payload = {
87
+ benchmark: contract.benchmark,
88
+ scopePaths: contract.scopePaths,
89
+ offLimits: contract.offLimits,
90
+ constraints: contract.constraints,
91
+ scripts,
92
+ };
93
+ return crypto.createHash("sha256").update(JSON.stringify(payload)).digest("hex");
94
+ }
95
+
96
+ export function getAutoresearchFingerprintMismatchError(
97
+ stateFingerprint: string | null,
98
+ workDir: string,
99
+ ): string | null {
100
+ if (!stateFingerprint) {
101
+ return "The current segment has no fingerprint metadata. Re-run init_experiment before continuing.";
102
+ }
103
+
104
+ const contractResult = readAutoresearchContract(workDir);
105
+ const scriptSnapshot = loadAutoresearchScriptSnapshot(workDir);
106
+ const errors = [...contractResult.errors, ...scriptSnapshot.errors];
107
+ if (errors.length > 0) {
108
+ return `${errors.join(" ")} Re-run init_experiment after fixing the workspace contract.`;
109
+ }
110
+
111
+ const currentFingerprint = buildAutoresearchSegmentFingerprint(contractResult.contract, {
112
+ benchmarkScript: scriptSnapshot.benchmarkScript,
113
+ checksScript: scriptSnapshot.checksScript,
114
+ });
115
+ if (currentFingerprint === stateFingerprint) {
116
+ return null;
117
+ }
118
+
119
+ return "autoresearch.md, autoresearch.sh, or autoresearch.checks.sh changed since the current segment was initialized. Re-run init_experiment before continuing.";
120
+ }
121
+
122
+ export function loadAutoresearchScriptSnapshot(workDir: string): AutoresearchScriptSnapshot {
123
+ const benchmarkScriptPath = path.join(workDir, "autoresearch.sh");
124
+ const checksScriptPath = path.join(workDir, "autoresearch.checks.sh");
125
+ const errors: string[] = [];
126
+
127
+ let benchmarkScript = "";
128
+ try {
129
+ benchmarkScript = fs.readFileSync(benchmarkScriptPath, "utf8");
130
+ } catch {
131
+ errors.push(`${benchmarkScriptPath} does not exist. Create it before initializing autoresearch.`);
132
+ }
133
+
134
+ let checksScript: string | null = null;
135
+ try {
136
+ checksScript = fs.readFileSync(checksScriptPath, "utf8");
137
+ } catch {
138
+ checksScript = null;
139
+ }
140
+
141
+ return {
142
+ benchmarkScript,
143
+ benchmarkScriptPath,
144
+ checksScript,
145
+ checksScriptPath,
146
+ errors,
147
+ };
148
+ }
149
+
150
+ export function normalizeAutoresearchList(values: readonly string[]): string[] {
151
+ const normalized: string[] = [];
152
+ const seen = new Set<string>();
153
+ for (const value of values) {
154
+ const trimmed = value.trim();
155
+ if (trimmed.length === 0) continue;
156
+ if (seen.has(trimmed)) continue;
157
+ seen.add(trimmed);
158
+ normalized.push(trimmed);
159
+ }
160
+ return normalized;
161
+ }
162
+
163
+ export function normalizeContractPathSpec(value: string): string {
164
+ const normalized = path.posix.normalize(value.trim().replaceAll("\\", "/"));
165
+ if (normalized === "." || normalized === "./") return ".";
166
+ return normalized.replace(/^\.\/+/, "").replace(/\/+$/, "");
167
+ }
168
+
169
+ export function pathMatchesContractPath(pathValue: string, specValue: string): boolean {
170
+ const normalizedPath = normalizeContractPathSpec(pathValue);
171
+ const normalizedSpec = normalizeContractPathSpec(specValue);
172
+ if (normalizedSpec === ".") return true;
173
+ return normalizedPath === normalizedSpec || normalizedPath.startsWith(`${normalizedSpec}/`);
174
+ }
175
+
176
/**
 * Compares two contract lists after normalizing each with `normalizeAutoresearchList`.
 * The comparison is order-sensitive.
 *
 * @param left First list.
 * @param right Second list.
 * @returns `true` when both normalized lists hold the same entries in the same order.
 */
export function contractListsEqual(left: readonly string[], right: readonly string[]): boolean {
	const first = normalizeAutoresearchList(left);
	const second = normalizeAutoresearchList(right);
	if (first.length !== second.length) return false;
	for (let position = 0; position < first.length; position += 1) {
		if (first[position] !== second[position]) return false;
	}
	return true;
}
182
+
183
/**
 * Compares two lists of contract path specs after passing each through
 * `normalizeContractPathList`.
 *
 * @param left First list of path specs.
 * @param right Second list of path specs.
 * @returns `true` when both normalized lists are element-wise identical.
 */
export function contractPathListsEqual(left: readonly string[], right: readonly string[]): boolean {
	const first = normalizeContractPathList(left);
	const second = normalizeContractPathList(right);
	if (first.length !== second.length) return false;
	for (let position = 0; position < first.length; position += 1) {
		if (first[position] !== second[position]) return false;
	}
	return true;
}
189
+
190
/**
 * Builds a contract with nothing filled in: nullable benchmark fields are `null`, the metric
 * unit is an empty string, and every list is empty. Each call returns fresh objects and arrays.
 *
 * @returns A new, empty contract.
 */
function createEmptyAutoresearchContract(): AutoresearchContract {
	const benchmark = {
		command: null,
		primaryMetric: null,
		metricUnit: "",
		direction: null,
		secondaryMetrics: [],
	};
	return { benchmark, scopePaths: [], offLimits: [], constraints: [] };
}
204
+
205
/**
 * Produces a canonical, sorted list of contract path specs.
 *
 * Blank entries are removed before path normalization: `normalizeContractPathSpec` maps an
 * empty string to `"."`, so normalizing first would turn a blank entry into "the whole tree".
 * The remaining entries are normalized, de-duplicated, and sorted with `localeCompare`.
 *
 * @param values Raw path specs.
 * @returns A new sorted array of unique normalized specs.
 */
function normalizeContractPathList(values: readonly string[]): string[] {
	const nonBlank = normalizeAutoresearchList(values);
	const normalizedSpecs = nonBlank.map(value => normalizeContractPathSpec(value));
	// Second pass removes duplicates that only appear after normalization ("src" vs "./src/").
	return normalizeAutoresearchList(normalizedSpecs).sort((left, right) => left.localeCompare(right));
}
210
+
211
/**
 * Splits markdown into sections keyed by heading text.
 *
 * A line matching `HEADING_REGEX` starts a new section; its first capture group, trimmed and
 * lower-cased, becomes the key. Following lines up to the next heading form the body, which
 * is stored trimmed. Lines before the first heading are ignored, and a repeated heading
 * overwrites the earlier entry.
 *
 * @param markdown Markdown source text.
 * @returns Map from lower-cased heading to trimmed section body.
 */
function extractSections(markdown: string): Map<string, string> {
	const result = new Map<string, string>();
	let heading: string | null = null;
	let body: string[] = [];

	// Stores the section collected so far, if a heading is currently open.
	const flush = (): void => {
		if (heading) {
			result.set(heading, body.join("\n").trim());
		}
	};

	for (const line of markdown.split("\n")) {
		const matched = line.match(HEADING_REGEX);
		if (!matched) {
			if (heading) body.push(line);
			continue;
		}
		flush();
		heading = matched[1]?.trim().toLowerCase() ?? null;
		body = [];
	}

	flush();
	return result;
}
237
+
238
/**
 * Parses the benchmark section of the contract into its structured form.
 *
 * Every line matching `KEY_VALUE_REGEX` contributes an entry whose key is passed through
 * `normalizeKey` and whose value is trimmed; a later occurrence of a key replaces an earlier
 * one. For the `secondarymetrics` key, bullet lines indented by at least two spaces that
 * follow the key line are appended to the inline value as a comma-separated list. Blank
 * lines between bullets are skipped; the bullet scan stops at the next key/value line or at
 * any other non-blank line.
 *
 * @param section Body text of the benchmark section.
 * @returns The benchmark contract assembled from the recognized entries.
 */
function parseBenchmarkSection(section: string): AutoresearchBenchmarkContract {
	const sectionLines = section.split("\n");
	const fields = new Map<string, string>();

	let cursor = 0;
	while (cursor < sectionLines.length) {
		const keyValue = (sectionLines[cursor] ?? "").match(KEY_VALUE_REGEX);
		if (keyValue) {
			const fieldName = normalizeKey(keyValue[1] ?? "");
			let fieldValue = (keyValue[2] ?? "").trim();

			if (fieldName === "secondarymetrics") {
				const bullets: string[] = [];
				let lookahead = cursor + 1;
				while (lookahead < sectionLines.length) {
					const candidate = sectionLines[lookahead] ?? "";
					if (candidate.match(KEY_VALUE_REGEX)) break;
					const bullet = candidate.match(/^\s{2,}[-*]\s+(.*)$/);
					if (bullet) {
						bullets.push((bullet[1] ?? "").trim());
						// Advance the outer cursor so consumed bullets are not parsed again.
						cursor = lookahead;
					} else if (candidate.trim().length > 0) {
						break;
					}
					lookahead += 1;
				}
				if (bullets.length > 0) {
					fieldValue = [fieldValue, ...bullets].filter(Boolean).join(", ");
				}
			}

			fields.set(fieldName, fieldValue);
		}
		cursor += 1;
	}

	const direction = parseDirection(fields.get("direction"));
	const command = readNullableEntry(fields.get("command"));
	const primaryMetric = readNullableEntry(fields.get("primarymetric"));
	const metricUnit = fields.get("metricunit")?.trim() ?? "";
	const secondaryMetrics = parseSecondaryMetrics(fields.get("secondarymetrics"));
	return { command, primaryMetric, metricUnit, direction, secondaryMetrics };
}
276
+
277
/**
 * Parses a markdown list section into its items.
 *
 * Lines matching `LIST_ITEM_REGEX` start a new item. A following non-blank line indented by
 * at least two whitespace characters is appended to the open item, separated by a single
 * space. Any other non-blank line closes the open item and becomes an item of its own.
 * Blank lines are skipped. The collected items are trimmed and de-duplicated with
 * `normalizeAutoresearchList`.
 *
 * When `normalizeItem` is given it is applied to every item, and the results are
 * de-duplicated again so that entries which only differ before normalization
 * (for example `src` and `./src/`) are not returned twice.
 *
 * @param section Body text of the list section.
 * @param normalizeItem Optional per-item normalizer.
 * @returns The parsed items in first-occurrence order.
 */
function parseListSection(section: string, normalizeItem?: (value: string) => string): string[] {
	const items: string[] = [];
	let activeItem: string | null = null;
	for (const rawLine of section.split("\n")) {
		const text = rawLine.trim();
		if (text.length === 0) continue;
		const match = rawLine.match(LIST_ITEM_REGEX);
		if (match) {
			if (activeItem) items.push(activeItem);
			activeItem = (match[1] ?? "").trim();
			continue;
		}
		// Indented continuation of the item that is currently open.
		if (activeItem && /^\s{2,}\S/.test(rawLine)) {
			activeItem = `${activeItem} ${text}`;
			continue;
		}
		if (activeItem) {
			items.push(activeItem);
			activeItem = null;
		}
		// A plain line that is neither a bullet nor a continuation is kept as its own item.
		items.push(text);
	}
	if (activeItem) {
		items.push(activeItem);
	}
	const normalizedItems = normalizeAutoresearchList(items);
	if (!normalizeItem) return normalizedItems;
	// Distinct raw items can normalize to the same value; keep only the first occurrence.
	return Array.from(new Set(normalizedItems.map(item => normalizeItem(item))));
}
305
+
306
/**
 * Reduces a key to a comparison form: lower-cased, with every character outside
 * `a-z` and `0-9` removed.
 *
 * @param value Raw key text.
 * @returns The compacted lower-case key.
 */
function normalizeKey(value: string): string {
	const lowered = value.toLowerCase();
	// Splitting on the unwanted runs and re-joining drops them.
	return lowered.split(/[^a-z0-9]+/).join("");
}
309
+
310
/**
 * Interprets a metric direction value.
 *
 * The value is trimmed and compared case-insensitively, so `Lower` or ` HIGHER ` are accepted
 * in the same way as `lower` and `higher`.
 *
 * @param value Raw direction text, or `undefined` when the entry is absent.
 * @returns `"lower"` or `"higher"` when recognized, otherwise `null`.
 */
function parseDirection(value: string | undefined): MetricDirection | null {
	const normalized = value?.trim().toLowerCase();
	if (normalized === "lower" || normalized === "higher") return normalized;
	return null;
}
314
+
315
/**
 * Returns the trimmed entry text, or `null` when the entry is absent or blank.
 *
 * @param value Raw entry text, or `undefined` when the entry is absent.
 * @returns The trimmed text, or `null` if nothing remains after trimming.
 */
function readNullableEntry(value: string | undefined): string | null {
	if (value === undefined) return null;
	const text = value.trim();
	return text === "" ? null : text;
}
319
+
320
/**
 * Splits a comma-separated list of secondary metric names.
 *
 * @param value Comma-separated names, or `undefined`/empty when none were given.
 * @returns Trimmed, non-empty names passed through `normalizeAutoresearchList`.
 */
function parseSecondaryMetrics(value: string | undefined): string[] {
	if (!value) return [];
	const names: string[] = [];
	for (const piece of value.split(",")) {
		const name = piece.trim();
		if (name) names.push(name);
	}
	return normalizeAutoresearchList(names);
}
329
+
330
/**
 * Reports whether a contract path spec points outside the working tree.
 *
 * The value is trimmed, backslashes are converted to forward slashes, and the path is
 * collapsed with `path.posix.normalize` before checking, so traversal hidden inside the
 * path (`a/../../b`) or written with backslashes (`..\b`) is detected as well. A spec is
 * unsafe when it is absolute (POSIX or Windows style) or when it starts by stepping into
 * the parent directory.
 *
 * @param value Path spec to check; it does not need to be normalized beforehand.
 * @returns `true` when the spec is absolute or escapes via `..`.
 */
function isUnsafeContractPathSpec(value: string): boolean {
	const normalized = path.posix.normalize(value.trim().replace(/\\/g, "/"));
	return (
		path.posix.isAbsolute(normalized) ||
		// Also catches drive-letter paths such as "C:/dir".
		path.win32.isAbsolute(normalized) ||
		normalized === ".." ||
		normalized.startsWith("../")
	);
}