autoresearcher 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,124 @@
1
+ # autoresearcher
2
+
3
+ `autoresearcher` is a standalone terminal CLI for benchmark-driven autonomous research loops.
4
+
5
+ It runs this cycle repeatedly:
6
+
7
+ 1. Run one internal headless agent iteration.
8
+ 2. Run your benchmark command.
9
+ 3. Parse the metric with a regex.
10
+ 4. Keep the iteration only if the metric improved.
11
+
12
+ ## Quick Start
13
+
14
+ ```bash
15
+ npm i -g autoresearcher
16
+
17
+ # In your research repo:
18
+ cd /path/to/your/new-repo
19
+ autoresearcher wizard
20
+ autoresearcher run --iterations 20
21
+ ```
22
+
23
+ ## Commands
24
+
25
+ ```bash
26
+ autoresearcher init
27
+ autoresearcher wizard
28
+ autoresearcher run [--iterations N]
29
+ autoresearcher progress [--run-id <id>] [--output <file.svg|file.png>]
30
+ ```
31
+
32
+ ## Important Config
33
+
34
+ The `init` command creates `.autoresearcher/config.json`:
35
+
36
+ ```json
37
+ {
38
+ "agentMode": "internal",
39
+ "agentPromptFile": "program.md",
40
+ "agentPrompt": "Improve the benchmark metric while preserving correctness, test behavior, and safety.",
41
+ "agentCommand": "./scripts/agent-step.sh",
42
+ "backendAgent": "",
43
+ "backendModel": "",
44
+ "backendMaxIterations": 1,
45
+ "benchmarkCommand": "./scripts/benchmark.sh",
46
+ "metricRegex": "score=([0-9.]+)",
47
+ "direction": "max",
48
+ "iterations": 20,
49
+ "autoCommit": false,
50
+ "onRejectCommand": "",
51
+ "onKeepCommand": "",
52
+ "stopOnAgentFailure": true,
53
+ "streamAgentOutput": true,
54
+ "commitMessageTemplate": "research: improved metric to {metric} (iter {iteration})"
55
+ }
56
+ ```
57
+
58
+ `agentMode: "internal"` is the default. For a fully custom step command, set `agentMode` to `"command"` and edit `agentCommand`.
59
+
60
+ ## Example Configs
61
+
62
+ Default internal headless mode:
63
+
64
+ ```json
65
+ {
66
+ "agentMode": "internal",
67
+ "agentPromptFile": "program.md",
68
+ "agentPrompt": "Improve benchmark with safe, minimal changes.",
69
+ "backendAgent": "amp",
70
+ "backendModel": "claude-sonnet-4-5-20250929",
71
+ "backendMaxIterations": 1,
72
+ "benchmarkCommand": "./scripts/benchmark.sh",
73
+ "metricRegex": "score=([0-9.]+)",
74
+ "direction": "max",
75
+ "iterations": 40,
76
+ "autoCommit": false
77
+ }
78
+ ```
79
+
80
+ Custom command mode:
81
+
82
+ ```json
83
+ {
84
+ "agentMode": "command",
85
+ "agentCommand": "./scripts/agent-step.sh",
86
+ "benchmarkCommand": "./scripts/benchmark.sh",
87
+ "metricRegex": "score=([0-9.]+)",
88
+ "direction": "max",
89
+ "iterations": 40
90
+ }
91
+ ```
92
+
93
+ ## Typical Real Setup
94
+
95
+ 1. Start with internal mode and tailor `agentPrompt` to your objective.
96
+ 2. Set one provider key (`ANTHROPIC_API_KEY`, `OPENAI_API_KEY`, or `OPENROUTER_API_KEY`).
97
+ 3. Optionally pin `backendAgent` and `backendModel`.
98
+ 4. Edit `program.md` with your experiment objective and constraints.
99
+ 5. Replace `./scripts/benchmark.sh` so it prints one numeric metric, like `score=0.8123`.
100
+ 6. Set `direction` to `max` or `min`.
101
+ 7. Optionally switch to `agentMode: "command"` and customize `agentCommand`.
102
+ 8. Optionally set `onRejectCommand` to revert non-improving changes.
103
+
104
+ ## Progress Graph
105
+
106
+ Generate a chart from the latest run:
107
+
108
+ ```bash
109
+ autoresearcher progress --output progress.png
110
+ ```
111
+
112
+ For a specific run ID:
113
+
114
+ ```bash
115
+ autoresearcher progress --run-id 2026-03-13T12-51-48-680Z --output run.svg
116
+ ```
117
+
118
+ Example progress graph:
119
+
120
+ ![Post-Quantum Progress Example](https://raw.githubusercontent.com/multivmlabs/autoresearcher/main/apps/docs/docs/public/images/post-quantum-progress.png)
121
+
122
+ ## Logs
123
+
124
+ Every run writes JSONL logs in `.autoresearcher/runs/<timestamp>.jsonl`.
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env node
2
+ import { runCli } from '../src/cli.js';
3
+
4
/**
 * Print a one-line description of a fatal CLI failure to stderr and exit with
 * status 1. Non-Error rejections are stringified.
 */
function reportFatal(error) {
  let message;
  if (error instanceof Error) {
    message = error.message;
  } else {
    message = String(error);
  }
  console.error(`Error: ${message}`);
  process.exit(1);
}

// Drop the node binary and script path; everything after is user input.
const cliArgs = process.argv.slice(2);
runCli(cliArgs).catch(reportFatal);
package/package.json ADDED
@@ -0,0 +1,37 @@
1
+ {
2
+ "name": "autoresearcher",
3
+ "version": "0.1.0",
4
+ "description": "Benchmark-driven autonomous research CLI for post-quantum and blockchain R&D",
5
+ "type": "module",
6
+ "bin": {
7
+ "autoresearcher": "bin/autoresearcher.js"
8
+ },
9
+ "scripts": {
10
+ "start": "node ./bin/autoresearcher.js",
11
+ "check": "node ./bin/autoresearcher.js --help",
12
+ "build": "echo \"no build step for plain node cli\""
13
+ },
14
+ "keywords": [
15
+ "cli",
16
+ "autoresearch",
17
+ "benchmark",
18
+ "agent",
19
+ "post-quantum",
20
+ "blockchain",
21
+ "cryptography"
22
+ ],
23
+ "repository": {
24
+ "type": "git",
25
+ "url": "git+https://github.com/multivmlabs/autoresearcher.git"
26
+ },
27
+ "files": [
28
+ "bin",
29
+ "src",
30
+ "README.md"
31
+ ],
32
+ "homepage": "https://autoresearcher.multivmlabs.com",
33
+ "license": "MIT",
34
+ "dependencies": {
35
+ "ralph-starter": "^0.4.4"
36
+ }
37
+ }
package/src/cli.js ADDED
@@ -0,0 +1,357 @@
1
+ import fs from 'node:fs/promises';
2
+ import path from 'node:path';
3
+ import { stdin as input, stdout as output } from 'node:process';
4
+ import readline from 'node:readline/promises';
5
+ import { loadConfig, writeConfig, CONFIG_DIR } from './config.js';
6
+ import { DEFAULT_AGENT_MODE, DEFAULT_AGENT_PROMPT } from './internal-backend.js';
7
+ import { generateProgressChart } from './progress-chart.js';
8
+ import { runResearchLoop } from './run-loop.js';
9
+
10
// Backend agents offered by the setup wizard. The first entry carries an empty
// value, which is also what the wizard falls back to when the current config
// value matches none of the entries.
const AGENT_CHOICES = [
  { label: 'Auto-detect (recommended)', value: '' },
  { label: 'amp', value: 'amp' },
  { label: 'claude-code', value: 'claude-code' },
  { label: 'codex', value: 'codex' },
  { label: 'cursor', value: 'cursor' },
  { label: 'opencode', value: 'opencode' },
  { label: 'openclaw', value: 'openclaw' },
];

// Backend models offered by the setup wizard. `__custom__` is a sentinel: when
// it is selected the wizard asks for a free-form model ID instead of storing
// the sentinel itself.
const MODEL_CHOICES = [
  { label: 'Use backend default (recommended)', value: '' },
  { label: 'claude-sonnet-4-5-20250929', value: 'claude-sonnet-4-5-20250929' },
  { label: 'gpt-5', value: 'gpt-5' },
  { label: 'Custom model ID', value: '__custom__' },
];
26
+
27
/**
 * Parse the CLI tokens that follow the sub-command.
 *
 * Supported forms:
 *   --key value    -> result.key = 'value' (value may be an empty string)
 *   --key=value    -> result.key = 'value'
 *   --key          -> result.key = true (bare flag, or followed by another --option)
 *   anything else  -> appended to result._ in order
 *
 * @param {string[]} args - Raw tokens, without the sub-command itself.
 * @returns {{ _: string[] } & Record<string, string | true>} Parsed options.
 */
function parseArgs(args) {
  const result = { _: [] };

  for (let i = 0; i < args.length; i++) {
    const token = args[i];
    if (!token.startsWith('--')) {
      result._.push(token);
      continue;
    }

    const body = token.slice(2);

    // Inline form. Only the first "=" splits, so values such as
    // "score=([0-9.]+)" survive intact.
    const equalsAt = body.indexOf('=');
    if (equalsAt > 0) {
      result[body.slice(0, equalsAt)] = body.slice(equalsAt + 1);
      continue;
    }

    const next = args[i + 1];
    // Compare against undefined rather than testing truthiness: an explicit
    // empty string ("") is a legitimate value, e.g. to clear a configured model.
    if (next === undefined || next.startsWith('--')) {
      result[body] = true;
      continue;
    }

    result[body] = next;
    i++;
  }

  return result;
}
50
+
51
/**
 * Print the usage summary to stdout as a single console.log call.
 *
 * NOTE(review): column alignment inside this text looks collapsed in the
 * reviewed copy of the file - confirm spacing against the published source.
 */
function printHelp() {
  const helpLines = [
    '',
    'autoresearcher - benchmark-driven autonomous research CLI',
    '',
    'Usage:',
    'autoresearcher init',
    'autoresearcher wizard',
    'autoresearcher run [--iterations N] [--agent-prompt "..."] [--benchmark-command "..."]',
    'autoresearcher progress [--run-id <id>] [--output <file.svg|file.png>]',
    'autoresearcher --help',
    '',
    'Key config fields:',
    'agentMode "internal" (default) or "command"',
    'agentPromptFile Markdown objective file (default: program.md)',
    'agentPrompt Iteration objective for internal headless agent backend',
    'backendAgent Optional backend agent override (amp/codex/claude-code/...)',
    'backendModel Optional backend model override (provider-specific)',
    'agentCommand Shell command when agentMode is "command"',
    'benchmarkCommand Shell command that prints metric output',
    'metricRegex Regex with capture group, e.g. "score=([0-9.]+)"',
    'direction "max" or "min"',
    'iterations Loop count',
    'autoCommit true/false (git add/commit on improvements)',
    'onRejectCommand Optional command for rejected iterations',
    '',
    'Examples:',
    'autoresearcher init',
    'autoresearcher wizard',
    'autoresearcher run --iterations 30',
    'autoresearcher progress --output progress.png',
    'autoresearcher run --agent-prompt "improve benchmark metric without regressions"',
    `autoresearcher run --agent-command "amp -p 'improve benchmark'" --benchmark-command "./scripts/benchmark.sh"`,
    '',
  ];

  // The leading and trailing empty entries reproduce the blank line before
  // the banner and the final newline of the text.
  console.log(helpLines.join('\n'));
}
85
+
86
/**
 * Build a fresh config object holding every supported field at its default.
 * A new object is returned on each call, so callers may mutate it freely.
 * Key order is kept stable because it decides the layout of the written JSON.
 *
 * @returns {object} Default configuration.
 */
function createDefaultConfig() {
  // How the agent step is executed.
  const agentDefaults = {
    agentMode: DEFAULT_AGENT_MODE,
    agentPromptFile: 'program.md',
    agentPrompt: DEFAULT_AGENT_PROMPT,
    agentCommand: './scripts/agent-step.sh',
    backendAgent: '',
    backendModel: '',
    backendMaxIterations: 1,
  };

  // How an iteration is measured and how many are run.
  const benchmarkDefaults = {
    benchmarkCommand: './scripts/benchmark.sh',
    metricRegex: 'score=([0-9.]+)',
    direction: 'max',
    iterations: 20,
  };

  // Remaining settings: commit behaviour, hook commands, failure handling, output streaming.
  const lifecycleDefaults = {
    autoCommit: false,
    onRejectCommand: '',
    onKeepCommand: '',
    stopOnAgentFailure: true,
    streamAgentOutput: true,
    commitMessageTemplate: 'research: improved metric to {metric} (iter {iteration})',
  };

  return { ...agentDefaults, ...benchmarkDefaults, ...lifecycleDefaults };
}
107
+
108
/**
 * Create the objective file from a template unless it already exists.
 *
 * @param {string} cwd - Directory that `programFile` is resolved against.
 * @param {string} [programFile='program.md'] - Target path; a falsy value disables the step.
 * @param {string} [objective] - Text placed under the "Objective:" heading.
 * @returns {Promise<void>}
 */
async function writeProgramFile(cwd, programFile = 'program.md', objective = DEFAULT_AGENT_PROMPT) {
  if (!programFile) return;

  const target = path.resolve(cwd, programFile);

  // Any access failure is treated as "not there yet".
  const alreadyPresent = await fs.access(target).then(
    () => true,
    () => false
  );
  if (alreadyPresent) return;

  const content = [
    '# Research Program',
    '',
    'Objective:',
    `${objective}`,
    '',
    'Constraints:',
    '- Preserve correctness and existing behavior.',
    '- Keep changes minimal and measurable.',
    '- Avoid adding unnecessary complexity.',
    '',
    'Iteration Policy:',
    '1. Propose one improvement.',
    '2. Run benchmark.',
    '3. Keep only metric improvements.',
    '',
  ].join('\n');

  await fs.writeFile(target, content, 'utf8');
}
138
+
139
/**
 * Show a numbered menu and return the `value` of the entry the user picks.
 *
 * Pressing Enter accepts the default, which is the entry whose value equals
 * `currentValue`, or the first entry when none matches. Anything other than a
 * whole number inside the menu range triggers a re-prompt.
 *
 * @param {{ question(prompt: string): Promise<string> }} rl - Prompt interface.
 * @param {string} title - Heading printed above the menu.
 * @param {{ label: string, value: string }[]} options - Menu entries; must not be empty.
 * @param {string} [currentValue=''] - Value to pre-select.
 * @returns {Promise<string>} The chosen option value.
 * @throws {Error} When `options` is empty.
 */
async function selectChoice(rl, title, options, currentValue = '') {
  if (options.length === 0) {
    // Without this guard an empty menu would crash on Enter or loop forever.
    throw new Error(`No options available for "${title}".`);
  }

  const currentIndex = options.findIndex((option) => option.value === currentValue);
  const defaultIndex = currentIndex >= 0 ? currentIndex : 0;

  console.log(`\n${title}`);
  options.forEach((option, index) => {
    console.log(` ${index + 1}. ${option.label}`);
  });

  while (true) {
    const answer = (await rl.question(`Select option [${defaultIndex + 1}]: `)).trim();
    if (!answer) {
      return options[defaultIndex].value;
    }

    // Require plain digits: parseInt would silently read "2abc" as 2.
    if (/^\d+$/.test(answer)) {
      const index = Number(answer);
      if (index >= 1 && index <= options.length) {
        return options[index - 1].value;
      }
    }

    console.log(`Please enter a number between 1 and ${options.length}.`);
  }
}
163
+
164
/**
 * Ask for a line of free text, falling back to `defaultValue` when the
 * trimmed reply is empty. A non-empty default is shown in brackets.
 *
 * @param {{ question(prompt: string): Promise<string> }} rl - Prompt interface.
 * @param {string} label - Prompt text.
 * @param {string} [defaultValue=''] - Value returned for an empty reply.
 * @returns {Promise<string>} The trimmed reply or the default.
 */
async function askText(rl, label, defaultValue = '') {
  let prompt = label;
  if (defaultValue) {
    prompt += ` [${defaultValue}]`;
  }

  const reply = await rl.question(`${prompt}: `);
  const trimmed = reply.trim();
  return trimmed === '' ? defaultValue : trimmed;
}
169
+
170
/**
 * Interactive setup: prompt for backend agent, model, objective, benchmark
 * and iteration settings, then write the config, the example scripts and the
 * objective file into the current working directory.
 *
 * Values from an existing config (if one can be loaded) are offered as
 * defaults; fields the wizard does not ask about are carried over unchanged.
 * The saved config always uses the default agent mode.
 *
 * @returns {Promise<void>}
 * @throws {Error} When stdin or stdout is not a TTY.
 */
async function runWizard() {
  const interactive = process.stdin.isTTY && process.stdout.isTTY;
  if (!interactive) {
    throw new Error('Wizard requires an interactive terminal. Use "autoresearcher init" for non-interactive setup.');
  }

  const cwd = process.cwd();

  let existingConfig = null;
  try {
    existingConfig = await loadConfig(cwd);
  } catch {
    // No loadable config: the wizard starts from defaults only.
  }
  const base = { ...createDefaultConfig(), ...(existingConfig || {}) };

  const directionChoices = [
    { label: 'max (higher is better)', value: 'max' },
    { label: 'min (lower is better)', value: 'min' },
  ];

  const rl = readline.createInterface({ input, output });

  try {
    console.log('autoresearcher setup wizard');
    console.log('This configures internal mode with your preferred agent and model.');

    const backendAgent = await selectChoice(rl, 'Choose backend agent', AGENT_CHOICES, base.backendAgent || '');
    const modelSelection = await selectChoice(rl, 'Choose backend model', MODEL_CHOICES, base.backendModel || '');

    // The sentinel entry means "let me type the model ID myself".
    const backendModel =
      modelSelection === '__custom__'
        ? await askText(rl, 'Enter custom model ID', base.backendModel || '')
        : modelSelection;

    const agentPrompt = await askText(rl, 'Iteration objective prompt', base.agentPrompt || DEFAULT_AGENT_PROMPT);
    const agentPromptFile = await askText(rl, 'Objective file path', base.agentPromptFile || 'program.md');
    const benchmarkCommand = await askText(rl, 'Benchmark command', base.benchmarkCommand || './scripts/benchmark.sh');
    const metricRegex = await askText(rl, 'Metric regex', base.metricRegex || 'score=([0-9.]+)');

    const currentDirection = base.direction === 'min' ? 'min' : 'max';
    const direction = await selectChoice(rl, 'Metric direction', directionChoices, currentDirection);

    const iterationInput = await askText(rl, 'Iterations per run', String(base.iterations ?? 20));
    const parsedIterations = Number.parseInt(iterationInput, 10);
    let iterations = 20;
    if (Number.isInteger(parsedIterations) && parsedIterations > 0) {
      iterations = parsedIterations;
    }

    const config = {
      ...base,
      agentMode: DEFAULT_AGENT_MODE,
      backendAgent,
      backendModel,
      agentPromptFile,
      agentPrompt,
      benchmarkCommand,
      metricRegex,
      direction,
      iterations,
    };

    const configPath = await writeConfig(config, cwd);
    await writeExampleScripts(cwd);
    await writeProgramFile(cwd, config.agentPromptFile, config.agentPrompt);

    const closing = [
      `\nSaved config to ${configPath}`,
      'Created or refreshed program/objective and script templates',
      'Run: autoresearcher run',
    ];
    for (const line of closing) {
      console.log(line);
    }
  } finally {
    // Always release the terminal, including when a prompt rejects.
    rl.close();
  }
}
238
+
239
/**
 * Create `filePath` with `contents` and mark it executable, but only when the
 * file does not exist yet.
 *
 * @param {string} filePath - Script path to create.
 * @param {string} contents - Script text.
 * @returns {Promise<boolean>} true when the file was created, false when an existing file was kept.
 */
async function writeScriptIfMissing(filePath, contents) {
  try {
    // 'wx' fails with EEXIST instead of truncating an existing file.
    await fs.writeFile(filePath, contents, { encoding: 'utf8', flag: 'wx' });
  } catch (error) {
    if (error.code === 'EEXIST') return false;
    throw error;
  }

  await fs.chmod(filePath, 0o755);
  return true;
}

/**
 * Ensure `<cwd>/scripts` holds the example `benchmark.sh` and `agent-step.sh`.
 *
 * Existing scripts are left untouched (content and permissions), so re-running
 * `init` or `wizard` cannot overwrite a benchmark the user has customised.
 *
 * @param {string} cwd - Project directory.
 * @returns {Promise<void>}
 */
async function writeExampleScripts(cwd) {
  const scriptsDir = path.join(cwd, 'scripts');
  await fs.mkdir(scriptsDir, { recursive: true });

  const benchmarkPath = path.join(scriptsDir, 'benchmark.sh');
  const agentPath = path.join(scriptsDir, 'agent-step.sh');

  const benchmarkScript = `#!/usr/bin/env bash
set -euo pipefail

# Demo metric: random score for quick smoke testing.
SCORE=$(awk 'BEGIN{srand(); printf "%.6f", rand()}')
echo "score=$SCORE"
`;

  const agentScript = `#!/usr/bin/env bash
set -euo pipefail

echo "agent iteration: \${AR_ITERATION:-0}"
# Optional: switch agentMode to "command" and use this script.
# Example: amp -p "improve benchmark metric"
`;

  await writeScriptIfMissing(benchmarkPath, benchmarkScript);
  await writeScriptIfMissing(agentPath, agentScript);
}
267
+
268
/**
 * Non-interactive setup: write the default config, the example scripts and
 * the objective file into the current working directory, then print the
 * next steps.
 *
 * @returns {Promise<void>}
 */
async function runInit() {
  const cwd = process.cwd();
  const config = createDefaultConfig();

  const configPath = await writeConfig(config, cwd);
  await writeExampleScripts(cwd);
  await writeProgramFile(cwd, config.agentPromptFile, config.agentPrompt);

  const summary = [
    `Initialized ${CONFIG_DIR} config at:`,
    ` ${configPath}`,
    'Created program.md and example scripts in ./scripts/.',
    'Next step: edit program.md and benchmark script, then run: autoresearcher run',
  ];
  for (const line of summary) {
    console.log(line);
  }
}
280
+
281
/**
 * Translate parsed CLI options into config overrides.
 *
 * - Value options are copied under their config key. A value option given as
 *   a bare flag (parsed as `true`) is rejected instead of being stored as the
 *   boolean `true`.
 * - Count options (`--iterations`, `--backend-max-iterations`) are converted
 *   to positive integers so they have the same type as in config.json.
 * - `--agent-command` / `--agent-prompt` imply the matching agent mode unless
 *   `--agent-mode` is given; when both are present the prompt wins.
 * - `--auto-commit`, `--no-auto-commit`, `--stream-agent-output` and
 *   `--no-stream-agent-output` are boolean switches.
 *
 * @param {Record<string, string | true | string[]>} parsed - Output of the argument parser.
 * @returns {object} Overrides keyed by config field name.
 * @throws {Error} On a missing value or a non-positive-integer count.
 */
function buildOverrides(parsed) {
  const valueOptions = {
    'agent-mode': 'agentMode',
    'agent-prompt-file': 'agentPromptFile',
    'agent-prompt': 'agentPrompt',
    iterations: 'iterations',
    'agent-command': 'agentCommand',
    'backend-agent': 'backendAgent',
    'backend-model': 'backendModel',
    'backend-max-iterations': 'backendMaxIterations',
    'benchmark-command': 'benchmarkCommand',
    'metric-regex': 'metricRegex',
    direction: 'direction',
    'on-reject-command': 'onRejectCommand',
    'on-keep-command': 'onKeepCommand',
    'commit-template': 'commitMessageTemplate',
  };
  const countOptions = new Set(['iterations', 'backend-max-iterations']);

  const overrides = {};
  for (const [cliKey, configKey] of Object.entries(valueOptions)) {
    const raw = parsed[cliKey];
    if (raw === undefined) continue;

    if (raw === true) {
      throw new Error(`Option --${cliKey} requires a value.`);
    }

    if (!countOptions.has(cliKey)) {
      overrides[configKey] = raw;
      continue;
    }

    // Digits only: rejects "3x", "-1", "1.5" and the empty string.
    const text = String(raw).trim();
    const count = /^\d+$/.test(text) ? Number(text) : Number.NaN;
    if (!Number.isSafeInteger(count) || count < 1) {
      throw new Error(`Option --${cliKey} expects a positive integer, got "${raw}".`);
    }
    overrides[configKey] = count;
  }

  if (parsed['agent-command'] !== undefined && parsed['agent-mode'] === undefined) {
    overrides.agentMode = 'command';
  }
  if (parsed['agent-prompt'] !== undefined && parsed['agent-mode'] === undefined) {
    overrides.agentMode = 'internal';
  }

  if (parsed['auto-commit'] === true) overrides.autoCommit = true;
  if (parsed['no-auto-commit'] === true) overrides.autoCommit = false;
  if (parsed['stream-agent-output'] === true) overrides.streamAgentOutput = true;
  if (parsed['no-stream-agent-output'] === true) overrides.streamAgentOutput = false;

  return overrides;
}
320
+
321
/**
 * CLI dispatcher. Prints help when called without arguments or when `--help`
 * or `-h` appears anywhere; otherwise treats the first argument as the
 * sub-command and parses the rest as options.
 *
 * @param {string[]} args - Arguments after the executable and script path.
 * @returns {Promise<void>}
 * @throws {Error} For an unknown sub-command; errors from the sub-commands propagate.
 */
export async function runCli(args) {
  const wantsHelp = args.length === 0 || args.some((arg) => arg === '--help' || arg === '-h');
  if (wantsHelp) {
    printHelp();
    return;
  }

  const [command, ...rest] = args;
  const parsed = parseArgs(rest);

  switch (command) {
    case 'init':
      await runInit();
      return;

    case 'wizard':
      await runWizard();
      return;

    case 'run': {
      const config = await loadConfig(process.cwd());
      const overrides = buildOverrides(parsed);
      await runResearchLoop(config, overrides);
      return;
    }

    case 'progress': {
      const chartPath = await generateProgressChart({
        runId: parsed['run-id'],
        output: parsed.output,
      });
      console.log(`Progress chart written: ${chartPath}`);
      return;
    }

    default:
      throw new Error(`Unknown command: ${command}`);
  }
}
package/src/config.js ADDED
@@ -0,0 +1,29 @@
1
+ import fs from 'node:fs/promises';
2
+ import path from 'node:path';
3
+
4
// Directory (relative to the project root) that holds the CLI's files.
export const CONFIG_DIR = '.autoresearcher';
// File name of the config inside CONFIG_DIR.
export const CONFIG_FILE = 'config.json';
// Alternative directory that loadConfig falls back to when CONFIG_DIR has no config.
const LEGACY_CONFIG_DIR = '.ar-agent';

/**
 * Absolute or cwd-relative path of the current-format config file.
 *
 * @param {string} [cwd=process.cwd()] - Project directory.
 * @returns {string} Path to `<cwd>/.autoresearcher/config.json`.
 */
export function getConfigPath(cwd = process.cwd()) {
  return path.join(cwd, CONFIG_DIR, CONFIG_FILE);
}

/**
 * Read a file as UTF-8 text, mapping "does not exist" to null.
 *
 * @param {string} filePath - File to read.
 * @returns {Promise<string | null>} File text, or null when the path is missing.
 */
async function readTextIfPresent(filePath) {
  try {
    return await fs.readFile(filePath, 'utf8');
  } catch (error) {
    // ENOTDIR covers a plain file sitting where the config directory should be.
    if (error.code === 'ENOENT' || error.code === 'ENOTDIR') return null;
    throw error;
  }
}

/**
 * Load the project config, preferring `.autoresearcher/config.json` and
 * falling back to the legacy `.ar-agent/config.json`.
 *
 * @param {string} [cwd=process.cwd()] - Project directory.
 * @returns {Promise<object>} Parsed config.
 * @throws {Error} When neither file exists (the message names the current
 *   path and the setup commands) or when the file found is not valid JSON
 *   (the message names that file; the parser error is attached as `cause`).
 */
export async function loadConfig(cwd = process.cwd()) {
  const primaryPath = getConfigPath(cwd);
  const candidates = [primaryPath, path.join(cwd, LEGACY_CONFIG_DIR, CONFIG_FILE)];

  for (const filePath of candidates) {
    const raw = await readTextIfPresent(filePath);
    if (raw === null) continue;

    try {
      return JSON.parse(raw);
    } catch (error) {
      throw new Error(`Invalid JSON in ${filePath}: ${error.message}`, { cause: error });
    }
  }

  throw new Error(
    `No config found at ${primaryPath}. Run "autoresearcher init" or "autoresearcher wizard" first.`
  );
}

/**
 * Write `config` as pretty-printed JSON (2-space indent, trailing newline)
 * to the current-format config path, creating the directory when needed.
 *
 * @param {object} config - Config to persist.
 * @param {string} [cwd=process.cwd()] - Project directory.
 * @returns {Promise<string>} Path of the written file.
 */
export async function writeConfig(config, cwd = process.cwd()) {
  const dirPath = path.join(cwd, CONFIG_DIR);
  const filePath = getConfigPath(cwd);
  await fs.mkdir(dirPath, { recursive: true });
  await fs.writeFile(filePath, `${JSON.stringify(config, null, 2)}\n`, 'utf8');
  return filePath;
}
@@ -0,0 +1,72 @@
1
+ import path from 'node:path';
2
+ import { createRequire } from 'node:module';
3
+
4
// A `require` function for this ES module, created from the module's own URL.
const require = createRequire(import.meta.url);

// Agent mode used when a config names no recognised mode and has no agentCommand.
export const DEFAULT_AGENT_MODE = 'internal';
// Objective text used when no agent prompt is supplied.
export const DEFAULT_AGENT_PROMPT =
  'Improve the benchmark metric while preserving correctness, test behavior, and safety.';
9
+
10
/**
 * Wrap a value in single quotes for use as one shell word. Each embedded
 * single quote is rewritten as the sequence '\'' (close, escaped quote, reopen).
 *
 * @param {*} value - Value to quote; converted with String().
 * @returns {string} Quoted text.
 */
function shellQuote(value) {
  const text = String(value);
  const escaped = text.split("'").join("'\\''");
  return `'${escaped}'`;
}
13
+
14
/**
 * Work out the command prefix that launches the internal backend.
 *
 * Order of preference:
 *   1. the AR_INTERNAL_BACKEND_COMMAND environment variable, used verbatim;
 *   2. the current Node executable plus `dist/cli.js` inside the resolved
 *      `ralph-starter` package, both shell-quoted;
 *   3. the bare command name `ralph-starter` when the package cannot be resolved.
 *
 * @returns {string} Shell command prefix.
 */
function resolveBackendRunner() {
  const override = process.env.AR_INTERNAL_BACKEND_COMMAND;
  if (override) {
    return override;
  }

  let manifestPath;
  try {
    manifestPath = require.resolve('ralph-starter/package.json');
  } catch {
    return 'ralph-starter';
  }

  const cliPath = path.join(path.dirname(manifestPath), 'dist', 'cli.js');
  return [process.execPath, cliPath].map((part) => shellQuote(part)).join(' ');
}
28
+
29
/**
 * Decide which agent mode a config selects.
 *
 * An explicit `agentMode` of "command" or "internal" is returned as-is. Any
 * other value is ignored: a truthy `agentCommand` then selects "command",
 * otherwise the default mode applies.
 *
 * @param {{ agentMode?: string, agentCommand?: string }} config - Merged config.
 * @returns {string} The resolved agent mode.
 */
export function resolveAgentMode(config) {
  const { agentMode, agentCommand } = config;

  switch (agentMode) {
    case 'command':
    case 'internal':
      return agentMode;
    default:
      return agentCommand ? 'command' : DEFAULT_AGENT_MODE;
  }
}
40
+
41
/**
 * Assemble the shell command for one internal-backend agent step.
 *
 * The prompt (or the default prompt when none is given) gets the run ID and
 * iteration number appended and is passed, shell-quoted, to the runner's
 * `run` sub-command. `--agent` and `--model` are added only when the
 * corresponding setting is truthy.
 *
 * @param {object} options
 * @param {string} options.cwd - Directory passed as `--output-dir`.
 * @param {number} options.iteration - Iteration number put into the prompt.
 * @param {string} options.runId - Run identifier put into the prompt.
 * @param {string} [options.agentPrompt] - Objective text.
 * @param {string} [options.backendAgent] - Agent override.
 * @param {string} [options.backendModel] - Model override.
 * @param {number|string} [options.backendMaxIterations] - Value for
 *   `--max-iterations`; floored when finite and positive, otherwise 1.
 * @returns {string} Complete shell command.
 */
export function buildInternalBackendCommand({
  cwd,
  iteration,
  runId,
  agentPrompt,
  backendAgent,
  backendModel,
  backendMaxIterations,
}) {
  const backendRunner = resolveBackendRunner();

  const requestedMax = Number(backendMaxIterations ?? 1);
  let safeMaxIterations = 1;
  if (Number.isFinite(requestedMax) && requestedMax > 0) {
    safeMaxIterations = Math.floor(requestedMax);
  }

  const basePrompt = agentPrompt || DEFAULT_AGENT_PROMPT;
  const contextualPrompt = [
    basePrompt,
    '',
    'Iteration context:',
    `- run_id: ${runId}`,
    `- iteration: ${iteration}`,
  ].join('\n');

  const parts = [
    `${backendRunner} run ${shellQuote(contextualPrompt)}`,
    '--auto',
    `--max-iterations ${safeMaxIterations}`,
    `--output-dir ${shellQuote(cwd)}`,
    '--no-track-progress --no-track-cost',
  ];

  if (backendAgent) {
    parts.push(`--agent ${shellQuote(backendAgent)}`);
  }
  if (backendModel) {
    parts.push(`--model ${shellQuote(backendModel)}`);
  }

  return parts.join(' ');
}
@@ -0,0 +1,207 @@
1
+ import fs from 'node:fs/promises';
2
+ import path from 'node:path';
3
+ import { CONFIG_DIR } from './config.js';
4
+ import { runCommand } from './shell.js';
5
+
6
/**
 * Single-quote a value so it is passed to the shell as one word; embedded
 * single quotes become '\''.
 *
 * @param {*} value - Value to quote; converted with String().
 * @returns {string} Quoted text.
 */
function shellQuote(value) {
  // A replacer function keeps the replacement text literal.
  const escaped = String(value).replace(/'/g, () => "'\\''");
  return `'${escaped}'`;
}
9
+
10
/**
 * Build the `d` attribute of an SVG path that joins the points in order:
 * a move-to for the first point and line-to commands for the rest, with
 * coordinates rounded to two decimals.
 *
 * @param {{ iteration: number, metric: number }[]} points - Data points.
 * @param {(iteration: number) => number} xScale - Iteration to x pixel.
 * @param {(metric: number) => number} yScale - Metric to y pixel.
 * @returns {string} Path data; empty for no points.
 */
function linePath(points, xScale, yScale) {
  const segments = [];

  for (const [position, point] of points.entries()) {
    const verb = position === 0 ? 'M' : 'L';
    const x = xScale(point.iteration).toFixed(2);
    const y = yScale(point.metric).toFixed(2);
    segments.push(`${verb}${x} ${y}`);
  }

  return segments.join(' ');
}
20
+
21
/**
 * Produce evenly spaced tick values from `minValue` to `maxValue` inclusive.
 *
 * @param {number} minValue - First tick.
 * @param {number} maxValue - Last tick.
 * @param {number} [count=5] - Requested number of ticks; fractions are floored.
 * @returns {number[]} `[minValue]` when the range is empty or a single tick
 *   is requested, `[]` when fewer than one tick is requested, otherwise
 *   `floor(count)` values ending at `maxValue`.
 */
function buildTicks(minValue, maxValue, count = 5) {
  if (minValue === maxValue) return [minValue];

  // Floor first so the divisor below matches the number of ticks generated.
  const tickCount = Math.floor(count);
  if (!(tickCount >= 1)) return [];
  // One tick would make the step a division by zero and yield NaN.
  if (tickCount === 1) return [minValue];

  const step = (maxValue - minValue) / (tickCount - 1);
  return Array.from({ length: tickCount }, (_, index) => minValue + step * index);
}
26
+
27
/**
 * Locate the JSONL log of a run inside `<cwd>/<CONFIG_DIR>/runs`, creating
 * that directory if needed.
 *
 * With a truthy `runId` the path is derived from it without checking that
 * the file exists. Without one, the `.jsonl` file whose name sorts last is
 * used.
 *
 * @param {string} cwd - Project directory.
 * @param {string} [runId] - Run identifier (log file name without extension).
 * @returns {Promise<{ runId: string, filePath: string, runsDir: string }>}
 * @throws {Error} When no run ID is given and the directory has no `.jsonl` files.
 */
async function resolveRunLogFile(cwd, runId) {
  const runsDir = path.join(cwd, CONFIG_DIR, 'runs');
  await fs.mkdir(runsDir, { recursive: true });

  const locate = (id) => ({
    runId: id,
    filePath: path.join(runsDir, `${id}.jsonl`),
    runsDir,
  });

  if (runId) {
    return locate(runId);
  }

  const logNames = (await fs.readdir(runsDir)).filter((name) => name.endsWith('.jsonl'));
  if (logNames.length === 0) {
    throw new Error(`No run logs found in ${runsDir}`);
  }

  logNames.sort();
  const latestName = logNames[logNames.length - 1];
  return locate(latestName.replace(/\.jsonl$/, ''));
}
52
+
53
/**
 * Read a JSONL run log and extract the series needed for the chart.
 *
 * Blank lines are skipped. Lines that parse to something other than an
 * object (for example `null`) are ignored. An entry contributes to
 * `metricPoints` when both `metric` and `iteration` are finite numbers, and
 * to `bestPoints` when both `bestMetric` and `iteration` are.
 *
 * @param {string} filePath - Path of the `.jsonl` log.
 * @returns {Promise<{
 *   metricPoints: { iteration: number, metric: number, status: * }[],
 *   bestPoints: { iteration: number, metric: number }[]
 * }>}
 * @throws {Error} When a line is not valid JSON (the message gives the line
 *   number and file; the parser error is attached as `cause`) or when no
 *   metric point can be plotted.
 */
async function loadChartData(filePath) {
  const raw = await fs.readFile(filePath, 'utf8');

  const entries = [];
  for (const [index, line] of raw.split('\n').entries()) {
    const text = line.trim();
    if (!text) continue;

    let entry;
    try {
      entry = JSON.parse(text);
    } catch (error) {
      throw new Error(`Malformed JSON on line ${index + 1} of ${filePath}: ${error.message}`, {
        cause: error,
      });
    }

    // Property access on null below would throw; scalars carry no fields.
    if (entry !== null && typeof entry === 'object') {
      entries.push(entry);
    }
  }

  const metricPoints = entries
    .filter((entry) => Number.isFinite(entry.metric) && Number.isFinite(entry.iteration))
    .map((entry) => ({
      iteration: Number(entry.iteration),
      metric: Number(entry.metric),
      status: entry.status,
    }));

  const bestPoints = entries
    .filter((entry) => Number.isFinite(entry.bestMetric) && Number.isFinite(entry.iteration))
    .map((entry) => ({
      iteration: Number(entry.iteration),
      metric: Number(entry.bestMetric),
    }));

  if (metricPoints.length === 0) {
    throw new Error('Run log does not contain plottable metric points.');
  }

  return { metricPoints, bestPoints };
}
83
+
84
/**
 * Escape the five characters that are special in XML text and attributes.
 *
 * @param {*} value - Value to escape; converted with String().
 * @returns {string} Text that is safe to embed in SVG markup.
 */
function escapeXml(value) {
  return String(value)
    .replaceAll('&', '&amp;')
    .replaceAll('<', '&lt;')
    .replaceAll('>', '&gt;')
    .replaceAll('"', '&quot;')
    .replaceAll("'", '&apos;');
}

/**
 * Render the progress chart as a standalone 1200x680 SVG document.
 *
 * The chart shows the per-iteration metric as a thin line with a dot per
 * point (dots of kept iterations are highlighted) and the best-so-far metric
 * as a thicker line. The run ID is XML-escaped before being embedded, so an
 * ID containing `<` or `&` cannot break the document.
 *
 * @param {object} data
 * @param {string} data.runId - Run identifier shown under the title.
 * @param {{ iteration: number, metric: number, status: * }[]} data.metricPoints
 * @param {{ iteration: number, metric: number }[]} data.bestPoints
 * @returns {string} SVG markup.
 */
function renderSvg({ runId, metricPoints, bestPoints }) {
  const width = 1200;
  const height = 680;
  const margin = { top: 72, right: 48, bottom: 72, left: 88 };
  const plotWidth = width - margin.left - margin.right;
  const plotHeight = height - margin.top - margin.bottom;

  const allMetrics = [...metricPoints.map((point) => point.metric), ...bestPoints.map((point) => point.metric)];
  const minMetric = Math.min(...allMetrics);
  const maxMetric = Math.max(...allMetrics);
  // Never let the range collapse to zero: fall back to 5% of the value, or a tiny epsilon.
  const metricRange = Math.max(maxMetric - minMetric, Math.max(Math.abs(maxMetric) * 0.05, 1e-9));
  // 15% head-room above and below the data.
  const yMin = minMetric - metricRange * 0.15;
  const yMax = maxMetric + metricRange * 0.15;

  const minIteration = Math.min(...metricPoints.map((point) => point.iteration));
  const maxIteration = Math.max(...metricPoints.map((point) => point.iteration));
  const iterationRange = Math.max(maxIteration - minIteration, 1);

  const xScale = (iteration) => margin.left + ((iteration - minIteration) / iterationRange) * plotWidth;
  // SVG y grows downwards, so larger metrics map to smaller y.
  const yScale = (metric) => margin.top + ((yMax - metric) / (yMax - yMin)) * plotHeight;

  const xTicks = buildTicks(minIteration, maxIteration, Math.min(6, iterationRange + 1));
  const yTicks = buildTicks(yMin, yMax, 6);

  const metricPath = linePath(metricPoints, xScale, yScale);
  const bestPath = linePath(bestPoints, xScale, yScale);

  const pointDots = metricPoints
    .map((point) => {
      const color = point.status === 'keep' ? '#7fd3ff' : '#4a5f79';
      return `<circle cx="${xScale(point.iteration).toFixed(2)}" cy="${yScale(point.metric).toFixed(2)}" r="4" fill="${color}" />`;
    })
    .join('\n');

  const xTickLines = xTicks
    .map((tick) => {
      const x = xScale(tick).toFixed(2);
      const label = Number.isInteger(tick) ? String(Math.round(tick)) : tick.toFixed(1);
      return `<line x1="${x}" y1="${margin.top}" x2="${x}" y2="${height - margin.bottom}" stroke="#14202f" stroke-width="1" />
<text x="${x}" y="${height - margin.bottom + 26}" text-anchor="middle" fill="#9bb0c8" font-size="13">${label}</text>`;
    })
    .join('\n');

  const yTickLines = yTicks
    .map((tick) => {
      const y = yScale(tick).toFixed(2);
      return `<line x1="${margin.left}" y1="${y}" x2="${width - margin.right}" y2="${y}" stroke="#14202f" stroke-width="1" />
<text x="${margin.left - 12}" y="${(Number(y) + 5).toFixed(2)}" text-anchor="end" fill="#9bb0c8" font-size="13">${tick.toFixed(6)}</text>`;
    })
    .join('\n');

  return `<?xml version="1.0" encoding="UTF-8"?>
<svg xmlns="http://www.w3.org/2000/svg" width="${width}" height="${height}" viewBox="0 0 ${width} ${height}">
<rect width="100%" height="100%" fill="#000000" />
<text x="${margin.left}" y="40" fill="#f2f7ff" font-size="28" font-family="Inter, system-ui, sans-serif" font-weight="700">autoresearcher progress</text>
<text x="${margin.left}" y="62" fill="#8ea1b9" font-size="14" font-family="Inter, system-ui, sans-serif">run ${escapeXml(runId)}</text>

${xTickLines}
${yTickLines}

<line x1="${margin.left}" y1="${height - margin.bottom}" x2="${width - margin.right}" y2="${height - margin.bottom}" stroke="#304156" stroke-width="1.5" />
<line x1="${margin.left}" y1="${margin.top}" x2="${margin.left}" y2="${height - margin.bottom}" stroke="#304156" stroke-width="1.5" />

<path d="${metricPath}" fill="none" stroke="#4a5f79" stroke-width="2" />
<path d="${bestPath}" fill="none" stroke="#7fd3ff" stroke-width="3" />
${pointDots}

<text x="${margin.left}" y="${height - 18}" fill="#8ea1b9" font-size="13" font-family="Inter, system-ui, sans-serif">iteration</text>
<text x="18" y="${margin.top - 18}" fill="#8ea1b9" font-size="13" font-family="Inter, system-ui, sans-serif">metric</text>

<rect x="${width - 255}" y="30" width="205" height="54" rx="10" fill="#070b11" stroke="#1b2635" />
<line x1="${width - 240}" y1="50" x2="${width - 200}" y2="50" stroke="#7fd3ff" stroke-width="3" />
<text x="${width - 192}" y="54" fill="#d5e8fb" font-size="13" font-family="Inter, system-ui, sans-serif">best so far</text>
<line x1="${width - 240}" y1="70" x2="${width - 200}" y2="70" stroke="#4a5f79" stroke-width="2" />
<text x="${width - 192}" y="74" fill="#d5e8fb" font-size="13" font-family="Inter, system-ui, sans-serif">iteration metric</text>
</svg>`;
}
161
+
162
/**
 * Render the progress chart of a run and write it to disk.
 *
 * The target is `output` resolved against the current working directory, or
 * `<config dir>/charts/<runId>.svg` when `output` is not given. A path with
 * no extension gets `.svg` appended. `.png` output is produced by writing a
 * temporary SVG next to the target and converting it with the `sips`
 * command; the temporary file is removed whether or not the conversion
 * succeeds or the command runner rejects.
 *
 * @param {object} options
 * @param {string} [options.runId] - Run to chart; resolved by resolveRunLogFile when omitted.
 * @param {string} [options.output] - Target file (`.svg`, `.png` or no extension).
 * @returns {Promise<string>} Path of the written chart.
 * @throws {Error} For an unsupported extension or a failed PNG conversion;
 *   errors from reading the run log propagate.
 */
export async function generateProgressChart({ runId, output }) {
  const cwd = process.cwd();
  const { runId: resolvedRunId, filePath, runsDir } = await resolveRunLogFile(cwd, runId);
  const chartData = await loadChartData(filePath);

  const chartsDir = path.join(path.dirname(runsDir), 'charts');
  await fs.mkdir(chartsDir, { recursive: true });

  const outputPath = output
    ? path.resolve(cwd, output)
    : path.join(chartsDir, `${resolvedRunId}.svg`);

  const rawExt = path.extname(outputPath);
  const ext = rawExt.toLowerCase();
  const svg = renderSvg({ runId: resolvedRunId, ...chartData });

  if (!ext || ext === '.svg') {
    const target = ext ? outputPath : `${outputPath}.svg`;
    await fs.mkdir(path.dirname(target), { recursive: true });
    await fs.writeFile(target, svg, 'utf8');
    return target;
  }

  if (ext === '.png') {
    await fs.mkdir(path.dirname(outputPath), { recursive: true });
    // Strip the extension as written (e.g. ".PNG"), not just the lower-case form.
    const tempSvgPath = path.join(
      path.dirname(outputPath),
      `${path.basename(outputPath, rawExt)}.tmp-${Date.now()}.svg`
    );

    let convertResult;
    try {
      await fs.writeFile(tempSvgPath, svg, 'utf8');
      convertResult = await runCommand(
        `sips -s format png ${shellQuote(tempSvgPath)} --out ${shellQuote(outputPath)}`,
        { cwd }
      );
    } finally {
      // Best-effort cleanup; a missing temp file is not an error.
      await fs.unlink(tempSvgPath).catch(() => {});
    }

    if (convertResult.code !== 0) {
      throw new Error('PNG export requires macOS sips. Use --output <name>.svg or install a converter.');
    }

    return outputPath;
  }

  throw new Error('Unsupported output extension. Use .svg or .png');
}
@@ -0,0 +1,238 @@
1
+ import fs from 'node:fs/promises';
2
+ import path from 'node:path';
3
+ import { CONFIG_DIR } from './config.js';
4
+ import {
5
+ buildInternalBackendCommand,
6
+ DEFAULT_AGENT_PROMPT,
7
+ resolveAgentMode,
8
+ } from './internal-backend.js';
9
+ import { runCommand } from './shell.js';
10
+
11
/**
 * Extract a numeric metric from benchmark output.
 *
 * The pattern is compiled with the multiline flag and its first capture
 * group is converted to a number.
 *
 * @param {string} output - Text to search.
 * @param {string} metricRegex - Regular expression source with one capture group.
 * @returns {number|null} The parsed metric, or `null` when nothing matched,
 *   the first group is missing/empty, or the captured text is not a finite number.
 */
function parseMetric(output, metricRegex) {
  const pattern = new RegExp(metricRegex, 'm');
  const [, captured] = output.match(pattern) ?? [];

  // Covers both "no match" and "group missing or empty".
  if (!captured) {
    return null;
  }

  const value = Number(captured);
  if (!Number.isFinite(value)) {
    return null;
  }
  return value;
}
18
+
19
/**
 * Decide whether a metric beats the best value seen so far.
 *
 * @param {number} metric - Candidate value.
 * @param {number|null|undefined} best - Current best, or nullish when none exists yet.
 * @param {string} direction - `'min'` means lower is better; anything else means higher is better.
 * @returns {boolean} `true` when there is no best yet or the candidate is strictly better.
 */
function isBetter(metric, best, direction) {
  if (best === null || best === undefined) {
    return true;
  }
  if (direction === 'min') {
    return metric < best;
  }
  return metric > best;
}
23
+
24
/**
 * Append one entry to a run's JSON-lines log file.
 *
 * The log lives at `<cwd>/<CONFIG_DIR>/runs/<runId>.jsonl`; the `runs`
 * directory is created when it does not exist yet.
 *
 * @param {string} cwd - Base directory that contains the config directory.
 * @param {string} runId - Run identifier used as the log file name.
 * @param {object} entry - Value serialized with `JSON.stringify` as a single line.
 * @returns {Promise<void>}
 */
async function appendRunLog(cwd, runId, entry) {
  const runsDir = path.join(cwd, CONFIG_DIR, 'runs');
  await fs.mkdir(runsDir, { recursive: true });

  const line = `${JSON.stringify(entry)}\n`;
  const logPath = path.join(runsDir, `${runId}.jsonl`);
  await fs.appendFile(logPath, line, 'utf8');
}
30
+
31
/**
 * Read the abbreviated hash of the current git HEAD.
 *
 * @param {string} cwd - Directory in which the git command is run.
 * @returns {Promise<string|null>} Trimmed command output, or `null` when the
 *   command exits with a non-zero code.
 */
async function getGitCommit(cwd) {
  const { code, stdout } = await runCommand('git rev-parse --short HEAD', { cwd });
  if (code !== 0) {
    return null;
  }
  return stdout.trim();
}
35
+
36
/**
 * Report whether `git status --porcelain` prints anything.
 *
 * @param {string} cwd - Directory in which the git command is run.
 * @returns {Promise<boolean>} `true` only when the command succeeds and its
 *   trimmed output is non-empty.
 */
async function hasGitChanges(cwd) {
  const { code, stdout } = await runCommand('git status --porcelain', { cwd });
  if (code !== 0) {
    return false;
  }
  return stdout.trim().length > 0;
}
40
+
41
/**
 * Pick the shell command that performs one agent step.
 *
 * @param {object} merged - Effective configuration.
 * @param {string} cwd - Working directory forwarded to the internal backend builder.
 * @param {number} iteration - Current iteration number.
 * @param {string} runId - Identifier of the current run.
 * @param {string} agentPrompt - Objective text forwarded to the internal backend builder.
 * @returns {{agentMode: string, command: string}} The mode reported by
 *   `resolveAgentMode` and the command to run: the result of
 *   `buildInternalBackendCommand` for `'internal'`, otherwise `merged.agentCommand`.
 */
function getAgentStepCommand(merged, cwd, iteration, runId, agentPrompt) {
  const agentMode = resolveAgentMode(merged);

  // Any mode other than 'internal' runs the user-supplied command verbatim.
  if (agentMode !== 'internal') {
    return { agentMode, command: merged.agentCommand };
  }

  const backendOptions = {
    cwd,
    iteration,
    runId,
    agentPrompt,
    backendAgent: merged.backendAgent,
    backendModel: merged.backendModel,
    backendMaxIterations: merged.backendMaxIterations,
  };
  const command = buildInternalBackendCommand(backendOptions);
  return { agentMode, command };
}
64
+
65
/**
 * Determine the agent objective text and where it came from.
 *
 * A non-empty prompt file (path resolved against `cwd`) takes precedence.
 * If the file cannot be read or is blank after trimming, the inline
 * `agentPrompt` is used, falling back to `DEFAULT_AGENT_PROMPT`.
 *
 * @param {object} merged - Effective configuration (`agentPromptFile`, `agentPrompt`).
 * @param {string} cwd - Directory against which the prompt file path is resolved.
 * @returns {Promise<{prompt: string, source: string}>} The prompt and either
 *   the configured file path or the literal `'config.agentPrompt'`.
 */
async function resolveAgentPrompt(merged, cwd) {
  const { agentPromptFile } = merged;

  if (agentPromptFile) {
    const promptPath = path.resolve(cwd, agentPromptFile);
    let filePrompt = '';
    try {
      filePrompt = (await fs.readFile(promptPath, 'utf8')).trim();
    } catch {
      // Any read failure is non-fatal: the inline prompt is used instead.
    }
    if (filePrompt) {
      return { prompt: filePrompt, source: agentPromptFile };
    }
  }

  const inlinePrompt = merged.agentPrompt || DEFAULT_AGENT_PROMPT;
  return { prompt: inlinePrompt, source: 'config.agentPrompt' };
}
84
+
85
/**
 * Escape text for use inside a double-quoted POSIX shell argument.
 * Backslash, double quote, dollar sign and backtick keep their special
 * meaning inside double quotes, so each is prefixed with a backslash.
 */
function escapeForDoubleQuotes(text) {
  return text.replace(/[\\"$`]/g, '\\$&');
}

/**
 * Run the agent -> benchmark -> keep/reject loop.
 *
 * Each iteration runs one agent step, runs the benchmark command, parses the
 * metric from the combined stdout/stderr, and compares it with the best
 * metric seen so far in this run. Every iteration outcome is appended to the
 * run's JSON-lines log via `appendRunLog`.
 *
 * @param {object} config - Base configuration.
 * @param {object} [cliOverrides={}] - Values that take precedence over `config`.
 * @returns {Promise<void>}
 * @throws {Error} When `benchmarkCommand` or `metricRegex` is missing, when
 *   `iterations` is not a non-negative integer, when `metricRegex` is not a
 *   valid regular expression, or when `agentMode` resolves to `'command'`
 *   without an `agentCommand`.
 */
export async function runResearchLoop(config, cliOverrides = {}) {
  const cwd = process.cwd();
  const merged = {
    ...config,
    ...cliOverrides,
  };

  const required = ['benchmarkCommand', 'metricRegex'];
  for (const key of required) {
    if (!merged[key]) {
      throw new Error(`Missing required config: ${key}`);
    }
  }

  // Validate up front: a NaN iteration count would otherwise make the loop
  // silently run zero times.
  const iterations = Number(merged.iterations ?? 20);
  if (!Number.isInteger(iterations) || iterations < 0) {
    throw new Error(
      `Invalid config: iterations must be a non-negative integer (got ${String(merged.iterations)})`
    );
  }

  // Fail fast on a broken pattern instead of after the first agent step.
  try {
    new RegExp(merged.metricRegex, 'm');
  } catch (err) {
    throw new Error(`Invalid config: metricRegex is not a valid regular expression (${err.message})`, {
      cause: err,
    });
  }

  const agentMode = resolveAgentMode(merged);
  if (agentMode === 'command' && !merged.agentCommand) {
    throw new Error('Missing required config: agentCommand (required when agentMode="command")');
  }

  const direction = merged.direction === 'min' ? 'min' : 'max';
  // ISO timestamp with ':' and '.' replaced so it is usable as a file name.
  const runId = new Date().toISOString().replace(/[:.]/g, '-');
  const resolvedPrompt = await resolveAgentPrompt(merged, cwd);

  let bestMetric = null;
  let bestIteration = 0;

  console.log(`Starting run ${runId}`);
  console.log(`Agent mode: ${agentMode}`);
  if (agentMode === 'internal') {
    console.log(`Agent objective source: ${resolvedPrompt.source}`);
    console.log(`Agent objective: ${resolvedPrompt.prompt}`);
    if (merged.backendAgent) {
      console.log(`Backend agent: ${merged.backendAgent}`);
    }
    if (merged.backendModel) {
      console.log(`Backend model: ${merged.backendModel}`);
    }
  } else {
    console.log(`Agent command: ${merged.agentCommand}`);
  }
  console.log(`Benchmark command: ${merged.benchmarkCommand}`);
  console.log(`Direction: ${direction} (${direction === 'min' ? 'lower is better' : 'higher is better'})`);
  console.log(`Iterations: ${iterations}`);

  for (let i = 1; i <= iterations; i++) {
    console.log(`\n--- Iteration ${i}/${iterations} ---`);
    const beforeCommit = await getGitCommit(cwd);

    const agentStep = getAgentStepCommand(merged, cwd, i, runId, resolvedPrompt.prompt);

    const agentResult = await runCommand(agentStep.command, {
      cwd,
      stream: merged.streamAgentOutput === true,
      env: { AR_ITERATION: String(i), AR_RUN_ID: runId },
    });

    if (agentResult.code !== 0) {
      console.log(`Agent step failed with code ${agentResult.code}`);
      // Stopping is the default; only an explicit `false` lets the loop
      // continue on to the benchmark after a failed agent step.
      if (merged.stopOnAgentFailure !== false) {
        await appendRunLog(cwd, runId, {
          iteration: i,
          status: 'agent_failed',
          agentMode: agentStep.agentMode,
          agentExitCode: agentResult.code,
          beforeCommit,
          timestamp: new Date().toISOString(),
        });
        break;
      }
    }

    const benchmarkResult = await runCommand(merged.benchmarkCommand, { cwd });
    // The metric may be printed on either stream, so both are searched.
    const benchmarkOutput = `${benchmarkResult.stdout}\n${benchmarkResult.stderr}`;
    const metric = parseMetric(benchmarkOutput, merged.metricRegex);

    if (benchmarkResult.code !== 0 || metric == null) {
      console.log('Benchmark failed or metric could not be parsed.');
      if (benchmarkResult.stdout) console.log(benchmarkResult.stdout.trim());
      if (benchmarkResult.stderr) console.log(benchmarkResult.stderr.trim());

      if (merged.onRejectCommand) {
        await runCommand(merged.onRejectCommand, { cwd, stream: true });
      }

      await appendRunLog(cwd, runId, {
        iteration: i,
        status: 'benchmark_failed',
        agentMode: agentStep.agentMode,
        benchmarkExitCode: benchmarkResult.code,
        parsedMetric: metric,
        beforeCommit,
        timestamp: new Date().toISOString(),
      });
      continue;
    }

    const improved = isBetter(metric, bestMetric, direction);
    console.log(`Metric: ${metric}${bestMetric == null ? ' (baseline)' : ` | best: ${bestMetric}`}`);

    if (improved) {
      bestMetric = metric;
      bestIteration = i;
      console.log('Result: improved -> keep');

      if (merged.autoCommit === true && (await hasGitChanges(cwd))) {
        const commitMessage = merged.commitMessageTemplate
          ? merged.commitMessageTemplate
              .replaceAll('{iteration}', String(i))
              .replaceAll('{metric}', String(metric))
          : `research: improve metric to ${metric} (iter ${i})`;

        await runCommand('git add -A', { cwd });
        // Escape every character that is special inside double quotes so the
        // shell passes the message through literally.
        const commitResult = await runCommand(`git commit -m "${escapeForDoubleQuotes(commitMessage)}"`, {
          cwd,
        });
        if (commitResult.code === 0) {
          console.log(`Committed: ${commitMessage}`);
        } else {
          console.log('Commit skipped (possibly no staged changes).');
        }
      }

      if (merged.onKeepCommand) {
        await runCommand(merged.onKeepCommand, { cwd, stream: true });
      }
    } else {
      console.log('Result: not improved -> reject');
      if (merged.onRejectCommand) {
        await runCommand(merged.onRejectCommand, { cwd, stream: true });
      }
    }

    await appendRunLog(cwd, runId, {
      iteration: i,
      status: improved ? 'keep' : 'reject',
      agentMode: agentStep.agentMode,
      metric,
      bestMetric,
      beforeCommit,
      afterCommit: await getGitCommit(cwd),
      timestamp: new Date().toISOString(),
    });
  }

  console.log('\n=== Run Summary ===');
  if (bestMetric == null) {
    console.log('No valid metric was recorded.');
  } else {
    console.log(`Best metric: ${bestMetric}`);
    console.log(`Best iteration: ${bestIteration}`);
  }

  console.log(`Run logs: ${CONFIG_DIR}/runs/${runId}.jsonl`);
}
package/src/shell.js ADDED
@@ -0,0 +1,35 @@
1
+ import { spawn } from 'node:child_process';
2
+
3
/**
 * Run a command line through the system shell and wait for it to finish.
 *
 * The returned promise always resolves; failures are reported through the
 * `code` field rather than by rejection.
 *
 * @param {string} command - Command line, executed with `shell: true`.
 * @param {object} [options={}]
 * @param {string} [options.cwd] - Working directory; defaults to `process.cwd()`.
 * @param {object} [options.env] - Extra environment variables layered over `process.env`.
 * @param {boolean} [options.stream] - When truthy the child inherits this
 *   process's stdio and nothing is captured (`stdout`/`stderr` stay empty).
 * @returns {Promise<{code: number, stdout: string, stderr: string}>} Exit
 *   code (1 when the child reported no exit code or could not be spawned)
 *   plus any captured output.
 */
export async function runCommand(command, options = {}) {
  const cwd = options.cwd || process.cwd();
  const env = { ...process.env, ...(options.env || {}) };

  return new Promise((resolve) => {
    const child = spawn(command, {
      cwd,
      env,
      shell: true,
      stdio: options.stream ? 'inherit' : 'pipe',
    });

    let stdout = '';
    let stderr = '';

    if (!options.stream) {
      // Decode at the stream level so a multi-byte character split across
      // two chunks is not corrupted.
      child.stdout?.setEncoding('utf8');
      child.stderr?.setEncoding('utf8');
      child.stdout?.on('data', (chunk) => {
        stdout += chunk;
      });
      child.stderr?.on('data', (chunk) => {
        stderr += chunk;
      });
    }

    // Without a listener a spawn failure (for example a missing cwd) is an
    // unhandled 'error' event. Report it as a failed command instead. A
    // later 'close' is harmless because a promise only settles once.
    child.on('error', (err) => {
      resolve({
        code: 1,
        stdout,
        stderr: `${stderr}${err.message}\n`,
      });
    });

    child.on('close', (code) => {
      resolve({
        code: code ?? 1,
        stdout,
        stderr,
      });
    });
  });
}