autoresearcher 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/package.json +2 -2
- package/src/cli.js +1 -1
- package/src/internal-backend.js +1 -0
- package/src/run-loop.js +174 -14
package/README.md
CHANGED
|
@@ -55,7 +55,7 @@ The `init` command creates `.autoresearcher/config.json`:
|
|
|
55
55
|
```
|
|
56
56
|
|
|
57
57
|
`agentMode: "internal"` is the default. For a fully custom step command, set `agentMode` to `"command"` and edit `agentCommand`.
|
|
58
|
-
In internal mode, backend output is streamed through a
|
|
58
|
+
In internal mode, backend output is streamed through a status-focused relay so users only see clean `autoresearcher` loop logs.
|
|
59
59
|
|
|
60
60
|
## Example Configs
|
|
61
61
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "autoresearcher",
|
|
3
|
-
"version": "0.1.2",
|
|
3
|
+
"version": "0.1.4",
|
|
4
4
|
"description": "Benchmark-driven autonomous research CLI for post-quantum and blockchain R&D",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -32,6 +32,6 @@
|
|
|
32
32
|
"homepage": "https://autoresearcher.multivmlabs.com",
|
|
33
33
|
"license": "MIT",
|
|
34
34
|
"dependencies": {
|
|
35
|
-
"ralph-starter": "^0.4.
|
|
35
|
+
"ralph-starter": "^0.4.5"
|
|
36
36
|
}
|
|
37
37
|
}
|
package/src/cli.js
CHANGED
|
@@ -61,7 +61,7 @@ Key config fields:
|
|
|
61
61
|
agentMode "internal" (default) or "command"
|
|
62
62
|
agentPromptFile Markdown objective file (default: program.md)
|
|
63
63
|
agentPrompt Iteration objective for internal headless agent backend
|
|
64
|
-
streamAgentOutput Stream loop output
|
|
64
|
+
streamAgentOutput Stream loop output from the internal backend relay
|
|
65
65
|
backendAgent Optional backend agent override (amp/codex/claude-code/...)
|
|
66
66
|
backendModel Optional backend model override (provider-specific)
|
|
67
67
|
agentCommand Shell command when agentMode is "command"
|
package/src/internal-backend.js
CHANGED
|
@@ -58,6 +58,7 @@ export function buildInternalBackendCommand({
|
|
|
58
58
|
command += ' --auto';
|
|
59
59
|
command += ` --max-iterations ${safeMaxIterations}`;
|
|
60
60
|
command += ` --output-dir ${shellQuote(cwd)}`;
|
|
61
|
+
command += ' --headless --no-auto-skills';
|
|
61
62
|
command += ' --no-track-progress --no-track-cost';
|
|
62
63
|
|
|
63
64
|
if (backendAgent) {
|
package/src/run-loop.js
CHANGED
|
@@ -23,6 +23,108 @@ function isBetter(metric, best, direction) {
|
|
|
23
23
|
|
|
24
24
|
const ANSI_ESCAPE_REGEX = /\u001b\[[0-9;]*m/g;
|
|
25
25
|
const BOX_DRAWING_ONLY_REGEX = /^[\s╭╮╰╯│─┌┐└┘═║╔╗╚╝]+$/;
|
|
26
|
+
const INTERNAL_OUTPUT_SUPPRESS_PATTERNS = [
|
|
27
|
+
/\b(?:auto[- ]?)?skills?\b/i,
|
|
28
|
+
/\bdownload(?:ing|ed)?\b/i,
|
|
29
|
+
/\binstall(?:ing|ed)?\b/i,
|
|
30
|
+
/\bskill catalog\b/i,
|
|
31
|
+
/\bskill registry\b/i,
|
|
32
|
+
];
|
|
33
|
+
const INTERNAL_OUTPUT_STATUS_PATTERNS = [
|
|
34
|
+
/checking agent/i,
|
|
35
|
+
/agent (?:detected|selected|mode)/i,
|
|
36
|
+
/loop\s+\d+/i,
|
|
37
|
+
/iteration\s+\d+/i,
|
|
38
|
+
/planning/i,
|
|
39
|
+
/writing code/i,
|
|
40
|
+
/validat(?:e|ing|ion)/i,
|
|
41
|
+
/\btests?\b/i,
|
|
42
|
+
/\blint\b/i,
|
|
43
|
+
/\bbuild\b/i,
|
|
44
|
+
/\bbenchmark\b/i,
|
|
45
|
+
/\bcomplete(?:d)?\b/i,
|
|
46
|
+
/\bdone\b/i,
|
|
47
|
+
/\bcommit(?:ted)?\b/i,
|
|
48
|
+
/\berror\b/i,
|
|
49
|
+
/\bfailed\b/i,
|
|
50
|
+
/\bwarning\b/i,
|
|
51
|
+
/\bstopping\b/i,
|
|
52
|
+
/\bcircuit breaker\b/i,
|
|
53
|
+
];
|
|
54
|
+
const UNSUPPORTED_RALPH_FLAG_PATTERN = /unknown option\s+['"]--(?:headless|no-auto-skills)['"]/i;
|
|
55
|
+
const NON_FATAL_MAX_ITERATIONS_PATTERN = /(?:\berror:\s*)?max_iterations\b/i;
|
|
56
|
+
|
|
57
|
+
function stripRalphHeadlessFlags(command) {
|
|
58
|
+
return command.replace(/\s--headless\b/g, '').replace(/\s--no-auto-skills\b/g, '');
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
function isNonFatalInternalMaxIterations(agentMode, result) {
|
|
62
|
+
if (agentMode !== 'internal' || result.code === 0) {
|
|
63
|
+
return false;
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
const combined = `${result.stdout}\n${result.stderr}`;
|
|
67
|
+
return NON_FATAL_MAX_ITERATIONS_PATTERN.test(combined);
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
function isLikelyGlyphNoise(text) {
|
|
71
|
+
const alnumCount = (text.match(/[A-Za-z0-9]/g) || []).length;
|
|
72
|
+
const nonAsciiCount = (text.match(/[^\x20-\x7E]/g) || []).length;
|
|
73
|
+
|
|
74
|
+
if (alnumCount === 0) {
|
|
75
|
+
return true;
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
return nonAsciiCount > alnumCount;
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
function truncateText(value, max = 500) {
|
|
82
|
+
if (!value) return '';
|
|
83
|
+
return value.length <= max ? value : `${value.slice(0, max)}...`;
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
function buildBenchmarkFeedback({
|
|
87
|
+
iteration,
|
|
88
|
+
benchmarkCommand,
|
|
89
|
+
metricRegex,
|
|
90
|
+
direction,
|
|
91
|
+
metric,
|
|
92
|
+
bestMetric,
|
|
93
|
+
improved,
|
|
94
|
+
benchmarkFailure,
|
|
95
|
+
benchmarkOutput,
|
|
96
|
+
}) {
|
|
97
|
+
if (benchmarkFailure) {
|
|
98
|
+
return [
|
|
99
|
+
`Iteration ${iteration} benchmark status: failed to execute or parse metric.`,
|
|
100
|
+
`Benchmark command: ${benchmarkCommand}`,
|
|
101
|
+
`Metric regex: ${metricRegex}`,
|
|
102
|
+
`Benchmark output excerpt: ${truncateText(benchmarkOutput.replace(/\s+/g, ' ').trim(), 300)}`,
|
|
103
|
+
'Before optimizing further, make sure benchmark execution and metric extraction are stable.',
|
|
104
|
+
].join('\n');
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
const optimizationHint =
|
|
108
|
+
direction === 'min' ? 'Lower metric values are better.' : 'Higher metric values are better.';
|
|
109
|
+
|
|
110
|
+
if (improved) {
|
|
111
|
+
return [
|
|
112
|
+
`Iteration ${iteration} benchmark status: improved.`,
|
|
113
|
+
`Current metric: ${metric}`,
|
|
114
|
+
`Best metric so far: ${bestMetric}`,
|
|
115
|
+
optimizationHint,
|
|
116
|
+
'Continue in the same direction with another focused optimization.',
|
|
117
|
+
].join('\n');
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
return [
|
|
121
|
+
`Iteration ${iteration} benchmark status: not improved.`,
|
|
122
|
+
`Current metric: ${metric}`,
|
|
123
|
+
`Best metric so far: ${bestMetric}`,
|
|
124
|
+
optimizationHint,
|
|
125
|
+
'Try a different approach and avoid repeating the same change pattern.',
|
|
126
|
+
].join('\n');
|
|
127
|
+
}
|
|
26
128
|
|
|
27
129
|
function createChunkLineRelay(onLine) {
|
|
28
130
|
let buffer = '';
|
|
@@ -45,13 +147,23 @@ function createChunkLineRelay(onLine) {
|
|
|
45
147
|
};
|
|
46
148
|
}
|
|
47
149
|
|
|
48
|
-
function normalizeInternalBackendLine(line) {
|
|
150
|
+
function normalizeInternalBackendLine(line, channel = 'stdout') {
|
|
49
151
|
const withoutAnsi = line.replace(ANSI_ESCAPE_REGEX, '').replaceAll('\r', '');
|
|
50
152
|
const cleaned = withoutAnsi.trimEnd();
|
|
51
153
|
const compact = cleaned.trim();
|
|
52
154
|
|
|
53
155
|
if (!compact) return null;
|
|
54
156
|
if (BOX_DRAWING_ONLY_REGEX.test(compact)) return null;
|
|
157
|
+
if (isLikelyGlyphNoise(compact)) return null;
|
|
158
|
+
|
|
159
|
+
if (INTERNAL_OUTPUT_SUPPRESS_PATTERNS.some((pattern) => pattern.test(compact))) {
|
|
160
|
+
return null;
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
const hasStatusSignal = INTERNAL_OUTPUT_STATUS_PATTERNS.some((pattern) => pattern.test(compact));
|
|
164
|
+
if (!hasStatusSignal && channel !== 'stderr') {
|
|
165
|
+
return null;
|
|
166
|
+
}
|
|
55
167
|
|
|
56
168
|
const lowered = compact.toLowerCase();
|
|
57
169
|
if (lowered === 'ralph-starter') return null;
|
|
@@ -148,6 +260,7 @@ export async function runResearchLoop(config, cliOverrides = {}) {
|
|
|
148
260
|
const iterations = Number(merged.iterations ?? 20);
|
|
149
261
|
const runId = new Date().toISOString().replace(/[:.]/g, '-');
|
|
150
262
|
const resolvedPrompt = await resolveAgentPrompt(merged, cwd);
|
|
263
|
+
let benchmarkFeedback = '';
|
|
151
264
|
|
|
152
265
|
let bestMetric = null;
|
|
153
266
|
let bestIteration = 0;
|
|
@@ -174,38 +287,39 @@ export async function runResearchLoop(config, cliOverrides = {}) {
|
|
|
174
287
|
console.log(`\n--- Iteration ${i}/${iterations} ---`);
|
|
175
288
|
const beforeCommit = await getGitCommit(cwd);
|
|
176
289
|
|
|
177
|
-
const
|
|
290
|
+
const iterationAgentPrompt = benchmarkFeedback
|
|
291
|
+
? `${resolvedPrompt.prompt}\n\n## Benchmark Feedback From Previous Iteration\n${benchmarkFeedback}`
|
|
292
|
+
: resolvedPrompt.prompt;
|
|
293
|
+
|
|
294
|
+
const agentStep = getAgentStepCommand(merged, cwd, i, runId, iterationAgentPrompt);
|
|
178
295
|
const shouldStreamRawCommandOutput =
|
|
179
296
|
agentStep.agentMode === 'command' && merged.streamAgentOutput === true;
|
|
180
|
-
const
|
|
297
|
+
const shouldStreamInternalOutput =
|
|
181
298
|
agentStep.agentMode === 'internal' && merged.streamAgentOutput === true;
|
|
182
299
|
|
|
183
300
|
if (agentStep.agentMode === 'internal') {
|
|
184
|
-
|
|
185
|
-
console.log('Agent step: running (white-labeled stream)...');
|
|
186
|
-
} else {
|
|
187
|
-
console.log('Agent step: running...');
|
|
188
|
-
}
|
|
301
|
+
console.log('Agent step: running...');
|
|
189
302
|
}
|
|
190
303
|
|
|
191
|
-
const internalStdoutRelay =
|
|
304
|
+
const internalStdoutRelay = shouldStreamInternalOutput
|
|
192
305
|
? createChunkLineRelay((line) => {
|
|
193
|
-
const normalized = normalizeInternalBackendLine(line);
|
|
306
|
+
const normalized = normalizeInternalBackendLine(line, 'stdout');
|
|
194
307
|
if (normalized) {
|
|
195
308
|
console.log(` [agent] ${normalized}`);
|
|
196
309
|
}
|
|
197
310
|
})
|
|
198
311
|
: null;
|
|
199
|
-
const internalStderrRelay =
|
|
312
|
+
const internalStderrRelay = shouldStreamInternalOutput
|
|
200
313
|
? createChunkLineRelay((line) => {
|
|
201
|
-
const normalized = normalizeInternalBackendLine(line);
|
|
314
|
+
const normalized = normalizeInternalBackendLine(line, 'stderr');
|
|
202
315
|
if (normalized) {
|
|
203
316
|
console.log(` [agent:error] ${normalized}`);
|
|
204
317
|
}
|
|
205
318
|
})
|
|
206
319
|
: null;
|
|
207
320
|
|
|
208
|
-
|
|
321
|
+
let executedCommand = agentStep.command;
|
|
322
|
+
let agentResult = await runCommand(executedCommand, {
|
|
209
323
|
cwd,
|
|
210
324
|
stream: shouldStreamRawCommandOutput,
|
|
211
325
|
onStdout: internalStdoutRelay ? (chunk) => internalStdoutRelay.onChunk(chunk) : undefined,
|
|
@@ -213,6 +327,25 @@ export async function runResearchLoop(config, cliOverrides = {}) {
|
|
|
213
327
|
env: { AR_ITERATION: String(i), AR_RUN_ID: runId },
|
|
214
328
|
});
|
|
215
329
|
|
|
330
|
+
if (
|
|
331
|
+
agentStep.agentMode === 'internal' &&
|
|
332
|
+
agentResult.code !== 0 &&
|
|
333
|
+
UNSUPPORTED_RALPH_FLAG_PATTERN.test(`${agentResult.stdout}\n${agentResult.stderr}`)
|
|
334
|
+
) {
|
|
335
|
+
const fallbackCommand = stripRalphHeadlessFlags(agentStep.command);
|
|
336
|
+
if (fallbackCommand !== agentStep.command) {
|
|
337
|
+
console.log('Agent step: backend does not support headless flags, retrying with compatibility mode...');
|
|
338
|
+
executedCommand = fallbackCommand;
|
|
339
|
+
agentResult = await runCommand(executedCommand, {
|
|
340
|
+
cwd,
|
|
341
|
+
stream: shouldStreamRawCommandOutput,
|
|
342
|
+
onStdout: internalStdoutRelay ? (chunk) => internalStdoutRelay.onChunk(chunk) : undefined,
|
|
343
|
+
onStderr: internalStderrRelay ? (chunk) => internalStderrRelay.onChunk(chunk) : undefined,
|
|
344
|
+
env: { AR_ITERATION: String(i), AR_RUN_ID: runId },
|
|
345
|
+
});
|
|
346
|
+
}
|
|
347
|
+
}
|
|
348
|
+
|
|
216
349
|
internalStdoutRelay?.flush();
|
|
217
350
|
internalStderrRelay?.flush();
|
|
218
351
|
|
|
@@ -220,7 +353,13 @@ export async function runResearchLoop(config, cliOverrides = {}) {
|
|
|
220
353
|
console.log('Agent step: complete');
|
|
221
354
|
}
|
|
222
355
|
|
|
223
|
-
|
|
356
|
+
const nonFatalInternalMaxIterations = isNonFatalInternalMaxIterations(agentStep.agentMode, agentResult);
|
|
357
|
+
|
|
358
|
+
if (nonFatalInternalMaxIterations) {
|
|
359
|
+
console.log('Agent step reached backend max iterations; continuing to benchmark...');
|
|
360
|
+
}
|
|
361
|
+
|
|
362
|
+
if (agentResult.code !== 0 && !nonFatalInternalMaxIterations) {
|
|
224
363
|
console.log(`Agent step failed with code ${agentResult.code}`);
|
|
225
364
|
if (merged.stopOnAgentFailure !== false) {
|
|
226
365
|
await appendRunLog(cwd, runId, {
|
|
@@ -244,6 +383,15 @@ export async function runResearchLoop(config, cliOverrides = {}) {
|
|
|
244
383
|
if (benchmarkResult.stdout) console.log(benchmarkResult.stdout.trim());
|
|
245
384
|
if (benchmarkResult.stderr) console.log(benchmarkResult.stderr.trim());
|
|
246
385
|
|
|
386
|
+
benchmarkFeedback = buildBenchmarkFeedback({
|
|
387
|
+
iteration: i,
|
|
388
|
+
benchmarkCommand: merged.benchmarkCommand,
|
|
389
|
+
metricRegex: merged.metricRegex,
|
|
390
|
+
direction,
|
|
391
|
+
benchmarkFailure: true,
|
|
392
|
+
benchmarkOutput,
|
|
393
|
+
});
|
|
394
|
+
|
|
247
395
|
if (merged.onRejectCommand) {
|
|
248
396
|
await runCommand(merged.onRejectCommand, { cwd, stream: true });
|
|
249
397
|
}
|
|
@@ -262,6 +410,18 @@ export async function runResearchLoop(config, cliOverrides = {}) {
|
|
|
262
410
|
const improved = isBetter(metric, bestMetric, direction);
|
|
263
411
|
console.log(`Metric: ${metric}${bestMetric == null ? ' (baseline)' : ` | best: ${bestMetric}`}`);
|
|
264
412
|
|
|
413
|
+
const nextBestMetric = improved ? metric : bestMetric;
|
|
414
|
+
benchmarkFeedback = buildBenchmarkFeedback({
|
|
415
|
+
iteration: i,
|
|
416
|
+
benchmarkCommand: merged.benchmarkCommand,
|
|
417
|
+
metricRegex: merged.metricRegex,
|
|
418
|
+
direction,
|
|
419
|
+
metric,
|
|
420
|
+
bestMetric: nextBestMetric,
|
|
421
|
+
improved,
|
|
422
|
+
benchmarkFailure: false,
|
|
423
|
+
});
|
|
424
|
+
|
|
265
425
|
if (improved) {
|
|
266
426
|
bestMetric = metric;
|
|
267
427
|
bestIteration = i;
|