autoresearcher 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -3
- package/package.json +2 -2
- package/src/internal-backend.js +1 -0
- package/src/run-loop.js +153 -5
package/README.md
CHANGED
|
@@ -55,11 +55,11 @@ The `init` command creates `.autoresearcher/config.json`:
|
|
|
55
55
|
```
|
|
56
56
|
|
|
57
57
|
`agentMode: "internal"` is the default. For a fully custom step command, set `agentMode` to `"command"` and edit `agentCommand`.
|
|
58
|
-
In internal mode, backend output is streamed through a white-labeled relay so users only see `autoresearcher` logs.
|
|
58
|
+
In internal mode, backend output is streamed through a white-labeled, status-focused relay so users only see clean `autoresearcher` loop logs.
|
|
59
59
|
|
|
60
60
|
## Example Configs
|
|
61
61
|
|
|
62
|
-
Default internal headless mode:
|
|
62
|
+
Default internal mode:
|
|
63
63
|
|
|
64
64
|
```json
|
|
65
65
|
{
|
|
@@ -73,7 +73,8 @@ Default internal headless mode:
|
|
|
73
73
|
"metricRegex": "score=([0-9.]+)",
|
|
74
74
|
"direction": "max",
|
|
75
75
|
"iterations": 40,
|
|
76
|
-
"autoCommit": false
|
|
76
|
+
"autoCommit": false,
|
|
77
|
+
"streamAgentOutput": true
|
|
77
78
|
}
|
|
78
79
|
```
|
|
79
80
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "autoresearcher",
|
|
3
|
-
"version": "0.1.1",
|
|
3
|
+
"version": "0.1.3",
|
|
4
4
|
"description": "Benchmark-driven autonomous research CLI for post-quantum and blockchain R&D",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -32,6 +32,6 @@
|
|
|
32
32
|
"homepage": "https://autoresearcher.multivmlabs.com",
|
|
33
33
|
"license": "MIT",
|
|
34
34
|
"dependencies": {
|
|
35
|
-
"ralph-starter": "^0.4.
|
|
35
|
+
"ralph-starter": "^0.4.5"
|
|
36
36
|
}
|
|
37
37
|
}
|
package/src/internal-backend.js
CHANGED
|
@@ -58,6 +58,7 @@ export function buildInternalBackendCommand({
|
|
|
58
58
|
command += ' --auto';
|
|
59
59
|
command += ` --max-iterations ${safeMaxIterations}`;
|
|
60
60
|
command += ` --output-dir ${shellQuote(cwd)}`;
|
|
61
|
+
command += ' --headless --no-auto-skills';
|
|
61
62
|
command += ' --no-track-progress --no-track-cost';
|
|
62
63
|
|
|
63
64
|
if (backendAgent) {
|
package/src/run-loop.js
CHANGED
|
@@ -23,6 +23,98 @@ function isBetter(metric, best, direction) {
|
|
|
23
23
|
|
|
24
24
|
const ANSI_ESCAPE_REGEX = /\u001b\[[0-9;]*m/g;
|
|
25
25
|
const BOX_DRAWING_ONLY_REGEX = /^[\s╭╮╰╯│─┌┐└┘═║╔╗╚╝]+$/;
|
|
26
|
+
// Backend lines matching any of these patterns are dropped from the
// white-labeled relay (see normalizeInternalBackendLine), regardless of channel.
const INTERNAL_OUTPUT_SUPPRESS_PATTERNS = [
  // Matches "skill", "skills", "auto-skills", "auto skills", "autoskills".
  // This already covers phrases such as "skill catalog" and "skill registry",
  // so no dedicated entries are needed for them.
  /\b(?:auto[- ]?)?skills?\b/i,
  /\bdownload(?:ing|ed)?\b/i,
  /\binstall(?:ing|ed)?\b/i,
];

// A stdout line is relayed only when it carries at least one of these status
// signals; stderr lines are relayed even without one.
const INTERNAL_OUTPUT_STATUS_PATTERNS = [
  /checking agent/i,
  /agent (?:detected|selected|mode)/i,
  /loop\s+\d+/i,
  /iteration\s+\d+/i,
  /planning/i,
  /writing code/i,
  /validat(?:e|ing|ion)/i,
  /\btests?\b/i,
  /\blint\b/i,
  /\bbuild\b/i,
  /\bbenchmark\b/i,
  /\bcomplete(?:d)?\b/i,
  /\bdone\b/i,
  /\bcommit(?:ted)?\b/i,
  /\berror\b/i,
  /\bfailed\b/i,
  /\bwarning\b/i,
  /\bstopping\b/i,
  /\bcircuit breaker\b/i,
];

// Detects the backend CLI rejecting one of the headless flags, which triggers
// a retry of the agent step without those flags.
const UNSUPPORTED_RALPH_FLAG_PATTERN = /unknown option\s+['"]--(?:headless|no-auto-skills)['"]/i;
|
|
55
|
+
|
|
56
|
+
/**
 * Remove the `--headless` and `--no-auto-skills` flags from a backend command
 * string so the command can be retried against a backend that rejects them.
 *
 * Only whole flags are removed: the flag must be preceded by whitespace and
 * followed by whitespace or the end of the string. A plain `\b` after the flag
 * name would also match before a hyphen and mangle longer flags that merely
 * share the prefix (e.g. `--headless-timeout` would become `-timeout`).
 *
 * @param {string} command - Full shell command line.
 * @returns {string} The command without the two flags; unchanged if neither is present.
 */
function stripRalphHeadlessFlags(command) {
  return command.replace(/\s--(?:headless|no-auto-skills)(?=\s|$)/g, '');
}
|
|
59
|
+
|
|
60
|
+
/**
 * Heuristically decide whether a line is decorative glyph output (spinners,
 * borders, icons) rather than readable text.
 *
 * A line is noise when it contains no letters or decimal digits at all, or
 * when its non-text glyphs outnumber its letters/digits. Letters and digits
 * are counted in any script, so lines written in non-Latin alphabets are not
 * mistaken for noise. Glyphs are counted in UTF-16 code units (an astral
 * symbol such as an emoji counts as two), which keeps the verdict identical
 * for lines that contain no non-ASCII letters or digits.
 *
 * @param {string} text - A single, already trimmed output line.
 * @returns {boolean} `true` when the line looks like glyph noise.
 */
function isLikelyGlyphNoise(text) {
  const alnumCount = (text.match(/[\p{L}\p{Nd}]/gu) || []).length;
  if (alnumCount === 0) {
    return true;
  }

  // Everything outside printable ASCII that is not a letter or digit.
  const glyphs = text.match(/[^\x20-\x7E\p{L}\p{Nd}]/gu) || [];
  const glyphUnitCount = glyphs.reduce((total, glyph) => total + glyph.length, 0);

  return glyphUnitCount > alnumCount;
}
|
|
70
|
+
|
|
71
|
+
/**
 * Shorten text to at most `max` UTF-16 code units, appending `...` when
 * anything was cut off.
 *
 * @param {*} value - Text to shorten; falsy values yield an empty string and
 *   other non-string values are converted with `String()`.
 * @param {number} [max=500] - Maximum number of code units to keep.
 * @returns {string} The original text if it fits, otherwise the kept prefix plus `...`.
 */
function truncateText(value, max = 500) {
  if (!value) return '';

  const text = String(value);
  if (text.length <= max) return text;

  // Never cut between the two halves of a surrogate pair: a dangling high
  // surrogate would render as a replacement character in the output.
  let end = max;
  const lastKept = text.charCodeAt(end - 1);
  if (lastKept >= 0xd800 && lastKept <= 0xdbff) {
    end -= 1;
  }

  return `${text.slice(0, end)}...`;
}
|
|
75
|
+
|
|
76
|
+
/**
 * Build the plain-text feedback describing one iteration's benchmark outcome.
 * The run loop appends it to the agent prompt of the following iteration.
 *
 * @param {object} params
 * @param {number} params.iteration - Iteration the feedback describes.
 * @param {string} params.benchmarkCommand - Benchmark command (reported on failure).
 * @param {string} params.metricRegex - Metric extraction regex (reported on failure).
 * @param {string} params.direction - `'min'` means lower is better; anything else is treated as higher is better.
 * @param {*} [params.metric] - Metric measured in this iteration.
 * @param {*} [params.bestMetric] - Best metric so far.
 * @param {boolean} [params.improved] - Whether this iteration improved on the best metric.
 * @param {boolean} [params.benchmarkFailure] - Whether the benchmark failed to run or its metric could not be parsed.
 * @param {string} [params.benchmarkOutput] - Raw benchmark output; only used on failure.
 * @returns {string} Newline-separated feedback lines.
 */
function buildBenchmarkFeedback({
  iteration,
  benchmarkCommand,
  metricRegex,
  direction,
  metric,
  bestMetric,
  improved,
  benchmarkFailure,
  benchmarkOutput,
}) {
  if (benchmarkFailure) {
    // A failed benchmark may have produced no output at all; never throw while
    // building feedback. Whitespace is collapsed so the excerpt stays on one line.
    const excerpt = truncateText(String(benchmarkOutput ?? '').replace(/\s+/g, ' ').trim(), 300);

    return [
      `Iteration ${iteration} benchmark status: failed to execute or parse metric.`,
      `Benchmark command: ${benchmarkCommand}`,
      `Metric regex: ${metricRegex}`,
      `Benchmark output excerpt: ${excerpt}`,
      'Before optimizing further, make sure benchmark execution and metric extraction are stable.',
    ].join('\n');
  }

  const optimizationHint =
    direction === 'min' ? 'Lower metric values are better.' : 'Higher metric values are better.';
  const status = improved ? 'improved' : 'not improved';
  const advice = improved
    ? 'Continue in the same direction with another focused optimization.'
    : 'Try a different approach and avoid repeating the same change pattern.';

  return [
    `Iteration ${iteration} benchmark status: ${status}.`,
    `Current metric: ${metric}`,
    `Best metric so far: ${bestMetric}`,
    optimizationHint,
    advice,
  ].join('\n');
}
|
|
26
118
|
|
|
27
119
|
function createChunkLineRelay(onLine) {
|
|
28
120
|
let buffer = '';
|
|
@@ -45,13 +137,23 @@ function createChunkLineRelay(onLine) {
|
|
|
45
137
|
};
|
|
46
138
|
}
|
|
47
139
|
|
|
48
|
-
function normalizeInternalBackendLine(line) {
|
|
140
|
+
function normalizeInternalBackendLine(line, channel = 'stdout') {
|
|
49
141
|
const withoutAnsi = line.replace(ANSI_ESCAPE_REGEX, '').replaceAll('\r', '');
|
|
50
142
|
const cleaned = withoutAnsi.trimEnd();
|
|
51
143
|
const compact = cleaned.trim();
|
|
52
144
|
|
|
53
145
|
if (!compact) return null;
|
|
54
146
|
if (BOX_DRAWING_ONLY_REGEX.test(compact)) return null;
|
|
147
|
+
if (isLikelyGlyphNoise(compact)) return null;
|
|
148
|
+
|
|
149
|
+
if (INTERNAL_OUTPUT_SUPPRESS_PATTERNS.some((pattern) => pattern.test(compact))) {
|
|
150
|
+
return null;
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
const hasStatusSignal = INTERNAL_OUTPUT_STATUS_PATTERNS.some((pattern) => pattern.test(compact));
|
|
154
|
+
if (!hasStatusSignal && channel !== 'stderr') {
|
|
155
|
+
return null;
|
|
156
|
+
}
|
|
55
157
|
|
|
56
158
|
const lowered = compact.toLowerCase();
|
|
57
159
|
if (lowered === 'ralph-starter') return null;
|
|
@@ -148,6 +250,7 @@ export async function runResearchLoop(config, cliOverrides = {}) {
|
|
|
148
250
|
const iterations = Number(merged.iterations ?? 20);
|
|
149
251
|
const runId = new Date().toISOString().replace(/[:.]/g, '-');
|
|
150
252
|
const resolvedPrompt = await resolveAgentPrompt(merged, cwd);
|
|
253
|
+
let benchmarkFeedback = '';
|
|
151
254
|
|
|
152
255
|
let bestMetric = null;
|
|
153
256
|
let bestIteration = 0;
|
|
@@ -174,7 +277,11 @@ export async function runResearchLoop(config, cliOverrides = {}) {
|
|
|
174
277
|
console.log(`\n--- Iteration ${i}/${iterations} ---`);
|
|
175
278
|
const beforeCommit = await getGitCommit(cwd);
|
|
176
279
|
|
|
177
|
-
const
|
|
280
|
+
const iterationAgentPrompt = benchmarkFeedback
|
|
281
|
+
? `${resolvedPrompt.prompt}\n\n## Benchmark Feedback From Previous Iteration\n${benchmarkFeedback}`
|
|
282
|
+
: resolvedPrompt.prompt;
|
|
283
|
+
|
|
284
|
+
const agentStep = getAgentStepCommand(merged, cwd, i, runId, iterationAgentPrompt);
|
|
178
285
|
const shouldStreamRawCommandOutput =
|
|
179
286
|
agentStep.agentMode === 'command' && merged.streamAgentOutput === true;
|
|
180
287
|
const shouldStreamWhiteLabeledInternalOutput =
|
|
@@ -190,7 +297,7 @@ export async function runResearchLoop(config, cliOverrides = {}) {
|
|
|
190
297
|
|
|
191
298
|
const internalStdoutRelay = shouldStreamWhiteLabeledInternalOutput
|
|
192
299
|
? createChunkLineRelay((line) => {
|
|
193
|
-
const normalized = normalizeInternalBackendLine(line);
|
|
300
|
+
const normalized = normalizeInternalBackendLine(line, 'stdout');
|
|
194
301
|
if (normalized) {
|
|
195
302
|
console.log(` [agent] ${normalized}`);
|
|
196
303
|
}
|
|
@@ -198,14 +305,15 @@ export async function runResearchLoop(config, cliOverrides = {}) {
|
|
|
198
305
|
: null;
|
|
199
306
|
const internalStderrRelay = shouldStreamWhiteLabeledInternalOutput
|
|
200
307
|
? createChunkLineRelay((line) => {
|
|
201
|
-
const normalized = normalizeInternalBackendLine(line);
|
|
308
|
+
const normalized = normalizeInternalBackendLine(line, 'stderr');
|
|
202
309
|
if (normalized) {
|
|
203
310
|
console.log(` [agent:error] ${normalized}`);
|
|
204
311
|
}
|
|
205
312
|
})
|
|
206
313
|
: null;
|
|
207
314
|
|
|
208
|
-
|
|
315
|
+
let executedCommand = agentStep.command;
|
|
316
|
+
let agentResult = await runCommand(executedCommand, {
|
|
209
317
|
cwd,
|
|
210
318
|
stream: shouldStreamRawCommandOutput,
|
|
211
319
|
onStdout: internalStdoutRelay ? (chunk) => internalStdoutRelay.onChunk(chunk) : undefined,
|
|
@@ -213,6 +321,25 @@ export async function runResearchLoop(config, cliOverrides = {}) {
|
|
|
213
321
|
env: { AR_ITERATION: String(i), AR_RUN_ID: runId },
|
|
214
322
|
});
|
|
215
323
|
|
|
324
|
+
if (
|
|
325
|
+
agentStep.agentMode === 'internal' &&
|
|
326
|
+
agentResult.code !== 0 &&
|
|
327
|
+
UNSUPPORTED_RALPH_FLAG_PATTERN.test(`${agentResult.stdout}\n${agentResult.stderr}`)
|
|
328
|
+
) {
|
|
329
|
+
const fallbackCommand = stripRalphHeadlessFlags(agentStep.command);
|
|
330
|
+
if (fallbackCommand !== agentStep.command) {
|
|
331
|
+
console.log('Agent step: backend does not support headless flags, retrying with compatibility mode...');
|
|
332
|
+
executedCommand = fallbackCommand;
|
|
333
|
+
agentResult = await runCommand(executedCommand, {
|
|
334
|
+
cwd,
|
|
335
|
+
stream: shouldStreamRawCommandOutput,
|
|
336
|
+
onStdout: internalStdoutRelay ? (chunk) => internalStdoutRelay.onChunk(chunk) : undefined,
|
|
337
|
+
onStderr: internalStderrRelay ? (chunk) => internalStderrRelay.onChunk(chunk) : undefined,
|
|
338
|
+
env: { AR_ITERATION: String(i), AR_RUN_ID: runId },
|
|
339
|
+
});
|
|
340
|
+
}
|
|
341
|
+
}
|
|
342
|
+
|
|
216
343
|
internalStdoutRelay?.flush();
|
|
217
344
|
internalStderrRelay?.flush();
|
|
218
345
|
|
|
@@ -244,6 +371,15 @@ export async function runResearchLoop(config, cliOverrides = {}) {
|
|
|
244
371
|
if (benchmarkResult.stdout) console.log(benchmarkResult.stdout.trim());
|
|
245
372
|
if (benchmarkResult.stderr) console.log(benchmarkResult.stderr.trim());
|
|
246
373
|
|
|
374
|
+
benchmarkFeedback = buildBenchmarkFeedback({
|
|
375
|
+
iteration: i,
|
|
376
|
+
benchmarkCommand: merged.benchmarkCommand,
|
|
377
|
+
metricRegex: merged.metricRegex,
|
|
378
|
+
direction,
|
|
379
|
+
benchmarkFailure: true,
|
|
380
|
+
benchmarkOutput,
|
|
381
|
+
});
|
|
382
|
+
|
|
247
383
|
if (merged.onRejectCommand) {
|
|
248
384
|
await runCommand(merged.onRejectCommand, { cwd, stream: true });
|
|
249
385
|
}
|
|
@@ -262,6 +398,18 @@ export async function runResearchLoop(config, cliOverrides = {}) {
|
|
|
262
398
|
const improved = isBetter(metric, bestMetric, direction);
|
|
263
399
|
console.log(`Metric: ${metric}${bestMetric == null ? ' (baseline)' : ` | best: ${bestMetric}`}`);
|
|
264
400
|
|
|
401
|
+
const nextBestMetric = improved ? metric : bestMetric;
|
|
402
|
+
benchmarkFeedback = buildBenchmarkFeedback({
|
|
403
|
+
iteration: i,
|
|
404
|
+
benchmarkCommand: merged.benchmarkCommand,
|
|
405
|
+
metricRegex: merged.metricRegex,
|
|
406
|
+
direction,
|
|
407
|
+
metric,
|
|
408
|
+
bestMetric: nextBestMetric,
|
|
409
|
+
improved,
|
|
410
|
+
benchmarkFailure: false,
|
|
411
|
+
});
|
|
412
|
+
|
|
265
413
|
if (improved) {
|
|
266
414
|
bestMetric = metric;
|
|
267
415
|
bestIteration = i;
|