autoresearcher 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -55,7 +55,7 @@ The `init` command creates `.autoresearcher/config.json`:
55
55
  ```
56
56
 
57
57
  `agentMode: "internal"` is the default. For a fully custom step command, set `agentMode` to `"command"` and edit `agentCommand`.
58
- In internal mode, backend output is streamed through a white-labeled relay so users only see `autoresearcher` logs.
58
+ In internal mode, backend output is streamed through a status-focused relay so users only see clean `autoresearcher` loop logs.
59
59
 
60
60
  ## Example Configs
61
61
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "autoresearcher",
3
- "version": "0.1.2",
3
+ "version": "0.1.4",
4
4
  "description": "Benchmark-driven autonomous research CLI for post-quantum and blockchain R&D",
5
5
  "type": "module",
6
6
  "bin": {
@@ -32,6 +32,6 @@
32
32
  "homepage": "https://autoresearcher.multivmlabs.com",
33
33
  "license": "MIT",
34
34
  "dependencies": {
35
- "ralph-starter": "^0.4.4"
35
+ "ralph-starter": "^0.4.5"
36
36
  }
37
37
  }
package/src/cli.js CHANGED
@@ -61,7 +61,7 @@ Key config fields:
61
61
  agentMode "internal" (default) or "command"
62
62
  agentPromptFile Markdown objective file (default: program.md)
63
63
  agentPrompt Iteration objective for internal headless agent backend
64
- streamAgentOutput Stream loop output (internal stream is white-labeled)
64
+ streamAgentOutput Stream loop output from the internal backend relay
65
65
  backendAgent Optional backend agent override (amp/codex/claude-code/...)
66
66
  backendModel Optional backend model override (provider-specific)
67
67
  agentCommand Shell command when agentMode is "command"
@@ -58,6 +58,7 @@ export function buildInternalBackendCommand({
58
58
  command += ' --auto';
59
59
  command += ` --max-iterations ${safeMaxIterations}`;
60
60
  command += ` --output-dir ${shellQuote(cwd)}`;
61
+ command += ' --headless --no-auto-skills';
61
62
  command += ' --no-track-progress --no-track-cost';
62
63
 
63
64
  if (backendAgent) {
package/src/run-loop.js CHANGED
@@ -23,6 +23,108 @@ function isBetter(metric, best, direction) {
23
23
 
24
24
  const ANSI_ESCAPE_REGEX = /\u001b\[[0-9;]*m/g;
25
25
  const BOX_DRAWING_ONLY_REGEX = /^[\s╭╮╰╯│─┌┐└┘═║╔╗╚╝]+$/;
26
// Lines of internal-backend output matching ANY of these patterns are dropped by
// normalizeInternalBackendLine. That check runs before the status/channel check,
// so it applies to stdout and stderr alike.
// NOTE(review): the `skill catalog` / `skill registry` entries can never be the
// deciding match, because the first pattern already matches the word "skill".
+ const INTERNAL_OUTPUT_SUPPRESS_PATTERNS = [
27
+ /\b(?:auto[- ]?)?skills?\b/i,
28
+ /\bdownload(?:ing|ed)?\b/i,
29
+ /\binstall(?:ing|ed)?\b/i,
30
+ /\bskill catalog\b/i,
31
+ /\bskill registry\b/i,
32
+ ];
// Allow-list of "status" phrases. normalizeInternalBackendLine drops a line that
// matches none of these unless the line arrived on the 'stderr' channel.
33
+ const INTERNAL_OUTPUT_STATUS_PATTERNS = [
34
+ /checking agent/i,
35
+ /agent (?:detected|selected|mode)/i,
36
+ /loop\s+\d+/i,
37
+ /iteration\s+\d+/i,
38
+ /planning/i,
39
+ /writing code/i,
40
+ /validat(?:e|ing|ion)/i,
41
+ /\btests?\b/i,
42
+ /\blint\b/i,
43
+ /\bbuild\b/i,
44
+ /\bbenchmark\b/i,
45
+ /\bcomplete(?:d)?\b/i,
46
+ /\bdone\b/i,
47
+ /\bcommit(?:ted)?\b/i,
48
+ /\berror\b/i,
49
+ /\bfailed\b/i,
50
+ /\bwarning\b/i,
51
+ /\bstopping\b/i,
52
+ /\bcircuit breaker\b/i,
53
+ ];
// Matches an "unknown option" message that names --headless or --no-auto-skills
// (single- or double-quoted). runResearchLoop tests it against the combined
// stdout/stderr of a failed internal agent step to decide whether to retry the
// command with those two flags stripped.
54
+ const UNSUPPORTED_RALPH_FLAG_PATTERN = /unknown option\s+['"]--(?:headless|no-auto-skills)['"]/i;
// Used by isNonFatalInternalMaxIterations on the combined stdout/stderr of a
// failed internal agent step.
// NOTE(review): the leading `error:` group is optional, so any case-insensitive
// occurrence of "max_iterations" followed by a word boundary matches.
55
+ const NON_FATAL_MAX_ITERATIONS_PATTERN = /(?:\berror:\s*)?max_iterations\b/i;
56
+
57
/**
 * Remove the `--headless` and `--no-auto-skills` flags from a backend command
 * line so it can be retried against a backend that rejects them.
 *
 * A flag is removed together with the single whitespace character in front of
 * it, and only when it is a complete token (followed by whitespace or the end
 * of the string). Longer options that merely start with the same text, such as
 * `--headless-mode` or `--no-auto-skills=x`, are left untouched instead of
 * being cut into a broken fragment.
 *
 * NOTE(review): matching is purely textual, so an occurrence of ` --headless `
 * inside a quoted argument (for example the prompt) is removed as well.
 *
 * @param {string} command - Shell command string built for the internal backend.
 * @returns {string} The command without the two flags; unchanged if neither is present.
 */
function stripRalphHeadlessFlags(command) {
  // The lookahead enforces a real token boundary; `\b` would also match before
  // `-` or `=`, which mangles longer option names.
  return command.replace(/\s--(?:headless|no-auto-skills)(?=\s|$)/g, '');
}
60
+
61
/**
 * Decide whether a failed internal agent step only failed because the backend
 * reported its max-iterations condition.
 *
 * @param {string} agentMode - Agent mode of the step; only 'internal' qualifies.
 * @param {{ code: number, stdout: string, stderr: string }} result - Result of the agent command.
 * @returns {boolean} True when the step is internal, exited non-zero, and its
 *   combined stdout/stderr matches NON_FATAL_MAX_ITERATIONS_PATTERN.
 */
function isNonFatalInternalMaxIterations(agentMode, result) {
  const isFailedInternalStep = agentMode === 'internal' && result.code !== 0;
  if (!isFailedInternalStep) {
    return false;
  }

  // Search both streams at once; the marker may be printed on either.
  const output = [String(result.stdout), String(result.stderr)].join('\n');
  return NON_FATAL_MAX_ITERATIONS_PATTERN.test(output);
}
69
+
70
/**
 * Heuristic: does this line consist mostly of decorative glyphs (spinners,
 * box drawing, emoji, control characters) rather than readable text?
 *
 * Letters and digits from any script count as content, so lines written in
 * non-Latin scripts or with accented characters are no longer mistaken for
 * noise. Combining marks are neutral. Every other character outside printable
 * ASCII counts as a glyph, weighted by its UTF-16 length exactly as before, so
 * the verdict for symbol-heavy lines is unchanged.
 *
 * @param {string} text - A single output line (already trimmed by the caller).
 * @returns {boolean} True when the line has no letters/digits at all, or when
 *   glyphs outnumber letters/digits.
 */
function isLikelyGlyphNoise(text) {
  const contentCount = (text.match(/[\p{L}\p{N}]/gu) ?? []).length;
  if (contentCount === 0) {
    return true;
  }

  // join('').length counts UTF-16 code units, matching the weight the previous
  // non-unicode regex gave to astral symbols such as emoji.
  const glyphWeight = (text.match(/[^\x20-\x7E\p{L}\p{N}\p{M}]/gu) ?? []).join('').length;
  return glyphWeight > contentCount;
}
80
+
81
/**
 * Shorten a value to at most `max` UTF-16 code units, appending `...` when
 * anything was cut off.
 *
 * Falsy input yields an empty string. Any other non-string input is converted
 * with `String()` instead of throwing. The cut never lands between the two
 * halves of a surrogate pair, so the result does not end in a lone surrogate.
 *
 * @param {*} value - Text to shorten.
 * @param {number} [max=500] - Maximum number of code units kept before the ellipsis.
 * @returns {string} The original text when it fits, otherwise the shortened text plus `...`.
 */
function truncateText(value, max = 500) {
  if (!value) return '';

  const text = String(value);
  if (text.length <= max) return text;

  let end = max;
  const lastKept = text.charCodeAt(end - 1);
  // A high surrogate in the last kept position means its low half would be cut
  // away; drop the high half too rather than emit a broken character.
  if (lastKept >= 0xd800 && lastKept <= 0xdbff) {
    end -= 1;
  }
  return `${text.slice(0, end)}...`;
}
85
+
86
/**
 * Build the plain-text benchmark summary that is appended to the agent prompt
 * of the next iteration.
 *
 * @param {object} params
 * @param {number} params.iteration - Iteration the feedback describes.
 * @param {string} params.benchmarkCommand - Benchmark command (reported on failure).
 * @param {string} params.metricRegex - Metric extraction regex (reported on failure).
 * @param {string} params.direction - 'min' means lower is better; anything else
 *   is reported as higher is better.
 * @param {*} [params.metric] - Metric measured in this iteration.
 * @param {*} [params.bestMetric] - Best metric so far.
 * @param {boolean} [params.improved] - Whether this iteration improved the metric.
 * @param {boolean} [params.benchmarkFailure] - True when the benchmark could not
 *   be executed or its metric could not be parsed.
 * @param {*} [params.benchmarkOutput] - Raw benchmark output; only read on failure.
 *   A missing or non-string value is tolerated.
 * @returns {string} Newline-joined feedback lines.
 */
function buildBenchmarkFeedback({
  iteration,
  benchmarkCommand,
  metricRegex,
  direction,
  metric,
  bestMetric,
  improved,
  benchmarkFailure,
  benchmarkOutput,
}) {
  if (benchmarkFailure) {
    // Coerce first: a benchmark that produced no output must still yield
    // feedback rather than a TypeError from `.replace`.
    const flattenedOutput = String(benchmarkOutput ?? '').replace(/\s+/g, ' ').trim();
    return [
      `Iteration ${iteration} benchmark status: failed to execute or parse metric.`,
      `Benchmark command: ${benchmarkCommand}`,
      `Metric regex: ${metricRegex}`,
      `Benchmark output excerpt: ${truncateText(flattenedOutput, 300)}`,
      'Before optimizing further, make sure benchmark execution and metric extraction are stable.',
    ].join('\n');
  }

  const optimizationHint =
    direction === 'min' ? 'Lower metric values are better.' : 'Higher metric values are better.';

  // The two outcomes differ only in the status word and the closing advice.
  const [status, advice] = improved
    ? ['improved', 'Continue in the same direction with another focused optimization.']
    : ['not improved', 'Try a different approach and avoid repeating the same change pattern.'];

  return [
    `Iteration ${iteration} benchmark status: ${status}.`,
    `Current metric: ${metric}`,
    `Best metric so far: ${bestMetric}`,
    optimizationHint,
    advice,
  ].join('\n');
}
26
128
 
27
129
  function createChunkLineRelay(onLine) {
28
130
  let buffer = '';
@@ -45,13 +147,23 @@ function createChunkLineRelay(onLine) {
45
147
  };
46
148
  }
47
149
 
48
- function normalizeInternalBackendLine(line) {
150
+ function normalizeInternalBackendLine(line, channel = 'stdout') {
49
151
  const withoutAnsi = line.replace(ANSI_ESCAPE_REGEX, '').replaceAll('\r', '');
50
152
  const cleaned = withoutAnsi.trimEnd();
51
153
  const compact = cleaned.trim();
52
154
 
53
155
  if (!compact) return null;
54
156
  if (BOX_DRAWING_ONLY_REGEX.test(compact)) return null;
157
+ if (isLikelyGlyphNoise(compact)) return null;
158
+
159
+ if (INTERNAL_OUTPUT_SUPPRESS_PATTERNS.some((pattern) => pattern.test(compact))) {
160
+ return null;
161
+ }
162
+
163
+ const hasStatusSignal = INTERNAL_OUTPUT_STATUS_PATTERNS.some((pattern) => pattern.test(compact));
164
+ if (!hasStatusSignal && channel !== 'stderr') {
165
+ return null;
166
+ }
55
167
 
56
168
  const lowered = compact.toLowerCase();
57
169
  if (lowered === 'ralph-starter') return null;
@@ -148,6 +260,7 @@ export async function runResearchLoop(config, cliOverrides = {}) {
148
260
  const iterations = Number(merged.iterations ?? 20);
149
261
  const runId = new Date().toISOString().replace(/[:.]/g, '-');
150
262
  const resolvedPrompt = await resolveAgentPrompt(merged, cwd);
263
+ let benchmarkFeedback = '';
151
264
 
152
265
  let bestMetric = null;
153
266
  let bestIteration = 0;
@@ -174,38 +287,39 @@ export async function runResearchLoop(config, cliOverrides = {}) {
174
287
  console.log(`\n--- Iteration ${i}/${iterations} ---`);
175
288
  const beforeCommit = await getGitCommit(cwd);
176
289
 
177
- const agentStep = getAgentStepCommand(merged, cwd, i, runId, resolvedPrompt.prompt);
290
+ const iterationAgentPrompt = benchmarkFeedback
291
+ ? `${resolvedPrompt.prompt}\n\n## Benchmark Feedback From Previous Iteration\n${benchmarkFeedback}`
292
+ : resolvedPrompt.prompt;
293
+
294
+ const agentStep = getAgentStepCommand(merged, cwd, i, runId, iterationAgentPrompt);
178
295
  const shouldStreamRawCommandOutput =
179
296
  agentStep.agentMode === 'command' && merged.streamAgentOutput === true;
180
- const shouldStreamWhiteLabeledInternalOutput =
297
+ const shouldStreamInternalOutput =
181
298
  agentStep.agentMode === 'internal' && merged.streamAgentOutput === true;
182
299
 
183
300
  if (agentStep.agentMode === 'internal') {
184
- if (shouldStreamWhiteLabeledInternalOutput) {
185
- console.log('Agent step: running (white-labeled stream)...');
186
- } else {
187
- console.log('Agent step: running...');
188
- }
301
+ console.log('Agent step: running...');
189
302
  }
190
303
 
191
- const internalStdoutRelay = shouldStreamWhiteLabeledInternalOutput
304
+ const internalStdoutRelay = shouldStreamInternalOutput
192
305
  ? createChunkLineRelay((line) => {
193
- const normalized = normalizeInternalBackendLine(line);
306
+ const normalized = normalizeInternalBackendLine(line, 'stdout');
194
307
  if (normalized) {
195
308
  console.log(` [agent] ${normalized}`);
196
309
  }
197
310
  })
198
311
  : null;
199
- const internalStderrRelay = shouldStreamWhiteLabeledInternalOutput
312
+ const internalStderrRelay = shouldStreamInternalOutput
200
313
  ? createChunkLineRelay((line) => {
201
- const normalized = normalizeInternalBackendLine(line);
314
+ const normalized = normalizeInternalBackendLine(line, 'stderr');
202
315
  if (normalized) {
203
316
  console.log(` [agent:error] ${normalized}`);
204
317
  }
205
318
  })
206
319
  : null;
207
320
 
208
- const agentResult = await runCommand(agentStep.command, {
321
+ let executedCommand = agentStep.command;
322
+ let agentResult = await runCommand(executedCommand, {
209
323
  cwd,
210
324
  stream: shouldStreamRawCommandOutput,
211
325
  onStdout: internalStdoutRelay ? (chunk) => internalStdoutRelay.onChunk(chunk) : undefined,
@@ -213,6 +327,25 @@ export async function runResearchLoop(config, cliOverrides = {}) {
213
327
  env: { AR_ITERATION: String(i), AR_RUN_ID: runId },
214
328
  });
215
329
 
330
+ if (
331
+ agentStep.agentMode === 'internal' &&
332
+ agentResult.code !== 0 &&
333
+ UNSUPPORTED_RALPH_FLAG_PATTERN.test(`${agentResult.stdout}\n${agentResult.stderr}`)
334
+ ) {
335
+ const fallbackCommand = stripRalphHeadlessFlags(agentStep.command);
336
+ if (fallbackCommand !== agentStep.command) {
337
+ console.log('Agent step: backend does not support headless flags, retrying with compatibility mode...');
338
+ executedCommand = fallbackCommand;
339
+ agentResult = await runCommand(executedCommand, {
340
+ cwd,
341
+ stream: shouldStreamRawCommandOutput,
342
+ onStdout: internalStdoutRelay ? (chunk) => internalStdoutRelay.onChunk(chunk) : undefined,
343
+ onStderr: internalStderrRelay ? (chunk) => internalStderrRelay.onChunk(chunk) : undefined,
344
+ env: { AR_ITERATION: String(i), AR_RUN_ID: runId },
345
+ });
346
+ }
347
+ }
348
+
216
349
  internalStdoutRelay?.flush();
217
350
  internalStderrRelay?.flush();
218
351
 
@@ -220,7 +353,13 @@ export async function runResearchLoop(config, cliOverrides = {}) {
220
353
  console.log('Agent step: complete');
221
354
  }
222
355
 
223
- if (agentResult.code !== 0) {
356
+ const nonFatalInternalMaxIterations = isNonFatalInternalMaxIterations(agentStep.agentMode, agentResult);
357
+
358
+ if (nonFatalInternalMaxIterations) {
359
+ console.log('Agent step reached backend max iterations; continuing to benchmark...');
360
+ }
361
+
362
+ if (agentResult.code !== 0 && !nonFatalInternalMaxIterations) {
224
363
  console.log(`Agent step failed with code ${agentResult.code}`);
225
364
  if (merged.stopOnAgentFailure !== false) {
226
365
  await appendRunLog(cwd, runId, {
@@ -244,6 +383,15 @@ export async function runResearchLoop(config, cliOverrides = {}) {
244
383
  if (benchmarkResult.stdout) console.log(benchmarkResult.stdout.trim());
245
384
  if (benchmarkResult.stderr) console.log(benchmarkResult.stderr.trim());
246
385
 
386
+ benchmarkFeedback = buildBenchmarkFeedback({
387
+ iteration: i,
388
+ benchmarkCommand: merged.benchmarkCommand,
389
+ metricRegex: merged.metricRegex,
390
+ direction,
391
+ benchmarkFailure: true,
392
+ benchmarkOutput,
393
+ });
394
+
247
395
  if (merged.onRejectCommand) {
248
396
  await runCommand(merged.onRejectCommand, { cwd, stream: true });
249
397
  }
@@ -262,6 +410,18 @@ export async function runResearchLoop(config, cliOverrides = {}) {
262
410
  const improved = isBetter(metric, bestMetric, direction);
263
411
  console.log(`Metric: ${metric}${bestMetric == null ? ' (baseline)' : ` | best: ${bestMetric}`}`);
264
412
 
413
+ const nextBestMetric = improved ? metric : bestMetric;
414
+ benchmarkFeedback = buildBenchmarkFeedback({
415
+ iteration: i,
416
+ benchmarkCommand: merged.benchmarkCommand,
417
+ metricRegex: merged.metricRegex,
418
+ direction,
419
+ metric,
420
+ bestMetric: nextBestMetric,
421
+ improved,
422
+ benchmarkFailure: false,
423
+ });
424
+
265
425
  if (improved) {
266
426
  bestMetric = metric;
267
427
  bestIteration = i;