autoresearcher 0.1.1 → 0.1.3

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
package/README.md CHANGED
@@ -55,11 +55,11 @@ The `init` command creates `.autoresearcher/config.json`:
55
55
  ```
56
56
 
57
57
  `agentMode: "internal"` is the default. For a fully custom step command, set `agentMode` to `"command"` and edit `agentCommand`.
58
- In internal mode, backend output is streamed through a white-labeled relay so users only see `autoresearcher` logs.
58
+ In internal mode, backend output is streamed through a white-labeled, status-focused relay so users only see clean `autoresearcher` loop logs.
59
59
 
60
60
  ## Example Configs
61
61
 
62
- Default internal headless mode:
62
+ Default internal mode:
63
63
 
64
64
  ```json
65
65
  {
@@ -73,7 +73,8 @@ Default internal headless mode:
73
73
  "metricRegex": "score=([0-9.]+)",
74
74
  "direction": "max",
75
75
  "iterations": 40,
76
- "autoCommit": false
76
+ "autoCommit": false,
77
+ "streamAgentOutput": true
77
78
  }
78
79
  ```
79
80
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "autoresearcher",
3
- "version": "0.1.1",
3
+ "version": "0.1.3",
4
4
  "description": "Benchmark-driven autonomous research CLI for post-quantum and blockchain R&D",
5
5
  "type": "module",
6
6
  "bin": {
@@ -32,6 +32,6 @@
32
32
  "homepage": "https://autoresearcher.multivmlabs.com",
33
33
  "license": "MIT",
34
34
  "dependencies": {
35
- "ralph-starter": "^0.4.4"
35
+ "ralph-starter": "^0.4.5"
36
36
  }
37
37
  }
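(file header missing from this extract — the hunk below belongs to a JavaScript source file, not to package/package.json)
@@ -58,6 +58,7 @@ export function buildInternalBackendCommand({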
58
58
  command += ' --auto';
59
59
  command += ` --max-iterations ${safeMaxIterations}`;
60
60
  command += ` --output-dir ${shellQuote(cwd)}`;
61
+ command += ' --headless --no-auto-skills';
61
62
  command += ' --no-track-progress --no-track-cost';
62
63
 
63
64
  if (backendAgent) {
package/src/run-loop.js CHANGED
@@ -23,6 +23,98 @@ function isBetter(metric, best, direction) {
23
23
 
24
24
  const ANSI_ESCAPE_REGEX = /\u001b\[[0-9;]*m/g;
25
25
  const BOX_DRAWING_ONLY_REGEX = /^[\s╭╮╰╯│─┌┐└┘═║╔╗╚╝]+$/;
26
+ const INTERNAL_OUTPUT_SUPPRESS_PATTERNS = [
27
+ /\b(?:auto[- ]?)?skills?\b/i,
28
+ /\bdownload(?:ing|ed)?\b/i,
29
+ /\binstall(?:ing|ed)?\b/i,
30
+ /\bskill catalog\b/i,
31
+ /\bskill registry\b/i,
32
+ ];
33
+ const INTERNAL_OUTPUT_STATUS_PATTERNS = [
34
+ /checking agent/i,
35
+ /agent (?:detected|selected|mode)/i,
36
+ /loop\s+\d+/i,
37
+ /iteration\s+\d+/i,
38
+ /planning/i,
39
+ /writing code/i,
40
+ /validat(?:e|ing|ion)/i,
41
+ /\btests?\b/i,
42
+ /\blint\b/i,
43
+ /\bbuild\b/i,
44
+ /\bbenchmark\b/i,
45
+ /\bcomplete(?:d)?\b/i,
46
+ /\bdone\b/i,
47
+ /\bcommit(?:ted)?\b/i,
48
+ /\berror\b/i,
49
+ /\bfailed\b/i,
50
+ /\bwarning\b/i,
51
+ /\bstopping\b/i,
52
+ /\bcircuit breaker\b/i,
53
+ ];
54
+ const UNSUPPORTED_RALPH_FLAG_PATTERN = /unknown option\s+['"]--(?:headless|no-auto-skills)['"]/i;
55
+
56
+ function stripRalphHeadlessFlags(command) {
57
+ return command.replace(/\s--headless\b/g, '').replace(/\s--no-auto-skills\b/g, '');
58
+ }
59
+
60
+ function isLikelyGlyphNoise(text) {
61
+ const alnumCount = (text.match(/[A-Za-z0-9]/g) || []).length;
62
+ const nonAsciiCount = (text.match(/[^\x20-\x7E]/g) || []).length;
63
+
64
+ if (alnumCount === 0) {
65
+ return true;
66
+ }
67
+
68
+ return nonAsciiCount > alnumCount;
69
+ }
70
+
71
+ function truncateText(value, max = 500) {
72
+ if (!value) return '';
73
+ return value.length <= max ? value : `${value.slice(0, max)}...`;
74
+ }
75
+
76
+ function buildBenchmarkFeedback({
77
+ iteration,
78
+ benchmarkCommand,
79
+ metricRegex,
80
+ direction,
81
+ metric,
82
+ bestMetric,
83
+ improved,
84
+ benchmarkFailure,
85
+ benchmarkOutput,
86
+ }) {
87
+ if (benchmarkFailure) {
88
+ return [
89
+ `Iteration ${iteration} benchmark status: failed to execute or parse metric.`,
90
+ `Benchmark command: ${benchmarkCommand}`,
91
+ `Metric regex: ${metricRegex}`,
92
+ `Benchmark output excerpt: ${truncateText(benchmarkOutput.replace(/\s+/g, ' ').trim(), 300)}`,
93
+ 'Before optimizing further, make sure benchmark execution and metric extraction are stable.',
94
+ ].join('\n');
95
+ }
96
+
97
+ const optimizationHint =
98
+ direction === 'min' ? 'Lower metric values are better.' : 'Higher metric values are better.';
99
+
100
+ if (improved) {
101
+ return [
102
+ `Iteration ${iteration} benchmark status: improved.`,
103
+ `Current metric: ${metric}`,
104
+ `Best metric so far: ${bestMetric}`,
105
+ optimizationHint,
106
+ 'Continue in the same direction with another focused optimization.',
107
+ ].join('\n');
108
+ }
109
+
110
+ return [
111
+ `Iteration ${iteration} benchmark status: not improved.`,
112
+ `Current metric: ${metric}`,
113
+ `Best metric so far: ${bestMetric}`,
114
+ optimizationHint,
115
+ 'Try a different approach and avoid repeating the same change pattern.',
116
+ ].join('\n');
117
+ }
26
118
 
27
119
  function createChunkLineRelay(onLine) {
28
120
  let buffer = '';
@@ -45,13 +137,23 @@ function createChunkLineRelay(onLine) {
45
137
  };
46
138
  }
47
139
 
48
- function normalizeInternalBackendLine(line) {
140
+ function normalizeInternalBackendLine(line, channel = 'stdout') {
49
141
  const withoutAnsi = line.replace(ANSI_ESCAPE_REGEX, '').replaceAll('\r', '');
50
142
  const cleaned = withoutAnsi.trimEnd();
51
143
  const compact = cleaned.trim();
52
144
 
53
145
  if (!compact) return null;
54
146
  if (BOX_DRAWING_ONLY_REGEX.test(compact)) return null;
147
+ if (isLikelyGlyphNoise(compact)) return null;
148
+
149
+ if (INTERNAL_OUTPUT_SUPPRESS_PATTERNS.some((pattern) => pattern.test(compact))) {
150
+ return null;
151
+ }
152
+
153
+ const hasStatusSignal = INTERNAL_OUTPUT_STATUS_PATTERNS.some((pattern) => pattern.test(compact));
154
+ if (!hasStatusSignal && channel !== 'stderr') {
155
+ return null;
156
+ }
55
157
 
56
158
  const lowered = compact.toLowerCase();
57
159
  if (lowered === 'ralph-starter') return null;
@@ -148,6 +250,7 @@ export async function runResearchLoop(config, cliOverrides = {}) {
148
250
  const iterations = Number(merged.iterations ?? 20);
149
251
  const runId = new Date().toISOString().replace(/[:.]/g, '-');
150
252
  const resolvedPrompt = await resolveAgentPrompt(merged, cwd);
253
+ let benchmarkFeedback = '';
151
254
 
152
255
  let bestMetric = null;
153
256
  let bestIteration = 0;
@@ -174,7 +277,11 @@ export async function runResearchLoop(config, cliOverrides = {}) {
174
277
  console.log(`\n--- Iteration ${i}/${iterations} ---`);
175
278
  const beforeCommit = await getGitCommit(cwd);
176
279
 
177
- const agentStep = getAgentStepCommand(merged, cwd, i, runId, resolvedPrompt.prompt);
280
+ const iterationAgentPrompt = benchmarkFeedback
281
+ ? `${resolvedPrompt.prompt}\n\n## Benchmark Feedback From Previous Iteration\n${benchmarkFeedback}`
282
+ : resolvedPrompt.prompt;
283
+
284
+ const agentStep = getAgentStepCommand(merged, cwd, i, runId, iterationAgentPrompt);
178
285
  const shouldStreamRawCommandOutput =
179
286
  agentStep.agentMode === 'command' && merged.streamAgentOutput === true;
180
287
  const shouldStreamWhiteLabeledInternalOutput =
@@ -190,7 +297,7 @@ export async function runResearchLoop(config, cliOverrides = {}) {
190
297
 
191
298
  const internalStdoutRelay = shouldStreamWhiteLabeledInternalOutput
192
299
  ? createChunkLineRelay((line) => {
193
- const normalized = normalizeInternalBackendLine(line);
300
+ const normalized = normalizeInternalBackendLine(line, 'stdout');
194
301
  if (normalized) {
195
302
  console.log(` [agent] ${normalized}`);
196
303
  }
@@ -198,14 +305,15 @@ export async function runResearchLoop(config, cliOverrides = {}) {
198
305
  : null;
199
306
  const internalStderrRelay = shouldStreamWhiteLabeledInternalOutput
200
307
  ? createChunkLineRelay((line) => {
201
- const normalized = normalizeInternalBackendLine(line);
308
+ const normalized = normalizeInternalBackendLine(line, 'stderr');
202
309
  if (normalized) {
203
310
  console.log(` [agent:error] ${normalized}`);
204
311
  }
205
312
  })
206
313
  : null;
207
314
 
208
- const agentResult = await runCommand(agentStep.command, {
315
+ let executedCommand = agentStep.command;
316
+ let agentResult = await runCommand(executedCommand, {
209
317
  cwd,
210
318
  stream: shouldStreamRawCommandOutput,
211
319
  onStdout: internalStdoutRelay ? (chunk) => internalStdoutRelay.onChunk(chunk) : undefined,
@@ -213,6 +321,25 @@ export async function runResearchLoop(config, cliOverrides = {}) {
213
321
  env: { AR_ITERATION: String(i), AR_RUN_ID: runId },
214
322
  });
215
323
 
324
+ if (
325
+ agentStep.agentMode === 'internal' &&
326
+ agentResult.code !== 0 &&
327
+ UNSUPPORTED_RALPH_FLAG_PATTERN.test(`${agentResult.stdout}\n${agentResult.stderr}`)
328
+ ) {
329
+ const fallbackCommand = stripRalphHeadlessFlags(agentStep.command);
330
+ if (fallbackCommand !== agentStep.command) {
331
+ console.log('Agent step: backend does not support headless flags, retrying with compatibility mode...');
332
+ executedCommand = fallbackCommand;
333
+ agentResult = await runCommand(executedCommand, {
334
+ cwd,
335
+ stream: shouldStreamRawCommandOutput,
336
+ onStdout: internalStdoutRelay ? (chunk) => internalStdoutRelay.onChunk(chunk) : undefined,
337
+ onStderr: internalStderrRelay ? (chunk) => internalStderrRelay.onChunk(chunk) : undefined,
338
+ env: { AR_ITERATION: String(i), AR_RUN_ID: runId },
339
+ });
340
+ }
341
+ }
342
+
216
343
  internalStdoutRelay?.flush();
217
344
  internalStderrRelay?.flush();
218
345
 
@@ -244,6 +371,15 @@ export async function runResearchLoop(config, cliOverrides = {}) {
244
371
  if (benchmarkResult.stdout) console.log(benchmarkResult.stdout.trim());
245
372
  if (benchmarkResult.stderr) console.log(benchmarkResult.stderr.trim());
246
373
 
374
+ benchmarkFeedback = buildBenchmarkFeedback({
375
+ iteration: i,
376
+ benchmarkCommand: merged.benchmarkCommand,
377
+ metricRegex: merged.metricRegex,
378
+ direction,
379
+ benchmarkFailure: true,
380
+ benchmarkOutput,
381
+ });
382
+
247
383
  if (merged.onRejectCommand) {
248
384
  await runCommand(merged.onRejectCommand, { cwd, stream: true });
249
385
  }
@@ -262,6 +398,18 @@ export async function runResearchLoop(config, cliOverrides = {}) {
262
398
  const improved = isBetter(metric, bestMetric, direction);
263
399
  console.log(`Metric: ${metric}${bestMetric == null ? ' (baseline)' : ` | best: ${bestMetric}`}`);
264
400
 
401
+ const nextBestMetric = improved ? metric : bestMetric;
402
+ benchmarkFeedback = buildBenchmarkFeedback({
403
+ iteration: i,
404
+ benchmarkCommand: merged.benchmarkCommand,
405
+ metricRegex: merged.metricRegex,
406
+ direction,
407
+ metric,
408
+ bestMetric: nextBestMetric,
409
+ improved,
410
+ benchmarkFailure: false,
411
+ });
412
+
265
413
  if (improved) {
266
414
  bestMetric = metric;
267
415
  bestIteration = i;