npm - autoresearcher - Versions diffs - 0.1.2 → 0.1.4 - Mend

autoresearcher 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/README.md CHANGED Viewed

@@ -55,7 +55,7 @@ The `init` command creates `.autoresearcher/config.json`:
 ```
 `agentMode: "internal"` is the default. For a fully custom step command, set `agentMode` to `"command"` and edit `agentCommand`.
-In internal mode, backend output is streamed through a white-labeled relay so users only see `autoresearcher` logs.
+In internal mode, backend output is streamed through a status-focused relay so users only see clean `autoresearcher` loop logs.
 ## Example Configs

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "autoresearcher",
-  "version": "0.1.2",
+  "version": "0.1.4",
   "description": "Benchmark-driven autonomous research CLI for post-quantum and blockchain R&D",
   "type": "module",
   "bin": {
@@ -32,6 +32,6 @@
   "homepage": "https://autoresearcher.multivmlabs.com",
   "license": "MIT",
   "dependencies": {
-    "ralph-starter": "^0.4.4"
+    "ralph-starter": "^0.4.5"
   }
 }

package/src/cli.js CHANGED Viewed

@@ -61,7 +61,7 @@ Key config fields:
   agentMode             "internal" (default) or "command"
   agentPromptFile       Markdown objective file (default: program.md)
   agentPrompt           Iteration objective for internal headless agent backend
-  streamAgentOutput     Stream loop output (internal stream is white-labeled)
+  streamAgentOutput     Stream loop output from the internal backend relay
   backendAgent          Optional backend agent override (amp/codex/claude-code/...)
   backendModel          Optional backend model override (provider-specific)
   agentCommand          Shell command when agentMode is "command"

package/src/internal-backend.js CHANGED Viewed

@@ -58,6 +58,7 @@ export function buildInternalBackendCommand({
   command += ' --auto';
   command += ` --max-iterations ${safeMaxIterations}`;
   command += ` --output-dir ${shellQuote(cwd)}`;
+  command += ' --headless --no-auto-skills';
   command += ' --no-track-progress --no-track-cost';
   if (backendAgent) {

package/src/run-loop.js CHANGED Viewed

@@ -23,6 +23,108 @@ function isBetter(metric, best, direction) {
 const ANSI_ESCAPE_REGEX = /\u001b\[[0-9;]*m/g;
 const BOX_DRAWING_ONLY_REGEX = /^[\s╭╮╰╯│─┌┐└┘═║╔╗╚╝]+$/;
+const INTERNAL_OUTPUT_SUPPRESS_PATTERNS = [
+  /\b(?:auto[- ]?)?skills?\b/i,
+  /\bdownload(?:ing|ed)?\b/i,
+  /\binstall(?:ing|ed)?\b/i,
+  /\bskill catalog\b/i,
+  /\bskill registry\b/i,
+];
+const INTERNAL_OUTPUT_STATUS_PATTERNS = [
+  /checking agent/i,
+  /agent (?:detected|selected|mode)/i,
+  /loop\s+\d+/i,
+  /iteration\s+\d+/i,
+  /planning/i,
+  /writing code/i,
+  /validat(?:e|ing|ion)/i,
+  /\btests?\b/i,
+  /\blint\b/i,
+  /\bbuild\b/i,
+  /\bbenchmark\b/i,
+  /\bcomplete(?:d)?\b/i,
+  /\bdone\b/i,
+  /\bcommit(?:ted)?\b/i,
+  /\berror\b/i,
+  /\bfailed\b/i,
+  /\bwarning\b/i,
+  /\bstopping\b/i,
+  /\bcircuit breaker\b/i,
+];
+const UNSUPPORTED_RALPH_FLAG_PATTERN = /unknown option\s+['"]--(?:headless|no-auto-skills)['"]/i;
+const NON_FATAL_MAX_ITERATIONS_PATTERN = /(?:\berror:\s*)?max_iterations\b/i;
+function stripRalphHeadlessFlags(command) {
+  return command.replace(/\s--headless\b/g, '').replace(/\s--no-auto-skills\b/g, '');
+}
+function isNonFatalInternalMaxIterations(agentMode, result) {
+  if (agentMode !== 'internal' || result.code === 0) {
+    return false;
+  }
+  const combined = `${result.stdout}\n${result.stderr}`;
+  return NON_FATAL_MAX_ITERATIONS_PATTERN.test(combined);
+}
+function isLikelyGlyphNoise(text) {
+  const alnumCount = (text.match(/[A-Za-z0-9]/g) || []).length;
+  const nonAsciiCount = (text.match(/[^\x20-\x7E]/g) || []).length;
+  if (alnumCount === 0) {
+    return true;
+  }
+  return nonAsciiCount > alnumCount;
+}
+function truncateText(value, max = 500) {
+  if (!value) return '';
+  return value.length <= max ? value : `${value.slice(0, max)}...`;
+}
+function buildBenchmarkFeedback({
+  iteration,
+  benchmarkCommand,
+  metricRegex,
+  direction,
+  metric,
+  bestMetric,
+  improved,
+  benchmarkFailure,
+  benchmarkOutput,
+}) {
+  if (benchmarkFailure) {
+    return [
+      `Iteration ${iteration} benchmark status: failed to execute or parse metric.`,
+      `Benchmark command: ${benchmarkCommand}`,
+      `Metric regex: ${metricRegex}`,
+      `Benchmark output excerpt: ${truncateText(benchmarkOutput.replace(/\s+/g, ' ').trim(), 300)}`,
+      'Before optimizing further, make sure benchmark execution and metric extraction are stable.',
+    ].join('\n');
+  }
+  const optimizationHint =
+    direction === 'min' ? 'Lower metric values are better.' : 'Higher metric values are better.';
+  if (improved) {
+    return [
+      `Iteration ${iteration} benchmark status: improved.`,
+      `Current metric: ${metric}`,
+      `Best metric so far: ${bestMetric}`,
+      optimizationHint,
+      'Continue in the same direction with another focused optimization.',
+    ].join('\n');
+  }
+  return [
+    `Iteration ${iteration} benchmark status: not improved.`,
+    `Current metric: ${metric}`,
+    `Best metric so far: ${bestMetric}`,
+    optimizationHint,
+    'Try a different approach and avoid repeating the same change pattern.',
+  ].join('\n');
+}
 function createChunkLineRelay(onLine) {
   let buffer = '';
@@ -45,13 +147,23 @@ function createChunkLineRelay(onLine) {
   };
 }
-function normalizeInternalBackendLine(line) {
+function normalizeInternalBackendLine(line, channel = 'stdout') {
   const withoutAnsi = line.replace(ANSI_ESCAPE_REGEX, '').replaceAll('\r', '');
   const cleaned = withoutAnsi.trimEnd();
   const compact = cleaned.trim();
   if (!compact) return null;
   if (BOX_DRAWING_ONLY_REGEX.test(compact)) return null;
+  if (isLikelyGlyphNoise(compact)) return null;
+  if (INTERNAL_OUTPUT_SUPPRESS_PATTERNS.some((pattern) => pattern.test(compact))) {
+    return null;
+  }
+  const hasStatusSignal = INTERNAL_OUTPUT_STATUS_PATTERNS.some((pattern) => pattern.test(compact));
+  if (!hasStatusSignal && channel !== 'stderr') {
+    return null;
+  }
   const lowered = compact.toLowerCase();
   if (lowered === 'ralph-starter') return null;
@@ -148,6 +260,7 @@ export async function runResearchLoop(config, cliOverrides = {}) {
   const iterations = Number(merged.iterations ?? 20);
   const runId = new Date().toISOString().replace(/[:.]/g, '-');
   const resolvedPrompt = await resolveAgentPrompt(merged, cwd);
+  let benchmarkFeedback = '';
   let bestMetric = null;
   let bestIteration = 0;
@@ -174,38 +287,39 @@ export async function runResearchLoop(config, cliOverrides = {}) {
     console.log(`\n--- Iteration ${i}/${iterations} ---`);
     const beforeCommit = await getGitCommit(cwd);
-    const agentStep = getAgentStepCommand(merged, cwd, i, runId, resolvedPrompt.prompt);
+    const iterationAgentPrompt = benchmarkFeedback
+      ? `${resolvedPrompt.prompt}\n\n## Benchmark Feedback From Previous Iteration\n${benchmarkFeedback}`
+      : resolvedPrompt.prompt;
+    const agentStep = getAgentStepCommand(merged, cwd, i, runId, iterationAgentPrompt);
     const shouldStreamRawCommandOutput =
       agentStep.agentMode === 'command' && merged.streamAgentOutput === true;
-    const shouldStreamWhiteLabeledInternalOutput =
+    const shouldStreamInternalOutput =
       agentStep.agentMode === 'internal' && merged.streamAgentOutput === true;
     if (agentStep.agentMode === 'internal') {
-      if (shouldStreamWhiteLabeledInternalOutput) {
-        console.log('Agent step: running (white-labeled stream)...');
-      } else {
-        console.log('Agent step: running...');
-      }
+      console.log('Agent step: running...');
     }
-    const internalStdoutRelay = shouldStreamWhiteLabeledInternalOutput
+    const internalStdoutRelay = shouldStreamInternalOutput
       ? createChunkLineRelay((line) => {
-          const normalized = normalizeInternalBackendLine(line);
+          const normalized = normalizeInternalBackendLine(line, 'stdout');
           if (normalized) {
             console.log(`  [agent] ${normalized}`);
           }
         })
       : null;
-    const internalStderrRelay = shouldStreamWhiteLabeledInternalOutput
+    const internalStderrRelay = shouldStreamInternalOutput
       ? createChunkLineRelay((line) => {
-          const normalized = normalizeInternalBackendLine(line);
+          const normalized = normalizeInternalBackendLine(line, 'stderr');
           if (normalized) {
             console.log(`  [agent:error] ${normalized}`);
           }
         })
       : null;
-    const agentResult = await runCommand(agentStep.command, {
+    let executedCommand = agentStep.command;
+    let agentResult = await runCommand(executedCommand, {
       cwd,
       stream: shouldStreamRawCommandOutput,
       onStdout: internalStdoutRelay ? (chunk) => internalStdoutRelay.onChunk(chunk) : undefined,
@@ -213,6 +327,25 @@ export async function runResearchLoop(config, cliOverrides = {}) {
       env: { AR_ITERATION: String(i), AR_RUN_ID: runId },
     });
+    if (
+      agentStep.agentMode === 'internal' &&
+      agentResult.code !== 0 &&
+      UNSUPPORTED_RALPH_FLAG_PATTERN.test(`${agentResult.stdout}\n${agentResult.stderr}`)
+    ) {
+      const fallbackCommand = stripRalphHeadlessFlags(agentStep.command);
+      if (fallbackCommand !== agentStep.command) {
+        console.log('Agent step: backend does not support headless flags, retrying with compatibility mode...');
+        executedCommand = fallbackCommand;
+        agentResult = await runCommand(executedCommand, {
+          cwd,
+          stream: shouldStreamRawCommandOutput,
+          onStdout: internalStdoutRelay ? (chunk) => internalStdoutRelay.onChunk(chunk) : undefined,
+          onStderr: internalStderrRelay ? (chunk) => internalStderrRelay.onChunk(chunk) : undefined,
+          env: { AR_ITERATION: String(i), AR_RUN_ID: runId },
+        });
+      }
+    }
     internalStdoutRelay?.flush();
     internalStderrRelay?.flush();
@@ -220,7 +353,13 @@ export async function runResearchLoop(config, cliOverrides = {}) {
       console.log('Agent step: complete');
     }
-    if (agentResult.code !== 0) {
+    const nonFatalInternalMaxIterations = isNonFatalInternalMaxIterations(agentStep.agentMode, agentResult);
+    if (nonFatalInternalMaxIterations) {
+      console.log('Agent step reached backend max iterations; continuing to benchmark...');
+    }
+    if (agentResult.code !== 0 && !nonFatalInternalMaxIterations) {
       console.log(`Agent step failed with code ${agentResult.code}`);
       if (merged.stopOnAgentFailure !== false) {
         await appendRunLog(cwd, runId, {
@@ -244,6 +383,15 @@ export async function runResearchLoop(config, cliOverrides = {}) {
       if (benchmarkResult.stdout) console.log(benchmarkResult.stdout.trim());
       if (benchmarkResult.stderr) console.log(benchmarkResult.stderr.trim());
+      benchmarkFeedback = buildBenchmarkFeedback({
+        iteration: i,
+        benchmarkCommand: merged.benchmarkCommand,
+        metricRegex: merged.metricRegex,
+        direction,
+        benchmarkFailure: true,
+        benchmarkOutput,
+      });
       if (merged.onRejectCommand) {
         await runCommand(merged.onRejectCommand, { cwd, stream: true });
       }
@@ -262,6 +410,18 @@ export async function runResearchLoop(config, cliOverrides = {}) {
     const improved = isBetter(metric, bestMetric, direction);
     console.log(`Metric: ${metric}${bestMetric == null ? ' (baseline)' : ` | best: ${bestMetric}`}`);
+    const nextBestMetric = improved ? metric : bestMetric;
+    benchmarkFeedback = buildBenchmarkFeedback({
+      iteration: i,
+      benchmarkCommand: merged.benchmarkCommand,
+      metricRegex: merged.metricRegex,
+      direction,
+      metric,
+      bestMetric: nextBestMetric,
+      improved,
+      benchmarkFailure: false,
+    });
     if (improved) {
       bestMetric = metric;
       bestIteration = i;