npm - autoresearcher - Versions diffs - 0.1.1 → 0.1.3 - Mend

autoresearcher 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4)

package/README.md CHANGED

@@ -55,11 +55,11 @@ The `init` command creates `.autoresearcher/config.json`:
 ```
 `agentMode: "internal"` is the default. For a fully custom step command, set `agentMode` to `"command"` and edit `agentCommand`.
-In internal mode, backend output is streamed through a white-labeled relay so users only see `autoresearcher` logs.
+In internal mode, backend output is streamed through a white-labeled, status-focused relay so users only see clean `autoresearcher` loop logs.
 ## Example Configs
-Default internal headless mode:
+Default internal mode:
 ```json
 {
@@ -73,7 +73,8 @@ Default internal headless mode:
   "metricRegex": "score=([0-9.]+)",
   "direction": "max",
   "iterations": 40,
-  "autoCommit": false
+  "autoCommit": false,
+  "streamAgentOutput": true
 }
 ```

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "autoresearcher",
-  "version": "0.1.1",
+  "version": "0.1.3",
   "description": "Benchmark-driven autonomous research CLI for post-quantum and blockchain R&D",
   "type": "module",
   "bin": {
@@ -32,6 +32,6 @@
   "homepage": "https://autoresearcher.multivmlabs.com",
   "license": "MIT",
   "dependencies": {
-    "ralph-starter": "^0.4.4"
+    "ralph-starter": "^0.4.5"
   }
 }

package/src/internal-backend.js CHANGED

@@ -58,6 +58,7 @@ export function buildInternalBackendCommand({
   command += ' --auto';
   command += ` --max-iterations ${safeMaxIterations}`;
   command += ` --output-dir ${shellQuote(cwd)}`;
+  command += ' --headless --no-auto-skills';
   command += ' --no-track-progress --no-track-cost';
   if (backendAgent) {

package/src/run-loop.js CHANGED

@@ -23,6 +23,98 @@ function isBetter(metric, best, direction) {
 const ANSI_ESCAPE_REGEX = /\u001b\[[0-9;]*m/g;
 const BOX_DRAWING_ONLY_REGEX = /^[\s╭╮╰╯│─┌┐└┘═║╔╗╚╝]+$/;
+const INTERNAL_OUTPUT_SUPPRESS_PATTERNS = [
+  /\b(?:auto[- ]?)?skills?\b/i,
+  /\bdownload(?:ing|ed)?\b/i,
+  /\binstall(?:ing|ed)?\b/i,
+  /\bskill catalog\b/i,
+  /\bskill registry\b/i,
+];
+const INTERNAL_OUTPUT_STATUS_PATTERNS = [
+  /checking agent/i,
+  /agent (?:detected|selected|mode)/i,
+  /loop\s+\d+/i,
+  /iteration\s+\d+/i,
+  /planning/i,
+  /writing code/i,
+  /validat(?:e|ing|ion)/i,
+  /\btests?\b/i,
+  /\blint\b/i,
+  /\bbuild\b/i,
+  /\bbenchmark\b/i,
+  /\bcomplete(?:d)?\b/i,
+  /\bdone\b/i,
+  /\bcommit(?:ted)?\b/i,
+  /\berror\b/i,
+  /\bfailed\b/i,
+  /\bwarning\b/i,
+  /\bstopping\b/i,
+  /\bcircuit breaker\b/i,
+];
+const UNSUPPORTED_RALPH_FLAG_PATTERN = /unknown option\s+['"]--(?:headless|no-auto-skills)['"]/i;
+function stripRalphHeadlessFlags(command) {
+  return command.replace(/\s--headless\b/g, '').replace(/\s--no-auto-skills\b/g, '');
+}
+function isLikelyGlyphNoise(text) {
+  const alnumCount = (text.match(/[A-Za-z0-9]/g) || []).length;
+  const nonAsciiCount = (text.match(/[^\x20-\x7E]/g) || []).length;
+  if (alnumCount === 0) {
+    return true;
+  }
+  return nonAsciiCount > alnumCount;
+}
+function truncateText(value, max = 500) {
+  if (!value) return '';
+  return value.length <= max ? value : `${value.slice(0, max)}...`;
+}
+function buildBenchmarkFeedback({
+  iteration,
+  benchmarkCommand,
+  metricRegex,
+  direction,
+  metric,
+  bestMetric,
+  improved,
+  benchmarkFailure,
+  benchmarkOutput,
+}) {
+  if (benchmarkFailure) {
+    return [
+      `Iteration ${iteration} benchmark status: failed to execute or parse metric.`,
+      `Benchmark command: ${benchmarkCommand}`,
+      `Metric regex: ${metricRegex}`,
+      `Benchmark output excerpt: ${truncateText(benchmarkOutput.replace(/\s+/g, ' ').trim(), 300)}`,
+      'Before optimizing further, make sure benchmark execution and metric extraction are stable.',
+    ].join('\n');
+  }
+  const optimizationHint =
+    direction === 'min' ? 'Lower metric values are better.' : 'Higher metric values are better.';
+  if (improved) {
+    return [
+      `Iteration ${iteration} benchmark status: improved.`,
+      `Current metric: ${metric}`,
+      `Best metric so far: ${bestMetric}`,
+      optimizationHint,
+      'Continue in the same direction with another focused optimization.',
+    ].join('\n');
+  }
+  return [
+    `Iteration ${iteration} benchmark status: not improved.`,
+    `Current metric: ${metric}`,
+    `Best metric so far: ${bestMetric}`,
+    optimizationHint,
+    'Try a different approach and avoid repeating the same change pattern.',
+  ].join('\n');
+}
 function createChunkLineRelay(onLine) {
   let buffer = '';
@@ -45,13 +137,23 @@ function createChunkLineRelay(onLine) {
   };
 }
-function normalizeInternalBackendLine(line) {
+function normalizeInternalBackendLine(line, channel = 'stdout') {
   const withoutAnsi = line.replace(ANSI_ESCAPE_REGEX, '').replaceAll('\r', '');
   const cleaned = withoutAnsi.trimEnd();
   const compact = cleaned.trim();
   if (!compact) return null;
   if (BOX_DRAWING_ONLY_REGEX.test(compact)) return null;
+  if (isLikelyGlyphNoise(compact)) return null;
+  if (INTERNAL_OUTPUT_SUPPRESS_PATTERNS.some((pattern) => pattern.test(compact))) {
+    return null;
+  }
+  const hasStatusSignal = INTERNAL_OUTPUT_STATUS_PATTERNS.some((pattern) => pattern.test(compact));
+  if (!hasStatusSignal && channel !== 'stderr') {
+    return null;
+  }
   const lowered = compact.toLowerCase();
   if (lowered === 'ralph-starter') return null;
@@ -148,6 +250,7 @@ export async function runResearchLoop(config, cliOverrides = {}) {
   const iterations = Number(merged.iterations ?? 20);
   const runId = new Date().toISOString().replace(/[:.]/g, '-');
   const resolvedPrompt = await resolveAgentPrompt(merged, cwd);
+  let benchmarkFeedback = '';
   let bestMetric = null;
   let bestIteration = 0;
@@ -174,7 +277,11 @@ export async function runResearchLoop(config, cliOverrides = {}) {
     console.log(`\n--- Iteration ${i}/${iterations} ---`);
     const beforeCommit = await getGitCommit(cwd);
-    const agentStep = getAgentStepCommand(merged, cwd, i, runId, resolvedPrompt.prompt);
+    const iterationAgentPrompt = benchmarkFeedback
+      ? `${resolvedPrompt.prompt}\n\n## Benchmark Feedback From Previous Iteration\n${benchmarkFeedback}`
+      : resolvedPrompt.prompt;
+    const agentStep = getAgentStepCommand(merged, cwd, i, runId, iterationAgentPrompt);
     const shouldStreamRawCommandOutput =
       agentStep.agentMode === 'command' && merged.streamAgentOutput === true;
     const shouldStreamWhiteLabeledInternalOutput =
@@ -190,7 +297,7 @@ export async function runResearchLoop(config, cliOverrides = {}) {
     const internalStdoutRelay = shouldStreamWhiteLabeledInternalOutput
       ? createChunkLineRelay((line) => {
-          const normalized = normalizeInternalBackendLine(line);
+          const normalized = normalizeInternalBackendLine(line, 'stdout');
           if (normalized) {
             console.log(`  [agent] ${normalized}`);
           }
@@ -198,14 +305,15 @@ export async function runResearchLoop(config, cliOverrides = {}) {
       : null;
     const internalStderrRelay = shouldStreamWhiteLabeledInternalOutput
       ? createChunkLineRelay((line) => {
-          const normalized = normalizeInternalBackendLine(line);
+          const normalized = normalizeInternalBackendLine(line, 'stderr');
           if (normalized) {
             console.log(`  [agent:error] ${normalized}`);
           }
         })
       : null;
-    const agentResult = await runCommand(agentStep.command, {
+    let executedCommand = agentStep.command;
+    let agentResult = await runCommand(executedCommand, {
       cwd,
       stream: shouldStreamRawCommandOutput,
       onStdout: internalStdoutRelay ? (chunk) => internalStdoutRelay.onChunk(chunk) : undefined,
@@ -213,6 +321,25 @@ export async function runResearchLoop(config, cliOverrides = {}) {
       env: { AR_ITERATION: String(i), AR_RUN_ID: runId },
     });
+    if (
+      agentStep.agentMode === 'internal' &&
+      agentResult.code !== 0 &&
+      UNSUPPORTED_RALPH_FLAG_PATTERN.test(`${agentResult.stdout}\n${agentResult.stderr}`)
+    ) {
+      const fallbackCommand = stripRalphHeadlessFlags(agentStep.command);
+      if (fallbackCommand !== agentStep.command) {
+        console.log('Agent step: backend does not support headless flags, retrying with compatibility mode...');
+        executedCommand = fallbackCommand;
+        agentResult = await runCommand(executedCommand, {
+          cwd,
+          stream: shouldStreamRawCommandOutput,
+          onStdout: internalStdoutRelay ? (chunk) => internalStdoutRelay.onChunk(chunk) : undefined,
+          onStderr: internalStderrRelay ? (chunk) => internalStderrRelay.onChunk(chunk) : undefined,
+          env: { AR_ITERATION: String(i), AR_RUN_ID: runId },
+        });
+      }
+    }
     internalStdoutRelay?.flush();
     internalStderrRelay?.flush();
@@ -244,6 +371,15 @@ export async function runResearchLoop(config, cliOverrides = {}) {
       if (benchmarkResult.stdout) console.log(benchmarkResult.stdout.trim());
       if (benchmarkResult.stderr) console.log(benchmarkResult.stderr.trim());
+      benchmarkFeedback = buildBenchmarkFeedback({
+        iteration: i,
+        benchmarkCommand: merged.benchmarkCommand,
+        metricRegex: merged.metricRegex,
+        direction,
+        benchmarkFailure: true,
+        benchmarkOutput,
+      });
       if (merged.onRejectCommand) {
         await runCommand(merged.onRejectCommand, { cwd, stream: true });
       }
@@ -262,6 +398,18 @@ export async function runResearchLoop(config, cliOverrides = {}) {
     const improved = isBetter(metric, bestMetric, direction);
     console.log(`Metric: ${metric}${bestMetric == null ? ' (baseline)' : ` | best: ${bestMetric}`}`);
+    const nextBestMetric = improved ? metric : bestMetric;
+    benchmarkFeedback = buildBenchmarkFeedback({
+      iteration: i,
+      benchmarkCommand: merged.benchmarkCommand,
+      metricRegex: merged.metricRegex,
+      direction,
+      metric,
+      bestMetric: nextBestMetric,
+      improved,
+      benchmarkFailure: false,
+    });
     if (improved) {
       bestMetric = metric;
       bestIteration = i;