autoresearcher 0.1.5 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2):
  1. package/package.json +2 -2
  2. package/src/run-loop.js +101 -2
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "autoresearcher",
3
- "version": "0.1.5",
3
+ "version": "0.1.7",
4
4
  "description": "Benchmark-driven autonomous research CLI for post-quantum and blockchain R&D",
5
5
  "type": "module",
6
6
  "bin": {
@@ -29,7 +29,7 @@
29
29
  "src",
30
30
  "README.md"
31
31
  ],
32
- "homepage": "https://autoresearcher.multivmlabs.com",
32
+ "homepage": "https://autoresearcher.org",
33
33
  "license": "MIT",
34
34
  "dependencies": {
35
35
  "ralph-starter": "^0.4.5"
package/src/run-loop.js CHANGED
@@ -140,6 +140,76 @@ function escapeCell(value) {
140
140
  return String(value).replaceAll('|', '\\|').replaceAll('\n', ' ');
141
141
  }
142
142
 
143
/**
 * Return `value` when it is a finite number, otherwise `null`.
 *
 * Strings, `NaN`, `±Infinity`, `null` and `undefined` all map to `null`;
 * no coercion is performed.
 *
 * @param {unknown} value - Candidate metric value.
 * @returns {number | null} The same number, or `null` when not a finite number.
 */
function asFiniteNumber(value) {
  return typeof value === 'number' && Number.isFinite(value) ? value : null;
}

/**
 * Summarize how the metric moved between the first and the last iteration
 * that produced a finite metric.
 *
 * Entries whose `metric` is not a finite number are ignored.
 *
 * @param {Array<{ iteration: unknown, metric: unknown }>} iterationEntries -
 *   Iteration records in the order they were produced.
 * @param {string} direction - `'min'` when lower is better; any other value
 *   is treated as "higher is better".
 * @returns {null | {
 *   baseline: number,
 *   baselineIteration: unknown,
 *   final: number,
 *   finalIteration: unknown,
 *   delta: number,
 *   relativeDeltaPct: number | null,
 *   improvedFromBaseline: boolean,
 *   monotonic: boolean,
 * }} `null` when no entry carries a finite metric (or the input is not an
 *   array); otherwise the summary. `relativeDeltaPct` is `null` when the
 *   baseline is exactly 0.
 */
function summarizeMetricEvolution(iterationEntries, direction) {
  if (!Array.isArray(iterationEntries)) {
    return null;
  }

  const points = iterationEntries
    .map((entry) => ({
      iteration: entry.iteration,
      metric: asFiniteNumber(entry.metric),
    }))
    .filter((point) => point.metric != null);

  if (points.length === 0) {
    return null;
  }

  const lowerIsBetter = direction === 'min';
  const first = points[0];
  const last = points[points.length - 1];
  const baseline = first.metric;
  const final = last.metric;
  const delta = final - baseline;

  // Divide by |baseline| so the percentage always carries the same sign as
  // `delta`; dividing by a negative baseline would flip it.
  const relativeDeltaPct = baseline !== 0 ? (delta / Math.abs(baseline)) * 100 : null;
  const improvedFromBaseline = lowerIsBetter ? final < baseline : final > baseline;

  // Non-strict comparison: a plateau between two iterations still counts as
  // monotonic movement in the desired direction.
  const monotonic = points.every((point, index) => {
    if (index === 0) return true;
    const prev = points[index - 1].metric;
    return lowerIsBetter ? point.metric <= prev : point.metric >= prev;
  });

  return {
    baseline,
    baselineIteration: first.iteration,
    final,
    finalIteration: last.iteration,
    delta,
    relativeDeltaPct,
    improvedFromBaseline,
    monotonic,
  };
}
181
+
182
/**
 * Format a number with an explicit leading `+` for positive values.
 *
 * The value is first rounded to `precision` significant digits so that
 * binary floating-point noise from subtraction (for example
 * `0.3 - 0.1 === 0.19999999999999998`) does not leak into the output.
 *
 * @param {unknown} value - Number to format.
 * @param {number} [precision=12] - Significant digits to keep (1-100);
 *   invalid values fall back to 12.
 * @returns {string} `'n/a'` when `value` is not a finite number; otherwise the
 *   rounded number, prefixed with `+` when it is greater than zero.
 */
function formatSigned(value, precision = 12) {
  if (!Number.isFinite(value)) return 'n/a';

  // toPrecision throws a RangeError outside 1..100, so validate first.
  const digits =
    Number.isInteger(precision) && precision >= 1 && precision <= 100 ? precision : 12;

  // Round-trip through Number to drop trailing zeros left by toPrecision.
  const rounded = Number(value.toPrecision(digits));
  const sign = rounded > 0 ? '+' : '';
  return `${sign}${rounded}`;
}
187
+
188
/**
 * Normalize free-form objective text before it is embedded in a report.
 *
 * The text is trimmed, carriage returns are removed, and anything longer than
 * `maxChars` UTF-16 code units is cut and suffixed with a truncation marker.
 *
 * @param {unknown} value - Raw objective text; non-strings and empty values
 *   yield an empty string.
 * @param {number} [maxChars=4000] - Maximum number of UTF-16 code units kept
 *   before the truncation marker. Negative values are treated as 0 and
 *   non-numeric / NaN values fall back to 4000.
 * @returns {string} The normalized text, `''` when there is nothing to show,
 *   or the truncated text followed by `"\n...[truncated]"`.
 */
function sanitizeObjectiveText(value, maxChars = 4000) {
  if (!value || typeof value !== 'string') {
    return '';
  }

  const limit =
    typeof maxChars === 'number' && !Number.isNaN(maxChars)
      ? Math.max(0, Math.floor(maxChars))
      : 4000;

  const normalized = value.trim().replaceAll('\r', '');
  if (!normalized) return '';
  if (normalized.length <= limit) {
    return normalized;
  }

  // Never end the kept text on a high surrogate: that would split a
  // surrogate pair and leave an unpaired code unit in the report.
  let end = limit;
  if (end > 0) {
    const lastUnit = normalized.charCodeAt(end - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
      end -= 1;
    }
  }

  return `${normalized.slice(0, end)}\n...[truncated]`;
}
201
+
202
/**
 * Render a fixed-width ASCII progress bar such as `[######------]`.
 *
 * @param {number} current - Units completed so far.
 * @param {number} total - Total units; must be a finite number greater than 0.
 * @param {number} [width=24] - Number of cells between the brackets. Values
 *   that are not finite, or are negative, fall back to 24; fractional values
 *   are floored.
 * @returns {string} The bar, with `#` for completed cells and `-` for the
 *   rest. When `current` or `total` is unusable, a bar of `?` cells with the
 *   same width is returned instead.
 */
function renderProgressBar(current, total, width = 24) {
  // String.prototype.repeat throws a RangeError on negative counts, so
  // settle on a safe cell count before building anything.
  const cells = Number.isFinite(width) && width >= 0 ? Math.floor(width) : 24;

  if (!Number.isFinite(current) || !Number.isFinite(total) || total <= 0) {
    return `[${'?'.repeat(cells)}]`;
  }

  // Clamp so overshoot or negative progress never over- or under-fills.
  const ratio = Math.max(0, Math.min(1, current / total));
  const filled = Math.round(ratio * cells);
  const empty = Math.max(0, cells - filled);
  return `[${'#'.repeat(filled)}${'-'.repeat(empty)}]`;
}
212
+
143
213
  function renderFinalReport({
144
214
  runId,
145
215
  startedAt,
@@ -153,11 +223,14 @@ function renderFinalReport({
153
223
  bestIteration,
154
224
  iterationEntries,
155
225
  runLogRelativePath,
226
+ agentObjective,
156
227
  }) {
157
228
  const keepCount = iterationEntries.filter((entry) => entry.status === 'keep').length;
158
229
  const rejectCount = iterationEntries.filter((entry) => entry.status === 'reject').length;
159
230
  const benchmarkFailedCount = iterationEntries.filter((entry) => entry.status === 'benchmark_failed').length;
160
231
  const agentFailedCount = iterationEntries.filter((entry) => entry.status === 'agent_failed').length;
232
+ const metricSummary = summarizeMetricEvolution(iterationEntries, direction);
233
+ const objectiveText = sanitizeObjectiveText(agentObjective);
161
234
 
162
235
  const lines = [
163
236
  '# Autoresearcher Final Report',
@@ -179,6 +252,20 @@ function renderFinalReport({
179
252
  `- Agent failures: ${agentFailedCount}`,
180
253
  `- Run log: \`${runLogRelativePath}\``,
181
254
  '',
255
+ '## Metric Comparison',
256
+ '',
257
+ `- Baseline metric: ${metricSummary ? formatMetric(metricSummary.baseline) : 'n/a'}${metricSummary ? ` (iteration ${metricSummary.baselineIteration})` : ''}`,
258
+ `- Final metric: ${metricSummary ? formatMetric(metricSummary.final) : 'n/a'}${metricSummary ? ` (iteration ${metricSummary.finalIteration})` : ''}`,
259
+ `- Delta (final-baseline): ${metricSummary ? formatSigned(metricSummary.delta) : 'n/a'}`,
260
+ `- Relative change: ${metricSummary && metricSummary.relativeDeltaPct != null ? `${formatSigned(Number(metricSummary.relativeDeltaPct.toFixed(2)))}%` : 'n/a'}`,
261
+ `- Improved from baseline: ${metricSummary ? (metricSummary.improvedFromBaseline ? 'yes' : 'no') : 'n/a'}`,
262
+ `- Monotonic trend: ${metricSummary ? (metricSummary.monotonic ? 'yes' : 'no') : 'n/a'}`,
263
+ '',
264
+ '## Research Objective Snapshot',
265
+ '',
266
+ objectiveText ? '```text' : '_No objective text captured._',
267
+ ...(objectiveText ? [objectiveText, '```'] : []),
268
+ '',
182
269
  '## Iteration Results',
183
270
  '',
184
271
  '| Iteration | Status | Metric | Best Metric | Agent Exit | Benchmark Exit | Timestamp |',
@@ -372,6 +459,7 @@ export async function runResearchLoop(config, cliOverrides = {}) {
372
459
 
373
460
  let bestMetric = null;
374
461
  let bestIteration = 0;
462
+ let baselineMetric = null;
375
463
 
376
464
  console.log(`Starting run ${runId}`);
377
465
  console.log(`Agent mode: ${agentMode}`);
@@ -393,6 +481,8 @@ export async function runResearchLoop(config, cliOverrides = {}) {
393
481
 
394
482
  for (let i = 1; i <= iterations; i++) {
395
483
  console.log(`\n--- Iteration ${i}/${iterations} ---`);
484
+ const progressPercent = Number(((i / iterations) * 100).toFixed(1));
485
+ console.log(`Progress: ${renderProgressBar(i, iterations)} ${progressPercent}%`);
396
486
  const beforeCommit = await getGitCommit(cwd);
397
487
 
398
488
  const iterationAgentPrompt = benchmarkFeedback
@@ -464,7 +554,7 @@ export async function runResearchLoop(config, cliOverrides = {}) {
464
554
  const nonFatalInternalMaxIterations = isNonFatalInternalMaxIterations(agentStep.agentMode, agentResult);
465
555
 
466
556
  if (nonFatalInternalMaxIterations) {
467
- console.log('Agent step reached backend max iterations; continuing to benchmark...');
557
+ console.log('Agent step: backend iteration budget reached; continuing to benchmark...');
468
558
  }
469
559
 
470
560
  if (agentResult.code !== 0 && !nonFatalInternalMaxIterations) {
@@ -520,7 +610,15 @@ export async function runResearchLoop(config, cliOverrides = {}) {
520
610
  }
521
611
 
522
612
  const improved = isBetter(metric, bestMetric, direction);
523
- console.log(`Metric: ${metric}${bestMetric == null ? ' (baseline)' : ` | best: ${bestMetric}`}`);
613
+ if (baselineMetric == null) {
614
+ baselineMetric = metric;
615
+ }
616
+
617
+ const baselineDelta = metric - baselineMetric;
618
+ const baselineDeltaDisplay = `${baselineDelta > 0 ? '+' : ''}${baselineDelta}`;
619
+ console.log(
620
+ `Metric: ${metric}${bestMetric == null ? ' (baseline)' : ` | best: ${bestMetric}`} | delta_baseline: ${baselineDeltaDisplay}`
621
+ );
524
622
 
525
623
  const nextBestMetric = improved ? metric : bestMetric;
526
624
  benchmarkFeedback = buildBenchmarkFeedback({
@@ -596,6 +694,7 @@ export async function runResearchLoop(config, cliOverrides = {}) {
596
694
  bestIteration,
597
695
  iterationEntries,
598
696
  runLogRelativePath,
697
+ agentObjective: resolvedPrompt.prompt,
599
698
  });
600
699
 
601
700
  if (merged.onCompleteCommand) {