autoresearcher 0.1.5 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/src/run-loop.js +101 -2
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "autoresearcher",
|
|
3
|
-
"version": "0.1.5",
|
|
3
|
+
"version": "0.1.7",
|
|
4
4
|
"description": "Benchmark-driven autonomous research CLI for post-quantum and blockchain R&D",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -29,7 +29,7 @@
|
|
|
29
29
|
"src",
|
|
30
30
|
"README.md"
|
|
31
31
|
],
|
|
32
|
-
"homepage": "https://autoresearcher.
|
|
32
|
+
"homepage": "https://autoresearcher.org",
|
|
33
33
|
"license": "MIT",
|
|
34
34
|
"dependencies": {
|
|
35
35
|
"ralph-starter": "^0.4.5"
|
package/src/run-loop.js
CHANGED
|
@@ -140,6 +140,76 @@ function escapeCell(value) {
|
|
|
140
140
|
return String(value).replaceAll('|', '\\|').replaceAll('\n', ' ');
|
|
141
141
|
}
|
|
142
142
|
|
|
143
|
+
/**
 * Pass a value through only when it is a real, finite number.
 *
 * @param {*} value - Candidate value of any type.
 * @returns {number|null} The same value when it is a finite number, otherwise `null`.
 */
function asFiniteNumber(value) {
  // Number.isFinite never coerces: strings, null, undefined, NaN and
  // +/-Infinity are all rejected here.
  if (!Number.isFinite(value)) {
    return null;
  }
  return value;
}
|
|
146
|
+
|
|
147
|
+
/**
 * Summarize how the metric moved between the first and the last iteration
 * that reported a usable (finite, numeric) metric.
 *
 * @param {Array<{iteration: *, metric: *}>} iterationEntries - Per-iteration
 *   records. Entries whose `metric` is not a finite number are ignored; a
 *   non-array argument is treated as "no entries".
 * @param {string} direction - `'min'` when lower is better; any other value
 *   is treated as higher-is-better.
 * @returns {null | {
 *   baseline: number, baselineIteration: *,
 *   final: number, finalIteration: *,
 *   delta: number, relativeDeltaPct: (number|null),
 *   improvedFromBaseline: boolean, monotonic: boolean
 * }} `null` when no entry carries a finite metric. `relativeDeltaPct` is
 *   `null` when the baseline is exactly 0 (the ratio is undefined).
 */
function summarizeMetricEvolution(iterationEntries, direction) {
  const entries = Array.isArray(iterationEntries) ? iterationEntries : [];
  const points = entries
    .map((entry) => ({
      iteration: entry?.iteration,
      metric: asFiniteNumber(entry?.metric),
    }))
    .filter((point) => point.metric != null);

  if (points.length === 0) {
    return null;
  }

  const lowerIsBetter = direction === 'min';
  const firstPoint = points[0];
  const lastPoint = points[points.length - 1];
  const baseline = firstPoint.metric;
  const final = lastPoint.metric;
  const delta = final - baseline;

  // Divide by the magnitude of the baseline so the percentage always carries
  // the same sign as `delta`, even when the baseline itself is negative.
  const relativeDeltaPct = baseline !== 0 ? (delta / Math.abs(baseline)) * 100 : null;

  const improvedFromBaseline = lowerIsBetter ? final < baseline : final > baseline;

  // Monotonic = no step moves against the desired direction (ties allowed).
  const monotonic = points.every((point, index) => {
    if (index === 0) return true;
    const prev = points[index - 1].metric;
    return lowerIsBetter ? point.metric <= prev : point.metric >= prev;
  });

  return {
    baseline,
    baselineIteration: firstPoint.iteration,
    final,
    finalIteration: lastPoint.iteration,
    delta,
    relativeDeltaPct,
    improvedFromBaseline,
    monotonic,
  };
}
|
|
181
|
+
|
|
182
|
+
/**
 * Render a number with an explicit leading `+` when it is positive.
 *
 * Non-integer values are rounded to `maxSignificantDigits` significant digits
 * first, so binary floating-point noise from subtraction (for example
 * `0.1 + 0.2` evaluating to `0.30000000000000004`) does not end up in the
 * output. Integers are printed unchanged.
 *
 * @param {*} value - Value to format.
 * @param {number} [maxSignificantDigits=12] - Significant digits kept for
 *   non-integer values; must be within the range accepted by
 *   `Number.prototype.toPrecision` (1-100).
 * @returns {string} `'n/a'` when `value` is not a finite number; otherwise
 *   the number as text, prefixed with `+` when greater than zero.
 */
function formatSigned(value, maxSignificantDigits = 12) {
  if (!Number.isFinite(value)) return 'n/a';

  // Integers are left alone so large whole numbers never lose digits.
  const cleaned = Number.isInteger(value)
    ? value
    : Number(value.toPrecision(maxSignificantDigits));

  const sign = cleaned > 0 ? '+' : '';
  return `${sign}${cleaned}`;
}
|
|
187
|
+
|
|
188
|
+
/**
 * Normalize free-form objective text and cap its length.
 *
 * The text is trimmed, carriage returns are removed, and anything beyond
 * `maxChars` UTF-16 code units is cut off and replaced by a
 * `\n...[truncated]` marker.
 *
 * NOTE(review): the caller visible in this file places the result inside a
 * ``` fenced block; text that itself contains ``` would end that fence early.
 * This function does not alter such sequences.
 *
 * @param {*} value - Raw objective text; anything that is not a non-empty
 *   string yields `''`.
 * @param {number} [maxChars=4000] - Maximum number of UTF-16 code units kept
 *   before the truncation marker.
 * @returns {string} The cleaned (and possibly truncated) text, or `''`.
 */
function sanitizeObjectiveText(value, maxChars = 4000) {
  if (!value || typeof value !== 'string') {
    return '';
  }

  const normalized = value.trim().replaceAll('\r', '');
  if (!normalized) return '';
  if (normalized.length <= maxChars) {
    return normalized;
  }

  let head = normalized.slice(0, maxChars);

  // slice() counts UTF-16 code units, so the cut can land between the two
  // halves of a surrogate pair. Drop a dangling high surrogate rather than
  // emit a malformed character just before the marker.
  const lastUnit = head.charCodeAt(head.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    head = head.slice(0, -1);
  }

  return `${head}\n...[truncated]`;
}
|
|
201
|
+
|
|
202
|
+
/**
 * Draw a fixed-width ASCII progress bar such as `[######------]`.
 *
 * @param {number} current - Units completed so far.
 * @param {number} total - Units expected overall.
 * @param {number} [width=24] - Number of cells between the brackets. A value
 *   that is not a finite, non-negative number falls back to 24; fractional
 *   widths are floored.
 * @returns {string} The bar; every cell is `?` when `current` or `total` is
 *   not finite or `total` is not positive.
 */
function renderProgressBar(current, total, width = 24) {
  // String.prototype.repeat throws RangeError on a negative or infinite
  // count, so normalise the width before it is used anywhere.
  const cells = Number.isFinite(width) && width >= 0 ? Math.floor(width) : 24;

  if (!Number.isFinite(current) || !Number.isFinite(total) || total <= 0) {
    // Keep the placeholder the same length as a real bar of this width.
    return `[${'?'.repeat(cells)}]`;
  }

  // Clamp so overshoot or negative progress still renders a valid bar.
  const ratio = Math.max(0, Math.min(1, current / total));
  const filled = Math.round(ratio * cells);
  const empty = Math.max(0, cells - filled);
  return `[${'#'.repeat(filled)}${'-'.repeat(empty)}]`;
}
|
|
212
|
+
|
|
143
213
|
function renderFinalReport({
|
|
144
214
|
runId,
|
|
145
215
|
startedAt,
|
|
@@ -153,11 +223,14 @@ function renderFinalReport({
|
|
|
153
223
|
bestIteration,
|
|
154
224
|
iterationEntries,
|
|
155
225
|
runLogRelativePath,
|
|
226
|
+
agentObjective,
|
|
156
227
|
}) {
|
|
157
228
|
const keepCount = iterationEntries.filter((entry) => entry.status === 'keep').length;
|
|
158
229
|
const rejectCount = iterationEntries.filter((entry) => entry.status === 'reject').length;
|
|
159
230
|
const benchmarkFailedCount = iterationEntries.filter((entry) => entry.status === 'benchmark_failed').length;
|
|
160
231
|
const agentFailedCount = iterationEntries.filter((entry) => entry.status === 'agent_failed').length;
|
|
232
|
+
const metricSummary = summarizeMetricEvolution(iterationEntries, direction);
|
|
233
|
+
const objectiveText = sanitizeObjectiveText(agentObjective);
|
|
161
234
|
|
|
162
235
|
const lines = [
|
|
163
236
|
'# Autoresearcher Final Report',
|
|
@@ -179,6 +252,20 @@ function renderFinalReport({
|
|
|
179
252
|
`- Agent failures: ${agentFailedCount}`,
|
|
180
253
|
`- Run log: \`${runLogRelativePath}\``,
|
|
181
254
|
'',
|
|
255
|
+
'## Metric Comparison',
|
|
256
|
+
'',
|
|
257
|
+
`- Baseline metric: ${metricSummary ? formatMetric(metricSummary.baseline) : 'n/a'}${metricSummary ? ` (iteration ${metricSummary.baselineIteration})` : ''}`,
|
|
258
|
+
`- Final metric: ${metricSummary ? formatMetric(metricSummary.final) : 'n/a'}${metricSummary ? ` (iteration ${metricSummary.finalIteration})` : ''}`,
|
|
259
|
+
`- Delta (final-baseline): ${metricSummary ? formatSigned(metricSummary.delta) : 'n/a'}`,
|
|
260
|
+
`- Relative change: ${metricSummary && metricSummary.relativeDeltaPct != null ? `${formatSigned(Number(metricSummary.relativeDeltaPct.toFixed(2)))}%` : 'n/a'}`,
|
|
261
|
+
`- Improved from baseline: ${metricSummary ? (metricSummary.improvedFromBaseline ? 'yes' : 'no') : 'n/a'}`,
|
|
262
|
+
`- Monotonic trend: ${metricSummary ? (metricSummary.monotonic ? 'yes' : 'no') : 'n/a'}`,
|
|
263
|
+
'',
|
|
264
|
+
'## Research Objective Snapshot',
|
|
265
|
+
'',
|
|
266
|
+
objectiveText ? '```text' : '_No objective text captured._',
|
|
267
|
+
...(objectiveText ? [objectiveText, '```'] : []),
|
|
268
|
+
'',
|
|
182
269
|
'## Iteration Results',
|
|
183
270
|
'',
|
|
184
271
|
'| Iteration | Status | Metric | Best Metric | Agent Exit | Benchmark Exit | Timestamp |',
|
|
@@ -372,6 +459,7 @@ export async function runResearchLoop(config, cliOverrides = {}) {
|
|
|
372
459
|
|
|
373
460
|
let bestMetric = null;
|
|
374
461
|
let bestIteration = 0;
|
|
462
|
+
let baselineMetric = null;
|
|
375
463
|
|
|
376
464
|
console.log(`Starting run ${runId}`);
|
|
377
465
|
console.log(`Agent mode: ${agentMode}`);
|
|
@@ -393,6 +481,8 @@ export async function runResearchLoop(config, cliOverrides = {}) {
|
|
|
393
481
|
|
|
394
482
|
for (let i = 1; i <= iterations; i++) {
|
|
395
483
|
console.log(`\n--- Iteration ${i}/${iterations} ---`);
|
|
484
|
+
const progressPercent = Number(((i / iterations) * 100).toFixed(1));
|
|
485
|
+
console.log(`Progress: ${renderProgressBar(i, iterations)} ${progressPercent}%`);
|
|
396
486
|
const beforeCommit = await getGitCommit(cwd);
|
|
397
487
|
|
|
398
488
|
const iterationAgentPrompt = benchmarkFeedback
|
|
@@ -464,7 +554,7 @@ export async function runResearchLoop(config, cliOverrides = {}) {
|
|
|
464
554
|
const nonFatalInternalMaxIterations = isNonFatalInternalMaxIterations(agentStep.agentMode, agentResult);
|
|
465
555
|
|
|
466
556
|
if (nonFatalInternalMaxIterations) {
|
|
467
|
-
console.log('Agent step
|
|
557
|
+
console.log('Agent step: backend iteration budget reached; continuing to benchmark...');
|
|
468
558
|
}
|
|
469
559
|
|
|
470
560
|
if (agentResult.code !== 0 && !nonFatalInternalMaxIterations) {
|
|
@@ -520,7 +610,15 @@ export async function runResearchLoop(config, cliOverrides = {}) {
|
|
|
520
610
|
}
|
|
521
611
|
|
|
522
612
|
const improved = isBetter(metric, bestMetric, direction);
|
|
523
|
-
|
|
613
|
+
if (baselineMetric == null) {
|
|
614
|
+
baselineMetric = metric;
|
|
615
|
+
}
|
|
616
|
+
|
|
617
|
+
const baselineDelta = metric - baselineMetric;
|
|
618
|
+
const baselineDeltaDisplay = `${baselineDelta > 0 ? '+' : ''}${baselineDelta}`;
|
|
619
|
+
console.log(
|
|
620
|
+
`Metric: ${metric}${bestMetric == null ? ' (baseline)' : ` | best: ${bestMetric}`} | delta_baseline: ${baselineDeltaDisplay}`
|
|
621
|
+
);
|
|
524
622
|
|
|
525
623
|
const nextBestMetric = improved ? metric : bestMetric;
|
|
526
624
|
benchmarkFeedback = buildBenchmarkFeedback({
|
|
@@ -596,6 +694,7 @@ export async function runResearchLoop(config, cliOverrides = {}) {
|
|
|
596
694
|
bestIteration,
|
|
597
695
|
iterationEntries,
|
|
598
696
|
runLogRelativePath,
|
|
697
|
+
agentObjective: resolvedPrompt.prompt,
|
|
599
698
|
});
|
|
600
699
|
|
|
601
700
|
if (merged.onCompleteCommand) {
|