@oh-my-pi/pi-coding-agent 13.19.0 → 14.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +277 -2
- package/package.json +86 -20
- package/scripts/format-prompts.ts +2 -2
- package/src/autoresearch/apply-contract-to-state.ts +24 -0
- package/src/autoresearch/contract.ts +0 -44
- package/src/autoresearch/dashboard.ts +1 -2
- package/src/autoresearch/git.ts +91 -0
- package/src/autoresearch/helpers.ts +49 -0
- package/src/autoresearch/index.ts +28 -187
- package/src/autoresearch/prompt.md +26 -9
- package/src/autoresearch/state.ts +0 -6
- package/src/autoresearch/tools/init-experiment.ts +202 -117
- package/src/autoresearch/tools/log-experiment.ts +83 -125
- package/src/autoresearch/tools/run-experiment.ts +48 -10
- package/src/autoresearch/types.ts +2 -2
- package/src/capability/index.ts +4 -2
- package/src/cli/file-processor.ts +3 -3
- package/src/cli/grep-cli.ts +8 -8
- package/src/cli/grievances-cli.ts +78 -0
- package/src/cli/read-cli.ts +67 -0
- package/src/cli/setup-cli.ts +4 -4
- package/src/cli/update-cli.ts +3 -3
- package/src/cli.ts +2 -0
- package/src/commands/grep.ts +6 -1
- package/src/commands/grievances.ts +20 -0
- package/src/commands/read.ts +33 -0
- package/src/commit/agentic/agent.ts +5 -5
- package/src/commit/agentic/index.ts +3 -4
- package/src/commit/agentic/tools/analyze-file.ts +3 -3
- package/src/commit/agentic/validation.ts +1 -1
- package/src/commit/analysis/conventional.ts +4 -4
- package/src/commit/analysis/summary.ts +3 -3
- package/src/commit/changelog/generate.ts +4 -4
- package/src/commit/map-reduce/map-phase.ts +4 -4
- package/src/commit/map-reduce/reduce-phase.ts +4 -4
- package/src/commit/pipeline.ts +3 -4
- package/src/config/model-registry.ts +17 -3
- package/src/config/prompt-templates.ts +44 -226
- package/src/config/resolve-config-value.ts +4 -2
- package/src/config/settings-schema.ts +54 -2
- package/src/config/settings.ts +25 -26
- package/src/dap/client.ts +674 -0
- package/src/dap/config.ts +150 -0
- package/src/dap/defaults.json +211 -0
- package/src/dap/index.ts +4 -0
- package/src/dap/session.ts +1255 -0
- package/src/dap/types.ts +600 -0
- package/src/debug/log-viewer.ts +3 -2
- package/src/discovery/builtin.ts +1 -2
- package/src/discovery/codex.ts +2 -2
- package/src/discovery/github.ts +2 -1
- package/src/discovery/helpers.ts +2 -2
- package/src/discovery/opencode.ts +2 -2
- package/src/edit/diff.ts +818 -0
- package/src/edit/index.ts +309 -0
- package/src/edit/line-hash.ts +67 -0
- package/src/edit/modes/chunk.ts +454 -0
- package/src/{patch → edit/modes}/hashline.ts +741 -361
- package/src/{patch/applicator.ts → edit/modes/patch.ts} +420 -117
- package/src/{patch/fuzzy.ts → edit/modes/replace.ts} +519 -197
- package/src/{patch → edit}/normalize.ts +97 -76
- package/src/{patch/shared.ts → edit/renderer.ts} +181 -108
- package/src/exec/bash-executor.ts +4 -2
- package/src/exec/idle-timeout-watchdog.ts +126 -0
- package/src/exec/non-interactive-env.ts +5 -0
- package/src/extensibility/custom-commands/bundled/ci-green/index.ts +2 -2
- package/src/extensibility/custom-commands/bundled/review/index.ts +36 -15
- package/src/extensibility/custom-commands/loader.ts +1 -2
- package/src/extensibility/custom-tools/loader.ts +34 -11
- package/src/extensibility/extensions/loader.ts +9 -4
- package/src/extensibility/extensions/runner.ts +24 -1
- package/src/extensibility/extensions/types.ts +1 -1
- package/src/extensibility/hooks/loader.ts +5 -6
- package/src/extensibility/hooks/types.ts +1 -1
- package/src/extensibility/plugins/doctor.ts +2 -1
- package/src/extensibility/slash-commands.ts +3 -7
- package/src/index.ts +2 -1
- package/src/internal-urls/docs-index.generated.ts +11 -11
- package/src/ipy/executor.ts +58 -17
- package/src/ipy/gateway-coordinator.ts +6 -4
- package/src/ipy/kernel.ts +45 -22
- package/src/ipy/runtime.ts +2 -2
- package/src/lsp/client.ts +7 -4
- package/src/lsp/clients/lsp-linter-client.ts +4 -4
- package/src/lsp/config.ts +20 -4
- package/src/lsp/defaults.json +688 -154
- package/src/lsp/index.ts +234 -45
- package/src/lsp/lspmux.ts +2 -2
- package/src/lsp/startup-events.ts +13 -0
- package/src/lsp/types.ts +12 -1
- package/src/lsp/utils.ts +8 -1
- package/src/main.ts +102 -46
- package/src/memories/index.ts +4 -5
- package/src/modes/acp/acp-agent.ts +563 -163
- package/src/modes/acp/acp-event-mapper.ts +9 -1
- package/src/modes/acp/acp-mode.ts +4 -2
- package/src/modes/components/agent-dashboard.ts +3 -4
- package/src/modes/components/diff.ts +6 -7
- package/src/modes/components/read-tool-group.ts +6 -12
- package/src/modes/components/session-observer-overlay.ts +21 -12
- package/src/modes/components/settings-defs.ts +5 -0
- package/src/modes/components/tool-execution.ts +1 -1
- package/src/modes/components/welcome.ts +1 -1
- package/src/modes/controllers/btw-controller.ts +2 -2
- package/src/modes/controllers/command-controller.ts +3 -2
- package/src/modes/controllers/input-controller.ts +12 -8
- package/src/modes/index.ts +20 -2
- package/src/modes/interactive-mode.ts +94 -37
- package/src/modes/rpc/host-tools.ts +186 -0
- package/src/modes/rpc/rpc-client.ts +178 -13
- package/src/modes/rpc/rpc-mode.ts +73 -3
- package/src/modes/rpc/rpc-types.ts +53 -1
- package/src/modes/theme/theme.ts +80 -8
- package/src/modes/types.ts +2 -2
- package/src/prompts/review-request.md +6 -0
- package/src/prompts/system/system-prompt.md +2 -1
- package/src/prompts/tools/chunk-edit.md +223 -0
- package/src/prompts/tools/debug.md +43 -0
- package/src/prompts/tools/grep.md +3 -0
- package/src/prompts/tools/lsp.md +5 -5
- package/src/prompts/tools/read-chunk.md +17 -0
- package/src/prompts/tools/read.md +19 -5
- package/src/sdk.ts +190 -154
- package/src/secrets/obfuscator.ts +1 -1
- package/src/session/agent-session.ts +306 -256
- package/src/session/agent-storage.ts +12 -12
- package/src/session/compaction/branch-summarization.ts +3 -3
- package/src/session/compaction/compaction.ts +5 -6
- package/src/session/compaction/utils.ts +3 -3
- package/src/session/history-storage.ts +62 -19
- package/src/session/messages.ts +3 -3
- package/src/session/session-dump-format.ts +203 -0
- package/src/session/session-storage.ts +4 -2
- package/src/session/streaming-output.ts +1 -1
- package/src/session/tool-choice-queue.ts +213 -0
- package/src/slash-commands/builtin-registry.ts +56 -8
- package/src/ssh/connection-manager.ts +2 -2
- package/src/ssh/sshfs-mount.ts +5 -5
- package/src/stt/downloader.ts +4 -4
- package/src/stt/recorder.ts +4 -4
- package/src/stt/transcriber.ts +2 -2
- package/src/system-prompt.ts +21 -13
- package/src/task/agents.ts +5 -6
- package/src/task/commands.ts +2 -5
- package/src/task/executor.ts +4 -4
- package/src/task/index.ts +3 -4
- package/src/task/template.ts +2 -2
- package/src/task/worktree.ts +4 -4
- package/src/tools/ask.ts +2 -3
- package/src/tools/ast-edit.ts +7 -7
- package/src/tools/ast-grep.ts +7 -7
- package/src/tools/auto-generated-guard.ts +36 -41
- package/src/tools/await-tool.ts +2 -2
- package/src/tools/bash.ts +5 -23
- package/src/tools/browser.ts +4 -5
- package/src/tools/calculator.ts +2 -3
- package/src/tools/cancel-job.ts +2 -2
- package/src/tools/checkpoint.ts +3 -3
- package/src/tools/debug.ts +1007 -0
- package/src/tools/exit-plan-mode.ts +2 -3
- package/src/tools/fetch.ts +67 -3
- package/src/tools/find.ts +4 -5
- package/src/tools/fs-cache-invalidation.ts +5 -0
- package/src/tools/gemini-image.ts +13 -5
- package/src/tools/gh.ts +10 -11
- package/src/tools/grep.ts +57 -9
- package/src/tools/index.ts +44 -22
- package/src/tools/inspect-image.ts +4 -4
- package/src/tools/output-meta.ts +1 -1
- package/src/tools/python.ts +19 -6
- package/src/tools/read.ts +198 -67
- package/src/tools/render-mermaid.ts +2 -3
- package/src/tools/render-utils.ts +20 -6
- package/src/tools/renderers.ts +3 -1
- package/src/tools/report-tool-issue.ts +80 -0
- package/src/tools/resolve.ts +70 -39
- package/src/tools/search-tool-bm25.ts +2 -2
- package/src/tools/ssh.ts +2 -2
- package/src/tools/todo-write.ts +2 -2
- package/src/tools/tool-timeouts.ts +1 -0
- package/src/tools/write.ts +5 -6
- package/src/tui/tree-list.ts +3 -1
- package/src/utils/clipboard.ts +80 -0
- package/src/utils/commit-message-generator.ts +2 -3
- package/src/utils/edit-mode.ts +49 -0
- package/src/utils/file-display-mode.ts +6 -5
- package/src/utils/file-mentions.ts +8 -7
- package/src/utils/git.ts +4 -4
- package/src/utils/image-loading.ts +98 -0
- package/src/utils/title-generator.ts +2 -3
- package/src/utils/tools-manager.ts +6 -6
- package/src/web/scrapers/choosealicense.ts +1 -1
- package/src/web/search/index.ts +3 -3
- package/src/autoresearch/command-initialize.md +0 -34
- package/src/patch/diff.ts +0 -433
- package/src/patch/index.ts +0 -888
- package/src/patch/parser.ts +0 -532
- package/src/patch/types.ts +0 -292
- package/src/prompts/agents/oracle.md +0 -77
- package/src/tools/pending-action.ts +0 -49
- package/src/utils/child-process.ts +0 -88
- package/src/utils/frontmatter.ts +0 -117
- package/src/utils/image-input.ts +0 -274
- package/src/utils/mime.ts +0 -53
- package/src/utils/prompt-format.ts +0 -170
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import * as crypto from "node:crypto";
|
|
2
1
|
import * as fs from "node:fs";
|
|
3
2
|
import * as path from "node:path";
|
|
4
3
|
import type { AutoresearchBenchmarkContract, AutoresearchContract, MetricDirection } from "./types";
|
|
@@ -76,49 +75,6 @@ export function validateAutoresearchContract(contract: AutoresearchContract): st
|
|
|
76
75
|
return errors;
|
|
77
76
|
}
|
|
78
77
|
|
|
79
|
-
export function buildAutoresearchSegmentFingerprint(
|
|
80
|
-
contract: AutoresearchContract,
|
|
81
|
-
scripts: {
|
|
82
|
-
benchmarkScript: string;
|
|
83
|
-
checksScript: string | null;
|
|
84
|
-
},
|
|
85
|
-
): string {
|
|
86
|
-
const payload = {
|
|
87
|
-
benchmark: contract.benchmark,
|
|
88
|
-
scopePaths: contract.scopePaths,
|
|
89
|
-
offLimits: contract.offLimits,
|
|
90
|
-
constraints: contract.constraints,
|
|
91
|
-
scripts,
|
|
92
|
-
};
|
|
93
|
-
return crypto.createHash("sha256").update(JSON.stringify(payload)).digest("hex");
|
|
94
|
-
}
|
|
95
|
-
|
|
96
|
-
export function getAutoresearchFingerprintMismatchError(
|
|
97
|
-
stateFingerprint: string | null,
|
|
98
|
-
workDir: string,
|
|
99
|
-
): string | null {
|
|
100
|
-
if (!stateFingerprint) {
|
|
101
|
-
return "The current segment has no fingerprint metadata. Re-run init_experiment before continuing.";
|
|
102
|
-
}
|
|
103
|
-
|
|
104
|
-
const contractResult = readAutoresearchContract(workDir);
|
|
105
|
-
const scriptSnapshot = loadAutoresearchScriptSnapshot(workDir);
|
|
106
|
-
const errors = [...contractResult.errors, ...scriptSnapshot.errors];
|
|
107
|
-
if (errors.length > 0) {
|
|
108
|
-
return `${errors.join(" ")} Re-run init_experiment after fixing the workspace contract.`;
|
|
109
|
-
}
|
|
110
|
-
|
|
111
|
-
const currentFingerprint = buildAutoresearchSegmentFingerprint(contractResult.contract, {
|
|
112
|
-
benchmarkScript: scriptSnapshot.benchmarkScript,
|
|
113
|
-
checksScript: scriptSnapshot.checksScript,
|
|
114
|
-
});
|
|
115
|
-
if (currentFingerprint === stateFingerprint) {
|
|
116
|
-
return null;
|
|
117
|
-
}
|
|
118
|
-
|
|
119
|
-
return "autoresearch.md, autoresearch.sh, or autoresearch.checks.sh changed since the current segment was initialized. Re-run init_experiment before continuing.";
|
|
120
|
-
}
|
|
121
|
-
|
|
122
78
|
export function loadAutoresearchScriptSnapshot(workDir: string): AutoresearchScriptSnapshot {
|
|
123
79
|
const benchmarkScriptPath = path.join(workDir, "autoresearch.sh");
|
|
124
80
|
const checksScriptPath = path.join(workDir, "autoresearch.checks.sh");
|
|
@@ -1,6 +1,5 @@
|
|
|
1
|
-
import { matchesKey, Text, truncateToWidth, visibleWidth } from "@oh-my-pi/pi-tui";
|
|
1
|
+
import { matchesKey, replaceTabs, Text, truncateToWidth, visibleWidth } from "@oh-my-pi/pi-tui";
|
|
2
2
|
import type { Theme } from "../modes/theme/theme";
|
|
3
|
-
import { replaceTabs } from "../tools/render-utils";
|
|
4
3
|
import { formatElapsed, formatNum, isBetter } from "./helpers";
|
|
5
4
|
import { currentResults, findBaselineMetric, findBaselineRunNumber, findBaselineSecondary } from "./state";
|
|
6
5
|
import type { AutoresearchRuntime, DashboardController, ExperimentResult, ExperimentState } from "./types";
|
package/src/autoresearch/git.ts
CHANGED
|
@@ -236,3 +236,94 @@ function collectUnsafeDirtyPaths(statusOutput: string, workDirPrefix: string): s
|
|
|
236
236
|
}
|
|
237
237
|
return unsafeDirtyPaths;
|
|
238
238
|
}
|
|
239
|
+
|
|
240
|
+
export interface DirtyPathEntry {
|
|
241
|
+
path: string;
|
|
242
|
+
untracked: boolean;
|
|
243
|
+
}
|
|
244
|
+
|
|
245
|
+
export function parseDirtyPathsWithStatus(statusOutput: string): DirtyPathEntry[] {
|
|
246
|
+
if (statusOutput.includes("\0")) {
|
|
247
|
+
return parseDirtyPathsNulWithStatus(statusOutput);
|
|
248
|
+
}
|
|
249
|
+
return parseDirtyPathsLinesWithStatus(statusOutput);
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
function parseDirtyPathsNulWithStatus(statusOutput: string): DirtyPathEntry[] {
|
|
253
|
+
const seen = new Set<string>();
|
|
254
|
+
const results: DirtyPathEntry[] = [];
|
|
255
|
+
let index = 0;
|
|
256
|
+
while (index + 3 <= statusOutput.length) {
|
|
257
|
+
const statusToken = statusOutput.slice(index, index + 3);
|
|
258
|
+
index += 3;
|
|
259
|
+
const pathEnd = statusOutput.indexOf("\0", index);
|
|
260
|
+
if (pathEnd < 0) break;
|
|
261
|
+
const firstPath = statusOutput.slice(index, pathEnd);
|
|
262
|
+
index = pathEnd + 1;
|
|
263
|
+
const untracked = statusToken.trim().startsWith("??");
|
|
264
|
+
addDirtyPathEntry(seen, results, firstPath, untracked);
|
|
265
|
+
if (isRenameOrCopy(statusToken)) {
|
|
266
|
+
const secondPathEnd = statusOutput.indexOf("\0", index);
|
|
267
|
+
if (secondPathEnd < 0) break;
|
|
268
|
+
const secondPath = statusOutput.slice(index, secondPathEnd);
|
|
269
|
+
index = secondPathEnd + 1;
|
|
270
|
+
addDirtyPathEntry(seen, results, secondPath, false);
|
|
271
|
+
}
|
|
272
|
+
}
|
|
273
|
+
return results;
|
|
274
|
+
}
|
|
275
|
+
|
|
276
|
+
function parseDirtyPathsLinesWithStatus(statusOutput: string): DirtyPathEntry[] {
|
|
277
|
+
const seen = new Set<string>();
|
|
278
|
+
const results: DirtyPathEntry[] = [];
|
|
279
|
+
for (const line of statusOutput.split("\n")) {
|
|
280
|
+
const trimmedLine = line.trimEnd();
|
|
281
|
+
if (trimmedLine.length < 4) continue;
|
|
282
|
+
const statusToken = trimmedLine.slice(0, 3);
|
|
283
|
+
const rawPath = trimmedLine.slice(3).trim();
|
|
284
|
+
if (rawPath.length === 0) continue;
|
|
285
|
+
const untracked = statusToken.trim().startsWith("??");
|
|
286
|
+
const renameParts = rawPath.split(" -> ");
|
|
287
|
+
for (const renamePart of renameParts) {
|
|
288
|
+
addDirtyPathEntry(seen, results, renamePart, untracked);
|
|
289
|
+
}
|
|
290
|
+
}
|
|
291
|
+
return results;
|
|
292
|
+
}
|
|
293
|
+
|
|
294
|
+
function addDirtyPathEntry(seen: Set<string>, results: DirtyPathEntry[], rawPath: string, untracked: boolean): void {
|
|
295
|
+
const normalizedPath = normalizeStatusPath(rawPath);
|
|
296
|
+
if (normalizedPath.length === 0 || seen.has(normalizedPath)) return;
|
|
297
|
+
seen.add(normalizedPath);
|
|
298
|
+
results.push({ path: normalizedPath, untracked });
|
|
299
|
+
}
|
|
300
|
+
|
|
301
|
+
export function parseWorkDirDirtyPathsWithStatus(statusOutput: string, workDirPrefix: string): DirtyPathEntry[] {
|
|
302
|
+
const results: DirtyPathEntry[] = [];
|
|
303
|
+
for (const entry of parseDirtyPathsWithStatus(statusOutput)) {
|
|
304
|
+
const relativePath = relativizeGitPathToWorkDir(entry.path, workDirPrefix);
|
|
305
|
+
if (relativePath === null) continue;
|
|
306
|
+
results.push({ path: relativePath, untracked: entry.untracked });
|
|
307
|
+
}
|
|
308
|
+
return results;
|
|
309
|
+
}
|
|
310
|
+
|
|
311
|
+
export function computeRunModifiedPaths(
|
|
312
|
+
preRunDirtyPaths: string[],
|
|
313
|
+
currentStatusOutput: string,
|
|
314
|
+
workDirPrefix: string,
|
|
315
|
+
): { tracked: string[]; untracked: string[] } {
|
|
316
|
+
const preRunSet = new Set(preRunDirtyPaths);
|
|
317
|
+
const tracked: string[] = [];
|
|
318
|
+
const untracked: string[] = [];
|
|
319
|
+
for (const entry of parseWorkDirDirtyPathsWithStatus(currentStatusOutput, workDirPrefix)) {
|
|
320
|
+
if (preRunSet.has(entry.path)) continue;
|
|
321
|
+
if (isAutoresearchLocalStatePath(entry.path)) continue;
|
|
322
|
+
if (entry.untracked) {
|
|
323
|
+
untracked.push(entry.path);
|
|
324
|
+
} else {
|
|
325
|
+
tracked.push(entry.path);
|
|
326
|
+
}
|
|
327
|
+
}
|
|
328
|
+
return { tracked, untracked };
|
|
329
|
+
}
|
|
@@ -269,6 +269,45 @@ export async function readPendingRunSummary(
|
|
|
269
269
|
return null;
|
|
270
270
|
}
|
|
271
271
|
|
|
272
|
+
export async function abandonUnloggedAutoresearchRuns(
|
|
273
|
+
workDir: string,
|
|
274
|
+
loggedRunNumbers: ReadonlySet<number>,
|
|
275
|
+
): Promise<number> {
|
|
276
|
+
const runsDir = path.join(workDir, ".autoresearch", "runs");
|
|
277
|
+
let entries: fs.Dirent[];
|
|
278
|
+
try {
|
|
279
|
+
entries = await fs.promises.readdir(runsDir, { withFileTypes: true });
|
|
280
|
+
} catch (error) {
|
|
281
|
+
if (isEnoent(error)) return 0;
|
|
282
|
+
throw error;
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
let abandoned = 0;
|
|
286
|
+
const stamp = new Date().toISOString();
|
|
287
|
+
for (const entry of entries) {
|
|
288
|
+
if (!entry.isDirectory()) continue;
|
|
289
|
+
const directoryName = entry.name;
|
|
290
|
+
const runDirectory = path.join(runsDir, directoryName);
|
|
291
|
+
const runJsonPath = path.join(runDirectory, "run.json");
|
|
292
|
+
let parsed: unknown;
|
|
293
|
+
try {
|
|
294
|
+
parsed = await Bun.file(runJsonPath).json();
|
|
295
|
+
} catch (error) {
|
|
296
|
+
if (isEnoent(error)) continue;
|
|
297
|
+
throw error;
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
const pending = parsePendingRunSummary(parsed, runDirectory, directoryName, loggedRunNumbers);
|
|
301
|
+
if (!pending) continue;
|
|
302
|
+
|
|
303
|
+
const existing = typeof parsed === "object" && parsed !== null ? (parsed as Record<string, unknown>) : {};
|
|
304
|
+
await Bun.write(runJsonPath, JSON.stringify({ ...existing, abandonedAt: stamp }, null, 2));
|
|
305
|
+
abandoned += 1;
|
|
306
|
+
}
|
|
307
|
+
|
|
308
|
+
return abandoned;
|
|
309
|
+
}
|
|
310
|
+
|
|
272
311
|
export function readConfig(cwd: string): AutoresearchConfig {
|
|
273
312
|
const configPath = path.join(cwd, "autoresearch.config.json");
|
|
274
313
|
try {
|
|
@@ -326,6 +365,7 @@ function parsePendingRunSummary(
|
|
|
326
365
|
): PendingRunSummary | null {
|
|
327
366
|
if (typeof value !== "object" || value === null) return null;
|
|
328
367
|
const candidate = value as {
|
|
368
|
+
abandonedAt?: unknown;
|
|
329
369
|
checks?: { durationSeconds?: unknown; passed?: unknown; timedOut?: unknown };
|
|
330
370
|
completedAt?: unknown;
|
|
331
371
|
command?: unknown;
|
|
@@ -335,6 +375,7 @@ function parsePendingRunSummary(
|
|
|
335
375
|
parsedAsi?: unknown;
|
|
336
376
|
parsedMetrics?: unknown;
|
|
337
377
|
parsedPrimary?: unknown;
|
|
378
|
+
preRunDirtyPaths?: unknown;
|
|
338
379
|
runNumber?: unknown;
|
|
339
380
|
status?: unknown;
|
|
340
381
|
timedOut?: unknown;
|
|
@@ -342,6 +383,9 @@ function parsePendingRunSummary(
|
|
|
342
383
|
if (candidate.loggedAt !== undefined || candidate.status !== undefined) {
|
|
343
384
|
return null;
|
|
344
385
|
}
|
|
386
|
+
if (typeof candidate.abandonedAt === "string" && candidate.abandonedAt.trim().length > 0) {
|
|
387
|
+
return null;
|
|
388
|
+
}
|
|
345
389
|
|
|
346
390
|
const command = typeof candidate.command === "string" ? candidate.command : "";
|
|
347
391
|
const runNumber =
|
|
@@ -389,6 +433,10 @@ function parsePendingRunSummary(
|
|
|
389
433
|
: null;
|
|
390
434
|
const checksTimedOut = candidate.checks?.timedOut === true;
|
|
391
435
|
|
|
436
|
+
const preRunDirtyPaths = Array.isArray(candidate.preRunDirtyPaths)
|
|
437
|
+
? candidate.preRunDirtyPaths.filter((item): item is string => typeof item === "string")
|
|
438
|
+
: [];
|
|
439
|
+
|
|
392
440
|
return {
|
|
393
441
|
checksDurationSeconds,
|
|
394
442
|
checksPass,
|
|
@@ -399,6 +447,7 @@ function parsePendingRunSummary(
|
|
|
399
447
|
parsedMetrics,
|
|
400
448
|
parsedPrimary,
|
|
401
449
|
passed: exitCode === 0 && !timedOut && checksPass !== false,
|
|
450
|
+
preRunDirtyPaths,
|
|
402
451
|
runDirectory,
|
|
403
452
|
runNumber,
|
|
404
453
|
};
|
|
@@ -1,9 +1,8 @@
|
|
|
1
1
|
import * as fs from "node:fs";
|
|
2
2
|
import * as path from "node:path";
|
|
3
3
|
import type { AutocompleteItem } from "@oh-my-pi/pi-tui";
|
|
4
|
-
import {
|
|
4
|
+
import { prompt } from "@oh-my-pi/pi-utils";
|
|
5
5
|
import type { ExtensionContext, ExtensionFactory } from "../extensibility/extensions";
|
|
6
|
-
import commandInitializeTemplate from "./command-initialize.md" with { type: "text" };
|
|
7
6
|
import commandResumeTemplate from "./command-resume.md" with { type: "text" };
|
|
8
7
|
import { pathMatchesContractPath } from "./contract";
|
|
9
8
|
import { createDashboardController } from "./dashboard";
|
|
@@ -12,7 +11,6 @@ import {
|
|
|
12
11
|
formatNum,
|
|
13
12
|
isAutoresearchCommittableFile,
|
|
14
13
|
isAutoresearchLocalStatePath,
|
|
15
|
-
isAutoresearchShCommand,
|
|
16
14
|
normalizeAutoresearchPath,
|
|
17
15
|
readMaxExperiments,
|
|
18
16
|
readPendingRunSummary,
|
|
@@ -37,18 +35,6 @@ import type { AutoresearchRuntime, ChecksResult, ExperimentResult, PendingRunSum
|
|
|
37
35
|
|
|
38
36
|
const EXPERIMENT_TOOL_NAMES = ["init_experiment", "run_experiment", "log_experiment"];
|
|
39
37
|
|
|
40
|
-
interface AutoresearchSetupInput {
|
|
41
|
-
intent: string;
|
|
42
|
-
benchmarkCommand: string;
|
|
43
|
-
metricName: string;
|
|
44
|
-
metricUnit: string;
|
|
45
|
-
direction: "lower" | "higher";
|
|
46
|
-
secondaryMetrics: string[];
|
|
47
|
-
scopePaths: string[];
|
|
48
|
-
offLimits: string[];
|
|
49
|
-
constraints: string[];
|
|
50
|
-
}
|
|
51
|
-
|
|
52
38
|
export const createAutoresearchExtension: ExtensionFactory = api => {
|
|
53
39
|
const runtimeStore = createRuntimeStore();
|
|
54
40
|
const dashboard = createDashboardController();
|
|
@@ -109,17 +95,6 @@ export const createAutoresearchExtension: ExtensionFactory = api => {
|
|
|
109
95
|
api.on("tool_call", (event, ctx) => {
|
|
110
96
|
const runtime = getRuntime(ctx);
|
|
111
97
|
if (!runtime.autoresearchMode) return;
|
|
112
|
-
if (event.toolName === "bash") {
|
|
113
|
-
const command = typeof event.input.command === "string" ? event.input.command : "";
|
|
114
|
-
const validationError = validateAutoresearchBashCommand(command);
|
|
115
|
-
if (validationError) {
|
|
116
|
-
return {
|
|
117
|
-
block: true,
|
|
118
|
-
reason: validationError,
|
|
119
|
-
};
|
|
120
|
-
}
|
|
121
|
-
return;
|
|
122
|
-
}
|
|
123
98
|
if (event.toolName !== "write" && event.toolName !== "edit" && event.toolName !== "ast_edit") return;
|
|
124
99
|
|
|
125
100
|
const rawPaths = getGuardedToolPaths(event.toolName, event.input);
|
|
@@ -151,14 +126,17 @@ export const createAutoresearchExtension: ExtensionFactory = api => {
|
|
|
151
126
|
});
|
|
152
127
|
|
|
153
128
|
api.registerCommand("autoresearch", {
|
|
154
|
-
description: "
|
|
129
|
+
description: "Toggle builtin autoresearch mode, or pass off / clear, or a goal message.",
|
|
155
130
|
getArgumentCompletions(argumentPrefix: string): AutocompleteItem[] | null {
|
|
156
131
|
if (argumentPrefix.includes(" ")) return null;
|
|
132
|
+
const normalized = argumentPrefix.trim().toLowerCase();
|
|
133
|
+
// No suggestions for an empty argument prefix so Tab after "/autoresearch " does not
|
|
134
|
+
// force-complete into off/clear; bare command submit toggles like /plan.
|
|
135
|
+
if (normalized.length === 0) return null;
|
|
157
136
|
const completions: AutocompleteItem[] = [
|
|
158
137
|
{ label: "off", value: "off", description: "Leave autoresearch mode" },
|
|
159
138
|
{ label: "clear", value: "clear", description: "Delete autoresearch.jsonl and leave autoresearch mode" },
|
|
160
139
|
];
|
|
161
|
-
const normalized = argumentPrefix.trim().toLowerCase();
|
|
162
140
|
const filtered = completions.filter(item => item.label.startsWith(normalized));
|
|
163
141
|
return filtered.length > 0 ? filtered : null;
|
|
164
142
|
},
|
|
@@ -171,6 +149,15 @@ export const createAutoresearchExtension: ExtensionFactory = api => {
|
|
|
171
149
|
return;
|
|
172
150
|
}
|
|
173
151
|
|
|
152
|
+
if (trimmed === "" && runtime.autoresearchMode) {
|
|
153
|
+
setMode(ctx, false, runtime.goal, "off");
|
|
154
|
+
dashboard.updateWidget(ctx, runtime);
|
|
155
|
+
const experimentTools = new Set(EXPERIMENT_TOOL_NAMES);
|
|
156
|
+
await api.setActiveTools(api.getActiveTools().filter(name => !experimentTools.has(name)));
|
|
157
|
+
ctx.ui.notify("Autoresearch mode disabled", "info");
|
|
158
|
+
return;
|
|
159
|
+
}
|
|
160
|
+
|
|
174
161
|
if (trimmed === "off") {
|
|
175
162
|
setMode(ctx, false, runtime.goal, "off");
|
|
176
163
|
dashboard.updateWidget(ctx, runtime);
|
|
@@ -227,7 +214,7 @@ export const createAutoresearchExtension: ExtensionFactory = api => {
|
|
|
227
214
|
dashboard.updateWidget(ctx, runtime);
|
|
228
215
|
await api.setActiveTools([...new Set([...api.getActiveTools(), ...EXPERIMENT_TOOL_NAMES])]);
|
|
229
216
|
api.sendUserMessage(
|
|
230
|
-
|
|
217
|
+
prompt.render(commandResumeTemplate, {
|
|
231
218
|
autoresearch_md_path: autoresearchMdPath,
|
|
232
219
|
branch_status_line: branchResult.created
|
|
233
220
|
? `Created and checked out dedicated git branch \`${branchResult.branchName}\` before resuming.`
|
|
@@ -239,57 +226,21 @@ export const createAutoresearchExtension: ExtensionFactory = api => {
|
|
|
239
226
|
return;
|
|
240
227
|
}
|
|
241
228
|
|
|
242
|
-
const
|
|
243
|
-
|
|
244
|
-
trimmed || runtime.goal || "what should autoresearch improve?",
|
|
245
|
-
);
|
|
246
|
-
if (!setup) return;
|
|
247
|
-
|
|
248
|
-
const branchResult = await ensureAutoresearchBranch(api, workDir, setup.intent);
|
|
229
|
+
const branchGoal = trimmed.length > 0 ? trimmed : null;
|
|
230
|
+
const branchResult = await ensureAutoresearchBranch(api, workDir, branchGoal);
|
|
249
231
|
if (!branchResult.ok) {
|
|
250
232
|
ctx.ui.notify(branchResult.error, "error");
|
|
251
233
|
return;
|
|
252
234
|
}
|
|
253
235
|
|
|
254
|
-
setMode(ctx, true,
|
|
255
|
-
runtime.state.name = setup.intent;
|
|
256
|
-
runtime.state.metricName = setup.metricName;
|
|
257
|
-
runtime.state.metricUnit = setup.metricUnit;
|
|
258
|
-
runtime.state.bestDirection = setup.direction;
|
|
259
|
-
runtime.state.secondaryMetrics = setup.secondaryMetrics.map(name => ({ name, unit: "" }));
|
|
260
|
-
runtime.state.benchmarkCommand = setup.benchmarkCommand;
|
|
261
|
-
runtime.state.scopePaths = [...setup.scopePaths];
|
|
262
|
-
runtime.state.offLimits = [...setup.offLimits];
|
|
263
|
-
runtime.state.constraints = [...setup.constraints];
|
|
236
|
+
setMode(ctx, true, branchGoal, "on");
|
|
264
237
|
dashboard.updateWidget(ctx, runtime);
|
|
265
238
|
await api.setActiveTools([...new Set([...api.getActiveTools(), ...EXPERIMENT_TOOL_NAMES])]);
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
intent: setup.intent,
|
|
272
|
-
benchmark_command: setup.benchmarkCommand,
|
|
273
|
-
metric_name: setup.metricName,
|
|
274
|
-
metric_unit: setup.metricUnit,
|
|
275
|
-
direction: setup.direction,
|
|
276
|
-
has_secondary_metrics: setup.secondaryMetrics.length > 0,
|
|
277
|
-
secondary_metrics: setup.secondaryMetrics,
|
|
278
|
-
secondary_metrics_block: formatBulletBlock(
|
|
279
|
-
setup.secondaryMetrics,
|
|
280
|
-
value => ` - \`${value}\``,
|
|
281
|
-
" - `(none)`",
|
|
282
|
-
),
|
|
283
|
-
scope_paths: setup.scopePaths,
|
|
284
|
-
scope_paths_block: formatBulletBlock(setup.scopePaths, value => ` - \`${value}\``),
|
|
285
|
-
has_off_limits: setup.offLimits.length > 0,
|
|
286
|
-
off_limits: setup.offLimits,
|
|
287
|
-
off_limits_block: formatBulletBlock(setup.offLimits, value => ` - \`${value}\``, " - `(none)`"),
|
|
288
|
-
has_constraints: setup.constraints.length > 0,
|
|
289
|
-
constraints: setup.constraints,
|
|
290
|
-
constraints_block: formatBulletBlock(setup.constraints, value => ` - ${value}`, " - `(none)`"),
|
|
291
|
-
}),
|
|
292
|
-
);
|
|
239
|
+
if (trimmed.length > 0) {
|
|
240
|
+
api.sendUserMessage(trimmed);
|
|
241
|
+
} else {
|
|
242
|
+
ctx.ui.notify("Autoresearch enabled—describe what to optimize in your next message.", "info");
|
|
243
|
+
}
|
|
293
244
|
},
|
|
294
245
|
});
|
|
295
246
|
|
|
@@ -352,7 +303,7 @@ export const createAutoresearchExtension: ExtensionFactory = api => {
|
|
|
352
303
|
api.sendMessage(
|
|
353
304
|
{
|
|
354
305
|
customType: "autoresearch-resume",
|
|
355
|
-
content:
|
|
306
|
+
content: prompt.render(resumeMessageTemplate, {
|
|
356
307
|
autoresearch_md_path: autoresearchMdPath,
|
|
357
308
|
has_ideas: fs.existsSync(ideasPath),
|
|
358
309
|
has_pending_run: Boolean(pendingRun),
|
|
@@ -394,15 +345,16 @@ export const createAutoresearchExtension: ExtensionFactory = api => {
|
|
|
394
345
|
status: result.status,
|
|
395
346
|
};
|
|
396
347
|
});
|
|
348
|
+
const hasAutoresearchMd = fs.existsSync(autoresearchMdPath);
|
|
397
349
|
return {
|
|
398
|
-
systemPrompt:
|
|
350
|
+
systemPrompt: prompt.render(promptTemplate, {
|
|
399
351
|
base_system_prompt: event.systemPrompt,
|
|
400
352
|
has_goal: goal.trim().length > 0,
|
|
401
353
|
goal,
|
|
354
|
+
has_autoresearch_md: hasAutoresearchMd,
|
|
402
355
|
working_dir: workDir,
|
|
403
356
|
default_metric_name: runtime.state.metricName,
|
|
404
357
|
metric_name: runtime.state.metricName,
|
|
405
|
-
has_autoresearch_md: fs.existsSync(autoresearchMdPath),
|
|
406
358
|
autoresearch_md_path: autoresearchMdPath,
|
|
407
359
|
has_checks: fs.existsSync(checksPath),
|
|
408
360
|
checks_path: checksPath,
|
|
@@ -438,93 +390,6 @@ export const createAutoresearchExtension: ExtensionFactory = api => {
|
|
|
438
390
|
});
|
|
439
391
|
};
|
|
440
392
|
|
|
441
|
-
async function promptForAutoresearchSetup(
|
|
442
|
-
ctx: ExtensionContext,
|
|
443
|
-
defaultIntent: string,
|
|
444
|
-
): Promise<AutoresearchSetupInput | undefined> {
|
|
445
|
-
const intentInput = await ctx.ui.input("Autoresearch Intent", defaultIntent);
|
|
446
|
-
if (intentInput === undefined) return undefined;
|
|
447
|
-
const intent = intentInput.trim();
|
|
448
|
-
if (intent.length === 0) {
|
|
449
|
-
ctx.ui.notify("Autoresearch intent is required", "info");
|
|
450
|
-
return undefined;
|
|
451
|
-
}
|
|
452
|
-
|
|
453
|
-
const benchmarkCommandInput = await ctx.ui.input("Benchmark Command", "bash autoresearch.sh");
|
|
454
|
-
if (benchmarkCommandInput === undefined) return undefined;
|
|
455
|
-
const benchmarkCommand = benchmarkCommandInput.trim();
|
|
456
|
-
if (benchmarkCommand.length === 0) {
|
|
457
|
-
ctx.ui.notify("Benchmark command is required", "info");
|
|
458
|
-
return undefined;
|
|
459
|
-
}
|
|
460
|
-
if (!isAutoresearchShCommand(benchmarkCommand)) {
|
|
461
|
-
ctx.ui.notify("Benchmark command must invoke `autoresearch.sh` directly", "info");
|
|
462
|
-
return undefined;
|
|
463
|
-
}
|
|
464
|
-
|
|
465
|
-
const metricNameInput = await ctx.ui.input("Primary Metric Name", "runtime_ms");
|
|
466
|
-
if (metricNameInput === undefined) return undefined;
|
|
467
|
-
const metricName = metricNameInput.trim();
|
|
468
|
-
if (metricName.length === 0) {
|
|
469
|
-
ctx.ui.notify("Primary metric name is required", "info");
|
|
470
|
-
return undefined;
|
|
471
|
-
}
|
|
472
|
-
|
|
473
|
-
const metricUnitInput = await ctx.ui.input("Metric Unit", "ms");
|
|
474
|
-
if (metricUnitInput === undefined) return undefined;
|
|
475
|
-
const metricUnit = metricUnitInput.trim();
|
|
476
|
-
|
|
477
|
-
const directionInput = await ctx.ui.input("Metric Direction", "lower");
|
|
478
|
-
if (directionInput === undefined) return undefined;
|
|
479
|
-
const normalizedDirection = directionInput.trim().toLowerCase();
|
|
480
|
-
if (normalizedDirection !== "lower" && normalizedDirection !== "higher") {
|
|
481
|
-
ctx.ui.notify("Metric direction must be `lower` or `higher`", "info");
|
|
482
|
-
return undefined;
|
|
483
|
-
}
|
|
484
|
-
|
|
485
|
-
const secondaryMetricsInput = await ctx.ui.input("Tradeoff Metrics", "");
|
|
486
|
-
if (secondaryMetricsInput === undefined) return undefined;
|
|
487
|
-
|
|
488
|
-
const scopePathsInput = await ctx.ui.input("Files in Scope", "packages/coding-agent/src/autoresearch");
|
|
489
|
-
if (scopePathsInput === undefined) return undefined;
|
|
490
|
-
const scopePaths = splitSetupList(scopePathsInput);
|
|
491
|
-
if (scopePaths.length === 0) {
|
|
492
|
-
ctx.ui.notify("Files in Scope must include at least one path", "info");
|
|
493
|
-
return undefined;
|
|
494
|
-
}
|
|
495
|
-
|
|
496
|
-
const offLimitsInput = await ctx.ui.input("Off Limits", "");
|
|
497
|
-
if (offLimitsInput === undefined) return undefined;
|
|
498
|
-
const constraintsInput = await ctx.ui.input("Constraints", "");
|
|
499
|
-
if (constraintsInput === undefined) return undefined;
|
|
500
|
-
|
|
501
|
-
return {
|
|
502
|
-
intent,
|
|
503
|
-
benchmarkCommand,
|
|
504
|
-
metricName,
|
|
505
|
-
metricUnit,
|
|
506
|
-
direction: normalizedDirection,
|
|
507
|
-
secondaryMetrics: splitSetupList(secondaryMetricsInput),
|
|
508
|
-
scopePaths,
|
|
509
|
-
offLimits: splitSetupList(offLimitsInput),
|
|
510
|
-
constraints: splitSetupList(constraintsInput),
|
|
511
|
-
};
|
|
512
|
-
}
|
|
513
|
-
|
|
514
|
-
function splitSetupList(value: string): string[] {
|
|
515
|
-
return value
|
|
516
|
-
.split(/\r?\n|,/)
|
|
517
|
-
.map(entry => entry.trim())
|
|
518
|
-
.filter((entry, index, values) => entry.length > 0 && values.indexOf(entry) === index);
|
|
519
|
-
}
|
|
520
|
-
|
|
521
|
-
function formatBulletBlock(values: string[], renderValue: (value: string) => string, emptyValue = ""): string {
|
|
522
|
-
if (values.length === 0) {
|
|
523
|
-
return emptyValue;
|
|
524
|
-
}
|
|
525
|
-
return values.map(renderValue).join("\n");
|
|
526
|
-
}
|
|
527
|
-
|
|
528
393
|
function hasLocalAutoresearchState(workDir: string): boolean {
|
|
529
394
|
return fs.existsSync(path.join(workDir, "autoresearch.jsonl")) || fs.existsSync(path.join(workDir, ".autoresearch"));
|
|
530
395
|
}
|
|
@@ -667,27 +532,3 @@ function canonicalizeTargetPath(targetPath: string): string {
|
|
|
667
532
|
}
|
|
668
533
|
return path.resolve(canonicalizeExistingPath(currentPath), ...pendingSegments);
|
|
669
534
|
}
|
|
670
|
-
|
|
671
|
-
function validateAutoresearchBashCommand(command: string): string | null {
|
|
672
|
-
const trimmed = command.trim();
|
|
673
|
-
if (trimmed.length === 0) {
|
|
674
|
-
return null;
|
|
675
|
-
}
|
|
676
|
-
const mutationPatterns = [
|
|
677
|
-
/(^|[;&|()]\s*)(?:bash|sh)\b/,
|
|
678
|
-
/(^|[;&|()]\s*)(?:python|python3|node|perl|ruby|php)\b/,
|
|
679
|
-
/(^|[;&|()]\s*)(?:mv|cp|rm|mkdir|touch|chmod|chown|ln|install|patch)\b/,
|
|
680
|
-
/(^|[;&|()]\s*)sed\s+-i\b/,
|
|
681
|
-
/(^|[;&|()]\s*)git\s+(?:add|apply|checkout|clean|commit|merge|rebase|reset|restore|revert|stash|switch|worktree)\b/,
|
|
682
|
-
/(^|[^<])>>?/,
|
|
683
|
-
/\|\s*tee\b/,
|
|
684
|
-
/<<<?/,
|
|
685
|
-
];
|
|
686
|
-
if (mutationPatterns.some(pattern => pattern.test(trimmed))) {
|
|
687
|
-
return (
|
|
688
|
-
"Autoresearch only allows read-only shell inspection. " +
|
|
689
|
-
"Use write/edit/ast_edit for file changes and run_experiment for benchmark execution."
|
|
690
|
-
);
|
|
691
|
-
}
|
|
692
|
-
return null;
|
|
693
|
-
}
|
|
@@ -8,7 +8,11 @@ Autoresearch mode is active.
|
|
|
8
8
|
Primary goal:
|
|
9
9
|
{{goal}}
|
|
10
10
|
{{else}}
|
|
11
|
+
{{#if has_autoresearch_md}}
|
|
11
12
|
Primary goal is documented in `autoresearch.md` for this session.
|
|
13
|
+
{{else}}
|
|
14
|
+
There is no `autoresearch.md` yet. Infer what to optimize from the latest user message and the conversation; after you create `autoresearch.md`, keep it as the durable source of truth for goal and benchmark contract.
|
|
15
|
+
{{/if}}
|
|
12
16
|
{{/if}}
|
|
13
17
|
|
|
14
18
|
Working directory:
|
|
@@ -63,7 +67,7 @@ An unlogged run artifact exists at `{{pending_run_directory}}`.
|
|
|
63
67
|
|
|
64
68
|
- `init_experiment` — initialize or reset the experiment session for the current optimization target.
|
|
65
69
|
- `run_experiment` — run a benchmark or experiment command with timing, output capture, structured metric parsing, and optional backpressure checks.
|
|
66
|
-
- `log_experiment` — record the result, update the dashboard, persist JSONL history, auto-commit kept experiments, and
|
|
70
|
+
- `log_experiment` — record the result, update the dashboard, persist JSONL history, auto-commit kept experiments, and revert only run-modified files for discarded or failed experiments (pre-existing uncommitted changes are preserved).
|
|
67
71
|
|
|
68
72
|
### Operating protocol
|
|
69
73
|
|
|
@@ -83,6 +87,8 @@ An unlogged run artifact exists at `{{pending_run_directory}}`.
|
|
|
83
87
|
- Use the same workload every run unless you intentionally re-initialize with a new segment.
|
|
84
88
|
- Keep the measurement harness, evaluator, and fixed benchmark inputs stable unless you intentionally start a new segment and document the change.
|
|
85
89
|
4. Initialize the loop with `init_experiment` before the first logged run of a segment.
|
|
90
|
+
- Pass `from_autoresearch_md: true` with only `name` to load the benchmark contract from `autoresearch.md` without mirroring every field in the tool call.
|
|
91
|
+
- Use `abandon_unlogged_runs: true` only when you intentionally discard unlogged run artifacts and need a fresh segment (for example after a bad or obsolete benchmark directory).
|
|
86
92
|
5. Run a baseline first.
|
|
87
93
|
- Establish the baseline metric before attempting optimizations.
|
|
88
94
|
- Track secondary metrics only when they matter to correctness, quality, or obvious regressions.
|
|
@@ -90,7 +96,9 @@ An unlogged run artifact exists at `{{pending_run_directory}}`.
|
|
|
90
96
|
- Make one coherent experiment at a time.
|
|
91
97
|
- Run `run_experiment`.
|
|
92
98
|
- Interpret the result honestly.
|
|
93
|
-
- Call `log_experiment` after every run.
|
|
99
|
+
- Call `log_experiment` after every run (it refreshes benchmark/scope fields from `autoresearch.md` before logging so keep validation matches the file on disk).
|
|
100
|
+
- Use `run_experiment` with `force: true` only when you must override the segment benchmark command or skip the direct-`autoresearch.sh` rule.
|
|
101
|
+
- On `log_experiment`, `force: true` relaxes ASI requirements and allows keeping a primary-metric regression; prefer normal logging when possible.
|
|
94
102
|
7. Keep the primary metric as the decision maker.
|
|
95
103
|
- `keep` when the primary metric improves.
|
|
96
104
|
- `discard` when it regresses or stays flat.
|
|
@@ -137,7 +145,11 @@ Suggested structure:
|
|
|
137
145
|
{{#if has_goal}}
|
|
138
146
|
- {{goal}}
|
|
139
147
|
{{else}}
|
|
148
|
+
{{#if has_autoresearch_md}}
|
|
140
149
|
- document the active target here before the first benchmark
|
|
150
|
+
{{else}}
|
|
151
|
+
- (derive from the user's messages, then record here)
|
|
152
|
+
{{/if}}
|
|
141
153
|
{{/if}}
|
|
142
154
|
|
|
143
155
|
## Benchmark
|
|
@@ -194,15 +206,20 @@ Resume from the existing notes:
|
|
|
194
206
|
{{else}}
|
|
195
207
|
### Initial setup
|
|
196
208
|
|
|
197
|
-
`autoresearch.md` does not exist yet.
|
|
209
|
+
`autoresearch.md` does not exist yet. You decide the benchmark contract, harness, and scope from the user's messages and the repository—do not ask the user to re-type benchmark commands or metric names in a separate UI prompt.
|
|
210
|
+
|
|
211
|
+
Before the first benchmark:
|
|
198
212
|
|
|
199
|
-
|
|
213
|
+
- Write `autoresearch.md` with goal, benchmark command (must be a **direct** invocation of `autoresearch.sh`, e.g. `bash autoresearch.sh`), primary metric name and unit, direction (`lower` or `higher`), tradeoff metrics if relevant, files in scope, off limits, and constraints.
|
|
214
|
+
- Add a short preflight section: prerequisites, one-time setup, and the comparability invariant that must stay fixed across runs.
|
|
215
|
+
- Mark ground-truth evaluators, fixed datasets, and other measurement-critical files as off limits or hard constraints when they define the benchmark contract.
|
|
216
|
+
- Write or update `autoresearch.program.md` when you learn durable heuristics, failure patterns, or repo-specific strategy for later resume turns.
|
|
217
|
+
- Create `autoresearch.sh` as the canonical benchmark entrypoint; print the primary metric as `METRIC <name>=<number>` and optional secondary metrics as additional `METRIC` lines.
|
|
218
|
+
- Optionally add `autoresearch.checks.sh` if correctness or quality needs a hard gate.
|
|
219
|
+
- Call `init_experiment` with arguments that match `autoresearch.md` exactly (benchmark command, metric, unit, direction, scope paths, off limits, constraints).
|
|
220
|
+
- Run and log the baseline.
|
|
200
221
|
|
|
201
|
-
|
|
202
|
-
- write `autoresearch.sh`
|
|
203
|
-
- optionally write `autoresearch.checks.sh`
|
|
204
|
-
- run `init_experiment`
|
|
205
|
-
- run and log the baseline
|
|
222
|
+
Until `init_experiment` succeeds, only autoresearch control files (`autoresearch.md`, `autoresearch.sh`, `autoresearch.program.md`, `autoresearch.ideas.md`, `autoresearch.checks.sh`) may be edited; after initialization, respect Files in Scope from the contract.
|
|
206
223
|
|
|
207
224
|
{{/if}}
|
|
208
225
|
{{#if has_checks}}
|
|
@@ -34,7 +34,6 @@ export function createExperimentState(): ExperimentState {
|
|
|
34
34
|
scopePaths: [],
|
|
35
35
|
offLimits: [],
|
|
36
36
|
constraints: [],
|
|
37
|
-
segmentFingerprint: null,
|
|
38
37
|
};
|
|
39
38
|
}
|
|
40
39
|
|
|
@@ -203,8 +202,6 @@ export function reconstructStateFromJsonl(workDir: string): ReconstructedExperim
|
|
|
203
202
|
state.scopePaths = cloneStringArray(configEntry.scopePaths);
|
|
204
203
|
state.offLimits = cloneStringArray(configEntry.offLimits);
|
|
205
204
|
state.constraints = cloneStringArray(configEntry.constraints);
|
|
206
|
-
state.segmentFingerprint =
|
|
207
|
-
typeof configEntry.segmentFingerprint === "string" ? configEntry.segmentFingerprint : null;
|
|
208
205
|
state.secondaryMetrics = hydrateMetricDefs(configEntry.secondaryMetrics);
|
|
209
206
|
continue;
|
|
210
207
|
}
|
|
@@ -322,9 +319,6 @@ function parseConfigEntry(value: unknown): AutoresearchJsonConfigEntry | null {
|
|
|
322
319
|
candidate.constraints.filter((item): item is string => typeof item === "string"),
|
|
323
320
|
);
|
|
324
321
|
}
|
|
325
|
-
if (typeof candidate.segmentFingerprint === "string" && candidate.segmentFingerprint.trim().length > 0) {
|
|
326
|
-
config.segmentFingerprint = candidate.segmentFingerprint;
|
|
327
|
-
}
|
|
328
322
|
return config;
|
|
329
323
|
}
|
|
330
324
|
|