@spacek33z/autoauto 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +197 -0
- package/package.json +51 -0
- package/src/App.tsx +224 -0
- package/src/cli.ts +772 -0
- package/src/components/AgentPanel.tsx +254 -0
- package/src/components/Chat.test.tsx +71 -0
- package/src/components/Chat.tsx +308 -0
- package/src/components/CycleField.tsx +23 -0
- package/src/components/ModelPicker.tsx +97 -0
- package/src/components/PostUpdatePrompt.tsx +46 -0
- package/src/components/ResultsTable.tsx +172 -0
- package/src/components/RunCompletePrompt.tsx +90 -0
- package/src/components/RunSettingsOverlay.tsx +49 -0
- package/src/components/RunsTable.tsx +219 -0
- package/src/components/StatsHeader.tsx +100 -0
- package/src/daemon.ts +264 -0
- package/src/index.tsx +8 -0
- package/src/lib/agent/agent-provider.test.ts +133 -0
- package/src/lib/agent/claude-provider.ts +277 -0
- package/src/lib/agent/codex-provider.ts +413 -0
- package/src/lib/agent/default-providers.ts +10 -0
- package/src/lib/agent/index.ts +32 -0
- package/src/lib/agent/mock-provider.ts +61 -0
- package/src/lib/agent/opencode-provider.ts +424 -0
- package/src/lib/agent/types.ts +73 -0
- package/src/lib/auth.ts +11 -0
- package/src/lib/config.ts +152 -0
- package/src/lib/daemon-callbacks.ts +59 -0
- package/src/lib/daemon-client.ts +16 -0
- package/src/lib/daemon-lifecycle.ts +368 -0
- package/src/lib/daemon-spawn.ts +122 -0
- package/src/lib/daemon-status.ts +189 -0
- package/src/lib/daemon-watcher.ts +192 -0
- package/src/lib/experiment-loop.ts +679 -0
- package/src/lib/experiment.ts +356 -0
- package/src/lib/finalize.test.ts +143 -0
- package/src/lib/finalize.ts +511 -0
- package/src/lib/format.test.ts +32 -0
- package/src/lib/format.ts +44 -0
- package/src/lib/git.ts +176 -0
- package/src/lib/ideas-backlog.test.ts +54 -0
- package/src/lib/ideas-backlog.ts +109 -0
- package/src/lib/measure.ts +472 -0
- package/src/lib/model-options.ts +24 -0
- package/src/lib/programs.ts +247 -0
- package/src/lib/push-stream.ts +48 -0
- package/src/lib/run-context.ts +112 -0
- package/src/lib/run-setup.ts +34 -0
- package/src/lib/run.ts +383 -0
- package/src/lib/syntax-theme.ts +39 -0
- package/src/lib/system-prompts/experiment.ts +77 -0
- package/src/lib/system-prompts/finalize.ts +90 -0
- package/src/lib/system-prompts/index.ts +7 -0
- package/src/lib/system-prompts/setup.ts +516 -0
- package/src/lib/system-prompts/update.ts +188 -0
- package/src/lib/tool-events.ts +99 -0
- package/src/lib/validate-measurement.ts +326 -0
- package/src/lib/worktree.ts +40 -0
- package/src/screens/AuthErrorScreen.tsx +31 -0
- package/src/screens/ExecutionScreen.tsx +851 -0
- package/src/screens/FirstSetupScreen.tsx +168 -0
- package/src/screens/HomeScreen.tsx +406 -0
- package/src/screens/PreRunScreen.tsx +206 -0
- package/src/screens/SettingsScreen.tsx +189 -0
- package/src/screens/SetupScreen.tsx +226 -0
- package/src/tui.tsx +17 -0
- package/tsconfig.json +17 -0
|
@@ -0,0 +1,511 @@
|
|
|
1
|
+
import { join } from "node:path"
|
|
2
|
+
import type { ModelSlot } from "./config.ts"
|
|
3
|
+
import type { ProgramConfig } from "./programs.ts"
|
|
4
|
+
import type { RunState, ExperimentResult } from "./run.ts"
|
|
5
|
+
import { readAllResults, getRunStats } from "./run.ts"
|
|
6
|
+
import type { ExperimentCost } from "./experiment.ts"
|
|
7
|
+
import {
|
|
8
|
+
getFullSha,
|
|
9
|
+
getDiffBetween,
|
|
10
|
+
getRecentLog,
|
|
11
|
+
getFilesChangedBetween,
|
|
12
|
+
createGroupBranch,
|
|
13
|
+
checkoutBranch,
|
|
14
|
+
resetHard,
|
|
15
|
+
} from "./git.ts"
|
|
16
|
+
import { getProvider } from "./agent/index.ts"
|
|
17
|
+
import { formatToolEvent } from "./tool-events.ts"
|
|
18
|
+
import { getFinalizeSystemPrompt } from "./system-prompts/index.ts"
|
|
19
|
+
|
|
20
|
+
// --- Types ---
|
|
21
|
+
|
|
22
|
+
/**
 * A changeset group as proposed by the finalize agent, after it has been
 * parsed out of the agent's `<finalize_groups>` block.
 */
export interface ProposedGroup {
  /** Group identifier; used as the suffix of the group's branch name. */
  name: string
  /** Short human-readable title for the group. */
  title: string
  /** Free-form explanation of the group; empty string when the agent gave none. */
  description: string
  /** Paths of the changed files that belong to this group. */
  files: Array<string>
  /** Risk label attached to the group. */
  risk: "low" | "medium" | "high"
}
|
|
29
|
+
|
|
30
|
+
/**
 * A group for which a branch was actually created during finalize.
 * Carries the proposed group's fields plus the resulting branch and commit.
 */
export interface FinalizeGroupResult {
  /** Group identifier. */
  name: string
  /** Name of the branch created for this group. */
  branchName: string
  /** SHA returned when the group's branch was created. */
  commitSha: string
  /** Short human-readable title for the group. */
  title: string
  /** Free-form explanation of the group (may be empty). */
  description: string
  /** Paths of the files included in this group. */
  files: Array<string>
  /** Risk label attached to the group. */
  risk: "low" | "medium" | "high"
}
|
|
39
|
+
|
|
40
|
+
/** Outcome of a finalize run. */
export interface FinalizeResult {
  /** The generated Markdown report. */
  summary: string
  /** "grouped" when group branches were created, "summary-only" when only the report was written. */
  mode: "grouped" | "summary-only"
  /** Groups whose branches were created; empty in "summary-only" mode. */
  groups: Array<FinalizeGroupResult>
  /** Cost reported by the finalize agent session, when available. */
  cost?: ExperimentCost
}
|
|
46
|
+
|
|
47
|
+
/** Hooks through which the finalize agent's progress is reported to the caller. */
export interface FinalizeCallbacks {
  /** Receives each chunk of streamed agent text as it arrives. */
  onStreamText: (text: string) => void
  /** Receives a formatted one-line status for each tool invocation. */
  onToolStatus: (status: string) => void
}
|
|
51
|
+
|
|
52
|
+
// --- Extraction ---
|
|
53
|
+
|
|
54
|
+
/** Risk labels accepted from the agent; any other value falls back to "low". */
const VALID_RISKS = new Set(["low", "medium", "high"])

/**
 * Parse the first `<finalize_groups>…</finalize_groups>` block found in the
 * agent output into a list of proposed groups.
 *
 * The block must contain a non-empty JSON array of objects. Each object needs
 * a non-blank string `name`, a non-blank string `title` and a non-empty array
 * of non-blank string `files`. `description` defaults to `""` and an
 * unrecognized `risk` defaults to `"low"`.
 *
 * @param text Full text produced by the finalize agent.
 * @returns The parsed groups (names normalized, strings trimmed), or `null`
 *   when the block is missing, is not valid JSON, or any entry is malformed —
 *   including a `name` that has no usable characters left after normalization.
 */
export function extractFinalizeGroups(text: string): ProposedGroup[] | null {
  const match = text.match(/<finalize_groups>\s*([\s\S]*?)\s*<\/finalize_groups>/)
  if (!match) return null

  try {
    const raw: unknown = JSON.parse(match[1])
    if (!Array.isArray(raw) || raw.length === 0) return null

    const groups: ProposedGroup[] = []
    for (const item of raw) {
      if (typeof item !== "object" || item == null) return null
      const { name, title, description, files, risk } = item as Record<string, unknown>

      if (typeof name !== "string" || !name.trim()) return null
      if (typeof title !== "string" || !title.trim()) return null
      if (!Array.isArray(files) || files.length === 0) return null
      if (!files.every((f): f is string => typeof f === "string" && f.trim().length > 0)) return null

      // A name such as "???" is non-blank but normalizes to "", which would
      // yield a branch name with an empty suffix — treat it as malformed.
      const normalizedName = normalizeName(name)
      if (!normalizedName) return null

      groups.push({
        name: normalizedName,
        title: title.trim(),
        description: typeof description === "string" ? description.trim() : "",
        files: files.map((f) => f.trim()),
        risk: typeof risk === "string" && VALID_RISKS.has(risk) ? (risk as ProposedGroup["risk"]) : "low",
      })
    }

    return groups
  } catch {
    // JSON.parse failed — the block is not usable.
    return null
  }
}

/**
 * Turn an arbitrary group name into a lowercase, hyphen-separated slug of at
 * most 30 characters, without a leading or trailing hyphen.
 *
 * @returns The slug; may be empty when `raw` contains no `[a-z0-9]` characters.
 */
function normalizeName(raw: string): string {
  return raw
    .toLowerCase()
    .replace(/[^a-z0-9-]/g, "-")
    .replace(/-+/g, "-")
    .replace(/^-/, "")
    .slice(0, 30)
    // Strip the trailing hyphen only after truncating: cutting at 30 characters
    // can itself expose a hyphen at the end.
    .replace(/-$/, "")
}
|
|
97
|
+
|
|
98
|
+
// --- Validation ---
|
|
99
|
+
|
|
100
|
+
/** Successful outcome of group validation. */
export interface ValidatedGroups {
  /** Discriminant: always `true` for this variant. */
  valid: true
  /** The groups that passed validation and still contain at least one file. */
  groups: Array<ProposedGroup>
}
|
|
104
|
+
|
|
105
|
+
/** Failed outcome of group validation. */
export interface ValidationError {
  /** Discriminant: always `false` for this variant. */
  valid: false
  /** Human-readable explanation of why the proposal was rejected. */
  reason: string
}
|
|
109
|
+
|
|
110
|
+
/**
 * Check a set of proposed groups against the files that actually changed.
 *
 * Rules, applied in this order:
 * 1. Paths not present in `changedFiles` are dropped silently; a path repeated
 *    inside one group is counted once.
 * 2. A changed file may belong to only one group.
 * 3. Groups left without files are discarded; at least one group must remain.
 * 4. Every changed file must be assigned to some group.
 * 5. Remaining group names must be unique.
 *
 * The input groups are not modified; the returned groups are copies carrying
 * the filtered file lists.
 *
 * @param proposedGroups Groups proposed by the agent.
 * @param changedFiles Paths that changed between the baseline and HEAD.
 * @returns `{ valid: true, groups }` on success, otherwise `{ valid: false, reason }`.
 */
export function validateGroups(
  proposedGroups: ProposedGroup[],
  changedFiles: string[],
): ValidatedGroups | ValidationError {
  const changedSet = new Set(changedFiles)
  const fileToGroup = new Map<string, string>()
  const nonEmpty: ProposedGroup[] = []

  // Strip phantom files, de-duplicate within a group, and detect overlaps.
  for (const group of proposedGroups) {
    const ownFiles = new Set<string>()
    for (const file of group.files) {
      if (!changedSet.has(file)) continue // phantom path — drop silently
      if (ownFiles.has(file)) continue // same path listed twice in this group
      const owner = fileToGroup.get(file)
      if (owner !== undefined) {
        return { valid: false, reason: `File "${file}" assigned to both "${owner}" and "${group.name}"` }
      }
      fileToGroup.set(file, group.name)
      ownFiles.add(file)
    }
    // Copy instead of assigning to group.files so the caller's objects stay untouched.
    if (ownFiles.size > 0) nonEmpty.push({ ...group, files: [...ownFiles] })
  }

  if (nonEmpty.length === 0) {
    return { valid: false, reason: "All groups empty after removing unrecognized file paths" }
  }

  // Coverage — all changed files must be assigned.
  const unassigned = changedFiles.filter((f) => !fileToGroup.has(f))
  if (unassigned.length > 0) {
    const shown = unassigned.slice(0, 5).join(", ")
    const more = unassigned.length > 5 ? ` (+${unassigned.length - 5} more)` : ""
    return { valid: false, reason: `Files not assigned to any group: ${shown}${more}` }
  }

  // Group names must be unique.
  const names = new Set<string>()
  for (const group of nonEmpty) {
    if (names.has(group.name)) {
      return { valid: false, reason: `Duplicate group name: "${group.name}"` }
    }
    names.add(group.name)
  }

  return { valid: true, groups: nonEmpty }
}
|
|
154
|
+
|
|
155
|
+
// --- Prompt Building ---
|
|
156
|
+
|
|
157
|
+
/** Maximum number of characters of diff text embedded in the prompt. */
const MAX_DIFF_LENGTH = 50_000

/**
 * Assemble the user prompt for the finalize agent.
 *
 * The prompt contains run statistics, a tab-separated list of experiment
 * results, the changed-file list, the recent git log (50 entries requested)
 * and the diff from the original baseline to HEAD. A diff longer than
 * `MAX_DIFF_LENGTH` characters is cut and followed by a note telling the
 * agent how to obtain the full diff.
 *
 * @param state Run state supplying the slug, branch, baseline and metrics.
 * @param results Experiment results to list.
 * @param projectRoot Directory in which the git commands run.
 * @param config Program config; its `direction` is used for the statistics.
 * @param changedFiles Paths changed between the baseline and HEAD.
 * @returns The complete prompt text.
 */
export async function buildFinalizePrompt(
  state: RunState,
  results: ExperimentResult[],
  projectRoot: string,
  config: ProgramConfig,
  changedFiles: string[],
): Promise<string> {
  // The diff and the log are independent, so fetch them concurrently.
  const [diff, gitLog] = await Promise.all([
    getDiffBetween(projectRoot, state.original_baseline_sha, "HEAD"),
    getRecentLog(projectRoot, 50),
  ])

  const stats = getRunStats(state, config.direction)

  const resultRows: string[] = []
  for (const r of results) {
    resultRows.push(`#${r.experiment_number}\t${r.status}\t${r.metric_value}\t${r.description}`)
  }
  const resultsSummary = resultRows.join("\n")

  const diffIsTooLong = diff.length > MAX_DIFF_LENGTH
  const diffSection = diffIsTooLong
    ? diff.slice(0, MAX_DIFF_LENGTH) +
      `\n\n... (diff truncated at ${MAX_DIFF_LENGTH} chars — use \`git diff ${state.original_baseline_sha} HEAD\` for the full output)`
    : diff

  return `You are reviewing an AutoAuto experiment run for program "${state.program_slug}".

## Run Statistics
- Total experiments: ${stats.total_experiments} (${stats.total_keeps} kept, ${stats.total_discards} discarded, ${stats.total_crashes} crashed)
- Original baseline: ${state.original_baseline}
- Current metric: ${state.current_baseline}
- Best metric: ${state.best_metric} (experiment #${state.best_experiment})
- Improvement: ${stats.improvement_pct.toFixed(1)}%
- Branch: ${state.branch_name}
- Baseline SHA: ${state.original_baseline_sha.slice(0, 10)}

## Experiment Results
\`\`\`
${resultsSummary}
\`\`\`

## Changed Files
These are the files that changed between the baseline and current HEAD. Each file must appear in exactly one group — do not invent file paths.
\`\`\`
${changedFiles.join("\n")}
\`\`\`

## Git History
\`\`\`
${gitLog}
\`\`\`

## Full Diff (baseline → current)
\`\`\`diff
${diffSection}
\`\`\`

Review these changes, group them into logical changesets, and produce your structured summary. Use \`git show <sha>\` to inspect individual experiment commits if needed.`
}
|
|
219
|
+
|
|
220
|
+
// --- Agent Runner ---
|
|
221
|
+
|
|
222
|
+
/**
 * Run one finalize agent session and collect its output.
 *
 * The session gets the Read/Bash/Glob/Grep tools and at most 10 turns.
 * Streamed text is accumulated and forwarded to `callbacks.onStreamText`;
 * tool invocations are formatted and forwarded to `callbacks.onToolStatus`;
 * the cost is taken from the session's `result` event.
 *
 * @param projectRoot Working directory handed to the provider.
 * @param systemPrompt System prompt for the session.
 * @param userPrompt User prompt for the session.
 * @param modelConfig Provider, model and effort selection (effort is not sent to "opencode").
 * @param callbacks Progress hooks.
 * @param signal Optional abort signal.
 * @returns The accumulated text and, when reported, the cost. An already
 *   aborted signal yields an empty summary without starting a session; an
 *   abort during the session yields whatever text was collected so far.
 * @throws Re-throws any session error that occurs while the signal is not aborted.
 */
async function runFinalizeAgent(
  projectRoot: string,
  systemPrompt: string,
  userPrompt: string,
  modelConfig: ModelSlot,
  callbacks: FinalizeCallbacks,
  signal?: AbortSignal,
): Promise<{ summary: string; cost?: ExperimentCost }> {
  if (signal?.aborted) return { summary: "" }

  let collected = ""
  let cost: ExperimentCost | undefined

  try {
    const provider = getProvider(modelConfig.provider)
    const effort = modelConfig.provider === "opencode" ? undefined : modelConfig.effort

    const session = provider.runOnce(userPrompt, {
      systemPrompt,
      tools: ["Read", "Bash", "Glob", "Grep"],
      allowedTools: ["Read", "Bash", "Glob", "Grep"],
      maxTurns: 10,
      cwd: projectRoot,
      model: modelConfig.model,
      effort,
      signal,
    })

    for await (const event of session) {
      // Stop consuming as soon as the caller aborts.
      if (signal?.aborted) break

      if (event.type === "text_delta") {
        collected += event.text
        callbacks.onStreamText(event.text)
      } else if (event.type === "tool_use") {
        callbacks.onToolStatus(formatToolEvent(event.tool, event.input ?? {}))
      } else if (event.type === "result") {
        cost = event.cost
      }
    }
  } catch (err: unknown) {
    // An error raised after an abort is expected — hand back the partial output.
    if (!signal?.aborted) throw err
  }

  return { summary: collected, cost }
}
|
|
274
|
+
|
|
275
|
+
// --- Report Generation ---
|
|
276
|
+
|
|
277
|
+
/**
 * Format the time between two timestamps as `"1h 2m 3s"`, `"2m 3s"` or `"3s"`.
 * Partial seconds are dropped.
 *
 * @param startIso Start timestamp, in any format `Date` can parse.
 * @param endIso End timestamp, in any format `Date` can parse.
 * @returns The formatted duration; `"unknown"` when either timestamp cannot
 *   be parsed, and `"0s"` when the end is not after the start.
 */
function formatDuration(startIso: string, endIso: string): string {
  const ms = new Date(endIso).getTime() - new Date(startIso).getTime()
  // getTime() is NaN for an unparseable date; avoid printing "NaNs".
  if (Number.isNaN(ms)) return "unknown"

  // Clamp so that an end before the start does not produce negative parts.
  const totalSec = Math.max(0, Math.floor(ms / 1000))
  const h = Math.floor(totalSec / 3600)
  const m = Math.floor((totalSec % 3600) / 60)
  const s = totalSec % 60

  if (h > 0) return `${h}h ${m}m ${s}s`
  if (m > 0) return `${m}m ${s}s`
  return `${s}s`
}
|
|
287
|
+
|
|
288
|
+
/**
 * Remove every `<finalize_groups>…</finalize_groups>` block from the text and
 * trim surrounding whitespace from what is left.
 */
function stripGroupsXml(text: string): string {
  const groupsBlock = /<finalize_groups>[\s\S]*?<\/finalize_groups>/g
  const withoutBlocks = text.replace(groupsBlock, "")
  return withoutBlocks.trim()
}
|
|
291
|
+
|
|
292
|
+
/**
 * Describe how `value` differs from `baseline` as a signed percentage with a
 * verdict, e.g. `"-10.0% (improved)"`.
 *
 * @param baseline Reference metric value.
 * @param value Metric value to compare against the baseline.
 * @param direction Whether a lower or a higher metric counts as better.
 * @returns `"N/A"` when the baseline is 0 or the percentage is not a finite
 *   number; `"0.0% (unchanged)"` when the values are equal; otherwise the
 *   signed percentage labelled "improved" or "regressed".
 */
function computeChangePct(baseline: number, value: number, direction: "lower" | "higher"): string {
  if (baseline === 0) return "N/A"

  const pct = ((value - baseline) / Math.abs(baseline)) * 100
  // A missing or non-numeric metric must not be rendered as "NaN% (regressed)".
  if (!Number.isFinite(pct)) return "N/A"
  // No movement is neither an improvement nor a regression.
  if (pct === 0) return "0.0% (unchanged)"

  const sign = pct > 0 ? "+" : ""
  const improved = direction === "higher" ? pct > 0 : pct < 0
  return `${sign}${pct.toFixed(1)}% (${improved ? "improved" : "regressed"})`
}
|
|
299
|
+
|
|
300
|
+
/**
 * Prepare free text for use inside a Markdown table cell: line breaks become
 * single spaces, the text is shortened to `maxLen` characters (ending in
 * "..."), and pipe characters are escaped so they cannot split the cell.
 */
function toTableCell(text: string, maxLen: number): string {
  const singleLine = text.replace(/\s*[\r\n]+\s*/g, " ")
  const clipped = singleLine.length > maxLen ? `${singleLine.slice(0, maxLen - 3)}...` : singleLine
  // Escape after clipping so a trailing backslash is never cut off from its pipe.
  return clipped.replace(/\|/g, "\\|")
}

/**
 * Build the Markdown run summary.
 *
 * Sections, in order: overview, statistics table, metric timeline (all
 * experiments except baseline row #0), kept changes, finalize groups (only
 * when `groups` is non-empty) and the agent review with any
 * `<finalize_groups>` block removed.
 *
 * @param state Run state supplying branch, timestamps, baseline and best metric.
 * @param results All experiment results, including the baseline row.
 * @param config Program config; its `direction` decides what counts as an improvement.
 * @param agentReview Raw text produced by the finalize agent.
 * @param groups Groups whose branches were created, if any.
 * @param cost Cost of the finalize session, if reported.
 * @returns The report as a single Markdown string.
 */
export function generateSummaryReport(
  state: RunState,
  results: ExperimentResult[],
  config: ProgramConfig,
  agentReview: string,
  groups?: FinalizeGroupResult[],
  cost?: ExperimentCost,
): string {
  const stats = getRunStats(state, config.direction)
  const strippedReview = stripGroupsXml(agentReview)

  const lines: string[] = []

  lines.push(`# Run Summary: ${state.program_slug}`)
  lines.push("")

  lines.push("## Overview")
  lines.push(`- **Branch:** ${state.branch_name}`)
  lines.push(`- **Started:** ${state.started_at}`)
  lines.push(`- **Duration:** ${formatDuration(state.started_at, state.updated_at)}`)
  lines.push(`- **Baseline SHA:** ${state.original_baseline_sha.slice(0, 10)}`)
  if (cost) {
    lines.push(`- **Total cost:** $${cost.total_cost_usd.toFixed(2)}`)
  }
  lines.push("")

  const improvementSign = stats.improvement_pct > 0 ? "+" : ""
  lines.push("## Statistics")
  lines.push("")
  lines.push("| Metric | Value |")
  lines.push("|--------|-------|")
  lines.push(`| Total experiments | ${stats.total_experiments} |`)
  lines.push(`| Kept | ${stats.total_keeps} |`)
  lines.push(`| Discarded | ${stats.total_discards} |`)
  lines.push(`| Crashed | ${stats.total_crashes} |`)
  lines.push(`| Keep rate | ${(stats.keep_rate * 100).toFixed(0)}% |`)
  lines.push(`| Original baseline | ${state.original_baseline} |`)
  lines.push(`| Best metric | ${state.best_metric} (${improvementSign}${stats.improvement_pct.toFixed(1)}%) |`)
  lines.push("")

  // Skip baseline row #0
  const experiments = results.filter((r) => r.experiment_number > 0)
  if (experiments.length > 0) {
    lines.push("## Metric Timeline")
    lines.push("")
    lines.push("| # | Commit | Metric | Status | Description |")
    lines.push("|---|--------|--------|--------|-------------|")
    for (const r of experiments) {
      const metric = r.metric_value != null ? String(r.metric_value) : "-"
      // Descriptions are free text; a raw "|" or newline would break the table row.
      const desc = toTableCell(r.description, 60)
      lines.push(`| ${r.experiment_number} | ${r.commit.slice(0, 7)} | ${metric} | ${r.status} | ${desc} |`)
    }
    lines.push("")
  }

  const kept = experiments.filter((r) => r.status === "keep")
  if (kept.length > 0) {
    lines.push("## Kept Changes")
    lines.push("")
    for (const r of kept) {
      lines.push(`### Experiment #${r.experiment_number}: ${r.description}`)
      lines.push(`- **Commit:** ${r.commit}`)
      lines.push(`- **Metric:** ${r.metric_value} (${computeChangePct(state.original_baseline, r.metric_value, config.direction)})`)
      lines.push("")
    }
  }

  // Per-group section (grouped finalize only)
  if (groups && groups.length > 0) {
    lines.push("## Finalize Groups")
    lines.push("")
    for (let i = 0; i < groups.length; i++) {
      const g = groups[i]
      lines.push(`### ${i + 1}. ${g.name}`)
      lines.push(`- **Branch:** \`${g.branchName}\``)
      lines.push(`- **Commit:** ${g.commitSha.slice(0, 7)}`)
      lines.push(`- **Risk:** ${g.risk}`)
      lines.push(`- **Files:** ${g.files.join(", ")}`)
      if (g.description) lines.push(`- ${g.description}`)
      lines.push("")
    }
  }

  if (strippedReview) {
    lines.push("## Agent Review")
    lines.push("")
    lines.push(strippedReview)
    lines.push("")
  }

  lines.push("---")
  lines.push("*Generated by AutoAuto*")

  return lines.join("\n")
}
|
|
395
|
+
|
|
396
|
+
// --- Main Entry Point ---
|
|
397
|
+
|
|
398
|
+
/**
 * Finalize a run: ask the agent to review the changes, optionally split them
 * into per-group branches, and write `summary.md` into the run directory.
 *
 * Flow:
 * 1. Record HEAD, load the results and the changed-file list, run the agent.
 * 2. Fail if HEAD moved while the agent was running.
 * 3. When the run has keeps and changed files, and the agent proposed more
 *    than one group that passes validation, create one branch per group.
 *    Afterwards the experiment branch is checked out again and reset to the
 *    recorded HEAD (both best-effort).
 * 4. If no group branch could be created, or grouping was not applicable,
 *    produce a summary-only result.
 *
 * @param projectRoot Project directory; used for git when no worktree is given.
 * @param programSlug Program slug, used in group branch names.
 * @param runDir Directory holding the run's results; `summary.md` is written here.
 * @param state Current run state.
 * @param config Program configuration.
 * @param modelConfig Model selection for the finalize agent.
 * @param callbacks Progress hooks for agent output.
 * @param signal Optional abort signal passed to the agent.
 * @param worktreePath Optional worktree to run git commands in instead of `projectRoot`.
 * @returns The report together with the mode and any groups created.
 * @throws Error when HEAD differs after the agent ran.
 */
export async function runFinalize(
  projectRoot: string,
  programSlug: string,
  runDir: string,
  state: RunState,
  config: ProgramConfig,
  modelConfig: ModelSlot,
  callbacks: FinalizeCallbacks,
  signal?: AbortSignal,
  worktreePath?: string,
): Promise<FinalizeResult> {
  const gitCwd = worktreePath ?? projectRoot

  // Remember HEAD before anything can touch the repository.
  const savedHead = await getFullSha(gitCwd)

  const results = await readAllResults(runDir)
  const changedFiles = await getFilesChangedBetween(gitCwd, state.original_baseline_sha, "HEAD")

  const systemPrompt = getFinalizeSystemPrompt()
  const userPrompt = await buildFinalizePrompt(state, results, gitCwd, config, changedFiles)

  const { summary, cost } = await runFinalizeAgent(
    gitCwd,
    systemPrompt,
    userPrompt,
    modelConfig,
    callbacks,
    signal,
  )

  // The agent is expected to be read-only: HEAD must not have moved.
  const headAfterAgent = await getFullSha(gitCwd)
  if (headAfterAgent !== savedHead) {
    throw new Error("Finalize agent modified the repository. Aborting.")
  }

  const fallbackToSummary = (): Promise<FinalizeResult> =>
    summaryOnly(state, results, config, summary, runDir, cost)

  // Grouping only makes sense when something was kept and files changed.
  const groupingApplicable = state.total_keeps > 0 && changedFiles.length > 0
  if (!groupingApplicable) return fallbackToSummary()

  const proposed = extractFinalizeGroups(summary)
  if (!proposed || !(proposed.length > 1)) return fallbackToSummary()

  const validation = validateGroups(proposed, changedFiles)
  if (!validation.valid) return fallbackToSummary()

  // Create one branch per validated group; stop at the first failure.
  const createdGroups: FinalizeGroupResult[] = []
  try {
    for (const group of validation.groups) {
      const branchName = `autoauto-${programSlug}-${state.run_id}-${group.name}`.slice(0, 100)
      const commitSha = await createGroupBranch(
        gitCwd,
        branchName,
        state.original_baseline_sha,
        savedHead,
        group.files,
        group.title,
      )
      const { name, title, description, files, risk } = group
      createdGroups.push({ name, branchName, commitSha, title, description, files, risk })
    }
  } catch {
    // Partial failure: branches created so far are kept (they're valid refs);
    // the worktree is restored below either way.
  }

  // Put the worktree back on the experiment branch at the recorded HEAD.
  await checkoutBranch(gitCwd, state.branch_name).catch(() => {})
  await resetHard(gitCwd, savedHead).catch(() => {})

  // Total failure — nothing was created, so report without groups.
  if (createdGroups.length === 0) return fallbackToSummary()

  const report = generateSummaryReport(state, results, config, summary, createdGroups, cost)
  await Bun.write(join(runDir, "summary.md"), report)
  return { summary: report, mode: "grouped", groups: createdGroups, cost }
}
|
|
498
|
+
|
|
499
|
+
/**
 * Produce the report without any finalize groups, write it to `summary.md`
 * in the run directory, and return it as a "summary-only" result.
 */
async function summaryOnly(
  state: RunState,
  results: ExperimentResult[],
  config: ProgramConfig,
  agentSummary: string,
  runDir: string,
  cost?: ExperimentCost,
): Promise<FinalizeResult> {
  const markdown = generateSummaryReport(state, results, config, agentSummary, undefined, cost)
  const reportPath = join(runDir, "summary.md")
  await Bun.write(reportPath, markdown)

  return { summary: markdown, mode: "summary-only", groups: [], cost }
}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import { describe, expect, test } from "bun:test"
|
|
2
|
+
import { allocateColumnWidths, formatCell, padRight, truncate } from "./format.ts"
|
|
3
|
+
|
|
4
|
+
// Unit tests for the string and column-width helpers in format.ts.
describe("format helpers", () => {
  test("handles zero and one character widths", () => {
    const sample = "abc"

    // A width of zero always yields an empty string.
    expect(padRight(sample, 0)).toBe("")
    expect(truncate(sample, 0)).toBe("")
    expect(formatCell(sample, 0)).toBe("")

    // A width of one leaves room for the ellipsis only.
    expect(truncate(sample, 1)).toBe("…")
  })

  test("shrinks columns to available width", () => {
    const specs = [
      { ideal: 4, min: 2 },
      { ideal: 9, min: 0 },
      { ideal: 12, min: 0 },
    ]
    const widths = allocateColumnWidths(12, specs)
    expect(widths).toEqual([4, 8, 0])
  })

  test("preserves ideal widths when they fit", () => {
    const specs = [{ ideal: 2 }, { ideal: 3 }]
    const widths = allocateColumnWidths(8, specs)
    expect(widths).toEqual([2, 3])
  })

  test("does not grow columns when min exceeds ideal", () => {
    const specs = [{ ideal: 1, min: 4 }]
    const widths = allocateColumnWidths(0, specs)
    expect(widths).toEqual([0])
  })
})
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
/**
 * Fit a string to `width` characters: cut it when it is too long, append
 * spaces when it is too short. A width of zero or less yields "".
 */
export function padRight(str: string, width: number): string {
  if (width <= 0) {
    return ""
  }
  if (str.length >= width) {
    return str.slice(0, width)
  }
  const missing = width - str.length
  return str + " ".repeat(missing)
}
|
|
6
|
+
|
|
7
|
+
/**
 * Shorten a string to at most `maxLen` characters. When the string has to be
 * cut, its last kept character is replaced by "…". A `maxLen` of zero or less
 * yields "".
 */
export function truncate(str: string, maxLen: number): string {
  if (maxLen <= 0) return ""

  const tooLong = str.length > maxLen
  if (!tooLong) return str

  // With room for a single character only the ellipsis itself fits.
  if (maxLen === 1) return "…"

  const kept = str.slice(0, maxLen - 1)
  return kept + "…"
}
|
|
13
|
+
|
|
14
|
+
/** Width request for one table column. */
export interface ColumnSpec {
  /** Preferred width; negative values are treated as 0. */
  ideal: number
  /** Width the column keeps for as long as other columns can still give up space (defaults to 0). */
  min?: number
}

/**
 * Distribute `available` width over the columns.
 *
 * Every column starts at its ideal width. If the total does not fit, columns
 * are shrunk from the last one towards the first: in a first pass no column
 * goes below its `min`; if that is still not enough, a second pass shrinks
 * columns — again from the right — all the way down to 0. Columns are never
 * widened beyond their ideal width.
 *
 * @param available Total width to distribute; negative values count as 0.
 * @param specs One spec per column, in display order.
 * @returns The width assigned to each column, in the same order as `specs`.
 */
export function allocateColumnWidths(available: number, specs: ColumnSpec[]): number[] {
  const budget = Math.max(available, 0)
  const widths = specs.map(({ ideal }) => Math.max(ideal, 0))

  let requested = 0
  for (const w of widths) requested += w
  let excess = requested - budget

  if (excess <= 0) return widths

  // Walk right-to-left, taking from each column whatever lies above its floor
  // until the excess is gone.
  const shrinkFromRight = (floorOf: (index: number) => number): void => {
    for (let i = widths.length - 1; i >= 0 && excess > 0; i--) {
      const cut = Math.min(Math.max(widths[i] - floorOf(i), 0), excess)
      widths[i] -= cut
      excess -= cut
    }
  }

  shrinkFromRight((i) => Math.max(specs[i].min ?? 0, 0)) // pass 1: respect minimums
  shrinkFromRight(() => 0) // pass 2: minimums no longer hold

  return widths
}
|
|
41
|
+
|
|
42
|
+
/**
 * Render a value as a fixed-width table cell: first shorten it (with an
 * ellipsis) to fit `width`, then pad it with spaces up to `width`.
 */
export function formatCell(str: string, width: number): string {
  const shortened = truncate(str, width)
  return padRight(shortened, width)
}
|