npm - @spacek33z/autoauto - Versions diffs - 0.0.1 - Mend

@spacek33z/autoauto 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (67) hide show

package/README.md +197 -0
package/package.json +51 -0
package/src/App.tsx +224 -0
package/src/cli.ts +772 -0
package/src/components/AgentPanel.tsx +254 -0
package/src/components/Chat.test.tsx +71 -0
package/src/components/Chat.tsx +308 -0
package/src/components/CycleField.tsx +23 -0
package/src/components/ModelPicker.tsx +97 -0
package/src/components/PostUpdatePrompt.tsx +46 -0
package/src/components/ResultsTable.tsx +172 -0
package/src/components/RunCompletePrompt.tsx +90 -0
package/src/components/RunSettingsOverlay.tsx +49 -0
package/src/components/RunsTable.tsx +219 -0
package/src/components/StatsHeader.tsx +100 -0
package/src/daemon.ts +264 -0
package/src/index.tsx +8 -0
package/src/lib/agent/agent-provider.test.ts +133 -0
package/src/lib/agent/claude-provider.ts +277 -0
package/src/lib/agent/codex-provider.ts +413 -0
package/src/lib/agent/default-providers.ts +10 -0
package/src/lib/agent/index.ts +32 -0
package/src/lib/agent/mock-provider.ts +61 -0
package/src/lib/agent/opencode-provider.ts +424 -0
package/src/lib/agent/types.ts +73 -0
package/src/lib/auth.ts +11 -0
package/src/lib/config.ts +152 -0
package/src/lib/daemon-callbacks.ts +59 -0
package/src/lib/daemon-client.ts +16 -0
package/src/lib/daemon-lifecycle.ts +368 -0
package/src/lib/daemon-spawn.ts +122 -0
package/src/lib/daemon-status.ts +189 -0
package/src/lib/daemon-watcher.ts +192 -0
package/src/lib/experiment-loop.ts +679 -0
package/src/lib/experiment.ts +356 -0
package/src/lib/finalize.test.ts +143 -0
package/src/lib/finalize.ts +511 -0
package/src/lib/format.test.ts +32 -0
package/src/lib/format.ts +44 -0
package/src/lib/git.ts +176 -0
package/src/lib/ideas-backlog.test.ts +54 -0
package/src/lib/ideas-backlog.ts +109 -0
package/src/lib/measure.ts +472 -0
package/src/lib/model-options.ts +24 -0
package/src/lib/programs.ts +247 -0
package/src/lib/push-stream.ts +48 -0
package/src/lib/run-context.ts +112 -0
package/src/lib/run-setup.ts +34 -0
package/src/lib/run.ts +383 -0
package/src/lib/syntax-theme.ts +39 -0
package/src/lib/system-prompts/experiment.ts +77 -0
package/src/lib/system-prompts/finalize.ts +90 -0
package/src/lib/system-prompts/index.ts +7 -0
package/src/lib/system-prompts/setup.ts +516 -0
package/src/lib/system-prompts/update.ts +188 -0
package/src/lib/tool-events.ts +99 -0
package/src/lib/validate-measurement.ts +326 -0
package/src/lib/worktree.ts +40 -0
package/src/screens/AuthErrorScreen.tsx +31 -0
package/src/screens/ExecutionScreen.tsx +851 -0
package/src/screens/FirstSetupScreen.tsx +168 -0
package/src/screens/HomeScreen.tsx +406 -0
package/src/screens/PreRunScreen.tsx +206 -0
package/src/screens/SettingsScreen.tsx +189 -0
package/src/screens/SetupScreen.tsx +226 -0
package/src/tui.tsx +17 -0
package/tsconfig.json +17 -0

package/src/lib/experiment.ts ADDED Viewed

@@ -0,0 +1,356 @@
+import { join } from "node:path"
+import type { RunState } from "./run.ts"
+import type { ModelSlot } from "./config.ts"
+import { formatRecentResults, parseLastResult, parseLastKeepResult, parseDiscardedShas, parseSecondaryValues } from "./run.ts"
+import {
+  getFullSha,
+  getRecentLog,
+  getLatestCommitMessage,
+  getFilesChangedBetween,
+  getDiscardedDiffs,
+  getDiffStats,
+  formatShellError,
+  type DiffStats,
+} from "./git.ts"
+import { getProvider, type AgentCost } from "./agent/index.ts"
+import { formatToolEvent } from "./tool-events.ts"
+import {
+  parseExperimentNotes,
+  readIdeasBacklogSummary,
+  type ExperimentNotes,
+} from "./ideas-backlog.ts"
+// --- Types ---
+/**
+ * Everything the experiment agent needs to know for one experiment.
+ * Assembled by buildContextPacket and rendered into the user message by buildExperimentPrompt.
+ */
+export interface ContextPacket {
+  experiment: number // copied from RunState.experiment_number
+  current_baseline: number // baseline value the prompt reports under "Current State"
+  original_baseline: number
+  best_metric: number
+  best_experiment: number // experiment number reported next to best_metric in the prompt
+  total_keeps: number
+  total_discards: number
+  metric_field: string // name of the metric the baseline refers to
+  direction: "lower" | "higher" // which way counts as "better" for metric_field
+  program_md: string // text of program.md from the program directory
+  recent_results: string // formatted tail of results.tsv
+  recent_git_log: string
+  last_outcome: string // one-line summary of the last results.tsv row, or "none yet"
+  discarded_diffs: string // empty when there are no discarded commits or their diffs could not be read
+  ideas_backlog: string // empty string when the ideas backlog is disabled
+  secondary_metrics?: Record<string, { direction: "lower" | "higher"; last_kept_value?: number }> // per-field direction plus the value from the last kept result, when numeric
+  consecutive_discards: number
+  max_consecutive_discards: number
+  measurement_diagnostics?: string // optional diagnostic output, rendered in its own prompt section when present
+}
+/** Cost and usage data from an agent session (an alias of AgentCost from ./agent/index.ts). */
+export type ExperimentCost = AgentCost
+/**
+ * Result of running one experiment agent session.
+ * - "committed": the SHA after the session differs from the starting SHA; carries the new SHA,
+ *   the latest commit message, the changed files and the diff stats.
+ * - "no_commit": the session finished but the SHA is unchanged.
+ * - "agent_error": the session reported an error, threw, or was aborted; `error` holds the message.
+ * Every variant may additionally carry `cost` and `notes`.
+ */
+export type ExperimentOutcome =
+  | { type: "committed"; sha: string; description: string; files_changed: string[]; diff_stats: DiffStats; cost?: ExperimentCost; notes?: ExperimentNotes }
+  | { type: "no_commit"; cost?: ExperimentCost; notes?: ExperimentNotes }
+  | { type: "agent_error"; error: string; cost?: ExperimentCost; notes?: ExperimentNotes }
+/**
+ * Result of checking whether locked files were modified.
+ * `files` lists the offending paths; `violated` is true when that list is non-empty.
+ */
+export interface LockViolation {
+  violated: boolean
+  files: string[]
+}
+// --- Context Packet ---
+/**
+ * Assembles the context packet from disk for a single experiment.
+ *
+ * Reads program.md from programDir and results.tsv from runDir, fetches the recent git log for cwd,
+ * and derives the last outcome, the recently discarded diffs and the secondary-metric values from
+ * the results.tsv text. Errors from the two file reads and from getRecentLog are not caught here.
+ *
+ * @param cwd - Directory passed to the git helpers.
+ * @param programDir - Directory containing program.md.
+ * @param runDir - Directory containing results.tsv; also passed to readIdeasBacklogSummary.
+ * @param state - Run counters and baselines, copied into the packet unchanged.
+ * @param config - Primary metric field and direction, plus optional secondary metric directions.
+ * @param options - ideasBacklogEnabled (only an explicit false disables the backlog),
+ *   consecutiveDiscards (defaults to 0), maxConsecutiveDiscards (defaults to 10) and
+ *   measurementDiagnostics (passed through as-is).
+ * @returns The populated ContextPacket.
+ */
+export async function buildContextPacket(
+  cwd: string,
+  programDir: string,
+  runDir: string,
+  state: RunState,
+  config: { metric_field: string; direction: "lower" | "higher"; secondary_metrics?: Record<string, { direction: "lower" | "higher" }> },
+  options: { ideasBacklogEnabled?: boolean; consecutiveDiscards?: number; maxConsecutiveDiscards?: number; measurementDiagnostics?: string } = {},
+): Promise<ContextPacket> {
+  const [programMd, resultsRaw, recentGitLog] = await Promise.all([
+    Bun.file(join(programDir, "program.md")).text(),
+    Bun.file(join(runDir, "results.tsv")).text(),
+    getRecentLog(cwd, 15),
+  ])
+  const ideasBacklog = options.ideasBacklogEnabled === false // an undefined flag still reads the backlog
+    ? ""
+    : await readIdeasBacklogSummary(runDir)
+  const recentResults = formatRecentResults(resultsRaw, 15)
+  // Build last_outcome from last results.tsv row; stays "none yet" when there is no row
+  // or when the row's status is not one of the four cases handled below
+  const lastResult = parseLastResult(resultsRaw)
+  let lastOutcome = "none yet"
+  if (lastResult) {
+    switch (lastResult.status) {
+      case "keep":
+        lastOutcome = `kept: improved to ${lastResult.metric_value} (${lastResult.description})`
+        break
+      case "discard":
+        lastOutcome = `discarded: ${lastResult.metric_value} (${lastResult.description})`
+        break
+      case "crash":
+        lastOutcome = `crashed: ${lastResult.description}`
+        break
+      case "measurement_failure":
+        lastOutcome = `measurement failed: ${lastResult.description}`
+        break
+    }
+  }
+  // Build discarded diffs from recent discarded commits
+  // (the 5 and 2000 below are presumably a commit count and a size cap — confirm in run.ts / git.ts)
+  const discardedShas = parseDiscardedShas(resultsRaw, 5)
+  let discardedDiffs = ""
+  if (discardedShas.length > 0) {
+    try {
+      discardedDiffs = await getDiscardedDiffs(cwd, discardedShas, 2000)
+    } catch {
+      // Discarded commits may have been garbage-collected — diffs unavailable
+      discardedDiffs = ""
+    }
+  }
+  let secondaryMetrics: ContextPacket["secondary_metrics"] // stays undefined when no secondary metrics are configured
+  if (config.secondary_metrics && Object.keys(config.secondary_metrics).length > 0) {
+    secondaryMetrics = {}
+    const lastKeep = parseLastKeepResult(resultsRaw)
+    const lastKeepValues = parseSecondaryValues(lastKeep?.secondary_values)
+    for (const [field, metric] of Object.entries(config.secondary_metrics)) {
+      const currentValue = lastKeepValues.secondary_metrics[field]
+      secondaryMetrics[field] = {
+        direction: metric.direction,
+        last_kept_value: typeof currentValue === "number" ? currentValue : undefined, // missing or non-numeric values become undefined
+      }
+    }
+  }
+  return {
+    experiment: state.experiment_number,
+    current_baseline: state.current_baseline,
+    original_baseline: state.original_baseline,
+    best_metric: state.best_metric,
+    best_experiment: state.best_experiment,
+    total_keeps: state.total_keeps,
+    total_discards: state.total_discards,
+    metric_field: config.metric_field,
+    direction: config.direction,
+    program_md: programMd,
+    recent_results: recentResults,
+    recent_git_log: recentGitLog,
+    last_outcome: lastOutcome,
+    discarded_diffs: discardedDiffs,
+    ideas_backlog: ideasBacklog,
+    secondary_metrics: secondaryMetrics,
+    consecutive_discards: options.consecutiveDiscards ?? 0,
+    max_consecutive_discards: options.maxConsecutiveDiscards ?? 10,
+    measurement_diagnostics: options.measurementDiagnostics,
+  }
+}
+/**
+ * Returns an escalating diversity directive based on how stuck the loop is.
+ *
+ * The ratio consecutiveDiscards / maxConsecutiveDiscards selects the message:
+ * at least 0.7 gives the CRITICAL text, at least 0.5 the "local optimum" text, at least 0.3 the
+ * mildest text. A lower ratio, or fewer than one consecutive discard, yields an empty string.
+ *
+ * NOTE(review): a maxConsecutiveDiscards of 0 makes the ratio Infinity for any positive count,
+ * which selects the CRITICAL text — confirm callers never pass 0.
+ *
+ * @param consecutiveDiscards - Number of experiments discarded in a row.
+ * @param maxConsecutiveDiscards - Configured limit the count is measured against.
+ * @returns A markdown section beginning with "## Exploration Directive", or "".
+ */
+function getExplorationDirective(consecutiveDiscards: number, maxConsecutiveDiscards: number): string {
+  if (consecutiveDiscards < 1) return ""
+  // Use proportional thresholds so directives scale with the configured limit
+  const ratio = consecutiveDiscards / maxConsecutiveDiscards
+  if (ratio >= 0.7) {
+    return `## Exploration Directive — CRITICAL
+${consecutiveDiscards} consecutive experiments discarded. Stagnation is imminent (limit: ${maxConsecutiveDiscards}).
+- You MUST try something radically different from everything in the results history.
+- Profile the code mentally and find the ACTUAL bottleneck — not the assumed one. Question fundamental assumptions.
+- If you genuinely cannot find a promising change — EXIT WITHOUT COMMITTING. A no-commit is better than burning another cycle on a doomed approach.`
+  }
+  if (ratio >= 0.5) {
+    return `## Exploration Directive
+${consecutiveDiscards} consecutive experiments discarded. You are likely stuck in a local optimum.
+- STOP trying variations of what's been tried. Every recent approach has failed.
+- Try something orthogonal: a completely different part of the codebase within scope, a different algorithmic family, or a simplification that removes code.
+- Re-read the ideas backlog "next" suggestions — pick the LEAST similar to recent attempts.
+- Remember: simplification keeps are free wins and can open up new optimization paths.`
+  }
+  if (ratio >= 0.3) {
+    return `## Exploration Directive
+${consecutiveDiscards} consecutive experiments discarded. The obvious approaches aren't working.
+- Step back and re-read the hot path from scratch — look for something you've been overlooking.
+- Try an approach from a DIFFERENT category than recent attempts (e.g., if recent tries were algorithmic, try a data-structure change; if recent tries were micro-optimizations, try a structural change).`
+  }
+  return ""
+}
+/**
+ * Formats the context packet as the user message string for the agent.
+ *
+ * Sections appear in a fixed order: current state, optional secondary metrics, last outcome,
+ * recent results, recent git history, recently discarded experiments, optional measurement
+ * diagnostics, optional ideas backlog, the exploration directive (possibly empty) and the closing
+ * instructions. An absent optional section is interpolated as an empty string.
+ *
+ * NOTE(review): packet.program_md is not referenced in this function — presumably it reaches the
+ * agent some other way (e.g. via the system prompt); confirm.
+ *
+ * @param packet - Context describing the current experiment.
+ * @returns The prompt text.
+ */
+export function buildExperimentPrompt(packet: ContextPacket): string {
+  let secondarySection = ""
+  if (packet.secondary_metrics && Object.keys(packet.secondary_metrics).length > 0) {
+    const lines = Object.entries(packet.secondary_metrics).map(([field, m]) => {
+      const val = m.last_kept_value !== undefined ? String(m.last_kept_value) : "unknown"
+      return `- ${field}: ${val} (${m.direction} is better, last kept measurement)`
+    })
+    secondarySection = `
+## Secondary Metrics (advisory — do NOT optimize at the expense of the primary metric)
+${lines.join("\n")}
+`
+  }
+  return `You are experiment ${packet.experiment} of an autoresearch loop.
+## Current State
+- Baseline ${packet.metric_field}: ${packet.current_baseline} (${packet.direction} is better)
+- Original baseline: ${packet.original_baseline}
+- Best achieved: ${packet.best_metric} (experiment #${packet.best_experiment})
+- Total: ${packet.total_keeps} keeps, ${packet.total_discards} discards
+${secondarySection}
+## Last Outcome
+${packet.last_outcome}
+## Recent Results
+\`\`\`
+${packet.recent_results}
+\`\`\`
+## Recent Git History
+\`\`\`
+${packet.recent_git_log}
+\`\`\`
+## Recently Discarded Experiments
+${packet.discarded_diffs || "(none yet)"}
+${packet.measurement_diagnostics ? `
+## Measurement Diagnostics
+Detailed diagnostic output from the last measurement run. Use this to identify exactly which audits, tests, or checks are underperforming — do NOT guess from code inspection alone.
+\`\`\`
+${packet.measurement_diagnostics}
+\`\`\`
+` : ""}
+${packet.ideas_backlog ? `
+## Ideas Backlog
+${packet.ideas_backlog}
+` : ""}
+${getExplorationDirective(packet.consecutive_discards, packet.max_consecutive_discards)}
+Review the recent results and discarded experiments${packet.ideas_backlog ? ", and ideas backlog" : ""} above. Focus on what was tried, why it failed, and what should be tried next.
+Implement ONE change, validate, and commit. Then stop.`
+}
+// --- Lock Violation Detection ---
+/**
+ * Checks if any changed files are in the locked .autoauto/ directory.
+ *
+ * Matching is a plain prefix test against ".autoauto/", so paths are expected to be
+ * repository-relative with forward slashes — presumably what the git helpers return; confirm.
+ *
+ * @param filesChanged - Paths of the files changed by the experiment.
+ * @returns The matching paths, and whether there were any.
+ */
+export function checkLockViolation(filesChanged: string[]): LockViolation {
+  const violated = filesChanged.filter((f) => f.startsWith(".autoauto/"))
+  return {
+    violated: violated.length > 0,
+    files: violated,
+  }
+}
+// --- Experiment Agent ---
+/**
+ * Spawns a fresh agent session for one experiment.
+ * One-shot: push one user message, iterate to result, return outcome.
+ *
+ * Delegates the session to runExperimentAgentRaw, then attaches `notes` parsed from the
+ * assistant text collected during the session.
+ *
+ * @param cwd - Working directory handed to the provider and to the git helpers.
+ * @param systemPrompt - System prompt for the session.
+ * @param userPrompt - The single user message sent to the agent.
+ * @param modelConfig - Provider, model and effort selection.
+ * @param startSha - SHA recorded before the session; used to detect whether a commit was made.
+ * @param onStreamText - Optional callback invoked with each streamed text delta.
+ * @param onToolStatus - Optional callback invoked with a formatted description of each tool use.
+ * @param signal - Optional abort signal.
+ * @param maxTurns - Turn limit passed to the provider (defaults to 50).
+ * @returns The session outcome with `notes` set from parseExperimentNotes.
+ */
+export async function runExperimentAgent(
+  cwd: string,
+  systemPrompt: string,
+  userPrompt: string,
+  modelConfig: ModelSlot,
+  startSha: string,
+  onStreamText?: (text: string) => void,
+  onToolStatus?: (status: string) => void,
+  signal?: AbortSignal,
+  maxTurns = 50,
+): Promise<ExperimentOutcome> {
+  const raw = await runExperimentAgentRaw(cwd, systemPrompt, userPrompt, modelConfig, startSha, onStreamText, onToolStatus, signal, maxTurns)
+  return { ...raw.outcome, notes: parseExperimentNotes(raw.assistantText) }
+}
+async function runExperimentAgentRaw( // internal worker for runExperimentAgent: same parameters, but also returns the collected assistant text
+  cwd: string,
+  systemPrompt: string,
+  userPrompt: string,
+  modelConfig: ModelSlot,
+  startSha: string,
+  onStreamText?: (text: string) => void,
+  onToolStatus?: (status: string) => void,
+  signal?: AbortSignal,
+  maxTurns = 50,
+): Promise<{ outcome: ExperimentOutcome; assistantText: string }> {
+  if (signal?.aborted) { // already cancelled: return without starting a provider session
+    return { outcome: { type: "agent_error", error: "aborted before start" }, assistantText: "" }
+  }
+  let cost: ExperimentCost | undefined // assigned only when a "result" event arrives
+  let assistantText = "" // accumulates the text of every "assistant_complete" event, each prefixed with a newline
+  try {
+    const session = getProvider(modelConfig.provider).runOnce(userPrompt, {
+      systemPrompt,
+      tools: ["Read", "Write", "Edit", "Bash", "Glob", "Grep"],
+      allowedTools: ["Read", "Write", "Edit", "Bash", "Glob", "Grep"],
+      maxTurns,
+      cwd,
+      model: modelConfig.model,
+      effort: modelConfig.provider !== "opencode" ? modelConfig.effort : undefined, // effort is not forwarded to the "opencode" provider
+      signal,
+    })
+    for await (const event of session) {
+      if (signal?.aborted) break // NOTE(review): this falls through to the commit check below rather than returning an "aborted" error — confirm that is intended
+      switch (event.type) {
+        case "text_delta":
+          onStreamText?.(event.text)
+          break
+        case "tool_use":
+          onToolStatus?.(formatToolEvent(event.tool, event.input ?? {}))
+          break
+        case "assistant_complete":
+          assistantText += `\n${event.text}`
+          break
+        case "error":
+          return { outcome: { type: "agent_error", error: event.error, cost }, assistantText } // cost is still undefined here unless a "result" event came first
+        case "result":
+          cost = event.cost
+          if (!event.success) {
+            return { outcome: { type: "agent_error", error: event.error ?? "unknown", cost }, assistantText }
+          }
+          break
+      }
+    }
+  } catch (err: unknown) {
+    if (signal?.aborted) { // an exception raised while the signal is aborted is reported as a plain "aborted"
+      return { outcome: { type: "agent_error", error: "aborted", cost }, assistantText }
+    }
+    return {
+      outcome: { type: "agent_error", error: formatShellError(err), cost },
+      assistantText,
+    }
+  }
+  // Check if the agent produced a commit: compare the SHA reported by getFullSha after the
+  // session with startSha; an unchanged SHA is reported as "no_commit"
+  const endSha = await getFullSha(cwd)
+  if (endSha === startSha) {
+    return { outcome: { type: "no_commit", cost }, assistantText }
+  }
+  const [description, filesChanged, diffStats] = await Promise.all([
+    getLatestCommitMessage(cwd),
+    getFilesChangedBetween(cwd, startSha, endSha),
+    getDiffStats(cwd, startSha, endSha),
+  ])
+  return {
+    outcome: { type: "committed", sha: endSha, description, files_changed: filesChanged, diff_stats: diffStats, cost },
+    assistantText,
+  }
+}

package/src/lib/finalize.test.ts ADDED Viewed

@@ -0,0 +1,143 @@
+import { describe, expect, test } from "bun:test"
+import { extractFinalizeGroups, validateGroups } from "./finalize.ts"
+describe("extractFinalizeGroups", () => { // parsing of the <finalize_groups> JSON payload embedded in free text
+  test("extracts valid groups", () => {
+    const text = `Some review text here.
+<finalize_groups>
+[
+  {
+    "name": "lazy-load-images",
+    "title": "perf(images): lazy-load below-fold images",
+    "description": "Added intersection observer",
+    "files": ["src/ImageLoader.tsx", "src/lazy.ts"],
+    "risk": "low"
+  },
+  {
+    "name": "remove-lodash",
+    "title": "refactor: remove lodash dependency",
+    "description": "Replaced with native methods",
+    "files": ["package.json", "src/utils.ts"],
+    "risk": "medium"
+  }
+]
+</finalize_groups>
+More text after.`
+    const groups = extractFinalizeGroups(text)
+    expect(groups).not.toBeNull() // the non-null assertions below rely on this check
+    expect(groups!.length).toBe(2)
+    expect(groups![0].name).toBe("lazy-load-images")
+    expect(groups![0].files).toEqual(["src/ImageLoader.tsx", "src/lazy.ts"])
+    expect(groups![0].risk).toBe("low")
+    expect(groups![1].name).toBe("remove-lodash")
+    expect(groups![1].risk).toBe("medium")
+  })
+  test("returns null when no XML tags present", () => {
+    expect(extractFinalizeGroups("just some text without tags")).toBeNull()
+  })
+  test("returns null for empty array", () => {
+    expect(extractFinalizeGroups("<finalize_groups>[]</finalize_groups>")).toBeNull() // an empty list counts as "no groups"
+  })
+  test("returns null for malformed JSON", () => {
+    expect(extractFinalizeGroups("<finalize_groups>{not json]</finalize_groups>")).toBeNull()
+  })
+  test("returns null when name is missing", () => {
+    const text = `<finalize_groups>[{"title": "fix", "files": ["a.ts"]}]</finalize_groups>`
+    expect(extractFinalizeGroups(text)).toBeNull()
+  })
+  test("returns null when files is empty", () => {
+    const text = `<finalize_groups>[{"name": "a", "title": "fix", "files": []}]</finalize_groups>`
+    expect(extractFinalizeGroups(text)).toBeNull()
+  })
+  test("normalizes group names to kebab-case", () => {
+    const text = `<finalize_groups>[{"name": "My Cool Feature!", "title": "feat", "files": ["a.ts"]}]</finalize_groups>`
+    const groups = extractFinalizeGroups(text)
+    expect(groups![0].name).toBe("my-cool-feature") // lower-cased, spaces turned into dashes, trailing "!" dropped
+  })
+  test("defaults risk to low when invalid", () => {
+    const text = `<finalize_groups>[{"name": "a", "title": "fix", "files": ["a.ts"], "risk": "extreme"}]</finalize_groups>`
+    const groups = extractFinalizeGroups(text)
+    expect(groups![0].risk).toBe("low") // "extreme" is not an accepted risk value
+  })
+  test("defaults description to empty string when missing", () => {
+    const text = `<finalize_groups>[{"name": "a", "title": "fix", "files": ["a.ts"]}]</finalize_groups>`
+    const groups = extractFinalizeGroups(text)
+    expect(groups![0].description).toBe("")
+  })
+})
+describe("validateGroups", () => { // second argument is the list of files the groups are checked against
+  test("validates a correct partition", () => {
+    const groups = [
+      { name: "a", title: "fix a", description: "", files: ["x.ts", "y.ts"], risk: "low" as const },
+      { name: "b", title: "fix b", description: "", files: ["z.ts"], risk: "low" as const },
+    ]
+    const result = validateGroups(groups, ["x.ts", "y.ts", "z.ts"])
+    expect(result.valid).toBe(true)
+  })
+  test("rejects overlapping files", () => {
+    const groups = [
+      { name: "a", title: "fix", description: "", files: ["x.ts"], risk: "low" as const },
+      { name: "b", title: "fix", description: "", files: ["x.ts"], risk: "low" as const },
+    ]
+    const result = validateGroups(groups, ["x.ts"])
+    expect(result.valid).toBe(false)
+    if (!result.valid) expect(result.reason).toContain("x.ts") // reason is only read on the invalid branch
+  })
+  test("rejects when files are unassigned", () => {
+    const groups = [
+      { name: "a", title: "fix", description: "", files: ["x.ts"], risk: "low" as const },
+    ]
+    const result = validateGroups(groups, ["x.ts", "y.ts"]) // "y.ts" belongs to no group
+    expect(result.valid).toBe(false)
+    if (!result.valid) expect(result.reason).toContain("y.ts")
+  })
+  test("strips phantom files silently", () => {
+    const groups = [
+      { name: "a", title: "fix", description: "", files: ["x.ts", "phantom.ts"], risk: "low" as const },
+    ]
+    const result = validateGroups(groups, ["x.ts"]) // "phantom.ts" is not in the file list
+    expect(result.valid).toBe(true)
+    if (result.valid) expect(result.groups[0].files).toEqual(["x.ts"])
+  })
+  test("removes groups left empty after phantom stripping", () => {
+    const groups = [
+      { name: "a", title: "fix", description: "", files: ["x.ts"], risk: "low" as const },
+      { name: "b", title: "fix", description: "", files: ["phantom.ts"], risk: "low" as const },
+    ]
+    const result = validateGroups(groups, ["x.ts"])
+    expect(result.valid).toBe(true)
+    if (result.valid) expect(result.groups.length).toBe(1)
+  })
+  test("rejects all-phantom groups", () => {
+    const groups = [
+      { name: "a", title: "fix", description: "", files: ["phantom.ts"], risk: "low" as const },
+    ]
+    const result = validateGroups(groups, ["x.ts"]) // no group covers "x.ts" once the phantom entry is ignored
+    expect(result.valid).toBe(false)
+  })
+  test("rejects duplicate group names", () => {
+    const groups = [
+      { name: "a", title: "fix", description: "", files: ["x.ts"], risk: "low" as const },
+      { name: "a", title: "fix", description: "", files: ["y.ts"], risk: "low" as const },
+    ]
+    const result = validateGroups(groups, ["x.ts", "y.ts"])
+    expect(result.valid).toBe(false)
+    if (!result.valid) expect(result.reason).toContain("Duplicate")
+  })
+})