@axplusb/kepler 2.0.0 → 2.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@axplusb/kepler",
3
- "version": "2.0.0",
3
+ "version": "2.0.3",
4
4
  "description": "Kepler — AI coding agent with operating brief, preflight planning, and sub-agents. SWE-bench Lite evaluated.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -0,0 +1,113 @@
1
+ import { NextResponse } from 'next/server'
2
+ import { readFileSync } from 'fs'
3
+ import { join } from 'path'
4
+
5
+ export const dynamic = 'force-dynamic'
6
+
7
/** Outcome of one test execution: collected result lines plus the process exit code. */
interface HarnessTestResult {
  result: string[]
  exit_code: number
}

/** One recorded step of the evaluation history. */
interface EvalHistoryEntry {
  timestamp: string
  action: string
  observation: string
}

/** Agent/model bookkeeping attached to a single benchmark result. */
interface HarnessRunMetadata {
  agent_class: string
  model_name: string
  max_iterations: number
  eval_history: EvalHistoryEntry[]
  submission: string
  instance_id: string
  predict_output: string
  model_patch: string
  test_result: HarnessTestResult
}

/**
 * A single entry of the `results` array in a run's harness results file.
 *
 * The route handler in this file only reads `repo` and `resolved`; the
 * remaining fields are passed through to the client untouched.
 */
interface BenchmarkResult {
  instance_id: string
  repo: string
  base_commit: string
  test_patch: string
  resolved: boolean
  test_result: HarnessTestResult
  metadata: HarnessRunMetadata
}
36
+
37
/** Top-level shape the handler expects after parsing a run's harness results file. */
interface BenchmarkData {
  results: Array<BenchmarkResult>
}
40
+
41
/**
 * GET handler: returns aggregate statistics and one page of results for a
 * benchmark run.
 *
 * Query parameters:
 *  - `run`    — name of the run directory (default `swebench-v4-flash-300`).
 *               Must be a single path segment made of letters, digits, `.`,
 *               `_` or `-`, starting with a letter or digit.
 *  - `limit`  — page size (default 50); invalid or negative values fall back
 *               to the default.
 *  - `offset` — index of the first result (default 0); invalid or negative
 *               values fall back to the default.
 *
 * Responses:
 *  - 200 with `{ run, stats, pagination, results }`
 *  - 400 when `run` is not a safe directory name
 *  - 404 when the run's results file cannot be read or parsed
 *  - 500 on any other failure
 */
export async function GET(request: Request) {
  try {
    const { searchParams } = new URL(request.url)
    const run = searchParams.get('run') || 'swebench-v4-flash-300'

    // `run` is caller-controlled and becomes a path segment below. Reject
    // anything that could escape the runs directory ("..", separators, etc.).
    if (!/^[A-Za-z0-9][A-Za-z0-9._-]*$/.test(run)) {
      return NextResponse.json(
        { error: `Invalid benchmark run "${run}"` },
        { status: 400 }
      )
    }

    // parseInt yields NaN for garbage and accepts negatives; both would make
    // the slice below return surprising pages, so use the default instead.
    const toNonNegativeInt = (raw: string | null, fallback: number): number => {
      const parsed = parseInt(raw || '', 10)
      return Number.isNaN(parsed) || parsed < 0 ? fallback : parsed
    }
    const limit = toNonNegativeInt(searchParams.get('limit'), 50)
    const offset = toNonNegativeInt(searchParams.get('offset'), 0)

    // Load benchmark results from file
    const resultsPath = join(
      process.cwd(),
      '..',
      'benchmark',
      'results',
      'runs',
      run,
      'harness-results.json'
    )

    let data: BenchmarkData | null
    try {
      const fileContent = readFileSync(resultsPath, 'utf-8')
      data = JSON.parse(fileContent)
    } catch {
      // Missing, unreadable and malformed files are all reported as "not found".
      return NextResponse.json(
        { error: `Benchmark run "${run}" not found` },
        { status: 404 }
      )
    }

    // The file content is unvalidated JSON: tolerate a null payload or a
    // `results` field that is absent or not an array.
    const results: BenchmarkResult[] = Array.isArray(data?.results) ? data.results : []

    // Calculate statistics
    const totalTests = results.length
    const resolvedTests = results.filter((r) => r.resolved).length
    const passRate = totalTests > 0 ? (resolvedTests / totalTests) * 100 : 0

    // Group by repo
    const byRepo = new Map<string, number>()
    const byRepoResolved = new Map<string, number>()
    for (const result of results) {
      const repo = result.repo || 'unknown'
      byRepo.set(repo, (byRepo.get(repo) || 0) + 1)
      if (result.resolved) {
        byRepoResolved.set(repo, (byRepoResolved.get(repo) || 0) + 1)
      }
    }

    // Paginate results
    const paginatedResults = results.slice(offset, offset + limit)

    return NextResponse.json({
      run,
      stats: {
        totalTests,
        resolvedTests,
        // Round to two decimals while keeping a numeric (not string) value.
        passRate: parseFloat(passRate.toFixed(2)),
        byRepo: Object.fromEntries(byRepo),
        byRepoResolved: Object.fromEntries(byRepoResolved),
      },
      pagination: {
        limit,
        offset,
        total: totalTests,
      },
      results: paginatedResults,
    })
  } catch (error) {
    console.error('Benchmark API error:', error)
    return NextResponse.json(
      { error: 'Failed to load benchmark data' },
      { status: 500 }
    )
  }
}
@@ -0,0 +1,195 @@
1
+ import { NextResponse } from 'next/server'
2
+ import fs from 'fs'
3
+ import path from 'path'
4
+
5
+ export const dynamic = 'force-dynamic'
6
+
7
/** Per-run execution details recorded under a result's `kepler` key. */
interface KeplerRunInfo {
  status: string
  exit_code: number
  duration_seconds: number
  tokens_used: number
  cost: number
  tool_calls: number
  sub_agents: string[]
}

/**
 * One entry of the `results` array in the harness results file, as consumed
 * by the stats route. Filtering uses `repo`, `model` and the top-level
 * `status`; aggregation additionally reads the nested `kepler` block.
 */
interface BenchmarkResult {
  instance_id: string
  repo: string
  model: string
  timestamp: string
  kepler: KeplerRunInfo
  patch_lines: number
  model_patch: string
  status: string
}
25
+
26
/** Run count, pass count and pass percentage for one repo or model. */
type GroupedRunStats = { count: number; passed: number; success_rate: number }

/**
 * Aggregate statistics computed over a (possibly filtered) list of results.
 * `success_rate` values are percentages in the 0–100 range.
 */
interface BenchmarkStats {
  total_runs: number
  passed: number
  failed: number
  error: number
  success_rate: number
  avg_duration: number
  total_cost: number
  total_tokens: number
  avg_tokens_per_run: number
  by_status: Record<string, number>
  by_repo: Record<string, GroupedRunStats>
  by_model: Record<string, GroupedRunStats>
}
40
+
41
/**
 * Reads the `swebench-v4-flash-300` harness results file (resolved relative
 * to the process working directory) and returns its `results` array.
 *
 * Never rejects: a missing file yields an empty list, and any read or parse
 * failure is logged and also yields an empty list.
 */
async function loadBenchmarkResults(): Promise<BenchmarkResult[]> {
  const harnessFile = path.join(
    process.cwd(),
    'benchmark/results/runs/swebench-v4-flash-300/harness-results.json'
  )

  try {
    if (fs.existsSync(harnessFile)) {
      const raw = fs.readFileSync(harnessFile, 'utf-8')
      const parsed = JSON.parse(raw)
      return parsed.results || []
    }
    return []
  } catch (error) {
    console.error('Error loading benchmark results:', error)
    return []
  }
}
59
+
60
/**
 * Aggregates a list of benchmark results into summary statistics.
 *
 * - `by_status` counts the top-level `status` of each result.
 * - `passed` (overall and per repo/model) counts results whose
 *   `kepler.status` is `'success'`.
 * - `failed` / `error` are the `by_status` counts for those two keys.
 * - Cost, token and duration totals come from the `kepler` block; missing
 *   values count as 0.
 *
 * Grouping keys come from the data, so tallies are kept in `Map`s and only
 * converted to plain objects at the end. This keeps keys such as
 * `__proto__` or `constructor` from colliding with inherited properties or
 * mutating `Object.prototype`.
 *
 * @param results Results to aggregate (already filtered by the caller).
 * @returns Summary statistics; all-zero/empty when `results` is empty.
 */
function calculateStats(results: BenchmarkResult[]): BenchmarkStats {
  type Tally = { count: number; passed: number }
  type RatedTally = Tally & { success_rate: number }

  const totalRuns = results.length
  if (totalRuns === 0) {
    return {
      total_runs: 0,
      passed: 0,
      failed: 0,
      error: 0,
      success_rate: 0,
      avg_duration: 0,
      total_cost: 0,
      total_tokens: 0,
      avg_tokens_per_run: 0,
      by_status: {},
      by_repo: {},
      by_model: {},
    }
  }

  const statusCounts = new Map<string, number>()
  const repoTallies = new Map<string, Tally>()
  const modelTallies = new Map<string, Tally>()

  // Returns the tally for `key`, creating it on first sight.
  const tallyFor = (groups: Map<string, Tally>, key: string): Tally => {
    let tally = groups.get(key)
    if (!tally) {
      tally = { count: 0, passed: 0 }
      groups.set(key, tally)
    }
    return tally
  }

  // Adds the percentage of passed runs to every tally and converts to a
  // plain object for JSON serialization.
  const withRates = (groups: Map<string, Tally>): Record<string, RatedTally> =>
    Object.fromEntries(
      Array.from(groups, ([key, tally]): [string, RatedTally] => [
        key,
        {
          ...tally,
          success_rate: tally.count > 0 ? (tally.passed / tally.count) * 100 : 0,
        },
      ])
    )

  let total_cost = 0
  let total_tokens = 0
  let total_duration = 0
  let passed = 0

  for (const result of results) {
    // String(...) mirrors property-key coercion, so a missing field is
    // grouped under "undefined" exactly as before.
    const statusKey = String(result.status)
    statusCounts.set(statusKey, (statusCounts.get(statusKey) ?? 0) + 1)

    const repoTally = tallyFor(repoTallies, String(result.repo))
    const modelTally = tallyFor(modelTallies, String(result.model))
    repoTally.count++
    modelTally.count++

    // Aggregate metrics
    if (result.kepler) {
      total_cost += result.kepler.cost || 0
      total_tokens += result.kepler.tokens_used || 0
      total_duration += result.kepler.duration_seconds || 0

      if (result.kepler.status === 'success') {
        passed++
        repoTally.passed++
        modelTally.passed++
      }
    }
  }

  return {
    total_runs: totalRuns,
    passed,
    failed: statusCounts.get('failed') ?? 0,
    error: statusCounts.get('error') ?? 0,
    success_rate: (passed / totalRuns) * 100,
    avg_duration: total_duration / totalRuns,
    total_cost,
    total_tokens,
    avg_tokens_per_run: total_tokens / totalRuns,
    by_status: Object.fromEntries(statusCounts),
    by_repo: withRates(repoTallies),
    by_model: withRates(modelTallies),
  }
}
155
+
156
/**
 * GET handler for benchmark statistics.
 *
 * Optional query parameters `repo`, `model` and `status` narrow the result
 * set by exact match. `format=detailed` returns the matching results and
 * their count; any other value (default `summary`) returns aggregated stats
 * together with the filters that were applied.
 */
export async function GET(request: Request) {
  const query = new URL(request.url).searchParams
  const format = query.get('format') || 'summary'
  const repo = query.get('repo')
  const model = query.get('model')
  const status = query.get('status')

  const allResults = await loadBenchmarkResults()

  // A filter only applies when its parameter is present and non-empty.
  const filtered = allResults.filter(
    (r) =>
      (!repo || r.repo === repo) &&
      (!model || r.model === model) &&
      (!status || r.status === status)
  )

  if (format === 'detailed') {
    return NextResponse.json({
      results: filtered,
      count: filtered.length,
    })
  }

  // Default: summary format
  return NextResponse.json({
    stats: calculateStats(filtered),
    filters: {
      repo: repo || null,
      model: model || null,
      status: status || null,
    },
  })
}
@@ -0,0 +1,224 @@
1
+ 'use client'
2
+
3
+ import { useEffect, useState } from 'react'
4
+ import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card'
5
+ import { Badge } from '@/components/ui/badge'
6
+
7
/** Run count, pass count and pass percentage shown for one repo or model row. */
type GroupBreakdown = { count: number; passed: number; success_rate: number }

/**
 * Summary statistics as delivered by `/api/benchmarks` and rendered by the
 * page. `success_rate` values are rendered with a `%` suffix.
 */
interface BenchmarkStats {
  total_runs: number
  passed: number
  failed: number
  error: number
  success_rate: number
  avg_duration: number
  total_cost: number
  total_tokens: number
  avg_tokens_per_run: number
  by_status: Record<string, number>
  by_repo: Record<string, GroupBreakdown>
  by_model: Record<string, GroupBreakdown>
}
21
+
22
/** Filters echoed back by the API; `null` means the filter was not applied. */
type AppliedFilters = {
  repo: string | null
  model: string | null
  status: string | null
}

/** JSON body the page expects from `/api/benchmarks` in summary format. */
interface BenchmarkResponse {
  stats: BenchmarkStats
  filters: AppliedFilters
}
30
+
31
/**
 * Benchmarks dashboard page.
 *
 * Fetches the summary from `/api/benchmarks` once on mount and renders key
 * metrics, a status breakdown, and per-repository / per-model success rates.
 * Shows a loading, error, or "no data" placeholder until data is available.
 */
export default function BenchmarksPage() {
  const [data, setData] = useState<BenchmarkResponse | null>(null)
  const [loading, setLoading] = useState(true)
  const [error, setError] = useState<string | null>(null)

  useEffect(() => {
    // Abort the request on unmount so a late response never touches state
    // of a component that is no longer mounted.
    const controller = new AbortController()

    const fetchBenchmarks = async () => {
      try {
        const response = await fetch('/api/benchmarks', { signal: controller.signal })
        if (!response.ok) {
          throw new Error('Failed to fetch benchmarks')
        }
        const json = await response.json()
        setData(json)
      } catch (err) {
        // An abort is not a user-visible failure.
        if (controller.signal.aborted) return
        setError(err instanceof Error ? err.message : 'Unknown error')
      } finally {
        if (!controller.signal.aborted) {
          setLoading(false)
        }
      }
    }

    fetchBenchmarks()
    return () => controller.abort()
  }, [])

  if (loading) {
    return (
      <div className="flex items-center justify-center min-h-screen">
        <p className="text-muted-foreground">Loading benchmarks...</p>
      </div>
    )
  }

  if (error) {
    return (
      <div className="flex items-center justify-center min-h-screen">
        <p className="text-destructive">Error: {error}</p>
      </div>
    )
  }

  if (!data) {
    return (
      <div className="flex items-center justify-center min-h-screen">
        <p className="text-muted-foreground">No benchmark data available</p>
      </div>
    )
  }

  const stats = data.stats

  return (
    <div className="space-y-6 p-6">
      <div>
        <h1 className="text-3xl font-bold tracking-tight">Benchmarks</h1>
        <p className="text-muted-foreground mt-2">SWE-Bench v4 Flash 300 Results</p>
      </div>

      {/* Key Metrics */}
      <div className="grid gap-4 md:grid-cols-2 lg:grid-cols-4">
        <Card>
          <CardHeader className="pb-2">
            <CardTitle className="text-sm font-medium">Total Runs</CardTitle>
          </CardHeader>
          <CardContent>
            <div className="text-2xl font-bold">{stats.total_runs}</div>
          </CardContent>
        </Card>

        <Card>
          <CardHeader className="pb-2">
            <CardTitle className="text-sm font-medium">Success Rate</CardTitle>
          </CardHeader>
          <CardContent>
            <div className="text-2xl font-bold">{stats.success_rate.toFixed(1)}%</div>
            <p className="text-xs text-muted-foreground mt-1">
              {stats.passed} passed, {stats.failed} failed
            </p>
          </CardContent>
        </Card>

        <Card>
          <CardHeader className="pb-2">
            <CardTitle className="text-sm font-medium">Total Cost</CardTitle>
          </CardHeader>
          <CardContent>
            <div className="text-2xl font-bold">${stats.total_cost.toFixed(2)}</div>
            <p className="text-xs text-muted-foreground mt-1">
              {stats.avg_tokens_per_run.toFixed(0)} tokens/run
            </p>
          </CardContent>
        </Card>

        <Card>
          <CardHeader className="pb-2">
            <CardTitle className="text-sm font-medium">Avg Duration</CardTitle>
          </CardHeader>
          <CardContent>
            <div className="text-2xl font-bold">{stats.avg_duration.toFixed(1)}s</div>
            <p className="text-xs text-muted-foreground mt-1">
              {(stats.total_tokens / 1000).toFixed(1)}K tokens total
            </p>
          </CardContent>
        </Card>
      </div>

      {/* Status Breakdown */}
      <Card>
        <CardHeader>
          <CardTitle>Status Breakdown</CardTitle>
          <CardDescription>Distribution of run statuses</CardDescription>
        </CardHeader>
        <CardContent>
          <div className="space-y-3">
            {Object.entries(stats.by_status).map(([status, count]) => (
              <div key={status} className="flex items-center justify-between">
                <div className="flex items-center gap-2">
                  <Badge
                    variant={
                      status === 'success'
                        ? 'default'
                        : status === 'failed'
                          ? 'destructive'
                          : 'secondary'
                    }
                  >
                    {status}
                  </Badge>
                  <span className="text-sm text-muted-foreground">{count} runs</span>
                </div>
                <span className="text-sm font-medium">
                  {/* Guard the denominator so an inconsistent payload never renders NaN%. */}
                  {(stats.total_runs > 0 ? (count / stats.total_runs) * 100 : 0).toFixed(1)}%
                </span>
              </div>
            ))}
          </div>
        </CardContent>
      </Card>

      {/* By Repository */}
      <GroupBreakdownCard
        title="Performance by Repository"
        description="Success rate and run count per repository"
        groups={stats.by_repo}
      />

      {/* By Model */}
      <GroupBreakdownCard
        title="Performance by Model"
        description="Success rate and run count per model"
        groups={stats.by_model}
      />
    </div>
  )
}

/**
 * Card listing one row per group (repository or model), ordered by run count
 * descending, with the group's run/pass counts and success percentage.
 */
function GroupBreakdownCard({
  title,
  description,
  groups,
}: {
  title: string
  description: string
  groups: Record<string, { count: number; passed: number; success_rate: number }>
}) {
  // Object.entries returns a fresh array, so sorting in place is safe.
  const rows = Object.entries(groups).sort((a, b) => b[1].count - a[1].count)

  return (
    <Card>
      <CardHeader>
        <CardTitle>{title}</CardTitle>
        <CardDescription>{description}</CardDescription>
      </CardHeader>
      <CardContent>
        <div className="space-y-4">
          {rows.map(([name, group]) => (
            <div key={name} className="flex items-center justify-between border-b pb-3 last:border-0">
              <div>
                <p className="font-medium text-sm">{name}</p>
                <p className="text-xs text-muted-foreground">
                  {group.count} runs, {group.passed} passed
                </p>
              </div>
              <div className="text-right">
                <p className="font-bold text-sm">{group.success_rate.toFixed(1)}%</p>
              </div>
            </div>
          ))}
        </div>
      </CardContent>
    </Card>
  )
}
@@ -4,7 +4,7 @@ import Link from 'next/link'
4
4
  import { usePathname } from 'next/navigation'
5
5
  import {
6
6
  LayoutDashboard, MessageSquare, DollarSign,
7
- FolderOpen, Activity, Moon, Sun,
7
+ FolderOpen, Activity, Moon, Sun, Zap,
8
8
  } from 'lucide-react'
9
9
  import { useTheme } from '@/components/theme-provider'
10
10
  import { cn } from '@/lib/utils'
@@ -15,6 +15,7 @@ const NAV = [
15
15
  { href: '/costs', label: 'Costs', icon: DollarSign },
16
16
  { href: '/projects', label: 'Projects', icon: FolderOpen },
17
17
  { href: '/activity', label: 'Activity', icon: Activity },
18
+ { href: '/benchmarks', label: 'Benchmarks', icon: Zap },
18
19
  ]
19
20
 
20
21
  export function BottomNav() {
@@ -5,7 +5,7 @@ import { usePathname } from 'next/navigation'
5
5
  import {
6
6
  LayoutDashboard, FolderOpen, MessageSquare, DollarSign,
7
7
  Wrench, Activity, History, CheckSquare, FileText,
8
- Brain, Settings, Download, HelpCircle, Moon, Sun, PanelLeftClose, PanelLeft,
8
+ Brain, Settings, Download, HelpCircle, Moon, Sun, PanelLeftClose, PanelLeft, Zap,
9
9
  } from 'lucide-react'
10
10
  import { useTheme } from '@/components/theme-provider'
11
11
  import { useSidebar } from '@/components/layout/sidebar-context'
@@ -24,6 +24,7 @@ const NAV = [
24
24
  { href: '/todos', label: 'Todos', icon: CheckSquare },
25
25
  { href: '/plans', label: 'Plans', icon: FileText },
26
26
  { href: '/memory', label: 'Memory', icon: Brain },
27
+ { href: '/benchmarks', label: 'Benchmarks', icon: Zap },
27
28
  { href: '/settings', label: 'Settings', icon: Settings },
28
29
  { href: '/help', label: 'Help', icon: HelpCircle },
29
30
  { href: '/export', label: 'Export', icon: Download },
@@ -73,8 +73,14 @@ const NETWORK_TOOLS = new Set([
73
73
  // ── Shell sub-classifier ────────────────────────────────────────────────
74
74
 
75
75
  const SHELL_SAFE_RE = [
76
- // Inspection / read-only
77
- /^\s*(ls|cat|head|tail|less|more|wc|file|stat|tree|find|grep|rg|ag|fd|echo|printf|pwd|whoami|date|which|type|env|printenv|uname|hostname|id|df|du|uptime|free|top|ps|lsof)\b/i,
76
+ // Inspection / read-only + harmless shell navigation built-ins.
77
+ // `cd` / `pushd` / `popd` only change the process working directory; if
78
+ // chained with something dangerous, the multi-segment classifier still
79
+ // catches the danger (`cd /x && rm -rf .` → SHELL_DANGEROUS).
80
+ /^\s*(cd|pushd|popd|ls|cat|head|tail|less|more|wc|file|stat|tree|find|grep|rg|ag|fd|echo|printf|pwd|whoami|date|which|type|env|printenv|uname|hostname|id|df|du|uptime|free|top|ps|lsof)\b/i,
81
+ // mkdir -p / touch are creation primitives but harmless in scope.
82
+ /^\s*mkdir\s+-p\b/i,
83
+ /^\s*touch\s/i,
78
84
  /^\s*git\s+(status|log|diff|show|branch|tag|remote|stash\s+list|blame|shortlog|describe|rev-parse|ls-files|ls-tree|config\s+--get)\b/i,
79
85
  // Test-only invocations
80
86
  /^\s*(npm|pnpm|yarn)\s+(test|run\s+test|run\s+lint|list|ls|view|info|outdated)\b/i,
@@ -93,14 +93,23 @@ export class TarangStreamClient {
93
93
  };
94
94
  if (this.token) headers['Authorization'] = `Bearer ${this.token}`;
95
95
 
96
+ // Abort controller so cancel() can break out of a stalled reader
97
+ // instead of waiting for the next SSE event to notice _cancelled.
98
+ this._abort = new AbortController();
99
+
96
100
  let response;
97
101
  try {
98
102
  response = await fetch(url, {
99
103
  method: 'POST',
100
104
  headers,
101
105
  body: JSON.stringify(body),
106
+ signal: this._abort.signal,
102
107
  });
103
108
  } catch (err) {
109
+ if (err.name === 'AbortError') {
110
+ yield { type: EVENT_TYPES.STATUS, data: { message: 'Cancelled by user.' } };
111
+ return;
112
+ }
104
113
  yield { type: EVENT_TYPES.ERROR, data: { message: `Network error: ${err.message}. Check your connection or use --local mode.`, fatal: true } };
105
114
  return;
106
115
  }
@@ -175,7 +184,15 @@ export class TarangStreamClient {
175
184
 
176
185
  try {
177
186
  while (true) {
178
- const { done, value } = await reader.read();
187
+ let read;
188
+ try {
189
+ read = await reader.read();
190
+ } catch (err) {
191
+ // Aborted via cancel() — treat as a clean end-of-stream.
192
+ if (err && (err.name === 'AbortError' || this._cancelled)) break;
193
+ throw err;
194
+ }
195
+ const { done, value } = read;
179
196
  if (done) break;
180
197
 
181
198
  buffer += decoder.decode(value, { stream: true });
@@ -335,6 +352,7 @@ export class TarangStreamClient {
335
352
  /** Cancel the current stream. */
336
353
  async cancel() {
337
354
  this._cancelled = true;
355
+ // Best-effort backend POST — the stream may already be torn down.
338
356
  if (this.currentTaskId) {
339
357
  try {
340
358
  await fetch(`${this.baseUrl}/api/cancel/${this.currentTaskId}`, {
@@ -343,6 +361,11 @@ export class TarangStreamClient {
343
361
  });
344
362
  } catch { /* best effort */ }
345
363
  }
364
+ // Force the in-flight SSE reader to abort so the REPL returns to the
365
+ // prompt immediately instead of waiting on a parked reader.read().
366
+ if (this._abort) {
367
+ try { this._abort.abort(); } catch {}
368
+ }
346
369
  }
347
370
 
348
371
  /** Pause the current stream. */
@@ -91,6 +91,12 @@ export function createToolExecutor({
91
91
  '.rs': (file) => `rustfmt --check "${file}" 2>&1`,
92
92
  };
93
93
 
94
+ // tsc --pretty and eslint emit ANSI codes (including background-red
95
+ // highlights) which bleed when our renderer slices the first 80 chars.
96
+ // Strip color codes so the stored lint string is always plain text.
97
// Matches ANSI CSI escape sequences: ESC, "[", digits/semicolons, then one letter.
const ANSI_RE = /\x1b\[[0-9;]*[a-zA-Z]/g;

/** Coerces `s` to a string (falsy values become '') and removes ANSI escape sequences. */
function stripAnsi(s) {
  const text = String(s || '');
  return text.replace(ANSI_RE, '');
}
99
+
94
100
  function autoLint(filePath) {
95
101
  const ext = path.extname(filePath);
96
102
  const cmdFn = LINT_COMMANDS[ext];
@@ -102,13 +108,14 @@ export function createToolExecutor({
102
108
  timeout: 15_000,
103
109
  cwd: process.cwd(),
104
110
  stdio: ['pipe', 'pipe', 'pipe'],
111
+ env: { ...process.env, FORCE_COLOR: '0', NO_COLOR: '1', TERM: 'dumb' },
105
112
  });
106
- const trimmed = output.trim();
113
+ const trimmed = stripAnsi(output).trim();
107
114
  if (!trimmed) return null;
108
115
  return trimmed;
109
116
  } catch (err) {
110
117
  // Non-zero exit means lint errors found
111
- const output = (err.stderr || err.stdout || '').trim();
118
+ const output = stripAnsi(err.stderr || err.stdout || '').trim();
112
119
  if (!output) return null;
113
120
  return output;
114
121
  }