@axplusb/kepler 2.0.0 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@axplusb/kepler",
3
- "version": "2.0.0",
3
+ "version": "2.0.2",
4
4
  "description": "Kepler — AI coding agent with operating brief, preflight planning, and sub-agents. SWE-bench Lite evaluated.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -0,0 +1,113 @@
1
+ import { NextResponse } from 'next/server'
2
+ import { readFileSync } from 'fs'
3
+ import { join } from 'path'
4
+
5
+ export const dynamic = 'force-dynamic'
6
+
7
/** Output lines and process exit code recorded for one test execution. */
interface TestOutcome {
  result: string[]
  exit_code: number
}

/** One timestamped action/observation pair in a result's evaluation history. */
interface EvalHistoryEntry {
  timestamp: string
  action: string
  observation: string
}

/**
 * One entry of the `results` array in a run's `harness-results.json`.
 * NOTE(review): this route only reads `repo` and `resolved`; the remaining
 * fields are declared here but not verified against the file on disk.
 */
interface BenchmarkResult {
  instance_id: string
  repo: string
  base_commit: string
  test_patch: string
  resolved: boolean
  test_result: TestOutcome
  metadata: {
    agent_class: string
    model_name: string
    max_iterations: number
    eval_history: EvalHistoryEntry[]
    submission: string
    instance_id: string
    predict_output: string
    model_patch: string
    test_result: TestOutcome
  }
}

/** Top-level shape of a parsed `harness-results.json` file. */
interface BenchmarkData {
  results: BenchmarkResult[]
}
40
+
41
/**
 * A run name must be a single directory name: it starts with a letter or digit
 * and contains only letters, digits, `.`, `_` and `-`. This rejects path
 * separators and a leading `..`, so the name cannot escape the runs directory.
 */
const RUN_NAME_PATTERN = /^[A-Za-z0-9][A-Za-z0-9._-]*$/

/** Parses a non-negative integer query value; anything else yields `fallback`. */
function parseNonNegativeInt(raw: string | null, fallback: number): number {
  const parsed = Number.parseInt(raw || '', 10)
  return Number.isFinite(parsed) && parsed >= 0 ? parsed : fallback
}

/** True when a filesystem error means "the path does not exist". */
function isMissingFileError(error: unknown): boolean {
  const code = (error as { code?: unknown } | null)?.code
  return code === 'ENOENT' || code === 'ENOTDIR'
}

/**
 * GET handler: returns aggregate statistics and one page of results for a
 * benchmark run.
 *
 * Query parameters:
 * - `run`    — run directory name (default `swebench-v4-flash-300`).
 * - `limit`  — page size, non-negative integer (default 50).
 * - `offset` — index of the first result, non-negative integer (default 0).
 *
 * Responses:
 * - 200 with `{ run, stats, pagination, results }`.
 * - 400 when `run` is not a plain directory name.
 * - 404 when the run's results file does not exist.
 * - 500 for any other failure (unreadable file, malformed JSON, ...).
 */
export async function GET(request: Request) {
  try {
    const { searchParams } = new URL(request.url)
    const run = searchParams.get('run') || 'swebench-v4-flash-300'

    // `run` comes straight from the query string and is joined into a
    // filesystem path below, so it must be validated before use.
    if (!RUN_NAME_PATTERN.test(run)) {
      return NextResponse.json(
        { error: 'Invalid benchmark run name' },
        { status: 400 }
      )
    }

    const limit = parseNonNegativeInt(searchParams.get('limit'), 50)
    const offset = parseNonNegativeInt(searchParams.get('offset'), 0)

    // Load benchmark results from file
    const resultsPath = join(
      process.cwd(),
      '..',
      'benchmark',
      'results',
      'runs',
      run,
      'harness-results.json'
    )

    let parsed: unknown
    try {
      parsed = JSON.parse(readFileSync(resultsPath, 'utf-8'))
    } catch (error) {
      // Only a missing file is a 404; a corrupt or unreadable file is a
      // server-side problem and is reported (and logged) as a 500 below.
      if (!isMissingFileError(error)) throw error
      return NextResponse.json(
        { error: `Benchmark run "${run}" not found` },
        { status: 404 }
      )
    }

    // The file content is untrusted JSON: only accept `results` if it is an array.
    const candidate = (parsed as Partial<BenchmarkData> | null)?.results
    const results: BenchmarkResult[] = Array.isArray(candidate) ? candidate : []

    // Calculate statistics
    const totalTests = results.length
    const resolvedTests = results.filter((r) => r.resolved).length
    const passRate = totalTests > 0 ? (resolvedTests / totalTests) * 100 : 0

    // Group by repo
    const byRepo = new Map<string, number>()
    const byRepoResolved = new Map<string, number>()
    for (const result of results) {
      const repo = result.repo || 'unknown'
      byRepo.set(repo, (byRepo.get(repo) || 0) + 1)
      if (result.resolved) {
        byRepoResolved.set(repo, (byRepoResolved.get(repo) || 0) + 1)
      }
    }

    // Paginate results
    const paginatedResults = results.slice(offset, offset + limit)

    return NextResponse.json({
      run,
      stats: {
        totalTests,
        resolvedTests,
        passRate: parseFloat(passRate.toFixed(2)),
        byRepo: Object.fromEntries(byRepo),
        byRepoResolved: Object.fromEntries(byRepoResolved),
      },
      pagination: {
        limit,
        offset,
        total: totalTests,
      },
      results: paginatedResults,
    })
  } catch (error) {
    console.error('Benchmark API error:', error)
    return NextResponse.json(
      { error: 'Failed to load benchmark data' },
      { status: 500 }
    )
  }
}
@@ -0,0 +1,195 @@
1
+ import { NextResponse } from 'next/server'
2
+ import fs from 'fs'
3
+ import path from 'path'
4
+
5
+ export const dynamic = 'force-dynamic'
6
+
7
/** Per-run metrics nested under `kepler` in each benchmark result. */
interface KeplerRunMetrics {
  status: string
  exit_code: number
  duration_seconds: number
  tokens_used: number
  cost: number
  tool_calls: number
  sub_agents: string[]
}

/**
 * One entry of the `results` array loaded from `harness-results.json`.
 * NOTE(review): declared shape only; nothing in this file validates it
 * against the data on disk.
 */
interface BenchmarkResult {
  instance_id: string
  repo: string
  model: string
  timestamp: string
  kepler: KeplerRunMetrics
  patch_lines: number
  model_patch: string
  status: string
}

/** Run count, pass count and pass percentage (0–100) for one group. */
interface GroupStats {
  count: number
  passed: number
  success_rate: number
}

/** Aggregate statistics returned by the summary format of this route. */
interface BenchmarkStats {
  total_runs: number
  passed: number
  failed: number
  error: number
  success_rate: number
  avg_duration: number
  total_cost: number
  total_tokens: number
  avg_tokens_per_run: number
  by_status: Record<string, number>
  by_repo: Record<string, GroupStats>
  by_model: Record<string, GroupStats>
}
40
+
41
/**
 * Loads the results of the `swebench-v4-flash-300` run from
 * `<cwd>/benchmark/results/runs/swebench-v4-flash-300/harness-results.json`.
 *
 * Never rejects: a missing file yields `[]` silently; any other failure
 * (unreadable file, malformed JSON) is logged and also yields `[]`. A file
 * whose `results` member is absent or not an array yields `[]` as well, so
 * callers can always treat the return value as an array.
 *
 * @returns The `results` array from the file, or an empty array.
 */
async function loadBenchmarkResults(): Promise<BenchmarkResult[]> {
  const resultsPath = path.join(
    process.cwd(),
    'benchmark/results/runs/swebench-v4-flash-300/harness-results.json'
  )

  try {
    // Read asynchronously so the request handler does not block the event
    // loop, and without a separate exists-check that could race with the read.
    const raw = await fs.promises.readFile(resultsPath, 'utf-8')
    const parsed: unknown = JSON.parse(raw)
    const results = (parsed as { results?: unknown } | null)?.results
    return Array.isArray(results) ? (results as BenchmarkResult[]) : []
  } catch (error) {
    // A missing results file is an expected state, not an error worth logging.
    if ((error as { code?: unknown } | null)?.code === 'ENOENT') {
      return []
    }
    console.error('Error loading benchmark results:', error)
    return []
  }
}
59
+
60
/**
 * Aggregates a list of benchmark results into summary statistics.
 *
 * - `passed` counts results whose `kepler.status` is `'success'`.
 * - `failed` / `error` are the number of results whose top-level `status` is
 *   `'failed'` / `'error'`.
 * - Cost, tokens and duration are summed from `kepler` (missing values count
 *   as 0); averages are taken over all results.
 * - All rates are percentages in the range 0–100; an empty input yields zeros
 *   and empty groupings.
 *
 * Groupings are accumulated in `Map`s rather than plain objects: the keys come
 * from file data, and a key such as `__proto__` or `constructor` on a plain
 * object would hit inherited members (corrupting the tallies or writing onto
 * `Object.prototype`).
 *
 * @param results Results to aggregate (already filtered by the caller).
 * @returns Totals, averages and per-status / per-repo / per-model breakdowns.
 */
function calculateStats(results: BenchmarkResult[]): BenchmarkStats {
  type Tally = { count: number; passed: number }
  type RatedTally = Tally & { success_rate: number }

  // Returns the tally for `key`, creating it on first use. Keys are
  // stringified to match how they end up as object keys in the response.
  const tallyFor = (tallies: Map<string, Tally>, key: unknown): Tally => {
    const name = String(key)
    let tally = tallies.get(name)
    if (!tally) {
      tally = { count: 0, passed: 0 }
      tallies.set(name, tally)
    }
    return tally
  }

  const percent = (part: number, whole: number): number =>
    whole > 0 ? (part / whole) * 100 : 0

  // Converts tallies to a plain object, adding each group's success rate.
  const withRates = (tallies: Map<string, Tally>): Record<string, RatedTally> =>
    Object.fromEntries(
      Array.from(tallies, ([name, tally]): [string, RatedTally] => [
        name,
        { ...tally, success_rate: percent(tally.passed, tally.count) },
      ])
    )

  const statusCounts = new Map<string, number>()
  const repoTallies = new Map<string, Tally>()
  const modelTallies = new Map<string, Tally>()

  let total_cost = 0
  let total_tokens = 0
  let total_duration = 0
  let passed = 0

  for (const result of results) {
    const statusKey = String(result.status)
    statusCounts.set(statusKey, (statusCounts.get(statusKey) ?? 0) + 1)

    const repoTally = tallyFor(repoTallies, result.repo)
    const modelTally = tallyFor(modelTallies, result.model)
    repoTally.count++
    modelTally.count++

    // Aggregate metrics; entries without a `kepler` section contribute nothing.
    if (result.kepler) {
      total_cost += result.kepler.cost || 0
      total_tokens += result.kepler.tokens_used || 0
      total_duration += result.kepler.duration_seconds || 0

      if (result.kepler.status === 'success') {
        passed++
        repoTally.passed++
        modelTally.passed++
      }
    }
  }

  const total = results.length

  return {
    total_runs: total,
    passed,
    failed: statusCounts.get('failed') ?? 0,
    error: statusCounts.get('error') ?? 0,
    success_rate: percent(passed, total),
    avg_duration: total > 0 ? total_duration / total : 0,
    total_cost,
    total_tokens,
    avg_tokens_per_run: total > 0 ? total_tokens / total : 0,
    by_status: Object.fromEntries(statusCounts),
    by_repo: withRates(repoTallies),
    by_model: withRates(modelTallies),
  }
}
155
+
156
/**
 * GET handler: serves the loaded benchmark results, optionally narrowed by the
 * `repo`, `model` and `status` query parameters (exact string match).
 *
 * With `format=detailed` the matching results are returned together with their
 * count; for any other `format` value the response carries the aggregated
 * statistics plus an echo of the filters that were applied.
 */
export async function GET(request: Request) {
  const query = new URL(request.url).searchParams
  const format = query.get('format') || 'summary'
  const repo = query.get('repo')
  const model = query.get('model')
  const status = query.get('status')

  const allResults = await loadBenchmarkResults()

  // A filter that was not supplied (null or empty) matches every result.
  const filtered = allResults.filter(
    (entry) =>
      (!repo || entry.repo === repo) &&
      (!model || entry.model === model) &&
      (!status || entry.status === status)
  )

  if (format === 'detailed') {
    return NextResponse.json({
      results: filtered,
      count: filtered.length,
    })
  }

  // Everything else falls through to the summary format.
  return NextResponse.json({
    stats: calculateStats(filtered),
    filters: {
      repo: repo || null,
      model: model || null,
      status: status || null,
    },
  })
}
@@ -0,0 +1,224 @@
1
+ 'use client'
2
+
3
+ import { useEffect, useState } from 'react'
4
+ import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card'
5
+ import { Badge } from '@/components/ui/badge'
6
+
7
/** Run count, pass count and pass percentage (0–100) for one repo or model. */
interface GroupStats {
  count: number
  passed: number
  success_rate: number
}

/** Aggregate statistics as delivered in the `stats` member of the API response. */
interface BenchmarkStats {
  total_runs: number
  passed: number
  failed: number
  error: number
  success_rate: number
  avg_duration: number
  total_cost: number
  total_tokens: number
  avg_tokens_per_run: number
  by_status: Record<string, number>
  by_repo: Record<string, GroupStats>
  by_model: Record<string, GroupStats>
}

/** The filter values echoed back by the API; `null` means "not applied". */
interface BenchmarkFilters {
  repo: string | null
  model: string | null
  status: string | null
}

/** JSON body this page expects from `/api/benchmarks`. */
interface BenchmarkResponse {
  stats: BenchmarkStats
  filters: BenchmarkFilters
}
30
+
31
/**
 * Runtime check that a parsed JSON body has the fields this page renders.
 * The numeric fields are formatted with `toFixed` and the `by_*` maps are
 * iterated, so a body without them would otherwise crash during render.
 */
function isBenchmarkResponse(value: unknown): value is BenchmarkResponse {
  if (typeof value !== 'object' || value === null) return false
  const stats = (value as { stats?: unknown }).stats
  if (typeof stats !== 'object' || stats === null) return false

  const fields = stats as Record<string, unknown>
  const numericKeys = [
    'total_runs', 'passed', 'failed', 'success_rate',
    'avg_duration', 'total_cost', 'total_tokens', 'avg_tokens_per_run',
  ]
  const mapKeys = ['by_status', 'by_repo', 'by_model']
  return (
    numericKeys.every((key) => typeof fields[key] === 'number') &&
    mapKeys.every((key) => typeof fields[key] === 'object' && fields[key] !== null)
  )
}

/** Full-height centered single-line message used for the non-data states. */
function CenteredMessage({ className, text }: { className: string; text: string }) {
  return (
    <div className="flex items-center justify-center min-h-screen">
      <p className={className}>{text}</p>
    </div>
  )
}

/** One headline metric: title, large value and an optional small caption. */
function MetricCard({ title, value, caption }: { title: string; value: string; caption?: string }) {
  return (
    <Card>
      <CardHeader className="pb-2">
        <CardTitle className="text-sm font-medium">{title}</CardTitle>
      </CardHeader>
      <CardContent>
        <div className="text-2xl font-bold">{value}</div>
        {caption ? <p className="text-xs text-muted-foreground mt-1">{caption}</p> : null}
      </CardContent>
    </Card>
  )
}

/** Per-group (repository or model) breakdown, largest group first. */
function GroupBreakdownCard({
  title,
  description,
  groups,
}: {
  title: string
  description: string
  groups: Record<string, { count: number; passed: number; success_rate: number }>
}) {
  return (
    <Card>
      <CardHeader>
        <CardTitle>{title}</CardTitle>
        <CardDescription>{description}</CardDescription>
      </CardHeader>
      <CardContent>
        <div className="space-y-4">
          {Object.entries(groups)
            .sort((a, b) => b[1].count - a[1].count)
            .map(([name, group]) => (
              <div key={name} className="flex items-center justify-between border-b pb-3 last:border-0">
                <div>
                  <p className="font-medium text-sm">{name}</p>
                  <p className="text-xs text-muted-foreground">
                    {group.count} runs, {group.passed} passed
                  </p>
                </div>
                <div className="text-right">
                  <p className="font-bold text-sm">{group.success_rate.toFixed(1)}%</p>
                </div>
              </div>
            ))}
        </div>
      </CardContent>
    </Card>
  )
}

/**
 * Benchmarks dashboard page.
 *
 * Fetches the summary from `/api/benchmarks` once on mount and renders the
 * headline metrics, the status distribution and the per-repository /
 * per-model breakdowns. Shows a loading, error or "no data" message while
 * the summary is unavailable.
 */
export default function BenchmarksPage() {
  const [data, setData] = useState<BenchmarkResponse | null>(null)
  const [loading, setLoading] = useState(true)
  const [error, setError] = useState<string | null>(null)

  useEffect(() => {
    // Abort the request on unmount so no state is set on an unmounted component.
    const controller = new AbortController()

    const fetchBenchmarks = async () => {
      try {
        const response = await fetch('/api/benchmarks', { signal: controller.signal })
        if (!response.ok) {
          throw new Error('Failed to fetch benchmarks')
        }
        const json: unknown = await response.json()
        if (!isBenchmarkResponse(json)) {
          throw new Error('Unexpected benchmark response')
        }
        setData(json)
      } catch (err) {
        if (controller.signal.aborted) return
        setError(err instanceof Error ? err.message : 'Unknown error')
      } finally {
        if (!controller.signal.aborted) {
          setLoading(false)
        }
      }
    }

    void fetchBenchmarks()
    return () => controller.abort()
  }, [])

  if (loading) {
    return <CenteredMessage className="text-muted-foreground" text="Loading benchmarks..." />
  }

  if (error) {
    return <CenteredMessage className="text-destructive" text={`Error: ${error}`} />
  }

  if (!data) {
    return <CenteredMessage className="text-muted-foreground" text="No benchmark data available" />
  }

  const stats = data.stats

  return (
    <div className="space-y-6 p-6">
      <div>
        <h1 className="text-3xl font-bold tracking-tight">Benchmarks</h1>
        <p className="text-muted-foreground mt-2">SWE-Bench v4 Flash 300 Results</p>
      </div>

      {/* Key Metrics */}
      <div className="grid gap-4 md:grid-cols-2 lg:grid-cols-4">
        <MetricCard title="Total Runs" value={String(stats.total_runs)} />
        <MetricCard
          title="Success Rate"
          value={`${stats.success_rate.toFixed(1)}%`}
          caption={`${stats.passed} passed, ${stats.failed} failed`}
        />
        <MetricCard
          title="Total Cost"
          value={`$${stats.total_cost.toFixed(2)}`}
          caption={`${stats.avg_tokens_per_run.toFixed(0)} tokens/run`}
        />
        <MetricCard
          title="Avg Duration"
          value={`${stats.avg_duration.toFixed(1)}s`}
          caption={`${(stats.total_tokens / 1000).toFixed(1)}K tokens total`}
        />
      </div>

      {/* Status Breakdown */}
      <Card>
        <CardHeader>
          <CardTitle>Status Breakdown</CardTitle>
          <CardDescription>Distribution of run statuses</CardDescription>
        </CardHeader>
        <CardContent>
          <div className="space-y-3">
            {Object.entries(stats.by_status).map(([status, count]) => (
              <div key={status} className="flex items-center justify-between">
                <div className="flex items-center gap-2">
                  <Badge
                    variant={
                      status === 'success'
                        ? 'default'
                        : status === 'failed'
                          ? 'destructive'
                          : 'secondary'
                    }
                  >
                    {status}
                  </Badge>
                  <span className="text-sm text-muted-foreground">{count} runs</span>
                </div>
                <span className="text-sm font-medium">
                  {/* by_status is only non-empty when total_runs > 0 */}
                  {((count / stats.total_runs) * 100).toFixed(1)}%
                </span>
              </div>
            ))}
          </div>
        </CardContent>
      </Card>

      {/* By Repository */}
      <GroupBreakdownCard
        title="Performance by Repository"
        description="Success rate and run count per repository"
        groups={stats.by_repo}
      />

      {/* By Model */}
      <GroupBreakdownCard
        title="Performance by Model"
        description="Success rate and run count per model"
        groups={stats.by_model}
      />
    </div>
  )
}
@@ -4,7 +4,7 @@ import Link from 'next/link'
4
4
  import { usePathname } from 'next/navigation'
5
5
  import {
6
6
  LayoutDashboard, MessageSquare, DollarSign,
7
- FolderOpen, Activity, Moon, Sun,
7
+ FolderOpen, Activity, Moon, Sun, Zap,
8
8
  } from 'lucide-react'
9
9
  import { useTheme } from '@/components/theme-provider'
10
10
  import { cn } from '@/lib/utils'
@@ -15,6 +15,7 @@ const NAV = [
15
15
  { href: '/costs', label: 'Costs', icon: DollarSign },
16
16
  { href: '/projects', label: 'Projects', icon: FolderOpen },
17
17
  { href: '/activity', label: 'Activity', icon: Activity },
18
+ { href: '/benchmarks', label: 'Benchmarks', icon: Zap },
18
19
  ]
19
20
 
20
21
  export function BottomNav() {
@@ -5,7 +5,7 @@ import { usePathname } from 'next/navigation'
5
5
  import {
6
6
  LayoutDashboard, FolderOpen, MessageSquare, DollarSign,
7
7
  Wrench, Activity, History, CheckSquare, FileText,
8
- Brain, Settings, Download, HelpCircle, Moon, Sun, PanelLeftClose, PanelLeft,
8
+ Brain, Settings, Download, HelpCircle, Moon, Sun, PanelLeftClose, PanelLeft, Zap,
9
9
  } from 'lucide-react'
10
10
  import { useTheme } from '@/components/theme-provider'
11
11
  import { useSidebar } from '@/components/layout/sidebar-context'
@@ -24,6 +24,7 @@ const NAV = [
24
24
  { href: '/todos', label: 'Todos', icon: CheckSquare },
25
25
  { href: '/plans', label: 'Plans', icon: FileText },
26
26
  { href: '/memory', label: 'Memory', icon: Brain },
27
+ { href: '/benchmarks', label: 'Benchmarks', icon: Zap },
27
28
  { href: '/settings', label: 'Settings', icon: Settings },
28
29
  { href: '/help', label: 'Help', icon: HelpCircle },
29
30
  { href: '/export', label: 'Export', icon: Download },
@@ -73,8 +73,14 @@ const NETWORK_TOOLS = new Set([
73
73
  // ── Shell sub-classifier ────────────────────────────────────────────────
74
74
 
75
75
  const SHELL_SAFE_RE = [
76
- // Inspection / read-only
77
- /^\s*(ls|cat|head|tail|less|more|wc|file|stat|tree|find|grep|rg|ag|fd|echo|printf|pwd|whoami|date|which|type|env|printenv|uname|hostname|id|df|du|uptime|free|top|ps|lsof)\b/i,
76
+ // Inspection / read-only + harmless shell navigation built-ins.
77
+ // `cd` / `pushd` / `popd` only change the process working directory; if
78
+ // chained with something dangerous, the multi-segment classifier still
79
+ // catches the danger (`cd /x && rm -rf .` → SHELL_DANGEROUS).
80
+ /^\s*(cd|pushd|popd|ls|cat|head|tail|less|more|wc|file|stat|tree|find|grep|rg|ag|fd|echo|printf|pwd|whoami|date|which|type|env|printenv|uname|hostname|id|df|du|uptime|free|top|ps|lsof)\b/i,
81
+ // mkdir -p / touch are creation primitives but harmless in scope.
82
+ /^\s*mkdir\s+-p\b/i,
83
+ /^\s*touch\s/i,
78
84
  /^\s*git\s+(status|log|diff|show|branch|tag|remote|stash\s+list|blame|shortlog|describe|rev-parse|ls-files|ls-tree|config\s+--get)\b/i,
79
85
  // Test-only invocations
80
86
  /^\s*(npm|pnpm|yarn)\s+(test|run\s+test|run\s+lint|list|ls|view|info|outdated)\b/i,
@@ -93,14 +93,23 @@ export class TarangStreamClient {
93
93
  };
94
94
  if (this.token) headers['Authorization'] = `Bearer ${this.token}`;
95
95
 
96
+ // Abort controller so cancel() can break out of a stalled reader
97
+ // instead of waiting for the next SSE event to notice _cancelled.
98
+ this._abort = new AbortController();
99
+
96
100
  let response;
97
101
  try {
98
102
  response = await fetch(url, {
99
103
  method: 'POST',
100
104
  headers,
101
105
  body: JSON.stringify(body),
106
+ signal: this._abort.signal,
102
107
  });
103
108
  } catch (err) {
109
+ if (err.name === 'AbortError') {
110
+ yield { type: EVENT_TYPES.STATUS, data: { message: 'Cancelled by user.' } };
111
+ return;
112
+ }
104
113
  yield { type: EVENT_TYPES.ERROR, data: { message: `Network error: ${err.message}. Check your connection or use --local mode.`, fatal: true } };
105
114
  return;
106
115
  }
@@ -175,7 +184,15 @@ export class TarangStreamClient {
175
184
 
176
185
  try {
177
186
  while (true) {
178
- const { done, value } = await reader.read();
187
+ let read;
188
+ try {
189
+ read = await reader.read();
190
+ } catch (err) {
191
+ // Aborted via cancel() — treat as a clean end-of-stream.
192
+ if (err && (err.name === 'AbortError' || this._cancelled)) break;
193
+ throw err;
194
+ }
195
+ const { done, value } = read;
179
196
  if (done) break;
180
197
 
181
198
  buffer += decoder.decode(value, { stream: true });
@@ -335,6 +352,7 @@ export class TarangStreamClient {
335
352
  /** Cancel the current stream. */
336
353
  async cancel() {
337
354
  this._cancelled = true;
355
+ // Best-effort backend POST — the stream may already be torn down.
338
356
  if (this.currentTaskId) {
339
357
  try {
340
358
  await fetch(`${this.baseUrl}/api/cancel/${this.currentTaskId}`, {
@@ -343,6 +361,11 @@ export class TarangStreamClient {
343
361
  });
344
362
  } catch { /* best effort */ }
345
363
  }
364
+ // Force the in-flight SSE reader to abort so the REPL returns to the
365
+ // prompt immediately instead of waiting on a parked reader.read().
366
+ if (this._abort) {
367
+ try { this._abort.abort(); } catch {}
368
+ }
346
369
  }
347
370
 
348
371
  /** Pause the current stream. */
@@ -91,6 +91,12 @@ export function createToolExecutor({
91
91
  '.rs': (file) => `rustfmt --check "${file}" 2>&1`,
92
92
  };
93
93
 
94
// tsc --pretty and eslint emit ANSI codes (including background-red
// highlights) which bleed when our renderer slices the first 80 chars.
// Strip color codes so the stored lint string is always plain text.
//
// The pattern matches a complete CSI control sequence: ESC [, then any
// parameter bytes (0x30-0x3F, which includes the `?` of private-mode
// sequences such as ESC[?25l), then any intermediate bytes (0x20-0x2F),
// then one final byte (0x40-0x7E).
const ANSI_RE = /\x1b\[[0-?]*[ -\/]*[@-~]/g;

/**
 * Returns `s` as a string with all CSI escape sequences removed.
 * Falsy input (null, undefined, '') yields the empty string.
 */
function stripAnsi(s) { return String(s || '').replace(ANSI_RE, ''); }
99
+
94
100
  function autoLint(filePath) {
95
101
  const ext = path.extname(filePath);
96
102
  const cmdFn = LINT_COMMANDS[ext];
@@ -102,13 +108,14 @@ export function createToolExecutor({
102
108
  timeout: 15_000,
103
109
  cwd: process.cwd(),
104
110
  stdio: ['pipe', 'pipe', 'pipe'],
111
+ env: { ...process.env, FORCE_COLOR: '0', NO_COLOR: '1', TERM: 'dumb' },
105
112
  });
106
- const trimmed = output.trim();
113
+ const trimmed = stripAnsi(output).trim();
107
114
  if (!trimmed) return null;
108
115
  return trimmed;
109
116
  } catch (err) {
110
117
  // Non-zero exit means lint errors found
111
- const output = (err.stderr || err.stdout || '').trim();
118
+ const output = stripAnsi(err.stderr || err.stdout || '').trim();
112
119
  if (!output) return null;
113
120
  return output;
114
121
  }
@@ -37,31 +37,44 @@ const FAIL = (s) => `${paint.state.danger('[✗]')} ${s}`;
37
37
 
38
38
  // ── Individual checks (each returns { status, label, hint? }) ──────────
39
39
 
40
/**
 * Combined sign-in / backend connectivity check for the preflight report.
 *
 * Reads `token` and `backendUrl` from `auth.loadCredentials()` and returns a
 * `{ status, label, hint? }` record:
 *
 * - no backend URL configured  → warn, "backend not configured" (no network I/O);
 * - no token                   → warn, with the result of a reachability ping;
 * - token, `/api/user/me` 2xx  → ok, "Signed in as <user> · connected";
 * - token, 401/403             → warn, token expired;
 * - token, other HTTP status   → warn, backend returned that status;
 * - token, request failed      → warn, backend offline.
 *
 * @param auth Object exposing `loadCredentials()`.
 * @param options.timeoutMs Upper bound for the authenticated round-trip,
 *   including reading the response body (default 2500 ms). Also passed to
 *   `ping` for the unauthenticated probe.
 */
async function checkAuthAndBackend(auth, { timeoutMs = 2500 } = {}) {
  const creds = auth.loadCredentials();
  const hasToken = !!creds.token;
  const url = creds.backendUrl;

  // Without a backend URL there is nothing to contact. Report that directly
  // instead of attempting a request to "undefined/api/user/me" and then
  // blaming the network for the resulting failure.
  if (!url) {
    return hasToken
      ? { status: 'warn', label: 'Signed in · backend not configured' }
      : { status: 'warn', label: 'Not signed in · backend not configured', hint: '/login to sign in' };
  }

  // No token: just probe whether the backend is reachable so we can hint
  // /login when it makes sense.
  if (!hasToken) {
    const reachable = await ping(url, timeoutMs).catch(() => false);
    return reachable
      ? { status: 'warn', label: 'Not signed in · backend ready', hint: '/login to sign in' }
      : { status: 'warn', label: 'Not signed in · backend offline', hint: '/login once the network is back' };
  }

  // Token present: real authenticated round-trip against /api/user/me.
  // Three outcomes: valid (200), expired (401/403), unreachable (network).
  const base = String(url).replace(/\/+$/, ''); // avoid "//api/..." when the URL ends in "/"
  const ctrl = new AbortController();
  const timer = setTimeout(() => ctrl.abort(), timeoutMs);
  try {
    const resp = await fetch(`${base}/api/user/me`, {
      headers: { 'Authorization': `Bearer ${creds.token}` },
      signal: ctrl.signal,
    });

    if (resp.ok) {
      // The timer is still armed here, so a stalled body read is aborted too;
      // in that case (or on a non-JSON body) we fall back to a generic name.
      const user = await resp.json().catch(() => null);
      const who = user?.github_username || user?.email || 'user';
      return { status: 'ok', label: `Signed in as ${who} · connected` };
    }
    if (resp.status === 401 || resp.status === 403) {
      return { status: 'warn', label: 'Token expired · connected', hint: '/login again to refresh' };
    }
    return { status: 'warn', label: `Backend returned ${resp.status}`, hint: 'try again shortly' };
  } catch {
    return { status: 'warn', label: 'Signed in · backend offline', hint: 'check network or try again shortly' };
  } finally {
    clearTimeout(timer);
  }
}
67
80
 
@@ -83,25 +96,32 @@ function checkGit(cwd) {
83
96
/**
 * Reports which known linters are installed and which ones the project in
 * `cwd` appears to need but lacks.
 *
 * A linter whose binary is found is listed as present; otherwise it is listed
 * as missing only when `projectUses(cwd, kind)` says the project uses that
 * language. Returns `{ status, label, hint? }`: `ok` when nothing is missing,
 * `warn` with one install hint per missing linter otherwise.
 */
function checkLinters(cwd) {
  const found = [];
  const absent = [];
  LINTERS.forEach((entry) => {
    if (which(entry.bin)) {
      found.push(entry);
      return;
    }
    if (projectUses(cwd, entry.kind)) absent.push(entry);
  });

  const binNames = (entries) => entries.map((entry) => entry.bin).join(', ');

  if (absent.length === 0) {
    const label = found.length === 0
      ? 'Linters none required'
      : `Linters ${binNames(found)}`;
    return { status: 'ok', label };
  }

  // Honest install command per linter. Falls back to a generic "install X"
  // message when the table has no one-liner for it (e.g. cargo).
  const hintFor = (entry) => (entry.install
    ? `${entry.bin}: ${entry.install}`
    : `install ${entry.bin} for ${entry.kind} support`);
  const hint = absent.map(hintFor).join(' · ');
  return { status: 'warn', label: `Linter (${binNames(absent)}) not found`, hint };
}

// Known linters: binary to look up, the project kind it serves, and the
// install command to suggest (null when there is no clean one-liner).
const LINTERS = [
  { bin: 'ruff', kind: 'python', install: 'pip install ruff' },
  { bin: 'eslint', kind: 'javascript', install: 'npm i -g eslint' },
  { bin: 'tsc', kind: 'typescript', install: 'npm i -g typescript' },
  // cargo ships with rustup; no clean one-liner — surface the warning
  // without a misleading "/install" command.
  { bin: 'cargo', kind: 'rust', install: null },
];
106
126
 
107
127
  function projectUses(cwd, kind) {
@@ -252,9 +272,7 @@ export async function runPreflight({ auth, cwd, version, silent = false } = {})
252
272
  write('\n' + header + '\n\n');
253
273
 
254
274
  const checks = [];
255
- checks.push(checkAuthToken(auth));
256
- checks.push(checkProviderKey(auth));
257
- checks.push(await checkBackend(auth));
275
+ checks.push(await checkAuthAndBackend(auth));
258
276
  checks.push(checkGit(cwd));
259
277
  checks.push(checkLinters(cwd));
260
278
  checks.push(checkProjectMap(cwd));
@@ -24,6 +24,7 @@ import { JsonlWriter } from '../core/jsonl-writer.mjs';
24
24
  import { createToolExecutor } from '../core/tool-executor.mjs';
25
25
  import { CheckpointManager } from '../core/checkpoints.mjs';
26
26
  import { runPreflight } from '../onboarding/preflight.mjs';
27
+ import { printBanner as printBrandedBanner } from '../ui/banner.mjs';
27
28
  import { renderMissionReport, saveReport, toMarkdown as missionMarkdown } from '../ui/mission-report.mjs';
28
29
  import {
29
30
  getVerbosity,
@@ -121,6 +122,7 @@ const session = {
121
122
  costBreakdown: [], // per-model usage: [{ model, role, input_tokens, output_tokens, cost }]
122
123
  totalCost: 0, // accumulated session cost (USD)
123
124
  costAccurate: false, // true if backend provides per-model breakdown
125
+ isByok: false, // set from session_info; hides cost + credits when true
124
126
  };
125
127
 
126
128
  // ── Commands ──
@@ -165,26 +167,15 @@ const COMMANDS = {
165
167
  // ── Banner ──
166
168
 
167
169
/**
 * Prints the startup banner to stderr: the branded logo block (delegated to
 * the banner module) followed by one status line with the version, the
 * environment name (`TARANG_ENV`, default `production`) and whether
 * credentials contain a token.
 */
function printBanner(auth) {
  // The visual block lives in the banner module; only the status line needs `auth`.
  printBrandedBanner();

  const { token } = auth.loadCredentials();
  const envName = process.env.TARANG_ENV || 'production';
  const signInState = token ? c.green('authenticated') : c.red('/login to start');
  const statusLine = ` ${c.gray('v' + VERSION)} ${c.dim(envName)} ${signInState}\n\n`;
  process.stderr.write(statusLine);
}
189
180
 
190
181
  // ── Prompt Chrome ──
@@ -212,12 +203,12 @@ function printBanner(auth) {
212
203
  */
213
204
  function buildContextStrip() {
214
205
  const totalTokens = session.inputTokens + session.outputTokens;
215
- const credits = formatCredits(costToCredits(session.totalCost));
216
206
  const elapsed = formatElapsed(session.startTime);
217
207
 
208
+ // BYOK: user pays the provider directly, suppress credits entirely.
218
209
  const right = [
219
210
  c.dim(`${formatTokens(totalTokens)} tok`),
220
- c.dim(credits),
211
+ ...(session.isByok ? [] : [c.dim(formatCredits(costToCredits(session.totalCost)))]),
221
212
  c.dim(elapsed),
222
213
  ].join(c.dim(' · '));
223
214
 
@@ -264,7 +255,7 @@ function printTurnSummary(toolCount, durationS, turnCost) {
264
255
  const parts = [];
265
256
  if (toolCount > 0) parts.push(`${toolCount} tools`);
266
257
  if (durationS) parts.push(`${Number(durationS).toFixed(1)}s`);
267
- if (turnCost > 0) parts.push(formatCredits(costToCredits(turnCost)));
258
+ if (turnCost > 0 && !session.isByok) parts.push(formatCredits(costToCredits(turnCost)));
268
259
  if (parts.length > 0) {
269
260
  process.stderr.write(`\n ${c.green('✓')} ${c.dim(parts.join(' · '))}\n`);
270
261
  }
@@ -281,6 +272,14 @@ function updateStatusBar() {
281
272
  * args. The result arrives later via `renderToolResult` and is appended as a
282
273
  * gutter line. Sub-agent calls are indented per session.inSubAgent.
283
274
  */
275
+ // Set by renderToolCall, consumed by renderToolResult so we can collapse the
276
+ // "head\n ⎿ → outcome\n" two-line shape into a single line whenever nothing
277
+ // else printed in between. Cleared by any handler that writes interleaving
278
+ // content (content/thinking/sub_agent_*/delegation/etc).
279
+ let _pendingHead = null; // { callId, head }
280
+
281
+ function clearPendingHead() { _pendingHead = null; }
282
+
284
283
  function renderToolCall(data) {
285
284
  const tool = data?.tool || 'unknown';
286
285
  const args = data?.args || {};
@@ -296,6 +295,7 @@ function renderToolCall(data) {
296
295
  recordCard({ id: callId, tool, args, head, startedAt: Date.now() });
297
296
  session.toolCounts[tool] = (session.toolCounts[tool] || 0) + 1;
298
297
  process.stderr.write(`\n${head}\n`);
298
+ _pendingHead = { callId, head };
299
299
  }
300
300
 
301
301
  /**
@@ -333,10 +333,32 @@ function renderToolResult(data, eventType = 'tool_result') {
333
333
  : paint.text.dim;
334
334
  const duration = formatToolDuration(data);
335
335
  const tail = duration ? paint.text.dim(` · ${duration}`) : '';
336
- process.stderr.write(`${gutter}${arrow} ${painter(text || 'done')}${tail}\n`);
336
+ const outcome = `${arrow} ${painter(text || 'done')}${tail}`;
337
+
338
+ // ── Single-line collapse ──
339
+ // If nothing has interleaved between renderToolCall and this result, rewrite
340
+ // the head line in-place as "<head> → outcome · duration" — saves a full
341
+ // row per tool call. Falls back to the two-line gutter form when the head
342
+ // is gone (something scrolled it away) or the combined line would not fit.
343
+ const hasLint = (tool === 'write_file' || tool === 'edit_file') && data.lint;
344
+ if (_pendingHead && _pendingHead.callId === callId && !hasLint) {
345
+ const cols = process.stderr.columns || 120;
346
+ const combined = `${_pendingHead.head} ${outcome}`;
347
+ if (stripAnsi(combined).length <= cols) {
348
+ // Move up one line, clear it, rewrite as one line. No leading newline
349
+ // because the cursor is already at the start of the (now-cleared) line.
350
+ process.stderr.write(`\x1b[1A\x1b[2K\r${combined}\n`);
351
+ _pendingHead = null;
352
+ return;
353
+ }
354
+ }
355
+ _pendingHead = null;
356
+
357
+ // Default two-line shape.
358
+ process.stderr.write(`${gutter}${outcome}\n`);
337
359
 
338
360
  // Lint warnings stay visible alongside writes.
339
- if ((tool === 'write_file' || tool === 'edit_file') && data.lint) {
361
+ if (hasLint) {
340
362
  process.stderr.write(`${gutter}${paint.state.warn('⚠ ' + String(data.lint).split('\n')[0].slice(0, 80))}\n`);
341
363
  }
342
364
  }
@@ -436,6 +458,9 @@ function startContentStream() {
436
458
 
437
459
  function appendContent(text) {
438
460
  if (!text) return;
461
+ // Any streamed content between renderToolCall and renderToolResult would
462
+ // scroll the head off "the line above", breaking the in-place collapse.
463
+ clearPendingHead();
439
464
  _streamBuffer += text;
440
465
  _streamedPartialText += text;
441
466
 
@@ -621,6 +646,7 @@ function renderEvent(event) {
621
646
 
622
647
  case 'delegation': {
623
648
  stopSpinner();
649
+ clearPendingHead();
624
650
  const from = data?.from || '';
625
651
  const to = data?.to || '';
626
652
  session.delegations.push({ from, to, time: Date.now() });
@@ -636,6 +662,7 @@ function renderEvent(event) {
636
662
 
637
663
  case 'sub_agent_start': {
638
664
  stopSpinner();
665
+ clearPendingHead();
639
666
  const agentType = data?.type || 'sub-agent';
640
667
  const model = data?.model || '';
641
668
  const query = data?.query || '';
@@ -657,6 +684,7 @@ function renderEvent(event) {
657
684
 
658
685
  case 'sub_agent_complete': {
659
686
  stopSpinner();
687
+ clearPendingHead();
660
688
  const agentType = data?.type || 'sub-agent';
661
689
  const usage = data?.usage || {};
662
690
  const tokens = (usage.input_tokens || 0) + (usage.output_tokens || 0);
@@ -697,6 +725,9 @@ function renderEvent(event) {
697
725
  }
698
726
  if (data?.model) session.model = data.model;
699
727
  if (data?.user) session.user = { ...session.user, ...data.user };
728
+ // BYOK users pay their model provider directly; the platform does not
729
+ // charge them credits. Hide cost + credits when this flag is set.
730
+ if (typeof data?.is_byok === 'boolean') session.isByok = data.is_byok;
700
731
  break;
701
732
  }
702
733
 
@@ -773,8 +804,9 @@ function renderEvent(event) {
773
804
  success: successOverall,
774
805
  filesChanged: session.filesChanged,
775
806
  toolCounts: session.toolCounts,
776
- subAgents: { ...session.subAgentCounts, savedUsd: session.savedUsd },
777
- costUsd: turnCost || session.totalCost,
807
+ subAgents: { ...session.subAgentCounts, savedUsd: session.isByok ? 0 : session.savedUsd },
808
+ // BYOK users pay their provider directly; suppress cost in the report.
809
+ costUsd: session.isByok ? null : (turnCost || session.totalCost),
778
810
  durationS: data?.duration_s,
779
811
  testsPass: data?.tests_passed != null
780
812
  ? { passed: data.tests_passed, total: data.tests_total || data.tests_passed }
@@ -873,7 +905,11 @@ async function handleCommand(input, ctx) {
873
905
  process.stderr.write(` ${c.dim('Turns')} ${session.turns}\n`);
874
906
  process.stderr.write(` ${c.dim('Tools')} ${session.totalToolCalls} total, ${session.toolCalls} last turn\n`);
875
907
  process.stderr.write(` ${c.dim('Duration')} ${formatElapsed(session.startTime)}\n`);
876
- process.stderr.write(` ${c.dim('Credits')} ${formatCredits(costToCredits(session.totalCost))}${session.costAccurate ? '' : c.dim(' (est)')}\n`);
908
+ if (session.isByok) {
909
+ process.stderr.write(` ${c.dim('Billing')} ${c.green('BYOK')} ${c.dim('(provider-billed)')}\n`);
910
+ } else {
911
+ process.stderr.write(` ${c.dim('Credits')} ${formatCredits(costToCredits(session.totalCost))}${session.costAccurate ? '' : c.dim(' (est)')}\n`);
912
+ }
877
913
  process.stderr.write(` ${c.dim('CWD')} ${safeCwd()}\n`);
878
914
 
879
915
  // Permissions
@@ -941,12 +977,20 @@ async function handleCommand(input, ctx) {
941
977
  process.stderr.write(` ${c.gray('Turns:')} ${session.turns}\n`);
942
978
  process.stderr.write(` ${c.gray('Tools:')} ${session.toolCalls}\n`);
943
979
  process.stderr.write(` ${c.gray('Blocked:')} ${session.blockedOps}\n`);
944
- process.stderr.write(` ${c.gray('Credits:')} ${formatCredits(costToCredits(session.totalCost))}${session.costAccurate ? '' : c.dim(' (est)')}\n`);
980
+ if (session.isByok) {
981
+ process.stderr.write(` ${c.gray('Billing:')} ${c.green('BYOK')} ${c.dim('(provider-billed)')}\n`);
982
+ } else {
983
+ process.stderr.write(` ${c.gray('Credits:')} ${formatCredits(costToCredits(session.totalCost))}${session.costAccurate ? '' : c.dim(' (est)')}\n`);
984
+ }
945
985
  process.stderr.write(` ${c.gray('Elapsed:')} ${formatElapsed(session.startTime)}\n\n`);
946
986
  return;
947
987
  }
948
988
 
949
989
  case '/cost': {
990
+ if (session.isByok) {
991
+ process.stderr.write(`\n ${c.bold('Billing')} ${c.green('BYOK')} ${c.dim('— you pay your model provider directly. Kepler does not charge credits for BYOK usage.')}\n\n`);
992
+ return;
993
+ }
950
994
  process.stderr.write(`\n ${c.bold('Session Credits')} ${c.brand(formatCredits(costToCredits(session.totalCost)))}`);
951
995
  if (!session.costAccurate) {
952
996
  process.stderr.write(` ${c.yellow('(estimated)')}`);
@@ -1065,8 +1109,8 @@ async function handleCommand(input, ctx) {
1065
1109
  success: true,
1066
1110
  filesChanged: session.filesChanged,
1067
1111
  toolCounts: session.toolCounts,
1068
- subAgents: { ...session.subAgentCounts, savedUsd: session.savedUsd },
1069
- costUsd: session.totalCost,
1112
+ subAgents: { ...session.subAgentCounts, savedUsd: session.isByok ? 0 : session.savedUsd },
1113
+ costUsd: session.isByok ? null : session.totalCost,
1070
1114
  durationS: (Date.now() - session.startTime) / 1000,
1071
1115
  nextActions: ['/commit', '/pr', '/undo'],
1072
1116
  };
@@ -1378,19 +1422,9 @@ export async function startTerminalRepl() {
1378
1422
 
1379
1423
  const ctx = { auth, toolExecutor, approval, jsonlWriter, sessionMgr, checkpoints };
1380
1424
 
1381
- // ── Mission Control orbit + status bar ──
1382
- // Opt-out via KEPLER_STATUS_BAR=0 (debugging) or KEPLER_PLAIN=1 (PRD-055).
1383
- // status-bar.mjs already no-ops when stdout is not a TTY, but the explicit
1384
- // env opt-out is useful for debugging escape-sequence noise.
1385
- const statusBarEnabled = process.env.KEPLER_STATUS_BAR !== '0' && term().isTTY && !term().plain;
1386
- if (statusBarEnabled) {
1387
- _orbit = createOrbit();
1388
- attachOrbit(_orbit);
1389
- // Always unmount before exit so the terminal scroll region is restored.
1390
- process.on('beforeExit', unmountStatusBar);
1391
- process.on('exit', unmountStatusBar);
1392
- }
1393
-
1425
+ // ── Print banner + preflight + init BEFORE mounting the status bar ──
1426
+ // The status bar shrinks the scroll region; if it mounts first, the
1427
+ // banner scrolls off-screen before the user ever sees it.
1394
1428
  printBanner(auth);
1395
1429
 
1396
1430
  // Preflight diagnostic (PRD-055 §9). Non-blocking; opt-out via
@@ -1437,12 +1471,41 @@ export async function startTerminalRepl() {
1437
1471
 
1438
1472
  process.stderr.write(`\n ${c.dim('Press')} ${c.brand('Enter')} ${c.dim('to start, or type a prompt below.')}\n`);
1439
1473
 
1440
- const PROMPT = `${c.brand('kepler')} ${c.dim('›')} `;
1474
+ // Mission Control status bar is OPT-IN as of v2.0.1.
1475
+ // Set KEPLER_STATUS_BAR=1 (or KEPLER_MISSION=1) to enable the persistent
1476
+ // bottom-anchored ORBIT bar. Default off because the DECSTBM scroll
1477
+ // region was eating the prompt visibility on some terminals (issue
1478
+ // observed during v2.0.0 testing). The orbit state machine and tool
1479
+ // cards still work without the bar — the bar is just the rendering.
1480
+ const statusBarEnabled = (
1481
+ process.env.KEPLER_STATUS_BAR === '1' || process.env.KEPLER_MISSION === '1'
1482
+ ) && term().isTTY && !term().plain;
1483
+ if (statusBarEnabled) {
1484
+ _orbit = createOrbit();
1485
+ attachOrbit(_orbit);
1486
+ process.on('beforeExit', unmountStatusBar);
1487
+ process.on('exit', unmountStatusBar);
1488
+ }
1489
+
1490
+ // The prompt label is the USER speaking, not the agent. Use the signed-in
1491
+ // GitHub handle if known, otherwise fall back to "You".
1492
+ //
1493
+ // readline counts every byte of the prompt as a visible column when it
1494
+ // computes cursor position for line-wrapping; ANSI color codes throw the
1495
+ // math off and produce duplicated text on wrap. Wrap each escape sequence
1496
+ // in SOH (\x01) ... STX (\x02) so readline skips it when measuring width.
1497
+ function rlSafe(s) {
1498
+ return String(s || '').replace(/\x1b\[[0-9;]*m/g, '\x01$&\x02');
1499
+ }
1500
+ function userPrompt() {
1501
+ const who = session.user?.github_username || session.user?.email?.split('@')[0] || 'You';
1502
+ return rlSafe(`${c.brand(who)} ${c.dim('›')} `);
1503
+ }
1441
1504
 
1442
1505
  const rl = readline.createInterface({
1443
1506
  input: process.stdin,
1444
1507
  output: process.stderr,
1445
- prompt: PROMPT,
1508
+ prompt: userPrompt(),
1446
1509
  completer: (line) => {
1447
1510
  if (line.startsWith('/')) {
1448
1511
  const hits = Object.keys(COMMANDS).filter(cmd => cmd.startsWith(line));
@@ -1461,6 +1524,7 @@ export async function startTerminalRepl() {
1461
1524
  function showPrompt() {
1462
1525
  printPromptBlock();
1463
1526
  process.stderr.write('\n'); // half-inch vertical gap above input line
1527
+ rl.setPrompt(userPrompt()); // refresh label in case session.user resolved
1464
1528
  rl.prompt();
1465
1529
  }
1466
1530
 
@@ -1555,7 +1619,10 @@ export async function startTerminalRepl() {
1555
1619
  // Esc key (single byte 0x1b, not part of arrow sequence)
1556
1620
  if (bytes.length === 1 && bytes[0] === 0x1b) {
1557
1621
  stopSpinner();
1558
- process.stderr.write(`\n ${c.yellow('⏹')} ${c.dim('Cancelling...')}\n`);
1622
+ process.stderr.write(`\n ${c.yellow('⏹')} ${c.dim('Cancelled.')}\n`);
1623
+ // cancel() now aborts the in-flight SSE reader; the for-await loop
1624
+ // wakes up immediately and the prompt returns. No more "stuck"
1625
+ // Cancelling… message.
1559
1626
  client.cancel();
1560
1627
  return;
1561
1628
  }
@@ -8,6 +8,35 @@ import { buildProjectSkeleton } from '../context/skeleton.mjs';
8
8
  import { indexDir as getIndexDir } from '../core/paths.mjs';
9
9
 
10
10
  const RESOURCE_FILE = 'project-resource.json';
11
+
12
/**
 * Normalize a path string as typed by a user or emitted by an LLM.
 *
 * Steps, in order: coerce to a string and trim surrounding whitespace, strip
 * one balanced pair of surrounding quotes, then expand a leading "~" or "~/"
 * to the current user's home directory. Does NOT unescape shell meta
 * characters — that is a separate, last-resort step done only if the literal
 * path does not resolve.
 *
 * NOTE(review): relies on `path` and `os` being imported at the top of this
 * module; the import lines are not visible in this hunk — confirm `os` is there.
 *
 * @param {unknown} p Raw path input; falsy values are treated as "".
 * @returns {string} The cleaned path (may still be relative, may be empty).
 */
function normalizePathInput(p) {
  let s = String(p || '').trim();
  // Trim balanced surrounding quotes. The length guard stops a lone quote
  // character from counting as both the opening and the closing quote, which
  // would otherwise collapse the input to "" (and "" later resolves to cwd).
  if (s.length >= 2) {
    const opener = s[0];
    if ((opener === '"' || opener === "'") && s.endsWith(opener)) {
      s = s.slice(1, -1);
    }
  }
  // Tilde expansion (~ or ~/...). "~user" forms are intentionally left alone.
  if (s === '~' || s.startsWith('~/')) {
    s = path.join(os.homedir(), s.slice(1));
  }
  return s;
}
30
+
31
/**
 * Replace common shell escape sequences with their literal characters. Used
 * as a fallback when the literal path does not resolve — the agent may have
 * pasted a copy of what they would type at a shell prompt.
 *
 * A backslash is dropped only when it precedes a character the shell treats
 * specially: whitespace, parentheses, square brackets, braces, quotes, and
 * the usual operator/glob characters. Square brackets matter for paths such
 * as "app/\[slug\]/page.tsx". A backslash before any other character is kept,
 * so Windows-style separators ("dir\name") pass through unchanged.
 *
 * @param {unknown} p Path text; falsy values are treated as "".
 * @returns {string} The path with recognised backslash escapes removed.
 */
function unescapeShellPath(p) {
  return String(p || '').replace(/\\([ \t()\[\]{}&$;'"!#*?<>|`])/g, '$1');
}
39
+
11
40
  const LANGUAGE_EXTENSIONS = new Map([
12
41
  ['.py', 'Python'],
13
42
  ['.js', 'JavaScript'],
@@ -280,6 +309,12 @@ export class ProjectRegistry {
280
309
  if (!rawPath) {
281
310
  throw new Error('get_project_overview requires a project path');
282
311
  }
312
+
313
+ // LLM sometimes passes shell-escaped paths ("Tarang\ Orca") or paths
314
+ // beginning with "~". Normalize defensively so the tool does not bounce
315
+ // back a "not found" error on a path that's correct apart from quoting.
316
+ rawPath = normalizePathInput(rawPath);
317
+
283
318
  if (!path.isAbsolute(rawPath)) {
284
319
  rawPath = path.resolve(process.cwd(), rawPath);
285
320
  }
@@ -288,7 +323,15 @@ export class ProjectRegistry {
288
323
  try {
289
324
  root = fs.realpathSync(rawPath);
290
325
  } catch {
291
- throw new Error(`Project path not found: ${rawPath}`);
326
+ // Try the unescaped variant explicitly so the error message can
327
+ // tell the agent what it actually attempted.
328
+ const unescaped = unescapeShellPath(rawPath);
329
+ if (unescaped !== rawPath) {
330
+ try { root = fs.realpathSync(unescaped); }
331
+ catch { throw new Error(`Project path not found: ${rawPath} (also tried ${unescaped})`); }
332
+ } else {
333
+ throw new Error(`Project path not found: ${rawPath}`);
334
+ }
292
335
  }
293
336
  if (!fs.statSync(root).isDirectory()) {
294
337
  throw new Error(`Project path is not a directory: ${root}`);
@@ -377,25 +420,64 @@ export class ProjectRegistry {
377
420
  if (!rawPath) {
378
421
  if (root) return root;
379
422
  if (this.projects.size === 1) return this.resources()[0].root;
380
- throw new Error('Path requires project_id when multiple or no projects are registered');
423
+ // Fall back to the first registered project when the model omits
424
+ // both path and project_id. Beats throwing on an inferable case.
425
+ const first = this.resources()[0];
426
+ if (first) return first.root;
427
+ throw new Error('No projects registered. Call get_project_overview first.');
381
428
  }
382
429
 
383
- let candidate;
384
- if (path.isAbsolute(rawPath)) {
385
- candidate = canonicalizeCandidate(path.resolve(rawPath));
386
- } else {
430
+ // LLM frequently passes shell-quoted paths copied from a terminal,
431
+ // e.g. "Tarang\ Orca/src/app/\(kepler\)/page.tsx". Normalize here so
432
+ // every tool benefits, not just get_project_overview.
433
+ rawPath = normalizePathInput(rawPath);
434
+
435
+ const buildCandidate = (input) => {
436
+ if (path.isAbsolute(input)) {
437
+ return canonicalizeCandidate(path.resolve(input));
438
+ }
387
439
  if (!root) {
388
- if (this.projects.size !== 1) {
389
- throw new Error('Relative path requires project_id when multiple or no projects are registered');
440
+ if (this.projects.size === 1) {
441
+ return canonicalizeCandidate(path.resolve(this.resources()[0].root, input));
390
442
  }
391
- root = this.resources()[0].root;
443
+ if (this.projects.size > 1) {
444
+ throw new Error('Relative path requires project_id when multiple projects are registered. Pass project_id or use an absolute path.');
445
+ }
446
+ throw new Error('No projects registered. Call get_project_overview first.');
392
447
  }
393
- candidate = canonicalizeCandidate(path.resolve(root, rawPath));
394
- }
448
+ return canonicalizeCandidate(path.resolve(root, input));
449
+ };
395
450
 
396
- const containingProject = [...this.projects.values()].find(({ resource }) =>
397
- isWithin(resource.root, candidate)
451
+ let candidate = buildCandidate(rawPath);
452
+
453
+ const findContaining = (cand) => [...this.projects.values()].find(({ resource }) =>
454
+ isWithin(resource.root, cand)
398
455
  );
456
+
457
+ let containingProject = findContaining(candidate);
458
+
459
+ // Two reasons to try the unescaped variant:
460
+ // (1) candidate is outside every project root (literal "Tarang\ Orca"
461
+ // does not contain a real project), or
462
+ // (2) candidate is inside a root but does not exist on disk because
463
+ // a path segment like "\(kepler\)" only resolves once unescaped.
464
+ // We retry once on the unescaped form before raising.
465
+ const needsRetry = !containingProject ||
466
+ (!allowMissing && !fs.existsSync(candidate));
467
+ if (needsRetry) {
468
+ const unescaped = unescapeShellPath(rawPath);
469
+ if (unescaped !== rawPath) {
470
+ try {
471
+ const altCandidate = buildCandidate(unescaped);
472
+ const altProject = findContaining(altCandidate);
473
+ if (altProject && (allowMissing || fs.existsSync(altCandidate))) {
474
+ candidate = altCandidate;
475
+ containingProject = altProject;
476
+ }
477
+ } catch { /* fall through to the original error */ }
478
+ }
479
+ }
480
+
399
481
  if (!containingProject) {
400
482
  throw new Error(`Path is outside registered project roots: ${rawPath}`);
401
483
  }
@@ -406,10 +488,21 @@ export class ProjectRegistry {
406
488
  }
407
489
 
408
490
  projectForPath(filePath) {
409
- const candidate = canonicalizeCandidate(path.resolve(filePath));
410
- return [...this.projects.values()].find(({ resource }) =>
491
+ const normalized = normalizePathInput(filePath);
492
+ const candidate = canonicalizeCandidate(path.resolve(normalized));
493
+ const direct = [...this.projects.values()].find(({ resource }) =>
411
494
  isWithin(resource.root, candidate)
412
- ) || null;
495
+ );
496
+ if (direct) return direct;
497
+ // Same unescape fallback used in resolvePath.
498
+ const unescaped = unescapeShellPath(normalized);
499
+ if (unescaped !== normalized) {
500
+ const altCandidate = canonicalizeCandidate(path.resolve(unescaped));
501
+ return [...this.projects.values()].find(({ resource }) =>
502
+ isWithin(resource.root, altCandidate)
503
+ ) || null;
504
+ }
505
+ return null;
413
506
  }
414
507
 
415
508
  reset() {
@@ -129,12 +129,20 @@ export function summarizeResult(tool, data) {
129
129
  return { text: head || 'ok', tone: 'success' };
130
130
  }
131
131
 
132
+ case 'analyze_code': {
133
+ // Backend returns "filename (N lines, ext)" — the filename already
134
+ // appears in the card head, so strip it and keep just the metadata.
135
+ const head = firstOutputLine(data);
136
+ const m = head.match(/\((\d+)\s+lines?,?\s+([^)]+)\)/);
137
+ if (m) return { text: `${m[1]} lines · ${m[2].trim()}`, tone: 'success' };
138
+ return { text: head.slice(0, 80) || 'done', tone: 'success' };
139
+ }
140
+
132
141
  case 'plan':
133
142
  case 'explore':
134
143
  case 'verify':
135
144
  case 'debug':
136
- case 'refactor':
137
- case 'analyze_code': {
145
+ case 'refactor': {
138
146
  const head = firstOutputLine(data).slice(0, 100);
139
147
  return { text: head || 'done', tone: 'success' };
140
148
  }