@axplusb/kepler 1.0.10 → 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +5 -2
- package/pulse/app/api/benchmark/route.ts +113 -0
- package/pulse/app/api/benchmarks/route.ts +195 -0
- package/pulse/app/benchmarks/page.tsx +224 -0
- package/pulse/components/layout/bottom-nav.tsx +2 -1
- package/pulse/components/layout/sidebar.tsx +2 -1
- package/src/context/retriever.mjs +42 -4
- package/src/context/symbol-indexer.mjs +375 -0
- package/src/core/approval.mjs +154 -95
- package/src/core/backend-url.mjs +2 -2
- package/src/core/headless.mjs +5 -0
- package/src/core/risk-tier.mjs +245 -0
- package/src/core/stream-client.mjs +24 -1
- package/src/core/tool-executor.mjs +58 -5
- package/src/onboarding/preflight.mjs +292 -0
- package/src/state/orbit.mjs +263 -0
- package/src/state/verbosity.mjs +99 -0
- package/src/terminal/ansi.mjs +44 -22
- package/src/terminal/repl.mjs +487 -133
- package/src/tools/project-overview.mjs +109 -16
- package/src/ui/approval.mjs +167 -0
- package/src/ui/banner.mjs +133 -122
- package/src/ui/dock.mjs +88 -0
- package/src/ui/icons.mjs +164 -0
- package/src/ui/mission-report.mjs +264 -0
- package/src/ui/palette.mjs +189 -0
- package/src/ui/spinner.mjs +116 -0
- package/src/ui/status-bar.mjs +275 -0
- package/src/ui/sub-agent.mjs +152 -0
- package/src/ui/term.mjs +159 -0
- package/src/ui/tool-card.mjs +322 -0
- package/src/ui/tool-details.mjs +277 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@axplusb/kepler",
|
|
3
|
-
"version": "1.0.10",
|
|
3
|
+
"version": "2.0.2",
|
|
4
4
|
"description": "Kepler — AI coding agent with operating brief, preflight planning, and sub-agents. SWE-bench Lite evaluated.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -40,5 +40,8 @@
|
|
|
40
40
|
"type": "git",
|
|
41
41
|
"url": "git+https://github.com/raviakasapu/codekepler-npm.git"
|
|
42
42
|
},
|
|
43
|
-
"dependencies": {}
|
|
43
|
+
"dependencies": {
|
|
44
|
+
"tree-sitter-wasms": "^0.1.13",
|
|
45
|
+
"web-tree-sitter": "^0.26.9"
|
|
46
|
+
}
|
|
44
47
|
}
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
import { NextResponse } from 'next/server'
|
|
2
|
+
import { readFileSync } from 'fs'
|
|
3
|
+
import { join } from 'path'
|
|
4
|
+
|
|
5
|
+
export const dynamic = 'force-dynamic'
|
|
6
|
+
|
|
7
|
+
/**
 * One entry of the `results` array parsed from a run's `harness-results.json`.
 *
 * This route only reads `repo` and `resolved`; every other field is returned
 * to the client unchanged as part of the paginated `results`.
 * NOTE(review): the remaining field meanings are presumed from their names —
 * confirm against the harness output format.
 */
interface BenchmarkResult {
  instance_id: string
  repo: string
  base_commit: string
  test_patch: string
  /** Counted towards `resolvedTests` and the per-repo resolved totals. */
  resolved: boolean
  test_result: { result: string[]; exit_code: number }
  metadata: {
    agent_class: string
    model_name: string
    max_iterations: number
    eval_history: { timestamp: string; action: string; observation: string }[]
    submission: string
    // NOTE(review): `instance_id` and `test_result` repeat the top-level
    // fields of the same name — confirm the harness really nests them here.
    instance_id: string
    predict_output: string
    model_patch: string
    test_result: { result: string[]; exit_code: number }
  }
}
|
|
36
|
+
|
|
37
|
+
/** Top-level shape this route expects after parsing `harness-results.json`. */
interface BenchmarkData {
  results: Array<BenchmarkResult>
}
|
|
40
|
+
|
|
41
|
+
/**
 * GET handler: returns aggregate statistics and one page of results for a
 * benchmark run stored at `<cwd>/../benchmark/results/runs/<run>/harness-results.json`.
 *
 * Query parameters:
 * - `run`    — run directory name (default `swebench-v4-flash-300`). Must be a
 *              single path segment made of letters, digits, `.`, `_` or `-`.
 * - `limit`  — page size (default 50). Non-numeric or negative values fall back
 *              to the default.
 * - `offset` — index of the first result (default 0). Non-numeric or negative
 *              values fall back to the default.
 *
 * Responses:
 * - 200 with `{ run, stats, pagination, results }`
 * - 400 when `run` is not a safe directory name
 * - 404 when the run's results file cannot be read or parsed
 * - 500 on any other unexpected failure
 */
export async function GET(request: Request) {
  try {
    const { searchParams } = new URL(request.url)
    const run = searchParams.get('run') || 'swebench-v4-flash-300'

    // `run` comes straight from the query string and becomes a path segment
    // below. Reject separators and leading dots so that values such as
    // `../../x` cannot escape the runs directory.
    if (!/^[A-Za-z0-9][A-Za-z0-9._-]*$/.test(run)) {
      return NextResponse.json(
        { error: 'Invalid benchmark run name' },
        { status: 400 }
      )
    }

    // parseInt yields NaN for garbage input; NaN or negative bounds would make
    // `slice` return an empty or end-relative page, so fall back instead.
    const parseCount = (raw: string | null, fallback: number): number => {
      const value = parseInt(raw || '', 10)
      return Number.isNaN(value) || value < 0 ? fallback : value
    }
    const limit = parseCount(searchParams.get('limit'), 50)
    const offset = parseCount(searchParams.get('offset'), 0)

    // Load benchmark results from file
    const resultsPath = join(
      process.cwd(),
      '..',
      'benchmark',
      'results',
      'runs',
      run,
      'harness-results.json'
    )

    let data: BenchmarkData
    try {
      data = JSON.parse(readFileSync(resultsPath, 'utf-8'))
    } catch {
      // A missing file and unparseable JSON are both reported as "not found".
      return NextResponse.json(
        { error: `Benchmark run "${run}" not found` },
        { status: 404 }
      )
    }

    // The file content is untrusted JSON: only use `results` if it is an array.
    const results: BenchmarkResult[] = Array.isArray(data?.results) ? data.results : []

    // Calculate statistics
    const totalTests = results.length
    const resolvedTests = results.filter((r) => r.resolved).length
    const passRate = totalTests > 0 ? (resolvedTests / totalTests) * 100 : 0

    // Group by repo
    const byRepo = new Map<string, number>()
    const byRepoResolved = new Map<string, number>()
    for (const result of results) {
      const repo = result.repo || 'unknown'
      byRepo.set(repo, (byRepo.get(repo) || 0) + 1)
      if (result.resolved) {
        byRepoResolved.set(repo, (byRepoResolved.get(repo) || 0) + 1)
      }
    }

    // Paginate results
    const paginatedResults = results.slice(offset, offset + limit)

    return NextResponse.json({
      run,
      stats: {
        totalTests,
        resolvedTests,
        // Rounded to two decimals but kept numeric.
        passRate: parseFloat(passRate.toFixed(2)),
        byRepo: Object.fromEntries(byRepo),
        byRepoResolved: Object.fromEntries(byRepoResolved),
      },
      pagination: {
        limit,
        offset,
        total: totalTests,
      },
      results: paginatedResults,
    })
  } catch (error) {
    console.error('Benchmark API error:', error)
    return NextResponse.json(
      { error: 'Failed to load benchmark data' },
      { status: 500 }
    )
  }
}
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
import { NextResponse } from 'next/server'
|
|
2
|
+
import fs from 'fs'
|
|
3
|
+
import path from 'path'
|
|
4
|
+
|
|
5
|
+
export const dynamic = 'force-dynamic'
|
|
6
|
+
|
|
7
|
+
/**
 * One benchmark run as stored in the `results` array of `harness-results.json`.
 *
 * The aggregation in this file reads `repo`, `model`, the top-level `status`,
 * and the `kepler` metrics (`status`, `cost`, `tokens_used`,
 * `duration_seconds`).
 * NOTE(review): the meaning of the remaining fields is presumed from their
 * names — confirm against the harness output.
 */
interface BenchmarkResult {
  instance_id: string
  repo: string
  model: string
  timestamp: string
  /** Per-run agent metrics; a run counts as passed when `kepler.status === 'success'`. */
  kepler: {
    status: string
    exit_code: number
    duration_seconds: number
    tokens_used: number
    cost: number
    tool_calls: number
    sub_agents: Array<string>
  }
  patch_lines: number
  model_patch: string
  /** Top-level status; used for the `status` filter and the `by_status` counts. */
  status: string
}
|
|
25
|
+
|
|
26
|
+
/** Aggregate figures produced by `calculateStats` for a set of runs. */
interface BenchmarkStats {
  /** Number of results that were aggregated. */
  total_runs: number
  /** Runs whose `kepler.status` is `'success'`. */
  passed: number
  /** Runs whose top-level `status` is `'failed'`. */
  failed: number
  /** Runs whose top-level `status` is `'error'`. */
  error: number
  /** `passed / total_runs` as a percentage. */
  success_rate: number
  avg_duration: number
  total_cost: number
  total_tokens: number
  avg_tokens_per_run: number
  /** Run count keyed by top-level status. */
  by_status: { [status: string]: number }
  /** Run count, pass count and pass percentage keyed by repository. */
  by_repo: Record<string, { count: number; passed: number; success_rate: number }>
  /** Same shape as `by_repo`, keyed by model. */
  by_model: BenchmarkStats['by_repo']
}
|
|
40
|
+
|
|
41
|
+
/**
 * Reads the `swebench-v4-flash-300` harness results from
 * `<cwd>/benchmark/results/runs/swebench-v4-flash-300/harness-results.json`.
 *
 * Best-effort: never throws. Returns an empty array when the file is missing,
 * cannot be read or parsed, or does not contain a `results` array.
 *
 * NOTE(review): the sibling `/api/benchmark` route resolves the same directory
 * relative to `<cwd>/..`, while this one uses `<cwd>` — confirm which base
 * directory is intended.
 *
 * @returns The parsed `results` array, or `[]` on any failure.
 */
async function loadBenchmarkResults(): Promise<BenchmarkResult[]> {
  const resultsPath = path.join(
    process.cwd(),
    'benchmark/results/runs/swebench-v4-flash-300/harness-results.json'
  )

  try {
    // Async read: avoids blocking the event loop on the request path and
    // removes the exists-then-read race of a separate existence check.
    const raw = await fs.promises.readFile(resultsPath, 'utf-8')
    const parsed: unknown = JSON.parse(raw)
    const results = (parsed as { results?: unknown } | null)?.results

    if (Array.isArray(results)) {
      return results as BenchmarkResult[]
    }
    if (results != null) {
      // A truthy non-array would crash callers that filter/iterate the result.
      console.error('Error loading benchmark results:', new Error('"results" is not an array'))
    }
    return []
  } catch (error) {
    // A missing file is an expected state (no benchmark has been run yet).
    if ((error as { code?: string } | null)?.code === 'ENOENT') {
      return []
    }
    console.error('Error loading benchmark results:', error)
    return []
  }
}
|
|
59
|
+
|
|
60
|
+
/**
 * Aggregates a list of benchmark runs into summary statistics.
 *
 * Grouping keys come from the data itself, so tallies are kept in `Map`s:
 * with plain-object accumulators a repo or model named `constructor` or
 * `__proto__` would resolve to an inherited member instead of a fresh counter.
 * Runs without a repo, model or status are grouped under `'unknown'`.
 *
 * NOTE(review): `passed` is derived from `kepler.status === 'success'` while
 * `failed` and `error` are derived from the top-level `status` — confirm the
 * two status fields are meant to be mixed this way.
 *
 * @param results Runs to aggregate; may be empty.
 * @returns Totals, averages (0 when `results` is empty) and per-status,
 *          per-repo and per-model breakdowns.
 */
function calculateStats(results: BenchmarkResult[]): BenchmarkStats {
  type Tally = { count: number; passed: number }

  const total_runs = results.length
  const statusCounts = new Map<string, number>()
  const repoTally = new Map<string, Tally>()
  const modelTally = new Map<string, Tally>()

  let total_cost = 0
  let total_tokens = 0
  let total_duration = 0
  let passed = 0

  // Returns the tally for `key`, creating it on first use.
  const tallyFor = (tallies: Map<string, Tally>, key: string): Tally => {
    let tally = tallies.get(key)
    if (!tally) {
      tally = { count: 0, passed: 0 }
      tallies.set(key, tally)
    }
    return tally
  }

  for (const result of results) {
    const status = result.status || 'unknown'
    statusCounts.set(status, (statusCounts.get(status) || 0) + 1)

    const repo = tallyFor(repoTally, result.repo || 'unknown')
    const model = tallyFor(modelTally, result.model || 'unknown')
    repo.count++
    model.count++

    // Metrics are only present when the agent produced a `kepler` record.
    if (result.kepler) {
      total_cost += result.kepler.cost || 0
      total_tokens += result.kepler.tokens_used || 0
      total_duration += result.kepler.duration_seconds || 0

      if (result.kepler.status === 'success') {
        passed++
        repo.passed++
        model.passed++
      }
    }
  }

  // Guarded division so an empty input yields 0 rather than NaN.
  const perRun = (total: number): number => (total_runs > 0 ? total / total_runs : 0)

  // Object.fromEntries defines own properties, so unusual keys survive intact.
  const withRates = (tallies: Map<string, Tally>) =>
    Object.fromEntries(
      [...tallies].map(([key, { count, passed: groupPassed }]) => [
        key,
        {
          count,
          passed: groupPassed,
          success_rate: count > 0 ? (groupPassed / count) * 100 : 0,
        },
      ])
    )

  return {
    total_runs,
    passed,
    failed: statusCounts.get('failed') || 0,
    error: statusCounts.get('error') || 0,
    success_rate: perRun(passed) * 100,
    avg_duration: perRun(total_duration),
    total_cost,
    total_tokens,
    avg_tokens_per_run: perRun(total_tokens),
    by_status: Object.fromEntries(statusCounts),
    by_repo: withRates(repoTally),
    by_model: withRates(modelTally),
  }
}
|
|
155
|
+
|
|
156
|
+
/**
 * GET handler for the benchmarks summary.
 *
 * Query parameters:
 * - `format` — `detailed` returns the matching runs themselves; anything else
 *              (default `summary`) returns aggregate statistics.
 * - `repo`, `model`, `status` — optional exact-match filters; an absent or
 *              empty value disables that filter.
 */
export async function GET(request: Request) {
  const query = new URL(request.url).searchParams
  const wantsDetail = (query.get('format') || 'summary') === 'detailed'

  // Empty strings are normalised to null, which means "no filter".
  const filters = {
    repo: query.get('repo') || null,
    model: query.get('model') || null,
    status: query.get('status') || null,
  }

  const allRuns = await loadBenchmarkResults()

  // Apply every active filter in a single pass.
  const matching = allRuns.filter(
    (run) =>
      (!filters.repo || run.repo === filters.repo) &&
      (!filters.model || run.model === filters.model) &&
      (!filters.status || run.status === filters.status)
  )

  if (wantsDetail) {
    return NextResponse.json({
      results: matching,
      count: matching.length,
    })
  }

  // Default: summary format
  return NextResponse.json({
    stats: calculateStats(matching),
    filters,
  })
}
|
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
'use client'
|
|
2
|
+
|
|
3
|
+
import { useEffect, useState } from 'react'
|
|
4
|
+
import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card'
|
|
5
|
+
import { Badge } from '@/components/ui/badge'
|
|
6
|
+
|
|
7
|
+
/**
 * Aggregate statistics rendered by this page, taken from the `stats` field of
 * the `/api/benchmarks` response.
 */
interface BenchmarkStats {
  total_runs: number
  passed: number
  failed: number
  error: number
  /** Rendered with a `%` suffix. */
  success_rate: number
  /** Rendered with an `s` suffix. */
  avg_duration: number
  /** Rendered with a `$` prefix. */
  total_cost: number
  total_tokens: number
  avg_tokens_per_run: number
  /** Run count keyed by status. */
  by_status: { [status: string]: number }
  /** Per-repository run count, pass count and success rate. */
  by_repo: Record<string, { count: number; passed: number; success_rate: number }>
  /** Same shape as `by_repo`, keyed by model. */
  by_model: BenchmarkStats['by_repo']
}
|
|
21
|
+
|
|
22
|
+
/** JSON body this page expects from `/api/benchmarks`. */
interface BenchmarkResponse {
  stats: BenchmarkStats
  /** Filter values reported back by the API; `null` when a filter is unset. */
  filters: Record<'repo' | 'model' | 'status', string | null>
}
|
|
30
|
+
|
|
31
|
+
/**
 * Card with one row per group (repository or model), largest run count first.
 * Shared by the "Performance by Repository" and "Performance by Model" sections.
 */
function BreakdownCard({
  title,
  description,
  groups,
}: {
  title: string
  description: string
  groups: BenchmarkStats['by_repo']
}) {
  const rows = Object.entries(groups).sort((a, b) => b[1].count - a[1].count)

  return (
    <Card>
      <CardHeader>
        <CardTitle>{title}</CardTitle>
        <CardDescription>{description}</CardDescription>
      </CardHeader>
      <CardContent>
        <div className="space-y-4">
          {rows.map(([name, group]) => (
            <div key={name} className="flex items-center justify-between border-b pb-3 last:border-0">
              <div>
                <p className="font-medium text-sm">{name}</p>
                <p className="text-xs text-muted-foreground">
                  {group.count} runs, {group.passed} passed
                </p>
              </div>
              <div className="text-right">
                <p className="font-bold text-sm">{group.success_rate.toFixed(1)}%</p>
              </div>
            </div>
          ))}
        </div>
      </CardContent>
    </Card>
  )
}

/**
 * Benchmarks dashboard page.
 *
 * Fetches `/api/benchmarks` once on mount and renders key metrics, a status
 * breakdown, and per-repository / per-model success rates. Shows a loading,
 * error, or empty placeholder until data is available.
 */
export default function BenchmarksPage() {
  const [data, setData] = useState<BenchmarkResponse | null>(null)
  const [loading, setLoading] = useState(true)
  const [error, setError] = useState<string | null>(null)

  useEffect(() => {
    // Abort the request on unmount so a late response never updates state of
    // a component that is no longer mounted.
    const controller = new AbortController()

    const fetchBenchmarks = async () => {
      try {
        const response = await fetch('/api/benchmarks', { signal: controller.signal })
        if (!response.ok) {
          throw new Error('Failed to fetch benchmarks')
        }
        const json = await response.json()
        if (!controller.signal.aborted) {
          setData(json)
        }
      } catch (err) {
        // An aborted request is not a user-visible error.
        if (!controller.signal.aborted) {
          setError(err instanceof Error ? err.message : 'Unknown error')
        }
      } finally {
        if (!controller.signal.aborted) {
          setLoading(false)
        }
      }
    }

    fetchBenchmarks()
    return () => controller.abort()
  }, [])

  if (loading) {
    return (
      <div className="flex items-center justify-center min-h-screen">
        <p className="text-muted-foreground">Loading benchmarks...</p>
      </div>
    )
  }

  if (error) {
    return (
      <div className="flex items-center justify-center min-h-screen">
        <p className="text-destructive">Error: {error}</p>
      </div>
    )
  }

  if (!data) {
    return (
      <div className="flex items-center justify-center min-h-screen">
        <p className="text-muted-foreground">No benchmark data available</p>
      </div>
    )
  }

  const stats = data.stats

  // Share of all runs as a percentage; 0 when there are no runs at all.
  const shareOfRuns = (count: number): number =>
    stats.total_runs > 0 ? (count / stats.total_runs) * 100 : 0

  return (
    <div className="space-y-6 p-6">
      <div>
        <h1 className="text-3xl font-bold tracking-tight">Benchmarks</h1>
        <p className="text-muted-foreground mt-2">SWE-Bench v4 Flash 300 Results</p>
      </div>

      {/* Key Metrics */}
      <div className="grid gap-4 md:grid-cols-2 lg:grid-cols-4">
        <Card>
          <CardHeader className="pb-2">
            <CardTitle className="text-sm font-medium">Total Runs</CardTitle>
          </CardHeader>
          <CardContent>
            <div className="text-2xl font-bold">{stats.total_runs}</div>
          </CardContent>
        </Card>

        <Card>
          <CardHeader className="pb-2">
            <CardTitle className="text-sm font-medium">Success Rate</CardTitle>
          </CardHeader>
          <CardContent>
            <div className="text-2xl font-bold">{stats.success_rate.toFixed(1)}%</div>
            <p className="text-xs text-muted-foreground mt-1">
              {stats.passed} passed, {stats.failed} failed
            </p>
          </CardContent>
        </Card>

        <Card>
          <CardHeader className="pb-2">
            <CardTitle className="text-sm font-medium">Total Cost</CardTitle>
          </CardHeader>
          <CardContent>
            <div className="text-2xl font-bold">${stats.total_cost.toFixed(2)}</div>
            <p className="text-xs text-muted-foreground mt-1">
              {stats.avg_tokens_per_run.toFixed(0)} tokens/run
            </p>
          </CardContent>
        </Card>

        <Card>
          <CardHeader className="pb-2">
            <CardTitle className="text-sm font-medium">Avg Duration</CardTitle>
          </CardHeader>
          <CardContent>
            <div className="text-2xl font-bold">{stats.avg_duration.toFixed(1)}s</div>
            <p className="text-xs text-muted-foreground mt-1">
              {(stats.total_tokens / 1000).toFixed(1)}K tokens total
            </p>
          </CardContent>
        </Card>
      </div>

      {/* Status Breakdown */}
      <Card>
        <CardHeader>
          <CardTitle>Status Breakdown</CardTitle>
          <CardDescription>Distribution of run statuses</CardDescription>
        </CardHeader>
        <CardContent>
          <div className="space-y-3">
            {Object.entries(stats.by_status).map(([status, count]) => (
              <div key={status} className="flex items-center justify-between">
                <div className="flex items-center gap-2">
                  <Badge
                    variant={
                      status === 'success'
                        ? 'default'
                        : status === 'failed'
                          ? 'destructive'
                          : 'secondary'
                    }
                  >
                    {status}
                  </Badge>
                  <span className="text-sm text-muted-foreground">{count} runs</span>
                </div>
                <span className="text-sm font-medium">
                  {shareOfRuns(count).toFixed(1)}%
                </span>
              </div>
            ))}
          </div>
        </CardContent>
      </Card>

      {/* By Repository */}
      <BreakdownCard
        title="Performance by Repository"
        description="Success rate and run count per repository"
        groups={stats.by_repo}
      />

      {/* By Model */}
      <BreakdownCard
        title="Performance by Model"
        description="Success rate and run count per model"
        groups={stats.by_model}
      />
    </div>
  )
}
|
|
@@ -4,7 +4,7 @@ import Link from 'next/link'
|
|
|
4
4
|
import { usePathname } from 'next/navigation'
|
|
5
5
|
import {
|
|
6
6
|
LayoutDashboard, MessageSquare, DollarSign,
|
|
7
|
-
FolderOpen, Activity, Moon, Sun,
|
|
7
|
+
FolderOpen, Activity, Moon, Sun, Zap,
|
|
8
8
|
} from 'lucide-react'
|
|
9
9
|
import { useTheme } from '@/components/theme-provider'
|
|
10
10
|
import { cn } from '@/lib/utils'
|
|
@@ -15,6 +15,7 @@ const NAV = [
|
|
|
15
15
|
{ href: '/costs', label: 'Costs', icon: DollarSign },
|
|
16
16
|
{ href: '/projects', label: 'Projects', icon: FolderOpen },
|
|
17
17
|
{ href: '/activity', label: 'Activity', icon: Activity },
|
|
18
|
+
{ href: '/benchmarks', label: 'Benchmarks', icon: Zap },
|
|
18
19
|
]
|
|
19
20
|
|
|
20
21
|
export function BottomNav() {
|
|
@@ -5,7 +5,7 @@ import { usePathname } from 'next/navigation'
|
|
|
5
5
|
import {
|
|
6
6
|
LayoutDashboard, FolderOpen, MessageSquare, DollarSign,
|
|
7
7
|
Wrench, Activity, History, CheckSquare, FileText,
|
|
8
|
-
Brain, Settings, Download, HelpCircle, Moon, Sun, PanelLeftClose, PanelLeft,
|
|
8
|
+
Brain, Settings, Download, HelpCircle, Moon, Sun, PanelLeftClose, PanelLeft, Zap,
|
|
9
9
|
} from 'lucide-react'
|
|
10
10
|
import { useTheme } from '@/components/theme-provider'
|
|
11
11
|
import { useSidebar } from '@/components/layout/sidebar-context'
|
|
@@ -24,6 +24,7 @@ const NAV = [
|
|
|
24
24
|
{ href: '/todos', label: 'Todos', icon: CheckSquare },
|
|
25
25
|
{ href: '/plans', label: 'Plans', icon: FileText },
|
|
26
26
|
{ href: '/memory', label: 'Memory', icon: Brain },
|
|
27
|
+
{ href: '/benchmarks', label: 'Benchmarks', icon: Zap },
|
|
27
28
|
{ href: '/settings', label: 'Settings', icon: Settings },
|
|
28
29
|
{ href: '/help', label: 'Help', icon: HelpCircle },
|
|
29
30
|
{ href: '/export', label: 'Export', icon: Download },
|
|
@@ -4,12 +4,14 @@
|
|
|
4
4
|
*/
|
|
5
5
|
|
|
6
6
|
import { BM25Index } from './bm25.mjs';
|
|
7
|
+
import { SymbolIndexer } from './symbol-indexer.mjs';
|
|
7
8
|
import * as fs from 'node:fs';
|
|
8
9
|
import * as path from 'node:path';
|
|
9
10
|
import { indexDir as getIndexDir } from '../core/paths.mjs';
|
|
10
11
|
|
|
11
12
|
const IGNORED_DIRS = new Set(['.git', 'node_modules', '.kepler', '__pycache__', '.venv', 'venv', 'dist', 'build', '.next']);
|
|
12
13
|
const CODE_EXTS = new Set(['.js', '.mjs', '.ts', '.tsx', '.py', '.go', '.rs', '.java', '.rb', '.php', '.c', '.cpp', '.h', '.css', '.html', '.json', '.yaml', '.yml', '.toml', '.md', '.sh']);
|
|
14
|
+
const SYMBOL_EXTS = new Set(['.py', '.js', '.mjs', '.ts', '.tsx', '.jsx', '.go', '.rs']);
|
|
13
15
|
const MAX_FILE_SIZE = 100_000; // 100KB
|
|
14
16
|
const CHUNK_LINES = 50;
|
|
15
17
|
const CHUNK_OVERLAP = 10;
|
|
@@ -19,20 +21,33 @@ export class ContextRetriever {
|
|
|
19
21
|
this.projectDir = projectDir;
|
|
20
22
|
this.indexDir = getIndexDir(projectDir);
|
|
21
23
|
this.index = null;
|
|
24
|
+
this.symbolIndexer = null;
|
|
22
25
|
this.chunkTexts = new Map(); // id → original text content
|
|
23
26
|
}
|
|
24
27
|
|
|
25
|
-
/** Build or rebuild the search index. */
|
|
28
|
+
/** Build or rebuild the search index (BM25 chunks + symbol index). */
|
|
26
29
|
async buildIndex() {
|
|
27
30
|
const files = this._scanFiles(this.projectDir);
|
|
28
31
|
const documents = [];
|
|
29
32
|
|
|
33
|
+
// Symbol indexer for AST-based search
|
|
34
|
+
this.symbolIndexer = new SymbolIndexer();
|
|
35
|
+
await this.symbolIndexer.init();
|
|
36
|
+
|
|
30
37
|
for (const filePath of files) {
|
|
31
38
|
try {
|
|
32
39
|
const content = fs.readFileSync(filePath, 'utf-8');
|
|
33
40
|
const relPath = path.relative(this.projectDir, filePath);
|
|
41
|
+
|
|
42
|
+
// BM25 chunks (existing behavior)
|
|
34
43
|
const chunks = this._chunkFile(content, relPath);
|
|
35
44
|
documents.push(...chunks);
|
|
45
|
+
|
|
46
|
+
// Symbol extraction for code files
|
|
47
|
+
const ext = path.extname(filePath).toLowerCase();
|
|
48
|
+
if (SYMBOL_EXTS.has(ext)) {
|
|
49
|
+
await this.symbolIndexer.indexFile(relPath, content);
|
|
50
|
+
}
|
|
36
51
|
} catch { /* skip unreadable files */ }
|
|
37
52
|
}
|
|
38
53
|
|
|
@@ -45,12 +60,13 @@ export class ContextRetriever {
|
|
|
45
60
|
this.chunkTexts.set(doc.id, doc.text);
|
|
46
61
|
}
|
|
47
62
|
|
|
48
|
-
// Persist
|
|
63
|
+
// Persist
|
|
49
64
|
if (!fs.existsSync(this.indexDir)) fs.mkdirSync(this.indexDir, { recursive: true });
|
|
50
65
|
fs.writeFileSync(path.join(this.indexDir, 'bm25.json'), JSON.stringify(this.index.toJSON()));
|
|
51
66
|
fs.writeFileSync(path.join(this.indexDir, 'chunks.json'), JSON.stringify(Object.fromEntries(this.chunkTexts)));
|
|
67
|
+
fs.writeFileSync(path.join(this.indexDir, 'symbols.json'), JSON.stringify(this.symbolIndexer.toJSON()));
|
|
52
68
|
|
|
53
|
-
return { fileCount: files.length, chunkCount: documents.length };
|
|
69
|
+
return { fileCount: files.length, chunkCount: documents.length, symbolCount: this.symbolIndexer.symbolCount };
|
|
54
70
|
}
|
|
55
71
|
|
|
56
72
|
/**
|
|
@@ -120,22 +136,44 @@ export class ContextRetriever {
|
|
|
120
136
|
loadIndex() {
|
|
121
137
|
const indexPath = path.join(this.indexDir, 'bm25.json');
|
|
122
138
|
const chunksPath = path.join(this.indexDir, 'chunks.json');
|
|
139
|
+
const symbolsPath = path.join(this.indexDir, 'symbols.json');
|
|
123
140
|
if (!fs.existsSync(indexPath)) return false;
|
|
124
141
|
try {
|
|
125
142
|
const data = JSON.parse(fs.readFileSync(indexPath, 'utf-8'));
|
|
126
143
|
this.index = BM25Index.fromJSON(data);
|
|
127
144
|
|
|
128
|
-
// Load chunk texts if available
|
|
129
145
|
if (fs.existsSync(chunksPath)) {
|
|
130
146
|
const chunks = JSON.parse(fs.readFileSync(chunksPath, 'utf-8'));
|
|
131
147
|
this.chunkTexts = new Map(Object.entries(chunks));
|
|
132
148
|
}
|
|
149
|
+
|
|
150
|
+
if (fs.existsSync(symbolsPath)) {
|
|
151
|
+
const symData = JSON.parse(fs.readFileSync(symbolsPath, 'utf-8'));
|
|
152
|
+
this.symbolIndexer = SymbolIndexer.fromJSON(symData);
|
|
153
|
+
}
|
|
133
154
|
return true;
|
|
134
155
|
} catch {
|
|
135
156
|
return false;
|
|
136
157
|
}
|
|
137
158
|
}
|
|
138
159
|
|
|
160
|
+
/**
|
|
161
|
+
* Search symbols (functions, classes, methods) by query.
|
|
162
|
+
* Returns structured results with file:line, signature, parent class.
|
|
163
|
+
*/
|
|
164
|
+
searchSymbols(query, topK = 5) {
|
|
165
|
+
if (!this.symbolIndexer) return [];
|
|
166
|
+
return this.symbolIndexer.search(query, topK);
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
/**
|
|
170
|
+
* Format symbol search results for the agent.
|
|
171
|
+
*/
|
|
172
|
+
formatSymbolResults(results) {
|
|
173
|
+
if (!this.symbolIndexer || !results.length) return '';
|
|
174
|
+
return this.symbolIndexer.formatResults(results);
|
|
175
|
+
}
|
|
176
|
+
|
|
139
177
|
/** Retrieve relevant context chunks for a query, with full text. */
|
|
140
178
|
retrieve(query, topK = 10) {
|
|
141
179
|
if (!this.index) {
|