npm - @axplusb/kepler - Versions diffs - 1.0.10 → 2.0.2 - Mend

@axplusb/kepler 1.0.10 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

package/package.json +5 -2
package/pulse/app/api/benchmark/route.ts +113 -0
package/pulse/app/api/benchmarks/route.ts +195 -0
package/pulse/app/benchmarks/page.tsx +224 -0
package/pulse/components/layout/bottom-nav.tsx +2 -1
package/pulse/components/layout/sidebar.tsx +2 -1
package/src/context/retriever.mjs +42 -4
package/src/context/symbol-indexer.mjs +375 -0
package/src/core/approval.mjs +154 -95
package/src/core/backend-url.mjs +2 -2
package/src/core/headless.mjs +5 -0
package/src/core/risk-tier.mjs +245 -0
package/src/core/stream-client.mjs +24 -1
package/src/core/tool-executor.mjs +58 -5
package/src/onboarding/preflight.mjs +292 -0
package/src/state/orbit.mjs +263 -0
package/src/state/verbosity.mjs +99 -0
package/src/terminal/ansi.mjs +44 -22
package/src/terminal/repl.mjs +487 -133
package/src/tools/project-overview.mjs +109 -16
package/src/ui/approval.mjs +167 -0
package/src/ui/banner.mjs +133 -122
package/src/ui/dock.mjs +88 -0
package/src/ui/icons.mjs +164 -0
package/src/ui/mission-report.mjs +264 -0
package/src/ui/palette.mjs +189 -0
package/src/ui/spinner.mjs +116 -0
package/src/ui/status-bar.mjs +275 -0
package/src/ui/sub-agent.mjs +152 -0
package/src/ui/term.mjs +159 -0
package/src/ui/tool-card.mjs +322 -0
package/src/ui/tool-details.mjs +277 -0

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@axplusb/kepler",
-  "version": "1.0.10",
+  "version": "2.0.2",
   "description": "Kepler — AI coding agent with operating brief, preflight planning, and sub-agents. SWE-bench Lite evaluated.",
   "type": "module",
   "bin": {
@@ -40,5 +40,8 @@
     "type": "git",
     "url": "git+https://github.com/raviakasapu/codekepler-npm.git"
   },
-  "dependencies": {}
+  "dependencies": {
+    "tree-sitter-wasms": "^0.1.13",
+    "web-tree-sitter": "^0.26.9"
+  }
 }

package/pulse/app/api/benchmark/route.ts ADDED Viewed

@@ -0,0 +1,113 @@
+import { NextResponse } from 'next/server'
+import { readFileSync } from 'fs'
+import { join } from 'path'
+export const dynamic = 'force-dynamic'
+/**
+ * One entry of the `results` array parsed from a run's `harness-results.json`.
+ *
+ * The GET handler in this file only reads `repo` (grouping key) and `resolved`
+ * (pass flag); every other field is returned to the client unchanged.
+ * NOTE(review): the parsed JSON is assigned to this shape without any runtime
+ * validation, so the fields describe the expected layout, not a guarantee.
+ */
+interface BenchmarkResult {
+  instance_id: string
+  // Grouping key for the per-repo counters; a falsy value is bucketed as 'unknown'.
+  repo: string
+  base_commit: string
+  test_patch: string
+  // Counted towards `resolvedTests` and `passRate` when truthy.
+  resolved: boolean
+  test_result: {
+    result: string[]
+    exit_code: number
+  }
+  // Not read by the handler; passed through to the response as-is.
+  metadata: {
+    agent_class: string
+    model_name: string
+    max_iterations: number
+    eval_history: Array<{
+      timestamp: string
+      action: string
+      observation: string
+    }>
+    submission: string
+    instance_id: string
+    predict_output: string
+    model_patch: string
+    test_result: {
+      result: string[]
+      exit_code: number
+    }
+  }
+}
+/**
+ * Top-level layout expected in `harness-results.json`.
+ * `results` may be missing at runtime: the handler substitutes an empty list.
+ */
+interface BenchmarkData {
+  results: BenchmarkResult[]
+}
+/** Run served when the `run` query parameter is missing or empty. */
+const DEFAULT_RUN = 'swebench-v4-flash-300'
+/**
+ * Accepted run names: one path segment that starts with an alphanumeric
+ * character. Separators and a bare `..` are rejected, so a caller-supplied
+ * `run` cannot point outside the runs directory.
+ */
+const RUN_NAME_PATTERN = /^[A-Za-z0-9][A-Za-z0-9._-]*$/
+/**
+ * Parses a pagination query value.
+ *
+ * @param raw - Raw query-string value, or null when the parameter is absent.
+ * @param fallback - Value used when `raw` is absent, empty, non-numeric or negative.
+ * @returns A non-negative integer that is safe to feed into `Array.prototype.slice`.
+ */
+function parsePaginationParam(raw: string | null, fallback: number): number {
+  const parsed = Number.parseInt(raw || '', 10)
+  return Number.isNaN(parsed) || parsed < 0 ? fallback : parsed
+}
+/** True when `error` is a filesystem error meaning "path does not exist". */
+function isMissingFileError(error: unknown): boolean {
+  if (!(error instanceof Error)) return false
+  const code = (error as NodeJS.ErrnoException).code
+  return code === 'ENOENT' || code === 'ENOTDIR'
+}
+/**
+ * GET handler: returns aggregate statistics and one page of results for a benchmark run.
+ *
+ * Query parameters:
+ * - `run`    – run directory name (default `swebench-v4-flash-300`).
+ * - `limit`  – page size (default 50).
+ * - `offset` – index of the first result (default 0).
+ *
+ * Responses:
+ * - 200 with `{ run, stats, pagination, results }`.
+ * - 400 when `run` is not a plain directory name.
+ * - 404 when the run's `harness-results.json` does not exist.
+ * - 500 for any other failure (unreadable file, malformed JSON, ...).
+ */
+export async function GET(request: Request) {
+  try {
+    const { searchParams } = new URL(request.url)
+    const run = searchParams.get('run') || DEFAULT_RUN
+    // `run` is untrusted and becomes part of a filesystem path below.
+    if (!RUN_NAME_PATTERN.test(run)) {
+      return NextResponse.json(
+        { error: 'Invalid benchmark run name' },
+        { status: 400 }
+      )
+    }
+    const limit = parsePaginationParam(searchParams.get('limit'), 50)
+    const offset = parsePaginationParam(searchParams.get('offset'), 0)
+    // Load benchmark results from file
+    const resultsPath = join(
+      process.cwd(),
+      '..',
+      'benchmark',
+      'results',
+      'runs',
+      run,
+      'harness-results.json'
+    )
+    let data: BenchmarkData
+    try {
+      data = JSON.parse(readFileSync(resultsPath, 'utf-8'))
+    } catch (error) {
+      // Only a missing file means "unknown run"; anything else is a server fault.
+      if (!isMissingFileError(error)) throw error
+      return NextResponse.json(
+        { error: `Benchmark run "${run}" not found` },
+        { status: 404 }
+      )
+    }
+    // The file content is not validated, so tolerate a missing or non-array `results`.
+    const results = data && Array.isArray(data.results) ? data.results : []
+    // Calculate statistics and group by repo in a single pass
+    const byRepo = new Map<string, number>()
+    const byRepoResolved = new Map<string, number>()
+    let resolvedTests = 0
+    for (const result of results) {
+      const repo = result.repo || 'unknown'
+      byRepo.set(repo, (byRepo.get(repo) || 0) + 1)
+      if (result.resolved) {
+        resolvedTests++
+        byRepoResolved.set(repo, (byRepoResolved.get(repo) || 0) + 1)
+      }
+    }
+    const totalTests = results.length
+    const passRate = totalTests > 0 ? (resolvedTests / totalTests) * 100 : 0
+    return NextResponse.json({
+      run,
+      stats: {
+        totalTests,
+        resolvedTests,
+        // Rounded to two decimals but kept numeric.
+        passRate: parseFloat(passRate.toFixed(2)),
+        byRepo: Object.fromEntries(byRepo),
+        byRepoResolved: Object.fromEntries(byRepoResolved),
+      },
+      pagination: {
+        limit,
+        offset,
+        total: totalTests,
+      },
+      results: results.slice(offset, offset + limit),
+    })
+  } catch (error) {
+    console.error('Benchmark API error:', error)
+    return NextResponse.json(
+      { error: 'Failed to load benchmark data' },
+      { status: 500 }
+    )
+  }
+}

package/pulse/app/api/benchmarks/route.ts ADDED Viewed

@@ -0,0 +1,195 @@
+import { NextResponse } from 'next/server'
+import fs from 'fs'
+import path from 'path'
+export const dynamic = 'force-dynamic'
+/**
+ * One entry of the `results` array read from `harness-results.json`.
+ *
+ * Fields read in this file: `repo`, `model` and `status` (filtering and
+ * grouping) plus `kepler.status`, `kepler.cost`, `kepler.tokens_used` and
+ * `kepler.duration_seconds` (aggregation). The rest is only passed through in
+ * the `detailed` response format.
+ * NOTE(review): pulse/app/api/benchmark/route.ts declares a different shape for
+ * a file with the same name and run - confirm which schema the file follows.
+ */
+interface BenchmarkResult {
+  instance_id: string
+  repo: string
+  model: string
+  timestamp: string
+  // Per-run agent metrics; the aggregation skips entries where this is missing.
+  kepler: {
+    // A run is counted as passed when this equals 'success'.
+    status: string
+    exit_code: number
+    duration_seconds: number
+    tokens_used: number
+    cost: number
+    tool_calls: number
+    sub_agents: string[]
+  }
+  patch_lines: number
+  model_patch: string
+  // Top-level status: drives `by_status` and the `failed` / `error` counters.
+  status: string
+}
+/** Aggregate figures produced by `calculateStats` for a (possibly filtered) result list. */
+interface BenchmarkStats {
+  // Number of results that were aggregated.
+  total_runs: number
+  // Results whose `kepler.status` is 'success'.
+  passed: number
+  // Results whose top-level `status` is 'failed'.
+  failed: number
+  // Results whose top-level `status` is 'error'.
+  error: number
+  // `passed / total_runs` as a percentage (0-100).
+  success_rate: number
+  // Mean of `kepler.duration_seconds` over all results.
+  avg_duration: number
+  // Sum of `kepler.cost`.
+  total_cost: number
+  // Sum of `kepler.tokens_used`.
+  total_tokens: number
+  avg_tokens_per_run: number
+  // Result count per top-level `status` value.
+  by_status: Record<string, number>
+  // Per-group tallies; `success_rate` is a percentage (0-100).
+  by_repo: Record<string, { count: number; passed: number; success_rate: number }>
+  by_model: Record<string, { count: number; passed: number; success_rate: number }>
+}
+/**
+ * Reads the `results` array of the bundled benchmark run from disk.
+ *
+ * Never rejects: a missing file yields an empty list silently, and any other
+ * failure (unreadable file, malformed JSON) is logged and also yields an empty
+ * list. A `results` value that is not an array is treated as empty so callers
+ * can rely on array methods.
+ *
+ * NOTE(review): the path is resolved from `process.cwd()` directly, while
+ * pulse/app/api/benchmark/route.ts resolves the same file from the parent of
+ * `process.cwd()` - confirm which location is the intended one.
+ *
+ * @returns The parsed result entries, or `[]` when none can be loaded.
+ */
+async function loadBenchmarkResults(): Promise<BenchmarkResult[]> {
+  const resultsPath = path.join(
+    process.cwd(),
+    'benchmark/results/runs/swebench-v4-flash-300/harness-results.json'
+  )
+  try {
+    // Read directly instead of exists-then-read: no race window, and the
+    // promise API keeps the request handler from blocking on disk I/O.
+    const raw = await fs.promises.readFile(resultsPath, 'utf-8')
+    const data: unknown = JSON.parse(raw)
+    const results =
+      typeof data === 'object' && data !== null
+        ? (data as { results?: unknown }).results
+        : undefined
+    return Array.isArray(results) ? results : []
+  } catch (error) {
+    const missing =
+      error instanceof Error && (error as NodeJS.ErrnoException).code === 'ENOENT'
+    // An absent results file is an expected state and stays silent.
+    if (!missing) {
+      console.error('Error loading benchmark results:', error)
+    }
+    return []
+  }
+}
+/** Running counters for one repo or model bucket. */
+type GroupTally = { count: number; passed: number }
+/** Returns `part / whole` as a percentage, or 0 when `whole` is 0. */
+function toPercent(part: number, whole: number): number {
+  return whole > 0 ? (part / whole) * 100 : 0
+}
+/** Returns the tally stored under `key`, creating a zeroed one on first use. */
+function tallyFor(tallies: Map<string, GroupTally>, key: string): GroupTally {
+  let tally = tallies.get(key)
+  if (!tally) {
+    tally = { count: 0, passed: 0 }
+    tallies.set(key, tally)
+  }
+  return tally
+}
+/** Converts tallies to the plain-object response form, adding `success_rate` to each group. */
+function withSuccessRates(
+  tallies: Map<string, GroupTally>
+): Record<string, { count: number; passed: number; success_rate: number }> {
+  // Object.fromEntries defines own properties, so keys such as '__proto__'
+  // become ordinary entries instead of touching the prototype chain.
+  return Object.fromEntries(
+    Array.from(
+      tallies,
+      ([key, tally]): [string, { count: number; passed: number; success_rate: number }] => [
+        key,
+        { ...tally, success_rate: toPercent(tally.passed, tally.count) },
+      ]
+    )
+  )
+}
+/**
+ * Aggregates benchmark results into summary statistics.
+ *
+ * Counters are accumulated in Maps rather than plain objects because the keys
+ * (status, repo, model) come from file content: with a plain object a key like
+ * 'toString' would pick up an inherited value, and '__proto__' would write
+ * counters onto Object.prototype.
+ *
+ * @param results - Result entries to aggregate; may be empty.
+ * @returns Totals, averages and per-status / per-repo / per-model breakdowns.
+ *   All rates are percentages (0-100); every figure is 0 for empty input.
+ */
+function calculateStats(results: BenchmarkResult[]): BenchmarkStats {
+  const statusCounts = new Map<string, number>()
+  const repoTallies = new Map<string, GroupTally>()
+  const modelTallies = new Map<string, GroupTally>()
+  let totalCost = 0
+  let totalTokens = 0
+  let totalDuration = 0
+  let passed = 0
+  for (const result of results) {
+    // String() mirrors the key coercion of the object-property form.
+    const status = String(result.status)
+    statusCounts.set(status, (statusCounts.get(status) || 0) + 1)
+    const repoTally = tallyFor(repoTallies, String(result.repo))
+    const modelTally = tallyFor(modelTallies, String(result.model))
+    repoTally.count++
+    modelTally.count++
+    // Entries without agent metrics still count as runs but add no metrics.
+    if (!result.kepler) continue
+    totalCost += result.kepler.cost || 0
+    totalTokens += result.kepler.tokens_used || 0
+    totalDuration += result.kepler.duration_seconds || 0
+    // NOTE(review): "passed" is derived from kepler.status, while failed/error
+    // below come from the top-level status - confirm the two are meant to differ.
+    if (result.kepler.status === 'success') {
+      passed++
+      repoTally.passed++
+      modelTally.passed++
+    }
+  }
+  const runCount = results.length
+  return {
+    total_runs: runCount,
+    passed,
+    failed: statusCounts.get('failed') || 0,
+    error: statusCounts.get('error') || 0,
+    success_rate: toPercent(passed, runCount),
+    avg_duration: runCount > 0 ? totalDuration / runCount : 0,
+    total_cost: totalCost,
+    total_tokens: totalTokens,
+    avg_tokens_per_run: runCount > 0 ? totalTokens / runCount : 0,
+    by_status: Object.fromEntries(statusCounts),
+    by_repo: withSuccessRates(repoTallies),
+    by_model: withSuccessRates(modelTallies),
+  }
+}
+/**
+ * GET handler for the benchmarks endpoint.
+ *
+ * Optional query parameters `repo`, `model` and `status` narrow the result set
+ * by exact match. `format=detailed` returns the matching entries themselves;
+ * any other value returns aggregate statistics together with the applied filters.
+ */
+export async function GET(request: Request) {
+  const query = new URL(request.url).searchParams
+  const format = query.get('format') || 'summary'
+  const repo = query.get('repo')
+  const model = query.get('model')
+  const status = query.get('status')
+  const allResults = await loadBenchmarkResults()
+  // An absent or empty filter value matches every entry.
+  const filtered = allResults.filter(
+    (entry) =>
+      (!repo || entry.repo === repo) &&
+      (!model || entry.model === model) &&
+      (!status || entry.status === status)
+  )
+  if (format === 'detailed') {
+    return NextResponse.json({ results: filtered, count: filtered.length })
+  }
+  // Default: summary format
+  return NextResponse.json({
+    stats: calculateStats(filtered),
+    filters: {
+      repo: repo || null,
+      model: model || null,
+      status: status || null,
+    },
+  })
+}

package/pulse/app/benchmarks/page.tsx ADDED Viewed

@@ -0,0 +1,224 @@
+'use client'
+import { useEffect, useState } from 'react'
+import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card'
+import { Badge } from '@/components/ui/badge'
+/**
+ * Shape this page expects for the `stats` object in the `/api/benchmarks` response.
+ * NOTE(review): the fetched JSON is stored without validation, so a mismatch
+ * with the API only surfaces when a field is rendered.
+ */
+interface BenchmarkStats {
+  total_runs: number
+  passed: number
+  failed: number
+  error: number
+  // Rendered with a '%' suffix.
+  success_rate: number
+  // Rendered with an 's' suffix.
+  avg_duration: number
+  // Rendered with a '$' prefix.
+  total_cost: number
+  total_tokens: number
+  avg_tokens_per_run: number
+  // Run count per status value; drives the "Status Breakdown" card.
+  by_status: Record<string, number>
+  by_repo: Record<string, { count: number; passed: number; success_rate: number }>
+  by_model: Record<string, { count: number; passed: number; success_rate: number }>
+}
+/** Expected JSON body of `GET /api/benchmarks`; this page only reads `stats`. */
+interface BenchmarkResponse {
+  stats: BenchmarkStats
+  filters: {
+    repo: string | null
+    model: string | null
+    status: string | null
+  }
+}
+/** Per-group figures shown in the repository and model cards. */
+type GroupStats = { count: number; passed: number; success_rate: number }
+/** Maps a run status to the badge variant used in the status breakdown. */
+function badgeVariantFor(status: string): 'default' | 'destructive' | 'secondary' {
+  if (status === 'success') return 'default'
+  if (status === 'failed') return 'destructive'
+  return 'secondary'
+}
+/**
+ * Card listing one row per group (repository or model), largest group first,
+ * with its run count, pass count and success rate.
+ */
+function GroupBreakdownCard({
+  title,
+  description,
+  groups,
+}: {
+  title: string
+  description: string
+  groups: Record<string, GroupStats>
+}) {
+  return (
+    <Card>
+      <CardHeader>
+        <CardTitle>{title}</CardTitle>
+        <CardDescription>{description}</CardDescription>
+      </CardHeader>
+      <CardContent>
+        <div className="space-y-4">
+          {Object.entries(groups)
+            .sort((a, b) => b[1].count - a[1].count)
+            .map(([name, group]) => (
+              <div key={name} className="flex items-center justify-between border-b pb-3 last:border-0">
+                <div>
+                  <p className="font-medium text-sm">{name}</p>
+                  <p className="text-xs text-muted-foreground">
+                    {group.count} runs, {group.passed} passed
+                  </p>
+                </div>
+                <div className="text-right">
+                  <p className="font-bold text-sm">{group.success_rate.toFixed(1)}%</p>
+                </div>
+              </div>
+            ))}
+        </div>
+      </CardContent>
+    </Card>
+  )
+}
+/**
+ * Benchmarks dashboard page.
+ *
+ * Fetches the summary from `/api/benchmarks` once on mount and renders key
+ * metrics plus breakdowns by status, repository and model. Shows a centered
+ * message while loading, on failure, or when no data came back.
+ */
+export default function BenchmarksPage() {
+  const [data, setData] = useState<BenchmarkResponse | null>(null)
+  const [loading, setLoading] = useState(true)
+  const [error, setError] = useState<string | null>(null)
+  useEffect(() => {
+    // Cancel the request on unmount so no state is set on a dead component
+    // (this also covers the extra mount/unmount cycle of React Strict Mode).
+    const controller = new AbortController()
+    const fetchBenchmarks = async () => {
+      try {
+        const response = await fetch('/api/benchmarks', { signal: controller.signal })
+        if (!response.ok) {
+          throw new Error('Failed to fetch benchmarks')
+        }
+        const json = await response.json()
+        setData(json)
+      } catch (err) {
+        // An aborted request is not an error worth showing.
+        if (controller.signal.aborted) return
+        setError(err instanceof Error ? err.message : 'Unknown error')
+      } finally {
+        if (!controller.signal.aborted) {
+          setLoading(false)
+        }
+      }
+    }
+    void fetchBenchmarks()
+    return () => controller.abort()
+  }, [])
+  if (loading) {
+    return (
+      <div className="flex items-center justify-center min-h-screen">
+        <p className="text-muted-foreground">Loading benchmarks...</p>
+      </div>
+    )
+  }
+  if (error) {
+    return (
+      <div className="flex items-center justify-center min-h-screen">
+        <p className="text-destructive">Error: {error}</p>
+      </div>
+    )
+  }
+  if (!data) {
+    return (
+      <div className="flex items-center justify-center min-h-screen">
+        <p className="text-muted-foreground">No benchmark data available</p>
+      </div>
+    )
+  }
+  const stats = data.stats
+  return (
+    <div className="space-y-6 p-6">
+      <div>
+        <h1 className="text-3xl font-bold tracking-tight">Benchmarks</h1>
+        <p className="text-muted-foreground mt-2">SWE-Bench v4 Flash 300 Results</p>
+      </div>
+      {/* Key Metrics */}
+      <div className="grid gap-4 md:grid-cols-2 lg:grid-cols-4">
+        <Card>
+          <CardHeader className="pb-2">
+            <CardTitle className="text-sm font-medium">Total Runs</CardTitle>
+          </CardHeader>
+          <CardContent>
+            <div className="text-2xl font-bold">{stats.total_runs}</div>
+          </CardContent>
+        </Card>
+        <Card>
+          <CardHeader className="pb-2">
+            <CardTitle className="text-sm font-medium">Success Rate</CardTitle>
+          </CardHeader>
+          <CardContent>
+            <div className="text-2xl font-bold">{stats.success_rate.toFixed(1)}%</div>
+            <p className="text-xs text-muted-foreground mt-1">
+              {stats.passed} passed, {stats.failed} failed
+            </p>
+          </CardContent>
+        </Card>
+        <Card>
+          <CardHeader className="pb-2">
+            <CardTitle className="text-sm font-medium">Total Cost</CardTitle>
+          </CardHeader>
+          <CardContent>
+            <div className="text-2xl font-bold">${stats.total_cost.toFixed(2)}</div>
+            <p className="text-xs text-muted-foreground mt-1">
+              {stats.avg_tokens_per_run.toFixed(0)} tokens/run
+            </p>
+          </CardContent>
+        </Card>
+        <Card>
+          <CardHeader className="pb-2">
+            <CardTitle className="text-sm font-medium">Avg Duration</CardTitle>
+          </CardHeader>
+          <CardContent>
+            <div className="text-2xl font-bold">{stats.avg_duration.toFixed(1)}s</div>
+            <p className="text-xs text-muted-foreground mt-1">
+              {(stats.total_tokens / 1000).toFixed(1)}K tokens total
+            </p>
+          </CardContent>
+        </Card>
+      </div>
+      {/* Status Breakdown */}
+      <Card>
+        <CardHeader>
+          <CardTitle>Status Breakdown</CardTitle>
+          <CardDescription>Distribution of run statuses</CardDescription>
+        </CardHeader>
+        <CardContent>
+          <div className="space-y-3">
+            {Object.entries(stats.by_status).map(([status, count]) => (
+              <div key={status} className="flex items-center justify-between">
+                <div className="flex items-center gap-2">
+                  <Badge variant={badgeVariantFor(status)}>
+                    {status}
+                  </Badge>
+                  <span className="text-sm text-muted-foreground">{count} runs</span>
+                </div>
+                <span className="text-sm font-medium">
+                  {((count / stats.total_runs) * 100).toFixed(1)}%
+                </span>
+              </div>
+            ))}
+          </div>
+        </CardContent>
+      </Card>
+      {/* By Repository */}
+      <GroupBreakdownCard
+        title="Performance by Repository"
+        description="Success rate and run count per repository"
+        groups={stats.by_repo}
+      />
+      {/* By Model */}
+      <GroupBreakdownCard
+        title="Performance by Model"
+        description="Success rate and run count per model"
+        groups={stats.by_model}
+      />
+    </div>
+  )
+}

package/pulse/components/layout/bottom-nav.tsx CHANGED Viewed

@@ -4,7 +4,7 @@ import Link from 'next/link'
 import { usePathname } from 'next/navigation'
 import {
   LayoutDashboard, MessageSquare, DollarSign,
-  FolderOpen, Activity, Moon, Sun,
+  FolderOpen, Activity, Moon, Sun, Zap,
 } from 'lucide-react'
 import { useTheme } from '@/components/theme-provider'
 import { cn } from '@/lib/utils'
@@ -15,6 +15,7 @@ const NAV = [
   { href: '/costs',    label: 'Costs',     icon: DollarSign      },
   { href: '/projects', label: 'Projects',  icon: FolderOpen      },
   { href: '/activity', label: 'Activity',  icon: Activity        },
+  { href: '/benchmarks', label: 'Benchmarks', icon: Zap          },
 ]
 export function BottomNav() {

package/pulse/components/layout/sidebar.tsx CHANGED Viewed

@@ -5,7 +5,7 @@ import { usePathname } from 'next/navigation'
 import {
   LayoutDashboard, FolderOpen, MessageSquare, DollarSign,
   Wrench, Activity, History, CheckSquare, FileText,
-  Brain, Settings, Download, HelpCircle, Moon, Sun, PanelLeftClose, PanelLeft,
+  Brain, Settings, Download, HelpCircle, Moon, Sun, PanelLeftClose, PanelLeft, Zap,
 } from 'lucide-react'
 import { useTheme } from '@/components/theme-provider'
 import { useSidebar } from '@/components/layout/sidebar-context'
@@ -24,6 +24,7 @@ const NAV = [
   { href: '/todos',    label: 'Todos',     icon: CheckSquare     },
   { href: '/plans',    label: 'Plans',     icon: FileText        },
   { href: '/memory',   label: 'Memory',    icon: Brain           },
+  { href: '/benchmarks', label: 'Benchmarks', icon: Zap          },
   { href: '/settings', label: 'Settings',  icon: Settings        },
   { href: '/help',     label: 'Help',      icon: HelpCircle      },
   { href: '/export',   label: 'Export',    icon: Download        },

package/src/context/retriever.mjs CHANGED Viewed

@@ -4,12 +4,14 @@
  */
 import { BM25Index } from './bm25.mjs';
+import { SymbolIndexer } from './symbol-indexer.mjs';
 import * as fs from 'node:fs';
 import * as path from 'node:path';
 import { indexDir as getIndexDir } from '../core/paths.mjs';
 const IGNORED_DIRS = new Set(['.git', 'node_modules', '.kepler', '__pycache__', '.venv', 'venv', 'dist', 'build', '.next']);
 const CODE_EXTS = new Set(['.js', '.mjs', '.ts', '.tsx', '.py', '.go', '.rs', '.java', '.rb', '.php', '.c', '.cpp', '.h', '.css', '.html', '.json', '.yaml', '.yml', '.toml', '.md', '.sh']);
+const SYMBOL_EXTS = new Set(['.py', '.js', '.mjs', '.ts', '.tsx', '.jsx', '.go', '.rs']);
 const MAX_FILE_SIZE = 100_000; // 100KB
 const CHUNK_LINES = 50;
 const CHUNK_OVERLAP = 10;
@@ -19,20 +21,33 @@ export class ContextRetriever {
         this.projectDir = projectDir;
         this.indexDir = getIndexDir(projectDir);
         this.index = null;
+        this.symbolIndexer = null;
         this.chunkTexts = new Map(); // id → original text content
     }
-    /** Build or rebuild the search index. */
+    /** Build or rebuild the search index (BM25 chunks + symbol index). */
     async buildIndex() {
         const files = this._scanFiles(this.projectDir);
         const documents = [];
+        // Symbol indexer for AST-based search
+        this.symbolIndexer = new SymbolIndexer();
+        await this.symbolIndexer.init();
         for (const filePath of files) {
             try {
                 const content = fs.readFileSync(filePath, 'utf-8');
                 const relPath = path.relative(this.projectDir, filePath);
+                // BM25 chunks (existing behavior)
                 const chunks = this._chunkFile(content, relPath);
                 documents.push(...chunks);
+                // Symbol extraction for code files
+                const ext = path.extname(filePath).toLowerCase();
+                if (SYMBOL_EXTS.has(ext)) {
+                    await this.symbolIndexer.indexFile(relPath, content);
+                }
             } catch { /* skip unreadable files */ }
         }
@@ -45,12 +60,13 @@ export class ContextRetriever {
             this.chunkTexts.set(doc.id, doc.text);
         }
-        // Persist index + chunk texts
+        // Persist
         if (!fs.existsSync(this.indexDir)) fs.mkdirSync(this.indexDir, { recursive: true });
         fs.writeFileSync(path.join(this.indexDir, 'bm25.json'), JSON.stringify(this.index.toJSON()));
         fs.writeFileSync(path.join(this.indexDir, 'chunks.json'), JSON.stringify(Object.fromEntries(this.chunkTexts)));
+        fs.writeFileSync(path.join(this.indexDir, 'symbols.json'), JSON.stringify(this.symbolIndexer.toJSON()));
-        return { fileCount: files.length, chunkCount: documents.length };
+        return { fileCount: files.length, chunkCount: documents.length, symbolCount: this.symbolIndexer.symbolCount };
     }
     /**
@@ -120,22 +136,44 @@ export class ContextRetriever {
     loadIndex() {
         const indexPath = path.join(this.indexDir, 'bm25.json');
         const chunksPath = path.join(this.indexDir, 'chunks.json');
+        const symbolsPath = path.join(this.indexDir, 'symbols.json');
         if (!fs.existsSync(indexPath)) return false;
         try {
             const data = JSON.parse(fs.readFileSync(indexPath, 'utf-8'));
             this.index = BM25Index.fromJSON(data);
-            // Load chunk texts if available
             if (fs.existsSync(chunksPath)) {
                 const chunks = JSON.parse(fs.readFileSync(chunksPath, 'utf-8'));
                 this.chunkTexts = new Map(Object.entries(chunks));
             }
+            if (fs.existsSync(symbolsPath)) {
+                const symData = JSON.parse(fs.readFileSync(symbolsPath, 'utf-8'));
+                this.symbolIndexer = SymbolIndexer.fromJSON(symData);
+            }
             return true;
         } catch {
             return false;
         }
     }
+    /**
+     * Search symbols (functions, classes, methods) by query.
+     * Returns structured results with file:line, signature, parent class.
+     */
+    searchSymbols(query, topK = 5) {
+        if (!this.symbolIndexer) return [];
+        return this.symbolIndexer.search(query, topK);
+    }
+    /**
+     * Format symbol search results for the agent.
+     */
+    formatSymbolResults(results) {
+        if (!this.symbolIndexer || !results.length) return '';
+        return this.symbolIndexer.formatResults(results);
+    }
     /** Retrieve relevant context chunks for a query, with full text. */
     retrieve(query, topK = 10) {
         if (!this.index) {