gsd-trae 1.0.0 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +40 -0
- package/README.md +7 -76
- package/assets/screenshot.png +0 -0
- package/package.json +12 -3
- package/.claude/settings.local.json +0 -8
- package/.gitmodules +0 -6
- package/.trae/project_rules.md +0 -56
- package/.trae/rules/project_rules.md +0 -56
- package/.vscode/code-counter/code-counter.db +0 -0
- package/.vscode/settings.json +0 -5
- package/refs/gsd/.github/CODEOWNERS +0 -2
- package/refs/gsd/.github/FUNDING.yml +0 -1
- package/refs/gsd/.github/ISSUE_TEMPLATE/bug_report.yml +0 -59
- package/refs/gsd/.github/ISSUE_TEMPLATE/feature_request.yml +0 -37
- package/refs/gsd/.github/pull_request_template.md +0 -24
- package/refs/gsd/.github/workflows/auto-label-issues.yml +0 -21
- package/refs/gsd/CHANGELOG.md +0 -1520
- package/refs/gsd/LICENSE +0 -21
- package/refs/gsd/README.md +0 -704
- package/refs/gsd/SECURITY.md +0 -33
- package/refs/gsd/agents/gsd-codebase-mapper.md +0 -764
- package/refs/gsd/agents/gsd-debugger.md +0 -1246
- package/refs/gsd/agents/gsd-executor.md +0 -469
- package/refs/gsd/agents/gsd-integration-checker.md +0 -443
- package/refs/gsd/agents/gsd-phase-researcher.md +0 -546
- package/refs/gsd/agents/gsd-plan-checker.md +0 -690
- package/refs/gsd/agents/gsd-planner.md +0 -1275
- package/refs/gsd/agents/gsd-project-researcher.md +0 -621
- package/refs/gsd/agents/gsd-research-synthesizer.md +0 -239
- package/refs/gsd/agents/gsd-roadmapper.md +0 -642
- package/refs/gsd/agents/gsd-verifier.md +0 -573
- package/refs/gsd/assets/gsd-logo-2000-transparent.png +0 -0
- package/refs/gsd/assets/gsd-logo-2000-transparent.svg +0 -17
- package/refs/gsd/assets/gsd-logo-2000.png +0 -0
- package/refs/gsd/assets/gsd-logo-2000.svg +0 -21
- package/refs/gsd/assets/terminal.svg +0 -68
- package/refs/gsd/bin/install.js +0 -2090
- package/refs/gsd/commands/gsd/add-phase.md +0 -43
- package/refs/gsd/commands/gsd/add-tests.md +0 -41
- package/refs/gsd/commands/gsd/add-todo.md +0 -47
- package/refs/gsd/commands/gsd/audit-milestone.md +0 -36
- package/refs/gsd/commands/gsd/check-todos.md +0 -45
- package/refs/gsd/commands/gsd/cleanup.md +0 -18
- package/refs/gsd/commands/gsd/complete-milestone.md +0 -136
- package/refs/gsd/commands/gsd/debug.md +0 -167
- package/refs/gsd/commands/gsd/discuss-phase.md +0 -83
- package/refs/gsd/commands/gsd/execute-phase.md +0 -41
- package/refs/gsd/commands/gsd/health.md +0 -22
- package/refs/gsd/commands/gsd/help.md +0 -22
- package/refs/gsd/commands/gsd/insert-phase.md +0 -32
- package/refs/gsd/commands/gsd/join-discord.md +0 -18
- package/refs/gsd/commands/gsd/list-phase-assumptions.md +0 -46
- package/refs/gsd/commands/gsd/map-codebase.md +0 -71
- package/refs/gsd/commands/gsd/new-milestone.md +0 -44
- package/refs/gsd/commands/gsd/new-project.md +0 -42
- package/refs/gsd/commands/gsd/new-project.md.bak +0 -1041
- package/refs/gsd/commands/gsd/pause-work.md +0 -38
- package/refs/gsd/commands/gsd/plan-milestone-gaps.md +0 -34
- package/refs/gsd/commands/gsd/plan-phase.md +0 -45
- package/refs/gsd/commands/gsd/progress.md +0 -24
- package/refs/gsd/commands/gsd/quick.md +0 -41
- package/refs/gsd/commands/gsd/reapply-patches.md +0 -110
- package/refs/gsd/commands/gsd/remove-phase.md +0 -31
- package/refs/gsd/commands/gsd/research-phase.md +0 -189
- package/refs/gsd/commands/gsd/resume-work.md +0 -40
- package/refs/gsd/commands/gsd/set-profile.md +0 -34
- package/refs/gsd/commands/gsd/settings.md +0 -36
- package/refs/gsd/commands/gsd/update.md +0 -37
- package/refs/gsd/commands/gsd/verify-work.md +0 -38
- package/refs/gsd/docs/USER-GUIDE.md +0 -471
- package/refs/gsd/docs/context-monitor.md +0 -96
- package/refs/gsd/get-shit-done/bin/gsd-tools.cjs +0 -585
- package/refs/gsd/get-shit-done/bin/lib/commands.cjs +0 -553
- package/refs/gsd/get-shit-done/bin/lib/config.cjs +0 -162
- package/refs/gsd/get-shit-done/bin/lib/core.cjs +0 -411
- package/refs/gsd/get-shit-done/bin/lib/frontmatter.cjs +0 -299
- package/refs/gsd/get-shit-done/bin/lib/init.cjs +0 -710
- package/refs/gsd/get-shit-done/bin/lib/milestone.cjs +0 -215
- package/refs/gsd/get-shit-done/bin/lib/phase.cjs +0 -870
- package/refs/gsd/get-shit-done/bin/lib/roadmap.cjs +0 -298
- package/refs/gsd/get-shit-done/bin/lib/state.cjs +0 -521
- package/refs/gsd/get-shit-done/bin/lib/template.cjs +0 -222
- package/refs/gsd/get-shit-done/bin/lib/verify.cjs +0 -772
- package/refs/gsd/get-shit-done/references/checkpoints.md +0 -776
- package/refs/gsd/get-shit-done/references/continuation-format.md +0 -249
- package/refs/gsd/get-shit-done/references/decimal-phase-calculation.md +0 -65
- package/refs/gsd/get-shit-done/references/git-integration.md +0 -248
- package/refs/gsd/get-shit-done/references/git-planning-commit.md +0 -38
- package/refs/gsd/get-shit-done/references/model-profile-resolution.md +0 -34
- package/refs/gsd/get-shit-done/references/model-profiles.md +0 -92
- package/refs/gsd/get-shit-done/references/phase-argument-parsing.md +0 -61
- package/refs/gsd/get-shit-done/references/planning-config.md +0 -196
- package/refs/gsd/get-shit-done/references/questioning.md +0 -145
- package/refs/gsd/get-shit-done/references/tdd.md +0 -263
- package/refs/gsd/get-shit-done/references/ui-brand.md +0 -160
- package/refs/gsd/get-shit-done/references/verification-patterns.md +0 -612
- package/refs/gsd/get-shit-done/templates/DEBUG.md +0 -164
- package/refs/gsd/get-shit-done/templates/UAT.md +0 -247
- package/refs/gsd/get-shit-done/templates/VALIDATION.md +0 -76
- package/refs/gsd/get-shit-done/templates/codebase/architecture.md +0 -255
- package/refs/gsd/get-shit-done/templates/codebase/concerns.md +0 -310
- package/refs/gsd/get-shit-done/templates/codebase/conventions.md +0 -307
- package/refs/gsd/get-shit-done/templates/codebase/integrations.md +0 -280
- package/refs/gsd/get-shit-done/templates/codebase/stack.md +0 -186
- package/refs/gsd/get-shit-done/templates/codebase/structure.md +0 -285
- package/refs/gsd/get-shit-done/templates/codebase/testing.md +0 -480
- package/refs/gsd/get-shit-done/templates/config.json +0 -37
- package/refs/gsd/get-shit-done/templates/context.md +0 -283
- package/refs/gsd/get-shit-done/templates/continue-here.md +0 -78
- package/refs/gsd/get-shit-done/templates/debug-subagent-prompt.md +0 -91
- package/refs/gsd/get-shit-done/templates/discovery.md +0 -146
- package/refs/gsd/get-shit-done/templates/milestone-archive.md +0 -123
- package/refs/gsd/get-shit-done/templates/milestone.md +0 -115
- package/refs/gsd/get-shit-done/templates/phase-prompt.md +0 -569
- package/refs/gsd/get-shit-done/templates/planner-subagent-prompt.md +0 -117
- package/refs/gsd/get-shit-done/templates/project.md +0 -184
- package/refs/gsd/get-shit-done/templates/requirements.md +0 -231
- package/refs/gsd/get-shit-done/templates/research-project/ARCHITECTURE.md +0 -204
- package/refs/gsd/get-shit-done/templates/research-project/FEATURES.md +0 -147
- package/refs/gsd/get-shit-done/templates/research-project/PITFALLS.md +0 -200
- package/refs/gsd/get-shit-done/templates/research-project/STACK.md +0 -120
- package/refs/gsd/get-shit-done/templates/research-project/SUMMARY.md +0 -170
- package/refs/gsd/get-shit-done/templates/research.md +0 -552
- package/refs/gsd/get-shit-done/templates/retrospective.md +0 -54
- package/refs/gsd/get-shit-done/templates/roadmap.md +0 -202
- package/refs/gsd/get-shit-done/templates/state.md +0 -176
- package/refs/gsd/get-shit-done/templates/summary-complex.md +0 -59
- package/refs/gsd/get-shit-done/templates/summary-minimal.md +0 -41
- package/refs/gsd/get-shit-done/templates/summary-standard.md +0 -48
- package/refs/gsd/get-shit-done/templates/summary.md +0 -248
- package/refs/gsd/get-shit-done/templates/user-setup.md +0 -311
- package/refs/gsd/get-shit-done/templates/verification-report.md +0 -322
- package/refs/gsd/get-shit-done/workflows/add-phase.md +0 -111
- package/refs/gsd/get-shit-done/workflows/add-tests.md +0 -350
- package/refs/gsd/get-shit-done/workflows/add-todo.md +0 -157
- package/refs/gsd/get-shit-done/workflows/audit-milestone.md +0 -297
- package/refs/gsd/get-shit-done/workflows/check-todos.md +0 -176
- package/refs/gsd/get-shit-done/workflows/cleanup.md +0 -152
- package/refs/gsd/get-shit-done/workflows/complete-milestone.md +0 -763
- package/refs/gsd/get-shit-done/workflows/diagnose-issues.md +0 -219
- package/refs/gsd/get-shit-done/workflows/discovery-phase.md +0 -289
- package/refs/gsd/get-shit-done/workflows/discuss-phase.md +0 -542
- package/refs/gsd/get-shit-done/workflows/execute-phase.md +0 -449
- package/refs/gsd/get-shit-done/workflows/execute-plan.md +0 -448
- package/refs/gsd/get-shit-done/workflows/health.md +0 -156
- package/refs/gsd/get-shit-done/workflows/help.md +0 -489
- package/refs/gsd/get-shit-done/workflows/insert-phase.md +0 -129
- package/refs/gsd/get-shit-done/workflows/list-phase-assumptions.md +0 -178
- package/refs/gsd/get-shit-done/workflows/map-codebase.md +0 -315
- package/refs/gsd/get-shit-done/workflows/new-milestone.md +0 -382
- package/refs/gsd/get-shit-done/workflows/new-project.md +0 -1116
- package/refs/gsd/get-shit-done/workflows/pause-work.md +0 -122
- package/refs/gsd/get-shit-done/workflows/plan-milestone-gaps.md +0 -274
- package/refs/gsd/get-shit-done/workflows/plan-phase.md +0 -569
- package/refs/gsd/get-shit-done/workflows/progress.md +0 -381
- package/refs/gsd/get-shit-done/workflows/quick.md +0 -453
- package/refs/gsd/get-shit-done/workflows/remove-phase.md +0 -154
- package/refs/gsd/get-shit-done/workflows/research-phase.md +0 -73
- package/refs/gsd/get-shit-done/workflows/resume-project.md +0 -306
- package/refs/gsd/get-shit-done/workflows/set-profile.md +0 -80
- package/refs/gsd/get-shit-done/workflows/settings.md +0 -213
- package/refs/gsd/get-shit-done/workflows/transition.md +0 -544
- package/refs/gsd/get-shit-done/workflows/update.md +0 -219
- package/refs/gsd/get-shit-done/workflows/verify-phase.md +0 -242
- package/refs/gsd/get-shit-done/workflows/verify-work.md +0 -569
- package/refs/gsd/hooks/gsd-check-update.js +0 -62
- package/refs/gsd/hooks/gsd-context-monitor.js +0 -122
- package/refs/gsd/hooks/gsd-statusline.js +0 -108
- package/refs/gsd/package.json +0 -50
- package/refs/gsd/scripts/build-hooks.js +0 -43
- package/refs/gsd/tests/commands.test.cjs +0 -661
- package/refs/gsd/tests/helpers.cjs +0 -40
- package/refs/gsd/tests/init.test.cjs +0 -205
- package/refs/gsd/tests/milestone.test.cjs +0 -98
- package/refs/gsd/tests/phase.test.cjs +0 -1241
- package/refs/gsd/tests/roadmap.test.cjs +0 -265
- package/refs/gsd/tests/state.test.cjs +0 -302
- package/refs/gsd/tests/verify.test.cjs +0 -80
- package/refs/vbenchmark/.agent/agents/codebase-explorer.md +0 -224
- package/refs/vbenchmark/.agent/agents/debugger.md +0 -180
- package/refs/vbenchmark/.agent/agents/documenter.md +0 -166
- package/refs/vbenchmark/.agent/agents/implementer.md +0 -70
- package/refs/vbenchmark/.agent/agents/orchestrator.md +0 -212
- package/refs/vbenchmark/.agent/agents/researcher.md +0 -80
- package/refs/vbenchmark/.agent/agents/reviewer.md +0 -184
- package/refs/vbenchmark/.agent/agents/tester.md +0 -170
- package/refs/vbenchmark/.agent/commands/commit.md +0 -29
- package/refs/vbenchmark/.agent/commands/debug.md +0 -59
- package/refs/vbenchmark/.agent/commands/document.md +0 -52
- package/refs/vbenchmark/.agent/commands/gather-context.md +0 -58
- package/refs/vbenchmark/.agent/commands/init.md +0 -56
- package/refs/vbenchmark/.agent/commands/preset-help.md +0 -50
- package/refs/vbenchmark/.agent/commands/refactor.md +0 -71
- package/refs/vbenchmark/.agent/commands/research.md +0 -37
- package/refs/vbenchmark/.agent/commands/review.md +0 -38
- package/refs/vbenchmark/.agent/commands/test.md +0 -61
- package/refs/vbenchmark/.agent/rules/01-code-quality.md +0 -33
- package/refs/vbenchmark/.agent/rules/02-typescript-go.md +0 -46
- package/refs/vbenchmark/.agent/rules/03-security-git.md +0 -34
- package/refs/vbenchmark/.agent/rules/04-architecture.md +0 -40
- package/refs/vbenchmark/.agent/sync.js +0 -536
- package/refs/vbenchmark/.agent/workflows/commit.md +0 -29
- package/refs/vbenchmark/.agent/workflows/debug.md +0 -59
- package/refs/vbenchmark/.agent/workflows/document.md +0 -52
- package/refs/vbenchmark/.agent/workflows/gather-context.md +0 -58
- package/refs/vbenchmark/.agent/workflows/init.md +0 -56
- package/refs/vbenchmark/.agent/workflows/preset-help.md +0 -50
- package/refs/vbenchmark/.agent/workflows/refactor.md +0 -71
- package/refs/vbenchmark/.agent/workflows/research.md +0 -37
- package/refs/vbenchmark/.agent/workflows/review.md +0 -38
- package/refs/vbenchmark/.agent/workflows/test.md +0 -61
- package/refs/vbenchmark/.claude/commands/agentic-dev/apply.md +0 -222
- package/refs/vbenchmark/.claude/commands/agentic-dev/done.md +0 -166
- package/refs/vbenchmark/.claude/commands/agentic-dev/proposal.md +0 -220
- package/refs/vbenchmark/.claude/commands/openspec/apply.md +0 -23
- package/refs/vbenchmark/.claude/commands/openspec/archive.md +0 -27
- package/refs/vbenchmark/.claude/commands/openspec/proposal.md +0 -28
- package/refs/vbenchmark/.clinerules/01-rules.md +0 -73
- package/refs/vbenchmark/.clinerules/02-agents.md +0 -34
- package/refs/vbenchmark/.cursor/commands/commit.md +0 -29
- package/refs/vbenchmark/.cursor/commands/debug.md +0 -59
- package/refs/vbenchmark/.cursor/commands/document.md +0 -52
- package/refs/vbenchmark/.cursor/commands/gather-context.md +0 -58
- package/refs/vbenchmark/.cursor/commands/init.md +0 -56
- package/refs/vbenchmark/.cursor/commands/preset-help.md +0 -50
- package/refs/vbenchmark/.cursor/commands/refactor.md +0 -71
- package/refs/vbenchmark/.cursor/commands/research.md +0 -37
- package/refs/vbenchmark/.cursor/commands/review.md +0 -38
- package/refs/vbenchmark/.cursor/commands/test.md +0 -61
- package/refs/vbenchmark/.cursor/rules/agents.mdc +0 -1357
- package/refs/vbenchmark/.factory/droids/codebase-explorer.md +0 -224
- package/refs/vbenchmark/.factory/droids/debugger.md +0 -180
- package/refs/vbenchmark/.factory/droids/documenter.md +0 -166
- package/refs/vbenchmark/.factory/droids/implementer.md +0 -70
- package/refs/vbenchmark/.factory/droids/orchestrator.md +0 -212
- package/refs/vbenchmark/.factory/droids/researcher.md +0 -80
- package/refs/vbenchmark/.factory/droids/reviewer.md +0 -184
- package/refs/vbenchmark/.factory/droids/tester.md +0 -170
- package/refs/vbenchmark/.gemini/workflows/commit.md +0 -29
- package/refs/vbenchmark/.gemini/workflows/debug.md +0 -59
- package/refs/vbenchmark/.gemini/workflows/document.md +0 -52
- package/refs/vbenchmark/.gemini/workflows/gather-context.md +0 -58
- package/refs/vbenchmark/.gemini/workflows/init.md +0 -56
- package/refs/vbenchmark/.gemini/workflows/preset-help.md +0 -50
- package/refs/vbenchmark/.gemini/workflows/refactor.md +0 -71
- package/refs/vbenchmark/.gemini/workflows/research.md +0 -37
- package/refs/vbenchmark/.gemini/workflows/review.md +0 -38
- package/refs/vbenchmark/.gemini/workflows/test.md +0 -61
- package/refs/vbenchmark/.github/CODEOWNERS +0 -20
- package/refs/vbenchmark/.github/FUNDING.yml +0 -4
- package/refs/vbenchmark/.github/ISSUE_TEMPLATE/bug-report.yml +0 -76
- package/refs/vbenchmark/.github/ISSUE_TEMPLATE/new-task.yml +0 -106
- package/refs/vbenchmark/.github/PULL_REQUEST_TEMPLATE.md +0 -38
- package/refs/vbenchmark/.github/copilot-instructions.md +0 -73
- package/refs/vbenchmark/.github/workflows/ci.yaml +0 -33
- package/refs/vbenchmark/.github/workflows/vercel-auto-pr.yml +0 -478
- package/refs/vbenchmark/.github/workflows/vercel-deploy.yaml +0 -487
- package/refs/vbenchmark/.github/workflows/vercel-pr-command.yaml +0 -337
- package/refs/vbenchmark/.github/workflows/vercel-project-init.yaml +0 -208
- package/refs/vbenchmark/.opencode/agent/codebase-explorer.md +0 -224
- package/refs/vbenchmark/.opencode/agent/debugger.md +0 -180
- package/refs/vbenchmark/.opencode/agent/documenter.md +0 -166
- package/refs/vbenchmark/.opencode/agent/implementer.md +0 -70
- package/refs/vbenchmark/.opencode/agent/orchestrator.md +0 -212
- package/refs/vbenchmark/.opencode/agent/researcher.md +0 -80
- package/refs/vbenchmark/.opencode/agent/reviewer.md +0 -184
- package/refs/vbenchmark/.opencode/agent/tester.md +0 -170
- package/refs/vbenchmark/.opencode/command/commit.md +0 -29
- package/refs/vbenchmark/.opencode/command/debug.md +0 -59
- package/refs/vbenchmark/.opencode/command/document.md +0 -52
- package/refs/vbenchmark/.opencode/command/gather-context.md +0 -58
- package/refs/vbenchmark/.opencode/command/init.md +0 -56
- package/refs/vbenchmark/.opencode/command/preset-help.md +0 -50
- package/refs/vbenchmark/.opencode/command/refactor.md +0 -71
- package/refs/vbenchmark/.opencode/command/research.md +0 -37
- package/refs/vbenchmark/.opencode/command/review.md +0 -38
- package/refs/vbenchmark/.opencode/command/test.md +0 -61
- package/refs/vbenchmark/.trae/project_rules.md +0 -73
- package/refs/vbenchmark/.windsurf/rules/rules.md +0 -85
- package/refs/vbenchmark/AGENTS.md +0 -73
- package/refs/vbenchmark/CONTRIBUTING.md +0 -332
- package/refs/vbenchmark/Caddyfile +0 -3
- package/refs/vbenchmark/LICENSE +0 -47
- package/refs/vbenchmark/README.md +0 -354
- package/refs/vbenchmark/docker-compose.prod.yaml +0 -35
- package/refs/vbenchmark/docker-compose.yaml +0 -53
- package/refs/vbenchmark/docs/TASK_EXPANSION_PLAN.md +0 -211
- package/refs/vbenchmark/docs/THESIS.md +0 -441
- package/refs/vbenchmark/docs/categories/code-evolution.md +0 -138
- package/refs/vbenchmark/openspec/changes/init-vibecodingbench/design.md +0 -111
- package/refs/vbenchmark/openspec/changes/init-vibecodingbench/proposal.md +0 -15
- package/refs/vbenchmark/openspec/changes/init-vibecodingbench/specs/evaluation/spec.md +0 -105
- package/refs/vbenchmark/openspec/changes/init-vibecodingbench/specs/leaderboard/spec.md +0 -68
- package/refs/vbenchmark/openspec/changes/init-vibecodingbench/specs/task-definition/spec.md +0 -45
- package/refs/vbenchmark/openspec/changes/init-vibecodingbench/specs/task-runner/spec.md +0 -49
- package/refs/vbenchmark/openspec/changes/init-vibecodingbench/tasks.md +0 -413
- package/refs/vbenchmark/package.json +0 -51
- package/refs/vbenchmark/packages/cli/eslint.config.js +0 -16
- package/refs/vbenchmark/packages/cli/package.json +0 -35
- package/refs/vbenchmark/packages/cli/src/agents/index.ts +0 -655
- package/refs/vbenchmark/packages/cli/src/commands/eval.ts +0 -197
- package/refs/vbenchmark/packages/cli/src/commands/list.ts +0 -63
- package/refs/vbenchmark/packages/cli/src/commands/run.ts +0 -147
- package/refs/vbenchmark/packages/cli/src/evaluator.ts +0 -125
- package/refs/vbenchmark/packages/cli/src/index.ts +0 -21
- package/refs/vbenchmark/packages/cli/src/lib/task-variation.ts +0 -153
- package/refs/vbenchmark/packages/cli/src/loader.ts +0 -258
- package/refs/vbenchmark/packages/cli/src/reporter.ts +0 -222
- package/refs/vbenchmark/packages/cli/src/runtime/docker.ts +0 -385
- package/refs/vbenchmark/packages/cli/tsconfig.json +0 -8
- package/refs/vbenchmark/packages/dashboard/Dockerfile +0 -42
- package/refs/vbenchmark/packages/dashboard/index.html +0 -21
- package/refs/vbenchmark/packages/dashboard/package.json +0 -29
- package/refs/vbenchmark/packages/dashboard/postcss.config.js +0 -6
- package/refs/vbenchmark/packages/dashboard/public/favicon.svg +0 -24
- package/refs/vbenchmark/packages/dashboard/public/logo.png +0 -0
- package/refs/vbenchmark/packages/dashboard/public/logo.svg +0 -39
- package/refs/vbenchmark/packages/dashboard/src/App.tsx +0 -1468
- package/refs/vbenchmark/packages/dashboard/src/data/category-performance.json +0 -1
- package/refs/vbenchmark/packages/dashboard/src/data/leaderboard.json +0 -1
- package/refs/vbenchmark/packages/dashboard/src/data/task-results.json +0 -1
- package/refs/vbenchmark/packages/dashboard/src/data/tasks.json +0 -1
- package/refs/vbenchmark/packages/dashboard/src/index.css +0 -3
- package/refs/vbenchmark/packages/dashboard/src/main.tsx +0 -13
- package/refs/vbenchmark/packages/dashboard/src/vite-env.d.ts +0 -9
- package/refs/vbenchmark/packages/dashboard/tailwind.config.js +0 -11
- package/refs/vbenchmark/packages/dashboard/tsconfig.json +0 -21
- package/refs/vbenchmark/packages/dashboard/tsconfig.node.json +0 -11
- package/refs/vbenchmark/packages/dashboard/vercel.json +0 -6
- package/refs/vbenchmark/packages/dashboard/vite.config.ts +0 -28
- package/refs/vbenchmark/packages/evaluator/eslint.config.js +0 -16
- package/refs/vbenchmark/packages/evaluator/package.json +0 -24
- package/refs/vbenchmark/packages/evaluator/src/index.ts +0 -15
- package/refs/vbenchmark/packages/evaluator/src/runners/functional.ts +0 -88
- package/refs/vbenchmark/packages/evaluator/src/runners/quality.ts +0 -140
- package/refs/vbenchmark/packages/evaluator/src/runners/security.ts +0 -94
- package/refs/vbenchmark/packages/evaluator/src/runners/visual.ts +0 -108
- package/refs/vbenchmark/packages/evaluator/src/types.d.ts +0 -19
- package/refs/vbenchmark/packages/evaluator/tsconfig.json +0 -8
- package/refs/vbenchmark/packages/leaderboard/Dockerfile +0 -38
- package/refs/vbenchmark/packages/leaderboard/drizzle.config.ts +0 -10
- package/refs/vbenchmark/packages/leaderboard/eslint.config.js +0 -16
- package/refs/vbenchmark/packages/leaderboard/fly.toml +0 -29
- package/refs/vbenchmark/packages/leaderboard/package.json +0 -36
- package/refs/vbenchmark/packages/leaderboard/src/app.ts +0 -29
- package/refs/vbenchmark/packages/leaderboard/src/components/BrowserPreview.tsx +0 -190
- package/refs/vbenchmark/packages/leaderboard/src/components/ComparisonView.tsx +0 -205
- package/refs/vbenchmark/packages/leaderboard/src/components/LeaderboardTable.tsx +0 -150
- package/refs/vbenchmark/packages/leaderboard/src/components/LiveRunCard.tsx +0 -133
- package/refs/vbenchmark/packages/leaderboard/src/components/SubmissionForm.tsx +0 -406
- package/refs/vbenchmark/packages/leaderboard/src/components/SubmitForm.tsx +0 -293
- package/refs/vbenchmark/packages/leaderboard/src/components/TerminalStream.tsx +0 -111
- package/refs/vbenchmark/packages/leaderboard/src/config/pricing.ts +0 -206
- package/refs/vbenchmark/packages/leaderboard/src/db/index.ts +0 -31
- package/refs/vbenchmark/packages/leaderboard/src/db/schema.ts +0 -125
- package/refs/vbenchmark/packages/leaderboard/src/index.ts +0 -13
- package/refs/vbenchmark/packages/leaderboard/src/lib/websocket.ts +0 -124
- package/refs/vbenchmark/packages/leaderboard/src/routes/leaderboard.ts +0 -698
- package/refs/vbenchmark/packages/leaderboard/src/routes/live.ts +0 -175
- package/refs/vbenchmark/packages/leaderboard/src/routes/submissions.ts +0 -183
- package/refs/vbenchmark/packages/leaderboard/src/routes/tasks.ts +0 -215
- package/refs/vbenchmark/packages/leaderboard/tests/api.test.ts +0 -228
- package/refs/vbenchmark/packages/leaderboard/tsconfig.json +0 -9
- package/refs/vbenchmark/scripts/deploy.sh +0 -70
- package/refs/vbenchmark/tasks/ai-integration/advanced/context-management/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/ai-integration/advanced/context-management/task.yaml +0 -16
- package/refs/vbenchmark/tasks/ai-integration/advanced/evaluation-framework/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/ai-integration/advanced/evaluation-framework/task.yaml +0 -16
- package/refs/vbenchmark/tasks/ai-integration/advanced/guardrails-safety/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/ai-integration/advanced/guardrails-safety/task.yaml +0 -16
- package/refs/vbenchmark/tasks/ai-integration/advanced/memory-system/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/ai-integration/advanced/memory-system/task.yaml +0 -16
- package/refs/vbenchmark/tasks/ai-integration/advanced/model-routing/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/ai-integration/advanced/model-routing/task.yaml +0 -16
- package/refs/vbenchmark/tasks/ai-integration/advanced/multi-agent-system/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/ai-integration/advanced/multi-agent-system/task.yaml +0 -16
- package/refs/vbenchmark/tasks/ai-integration/advanced/prompt-optimization/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/ai-integration/advanced/prompt-optimization/task.yaml +0 -16
- package/refs/vbenchmark/tasks/ai-integration/advanced/reasoning-chain/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/ai-integration/advanced/reasoning-chain/task.yaml +0 -16
- package/refs/vbenchmark/tasks/ai-integration/advanced/streaming-pipeline/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/ai-integration/advanced/streaming-pipeline/task.yaml +0 -16
- package/refs/vbenchmark/tasks/ai-integration/advanced/tool-use-orchestration/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/ai-integration/advanced/tool-use-orchestration/task.yaml +0 -16
- package/refs/vbenchmark/tasks/ai-integration/agents/code-review-agent/PROMPT.md +0 -64
- package/refs/vbenchmark/tasks/ai-integration/agents/code-review-agent/task.yaml +0 -24
- package/refs/vbenchmark/tasks/ai-integration/agents/research-agent/PROMPT.md +0 -61
- package/refs/vbenchmark/tasks/ai-integration/agents/research-agent/task.yaml +0 -24
- package/refs/vbenchmark/tasks/ai-integration/agents/web-scraper-agent/PROMPT.md +0 -57
- package/refs/vbenchmark/tasks/ai-integration/agents/web-scraper-agent/task.yaml +0 -24
- package/refs/vbenchmark/tasks/ai-integration/embeddings/duplicate-detection/PROMPT.md +0 -50
- package/refs/vbenchmark/tasks/ai-integration/embeddings/duplicate-detection/task.yaml +0 -24
- package/refs/vbenchmark/tasks/ai-integration/embeddings/recommendation-engine/PROMPT.md +0 -51
- package/refs/vbenchmark/tasks/ai-integration/embeddings/recommendation-engine/task.yaml +0 -24
- package/refs/vbenchmark/tasks/ai-integration/embeddings/semantic-search/PROMPT.md +0 -50
- package/refs/vbenchmark/tasks/ai-integration/embeddings/semantic-search/task.yaml +0 -24
- package/refs/vbenchmark/tasks/ai-integration/fine-tuning/classification-model/PROMPT.md +0 -50
- package/refs/vbenchmark/tasks/ai-integration/fine-tuning/classification-model/task.yaml +0 -24
- package/refs/vbenchmark/tasks/ai-integration/function-calling/api-orchestrator/PROMPT.md +0 -60
- package/refs/vbenchmark/tasks/ai-integration/function-calling/api-orchestrator/task.yaml +0 -24
- package/refs/vbenchmark/tasks/ai-integration/function-calling/calendar-assistant/PROMPT.md +0 -50
- package/refs/vbenchmark/tasks/ai-integration/function-calling/calendar-assistant/task.yaml +0 -24
- package/refs/vbenchmark/tasks/ai-integration/function-calling/database-query/PROMPT.md +0 -62
- package/refs/vbenchmark/tasks/ai-integration/function-calling/database-query/task.yaml +0 -24
- package/refs/vbenchmark/tasks/ai-integration/multimodal/chart-interpreter/PROMPT.md +0 -60
- package/refs/vbenchmark/tasks/ai-integration/multimodal/chart-interpreter/task.yaml +0 -24
- package/refs/vbenchmark/tasks/ai-integration/multimodal/image-captioning/PROMPT.md +0 -49
- package/refs/vbenchmark/tasks/ai-integration/multimodal/image-captioning/task.yaml +0 -24
- package/refs/vbenchmark/tasks/ai-integration/rag-chatbot/code-assistant/PROMPT.md +0 -51
- package/refs/vbenchmark/tasks/ai-integration/rag-chatbot/code-assistant/task.yaml +0 -24
- package/refs/vbenchmark/tasks/ai-integration/rag-chatbot/doc-search/PROMPT.md +0 -51
- package/refs/vbenchmark/tasks/ai-integration/rag-chatbot/doc-search/task.yaml +0 -24
- package/refs/vbenchmark/tasks/ai-integration/rag-chatbot/pdf-qa/PROMPT.md +0 -76
- package/refs/vbenchmark/tasks/ai-integration/rag-chatbot/pdf-qa/docker-compose.yaml +0 -30
- package/refs/vbenchmark/tasks/ai-integration/rag-chatbot/pdf-qa/task.yaml +0 -30
- package/refs/vbenchmark/tasks/ai-integration/rag-chatbot/pdf-qa/tests/functional/qa.test.py +0 -146
- package/refs/vbenchmark/tasks/ai-integration/rag-chatbot/support-bot/PROMPT.md +0 -51
- package/refs/vbenchmark/tasks/ai-integration/rag-chatbot/support-bot/task.yaml +0 -24
- package/refs/vbenchmark/tasks/ai-integration/structured-output/contract-analyzer/PROMPT.md +0 -67
- package/refs/vbenchmark/tasks/ai-integration/structured-output/contract-analyzer/task.yaml +0 -24
- package/refs/vbenchmark/tasks/ai-integration/structured-output/invoice-parser/PROMPT.md +0 -61
- package/refs/vbenchmark/tasks/ai-integration/structured-output/invoice-parser/task.yaml +0 -27
- package/refs/vbenchmark/tasks/ai-integration/structured-output/receipt-scanner/PROMPT.md +0 -65
- package/refs/vbenchmark/tasks/ai-integration/structured-output/receipt-scanner/task.yaml +0 -24
- package/refs/vbenchmark/tasks/ai-integration/structured-output/resume-parser/PROMPT.md +0 -70
- package/refs/vbenchmark/tasks/ai-integration/structured-output/resume-parser/task.yaml +0 -24
- package/refs/vbenchmark/tasks/api-integrations/advanced/api-analytics/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/api-integrations/advanced/api-analytics/task.yaml +0 -16
- package/refs/vbenchmark/tasks/api-integrations/advanced/api-gateway/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/api-integrations/advanced/api-gateway/task.yaml +0 -16
- package/refs/vbenchmark/tasks/api-integrations/advanced/api-mocking/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/api-integrations/advanced/api-mocking/task.yaml +0 -16
- package/refs/vbenchmark/tasks/api-integrations/advanced/contract-testing/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/api-integrations/advanced/contract-testing/task.yaml +0 -16
- package/refs/vbenchmark/tasks/api-integrations/advanced/graphql-federation/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/api-integrations/advanced/graphql-federation/task.yaml +0 -16
- package/refs/vbenchmark/tasks/api-integrations/advanced/grpc-gateway/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/api-integrations/advanced/grpc-gateway/task.yaml +0 -16
- package/refs/vbenchmark/tasks/api-integrations/advanced/rate-limiter/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/api-integrations/advanced/rate-limiter/task.yaml +0 -16
- package/refs/vbenchmark/tasks/api-integrations/advanced/request-validator/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/api-integrations/advanced/request-validator/task.yaml +0 -16
- package/refs/vbenchmark/tasks/api-integrations/advanced/sdk-generator/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/api-integrations/advanced/sdk-generator/task.yaml +0 -16
- package/refs/vbenchmark/tasks/api-integrations/advanced/webhook-processor/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/api-integrations/advanced/webhook-processor/task.yaml +0 -16
- package/refs/vbenchmark/tasks/api-integrations/analytics/mixpanel-events/PROMPT.md +0 -42
- package/refs/vbenchmark/tasks/api-integrations/analytics/mixpanel-events/task.yaml +0 -24
- package/refs/vbenchmark/tasks/api-integrations/analytics/segment-tracking/PROMPT.md +0 -42
- package/refs/vbenchmark/tasks/api-integrations/analytics/segment-tracking/task.yaml +0 -24
- package/refs/vbenchmark/tasks/api-integrations/auth-provider/oauth2-github/PROMPT.md +0 -42
- package/refs/vbenchmark/tasks/api-integrations/auth-provider/oauth2-github/task.yaml +0 -24
- package/refs/vbenchmark/tasks/api-integrations/auth-provider/okta-integration/PROMPT.md +0 -44
- package/refs/vbenchmark/tasks/api-integrations/auth-provider/okta-integration/task.yaml +0 -24
- package/refs/vbenchmark/tasks/api-integrations/auth-provider/saml-sso/PROMPT.md +0 -42
- package/refs/vbenchmark/tasks/api-integrations/auth-provider/saml-sso/task.yaml +0 -24
- package/refs/vbenchmark/tasks/api-integrations/communication/discord-webhook/PROMPT.md +0 -44
- package/refs/vbenchmark/tasks/api-integrations/communication/discord-webhook/task.yaml +0 -24
- package/refs/vbenchmark/tasks/api-integrations/communication/slack-bot/PROMPT.md +0 -42
- package/refs/vbenchmark/tasks/api-integrations/communication/slack-bot/task.yaml +0 -24
- package/refs/vbenchmark/tasks/api-integrations/communication/twilio-sms/PROMPT.md +0 -42
- package/refs/vbenchmark/tasks/api-integrations/communication/twilio-sms/task.yaml +0 -24
- package/refs/vbenchmark/tasks/api-integrations/email/transactional/PROMPT.md +0 -82
- package/refs/vbenchmark/tasks/api-integrations/email/transactional/task.yaml +0 -27
- package/refs/vbenchmark/tasks/api-integrations/maps/google-maps-geocoding/PROMPT.md +0 -41
- package/refs/vbenchmark/tasks/api-integrations/maps/google-maps-geocoding/task.yaml +0 -24
- package/refs/vbenchmark/tasks/api-integrations/maps/mapbox-directions/PROMPT.md +0 -41
- package/refs/vbenchmark/tasks/api-integrations/maps/mapbox-directions/task.yaml +0 -24
- package/refs/vbenchmark/tasks/api-integrations/payment/crypto-payments/PROMPT.md +0 -43
- package/refs/vbenchmark/tasks/api-integrations/payment/crypto-payments/task.yaml +0 -24
- package/refs/vbenchmark/tasks/api-integrations/payment/paypal-integration/PROMPT.md +0 -41
- package/refs/vbenchmark/tasks/api-integrations/payment/paypal-integration/task.yaml +0 -24
- package/refs/vbenchmark/tasks/api-integrations/social/twitter-api/PROMPT.md +0 -41
- package/refs/vbenchmark/tasks/api-integrations/social/twitter-api/task.yaml +0 -24
- package/refs/vbenchmark/tasks/api-integrations/storage/cloudinary-upload/PROMPT.md +0 -43
- package/refs/vbenchmark/tasks/api-integrations/storage/cloudinary-upload/task.yaml +0 -24
- package/refs/vbenchmark/tasks/api-integrations/storage/gcs-streaming/PROMPT.md +0 -43
- package/refs/vbenchmark/tasks/api-integrations/storage/gcs-streaming/task.yaml +0 -24
- package/refs/vbenchmark/tasks/api-integrations/storage/s3-presigned-urls/PROMPT.md +0 -41
- package/refs/vbenchmark/tasks/api-integrations/storage/s3-presigned-urls/task.yaml +0 -24
- package/refs/vbenchmark/tasks/api-integrations/stripe/checkout-session/PROMPT.md +0 -41
- package/refs/vbenchmark/tasks/api-integrations/stripe/checkout-session/task.yaml +0 -24
- package/refs/vbenchmark/tasks/api-integrations/stripe/payment-webhook/PROMPT.md +0 -60
- package/refs/vbenchmark/tasks/api-integrations/stripe/payment-webhook/docker-compose.yaml +0 -38
- package/refs/vbenchmark/tasks/api-integrations/stripe/payment-webhook/task.yaml +0 -31
- package/refs/vbenchmark/tasks/api-integrations/stripe/payment-webhook/tests/webhook.test.ts +0 -193
- package/refs/vbenchmark/tasks/api-integrations/stripe/subscription-portal/PROMPT.md +0 -41
- package/refs/vbenchmark/tasks/api-integrations/stripe/subscription-portal/task.yaml +0 -24
- package/refs/vbenchmark/tasks/code-evolution/advanced/api-deprecation/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/code-evolution/advanced/api-deprecation/task.yaml +0 -16
- package/refs/vbenchmark/tasks/code-evolution/advanced/ast-refactoring/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/code-evolution/advanced/ast-refactoring/task.yaml +0 -16
- package/refs/vbenchmark/tasks/code-evolution/advanced/concurrency-fix/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/code-evolution/advanced/concurrency-fix/task.yaml +0 -16
- package/refs/vbenchmark/tasks/code-evolution/advanced/database-schema-migration/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/code-evolution/advanced/database-schema-migration/task.yaml +0 -16
- package/refs/vbenchmark/tasks/code-evolution/advanced/dead-code-elimination/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/code-evolution/advanced/dead-code-elimination/task.yaml +0 -16
- package/refs/vbenchmark/tasks/code-evolution/advanced/dependency-upgrade/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/code-evolution/advanced/dependency-upgrade/task.yaml +0 -16
- package/refs/vbenchmark/tasks/code-evolution/advanced/memory-optimization/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/code-evolution/advanced/memory-optimization/task.yaml +0 -16
- package/refs/vbenchmark/tasks/code-evolution/advanced/monorepo-extraction/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/code-evolution/advanced/monorepo-extraction/task.yaml +0 -16
- package/refs/vbenchmark/tasks/code-evolution/advanced/performance-profiling/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/code-evolution/advanced/performance-profiling/task.yaml +0 -16
- package/refs/vbenchmark/tasks/code-evolution/advanced/type-migration/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/code-evolution/advanced/type-migration/task.yaml +0 -16
- package/refs/vbenchmark/tasks/code-evolution/legacy-migration/callback-to-async/PROMPT.md +0 -47
- package/refs/vbenchmark/tasks/code-evolution/legacy-migration/callback-to-async/task.yaml +0 -24
- package/refs/vbenchmark/tasks/code-evolution/legacy-migration/express-to-fastify/PROMPT.md +0 -49
- package/refs/vbenchmark/tasks/code-evolution/legacy-migration/express-to-fastify/base-code/src/app.ts +0 -22
- package/refs/vbenchmark/tasks/code-evolution/legacy-migration/express-to-fastify/task.yaml +0 -37
- package/refs/vbenchmark/tasks/code-evolution/legacy-migration/express-to-fastify/tests/api.test.ts +0 -70
- package/refs/vbenchmark/tasks/code-evolution/legacy-migration/flask-to-fastapi/PROMPT.md +0 -46
- package/refs/vbenchmark/tasks/code-evolution/legacy-migration/flask-to-fastapi/task.yaml +0 -24
- package/refs/vbenchmark/tasks/code-evolution/legacy-migration/java-to-kotlin/PROMPT.md +0 -45
- package/refs/vbenchmark/tasks/code-evolution/legacy-migration/java-to-kotlin/task.yaml +0 -24
- package/refs/vbenchmark/tasks/code-evolution/legacy-migration/jquery-to-react/PROMPT.md +0 -47
- package/refs/vbenchmark/tasks/code-evolution/legacy-migration/jquery-to-react/task.yaml +0 -24
- package/refs/vbenchmark/tasks/code-evolution/legacy-migration/rest-to-grpc/PROMPT.md +0 -47
- package/refs/vbenchmark/tasks/code-evolution/legacy-migration/rest-to-grpc/task.yaml +0 -24
- package/refs/vbenchmark/tasks/code-evolution/performance/async-refactor/PROMPT.md +0 -47
- package/refs/vbenchmark/tasks/code-evolution/performance/async-refactor/task.yaml +0 -24
- package/refs/vbenchmark/tasks/code-evolution/performance/memory-leak-fix/PROMPT.md +0 -47
- package/refs/vbenchmark/tasks/code-evolution/performance/memory-leak-fix/task.yaml +0 -24
- package/refs/vbenchmark/tasks/code-evolution/performance/query-optimization/PROMPT.md +0 -49
- package/refs/vbenchmark/tasks/code-evolution/performance/query-optimization/task.yaml +0 -24
- package/refs/vbenchmark/tasks/code-evolution/refactoring/class-to-hooks/PROMPT.md +0 -96
- package/refs/vbenchmark/tasks/code-evolution/refactoring/class-to-hooks/task.yaml +0 -27
- package/refs/vbenchmark/tasks/code-evolution/refactoring/dependency-injection/PROMPT.md +0 -47
- package/refs/vbenchmark/tasks/code-evolution/refactoring/dependency-injection/task.yaml +0 -24
- package/refs/vbenchmark/tasks/code-evolution/refactoring/error-handling/PROMPT.md +0 -48
- package/refs/vbenchmark/tasks/code-evolution/refactoring/error-handling/task.yaml +0 -24
- package/refs/vbenchmark/tasks/code-evolution/refactoring/monolith-to-modules/PROMPT.md +0 -50
- package/refs/vbenchmark/tasks/code-evolution/refactoring/monolith-to-modules/task.yaml +0 -24
- package/refs/vbenchmark/tasks/code-evolution/refactoring/orm-migration/PROMPT.md +0 -47
- package/refs/vbenchmark/tasks/code-evolution/refactoring/orm-migration/task.yaml +0 -24
- package/refs/vbenchmark/tasks/code-evolution/security/secrets-rotation/PROMPT.md +0 -49
- package/refs/vbenchmark/tasks/code-evolution/security/secrets-rotation/task.yaml +0 -24
- package/refs/vbenchmark/tasks/code-evolution/security/sql-injection-fix/PROMPT.md +0 -50
- package/refs/vbenchmark/tasks/code-evolution/security/sql-injection-fix/task.yaml +0 -24
- package/refs/vbenchmark/tasks/code-evolution/security/xss-prevention/PROMPT.md +0 -47
- package/refs/vbenchmark/tasks/code-evolution/security/xss-prevention/task.yaml +0 -24
- package/refs/vbenchmark/tasks/code-evolution/testing/add-unit-tests/PROMPT.md +0 -48
- package/refs/vbenchmark/tasks/code-evolution/testing/add-unit-tests/task.yaml +0 -24
- package/refs/vbenchmark/tasks/code-evolution/testing/e2e-playwright/PROMPT.md +0 -50
- package/refs/vbenchmark/tasks/code-evolution/testing/e2e-playwright/task.yaml +0 -24
- package/refs/vbenchmark/tasks/code-evolution/testing/pytest-fixtures/PROMPT.md +0 -47
- package/refs/vbenchmark/tasks/code-evolution/testing/pytest-fixtures/task.yaml +0 -24
- package/refs/vbenchmark/tasks/frontend/accessibility/keyboard-shortcuts/PROMPT.md +0 -44
- package/refs/vbenchmark/tasks/frontend/accessibility/keyboard-shortcuts/task.yaml +0 -24
- package/refs/vbenchmark/tasks/frontend/accessibility/screen-reader-nav/PROMPT.md +0 -44
- package/refs/vbenchmark/tasks/frontend/accessibility/screen-reader-nav/task.yaml +0 -24
- package/refs/vbenchmark/tasks/frontend/advanced/canvas-editor/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/frontend/advanced/canvas-editor/task.yaml +0 -16
- package/refs/vbenchmark/tasks/frontend/advanced/micro-frontend/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/frontend/advanced/micro-frontend/task.yaml +0 -16
- package/refs/vbenchmark/tasks/frontend/advanced/offline-first/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/frontend/advanced/offline-first/task.yaml +0 -16
- package/refs/vbenchmark/tasks/frontend/advanced/realtime-collab/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/frontend/advanced/realtime-collab/task.yaml +0 -16
- package/refs/vbenchmark/tasks/frontend/advanced/service-worker/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/frontend/advanced/service-worker/task.yaml +0 -16
- package/refs/vbenchmark/tasks/frontend/advanced/state-machine/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/frontend/advanced/state-machine/task.yaml +0 -16
- package/refs/vbenchmark/tasks/frontend/advanced/virtual-list/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/frontend/advanced/virtual-list/task.yaml +0 -16
- package/refs/vbenchmark/tasks/frontend/advanced/wasm-integration/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/frontend/advanced/wasm-integration/task.yaml +0 -16
- package/refs/vbenchmark/tasks/frontend/advanced/web-worker/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/frontend/advanced/web-worker/task.yaml +0 -16
- package/refs/vbenchmark/tasks/frontend/advanced/webgl-visualization/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/frontend/advanced/webgl-visualization/task.yaml +0 -16
- package/refs/vbenchmark/tasks/frontend/animation/page-transitions/PROMPT.md +0 -44
- package/refs/vbenchmark/tasks/frontend/animation/page-transitions/task.yaml +0 -24
- package/refs/vbenchmark/tasks/frontend/components/data-grid/PROMPT.md +0 -59
- package/refs/vbenchmark/tasks/frontend/components/data-grid/task.yaml +0 -24
- package/refs/vbenchmark/tasks/frontend/components/date-range-picker/PROMPT.md +0 -57
- package/refs/vbenchmark/tasks/frontend/components/date-range-picker/task.yaml +0 -24
- package/refs/vbenchmark/tasks/frontend/components/file-uploader/PROMPT.md +0 -55
- package/refs/vbenchmark/tasks/frontend/components/file-uploader/task.yaml +0 -24
- package/refs/vbenchmark/tasks/frontend/components/form-builder/PROMPT.md +0 -96
- package/refs/vbenchmark/tasks/frontend/components/form-builder/task.yaml +0 -28
- package/refs/vbenchmark/tasks/frontend/components/rich-text-editor/PROMPT.md +0 -45
- package/refs/vbenchmark/tasks/frontend/components/rich-text-editor/task.yaml +0 -24
- package/refs/vbenchmark/tasks/frontend/figma-to-code/dashboard-layout/PROMPT.md +0 -50
- package/refs/vbenchmark/tasks/frontend/figma-to-code/dashboard-layout/task.yaml +0 -25
- package/refs/vbenchmark/tasks/frontend/figma-to-code/landing-page/PROMPT.md +0 -49
- package/refs/vbenchmark/tasks/frontend/figma-to-code/landing-page/task.yaml +0 -25
- package/refs/vbenchmark/tasks/frontend/figma-to-code/mobile-app-screen/PROMPT.md +0 -51
- package/refs/vbenchmark/tasks/frontend/figma-to-code/mobile-app-screen/task.yaml +0 -24
- package/refs/vbenchmark/tasks/frontend/figma-to-code/pricing-card/PROMPT.md +0 -93
- package/refs/vbenchmark/tasks/frontend/figma-to-code/pricing-card/docker-compose.yaml +0 -23
- package/refs/vbenchmark/tasks/frontend/figma-to-code/pricing-card/task.yaml +0 -30
- package/refs/vbenchmark/tasks/frontend/figma-to-code/pricing-card/tests/visual/diff.test.ts +0 -107
- package/refs/vbenchmark/tasks/frontend/figma-to-code/pricing-card/tests/visual/interaction.test.ts +0 -88
- package/refs/vbenchmark/tasks/frontend/performance/image-lazy-load/PROMPT.md +0 -43
- package/refs/vbenchmark/tasks/frontend/performance/image-lazy-load/task.yaml +0 -24
- package/refs/vbenchmark/tasks/frontend/performance/infinite-scroll/PROMPT.md +0 -44
- package/refs/vbenchmark/tasks/frontend/performance/infinite-scroll/task.yaml +0 -24
- package/refs/vbenchmark/tasks/frontend/state-management/collaborative-editor/PROMPT.md +0 -44
- package/refs/vbenchmark/tasks/frontend/state-management/collaborative-editor/task.yaml +0 -24
- package/refs/vbenchmark/tasks/frontend/state-management/shopping-cart/PROMPT.md +0 -53
- package/refs/vbenchmark/tasks/frontend/state-management/shopping-cart/task.yaml +0 -24
- package/refs/vbenchmark/tasks/frontend/visualization/chart-dashboard/PROMPT.md +0 -83
- package/refs/vbenchmark/tasks/frontend/visualization/chart-dashboard/task.yaml +0 -28
- package/refs/vbenchmark/tasks/frontend/visualization/gantt-chart/PROMPT.md +0 -57
- package/refs/vbenchmark/tasks/frontend/visualization/gantt-chart/task.yaml +0 -24
- package/refs/vbenchmark/tasks/frontend/visualization/map-dashboard/PROMPT.md +0 -44
- package/refs/vbenchmark/tasks/frontend/visualization/map-dashboard/task.yaml +0 -24
- package/refs/vbenchmark/tasks/frontend/visualization/realtime-charts/PROMPT.md +0 -43
- package/refs/vbenchmark/tasks/frontend/visualization/realtime-charts/task.yaml +0 -24
- package/refs/vbenchmark/tasks/glue-code/advanced/blue-green-deploy/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/glue-code/advanced/blue-green-deploy/task.yaml +0 -16
- package/refs/vbenchmark/tasks/glue-code/advanced/canary-release/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/glue-code/advanced/canary-release/task.yaml +0 -16
- package/refs/vbenchmark/tasks/glue-code/advanced/change-data-capture/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/glue-code/advanced/change-data-capture/task.yaml +0 -16
- package/refs/vbenchmark/tasks/glue-code/advanced/config-management/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/glue-code/advanced/config-management/task.yaml +0 -16
- package/refs/vbenchmark/tasks/glue-code/advanced/data-pipeline/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/glue-code/advanced/data-pipeline/task.yaml +0 -16
- package/refs/vbenchmark/tasks/glue-code/advanced/distributed-tracing/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/glue-code/advanced/distributed-tracing/task.yaml +0 -16
- package/refs/vbenchmark/tasks/glue-code/advanced/log-aggregation/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/glue-code/advanced/log-aggregation/task.yaml +0 -16
- package/refs/vbenchmark/tasks/glue-code/advanced/schema-registry/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/glue-code/advanced/schema-registry/task.yaml +0 -16
- package/refs/vbenchmark/tasks/glue-code/advanced/secret-rotation/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/glue-code/advanced/secret-rotation/task.yaml +0 -16
- package/refs/vbenchmark/tasks/glue-code/advanced/stream-processing/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/glue-code/advanced/stream-processing/task.yaml +0 -16
- package/refs/vbenchmark/tasks/glue-code/api-sync/rest-to-graphql/PROMPT.md +0 -66
- package/refs/vbenchmark/tasks/glue-code/api-sync/rest-to-graphql/task.yaml +0 -27
- package/refs/vbenchmark/tasks/glue-code/caching/redis-cache/PROMPT.md +0 -82
- package/refs/vbenchmark/tasks/glue-code/caching/redis-cache/task.yaml +0 -27
- package/refs/vbenchmark/tasks/glue-code/data-transform/avro-schema-evolution/PROMPT.md +0 -51
- package/refs/vbenchmark/tasks/glue-code/data-transform/avro-schema-evolution/task.yaml +0 -24
- package/refs/vbenchmark/tasks/glue-code/data-transform/csv-normalizer/PROMPT.md +0 -49
- package/refs/vbenchmark/tasks/glue-code/data-transform/csv-normalizer/task.yaml +0 -24
- package/refs/vbenchmark/tasks/glue-code/data-transform/excel-to-json/PROMPT.md +0 -67
- package/refs/vbenchmark/tasks/glue-code/data-transform/excel-to-json/task.yaml +0 -28
- package/refs/vbenchmark/tasks/glue-code/data-transform/excel-to-json/tests/transform.test.py +0 -137
- package/refs/vbenchmark/tasks/glue-code/data-transform/json-to-xml/PROMPT.md +0 -45
- package/refs/vbenchmark/tasks/glue-code/data-transform/json-to-xml/task.yaml +0 -24
- package/refs/vbenchmark/tasks/glue-code/data-transform/protobuf-converter/PROMPT.md +0 -44
- package/refs/vbenchmark/tasks/glue-code/data-transform/protobuf-converter/task.yaml +0 -24
- package/refs/vbenchmark/tasks/glue-code/etl/cdc-pipeline/PROMPT.md +0 -52
- package/refs/vbenchmark/tasks/glue-code/etl/cdc-pipeline/task.yaml +0 -27
- package/refs/vbenchmark/tasks/glue-code/etl/database-sync/PROMPT.md +0 -51
- package/refs/vbenchmark/tasks/glue-code/etl/database-sync/task.yaml +0 -24
- package/refs/vbenchmark/tasks/glue-code/etl/s3-to-warehouse/PROMPT.md +0 -50
- package/refs/vbenchmark/tasks/glue-code/etl/s3-to-warehouse/task.yaml +0 -24
- package/refs/vbenchmark/tasks/glue-code/file-processing/image-resizer/PROMPT.md +0 -52
- package/refs/vbenchmark/tasks/glue-code/file-processing/image-resizer/task.yaml +0 -24
- package/refs/vbenchmark/tasks/glue-code/file-processing/pdf-merger/PROMPT.md +0 -50
- package/refs/vbenchmark/tasks/glue-code/file-processing/pdf-merger/task.yaml +0 -24
- package/refs/vbenchmark/tasks/glue-code/file-processing/video-transcoder/PROMPT.md +0 -50
- package/refs/vbenchmark/tasks/glue-code/file-processing/video-transcoder/task.yaml +0 -27
- package/refs/vbenchmark/tasks/glue-code/migration/data-backfill/PROMPT.md +0 -50
- package/refs/vbenchmark/tasks/glue-code/migration/data-backfill/task.yaml +0 -24
- package/refs/vbenchmark/tasks/glue-code/migration/database-versioning/PROMPT.md +0 -50
- package/refs/vbenchmark/tasks/glue-code/migration/database-versioning/task.yaml +0 -24
- package/refs/vbenchmark/tasks/glue-code/queue/kafka-producer/PROMPT.md +0 -49
- package/refs/vbenchmark/tasks/glue-code/queue/kafka-producer/task.yaml +0 -27
- package/refs/vbenchmark/tasks/glue-code/queue/rabbitmq-consumer/PROMPT.md +0 -50
- package/refs/vbenchmark/tasks/glue-code/queue/rabbitmq-consumer/task.yaml +0 -27
- package/refs/vbenchmark/tasks/glue-code/queue/sqs-batch-processor/PROMPT.md +0 -47
- package/refs/vbenchmark/tasks/glue-code/queue/sqs-batch-processor/task.yaml +0 -24
- package/refs/vbenchmark/tasks/glue-code/scheduler/cron-job-manager/PROMPT.md +0 -52
- package/refs/vbenchmark/tasks/glue-code/scheduler/cron-job-manager/task.yaml +0 -27
- package/refs/vbenchmark/tasks/glue-code/scheduler/delayed-tasks/PROMPT.md +0 -51
- package/refs/vbenchmark/tasks/glue-code/scheduler/delayed-tasks/task.yaml +0 -27
- package/refs/vbenchmark/tasks/saas-core/advanced/api-versioning/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/saas-core/advanced/api-versioning/task.yaml +0 -16
- package/refs/vbenchmark/tasks/saas-core/advanced/circuit-breaker/PROMPT.md +0 -13
- package/refs/vbenchmark/tasks/saas-core/advanced/circuit-breaker/task.yaml +0 -16
- package/refs/vbenchmark/tasks/saas-core/advanced/compliance-gdpr/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/saas-core/advanced/compliance-gdpr/task.yaml +0 -16
- package/refs/vbenchmark/tasks/saas-core/advanced/cqrs-pattern/PROMPT.md +0 -13
- package/refs/vbenchmark/tasks/saas-core/advanced/cqrs-pattern/task.yaml +0 -16
- package/refs/vbenchmark/tasks/saas-core/advanced/data-encryption/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/saas-core/advanced/data-encryption/task.yaml +0 -16
- package/refs/vbenchmark/tasks/saas-core/advanced/distributed-locking/PROMPT.md +0 -46
- package/refs/vbenchmark/tasks/saas-core/advanced/distributed-locking/task.yaml +0 -24
- package/refs/vbenchmark/tasks/saas-core/advanced/event-sourcing/PROMPT.md +0 -23
- package/refs/vbenchmark/tasks/saas-core/advanced/event-sourcing/task.yaml +0 -16
- package/refs/vbenchmark/tasks/saas-core/advanced/feature-flags-ab/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/saas-core/advanced/feature-flags-ab/task.yaml +0 -16
- package/refs/vbenchmark/tasks/saas-core/advanced/saga-orchestration/PROMPT.md +0 -13
- package/refs/vbenchmark/tasks/saas-core/advanced/saga-orchestration/task.yaml +0 -16
- package/refs/vbenchmark/tasks/saas-core/advanced/webhook-delivery/PROMPT.md +0 -15
- package/refs/vbenchmark/tasks/saas-core/advanced/webhook-delivery/task.yaml +0 -16
- package/refs/vbenchmark/tasks/saas-core/audit/activity-logging/PROMPT.md +0 -50
- package/refs/vbenchmark/tasks/saas-core/audit/activity-logging/task.yaml +0 -27
- package/refs/vbenchmark/tasks/saas-core/auth/jwt-refresh-tokens/PROMPT.md +0 -50
- package/refs/vbenchmark/tasks/saas-core/auth/jwt-refresh-tokens/task.yaml +0 -27
- package/refs/vbenchmark/tasks/saas-core/auth/magic-link-email/PROMPT.md +0 -53
- package/refs/vbenchmark/tasks/saas-core/auth/magic-link-email/task.yaml +0 -27
- package/refs/vbenchmark/tasks/saas-core/auth/mfa-totp/PROMPT.md +0 -79
- package/refs/vbenchmark/tasks/saas-core/auth/mfa-totp/task.yaml +0 -27
- package/refs/vbenchmark/tasks/saas-core/auth/rbac-permissions/PROMPT.md +0 -51
- package/refs/vbenchmark/tasks/saas-core/auth/rbac-permissions/task.yaml +0 -27
- package/refs/vbenchmark/tasks/saas-core/auth/session-management/PROMPT.md +0 -52
- package/refs/vbenchmark/tasks/saas-core/auth/session-management/task.yaml +0 -27
- package/refs/vbenchmark/tasks/saas-core/auth/supabase-oauth/PROMPT.md +0 -45
- package/refs/vbenchmark/tasks/saas-core/auth/supabase-oauth/docker-compose.yaml +0 -47
- package/refs/vbenchmark/tasks/saas-core/auth/supabase-oauth/task.yaml +0 -32
- package/refs/vbenchmark/tasks/saas-core/auth/supabase-oauth/tests/auth.test.ts +0 -59
- package/refs/vbenchmark/tasks/saas-core/billing/invoice-generation/PROMPT.md +0 -53
- package/refs/vbenchmark/tasks/saas-core/billing/invoice-generation/task.yaml +0 -27
- package/refs/vbenchmark/tasks/saas-core/billing/stripe-subscriptions/PROMPT.md +0 -51
- package/refs/vbenchmark/tasks/saas-core/billing/stripe-subscriptions/task.yaml +0 -27
- package/refs/vbenchmark/tasks/saas-core/billing/usage-metering/PROMPT.md +0 -52
- package/refs/vbenchmark/tasks/saas-core/billing/usage-metering/task.yaml +0 -27
- package/refs/vbenchmark/tasks/saas-core/crud/dashboard-table/PROMPT.md +0 -48
- package/refs/vbenchmark/tasks/saas-core/crud/dashboard-table/task.yaml +0 -28
- package/refs/vbenchmark/tasks/saas-core/multi-tenant/org-isolation/PROMPT.md +0 -50
- package/refs/vbenchmark/tasks/saas-core/multi-tenant/org-isolation/task.yaml +0 -27
- package/refs/vbenchmark/tasks/saas-core/multi-tenant/subdomain-routing/PROMPT.md +0 -50
- package/refs/vbenchmark/tasks/saas-core/multi-tenant/subdomain-routing/task.yaml +0 -27
- package/refs/vbenchmark/tasks/saas-core/notifications/email-queue/PROMPT.md +0 -53
- package/refs/vbenchmark/tasks/saas-core/notifications/email-queue/task.yaml +0 -27
- package/refs/vbenchmark/tasks/saas-core/notifications/in-app-alerts/PROMPT.md +0 -51
- package/refs/vbenchmark/tasks/saas-core/notifications/in-app-alerts/task.yaml +0 -27
- package/refs/vbenchmark/tasks/saas-core/notifications/push-notifications/PROMPT.md +0 -51
- package/refs/vbenchmark/tasks/saas-core/notifications/push-notifications/task.yaml +0 -27
- package/refs/vbenchmark/tasks/saas-core/realtime/websocket-chat/PROMPT.md +0 -80
- package/refs/vbenchmark/tasks/saas-core/realtime/websocket-chat/task.yaml +0 -27
- package/refs/vbenchmark/tasks/saas-core/search/full-text-search/PROMPT.md +0 -51
- package/refs/vbenchmark/tasks/saas-core/search/full-text-search/task.yaml +0 -27
- package/refs/vbenchmark/tasks/saas-core/security/rate-limiter/PROMPT.md +0 -99
- package/refs/vbenchmark/tasks/saas-core/security/rate-limiter/task.yaml +0 -27
- package/refs/vbenchmark/tasks/saas-core/settings/user-preferences/PROMPT.md +0 -78
- package/refs/vbenchmark/tasks/saas-core/settings/user-preferences/task.yaml +0 -27
- package/refs/vbenchmark/templates/fastapi-postgres/docker-compose.yaml +0 -36
- package/refs/vbenchmark/templates/fastapi-postgres/pyproject.toml +0 -34
- package/refs/vbenchmark/templates/fastapi-postgres/src/__init__.py +0 -0
- package/refs/vbenchmark/templates/fastapi-postgres/src/config.py +0 -12
- package/refs/vbenchmark/templates/fastapi-postgres/src/database.py +0 -15
- package/refs/vbenchmark/templates/fastapi-postgres/src/main.py +0 -51
- package/refs/vbenchmark/templates/fastapi-postgres/src/models.py +0 -12
- package/refs/vbenchmark/templates/fastapi-postgres/src/schemas.py +0 -20
- package/refs/vbenchmark/templates/go-fiber/docker-compose.yaml +0 -34
- package/refs/vbenchmark/templates/go-fiber/go.mod +0 -33
- package/refs/vbenchmark/templates/go-fiber/go.sum +0 -68
- package/refs/vbenchmark/templates/go-fiber/main.go +0 -98
- package/refs/vbenchmark/templates/nextjs-supabase/.env.example +0 -3
- package/refs/vbenchmark/templates/nextjs-supabase/docker-compose.yaml +0 -68
- package/refs/vbenchmark/templates/nextjs-supabase/src/app/globals.css +0 -13
- package/refs/vbenchmark/templates/nextjs-supabase/src/app/layout.tsx +0 -19
- package/refs/vbenchmark/templates/nextjs-supabase/src/app/page.tsx +0 -38
- package/refs/vbenchmark/templates/nextjs-supabase/src/lib/supabase/client.ts +0 -8
- package/refs/vbenchmark/templates/nextjs-supabase/src/lib/supabase/server.ts +0 -32
- package/refs/vbenchmark/templates/rust-axum/Cargo.lock +0 -2371
- package/refs/vbenchmark/templates/rust-axum/Cargo.toml +0 -16
- package/refs/vbenchmark/templates/rust-axum/docker-compose.yaml +0 -34
- package/refs/vbenchmark/templates/rust-axum/migrations/20240101000000_init.sql +0 -20
- package/refs/vbenchmark/templates/rust-axum/src/main.rs +0 -121
- package/refs/vbenchmark/tsconfig.base.json +0 -18
- package/refs/vbenchmark/turbo.json +0 -23
- package/refs/vbenchmark/vercel.json +0 -10
|
@@ -1,1468 +0,0 @@
|
|
|
1
|
-
import { Routes, Route, Link, useLocation } from 'react-router-dom';
|
|
2
|
-
import { useState, useRef, useMemo } from 'react';
|
|
3
|
-
import {
|
|
4
|
-
Chart as ChartJS,
|
|
5
|
-
CategoryScale,
|
|
6
|
-
LinearScale,
|
|
7
|
-
BarElement,
|
|
8
|
-
PointElement,
|
|
9
|
-
LineElement,
|
|
10
|
-
ArcElement,
|
|
11
|
-
RadialLinearScale,
|
|
12
|
-
Title,
|
|
13
|
-
Tooltip,
|
|
14
|
-
Legend,
|
|
15
|
-
Filler,
|
|
16
|
-
} from 'chart.js';
|
|
17
|
-
import { Bar, Radar, Scatter, Line } from 'react-chartjs-2';
|
|
18
|
-
|
|
19
|
-
// Static data imports for GitHub Pages hosting
|
|
20
|
-
import leaderboardData from './data/leaderboard.json';
|
|
21
|
-
import categoryPerformanceData from './data/category-performance.json';
|
|
22
|
-
import taskResultsData from './data/task-results.json';
|
|
23
|
-
import tasksData from './data/tasks.json';
|
|
24
|
-
|
|
25
|
-
ChartJS.register(
|
|
26
|
-
CategoryScale,
|
|
27
|
-
LinearScale,
|
|
28
|
-
BarElement,
|
|
29
|
-
PointElement,
|
|
30
|
-
LineElement,
|
|
31
|
-
ArcElement,
|
|
32
|
-
RadialLinearScale,
|
|
33
|
-
Title,
|
|
34
|
-
Tooltip,
|
|
35
|
-
Legend,
|
|
36
|
-
Filler
|
|
37
|
-
);
|
|
38
|
-
|
|
39
|
-
// shadcn/ui inspired color system with CSS variables pattern
|
|
40
|
-
const colors = {
|
|
41
|
-
primary: '#4285f4',
|
|
42
|
-
primaryForeground: '#ffffff',
|
|
43
|
-
secondary: '#f1f5f9',
|
|
44
|
-
secondaryForeground: '#0f172a',
|
|
45
|
-
muted: '#f8fafc',
|
|
46
|
-
mutedForeground: '#64748b',
|
|
47
|
-
accent: '#f1f5f9',
|
|
48
|
-
accentForeground: '#0f172a',
|
|
49
|
-
destructive: '#ef4444',
|
|
50
|
-
border: '#e2e8f0',
|
|
51
|
-
input: '#e2e8f0',
|
|
52
|
-
ring: '#4285f4',
|
|
53
|
-
background: '#ffffff',
|
|
54
|
-
foreground: '#0f172a',
|
|
55
|
-
card: '#ffffff',
|
|
56
|
-
cardForeground: '#0f172a',
|
|
57
|
-
success: '#22c55e',
|
|
58
|
-
warning: '#f59e0b',
|
|
59
|
-
};
|
|
60
|
-
|
|
61
|
-
// Chart color palette - distinct and accessible
|
|
62
|
-
const CHART_COLORS = [
|
|
63
|
-
'#4285f4', '#22c55e', '#f59e0b', '#ef4444', '#8b5cf6',
|
|
64
|
-
'#06b6d4', '#f97316', '#64748b', '#ec4899', '#6366f1',
|
|
65
|
-
'#14b8a6', '#a855f7', '#84cc16', '#0ea5e9',
|
|
66
|
-
];
|
|
67
|
-
|
|
68
|
-
interface LeaderboardEntry {
|
|
69
|
-
rank: number;
|
|
70
|
-
agentName: string;
|
|
71
|
-
agentVersion: string;
|
|
72
|
-
modelName?: string;
|
|
73
|
-
avgScore: number;
|
|
74
|
-
avgFunctional: number;
|
|
75
|
-
avgQuality: number;
|
|
76
|
-
avgCost: number;
|
|
77
|
-
tasksCompleted: number;
|
|
78
|
-
passedTasks?: number;
|
|
79
|
-
failedTasks?: number;
|
|
80
|
-
totalTokens: number;
|
|
81
|
-
inputTokens?: number;
|
|
82
|
-
outputTokens?: number;
|
|
83
|
-
totalCostUSD?: number;
|
|
84
|
-
avgTimeMs?: number;
|
|
85
|
-
pricingInput?: number;
|
|
86
|
-
pricingOutput?: number;
|
|
87
|
-
}
|
|
88
|
-
|
|
89
|
-
// HoverCard Component - shadcn/ui pattern (for inline elements)
|
|
90
|
-
function HoverCard({ children, content }: { children: React.ReactNode; content: React.ReactNode }) {
|
|
91
|
-
const [isOpen, setIsOpen] = useState(false);
|
|
92
|
-
const [position, setPosition] = useState({ x: 0, y: 0 });
|
|
93
|
-
const triggerRef = useRef<HTMLSpanElement>(null);
|
|
94
|
-
const timeoutRef = useRef<NodeJS.Timeout>();
|
|
95
|
-
|
|
96
|
-
const handleMouseEnter = () => {
|
|
97
|
-
clearTimeout(timeoutRef.current);
|
|
98
|
-
timeoutRef.current = setTimeout(() => {
|
|
99
|
-
if (triggerRef.current) {
|
|
100
|
-
const rect = triggerRef.current.getBoundingClientRect();
|
|
101
|
-
setPosition({ x: rect.left + rect.width / 2, y: rect.top });
|
|
102
|
-
}
|
|
103
|
-
setIsOpen(true);
|
|
104
|
-
}, 200);
|
|
105
|
-
};
|
|
106
|
-
|
|
107
|
-
const handleMouseLeave = () => {
|
|
108
|
-
clearTimeout(timeoutRef.current);
|
|
109
|
-
timeoutRef.current = setTimeout(() => setIsOpen(false), 100);
|
|
110
|
-
};
|
|
111
|
-
|
|
112
|
-
return (
|
|
113
|
-
<span ref={triggerRef} onMouseEnter={handleMouseEnter} onMouseLeave={handleMouseLeave} className="inline-block">
|
|
114
|
-
{children}
|
|
115
|
-
{isOpen && (
|
|
116
|
-
<div
|
|
117
|
-
className="fixed z-50 animate-in fade-in-0 zoom-in-95"
|
|
118
|
-
style={{ left: position.x, top: position.y - 8, transform: 'translate(-50%, -100%)' }}
|
|
119
|
-
onMouseEnter={() => clearTimeout(timeoutRef.current)}
|
|
120
|
-
onMouseLeave={handleMouseLeave}
|
|
121
|
-
>
|
|
122
|
-
<div className="bg-white rounded-lg border border-slate-200 shadow-lg p-4 min-w-[280px] max-w-[350px]">
|
|
123
|
-
{content}
|
|
124
|
-
</div>
|
|
125
|
-
</div>
|
|
126
|
-
)}
|
|
127
|
-
</span>
|
|
128
|
-
);
|
|
129
|
-
}
|
|
130
|
-
|
|
131
|
-
// HoverRow Component - for table rows
/**
 * Table row that shows a floating popover while the pointer rests on it.
 *
 * The popover opens 300 ms after the pointer enters the row and closes 100 ms
 * after it leaves. Entering the popover itself cancels the pending close, so
 * the pointer can move from the row onto the panel without it disappearing.
 *
 * @param children  Cells rendered inside the row.
 * @param content   Body of the popover.
 * @param className Extra classes applied to the `<tr>`.
 */
function HoverRow({ children, content, className = '' }: {
  children: React.ReactNode;
  content: React.ReactNode;
  className?: string;
}) {
  const [isOpen, setIsOpen] = useState(false);
  const [position, setPosition] = useState({ x: 0, y: 0 });
  const rowRef = useRef<HTMLTableRowElement>(null);
  const timeoutRef = useRef<NodeJS.Timeout>();

  // Drops whichever open/close timer is currently scheduled.
  const cancelTimer = () => clearTimeout(timeoutRef.current);

  const handleMouseEnter = () => {
    cancelTimer();
    timeoutRef.current = setTimeout(() => {
      const row = rowRef.current;
      if (row) {
        // Anchor the popover at the horizontal centre of the row's top edge.
        const box = row.getBoundingClientRect();
        setPosition({ x: box.left + box.width / 2, y: box.top });
      }
      setIsOpen(true);
    }, 300);
  };

  const handleMouseLeave = () => {
    cancelTimer();
    timeoutRef.current = setTimeout(() => setIsOpen(false), 100);
  };

  // The popover lives in an extra cell so it stays a valid child of <tr>;
  // the inner wrapper is position: fixed and placed from the measured rect.
  const popover = isOpen && (
    <td className="absolute" style={{ padding: 0, border: 'none' }}>
      <div
        className="fixed z-50 animate-in fade-in-0 zoom-in-95"
        style={{ left: position.x, top: position.y - 8, transform: 'translate(-50%, -100%)' }}
        onMouseEnter={() => clearTimeout(timeoutRef.current)}
        onMouseLeave={handleMouseLeave}
      >
        <div className="bg-white rounded-lg border border-slate-200 shadow-lg p-4 min-w-[280px] max-w-[350px]">
          {content}
        </div>
      </div>
    </td>
  );

  return (
    <tr
      ref={rowRef}
      onMouseEnter={handleMouseEnter}
      onMouseLeave={handleMouseLeave}
      className={className}
    >
      {children}
      {popover}
    </tr>
  );
}
|
|
183
|
-
|
|
184
|
-
// Card Component - shadcn/ui pattern
/**
 * White rounded container with a border and a light shadow.
 *
 * @param children  Card content.
 * @param className Extra classes appended after the base (and hover) classes.
 * @param hover     When true, adds a stronger shadow and darker border on hover.
 */
function Card({ children, className = '', hover = false }: { children: React.ReactNode; className?: string; hover?: boolean }) {
  const hoverClasses = hover ? 'hover:shadow-md hover:border-slate-300 transition-all duration-200' : '';
  // Joined with single spaces so the resulting class string matches the template form exactly.
  const classes = ['bg-white rounded-xl border border-slate-200 shadow-sm', hoverClasses, className].join(' ');
  return <div className={classes}>{children}</div>;
}
|
|
192
|
-
|
|
193
|
-
/** Padded top section of a Card, separated from the body by a bottom border. */
function CardHeader({ children, className = '' }: { children: React.ReactNode; className?: string }) {
  const classes = 'px-6 py-4 border-b border-slate-100 ' + className;
  return (
    <div className={classes}>
      {children}
    </div>
  );
}
|
|
196
|
-
|
|
197
|
-
/** Small, semibold `<h3>` heading for use inside a card header. */
function CardTitle({ children, className = '' }: { children: React.ReactNode; className?: string }) {
  const classes = 'text-sm font-semibold text-slate-900 ' + className;
  return (
    <h3 className={classes}>
      {children}
    </h3>
  );
}
|
|
200
|
-
|
|
201
|
-
/** Muted extra-small paragraph, rendered with a slight top margin. */
function CardDescription({ children }: { children: React.ReactNode }) {
  return (
    <p className="text-xs text-slate-500 mt-0.5">
      {children}
    </p>
  );
}
|
|
204
|
-
|
|
205
|
-
/** Padded body wrapper; `className` is appended after the default `p-6`. */
function CardContent({ children, className = '' }: { children: React.ReactNode; className?: string }) {
  const classes = 'p-6 ' + className;
  return (
    <div className={classes}>
      {children}
    </div>
  );
}
|
|
208
|
-
|
|
209
|
-
// Badge Component
/**
 * Small inline pill label.
 *
 * @param children Badge text or content.
 * @param variant  Colour scheme; defaults to `'default'`.
 */
function Badge({ children, variant = 'default' }: { children: React.ReactNode; variant?: 'default' | 'success' | 'warning' | 'destructive' | 'outline' }) {
  // Colour classes per variant; the shared layout classes are prepended below.
  const variantClasses = {
    default: 'bg-slate-100 text-slate-900',
    success: 'bg-emerald-50 text-emerald-700 border-emerald-200',
    warning: 'bg-amber-50 text-amber-700 border-amber-200',
    destructive: 'bg-red-50 text-red-700 border-red-200',
    outline: 'bg-transparent border-slate-200 text-slate-700',
  };
  const classes = 'inline-flex items-center px-2 py-0.5 text-xs font-medium rounded-md border ' + variantClasses[variant];

  return <span className={classes}>{children}</span>;
}
|
|
224
|
-
|
|
225
|
-
// Stat Card with shadcn/ui styling
/**
 * Summary tile showing a label, a headline value and optional extras.
 *
 * @param label   Upper-cased caption above the value.
 * @param value   Headline figure.
 * @param subtext Optional line under the value.
 * @param icon    Optional element shown in the top-right corner.
 * @param trend   Optional change indicator; a non-negative `value` renders
 *                green with an up arrow, a negative one red with a down arrow.
 */
function StatCard({ label, value, subtext, icon, trend }: {
  label: string;
  value: string | number;
  subtext?: string;
  icon?: React.ReactNode;
  trend?: { value: number; label: string };
}) {
  const trendUp = trend ? trend.value >= 0 : false;

  const trendBlock = trend && (
    <div className={`flex items-center gap-1 text-xs ${trendUp ? 'text-emerald-600' : 'text-red-600'}`}>
      <span>{trendUp ? '↑' : '↓'}</span>
      <span>{Math.abs(trend.value)}% {trend.label}</span>
    </div>
  );

  const iconBlock = icon && (
    <div className="p-2 bg-slate-50 rounded-lg text-slate-600">{icon}</div>
  );

  return (
    <Card hover>
      <CardContent className="p-5">
        <div className="flex items-start justify-between">
          <div className="space-y-1">
            <p className="text-xs font-medium text-slate-500 uppercase tracking-wider">{label}</p>
            <p className="text-2xl font-semibold text-slate-900">{value}</p>
            {subtext && <p className="text-xs text-slate-500">{subtext}</p>}
            {trendBlock}
          </div>
          {iconBlock}
        </div>
      </CardContent>
    </Card>
  );
}
|
|
254
|
-
|
|
255
|
-
// Model Info Hover Content
/**
 * Popover body summarising one leaderboard entry: an avatar with the model's
 * initial, a 2-column grid of headline metrics, and the pricing line.
 *
 * @param entry Leaderboard entry to describe.
 */
function ModelHoverContent({ entry }: { entry: LeaderboardEntry }) {
  const displayName = entry.modelName || entry.agentName;
  // The `|| 1` denominator avoids dividing by zero when no tasks were completed.
  const passRate = ((entry.passedTasks || 0) / (entry.tasksCompleted || 1)) * 100;

  // One descriptor per grid cell; `tone` is the text colour of the value.
  const metrics = [
    { label: 'Score', value: `${entry.avgScore.toFixed(1)}%`, tone: 'text-slate-900' },
    { label: 'Pass Rate', value: `${passRate.toFixed(0)}%`, tone: 'text-slate-900' },
    { label: 'Total Cost', value: `$${(entry.totalCostUSD || 0).toFixed(2)}`, tone: 'text-emerald-600' },
    { label: 'Avg Time', value: `${((entry.avgTimeMs || 0) / 1000).toFixed(0)}s`, tone: 'text-slate-900' },
    { label: 'Input Tokens', value: `${((entry.inputTokens || 0) / 1000).toFixed(0)}K`, tone: 'text-slate-900' },
    { label: 'Output Tokens', value: `${((entry.outputTokens || 0) / 1000).toFixed(0)}K`, tone: 'text-slate-900' },
  ];

  return (
    <div className="space-y-3">
      <div className="flex items-center gap-3">
        <div
          className="w-10 h-10 rounded-lg flex items-center justify-center text-white font-bold"
          style={{ backgroundColor: CHART_COLORS[entry.rank % CHART_COLORS.length] }}
        >
          {displayName.charAt(0)}
        </div>
        <div>
          <div className="font-semibold text-slate-900">{displayName}</div>
          <div className="text-xs text-slate-500">{entry.agentVersion}</div>
        </div>
      </div>
      <div className="grid grid-cols-2 gap-3 text-xs">
        {metrics.map((metric) => (
          <div key={metric.label} className="space-y-0.5">
            <div className="text-slate-500">{metric.label}</div>
            <div className={`font-semibold ${metric.tone}`}>{metric.value}</div>
          </div>
        ))}
      </div>
      <div className="pt-2 border-t border-slate-100">
        <div className="flex items-center justify-between text-xs">
          <span className="text-slate-500">Pricing</span>
          <span className="text-slate-700">
            ${entry.pricingInput}/M in · ${entry.pricingOutput}/M out
          </span>
        </div>
      </div>
    </div>
  );
}
|
|
307
|
-
|
|
308
|
-
// Progress bar component
/**
 * Horizontal progress bar whose fill width is `value / max`, as a percentage.
 *
 * The fill is clamped to the 0–100 % range. A ratio that is not a number
 * (for example `0 / 0`, which happens when every score in a list is zero)
 * renders as an empty bar instead of emitting an invalid `NaN%` width.
 *
 * @param value     Current amount.
 * @param max       Amount that corresponds to a full bar; defaults to 100.
 * @param className Extra classes for the track element.
 * @param color     Fill colour; falls back to `colors.primary`.
 */
function Progress({ value, max = 100, className = '', color }: { value: number; max?: number; className?: string; color?: string }) {
  const rawPercentage = (value / max) * 100;
  const percentage = Number.isNaN(rawPercentage)
    ? 0
    : Math.min(Math.max(rawPercentage, 0), 100);

  return (
    <div className={`h-2 bg-slate-100 rounded-full overflow-hidden ${className}`}>
      <div
        className="h-full rounded-full transition-all duration-300"
        style={{ width: `${percentage}%`, backgroundColor: color || colors.primary }}
      />
    </div>
  );
}
|
|
320
|
-
|
|
321
|
-
/**
 * 404 page: a large "404", a short explanation and a link back to the
 * leaderboard at "/". Styled with inline style objects.
 */
function NotFound() {
  const wrapperStyle: React.CSSProperties = {
    display: 'flex',
    flexDirection: 'column',
    alignItems: 'center',
    justifyContent: 'center',
    minHeight: '60vh',
    textAlign: 'center',
    padding: '2rem'
  };

  const codeStyle: React.CSSProperties = {
    fontSize: '6rem',
    fontWeight: 700,
    color: colors.mutedForeground,
    margin: 0
  };

  const headingStyle: React.CSSProperties = {
    fontSize: '1.5rem',
    fontWeight: 600,
    color: colors.foreground,
    marginTop: '1rem'
  };

  const messageStyle: React.CSSProperties = {
    color: colors.mutedForeground,
    marginTop: '0.5rem',
    maxWidth: '400px'
  };

  const linkStyle: React.CSSProperties = {
    marginTop: '2rem',
    padding: '0.75rem 1.5rem',
    backgroundColor: colors.primary,
    color: colors.primaryForeground,
    borderRadius: '0.5rem',
    textDecoration: 'none',
    fontWeight: 500,
    transition: 'opacity 0.2s'
  };

  return (
    <div style={wrapperStyle}>
      <h1 style={codeStyle}>404</h1>
      <h2 style={headingStyle}>Page Not Found</h2>
      <p style={messageStyle}>
        The page you're looking for doesn't exist or has been moved.
      </p>
      <Link to="/" style={linkStyle}>
        Back to Leaderboard
      </Link>
    </div>
  );
}
|
|
355
|
-
|
|
356
|
-
/**
 * Leaderboard page: four summary tiles, sort controls and the main results
 * table, all driven by the static `leaderboardData.leaderboard` array.
 *
 * An empty leaderboard renders placeholder ("-") tiles and an empty table
 * instead of throwing or printing "undefined%". The maximum score used to
 * scale the per-row progress bars is computed once per render rather than
 * once per row.
 */
function Leaderboard() {
  type SortKey = 'score' | 'cost' | 'speed' | 'efficiency';

  const entries = leaderboardData.leaderboard as LeaderboardEntry[] || [];
  const [sortBy, setSortBy] = useState<SortKey>('score');

  // Score per dollar; a missing or zero cost is treated as $1 to avoid dividing by zero.
  const valuePerDollar = (e: LeaderboardEntry) => e.avgScore / (e.totalCostUSD || 1);

  const sortedEntries = [...entries].sort((a, b) => {
    switch (sortBy) {
      case 'cost':
        return (a.totalCostUSD || 0) - (b.totalCostUSD || 0);
      case 'speed':
        return (a.avgTimeMs || 0) - (b.avgTimeMs || 0);
      case 'efficiency':
        return valuePerDollar(b) - valuePerDollar(a);
      default:
        return b.avgScore - a.avgScore;
    }
  });

  // Returns the entry that wins under `beats`, or undefined when there are no entries.
  // Ties keep the earlier entry, matching a strict comparison seeded with the first element.
  const pickBest = (
    beats: (candidate: LeaderboardEntry, current: LeaderboardEntry) => boolean
  ): LeaderboardEntry | undefined =>
    entries.length > 0
      ? entries.reduce((best, e) => (beats(e, best) ? e : best))
      : undefined;

  const topModel = pickBest((e, best) => e.avgScore > best.avgScore);
  // Missing cost/time values get a large sentinel so they never win "lowest".
  const cheapestModel = pickBest((e, best) => (e.totalCostUSD || 99) < (best.totalCostUSD || 99));
  const fastestModel = pickBest((e, best) => (e.avgTimeMs || 999999) < (best.avgTimeMs || 999999));
  const bestValue = pickBest((e, best) => valuePerDollar(e) > valuePerDollar(best));

  // Hoisted out of the row loop: it is identical for every row.
  const maxScore = Math.max(...entries.map(e => e.avgScore));

  const crowns = ['🥇', '🥈', '🥉'];

  const sortOptions: { key: SortKey; label: string }[] = [
    { key: 'score', label: 'Score' },
    { key: 'cost', label: 'Cost' },
    { key: 'speed', label: 'Speed' },
    { key: 'efficiency', label: 'Value' },
  ];

  const columns = ['Rank', 'Model', 'Score', 'Pass Rate', 'Quality', 'Cost', 'Time', 'Tokens', 'Value'];
  const headerCellClass = 'px-4 py-3 text-left text-xs font-semibold text-slate-600 uppercase tracking-wider';

  return (
    <div className="space-y-8">
      {/* Header */}
      <div className="flex flex-col md:flex-row md:items-end md:justify-between gap-4">
        <div>
          <h1 className="text-2xl font-bold text-slate-900">Leaderboard</h1>
          <p className="text-sm text-slate-500 mt-1">AI coding agent performance on 180 benchmark tasks</p>
        </div>
        <Badge variant="outline">Updated Jan 27, 2026</Badge>
      </div>

      {/* Summary Cards */}
      <div className="grid grid-cols-2 lg:grid-cols-4 gap-4">
        <StatCard
          label="Top Score"
          value={topModel ? `${topModel.avgScore.toFixed(1)}%` : '-'}
          subtext={topModel?.modelName}
          icon={<span className="text-lg">🏆</span>}
        />
        <StatCard
          label="Lowest Cost"
          value={cheapestModel ? `$${(cheapestModel.totalCostUSD || 0).toFixed(2)}` : '-'}
          subtext={cheapestModel?.modelName}
          icon={<span className="text-lg">💰</span>}
        />
        <StatCard
          label="Fastest"
          value={fastestModel ? `${((fastestModel.avgTimeMs || 0) / 1000).toFixed(0)}s avg` : '-'}
          subtext={fastestModel?.modelName}
          icon={<span className="text-lg">⚡</span>}
        />
        <StatCard
          label="Best Value"
          value={bestValue ? `${valuePerDollar(bestValue).toFixed(0)} pts/$` : '-'}
          subtext={bestValue?.modelName}
          icon={<span className="text-lg">✨</span>}
        />
      </div>

      {/* Sort Controls */}
      <div className="flex items-center gap-2 flex-wrap">
        <span className="text-sm text-slate-500">Sort by:</span>
        {sortOptions.map((option) => (
          <button
            key={option.key}
            onClick={() => setSortBy(option.key)}
            className={`px-3 py-1.5 text-sm font-medium rounded-lg transition-all ${
              sortBy === option.key
                ? 'bg-slate-900 text-white'
                : 'bg-slate-100 text-slate-600 hover:bg-slate-200'
            }`}
          >
            {option.label}
          </button>
        ))}
      </div>

      {/* Main Table */}
      <Card>
        <div className="overflow-x-auto">
          <table className="min-w-full">
            <thead>
              <tr className="border-b border-slate-100 bg-slate-50/50">
                {columns.map((column, i) => (
                  <th key={column} className={i === 0 ? `${headerCellClass} w-16` : headerCellClass}>
                    {column}
                  </th>
                ))}
              </tr>
            </thead>
            <tbody className="divide-y divide-slate-100">
              {sortedEntries.map((entry, idx) => {
                const passRate = ((entry.passedTasks || 0) / (entry.tasksCompleted || 1)) * 100;
                const valueScore = valuePerDollar(entry);
                const rowColor = CHART_COLORS[idx % CHART_COLORS.length];
                return (
                  <HoverRow
                    key={entry.agentName + entry.agentVersion}
                    content={<ModelHoverContent entry={entry} />}
                    className="hover:bg-slate-50/80 transition-colors cursor-pointer group"
                  >
                    <td className="px-4 py-4">
                      {idx < 3 ? (
                        <span className="text-xl">{crowns[idx]}</span>
                      ) : (
                        <span className="text-slate-400 font-medium">{idx + 1}</span>
                      )}
                    </td>
                    <td className="px-4 py-4">
                      <div className="flex items-center gap-3">
                        <div
                          className="w-9 h-9 rounded-lg flex items-center justify-center text-white font-semibold text-sm"
                          style={{ backgroundColor: rowColor }}
                        >
                          {(entry.modelName || entry.agentName).charAt(0)}
                        </div>
                        <div>
                          <div className="font-medium text-slate-900 group-hover:text-blue-600 transition-colors">
                            {entry.modelName || entry.agentName}
                          </div>
                          <div className="text-xs text-slate-400">{entry.agentVersion}</div>
                        </div>
                      </div>
                    </td>
                    <td className="px-4 py-4">
                      <div className="flex items-center gap-3">
                        <Progress value={entry.avgScore} max={maxScore} className="w-20" color={rowColor} />
                        <span className="font-semibold text-slate-900">{entry.avgScore.toFixed(1)}%</span>
                      </div>
                    </td>
                    <td className="px-4 py-4">
                      <Badge variant={passRate >= 98 ? 'success' : passRate >= 90 ? 'warning' : 'outline'}>
                        {passRate.toFixed(0)}%
                      </Badge>
                    </td>
                    <td className="px-4 py-4 text-sm text-slate-600">{entry.avgQuality.toFixed(0)}%</td>
                    <td className="px-4 py-4">
                      <span className="font-medium text-emerald-600">${(entry.totalCostUSD || 0).toFixed(2)}</span>
                    </td>
                    <td className="px-4 py-4 text-sm text-slate-600">
                      {entry.avgTimeMs ? `${(entry.avgTimeMs / 1000).toFixed(0)}s` : '-'}
                    </td>
                    <td className="px-4 py-4 text-sm text-slate-500">
                      {((entry.totalTokens || 0) / 1000000).toFixed(2)}M
                    </td>
                    <td className="px-4 py-4">
                      <span className="font-medium text-violet-600">{valueScore.toFixed(0)}</span>
                    </td>
                  </HoverRow>
                );
              })}
            </tbody>
          </table>
        </div>
      </Card>
    </div>
  );
}
|
|
525
|
-
|
|
526
|
-
// Live Dashboard Component
/**
 * Placeholder page for live benchmark monitoring. It currently always shows
 * the "no active runs" empty state together with the CLI command that starts
 * a run.
 */
function LiveDashboard() {
  return (
    <div className="space-y-8">
      <div>
        <h1 className="text-2xl font-bold text-slate-900">Live Benchmark</h1>
        <p className="text-sm text-slate-500 mt-1">Real-time benchmark execution monitoring</p>
      </div>
      <Card>
        <CardContent className="py-16">
          <div className="text-center text-slate-500">
            <div className="text-4xl mb-4">📡</div>
            <p className="font-medium">No active benchmark runs</p>
            {/* Angle brackets are written as entities: a bare <task> in JSX text would be parsed as an element. */}
            <p className="text-sm mt-1">Start a benchmark with: npm run cli -- run &lt;task&gt; -a &lt;agent&gt;</p>
          </div>
        </CardContent>
      </Card>
    </div>
  );
}
|
|
546
|
-
|
|
547
|
-
// Tasks Component
|
|
548
|
-
/** One benchmark task as displayed on the Tasks page. */
interface Task {
  /** Identifier; used as the React key for the task's card. */
  id: string;
  /** Title shown on the card and matched by the search box. */
  name: string;
  /** Category the task is listed under; used by the category filter. */
  category: string;
  /** Difficulty label; 'easy' and 'medium' get their own badge colours, anything else is shown as destructive. */
  difficulty: string;
  /** Description shown on the card and matched by the search box. */
  description: string;
  /** Optional tags; the card shows at most the first three. */
  tags?: string[];
}
|
|
556
|
-
|
|
557
|
-
/**
 * Tasks page: lists benchmark tasks from the static `tasksData.categories`
 * map, with category pills and a free-text search.
 *
 * The category count in the subtitle is derived from the data instead of
 * being hard-coded. Search is a case-insensitive substring match on the task
 * name and description. Only the first `MAX_VISIBLE_TASKS` matches are
 * rendered.
 */
function Tasks() {
  // Upper bound on the number of task cards rendered at once.
  const MAX_VISIBLE_TASKS = 30;

  const [selectedCategory, setSelectedCategory] = useState<string | null>(null);
  const [searchQuery, setSearchQuery] = useState('');

  // Process static data: flatten the category map into one task list plus per-category counts.
  const { tasks, summary } = useMemo(() => {
    const allTasks: Task[] = [];
    const categoryCounts: { category: string; count: number }[] = [];
    Object.entries(tasksData.categories || {}).forEach(([category, categoryTasks]) => {
      const catTasks = categoryTasks as Task[];
      categoryCounts.push({ category, count: catTasks.length });
      // The map key wins over any `category` field stored on the task itself.
      catTasks.forEach((t) => allTasks.push({ ...t, category }));
    });
    return { tasks: allTasks, summary: categoryCounts };
  }, []);

  // Lower-cased once per render rather than once per task.
  const needle = searchQuery.toLowerCase();
  const filteredTasks = tasks.filter((t) => {
    const matchesCategory = !selectedCategory || t.category === selectedCategory;
    const matchesSearch = !needle ||
      t.name.toLowerCase().includes(needle) ||
      t.description.toLowerCase().includes(needle);
    return matchesCategory && matchesSearch;
  });

  const totalTasks = tasks.length;

  // Shared styling for the "All" pill and the per-category pills.
  const pillClass = (active: boolean) =>
    `px-4 py-2 text-sm font-medium rounded-lg transition-all ${
      active
        ? 'bg-slate-900 text-white'
        : 'bg-slate-100 text-slate-600 hover:bg-slate-200'
    }`;

  return (
    <div className="space-y-6">
      <div className="flex flex-col md:flex-row md:items-end md:justify-between gap-4">
        <div>
          <h1 className="text-2xl font-bold text-slate-900">Benchmark Tasks</h1>
          <p className="text-sm text-slate-500 mt-1">{totalTasks} tasks across {summary.length} categories</p>
        </div>
      </div>

      {/* Category Pills */}
      <div className="flex flex-wrap gap-2">
        <button
          onClick={() => setSelectedCategory(null)}
          className={pillClass(!selectedCategory)}
        >
          All ({totalTasks})
        </button>
        {summary.map((cat) => (
          <button
            key={cat.category}
            // Clicking the active category again clears the filter.
            onClick={() => setSelectedCategory(selectedCategory === cat.category ? null : cat.category)}
            className={pillClass(selectedCategory === cat.category)}
          >
            {cat.category.replace(/-/g, ' ')} ({cat.count})
          </button>
        ))}
      </div>

      {/* Search */}
      <div className="relative">
        <input
          type="text"
          placeholder="Search tasks..."
          value={searchQuery}
          onChange={(e) => setSearchQuery(e.target.value)}
          className="w-full px-4 py-3 bg-white border border-slate-200 rounded-xl text-sm focus:outline-none focus:ring-2 focus:ring-blue-500 focus:border-transparent transition-all"
        />
      </div>

      {/* Task Grid */}
      <div className="grid gap-4 md:grid-cols-2 lg:grid-cols-3">
        {filteredTasks.slice(0, MAX_VISIBLE_TASKS).map((task) => (
          <Card key={task.id} hover>
            <CardContent className="p-5">
              <div className="flex items-start justify-between mb-3">
                <Badge variant={task.difficulty === 'easy' ? 'success' : task.difficulty === 'medium' ? 'warning' : 'destructive'}>
                  {task.difficulty}
                </Badge>
                <span className="text-xs text-slate-400">{task.category}</span>
              </div>
              <h3 className="font-semibold text-slate-900 mb-2">{task.name}</h3>
              <p className="text-sm text-slate-500 line-clamp-2">{task.description}</p>
              {task.tags && task.tags.length > 0 && (
                <div className="mt-3 flex flex-wrap gap-1">
                  {task.tags.slice(0, 3).map((tag) => (
                    <span key={tag} className="px-2 py-0.5 text-xs bg-slate-100 text-slate-600 rounded-md">
                      {tag}
                    </span>
                  ))}
                </div>
              )}
            </CardContent>
          </Card>
        ))}
      </div>
    </div>
  );
}
|
|
659
|
-
|
|
660
|
-
// Charts/Analytics Component
/**
 * Analytics page: four charts (score, cost, time, cost-vs-score scatter) and
 * a detailed metrics table, all built from `leaderboardData.leaderboard`
 * sorted by descending score.
 *
 * The scatter chart's Y axis defaults to the 55–95 window but widens to
 * include any score outside it, so no model is drawn off-chart. When every
 * score already fits in 55–95 the axis is unchanged.
 */
function Charts() {
  const entries = leaderboardData.leaderboard as LeaderboardEntry[] || [];

  const sortedByScore = [...entries].sort((a, b) => b.avgScore - a.avgScore);

  // Chart labels keep only the first two words of the model name to stay readable.
  const shortName = (e: LeaderboardEntry) => (e.modelName || e.agentName).split(' ').slice(0, 2).join(' ');
  const labels = sortedByScore.map(shortName);
  const paletteAt = (i: number) => CHART_COLORS[i % CHART_COLORS.length];

  const baseOptions = {
    responsive: true,
    maintainAspectRatio: false,
    plugins: {
      legend: { display: false },
      tooltip: {
        backgroundColor: '#ffffff',
        titleColor: '#0f172a',
        bodyColor: '#64748b',
        borderColor: '#e2e8f0',
        borderWidth: 1,
        padding: 12,
        cornerRadius: 8,
        displayColors: true,
      },
    },
    scales: {
      y: { beginAtZero: true, grid: { color: '#f1f5f9' }, ticks: { color: '#64748b', font: { size: 11 } } },
      x: { grid: { display: false }, ticks: { color: '#64748b', font: { size: 10 }, maxRotation: 45 } },
    },
  };

  // Builds a single-dataset bar chart config sharing the common labels and bar styling.
  const barData = (label: string, data: number[], backgroundColor: string | string[], borderColor: string | string[]) => ({
    labels,
    datasets: [{
      label,
      data,
      backgroundColor,
      borderColor,
      borderWidth: 2,
      borderRadius: 6,
    }],
  });

  const scoreData = barData(
    'Score',
    sortedByScore.map(e => e.avgScore),
    // The '80' suffix is the hex alpha channel, giving a half-transparent fill.
    sortedByScore.map((_, i) => paletteAt(i) + '80'),
    sortedByScore.map((_, i) => paletteAt(i)),
  );

  const costData = barData('Cost ($)', sortedByScore.map(e => e.totalCostUSD || 0), '#22c55e80', '#22c55e');

  const timeData = barData('Time (s)', sortedByScore.map(e => (e.avgTimeMs || 0) / 1000), '#f59e0b80', '#f59e0b');

  const scatterData = {
    datasets: sortedByScore.map((e, i) => ({
      label: shortName(e),
      data: [{ x: e.totalCostUSD || 0, y: e.avgScore }],
      backgroundColor: paletteAt(i),
      borderColor: paletteAt(i),
      pointRadius: 10,
      pointHoverRadius: 14,
    })),
  };

  // Widen the default 55–95 window only when a score falls outside it.
  // With no entries Math.min()/Math.max() return ±Infinity, so the defaults win.
  const allScores = sortedByScore.map(e => e.avgScore);
  const scatterYMin = Math.min(55, Math.floor(Math.min(...allScores)));
  const scatterYMax = Math.max(95, Math.ceil(Math.max(...allScores)));

  const scoreOptions = {
    ...baseOptions,
    scales: { ...baseOptions.scales, y: { ...baseOptions.scales.y, max: 100 } },
  };

  const scatterOptions = {
    ...baseOptions,
    plugins: { ...baseOptions.plugins, legend: { display: true, position: 'bottom' as const, labels: { usePointStyle: true, padding: 8, font: { size: 9 } } } },
    scales: {
      x: { ...baseOptions.scales.x, title: { display: true, text: 'Cost ($)', color: '#64748b' } },
      y: { ...baseOptions.scales.y, min: scatterYMin, max: scatterYMax, title: { display: true, text: 'Score (%)', color: '#64748b' } },
    },
  };

  const crowns = ['🥇', '🥈', '🥉'];

  const columns = ['#', 'Model', 'Score', 'Pass', 'Functional', 'Quality', 'Cost', 'Time', 'Tokens', 'Value'];

  // Wraps one chart in the standard titled card with a fixed-height plot area.
  const renderChartCard = (title: string, description: string, chart: React.ReactNode) => (
    <Card>
      <CardHeader>
        <CardTitle>{title}</CardTitle>
        <CardDescription>{description}</CardDescription>
      </CardHeader>
      <CardContent>
        <div className="h-72">
          {chart}
        </div>
      </CardContent>
    </Card>
  );

  return (
    <div className="space-y-8">
      <div>
        <h1 className="text-2xl font-bold text-slate-900">Analytics</h1>
        <p className="text-sm text-slate-500 mt-1">Performance metrics and comparisons</p>
      </div>

      {/* Charts Grid */}
      <div className="grid grid-cols-1 lg:grid-cols-2 gap-6">
        {renderChartCard(
          'Score Distribution',
          'Overall benchmark scores by model',
          <Bar data={scoreData} options={scoreOptions} />
        )}

        {renderChartCard(
          'Cost Comparison',
          'Total cost in USD for 180 tasks',
          <Bar data={costData} options={baseOptions} />
        )}

        {renderChartCard(
          'Execution Time',
          'Average time per task in seconds',
          <Bar data={timeData} options={baseOptions} />
        )}

        {renderChartCard(
          'Cost vs Score',
          'Efficiency visualization (top-left is best)',
          <Scatter data={scatterData} options={scatterOptions} />
        )}
      </div>

      {/* Data Table */}
      <Card>
        <CardHeader>
          <CardTitle>Detailed Metrics</CardTitle>
          <CardDescription>Complete performance data for all models</CardDescription>
        </CardHeader>
        <div className="overflow-x-auto">
          <table className="min-w-full text-sm">
            <thead>
              <tr className="bg-slate-50/50 border-b border-slate-100">
                {columns.map((column) => (
                  <th key={column} className="px-4 py-3 text-left text-xs font-semibold text-slate-600 uppercase">{column}</th>
                ))}
              </tr>
            </thead>
            <tbody className="divide-y divide-slate-100">
              {sortedByScore.map((e, i) => {
                const cost = e.totalCostUSD || 0;
                const passRate = (e.passedTasks || 0) / (e.tasksCompleted || 1) * 100;
                return (
                  <HoverRow
                    key={e.agentName + e.agentVersion}
                    content={<ModelHoverContent entry={e} />}
                    className="hover:bg-slate-50/80 transition-colors cursor-pointer"
                  >
                    <td className="px-4 py-3">
                      {i < 3 ? (
                        <span className="text-lg">{crowns[i]}</span>
                      ) : (
                        <span className="text-slate-400">{i + 1}</span>
                      )}
                    </td>
                    <td className="px-4 py-3 font-medium text-slate-900">{e.modelName || e.agentName}</td>
                    <td className="px-4 py-3 font-semibold text-blue-600">{e.avgScore.toFixed(1)}%</td>
                    <td className="px-4 py-3 text-slate-600">{passRate.toFixed(0)}%</td>
                    <td className="px-4 py-3 text-slate-600">{e.avgFunctional.toFixed(0)}%</td>
                    <td className="px-4 py-3 text-slate-600">{e.avgQuality.toFixed(0)}%</td>
                    <td className="px-4 py-3 font-medium text-emerald-600">${cost.toFixed(2)}</td>
                    <td className="px-4 py-3 text-slate-600">{((e.avgTimeMs || 0) / 1000).toFixed(0)}s</td>
                    <td className="px-4 py-3 text-slate-500">{((e.totalTokens || 0) / 1000000).toFixed(2)}M</td>
                    <td className="px-4 py-3 font-medium text-violet-600">
                      {/* A free (or unpriced) run has unbounded score-per-dollar. */}
                      {cost > 0 ? (e.avgScore / cost).toFixed(0) : '∞'}
                    </td>
                  </HoverRow>
                );
              })}
            </tbody>
          </table>
        </div>
      </Card>
    </div>
  );
}
|
|
859
|
-
|
|
860
|
-
// Category Performance Component
|
|
861
|
-
/**
 * Per-category performance record: one task category together with the
 * aggregate metrics of each model on that category.
 */
interface CategoryPerformance {
  /** Category identifier; hyphens are replaced by spaces when used as a chart label. */
  category: string;
  /** One aggregate record per model for this category. */
  models: Array<{
    /** Model name; used to match against the user's model selection. */
    modelName: string;
    avgScore: number;
    passRate: number;
    avgTokens: number;
    /** Presumably milliseconds, going by the name — confirm against the data file. */
    avgTimeMs: number;
    avgCost: number;
  }>;
}
|
|
872
|
-
|
|
873
|
-
/**
 * "Category Performance" page.
 *
 * Reads the per-category aggregates from `categoryPerformanceData`, lets the
 * user toggle which models are compared (initially the first five, at most
 * seven), and renders:
 *   - bar charts for score, pass rate, cost, time and tokens by category,
 *   - a radar chart of score by category (first five selected models only),
 *   - a breakdown table with a hover card per (category, model) cell.
 *
 * A selected model that has no entry for a category is charted as 0 and shown
 * in the table / hover card with an explicit placeholder instead of a bare
 * "%" or "$".
 */
function TaskPerformance() {
  const categoryData = categoryPerformanceData.performance as CategoryPerformance[] || [];
  const models = categoryPerformanceData.models as string[] || [];
  const [selectedModels, setSelectedModels] = useState<string[]>(models.slice(0, 5));

  // Text shown where a model has no data for a category.
  const MISSING = '—';

  // A model's colour is tied to its position in the full `models` list so that
  // selector buttons and every chart agree regardless of the selection order.
  const colorFor = (modelName: string) =>
    CHART_COLORS[models.indexOf(modelName) % CHART_COLORS.length];

  // Display label: the first two space-separated words of the model name.
  const shortName = (modelName: string) => modelName.split(' ').slice(0, 2).join(' ');

  const categories = categoryData.map(c => c.category.replace(/-/g, ' '));
  const filteredData = categoryData.map(cat => ({
    ...cat,
    models: cat.models.filter(m => selectedModels.includes(m.modelName)),
  }));

  const chartOptions = {
    responsive: true,
    maintainAspectRatio: false,
    plugins: {
      legend: { position: 'bottom' as const, labels: { usePointStyle: true, padding: 12, font: { size: 10 } } },
      tooltip: {
        backgroundColor: '#ffffff',
        titleColor: '#0f172a',
        bodyColor: '#64748b',
        borderColor: '#e2e8f0',
        borderWidth: 1,
        padding: 12,
        cornerRadius: 8,
      },
    },
    scales: {
      y: { beginAtZero: true, grid: { color: '#f1f5f9' }, ticks: { color: '#64748b', font: { size: 11 } } },
      x: { grid: { display: false }, ticks: { color: '#64748b', font: { size: 10 } } },
    },
  };

  // Builds bar-chart data: one dataset per selected model, one value per
  // category, extracted by `metricFn`. Missing entries are charted as 0.
  const createDataset = (metricFn: (m: CategoryPerformance['models'][0]) => number) => ({
    labels: categories,
    datasets: selectedModels.map((modelName) => ({
      label: shortName(modelName),
      data: filteredData.map(cat => {
        const m = cat.models.find(x => x.modelName === modelName);
        return m ? metricFn(m) : 0;
      }),
      backgroundColor: colorFor(modelName) + '80',
      borderColor: colorFor(modelName),
      borderWidth: 2,
      borderRadius: 4,
    })),
  });

  // The radar chart is limited to the first five selected models.
  const radarData = {
    labels: categories,
    datasets: selectedModels.slice(0, 5).map((modelName) => ({
      label: shortName(modelName),
      data: filteredData.map(cat => cat.models.find(m => m.modelName === modelName)?.avgScore || 0),
      backgroundColor: colorFor(modelName) + '15',
      borderColor: colorFor(modelName),
      borderWidth: 2,
      pointBackgroundColor: colorFor(modelName),
      pointRadius: 3,
    })),
  };

  const crowns = ['🥇', '🥈', '🥉'];

  return (
    <div className="space-y-8">
      <div>
        <h1 className="text-2xl font-bold text-slate-900">Category Performance</h1>
        <p className="text-sm text-slate-500 mt-1">Breakdown across 6 task categories (30 tasks each)</p>
      </div>

      {/* Model Selector */}
      <Card>
        <CardContent className="p-5">
          <div className="text-sm font-medium text-slate-700 mb-3">Select models to compare:</div>
          <div className="flex flex-wrap gap-2">
            {models.map((model) => (
              <button
                key={model}
                onClick={() => {
                  // Toggle the model; adding is refused once seven are selected.
                  if (selectedModels.includes(model)) {
                    setSelectedModels(selectedModels.filter(m => m !== model));
                  } else if (selectedModels.length < 7) {
                    setSelectedModels([...selectedModels, model]);
                  }
                }}
                className={`px-3 py-1.5 text-sm font-medium rounded-lg border-2 transition-all ${
                  selectedModels.includes(model)
                    ? 'text-white'
                    : 'border-slate-200 text-slate-600 hover:border-slate-300 bg-white'
                }`}
                style={{
                  backgroundColor: selectedModels.includes(model) ? colorFor(model) : undefined,
                  borderColor: selectedModels.includes(model) ? colorFor(model) : undefined,
                }}
              >
                {shortName(model)}
              </button>
            ))}
          </div>
        </CardContent>
      </Card>

      {/* Charts */}
      <div className="grid grid-cols-1 lg:grid-cols-2 gap-6">
        <Card>
          <CardHeader>
            <CardTitle>Score by Category</CardTitle>
          </CardHeader>
          <CardContent>
            <div className="h-72">
              <Bar data={createDataset(m => m.avgScore)} options={{ ...chartOptions, scales: { ...chartOptions.scales, y: { ...chartOptions.scales.y, max: 100 } } }} />
            </div>
          </CardContent>
        </Card>

        <Card>
          <CardHeader>
            <CardTitle>Pass Rate by Category</CardTitle>
          </CardHeader>
          <CardContent>
            <div className="h-72">
              <Bar data={createDataset(m => m.passRate)} options={{ ...chartOptions, scales: { ...chartOptions.scales, y: { ...chartOptions.scales.y, max: 110 } } }} />
            </div>
          </CardContent>
        </Card>

        <Card>
          <CardHeader>
            <CardTitle>Cost by Category ($)</CardTitle>
          </CardHeader>
          <CardContent>
            <div className="h-72">
              <Bar data={createDataset(m => m.avgCost)} options={chartOptions} />
            </div>
          </CardContent>
        </Card>

        <Card>
          <CardHeader>
            <CardTitle>Time by Category (seconds)</CardTitle>
          </CardHeader>
          <CardContent>
            <div className="h-72">
              <Bar data={createDataset(m => m.avgTimeMs / 1000)} options={chartOptions} />
            </div>
          </CardContent>
        </Card>

        <Card>
          <CardHeader>
            <CardTitle>Tokens by Category (K)</CardTitle>
          </CardHeader>
          <CardContent>
            <div className="h-72">
              <Bar data={createDataset(m => m.avgTokens / 1000)} options={chartOptions} />
            </div>
          </CardContent>
        </Card>

        <Card>
          <CardHeader>
            <CardTitle>Category Strength</CardTitle>
          </CardHeader>
          <CardContent>
            <div className="h-72">
              <Radar data={radarData} options={{
                responsive: true,
                maintainAspectRatio: false,
                plugins: { legend: { position: 'bottom' as const, labels: { usePointStyle: true, padding: 10, font: { size: 10 } } } },
                scales: { r: { beginAtZero: true, max: 100, ticks: { stepSize: 25, display: false }, grid: { color: '#e2e8f0' }, angleLines: { color: '#e2e8f0' } } },
              }} />
            </div>
          </CardContent>
        </Card>
      </div>

      {/* Table */}
      <Card>
        <CardHeader>
          <CardTitle>Category Breakdown</CardTitle>
        </CardHeader>
        <div className="overflow-x-auto">
          <table className="min-w-full text-sm">
            <thead>
              <tr className="bg-slate-50/50 border-b border-slate-100">
                <th className="px-4 py-3 text-left text-xs font-semibold text-slate-600 uppercase">Category</th>
                {selectedModels.map(model => (
                  <th key={model} className="px-3 py-3 text-left text-xs font-semibold text-slate-600 uppercase" style={{ minWidth: 90 }}>
                    {shortName(model)}
                  </th>
                ))}
              </tr>
            </thead>
            <tbody className="divide-y divide-slate-100">
              {categoryData.map((cat, i) => {
                // Medals go to the three best average scores among ALL models
                // in the category, not only the currently selected ones.
                const sorted = [...cat.models].sort((a, b) => b.avgScore - a.avgScore);
                const top3 = sorted.slice(0, 3).map(m => m.modelName);
                const getRank = (modelName: string) => top3.indexOf(modelName);

                return (
                  <tr key={cat.category} className={i % 2 === 0 ? 'bg-slate-50/30' : ''}>
                    <td className="px-4 py-3 font-medium text-slate-800 capitalize">{cat.category.replace(/-/g, ' ')}</td>
                    {selectedModels.map(model => {
                      const m = cat.models.find(x => x.modelName === model);
                      const rank = getRank(model);
                      // Pre-format every figure so a missing entry shows the
                      // placeholder rather than a dangling unit symbol.
                      const scoreText = m ? `${m.avgScore.toFixed(1)}%` : MISSING;
                      const passText = m ? `${m.passRate.toFixed(0)}%` : MISSING;
                      const costText = m ? `$${m.avgCost.toFixed(3)}` : MISSING;
                      const timeText = m ? `${((m.avgTimeMs || 0) / 1000).toFixed(0)}s` : MISSING;
                      return (
                        <td key={model} className="px-3 py-3">
                          <HoverCard content={
                            <div className="space-y-2">
                              <div className="font-semibold text-slate-900">{model}</div>
                              <div className="text-xs text-slate-500">{cat.category.replace(/-/g, ' ')}</div>
                              <div className="grid grid-cols-2 gap-2 text-xs pt-2">
                                <div>Score: <span className="font-semibold">{scoreText}</span></div>
                                <div>Pass: <span className="font-semibold">{passText}</span></div>
                                <div>Cost: <span className="font-semibold text-emerald-600">{costText}</span></div>
                                <div>Time: <span className="font-semibold">{timeText}</span></div>
                              </div>
                            </div>
                          }>
                            <div className="cursor-pointer hover:bg-slate-100 rounded px-1 -mx-1 transition-colors">
                              <div className="font-medium flex items-center gap-1 text-blue-600">
                                {rank >= 0 && <span>{crowns[rank]}</span>}
                                {scoreText}
                              </div>
                              <div className="text-xs text-slate-400">{costText}</div>
                            </div>
                          </HoverCard>
                        </td>
                      );
                    })}
                  </tr>
                );
              })}
            </tbody>
          </table>
        </div>
      </Card>
    </div>
  );
}
|
|
1112
|
-
|
|
1113
|
-
// Per-Task Performance Charts Component
|
|
1114
|
-
/**
 * Results of every model on one benchmark task.
 */
interface TaskResult {
  /** Task identifier; the segment after the last "/" is used as a short title. */
  taskId: string;
  /** Category the task belongs to; used by the category filter. */
  category: string;
  /** Subcategory, shown in the task hover card. */
  subcategory: string;
  /** One entry per model that has a result for this task. */
  results: {
    /** Model name; used to match entries against the model selector. */
    modelName: string;
    /** Overall score, rendered as a percentage. */
    score: number;
    functional: number;
    quality: number;
    /** Whether the model passed the task. */
    passed: boolean;
    /** Token count; displayed in thousands. */
    tokens: number;
    timeMs: number;
    /** Cost, rendered with a dollar sign. */
    cost: number;
  }[];
}
|
|
1129
|
-
|
|
1130
|
-
/**
 * "Per-Task Performance" page.
 *
 * Reads per-task results from `taskResultsData` and shows, for the selected
 * models (initially the first five, at most seven), either a line chart of
 * score per task or a colour-coded heatmap table, plus summary cards for the
 * first four selected models. Tasks can be narrowed to one category; with no
 * category selected only the first 30 tasks are used.
 *
 * A model's colour is always derived from its position in the full `models`
 * list, so the selector buttons, chart lines and summary-card dots stay in
 * agreement when the selection changes.
 */
function PerTaskCharts() {
  const taskResults = taskResultsData.tasks as TaskResult[] || [];
  const models = taskResultsData.models as string[] || [];
  const categories = taskResultsData.categories as string[] || [];
  const [selectedCategory, setSelectedCategory] = useState<string | null>(null);
  const [selectedModels, setSelectedModels] = useState<string[]>(models.slice(0, 5));
  const [viewMode, setViewMode] = useState<'chart' | 'heatmap'>('chart');

  // Colour keyed by position in `models` (not in `selectedModels`), matching
  // the colour painted on the model's selector button.
  const colorFor = (modelName: string) =>
    CHART_COLORS[models.indexOf(modelName) % CHART_COLORS.length];

  // Display label: the first two space-separated words of the model name.
  const shortName = (modelName: string) => modelName.split(' ').slice(0, 2).join(' ');

  const filteredTasks = selectedCategory
    ? taskResults.filter(t => t.category === selectedCategory)
    : taskResults.slice(0, 30);

  const chartData = {
    labels: filteredTasks.map((_, i) => `Task ${i + 1}`),
    datasets: selectedModels.map((modelName) => ({
      label: shortName(modelName),
      // A task without a result for this model is plotted as 0.
      data: filteredTasks.map(task => {
        const result = task.results.find(r => r.modelName === modelName);
        return result?.score || 0;
      }),
      borderColor: colorFor(modelName),
      backgroundColor: colorFor(modelName) + '20',
      borderWidth: 2,
      tension: 0.3,
      fill: false,
      pointRadius: 3,
      pointHoverRadius: 6,
    })),
  };

  const chartOptions = {
    responsive: true,
    maintainAspectRatio: false,
    plugins: {
      legend: { position: 'bottom' as const, labels: { usePointStyle: true, padding: 12, font: { size: 10 } } },
      tooltip: {
        backgroundColor: '#ffffff',
        titleColor: '#0f172a',
        bodyColor: '#64748b',
        borderColor: '#e2e8f0',
        borderWidth: 1,
        padding: 12,
        cornerRadius: 8,
        callbacks: {
          // Tooltip title: last path segment of the task id, falling back to
          // the positional "Task N" label.
          title: (items: any[]) => {
            const idx = items[0]?.dataIndex;
            if (idx !== undefined && filteredTasks[idx]) {
              return filteredTasks[idx].taskId.split('/').pop() || `Task ${idx + 1}`;
            }
            return '';
          },
        },
      },
    },
    scales: {
      y: { beginAtZero: true, max: 100, grid: { color: '#f1f5f9' }, ticks: { color: '#64748b', font: { size: 11 } } },
      x: { grid: { display: false }, ticks: { color: '#64748b', font: { size: 9 }, maxRotation: 0 } },
    },
  };

  // Heatmap cell colour for a score, in 10-point bands from 90 down to 60.
  const getScoreColor = (score: number) => {
    if (score >= 90) return '#22c55e';
    if (score >= 80) return '#84cc16';
    if (score >= 70) return '#eab308';
    if (score >= 60) return '#f97316';
    return '#ef4444';
  };

  return (
    <div className="space-y-8">
      <div className="flex flex-col md:flex-row md:items-end md:justify-between gap-4">
        <div>
          <h1 className="text-2xl font-bold text-slate-900">Per-Task Performance</h1>
          <p className="text-sm text-slate-500 mt-1">Model performance on individual benchmark tasks</p>
        </div>
        <div className="flex gap-2">
          <button
            onClick={() => setViewMode('chart')}
            className={`px-3 py-1.5 text-sm font-medium rounded-lg transition-all ${
              viewMode === 'chart' ? 'bg-slate-900 text-white' : 'bg-slate-100 text-slate-600 hover:bg-slate-200'
            }`}
          >
            Line Chart
          </button>
          <button
            onClick={() => setViewMode('heatmap')}
            className={`px-3 py-1.5 text-sm font-medium rounded-lg transition-all ${
              viewMode === 'heatmap' ? 'bg-slate-900 text-white' : 'bg-slate-100 text-slate-600 hover:bg-slate-200'
            }`}
          >
            Heatmap
          </button>
        </div>
      </div>

      {/* Category Filter */}
      <div className="flex flex-wrap gap-2">
        <button
          onClick={() => setSelectedCategory(null)}
          className={`px-4 py-2 text-sm font-medium rounded-lg transition-all ${
            !selectedCategory ? 'bg-slate-900 text-white' : 'bg-slate-100 text-slate-600 hover:bg-slate-200'
          }`}
        >
          All Categories
        </button>
        {categories.map((cat) => (
          <button
            key={cat}
            // Clicking the active category again clears the filter.
            onClick={() => setSelectedCategory(selectedCategory === cat ? null : cat)}
            className={`px-4 py-2 text-sm font-medium rounded-lg transition-all ${
              selectedCategory === cat ? 'bg-slate-900 text-white' : 'bg-slate-100 text-slate-600 hover:bg-slate-200'
            }`}
          >
            {cat.replace(/-/g, ' ')}
          </button>
        ))}
      </div>

      {/* Model Selector */}
      <Card>
        <CardContent className="p-5">
          <div className="text-sm font-medium text-slate-700 mb-3">Select models to compare:</div>
          <div className="flex flex-wrap gap-2">
            {models.map((model) => (
              <button
                key={model}
                onClick={() => {
                  // Toggle the model; adding is refused once seven are selected.
                  if (selectedModels.includes(model)) {
                    setSelectedModels(selectedModels.filter(m => m !== model));
                  } else if (selectedModels.length < 7) {
                    setSelectedModels([...selectedModels, model]);
                  }
                }}
                className={`px-3 py-1.5 text-sm font-medium rounded-lg border-2 transition-all ${
                  selectedModels.includes(model)
                    ? 'text-white'
                    : 'border-slate-200 text-slate-600 hover:border-slate-300 bg-white'
                }`}
                style={{
                  backgroundColor: selectedModels.includes(model) ? colorFor(model) : undefined,
                  borderColor: selectedModels.includes(model) ? colorFor(model) : undefined,
                }}
              >
                {shortName(model)}
              </button>
            ))}
          </div>
        </CardContent>
      </Card>

      {viewMode === 'chart' ? (
        <Card>
          <CardHeader>
            <CardTitle>Score Trend Across Tasks</CardTitle>
            <CardDescription>
              {selectedCategory ? `${selectedCategory.replace(/-/g, ' ')} - ${filteredTasks.length} tasks` : `Showing first 30 tasks`}
            </CardDescription>
          </CardHeader>
          <CardContent>
            <div className="h-96">
              <Line data={chartData} options={chartOptions} />
            </div>
          </CardContent>
        </Card>
      ) : (
        <Card>
          <CardHeader>
            <CardTitle>Score Heatmap</CardTitle>
            {/* "<" must be written as an entity inside JSX text. */}
            <CardDescription>Color indicates score: green (90+) → yellow (70-80) → red (&lt;60)</CardDescription>
          </CardHeader>
          <div className="overflow-x-auto">
            <table className="min-w-full text-xs">
              <thead>
                <tr className="bg-slate-50/50 border-b border-slate-100">
                  <th className="px-3 py-2 text-left font-semibold text-slate-600 sticky left-0 bg-slate-50">Task</th>
                  {selectedModels.map(model => (
                    <th key={model} className="px-2 py-2 text-center font-semibold text-slate-600" style={{ minWidth: 60 }}>
                      {model.split(' ')[0]}
                    </th>
                  ))}
                </tr>
              </thead>
              <tbody className="divide-y divide-slate-100">
                {/* The heatmap shows at most 50 rows. */}
                {filteredTasks.slice(0, 50).map((task, i) => (
                  <tr key={task.taskId} className="hover:bg-slate-50/50">
                    <td className="px-3 py-1.5 font-medium text-slate-700 sticky left-0 bg-white">
                      <HoverCard content={
                        <div className="space-y-2">
                          <div className="font-semibold text-slate-900">{task.taskId}</div>
                          <div className="text-xs text-slate-500">Category: {task.category}</div>
                          <div className="text-xs text-slate-500">Subcategory: {task.subcategory}</div>
                        </div>
                      }>
                        <span className="cursor-pointer hover:text-blue-600">T{i + 1}</span>
                      </HoverCard>
                    </td>
                    {selectedModels.map(modelName => {
                      const result = task.results.find(r => r.modelName === modelName);
                      const score = result?.score || 0;
                      return (
                        <td key={modelName} className="px-2 py-1.5 text-center">
                          <HoverCard content={
                            <div className="space-y-2">
                              <div className="font-semibold text-slate-900">{modelName}</div>
                              <div className="text-xs text-slate-500">{task.taskId.split('/').pop()}</div>
                              <div className="grid grid-cols-2 gap-2 text-xs pt-2">
                                <div>Score: <span className="font-semibold">{score.toFixed(1)}%</span></div>
                                <div>Passed: <span className={result?.passed ? 'text-emerald-600' : 'text-red-600'}>{result?.passed ? 'Yes' : 'No'}</span></div>
                                <div>Tokens: <span className="font-semibold">{((result?.tokens || 0) / 1000).toFixed(1)}K</span></div>
                                <div>Cost: <span className="font-semibold text-emerald-600">${(result?.cost || 0).toFixed(4)}</span></div>
                              </div>
                            </div>
                          }>
                            <div
                              className="w-8 h-6 rounded flex items-center justify-center text-white text-xs font-medium cursor-pointer mx-auto"
                              style={{ backgroundColor: getScoreColor(score) }}
                            >
                              {score.toFixed(0)}
                            </div>
                          </HoverCard>
                        </td>
                      );
                    })}
                  </tr>
                ))}
              </tbody>
            </table>
          </div>
        </Card>
      )}

      {/* Summary Stats */}
      <div className="grid grid-cols-2 lg:grid-cols-4 gap-4">
        {selectedModels.slice(0, 4).map((modelName) => {
          const modelResults = filteredTasks.flatMap(t => t.results.filter(r => r.modelName === modelName));
          // `|| 1` avoids dividing by zero when the model has no results.
          const avgScore = modelResults.reduce((sum, r) => sum + r.score, 0) / (modelResults.length || 1);
          const passCount = modelResults.filter(r => r.passed).length;
          return (
            <Card key={modelName} hover>
              <CardContent className="p-4">
                <div className="flex items-center gap-2 mb-2">
                  <div
                    className="w-3 h-3 rounded-full"
                    style={{ backgroundColor: colorFor(modelName) }}
                  />
                  <span className="font-medium text-slate-900 text-sm">{shortName(modelName)}</span>
                </div>
                <div className="text-2xl font-bold text-slate-900">{avgScore.toFixed(1)}%</div>
                <div className="text-xs text-slate-500">{passCount}/{modelResults.length} tasks passed</div>
              </CardContent>
            </Card>
          );
        })}
      </div>
    </div>
  );
}
|
|
1388
|
-
|
|
1389
|
-
// Navigation Link Component
|
|
1390
|
-
/**
 * Navigation link that highlights itself when it points at the current route.
 *
 * A link is active when the current pathname equals `to`, or, for any target
 * other than "/", when the pathname lies underneath `to` as a path segment
 * (e.g. "/tasks/42" activates "/tasks"). Matching on the segment boundary
 * keeps a route that merely shares a string prefix (e.g. "/tasks-archive")
 * from activating the wrong link.
 *
 * @param to       Target path passed to the router `Link`.
 * @param children Link label.
 */
function NavLink({ to, children }: { to: string; children: React.ReactNode }) {
  const { pathname } = useLocation();
  // Compare against "<to>/" so only whole path segments count as a prefix.
  const segmentPrefix = to.endsWith('/') ? to : `${to}/`;
  const isActive = pathname === to || (to !== '/' && pathname.startsWith(segmentPrefix));

  return (
    <Link
      to={to}
      className={`px-4 py-2 text-sm font-medium rounded-lg transition-all ${
        isActive
          ? 'bg-slate-100 text-slate-900'
          : 'text-slate-600 hover:bg-slate-50 hover:text-slate-900'
      }`}
    >
      {children}
    </Link>
  );
}
|
|
1407
|
-
|
|
1408
|
-
// Main App Component
|
|
1409
|
-
/**
 * Application shell: sticky top navigation, routed main content and footer.
 */
export default function App() {
  // Entries of the top navigation bar, in display order.
  const navItems = [
    { to: '/', label: 'Leaderboard' },
    { to: '/charts', label: 'Analytics' },
    { to: '/task-performance', label: 'Categories' },
    { to: '/per-task', label: 'Per-Task' },
    { to: '/tasks', label: 'Tasks' },
  ];
  const repoUrl = 'https://github.com/alt-research/vibe-coding-benchmark-public';
  // Outline of the GitHub mark drawn inside the repository link.
  const githubIconPath =
    'M12 2C6.477 2 2 6.484 2 12.017c0 4.425 2.865 8.18 6.839 9.504.5.092.682-.217.682-.483 0-.237-.008-.868-.013-1.703-2.782.605-3.369-1.343-3.369-1.343-.454-1.158-1.11-1.466-1.11-1.466-.908-.62.069-.608.069-.608 1.003.07 1.531 1.032 1.531 1.032.892 1.53 2.341 1.088 2.91.832.092-.647.35-1.088.636-1.338-2.22-.253-4.555-1.113-4.555-4.951 0-1.093.39-1.988 1.029-2.688-.103-.253-.446-1.272.098-2.65 0 0 .84-.27 2.75 1.026A9.564 9.564 0 0112 6.844c.85.004 1.705.115 2.504.337 1.909-1.296 2.747-1.027 2.747-1.027.546 1.379.202 2.398.1 2.651.64.7 1.028 1.595 1.028 2.688 0 3.848-2.339 4.695-4.566 4.943.359.309.678.92.678 1.855 0 1.338-.012 2.419-.012 2.747 0 .268.18.58.688.482A10.019 10.019 0 0022 12.017C22 6.484 17.522 2 12 2z';

  return (
    <div className="min-h-screen bg-slate-50">
      {/* Top bar: brand, section links, repository link */}
      <nav className="bg-white border-b border-slate-200 sticky top-0 z-50">
        <div className="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8">
          <div className="flex items-center justify-between h-16">
            <div className="flex items-center gap-8">
              <Link to="/" className="flex items-center gap-3">
                <img src="/favicon.svg" alt="VibeCodingBench" className="w-9 h-9" />
                <span className="text-lg font-semibold text-slate-900">VibeCodingBench</span>
              </Link>
              <div className="hidden md:flex items-center gap-1">
                {navItems.map(({ to, label }) => (
                  <NavLink key={to} to={to}>{label}</NavLink>
                ))}
              </div>
            </div>
            <a
              href={repoUrl}
              target="_blank"
              rel="noopener noreferrer"
              className="text-sm text-slate-500 hover:text-slate-900 flex items-center gap-2 transition-colors"
            >
              <svg className="w-5 h-5" fill="currentColor" viewBox="0 0 24 24">
                <path fillRule="evenodd" d={githubIconPath} clipRule="evenodd" />
              </svg>
              GitHub
            </a>
          </div>
        </div>
      </nav>

      {/* Routed page content; unknown paths fall through to NotFound */}
      <main className="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8 py-8">
        <Routes>
          <Route path="/" element={<Leaderboard />} />
          <Route path="/live" element={<LiveDashboard />} />
          <Route path="/tasks" element={<Tasks />} />
          <Route path="/charts" element={<Charts />} />
          <Route path="/task-performance" element={<TaskPerformance />} />
          <Route path="/per-task" element={<PerTaskCharts />} />
          <Route path="*" element={<NotFound />} />
        </Routes>
      </main>

      {/* Footer */}
      <footer className="border-t border-slate-200 bg-white mt-12">
        <div className="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8 py-6">
          <div className="flex items-center justify-between text-sm text-slate-500">
            <span>VibeCodingBench - AI Coding Agent Benchmark</span>
            <span>180 tasks · 14 models · Updated Jan 2026</span>
          </div>
        </div>
      </footer>
    </div>
  );
}
|