gsd-trae 1.0.1 → 1.0.2

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (761):
  1. package/CHANGELOG.md +6 -0
  2. package/assets/screenshot.png +0 -0
  3. package/package.json +9 -2
  4. package/.claude/settings.local.json +0 -8
  5. package/.gitmodules +0 -6
  6. package/.trae/rules/project_rules.md +0 -56
  7. package/.vscode/code-counter/code-counter.db +0 -0
  8. package/.vscode/settings.json +0 -6
  9. package/refs/gsd/.github/CODEOWNERS +0 -2
  10. package/refs/gsd/.github/FUNDING.yml +0 -1
  11. package/refs/gsd/.github/ISSUE_TEMPLATE/bug_report.yml +0 -59
  12. package/refs/gsd/.github/ISSUE_TEMPLATE/feature_request.yml +0 -37
  13. package/refs/gsd/.github/pull_request_template.md +0 -24
  14. package/refs/gsd/.github/workflows/auto-label-issues.yml +0 -21
  15. package/refs/gsd/CHANGELOG.md +0 -1520
  16. package/refs/gsd/LICENSE +0 -21
  17. package/refs/gsd/README.md +0 -704
  18. package/refs/gsd/SECURITY.md +0 -33
  19. package/refs/gsd/agents/gsd-codebase-mapper.md +0 -764
  20. package/refs/gsd/agents/gsd-debugger.md +0 -1246
  21. package/refs/gsd/agents/gsd-executor.md +0 -469
  22. package/refs/gsd/agents/gsd-integration-checker.md +0 -443
  23. package/refs/gsd/agents/gsd-phase-researcher.md +0 -546
  24. package/refs/gsd/agents/gsd-plan-checker.md +0 -690
  25. package/refs/gsd/agents/gsd-planner.md +0 -1275
  26. package/refs/gsd/agents/gsd-project-researcher.md +0 -621
  27. package/refs/gsd/agents/gsd-research-synthesizer.md +0 -239
  28. package/refs/gsd/agents/gsd-roadmapper.md +0 -642
  29. package/refs/gsd/agents/gsd-verifier.md +0 -573
  30. package/refs/gsd/assets/gsd-logo-2000-transparent.png +0 -0
  31. package/refs/gsd/assets/gsd-logo-2000-transparent.svg +0 -17
  32. package/refs/gsd/assets/gsd-logo-2000.png +0 -0
  33. package/refs/gsd/assets/gsd-logo-2000.svg +0 -21
  34. package/refs/gsd/assets/terminal.svg +0 -68
  35. package/refs/gsd/bin/install.js +0 -2090
  36. package/refs/gsd/commands/gsd/add-phase.md +0 -43
  37. package/refs/gsd/commands/gsd/add-tests.md +0 -41
  38. package/refs/gsd/commands/gsd/add-todo.md +0 -47
  39. package/refs/gsd/commands/gsd/audit-milestone.md +0 -36
  40. package/refs/gsd/commands/gsd/check-todos.md +0 -45
  41. package/refs/gsd/commands/gsd/cleanup.md +0 -18
  42. package/refs/gsd/commands/gsd/complete-milestone.md +0 -136
  43. package/refs/gsd/commands/gsd/debug.md +0 -167
  44. package/refs/gsd/commands/gsd/discuss-phase.md +0 -83
  45. package/refs/gsd/commands/gsd/execute-phase.md +0 -41
  46. package/refs/gsd/commands/gsd/health.md +0 -22
  47. package/refs/gsd/commands/gsd/help.md +0 -22
  48. package/refs/gsd/commands/gsd/insert-phase.md +0 -32
  49. package/refs/gsd/commands/gsd/join-discord.md +0 -18
  50. package/refs/gsd/commands/gsd/list-phase-assumptions.md +0 -46
  51. package/refs/gsd/commands/gsd/map-codebase.md +0 -71
  52. package/refs/gsd/commands/gsd/new-milestone.md +0 -44
  53. package/refs/gsd/commands/gsd/new-project.md +0 -42
  54. package/refs/gsd/commands/gsd/new-project.md.bak +0 -1041
  55. package/refs/gsd/commands/gsd/pause-work.md +0 -38
  56. package/refs/gsd/commands/gsd/plan-milestone-gaps.md +0 -34
  57. package/refs/gsd/commands/gsd/plan-phase.md +0 -45
  58. package/refs/gsd/commands/gsd/progress.md +0 -24
  59. package/refs/gsd/commands/gsd/quick.md +0 -41
  60. package/refs/gsd/commands/gsd/reapply-patches.md +0 -110
  61. package/refs/gsd/commands/gsd/remove-phase.md +0 -31
  62. package/refs/gsd/commands/gsd/research-phase.md +0 -189
  63. package/refs/gsd/commands/gsd/resume-work.md +0 -40
  64. package/refs/gsd/commands/gsd/set-profile.md +0 -34
  65. package/refs/gsd/commands/gsd/settings.md +0 -36
  66. package/refs/gsd/commands/gsd/update.md +0 -37
  67. package/refs/gsd/commands/gsd/verify-work.md +0 -38
  68. package/refs/gsd/docs/USER-GUIDE.md +0 -471
  69. package/refs/gsd/docs/context-monitor.md +0 -96
  70. package/refs/gsd/get-shit-done/bin/gsd-tools.cjs +0 -585
  71. package/refs/gsd/get-shit-done/bin/lib/commands.cjs +0 -553
  72. package/refs/gsd/get-shit-done/bin/lib/config.cjs +0 -162
  73. package/refs/gsd/get-shit-done/bin/lib/core.cjs +0 -411
  74. package/refs/gsd/get-shit-done/bin/lib/frontmatter.cjs +0 -299
  75. package/refs/gsd/get-shit-done/bin/lib/init.cjs +0 -710
  76. package/refs/gsd/get-shit-done/bin/lib/milestone.cjs +0 -215
  77. package/refs/gsd/get-shit-done/bin/lib/phase.cjs +0 -870
  78. package/refs/gsd/get-shit-done/bin/lib/roadmap.cjs +0 -298
  79. package/refs/gsd/get-shit-done/bin/lib/state.cjs +0 -521
  80. package/refs/gsd/get-shit-done/bin/lib/template.cjs +0 -222
  81. package/refs/gsd/get-shit-done/bin/lib/verify.cjs +0 -772
  82. package/refs/gsd/get-shit-done/references/checkpoints.md +0 -776
  83. package/refs/gsd/get-shit-done/references/continuation-format.md +0 -249
  84. package/refs/gsd/get-shit-done/references/decimal-phase-calculation.md +0 -65
  85. package/refs/gsd/get-shit-done/references/git-integration.md +0 -248
  86. package/refs/gsd/get-shit-done/references/git-planning-commit.md +0 -38
  87. package/refs/gsd/get-shit-done/references/model-profile-resolution.md +0 -34
  88. package/refs/gsd/get-shit-done/references/model-profiles.md +0 -92
  89. package/refs/gsd/get-shit-done/references/phase-argument-parsing.md +0 -61
  90. package/refs/gsd/get-shit-done/references/planning-config.md +0 -196
  91. package/refs/gsd/get-shit-done/references/questioning.md +0 -145
  92. package/refs/gsd/get-shit-done/references/tdd.md +0 -263
  93. package/refs/gsd/get-shit-done/references/ui-brand.md +0 -160
  94. package/refs/gsd/get-shit-done/references/verification-patterns.md +0 -612
  95. package/refs/gsd/get-shit-done/templates/DEBUG.md +0 -164
  96. package/refs/gsd/get-shit-done/templates/UAT.md +0 -247
  97. package/refs/gsd/get-shit-done/templates/VALIDATION.md +0 -76
  98. package/refs/gsd/get-shit-done/templates/codebase/architecture.md +0 -255
  99. package/refs/gsd/get-shit-done/templates/codebase/concerns.md +0 -310
  100. package/refs/gsd/get-shit-done/templates/codebase/conventions.md +0 -307
  101. package/refs/gsd/get-shit-done/templates/codebase/integrations.md +0 -280
  102. package/refs/gsd/get-shit-done/templates/codebase/stack.md +0 -186
  103. package/refs/gsd/get-shit-done/templates/codebase/structure.md +0 -285
  104. package/refs/gsd/get-shit-done/templates/codebase/testing.md +0 -480
  105. package/refs/gsd/get-shit-done/templates/config.json +0 -37
  106. package/refs/gsd/get-shit-done/templates/context.md +0 -283
  107. package/refs/gsd/get-shit-done/templates/continue-here.md +0 -78
  108. package/refs/gsd/get-shit-done/templates/debug-subagent-prompt.md +0 -91
  109. package/refs/gsd/get-shit-done/templates/discovery.md +0 -146
  110. package/refs/gsd/get-shit-done/templates/milestone-archive.md +0 -123
  111. package/refs/gsd/get-shit-done/templates/milestone.md +0 -115
  112. package/refs/gsd/get-shit-done/templates/phase-prompt.md +0 -569
  113. package/refs/gsd/get-shit-done/templates/planner-subagent-prompt.md +0 -117
  114. package/refs/gsd/get-shit-done/templates/project.md +0 -184
  115. package/refs/gsd/get-shit-done/templates/requirements.md +0 -231
  116. package/refs/gsd/get-shit-done/templates/research-project/ARCHITECTURE.md +0 -204
  117. package/refs/gsd/get-shit-done/templates/research-project/FEATURES.md +0 -147
  118. package/refs/gsd/get-shit-done/templates/research-project/PITFALLS.md +0 -200
  119. package/refs/gsd/get-shit-done/templates/research-project/STACK.md +0 -120
  120. package/refs/gsd/get-shit-done/templates/research-project/SUMMARY.md +0 -170
  121. package/refs/gsd/get-shit-done/templates/research.md +0 -552
  122. package/refs/gsd/get-shit-done/templates/retrospective.md +0 -54
  123. package/refs/gsd/get-shit-done/templates/roadmap.md +0 -202
  124. package/refs/gsd/get-shit-done/templates/state.md +0 -176
  125. package/refs/gsd/get-shit-done/templates/summary-complex.md +0 -59
  126. package/refs/gsd/get-shit-done/templates/summary-minimal.md +0 -41
  127. package/refs/gsd/get-shit-done/templates/summary-standard.md +0 -48
  128. package/refs/gsd/get-shit-done/templates/summary.md +0 -248
  129. package/refs/gsd/get-shit-done/templates/user-setup.md +0 -311
  130. package/refs/gsd/get-shit-done/templates/verification-report.md +0 -322
  131. package/refs/gsd/get-shit-done/workflows/add-phase.md +0 -111
  132. package/refs/gsd/get-shit-done/workflows/add-tests.md +0 -350
  133. package/refs/gsd/get-shit-done/workflows/add-todo.md +0 -157
  134. package/refs/gsd/get-shit-done/workflows/audit-milestone.md +0 -297
  135. package/refs/gsd/get-shit-done/workflows/check-todos.md +0 -176
  136. package/refs/gsd/get-shit-done/workflows/cleanup.md +0 -152
  137. package/refs/gsd/get-shit-done/workflows/complete-milestone.md +0 -763
  138. package/refs/gsd/get-shit-done/workflows/diagnose-issues.md +0 -219
  139. package/refs/gsd/get-shit-done/workflows/discovery-phase.md +0 -289
  140. package/refs/gsd/get-shit-done/workflows/discuss-phase.md +0 -542
  141. package/refs/gsd/get-shit-done/workflows/execute-phase.md +0 -449
  142. package/refs/gsd/get-shit-done/workflows/execute-plan.md +0 -448
  143. package/refs/gsd/get-shit-done/workflows/health.md +0 -156
  144. package/refs/gsd/get-shit-done/workflows/help.md +0 -489
  145. package/refs/gsd/get-shit-done/workflows/insert-phase.md +0 -129
  146. package/refs/gsd/get-shit-done/workflows/list-phase-assumptions.md +0 -178
  147. package/refs/gsd/get-shit-done/workflows/map-codebase.md +0 -315
  148. package/refs/gsd/get-shit-done/workflows/new-milestone.md +0 -382
  149. package/refs/gsd/get-shit-done/workflows/new-project.md +0 -1116
  150. package/refs/gsd/get-shit-done/workflows/pause-work.md +0 -122
  151. package/refs/gsd/get-shit-done/workflows/plan-milestone-gaps.md +0 -274
  152. package/refs/gsd/get-shit-done/workflows/plan-phase.md +0 -569
  153. package/refs/gsd/get-shit-done/workflows/progress.md +0 -381
  154. package/refs/gsd/get-shit-done/workflows/quick.md +0 -453
  155. package/refs/gsd/get-shit-done/workflows/remove-phase.md +0 -154
  156. package/refs/gsd/get-shit-done/workflows/research-phase.md +0 -73
  157. package/refs/gsd/get-shit-done/workflows/resume-project.md +0 -306
  158. package/refs/gsd/get-shit-done/workflows/set-profile.md +0 -80
  159. package/refs/gsd/get-shit-done/workflows/settings.md +0 -213
  160. package/refs/gsd/get-shit-done/workflows/transition.md +0 -544
  161. package/refs/gsd/get-shit-done/workflows/update.md +0 -219
  162. package/refs/gsd/get-shit-done/workflows/verify-phase.md +0 -242
  163. package/refs/gsd/get-shit-done/workflows/verify-work.md +0 -569
  164. package/refs/gsd/hooks/gsd-check-update.js +0 -62
  165. package/refs/gsd/hooks/gsd-context-monitor.js +0 -122
  166. package/refs/gsd/hooks/gsd-statusline.js +0 -108
  167. package/refs/gsd/package.json +0 -50
  168. package/refs/gsd/scripts/build-hooks.js +0 -43
  169. package/refs/gsd/tests/commands.test.cjs +0 -661
  170. package/refs/gsd/tests/helpers.cjs +0 -40
  171. package/refs/gsd/tests/init.test.cjs +0 -205
  172. package/refs/gsd/tests/milestone.test.cjs +0 -98
  173. package/refs/gsd/tests/phase.test.cjs +0 -1241
  174. package/refs/gsd/tests/roadmap.test.cjs +0 -265
  175. package/refs/gsd/tests/state.test.cjs +0 -302
  176. package/refs/gsd/tests/verify.test.cjs +0 -80
  177. package/refs/vbenchmark/.agent/agents/codebase-explorer.md +0 -224
  178. package/refs/vbenchmark/.agent/agents/debugger.md +0 -180
  179. package/refs/vbenchmark/.agent/agents/documenter.md +0 -166
  180. package/refs/vbenchmark/.agent/agents/implementer.md +0 -70
  181. package/refs/vbenchmark/.agent/agents/orchestrator.md +0 -212
  182. package/refs/vbenchmark/.agent/agents/researcher.md +0 -80
  183. package/refs/vbenchmark/.agent/agents/reviewer.md +0 -184
  184. package/refs/vbenchmark/.agent/agents/tester.md +0 -170
  185. package/refs/vbenchmark/.agent/commands/commit.md +0 -29
  186. package/refs/vbenchmark/.agent/commands/debug.md +0 -59
  187. package/refs/vbenchmark/.agent/commands/document.md +0 -52
  188. package/refs/vbenchmark/.agent/commands/gather-context.md +0 -58
  189. package/refs/vbenchmark/.agent/commands/init.md +0 -56
  190. package/refs/vbenchmark/.agent/commands/preset-help.md +0 -50
  191. package/refs/vbenchmark/.agent/commands/refactor.md +0 -71
  192. package/refs/vbenchmark/.agent/commands/research.md +0 -37
  193. package/refs/vbenchmark/.agent/commands/review.md +0 -38
  194. package/refs/vbenchmark/.agent/commands/test.md +0 -61
  195. package/refs/vbenchmark/.agent/rules/01-code-quality.md +0 -33
  196. package/refs/vbenchmark/.agent/rules/02-typescript-go.md +0 -46
  197. package/refs/vbenchmark/.agent/rules/03-security-git.md +0 -34
  198. package/refs/vbenchmark/.agent/rules/04-architecture.md +0 -40
  199. package/refs/vbenchmark/.agent/sync.js +0 -536
  200. package/refs/vbenchmark/.agent/workflows/commit.md +0 -29
  201. package/refs/vbenchmark/.agent/workflows/debug.md +0 -59
  202. package/refs/vbenchmark/.agent/workflows/document.md +0 -52
  203. package/refs/vbenchmark/.agent/workflows/gather-context.md +0 -58
  204. package/refs/vbenchmark/.agent/workflows/init.md +0 -56
  205. package/refs/vbenchmark/.agent/workflows/preset-help.md +0 -50
  206. package/refs/vbenchmark/.agent/workflows/refactor.md +0 -71
  207. package/refs/vbenchmark/.agent/workflows/research.md +0 -37
  208. package/refs/vbenchmark/.agent/workflows/review.md +0 -38
  209. package/refs/vbenchmark/.agent/workflows/test.md +0 -61
  210. package/refs/vbenchmark/.claude/commands/agentic-dev/apply.md +0 -222
  211. package/refs/vbenchmark/.claude/commands/agentic-dev/done.md +0 -166
  212. package/refs/vbenchmark/.claude/commands/agentic-dev/proposal.md +0 -220
  213. package/refs/vbenchmark/.claude/commands/openspec/apply.md +0 -23
  214. package/refs/vbenchmark/.claude/commands/openspec/archive.md +0 -27
  215. package/refs/vbenchmark/.claude/commands/openspec/proposal.md +0 -28
  216. package/refs/vbenchmark/.clinerules/01-rules.md +0 -73
  217. package/refs/vbenchmark/.clinerules/02-agents.md +0 -34
  218. package/refs/vbenchmark/.cursor/commands/commit.md +0 -29
  219. package/refs/vbenchmark/.cursor/commands/debug.md +0 -59
  220. package/refs/vbenchmark/.cursor/commands/document.md +0 -52
  221. package/refs/vbenchmark/.cursor/commands/gather-context.md +0 -58
  222. package/refs/vbenchmark/.cursor/commands/init.md +0 -56
  223. package/refs/vbenchmark/.cursor/commands/preset-help.md +0 -50
  224. package/refs/vbenchmark/.cursor/commands/refactor.md +0 -71
  225. package/refs/vbenchmark/.cursor/commands/research.md +0 -37
  226. package/refs/vbenchmark/.cursor/commands/review.md +0 -38
  227. package/refs/vbenchmark/.cursor/commands/test.md +0 -61
  228. package/refs/vbenchmark/.cursor/rules/agents.mdc +0 -1357
  229. package/refs/vbenchmark/.factory/droids/codebase-explorer.md +0 -224
  230. package/refs/vbenchmark/.factory/droids/debugger.md +0 -180
  231. package/refs/vbenchmark/.factory/droids/documenter.md +0 -166
  232. package/refs/vbenchmark/.factory/droids/implementer.md +0 -70
  233. package/refs/vbenchmark/.factory/droids/orchestrator.md +0 -212
  234. package/refs/vbenchmark/.factory/droids/researcher.md +0 -80
  235. package/refs/vbenchmark/.factory/droids/reviewer.md +0 -184
  236. package/refs/vbenchmark/.factory/droids/tester.md +0 -170
  237. package/refs/vbenchmark/.gemini/workflows/commit.md +0 -29
  238. package/refs/vbenchmark/.gemini/workflows/debug.md +0 -59
  239. package/refs/vbenchmark/.gemini/workflows/document.md +0 -52
  240. package/refs/vbenchmark/.gemini/workflows/gather-context.md +0 -58
  241. package/refs/vbenchmark/.gemini/workflows/init.md +0 -56
  242. package/refs/vbenchmark/.gemini/workflows/preset-help.md +0 -50
  243. package/refs/vbenchmark/.gemini/workflows/refactor.md +0 -71
  244. package/refs/vbenchmark/.gemini/workflows/research.md +0 -37
  245. package/refs/vbenchmark/.gemini/workflows/review.md +0 -38
  246. package/refs/vbenchmark/.gemini/workflows/test.md +0 -61
  247. package/refs/vbenchmark/.github/CODEOWNERS +0 -20
  248. package/refs/vbenchmark/.github/FUNDING.yml +0 -4
  249. package/refs/vbenchmark/.github/ISSUE_TEMPLATE/bug-report.yml +0 -76
  250. package/refs/vbenchmark/.github/ISSUE_TEMPLATE/new-task.yml +0 -106
  251. package/refs/vbenchmark/.github/PULL_REQUEST_TEMPLATE.md +0 -38
  252. package/refs/vbenchmark/.github/copilot-instructions.md +0 -73
  253. package/refs/vbenchmark/.github/workflows/ci.yaml +0 -33
  254. package/refs/vbenchmark/.github/workflows/vercel-auto-pr.yml +0 -478
  255. package/refs/vbenchmark/.github/workflows/vercel-deploy.yaml +0 -487
  256. package/refs/vbenchmark/.github/workflows/vercel-pr-command.yaml +0 -337
  257. package/refs/vbenchmark/.github/workflows/vercel-project-init.yaml +0 -208
  258. package/refs/vbenchmark/.opencode/agent/codebase-explorer.md +0 -224
  259. package/refs/vbenchmark/.opencode/agent/debugger.md +0 -180
  260. package/refs/vbenchmark/.opencode/agent/documenter.md +0 -166
  261. package/refs/vbenchmark/.opencode/agent/implementer.md +0 -70
  262. package/refs/vbenchmark/.opencode/agent/orchestrator.md +0 -212
  263. package/refs/vbenchmark/.opencode/agent/researcher.md +0 -80
  264. package/refs/vbenchmark/.opencode/agent/reviewer.md +0 -184
  265. package/refs/vbenchmark/.opencode/agent/tester.md +0 -170
  266. package/refs/vbenchmark/.opencode/command/commit.md +0 -29
  267. package/refs/vbenchmark/.opencode/command/debug.md +0 -59
  268. package/refs/vbenchmark/.opencode/command/document.md +0 -52
  269. package/refs/vbenchmark/.opencode/command/gather-context.md +0 -58
  270. package/refs/vbenchmark/.opencode/command/init.md +0 -56
  271. package/refs/vbenchmark/.opencode/command/preset-help.md +0 -50
  272. package/refs/vbenchmark/.opencode/command/refactor.md +0 -71
  273. package/refs/vbenchmark/.opencode/command/research.md +0 -37
  274. package/refs/vbenchmark/.opencode/command/review.md +0 -38
  275. package/refs/vbenchmark/.opencode/command/test.md +0 -61
  276. package/refs/vbenchmark/.trae/project_rules.md +0 -73
  277. package/refs/vbenchmark/.windsurf/rules/rules.md +0 -85
  278. package/refs/vbenchmark/AGENTS.md +0 -73
  279. package/refs/vbenchmark/CONTRIBUTING.md +0 -332
  280. package/refs/vbenchmark/Caddyfile +0 -3
  281. package/refs/vbenchmark/LICENSE +0 -47
  282. package/refs/vbenchmark/README.md +0 -354
  283. package/refs/vbenchmark/docker-compose.prod.yaml +0 -35
  284. package/refs/vbenchmark/docker-compose.yaml +0 -53
  285. package/refs/vbenchmark/docs/TASK_EXPANSION_PLAN.md +0 -211
  286. package/refs/vbenchmark/docs/THESIS.md +0 -441
  287. package/refs/vbenchmark/docs/categories/code-evolution.md +0 -138
  288. package/refs/vbenchmark/openspec/changes/init-vibecodingbench/design.md +0 -111
  289. package/refs/vbenchmark/openspec/changes/init-vibecodingbench/proposal.md +0 -15
  290. package/refs/vbenchmark/openspec/changes/init-vibecodingbench/specs/evaluation/spec.md +0 -105
  291. package/refs/vbenchmark/openspec/changes/init-vibecodingbench/specs/leaderboard/spec.md +0 -68
  292. package/refs/vbenchmark/openspec/changes/init-vibecodingbench/specs/task-definition/spec.md +0 -45
  293. package/refs/vbenchmark/openspec/changes/init-vibecodingbench/specs/task-runner/spec.md +0 -49
  294. package/refs/vbenchmark/openspec/changes/init-vibecodingbench/tasks.md +0 -413
  295. package/refs/vbenchmark/package.json +0 -51
  296. package/refs/vbenchmark/packages/cli/eslint.config.js +0 -16
  297. package/refs/vbenchmark/packages/cli/package.json +0 -35
  298. package/refs/vbenchmark/packages/cli/src/agents/index.ts +0 -655
  299. package/refs/vbenchmark/packages/cli/src/commands/eval.ts +0 -197
  300. package/refs/vbenchmark/packages/cli/src/commands/list.ts +0 -63
  301. package/refs/vbenchmark/packages/cli/src/commands/run.ts +0 -147
  302. package/refs/vbenchmark/packages/cli/src/evaluator.ts +0 -125
  303. package/refs/vbenchmark/packages/cli/src/index.ts +0 -21
  304. package/refs/vbenchmark/packages/cli/src/lib/task-variation.ts +0 -153
  305. package/refs/vbenchmark/packages/cli/src/loader.ts +0 -258
  306. package/refs/vbenchmark/packages/cli/src/reporter.ts +0 -222
  307. package/refs/vbenchmark/packages/cli/src/runtime/docker.ts +0 -385
  308. package/refs/vbenchmark/packages/cli/tsconfig.json +0 -8
  309. package/refs/vbenchmark/packages/dashboard/Dockerfile +0 -42
  310. package/refs/vbenchmark/packages/dashboard/index.html +0 -21
  311. package/refs/vbenchmark/packages/dashboard/package.json +0 -29
  312. package/refs/vbenchmark/packages/dashboard/postcss.config.js +0 -6
  313. package/refs/vbenchmark/packages/dashboard/public/favicon.svg +0 -24
  314. package/refs/vbenchmark/packages/dashboard/public/logo.png +0 -0
  315. package/refs/vbenchmark/packages/dashboard/public/logo.svg +0 -39
  316. package/refs/vbenchmark/packages/dashboard/src/App.tsx +0 -1468
  317. package/refs/vbenchmark/packages/dashboard/src/data/category-performance.json +0 -1
  318. package/refs/vbenchmark/packages/dashboard/src/data/leaderboard.json +0 -1
  319. package/refs/vbenchmark/packages/dashboard/src/data/task-results.json +0 -1
  320. package/refs/vbenchmark/packages/dashboard/src/data/tasks.json +0 -1
  321. package/refs/vbenchmark/packages/dashboard/src/index.css +0 -3
  322. package/refs/vbenchmark/packages/dashboard/src/main.tsx +0 -13
  323. package/refs/vbenchmark/packages/dashboard/src/vite-env.d.ts +0 -9
  324. package/refs/vbenchmark/packages/dashboard/tailwind.config.js +0 -11
  325. package/refs/vbenchmark/packages/dashboard/tsconfig.json +0 -21
  326. package/refs/vbenchmark/packages/dashboard/tsconfig.node.json +0 -11
  327. package/refs/vbenchmark/packages/dashboard/vercel.json +0 -6
  328. package/refs/vbenchmark/packages/dashboard/vite.config.ts +0 -28
  329. package/refs/vbenchmark/packages/evaluator/eslint.config.js +0 -16
  330. package/refs/vbenchmark/packages/evaluator/package.json +0 -24
  331. package/refs/vbenchmark/packages/evaluator/src/index.ts +0 -15
  332. package/refs/vbenchmark/packages/evaluator/src/runners/functional.ts +0 -88
  333. package/refs/vbenchmark/packages/evaluator/src/runners/quality.ts +0 -140
  334. package/refs/vbenchmark/packages/evaluator/src/runners/security.ts +0 -94
  335. package/refs/vbenchmark/packages/evaluator/src/runners/visual.ts +0 -108
  336. package/refs/vbenchmark/packages/evaluator/src/types.d.ts +0 -19
  337. package/refs/vbenchmark/packages/evaluator/tsconfig.json +0 -8
  338. package/refs/vbenchmark/packages/leaderboard/Dockerfile +0 -38
  339. package/refs/vbenchmark/packages/leaderboard/drizzle.config.ts +0 -10
  340. package/refs/vbenchmark/packages/leaderboard/eslint.config.js +0 -16
  341. package/refs/vbenchmark/packages/leaderboard/fly.toml +0 -29
  342. package/refs/vbenchmark/packages/leaderboard/package.json +0 -36
  343. package/refs/vbenchmark/packages/leaderboard/src/app.ts +0 -29
  344. package/refs/vbenchmark/packages/leaderboard/src/components/BrowserPreview.tsx +0 -190
  345. package/refs/vbenchmark/packages/leaderboard/src/components/ComparisonView.tsx +0 -205
  346. package/refs/vbenchmark/packages/leaderboard/src/components/LeaderboardTable.tsx +0 -150
  347. package/refs/vbenchmark/packages/leaderboard/src/components/LiveRunCard.tsx +0 -133
  348. package/refs/vbenchmark/packages/leaderboard/src/components/SubmissionForm.tsx +0 -406
  349. package/refs/vbenchmark/packages/leaderboard/src/components/SubmitForm.tsx +0 -293
  350. package/refs/vbenchmark/packages/leaderboard/src/components/TerminalStream.tsx +0 -111
  351. package/refs/vbenchmark/packages/leaderboard/src/config/pricing.ts +0 -206
  352. package/refs/vbenchmark/packages/leaderboard/src/db/index.ts +0 -31
  353. package/refs/vbenchmark/packages/leaderboard/src/db/schema.ts +0 -125
  354. package/refs/vbenchmark/packages/leaderboard/src/index.ts +0 -13
  355. package/refs/vbenchmark/packages/leaderboard/src/lib/websocket.ts +0 -124
  356. package/refs/vbenchmark/packages/leaderboard/src/routes/leaderboard.ts +0 -698
  357. package/refs/vbenchmark/packages/leaderboard/src/routes/live.ts +0 -175
  358. package/refs/vbenchmark/packages/leaderboard/src/routes/submissions.ts +0 -183
  359. package/refs/vbenchmark/packages/leaderboard/src/routes/tasks.ts +0 -215
  360. package/refs/vbenchmark/packages/leaderboard/tests/api.test.ts +0 -228
  361. package/refs/vbenchmark/packages/leaderboard/tsconfig.json +0 -9
  362. package/refs/vbenchmark/scripts/deploy.sh +0 -70
  363. package/refs/vbenchmark/tasks/ai-integration/advanced/context-management/PROMPT.md +0 -15
  364. package/refs/vbenchmark/tasks/ai-integration/advanced/context-management/task.yaml +0 -16
  365. package/refs/vbenchmark/tasks/ai-integration/advanced/evaluation-framework/PROMPT.md +0 -15
  366. package/refs/vbenchmark/tasks/ai-integration/advanced/evaluation-framework/task.yaml +0 -16
  367. package/refs/vbenchmark/tasks/ai-integration/advanced/guardrails-safety/PROMPT.md +0 -15
  368. package/refs/vbenchmark/tasks/ai-integration/advanced/guardrails-safety/task.yaml +0 -16
  369. package/refs/vbenchmark/tasks/ai-integration/advanced/memory-system/PROMPT.md +0 -15
  370. package/refs/vbenchmark/tasks/ai-integration/advanced/memory-system/task.yaml +0 -16
  371. package/refs/vbenchmark/tasks/ai-integration/advanced/model-routing/PROMPT.md +0 -15
  372. package/refs/vbenchmark/tasks/ai-integration/advanced/model-routing/task.yaml +0 -16
  373. package/refs/vbenchmark/tasks/ai-integration/advanced/multi-agent-system/PROMPT.md +0 -15
  374. package/refs/vbenchmark/tasks/ai-integration/advanced/multi-agent-system/task.yaml +0 -16
  375. package/refs/vbenchmark/tasks/ai-integration/advanced/prompt-optimization/PROMPT.md +0 -15
  376. package/refs/vbenchmark/tasks/ai-integration/advanced/prompt-optimization/task.yaml +0 -16
  377. package/refs/vbenchmark/tasks/ai-integration/advanced/reasoning-chain/PROMPT.md +0 -15
  378. package/refs/vbenchmark/tasks/ai-integration/advanced/reasoning-chain/task.yaml +0 -16
  379. package/refs/vbenchmark/tasks/ai-integration/advanced/streaming-pipeline/PROMPT.md +0 -15
  380. package/refs/vbenchmark/tasks/ai-integration/advanced/streaming-pipeline/task.yaml +0 -16
  381. package/refs/vbenchmark/tasks/ai-integration/advanced/tool-use-orchestration/PROMPT.md +0 -15
  382. package/refs/vbenchmark/tasks/ai-integration/advanced/tool-use-orchestration/task.yaml +0 -16
  383. package/refs/vbenchmark/tasks/ai-integration/agents/code-review-agent/PROMPT.md +0 -64
  384. package/refs/vbenchmark/tasks/ai-integration/agents/code-review-agent/task.yaml +0 -24
  385. package/refs/vbenchmark/tasks/ai-integration/agents/research-agent/PROMPT.md +0 -61
  386. package/refs/vbenchmark/tasks/ai-integration/agents/research-agent/task.yaml +0 -24
  387. package/refs/vbenchmark/tasks/ai-integration/agents/web-scraper-agent/PROMPT.md +0 -57
  388. package/refs/vbenchmark/tasks/ai-integration/agents/web-scraper-agent/task.yaml +0 -24
  389. package/refs/vbenchmark/tasks/ai-integration/embeddings/duplicate-detection/PROMPT.md +0 -50
  390. package/refs/vbenchmark/tasks/ai-integration/embeddings/duplicate-detection/task.yaml +0 -24
  391. package/refs/vbenchmark/tasks/ai-integration/embeddings/recommendation-engine/PROMPT.md +0 -51
  392. package/refs/vbenchmark/tasks/ai-integration/embeddings/recommendation-engine/task.yaml +0 -24
  393. package/refs/vbenchmark/tasks/ai-integration/embeddings/semantic-search/PROMPT.md +0 -50
  394. package/refs/vbenchmark/tasks/ai-integration/embeddings/semantic-search/task.yaml +0 -24
  395. package/refs/vbenchmark/tasks/ai-integration/fine-tuning/classification-model/PROMPT.md +0 -50
  396. package/refs/vbenchmark/tasks/ai-integration/fine-tuning/classification-model/task.yaml +0 -24
  397. package/refs/vbenchmark/tasks/ai-integration/function-calling/api-orchestrator/PROMPT.md +0 -60
  398. package/refs/vbenchmark/tasks/ai-integration/function-calling/api-orchestrator/task.yaml +0 -24
  399. package/refs/vbenchmark/tasks/ai-integration/function-calling/calendar-assistant/PROMPT.md +0 -50
  400. package/refs/vbenchmark/tasks/ai-integration/function-calling/calendar-assistant/task.yaml +0 -24
  401. package/refs/vbenchmark/tasks/ai-integration/function-calling/database-query/PROMPT.md +0 -62
  402. package/refs/vbenchmark/tasks/ai-integration/function-calling/database-query/task.yaml +0 -24
  403. package/refs/vbenchmark/tasks/ai-integration/multimodal/chart-interpreter/PROMPT.md +0 -60
  404. package/refs/vbenchmark/tasks/ai-integration/multimodal/chart-interpreter/task.yaml +0 -24
  405. package/refs/vbenchmark/tasks/ai-integration/multimodal/image-captioning/PROMPT.md +0 -49
  406. package/refs/vbenchmark/tasks/ai-integration/multimodal/image-captioning/task.yaml +0 -24
  407. package/refs/vbenchmark/tasks/ai-integration/rag-chatbot/code-assistant/PROMPT.md +0 -51
  408. package/refs/vbenchmark/tasks/ai-integration/rag-chatbot/code-assistant/task.yaml +0 -24
  409. package/refs/vbenchmark/tasks/ai-integration/rag-chatbot/doc-search/PROMPT.md +0 -51
  410. package/refs/vbenchmark/tasks/ai-integration/rag-chatbot/doc-search/task.yaml +0 -24
  411. package/refs/vbenchmark/tasks/ai-integration/rag-chatbot/pdf-qa/PROMPT.md +0 -76
  412. package/refs/vbenchmark/tasks/ai-integration/rag-chatbot/pdf-qa/docker-compose.yaml +0 -30
  413. package/refs/vbenchmark/tasks/ai-integration/rag-chatbot/pdf-qa/task.yaml +0 -30
  414. package/refs/vbenchmark/tasks/ai-integration/rag-chatbot/pdf-qa/tests/functional/qa.test.py +0 -146
  415. package/refs/vbenchmark/tasks/ai-integration/rag-chatbot/support-bot/PROMPT.md +0 -51
  416. package/refs/vbenchmark/tasks/ai-integration/rag-chatbot/support-bot/task.yaml +0 -24
  417. package/refs/vbenchmark/tasks/ai-integration/structured-output/contract-analyzer/PROMPT.md +0 -67
  418. package/refs/vbenchmark/tasks/ai-integration/structured-output/contract-analyzer/task.yaml +0 -24
  419. package/refs/vbenchmark/tasks/ai-integration/structured-output/invoice-parser/PROMPT.md +0 -61
  420. package/refs/vbenchmark/tasks/ai-integration/structured-output/invoice-parser/task.yaml +0 -27
  421. package/refs/vbenchmark/tasks/ai-integration/structured-output/receipt-scanner/PROMPT.md +0 -65
  422. package/refs/vbenchmark/tasks/ai-integration/structured-output/receipt-scanner/task.yaml +0 -24
  423. package/refs/vbenchmark/tasks/ai-integration/structured-output/resume-parser/PROMPT.md +0 -70
  424. package/refs/vbenchmark/tasks/ai-integration/structured-output/resume-parser/task.yaml +0 -24
  425. package/refs/vbenchmark/tasks/api-integrations/advanced/api-analytics/PROMPT.md +0 -15
  426. package/refs/vbenchmark/tasks/api-integrations/advanced/api-analytics/task.yaml +0 -16
  427. package/refs/vbenchmark/tasks/api-integrations/advanced/api-gateway/PROMPT.md +0 -15
  428. package/refs/vbenchmark/tasks/api-integrations/advanced/api-gateway/task.yaml +0 -16
  429. package/refs/vbenchmark/tasks/api-integrations/advanced/api-mocking/PROMPT.md +0 -15
  430. package/refs/vbenchmark/tasks/api-integrations/advanced/api-mocking/task.yaml +0 -16
  431. package/refs/vbenchmark/tasks/api-integrations/advanced/contract-testing/PROMPT.md +0 -15
  432. package/refs/vbenchmark/tasks/api-integrations/advanced/contract-testing/task.yaml +0 -16
  433. package/refs/vbenchmark/tasks/api-integrations/advanced/graphql-federation/PROMPT.md +0 -15
  434. package/refs/vbenchmark/tasks/api-integrations/advanced/graphql-federation/task.yaml +0 -16
  435. package/refs/vbenchmark/tasks/api-integrations/advanced/grpc-gateway/PROMPT.md +0 -15
  436. package/refs/vbenchmark/tasks/api-integrations/advanced/grpc-gateway/task.yaml +0 -16
  437. package/refs/vbenchmark/tasks/api-integrations/advanced/rate-limiter/PROMPT.md +0 -15
  438. package/refs/vbenchmark/tasks/api-integrations/advanced/rate-limiter/task.yaml +0 -16
  439. package/refs/vbenchmark/tasks/api-integrations/advanced/request-validator/PROMPT.md +0 -15
  440. package/refs/vbenchmark/tasks/api-integrations/advanced/request-validator/task.yaml +0 -16
  441. package/refs/vbenchmark/tasks/api-integrations/advanced/sdk-generator/PROMPT.md +0 -15
  442. package/refs/vbenchmark/tasks/api-integrations/advanced/sdk-generator/task.yaml +0 -16
  443. package/refs/vbenchmark/tasks/api-integrations/advanced/webhook-processor/PROMPT.md +0 -15
  444. package/refs/vbenchmark/tasks/api-integrations/advanced/webhook-processor/task.yaml +0 -16
  445. package/refs/vbenchmark/tasks/api-integrations/analytics/mixpanel-events/PROMPT.md +0 -42
  446. package/refs/vbenchmark/tasks/api-integrations/analytics/mixpanel-events/task.yaml +0 -24
  447. package/refs/vbenchmark/tasks/api-integrations/analytics/segment-tracking/PROMPT.md +0 -42
  448. package/refs/vbenchmark/tasks/api-integrations/analytics/segment-tracking/task.yaml +0 -24
  449. package/refs/vbenchmark/tasks/api-integrations/auth-provider/oauth2-github/PROMPT.md +0 -42
  450. package/refs/vbenchmark/tasks/api-integrations/auth-provider/oauth2-github/task.yaml +0 -24
  451. package/refs/vbenchmark/tasks/api-integrations/auth-provider/okta-integration/PROMPT.md +0 -44
  452. package/refs/vbenchmark/tasks/api-integrations/auth-provider/okta-integration/task.yaml +0 -24
  453. package/refs/vbenchmark/tasks/api-integrations/auth-provider/saml-sso/PROMPT.md +0 -42
  454. package/refs/vbenchmark/tasks/api-integrations/auth-provider/saml-sso/task.yaml +0 -24
  455. package/refs/vbenchmark/tasks/api-integrations/communication/discord-webhook/PROMPT.md +0 -44
  456. package/refs/vbenchmark/tasks/api-integrations/communication/discord-webhook/task.yaml +0 -24
  457. package/refs/vbenchmark/tasks/api-integrations/communication/slack-bot/PROMPT.md +0 -42
  458. package/refs/vbenchmark/tasks/api-integrations/communication/slack-bot/task.yaml +0 -24
  459. package/refs/vbenchmark/tasks/api-integrations/communication/twilio-sms/PROMPT.md +0 -42
  460. package/refs/vbenchmark/tasks/api-integrations/communication/twilio-sms/task.yaml +0 -24
  461. package/refs/vbenchmark/tasks/api-integrations/email/transactional/PROMPT.md +0 -82
  462. package/refs/vbenchmark/tasks/api-integrations/email/transactional/task.yaml +0 -27
  463. package/refs/vbenchmark/tasks/api-integrations/maps/google-maps-geocoding/PROMPT.md +0 -41
  464. package/refs/vbenchmark/tasks/api-integrations/maps/google-maps-geocoding/task.yaml +0 -24
  465. package/refs/vbenchmark/tasks/api-integrations/maps/mapbox-directions/PROMPT.md +0 -41
  466. package/refs/vbenchmark/tasks/api-integrations/maps/mapbox-directions/task.yaml +0 -24
  467. package/refs/vbenchmark/tasks/api-integrations/payment/crypto-payments/PROMPT.md +0 -43
  468. package/refs/vbenchmark/tasks/api-integrations/payment/crypto-payments/task.yaml +0 -24
  469. package/refs/vbenchmark/tasks/api-integrations/payment/paypal-integration/PROMPT.md +0 -41
  470. package/refs/vbenchmark/tasks/api-integrations/payment/paypal-integration/task.yaml +0 -24
  471. package/refs/vbenchmark/tasks/api-integrations/social/twitter-api/PROMPT.md +0 -41
  472. package/refs/vbenchmark/tasks/api-integrations/social/twitter-api/task.yaml +0 -24
  473. package/refs/vbenchmark/tasks/api-integrations/storage/cloudinary-upload/PROMPT.md +0 -43
  474. package/refs/vbenchmark/tasks/api-integrations/storage/cloudinary-upload/task.yaml +0 -24
  475. package/refs/vbenchmark/tasks/api-integrations/storage/gcs-streaming/PROMPT.md +0 -43
  476. package/refs/vbenchmark/tasks/api-integrations/storage/gcs-streaming/task.yaml +0 -24
  477. package/refs/vbenchmark/tasks/api-integrations/storage/s3-presigned-urls/PROMPT.md +0 -41
  478. package/refs/vbenchmark/tasks/api-integrations/storage/s3-presigned-urls/task.yaml +0 -24
  479. package/refs/vbenchmark/tasks/api-integrations/stripe/checkout-session/PROMPT.md +0 -41
  480. package/refs/vbenchmark/tasks/api-integrations/stripe/checkout-session/task.yaml +0 -24
  481. package/refs/vbenchmark/tasks/api-integrations/stripe/payment-webhook/PROMPT.md +0 -60
  482. package/refs/vbenchmark/tasks/api-integrations/stripe/payment-webhook/docker-compose.yaml +0 -38
  483. package/refs/vbenchmark/tasks/api-integrations/stripe/payment-webhook/task.yaml +0 -31
  484. package/refs/vbenchmark/tasks/api-integrations/stripe/payment-webhook/tests/webhook.test.ts +0 -193
  485. package/refs/vbenchmark/tasks/api-integrations/stripe/subscription-portal/PROMPT.md +0 -41
  486. package/refs/vbenchmark/tasks/api-integrations/stripe/subscription-portal/task.yaml +0 -24
  487. package/refs/vbenchmark/tasks/code-evolution/advanced/api-deprecation/PROMPT.md +0 -15
  488. package/refs/vbenchmark/tasks/code-evolution/advanced/api-deprecation/task.yaml +0 -16
  489. package/refs/vbenchmark/tasks/code-evolution/advanced/ast-refactoring/PROMPT.md +0 -15
  490. package/refs/vbenchmark/tasks/code-evolution/advanced/ast-refactoring/task.yaml +0 -16
  491. package/refs/vbenchmark/tasks/code-evolution/advanced/concurrency-fix/PROMPT.md +0 -15
  492. package/refs/vbenchmark/tasks/code-evolution/advanced/concurrency-fix/task.yaml +0 -16
  493. package/refs/vbenchmark/tasks/code-evolution/advanced/database-schema-migration/PROMPT.md +0 -15
  494. package/refs/vbenchmark/tasks/code-evolution/advanced/database-schema-migration/task.yaml +0 -16
  495. package/refs/vbenchmark/tasks/code-evolution/advanced/dead-code-elimination/PROMPT.md +0 -15
  496. package/refs/vbenchmark/tasks/code-evolution/advanced/dead-code-elimination/task.yaml +0 -16
  497. package/refs/vbenchmark/tasks/code-evolution/advanced/dependency-upgrade/PROMPT.md +0 -15
  498. package/refs/vbenchmark/tasks/code-evolution/advanced/dependency-upgrade/task.yaml +0 -16
  499. package/refs/vbenchmark/tasks/code-evolution/advanced/memory-optimization/PROMPT.md +0 -15
  500. package/refs/vbenchmark/tasks/code-evolution/advanced/memory-optimization/task.yaml +0 -16
  501. package/refs/vbenchmark/tasks/code-evolution/advanced/monorepo-extraction/PROMPT.md +0 -15
  502. package/refs/vbenchmark/tasks/code-evolution/advanced/monorepo-extraction/task.yaml +0 -16
  503. package/refs/vbenchmark/tasks/code-evolution/advanced/performance-profiling/PROMPT.md +0 -15
  504. package/refs/vbenchmark/tasks/code-evolution/advanced/performance-profiling/task.yaml +0 -16
  505. package/refs/vbenchmark/tasks/code-evolution/advanced/type-migration/PROMPT.md +0 -15
  506. package/refs/vbenchmark/tasks/code-evolution/advanced/type-migration/task.yaml +0 -16
  507. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/callback-to-async/PROMPT.md +0 -47
  508. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/callback-to-async/task.yaml +0 -24
  509. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/express-to-fastify/PROMPT.md +0 -49
  510. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/express-to-fastify/base-code/src/app.ts +0 -22
  511. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/express-to-fastify/task.yaml +0 -37
  512. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/express-to-fastify/tests/api.test.ts +0 -70
  513. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/flask-to-fastapi/PROMPT.md +0 -46
  514. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/flask-to-fastapi/task.yaml +0 -24
  515. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/java-to-kotlin/PROMPT.md +0 -45
  516. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/java-to-kotlin/task.yaml +0 -24
  517. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/jquery-to-react/PROMPT.md +0 -47
  518. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/jquery-to-react/task.yaml +0 -24
  519. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/rest-to-grpc/PROMPT.md +0 -47
  520. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/rest-to-grpc/task.yaml +0 -24
  521. package/refs/vbenchmark/tasks/code-evolution/performance/async-refactor/PROMPT.md +0 -47
  522. package/refs/vbenchmark/tasks/code-evolution/performance/async-refactor/task.yaml +0 -24
  523. package/refs/vbenchmark/tasks/code-evolution/performance/memory-leak-fix/PROMPT.md +0 -47
  524. package/refs/vbenchmark/tasks/code-evolution/performance/memory-leak-fix/task.yaml +0 -24
  525. package/refs/vbenchmark/tasks/code-evolution/performance/query-optimization/PROMPT.md +0 -49
  526. package/refs/vbenchmark/tasks/code-evolution/performance/query-optimization/task.yaml +0 -24
  527. package/refs/vbenchmark/tasks/code-evolution/refactoring/class-to-hooks/PROMPT.md +0 -96
  528. package/refs/vbenchmark/tasks/code-evolution/refactoring/class-to-hooks/task.yaml +0 -27
  529. package/refs/vbenchmark/tasks/code-evolution/refactoring/dependency-injection/PROMPT.md +0 -47
  530. package/refs/vbenchmark/tasks/code-evolution/refactoring/dependency-injection/task.yaml +0 -24
  531. package/refs/vbenchmark/tasks/code-evolution/refactoring/error-handling/PROMPT.md +0 -48
  532. package/refs/vbenchmark/tasks/code-evolution/refactoring/error-handling/task.yaml +0 -24
  533. package/refs/vbenchmark/tasks/code-evolution/refactoring/monolith-to-modules/PROMPT.md +0 -50
  534. package/refs/vbenchmark/tasks/code-evolution/refactoring/monolith-to-modules/task.yaml +0 -24
  535. package/refs/vbenchmark/tasks/code-evolution/refactoring/orm-migration/PROMPT.md +0 -47
  536. package/refs/vbenchmark/tasks/code-evolution/refactoring/orm-migration/task.yaml +0 -24
  537. package/refs/vbenchmark/tasks/code-evolution/security/secrets-rotation/PROMPT.md +0 -49
  538. package/refs/vbenchmark/tasks/code-evolution/security/secrets-rotation/task.yaml +0 -24
  539. package/refs/vbenchmark/tasks/code-evolution/security/sql-injection-fix/PROMPT.md +0 -50
  540. package/refs/vbenchmark/tasks/code-evolution/security/sql-injection-fix/task.yaml +0 -24
  541. package/refs/vbenchmark/tasks/code-evolution/security/xss-prevention/PROMPT.md +0 -47
  542. package/refs/vbenchmark/tasks/code-evolution/security/xss-prevention/task.yaml +0 -24
  543. package/refs/vbenchmark/tasks/code-evolution/testing/add-unit-tests/PROMPT.md +0 -48
  544. package/refs/vbenchmark/tasks/code-evolution/testing/add-unit-tests/task.yaml +0 -24
  545. package/refs/vbenchmark/tasks/code-evolution/testing/e2e-playwright/PROMPT.md +0 -50
  546. package/refs/vbenchmark/tasks/code-evolution/testing/e2e-playwright/task.yaml +0 -24
  547. package/refs/vbenchmark/tasks/code-evolution/testing/pytest-fixtures/PROMPT.md +0 -47
  548. package/refs/vbenchmark/tasks/code-evolution/testing/pytest-fixtures/task.yaml +0 -24
  549. package/refs/vbenchmark/tasks/frontend/accessibility/keyboard-shortcuts/PROMPT.md +0 -44
  550. package/refs/vbenchmark/tasks/frontend/accessibility/keyboard-shortcuts/task.yaml +0 -24
  551. package/refs/vbenchmark/tasks/frontend/accessibility/screen-reader-nav/PROMPT.md +0 -44
  552. package/refs/vbenchmark/tasks/frontend/accessibility/screen-reader-nav/task.yaml +0 -24
  553. package/refs/vbenchmark/tasks/frontend/advanced/canvas-editor/PROMPT.md +0 -15
  554. package/refs/vbenchmark/tasks/frontend/advanced/canvas-editor/task.yaml +0 -16
  555. package/refs/vbenchmark/tasks/frontend/advanced/micro-frontend/PROMPT.md +0 -15
  556. package/refs/vbenchmark/tasks/frontend/advanced/micro-frontend/task.yaml +0 -16
  557. package/refs/vbenchmark/tasks/frontend/advanced/offline-first/PROMPT.md +0 -15
  558. package/refs/vbenchmark/tasks/frontend/advanced/offline-first/task.yaml +0 -16
  559. package/refs/vbenchmark/tasks/frontend/advanced/realtime-collab/PROMPT.md +0 -15
  560. package/refs/vbenchmark/tasks/frontend/advanced/realtime-collab/task.yaml +0 -16
  561. package/refs/vbenchmark/tasks/frontend/advanced/service-worker/PROMPT.md +0 -15
  562. package/refs/vbenchmark/tasks/frontend/advanced/service-worker/task.yaml +0 -16
  563. package/refs/vbenchmark/tasks/frontend/advanced/state-machine/PROMPT.md +0 -15
  564. package/refs/vbenchmark/tasks/frontend/advanced/state-machine/task.yaml +0 -16
  565. package/refs/vbenchmark/tasks/frontend/advanced/virtual-list/PROMPT.md +0 -15
  566. package/refs/vbenchmark/tasks/frontend/advanced/virtual-list/task.yaml +0 -16
  567. package/refs/vbenchmark/tasks/frontend/advanced/wasm-integration/PROMPT.md +0 -15
  568. package/refs/vbenchmark/tasks/frontend/advanced/wasm-integration/task.yaml +0 -16
  569. package/refs/vbenchmark/tasks/frontend/advanced/web-worker/PROMPT.md +0 -15
  570. package/refs/vbenchmark/tasks/frontend/advanced/web-worker/task.yaml +0 -16
  571. package/refs/vbenchmark/tasks/frontend/advanced/webgl-visualization/PROMPT.md +0 -15
  572. package/refs/vbenchmark/tasks/frontend/advanced/webgl-visualization/task.yaml +0 -16
  573. package/refs/vbenchmark/tasks/frontend/animation/page-transitions/PROMPT.md +0 -44
  574. package/refs/vbenchmark/tasks/frontend/animation/page-transitions/task.yaml +0 -24
  575. package/refs/vbenchmark/tasks/frontend/components/data-grid/PROMPT.md +0 -59
  576. package/refs/vbenchmark/tasks/frontend/components/data-grid/task.yaml +0 -24
  577. package/refs/vbenchmark/tasks/frontend/components/date-range-picker/PROMPT.md +0 -57
  578. package/refs/vbenchmark/tasks/frontend/components/date-range-picker/task.yaml +0 -24
  579. package/refs/vbenchmark/tasks/frontend/components/file-uploader/PROMPT.md +0 -55
  580. package/refs/vbenchmark/tasks/frontend/components/file-uploader/task.yaml +0 -24
  581. package/refs/vbenchmark/tasks/frontend/components/form-builder/PROMPT.md +0 -96
  582. package/refs/vbenchmark/tasks/frontend/components/form-builder/task.yaml +0 -28
  583. package/refs/vbenchmark/tasks/frontend/components/rich-text-editor/PROMPT.md +0 -45
  584. package/refs/vbenchmark/tasks/frontend/components/rich-text-editor/task.yaml +0 -24
  585. package/refs/vbenchmark/tasks/frontend/figma-to-code/dashboard-layout/PROMPT.md +0 -50
  586. package/refs/vbenchmark/tasks/frontend/figma-to-code/dashboard-layout/task.yaml +0 -25
  587. package/refs/vbenchmark/tasks/frontend/figma-to-code/landing-page/PROMPT.md +0 -49
  588. package/refs/vbenchmark/tasks/frontend/figma-to-code/landing-page/task.yaml +0 -25
  589. package/refs/vbenchmark/tasks/frontend/figma-to-code/mobile-app-screen/PROMPT.md +0 -51
  590. package/refs/vbenchmark/tasks/frontend/figma-to-code/mobile-app-screen/task.yaml +0 -24
  591. package/refs/vbenchmark/tasks/frontend/figma-to-code/pricing-card/PROMPT.md +0 -93
  592. package/refs/vbenchmark/tasks/frontend/figma-to-code/pricing-card/docker-compose.yaml +0 -23
  593. package/refs/vbenchmark/tasks/frontend/figma-to-code/pricing-card/task.yaml +0 -30
  594. package/refs/vbenchmark/tasks/frontend/figma-to-code/pricing-card/tests/visual/diff.test.ts +0 -107
  595. package/refs/vbenchmark/tasks/frontend/figma-to-code/pricing-card/tests/visual/interaction.test.ts +0 -88
  596. package/refs/vbenchmark/tasks/frontend/performance/image-lazy-load/PROMPT.md +0 -43
  597. package/refs/vbenchmark/tasks/frontend/performance/image-lazy-load/task.yaml +0 -24
  598. package/refs/vbenchmark/tasks/frontend/performance/infinite-scroll/PROMPT.md +0 -44
  599. package/refs/vbenchmark/tasks/frontend/performance/infinite-scroll/task.yaml +0 -24
  600. package/refs/vbenchmark/tasks/frontend/state-management/collaborative-editor/PROMPT.md +0 -44
  601. package/refs/vbenchmark/tasks/frontend/state-management/collaborative-editor/task.yaml +0 -24
  602. package/refs/vbenchmark/tasks/frontend/state-management/shopping-cart/PROMPT.md +0 -53
  603. package/refs/vbenchmark/tasks/frontend/state-management/shopping-cart/task.yaml +0 -24
  604. package/refs/vbenchmark/tasks/frontend/visualization/chart-dashboard/PROMPT.md +0 -83
  605. package/refs/vbenchmark/tasks/frontend/visualization/chart-dashboard/task.yaml +0 -28
  606. package/refs/vbenchmark/tasks/frontend/visualization/gantt-chart/PROMPT.md +0 -57
  607. package/refs/vbenchmark/tasks/frontend/visualization/gantt-chart/task.yaml +0 -24
  608. package/refs/vbenchmark/tasks/frontend/visualization/map-dashboard/PROMPT.md +0 -44
  609. package/refs/vbenchmark/tasks/frontend/visualization/map-dashboard/task.yaml +0 -24
  610. package/refs/vbenchmark/tasks/frontend/visualization/realtime-charts/PROMPT.md +0 -43
  611. package/refs/vbenchmark/tasks/frontend/visualization/realtime-charts/task.yaml +0 -24
  612. package/refs/vbenchmark/tasks/glue-code/advanced/blue-green-deploy/PROMPT.md +0 -15
  613. package/refs/vbenchmark/tasks/glue-code/advanced/blue-green-deploy/task.yaml +0 -16
  614. package/refs/vbenchmark/tasks/glue-code/advanced/canary-release/PROMPT.md +0 -15
  615. package/refs/vbenchmark/tasks/glue-code/advanced/canary-release/task.yaml +0 -16
  616. package/refs/vbenchmark/tasks/glue-code/advanced/change-data-capture/PROMPT.md +0 -15
  617. package/refs/vbenchmark/tasks/glue-code/advanced/change-data-capture/task.yaml +0 -16
  618. package/refs/vbenchmark/tasks/glue-code/advanced/config-management/PROMPT.md +0 -15
  619. package/refs/vbenchmark/tasks/glue-code/advanced/config-management/task.yaml +0 -16
  620. package/refs/vbenchmark/tasks/glue-code/advanced/data-pipeline/PROMPT.md +0 -15
  621. package/refs/vbenchmark/tasks/glue-code/advanced/data-pipeline/task.yaml +0 -16
  622. package/refs/vbenchmark/tasks/glue-code/advanced/distributed-tracing/PROMPT.md +0 -15
  623. package/refs/vbenchmark/tasks/glue-code/advanced/distributed-tracing/task.yaml +0 -16
  624. package/refs/vbenchmark/tasks/glue-code/advanced/log-aggregation/PROMPT.md +0 -15
  625. package/refs/vbenchmark/tasks/glue-code/advanced/log-aggregation/task.yaml +0 -16
  626. package/refs/vbenchmark/tasks/glue-code/advanced/schema-registry/PROMPT.md +0 -15
  627. package/refs/vbenchmark/tasks/glue-code/advanced/schema-registry/task.yaml +0 -16
  628. package/refs/vbenchmark/tasks/glue-code/advanced/secret-rotation/PROMPT.md +0 -15
  629. package/refs/vbenchmark/tasks/glue-code/advanced/secret-rotation/task.yaml +0 -16
  630. package/refs/vbenchmark/tasks/glue-code/advanced/stream-processing/PROMPT.md +0 -15
  631. package/refs/vbenchmark/tasks/glue-code/advanced/stream-processing/task.yaml +0 -16
  632. package/refs/vbenchmark/tasks/glue-code/api-sync/rest-to-graphql/PROMPT.md +0 -66
  633. package/refs/vbenchmark/tasks/glue-code/api-sync/rest-to-graphql/task.yaml +0 -27
  634. package/refs/vbenchmark/tasks/glue-code/caching/redis-cache/PROMPT.md +0 -82
  635. package/refs/vbenchmark/tasks/glue-code/caching/redis-cache/task.yaml +0 -27
  636. package/refs/vbenchmark/tasks/glue-code/data-transform/avro-schema-evolution/PROMPT.md +0 -51
  637. package/refs/vbenchmark/tasks/glue-code/data-transform/avro-schema-evolution/task.yaml +0 -24
  638. package/refs/vbenchmark/tasks/glue-code/data-transform/csv-normalizer/PROMPT.md +0 -49
  639. package/refs/vbenchmark/tasks/glue-code/data-transform/csv-normalizer/task.yaml +0 -24
  640. package/refs/vbenchmark/tasks/glue-code/data-transform/excel-to-json/PROMPT.md +0 -67
  641. package/refs/vbenchmark/tasks/glue-code/data-transform/excel-to-json/task.yaml +0 -28
  642. package/refs/vbenchmark/tasks/glue-code/data-transform/excel-to-json/tests/transform.test.py +0 -137
  643. package/refs/vbenchmark/tasks/glue-code/data-transform/json-to-xml/PROMPT.md +0 -45
  644. package/refs/vbenchmark/tasks/glue-code/data-transform/json-to-xml/task.yaml +0 -24
  645. package/refs/vbenchmark/tasks/glue-code/data-transform/protobuf-converter/PROMPT.md +0 -44
  646. package/refs/vbenchmark/tasks/glue-code/data-transform/protobuf-converter/task.yaml +0 -24
  647. package/refs/vbenchmark/tasks/glue-code/etl/cdc-pipeline/PROMPT.md +0 -52
  648. package/refs/vbenchmark/tasks/glue-code/etl/cdc-pipeline/task.yaml +0 -27
  649. package/refs/vbenchmark/tasks/glue-code/etl/database-sync/PROMPT.md +0 -51
  650. package/refs/vbenchmark/tasks/glue-code/etl/database-sync/task.yaml +0 -24
  651. package/refs/vbenchmark/tasks/glue-code/etl/s3-to-warehouse/PROMPT.md +0 -50
  652. package/refs/vbenchmark/tasks/glue-code/etl/s3-to-warehouse/task.yaml +0 -24
  653. package/refs/vbenchmark/tasks/glue-code/file-processing/image-resizer/PROMPT.md +0 -52
  654. package/refs/vbenchmark/tasks/glue-code/file-processing/image-resizer/task.yaml +0 -24
  655. package/refs/vbenchmark/tasks/glue-code/file-processing/pdf-merger/PROMPT.md +0 -50
  656. package/refs/vbenchmark/tasks/glue-code/file-processing/pdf-merger/task.yaml +0 -24
  657. package/refs/vbenchmark/tasks/glue-code/file-processing/video-transcoder/PROMPT.md +0 -50
  658. package/refs/vbenchmark/tasks/glue-code/file-processing/video-transcoder/task.yaml +0 -27
  659. package/refs/vbenchmark/tasks/glue-code/migration/data-backfill/PROMPT.md +0 -50
  660. package/refs/vbenchmark/tasks/glue-code/migration/data-backfill/task.yaml +0 -24
  661. package/refs/vbenchmark/tasks/glue-code/migration/database-versioning/PROMPT.md +0 -50
  662. package/refs/vbenchmark/tasks/glue-code/migration/database-versioning/task.yaml +0 -24
  663. package/refs/vbenchmark/tasks/glue-code/queue/kafka-producer/PROMPT.md +0 -49
  664. package/refs/vbenchmark/tasks/glue-code/queue/kafka-producer/task.yaml +0 -27
  665. package/refs/vbenchmark/tasks/glue-code/queue/rabbitmq-consumer/PROMPT.md +0 -50
  666. package/refs/vbenchmark/tasks/glue-code/queue/rabbitmq-consumer/task.yaml +0 -27
  667. package/refs/vbenchmark/tasks/glue-code/queue/sqs-batch-processor/PROMPT.md +0 -47
  668. package/refs/vbenchmark/tasks/glue-code/queue/sqs-batch-processor/task.yaml +0 -24
  669. package/refs/vbenchmark/tasks/glue-code/scheduler/cron-job-manager/PROMPT.md +0 -52
  670. package/refs/vbenchmark/tasks/glue-code/scheduler/cron-job-manager/task.yaml +0 -27
  671. package/refs/vbenchmark/tasks/glue-code/scheduler/delayed-tasks/PROMPT.md +0 -51
  672. package/refs/vbenchmark/tasks/glue-code/scheduler/delayed-tasks/task.yaml +0 -27
  673. package/refs/vbenchmark/tasks/saas-core/advanced/api-versioning/PROMPT.md +0 -15
  674. package/refs/vbenchmark/tasks/saas-core/advanced/api-versioning/task.yaml +0 -16
  675. package/refs/vbenchmark/tasks/saas-core/advanced/circuit-breaker/PROMPT.md +0 -13
  676. package/refs/vbenchmark/tasks/saas-core/advanced/circuit-breaker/task.yaml +0 -16
  677. package/refs/vbenchmark/tasks/saas-core/advanced/compliance-gdpr/PROMPT.md +0 -15
  678. package/refs/vbenchmark/tasks/saas-core/advanced/compliance-gdpr/task.yaml +0 -16
  679. package/refs/vbenchmark/tasks/saas-core/advanced/cqrs-pattern/PROMPT.md +0 -13
  680. package/refs/vbenchmark/tasks/saas-core/advanced/cqrs-pattern/task.yaml +0 -16
  681. package/refs/vbenchmark/tasks/saas-core/advanced/data-encryption/PROMPT.md +0 -15
  682. package/refs/vbenchmark/tasks/saas-core/advanced/data-encryption/task.yaml +0 -16
  683. package/refs/vbenchmark/tasks/saas-core/advanced/distributed-locking/PROMPT.md +0 -46
  684. package/refs/vbenchmark/tasks/saas-core/advanced/distributed-locking/task.yaml +0 -24
  685. package/refs/vbenchmark/tasks/saas-core/advanced/event-sourcing/PROMPT.md +0 -23
  686. package/refs/vbenchmark/tasks/saas-core/advanced/event-sourcing/task.yaml +0 -16
  687. package/refs/vbenchmark/tasks/saas-core/advanced/feature-flags-ab/PROMPT.md +0 -15
  688. package/refs/vbenchmark/tasks/saas-core/advanced/feature-flags-ab/task.yaml +0 -16
  689. package/refs/vbenchmark/tasks/saas-core/advanced/saga-orchestration/PROMPT.md +0 -13
  690. package/refs/vbenchmark/tasks/saas-core/advanced/saga-orchestration/task.yaml +0 -16
  691. package/refs/vbenchmark/tasks/saas-core/advanced/webhook-delivery/PROMPT.md +0 -15
  692. package/refs/vbenchmark/tasks/saas-core/advanced/webhook-delivery/task.yaml +0 -16
  693. package/refs/vbenchmark/tasks/saas-core/audit/activity-logging/PROMPT.md +0 -50
  694. package/refs/vbenchmark/tasks/saas-core/audit/activity-logging/task.yaml +0 -27
  695. package/refs/vbenchmark/tasks/saas-core/auth/jwt-refresh-tokens/PROMPT.md +0 -50
  696. package/refs/vbenchmark/tasks/saas-core/auth/jwt-refresh-tokens/task.yaml +0 -27
  697. package/refs/vbenchmark/tasks/saas-core/auth/magic-link-email/PROMPT.md +0 -53
  698. package/refs/vbenchmark/tasks/saas-core/auth/magic-link-email/task.yaml +0 -27
  699. package/refs/vbenchmark/tasks/saas-core/auth/mfa-totp/PROMPT.md +0 -79
  700. package/refs/vbenchmark/tasks/saas-core/auth/mfa-totp/task.yaml +0 -27
  701. package/refs/vbenchmark/tasks/saas-core/auth/rbac-permissions/PROMPT.md +0 -51
  702. package/refs/vbenchmark/tasks/saas-core/auth/rbac-permissions/task.yaml +0 -27
  703. package/refs/vbenchmark/tasks/saas-core/auth/session-management/PROMPT.md +0 -52
  704. package/refs/vbenchmark/tasks/saas-core/auth/session-management/task.yaml +0 -27
  705. package/refs/vbenchmark/tasks/saas-core/auth/supabase-oauth/PROMPT.md +0 -45
  706. package/refs/vbenchmark/tasks/saas-core/auth/supabase-oauth/docker-compose.yaml +0 -47
  707. package/refs/vbenchmark/tasks/saas-core/auth/supabase-oauth/task.yaml +0 -32
  708. package/refs/vbenchmark/tasks/saas-core/auth/supabase-oauth/tests/auth.test.ts +0 -59
  709. package/refs/vbenchmark/tasks/saas-core/billing/invoice-generation/PROMPT.md +0 -53
  710. package/refs/vbenchmark/tasks/saas-core/billing/invoice-generation/task.yaml +0 -27
  711. package/refs/vbenchmark/tasks/saas-core/billing/stripe-subscriptions/PROMPT.md +0 -51
  712. package/refs/vbenchmark/tasks/saas-core/billing/stripe-subscriptions/task.yaml +0 -27
  713. package/refs/vbenchmark/tasks/saas-core/billing/usage-metering/PROMPT.md +0 -52
  714. package/refs/vbenchmark/tasks/saas-core/billing/usage-metering/task.yaml +0 -27
  715. package/refs/vbenchmark/tasks/saas-core/crud/dashboard-table/PROMPT.md +0 -48
  716. package/refs/vbenchmark/tasks/saas-core/crud/dashboard-table/task.yaml +0 -28
  717. package/refs/vbenchmark/tasks/saas-core/multi-tenant/org-isolation/PROMPT.md +0 -50
  718. package/refs/vbenchmark/tasks/saas-core/multi-tenant/org-isolation/task.yaml +0 -27
  719. package/refs/vbenchmark/tasks/saas-core/multi-tenant/subdomain-routing/PROMPT.md +0 -50
  720. package/refs/vbenchmark/tasks/saas-core/multi-tenant/subdomain-routing/task.yaml +0 -27
  721. package/refs/vbenchmark/tasks/saas-core/notifications/email-queue/PROMPT.md +0 -53
  722. package/refs/vbenchmark/tasks/saas-core/notifications/email-queue/task.yaml +0 -27
  723. package/refs/vbenchmark/tasks/saas-core/notifications/in-app-alerts/PROMPT.md +0 -51
  724. package/refs/vbenchmark/tasks/saas-core/notifications/in-app-alerts/task.yaml +0 -27
  725. package/refs/vbenchmark/tasks/saas-core/notifications/push-notifications/PROMPT.md +0 -51
  726. package/refs/vbenchmark/tasks/saas-core/notifications/push-notifications/task.yaml +0 -27
  727. package/refs/vbenchmark/tasks/saas-core/realtime/websocket-chat/PROMPT.md +0 -80
  728. package/refs/vbenchmark/tasks/saas-core/realtime/websocket-chat/task.yaml +0 -27
  729. package/refs/vbenchmark/tasks/saas-core/search/full-text-search/PROMPT.md +0 -51
  730. package/refs/vbenchmark/tasks/saas-core/search/full-text-search/task.yaml +0 -27
  731. package/refs/vbenchmark/tasks/saas-core/security/rate-limiter/PROMPT.md +0 -99
  732. package/refs/vbenchmark/tasks/saas-core/security/rate-limiter/task.yaml +0 -27
  733. package/refs/vbenchmark/tasks/saas-core/settings/user-preferences/PROMPT.md +0 -78
  734. package/refs/vbenchmark/tasks/saas-core/settings/user-preferences/task.yaml +0 -27
  735. package/refs/vbenchmark/templates/fastapi-postgres/docker-compose.yaml +0 -36
  736. package/refs/vbenchmark/templates/fastapi-postgres/pyproject.toml +0 -34
  737. package/refs/vbenchmark/templates/fastapi-postgres/src/__init__.py +0 -0
  738. package/refs/vbenchmark/templates/fastapi-postgres/src/config.py +0 -12
  739. package/refs/vbenchmark/templates/fastapi-postgres/src/database.py +0 -15
  740. package/refs/vbenchmark/templates/fastapi-postgres/src/main.py +0 -51
  741. package/refs/vbenchmark/templates/fastapi-postgres/src/models.py +0 -12
  742. package/refs/vbenchmark/templates/fastapi-postgres/src/schemas.py +0 -20
  743. package/refs/vbenchmark/templates/go-fiber/docker-compose.yaml +0 -34
  744. package/refs/vbenchmark/templates/go-fiber/go.mod +0 -33
  745. package/refs/vbenchmark/templates/go-fiber/go.sum +0 -68
  746. package/refs/vbenchmark/templates/go-fiber/main.go +0 -98
  747. package/refs/vbenchmark/templates/nextjs-supabase/.env.example +0 -3
  748. package/refs/vbenchmark/templates/nextjs-supabase/docker-compose.yaml +0 -68
  749. package/refs/vbenchmark/templates/nextjs-supabase/src/app/globals.css +0 -13
  750. package/refs/vbenchmark/templates/nextjs-supabase/src/app/layout.tsx +0 -19
  751. package/refs/vbenchmark/templates/nextjs-supabase/src/app/page.tsx +0 -38
  752. package/refs/vbenchmark/templates/nextjs-supabase/src/lib/supabase/client.ts +0 -8
  753. package/refs/vbenchmark/templates/nextjs-supabase/src/lib/supabase/server.ts +0 -32
  754. package/refs/vbenchmark/templates/rust-axum/Cargo.lock +0 -2371
  755. package/refs/vbenchmark/templates/rust-axum/Cargo.toml +0 -16
  756. package/refs/vbenchmark/templates/rust-axum/docker-compose.yaml +0 -34
  757. package/refs/vbenchmark/templates/rust-axum/migrations/20240101000000_init.sql +0 -20
  758. package/refs/vbenchmark/templates/rust-axum/src/main.rs +0 -121
  759. package/refs/vbenchmark/tsconfig.base.json +0 -18
  760. package/refs/vbenchmark/turbo.json +0 -23
  761. package/refs/vbenchmark/vercel.json +0 -10
@@ -1,1468 +0,0 @@
1
- import { Routes, Route, Link, useLocation } from 'react-router-dom';
2
- import { useState, useRef, useMemo } from 'react';
3
- import {
4
- Chart as ChartJS,
5
- CategoryScale,
6
- LinearScale,
7
- BarElement,
8
- PointElement,
9
- LineElement,
10
- ArcElement,
11
- RadialLinearScale,
12
- Title,
13
- Tooltip,
14
- Legend,
15
- Filler,
16
- } from 'chart.js';
17
- import { Bar, Radar, Scatter, Line } from 'react-chartjs-2';
18
-
19
- // Static data imports for GitHub Pages hosting
20
- import leaderboardData from './data/leaderboard.json';
21
- import categoryPerformanceData from './data/category-performance.json';
22
- import taskResultsData from './data/task-results.json';
23
- import tasksData from './data/tasks.json';
24
-
25
- ChartJS.register(
26
- CategoryScale,
27
- LinearScale,
28
- BarElement,
29
- PointElement,
30
- LineElement,
31
- ArcElement,
32
- RadialLinearScale,
33
- Title,
34
- Tooltip,
35
- Legend,
36
- Filler
37
- );
38
-
39
- // shadcn/ui inspired color system with CSS variables pattern
40
- const colors = {
41
- primary: '#4285f4',
42
- primaryForeground: '#ffffff',
43
- secondary: '#f1f5f9',
44
- secondaryForeground: '#0f172a',
45
- muted: '#f8fafc',
46
- mutedForeground: '#64748b',
47
- accent: '#f1f5f9',
48
- accentForeground: '#0f172a',
49
- destructive: '#ef4444',
50
- border: '#e2e8f0',
51
- input: '#e2e8f0',
52
- ring: '#4285f4',
53
- background: '#ffffff',
54
- foreground: '#0f172a',
55
- card: '#ffffff',
56
- cardForeground: '#0f172a',
57
- success: '#22c55e',
58
- warning: '#f59e0b',
59
- };
60
-
61
- // Chart color palette - distinct and accessible
62
- const CHART_COLORS = [
63
- '#4285f4', '#22c55e', '#f59e0b', '#ef4444', '#8b5cf6',
64
- '#06b6d4', '#f97316', '#64748b', '#ec4899', '#6366f1',
65
- '#14b8a6', '#a855f7', '#84cc16', '#0ea5e9',
66
- ];
67
-
68
- interface LeaderboardEntry {
69
- rank: number;
70
- agentName: string;
71
- agentVersion: string;
72
- modelName?: string;
73
- avgScore: number;
74
- avgFunctional: number;
75
- avgQuality: number;
76
- avgCost: number;
77
- tasksCompleted: number;
78
- passedTasks?: number;
79
- failedTasks?: number;
80
- totalTokens: number;
81
- inputTokens?: number;
82
- outputTokens?: number;
83
- totalCostUSD?: number;
84
- avgTimeMs?: number;
85
- pricingInput?: number;
86
- pricingOutput?: number;
87
- }
88
-
89
- // HoverCard Component - shadcn/ui pattern (for inline elements)
90
- function HoverCard({ children, content }: { children: React.ReactNode; content: React.ReactNode }) {
91
- const [isOpen, setIsOpen] = useState(false);
92
- const [position, setPosition] = useState({ x: 0, y: 0 });
93
- const triggerRef = useRef<HTMLSpanElement>(null);
94
- const timeoutRef = useRef<NodeJS.Timeout>();
95
-
96
- const handleMouseEnter = () => {
97
- clearTimeout(timeoutRef.current);
98
- timeoutRef.current = setTimeout(() => {
99
- if (triggerRef.current) {
100
- const rect = triggerRef.current.getBoundingClientRect();
101
- setPosition({ x: rect.left + rect.width / 2, y: rect.top });
102
- }
103
- setIsOpen(true);
104
- }, 200);
105
- };
106
-
107
- const handleMouseLeave = () => {
108
- clearTimeout(timeoutRef.current);
109
- timeoutRef.current = setTimeout(() => setIsOpen(false), 100);
110
- };
111
-
112
- return (
113
- <span ref={triggerRef} onMouseEnter={handleMouseEnter} onMouseLeave={handleMouseLeave} className="inline-block">
114
- {children}
115
- {isOpen && (
116
- <div
117
- className="fixed z-50 animate-in fade-in-0 zoom-in-95"
118
- style={{ left: position.x, top: position.y - 8, transform: 'translate(-50%, -100%)' }}
119
- onMouseEnter={() => clearTimeout(timeoutRef.current)}
120
- onMouseLeave={handleMouseLeave}
121
- >
122
- <div className="bg-white rounded-lg border border-slate-200 shadow-lg p-4 min-w-[280px] max-w-[350px]">
123
- {content}
124
- </div>
125
- </div>
126
- )}
127
- </span>
128
- );
129
- }
130
-
131
- // HoverRow Component - for table rows
132
- function HoverRow({ children, content, className = '' }: {
133
- children: React.ReactNode;
134
- content: React.ReactNode;
135
- className?: string;
136
- }) {
137
- const [isOpen, setIsOpen] = useState(false);
138
- const [position, setPosition] = useState({ x: 0, y: 0 });
139
- const rowRef = useRef<HTMLTableRowElement>(null);
140
- const timeoutRef = useRef<NodeJS.Timeout>();
141
-
142
- const handleMouseEnter = () => {
143
- clearTimeout(timeoutRef.current);
144
- timeoutRef.current = setTimeout(() => {
145
- if (rowRef.current) {
146
- const rect = rowRef.current.getBoundingClientRect();
147
- setPosition({ x: rect.left + rect.width / 2, y: rect.top });
148
- }
149
- setIsOpen(true);
150
- }, 300);
151
- };
152
-
153
- const handleMouseLeave = () => {
154
- clearTimeout(timeoutRef.current);
155
- timeoutRef.current = setTimeout(() => setIsOpen(false), 100);
156
- };
157
-
158
- return (
159
- <tr
160
- ref={rowRef}
161
- onMouseEnter={handleMouseEnter}
162
- onMouseLeave={handleMouseLeave}
163
- className={className}
164
- >
165
- {children}
166
- {isOpen && (
167
- <td className="absolute" style={{ padding: 0, border: 'none' }}>
168
- <div
169
- className="fixed z-50 animate-in fade-in-0 zoom-in-95"
170
- style={{ left: position.x, top: position.y - 8, transform: 'translate(-50%, -100%)' }}
171
- onMouseEnter={() => clearTimeout(timeoutRef.current)}
172
- onMouseLeave={handleMouseLeave}
173
- >
174
- <div className="bg-white rounded-lg border border-slate-200 shadow-lg p-4 min-w-[280px] max-w-[350px]">
175
- {content}
176
- </div>
177
- </div>
178
- </td>
179
- )}
180
- </tr>
181
- );
182
- }
183
-
184
- // Card Component - shadcn/ui pattern
185
- function Card({ children, className = '', hover = false }: { children: React.ReactNode; className?: string; hover?: boolean }) {
186
- return (
187
- <div className={`bg-white rounded-xl border border-slate-200 shadow-sm ${hover ? 'hover:shadow-md hover:border-slate-300 transition-all duration-200' : ''} ${className}`}>
188
- {children}
189
- </div>
190
- );
191
- }
192
-
193
- function CardHeader({ children, className = '' }: { children: React.ReactNode; className?: string }) {
194
- return <div className={`px-6 py-4 border-b border-slate-100 ${className}`}>{children}</div>;
195
- }
196
-
197
- function CardTitle({ children, className = '' }: { children: React.ReactNode; className?: string }) {
198
- return <h3 className={`text-sm font-semibold text-slate-900 ${className}`}>{children}</h3>;
199
- }
200
-
201
- function CardDescription({ children }: { children: React.ReactNode }) {
202
- return <p className="text-xs text-slate-500 mt-0.5">{children}</p>;
203
- }
204
-
205
- function CardContent({ children, className = '' }: { children: React.ReactNode; className?: string }) {
206
- return <div className={`p-6 ${className}`}>{children}</div>;
207
- }
208
-
209
- // Badge Component
210
- function Badge({ children, variant = 'default' }: { children: React.ReactNode; variant?: 'default' | 'success' | 'warning' | 'destructive' | 'outline' }) {
211
- const variants = {
212
- default: 'bg-slate-100 text-slate-900',
213
- success: 'bg-emerald-50 text-emerald-700 border-emerald-200',
214
- warning: 'bg-amber-50 text-amber-700 border-amber-200',
215
- destructive: 'bg-red-50 text-red-700 border-red-200',
216
- outline: 'bg-transparent border-slate-200 text-slate-700',
217
- };
218
- return (
219
- <span className={`inline-flex items-center px-2 py-0.5 text-xs font-medium rounded-md border ${variants[variant]}`}>
220
- {children}
221
- </span>
222
- );
223
- }
224
-
225
- // Stat Card with shadcn/ui styling
226
- function StatCard({ label, value, subtext, icon, trend }: {
227
- label: string;
228
- value: string | number;
229
- subtext?: string;
230
- icon?: React.ReactNode;
231
- trend?: { value: number; label: string };
232
- }) {
233
- return (
234
- <Card hover>
235
- <CardContent className="p-5">
236
- <div className="flex items-start justify-between">
237
- <div className="space-y-1">
238
- <p className="text-xs font-medium text-slate-500 uppercase tracking-wider">{label}</p>
239
- <p className="text-2xl font-semibold text-slate-900">{value}</p>
240
- {subtext && <p className="text-xs text-slate-500">{subtext}</p>}
241
- {trend && (
242
- <div className={`flex items-center gap-1 text-xs ${trend.value >= 0 ? 'text-emerald-600' : 'text-red-600'}`}>
243
- <span>{trend.value >= 0 ? '↑' : '↓'}</span>
244
- <span>{Math.abs(trend.value)}% {trend.label}</span>
245
- </div>
246
- )}
247
- </div>
248
- {icon && <div className="p-2 bg-slate-50 rounded-lg text-slate-600">{icon}</div>}
249
- </div>
250
- </CardContent>
251
- </Card>
252
- );
253
- }
254
-
255
- // Model Info Hover Content
256
- function ModelHoverContent({ entry }: { entry: LeaderboardEntry }) {
257
- const passRate = ((entry.passedTasks || 0) / (entry.tasksCompleted || 1)) * 100;
258
- return (
259
- <div className="space-y-3">
260
- <div className="flex items-center gap-3">
261
- <div className="w-10 h-10 rounded-lg flex items-center justify-center text-white font-bold"
262
- style={{ backgroundColor: CHART_COLORS[entry.rank % CHART_COLORS.length] }}>
263
- {(entry.modelName || entry.agentName).charAt(0)}
264
- </div>
265
- <div>
266
- <div className="font-semibold text-slate-900">{entry.modelName || entry.agentName}</div>
267
- <div className="text-xs text-slate-500">{entry.agentVersion}</div>
268
- </div>
269
- </div>
270
- <div className="grid grid-cols-2 gap-3 text-xs">
271
- <div className="space-y-0.5">
272
- <div className="text-slate-500">Score</div>
273
- <div className="font-semibold text-slate-900">{entry.avgScore.toFixed(1)}%</div>
274
- </div>
275
- <div className="space-y-0.5">
276
- <div className="text-slate-500">Pass Rate</div>
277
- <div className="font-semibold text-slate-900">{passRate.toFixed(0)}%</div>
278
- </div>
279
- <div className="space-y-0.5">
280
- <div className="text-slate-500">Total Cost</div>
281
- <div className="font-semibold text-emerald-600">${(entry.totalCostUSD || 0).toFixed(2)}</div>
282
- </div>
283
- <div className="space-y-0.5">
284
- <div className="text-slate-500">Avg Time</div>
285
- <div className="font-semibold text-slate-900">{((entry.avgTimeMs || 0) / 1000).toFixed(0)}s</div>
286
- </div>
287
- <div className="space-y-0.5">
288
- <div className="text-slate-500">Input Tokens</div>
289
- <div className="font-semibold text-slate-900">{((entry.inputTokens || 0) / 1000).toFixed(0)}K</div>
290
- </div>
291
- <div className="space-y-0.5">
292
- <div className="text-slate-500">Output Tokens</div>
293
- <div className="font-semibold text-slate-900">{((entry.outputTokens || 0) / 1000).toFixed(0)}K</div>
294
- </div>
295
- </div>
296
- <div className="pt-2 border-t border-slate-100">
297
- <div className="flex items-center justify-between text-xs">
298
- <span className="text-slate-500">Pricing</span>
299
- <span className="text-slate-700">
300
- ${entry.pricingInput}/M in · ${entry.pricingOutput}/M out
301
- </span>
302
- </div>
303
- </div>
304
- </div>
305
- );
306
- }
307
-
308
- // Progress bar component
309
- function Progress({ value, max = 100, className = '', color }: { value: number; max?: number; className?: string; color?: string }) {
310
- const percentage = Math.min((value / max) * 100, 100);
311
- return (
312
- <div className={`h-2 bg-slate-100 rounded-full overflow-hidden ${className}`}>
313
- <div
314
- className="h-full rounded-full transition-all duration-300"
315
- style={{ width: `${percentage}%`, backgroundColor: color || colors.primary }}
316
- />
317
- </div>
318
- );
319
- }
320
-
321
- function NotFound() {
322
- return (
323
- <div style={{
324
- display: 'flex',
325
- flexDirection: 'column',
326
- alignItems: 'center',
327
- justifyContent: 'center',
328
- minHeight: '60vh',
329
- textAlign: 'center',
330
- padding: '2rem'
331
- }}>
332
- <h1 style={{ fontSize: '6rem', fontWeight: 700, color: colors.mutedForeground, margin: 0 }}>404</h1>
333
- <h2 style={{ fontSize: '1.5rem', fontWeight: 600, color: colors.foreground, marginTop: '1rem' }}>Page Not Found</h2>
334
- <p style={{ color: colors.mutedForeground, marginTop: '0.5rem', maxWidth: '400px' }}>
335
- The page you're looking for doesn't exist or has been moved.
336
- </p>
337
- <Link
338
- to="/"
339
- style={{
340
- marginTop: '2rem',
341
- padding: '0.75rem 1.5rem',
342
- backgroundColor: colors.primary,
343
- color: colors.primaryForeground,
344
- borderRadius: '0.5rem',
345
- textDecoration: 'none',
346
- fontWeight: 500,
347
- transition: 'opacity 0.2s'
348
- }}
349
- >
350
- Back to Leaderboard
351
- </Link>
352
- </div>
353
- );
354
- }
355
-
356
- function Leaderboard() {
357
- const entries = leaderboardData.leaderboard as LeaderboardEntry[] || [];
358
- const [sortBy, setSortBy] = useState<'score' | 'cost' | 'speed' | 'efficiency'>('score');
359
-
360
- const sortedEntries = [...entries].sort((a, b) => {
361
- if (sortBy === 'cost') return (a.totalCostUSD || 0) - (b.totalCostUSD || 0);
362
- if (sortBy === 'speed') return (a.avgTimeMs || 0) - (b.avgTimeMs || 0);
363
- if (sortBy === 'efficiency') return (b.avgScore / (b.totalCostUSD || 1)) - (a.avgScore / (a.totalCostUSD || 1));
364
- return b.avgScore - a.avgScore;
365
- });
366
-
367
- const topModel = entries.reduce((max, e) => e.avgScore > max.avgScore ? e : max, entries[0]);
368
- const cheapestModel = entries.reduce((min, e) => (e.totalCostUSD || 99) < (min.totalCostUSD || 99) ? e : min, entries[0]);
369
- const fastestModel = entries.reduce((min, e) => (e.avgTimeMs || 999999) < (min.avgTimeMs || 999999) ? e : min, entries[0]);
370
- const bestValue = entries.reduce((max, e) => {
371
- const val = e.avgScore / (e.totalCostUSD || 1);
372
- const maxVal = max.avgScore / (max.totalCostUSD || 1);
373
- return val > maxVal ? e : max;
374
- }, entries[0]);
375
-
376
- const crowns = ['🥇', '🥈', '🥉'];
377
-
378
- return (
379
- <div className="space-y-8">
380
- {/* Header */}
381
- <div className="flex flex-col md:flex-row md:items-end md:justify-between gap-4">
382
- <div>
383
- <h1 className="text-2xl font-bold text-slate-900">Leaderboard</h1>
384
- <p className="text-sm text-slate-500 mt-1">AI coding agent performance on 180 benchmark tasks</p>
385
- </div>
386
- <Badge variant="outline">Updated Jan 27, 2026</Badge>
387
- </div>
388
-
389
- {/* Summary Cards */}
390
- <div className="grid grid-cols-2 lg:grid-cols-4 gap-4">
391
- <StatCard
392
- label="Top Score"
393
- value={`${topModel?.avgScore.toFixed(1)}%`}
394
- subtext={topModel?.modelName}
395
- icon={<span className="text-lg">🏆</span>}
396
- />
397
- <StatCard
398
- label="Lowest Cost"
399
- value={`$${(cheapestModel?.totalCostUSD || 0).toFixed(2)}`}
400
- subtext={cheapestModel?.modelName}
401
- icon={<span className="text-lg">💰</span>}
402
- />
403
- <StatCard
404
- label="Fastest"
405
- value={`${((fastestModel?.avgTimeMs || 0) / 1000).toFixed(0)}s avg`}
406
- subtext={fastestModel?.modelName}
407
- icon={<span className="text-lg">⚡</span>}
408
- />
409
- <StatCard
410
- label="Best Value"
411
- value={`${(bestValue.avgScore / (bestValue.totalCostUSD || 1)).toFixed(0)} pts/$`}
412
- subtext={bestValue?.modelName}
413
- icon={<span className="text-lg">✨</span>}
414
- />
415
- </div>
416
-
417
- {/* Sort Controls */}
418
- <div className="flex items-center gap-2 flex-wrap">
419
- <span className="text-sm text-slate-500">Sort by:</span>
420
- {[
421
- { key: 'score', label: 'Score' },
422
- { key: 'cost', label: 'Cost' },
423
- { key: 'speed', label: 'Speed' },
424
- { key: 'efficiency', label: 'Value' },
425
- ].map((option) => (
426
- <button
427
- key={option.key}
428
- onClick={() => setSortBy(option.key as typeof sortBy)}
429
- className={`px-3 py-1.5 text-sm font-medium rounded-lg transition-all ${
430
- sortBy === option.key
431
- ? 'bg-slate-900 text-white'
432
- : 'bg-slate-100 text-slate-600 hover:bg-slate-200'
433
- }`}
434
- >
435
- {option.label}
436
- </button>
437
- ))}
438
- </div>
439
-
440
- {/* Main Table */}
441
- <Card>
442
- <div className="overflow-x-auto">
443
- <table className="min-w-full">
444
- <thead>
445
- <tr className="border-b border-slate-100 bg-slate-50/50">
446
- <th className="px-4 py-3 text-left text-xs font-semibold text-slate-600 uppercase tracking-wider w-16">Rank</th>
447
- <th className="px-4 py-3 text-left text-xs font-semibold text-slate-600 uppercase tracking-wider">Model</th>
448
- <th className="px-4 py-3 text-left text-xs font-semibold text-slate-600 uppercase tracking-wider">Score</th>
449
- <th className="px-4 py-3 text-left text-xs font-semibold text-slate-600 uppercase tracking-wider">Pass Rate</th>
450
- <th className="px-4 py-3 text-left text-xs font-semibold text-slate-600 uppercase tracking-wider">Quality</th>
451
- <th className="px-4 py-3 text-left text-xs font-semibold text-slate-600 uppercase tracking-wider">Cost</th>
452
- <th className="px-4 py-3 text-left text-xs font-semibold text-slate-600 uppercase tracking-wider">Time</th>
453
- <th className="px-4 py-3 text-left text-xs font-semibold text-slate-600 uppercase tracking-wider">Tokens</th>
454
- <th className="px-4 py-3 text-left text-xs font-semibold text-slate-600 uppercase tracking-wider">Value</th>
455
- </tr>
456
- </thead>
457
- <tbody className="divide-y divide-slate-100">
458
- {sortedEntries.map((entry, idx) => {
459
- const passRate = ((entry.passedTasks || 0) / (entry.tasksCompleted || 1)) * 100;
460
- const valueScore = entry.avgScore / (entry.totalCostUSD || 1);
461
- const maxScore = Math.max(...entries.map(e => e.avgScore));
462
- return (
463
- <HoverRow
464
- key={entry.agentName + entry.agentVersion}
465
- content={<ModelHoverContent entry={entry} />}
466
- className="hover:bg-slate-50/80 transition-colors cursor-pointer group"
467
- >
468
- <td className="px-4 py-4">
469
- {idx < 3 ? (
470
- <span className="text-xl">{crowns[idx]}</span>
471
- ) : (
472
- <span className="text-slate-400 font-medium">{idx + 1}</span>
473
- )}
474
- </td>
475
- <td className="px-4 py-4">
476
- <div className="flex items-center gap-3">
477
- <div
478
- className="w-9 h-9 rounded-lg flex items-center justify-center text-white font-semibold text-sm"
479
- style={{ backgroundColor: CHART_COLORS[idx % CHART_COLORS.length] }}
480
- >
481
- {(entry.modelName || entry.agentName).charAt(0)}
482
- </div>
483
- <div>
484
- <div className="font-medium text-slate-900 group-hover:text-blue-600 transition-colors">
485
- {entry.modelName || entry.agentName}
486
- </div>
487
- <div className="text-xs text-slate-400">{entry.agentVersion}</div>
488
- </div>
489
- </div>
490
- </td>
491
- <td className="px-4 py-4">
492
- <div className="flex items-center gap-3">
493
- <Progress value={entry.avgScore} max={maxScore} className="w-20" color={CHART_COLORS[idx % CHART_COLORS.length]} />
494
- <span className="font-semibold text-slate-900">{entry.avgScore.toFixed(1)}%</span>
495
- </div>
496
- </td>
497
- <td className="px-4 py-4">
498
- <Badge variant={passRate >= 98 ? 'success' : passRate >= 90 ? 'warning' : 'outline'}>
499
- {passRate.toFixed(0)}%
500
- </Badge>
501
- </td>
502
- <td className="px-4 py-4 text-sm text-slate-600">{entry.avgQuality.toFixed(0)}%</td>
503
- <td className="px-4 py-4">
504
- <span className="font-medium text-emerald-600">${(entry.totalCostUSD || 0).toFixed(2)}</span>
505
- </td>
506
- <td className="px-4 py-4 text-sm text-slate-600">
507
- {entry.avgTimeMs ? `${(entry.avgTimeMs / 1000).toFixed(0)}s` : '-'}
508
- </td>
509
- <td className="px-4 py-4 text-sm text-slate-500">
510
- {((entry.totalTokens || 0) / 1000000).toFixed(2)}M
511
- </td>
512
- <td className="px-4 py-4">
513
- <span className="font-medium text-violet-600">{valueScore.toFixed(0)}</span>
514
- </td>
515
- </HoverRow>
516
- );
517
- })}
518
- </tbody>
519
- </table>
520
- </div>
521
- </Card>
522
- </div>
523
- );
524
- }
525
-
526
- // Live Dashboard Component
527
- function LiveDashboard() {
528
- return (
529
- <div className="space-y-8">
530
- <div>
531
- <h1 className="text-2xl font-bold text-slate-900">Live Benchmark</h1>
532
- <p className="text-sm text-slate-500 mt-1">Real-time benchmark execution monitoring</p>
533
- </div>
534
- <Card>
535
- <CardContent className="py-16">
536
- <div className="text-center text-slate-500">
537
- <div className="text-4xl mb-4">📡</div>
538
- <p className="font-medium">No active benchmark runs</p>
539
- <p className="text-sm mt-1">Start a benchmark with: npm run cli -- run &lt;task&gt; -a &lt;agent&gt;</p>
540
- </div>
541
- </CardContent>
542
- </Card>
543
- </div>
544
- );
545
- }
546
-
547
- // Tasks Component
548
- interface Task {
549
- id: string;
550
- name: string;
551
- category: string;
552
- difficulty: string;
553
- description: string;
554
- tags?: string[];
555
- }
556
-
557
- function Tasks() {
558
- const [selectedCategory, setSelectedCategory] = useState<string | null>(null);
559
- const [searchQuery, setSearchQuery] = useState('');
560
-
561
- // Process static data
562
- const { tasks, summary } = useMemo(() => {
563
- const allTasks: Task[] = [];
564
- const categoryCounts: { category: string; count: number }[] = [];
565
- Object.entries(tasksData.categories || {}).forEach(([category, categoryTasks]) => {
566
- const catTasks = categoryTasks as Task[];
567
- categoryCounts.push({ category, count: catTasks.length });
568
- catTasks.forEach((t) => allTasks.push({ ...t, category }));
569
- });
570
- return { tasks: allTasks, summary: categoryCounts };
571
- }, []);
572
-
573
- const filteredTasks = tasks.filter((t) => {
574
- const matchesCategory = !selectedCategory || t.category === selectedCategory;
575
- const matchesSearch = !searchQuery ||
576
- t.name.toLowerCase().includes(searchQuery.toLowerCase()) ||
577
- t.description.toLowerCase().includes(searchQuery.toLowerCase());
578
- return matchesCategory && matchesSearch;
579
- });
580
-
581
- const totalTasks = tasks.length;
582
-
583
- return (
584
- <div className="space-y-6">
585
- <div className="flex flex-col md:flex-row md:items-end md:justify-between gap-4">
586
- <div>
587
- <h1 className="text-2xl font-bold text-slate-900">Benchmark Tasks</h1>
588
- <p className="text-sm text-slate-500 mt-1">{totalTasks} tasks across 6 categories</p>
589
- </div>
590
- </div>
591
-
592
- {/* Category Pills */}
593
- <div className="flex flex-wrap gap-2">
594
- <button
595
- onClick={() => setSelectedCategory(null)}
596
- className={`px-4 py-2 text-sm font-medium rounded-lg transition-all ${
597
- !selectedCategory
598
- ? 'bg-slate-900 text-white'
599
- : 'bg-slate-100 text-slate-600 hover:bg-slate-200'
600
- }`}
601
- >
602
- All ({totalTasks})
603
- </button>
604
- {summary.map((cat) => (
605
- <button
606
- key={cat.category}
607
- onClick={() => setSelectedCategory(selectedCategory === cat.category ? null : cat.category)}
608
- className={`px-4 py-2 text-sm font-medium rounded-lg transition-all ${
609
- selectedCategory === cat.category
610
- ? 'bg-slate-900 text-white'
611
- : 'bg-slate-100 text-slate-600 hover:bg-slate-200'
612
- }`}
613
- >
614
- {cat.category.replace(/-/g, ' ')} ({cat.count})
615
- </button>
616
- ))}
617
- </div>
618
-
619
- {/* Search */}
620
- <div className="relative">
621
- <input
622
- type="text"
623
- placeholder="Search tasks..."
624
- value={searchQuery}
625
- onChange={(e) => setSearchQuery(e.target.value)}
626
- className="w-full px-4 py-3 bg-white border border-slate-200 rounded-xl text-sm focus:outline-none focus:ring-2 focus:ring-blue-500 focus:border-transparent transition-all"
627
- />
628
- </div>
629
-
630
- {/* Task Grid */}
631
- <div className="grid gap-4 md:grid-cols-2 lg:grid-cols-3">
632
- {filteredTasks.slice(0, 30).map((task) => (
633
- <Card key={task.id} hover>
634
- <CardContent className="p-5">
635
- <div className="flex items-start justify-between mb-3">
636
- <Badge variant={task.difficulty === 'easy' ? 'success' : task.difficulty === 'medium' ? 'warning' : 'destructive'}>
637
- {task.difficulty}
638
- </Badge>
639
- <span className="text-xs text-slate-400">{task.category}</span>
640
- </div>
641
- <h3 className="font-semibold text-slate-900 mb-2">{task.name}</h3>
642
- <p className="text-sm text-slate-500 line-clamp-2">{task.description}</p>
643
- {task.tags && task.tags.length > 0 && (
644
- <div className="mt-3 flex flex-wrap gap-1">
645
- {task.tags.slice(0, 3).map((tag) => (
646
- <span key={tag} className="px-2 py-0.5 text-xs bg-slate-100 text-slate-600 rounded-md">
647
- {tag}
648
- </span>
649
- ))}
650
- </div>
651
- )}
652
- </CardContent>
653
- </Card>
654
- ))}
655
- </div>
656
- </div>
657
- );
658
- }
659
-
660
- // Charts/Analytics Component
661
- function Charts() {
662
- const entries = leaderboardData.leaderboard as LeaderboardEntry[] || [];
663
-
664
- const sortedByScore = [...entries].sort((a, b) => b.avgScore - a.avgScore);
665
- const labels = sortedByScore.map(e => (e.modelName || e.agentName).split(' ').slice(0, 2).join(' '));
666
-
667
- const baseOptions = {
668
- responsive: true,
669
- maintainAspectRatio: false,
670
- plugins: {
671
- legend: { display: false },
672
- tooltip: {
673
- backgroundColor: '#ffffff',
674
- titleColor: '#0f172a',
675
- bodyColor: '#64748b',
676
- borderColor: '#e2e8f0',
677
- borderWidth: 1,
678
- padding: 12,
679
- cornerRadius: 8,
680
- displayColors: true,
681
- },
682
- },
683
- scales: {
684
- y: { beginAtZero: true, grid: { color: '#f1f5f9' }, ticks: { color: '#64748b', font: { size: 11 } } },
685
- x: { grid: { display: false }, ticks: { color: '#64748b', font: { size: 10 }, maxRotation: 45 } },
686
- },
687
- };
688
-
689
- const scoreData = {
690
- labels,
691
- datasets: [{
692
- label: 'Score',
693
- data: sortedByScore.map(e => e.avgScore),
694
- backgroundColor: sortedByScore.map((_, i) => CHART_COLORS[i % CHART_COLORS.length] + '80'),
695
- borderColor: sortedByScore.map((_, i) => CHART_COLORS[i % CHART_COLORS.length]),
696
- borderWidth: 2,
697
- borderRadius: 6,
698
- }],
699
- };
700
-
701
- const costData = {
702
- labels,
703
- datasets: [{
704
- label: 'Cost ($)',
705
- data: sortedByScore.map(e => e.totalCostUSD || 0),
706
- backgroundColor: '#22c55e80',
707
- borderColor: '#22c55e',
708
- borderWidth: 2,
709
- borderRadius: 6,
710
- }],
711
- };
712
-
713
- const timeData = {
714
- labels,
715
- datasets: [{
716
- label: 'Time (s)',
717
- data: sortedByScore.map(e => (e.avgTimeMs || 0) / 1000),
718
- backgroundColor: '#f59e0b80',
719
- borderColor: '#f59e0b',
720
- borderWidth: 2,
721
- borderRadius: 6,
722
- }],
723
- };
724
-
725
- const scatterData = {
726
- datasets: sortedByScore.map((e, i) => ({
727
- label: (e.modelName || e.agentName).split(' ').slice(0, 2).join(' '),
728
- data: [{ x: e.totalCostUSD || 0, y: e.avgScore }],
729
- backgroundColor: CHART_COLORS[i % CHART_COLORS.length],
730
- borderColor: CHART_COLORS[i % CHART_COLORS.length],
731
- pointRadius: 10,
732
- pointHoverRadius: 14,
733
- })),
734
- };
735
-
736
- const crowns = ['🥇', '🥈', '🥉'];
737
-
738
- return (
739
- <div className="space-y-8">
740
- <div>
741
- <h1 className="text-2xl font-bold text-slate-900">Analytics</h1>
742
- <p className="text-sm text-slate-500 mt-1">Performance metrics and comparisons</p>
743
- </div>
744
-
745
- {/* Charts Grid */}
746
- <div className="grid grid-cols-1 lg:grid-cols-2 gap-6">
747
- <Card>
748
- <CardHeader>
749
- <CardTitle>Score Distribution</CardTitle>
750
- <CardDescription>Overall benchmark scores by model</CardDescription>
751
- </CardHeader>
752
- <CardContent>
753
- <div className="h-72">
754
- <Bar data={scoreData} options={{ ...baseOptions, scales: { ...baseOptions.scales, y: { ...baseOptions.scales.y, max: 100 } } }} />
755
- </div>
756
- </CardContent>
757
- </Card>
758
-
759
- <Card>
760
- <CardHeader>
761
- <CardTitle>Cost Comparison</CardTitle>
762
- <CardDescription>Total cost in USD for 180 tasks</CardDescription>
763
- </CardHeader>
764
- <CardContent>
765
- <div className="h-72">
766
- <Bar data={costData} options={baseOptions} />
767
- </div>
768
- </CardContent>
769
- </Card>
770
-
771
- <Card>
772
- <CardHeader>
773
- <CardTitle>Execution Time</CardTitle>
774
- <CardDescription>Average time per task in seconds</CardDescription>
775
- </CardHeader>
776
- <CardContent>
777
- <div className="h-72">
778
- <Bar data={timeData} options={baseOptions} />
779
- </div>
780
- </CardContent>
781
- </Card>
782
-
783
- <Card>
784
- <CardHeader>
785
- <CardTitle>Cost vs Score</CardTitle>
786
- <CardDescription>Efficiency visualization (top-left is best)</CardDescription>
787
- </CardHeader>
788
- <CardContent>
789
- <div className="h-72">
790
- <Scatter data={scatterData} options={{
791
- ...baseOptions,
792
- plugins: { ...baseOptions.plugins, legend: { display: true, position: 'bottom' as const, labels: { usePointStyle: true, padding: 8, font: { size: 9 } } } },
793
- scales: {
794
- x: { ...baseOptions.scales.x, title: { display: true, text: 'Cost ($)', color: '#64748b' } },
795
- y: { ...baseOptions.scales.y, min: 55, max: 95, title: { display: true, text: 'Score (%)', color: '#64748b' } },
796
- },
797
- }} />
798
- </div>
799
- </CardContent>
800
- </Card>
801
- </div>
802
-
803
- {/* Data Table */}
804
- <Card>
805
- <CardHeader>
806
- <CardTitle>Detailed Metrics</CardTitle>
807
- <CardDescription>Complete performance data for all models</CardDescription>
808
- </CardHeader>
809
- <div className="overflow-x-auto">
810
- <table className="min-w-full text-sm">
811
- <thead>
812
- <tr className="bg-slate-50/50 border-b border-slate-100">
813
- <th className="px-4 py-3 text-left text-xs font-semibold text-slate-600 uppercase">#</th>
814
- <th className="px-4 py-3 text-left text-xs font-semibold text-slate-600 uppercase">Model</th>
815
- <th className="px-4 py-3 text-left text-xs font-semibold text-slate-600 uppercase">Score</th>
816
- <th className="px-4 py-3 text-left text-xs font-semibold text-slate-600 uppercase">Pass</th>
817
- <th className="px-4 py-3 text-left text-xs font-semibold text-slate-600 uppercase">Functional</th>
818
- <th className="px-4 py-3 text-left text-xs font-semibold text-slate-600 uppercase">Quality</th>
819
- <th className="px-4 py-3 text-left text-xs font-semibold text-slate-600 uppercase">Cost</th>
820
- <th className="px-4 py-3 text-left text-xs font-semibold text-slate-600 uppercase">Time</th>
821
- <th className="px-4 py-3 text-left text-xs font-semibold text-slate-600 uppercase">Tokens</th>
822
- <th className="px-4 py-3 text-left text-xs font-semibold text-slate-600 uppercase">Value</th>
823
- </tr>
824
- </thead>
825
- <tbody className="divide-y divide-slate-100">
826
- {sortedByScore.map((e, i) => (
827
- <HoverRow
828
- key={e.agentName + e.agentVersion}
829
- content={<ModelHoverContent entry={e} />}
830
- className="hover:bg-slate-50/80 transition-colors cursor-pointer"
831
- >
832
- <td className="px-4 py-3">
833
- {i < 3 ? (
834
- <span className="text-lg">{crowns[i]}</span>
835
- ) : (
836
- <span className="text-slate-400">{i + 1}</span>
837
- )}
838
- </td>
839
- <td className="px-4 py-3 font-medium text-slate-900">{e.modelName || e.agentName}</td>
840
- <td className="px-4 py-3 font-semibold text-blue-600">{e.avgScore.toFixed(1)}%</td>
841
- <td className="px-4 py-3 text-slate-600">{((e.passedTasks || 0) / (e.tasksCompleted || 1) * 100).toFixed(0)}%</td>
842
- <td className="px-4 py-3 text-slate-600">{e.avgFunctional.toFixed(0)}%</td>
843
- <td className="px-4 py-3 text-slate-600">{e.avgQuality.toFixed(0)}%</td>
844
- <td className="px-4 py-3 font-medium text-emerald-600">${(e.totalCostUSD || 0).toFixed(2)}</td>
845
- <td className="px-4 py-3 text-slate-600">{((e.avgTimeMs || 0) / 1000).toFixed(0)}s</td>
846
- <td className="px-4 py-3 text-slate-500">{((e.totalTokens || 0) / 1000000).toFixed(2)}M</td>
847
- <td className="px-4 py-3 font-medium text-violet-600">
848
- {(e.totalCostUSD || 0) > 0 ? (e.avgScore / (e.totalCostUSD || 1)).toFixed(0) : '∞'}
849
- </td>
850
- </HoverRow>
851
- ))}
852
- </tbody>
853
- </table>
854
- </div>
855
- </Card>
856
- </div>
857
- );
858
- }
859
-
860
- // Category Performance Component
861
- interface CategoryPerformance {
862
- category: string;
863
- models: Array<{
864
- modelName: string;
865
- avgScore: number;
866
- passRate: number;
867
- avgTokens: number;
868
- avgTimeMs: number;
869
- avgCost: number;
870
- }>;
871
- }
872
-
873
- function TaskPerformance() {
874
- const categoryData = categoryPerformanceData.performance as CategoryPerformance[] || [];
875
- const models = categoryPerformanceData.models as string[] || [];
876
- const [selectedModels, setSelectedModels] = useState<string[]>(models.slice(0, 5));
877
-
878
- const categories = categoryData.map(c => c.category.replace(/-/g, ' '));
879
- const filteredData = categoryData.map(cat => ({
880
- ...cat,
881
- models: cat.models.filter(m => selectedModels.includes(m.modelName)),
882
- }));
883
-
884
- const chartOptions = {
885
- responsive: true,
886
- maintainAspectRatio: false,
887
- plugins: {
888
- legend: { position: 'bottom' as const, labels: { usePointStyle: true, padding: 12, font: { size: 10 } } },
889
- tooltip: {
890
- backgroundColor: '#ffffff',
891
- titleColor: '#0f172a',
892
- bodyColor: '#64748b',
893
- borderColor: '#e2e8f0',
894
- borderWidth: 1,
895
- padding: 12,
896
- cornerRadius: 8,
897
- },
898
- },
899
- scales: {
900
- y: { beginAtZero: true, grid: { color: '#f1f5f9' }, ticks: { color: '#64748b', font: { size: 11 } } },
901
- x: { grid: { display: false }, ticks: { color: '#64748b', font: { size: 10 } } },
902
- },
903
- };
904
-
905
- const createDataset = (metricFn: (m: CategoryPerformance['models'][0]) => number) => ({
906
- labels: categories,
907
- datasets: selectedModels.map((modelName) => ({
908
- label: modelName.split(' ').slice(0, 2).join(' '),
909
- data: filteredData.map(cat => {
910
- const m = cat.models.find(x => x.modelName === modelName);
911
- return m ? metricFn(m) : 0;
912
- }),
913
- backgroundColor: CHART_COLORS[models.indexOf(modelName) % CHART_COLORS.length] + '80',
914
- borderColor: CHART_COLORS[models.indexOf(modelName) % CHART_COLORS.length],
915
- borderWidth: 2,
916
- borderRadius: 4,
917
- })),
918
- });
919
-
920
- const radarData = {
921
- labels: categories,
922
- datasets: selectedModels.slice(0, 5).map((modelName) => ({
923
- label: modelName.split(' ').slice(0, 2).join(' '),
924
- data: filteredData.map(cat => cat.models.find(m => m.modelName === modelName)?.avgScore || 0),
925
- backgroundColor: CHART_COLORS[models.indexOf(modelName) % CHART_COLORS.length] + '15',
926
- borderColor: CHART_COLORS[models.indexOf(modelName) % CHART_COLORS.length],
927
- borderWidth: 2,
928
- pointBackgroundColor: CHART_COLORS[models.indexOf(modelName) % CHART_COLORS.length],
929
- pointRadius: 3,
930
- })),
931
- };
932
-
933
- const crowns = ['🥇', '🥈', '🥉'];
934
-
935
- return (
936
- <div className="space-y-8">
937
- <div>
938
- <h1 className="text-2xl font-bold text-slate-900">Category Performance</h1>
939
- <p className="text-sm text-slate-500 mt-1">Breakdown across 6 task categories (30 tasks each)</p>
940
- </div>
941
-
942
- {/* Model Selector */}
943
- <Card>
944
- <CardContent className="p-5">
945
- <div className="text-sm font-medium text-slate-700 mb-3">Select models to compare:</div>
946
- <div className="flex flex-wrap gap-2">
947
- {models.map((model, i) => (
948
- <button
949
- key={model}
950
- onClick={() => {
951
- if (selectedModels.includes(model)) {
952
- setSelectedModels(selectedModels.filter(m => m !== model));
953
- } else if (selectedModels.length < 7) {
954
- setSelectedModels([...selectedModels, model]);
955
- }
956
- }}
957
- className={`px-3 py-1.5 text-sm font-medium rounded-lg border-2 transition-all ${
958
- selectedModels.includes(model)
959
- ? 'text-white'
960
- : 'border-slate-200 text-slate-600 hover:border-slate-300 bg-white'
961
- }`}
962
- style={{
963
- backgroundColor: selectedModels.includes(model) ? CHART_COLORS[i % CHART_COLORS.length] : undefined,
964
- borderColor: selectedModels.includes(model) ? CHART_COLORS[i % CHART_COLORS.length] : undefined,
965
- }}
966
- >
967
- {model.split(' ').slice(0, 2).join(' ')}
968
- </button>
969
- ))}
970
- </div>
971
- </CardContent>
972
- </Card>
973
-
974
- {/* Charts */}
975
- <div className="grid grid-cols-1 lg:grid-cols-2 gap-6">
976
- <Card>
977
- <CardHeader>
978
- <CardTitle>Score by Category</CardTitle>
979
- </CardHeader>
980
- <CardContent>
981
- <div className="h-72">
982
- <Bar data={createDataset(m => m.avgScore)} options={{ ...chartOptions, scales: { ...chartOptions.scales, y: { ...chartOptions.scales.y, max: 100 } } }} />
983
- </div>
984
- </CardContent>
985
- </Card>
986
-
987
- <Card>
988
- <CardHeader>
989
- <CardTitle>Pass Rate by Category</CardTitle>
990
- </CardHeader>
991
- <CardContent>
992
- <div className="h-72">
993
- <Bar data={createDataset(m => m.passRate)} options={{ ...chartOptions, scales: { ...chartOptions.scales, y: { ...chartOptions.scales.y, max: 110 } } }} />
994
- </div>
995
- </CardContent>
996
- </Card>
997
-
998
- <Card>
999
- <CardHeader>
1000
- <CardTitle>Cost by Category ($)</CardTitle>
1001
- </CardHeader>
1002
- <CardContent>
1003
- <div className="h-72">
1004
- <Bar data={createDataset(m => m.avgCost)} options={chartOptions} />
1005
- </div>
1006
- </CardContent>
1007
- </Card>
1008
-
1009
- <Card>
1010
- <CardHeader>
1011
- <CardTitle>Time by Category (seconds)</CardTitle>
1012
- </CardHeader>
1013
- <CardContent>
1014
- <div className="h-72">
1015
- <Bar data={createDataset(m => m.avgTimeMs / 1000)} options={chartOptions} />
1016
- </div>
1017
- </CardContent>
1018
- </Card>
1019
-
1020
- <Card>
1021
- <CardHeader>
1022
- <CardTitle>Tokens by Category (K)</CardTitle>
1023
- </CardHeader>
1024
- <CardContent>
1025
- <div className="h-72">
1026
- <Bar data={createDataset(m => m.avgTokens / 1000)} options={chartOptions} />
1027
- </div>
1028
- </CardContent>
1029
- </Card>
1030
-
1031
- <Card>
1032
- <CardHeader>
1033
- <CardTitle>Category Strength</CardTitle>
1034
- </CardHeader>
1035
- <CardContent>
1036
- <div className="h-72">
1037
- <Radar data={radarData} options={{
1038
- responsive: true,
1039
- maintainAspectRatio: false,
1040
- plugins: { legend: { position: 'bottom' as const, labels: { usePointStyle: true, padding: 10, font: { size: 10 } } } },
1041
- scales: { r: { beginAtZero: true, max: 100, ticks: { stepSize: 25, display: false }, grid: { color: '#e2e8f0' }, angleLines: { color: '#e2e8f0' } } },
1042
- }} />
1043
- </div>
1044
- </CardContent>
1045
- </Card>
1046
- </div>
1047
-
1048
- {/* Table */}
1049
- <Card>
1050
- <CardHeader>
1051
- <CardTitle>Category Breakdown</CardTitle>
1052
- </CardHeader>
1053
- <div className="overflow-x-auto">
1054
- <table className="min-w-full text-sm">
1055
- <thead>
1056
- <tr className="bg-slate-50/50 border-b border-slate-100">
1057
- <th className="px-4 py-3 text-left text-xs font-semibold text-slate-600 uppercase">Category</th>
1058
- {selectedModels.map(model => (
1059
- <th key={model} className="px-3 py-3 text-left text-xs font-semibold text-slate-600 uppercase" style={{ minWidth: 90 }}>
1060
- {model.split(' ').slice(0, 2).join(' ')}
1061
- </th>
1062
- ))}
1063
- </tr>
1064
- </thead>
1065
- <tbody className="divide-y divide-slate-100">
1066
- {categoryData.map((cat, i) => {
1067
- const sorted = [...cat.models].sort((a, b) => b.avgScore - a.avgScore);
1068
- const top3 = sorted.slice(0, 3).map(m => m.modelName);
1069
- const getRank = (modelName: string) => top3.indexOf(modelName);
1070
-
1071
- return (
1072
- <tr key={cat.category} className={i % 2 === 0 ? 'bg-slate-50/30' : ''}>
1073
- <td className="px-4 py-3 font-medium text-slate-800 capitalize">{cat.category.replace(/-/g, ' ')}</td>
1074
- {selectedModels.map(model => {
1075
- const m = cat.models.find(x => x.modelName === model);
1076
- const rank = getRank(model);
1077
- return (
1078
- <td key={model} className="px-3 py-3">
1079
- <HoverCard content={
1080
- <div className="space-y-2">
1081
- <div className="font-semibold text-slate-900">{model}</div>
1082
- <div className="text-xs text-slate-500">{cat.category.replace(/-/g, ' ')}</div>
1083
- <div className="grid grid-cols-2 gap-2 text-xs pt-2">
1084
- <div>Score: <span className="font-semibold">{m?.avgScore.toFixed(1)}%</span></div>
1085
- <div>Pass: <span className="font-semibold">{m?.passRate.toFixed(0)}%</span></div>
1086
- <div>Cost: <span className="font-semibold text-emerald-600">${m?.avgCost.toFixed(3)}</span></div>
1087
- <div>Time: <span className="font-semibold">{((m?.avgTimeMs || 0) / 1000).toFixed(0)}s</span></div>
1088
- </div>
1089
- </div>
1090
- }>
1091
- <div className="cursor-pointer hover:bg-slate-100 rounded px-1 -mx-1 transition-colors">
1092
- <div className="font-medium flex items-center gap-1 text-blue-600">
1093
- {rank >= 0 && <span>{crowns[rank]}</span>}
1094
- {m?.avgScore.toFixed(1)}%
1095
- </div>
1096
- <div className="text-xs text-slate-400">${m?.avgCost.toFixed(3)}</div>
1097
- </div>
1098
- </HoverCard>
1099
- </td>
1100
- );
1101
- })}
1102
- </tr>
1103
- );
1104
- })}
1105
- </tbody>
1106
- </table>
1107
- </div>
1108
- </Card>
1109
- </div>
1110
- );
1111
- }
1112
-
1113
- // Per-Task Performance Charts Component
1114
- interface TaskResult {
1115
- taskId: string;
1116
- category: string;
1117
- subcategory: string;
1118
- results: Array<{
1119
- modelName: string;
1120
- score: number;
1121
- functional: number;
1122
- quality: number;
1123
- passed: boolean;
1124
- tokens: number;
1125
- timeMs: number;
1126
- cost: number;
1127
- }>;
1128
- }
1129
-
1130
- function PerTaskCharts() {
1131
- const taskResults = taskResultsData.tasks as TaskResult[] || [];
1132
- const models = taskResultsData.models as string[] || [];
1133
- const categories = taskResultsData.categories as string[] || [];
1134
- const [selectedCategory, setSelectedCategory] = useState<string | null>(null);
1135
- const [selectedModels, setSelectedModels] = useState<string[]>(models.slice(0, 5));
1136
- const [viewMode, setViewMode] = useState<'chart' | 'heatmap'>('chart');
1137
-
1138
- const filteredTasks = selectedCategory
1139
- ? taskResults.filter(t => t.category === selectedCategory)
1140
- : taskResults.slice(0, 30);
1141
-
1142
- const chartData = {
1143
- labels: filteredTasks.map((_, i) => `Task ${i + 1}`),
1144
- datasets: selectedModels.map((modelName, idx) => ({
1145
- label: modelName.split(' ').slice(0, 2).join(' '),
1146
- data: filteredTasks.map(task => {
1147
- const result = task.results.find(r => r.modelName === modelName);
1148
- return result?.score || 0;
1149
- }),
1150
- borderColor: CHART_COLORS[idx % CHART_COLORS.length],
1151
- backgroundColor: CHART_COLORS[idx % CHART_COLORS.length] + '20',
1152
- borderWidth: 2,
1153
- tension: 0.3,
1154
- fill: false,
1155
- pointRadius: 3,
1156
- pointHoverRadius: 6,
1157
- })),
1158
- };
1159
-
1160
- const chartOptions = {
1161
- responsive: true,
1162
- maintainAspectRatio: false,
1163
- plugins: {
1164
- legend: { position: 'bottom' as const, labels: { usePointStyle: true, padding: 12, font: { size: 10 } } },
1165
- tooltip: {
1166
- backgroundColor: '#ffffff',
1167
- titleColor: '#0f172a',
1168
- bodyColor: '#64748b',
1169
- borderColor: '#e2e8f0',
1170
- borderWidth: 1,
1171
- padding: 12,
1172
- cornerRadius: 8,
1173
- callbacks: {
1174
- title: (items: any[]) => {
1175
- const idx = items[0]?.dataIndex;
1176
- if (idx !== undefined && filteredTasks[idx]) {
1177
- return filteredTasks[idx].taskId.split('/').pop() || `Task ${idx + 1}`;
1178
- }
1179
- return '';
1180
- },
1181
- },
1182
- },
1183
- },
1184
- scales: {
1185
- y: { beginAtZero: true, max: 100, grid: { color: '#f1f5f9' }, ticks: { color: '#64748b', font: { size: 11 } } },
1186
- x: { grid: { display: false }, ticks: { color: '#64748b', font: { size: 9 }, maxRotation: 0 } },
1187
- },
1188
- };
1189
-
1190
- // Heatmap data for selected models and tasks
1191
- const getScoreColor = (score: number) => {
1192
- if (score >= 90) return '#22c55e';
1193
- if (score >= 80) return '#84cc16';
1194
- if (score >= 70) return '#eab308';
1195
- if (score >= 60) return '#f97316';
1196
- return '#ef4444';
1197
- };
1198
-
1199
- return (
1200
- <div className="space-y-8">
1201
- <div className="flex flex-col md:flex-row md:items-end md:justify-between gap-4">
1202
- <div>
1203
- <h1 className="text-2xl font-bold text-slate-900">Per-Task Performance</h1>
1204
- <p className="text-sm text-slate-500 mt-1">Model performance on individual benchmark tasks</p>
1205
- </div>
1206
- <div className="flex gap-2">
1207
- <button
1208
- onClick={() => setViewMode('chart')}
1209
- className={`px-3 py-1.5 text-sm font-medium rounded-lg transition-all ${
1210
- viewMode === 'chart' ? 'bg-slate-900 text-white' : 'bg-slate-100 text-slate-600 hover:bg-slate-200'
1211
- }`}
1212
- >
1213
- Line Chart
1214
- </button>
1215
- <button
1216
- onClick={() => setViewMode('heatmap')}
1217
- className={`px-3 py-1.5 text-sm font-medium rounded-lg transition-all ${
1218
- viewMode === 'heatmap' ? 'bg-slate-900 text-white' : 'bg-slate-100 text-slate-600 hover:bg-slate-200'
1219
- }`}
1220
- >
1221
- Heatmap
1222
- </button>
1223
- </div>
1224
- </div>
1225
-
1226
- {/* Category Filter */}
1227
- <div className="flex flex-wrap gap-2">
1228
- <button
1229
- onClick={() => setSelectedCategory(null)}
1230
- className={`px-4 py-2 text-sm font-medium rounded-lg transition-all ${
1231
- !selectedCategory ? 'bg-slate-900 text-white' : 'bg-slate-100 text-slate-600 hover:bg-slate-200'
1232
- }`}
1233
- >
1234
- All Categories
1235
- </button>
1236
- {categories.map((cat) => (
1237
- <button
1238
- key={cat}
1239
- onClick={() => setSelectedCategory(selectedCategory === cat ? null : cat)}
1240
- className={`px-4 py-2 text-sm font-medium rounded-lg transition-all ${
1241
- selectedCategory === cat ? 'bg-slate-900 text-white' : 'bg-slate-100 text-slate-600 hover:bg-slate-200'
1242
- }`}
1243
- >
1244
- {cat.replace(/-/g, ' ')}
1245
- </button>
1246
- ))}
1247
- </div>
1248
-
1249
- {/* Model Selector */}
1250
- <Card>
1251
- <CardContent className="p-5">
1252
- <div className="text-sm font-medium text-slate-700 mb-3">Select models to compare:</div>
1253
- <div className="flex flex-wrap gap-2">
1254
- {models.map((model, i) => (
1255
- <button
1256
- key={model}
1257
- onClick={() => {
1258
- if (selectedModels.includes(model)) {
1259
- setSelectedModels(selectedModels.filter(m => m !== model));
1260
- } else if (selectedModels.length < 7) {
1261
- setSelectedModels([...selectedModels, model]);
1262
- }
1263
- }}
1264
- className={`px-3 py-1.5 text-sm font-medium rounded-lg border-2 transition-all ${
1265
- selectedModels.includes(model)
1266
- ? 'text-white'
1267
- : 'border-slate-200 text-slate-600 hover:border-slate-300 bg-white'
1268
- }`}
1269
- style={{
1270
- backgroundColor: selectedModels.includes(model) ? CHART_COLORS[i % CHART_COLORS.length] : undefined,
1271
- borderColor: selectedModels.includes(model) ? CHART_COLORS[i % CHART_COLORS.length] : undefined,
1272
- }}
1273
- >
1274
- {model.split(' ').slice(0, 2).join(' ')}
1275
- </button>
1276
- ))}
1277
- </div>
1278
- </CardContent>
1279
- </Card>
1280
-
1281
- {viewMode === 'chart' ? (
1282
- <Card>
1283
- <CardHeader>
1284
- <CardTitle>Score Trend Across Tasks</CardTitle>
1285
- <CardDescription>
1286
- {selectedCategory ? `${selectedCategory.replace(/-/g, ' ')} - ${filteredTasks.length} tasks` : `Showing first 30 tasks`}
1287
- </CardDescription>
1288
- </CardHeader>
1289
- <CardContent>
1290
- <div className="h-96">
1291
- <Line data={chartData} options={chartOptions} />
1292
- </div>
1293
- </CardContent>
1294
- </Card>
1295
- ) : (
1296
- <Card>
1297
- <CardHeader>
1298
- <CardTitle>Score Heatmap</CardTitle>
1299
- <CardDescription>Color indicates score: green (90+) → yellow (70-80) → red (&lt;60)</CardDescription>
1300
- </CardHeader>
1301
- <div className="overflow-x-auto">
1302
- <table className="min-w-full text-xs">
1303
- <thead>
1304
- <tr className="bg-slate-50/50 border-b border-slate-100">
1305
- <th className="px-3 py-2 text-left font-semibold text-slate-600 sticky left-0 bg-slate-50">Task</th>
1306
- {selectedModels.map(model => (
1307
- <th key={model} className="px-2 py-2 text-center font-semibold text-slate-600" style={{ minWidth: 60 }}>
1308
- {model.split(' ')[0]}
1309
- </th>
1310
- ))}
1311
- </tr>
1312
- </thead>
1313
- <tbody className="divide-y divide-slate-100">
1314
- {filteredTasks.slice(0, 50).map((task, i) => (
1315
- <tr key={task.taskId} className="hover:bg-slate-50/50">
1316
- <td className="px-3 py-1.5 font-medium text-slate-700 sticky left-0 bg-white">
1317
- <HoverCard content={
1318
- <div className="space-y-2">
1319
- <div className="font-semibold text-slate-900">{task.taskId}</div>
1320
- <div className="text-xs text-slate-500">Category: {task.category}</div>
1321
- <div className="text-xs text-slate-500">Subcategory: {task.subcategory}</div>
1322
- </div>
1323
- }>
1324
- <span className="cursor-pointer hover:text-blue-600">T{i + 1}</span>
1325
- </HoverCard>
1326
- </td>
1327
- {selectedModels.map(modelName => {
1328
- const result = task.results.find(r => r.modelName === modelName);
1329
- const score = result?.score || 0;
1330
- return (
1331
- <td key={modelName} className="px-2 py-1.5 text-center">
1332
- <HoverCard content={
1333
- <div className="space-y-2">
1334
- <div className="font-semibold text-slate-900">{modelName}</div>
1335
- <div className="text-xs text-slate-500">{task.taskId.split('/').pop()}</div>
1336
- <div className="grid grid-cols-2 gap-2 text-xs pt-2">
1337
- <div>Score: <span className="font-semibold">{score.toFixed(1)}%</span></div>
1338
- <div>Passed: <span className={result?.passed ? 'text-emerald-600' : 'text-red-600'}>{result?.passed ? 'Yes' : 'No'}</span></div>
1339
- <div>Tokens: <span className="font-semibold">{((result?.tokens || 0) / 1000).toFixed(1)}K</span></div>
1340
- <div>Cost: <span className="font-semibold text-emerald-600">${(result?.cost || 0).toFixed(4)}</span></div>
1341
- </div>
1342
- </div>
1343
- }>
1344
- <div
1345
- className="w-8 h-6 rounded flex items-center justify-center text-white text-xs font-medium cursor-pointer mx-auto"
1346
- style={{ backgroundColor: getScoreColor(score) }}
1347
- >
1348
- {score.toFixed(0)}
1349
- </div>
1350
- </HoverCard>
1351
- </td>
1352
- );
1353
- })}
1354
- </tr>
1355
- ))}
1356
- </tbody>
1357
- </table>
1358
- </div>
1359
- </Card>
1360
- )}
1361
-
1362
- {/* Summary Stats */}
1363
- <div className="grid grid-cols-2 lg:grid-cols-4 gap-4">
1364
- {selectedModels.slice(0, 4).map((modelName, i) => {
1365
- const modelResults = filteredTasks.flatMap(t => t.results.filter(r => r.modelName === modelName));
1366
- const avgScore = modelResults.reduce((sum, r) => sum + r.score, 0) / (modelResults.length || 1);
1367
- const passCount = modelResults.filter(r => r.passed).length;
1368
- return (
1369
- <Card key={modelName} hover>
1370
- <CardContent className="p-4">
1371
- <div className="flex items-center gap-2 mb-2">
1372
- <div
1373
- className="w-3 h-3 rounded-full"
1374
- style={{ backgroundColor: CHART_COLORS[i % CHART_COLORS.length] }}
1375
- />
1376
- <span className="font-medium text-slate-900 text-sm">{modelName.split(' ').slice(0, 2).join(' ')}</span>
1377
- </div>
1378
- <div className="text-2xl font-bold text-slate-900">{avgScore.toFixed(1)}%</div>
1379
- <div className="text-xs text-slate-500">{passCount}/{modelResults.length} tasks passed</div>
1380
- </CardContent>
1381
- </Card>
1382
- );
1383
- })}
1384
- </div>
1385
- </div>
1386
- );
1387
- }
1388
-
1389
- // Navigation Link Component
1390
- function NavLink({ to, children }: { to: string; children: React.ReactNode }) {
1391
- const location = useLocation();
1392
- const isActive = location.pathname === to || (to !== '/' && location.pathname.startsWith(to));
1393
-
1394
- return (
1395
- <Link
1396
- to={to}
1397
- className={`px-4 py-2 text-sm font-medium rounded-lg transition-all ${
1398
- isActive
1399
- ? 'bg-slate-100 text-slate-900'
1400
- : 'text-slate-600 hover:bg-slate-50 hover:text-slate-900'
1401
- }`}
1402
- >
1403
- {children}
1404
- </Link>
1405
- );
1406
- }
1407
-
1408
- // Main App Component
1409
- export default function App() {
1410
- return (
1411
- <div className="min-h-screen bg-slate-50">
1412
- {/* Navigation */}
1413
- <nav className="bg-white border-b border-slate-200 sticky top-0 z-50">
1414
- <div className="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8">
1415
- <div className="flex items-center justify-between h-16">
1416
- <div className="flex items-center gap-8">
1417
- <Link to="/" className="flex items-center gap-3">
1418
- <img src="/favicon.svg" alt="VibeCodingBench" className="w-9 h-9" />
1419
- <span className="text-lg font-semibold text-slate-900">VibeCodingBench</span>
1420
- </Link>
1421
- <div className="hidden md:flex items-center gap-1">
1422
- <NavLink to="/">Leaderboard</NavLink>
1423
- <NavLink to="/charts">Analytics</NavLink>
1424
- <NavLink to="/task-performance">Categories</NavLink>
1425
- <NavLink to="/per-task">Per-Task</NavLink>
1426
- <NavLink to="/tasks">Tasks</NavLink>
1427
- </div>
1428
- </div>
1429
- <a
1430
- href="https://github.com/alt-research/vibe-coding-benchmark-public"
1431
- target="_blank"
1432
- rel="noopener noreferrer"
1433
- className="text-sm text-slate-500 hover:text-slate-900 flex items-center gap-2 transition-colors"
1434
- >
1435
- <svg className="w-5 h-5" fill="currentColor" viewBox="0 0 24 24">
1436
- <path fillRule="evenodd" d="M12 2C6.477 2 2 6.484 2 12.017c0 4.425 2.865 8.18 6.839 9.504.5.092.682-.217.682-.483 0-.237-.008-.868-.013-1.703-2.782.605-3.369-1.343-3.369-1.343-.454-1.158-1.11-1.466-1.11-1.466-.908-.62.069-.608.069-.608 1.003.07 1.531 1.032 1.531 1.032.892 1.53 2.341 1.088 2.91.832.092-.647.35-1.088.636-1.338-2.22-.253-4.555-1.113-4.555-4.951 0-1.093.39-1.988 1.029-2.688-.103-.253-.446-1.272.098-2.65 0 0 .84-.27 2.75 1.026A9.564 9.564 0 0112 6.844c.85.004 1.705.115 2.504.337 1.909-1.296 2.747-1.027 2.747-1.027.546 1.379.202 2.398.1 2.651.64.7 1.028 1.595 1.028 2.688 0 3.848-2.339 4.695-4.566 4.943.359.309.678.92.678 1.855 0 1.338-.012 2.419-.012 2.747 0 .268.18.58.688.482A10.019 10.019 0 0022 12.017C22 6.484 17.522 2 12 2z" clipRule="evenodd" />
1437
- </svg>
1438
- GitHub
1439
- </a>
1440
- </div>
1441
- </div>
1442
- </nav>
1443
-
1444
- {/* Main Content */}
1445
- <main className="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8 py-8">
1446
- <Routes>
1447
- <Route path="/" element={<Leaderboard />} />
1448
- <Route path="/live" element={<LiveDashboard />} />
1449
- <Route path="/tasks" element={<Tasks />} />
1450
- <Route path="/charts" element={<Charts />} />
1451
- <Route path="/task-performance" element={<TaskPerformance />} />
1452
- <Route path="/per-task" element={<PerTaskCharts />} />
1453
- <Route path="*" element={<NotFound />} />
1454
- </Routes>
1455
- </main>
1456
-
1457
- {/* Footer */}
1458
- <footer className="border-t border-slate-200 bg-white mt-12">
1459
- <div className="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8 py-6">
1460
- <div className="flex items-center justify-between text-sm text-slate-500">
1461
- <span>VibeCodingBench - AI Coding Agent Benchmark</span>
1462
- <span>180 tasks · 14 models · Updated Jan 2026</span>
1463
- </div>
1464
- </div>
1465
- </footer>
1466
- </div>
1467
- );
1468
- }