gsd-trae 1.0.0 → 1.0.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (763)
  1. package/CHANGELOG.md +40 -0
  2. package/README.md +7 -76
  3. package/assets/screenshot.png +0 -0
  4. package/package.json +12 -3
  5. package/.claude/settings.local.json +0 -8
  6. package/.gitmodules +0 -6
  7. package/.trae/project_rules.md +0 -56
  8. package/.trae/rules/project_rules.md +0 -56
  9. package/.vscode/code-counter/code-counter.db +0 -0
  10. package/.vscode/settings.json +0 -5
  11. package/refs/gsd/.github/CODEOWNERS +0 -2
  12. package/refs/gsd/.github/FUNDING.yml +0 -1
  13. package/refs/gsd/.github/ISSUE_TEMPLATE/bug_report.yml +0 -59
  14. package/refs/gsd/.github/ISSUE_TEMPLATE/feature_request.yml +0 -37
  15. package/refs/gsd/.github/pull_request_template.md +0 -24
  16. package/refs/gsd/.github/workflows/auto-label-issues.yml +0 -21
  17. package/refs/gsd/CHANGELOG.md +0 -1520
  18. package/refs/gsd/LICENSE +0 -21
  19. package/refs/gsd/README.md +0 -704
  20. package/refs/gsd/SECURITY.md +0 -33
  21. package/refs/gsd/agents/gsd-codebase-mapper.md +0 -764
  22. package/refs/gsd/agents/gsd-debugger.md +0 -1246
  23. package/refs/gsd/agents/gsd-executor.md +0 -469
  24. package/refs/gsd/agents/gsd-integration-checker.md +0 -443
  25. package/refs/gsd/agents/gsd-phase-researcher.md +0 -546
  26. package/refs/gsd/agents/gsd-plan-checker.md +0 -690
  27. package/refs/gsd/agents/gsd-planner.md +0 -1275
  28. package/refs/gsd/agents/gsd-project-researcher.md +0 -621
  29. package/refs/gsd/agents/gsd-research-synthesizer.md +0 -239
  30. package/refs/gsd/agents/gsd-roadmapper.md +0 -642
  31. package/refs/gsd/agents/gsd-verifier.md +0 -573
  32. package/refs/gsd/assets/gsd-logo-2000-transparent.png +0 -0
  33. package/refs/gsd/assets/gsd-logo-2000-transparent.svg +0 -17
  34. package/refs/gsd/assets/gsd-logo-2000.png +0 -0
  35. package/refs/gsd/assets/gsd-logo-2000.svg +0 -21
  36. package/refs/gsd/assets/terminal.svg +0 -68
  37. package/refs/gsd/bin/install.js +0 -2090
  38. package/refs/gsd/commands/gsd/add-phase.md +0 -43
  39. package/refs/gsd/commands/gsd/add-tests.md +0 -41
  40. package/refs/gsd/commands/gsd/add-todo.md +0 -47
  41. package/refs/gsd/commands/gsd/audit-milestone.md +0 -36
  42. package/refs/gsd/commands/gsd/check-todos.md +0 -45
  43. package/refs/gsd/commands/gsd/cleanup.md +0 -18
  44. package/refs/gsd/commands/gsd/complete-milestone.md +0 -136
  45. package/refs/gsd/commands/gsd/debug.md +0 -167
  46. package/refs/gsd/commands/gsd/discuss-phase.md +0 -83
  47. package/refs/gsd/commands/gsd/execute-phase.md +0 -41
  48. package/refs/gsd/commands/gsd/health.md +0 -22
  49. package/refs/gsd/commands/gsd/help.md +0 -22
  50. package/refs/gsd/commands/gsd/insert-phase.md +0 -32
  51. package/refs/gsd/commands/gsd/join-discord.md +0 -18
  52. package/refs/gsd/commands/gsd/list-phase-assumptions.md +0 -46
  53. package/refs/gsd/commands/gsd/map-codebase.md +0 -71
  54. package/refs/gsd/commands/gsd/new-milestone.md +0 -44
  55. package/refs/gsd/commands/gsd/new-project.md +0 -42
  56. package/refs/gsd/commands/gsd/new-project.md.bak +0 -1041
  57. package/refs/gsd/commands/gsd/pause-work.md +0 -38
  58. package/refs/gsd/commands/gsd/plan-milestone-gaps.md +0 -34
  59. package/refs/gsd/commands/gsd/plan-phase.md +0 -45
  60. package/refs/gsd/commands/gsd/progress.md +0 -24
  61. package/refs/gsd/commands/gsd/quick.md +0 -41
  62. package/refs/gsd/commands/gsd/reapply-patches.md +0 -110
  63. package/refs/gsd/commands/gsd/remove-phase.md +0 -31
  64. package/refs/gsd/commands/gsd/research-phase.md +0 -189
  65. package/refs/gsd/commands/gsd/resume-work.md +0 -40
  66. package/refs/gsd/commands/gsd/set-profile.md +0 -34
  67. package/refs/gsd/commands/gsd/settings.md +0 -36
  68. package/refs/gsd/commands/gsd/update.md +0 -37
  69. package/refs/gsd/commands/gsd/verify-work.md +0 -38
  70. package/refs/gsd/docs/USER-GUIDE.md +0 -471
  71. package/refs/gsd/docs/context-monitor.md +0 -96
  72. package/refs/gsd/get-shit-done/bin/gsd-tools.cjs +0 -585
  73. package/refs/gsd/get-shit-done/bin/lib/commands.cjs +0 -553
  74. package/refs/gsd/get-shit-done/bin/lib/config.cjs +0 -162
  75. package/refs/gsd/get-shit-done/bin/lib/core.cjs +0 -411
  76. package/refs/gsd/get-shit-done/bin/lib/frontmatter.cjs +0 -299
  77. package/refs/gsd/get-shit-done/bin/lib/init.cjs +0 -710
  78. package/refs/gsd/get-shit-done/bin/lib/milestone.cjs +0 -215
  79. package/refs/gsd/get-shit-done/bin/lib/phase.cjs +0 -870
  80. package/refs/gsd/get-shit-done/bin/lib/roadmap.cjs +0 -298
  81. package/refs/gsd/get-shit-done/bin/lib/state.cjs +0 -521
  82. package/refs/gsd/get-shit-done/bin/lib/template.cjs +0 -222
  83. package/refs/gsd/get-shit-done/bin/lib/verify.cjs +0 -772
  84. package/refs/gsd/get-shit-done/references/checkpoints.md +0 -776
  85. package/refs/gsd/get-shit-done/references/continuation-format.md +0 -249
  86. package/refs/gsd/get-shit-done/references/decimal-phase-calculation.md +0 -65
  87. package/refs/gsd/get-shit-done/references/git-integration.md +0 -248
  88. package/refs/gsd/get-shit-done/references/git-planning-commit.md +0 -38
  89. package/refs/gsd/get-shit-done/references/model-profile-resolution.md +0 -34
  90. package/refs/gsd/get-shit-done/references/model-profiles.md +0 -92
  91. package/refs/gsd/get-shit-done/references/phase-argument-parsing.md +0 -61
  92. package/refs/gsd/get-shit-done/references/planning-config.md +0 -196
  93. package/refs/gsd/get-shit-done/references/questioning.md +0 -145
  94. package/refs/gsd/get-shit-done/references/tdd.md +0 -263
  95. package/refs/gsd/get-shit-done/references/ui-brand.md +0 -160
  96. package/refs/gsd/get-shit-done/references/verification-patterns.md +0 -612
  97. package/refs/gsd/get-shit-done/templates/DEBUG.md +0 -164
  98. package/refs/gsd/get-shit-done/templates/UAT.md +0 -247
  99. package/refs/gsd/get-shit-done/templates/VALIDATION.md +0 -76
  100. package/refs/gsd/get-shit-done/templates/codebase/architecture.md +0 -255
  101. package/refs/gsd/get-shit-done/templates/codebase/concerns.md +0 -310
  102. package/refs/gsd/get-shit-done/templates/codebase/conventions.md +0 -307
  103. package/refs/gsd/get-shit-done/templates/codebase/integrations.md +0 -280
  104. package/refs/gsd/get-shit-done/templates/codebase/stack.md +0 -186
  105. package/refs/gsd/get-shit-done/templates/codebase/structure.md +0 -285
  106. package/refs/gsd/get-shit-done/templates/codebase/testing.md +0 -480
  107. package/refs/gsd/get-shit-done/templates/config.json +0 -37
  108. package/refs/gsd/get-shit-done/templates/context.md +0 -283
  109. package/refs/gsd/get-shit-done/templates/continue-here.md +0 -78
  110. package/refs/gsd/get-shit-done/templates/debug-subagent-prompt.md +0 -91
  111. package/refs/gsd/get-shit-done/templates/discovery.md +0 -146
  112. package/refs/gsd/get-shit-done/templates/milestone-archive.md +0 -123
  113. package/refs/gsd/get-shit-done/templates/milestone.md +0 -115
  114. package/refs/gsd/get-shit-done/templates/phase-prompt.md +0 -569
  115. package/refs/gsd/get-shit-done/templates/planner-subagent-prompt.md +0 -117
  116. package/refs/gsd/get-shit-done/templates/project.md +0 -184
  117. package/refs/gsd/get-shit-done/templates/requirements.md +0 -231
  118. package/refs/gsd/get-shit-done/templates/research-project/ARCHITECTURE.md +0 -204
  119. package/refs/gsd/get-shit-done/templates/research-project/FEATURES.md +0 -147
  120. package/refs/gsd/get-shit-done/templates/research-project/PITFALLS.md +0 -200
  121. package/refs/gsd/get-shit-done/templates/research-project/STACK.md +0 -120
  122. package/refs/gsd/get-shit-done/templates/research-project/SUMMARY.md +0 -170
  123. package/refs/gsd/get-shit-done/templates/research.md +0 -552
  124. package/refs/gsd/get-shit-done/templates/retrospective.md +0 -54
  125. package/refs/gsd/get-shit-done/templates/roadmap.md +0 -202
  126. package/refs/gsd/get-shit-done/templates/state.md +0 -176
  127. package/refs/gsd/get-shit-done/templates/summary-complex.md +0 -59
  128. package/refs/gsd/get-shit-done/templates/summary-minimal.md +0 -41
  129. package/refs/gsd/get-shit-done/templates/summary-standard.md +0 -48
  130. package/refs/gsd/get-shit-done/templates/summary.md +0 -248
  131. package/refs/gsd/get-shit-done/templates/user-setup.md +0 -311
  132. package/refs/gsd/get-shit-done/templates/verification-report.md +0 -322
  133. package/refs/gsd/get-shit-done/workflows/add-phase.md +0 -111
  134. package/refs/gsd/get-shit-done/workflows/add-tests.md +0 -350
  135. package/refs/gsd/get-shit-done/workflows/add-todo.md +0 -157
  136. package/refs/gsd/get-shit-done/workflows/audit-milestone.md +0 -297
  137. package/refs/gsd/get-shit-done/workflows/check-todos.md +0 -176
  138. package/refs/gsd/get-shit-done/workflows/cleanup.md +0 -152
  139. package/refs/gsd/get-shit-done/workflows/complete-milestone.md +0 -763
  140. package/refs/gsd/get-shit-done/workflows/diagnose-issues.md +0 -219
  141. package/refs/gsd/get-shit-done/workflows/discovery-phase.md +0 -289
  142. package/refs/gsd/get-shit-done/workflows/discuss-phase.md +0 -542
  143. package/refs/gsd/get-shit-done/workflows/execute-phase.md +0 -449
  144. package/refs/gsd/get-shit-done/workflows/execute-plan.md +0 -448
  145. package/refs/gsd/get-shit-done/workflows/health.md +0 -156
  146. package/refs/gsd/get-shit-done/workflows/help.md +0 -489
  147. package/refs/gsd/get-shit-done/workflows/insert-phase.md +0 -129
  148. package/refs/gsd/get-shit-done/workflows/list-phase-assumptions.md +0 -178
  149. package/refs/gsd/get-shit-done/workflows/map-codebase.md +0 -315
  150. package/refs/gsd/get-shit-done/workflows/new-milestone.md +0 -382
  151. package/refs/gsd/get-shit-done/workflows/new-project.md +0 -1116
  152. package/refs/gsd/get-shit-done/workflows/pause-work.md +0 -122
  153. package/refs/gsd/get-shit-done/workflows/plan-milestone-gaps.md +0 -274
  154. package/refs/gsd/get-shit-done/workflows/plan-phase.md +0 -569
  155. package/refs/gsd/get-shit-done/workflows/progress.md +0 -381
  156. package/refs/gsd/get-shit-done/workflows/quick.md +0 -453
  157. package/refs/gsd/get-shit-done/workflows/remove-phase.md +0 -154
  158. package/refs/gsd/get-shit-done/workflows/research-phase.md +0 -73
  159. package/refs/gsd/get-shit-done/workflows/resume-project.md +0 -306
  160. package/refs/gsd/get-shit-done/workflows/set-profile.md +0 -80
  161. package/refs/gsd/get-shit-done/workflows/settings.md +0 -213
  162. package/refs/gsd/get-shit-done/workflows/transition.md +0 -544
  163. package/refs/gsd/get-shit-done/workflows/update.md +0 -219
  164. package/refs/gsd/get-shit-done/workflows/verify-phase.md +0 -242
  165. package/refs/gsd/get-shit-done/workflows/verify-work.md +0 -569
  166. package/refs/gsd/hooks/gsd-check-update.js +0 -62
  167. package/refs/gsd/hooks/gsd-context-monitor.js +0 -122
  168. package/refs/gsd/hooks/gsd-statusline.js +0 -108
  169. package/refs/gsd/package.json +0 -50
  170. package/refs/gsd/scripts/build-hooks.js +0 -43
  171. package/refs/gsd/tests/commands.test.cjs +0 -661
  172. package/refs/gsd/tests/helpers.cjs +0 -40
  173. package/refs/gsd/tests/init.test.cjs +0 -205
  174. package/refs/gsd/tests/milestone.test.cjs +0 -98
  175. package/refs/gsd/tests/phase.test.cjs +0 -1241
  176. package/refs/gsd/tests/roadmap.test.cjs +0 -265
  177. package/refs/gsd/tests/state.test.cjs +0 -302
  178. package/refs/gsd/tests/verify.test.cjs +0 -80
  179. package/refs/vbenchmark/.agent/agents/codebase-explorer.md +0 -224
  180. package/refs/vbenchmark/.agent/agents/debugger.md +0 -180
  181. package/refs/vbenchmark/.agent/agents/documenter.md +0 -166
  182. package/refs/vbenchmark/.agent/agents/implementer.md +0 -70
  183. package/refs/vbenchmark/.agent/agents/orchestrator.md +0 -212
  184. package/refs/vbenchmark/.agent/agents/researcher.md +0 -80
  185. package/refs/vbenchmark/.agent/agents/reviewer.md +0 -184
  186. package/refs/vbenchmark/.agent/agents/tester.md +0 -170
  187. package/refs/vbenchmark/.agent/commands/commit.md +0 -29
  188. package/refs/vbenchmark/.agent/commands/debug.md +0 -59
  189. package/refs/vbenchmark/.agent/commands/document.md +0 -52
  190. package/refs/vbenchmark/.agent/commands/gather-context.md +0 -58
  191. package/refs/vbenchmark/.agent/commands/init.md +0 -56
  192. package/refs/vbenchmark/.agent/commands/preset-help.md +0 -50
  193. package/refs/vbenchmark/.agent/commands/refactor.md +0 -71
  194. package/refs/vbenchmark/.agent/commands/research.md +0 -37
  195. package/refs/vbenchmark/.agent/commands/review.md +0 -38
  196. package/refs/vbenchmark/.agent/commands/test.md +0 -61
  197. package/refs/vbenchmark/.agent/rules/01-code-quality.md +0 -33
  198. package/refs/vbenchmark/.agent/rules/02-typescript-go.md +0 -46
  199. package/refs/vbenchmark/.agent/rules/03-security-git.md +0 -34
  200. package/refs/vbenchmark/.agent/rules/04-architecture.md +0 -40
  201. package/refs/vbenchmark/.agent/sync.js +0 -536
  202. package/refs/vbenchmark/.agent/workflows/commit.md +0 -29
  203. package/refs/vbenchmark/.agent/workflows/debug.md +0 -59
  204. package/refs/vbenchmark/.agent/workflows/document.md +0 -52
  205. package/refs/vbenchmark/.agent/workflows/gather-context.md +0 -58
  206. package/refs/vbenchmark/.agent/workflows/init.md +0 -56
  207. package/refs/vbenchmark/.agent/workflows/preset-help.md +0 -50
  208. package/refs/vbenchmark/.agent/workflows/refactor.md +0 -71
  209. package/refs/vbenchmark/.agent/workflows/research.md +0 -37
  210. package/refs/vbenchmark/.agent/workflows/review.md +0 -38
  211. package/refs/vbenchmark/.agent/workflows/test.md +0 -61
  212. package/refs/vbenchmark/.claude/commands/agentic-dev/apply.md +0 -222
  213. package/refs/vbenchmark/.claude/commands/agentic-dev/done.md +0 -166
  214. package/refs/vbenchmark/.claude/commands/agentic-dev/proposal.md +0 -220
  215. package/refs/vbenchmark/.claude/commands/openspec/apply.md +0 -23
  216. package/refs/vbenchmark/.claude/commands/openspec/archive.md +0 -27
  217. package/refs/vbenchmark/.claude/commands/openspec/proposal.md +0 -28
  218. package/refs/vbenchmark/.clinerules/01-rules.md +0 -73
  219. package/refs/vbenchmark/.clinerules/02-agents.md +0 -34
  220. package/refs/vbenchmark/.cursor/commands/commit.md +0 -29
  221. package/refs/vbenchmark/.cursor/commands/debug.md +0 -59
  222. package/refs/vbenchmark/.cursor/commands/document.md +0 -52
  223. package/refs/vbenchmark/.cursor/commands/gather-context.md +0 -58
  224. package/refs/vbenchmark/.cursor/commands/init.md +0 -56
  225. package/refs/vbenchmark/.cursor/commands/preset-help.md +0 -50
  226. package/refs/vbenchmark/.cursor/commands/refactor.md +0 -71
  227. package/refs/vbenchmark/.cursor/commands/research.md +0 -37
  228. package/refs/vbenchmark/.cursor/commands/review.md +0 -38
  229. package/refs/vbenchmark/.cursor/commands/test.md +0 -61
  230. package/refs/vbenchmark/.cursor/rules/agents.mdc +0 -1357
  231. package/refs/vbenchmark/.factory/droids/codebase-explorer.md +0 -224
  232. package/refs/vbenchmark/.factory/droids/debugger.md +0 -180
  233. package/refs/vbenchmark/.factory/droids/documenter.md +0 -166
  234. package/refs/vbenchmark/.factory/droids/implementer.md +0 -70
  235. package/refs/vbenchmark/.factory/droids/orchestrator.md +0 -212
  236. package/refs/vbenchmark/.factory/droids/researcher.md +0 -80
  237. package/refs/vbenchmark/.factory/droids/reviewer.md +0 -184
  238. package/refs/vbenchmark/.factory/droids/tester.md +0 -170
  239. package/refs/vbenchmark/.gemini/workflows/commit.md +0 -29
  240. package/refs/vbenchmark/.gemini/workflows/debug.md +0 -59
  241. package/refs/vbenchmark/.gemini/workflows/document.md +0 -52
  242. package/refs/vbenchmark/.gemini/workflows/gather-context.md +0 -58
  243. package/refs/vbenchmark/.gemini/workflows/init.md +0 -56
  244. package/refs/vbenchmark/.gemini/workflows/preset-help.md +0 -50
  245. package/refs/vbenchmark/.gemini/workflows/refactor.md +0 -71
  246. package/refs/vbenchmark/.gemini/workflows/research.md +0 -37
  247. package/refs/vbenchmark/.gemini/workflows/review.md +0 -38
  248. package/refs/vbenchmark/.gemini/workflows/test.md +0 -61
  249. package/refs/vbenchmark/.github/CODEOWNERS +0 -20
  250. package/refs/vbenchmark/.github/FUNDING.yml +0 -4
  251. package/refs/vbenchmark/.github/ISSUE_TEMPLATE/bug-report.yml +0 -76
  252. package/refs/vbenchmark/.github/ISSUE_TEMPLATE/new-task.yml +0 -106
  253. package/refs/vbenchmark/.github/PULL_REQUEST_TEMPLATE.md +0 -38
  254. package/refs/vbenchmark/.github/copilot-instructions.md +0 -73
  255. package/refs/vbenchmark/.github/workflows/ci.yaml +0 -33
  256. package/refs/vbenchmark/.github/workflows/vercel-auto-pr.yml +0 -478
  257. package/refs/vbenchmark/.github/workflows/vercel-deploy.yaml +0 -487
  258. package/refs/vbenchmark/.github/workflows/vercel-pr-command.yaml +0 -337
  259. package/refs/vbenchmark/.github/workflows/vercel-project-init.yaml +0 -208
  260. package/refs/vbenchmark/.opencode/agent/codebase-explorer.md +0 -224
  261. package/refs/vbenchmark/.opencode/agent/debugger.md +0 -180
  262. package/refs/vbenchmark/.opencode/agent/documenter.md +0 -166
  263. package/refs/vbenchmark/.opencode/agent/implementer.md +0 -70
  264. package/refs/vbenchmark/.opencode/agent/orchestrator.md +0 -212
  265. package/refs/vbenchmark/.opencode/agent/researcher.md +0 -80
  266. package/refs/vbenchmark/.opencode/agent/reviewer.md +0 -184
  267. package/refs/vbenchmark/.opencode/agent/tester.md +0 -170
  268. package/refs/vbenchmark/.opencode/command/commit.md +0 -29
  269. package/refs/vbenchmark/.opencode/command/debug.md +0 -59
  270. package/refs/vbenchmark/.opencode/command/document.md +0 -52
  271. package/refs/vbenchmark/.opencode/command/gather-context.md +0 -58
  272. package/refs/vbenchmark/.opencode/command/init.md +0 -56
  273. package/refs/vbenchmark/.opencode/command/preset-help.md +0 -50
  274. package/refs/vbenchmark/.opencode/command/refactor.md +0 -71
  275. package/refs/vbenchmark/.opencode/command/research.md +0 -37
  276. package/refs/vbenchmark/.opencode/command/review.md +0 -38
  277. package/refs/vbenchmark/.opencode/command/test.md +0 -61
  278. package/refs/vbenchmark/.trae/project_rules.md +0 -73
  279. package/refs/vbenchmark/.windsurf/rules/rules.md +0 -85
  280. package/refs/vbenchmark/AGENTS.md +0 -73
  281. package/refs/vbenchmark/CONTRIBUTING.md +0 -332
  282. package/refs/vbenchmark/Caddyfile +0 -3
  283. package/refs/vbenchmark/LICENSE +0 -47
  284. package/refs/vbenchmark/README.md +0 -354
  285. package/refs/vbenchmark/docker-compose.prod.yaml +0 -35
  286. package/refs/vbenchmark/docker-compose.yaml +0 -53
  287. package/refs/vbenchmark/docs/TASK_EXPANSION_PLAN.md +0 -211
  288. package/refs/vbenchmark/docs/THESIS.md +0 -441
  289. package/refs/vbenchmark/docs/categories/code-evolution.md +0 -138
  290. package/refs/vbenchmark/openspec/changes/init-vibecodingbench/design.md +0 -111
  291. package/refs/vbenchmark/openspec/changes/init-vibecodingbench/proposal.md +0 -15
  292. package/refs/vbenchmark/openspec/changes/init-vibecodingbench/specs/evaluation/spec.md +0 -105
  293. package/refs/vbenchmark/openspec/changes/init-vibecodingbench/specs/leaderboard/spec.md +0 -68
  294. package/refs/vbenchmark/openspec/changes/init-vibecodingbench/specs/task-definition/spec.md +0 -45
  295. package/refs/vbenchmark/openspec/changes/init-vibecodingbench/specs/task-runner/spec.md +0 -49
  296. package/refs/vbenchmark/openspec/changes/init-vibecodingbench/tasks.md +0 -413
  297. package/refs/vbenchmark/package.json +0 -51
  298. package/refs/vbenchmark/packages/cli/eslint.config.js +0 -16
  299. package/refs/vbenchmark/packages/cli/package.json +0 -35
  300. package/refs/vbenchmark/packages/cli/src/agents/index.ts +0 -655
  301. package/refs/vbenchmark/packages/cli/src/commands/eval.ts +0 -197
  302. package/refs/vbenchmark/packages/cli/src/commands/list.ts +0 -63
  303. package/refs/vbenchmark/packages/cli/src/commands/run.ts +0 -147
  304. package/refs/vbenchmark/packages/cli/src/evaluator.ts +0 -125
  305. package/refs/vbenchmark/packages/cli/src/index.ts +0 -21
  306. package/refs/vbenchmark/packages/cli/src/lib/task-variation.ts +0 -153
  307. package/refs/vbenchmark/packages/cli/src/loader.ts +0 -258
  308. package/refs/vbenchmark/packages/cli/src/reporter.ts +0 -222
  309. package/refs/vbenchmark/packages/cli/src/runtime/docker.ts +0 -385
  310. package/refs/vbenchmark/packages/cli/tsconfig.json +0 -8
  311. package/refs/vbenchmark/packages/dashboard/Dockerfile +0 -42
  312. package/refs/vbenchmark/packages/dashboard/index.html +0 -21
  313. package/refs/vbenchmark/packages/dashboard/package.json +0 -29
  314. package/refs/vbenchmark/packages/dashboard/postcss.config.js +0 -6
  315. package/refs/vbenchmark/packages/dashboard/public/favicon.svg +0 -24
  316. package/refs/vbenchmark/packages/dashboard/public/logo.png +0 -0
  317. package/refs/vbenchmark/packages/dashboard/public/logo.svg +0 -39
  318. package/refs/vbenchmark/packages/dashboard/src/App.tsx +0 -1468
  319. package/refs/vbenchmark/packages/dashboard/src/data/category-performance.json +0 -1
  320. package/refs/vbenchmark/packages/dashboard/src/data/leaderboard.json +0 -1
  321. package/refs/vbenchmark/packages/dashboard/src/data/task-results.json +0 -1
  322. package/refs/vbenchmark/packages/dashboard/src/data/tasks.json +0 -1
  323. package/refs/vbenchmark/packages/dashboard/src/index.css +0 -3
  324. package/refs/vbenchmark/packages/dashboard/src/main.tsx +0 -13
  325. package/refs/vbenchmark/packages/dashboard/src/vite-env.d.ts +0 -9
  326. package/refs/vbenchmark/packages/dashboard/tailwind.config.js +0 -11
  327. package/refs/vbenchmark/packages/dashboard/tsconfig.json +0 -21
  328. package/refs/vbenchmark/packages/dashboard/tsconfig.node.json +0 -11
  329. package/refs/vbenchmark/packages/dashboard/vercel.json +0 -6
  330. package/refs/vbenchmark/packages/dashboard/vite.config.ts +0 -28
  331. package/refs/vbenchmark/packages/evaluator/eslint.config.js +0 -16
  332. package/refs/vbenchmark/packages/evaluator/package.json +0 -24
  333. package/refs/vbenchmark/packages/evaluator/src/index.ts +0 -15
  334. package/refs/vbenchmark/packages/evaluator/src/runners/functional.ts +0 -88
  335. package/refs/vbenchmark/packages/evaluator/src/runners/quality.ts +0 -140
  336. package/refs/vbenchmark/packages/evaluator/src/runners/security.ts +0 -94
  337. package/refs/vbenchmark/packages/evaluator/src/runners/visual.ts +0 -108
  338. package/refs/vbenchmark/packages/evaluator/src/types.d.ts +0 -19
  339. package/refs/vbenchmark/packages/evaluator/tsconfig.json +0 -8
  340. package/refs/vbenchmark/packages/leaderboard/Dockerfile +0 -38
  341. package/refs/vbenchmark/packages/leaderboard/drizzle.config.ts +0 -10
  342. package/refs/vbenchmark/packages/leaderboard/eslint.config.js +0 -16
  343. package/refs/vbenchmark/packages/leaderboard/fly.toml +0 -29
  344. package/refs/vbenchmark/packages/leaderboard/package.json +0 -36
  345. package/refs/vbenchmark/packages/leaderboard/src/app.ts +0 -29
  346. package/refs/vbenchmark/packages/leaderboard/src/components/BrowserPreview.tsx +0 -190
  347. package/refs/vbenchmark/packages/leaderboard/src/components/ComparisonView.tsx +0 -205
  348. package/refs/vbenchmark/packages/leaderboard/src/components/LeaderboardTable.tsx +0 -150
  349. package/refs/vbenchmark/packages/leaderboard/src/components/LiveRunCard.tsx +0 -133
  350. package/refs/vbenchmark/packages/leaderboard/src/components/SubmissionForm.tsx +0 -406
  351. package/refs/vbenchmark/packages/leaderboard/src/components/SubmitForm.tsx +0 -293
  352. package/refs/vbenchmark/packages/leaderboard/src/components/TerminalStream.tsx +0 -111
  353. package/refs/vbenchmark/packages/leaderboard/src/config/pricing.ts +0 -206
  354. package/refs/vbenchmark/packages/leaderboard/src/db/index.ts +0 -31
  355. package/refs/vbenchmark/packages/leaderboard/src/db/schema.ts +0 -125
  356. package/refs/vbenchmark/packages/leaderboard/src/index.ts +0 -13
  357. package/refs/vbenchmark/packages/leaderboard/src/lib/websocket.ts +0 -124
  358. package/refs/vbenchmark/packages/leaderboard/src/routes/leaderboard.ts +0 -698
  359. package/refs/vbenchmark/packages/leaderboard/src/routes/live.ts +0 -175
  360. package/refs/vbenchmark/packages/leaderboard/src/routes/submissions.ts +0 -183
  361. package/refs/vbenchmark/packages/leaderboard/src/routes/tasks.ts +0 -215
  362. package/refs/vbenchmark/packages/leaderboard/tests/api.test.ts +0 -228
  363. package/refs/vbenchmark/packages/leaderboard/tsconfig.json +0 -9
  364. package/refs/vbenchmark/scripts/deploy.sh +0 -70
  365. package/refs/vbenchmark/tasks/ai-integration/advanced/context-management/PROMPT.md +0 -15
  366. package/refs/vbenchmark/tasks/ai-integration/advanced/context-management/task.yaml +0 -16
  367. package/refs/vbenchmark/tasks/ai-integration/advanced/evaluation-framework/PROMPT.md +0 -15
  368. package/refs/vbenchmark/tasks/ai-integration/advanced/evaluation-framework/task.yaml +0 -16
  369. package/refs/vbenchmark/tasks/ai-integration/advanced/guardrails-safety/PROMPT.md +0 -15
  370. package/refs/vbenchmark/tasks/ai-integration/advanced/guardrails-safety/task.yaml +0 -16
  371. package/refs/vbenchmark/tasks/ai-integration/advanced/memory-system/PROMPT.md +0 -15
  372. package/refs/vbenchmark/tasks/ai-integration/advanced/memory-system/task.yaml +0 -16
  373. package/refs/vbenchmark/tasks/ai-integration/advanced/model-routing/PROMPT.md +0 -15
  374. package/refs/vbenchmark/tasks/ai-integration/advanced/model-routing/task.yaml +0 -16
  375. package/refs/vbenchmark/tasks/ai-integration/advanced/multi-agent-system/PROMPT.md +0 -15
  376. package/refs/vbenchmark/tasks/ai-integration/advanced/multi-agent-system/task.yaml +0 -16
  377. package/refs/vbenchmark/tasks/ai-integration/advanced/prompt-optimization/PROMPT.md +0 -15
  378. package/refs/vbenchmark/tasks/ai-integration/advanced/prompt-optimization/task.yaml +0 -16
  379. package/refs/vbenchmark/tasks/ai-integration/advanced/reasoning-chain/PROMPT.md +0 -15
  380. package/refs/vbenchmark/tasks/ai-integration/advanced/reasoning-chain/task.yaml +0 -16
  381. package/refs/vbenchmark/tasks/ai-integration/advanced/streaming-pipeline/PROMPT.md +0 -15
  382. package/refs/vbenchmark/tasks/ai-integration/advanced/streaming-pipeline/task.yaml +0 -16
  383. package/refs/vbenchmark/tasks/ai-integration/advanced/tool-use-orchestration/PROMPT.md +0 -15
  384. package/refs/vbenchmark/tasks/ai-integration/advanced/tool-use-orchestration/task.yaml +0 -16
  385. package/refs/vbenchmark/tasks/ai-integration/agents/code-review-agent/PROMPT.md +0 -64
  386. package/refs/vbenchmark/tasks/ai-integration/agents/code-review-agent/task.yaml +0 -24
  387. package/refs/vbenchmark/tasks/ai-integration/agents/research-agent/PROMPT.md +0 -61
  388. package/refs/vbenchmark/tasks/ai-integration/agents/research-agent/task.yaml +0 -24
  389. package/refs/vbenchmark/tasks/ai-integration/agents/web-scraper-agent/PROMPT.md +0 -57
  390. package/refs/vbenchmark/tasks/ai-integration/agents/web-scraper-agent/task.yaml +0 -24
  391. package/refs/vbenchmark/tasks/ai-integration/embeddings/duplicate-detection/PROMPT.md +0 -50
  392. package/refs/vbenchmark/tasks/ai-integration/embeddings/duplicate-detection/task.yaml +0 -24
  393. package/refs/vbenchmark/tasks/ai-integration/embeddings/recommendation-engine/PROMPT.md +0 -51
  394. package/refs/vbenchmark/tasks/ai-integration/embeddings/recommendation-engine/task.yaml +0 -24
  395. package/refs/vbenchmark/tasks/ai-integration/embeddings/semantic-search/PROMPT.md +0 -50
  396. package/refs/vbenchmark/tasks/ai-integration/embeddings/semantic-search/task.yaml +0 -24
  397. package/refs/vbenchmark/tasks/ai-integration/fine-tuning/classification-model/PROMPT.md +0 -50
  398. package/refs/vbenchmark/tasks/ai-integration/fine-tuning/classification-model/task.yaml +0 -24
  399. package/refs/vbenchmark/tasks/ai-integration/function-calling/api-orchestrator/PROMPT.md +0 -60
  400. package/refs/vbenchmark/tasks/ai-integration/function-calling/api-orchestrator/task.yaml +0 -24
  401. package/refs/vbenchmark/tasks/ai-integration/function-calling/calendar-assistant/PROMPT.md +0 -50
  402. package/refs/vbenchmark/tasks/ai-integration/function-calling/calendar-assistant/task.yaml +0 -24
  403. package/refs/vbenchmark/tasks/ai-integration/function-calling/database-query/PROMPT.md +0 -62
  404. package/refs/vbenchmark/tasks/ai-integration/function-calling/database-query/task.yaml +0 -24
  405. package/refs/vbenchmark/tasks/ai-integration/multimodal/chart-interpreter/PROMPT.md +0 -60
  406. package/refs/vbenchmark/tasks/ai-integration/multimodal/chart-interpreter/task.yaml +0 -24
  407. package/refs/vbenchmark/tasks/ai-integration/multimodal/image-captioning/PROMPT.md +0 -49
  408. package/refs/vbenchmark/tasks/ai-integration/multimodal/image-captioning/task.yaml +0 -24
  409. package/refs/vbenchmark/tasks/ai-integration/rag-chatbot/code-assistant/PROMPT.md +0 -51
  410. package/refs/vbenchmark/tasks/ai-integration/rag-chatbot/code-assistant/task.yaml +0 -24
  411. package/refs/vbenchmark/tasks/ai-integration/rag-chatbot/doc-search/PROMPT.md +0 -51
  412. package/refs/vbenchmark/tasks/ai-integration/rag-chatbot/doc-search/task.yaml +0 -24
  413. package/refs/vbenchmark/tasks/ai-integration/rag-chatbot/pdf-qa/PROMPT.md +0 -76
  414. package/refs/vbenchmark/tasks/ai-integration/rag-chatbot/pdf-qa/docker-compose.yaml +0 -30
  415. package/refs/vbenchmark/tasks/ai-integration/rag-chatbot/pdf-qa/task.yaml +0 -30
  416. package/refs/vbenchmark/tasks/ai-integration/rag-chatbot/pdf-qa/tests/functional/qa.test.py +0 -146
  417. package/refs/vbenchmark/tasks/ai-integration/rag-chatbot/support-bot/PROMPT.md +0 -51
  418. package/refs/vbenchmark/tasks/ai-integration/rag-chatbot/support-bot/task.yaml +0 -24
  419. package/refs/vbenchmark/tasks/ai-integration/structured-output/contract-analyzer/PROMPT.md +0 -67
  420. package/refs/vbenchmark/tasks/ai-integration/structured-output/contract-analyzer/task.yaml +0 -24
  421. package/refs/vbenchmark/tasks/ai-integration/structured-output/invoice-parser/PROMPT.md +0 -61
  422. package/refs/vbenchmark/tasks/ai-integration/structured-output/invoice-parser/task.yaml +0 -27
  423. package/refs/vbenchmark/tasks/ai-integration/structured-output/receipt-scanner/PROMPT.md +0 -65
  424. package/refs/vbenchmark/tasks/ai-integration/structured-output/receipt-scanner/task.yaml +0 -24
  425. package/refs/vbenchmark/tasks/ai-integration/structured-output/resume-parser/PROMPT.md +0 -70
  426. package/refs/vbenchmark/tasks/ai-integration/structured-output/resume-parser/task.yaml +0 -24
  427. package/refs/vbenchmark/tasks/api-integrations/advanced/api-analytics/PROMPT.md +0 -15
  428. package/refs/vbenchmark/tasks/api-integrations/advanced/api-analytics/task.yaml +0 -16
  429. package/refs/vbenchmark/tasks/api-integrations/advanced/api-gateway/PROMPT.md +0 -15
  430. package/refs/vbenchmark/tasks/api-integrations/advanced/api-gateway/task.yaml +0 -16
  431. package/refs/vbenchmark/tasks/api-integrations/advanced/api-mocking/PROMPT.md +0 -15
  432. package/refs/vbenchmark/tasks/api-integrations/advanced/api-mocking/task.yaml +0 -16
  433. package/refs/vbenchmark/tasks/api-integrations/advanced/contract-testing/PROMPT.md +0 -15
  434. package/refs/vbenchmark/tasks/api-integrations/advanced/contract-testing/task.yaml +0 -16
  435. package/refs/vbenchmark/tasks/api-integrations/advanced/graphql-federation/PROMPT.md +0 -15
  436. package/refs/vbenchmark/tasks/api-integrations/advanced/graphql-federation/task.yaml +0 -16
  437. package/refs/vbenchmark/tasks/api-integrations/advanced/grpc-gateway/PROMPT.md +0 -15
  438. package/refs/vbenchmark/tasks/api-integrations/advanced/grpc-gateway/task.yaml +0 -16
  439. package/refs/vbenchmark/tasks/api-integrations/advanced/rate-limiter/PROMPT.md +0 -15
  440. package/refs/vbenchmark/tasks/api-integrations/advanced/rate-limiter/task.yaml +0 -16
  441. package/refs/vbenchmark/tasks/api-integrations/advanced/request-validator/PROMPT.md +0 -15
  442. package/refs/vbenchmark/tasks/api-integrations/advanced/request-validator/task.yaml +0 -16
  443. package/refs/vbenchmark/tasks/api-integrations/advanced/sdk-generator/PROMPT.md +0 -15
  444. package/refs/vbenchmark/tasks/api-integrations/advanced/sdk-generator/task.yaml +0 -16
  445. package/refs/vbenchmark/tasks/api-integrations/advanced/webhook-processor/PROMPT.md +0 -15
  446. package/refs/vbenchmark/tasks/api-integrations/advanced/webhook-processor/task.yaml +0 -16
  447. package/refs/vbenchmark/tasks/api-integrations/analytics/mixpanel-events/PROMPT.md +0 -42
  448. package/refs/vbenchmark/tasks/api-integrations/analytics/mixpanel-events/task.yaml +0 -24
  449. package/refs/vbenchmark/tasks/api-integrations/analytics/segment-tracking/PROMPT.md +0 -42
  450. package/refs/vbenchmark/tasks/api-integrations/analytics/segment-tracking/task.yaml +0 -24
  451. package/refs/vbenchmark/tasks/api-integrations/auth-provider/oauth2-github/PROMPT.md +0 -42
  452. package/refs/vbenchmark/tasks/api-integrations/auth-provider/oauth2-github/task.yaml +0 -24
  453. package/refs/vbenchmark/tasks/api-integrations/auth-provider/okta-integration/PROMPT.md +0 -44
  454. package/refs/vbenchmark/tasks/api-integrations/auth-provider/okta-integration/task.yaml +0 -24
  455. package/refs/vbenchmark/tasks/api-integrations/auth-provider/saml-sso/PROMPT.md +0 -42
  456. package/refs/vbenchmark/tasks/api-integrations/auth-provider/saml-sso/task.yaml +0 -24
  457. package/refs/vbenchmark/tasks/api-integrations/communication/discord-webhook/PROMPT.md +0 -44
  458. package/refs/vbenchmark/tasks/api-integrations/communication/discord-webhook/task.yaml +0 -24
  459. package/refs/vbenchmark/tasks/api-integrations/communication/slack-bot/PROMPT.md +0 -42
  460. package/refs/vbenchmark/tasks/api-integrations/communication/slack-bot/task.yaml +0 -24
  461. package/refs/vbenchmark/tasks/api-integrations/communication/twilio-sms/PROMPT.md +0 -42
  462. package/refs/vbenchmark/tasks/api-integrations/communication/twilio-sms/task.yaml +0 -24
  463. package/refs/vbenchmark/tasks/api-integrations/email/transactional/PROMPT.md +0 -82
  464. package/refs/vbenchmark/tasks/api-integrations/email/transactional/task.yaml +0 -27
  465. package/refs/vbenchmark/tasks/api-integrations/maps/google-maps-geocoding/PROMPT.md +0 -41
  466. package/refs/vbenchmark/tasks/api-integrations/maps/google-maps-geocoding/task.yaml +0 -24
  467. package/refs/vbenchmark/tasks/api-integrations/maps/mapbox-directions/PROMPT.md +0 -41
  468. package/refs/vbenchmark/tasks/api-integrations/maps/mapbox-directions/task.yaml +0 -24
  469. package/refs/vbenchmark/tasks/api-integrations/payment/crypto-payments/PROMPT.md +0 -43
  470. package/refs/vbenchmark/tasks/api-integrations/payment/crypto-payments/task.yaml +0 -24
  471. package/refs/vbenchmark/tasks/api-integrations/payment/paypal-integration/PROMPT.md +0 -41
  472. package/refs/vbenchmark/tasks/api-integrations/payment/paypal-integration/task.yaml +0 -24
  473. package/refs/vbenchmark/tasks/api-integrations/social/twitter-api/PROMPT.md +0 -41
  474. package/refs/vbenchmark/tasks/api-integrations/social/twitter-api/task.yaml +0 -24
  475. package/refs/vbenchmark/tasks/api-integrations/storage/cloudinary-upload/PROMPT.md +0 -43
  476. package/refs/vbenchmark/tasks/api-integrations/storage/cloudinary-upload/task.yaml +0 -24
  477. package/refs/vbenchmark/tasks/api-integrations/storage/gcs-streaming/PROMPT.md +0 -43
  478. package/refs/vbenchmark/tasks/api-integrations/storage/gcs-streaming/task.yaml +0 -24
  479. package/refs/vbenchmark/tasks/api-integrations/storage/s3-presigned-urls/PROMPT.md +0 -41
  480. package/refs/vbenchmark/tasks/api-integrations/storage/s3-presigned-urls/task.yaml +0 -24
  481. package/refs/vbenchmark/tasks/api-integrations/stripe/checkout-session/PROMPT.md +0 -41
  482. package/refs/vbenchmark/tasks/api-integrations/stripe/checkout-session/task.yaml +0 -24
  483. package/refs/vbenchmark/tasks/api-integrations/stripe/payment-webhook/PROMPT.md +0 -60
  484. package/refs/vbenchmark/tasks/api-integrations/stripe/payment-webhook/docker-compose.yaml +0 -38
  485. package/refs/vbenchmark/tasks/api-integrations/stripe/payment-webhook/task.yaml +0 -31
  486. package/refs/vbenchmark/tasks/api-integrations/stripe/payment-webhook/tests/webhook.test.ts +0 -193
  487. package/refs/vbenchmark/tasks/api-integrations/stripe/subscription-portal/PROMPT.md +0 -41
  488. package/refs/vbenchmark/tasks/api-integrations/stripe/subscription-portal/task.yaml +0 -24
  489. package/refs/vbenchmark/tasks/code-evolution/advanced/api-deprecation/PROMPT.md +0 -15
  490. package/refs/vbenchmark/tasks/code-evolution/advanced/api-deprecation/task.yaml +0 -16
  491. package/refs/vbenchmark/tasks/code-evolution/advanced/ast-refactoring/PROMPT.md +0 -15
  492. package/refs/vbenchmark/tasks/code-evolution/advanced/ast-refactoring/task.yaml +0 -16
  493. package/refs/vbenchmark/tasks/code-evolution/advanced/concurrency-fix/PROMPT.md +0 -15
  494. package/refs/vbenchmark/tasks/code-evolution/advanced/concurrency-fix/task.yaml +0 -16
  495. package/refs/vbenchmark/tasks/code-evolution/advanced/database-schema-migration/PROMPT.md +0 -15
  496. package/refs/vbenchmark/tasks/code-evolution/advanced/database-schema-migration/task.yaml +0 -16
  497. package/refs/vbenchmark/tasks/code-evolution/advanced/dead-code-elimination/PROMPT.md +0 -15
  498. package/refs/vbenchmark/tasks/code-evolution/advanced/dead-code-elimination/task.yaml +0 -16
  499. package/refs/vbenchmark/tasks/code-evolution/advanced/dependency-upgrade/PROMPT.md +0 -15
  500. package/refs/vbenchmark/tasks/code-evolution/advanced/dependency-upgrade/task.yaml +0 -16
  501. package/refs/vbenchmark/tasks/code-evolution/advanced/memory-optimization/PROMPT.md +0 -15
  502. package/refs/vbenchmark/tasks/code-evolution/advanced/memory-optimization/task.yaml +0 -16
  503. package/refs/vbenchmark/tasks/code-evolution/advanced/monorepo-extraction/PROMPT.md +0 -15
  504. package/refs/vbenchmark/tasks/code-evolution/advanced/monorepo-extraction/task.yaml +0 -16
  505. package/refs/vbenchmark/tasks/code-evolution/advanced/performance-profiling/PROMPT.md +0 -15
  506. package/refs/vbenchmark/tasks/code-evolution/advanced/performance-profiling/task.yaml +0 -16
  507. package/refs/vbenchmark/tasks/code-evolution/advanced/type-migration/PROMPT.md +0 -15
  508. package/refs/vbenchmark/tasks/code-evolution/advanced/type-migration/task.yaml +0 -16
  509. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/callback-to-async/PROMPT.md +0 -47
  510. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/callback-to-async/task.yaml +0 -24
  511. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/express-to-fastify/PROMPT.md +0 -49
  512. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/express-to-fastify/base-code/src/app.ts +0 -22
  513. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/express-to-fastify/task.yaml +0 -37
  514. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/express-to-fastify/tests/api.test.ts +0 -70
  515. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/flask-to-fastapi/PROMPT.md +0 -46
  516. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/flask-to-fastapi/task.yaml +0 -24
  517. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/java-to-kotlin/PROMPT.md +0 -45
  518. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/java-to-kotlin/task.yaml +0 -24
  519. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/jquery-to-react/PROMPT.md +0 -47
  520. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/jquery-to-react/task.yaml +0 -24
  521. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/rest-to-grpc/PROMPT.md +0 -47
  522. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/rest-to-grpc/task.yaml +0 -24
  523. package/refs/vbenchmark/tasks/code-evolution/performance/async-refactor/PROMPT.md +0 -47
  524. package/refs/vbenchmark/tasks/code-evolution/performance/async-refactor/task.yaml +0 -24
  525. package/refs/vbenchmark/tasks/code-evolution/performance/memory-leak-fix/PROMPT.md +0 -47
  526. package/refs/vbenchmark/tasks/code-evolution/performance/memory-leak-fix/task.yaml +0 -24
  527. package/refs/vbenchmark/tasks/code-evolution/performance/query-optimization/PROMPT.md +0 -49
  528. package/refs/vbenchmark/tasks/code-evolution/performance/query-optimization/task.yaml +0 -24
  529. package/refs/vbenchmark/tasks/code-evolution/refactoring/class-to-hooks/PROMPT.md +0 -96
  530. package/refs/vbenchmark/tasks/code-evolution/refactoring/class-to-hooks/task.yaml +0 -27
  531. package/refs/vbenchmark/tasks/code-evolution/refactoring/dependency-injection/PROMPT.md +0 -47
  532. package/refs/vbenchmark/tasks/code-evolution/refactoring/dependency-injection/task.yaml +0 -24
  533. package/refs/vbenchmark/tasks/code-evolution/refactoring/error-handling/PROMPT.md +0 -48
  534. package/refs/vbenchmark/tasks/code-evolution/refactoring/error-handling/task.yaml +0 -24
  535. package/refs/vbenchmark/tasks/code-evolution/refactoring/monolith-to-modules/PROMPT.md +0 -50
  536. package/refs/vbenchmark/tasks/code-evolution/refactoring/monolith-to-modules/task.yaml +0 -24
  537. package/refs/vbenchmark/tasks/code-evolution/refactoring/orm-migration/PROMPT.md +0 -47
  538. package/refs/vbenchmark/tasks/code-evolution/refactoring/orm-migration/task.yaml +0 -24
  539. package/refs/vbenchmark/tasks/code-evolution/security/secrets-rotation/PROMPT.md +0 -49
  540. package/refs/vbenchmark/tasks/code-evolution/security/secrets-rotation/task.yaml +0 -24
  541. package/refs/vbenchmark/tasks/code-evolution/security/sql-injection-fix/PROMPT.md +0 -50
  542. package/refs/vbenchmark/tasks/code-evolution/security/sql-injection-fix/task.yaml +0 -24
  543. package/refs/vbenchmark/tasks/code-evolution/security/xss-prevention/PROMPT.md +0 -47
  544. package/refs/vbenchmark/tasks/code-evolution/security/xss-prevention/task.yaml +0 -24
  545. package/refs/vbenchmark/tasks/code-evolution/testing/add-unit-tests/PROMPT.md +0 -48
  546. package/refs/vbenchmark/tasks/code-evolution/testing/add-unit-tests/task.yaml +0 -24
  547. package/refs/vbenchmark/tasks/code-evolution/testing/e2e-playwright/PROMPT.md +0 -50
  548. package/refs/vbenchmark/tasks/code-evolution/testing/e2e-playwright/task.yaml +0 -24
  549. package/refs/vbenchmark/tasks/code-evolution/testing/pytest-fixtures/PROMPT.md +0 -47
  550. package/refs/vbenchmark/tasks/code-evolution/testing/pytest-fixtures/task.yaml +0 -24
  551. package/refs/vbenchmark/tasks/frontend/accessibility/keyboard-shortcuts/PROMPT.md +0 -44
  552. package/refs/vbenchmark/tasks/frontend/accessibility/keyboard-shortcuts/task.yaml +0 -24
  553. package/refs/vbenchmark/tasks/frontend/accessibility/screen-reader-nav/PROMPT.md +0 -44
  554. package/refs/vbenchmark/tasks/frontend/accessibility/screen-reader-nav/task.yaml +0 -24
  555. package/refs/vbenchmark/tasks/frontend/advanced/canvas-editor/PROMPT.md +0 -15
  556. package/refs/vbenchmark/tasks/frontend/advanced/canvas-editor/task.yaml +0 -16
  557. package/refs/vbenchmark/tasks/frontend/advanced/micro-frontend/PROMPT.md +0 -15
  558. package/refs/vbenchmark/tasks/frontend/advanced/micro-frontend/task.yaml +0 -16
  559. package/refs/vbenchmark/tasks/frontend/advanced/offline-first/PROMPT.md +0 -15
  560. package/refs/vbenchmark/tasks/frontend/advanced/offline-first/task.yaml +0 -16
  561. package/refs/vbenchmark/tasks/frontend/advanced/realtime-collab/PROMPT.md +0 -15
  562. package/refs/vbenchmark/tasks/frontend/advanced/realtime-collab/task.yaml +0 -16
  563. package/refs/vbenchmark/tasks/frontend/advanced/service-worker/PROMPT.md +0 -15
  564. package/refs/vbenchmark/tasks/frontend/advanced/service-worker/task.yaml +0 -16
  565. package/refs/vbenchmark/tasks/frontend/advanced/state-machine/PROMPT.md +0 -15
  566. package/refs/vbenchmark/tasks/frontend/advanced/state-machine/task.yaml +0 -16
  567. package/refs/vbenchmark/tasks/frontend/advanced/virtual-list/PROMPT.md +0 -15
  568. package/refs/vbenchmark/tasks/frontend/advanced/virtual-list/task.yaml +0 -16
  569. package/refs/vbenchmark/tasks/frontend/advanced/wasm-integration/PROMPT.md +0 -15
  570. package/refs/vbenchmark/tasks/frontend/advanced/wasm-integration/task.yaml +0 -16
  571. package/refs/vbenchmark/tasks/frontend/advanced/web-worker/PROMPT.md +0 -15
  572. package/refs/vbenchmark/tasks/frontend/advanced/web-worker/task.yaml +0 -16
  573. package/refs/vbenchmark/tasks/frontend/advanced/webgl-visualization/PROMPT.md +0 -15
  574. package/refs/vbenchmark/tasks/frontend/advanced/webgl-visualization/task.yaml +0 -16
  575. package/refs/vbenchmark/tasks/frontend/animation/page-transitions/PROMPT.md +0 -44
  576. package/refs/vbenchmark/tasks/frontend/animation/page-transitions/task.yaml +0 -24
  577. package/refs/vbenchmark/tasks/frontend/components/data-grid/PROMPT.md +0 -59
  578. package/refs/vbenchmark/tasks/frontend/components/data-grid/task.yaml +0 -24
  579. package/refs/vbenchmark/tasks/frontend/components/date-range-picker/PROMPT.md +0 -57
  580. package/refs/vbenchmark/tasks/frontend/components/date-range-picker/task.yaml +0 -24
  581. package/refs/vbenchmark/tasks/frontend/components/file-uploader/PROMPT.md +0 -55
  582. package/refs/vbenchmark/tasks/frontend/components/file-uploader/task.yaml +0 -24
  583. package/refs/vbenchmark/tasks/frontend/components/form-builder/PROMPT.md +0 -96
  584. package/refs/vbenchmark/tasks/frontend/components/form-builder/task.yaml +0 -28
  585. package/refs/vbenchmark/tasks/frontend/components/rich-text-editor/PROMPT.md +0 -45
  586. package/refs/vbenchmark/tasks/frontend/components/rich-text-editor/task.yaml +0 -24
  587. package/refs/vbenchmark/tasks/frontend/figma-to-code/dashboard-layout/PROMPT.md +0 -50
  588. package/refs/vbenchmark/tasks/frontend/figma-to-code/dashboard-layout/task.yaml +0 -25
  589. package/refs/vbenchmark/tasks/frontend/figma-to-code/landing-page/PROMPT.md +0 -49
  590. package/refs/vbenchmark/tasks/frontend/figma-to-code/landing-page/task.yaml +0 -25
  591. package/refs/vbenchmark/tasks/frontend/figma-to-code/mobile-app-screen/PROMPT.md +0 -51
  592. package/refs/vbenchmark/tasks/frontend/figma-to-code/mobile-app-screen/task.yaml +0 -24
  593. package/refs/vbenchmark/tasks/frontend/figma-to-code/pricing-card/PROMPT.md +0 -93
  594. package/refs/vbenchmark/tasks/frontend/figma-to-code/pricing-card/docker-compose.yaml +0 -23
  595. package/refs/vbenchmark/tasks/frontend/figma-to-code/pricing-card/task.yaml +0 -30
  596. package/refs/vbenchmark/tasks/frontend/figma-to-code/pricing-card/tests/visual/diff.test.ts +0 -107
  597. package/refs/vbenchmark/tasks/frontend/figma-to-code/pricing-card/tests/visual/interaction.test.ts +0 -88
  598. package/refs/vbenchmark/tasks/frontend/performance/image-lazy-load/PROMPT.md +0 -43
  599. package/refs/vbenchmark/tasks/frontend/performance/image-lazy-load/task.yaml +0 -24
  600. package/refs/vbenchmark/tasks/frontend/performance/infinite-scroll/PROMPT.md +0 -44
  601. package/refs/vbenchmark/tasks/frontend/performance/infinite-scroll/task.yaml +0 -24
  602. package/refs/vbenchmark/tasks/frontend/state-management/collaborative-editor/PROMPT.md +0 -44
  603. package/refs/vbenchmark/tasks/frontend/state-management/collaborative-editor/task.yaml +0 -24
  604. package/refs/vbenchmark/tasks/frontend/state-management/shopping-cart/PROMPT.md +0 -53
  605. package/refs/vbenchmark/tasks/frontend/state-management/shopping-cart/task.yaml +0 -24
  606. package/refs/vbenchmark/tasks/frontend/visualization/chart-dashboard/PROMPT.md +0 -83
  607. package/refs/vbenchmark/tasks/frontend/visualization/chart-dashboard/task.yaml +0 -28
  608. package/refs/vbenchmark/tasks/frontend/visualization/gantt-chart/PROMPT.md +0 -57
  609. package/refs/vbenchmark/tasks/frontend/visualization/gantt-chart/task.yaml +0 -24
  610. package/refs/vbenchmark/tasks/frontend/visualization/map-dashboard/PROMPT.md +0 -44
  611. package/refs/vbenchmark/tasks/frontend/visualization/map-dashboard/task.yaml +0 -24
  612. package/refs/vbenchmark/tasks/frontend/visualization/realtime-charts/PROMPT.md +0 -43
  613. package/refs/vbenchmark/tasks/frontend/visualization/realtime-charts/task.yaml +0 -24
  614. package/refs/vbenchmark/tasks/glue-code/advanced/blue-green-deploy/PROMPT.md +0 -15
  615. package/refs/vbenchmark/tasks/glue-code/advanced/blue-green-deploy/task.yaml +0 -16
  616. package/refs/vbenchmark/tasks/glue-code/advanced/canary-release/PROMPT.md +0 -15
  617. package/refs/vbenchmark/tasks/glue-code/advanced/canary-release/task.yaml +0 -16
  618. package/refs/vbenchmark/tasks/glue-code/advanced/change-data-capture/PROMPT.md +0 -15
  619. package/refs/vbenchmark/tasks/glue-code/advanced/change-data-capture/task.yaml +0 -16
  620. package/refs/vbenchmark/tasks/glue-code/advanced/config-management/PROMPT.md +0 -15
  621. package/refs/vbenchmark/tasks/glue-code/advanced/config-management/task.yaml +0 -16
  622. package/refs/vbenchmark/tasks/glue-code/advanced/data-pipeline/PROMPT.md +0 -15
  623. package/refs/vbenchmark/tasks/glue-code/advanced/data-pipeline/task.yaml +0 -16
  624. package/refs/vbenchmark/tasks/glue-code/advanced/distributed-tracing/PROMPT.md +0 -15
  625. package/refs/vbenchmark/tasks/glue-code/advanced/distributed-tracing/task.yaml +0 -16
  626. package/refs/vbenchmark/tasks/glue-code/advanced/log-aggregation/PROMPT.md +0 -15
  627. package/refs/vbenchmark/tasks/glue-code/advanced/log-aggregation/task.yaml +0 -16
  628. package/refs/vbenchmark/tasks/glue-code/advanced/schema-registry/PROMPT.md +0 -15
  629. package/refs/vbenchmark/tasks/glue-code/advanced/schema-registry/task.yaml +0 -16
  630. package/refs/vbenchmark/tasks/glue-code/advanced/secret-rotation/PROMPT.md +0 -15
  631. package/refs/vbenchmark/tasks/glue-code/advanced/secret-rotation/task.yaml +0 -16
  632. package/refs/vbenchmark/tasks/glue-code/advanced/stream-processing/PROMPT.md +0 -15
  633. package/refs/vbenchmark/tasks/glue-code/advanced/stream-processing/task.yaml +0 -16
  634. package/refs/vbenchmark/tasks/glue-code/api-sync/rest-to-graphql/PROMPT.md +0 -66
  635. package/refs/vbenchmark/tasks/glue-code/api-sync/rest-to-graphql/task.yaml +0 -27
  636. package/refs/vbenchmark/tasks/glue-code/caching/redis-cache/PROMPT.md +0 -82
  637. package/refs/vbenchmark/tasks/glue-code/caching/redis-cache/task.yaml +0 -27
  638. package/refs/vbenchmark/tasks/glue-code/data-transform/avro-schema-evolution/PROMPT.md +0 -51
  639. package/refs/vbenchmark/tasks/glue-code/data-transform/avro-schema-evolution/task.yaml +0 -24
  640. package/refs/vbenchmark/tasks/glue-code/data-transform/csv-normalizer/PROMPT.md +0 -49
  641. package/refs/vbenchmark/tasks/glue-code/data-transform/csv-normalizer/task.yaml +0 -24
  642. package/refs/vbenchmark/tasks/glue-code/data-transform/excel-to-json/PROMPT.md +0 -67
  643. package/refs/vbenchmark/tasks/glue-code/data-transform/excel-to-json/task.yaml +0 -28
  644. package/refs/vbenchmark/tasks/glue-code/data-transform/excel-to-json/tests/transform.test.py +0 -137
  645. package/refs/vbenchmark/tasks/glue-code/data-transform/json-to-xml/PROMPT.md +0 -45
  646. package/refs/vbenchmark/tasks/glue-code/data-transform/json-to-xml/task.yaml +0 -24
  647. package/refs/vbenchmark/tasks/glue-code/data-transform/protobuf-converter/PROMPT.md +0 -44
  648. package/refs/vbenchmark/tasks/glue-code/data-transform/protobuf-converter/task.yaml +0 -24
  649. package/refs/vbenchmark/tasks/glue-code/etl/cdc-pipeline/PROMPT.md +0 -52
  650. package/refs/vbenchmark/tasks/glue-code/etl/cdc-pipeline/task.yaml +0 -27
  651. package/refs/vbenchmark/tasks/glue-code/etl/database-sync/PROMPT.md +0 -51
  652. package/refs/vbenchmark/tasks/glue-code/etl/database-sync/task.yaml +0 -24
  653. package/refs/vbenchmark/tasks/glue-code/etl/s3-to-warehouse/PROMPT.md +0 -50
  654. package/refs/vbenchmark/tasks/glue-code/etl/s3-to-warehouse/task.yaml +0 -24
  655. package/refs/vbenchmark/tasks/glue-code/file-processing/image-resizer/PROMPT.md +0 -52
  656. package/refs/vbenchmark/tasks/glue-code/file-processing/image-resizer/task.yaml +0 -24
  657. package/refs/vbenchmark/tasks/glue-code/file-processing/pdf-merger/PROMPT.md +0 -50
  658. package/refs/vbenchmark/tasks/glue-code/file-processing/pdf-merger/task.yaml +0 -24
  659. package/refs/vbenchmark/tasks/glue-code/file-processing/video-transcoder/PROMPT.md +0 -50
  660. package/refs/vbenchmark/tasks/glue-code/file-processing/video-transcoder/task.yaml +0 -27
  661. package/refs/vbenchmark/tasks/glue-code/migration/data-backfill/PROMPT.md +0 -50
  662. package/refs/vbenchmark/tasks/glue-code/migration/data-backfill/task.yaml +0 -24
  663. package/refs/vbenchmark/tasks/glue-code/migration/database-versioning/PROMPT.md +0 -50
  664. package/refs/vbenchmark/tasks/glue-code/migration/database-versioning/task.yaml +0 -24
  665. package/refs/vbenchmark/tasks/glue-code/queue/kafka-producer/PROMPT.md +0 -49
  666. package/refs/vbenchmark/tasks/glue-code/queue/kafka-producer/task.yaml +0 -27
  667. package/refs/vbenchmark/tasks/glue-code/queue/rabbitmq-consumer/PROMPT.md +0 -50
  668. package/refs/vbenchmark/tasks/glue-code/queue/rabbitmq-consumer/task.yaml +0 -27
  669. package/refs/vbenchmark/tasks/glue-code/queue/sqs-batch-processor/PROMPT.md +0 -47
  670. package/refs/vbenchmark/tasks/glue-code/queue/sqs-batch-processor/task.yaml +0 -24
  671. package/refs/vbenchmark/tasks/glue-code/scheduler/cron-job-manager/PROMPT.md +0 -52
  672. package/refs/vbenchmark/tasks/glue-code/scheduler/cron-job-manager/task.yaml +0 -27
  673. package/refs/vbenchmark/tasks/glue-code/scheduler/delayed-tasks/PROMPT.md +0 -51
  674. package/refs/vbenchmark/tasks/glue-code/scheduler/delayed-tasks/task.yaml +0 -27
  675. package/refs/vbenchmark/tasks/saas-core/advanced/api-versioning/PROMPT.md +0 -15
  676. package/refs/vbenchmark/tasks/saas-core/advanced/api-versioning/task.yaml +0 -16
  677. package/refs/vbenchmark/tasks/saas-core/advanced/circuit-breaker/PROMPT.md +0 -13
  678. package/refs/vbenchmark/tasks/saas-core/advanced/circuit-breaker/task.yaml +0 -16
  679. package/refs/vbenchmark/tasks/saas-core/advanced/compliance-gdpr/PROMPT.md +0 -15
  680. package/refs/vbenchmark/tasks/saas-core/advanced/compliance-gdpr/task.yaml +0 -16
  681. package/refs/vbenchmark/tasks/saas-core/advanced/cqrs-pattern/PROMPT.md +0 -13
  682. package/refs/vbenchmark/tasks/saas-core/advanced/cqrs-pattern/task.yaml +0 -16
  683. package/refs/vbenchmark/tasks/saas-core/advanced/data-encryption/PROMPT.md +0 -15
  684. package/refs/vbenchmark/tasks/saas-core/advanced/data-encryption/task.yaml +0 -16
  685. package/refs/vbenchmark/tasks/saas-core/advanced/distributed-locking/PROMPT.md +0 -46
  686. package/refs/vbenchmark/tasks/saas-core/advanced/distributed-locking/task.yaml +0 -24
  687. package/refs/vbenchmark/tasks/saas-core/advanced/event-sourcing/PROMPT.md +0 -23
  688. package/refs/vbenchmark/tasks/saas-core/advanced/event-sourcing/task.yaml +0 -16
  689. package/refs/vbenchmark/tasks/saas-core/advanced/feature-flags-ab/PROMPT.md +0 -15
  690. package/refs/vbenchmark/tasks/saas-core/advanced/feature-flags-ab/task.yaml +0 -16
  691. package/refs/vbenchmark/tasks/saas-core/advanced/saga-orchestration/PROMPT.md +0 -13
  692. package/refs/vbenchmark/tasks/saas-core/advanced/saga-orchestration/task.yaml +0 -16
  693. package/refs/vbenchmark/tasks/saas-core/advanced/webhook-delivery/PROMPT.md +0 -15
  694. package/refs/vbenchmark/tasks/saas-core/advanced/webhook-delivery/task.yaml +0 -16
  695. package/refs/vbenchmark/tasks/saas-core/audit/activity-logging/PROMPT.md +0 -50
  696. package/refs/vbenchmark/tasks/saas-core/audit/activity-logging/task.yaml +0 -27
  697. package/refs/vbenchmark/tasks/saas-core/auth/jwt-refresh-tokens/PROMPT.md +0 -50
  698. package/refs/vbenchmark/tasks/saas-core/auth/jwt-refresh-tokens/task.yaml +0 -27
  699. package/refs/vbenchmark/tasks/saas-core/auth/magic-link-email/PROMPT.md +0 -53
  700. package/refs/vbenchmark/tasks/saas-core/auth/magic-link-email/task.yaml +0 -27
  701. package/refs/vbenchmark/tasks/saas-core/auth/mfa-totp/PROMPT.md +0 -79
  702. package/refs/vbenchmark/tasks/saas-core/auth/mfa-totp/task.yaml +0 -27
  703. package/refs/vbenchmark/tasks/saas-core/auth/rbac-permissions/PROMPT.md +0 -51
  704. package/refs/vbenchmark/tasks/saas-core/auth/rbac-permissions/task.yaml +0 -27
  705. package/refs/vbenchmark/tasks/saas-core/auth/session-management/PROMPT.md +0 -52
  706. package/refs/vbenchmark/tasks/saas-core/auth/session-management/task.yaml +0 -27
  707. package/refs/vbenchmark/tasks/saas-core/auth/supabase-oauth/PROMPT.md +0 -45
  708. package/refs/vbenchmark/tasks/saas-core/auth/supabase-oauth/docker-compose.yaml +0 -47
  709. package/refs/vbenchmark/tasks/saas-core/auth/supabase-oauth/task.yaml +0 -32
  710. package/refs/vbenchmark/tasks/saas-core/auth/supabase-oauth/tests/auth.test.ts +0 -59
  711. package/refs/vbenchmark/tasks/saas-core/billing/invoice-generation/PROMPT.md +0 -53
  712. package/refs/vbenchmark/tasks/saas-core/billing/invoice-generation/task.yaml +0 -27
  713. package/refs/vbenchmark/tasks/saas-core/billing/stripe-subscriptions/PROMPT.md +0 -51
  714. package/refs/vbenchmark/tasks/saas-core/billing/stripe-subscriptions/task.yaml +0 -27
  715. package/refs/vbenchmark/tasks/saas-core/billing/usage-metering/PROMPT.md +0 -52
  716. package/refs/vbenchmark/tasks/saas-core/billing/usage-metering/task.yaml +0 -27
  717. package/refs/vbenchmark/tasks/saas-core/crud/dashboard-table/PROMPT.md +0 -48
  718. package/refs/vbenchmark/tasks/saas-core/crud/dashboard-table/task.yaml +0 -28
  719. package/refs/vbenchmark/tasks/saas-core/multi-tenant/org-isolation/PROMPT.md +0 -50
  720. package/refs/vbenchmark/tasks/saas-core/multi-tenant/org-isolation/task.yaml +0 -27
  721. package/refs/vbenchmark/tasks/saas-core/multi-tenant/subdomain-routing/PROMPT.md +0 -50
  722. package/refs/vbenchmark/tasks/saas-core/multi-tenant/subdomain-routing/task.yaml +0 -27
  723. package/refs/vbenchmark/tasks/saas-core/notifications/email-queue/PROMPT.md +0 -53
  724. package/refs/vbenchmark/tasks/saas-core/notifications/email-queue/task.yaml +0 -27
  725. package/refs/vbenchmark/tasks/saas-core/notifications/in-app-alerts/PROMPT.md +0 -51
  726. package/refs/vbenchmark/tasks/saas-core/notifications/in-app-alerts/task.yaml +0 -27
  727. package/refs/vbenchmark/tasks/saas-core/notifications/push-notifications/PROMPT.md +0 -51
  728. package/refs/vbenchmark/tasks/saas-core/notifications/push-notifications/task.yaml +0 -27
  729. package/refs/vbenchmark/tasks/saas-core/realtime/websocket-chat/PROMPT.md +0 -80
  730. package/refs/vbenchmark/tasks/saas-core/realtime/websocket-chat/task.yaml +0 -27
  731. package/refs/vbenchmark/tasks/saas-core/search/full-text-search/PROMPT.md +0 -51
  732. package/refs/vbenchmark/tasks/saas-core/search/full-text-search/task.yaml +0 -27
  733. package/refs/vbenchmark/tasks/saas-core/security/rate-limiter/PROMPT.md +0 -99
  734. package/refs/vbenchmark/tasks/saas-core/security/rate-limiter/task.yaml +0 -27
  735. package/refs/vbenchmark/tasks/saas-core/settings/user-preferences/PROMPT.md +0 -78
  736. package/refs/vbenchmark/tasks/saas-core/settings/user-preferences/task.yaml +0 -27
  737. package/refs/vbenchmark/templates/fastapi-postgres/docker-compose.yaml +0 -36
  738. package/refs/vbenchmark/templates/fastapi-postgres/pyproject.toml +0 -34
  739. package/refs/vbenchmark/templates/fastapi-postgres/src/__init__.py +0 -0
  740. package/refs/vbenchmark/templates/fastapi-postgres/src/config.py +0 -12
  741. package/refs/vbenchmark/templates/fastapi-postgres/src/database.py +0 -15
  742. package/refs/vbenchmark/templates/fastapi-postgres/src/main.py +0 -51
  743. package/refs/vbenchmark/templates/fastapi-postgres/src/models.py +0 -12
  744. package/refs/vbenchmark/templates/fastapi-postgres/src/schemas.py +0 -20
  745. package/refs/vbenchmark/templates/go-fiber/docker-compose.yaml +0 -34
  746. package/refs/vbenchmark/templates/go-fiber/go.mod +0 -33
  747. package/refs/vbenchmark/templates/go-fiber/go.sum +0 -68
  748. package/refs/vbenchmark/templates/go-fiber/main.go +0 -98
  749. package/refs/vbenchmark/templates/nextjs-supabase/.env.example +0 -3
  750. package/refs/vbenchmark/templates/nextjs-supabase/docker-compose.yaml +0 -68
  751. package/refs/vbenchmark/templates/nextjs-supabase/src/app/globals.css +0 -13
  752. package/refs/vbenchmark/templates/nextjs-supabase/src/app/layout.tsx +0 -19
  753. package/refs/vbenchmark/templates/nextjs-supabase/src/app/page.tsx +0 -38
  754. package/refs/vbenchmark/templates/nextjs-supabase/src/lib/supabase/client.ts +0 -8
  755. package/refs/vbenchmark/templates/nextjs-supabase/src/lib/supabase/server.ts +0 -32
  756. package/refs/vbenchmark/templates/rust-axum/Cargo.lock +0 -2371
  757. package/refs/vbenchmark/templates/rust-axum/Cargo.toml +0 -16
  758. package/refs/vbenchmark/templates/rust-axum/docker-compose.yaml +0 -34
  759. package/refs/vbenchmark/templates/rust-axum/migrations/20240101000000_init.sql +0 -20
  760. package/refs/vbenchmark/templates/rust-axum/src/main.rs +0 -121
  761. package/refs/vbenchmark/tsconfig.base.json +0 -18
  762. package/refs/vbenchmark/turbo.json +0 -23
  763. package/refs/vbenchmark/vercel.json +0 -10
@@ -1,1468 +0,0 @@
1
- import { Routes, Route, Link, useLocation } from 'react-router-dom';
2
- import { useState, useRef, useMemo } from 'react';
3
- import {
4
- Chart as ChartJS,
5
- CategoryScale,
6
- LinearScale,
7
- BarElement,
8
- PointElement,
9
- LineElement,
10
- ArcElement,
11
- RadialLinearScale,
12
- Title,
13
- Tooltip,
14
- Legend,
15
- Filler,
16
- } from 'chart.js';
17
- import { Bar, Radar, Scatter, Line } from 'react-chartjs-2';
18
-
19
- // Static data imports for GitHub Pages hosting
20
- import leaderboardData from './data/leaderboard.json';
21
- import categoryPerformanceData from './data/category-performance.json';
22
- import taskResultsData from './data/task-results.json';
23
- import tasksData from './data/tasks.json';
24
-
25
// Chart.js components (scales, elements, plugins) that are registered once at
// module load so the charts rendered further down can use them.
const chartJsComponents = [
  CategoryScale,
  LinearScale,
  BarElement,
  PointElement,
  LineElement,
  ArcElement,
  RadialLinearScale,
  Title,
  Tooltip,
  Legend,
  Filler,
];

ChartJS.register(...chartJsComponents);
38
-
39
/**
 * Design tokens for inline styles, modelled on the shadcn/ui palette naming
 * (each surface colour is paired with a `*Foreground` text colour).
 */
const colors = {
  // Brand / call-to-action
  primary: '#4285f4',
  primaryForeground: '#ffffff',

  // Secondary and muted surfaces
  secondary: '#f1f5f9',
  secondaryForeground: '#0f172a',
  muted: '#f8fafc',
  mutedForeground: '#64748b',
  accent: '#f1f5f9',
  accentForeground: '#0f172a',

  // Error state
  destructive: '#ef4444',

  // Form chrome
  border: '#e2e8f0',
  input: '#e2e8f0',
  ring: '#4285f4',

  // Page and card surfaces
  background: '#ffffff',
  foreground: '#0f172a',
  card: '#ffffff',
  cardForeground: '#0f172a',

  // Status colours
  success: '#22c55e',
  warning: '#f59e0b',
};
60
-
61
/**
 * Series palette for charts and model avatars. Callers index it modulo its
 * length, so more than 14 series simply cycle back to the first colour.
 */
const CHART_COLORS = [
  '#4285f4',
  '#22c55e',
  '#f59e0b',
  '#ef4444',
  '#8b5cf6',
  '#06b6d4',
  '#f97316',
  '#64748b',
  '#ec4899',
  '#6366f1',
  '#14b8a6',
  '#a855f7',
  '#84cc16',
  '#0ea5e9',
];
67
-
68
/** One row of `leaderboard.json`: aggregate benchmark results for a single agent/model. */
interface LeaderboardEntry {
  // --- Identity -----------------------------------------------------------
  /** Position supplied by the data file; the hover card uses it to pick an avatar colour. */
  rank: number;
  /** Agent name; used as the display name whenever `modelName` is absent. */
  agentName: string;
  /** Shown under the name and combined with `agentName` to form the React row key. */
  agentVersion: string;
  /** Preferred display name when present. */
  modelName?: string;

  // --- Scores (rendered with a trailing "%") ------------------------------
  avgScore: number;
  avgFunctional: number;
  avgQuality: number;
  /** Not rendered by the views in this file. */
  avgCost: number;

  // --- Task counts --------------------------------------------------------
  /** Denominator of the pass rate. */
  tasksCompleted: number;
  /** Numerator of the pass rate; treated as 0 when missing. */
  passedTasks?: number;
  failedTasks?: number;

  // --- Token usage --------------------------------------------------------
  /** Displayed in millions ("M"). */
  totalTokens: number;
  /** Displayed in thousands ("K"). */
  inputTokens?: number;
  /** Displayed in thousands ("K"). */
  outputTokens?: number;

  // --- Cost and latency ---------------------------------------------------
  /** Displayed with a "$" prefix; treated as 0 when missing. */
  totalCostUSD?: number;
  /** Milliseconds; divided by 1000 and shown as seconds. */
  avgTimeMs?: number;
  /** Displayed as "$<n>/M in". */
  pricingInput?: number;
  /** Displayed as "$<n>/M out". */
  pricingOutput?: number;
}
88
-
89
/**
 * Inline hover trigger (shadcn/ui HoverCard pattern).
 *
 * Renders `children` inside an inline-block span. After the pointer has rested
 * on it for 200 ms, `content` is shown in a fixed-position card centred above
 * the trigger. Leaving the trigger or the card hides it again after 100 ms;
 * entering the card cancels that pending hide.
 */
function HoverCard({ children, content }: { children: React.ReactNode; content: React.ReactNode }) {
  const [open, setOpen] = useState(false);
  const [anchor, setAnchor] = useState({ x: 0, y: 0 });
  const triggerRef = useRef<HTMLSpanElement>(null);
  const timeoutRef = useRef<NodeJS.Timeout>();

  // Drop whichever show/hide timer is currently pending.
  const cancelPending = () => clearTimeout(timeoutRef.current);

  function scheduleOpen() {
    cancelPending();
    timeoutRef.current = setTimeout(() => {
      const trigger = triggerRef.current;
      if (trigger) {
        // Anchor at the horizontal centre of the trigger's top edge.
        const box = trigger.getBoundingClientRect();
        setAnchor({ x: box.left + box.width / 2, y: box.top });
      }
      setOpen(true);
    }, 200);
  }

  function scheduleClose() {
    cancelPending();
    timeoutRef.current = setTimeout(() => setOpen(false), 100);
  }

  // 8px gap above the anchor; the transform centres the card and lifts it fully above.
  const popoverStyle = { left: anchor.x, top: anchor.y - 8, transform: 'translate(-50%, -100%)' };

  return (
    <span ref={triggerRef} onMouseEnter={scheduleOpen} onMouseLeave={scheduleClose} className="inline-block">
      {children}
      {open && (
        <div
          className="fixed z-50 animate-in fade-in-0 zoom-in-95"
          style={popoverStyle}
          onMouseEnter={cancelPending}
          onMouseLeave={scheduleClose}
        >
          <div className="bg-white rounded-lg border border-slate-200 shadow-lg p-4 min-w-[280px] max-w-[350px]">
            {content}
          </div>
        </div>
      )}
    </span>
  );
}
130
-
131
/**
 * Table-row variant of the hover card.
 *
 * Renders a `<tr>` containing `children`. After a 300 ms hover it appends an
 * extra absolutely positioned `<td>` that hosts a fixed-position card with
 * `content`, centred above the row. The card hides 100 ms after the pointer
 * leaves the row or the card.
 */
function HoverRow({ children, content, className = '' }: {
  children: React.ReactNode;
  content: React.ReactNode;
  className?: string;
}) {
  const [open, setOpen] = useState(false);
  const [anchor, setAnchor] = useState({ x: 0, y: 0 });
  const rowRef = useRef<HTMLTableRowElement>(null);
  const timeoutRef = useRef<NodeJS.Timeout>();

  // Drop whichever show/hide timer is currently pending.
  const cancelPending = () => clearTimeout(timeoutRef.current);

  function scheduleOpen() {
    cancelPending();
    timeoutRef.current = setTimeout(() => {
      const row = rowRef.current;
      if (row) {
        // Anchor at the horizontal centre of the row's top edge.
        const box = row.getBoundingClientRect();
        setAnchor({ x: box.left + box.width / 2, y: box.top });
      }
      setOpen(true);
    }, 300);
  }

  function scheduleClose() {
    cancelPending();
    timeoutRef.current = setTimeout(() => setOpen(false), 100);
  }

  // 8px gap above the anchor; the transform centres the card and lifts it fully above.
  const popoverStyle = { left: anchor.x, top: anchor.y - 8, transform: 'translate(-50%, -100%)' };

  return (
    <tr ref={rowRef} onMouseEnter={scheduleOpen} onMouseLeave={scheduleClose} className={className}>
      {children}
      {open && (
        // Only <td>/<th> may be children of <tr>, so the card lives in a zero-padding cell.
        <td className="absolute" style={{ padding: 0, border: 'none' }}>
          <div
            className="fixed z-50 animate-in fade-in-0 zoom-in-95"
            style={popoverStyle}
            onMouseEnter={cancelPending}
            onMouseLeave={scheduleClose}
          >
            <div className="bg-white rounded-lg border border-slate-200 shadow-lg p-4 min-w-[280px] max-w-[350px]">
              {content}
            </div>
          </div>
        </td>
      )}
    </tr>
  );
}
183
-
184
/**
 * White rounded surface (shadcn/ui Card pattern).
 *
 * @param hover     When true, adds a stronger shadow and border on mouse hover.
 * @param className Extra classes appended after the built-in ones.
 */
function Card({ children, className = '', hover = false }: { children: React.ReactNode; className?: string; hover?: boolean }) {
  const surface = 'bg-white rounded-xl border border-slate-200 shadow-sm';
  const hoverEffect = hover ? 'hover:shadow-md hover:border-slate-300 transition-all duration-200' : '';
  const classes = [surface, hoverEffect, className].join(' ');

  return <div className={classes}>{children}</div>;
}
192
-
193
/** Padded top section of a Card, separated from the body by a bottom border. */
function CardHeader({ children, className = '' }: { children: React.ReactNode; className?: string }) {
  const classes = ['px-6 py-4 border-b border-slate-100', className].join(' ');
  return <div className={classes}>{children}</div>;
}
196
-
197
/** Small semibold `<h3>` heading for use inside a CardHeader. */
function CardTitle({ children, className = '' }: { children: React.ReactNode; className?: string }) {
  const classes = ['text-sm font-semibold text-slate-900', className].join(' ');
  return <h3 className={classes}>{children}</h3>;
}
200
-
201
/** Muted one-line caption shown beneath a CardTitle. */
function CardDescription({ children }: { children: React.ReactNode }) {
  return (
    <p className="text-xs text-slate-500 mt-0.5">
      {children}
    </p>
  );
}
204
-
205
/** Padded body section of a Card; `className` is appended after the default `p-6`. */
function CardContent({ children, className = '' }: { children: React.ReactNode; className?: string }) {
  const classes = ['p-6', className].join(' ');
  return <div className={classes}>{children}</div>;
}
208
-
209
/**
 * Small rounded pill label.
 *
 * @param variant Colour scheme; defaults to the neutral `'default'` look.
 */
function Badge({ children, variant = 'default' }: { children: React.ReactNode; variant?: 'default' | 'success' | 'warning' | 'destructive' | 'outline' }) {
  // Tailwind classes contributed by each variant.
  const variantClasses = {
    default: 'bg-slate-100 text-slate-900',
    success: 'bg-emerald-50 text-emerald-700 border-emerald-200',
    warning: 'bg-amber-50 text-amber-700 border-amber-200',
    destructive: 'bg-red-50 text-red-700 border-red-200',
    outline: 'bg-transparent border-slate-200 text-slate-700',
  };
  const shape = 'inline-flex items-center px-2 py-0.5 text-xs font-medium rounded-md border';
  const classes = [shape, variantClasses[variant]].join(' ');

  return <span className={classes}>{children}</span>;
}
224
-
225
/**
 * Summary tile showing one headline metric.
 *
 * @param label   Upper-cased caption above the value.
 * @param value   Headline figure.
 * @param subtext Optional line under the value.
 * @param icon    Optional node rendered in a tinted box on the right.
 * @param trend   Optional change indicator: green "↑" when `value >= 0`,
 *                red "↓" otherwise, followed by the absolute percentage and label.
 */
function StatCard({ label, value, subtext, icon, trend }: {
  label: string;
  value: string | number;
  subtext?: string;
  icon?: React.ReactNode;
  trend?: { value: number; label: string };
}) {
  const trendIsUp = trend ? trend.value >= 0 : false;
  const trendTone = trendIsUp ? 'text-emerald-600' : 'text-red-600';
  const trendArrow = trendIsUp ? '↑' : '↓';

  return (
    <Card hover>
      <CardContent className="p-5">
        <div className="flex items-start justify-between">
          <div className="space-y-1">
            <p className="text-xs font-medium text-slate-500 uppercase tracking-wider">{label}</p>
            <p className="text-2xl font-semibold text-slate-900">{value}</p>
            {subtext && <p className="text-xs text-slate-500">{subtext}</p>}
            {trend && (
              <div className={`flex items-center gap-1 text-xs ${trendTone}`}>
                <span>{trendArrow}</span>
                <span>{Math.abs(trend.value)}% {trend.label}</span>
              </div>
            )}
          </div>
          {icon && <div className="p-2 bg-slate-50 rounded-lg text-slate-600">{icon}</div>}
        </div>
      </CardContent>
    </Card>
  );
}
254
-
255
/**
 * Body of the hover card shown for a leaderboard row.
 *
 * Shows an avatar (first letter of the display name on a palette colour chosen
 * by `entry.rank`), a grid of six headline metrics, and the per-million-token
 * pricing. Missing optional numbers are treated as 0. Missing pricing is shown
 * as "n/a" instead of the bare "$/M in · $/M out" the raw interpolation gave.
 */
function ModelHoverContent({ entry }: { entry: LeaderboardEntry }) {
  const displayName = entry.modelName || entry.agentName;
  // `|| 1` guards the division when no tasks were completed.
  const passRate = ((entry.passedTasks || 0) / (entry.tasksCompleted || 1)) * 100;

  const metrics: { label: string; value: string; tone?: string }[] = [
    { label: 'Score', value: `${entry.avgScore.toFixed(1)}%` },
    { label: 'Pass Rate', value: `${passRate.toFixed(0)}%` },
    { label: 'Total Cost', value: `$${(entry.totalCostUSD || 0).toFixed(2)}`, tone: 'text-emerald-600' },
    { label: 'Avg Time', value: `${((entry.avgTimeMs || 0) / 1000).toFixed(0)}s` },
    { label: 'Input Tokens', value: `${((entry.inputTokens || 0) / 1000).toFixed(0)}K` },
    { label: 'Output Tokens', value: `${((entry.outputTokens || 0) / 1000).toFixed(0)}K` },
  ];

  // Both pricing fields are optional: render only the parts that exist.
  const formatRate = (rate: number | undefined, direction: string) =>
    rate == null ? null : `$${rate}/M ${direction}`;
  const pricing =
    [formatRate(entry.pricingInput, 'in'), formatRate(entry.pricingOutput, 'out')]
      .filter(Boolean)
      .join(' · ') || 'n/a';

  return (
    <div className="space-y-3">
      <div className="flex items-center gap-3">
        <div
          className="w-10 h-10 rounded-lg flex items-center justify-center text-white font-bold"
          style={{ backgroundColor: CHART_COLORS[entry.rank % CHART_COLORS.length] }}
        >
          {displayName.charAt(0)}
        </div>
        <div>
          <div className="font-semibold text-slate-900">{displayName}</div>
          <div className="text-xs text-slate-500">{entry.agentVersion}</div>
        </div>
      </div>
      <div className="grid grid-cols-2 gap-3 text-xs">
        {metrics.map((metric) => (
          <div key={metric.label} className="space-y-0.5">
            <div className="text-slate-500">{metric.label}</div>
            <div className={`font-semibold ${metric.tone || 'text-slate-900'}`}>{metric.value}</div>
          </div>
        ))}
      </div>
      <div className="pt-2 border-t border-slate-100">
        <div className="flex items-center justify-between text-xs">
          <span className="text-slate-500">Pricing</span>
          <span className="text-slate-700">{pricing}</span>
        </div>
      </div>
    </div>
  );
}
307
-
308
/**
 * Horizontal progress bar.
 *
 * @param value     Current value.
 * @param max       Value that corresponds to a full bar (default 100).
 * @param className Extra classes for the track, e.g. a width.
 * @param color     Fill colour; falls back to `colors.primary`.
 *
 * The fill width is clamped to 0–100%. A non-positive `max` or a non-numeric
 * ratio yields an empty bar rather than an invalid `NaN%` or negative CSS
 * width. This can happen when every score in a data set is 0.
 */
function Progress({ value, max = 100, className = '', color }: { value: number; max?: number; className?: string; color?: string }) {
  const rawPercentage = max > 0 ? (value / max) * 100 : 0;
  const clamped = Math.min(Math.max(rawPercentage, 0), 100);
  // Math.min/Math.max propagate NaN, so handle it explicitly.
  const percentage = Number.isNaN(clamped) ? 0 : clamped;

  return (
    <div className={`h-2 bg-slate-100 rounded-full overflow-hidden ${className}`}>
      <div
        className="h-full rounded-full transition-all duration-300"
        style={{ width: `${percentage}%`, backgroundColor: color || colors.primary }}
      />
    </div>
  );
}
320
-
321
/** 404 page: centred message with a link back to the leaderboard at "/". */
function NotFound() {
  const wrapperStyle: React.CSSProperties = {
    display: 'flex',
    flexDirection: 'column',
    alignItems: 'center',
    justifyContent: 'center',
    minHeight: '60vh',
    textAlign: 'center',
    padding: '2rem',
  };
  const codeStyle: React.CSSProperties = { fontSize: '6rem', fontWeight: 700, color: colors.mutedForeground, margin: 0 };
  const headingStyle: React.CSSProperties = { fontSize: '1.5rem', fontWeight: 600, color: colors.foreground, marginTop: '1rem' };
  const messageStyle: React.CSSProperties = { color: colors.mutedForeground, marginTop: '0.5rem', maxWidth: '400px' };
  const homeLinkStyle: React.CSSProperties = {
    marginTop: '2rem',
    padding: '0.75rem 1.5rem',
    backgroundColor: colors.primary,
    color: colors.primaryForeground,
    borderRadius: '0.5rem',
    textDecoration: 'none',
    fontWeight: 500,
    transition: 'opacity 0.2s',
  };

  return (
    <div style={wrapperStyle}>
      <h1 style={codeStyle}>404</h1>
      <h2 style={headingStyle}>Page Not Found</h2>
      <p style={messageStyle}>
        The page you're looking for doesn't exist or has been moved.
      </p>
      <Link to="/" style={homeLinkStyle}>
        Back to Leaderboard
      </Link>
    </div>
  );
}
355
-
356
/**
 * Leaderboard page: four summary tiles, sort controls, and the main results
 * table built from the static `leaderboard.json` import.
 *
 * Sorting is client-side: score (descending, default), cost and speed
 * (ascending), or value = score per dollar (descending). Medals and row
 * colours follow the position in the currently sorted list.
 *
 * An empty leaderboard renders "—" for Top Score and Best Value and an empty
 * table, instead of throwing on `bestValue.avgScore` or showing "undefined%".
 */
function Leaderboard() {
  const entries = leaderboardData.leaderboard as LeaderboardEntry[] || [];
  const [sortBy, setSortBy] = useState<'score' | 'cost' | 'speed' | 'efficiency'>('score');

  // Score points per dollar. A missing or zero cost counts as $1 so the ratio stays finite.
  const valuePerDollar = (e: LeaderboardEntry) => e.avgScore / (e.totalCostUSD || 1);

  const sortedEntries = [...entries].sort((a, b) => {
    if (sortBy === 'cost') return (a.totalCostUSD || 0) - (b.totalCostUSD || 0);
    if (sortBy === 'speed') return (a.avgTimeMs || 0) - (b.avgTimeMs || 0);
    if (sortBy === 'efficiency') return valuePerDollar(b) - valuePerDollar(a);
    return b.avgScore - a.avgScore;
  });

  // Each reduce is seeded with entries[0], so on an empty list the result is
  // undefined. The annotations make every use below handle that case.
  const topModel: LeaderboardEntry | undefined =
    entries.reduce((max, e) => e.avgScore > max.avgScore ? e : max, entries[0]);
  const cheapestModel: LeaderboardEntry | undefined =
    entries.reduce((min, e) => (e.totalCostUSD || 99) < (min.totalCostUSD || 99) ? e : min, entries[0]);
  const fastestModel: LeaderboardEntry | undefined =
    entries.reduce((min, e) => (e.avgTimeMs || 999999) < (min.avgTimeMs || 999999) ? e : min, entries[0]);
  const bestValue: LeaderboardEntry | undefined =
    entries.reduce((max, e) => valuePerDollar(e) > valuePerDollar(max) ? e : max, entries[0]);

  // Scale for the per-row score bars; computed once rather than once per row.
  const maxScore = Math.max(...entries.map(e => e.avgScore));

  const crowns = ['🥇', '🥈', '🥉'];
  const sortOptions: { key: typeof sortBy; label: string }[] = [
    { key: 'score', label: 'Score' },
    { key: 'cost', label: 'Cost' },
    { key: 'speed', label: 'Speed' },
    { key: 'efficiency', label: 'Value' },
  ];
  const headerCell = 'px-4 py-3 text-left text-xs font-semibold text-slate-600 uppercase tracking-wider';
  const columnHeadings = ['Model', 'Score', 'Pass Rate', 'Quality', 'Cost', 'Time', 'Tokens', 'Value'];

  return (
    <div className="space-y-8">
      {/* Header */}
      <div className="flex flex-col md:flex-row md:items-end md:justify-between gap-4">
        <div>
          <h1 className="text-2xl font-bold text-slate-900">Leaderboard</h1>
          <p className="text-sm text-slate-500 mt-1">AI coding agent performance on 180 benchmark tasks</p>
        </div>
        <Badge variant="outline">Updated Jan 27, 2026</Badge>
      </div>

      {/* Summary Cards */}
      <div className="grid grid-cols-2 lg:grid-cols-4 gap-4">
        <StatCard
          label="Top Score"
          value={topModel ? `${topModel.avgScore.toFixed(1)}%` : '—'}
          subtext={topModel?.modelName}
          icon={<span className="text-lg">🏆</span>}
        />
        <StatCard
          label="Lowest Cost"
          value={`$${(cheapestModel?.totalCostUSD || 0).toFixed(2)}`}
          subtext={cheapestModel?.modelName}
          icon={<span className="text-lg">💰</span>}
        />
        <StatCard
          label="Fastest"
          value={`${((fastestModel?.avgTimeMs || 0) / 1000).toFixed(0)}s avg`}
          subtext={fastestModel?.modelName}
          icon={<span className="text-lg">⚡</span>}
        />
        <StatCard
          label="Best Value"
          value={bestValue ? `${valuePerDollar(bestValue).toFixed(0)} pts/$` : '—'}
          subtext={bestValue?.modelName}
          icon={<span className="text-lg">✨</span>}
        />
      </div>

      {/* Sort Controls */}
      <div className="flex items-center gap-2 flex-wrap">
        <span className="text-sm text-slate-500">Sort by:</span>
        {sortOptions.map((option) => (
          <button
            key={option.key}
            onClick={() => setSortBy(option.key)}
            className={`px-3 py-1.5 text-sm font-medium rounded-lg transition-all ${
              sortBy === option.key
                ? 'bg-slate-900 text-white'
                : 'bg-slate-100 text-slate-600 hover:bg-slate-200'
            }`}
          >
            {option.label}
          </button>
        ))}
      </div>

      {/* Main Table */}
      <Card>
        <div className="overflow-x-auto">
          <table className="min-w-full">
            <thead>
              <tr className="border-b border-slate-100 bg-slate-50/50">
                <th className={`${headerCell} w-16`}>Rank</th>
                {columnHeadings.map((heading) => (
                  <th key={heading} className={headerCell}>{heading}</th>
                ))}
              </tr>
            </thead>
            <tbody className="divide-y divide-slate-100">
              {sortedEntries.map((entry, idx) => {
                const passRate = ((entry.passedTasks || 0) / (entry.tasksCompleted || 1)) * 100;
                const valueScore = valuePerDollar(entry);
                const rowColor = CHART_COLORS[idx % CHART_COLORS.length];
                const displayName = entry.modelName || entry.agentName;
                return (
                  <HoverRow
                    key={entry.agentName + entry.agentVersion}
                    content={<ModelHoverContent entry={entry} />}
                    className="hover:bg-slate-50/80 transition-colors cursor-pointer group"
                  >
                    <td className="px-4 py-4">
                      {idx < 3 ? (
                        <span className="text-xl">{crowns[idx]}</span>
                      ) : (
                        <span className="text-slate-400 font-medium">{idx + 1}</span>
                      )}
                    </td>
                    <td className="px-4 py-4">
                      <div className="flex items-center gap-3">
                        <div
                          className="w-9 h-9 rounded-lg flex items-center justify-center text-white font-semibold text-sm"
                          style={{ backgroundColor: rowColor }}
                        >
                          {displayName.charAt(0)}
                        </div>
                        <div>
                          <div className="font-medium text-slate-900 group-hover:text-blue-600 transition-colors">
                            {displayName}
                          </div>
                          <div className="text-xs text-slate-400">{entry.agentVersion}</div>
                        </div>
                      </div>
                    </td>
                    <td className="px-4 py-4">
                      <div className="flex items-center gap-3">
                        <Progress value={entry.avgScore} max={maxScore} className="w-20" color={rowColor} />
                        <span className="font-semibold text-slate-900">{entry.avgScore.toFixed(1)}%</span>
                      </div>
                    </td>
                    <td className="px-4 py-4">
                      <Badge variant={passRate >= 98 ? 'success' : passRate >= 90 ? 'warning' : 'outline'}>
                        {passRate.toFixed(0)}%
                      </Badge>
                    </td>
                    <td className="px-4 py-4 text-sm text-slate-600">{entry.avgQuality.toFixed(0)}%</td>
                    <td className="px-4 py-4">
                      <span className="font-medium text-emerald-600">${(entry.totalCostUSD || 0).toFixed(2)}</span>
                    </td>
                    <td className="px-4 py-4 text-sm text-slate-600">
                      {entry.avgTimeMs ? `${(entry.avgTimeMs / 1000).toFixed(0)}s` : '-'}
                    </td>
                    <td className="px-4 py-4 text-sm text-slate-500">
                      {((entry.totalTokens || 0) / 1000000).toFixed(2)}M
                    </td>
                    <td className="px-4 py-4">
                      <span className="font-medium text-violet-600">{valueScore.toFixed(0)}</span>
                    </td>
                  </HoverRow>
                );
              })}
            </tbody>
          </table>
        </div>
      </Card>
    </div>
  );
}
525
-
526
/**
 * "Live Benchmark" page. It is currently a static placeholder that always shows
 * the empty state with the CLI command for starting a run.
 */
function LiveDashboard() {
  const emptyState = (
    <div className="text-center text-slate-500">
      <div className="text-4xl mb-4">📡</div>
      <p className="font-medium">No active benchmark runs</p>
      <p className="text-sm mt-1">Start a benchmark with: npm run cli -- run &lt;task&gt; -a &lt;agent&gt;</p>
    </div>
  );

  return (
    <div className="space-y-8">
      <div>
        <h1 className="text-2xl font-bold text-slate-900">Live Benchmark</h1>
        <p className="text-sm text-slate-500 mt-1">Real-time benchmark execution monitoring</p>
      </div>
      <Card>
        <CardContent className="py-16">{emptyState}</CardContent>
      </Card>
    </div>
  );
}
546
-
547
- // Tasks Component
548
/** A single benchmark task as listed on the Tasks page. */
interface Task {
  /** Used as the React key of the task card. */
  id: string;
  /** Card title; matched by the search box. */
  name: string;
  /** Overwritten with the category key from `tasks.json` when tasks are flattened. */
  category: string;
  /** 'easy' and 'medium' get their own badge colours; any other value uses the destructive style. */
  difficulty: string;
  /** Card body text; matched by the search box. */
  description: string;
  /** Optional labels; only the first three are rendered. */
  tags?: string[];
}
556
-
557
/**
 * Tasks page: browse the benchmark tasks from the static `tasks.json` import.
 *
 * Tasks can be narrowed by a category pill (clicking the active pill clears
 * it) and by a case-insensitive substring search over name and description.
 * At most `MAX_VISIBLE_TASKS` cards are rendered. When more tasks match, a
 * short note says so. The category count in the header is derived from the
 * data rather than hard-coded.
 */
function Tasks() {
  const MAX_VISIBLE_TASKS = 30;

  const [selectedCategory, setSelectedCategory] = useState<string | null>(null);
  const [searchQuery, setSearchQuery] = useState('');

  // Flatten { category: Task[] } into one list plus per-category counts.
  // The source is a static import, so this only needs to run once.
  const { tasks, summary } = useMemo(() => {
    const allTasks: Task[] = [];
    const categoryCounts: { category: string; count: number }[] = [];
    Object.entries(tasksData.categories || {}).forEach(([category, categoryTasks]) => {
      const catTasks = categoryTasks as Task[];
      categoryCounts.push({ category, count: catTasks.length });
      catTasks.forEach((t) => allTasks.push({ ...t, category }));
    });
    return { tasks: allTasks, summary: categoryCounts };
  }, []);

  // Normalise the query once instead of twice per task.
  const needle = searchQuery.toLowerCase();
  const filteredTasks = tasks.filter((t) => {
    const matchesCategory = !selectedCategory || t.category === selectedCategory;
    const matchesSearch = !needle ||
      t.name.toLowerCase().includes(needle) ||
      t.description.toLowerCase().includes(needle);
    return matchesCategory && matchesSearch;
  });

  const totalTasks = tasks.length;
  const visibleTasks = filteredTasks.slice(0, MAX_VISIBLE_TASKS);

  const pillClass = (active: boolean) =>
    `px-4 py-2 text-sm font-medium rounded-lg transition-all ${
      active ? 'bg-slate-900 text-white' : 'bg-slate-100 text-slate-600 hover:bg-slate-200'
    }`;

  return (
    <div className="space-y-6">
      <div className="flex flex-col md:flex-row md:items-end md:justify-between gap-4">
        <div>
          <h1 className="text-2xl font-bold text-slate-900">Benchmark Tasks</h1>
          <p className="text-sm text-slate-500 mt-1">{totalTasks} tasks across {summary.length} categories</p>
        </div>
      </div>

      {/* Category Pills */}
      <div className="flex flex-wrap gap-2">
        <button onClick={() => setSelectedCategory(null)} className={pillClass(!selectedCategory)}>
          All ({totalTasks})
        </button>
        {summary.map((cat) => (
          <button
            key={cat.category}
            onClick={() => setSelectedCategory(selectedCategory === cat.category ? null : cat.category)}
            className={pillClass(selectedCategory === cat.category)}
          >
            {cat.category.replace(/-/g, ' ')} ({cat.count})
          </button>
        ))}
      </div>

      {/* Search */}
      <div className="relative">
        <input
          type="text"
          placeholder="Search tasks..."
          value={searchQuery}
          onChange={(e) => setSearchQuery(e.target.value)}
          className="w-full px-4 py-3 bg-white border border-slate-200 rounded-xl text-sm focus:outline-none focus:ring-2 focus:ring-blue-500 focus:border-transparent transition-all"
        />
      </div>

      {/* Task Grid */}
      <div className="grid gap-4 md:grid-cols-2 lg:grid-cols-3">
        {visibleTasks.map((task) => (
          <Card key={task.id} hover>
            <CardContent className="p-5">
              <div className="flex items-start justify-between mb-3">
                <Badge variant={task.difficulty === 'easy' ? 'success' : task.difficulty === 'medium' ? 'warning' : 'destructive'}>
                  {task.difficulty}
                </Badge>
                <span className="text-xs text-slate-400">{task.category}</span>
              </div>
              <h3 className="font-semibold text-slate-900 mb-2">{task.name}</h3>
              <p className="text-sm text-slate-500 line-clamp-2">{task.description}</p>
              {task.tags && task.tags.length > 0 && (
                <div className="mt-3 flex flex-wrap gap-1">
                  {task.tags.slice(0, 3).map((tag) => (
                    <span key={tag} className="px-2 py-0.5 text-xs bg-slate-100 text-slate-600 rounded-md">
                      {tag}
                    </span>
                  ))}
                </div>
              )}
            </CardContent>
          </Card>
        ))}
      </div>

      {/* Make the render cap visible instead of silently dropping matches */}
      {filteredTasks.length > MAX_VISIBLE_TASKS && (
        <p className="text-xs text-slate-500">
          Showing {MAX_VISIBLE_TASKS} of {filteredTasks.length} matching tasks
        </p>
      )}
    </div>
  );
}
659
-
660
/**
 * Analytics page: four charts (score, cost, time, cost-vs-score scatter) and a
 * detailed metrics table, all built from the static `leaderboard.json` import
 * and ordered by descending average score.
 */
function Charts() {
  const entries = leaderboardData.leaderboard as LeaderboardEntry[] || [];

  const ranked = [...entries].sort((a, b) => b.avgScore - a.avgScore);

  // First two words of the display name keep axis labels and legends short.
  const shortName = (e: LeaderboardEntry) => (e.modelName || e.agentName).split(' ').slice(0, 2).join(' ');
  // Palette colour for the i-th ranked model; cycles when the palette runs out.
  const paletteAt = (i: number) => CHART_COLORS[i % CHART_COLORS.length];

  const labels = ranked.map((e) => shortName(e));

  const tooltip = {
    backgroundColor: '#ffffff',
    titleColor: '#0f172a',
    bodyColor: '#64748b',
    borderColor: '#e2e8f0',
    borderWidth: 1,
    padding: 12,
    cornerRadius: 8,
    displayColors: true,
  };
  const baseOptions = {
    responsive: true,
    maintainAspectRatio: false,
    plugins: {
      legend: { display: false },
      tooltip,
    },
    scales: {
      y: { beginAtZero: true, grid: { color: '#f1f5f9' }, ticks: { color: '#64748b', font: { size: 11 } } },
      x: { grid: { display: false }, ticks: { color: '#64748b', font: { size: 10 }, maxRotation: 45 } },
    },
  };

  // Bar data set drawn in a single colour: '80' appends 50% alpha to the fill.
  const singleColorBars = (label: string, data: number[], hex: string) => ({
    labels,
    datasets: [{
      label,
      data,
      backgroundColor: hex + '80',
      borderColor: hex,
      borderWidth: 2,
      borderRadius: 6,
    }],
  });

  // Score bars use one palette colour per model instead of a single colour.
  const scoreData = {
    labels,
    datasets: [{
      label: 'Score',
      data: ranked.map((e) => e.avgScore),
      backgroundColor: ranked.map((_, i) => paletteAt(i) + '80'),
      borderColor: ranked.map((_, i) => paletteAt(i)),
      borderWidth: 2,
      borderRadius: 6,
    }],
  };
  const costData = singleColorBars('Cost ($)', ranked.map((e) => e.totalCostUSD || 0), '#22c55e');
  const timeData = singleColorBars('Time (s)', ranked.map((e) => (e.avgTimeMs || 0) / 1000), '#f59e0b');

  // One single-point data set per model so each gets its own legend entry.
  const scatterData = {
    datasets: ranked.map((e, i) => ({
      label: shortName(e),
      data: [{ x: e.totalCostUSD || 0, y: e.avgScore }],
      backgroundColor: paletteAt(i),
      borderColor: paletteAt(i),
      pointRadius: 10,
      pointHoverRadius: 14,
    })),
  };

  const scoreOptions = { ...baseOptions, scales: { ...baseOptions.scales, y: { ...baseOptions.scales.y, max: 100 } } };
  const scatterOptions = {
    ...baseOptions,
    plugins: { ...baseOptions.plugins, legend: { display: true, position: 'bottom' as const, labels: { usePointStyle: true, padding: 8, font: { size: 9 } } } },
    scales: {
      x: { ...baseOptions.scales.x, title: { display: true, text: 'Cost ($)', color: '#64748b' } },
      y: { ...baseOptions.scales.y, min: 55, max: 95, title: { display: true, text: 'Score (%)', color: '#64748b' } },
    },
  };

  const crowns = ['🥇', '🥈', '🥉'];
  const headerCell = 'px-4 py-3 text-left text-xs font-semibold text-slate-600 uppercase';
  const columnHeadings = ['#', 'Model', 'Score', 'Pass', 'Functional', 'Quality', 'Cost', 'Time', 'Tokens', 'Value'];

  // Shared card frame around one chart. It is a plain function, not a
  // component, so the rendered element tree matches hand-written markup.
  const chartCard = (title: string, description: string, chart: React.ReactNode) => (
    <Card>
      <CardHeader>
        <CardTitle>{title}</CardTitle>
        <CardDescription>{description}</CardDescription>
      </CardHeader>
      <CardContent>
        <div className="h-72">{chart}</div>
      </CardContent>
    </Card>
  );

  return (
    <div className="space-y-8">
      <div>
        <h1 className="text-2xl font-bold text-slate-900">Analytics</h1>
        <p className="text-sm text-slate-500 mt-1">Performance metrics and comparisons</p>
      </div>

      {/* Charts Grid */}
      <div className="grid grid-cols-1 lg:grid-cols-2 gap-6">
        {chartCard('Score Distribution', 'Overall benchmark scores by model', <Bar data={scoreData} options={scoreOptions} />)}
        {chartCard('Cost Comparison', 'Total cost in USD for 180 tasks', <Bar data={costData} options={baseOptions} />)}
        {chartCard('Execution Time', 'Average time per task in seconds', <Bar data={timeData} options={baseOptions} />)}
        {chartCard('Cost vs Score', 'Efficiency visualization (top-left is best)', <Scatter data={scatterData} options={scatterOptions} />)}
      </div>

      {/* Data Table */}
      <Card>
        <CardHeader>
          <CardTitle>Detailed Metrics</CardTitle>
          <CardDescription>Complete performance data for all models</CardDescription>
        </CardHeader>
        <div className="overflow-x-auto">
          <table className="min-w-full text-sm">
            <thead>
              <tr className="bg-slate-50/50 border-b border-slate-100">
                {columnHeadings.map((heading) => (
                  <th key={heading} className={headerCell}>{heading}</th>
                ))}
              </tr>
            </thead>
            <tbody className="divide-y divide-slate-100">
              {ranked.map((e, i) => {
                const cost = e.totalCostUSD || 0;
                const passPercent = (e.passedTasks || 0) / (e.tasksCompleted || 1) * 100;
                return (
                  <HoverRow
                    key={e.agentName + e.agentVersion}
                    content={<ModelHoverContent entry={e} />}
                    className="hover:bg-slate-50/80 transition-colors cursor-pointer"
                  >
                    <td className="px-4 py-3">
                      {i < 3 ? (
                        <span className="text-lg">{crowns[i]}</span>
                      ) : (
                        <span className="text-slate-400">{i + 1}</span>
                      )}
                    </td>
                    <td className="px-4 py-3 font-medium text-slate-900">{e.modelName || e.agentName}</td>
                    <td className="px-4 py-3 font-semibold text-blue-600">{e.avgScore.toFixed(1)}%</td>
                    <td className="px-4 py-3 text-slate-600">{passPercent.toFixed(0)}%</td>
                    <td className="px-4 py-3 text-slate-600">{e.avgFunctional.toFixed(0)}%</td>
                    <td className="px-4 py-3 text-slate-600">{e.avgQuality.toFixed(0)}%</td>
                    <td className="px-4 py-3 font-medium text-emerald-600">${cost.toFixed(2)}</td>
                    <td className="px-4 py-3 text-slate-600">{((e.avgTimeMs || 0) / 1000).toFixed(0)}s</td>
                    <td className="px-4 py-3 text-slate-500">{((e.totalTokens || 0) / 1000000).toFixed(2)}M</td>
                    <td className="px-4 py-3 font-medium text-violet-600">
                      {/* A free model has unbounded score-per-dollar */}
                      {cost > 0 ? (e.avgScore / (e.totalCostUSD || 1)).toFixed(0) : '∞'}
                    </td>
                  </HoverRow>
                );
              })}
            </tbody>
          </table>
        </div>
      </Card>
    </div>
  );
}
859
-
860
- // Category Performance Component
861
/**
 * Aggregated results for one benchmark category, with one entry per model
 * that has data for that category.
 */
interface CategoryPerformance {
  /** Category slug; hyphens are replaced with spaces before display. */
  category: string;
  /** Per-model aggregates for this category. */
  models: {
    /** Model display name; matched against the selected-model list. */
    modelName: string;
    /** Average score, rendered with a `%` suffix. */
    avgScore: number;
    /** Pass rate, rendered with a `%` suffix. */
    passRate: number;
    /** Average token count; divided by 1000 for the "(K)" chart. */
    avgTokens: number;
    /** Average duration in milliseconds; divided by 1000 for display in seconds. */
    avgTimeMs: number;
    /** Average cost, rendered with a `$` prefix. */
    avgCost: number;
  }[];
}
872
-
873
/**
 * Category performance page.
 *
 * Reads per-category aggregates from `categoryPerformanceData`, lets the user
 * pick up to seven models, and renders five bar charts (score, pass rate,
 * cost, time, tokens), one radar chart, and a breakdown table with hover
 * details. The first five models are selected initially.
 */
function TaskPerformance() {
  const categoryData = categoryPerformanceData.performance as CategoryPerformance[] || [];
  const models = categoryPerformanceData.models as string[] || [];
  const [selectedModels, setSelectedModels] = useState<string[]>(models.slice(0, 5));

  const categories = categoryData.map(c => c.category.replace(/-/g, ' '));
  const filteredData = categoryData.map(cat => ({
    ...cat,
    models: cat.models.filter(m => selectedModels.includes(m.modelName)),
  }));

  // Colour is keyed on the model's position in the full model list so a model
  // keeps the same colour in the selector, the charts and the radar regardless
  // of the order in which it was selected.
  const colorFor = (modelName: string) =>
    CHART_COLORS[models.indexOf(modelName) % CHART_COLORS.length];

  // Labels show only the first two words of a model name.
  const shortName = (modelName: string) => modelName.split(' ').slice(0, 2).join(' ');

  // Deselects a selected model, or selects it if fewer than seven are active.
  const toggleModel = (model: string) => {
    if (selectedModels.includes(model)) {
      setSelectedModels(selectedModels.filter(m => m !== model));
    } else if (selectedModels.length < 7) {
      setSelectedModels([...selectedModels, model]);
    }
  };

  const chartOptions = {
    responsive: true,
    maintainAspectRatio: false,
    plugins: {
      legend: { position: 'bottom' as const, labels: { usePointStyle: true, padding: 12, font: { size: 10 } } },
      tooltip: {
        backgroundColor: '#ffffff',
        titleColor: '#0f172a',
        bodyColor: '#64748b',
        borderColor: '#e2e8f0',
        borderWidth: 1,
        padding: 12,
        cornerRadius: 8,
      },
    },
    scales: {
      y: { beginAtZero: true, grid: { color: '#f1f5f9' }, ticks: { color: '#64748b', font: { size: 11 } } },
      x: { grid: { display: false }, ticks: { color: '#64748b', font: { size: 10 } } },
    },
  };

  // Builds bar-chart data for one metric: one dataset per selected model, one
  // bar per category. A model with no entry for a category is plotted as 0.
  const createDataset = (metricFn: (m: CategoryPerformance['models'][0]) => number) => ({
    labels: categories,
    datasets: selectedModels.map((modelName) => ({
      label: shortName(modelName),
      data: filteredData.map(cat => {
        const m = cat.models.find(x => x.modelName === modelName);
        return m ? metricFn(m) : 0;
      }),
      backgroundColor: colorFor(modelName) + '80',
      borderColor: colorFor(modelName),
      borderWidth: 2,
      borderRadius: 4,
    })),
  });

  // The radar is limited to the first five selected models.
  const radarData = {
    labels: categories,
    datasets: selectedModels.slice(0, 5).map((modelName) => ({
      label: shortName(modelName),
      data: filteredData.map(cat => cat.models.find(m => m.modelName === modelName)?.avgScore || 0),
      backgroundColor: colorFor(modelName) + '15',
      borderColor: colorFor(modelName),
      borderWidth: 2,
      pointBackgroundColor: colorFor(modelName),
      pointRadius: 3,
    })),
  };

  const crowns = ['🥇', '🥈', '🥉'];

  return (
    <div className="space-y-8">
      <div>
        <h1 className="text-2xl font-bold text-slate-900">Category Performance</h1>
        <p className="text-sm text-slate-500 mt-1">Breakdown across 6 task categories (30 tasks each)</p>
      </div>

      {/* Model Selector */}
      <Card>
        <CardContent className="p-5">
          <div className="text-sm font-medium text-slate-700 mb-3">Select models to compare:</div>
          <div className="flex flex-wrap gap-2">
            {models.map((model) => {
              const isSelected = selectedModels.includes(model);
              return (
                <button
                  key={model}
                  onClick={() => toggleModel(model)}
                  className={`px-3 py-1.5 text-sm font-medium rounded-lg border-2 transition-all ${
                    isSelected
                      ? 'text-white'
                      : 'border-slate-200 text-slate-600 hover:border-slate-300 bg-white'
                  }`}
                  style={{
                    backgroundColor: isSelected ? colorFor(model) : undefined,
                    borderColor: isSelected ? colorFor(model) : undefined,
                  }}
                >
                  {shortName(model)}
                </button>
              );
            })}
          </div>
        </CardContent>
      </Card>

      {/* Charts */}
      <div className="grid grid-cols-1 lg:grid-cols-2 gap-6">
        <Card>
          <CardHeader>
            <CardTitle>Score by Category</CardTitle>
          </CardHeader>
          <CardContent>
            <div className="h-72">
              <Bar data={createDataset(m => m.avgScore)} options={{ ...chartOptions, scales: { ...chartOptions.scales, y: { ...chartOptions.scales.y, max: 100 } } }} />
            </div>
          </CardContent>
        </Card>

        <Card>
          <CardHeader>
            <CardTitle>Pass Rate by Category</CardTitle>
          </CardHeader>
          <CardContent>
            <div className="h-72">
              <Bar data={createDataset(m => m.passRate)} options={{ ...chartOptions, scales: { ...chartOptions.scales, y: { ...chartOptions.scales.y, max: 110 } } }} />
            </div>
          </CardContent>
        </Card>

        <Card>
          <CardHeader>
            <CardTitle>Cost by Category ($)</CardTitle>
          </CardHeader>
          <CardContent>
            <div className="h-72">
              <Bar data={createDataset(m => m.avgCost)} options={chartOptions} />
            </div>
          </CardContent>
        </Card>

        <Card>
          <CardHeader>
            <CardTitle>Time by Category (seconds)</CardTitle>
          </CardHeader>
          <CardContent>
            <div className="h-72">
              <Bar data={createDataset(m => m.avgTimeMs / 1000)} options={chartOptions} />
            </div>
          </CardContent>
        </Card>

        <Card>
          <CardHeader>
            <CardTitle>Tokens by Category (K)</CardTitle>
          </CardHeader>
          <CardContent>
            <div className="h-72">
              <Bar data={createDataset(m => m.avgTokens / 1000)} options={chartOptions} />
            </div>
          </CardContent>
        </Card>

        <Card>
          <CardHeader>
            <CardTitle>Category Strength</CardTitle>
          </CardHeader>
          <CardContent>
            <div className="h-72">
              <Radar data={radarData} options={{
                responsive: true,
                maintainAspectRatio: false,
                plugins: { legend: { position: 'bottom' as const, labels: { usePointStyle: true, padding: 10, font: { size: 10 } } } },
                scales: { r: { beginAtZero: true, max: 100, ticks: { stepSize: 25, display: false }, grid: { color: '#e2e8f0' }, angleLines: { color: '#e2e8f0' } } },
              }} />
            </div>
          </CardContent>
        </Card>
      </div>

      {/* Table */}
      <Card>
        <CardHeader>
          <CardTitle>Category Breakdown</CardTitle>
        </CardHeader>
        <div className="overflow-x-auto">
          <table className="min-w-full text-sm">
            <thead>
              <tr className="bg-slate-50/50 border-b border-slate-100">
                <th className="px-4 py-3 text-left text-xs font-semibold text-slate-600 uppercase">Category</th>
                {selectedModels.map(model => (
                  <th key={model} className="px-3 py-3 text-left text-xs font-semibold text-slate-600 uppercase" style={{ minWidth: 90 }}>
                    {shortName(model)}
                  </th>
                ))}
              </tr>
            </thead>
            <tbody className="divide-y divide-slate-100">
              {categoryData.map((cat, i) => {
                // Medals go to the three best scores among ALL models in the
                // category, not only the currently selected ones.
                const sorted = [...cat.models].sort((a, b) => b.avgScore - a.avgScore);
                const top3 = sorted.slice(0, 3).map(m => m.modelName);
                const getRank = (modelName: string) => top3.indexOf(modelName);

                return (
                  <tr key={cat.category} className={i % 2 === 0 ? 'bg-slate-50/30' : ''}>
                    <td className="px-4 py-3 font-medium text-slate-800 capitalize">{cat.category.replace(/-/g, ' ')}</td>
                    {selectedModels.map(model => {
                      const m = cat.models.find(x => x.modelName === model);

                      // A selected model may have no entry for this category.
                      // Show an explicit placeholder instead of a bare "%" / "$".
                      if (!m) {
                        return (
                          <td key={model} className="px-3 py-3 text-slate-400">—</td>
                        );
                      }

                      const rank = getRank(model);
                      return (
                        <td key={model} className="px-3 py-3">
                          <HoverCard content={
                            <div className="space-y-2">
                              <div className="font-semibold text-slate-900">{model}</div>
                              <div className="text-xs text-slate-500">{cat.category.replace(/-/g, ' ')}</div>
                              <div className="grid grid-cols-2 gap-2 text-xs pt-2">
                                <div>Score: <span className="font-semibold">{m.avgScore.toFixed(1)}%</span></div>
                                <div>Pass: <span className="font-semibold">{m.passRate.toFixed(0)}%</span></div>
                                <div>Cost: <span className="font-semibold text-emerald-600">${m.avgCost.toFixed(3)}</span></div>
                                <div>Time: <span className="font-semibold">{((m.avgTimeMs || 0) / 1000).toFixed(0)}s</span></div>
                              </div>
                            </div>
                          }>
                            <div className="cursor-pointer hover:bg-slate-100 rounded px-1 -mx-1 transition-colors">
                              <div className="font-medium flex items-center gap-1 text-blue-600">
                                {rank >= 0 && <span>{crowns[rank]}</span>}
                                {m.avgScore.toFixed(1)}%
                              </div>
                              <div className="text-xs text-slate-400">${m.avgCost.toFixed(3)}</div>
                            </div>
                          </HoverCard>
                        </td>
                      );
                    })}
                  </tr>
                );
              })}
            </tbody>
          </table>
        </div>
      </Card>
    </div>
  );
}
1112
-
1113
- // Per-Task Performance Charts Component
1114
/**
 * Results of a single benchmark task, with one entry per model that has a
 * result for it.
 */
interface TaskResult {
  /** Task identifier; the segment after the last `/` is used as a short title. */
  taskId: string;
  /** Category slug used by the category filter. */
  category: string;
  /** Subcategory label shown in the task hover card. */
  subcategory: string;
  /** Per-model outcomes for this task. */
  results: {
    /** Model display name; matched against the selected-model list. */
    modelName: string;
    /** Overall score, rendered with a `%` suffix and plotted on a 0-100 axis. */
    score: number;
    functional: number;
    quality: number;
    /** Whether the model passed the task. */
    passed: boolean;
    /** Token count; divided by 1000 for display in "K". */
    tokens: number;
    timeMs: number;
    /** Cost, rendered with a `$` prefix. */
    cost: number;
  }[];
}
1129
-
1130
/**
 * Per-task performance page.
 *
 * Reads task-level results from `taskResultsData` and shows, for up to seven
 * selected models, either a line chart of scores or a colour-coded heatmap
 * table, followed by summary cards for the first four selected models.
 * Without a category filter only the first 30 tasks are shown; the heatmap
 * additionally caps its rows at 50.
 */
function PerTaskCharts() {
  const taskResults = taskResultsData.tasks as TaskResult[] || [];
  const models = taskResultsData.models as string[] || [];
  const categories = taskResultsData.categories as string[] || [];
  const [selectedCategory, setSelectedCategory] = useState<string | null>(null);
  const [selectedModels, setSelectedModels] = useState<string[]>(models.slice(0, 5));
  const [viewMode, setViewMode] = useState<'chart' | 'heatmap'>('chart');

  const filteredTasks = selectedCategory
    ? taskResults.filter(t => t.category === selectedCategory)
    : taskResults.slice(0, 30);

  // Colour is keyed on the model's position in the full model list (the same
  // index the selector buttons use), NOT its position within the current
  // selection. Otherwise the line and summary colours drift away from the
  // button colours as soon as models are deselected or re-added.
  const colorFor = (modelName: string) =>
    CHART_COLORS[models.indexOf(modelName) % CHART_COLORS.length];

  // Labels show only the first two words of a model name.
  const shortName = (modelName: string) => modelName.split(' ').slice(0, 2).join(' ');

  // Deselects a selected model, or selects it if fewer than seven are active.
  const toggleModel = (model: string) => {
    if (selectedModels.includes(model)) {
      setSelectedModels(selectedModels.filter(m => m !== model));
    } else if (selectedModels.length < 7) {
      setSelectedModels([...selectedModels, model]);
    }
  };

  // One line per selected model; a task with no result for a model plots as 0.
  const chartData = {
    labels: filteredTasks.map((_, i) => `Task ${i + 1}`),
    datasets: selectedModels.map((modelName) => ({
      label: shortName(modelName),
      data: filteredTasks.map(task => {
        const result = task.results.find(r => r.modelName === modelName);
        return result?.score || 0;
      }),
      borderColor: colorFor(modelName),
      backgroundColor: colorFor(modelName) + '20',
      borderWidth: 2,
      tension: 0.3,
      fill: false,
      pointRadius: 3,
      pointHoverRadius: 6,
    })),
  };

  const chartOptions = {
    responsive: true,
    maintainAspectRatio: false,
    plugins: {
      legend: { position: 'bottom' as const, labels: { usePointStyle: true, padding: 12, font: { size: 10 } } },
      tooltip: {
        backgroundColor: '#ffffff',
        titleColor: '#0f172a',
        bodyColor: '#64748b',
        borderColor: '#e2e8f0',
        borderWidth: 1,
        padding: 12,
        cornerRadius: 8,
        callbacks: {
          // Tooltip title: last path segment of the task id, falling back to
          // the positional "Task N" label.
          title: (items: any[]) => {
            const idx = items[0]?.dataIndex;
            if (idx !== undefined && filteredTasks[idx]) {
              return filteredTasks[idx].taskId.split('/').pop() || `Task ${idx + 1}`;
            }
            return '';
          },
        },
      },
    },
    scales: {
      y: { beginAtZero: true, max: 100, grid: { color: '#f1f5f9' }, ticks: { color: '#64748b', font: { size: 11 } } },
      x: { grid: { display: false }, ticks: { color: '#64748b', font: { size: 9 }, maxRotation: 0 } },
    },
  };

  // Maps a score to its heatmap cell colour (green for 90+, down to red below 60).
  const getScoreColor = (score: number) => {
    if (score >= 90) return '#22c55e';
    if (score >= 80) return '#84cc16';
    if (score >= 70) return '#eab308';
    if (score >= 60) return '#f97316';
    return '#ef4444';
  };

  return (
    <div className="space-y-8">
      <div className="flex flex-col md:flex-row md:items-end md:justify-between gap-4">
        <div>
          <h1 className="text-2xl font-bold text-slate-900">Per-Task Performance</h1>
          <p className="text-sm text-slate-500 mt-1">Model performance on individual benchmark tasks</p>
        </div>
        <div className="flex gap-2">
          <button
            onClick={() => setViewMode('chart')}
            className={`px-3 py-1.5 text-sm font-medium rounded-lg transition-all ${
              viewMode === 'chart' ? 'bg-slate-900 text-white' : 'bg-slate-100 text-slate-600 hover:bg-slate-200'
            }`}
          >
            Line Chart
          </button>
          <button
            onClick={() => setViewMode('heatmap')}
            className={`px-3 py-1.5 text-sm font-medium rounded-lg transition-all ${
              viewMode === 'heatmap' ? 'bg-slate-900 text-white' : 'bg-slate-100 text-slate-600 hover:bg-slate-200'
            }`}
          >
            Heatmap
          </button>
        </div>
      </div>

      {/* Category Filter */}
      <div className="flex flex-wrap gap-2">
        <button
          onClick={() => setSelectedCategory(null)}
          className={`px-4 py-2 text-sm font-medium rounded-lg transition-all ${
            !selectedCategory ? 'bg-slate-900 text-white' : 'bg-slate-100 text-slate-600 hover:bg-slate-200'
          }`}
        >
          All Categories
        </button>
        {categories.map((cat) => (
          <button
            key={cat}
            onClick={() => setSelectedCategory(selectedCategory === cat ? null : cat)}
            className={`px-4 py-2 text-sm font-medium rounded-lg transition-all ${
              selectedCategory === cat ? 'bg-slate-900 text-white' : 'bg-slate-100 text-slate-600 hover:bg-slate-200'
            }`}
          >
            {cat.replace(/-/g, ' ')}
          </button>
        ))}
      </div>

      {/* Model Selector */}
      <Card>
        <CardContent className="p-5">
          <div className="text-sm font-medium text-slate-700 mb-3">Select models to compare:</div>
          <div className="flex flex-wrap gap-2">
            {models.map((model) => {
              const isSelected = selectedModels.includes(model);
              return (
                <button
                  key={model}
                  onClick={() => toggleModel(model)}
                  className={`px-3 py-1.5 text-sm font-medium rounded-lg border-2 transition-all ${
                    isSelected
                      ? 'text-white'
                      : 'border-slate-200 text-slate-600 hover:border-slate-300 bg-white'
                  }`}
                  style={{
                    backgroundColor: isSelected ? colorFor(model) : undefined,
                    borderColor: isSelected ? colorFor(model) : undefined,
                  }}
                >
                  {shortName(model)}
                </button>
              );
            })}
          </div>
        </CardContent>
      </Card>

      {viewMode === 'chart' ? (
        <Card>
          <CardHeader>
            <CardTitle>Score Trend Across Tasks</CardTitle>
            <CardDescription>
              {selectedCategory ? `${selectedCategory.replace(/-/g, ' ')} - ${filteredTasks.length} tasks` : `Showing first 30 tasks`}
            </CardDescription>
          </CardHeader>
          <CardContent>
            <div className="h-96">
              <Line data={chartData} options={chartOptions} />
            </div>
          </CardContent>
        </Card>
      ) : (
        <Card>
          <CardHeader>
            <CardTitle>Score Heatmap</CardTitle>
            <CardDescription>Color indicates score: green (90+) → yellow (70-80) → red (&lt;60)</CardDescription>
          </CardHeader>
          <div className="overflow-x-auto">
            <table className="min-w-full text-xs">
              <thead>
                <tr className="bg-slate-50/50 border-b border-slate-100">
                  <th className="px-3 py-2 text-left font-semibold text-slate-600 sticky left-0 bg-slate-50">Task</th>
                  {selectedModels.map(model => (
                    <th key={model} className="px-2 py-2 text-center font-semibold text-slate-600" style={{ minWidth: 60 }}>
                      {model.split(' ')[0]}
                    </th>
                  ))}
                </tr>
              </thead>
              <tbody className="divide-y divide-slate-100">
                {filteredTasks.slice(0, 50).map((task, i) => (
                  <tr key={task.taskId} className="hover:bg-slate-50/50">
                    <td className="px-3 py-1.5 font-medium text-slate-700 sticky left-0 bg-white">
                      <HoverCard content={
                        <div className="space-y-2">
                          <div className="font-semibold text-slate-900">{task.taskId}</div>
                          <div className="text-xs text-slate-500">Category: {task.category}</div>
                          <div className="text-xs text-slate-500">Subcategory: {task.subcategory}</div>
                        </div>
                      }>
                        <span className="cursor-pointer hover:text-blue-600">T{i + 1}</span>
                      </HoverCard>
                    </td>
                    {selectedModels.map(modelName => {
                      const result = task.results.find(r => r.modelName === modelName);
                      const score = result?.score || 0;
                      return (
                        <td key={modelName} className="px-2 py-1.5 text-center">
                          <HoverCard content={
                            <div className="space-y-2">
                              <div className="font-semibold text-slate-900">{modelName}</div>
                              <div className="text-xs text-slate-500">{task.taskId.split('/').pop()}</div>
                              <div className="grid grid-cols-2 gap-2 text-xs pt-2">
                                <div>Score: <span className="font-semibold">{score.toFixed(1)}%</span></div>
                                <div>Passed: <span className={result?.passed ? 'text-emerald-600' : 'text-red-600'}>{result?.passed ? 'Yes' : 'No'}</span></div>
                                <div>Tokens: <span className="font-semibold">{((result?.tokens || 0) / 1000).toFixed(1)}K</span></div>
                                <div>Cost: <span className="font-semibold text-emerald-600">${(result?.cost || 0).toFixed(4)}</span></div>
                              </div>
                            </div>
                          }>
                            <div
                              className="w-8 h-6 rounded flex items-center justify-center text-white text-xs font-medium cursor-pointer mx-auto"
                              style={{ backgroundColor: getScoreColor(score) }}
                            >
                              {score.toFixed(0)}
                            </div>
                          </HoverCard>
                        </td>
                      );
                    })}
                  </tr>
                ))}
              </tbody>
            </table>
          </div>
        </Card>
      )}

      {/* Summary Stats */}
      <div className="grid grid-cols-2 lg:grid-cols-4 gap-4">
        {selectedModels.slice(0, 4).map((modelName) => {
          const modelResults = filteredTasks.flatMap(t => t.results.filter(r => r.modelName === modelName));
          // `|| 1` avoids dividing by zero when the model has no results in view.
          const avgScore = modelResults.reduce((sum, r) => sum + r.score, 0) / (modelResults.length || 1);
          const passCount = modelResults.filter(r => r.passed).length;
          return (
            <Card key={modelName} hover>
              <CardContent className="p-4">
                <div className="flex items-center gap-2 mb-2">
                  <div
                    className="w-3 h-3 rounded-full"
                    style={{ backgroundColor: colorFor(modelName) }}
                  />
                  <span className="font-medium text-slate-900 text-sm">{shortName(modelName)}</span>
                </div>
                <div className="text-2xl font-bold text-slate-900">{avgScore.toFixed(1)}%</div>
                <div className="text-xs text-slate-500">{passCount}/{modelResults.length} tasks passed</div>
              </CardContent>
            </Card>
          );
        })}
      </div>
    </div>
  );
}
1388
-
1389
- // Navigation Link Component
1390
/**
 * Navigation link that highlights itself when it points at the current route.
 *
 * A link is active when the current pathname equals `to`, or (for any target
 * other than the root `/`) when the pathname is nested below `to`. Nesting is
 * checked on a path-segment boundary, so a link to `/tasks` is not marked
 * active on an unrelated sibling path such as `/tasks-archive`.
 *
 * @param to       Target path passed through to the router `Link`.
 * @param children Link label.
 */
function NavLink({ to, children }: { to: string; children: React.ReactNode }) {
  const { pathname } = useLocation();
  // Compare against "<to>/" rather than "<to>" so only true descendants match.
  const nestedPrefix = to.endsWith('/') ? to : `${to}/`;
  const isActive = pathname === to || (to !== '/' && pathname.startsWith(nestedPrefix));

  return (
    <Link
      to={to}
      className={`px-4 py-2 text-sm font-medium rounded-lg transition-all ${
        isActive
          ? 'bg-slate-100 text-slate-900'
          : 'text-slate-600 hover:bg-slate-50 hover:text-slate-900'
      }`}
    >
      {children}
    </Link>
  );
}
1407
-
1408
- // Main App Component
1409
/**
 * Application shell: sticky top navigation bar, routed main content area and
 * footer. The `/live` route is registered but has no entry in the nav bar.
 */
export default function App() {
  // Entries of the top navigation bar, in display order.
  const navItems = [
    { to: '/', label: 'Leaderboard' },
    { to: '/charts', label: 'Analytics' },
    { to: '/task-performance', label: 'Categories' },
    { to: '/per-task', label: 'Per-Task' },
    { to: '/tasks', label: 'Tasks' },
  ];

  const containerClass = 'max-w-7xl mx-auto px-4 sm:px-6 lg:px-8';

  return (
    <div className="min-h-screen bg-slate-50">
      {/* Navigation */}
      <nav className="bg-white border-b border-slate-200 sticky top-0 z-50">
        <div className={containerClass}>
          <div className="flex items-center justify-between h-16">
            <div className="flex items-center gap-8">
              <Link to="/" className="flex items-center gap-3">
                <img src="/favicon.svg" alt="VibeCodingBench" className="w-9 h-9" />
                <span className="text-lg font-semibold text-slate-900">VibeCodingBench</span>
              </Link>
              <div className="hidden md:flex items-center gap-1">
                {navItems.map(({ to, label }) => (
                  <NavLink key={to} to={to}>{label}</NavLink>
                ))}
              </div>
            </div>
            <a
              href="https://github.com/alt-research/vibe-coding-benchmark-public"
              target="_blank"
              rel="noopener noreferrer"
              className="text-sm text-slate-500 hover:text-slate-900 flex items-center gap-2 transition-colors"
            >
              <svg className="w-5 h-5" fill="currentColor" viewBox="0 0 24 24">
                <path fillRule="evenodd" d="M12 2C6.477 2 2 6.484 2 12.017c0 4.425 2.865 8.18 6.839 9.504.5.092.682-.217.682-.483 0-.237-.008-.868-.013-1.703-2.782.605-3.369-1.343-3.369-1.343-.454-1.158-1.11-1.466-1.11-1.466-.908-.62.069-.608.069-.608 1.003.07 1.531 1.032 1.531 1.032.892 1.53 2.341 1.088 2.91.832.092-.647.35-1.088.636-1.338-2.22-.253-4.555-1.113-4.555-4.951 0-1.093.39-1.988 1.029-2.688-.103-.253-.446-1.272.098-2.65 0 0 .84-.27 2.75 1.026A9.564 9.564 0 0112 6.844c.85.004 1.705.115 2.504.337 1.909-1.296 2.747-1.027 2.747-1.027.546 1.379.202 2.398.1 2.651.64.7 1.028 1.595 1.028 2.688 0 3.848-2.339 4.695-4.566 4.943.359.309.678.92.678 1.855 0 1.338-.012 2.419-.012 2.747 0 .268.18.58.688.482A10.019 10.019 0 0022 12.017C22 6.484 17.522 2 12 2z" clipRule="evenodd" />
              </svg>
              GitHub
            </a>
          </div>
        </div>
      </nav>

      {/* Main Content */}
      <main className={`${containerClass} py-8`}>
        <Routes>
          <Route path="/" element={<Leaderboard />} />
          <Route path="/live" element={<LiveDashboard />} />
          <Route path="/tasks" element={<Tasks />} />
          <Route path="/charts" element={<Charts />} />
          <Route path="/task-performance" element={<TaskPerformance />} />
          <Route path="/per-task" element={<PerTaskCharts />} />
          {/* Anything unmatched falls through to the not-found page. */}
          <Route path="*" element={<NotFound />} />
        </Routes>
      </main>

      {/* Footer */}
      <footer className="border-t border-slate-200 bg-white mt-12">
        <div className={`${containerClass} py-6`}>
          <div className="flex items-center justify-between text-sm text-slate-500">
            <span>VibeCodingBench - AI Coding Agent Benchmark</span>
            <span>180 tasks · 14 models · Updated Jan 2026</span>
          </div>
        </div>
      </footer>
    </div>
  );
}