gsd-trae 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (763):
  1. package/CHANGELOG.md +40 -0
  2. package/README.md +7 -76
  3. package/assets/screenshot.png +0 -0
  4. package/package.json +12 -3
  5. package/.claude/settings.local.json +0 -8
  6. package/.gitmodules +0 -6
  7. package/.trae/project_rules.md +0 -56
  8. package/.trae/rules/project_rules.md +0 -56
  9. package/.vscode/code-counter/code-counter.db +0 -0
  10. package/.vscode/settings.json +0 -5
  11. package/refs/gsd/.github/CODEOWNERS +0 -2
  12. package/refs/gsd/.github/FUNDING.yml +0 -1
  13. package/refs/gsd/.github/ISSUE_TEMPLATE/bug_report.yml +0 -59
  14. package/refs/gsd/.github/ISSUE_TEMPLATE/feature_request.yml +0 -37
  15. package/refs/gsd/.github/pull_request_template.md +0 -24
  16. package/refs/gsd/.github/workflows/auto-label-issues.yml +0 -21
  17. package/refs/gsd/CHANGELOG.md +0 -1520
  18. package/refs/gsd/LICENSE +0 -21
  19. package/refs/gsd/README.md +0 -704
  20. package/refs/gsd/SECURITY.md +0 -33
  21. package/refs/gsd/agents/gsd-codebase-mapper.md +0 -764
  22. package/refs/gsd/agents/gsd-debugger.md +0 -1246
  23. package/refs/gsd/agents/gsd-executor.md +0 -469
  24. package/refs/gsd/agents/gsd-integration-checker.md +0 -443
  25. package/refs/gsd/agents/gsd-phase-researcher.md +0 -546
  26. package/refs/gsd/agents/gsd-plan-checker.md +0 -690
  27. package/refs/gsd/agents/gsd-planner.md +0 -1275
  28. package/refs/gsd/agents/gsd-project-researcher.md +0 -621
  29. package/refs/gsd/agents/gsd-research-synthesizer.md +0 -239
  30. package/refs/gsd/agents/gsd-roadmapper.md +0 -642
  31. package/refs/gsd/agents/gsd-verifier.md +0 -573
  32. package/refs/gsd/assets/gsd-logo-2000-transparent.png +0 -0
  33. package/refs/gsd/assets/gsd-logo-2000-transparent.svg +0 -17
  34. package/refs/gsd/assets/gsd-logo-2000.png +0 -0
  35. package/refs/gsd/assets/gsd-logo-2000.svg +0 -21
  36. package/refs/gsd/assets/terminal.svg +0 -68
  37. package/refs/gsd/bin/install.js +0 -2090
  38. package/refs/gsd/commands/gsd/add-phase.md +0 -43
  39. package/refs/gsd/commands/gsd/add-tests.md +0 -41
  40. package/refs/gsd/commands/gsd/add-todo.md +0 -47
  41. package/refs/gsd/commands/gsd/audit-milestone.md +0 -36
  42. package/refs/gsd/commands/gsd/check-todos.md +0 -45
  43. package/refs/gsd/commands/gsd/cleanup.md +0 -18
  44. package/refs/gsd/commands/gsd/complete-milestone.md +0 -136
  45. package/refs/gsd/commands/gsd/debug.md +0 -167
  46. package/refs/gsd/commands/gsd/discuss-phase.md +0 -83
  47. package/refs/gsd/commands/gsd/execute-phase.md +0 -41
  48. package/refs/gsd/commands/gsd/health.md +0 -22
  49. package/refs/gsd/commands/gsd/help.md +0 -22
  50. package/refs/gsd/commands/gsd/insert-phase.md +0 -32
  51. package/refs/gsd/commands/gsd/join-discord.md +0 -18
  52. package/refs/gsd/commands/gsd/list-phase-assumptions.md +0 -46
  53. package/refs/gsd/commands/gsd/map-codebase.md +0 -71
  54. package/refs/gsd/commands/gsd/new-milestone.md +0 -44
  55. package/refs/gsd/commands/gsd/new-project.md +0 -42
  56. package/refs/gsd/commands/gsd/new-project.md.bak +0 -1041
  57. package/refs/gsd/commands/gsd/pause-work.md +0 -38
  58. package/refs/gsd/commands/gsd/plan-milestone-gaps.md +0 -34
  59. package/refs/gsd/commands/gsd/plan-phase.md +0 -45
  60. package/refs/gsd/commands/gsd/progress.md +0 -24
  61. package/refs/gsd/commands/gsd/quick.md +0 -41
  62. package/refs/gsd/commands/gsd/reapply-patches.md +0 -110
  63. package/refs/gsd/commands/gsd/remove-phase.md +0 -31
  64. package/refs/gsd/commands/gsd/research-phase.md +0 -189
  65. package/refs/gsd/commands/gsd/resume-work.md +0 -40
  66. package/refs/gsd/commands/gsd/set-profile.md +0 -34
  67. package/refs/gsd/commands/gsd/settings.md +0 -36
  68. package/refs/gsd/commands/gsd/update.md +0 -37
  69. package/refs/gsd/commands/gsd/verify-work.md +0 -38
  70. package/refs/gsd/docs/USER-GUIDE.md +0 -471
  71. package/refs/gsd/docs/context-monitor.md +0 -96
  72. package/refs/gsd/get-shit-done/bin/gsd-tools.cjs +0 -585
  73. package/refs/gsd/get-shit-done/bin/lib/commands.cjs +0 -553
  74. package/refs/gsd/get-shit-done/bin/lib/config.cjs +0 -162
  75. package/refs/gsd/get-shit-done/bin/lib/core.cjs +0 -411
  76. package/refs/gsd/get-shit-done/bin/lib/frontmatter.cjs +0 -299
  77. package/refs/gsd/get-shit-done/bin/lib/init.cjs +0 -710
  78. package/refs/gsd/get-shit-done/bin/lib/milestone.cjs +0 -215
  79. package/refs/gsd/get-shit-done/bin/lib/phase.cjs +0 -870
  80. package/refs/gsd/get-shit-done/bin/lib/roadmap.cjs +0 -298
  81. package/refs/gsd/get-shit-done/bin/lib/state.cjs +0 -521
  82. package/refs/gsd/get-shit-done/bin/lib/template.cjs +0 -222
  83. package/refs/gsd/get-shit-done/bin/lib/verify.cjs +0 -772
  84. package/refs/gsd/get-shit-done/references/checkpoints.md +0 -776
  85. package/refs/gsd/get-shit-done/references/continuation-format.md +0 -249
  86. package/refs/gsd/get-shit-done/references/decimal-phase-calculation.md +0 -65
  87. package/refs/gsd/get-shit-done/references/git-integration.md +0 -248
  88. package/refs/gsd/get-shit-done/references/git-planning-commit.md +0 -38
  89. package/refs/gsd/get-shit-done/references/model-profile-resolution.md +0 -34
  90. package/refs/gsd/get-shit-done/references/model-profiles.md +0 -92
  91. package/refs/gsd/get-shit-done/references/phase-argument-parsing.md +0 -61
  92. package/refs/gsd/get-shit-done/references/planning-config.md +0 -196
  93. package/refs/gsd/get-shit-done/references/questioning.md +0 -145
  94. package/refs/gsd/get-shit-done/references/tdd.md +0 -263
  95. package/refs/gsd/get-shit-done/references/ui-brand.md +0 -160
  96. package/refs/gsd/get-shit-done/references/verification-patterns.md +0 -612
  97. package/refs/gsd/get-shit-done/templates/DEBUG.md +0 -164
  98. package/refs/gsd/get-shit-done/templates/UAT.md +0 -247
  99. package/refs/gsd/get-shit-done/templates/VALIDATION.md +0 -76
  100. package/refs/gsd/get-shit-done/templates/codebase/architecture.md +0 -255
  101. package/refs/gsd/get-shit-done/templates/codebase/concerns.md +0 -310
  102. package/refs/gsd/get-shit-done/templates/codebase/conventions.md +0 -307
  103. package/refs/gsd/get-shit-done/templates/codebase/integrations.md +0 -280
  104. package/refs/gsd/get-shit-done/templates/codebase/stack.md +0 -186
  105. package/refs/gsd/get-shit-done/templates/codebase/structure.md +0 -285
  106. package/refs/gsd/get-shit-done/templates/codebase/testing.md +0 -480
  107. package/refs/gsd/get-shit-done/templates/config.json +0 -37
  108. package/refs/gsd/get-shit-done/templates/context.md +0 -283
  109. package/refs/gsd/get-shit-done/templates/continue-here.md +0 -78
  110. package/refs/gsd/get-shit-done/templates/debug-subagent-prompt.md +0 -91
  111. package/refs/gsd/get-shit-done/templates/discovery.md +0 -146
  112. package/refs/gsd/get-shit-done/templates/milestone-archive.md +0 -123
  113. package/refs/gsd/get-shit-done/templates/milestone.md +0 -115
  114. package/refs/gsd/get-shit-done/templates/phase-prompt.md +0 -569
  115. package/refs/gsd/get-shit-done/templates/planner-subagent-prompt.md +0 -117
  116. package/refs/gsd/get-shit-done/templates/project.md +0 -184
  117. package/refs/gsd/get-shit-done/templates/requirements.md +0 -231
  118. package/refs/gsd/get-shit-done/templates/research-project/ARCHITECTURE.md +0 -204
  119. package/refs/gsd/get-shit-done/templates/research-project/FEATURES.md +0 -147
  120. package/refs/gsd/get-shit-done/templates/research-project/PITFALLS.md +0 -200
  121. package/refs/gsd/get-shit-done/templates/research-project/STACK.md +0 -120
  122. package/refs/gsd/get-shit-done/templates/research-project/SUMMARY.md +0 -170
  123. package/refs/gsd/get-shit-done/templates/research.md +0 -552
  124. package/refs/gsd/get-shit-done/templates/retrospective.md +0 -54
  125. package/refs/gsd/get-shit-done/templates/roadmap.md +0 -202
  126. package/refs/gsd/get-shit-done/templates/state.md +0 -176
  127. package/refs/gsd/get-shit-done/templates/summary-complex.md +0 -59
  128. package/refs/gsd/get-shit-done/templates/summary-minimal.md +0 -41
  129. package/refs/gsd/get-shit-done/templates/summary-standard.md +0 -48
  130. package/refs/gsd/get-shit-done/templates/summary.md +0 -248
  131. package/refs/gsd/get-shit-done/templates/user-setup.md +0 -311
  132. package/refs/gsd/get-shit-done/templates/verification-report.md +0 -322
  133. package/refs/gsd/get-shit-done/workflows/add-phase.md +0 -111
  134. package/refs/gsd/get-shit-done/workflows/add-tests.md +0 -350
  135. package/refs/gsd/get-shit-done/workflows/add-todo.md +0 -157
  136. package/refs/gsd/get-shit-done/workflows/audit-milestone.md +0 -297
  137. package/refs/gsd/get-shit-done/workflows/check-todos.md +0 -176
  138. package/refs/gsd/get-shit-done/workflows/cleanup.md +0 -152
  139. package/refs/gsd/get-shit-done/workflows/complete-milestone.md +0 -763
  140. package/refs/gsd/get-shit-done/workflows/diagnose-issues.md +0 -219
  141. package/refs/gsd/get-shit-done/workflows/discovery-phase.md +0 -289
  142. package/refs/gsd/get-shit-done/workflows/discuss-phase.md +0 -542
  143. package/refs/gsd/get-shit-done/workflows/execute-phase.md +0 -449
  144. package/refs/gsd/get-shit-done/workflows/execute-plan.md +0 -448
  145. package/refs/gsd/get-shit-done/workflows/health.md +0 -156
  146. package/refs/gsd/get-shit-done/workflows/help.md +0 -489
  147. package/refs/gsd/get-shit-done/workflows/insert-phase.md +0 -129
  148. package/refs/gsd/get-shit-done/workflows/list-phase-assumptions.md +0 -178
  149. package/refs/gsd/get-shit-done/workflows/map-codebase.md +0 -315
  150. package/refs/gsd/get-shit-done/workflows/new-milestone.md +0 -382
  151. package/refs/gsd/get-shit-done/workflows/new-project.md +0 -1116
  152. package/refs/gsd/get-shit-done/workflows/pause-work.md +0 -122
  153. package/refs/gsd/get-shit-done/workflows/plan-milestone-gaps.md +0 -274
  154. package/refs/gsd/get-shit-done/workflows/plan-phase.md +0 -569
  155. package/refs/gsd/get-shit-done/workflows/progress.md +0 -381
  156. package/refs/gsd/get-shit-done/workflows/quick.md +0 -453
  157. package/refs/gsd/get-shit-done/workflows/remove-phase.md +0 -154
  158. package/refs/gsd/get-shit-done/workflows/research-phase.md +0 -73
  159. package/refs/gsd/get-shit-done/workflows/resume-project.md +0 -306
  160. package/refs/gsd/get-shit-done/workflows/set-profile.md +0 -80
  161. package/refs/gsd/get-shit-done/workflows/settings.md +0 -213
  162. package/refs/gsd/get-shit-done/workflows/transition.md +0 -544
  163. package/refs/gsd/get-shit-done/workflows/update.md +0 -219
  164. package/refs/gsd/get-shit-done/workflows/verify-phase.md +0 -242
  165. package/refs/gsd/get-shit-done/workflows/verify-work.md +0 -569
  166. package/refs/gsd/hooks/gsd-check-update.js +0 -62
  167. package/refs/gsd/hooks/gsd-context-monitor.js +0 -122
  168. package/refs/gsd/hooks/gsd-statusline.js +0 -108
  169. package/refs/gsd/package.json +0 -50
  170. package/refs/gsd/scripts/build-hooks.js +0 -43
  171. package/refs/gsd/tests/commands.test.cjs +0 -661
  172. package/refs/gsd/tests/helpers.cjs +0 -40
  173. package/refs/gsd/tests/init.test.cjs +0 -205
  174. package/refs/gsd/tests/milestone.test.cjs +0 -98
  175. package/refs/gsd/tests/phase.test.cjs +0 -1241
  176. package/refs/gsd/tests/roadmap.test.cjs +0 -265
  177. package/refs/gsd/tests/state.test.cjs +0 -302
  178. package/refs/gsd/tests/verify.test.cjs +0 -80
  179. package/refs/vbenchmark/.agent/agents/codebase-explorer.md +0 -224
  180. package/refs/vbenchmark/.agent/agents/debugger.md +0 -180
  181. package/refs/vbenchmark/.agent/agents/documenter.md +0 -166
  182. package/refs/vbenchmark/.agent/agents/implementer.md +0 -70
  183. package/refs/vbenchmark/.agent/agents/orchestrator.md +0 -212
  184. package/refs/vbenchmark/.agent/agents/researcher.md +0 -80
  185. package/refs/vbenchmark/.agent/agents/reviewer.md +0 -184
  186. package/refs/vbenchmark/.agent/agents/tester.md +0 -170
  187. package/refs/vbenchmark/.agent/commands/commit.md +0 -29
  188. package/refs/vbenchmark/.agent/commands/debug.md +0 -59
  189. package/refs/vbenchmark/.agent/commands/document.md +0 -52
  190. package/refs/vbenchmark/.agent/commands/gather-context.md +0 -58
  191. package/refs/vbenchmark/.agent/commands/init.md +0 -56
  192. package/refs/vbenchmark/.agent/commands/preset-help.md +0 -50
  193. package/refs/vbenchmark/.agent/commands/refactor.md +0 -71
  194. package/refs/vbenchmark/.agent/commands/research.md +0 -37
  195. package/refs/vbenchmark/.agent/commands/review.md +0 -38
  196. package/refs/vbenchmark/.agent/commands/test.md +0 -61
  197. package/refs/vbenchmark/.agent/rules/01-code-quality.md +0 -33
  198. package/refs/vbenchmark/.agent/rules/02-typescript-go.md +0 -46
  199. package/refs/vbenchmark/.agent/rules/03-security-git.md +0 -34
  200. package/refs/vbenchmark/.agent/rules/04-architecture.md +0 -40
  201. package/refs/vbenchmark/.agent/sync.js +0 -536
  202. package/refs/vbenchmark/.agent/workflows/commit.md +0 -29
  203. package/refs/vbenchmark/.agent/workflows/debug.md +0 -59
  204. package/refs/vbenchmark/.agent/workflows/document.md +0 -52
  205. package/refs/vbenchmark/.agent/workflows/gather-context.md +0 -58
  206. package/refs/vbenchmark/.agent/workflows/init.md +0 -56
  207. package/refs/vbenchmark/.agent/workflows/preset-help.md +0 -50
  208. package/refs/vbenchmark/.agent/workflows/refactor.md +0 -71
  209. package/refs/vbenchmark/.agent/workflows/research.md +0 -37
  210. package/refs/vbenchmark/.agent/workflows/review.md +0 -38
  211. package/refs/vbenchmark/.agent/workflows/test.md +0 -61
  212. package/refs/vbenchmark/.claude/commands/agentic-dev/apply.md +0 -222
  213. package/refs/vbenchmark/.claude/commands/agentic-dev/done.md +0 -166
  214. package/refs/vbenchmark/.claude/commands/agentic-dev/proposal.md +0 -220
  215. package/refs/vbenchmark/.claude/commands/openspec/apply.md +0 -23
  216. package/refs/vbenchmark/.claude/commands/openspec/archive.md +0 -27
  217. package/refs/vbenchmark/.claude/commands/openspec/proposal.md +0 -28
  218. package/refs/vbenchmark/.clinerules/01-rules.md +0 -73
  219. package/refs/vbenchmark/.clinerules/02-agents.md +0 -34
  220. package/refs/vbenchmark/.cursor/commands/commit.md +0 -29
  221. package/refs/vbenchmark/.cursor/commands/debug.md +0 -59
  222. package/refs/vbenchmark/.cursor/commands/document.md +0 -52
  223. package/refs/vbenchmark/.cursor/commands/gather-context.md +0 -58
  224. package/refs/vbenchmark/.cursor/commands/init.md +0 -56
  225. package/refs/vbenchmark/.cursor/commands/preset-help.md +0 -50
  226. package/refs/vbenchmark/.cursor/commands/refactor.md +0 -71
  227. package/refs/vbenchmark/.cursor/commands/research.md +0 -37
  228. package/refs/vbenchmark/.cursor/commands/review.md +0 -38
  229. package/refs/vbenchmark/.cursor/commands/test.md +0 -61
  230. package/refs/vbenchmark/.cursor/rules/agents.mdc +0 -1357
  231. package/refs/vbenchmark/.factory/droids/codebase-explorer.md +0 -224
  232. package/refs/vbenchmark/.factory/droids/debugger.md +0 -180
  233. package/refs/vbenchmark/.factory/droids/documenter.md +0 -166
  234. package/refs/vbenchmark/.factory/droids/implementer.md +0 -70
  235. package/refs/vbenchmark/.factory/droids/orchestrator.md +0 -212
  236. package/refs/vbenchmark/.factory/droids/researcher.md +0 -80
  237. package/refs/vbenchmark/.factory/droids/reviewer.md +0 -184
  238. package/refs/vbenchmark/.factory/droids/tester.md +0 -170
  239. package/refs/vbenchmark/.gemini/workflows/commit.md +0 -29
  240. package/refs/vbenchmark/.gemini/workflows/debug.md +0 -59
  241. package/refs/vbenchmark/.gemini/workflows/document.md +0 -52
  242. package/refs/vbenchmark/.gemini/workflows/gather-context.md +0 -58
  243. package/refs/vbenchmark/.gemini/workflows/init.md +0 -56
  244. package/refs/vbenchmark/.gemini/workflows/preset-help.md +0 -50
  245. package/refs/vbenchmark/.gemini/workflows/refactor.md +0 -71
  246. package/refs/vbenchmark/.gemini/workflows/research.md +0 -37
  247. package/refs/vbenchmark/.gemini/workflows/review.md +0 -38
  248. package/refs/vbenchmark/.gemini/workflows/test.md +0 -61
  249. package/refs/vbenchmark/.github/CODEOWNERS +0 -20
  250. package/refs/vbenchmark/.github/FUNDING.yml +0 -4
  251. package/refs/vbenchmark/.github/ISSUE_TEMPLATE/bug-report.yml +0 -76
  252. package/refs/vbenchmark/.github/ISSUE_TEMPLATE/new-task.yml +0 -106
  253. package/refs/vbenchmark/.github/PULL_REQUEST_TEMPLATE.md +0 -38
  254. package/refs/vbenchmark/.github/copilot-instructions.md +0 -73
  255. package/refs/vbenchmark/.github/workflows/ci.yaml +0 -33
  256. package/refs/vbenchmark/.github/workflows/vercel-auto-pr.yml +0 -478
  257. package/refs/vbenchmark/.github/workflows/vercel-deploy.yaml +0 -487
  258. package/refs/vbenchmark/.github/workflows/vercel-pr-command.yaml +0 -337
  259. package/refs/vbenchmark/.github/workflows/vercel-project-init.yaml +0 -208
  260. package/refs/vbenchmark/.opencode/agent/codebase-explorer.md +0 -224
  261. package/refs/vbenchmark/.opencode/agent/debugger.md +0 -180
  262. package/refs/vbenchmark/.opencode/agent/documenter.md +0 -166
  263. package/refs/vbenchmark/.opencode/agent/implementer.md +0 -70
  264. package/refs/vbenchmark/.opencode/agent/orchestrator.md +0 -212
  265. package/refs/vbenchmark/.opencode/agent/researcher.md +0 -80
  266. package/refs/vbenchmark/.opencode/agent/reviewer.md +0 -184
  267. package/refs/vbenchmark/.opencode/agent/tester.md +0 -170
  268. package/refs/vbenchmark/.opencode/command/commit.md +0 -29
  269. package/refs/vbenchmark/.opencode/command/debug.md +0 -59
  270. package/refs/vbenchmark/.opencode/command/document.md +0 -52
  271. package/refs/vbenchmark/.opencode/command/gather-context.md +0 -58
  272. package/refs/vbenchmark/.opencode/command/init.md +0 -56
  273. package/refs/vbenchmark/.opencode/command/preset-help.md +0 -50
  274. package/refs/vbenchmark/.opencode/command/refactor.md +0 -71
  275. package/refs/vbenchmark/.opencode/command/research.md +0 -37
  276. package/refs/vbenchmark/.opencode/command/review.md +0 -38
  277. package/refs/vbenchmark/.opencode/command/test.md +0 -61
  278. package/refs/vbenchmark/.trae/project_rules.md +0 -73
  279. package/refs/vbenchmark/.windsurf/rules/rules.md +0 -85
  280. package/refs/vbenchmark/AGENTS.md +0 -73
  281. package/refs/vbenchmark/CONTRIBUTING.md +0 -332
  282. package/refs/vbenchmark/Caddyfile +0 -3
  283. package/refs/vbenchmark/LICENSE +0 -47
  284. package/refs/vbenchmark/README.md +0 -354
  285. package/refs/vbenchmark/docker-compose.prod.yaml +0 -35
  286. package/refs/vbenchmark/docker-compose.yaml +0 -53
  287. package/refs/vbenchmark/docs/TASK_EXPANSION_PLAN.md +0 -211
  288. package/refs/vbenchmark/docs/THESIS.md +0 -441
  289. package/refs/vbenchmark/docs/categories/code-evolution.md +0 -138
  290. package/refs/vbenchmark/openspec/changes/init-vibecodingbench/design.md +0 -111
  291. package/refs/vbenchmark/openspec/changes/init-vibecodingbench/proposal.md +0 -15
  292. package/refs/vbenchmark/openspec/changes/init-vibecodingbench/specs/evaluation/spec.md +0 -105
  293. package/refs/vbenchmark/openspec/changes/init-vibecodingbench/specs/leaderboard/spec.md +0 -68
  294. package/refs/vbenchmark/openspec/changes/init-vibecodingbench/specs/task-definition/spec.md +0 -45
  295. package/refs/vbenchmark/openspec/changes/init-vibecodingbench/specs/task-runner/spec.md +0 -49
  296. package/refs/vbenchmark/openspec/changes/init-vibecodingbench/tasks.md +0 -413
  297. package/refs/vbenchmark/package.json +0 -51
  298. package/refs/vbenchmark/packages/cli/eslint.config.js +0 -16
  299. package/refs/vbenchmark/packages/cli/package.json +0 -35
  300. package/refs/vbenchmark/packages/cli/src/agents/index.ts +0 -655
  301. package/refs/vbenchmark/packages/cli/src/commands/eval.ts +0 -197
  302. package/refs/vbenchmark/packages/cli/src/commands/list.ts +0 -63
  303. package/refs/vbenchmark/packages/cli/src/commands/run.ts +0 -147
  304. package/refs/vbenchmark/packages/cli/src/evaluator.ts +0 -125
  305. package/refs/vbenchmark/packages/cli/src/index.ts +0 -21
  306. package/refs/vbenchmark/packages/cli/src/lib/task-variation.ts +0 -153
  307. package/refs/vbenchmark/packages/cli/src/loader.ts +0 -258
  308. package/refs/vbenchmark/packages/cli/src/reporter.ts +0 -222
  309. package/refs/vbenchmark/packages/cli/src/runtime/docker.ts +0 -385
  310. package/refs/vbenchmark/packages/cli/tsconfig.json +0 -8
  311. package/refs/vbenchmark/packages/dashboard/Dockerfile +0 -42
  312. package/refs/vbenchmark/packages/dashboard/index.html +0 -21
  313. package/refs/vbenchmark/packages/dashboard/package.json +0 -29
  314. package/refs/vbenchmark/packages/dashboard/postcss.config.js +0 -6
  315. package/refs/vbenchmark/packages/dashboard/public/favicon.svg +0 -24
  316. package/refs/vbenchmark/packages/dashboard/public/logo.png +0 -0
  317. package/refs/vbenchmark/packages/dashboard/public/logo.svg +0 -39
  318. package/refs/vbenchmark/packages/dashboard/src/App.tsx +0 -1468
  319. package/refs/vbenchmark/packages/dashboard/src/data/category-performance.json +0 -1
  320. package/refs/vbenchmark/packages/dashboard/src/data/leaderboard.json +0 -1
  321. package/refs/vbenchmark/packages/dashboard/src/data/task-results.json +0 -1
  322. package/refs/vbenchmark/packages/dashboard/src/data/tasks.json +0 -1
  323. package/refs/vbenchmark/packages/dashboard/src/index.css +0 -3
  324. package/refs/vbenchmark/packages/dashboard/src/main.tsx +0 -13
  325. package/refs/vbenchmark/packages/dashboard/src/vite-env.d.ts +0 -9
  326. package/refs/vbenchmark/packages/dashboard/tailwind.config.js +0 -11
  327. package/refs/vbenchmark/packages/dashboard/tsconfig.json +0 -21
  328. package/refs/vbenchmark/packages/dashboard/tsconfig.node.json +0 -11
  329. package/refs/vbenchmark/packages/dashboard/vercel.json +0 -6
  330. package/refs/vbenchmark/packages/dashboard/vite.config.ts +0 -28
  331. package/refs/vbenchmark/packages/evaluator/eslint.config.js +0 -16
  332. package/refs/vbenchmark/packages/evaluator/package.json +0 -24
  333. package/refs/vbenchmark/packages/evaluator/src/index.ts +0 -15
  334. package/refs/vbenchmark/packages/evaluator/src/runners/functional.ts +0 -88
  335. package/refs/vbenchmark/packages/evaluator/src/runners/quality.ts +0 -140
  336. package/refs/vbenchmark/packages/evaluator/src/runners/security.ts +0 -94
  337. package/refs/vbenchmark/packages/evaluator/src/runners/visual.ts +0 -108
  338. package/refs/vbenchmark/packages/evaluator/src/types.d.ts +0 -19
  339. package/refs/vbenchmark/packages/evaluator/tsconfig.json +0 -8
  340. package/refs/vbenchmark/packages/leaderboard/Dockerfile +0 -38
  341. package/refs/vbenchmark/packages/leaderboard/drizzle.config.ts +0 -10
  342. package/refs/vbenchmark/packages/leaderboard/eslint.config.js +0 -16
  343. package/refs/vbenchmark/packages/leaderboard/fly.toml +0 -29
  344. package/refs/vbenchmark/packages/leaderboard/package.json +0 -36
  345. package/refs/vbenchmark/packages/leaderboard/src/app.ts +0 -29
  346. package/refs/vbenchmark/packages/leaderboard/src/components/BrowserPreview.tsx +0 -190
  347. package/refs/vbenchmark/packages/leaderboard/src/components/ComparisonView.tsx +0 -205
  348. package/refs/vbenchmark/packages/leaderboard/src/components/LeaderboardTable.tsx +0 -150
  349. package/refs/vbenchmark/packages/leaderboard/src/components/LiveRunCard.tsx +0 -133
  350. package/refs/vbenchmark/packages/leaderboard/src/components/SubmissionForm.tsx +0 -406
  351. package/refs/vbenchmark/packages/leaderboard/src/components/SubmitForm.tsx +0 -293
  352. package/refs/vbenchmark/packages/leaderboard/src/components/TerminalStream.tsx +0 -111
  353. package/refs/vbenchmark/packages/leaderboard/src/config/pricing.ts +0 -206
  354. package/refs/vbenchmark/packages/leaderboard/src/db/index.ts +0 -31
  355. package/refs/vbenchmark/packages/leaderboard/src/db/schema.ts +0 -125
  356. package/refs/vbenchmark/packages/leaderboard/src/index.ts +0 -13
  357. package/refs/vbenchmark/packages/leaderboard/src/lib/websocket.ts +0 -124
  358. package/refs/vbenchmark/packages/leaderboard/src/routes/leaderboard.ts +0 -698
  359. package/refs/vbenchmark/packages/leaderboard/src/routes/live.ts +0 -175
  360. package/refs/vbenchmark/packages/leaderboard/src/routes/submissions.ts +0 -183
  361. package/refs/vbenchmark/packages/leaderboard/src/routes/tasks.ts +0 -215
  362. package/refs/vbenchmark/packages/leaderboard/tests/api.test.ts +0 -228
  363. package/refs/vbenchmark/packages/leaderboard/tsconfig.json +0 -9
  364. package/refs/vbenchmark/scripts/deploy.sh +0 -70
  365. package/refs/vbenchmark/tasks/ai-integration/advanced/context-management/PROMPT.md +0 -15
  366. package/refs/vbenchmark/tasks/ai-integration/advanced/context-management/task.yaml +0 -16
  367. package/refs/vbenchmark/tasks/ai-integration/advanced/evaluation-framework/PROMPT.md +0 -15
  368. package/refs/vbenchmark/tasks/ai-integration/advanced/evaluation-framework/task.yaml +0 -16
  369. package/refs/vbenchmark/tasks/ai-integration/advanced/guardrails-safety/PROMPT.md +0 -15
  370. package/refs/vbenchmark/tasks/ai-integration/advanced/guardrails-safety/task.yaml +0 -16
  371. package/refs/vbenchmark/tasks/ai-integration/advanced/memory-system/PROMPT.md +0 -15
  372. package/refs/vbenchmark/tasks/ai-integration/advanced/memory-system/task.yaml +0 -16
  373. package/refs/vbenchmark/tasks/ai-integration/advanced/model-routing/PROMPT.md +0 -15
  374. package/refs/vbenchmark/tasks/ai-integration/advanced/model-routing/task.yaml +0 -16
  375. package/refs/vbenchmark/tasks/ai-integration/advanced/multi-agent-system/PROMPT.md +0 -15
  376. package/refs/vbenchmark/tasks/ai-integration/advanced/multi-agent-system/task.yaml +0 -16
  377. package/refs/vbenchmark/tasks/ai-integration/advanced/prompt-optimization/PROMPT.md +0 -15
  378. package/refs/vbenchmark/tasks/ai-integration/advanced/prompt-optimization/task.yaml +0 -16
  379. package/refs/vbenchmark/tasks/ai-integration/advanced/reasoning-chain/PROMPT.md +0 -15
  380. package/refs/vbenchmark/tasks/ai-integration/advanced/reasoning-chain/task.yaml +0 -16
  381. package/refs/vbenchmark/tasks/ai-integration/advanced/streaming-pipeline/PROMPT.md +0 -15
  382. package/refs/vbenchmark/tasks/ai-integration/advanced/streaming-pipeline/task.yaml +0 -16
  383. package/refs/vbenchmark/tasks/ai-integration/advanced/tool-use-orchestration/PROMPT.md +0 -15
  384. package/refs/vbenchmark/tasks/ai-integration/advanced/tool-use-orchestration/task.yaml +0 -16
  385. package/refs/vbenchmark/tasks/ai-integration/agents/code-review-agent/PROMPT.md +0 -64
  386. package/refs/vbenchmark/tasks/ai-integration/agents/code-review-agent/task.yaml +0 -24
  387. package/refs/vbenchmark/tasks/ai-integration/agents/research-agent/PROMPT.md +0 -61
  388. package/refs/vbenchmark/tasks/ai-integration/agents/research-agent/task.yaml +0 -24
  389. package/refs/vbenchmark/tasks/ai-integration/agents/web-scraper-agent/PROMPT.md +0 -57
  390. package/refs/vbenchmark/tasks/ai-integration/agents/web-scraper-agent/task.yaml +0 -24
  391. package/refs/vbenchmark/tasks/ai-integration/embeddings/duplicate-detection/PROMPT.md +0 -50
  392. package/refs/vbenchmark/tasks/ai-integration/embeddings/duplicate-detection/task.yaml +0 -24
  393. package/refs/vbenchmark/tasks/ai-integration/embeddings/recommendation-engine/PROMPT.md +0 -51
  394. package/refs/vbenchmark/tasks/ai-integration/embeddings/recommendation-engine/task.yaml +0 -24
  395. package/refs/vbenchmark/tasks/ai-integration/embeddings/semantic-search/PROMPT.md +0 -50
  396. package/refs/vbenchmark/tasks/ai-integration/embeddings/semantic-search/task.yaml +0 -24
  397. package/refs/vbenchmark/tasks/ai-integration/fine-tuning/classification-model/PROMPT.md +0 -50
  398. package/refs/vbenchmark/tasks/ai-integration/fine-tuning/classification-model/task.yaml +0 -24
  399. package/refs/vbenchmark/tasks/ai-integration/function-calling/api-orchestrator/PROMPT.md +0 -60
  400. package/refs/vbenchmark/tasks/ai-integration/function-calling/api-orchestrator/task.yaml +0 -24
  401. package/refs/vbenchmark/tasks/ai-integration/function-calling/calendar-assistant/PROMPT.md +0 -50
  402. package/refs/vbenchmark/tasks/ai-integration/function-calling/calendar-assistant/task.yaml +0 -24
  403. package/refs/vbenchmark/tasks/ai-integration/function-calling/database-query/PROMPT.md +0 -62
  404. package/refs/vbenchmark/tasks/ai-integration/function-calling/database-query/task.yaml +0 -24
  405. package/refs/vbenchmark/tasks/ai-integration/multimodal/chart-interpreter/PROMPT.md +0 -60
  406. package/refs/vbenchmark/tasks/ai-integration/multimodal/chart-interpreter/task.yaml +0 -24
  407. package/refs/vbenchmark/tasks/ai-integration/multimodal/image-captioning/PROMPT.md +0 -49
  408. package/refs/vbenchmark/tasks/ai-integration/multimodal/image-captioning/task.yaml +0 -24
  409. package/refs/vbenchmark/tasks/ai-integration/rag-chatbot/code-assistant/PROMPT.md +0 -51
  410. package/refs/vbenchmark/tasks/ai-integration/rag-chatbot/code-assistant/task.yaml +0 -24
  411. package/refs/vbenchmark/tasks/ai-integration/rag-chatbot/doc-search/PROMPT.md +0 -51
  412. package/refs/vbenchmark/tasks/ai-integration/rag-chatbot/doc-search/task.yaml +0 -24
  413. package/refs/vbenchmark/tasks/ai-integration/rag-chatbot/pdf-qa/PROMPT.md +0 -76
  414. package/refs/vbenchmark/tasks/ai-integration/rag-chatbot/pdf-qa/docker-compose.yaml +0 -30
  415. package/refs/vbenchmark/tasks/ai-integration/rag-chatbot/pdf-qa/task.yaml +0 -30
  416. package/refs/vbenchmark/tasks/ai-integration/rag-chatbot/pdf-qa/tests/functional/qa.test.py +0 -146
  417. package/refs/vbenchmark/tasks/ai-integration/rag-chatbot/support-bot/PROMPT.md +0 -51
  418. package/refs/vbenchmark/tasks/ai-integration/rag-chatbot/support-bot/task.yaml +0 -24
  419. package/refs/vbenchmark/tasks/ai-integration/structured-output/contract-analyzer/PROMPT.md +0 -67
  420. package/refs/vbenchmark/tasks/ai-integration/structured-output/contract-analyzer/task.yaml +0 -24
  421. package/refs/vbenchmark/tasks/ai-integration/structured-output/invoice-parser/PROMPT.md +0 -61
  422. package/refs/vbenchmark/tasks/ai-integration/structured-output/invoice-parser/task.yaml +0 -27
  423. package/refs/vbenchmark/tasks/ai-integration/structured-output/receipt-scanner/PROMPT.md +0 -65
  424. package/refs/vbenchmark/tasks/ai-integration/structured-output/receipt-scanner/task.yaml +0 -24
  425. package/refs/vbenchmark/tasks/ai-integration/structured-output/resume-parser/PROMPT.md +0 -70
  426. package/refs/vbenchmark/tasks/ai-integration/structured-output/resume-parser/task.yaml +0 -24
  427. package/refs/vbenchmark/tasks/api-integrations/advanced/api-analytics/PROMPT.md +0 -15
  428. package/refs/vbenchmark/tasks/api-integrations/advanced/api-analytics/task.yaml +0 -16
  429. package/refs/vbenchmark/tasks/api-integrations/advanced/api-gateway/PROMPT.md +0 -15
  430. package/refs/vbenchmark/tasks/api-integrations/advanced/api-gateway/task.yaml +0 -16
  431. package/refs/vbenchmark/tasks/api-integrations/advanced/api-mocking/PROMPT.md +0 -15
  432. package/refs/vbenchmark/tasks/api-integrations/advanced/api-mocking/task.yaml +0 -16
  433. package/refs/vbenchmark/tasks/api-integrations/advanced/contract-testing/PROMPT.md +0 -15
  434. package/refs/vbenchmark/tasks/api-integrations/advanced/contract-testing/task.yaml +0 -16
  435. package/refs/vbenchmark/tasks/api-integrations/advanced/graphql-federation/PROMPT.md +0 -15
  436. package/refs/vbenchmark/tasks/api-integrations/advanced/graphql-federation/task.yaml +0 -16
  437. package/refs/vbenchmark/tasks/api-integrations/advanced/grpc-gateway/PROMPT.md +0 -15
  438. package/refs/vbenchmark/tasks/api-integrations/advanced/grpc-gateway/task.yaml +0 -16
  439. package/refs/vbenchmark/tasks/api-integrations/advanced/rate-limiter/PROMPT.md +0 -15
  440. package/refs/vbenchmark/tasks/api-integrations/advanced/rate-limiter/task.yaml +0 -16
  441. package/refs/vbenchmark/tasks/api-integrations/advanced/request-validator/PROMPT.md +0 -15
  442. package/refs/vbenchmark/tasks/api-integrations/advanced/request-validator/task.yaml +0 -16
  443. package/refs/vbenchmark/tasks/api-integrations/advanced/sdk-generator/PROMPT.md +0 -15
  444. package/refs/vbenchmark/tasks/api-integrations/advanced/sdk-generator/task.yaml +0 -16
  445. package/refs/vbenchmark/tasks/api-integrations/advanced/webhook-processor/PROMPT.md +0 -15
  446. package/refs/vbenchmark/tasks/api-integrations/advanced/webhook-processor/task.yaml +0 -16
  447. package/refs/vbenchmark/tasks/api-integrations/analytics/mixpanel-events/PROMPT.md +0 -42
  448. package/refs/vbenchmark/tasks/api-integrations/analytics/mixpanel-events/task.yaml +0 -24
  449. package/refs/vbenchmark/tasks/api-integrations/analytics/segment-tracking/PROMPT.md +0 -42
  450. package/refs/vbenchmark/tasks/api-integrations/analytics/segment-tracking/task.yaml +0 -24
  451. package/refs/vbenchmark/tasks/api-integrations/auth-provider/oauth2-github/PROMPT.md +0 -42
  452. package/refs/vbenchmark/tasks/api-integrations/auth-provider/oauth2-github/task.yaml +0 -24
  453. package/refs/vbenchmark/tasks/api-integrations/auth-provider/okta-integration/PROMPT.md +0 -44
  454. package/refs/vbenchmark/tasks/api-integrations/auth-provider/okta-integration/task.yaml +0 -24
  455. package/refs/vbenchmark/tasks/api-integrations/auth-provider/saml-sso/PROMPT.md +0 -42
  456. package/refs/vbenchmark/tasks/api-integrations/auth-provider/saml-sso/task.yaml +0 -24
  457. package/refs/vbenchmark/tasks/api-integrations/communication/discord-webhook/PROMPT.md +0 -44
  458. package/refs/vbenchmark/tasks/api-integrations/communication/discord-webhook/task.yaml +0 -24
  459. package/refs/vbenchmark/tasks/api-integrations/communication/slack-bot/PROMPT.md +0 -42
  460. package/refs/vbenchmark/tasks/api-integrations/communication/slack-bot/task.yaml +0 -24
  461. package/refs/vbenchmark/tasks/api-integrations/communication/twilio-sms/PROMPT.md +0 -42
  462. package/refs/vbenchmark/tasks/api-integrations/communication/twilio-sms/task.yaml +0 -24
  463. package/refs/vbenchmark/tasks/api-integrations/email/transactional/PROMPT.md +0 -82
  464. package/refs/vbenchmark/tasks/api-integrations/email/transactional/task.yaml +0 -27
  465. package/refs/vbenchmark/tasks/api-integrations/maps/google-maps-geocoding/PROMPT.md +0 -41
  466. package/refs/vbenchmark/tasks/api-integrations/maps/google-maps-geocoding/task.yaml +0 -24
  467. package/refs/vbenchmark/tasks/api-integrations/maps/mapbox-directions/PROMPT.md +0 -41
  468. package/refs/vbenchmark/tasks/api-integrations/maps/mapbox-directions/task.yaml +0 -24
  469. package/refs/vbenchmark/tasks/api-integrations/payment/crypto-payments/PROMPT.md +0 -43
  470. package/refs/vbenchmark/tasks/api-integrations/payment/crypto-payments/task.yaml +0 -24
  471. package/refs/vbenchmark/tasks/api-integrations/payment/paypal-integration/PROMPT.md +0 -41
  472. package/refs/vbenchmark/tasks/api-integrations/payment/paypal-integration/task.yaml +0 -24
  473. package/refs/vbenchmark/tasks/api-integrations/social/twitter-api/PROMPT.md +0 -41
  474. package/refs/vbenchmark/tasks/api-integrations/social/twitter-api/task.yaml +0 -24
  475. package/refs/vbenchmark/tasks/api-integrations/storage/cloudinary-upload/PROMPT.md +0 -43
  476. package/refs/vbenchmark/tasks/api-integrations/storage/cloudinary-upload/task.yaml +0 -24
  477. package/refs/vbenchmark/tasks/api-integrations/storage/gcs-streaming/PROMPT.md +0 -43
  478. package/refs/vbenchmark/tasks/api-integrations/storage/gcs-streaming/task.yaml +0 -24
  479. package/refs/vbenchmark/tasks/api-integrations/storage/s3-presigned-urls/PROMPT.md +0 -41
  480. package/refs/vbenchmark/tasks/api-integrations/storage/s3-presigned-urls/task.yaml +0 -24
  481. package/refs/vbenchmark/tasks/api-integrations/stripe/checkout-session/PROMPT.md +0 -41
  482. package/refs/vbenchmark/tasks/api-integrations/stripe/checkout-session/task.yaml +0 -24
  483. package/refs/vbenchmark/tasks/api-integrations/stripe/payment-webhook/PROMPT.md +0 -60
  484. package/refs/vbenchmark/tasks/api-integrations/stripe/payment-webhook/docker-compose.yaml +0 -38
  485. package/refs/vbenchmark/tasks/api-integrations/stripe/payment-webhook/task.yaml +0 -31
  486. package/refs/vbenchmark/tasks/api-integrations/stripe/payment-webhook/tests/webhook.test.ts +0 -193
  487. package/refs/vbenchmark/tasks/api-integrations/stripe/subscription-portal/PROMPT.md +0 -41
  488. package/refs/vbenchmark/tasks/api-integrations/stripe/subscription-portal/task.yaml +0 -24
  489. package/refs/vbenchmark/tasks/code-evolution/advanced/api-deprecation/PROMPT.md +0 -15
  490. package/refs/vbenchmark/tasks/code-evolution/advanced/api-deprecation/task.yaml +0 -16
  491. package/refs/vbenchmark/tasks/code-evolution/advanced/ast-refactoring/PROMPT.md +0 -15
  492. package/refs/vbenchmark/tasks/code-evolution/advanced/ast-refactoring/task.yaml +0 -16
  493. package/refs/vbenchmark/tasks/code-evolution/advanced/concurrency-fix/PROMPT.md +0 -15
  494. package/refs/vbenchmark/tasks/code-evolution/advanced/concurrency-fix/task.yaml +0 -16
  495. package/refs/vbenchmark/tasks/code-evolution/advanced/database-schema-migration/PROMPT.md +0 -15
  496. package/refs/vbenchmark/tasks/code-evolution/advanced/database-schema-migration/task.yaml +0 -16
  497. package/refs/vbenchmark/tasks/code-evolution/advanced/dead-code-elimination/PROMPT.md +0 -15
  498. package/refs/vbenchmark/tasks/code-evolution/advanced/dead-code-elimination/task.yaml +0 -16
  499. package/refs/vbenchmark/tasks/code-evolution/advanced/dependency-upgrade/PROMPT.md +0 -15
  500. package/refs/vbenchmark/tasks/code-evolution/advanced/dependency-upgrade/task.yaml +0 -16
  501. package/refs/vbenchmark/tasks/code-evolution/advanced/memory-optimization/PROMPT.md +0 -15
  502. package/refs/vbenchmark/tasks/code-evolution/advanced/memory-optimization/task.yaml +0 -16
  503. package/refs/vbenchmark/tasks/code-evolution/advanced/monorepo-extraction/PROMPT.md +0 -15
  504. package/refs/vbenchmark/tasks/code-evolution/advanced/monorepo-extraction/task.yaml +0 -16
  505. package/refs/vbenchmark/tasks/code-evolution/advanced/performance-profiling/PROMPT.md +0 -15
  506. package/refs/vbenchmark/tasks/code-evolution/advanced/performance-profiling/task.yaml +0 -16
  507. package/refs/vbenchmark/tasks/code-evolution/advanced/type-migration/PROMPT.md +0 -15
  508. package/refs/vbenchmark/tasks/code-evolution/advanced/type-migration/task.yaml +0 -16
  509. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/callback-to-async/PROMPT.md +0 -47
  510. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/callback-to-async/task.yaml +0 -24
  511. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/express-to-fastify/PROMPT.md +0 -49
  512. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/express-to-fastify/base-code/src/app.ts +0 -22
  513. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/express-to-fastify/task.yaml +0 -37
  514. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/express-to-fastify/tests/api.test.ts +0 -70
  515. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/flask-to-fastapi/PROMPT.md +0 -46
  516. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/flask-to-fastapi/task.yaml +0 -24
  517. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/java-to-kotlin/PROMPT.md +0 -45
  518. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/java-to-kotlin/task.yaml +0 -24
  519. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/jquery-to-react/PROMPT.md +0 -47
  520. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/jquery-to-react/task.yaml +0 -24
  521. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/rest-to-grpc/PROMPT.md +0 -47
  522. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/rest-to-grpc/task.yaml +0 -24
  523. package/refs/vbenchmark/tasks/code-evolution/performance/async-refactor/PROMPT.md +0 -47
  524. package/refs/vbenchmark/tasks/code-evolution/performance/async-refactor/task.yaml +0 -24
  525. package/refs/vbenchmark/tasks/code-evolution/performance/memory-leak-fix/PROMPT.md +0 -47
  526. package/refs/vbenchmark/tasks/code-evolution/performance/memory-leak-fix/task.yaml +0 -24
  527. package/refs/vbenchmark/tasks/code-evolution/performance/query-optimization/PROMPT.md +0 -49
  528. package/refs/vbenchmark/tasks/code-evolution/performance/query-optimization/task.yaml +0 -24
  529. package/refs/vbenchmark/tasks/code-evolution/refactoring/class-to-hooks/PROMPT.md +0 -96
  530. package/refs/vbenchmark/tasks/code-evolution/refactoring/class-to-hooks/task.yaml +0 -27
  531. package/refs/vbenchmark/tasks/code-evolution/refactoring/dependency-injection/PROMPT.md +0 -47
  532. package/refs/vbenchmark/tasks/code-evolution/refactoring/dependency-injection/task.yaml +0 -24
  533. package/refs/vbenchmark/tasks/code-evolution/refactoring/error-handling/PROMPT.md +0 -48
  534. package/refs/vbenchmark/tasks/code-evolution/refactoring/error-handling/task.yaml +0 -24
  535. package/refs/vbenchmark/tasks/code-evolution/refactoring/monolith-to-modules/PROMPT.md +0 -50
  536. package/refs/vbenchmark/tasks/code-evolution/refactoring/monolith-to-modules/task.yaml +0 -24
  537. package/refs/vbenchmark/tasks/code-evolution/refactoring/orm-migration/PROMPT.md +0 -47
  538. package/refs/vbenchmark/tasks/code-evolution/refactoring/orm-migration/task.yaml +0 -24
  539. package/refs/vbenchmark/tasks/code-evolution/security/secrets-rotation/PROMPT.md +0 -49
  540. package/refs/vbenchmark/tasks/code-evolution/security/secrets-rotation/task.yaml +0 -24
  541. package/refs/vbenchmark/tasks/code-evolution/security/sql-injection-fix/PROMPT.md +0 -50
  542. package/refs/vbenchmark/tasks/code-evolution/security/sql-injection-fix/task.yaml +0 -24
  543. package/refs/vbenchmark/tasks/code-evolution/security/xss-prevention/PROMPT.md +0 -47
  544. package/refs/vbenchmark/tasks/code-evolution/security/xss-prevention/task.yaml +0 -24
  545. package/refs/vbenchmark/tasks/code-evolution/testing/add-unit-tests/PROMPT.md +0 -48
  546. package/refs/vbenchmark/tasks/code-evolution/testing/add-unit-tests/task.yaml +0 -24
  547. package/refs/vbenchmark/tasks/code-evolution/testing/e2e-playwright/PROMPT.md +0 -50
  548. package/refs/vbenchmark/tasks/code-evolution/testing/e2e-playwright/task.yaml +0 -24
  549. package/refs/vbenchmark/tasks/code-evolution/testing/pytest-fixtures/PROMPT.md +0 -47
  550. package/refs/vbenchmark/tasks/code-evolution/testing/pytest-fixtures/task.yaml +0 -24
  551. package/refs/vbenchmark/tasks/frontend/accessibility/keyboard-shortcuts/PROMPT.md +0 -44
  552. package/refs/vbenchmark/tasks/frontend/accessibility/keyboard-shortcuts/task.yaml +0 -24
  553. package/refs/vbenchmark/tasks/frontend/accessibility/screen-reader-nav/PROMPT.md +0 -44
  554. package/refs/vbenchmark/tasks/frontend/accessibility/screen-reader-nav/task.yaml +0 -24
  555. package/refs/vbenchmark/tasks/frontend/advanced/canvas-editor/PROMPT.md +0 -15
  556. package/refs/vbenchmark/tasks/frontend/advanced/canvas-editor/task.yaml +0 -16
  557. package/refs/vbenchmark/tasks/frontend/advanced/micro-frontend/PROMPT.md +0 -15
  558. package/refs/vbenchmark/tasks/frontend/advanced/micro-frontend/task.yaml +0 -16
  559. package/refs/vbenchmark/tasks/frontend/advanced/offline-first/PROMPT.md +0 -15
  560. package/refs/vbenchmark/tasks/frontend/advanced/offline-first/task.yaml +0 -16
  561. package/refs/vbenchmark/tasks/frontend/advanced/realtime-collab/PROMPT.md +0 -15
  562. package/refs/vbenchmark/tasks/frontend/advanced/realtime-collab/task.yaml +0 -16
  563. package/refs/vbenchmark/tasks/frontend/advanced/service-worker/PROMPT.md +0 -15
  564. package/refs/vbenchmark/tasks/frontend/advanced/service-worker/task.yaml +0 -16
  565. package/refs/vbenchmark/tasks/frontend/advanced/state-machine/PROMPT.md +0 -15
  566. package/refs/vbenchmark/tasks/frontend/advanced/state-machine/task.yaml +0 -16
  567. package/refs/vbenchmark/tasks/frontend/advanced/virtual-list/PROMPT.md +0 -15
  568. package/refs/vbenchmark/tasks/frontend/advanced/virtual-list/task.yaml +0 -16
  569. package/refs/vbenchmark/tasks/frontend/advanced/wasm-integration/PROMPT.md +0 -15
  570. package/refs/vbenchmark/tasks/frontend/advanced/wasm-integration/task.yaml +0 -16
  571. package/refs/vbenchmark/tasks/frontend/advanced/web-worker/PROMPT.md +0 -15
  572. package/refs/vbenchmark/tasks/frontend/advanced/web-worker/task.yaml +0 -16
  573. package/refs/vbenchmark/tasks/frontend/advanced/webgl-visualization/PROMPT.md +0 -15
  574. package/refs/vbenchmark/tasks/frontend/advanced/webgl-visualization/task.yaml +0 -16
  575. package/refs/vbenchmark/tasks/frontend/animation/page-transitions/PROMPT.md +0 -44
  576. package/refs/vbenchmark/tasks/frontend/animation/page-transitions/task.yaml +0 -24
  577. package/refs/vbenchmark/tasks/frontend/components/data-grid/PROMPT.md +0 -59
  578. package/refs/vbenchmark/tasks/frontend/components/data-grid/task.yaml +0 -24
  579. package/refs/vbenchmark/tasks/frontend/components/date-range-picker/PROMPT.md +0 -57
  580. package/refs/vbenchmark/tasks/frontend/components/date-range-picker/task.yaml +0 -24
  581. package/refs/vbenchmark/tasks/frontend/components/file-uploader/PROMPT.md +0 -55
  582. package/refs/vbenchmark/tasks/frontend/components/file-uploader/task.yaml +0 -24
  583. package/refs/vbenchmark/tasks/frontend/components/form-builder/PROMPT.md +0 -96
  584. package/refs/vbenchmark/tasks/frontend/components/form-builder/task.yaml +0 -28
  585. package/refs/vbenchmark/tasks/frontend/components/rich-text-editor/PROMPT.md +0 -45
  586. package/refs/vbenchmark/tasks/frontend/components/rich-text-editor/task.yaml +0 -24
  587. package/refs/vbenchmark/tasks/frontend/figma-to-code/dashboard-layout/PROMPT.md +0 -50
  588. package/refs/vbenchmark/tasks/frontend/figma-to-code/dashboard-layout/task.yaml +0 -25
  589. package/refs/vbenchmark/tasks/frontend/figma-to-code/landing-page/PROMPT.md +0 -49
  590. package/refs/vbenchmark/tasks/frontend/figma-to-code/landing-page/task.yaml +0 -25
  591. package/refs/vbenchmark/tasks/frontend/figma-to-code/mobile-app-screen/PROMPT.md +0 -51
  592. package/refs/vbenchmark/tasks/frontend/figma-to-code/mobile-app-screen/task.yaml +0 -24
  593. package/refs/vbenchmark/tasks/frontend/figma-to-code/pricing-card/PROMPT.md +0 -93
  594. package/refs/vbenchmark/tasks/frontend/figma-to-code/pricing-card/docker-compose.yaml +0 -23
  595. package/refs/vbenchmark/tasks/frontend/figma-to-code/pricing-card/task.yaml +0 -30
  596. package/refs/vbenchmark/tasks/frontend/figma-to-code/pricing-card/tests/visual/diff.test.ts +0 -107
  597. package/refs/vbenchmark/tasks/frontend/figma-to-code/pricing-card/tests/visual/interaction.test.ts +0 -88
  598. package/refs/vbenchmark/tasks/frontend/performance/image-lazy-load/PROMPT.md +0 -43
  599. package/refs/vbenchmark/tasks/frontend/performance/image-lazy-load/task.yaml +0 -24
  600. package/refs/vbenchmark/tasks/frontend/performance/infinite-scroll/PROMPT.md +0 -44
  601. package/refs/vbenchmark/tasks/frontend/performance/infinite-scroll/task.yaml +0 -24
  602. package/refs/vbenchmark/tasks/frontend/state-management/collaborative-editor/PROMPT.md +0 -44
  603. package/refs/vbenchmark/tasks/frontend/state-management/collaborative-editor/task.yaml +0 -24
  604. package/refs/vbenchmark/tasks/frontend/state-management/shopping-cart/PROMPT.md +0 -53
  605. package/refs/vbenchmark/tasks/frontend/state-management/shopping-cart/task.yaml +0 -24
  606. package/refs/vbenchmark/tasks/frontend/visualization/chart-dashboard/PROMPT.md +0 -83
  607. package/refs/vbenchmark/tasks/frontend/visualization/chart-dashboard/task.yaml +0 -28
  608. package/refs/vbenchmark/tasks/frontend/visualization/gantt-chart/PROMPT.md +0 -57
  609. package/refs/vbenchmark/tasks/frontend/visualization/gantt-chart/task.yaml +0 -24
  610. package/refs/vbenchmark/tasks/frontend/visualization/map-dashboard/PROMPT.md +0 -44
  611. package/refs/vbenchmark/tasks/frontend/visualization/map-dashboard/task.yaml +0 -24
  612. package/refs/vbenchmark/tasks/frontend/visualization/realtime-charts/PROMPT.md +0 -43
  613. package/refs/vbenchmark/tasks/frontend/visualization/realtime-charts/task.yaml +0 -24
  614. package/refs/vbenchmark/tasks/glue-code/advanced/blue-green-deploy/PROMPT.md +0 -15
  615. package/refs/vbenchmark/tasks/glue-code/advanced/blue-green-deploy/task.yaml +0 -16
  616. package/refs/vbenchmark/tasks/glue-code/advanced/canary-release/PROMPT.md +0 -15
  617. package/refs/vbenchmark/tasks/glue-code/advanced/canary-release/task.yaml +0 -16
  618. package/refs/vbenchmark/tasks/glue-code/advanced/change-data-capture/PROMPT.md +0 -15
  619. package/refs/vbenchmark/tasks/glue-code/advanced/change-data-capture/task.yaml +0 -16
  620. package/refs/vbenchmark/tasks/glue-code/advanced/config-management/PROMPT.md +0 -15
  621. package/refs/vbenchmark/tasks/glue-code/advanced/config-management/task.yaml +0 -16
  622. package/refs/vbenchmark/tasks/glue-code/advanced/data-pipeline/PROMPT.md +0 -15
  623. package/refs/vbenchmark/tasks/glue-code/advanced/data-pipeline/task.yaml +0 -16
  624. package/refs/vbenchmark/tasks/glue-code/advanced/distributed-tracing/PROMPT.md +0 -15
  625. package/refs/vbenchmark/tasks/glue-code/advanced/distributed-tracing/task.yaml +0 -16
  626. package/refs/vbenchmark/tasks/glue-code/advanced/log-aggregation/PROMPT.md +0 -15
  627. package/refs/vbenchmark/tasks/glue-code/advanced/log-aggregation/task.yaml +0 -16
  628. package/refs/vbenchmark/tasks/glue-code/advanced/schema-registry/PROMPT.md +0 -15
  629. package/refs/vbenchmark/tasks/glue-code/advanced/schema-registry/task.yaml +0 -16
  630. package/refs/vbenchmark/tasks/glue-code/advanced/secret-rotation/PROMPT.md +0 -15
  631. package/refs/vbenchmark/tasks/glue-code/advanced/secret-rotation/task.yaml +0 -16
  632. package/refs/vbenchmark/tasks/glue-code/advanced/stream-processing/PROMPT.md +0 -15
  633. package/refs/vbenchmark/tasks/glue-code/advanced/stream-processing/task.yaml +0 -16
  634. package/refs/vbenchmark/tasks/glue-code/api-sync/rest-to-graphql/PROMPT.md +0 -66
  635. package/refs/vbenchmark/tasks/glue-code/api-sync/rest-to-graphql/task.yaml +0 -27
  636. package/refs/vbenchmark/tasks/glue-code/caching/redis-cache/PROMPT.md +0 -82
  637. package/refs/vbenchmark/tasks/glue-code/caching/redis-cache/task.yaml +0 -27
  638. package/refs/vbenchmark/tasks/glue-code/data-transform/avro-schema-evolution/PROMPT.md +0 -51
  639. package/refs/vbenchmark/tasks/glue-code/data-transform/avro-schema-evolution/task.yaml +0 -24
  640. package/refs/vbenchmark/tasks/glue-code/data-transform/csv-normalizer/PROMPT.md +0 -49
  641. package/refs/vbenchmark/tasks/glue-code/data-transform/csv-normalizer/task.yaml +0 -24
  642. package/refs/vbenchmark/tasks/glue-code/data-transform/excel-to-json/PROMPT.md +0 -67
  643. package/refs/vbenchmark/tasks/glue-code/data-transform/excel-to-json/task.yaml +0 -28
  644. package/refs/vbenchmark/tasks/glue-code/data-transform/excel-to-json/tests/transform.test.py +0 -137
  645. package/refs/vbenchmark/tasks/glue-code/data-transform/json-to-xml/PROMPT.md +0 -45
  646. package/refs/vbenchmark/tasks/glue-code/data-transform/json-to-xml/task.yaml +0 -24
  647. package/refs/vbenchmark/tasks/glue-code/data-transform/protobuf-converter/PROMPT.md +0 -44
  648. package/refs/vbenchmark/tasks/glue-code/data-transform/protobuf-converter/task.yaml +0 -24
  649. package/refs/vbenchmark/tasks/glue-code/etl/cdc-pipeline/PROMPT.md +0 -52
  650. package/refs/vbenchmark/tasks/glue-code/etl/cdc-pipeline/task.yaml +0 -27
  651. package/refs/vbenchmark/tasks/glue-code/etl/database-sync/PROMPT.md +0 -51
  652. package/refs/vbenchmark/tasks/glue-code/etl/database-sync/task.yaml +0 -24
  653. package/refs/vbenchmark/tasks/glue-code/etl/s3-to-warehouse/PROMPT.md +0 -50
  654. package/refs/vbenchmark/tasks/glue-code/etl/s3-to-warehouse/task.yaml +0 -24
  655. package/refs/vbenchmark/tasks/glue-code/file-processing/image-resizer/PROMPT.md +0 -52
  656. package/refs/vbenchmark/tasks/glue-code/file-processing/image-resizer/task.yaml +0 -24
  657. package/refs/vbenchmark/tasks/glue-code/file-processing/pdf-merger/PROMPT.md +0 -50
  658. package/refs/vbenchmark/tasks/glue-code/file-processing/pdf-merger/task.yaml +0 -24
  659. package/refs/vbenchmark/tasks/glue-code/file-processing/video-transcoder/PROMPT.md +0 -50
  660. package/refs/vbenchmark/tasks/glue-code/file-processing/video-transcoder/task.yaml +0 -27
  661. package/refs/vbenchmark/tasks/glue-code/migration/data-backfill/PROMPT.md +0 -50
  662. package/refs/vbenchmark/tasks/glue-code/migration/data-backfill/task.yaml +0 -24
  663. package/refs/vbenchmark/tasks/glue-code/migration/database-versioning/PROMPT.md +0 -50
  664. package/refs/vbenchmark/tasks/glue-code/migration/database-versioning/task.yaml +0 -24
  665. package/refs/vbenchmark/tasks/glue-code/queue/kafka-producer/PROMPT.md +0 -49
  666. package/refs/vbenchmark/tasks/glue-code/queue/kafka-producer/task.yaml +0 -27
  667. package/refs/vbenchmark/tasks/glue-code/queue/rabbitmq-consumer/PROMPT.md +0 -50
  668. package/refs/vbenchmark/tasks/glue-code/queue/rabbitmq-consumer/task.yaml +0 -27
  669. package/refs/vbenchmark/tasks/glue-code/queue/sqs-batch-processor/PROMPT.md +0 -47
  670. package/refs/vbenchmark/tasks/glue-code/queue/sqs-batch-processor/task.yaml +0 -24
  671. package/refs/vbenchmark/tasks/glue-code/scheduler/cron-job-manager/PROMPT.md +0 -52
  672. package/refs/vbenchmark/tasks/glue-code/scheduler/cron-job-manager/task.yaml +0 -27
  673. package/refs/vbenchmark/tasks/glue-code/scheduler/delayed-tasks/PROMPT.md +0 -51
  674. package/refs/vbenchmark/tasks/glue-code/scheduler/delayed-tasks/task.yaml +0 -27
  675. package/refs/vbenchmark/tasks/saas-core/advanced/api-versioning/PROMPT.md +0 -15
  676. package/refs/vbenchmark/tasks/saas-core/advanced/api-versioning/task.yaml +0 -16
  677. package/refs/vbenchmark/tasks/saas-core/advanced/circuit-breaker/PROMPT.md +0 -13
  678. package/refs/vbenchmark/tasks/saas-core/advanced/circuit-breaker/task.yaml +0 -16
  679. package/refs/vbenchmark/tasks/saas-core/advanced/compliance-gdpr/PROMPT.md +0 -15
  680. package/refs/vbenchmark/tasks/saas-core/advanced/compliance-gdpr/task.yaml +0 -16
  681. package/refs/vbenchmark/tasks/saas-core/advanced/cqrs-pattern/PROMPT.md +0 -13
  682. package/refs/vbenchmark/tasks/saas-core/advanced/cqrs-pattern/task.yaml +0 -16
  683. package/refs/vbenchmark/tasks/saas-core/advanced/data-encryption/PROMPT.md +0 -15
  684. package/refs/vbenchmark/tasks/saas-core/advanced/data-encryption/task.yaml +0 -16
  685. package/refs/vbenchmark/tasks/saas-core/advanced/distributed-locking/PROMPT.md +0 -46
  686. package/refs/vbenchmark/tasks/saas-core/advanced/distributed-locking/task.yaml +0 -24
  687. package/refs/vbenchmark/tasks/saas-core/advanced/event-sourcing/PROMPT.md +0 -23
  688. package/refs/vbenchmark/tasks/saas-core/advanced/event-sourcing/task.yaml +0 -16
  689. package/refs/vbenchmark/tasks/saas-core/advanced/feature-flags-ab/PROMPT.md +0 -15
  690. package/refs/vbenchmark/tasks/saas-core/advanced/feature-flags-ab/task.yaml +0 -16
  691. package/refs/vbenchmark/tasks/saas-core/advanced/saga-orchestration/PROMPT.md +0 -13
  692. package/refs/vbenchmark/tasks/saas-core/advanced/saga-orchestration/task.yaml +0 -16
  693. package/refs/vbenchmark/tasks/saas-core/advanced/webhook-delivery/PROMPT.md +0 -15
  694. package/refs/vbenchmark/tasks/saas-core/advanced/webhook-delivery/task.yaml +0 -16
  695. package/refs/vbenchmark/tasks/saas-core/audit/activity-logging/PROMPT.md +0 -50
  696. package/refs/vbenchmark/tasks/saas-core/audit/activity-logging/task.yaml +0 -27
  697. package/refs/vbenchmark/tasks/saas-core/auth/jwt-refresh-tokens/PROMPT.md +0 -50
  698. package/refs/vbenchmark/tasks/saas-core/auth/jwt-refresh-tokens/task.yaml +0 -27
  699. package/refs/vbenchmark/tasks/saas-core/auth/magic-link-email/PROMPT.md +0 -53
  700. package/refs/vbenchmark/tasks/saas-core/auth/magic-link-email/task.yaml +0 -27
  701. package/refs/vbenchmark/tasks/saas-core/auth/mfa-totp/PROMPT.md +0 -79
  702. package/refs/vbenchmark/tasks/saas-core/auth/mfa-totp/task.yaml +0 -27
  703. package/refs/vbenchmark/tasks/saas-core/auth/rbac-permissions/PROMPT.md +0 -51
  704. package/refs/vbenchmark/tasks/saas-core/auth/rbac-permissions/task.yaml +0 -27
  705. package/refs/vbenchmark/tasks/saas-core/auth/session-management/PROMPT.md +0 -52
  706. package/refs/vbenchmark/tasks/saas-core/auth/session-management/task.yaml +0 -27
  707. package/refs/vbenchmark/tasks/saas-core/auth/supabase-oauth/PROMPT.md +0 -45
  708. package/refs/vbenchmark/tasks/saas-core/auth/supabase-oauth/docker-compose.yaml +0 -47
  709. package/refs/vbenchmark/tasks/saas-core/auth/supabase-oauth/task.yaml +0 -32
  710. package/refs/vbenchmark/tasks/saas-core/auth/supabase-oauth/tests/auth.test.ts +0 -59
  711. package/refs/vbenchmark/tasks/saas-core/billing/invoice-generation/PROMPT.md +0 -53
  712. package/refs/vbenchmark/tasks/saas-core/billing/invoice-generation/task.yaml +0 -27
  713. package/refs/vbenchmark/tasks/saas-core/billing/stripe-subscriptions/PROMPT.md +0 -51
  714. package/refs/vbenchmark/tasks/saas-core/billing/stripe-subscriptions/task.yaml +0 -27
  715. package/refs/vbenchmark/tasks/saas-core/billing/usage-metering/PROMPT.md +0 -52
  716. package/refs/vbenchmark/tasks/saas-core/billing/usage-metering/task.yaml +0 -27
  717. package/refs/vbenchmark/tasks/saas-core/crud/dashboard-table/PROMPT.md +0 -48
  718. package/refs/vbenchmark/tasks/saas-core/crud/dashboard-table/task.yaml +0 -28
  719. package/refs/vbenchmark/tasks/saas-core/multi-tenant/org-isolation/PROMPT.md +0 -50
  720. package/refs/vbenchmark/tasks/saas-core/multi-tenant/org-isolation/task.yaml +0 -27
  721. package/refs/vbenchmark/tasks/saas-core/multi-tenant/subdomain-routing/PROMPT.md +0 -50
  722. package/refs/vbenchmark/tasks/saas-core/multi-tenant/subdomain-routing/task.yaml +0 -27
  723. package/refs/vbenchmark/tasks/saas-core/notifications/email-queue/PROMPT.md +0 -53
  724. package/refs/vbenchmark/tasks/saas-core/notifications/email-queue/task.yaml +0 -27
  725. package/refs/vbenchmark/tasks/saas-core/notifications/in-app-alerts/PROMPT.md +0 -51
  726. package/refs/vbenchmark/tasks/saas-core/notifications/in-app-alerts/task.yaml +0 -27
  727. package/refs/vbenchmark/tasks/saas-core/notifications/push-notifications/PROMPT.md +0 -51
  728. package/refs/vbenchmark/tasks/saas-core/notifications/push-notifications/task.yaml +0 -27
  729. package/refs/vbenchmark/tasks/saas-core/realtime/websocket-chat/PROMPT.md +0 -80
  730. package/refs/vbenchmark/tasks/saas-core/realtime/websocket-chat/task.yaml +0 -27
  731. package/refs/vbenchmark/tasks/saas-core/search/full-text-search/PROMPT.md +0 -51
  732. package/refs/vbenchmark/tasks/saas-core/search/full-text-search/task.yaml +0 -27
  733. package/refs/vbenchmark/tasks/saas-core/security/rate-limiter/PROMPT.md +0 -99
  734. package/refs/vbenchmark/tasks/saas-core/security/rate-limiter/task.yaml +0 -27
  735. package/refs/vbenchmark/tasks/saas-core/settings/user-preferences/PROMPT.md +0 -78
  736. package/refs/vbenchmark/tasks/saas-core/settings/user-preferences/task.yaml +0 -27
  737. package/refs/vbenchmark/templates/fastapi-postgres/docker-compose.yaml +0 -36
  738. package/refs/vbenchmark/templates/fastapi-postgres/pyproject.toml +0 -34
  739. package/refs/vbenchmark/templates/fastapi-postgres/src/__init__.py +0 -0
  740. package/refs/vbenchmark/templates/fastapi-postgres/src/config.py +0 -12
  741. package/refs/vbenchmark/templates/fastapi-postgres/src/database.py +0 -15
  742. package/refs/vbenchmark/templates/fastapi-postgres/src/main.py +0 -51
  743. package/refs/vbenchmark/templates/fastapi-postgres/src/models.py +0 -12
  744. package/refs/vbenchmark/templates/fastapi-postgres/src/schemas.py +0 -20
  745. package/refs/vbenchmark/templates/go-fiber/docker-compose.yaml +0 -34
  746. package/refs/vbenchmark/templates/go-fiber/go.mod +0 -33
  747. package/refs/vbenchmark/templates/go-fiber/go.sum +0 -68
  748. package/refs/vbenchmark/templates/go-fiber/main.go +0 -98
  749. package/refs/vbenchmark/templates/nextjs-supabase/.env.example +0 -3
  750. package/refs/vbenchmark/templates/nextjs-supabase/docker-compose.yaml +0 -68
  751. package/refs/vbenchmark/templates/nextjs-supabase/src/app/globals.css +0 -13
  752. package/refs/vbenchmark/templates/nextjs-supabase/src/app/layout.tsx +0 -19
  753. package/refs/vbenchmark/templates/nextjs-supabase/src/app/page.tsx +0 -38
  754. package/refs/vbenchmark/templates/nextjs-supabase/src/lib/supabase/client.ts +0 -8
  755. package/refs/vbenchmark/templates/nextjs-supabase/src/lib/supabase/server.ts +0 -32
  756. package/refs/vbenchmark/templates/rust-axum/Cargo.lock +0 -2371
  757. package/refs/vbenchmark/templates/rust-axum/Cargo.toml +0 -16
  758. package/refs/vbenchmark/templates/rust-axum/docker-compose.yaml +0 -34
  759. package/refs/vbenchmark/templates/rust-axum/migrations/20240101000000_init.sql +0 -20
  760. package/refs/vbenchmark/templates/rust-axum/src/main.rs +0 -121
  761. package/refs/vbenchmark/tsconfig.base.json +0 -18
  762. package/refs/vbenchmark/turbo.json +0 -23
  763. package/refs/vbenchmark/vercel.json +0 -10
@@ -1,698 +0,0 @@
1
- import { Hono } from 'hono';
2
- import { eq, desc, sql } from 'drizzle-orm';
3
- import { db, benchmarkRuns } from '../db/index.js';
4
- import { MODEL_PRICING } from '../config/pricing.js';
5
-
6
- const leaderboardRoutes = new Hono();
7
-
8
- // In-memory fallback when database is not available
9
- interface LeaderboardEntry {
10
- rank: number;
11
- agentName: string;
12
- agentVersion: string;
13
- modelName: string;
14
- tasksCompleted: number;
15
- passedTasks: number;
16
- failedTasks: number;
17
- avgScore: number;
18
- avgFunctional: number;
19
- avgVisual: number;
20
- avgQuality: number;
21
- avgSecurity: number;
22
- avgCost: number;
23
- avgSpeed: number;
24
- totalTokens: number;
25
- inputTokens: number;
26
- outputTokens: number;
27
- totalCostUSD: number;
28
- avgTimeMs: number;
29
- pricingInput: number;
30
- pricingOutput: number;
31
- lastUpdated: string;
32
- }
33
-
34
- // Total tasks in benchmark (6 categories × 30 tasks each = 180)
35
- const TOTAL_TASKS = 180;
36
- const TASKS_PER_CATEGORY = 30;
37
-
38
- // Real benchmark results from 2026-01-20 evaluation (180 tasks each)
39
- const mockLeaderboard: LeaderboardEntry[] = [
40
- {
41
- rank: 1,
42
- agentName: 'GLM-4',
43
- agentVersion: 'GLM-4-Plus',
44
- modelName: 'GLM 4-Plus',
45
- tasksCompleted: 180,
46
- passedTasks: 178,
47
- failedTasks: 2,
48
- avgScore: 88.20,
49
- avgFunctional: 84.06,
50
- avgVisual: 80.0,
51
- avgQuality: 80.0,
52
- avgSecurity: 100.0,
53
- avgCost: 92.0,
54
- avgSpeed: 75.0,
55
- totalTokens: 794105,
56
- inputTokens: 238232,
57
- outputTokens: 555873,
58
- totalCostUSD: 0.93,
59
- avgTimeMs: 96210,
60
- pricingInput: 0.40,
61
- pricingOutput: 1.50,
62
- lastUpdated: '2026-01-20T10:20:00.000Z',
63
- },
64
- {
65
- rank: 2,
66
- agentName: 'MiniMax',
67
- agentVersion: 'M2.1',
68
- modelName: 'MiniMax M2.1',
69
- tasksCompleted: 180,
70
- passedTasks: 179,
71
- failedTasks: 1,
72
- avgScore: 87.42,
73
- avgFunctional: 84.53,
74
- avgVisual: 80.0,
75
- avgQuality: 80.0,
76
- avgSecurity: 100.0,
77
- avgCost: 85.0,
78
- avgSpeed: 60.0,
79
- totalTokens: 2778476,
80
- inputTokens: 833543,
81
- outputTokens: 1944933,
82
- totalCostUSD: 2.40,
83
- avgTimeMs: 164907,
84
- pricingInput: 0.27,
85
- pricingOutput: 1.12,
86
- lastUpdated: '2026-01-20T11:26:00.000Z',
87
- },
88
- {
89
- rank: 3,
90
- agentName: 'GLM-4',
91
- agentVersion: 'GLM-4.7',
92
- modelName: 'GLM-4.7',
93
- tasksCompleted: 180,
94
- passedTasks: 154,
95
- failedTasks: 26,
96
- avgScore: 83.90,
97
- avgFunctional: 72.72,
98
- avgVisual: 80.0,
99
- avgQuality: 79.56,
100
- avgSecurity: 100.0,
101
- avgCost: 94.0,
102
- avgSpeed: 82.0,
103
- totalTokens: 623474,
104
- inputTokens: 187042,
105
- outputTokens: 436432,
106
- totalCostUSD: 0.73,
107
- avgTimeMs: 56805,
108
- pricingInput: 0.40,
109
- pricingOutput: 1.50,
110
- lastUpdated: '2026-01-20T10:20:00.000Z',
111
- },
112
- {
113
- rank: 4,
114
- agentName: 'Gemini',
115
- agentVersion: '3-Flash-Preview',
116
- modelName: 'Gemini 3 Flash',
117
- tasksCompleted: 180,
118
- passedTasks: 166,
119
- failedTasks: 14,
120
- avgScore: 83.44,
121
- avgFunctional: 78.39,
122
- avgVisual: 80.0,
123
- avgQuality: 75.11,
124
- avgSecurity: 100.0,
125
- avgCost: 95.0,
126
- avgSpeed: 90.0,
127
- totalTokens: 383991,
128
- inputTokens: 115197,
129
- outputTokens: 268794,
130
- totalCostUSD: 0.86, // (115197/1M)*0.5 + (268794/1M)*3
131
- avgTimeMs: 27822,
132
- pricingInput: 0.5,
133
- pricingOutput: 3.0,
134
- lastUpdated: '2026-01-20T07:29:00.000Z',
135
- },
136
- {
137
- rank: 5,
138
- agentName: 'Gemini',
139
- agentVersion: '3-Pro-Preview',
140
- modelName: 'Gemini 3 Pro Preview',
141
- tasksCompleted: 180,
142
- passedTasks: 136,
143
- failedTasks: 44,
144
- avgScore: 80.17,
145
- avgFunctional: 64.22,
146
- avgVisual: 80.0,
147
- avgQuality: 78.67,
148
- avgSecurity: 100.0,
149
- avgCost: 94.0,
150
- avgSpeed: 88.0,
151
- totalTokens: 612000,
152
- inputTokens: 183600,
153
- outputTokens: 428400,
154
- totalCostUSD: 5.51,
155
- avgTimeMs: 32000,
156
- pricingInput: 2.0,
157
- pricingOutput: 12.0,
158
- lastUpdated: '2026-01-20T15:21:00.000Z',
159
- },
160
- {
161
- rank: 6,
162
- agentName: 'Claude',
163
- agentVersion: 'Sonnet-4.5',
164
- modelName: 'Claude Sonnet 4.5',
165
- tasksCompleted: 180,
166
- passedTasks: 177,
167
- failedTasks: 3,
168
- avgScore: 88.56,
169
- avgFunctional: 83.58,
170
- avgVisual: 80.0,
171
- avgQuality: 80.0,
172
- avgSecurity: 100.0,
173
- avgCost: 85.0,
174
- avgSpeed: 80.0,
175
- totalTokens: 612000,
176
- inputTokens: 183600,
177
- outputTokens: 428400,
178
- totalCostUSD: 6.98, // (183600/1M)*3 + (428400/1M)*15
179
- avgTimeMs: 42000,
180
- pricingInput: 3.0,
181
- pricingOutput: 15.0,
182
- lastUpdated: '2026-01-20T16:46:00.000Z',
183
- },
184
- {
185
- rank: 7,
186
- agentName: 'Claude Opus',
187
- agentVersion: 'Opus-4.5',
188
- modelName: 'Claude Opus 4.5',
189
- tasksCompleted: 180,
190
- passedTasks: 180,
191
- failedTasks: 0,
192
- avgScore: 89.15,
193
- avgFunctional: 85.0,
194
- avgVisual: 80.0,
195
- avgQuality: 80.0,
196
- avgSecurity: 100.0,
197
- avgCost: 70.0,
198
- avgSpeed: 80.0,
199
- totalTokens: 647747,
200
- inputTokens: 194324,
201
- outputTokens: 453423,
202
- totalCostUSD: 12.31, // (194324/1M)*5 + (453423/1M)*25
203
- avgTimeMs: 43958,
204
- pricingInput: 5.0,
205
- pricingOutput: 25.0,
206
- lastUpdated: '2026-01-19T19:25:00.000Z',
207
- },
208
- {
209
- rank: 8,
210
- agentName: 'Claude Haiku',
211
- agentVersion: 'Haiku-4.5',
212
- modelName: 'Claude Haiku 4.5',
213
- tasksCompleted: 180,
214
- passedTasks: 179,
215
- failedTasks: 1,
216
- avgScore: 88.97,
217
- avgFunctional: 84.53,
218
- avgVisual: 80.0,
219
- avgQuality: 79.56,
220
- avgSecurity: 100.0,
221
- avgCost: 88.0,
222
- avgSpeed: 95.0,
223
- totalTokens: 798291,
224
- inputTokens: 239487,
225
- outputTokens: 558804,
226
- totalCostUSD: 3.03,
227
- avgTimeMs: 21570,
228
- pricingInput: 1.0,
229
- pricingOutput: 5.0,
230
- lastUpdated: '2026-01-20T15:21:00.000Z',
231
- },
232
- {
233
- rank: 9,
234
- agentName: 'DeepSeek',
235
- agentVersion: 'v3.2',
236
- modelName: 'DeepSeek v3.2',
237
- tasksCompleted: 180,
238
- passedTasks: 177,
239
- failedTasks: 3,
240
- avgScore: 88.19,
241
- avgFunctional: 83.58,
242
- avgVisual: 80.0,
243
- avgQuality: 80.0,
244
- avgSecurity: 100.0,
245
- avgCost: 96.0,
246
- avgSpeed: 65.0,
247
- totalTokens: 542685,
248
- inputTokens: 162806,
249
- outputTokens: 379879,
250
- totalCostUSD: 0.50,
251
- avgTimeMs: 89633,
252
- pricingInput: 0.30,
253
- pricingOutput: 1.20,
254
- lastUpdated: '2026-01-19T19:44:00.000Z',
255
- },
256
- {
257
- rank: 9,
258
- agentName: 'OpenAI',
259
- agentVersion: 'GPT-5.2',
260
- modelName: 'OpenAI GPT-5.2',
261
- tasksCompleted: 180,
262
- passedTasks: 177,
263
- failedTasks: 3,
264
- avgScore: 88.75,
265
- avgFunctional: 83.58,
266
- avgVisual: 80.0,
267
- avgQuality: 79.56,
268
- avgSecurity: 100.0,
269
- avgCost: 98.0,
270
- avgSpeed: 92.0,
271
- totalTokens: 485000,
272
- inputTokens: 145500,
273
- outputTokens: 339500,
274
- totalCostUSD: 5.01, // (145500/1M)*1.75 + (339500/1M)*14
275
- avgTimeMs: 28000,
276
- pricingInput: 1.75,
277
- pricingOutput: 14.0,
278
- lastUpdated: '2026-01-20T16:35:00.000Z',
279
- },
280
- {
281
- rank: 10,
282
- agentName: 'GLM',
283
- agentVersion: '4.7-Flash',
284
- modelName: 'GLM 4.7 Flash',
285
- tasksCompleted: 180,
286
- passedTasks: 8,
287
- failedTasks: 172,
288
- avgScore: 57.27,
289
- avgFunctional: 3.78,
290
- avgVisual: 80.0,
291
- avgQuality: 80.0,
292
- avgSecurity: 100.0,
293
- avgCost: 99.0,
294
- avgSpeed: 98.0,
295
- totalTokens: 245000,
296
- inputTokens: 73500,
297
- outputTokens: 171500,
298
- totalCostUSD: 0.07,
299
- avgTimeMs: 8000,
300
- pricingInput: 0.07,
301
- pricingOutput: 0.40,
302
- lastUpdated: '2026-01-20T16:23:00.000Z',
303
- },
304
- {
305
- rank: 11,
306
- agentName: 'Grok',
307
- agentVersion: '4-Fast',
308
- modelName: 'Grok 4 Fast',
309
- tasksCompleted: 180,
310
- passedTasks: 178,
311
- failedTasks: 2,
312
- avgScore: 88.80,
313
- avgFunctional: 84.10,
314
- avgVisual: 80.0,
315
- avgQuality: 80.0,
316
- avgSecurity: 100.0,
317
- avgCost: 94.0,
318
- avgSpeed: 72.0,
319
- totalTokens: 520000,
320
- inputTokens: 156000,
321
- outputTokens: 364000,
322
- totalCostUSD: 0.21,
323
- avgTimeMs: 70000,
324
- pricingInput: 0.20,
325
- pricingOutput: 0.50,
326
- lastUpdated: '2026-01-20T18:30:00.000Z',
327
- },
328
- {
329
- rank: 12,
330
- agentName: 'Grok',
331
- agentVersion: '4',
332
- modelName: 'Grok 4',
333
- tasksCompleted: 180,
334
- passedTasks: 176,
335
- failedTasks: 4,
336
- avgScore: 88.00,
337
- avgFunctional: 83.60,
338
- avgVisual: 80.0,
339
- avgQuality: 80.0,
340
- avgSecurity: 100.0,
341
- avgCost: 94.0,
342
- avgSpeed: 70.0,
343
- totalTokens: 480000,
344
- inputTokens: 144000,
345
- outputTokens: 336000,
346
- totalCostUSD: 5.47,
347
- avgTimeMs: 75000,
348
- pricingInput: 3.0,
349
- pricingOutput: 15.0,
350
- lastUpdated: '2026-01-20T18:37:00.000Z',
351
- },
352
- {
353
- rank: 13,
354
- agentName: 'Grok',
355
- agentVersion: '4.1-Fast',
356
- modelName: 'Grok 4.1 Fast',
357
- tasksCompleted: 180,
358
- passedTasks: 175,
359
- failedTasks: 5,
360
- avgScore: 86.80,
361
- avgFunctional: 82.60,
362
- avgVisual: 80.0,
363
- avgQuality: 78.70,
364
- avgSecurity: 100.0,
365
- avgCost: 90.0,
366
- avgSpeed: 68.0,
367
- totalTokens: 580000,
368
- inputTokens: 174000,
369
- outputTokens: 406000,
370
- totalCostUSD: 0.24,
371
- avgTimeMs: 88500,
372
- pricingInput: 0.20,
373
- pricingOutput: 0.50,
374
- lastUpdated: '2026-01-20T18:35:00.000Z',
375
- },
376
- ];
377
-
378
- // Get leaderboard from database
379
- async function getLeaderboardFromDB() {
380
- if (!db) return null;
381
-
382
- try {
383
- const results = await db
384
- .select({
385
- agentName: benchmarkRuns.agentName,
386
- agentVersion: benchmarkRuns.agentVersion,
387
- tasksCompleted: sql<number>`max(${benchmarkRuns.totalTasks})`.as('tasks_completed'),
388
- passedTasks: sql<number>`max(${benchmarkRuns.passedTasks})`.as('passed_tasks'),
389
- failedTasks: sql<number>`max(${benchmarkRuns.failedTasks})`.as('failed_tasks'),
390
- avgScore: sql<number>`max(${benchmarkRuns.avgScore})`.as('avg_score'),
391
- totalTokens: sql<number>`sum(${benchmarkRuns.totalTokens})`.as('total_tokens'),
392
- totalCost: sql<number>`sum(${benchmarkRuns.totalCost})`.as('total_cost'),
393
- lastUpdated: sql<string>`max(${benchmarkRuns.completedAt})`.as('last_updated'),
394
- })
395
- .from(benchmarkRuns)
396
- .where(eq(benchmarkRuns.status, 'completed'))
397
- .groupBy(benchmarkRuns.agentName, benchmarkRuns.agentVersion)
398
- .orderBy(desc(sql`max(${benchmarkRuns.avgScore})`));
399
-
400
- return results.map((r, i) => ({
401
- rank: i + 1,
402
- agentName: r.agentName,
403
- agentVersion: r.agentVersion,
404
- tasksCompleted: Number(r.tasksCompleted) || 180,
405
- avgScore: Number(r.avgScore) || 0,
406
- avgFunctional: Number(r.avgScore) * 0.95 || 0,
407
- avgVisual: Number(r.avgScore) * 0.9 || 0,
408
- avgQuality: Number(r.avgScore) * 0.88 || 0,
409
- avgSecurity: 90,
410
- avgCost: Number(r.totalCost) || 0,
411
- avgSpeed: 85,
412
- totalTokens: Number(r.totalTokens) || 0,
413
- passedTasks: Number(r.passedTasks) || 0,
414
- failedTasks: Number(r.failedTasks) || 0,
415
- lastUpdated: r.lastUpdated || new Date().toISOString(),
416
- }));
417
- } catch (error) {
418
- console.error('Database query failed:', error);
419
- return null;
420
- }
421
- }
422
-
423
- // Get overall leaderboard
424
- leaderboardRoutes.get('/', async (c) => {
425
- const sortBy = c.req.query('sort') || 'avgScore';
426
- const order = c.req.query('order') || 'desc';
427
-
428
- // Try database first
429
- let leaderboard = await getLeaderboardFromDB();
430
-
431
- // Fallback to mock data
432
- if (!leaderboard || leaderboard.length === 0) {
433
- leaderboard = mockLeaderboard;
434
- }
435
-
436
- const sorted = [...leaderboard].sort((a, b) => {
437
- const aVal = (a as unknown as Record<string, number>)[sortBy];
438
- const bVal = (b as unknown as Record<string, number>)[sortBy];
439
- return order === 'desc' ? bVal - aVal : aVal - bVal;
440
- });
441
-
442
- // Update ranks after sorting
443
- sorted.forEach((entry, index) => {
444
- entry.rank = index + 1;
445
- });
446
-
447
- return c.json({
448
- leaderboard: sorted,
449
- totalAgents: sorted.length,
450
- lastUpdated: new Date().toISOString(),
451
- source: db ? 'database' : 'mock',
452
- });
453
- });
454
-
455
- // Get leaderboard by category
456
- leaderboardRoutes.get('/category/:category', async (c) => {
457
- const category = c.req.param('category');
458
-
459
- // For now, return mock data filtered by category
460
- return c.json({
461
- category,
462
- leaderboard: mockLeaderboard,
463
- totalAgents: mockLeaderboard.length,
464
- });
465
- });
466
-
467
- // Get leaderboard by task
468
- leaderboardRoutes.get('/task/*', async (c) => {
469
- const taskId = c.req.path.replace('/api/leaderboard/task/', '');
470
-
471
- // Mock task-specific scores
472
- const taskLeaderboard = mockLeaderboard.map((entry, index) => ({
473
- rank: index + 1,
474
- agentName: entry.agentName,
475
- agentVersion: entry.agentVersion,
476
- taskId,
477
- score: entry.avgScore - Math.random() * 5,
478
- functional: entry.avgFunctional - Math.random() * 5,
479
- visual: entry.avgVisual - Math.random() * 5,
480
- quality: entry.avgQuality - Math.random() * 5,
481
- tokensUsed: Math.floor(entry.totalTokens / 13),
482
- executionTimeMs: 30000 + Math.random() * 60000,
483
- completedAt: new Date().toISOString(),
484
- }));
485
-
486
- return c.json({
487
- taskId,
488
- leaderboard: taskLeaderboard,
489
- totalSubmissions: taskLeaderboard.length,
490
- });
491
- });
492
-
493
- // Get per-task results for all models (for charts)
494
- leaderboardRoutes.get('/task-results', async (c) => {
495
- // Generate simulated per-task results based on model performance
496
- const categories = ['saas-core', 'glue-code', 'ai-integration', 'frontend', 'api-integrations', 'code-evolution'];
497
- const subcategories: Record<string, string[]> = {
498
- 'saas-core': ['auth', 'billing', 'multi-tenant', 'realtime', 'security', 'advanced'],
499
- 'glue-code': ['data-pipeline', 'file-processing', 'message-queue', 'scheduler', 'webhook', 'advanced'],
500
- 'ai-integration': ['embeddings', 'function-calling', 'multimodal', 'rag-chatbot', 'structured-output', 'advanced', 'agents', 'fine-tuning'],
501
- 'frontend': ['components', 'accessibility', 'animation', 'performance', 'forms', 'advanced'],
502
- 'api-integrations': ['communication', 'analytics', 'auth-provider', 'email', 'maps', 'payment', 'social', 'storage', 'stripe', 'advanced'],
503
- 'code-evolution': ['legacy-migration', 'performance', 'refactoring', 'security', 'testing', 'advanced'],
504
- };
505
-
506
- const taskResults: Array<{
507
- taskId: string;
508
- category: string;
509
- subcategory: string;
510
- results: Array<{
511
- modelName: string;
512
- score: number;
513
- functional: number;
514
- quality: number;
515
- passed: boolean;
516
- tokens: number;
517
- timeMs: number;
518
- cost: number;
519
- }>;
520
- }> = [];
521
-
522
- let taskIndex = 0;
523
- for (const category of categories) {
524
- const subs = subcategories[category] || ['default'];
525
- const tasksPerSub = Math.ceil(30 / subs.length);
526
-
527
- for (const sub of subs) {
528
- for (let i = 0; i < tasksPerSub && taskIndex < 180; i++) {
529
- const taskId = `${category}/${sub}/task-${i + 1}`;
530
-
531
- // Generate results for each model based on their overall performance
532
- const results = mockLeaderboard.map(model => {
533
- // Add some variance per task based on category difficulty
534
- const categoryBonus: Record<string, number> = {
535
- 'saas-core': 2,
536
- 'frontend': 3,
537
- 'api-integrations': 1,
538
- 'glue-code': -1,
539
- 'ai-integration': -2,
540
- 'code-evolution': -3,
541
- };
542
-
543
- const baseScore = model.avgScore + (categoryBonus[category] || 0);
544
- const variance = (Math.sin(taskIndex * 0.5 + model.rank) * 5); // Deterministic variance
545
- const score = Math.max(0, Math.min(100, baseScore + variance));
546
- const passed = score >= 60;
547
-
548
- return {
549
- modelName: model.modelName,
550
- score: parseFloat(score.toFixed(1)),
551
- functional: parseFloat((score * 0.95).toFixed(1)),
552
- quality: parseFloat((model.avgQuality + variance * 0.3).toFixed(1)),
553
- passed,
554
- tokens: Math.round(model.totalTokens / 180 * (0.8 + Math.random() * 0.4)),
555
- timeMs: Math.round(model.avgTimeMs * (0.7 + Math.random() * 0.6)),
556
- cost: parseFloat(((model.totalCostUSD / 180) * (0.8 + Math.random() * 0.4)).toFixed(4)),
557
- };
558
- });
559
-
560
- taskResults.push({
561
- taskId,
562
- category,
563
- subcategory: sub,
564
- results,
565
- });
566
-
567
- taskIndex++;
568
- }
569
- }
570
- }
571
-
572
- return c.json({
573
- tasks: taskResults,
574
- totalTasks: taskResults.length,
575
- models: mockLeaderboard.map(m => m.modelName),
576
- categories,
577
- });
578
- });
579
-
580
- // Get category performance breakdown
581
- leaderboardRoutes.get('/category-performance', async (c) => {
582
- const categories = ['saas-core', 'glue-code', 'ai-integration', 'frontend', 'api-integrations', 'code-evolution'];
583
-
584
- const performance = categories.map(category => {
585
- const categoryBonus: Record<string, number> = {
586
- 'saas-core': 2,
587
- 'frontend': 3,
588
- 'api-integrations': 1,
589
- 'glue-code': -1,
590
- 'ai-integration': -2,
591
- 'code-evolution': -3,
592
- };
593
-
594
- return {
595
- category,
596
- models: mockLeaderboard.map(model => ({
597
- modelName: model.modelName,
598
- avgScore: parseFloat((model.avgScore + (categoryBonus[category] || 0)).toFixed(1)),
599
- passRate: parseFloat((((model.passedTasks || 0) / 180) * 100 + (categoryBonus[category] || 0)).toFixed(1)),
600
- avgTokens: Math.round(model.totalTokens / 6),
601
- avgTimeMs: Math.round(model.avgTimeMs * (1 + (categoryBonus[category] || 0) * 0.05)),
602
- avgCost: parseFloat((model.totalCostUSD / 6).toFixed(3)),
603
- })),
604
- };
605
- });
606
-
607
- return c.json({
608
- performance,
609
- categories,
610
- models: mockLeaderboard.map(m => m.modelName),
611
- });
612
- });
613
-
614
- // Get agent stats
615
- leaderboardRoutes.get('/agent/:agentName', async (c) => {
616
- const agentName = c.req.param('agentName');
617
-
618
- const entry = mockLeaderboard.find(
619
- (e) => e.agentName.toLowerCase() === agentName.toLowerCase()
620
- );
621
-
622
- if (!entry) {
623
- return c.json({ error: 'Agent not found' }, 404);
624
- }
625
-
626
- // Calculate proportional task completion based on agent's completion rate
627
- const completionRate = entry.tasksCompleted / TOTAL_TASKS;
628
- const taskBreakdown = [
629
- { category: 'saas-core', avgScore: entry.avgScore + 2, tasksCompleted: Math.round(TASKS_PER_CATEGORY * completionRate), totalTasks: TASKS_PER_CATEGORY },
630
- { category: 'glue-code', avgScore: entry.avgScore - 1, tasksCompleted: Math.round(TASKS_PER_CATEGORY * completionRate), totalTasks: TASKS_PER_CATEGORY },
631
- { category: 'ai-integration', avgScore: entry.avgScore - 3, tasksCompleted: Math.round(TASKS_PER_CATEGORY * completionRate * 0.9), totalTasks: TASKS_PER_CATEGORY },
632
- { category: 'frontend', avgScore: entry.avgScore + 4, tasksCompleted: Math.round(TASKS_PER_CATEGORY * completionRate), totalTasks: TASKS_PER_CATEGORY },
633
- { category: 'api-integrations', avgScore: entry.avgScore + 1, tasksCompleted: Math.round(TASKS_PER_CATEGORY * completionRate), totalTasks: TASKS_PER_CATEGORY },
634
- { category: 'code-evolution', avgScore: entry.avgScore - 2, tasksCompleted: Math.round(TASKS_PER_CATEGORY * completionRate * 0.85), totalTasks: TASKS_PER_CATEGORY },
635
- ];
636
-
637
- return c.json({
638
- agent: entry,
639
- taskBreakdown,
640
- totalTasks: TOTAL_TASKS,
641
- recentSubmissions: [],
642
- });
643
- });
644
-
645
- // Record a new benchmark run result
646
- leaderboardRoutes.post('/runs', async (c) => {
647
- if (!db) {
648
- return c.json({ error: 'Database not available' }, 503);
649
- }
650
-
651
- const body = await c.req.json();
652
-
653
- const [run] = await db.insert(benchmarkRuns).values({
654
- agentName: body.agentName,
655
- agentVersion: body.agentVersion,
656
- seed: body.seed,
657
- status: body.status || 'completed',
658
- totalTasks: body.totalTasks,
659
- passedTasks: body.passedTasks,
660
- failedTasks: body.failedTasks,
661
- avgScore: body.avgScore,
662
- totalCost: body.totalCost,
663
- totalTokens: body.totalTokens,
664
- totalDurationMs: body.totalDurationMs,
665
- completedAt: new Date(),
666
- }).returning();
667
-
668
- return c.json(run, 201);
669
- });
670
-
671
- // Get current model pricing
672
- leaderboardRoutes.get('/pricing', async (c) => {
673
- const seen = new Set<string>();
674
- const pricingTable = Object.entries(MODEL_PRICING)
675
- .filter(([_, p]) => {
676
- if (seen.has(p.model)) return false;
677
- seen.add(p.model);
678
- return true;
679
- })
680
- .map(([key, p]) => ({
681
- key,
682
- provider: p.provider,
683
- model: p.model,
684
- inputPerMillion: p.input,
685
- outputPerMillion: p.output,
686
- lastUpdated: p.lastUpdated,
687
- }))
688
- .sort((a, b) => a.provider.localeCompare(b.provider));
689
-
690
- return c.json({
691
- pricing: pricingTable,
692
- source: 'OpenRouter',
693
- lastUpdated: '2026-01-21',
694
- note: 'Prices in USD per 1 million tokens',
695
- });
696
- });
697
-
698
- export { leaderboardRoutes };